From 885b937b0b4a093a83e16c22b43b4dd388bee673 Mon Sep 17 00:00:00 2001 From: Polianin Nikita Date: Mon, 3 Feb 2025 03:44:40 +0300 Subject: [PATCH] feat: add parsing from files --- .../V1/Configuration/ScheduleController.cs | 68 ++++- Endpoint/Endpoint.csproj | 7 +- Endpoint/Sync/ScheduleSynchronizer.cs | 272 ++++++++++++++++-- 3 files changed, 319 insertions(+), 28 deletions(-) diff --git a/Endpoint/Controllers/V1/Configuration/ScheduleController.cs b/Endpoint/Controllers/V1/Configuration/ScheduleController.cs index b841b9f..178704b 100644 --- a/Endpoint/Controllers/V1/Configuration/ScheduleController.cs +++ b/Endpoint/Controllers/V1/Configuration/ScheduleController.cs @@ -1,6 +1,8 @@ using Asp.Versioning; using Cronos; +using Microsoft.AspNetCore.Http; using Microsoft.AspNetCore.Mvc; +using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Logging; using Microsoft.Extensions.Options; using Mirea.Api.DataAccess.Persistence; @@ -11,15 +13,19 @@ using Mirea.Api.Endpoint.Common.MapperDto; using Mirea.Api.Endpoint.Common.Services; using Mirea.Api.Endpoint.Configuration.Model; using Mirea.Api.Endpoint.Configuration.Model.GeneralSettings; +using Mirea.Api.Endpoint.Sync; using System; using System.Collections.Generic; using System.ComponentModel.DataAnnotations; +using System.IO; using System.Linq; +using System.Threading; +using System.Threading.Tasks; namespace Mirea.Api.Endpoint.Controllers.V1.Configuration; [ApiVersion("1.0")] -public class ScheduleController(ILogger logger, IOptionsSnapshot config, UberDbContext dbContext) : ConfigurationBaseController +public class ScheduleController(ILogger logger, IOptionsSnapshot config, UberDbContext dbContext, IServiceProvider provider) : ConfigurationBaseController { /// /// Retrieves the cron update schedule and calculates the next scheduled tasks based on the provided depth. @@ -113,7 +119,7 @@ public class ScheduleController(ILogger logger, IOptionsSnap return generalConfig.ScheduleSettings!.CronUpdateSkipDateList .ConvertToDto(); } - + /// /// Updates the list of cron update skip dates in the configuration. @@ -139,4 +145,62 @@ public class ScheduleController(ILogger logger, IOptionsSnap return Ok(); } + + /// + /// Uploads schedule files and initiates synchronization. + /// + /// The list of schedule files to upload. + /// The default campus for each uploaded file. Must match the number of files. + /// If true, removes all existing lessons before synchronization. Default is false. + /// Success or failure. + /// + /// Thrown if: + /// - No files are provided. + /// - The number of default campuses does not match the number of files. + /// - Any default campus is null or empty. + /// + [HttpPost("Upload")] + public async Task UploadScheduleFiles(List? files, [FromQuery] string[]? defaultCampus, [FromQuery] bool force = false) + { + if (files == null || files.Count == 0) + throw new ControllerArgumentException("No files were found."); + + if (defaultCampus == null || files.Count != defaultCampus.Length) + throw new ControllerArgumentException("No default campuses are specified for the file."); + + if (defaultCampus.Any(string.IsNullOrEmpty)) + throw new ControllerArgumentException("Each file should have a default campus."); + + var tempDirectory = Path.Combine(Path.GetTempPath(), Path.GetFileNameWithoutExtension(Path.GetRandomFileName())); + + if (!Directory.Exists(tempDirectory)) + Directory.CreateDirectory(tempDirectory); + + List<(string, string)> filePaths = []; + + for (var i = 0; i < files.Count; i++) + { + if (files[i].Length <= 0) + continue; + + var filePath = Path.Combine(tempDirectory, files[i].FileName); + + await using var stream = new FileStream(filePath, FileMode.Create); + await files[i].CopyToAsync(stream); + + filePaths.Add((filePath, defaultCampus[i])); + } + + var sync = (ScheduleSynchronizer)ActivatorUtilities.GetServiceOrCreateInstance(provider, typeof(ScheduleSynchronizer)); + + if (force) + { + dbContext.Lessons.RemoveRange(dbContext.Lessons.ToList()); + await dbContext.SaveChangesAsync(); + } + + _ = sync.StartSync(filePaths, CancellationToken.None); + + return Ok(); + } } \ No newline at end of file diff --git a/Endpoint/Endpoint.csproj b/Endpoint/Endpoint.csproj index 0d86702..6f8bd07 100644 --- a/Endpoint/Endpoint.csproj +++ b/Endpoint/Endpoint.csproj @@ -5,9 +5,9 @@ disable enable Winsomnia - 1.0-rc6 - 1.0.2.6 - 1.0.2.6 + 1.0-rc7 + 1.0.2.7 + 1.0.2.7 Mirea.Api.Endpoint $(AssemblyName) Exe @@ -41,6 +41,7 @@ + diff --git a/Endpoint/Sync/ScheduleSynchronizer.cs b/Endpoint/Sync/ScheduleSynchronizer.cs index fc6f3f6..2dd533c 100644 --- a/Endpoint/Sync/ScheduleSynchronizer.cs +++ b/Endpoint/Sync/ScheduleSynchronizer.cs @@ -6,9 +6,11 @@ using Mirea.Api.DataAccess.Persistence; using Mirea.Api.Endpoint.Common.Interfaces; using Mirea.Api.Endpoint.Configuration.Model; using Mirea.Api.Endpoint.Sync.Common; +using Mirea.Tools.Schedule.Parser.Domain; using Mirea.Tools.Schedule.WebParser; using Mirea.Tools.Schedule.WebParser.Common.Domain; using System; +using System.Collections.Concurrent; using System.Collections.Generic; using System.Diagnostics; using System.Linq; @@ -220,35 +222,15 @@ internal partial class ScheduleSynchronizer(UberDbContext dbContext, IOptionsSna await dbContext.LessonAssociations.BulkSynchronizeAsync(_lessonAssociation.GetAll(), bulkOperation => bulkOperation.BatchSize = 1000, cancellationToken); } - public async Task StartSync(CancellationToken cancellationToken) + private async Task Sync(Func>> parseDataAsync, CancellationToken cancellationToken) { - var pairPeriods = config.Value.ScheduleSettings?.PairPeriod; - var startTerm = config.Value.ScheduleSettings?.StartTerm; - - if (pairPeriods == null || startTerm == null) - { - logger.LogWarning("It is not possible to synchronize the schedule due to the fact that the {Arg1} or {Arg2} variable is not initialized.", - nameof(pairPeriods), - nameof(startTerm)); - - return; - } - Stopwatch watch = new(); watch.Start(); - var parser = new Parser - { - Pairs = pairPeriods - .ToDictionary(x => x.Key, - x => (x.Value.Start, x.Value.End)), - TermStart = startTerm.Value.ToDateTime(new TimeOnly(0, 0, 0)) - }; - try { logger.LogDebug("Start parsing schedule"); - var data = await parser.ParseAsync(cancellationToken); + var data = await parseDataAsync(cancellationToken); watch.Stop(); var parsingTime = watch.ElapsedMilliseconds; @@ -282,11 +264,255 @@ internal partial class ScheduleSynchronizer(UberDbContext dbContext, IOptionsSna catch (Exception ex) { logger.LogError(ex, "An error occurred during synchronization."); - maintenanceMode.DisableMaintenanceMode(); throw; } + finally + { + maintenanceMode.DisableMaintenanceMode(); + } } + public async Task StartSync(CancellationToken cancellationToken) + { + var pairPeriods = config.Value.ScheduleSettings?.PairPeriod + .ToDictionary(x => x.Key, x => (x.Value.Start, x.Value.End)); + + var startTerm = config.Value.ScheduleSettings?.StartTerm; + + if (pairPeriods == null || startTerm == null) + { + logger.LogWarning("It is not possible to synchronize the schedule due to the fact that the {Arg1} or {Arg2} variable is not initialized.", + nameof(pairPeriods), + nameof(startTerm)); + + return; + } + + var parser = new Parser + { + Pairs = pairPeriods + .ToDictionary(x => x.Key, + x => (x.Value.Start, x.Value.End)), + TermStart = startTerm.Value.ToDateTime(new TimeOnly(0, 0, 0)) + }; + + try + { + await Sync(parser.ParseAsync, cancellationToken); + } + catch (Exception ex) + { + logger.LogError(ex, "An error occurred during synchronization."); + throw; + } + finally + { + maintenanceMode.DisableMaintenanceMode(); + } + } + + public async Task StartSync(List<(string File, string Campus)> files, CancellationToken cancellationToken) + { + await Task.Yield(); + var pairPeriods = config.Value.ScheduleSettings?.PairPeriod + .ToDictionary(x => x.Key, x => (x.Value.Start, x.Value.End)); + + if (pairPeriods == null) + { + logger.LogWarning("It is not possible to synchronize the schedule due to the fact that the {Arg1} variable is not initialized.", + nameof(pairPeriods)); + + return; + } + + try + { + Task> ParseTask(CancellationToken ctx) + { + var mappedData = new ConcurrentBag(); + + ParallelOptions options = new() { CancellationToken = ctx, MaxDegreeOfParallelism = Environment.ProcessorCount }; + Parallel.ForEach(files, options, (file) => + { + var parser = new Tools.Schedule.Parser.Parser(); + var result = ConvertToGroupResults(parser.Parse(file.File, pairPeriods), file.Campus); + + foreach (var item in result) mappedData.Add(item); + }); + + return Task.FromResult(mappedData.ToList()); + } + + await Sync(ParseTask, cancellationToken); + } + catch (Exception ex) + { + logger.LogError(ex, "An error occurred during synchronization."); + throw; + } + finally + { + maintenanceMode.DisableMaintenanceMode(); + } + } + + private static List ConvertToGroupResults(IEnumerable groups, string campusDefault, CancellationToken cancellationToken = default) + { + var result = new List(); + + foreach (var group in groups) + { + cancellationToken.ThrowIfCancellationRequested(); + + foreach (var day in group.Days) + { + foreach (var pair in day.Lessons) + { + foreach (var lesson in pair.Value) + { + if (string.IsNullOrWhiteSpace(lesson.TypeOfOccupation)) + continue; + + var (weeks, isExclude) = ParseWeeks(lesson.Discipline); + + var (lectureHalls, campuses) = ParseLectureHalls(lesson.LectureHall, campusDefault); + + var groupResult = new GroupResult + { + Day = day.DayOfWeek, + Pair = pair.Key, + IsEven = lesson.IsEven, + Group = group.GroupName, + Discipline = NormalizeDiscipline(lesson.Discipline), + Professor = ParseProfessors(lesson.Professor), + TypeOfOccupation = lesson.TypeOfOccupation, + LectureHalls = lectureHalls, + Campuses = campuses, + SpecialWeek = weeks, + IsExclude = isExclude + }; + + result.Add(groupResult); + } + } + } + } + + return result; + } + + private static string[]? ParseProfessors(string? input) + { + if (string.IsNullOrWhiteSpace(input)) return null; + + var normalized = Regex.Replace(input + .Replace("\n", " ") + .Replace(",", " "), + @"\s+", " ").Trim(); + + return ProfessorFullName().Matches(normalized) + .Select(m => $"{m.Groups["surname"].Value} {m.Groups["initials"].Value}".Trim()) + .Where(x => !string.IsNullOrEmpty(x)) + .ToArray(); + } + + private static (int[]? weeks, bool? isExclude) ParseWeeks(string discipline) + { + var match = ParseSpecificWeeks().Match(discipline); + + if (!match.Success) return (null, null); + + var numbers = new List(); + var ranges = match.Groups[2].Value.Split(','); + + foreach (var range in ranges) + { + if (range.Contains('-')) + { + var parts = range.Split('-'); + if (int.TryParse(parts[0], out var start) && + int.TryParse(parts[1], out var end)) + { + numbers.AddRange(Enumerable.Range(start, end - start + 1)); + } + } + else + if (int.TryParse(range, out var num)) numbers.Add(num); + } + + return ( + weeks: numbers.Distinct().OrderBy(x => x).ToArray(), + isExclude: match.Groups[1].Success + ); + } + + private static string NormalizeDiscipline(string input) + { + var normalized = Regex.Replace(input + .Replace("\n", " ") + .Replace("\r", " "), + @"\s{2,}", " "); + + normalized = Regex.Replace(normalized, + @"(\S+)\s(\S{3,})", + "$1 $2"); + + normalized = ParseSpecificWeeks().Replace(normalized, ""); + + return normalized.Trim(); + } + + private static (string[]? lectureHalls, string[]? campuses) ParseLectureHalls(string? input, string defaultCampus) + { + if (string.IsNullOrWhiteSpace(input)) + return (null, null); + + var matches = ParseLectureCampus().Matches(input); + var lectureHalls = new List(); + var campuses = new List(); + + foreach (Match match in matches) + { + if (match.Groups["lectureWithCampus"].Success) + { + var raw = match.Value.Split('('); + var campus = raw.LastOrDefault()?.Trim(')').Trim(); + var lecture = raw.FirstOrDefault()?.Trim(); + + if (string.IsNullOrEmpty(campus) || string.IsNullOrEmpty(lecture)) + continue; + + campuses.Add(campus); + lectureHalls.Add(lecture); + } + else if (match.Groups["lecture"].Success) + { + var lecture = match.Value.Trim(); + if (string.IsNullOrEmpty(lecture)) + continue; + + campuses.Add(defaultCampus); + lectureHalls.Add(lecture); + } + } + + return ( + lectureHalls: lectureHalls.ToArray(), + campuses: campuses.ToArray() + ); + } + + + [GeneratedRegex(@"\w{4}-\d{2}-\d{2}(?=\s?\d?\s?[Пп]/?[Гг]\s?\d?)?")] private static partial Regex OnlyGroupName(); + + [GeneratedRegex(@"(?[А-ЯЁ][а-яё]+(-[А-ЯЁ][а-яё]+)?)\s*(?[А-ЯЁ]\.[А-ЯЁ]?\.?)?", RegexOptions.IgnorePatternWhitespace)] + private static partial Regex ProfessorFullName(); + + [GeneratedRegex(@"([Кк]р\.?)?\s*((\d+-\d+|\d+)(,\s*\d+(-\d+)?)*)\s*[Нн]\.?", RegexOptions.IgnoreCase, "ru-RU")] + private static partial Regex ParseSpecificWeeks(); + + [GeneratedRegex(@"(?[^,.\n]+\s?\([А-Яа-яA-Za-z]+-?\d+\))|(?[^,.\n]+)")] + private static partial Regex ParseLectureCampus(); } \ No newline at end of file