feat: add parsing from files
Some checks failed
.NET Test Pipeline / build-and-test (push) Failing after 49s
Build and Deploy Docker Container / build-and-deploy (push) Successful in 1m42s

This commit is contained in:
2025-02-03 03:44:40 +03:00
parent dc08285ec8
commit 885b937b0b
3 changed files with 319 additions and 28 deletions

View File

@ -6,9 +6,11 @@ using Mirea.Api.DataAccess.Persistence;
using Mirea.Api.Endpoint.Common.Interfaces;
using Mirea.Api.Endpoint.Configuration.Model;
using Mirea.Api.Endpoint.Sync.Common;
using Mirea.Tools.Schedule.Parser.Domain;
using Mirea.Tools.Schedule.WebParser;
using Mirea.Tools.Schedule.WebParser.Common.Domain;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
@ -220,35 +222,15 @@ internal partial class ScheduleSynchronizer(UberDbContext dbContext, IOptionsSna
await dbContext.LessonAssociations.BulkSynchronizeAsync(_lessonAssociation.GetAll(), bulkOperation => bulkOperation.BatchSize = 1000, cancellationToken);
}
public async Task StartSync(CancellationToken cancellationToken)
private async Task Sync(Func<CancellationToken, Task<List<GroupResult>>> parseDataAsync, CancellationToken cancellationToken)
{
var pairPeriods = config.Value.ScheduleSettings?.PairPeriod;
var startTerm = config.Value.ScheduleSettings?.StartTerm;
if (pairPeriods == null || startTerm == null)
{
logger.LogWarning("It is not possible to synchronize the schedule due to the fact that the {Arg1} or {Arg2} variable is not initialized.",
nameof(pairPeriods),
nameof(startTerm));
return;
}
Stopwatch watch = new();
watch.Start();
var parser = new Parser
{
Pairs = pairPeriods
.ToDictionary(x => x.Key,
x => (x.Value.Start, x.Value.End)),
TermStart = startTerm.Value.ToDateTime(new TimeOnly(0, 0, 0))
};
try
{
logger.LogDebug("Start parsing schedule");
var data = await parser.ParseAsync(cancellationToken);
var data = await parseDataAsync(cancellationToken);
watch.Stop();
var parsingTime = watch.ElapsedMilliseconds;
@ -282,11 +264,255 @@ internal partial class ScheduleSynchronizer(UberDbContext dbContext, IOptionsSna
catch (Exception ex)
{
logger.LogError(ex, "An error occurred during synchronization.");
maintenanceMode.DisableMaintenanceMode();
throw;
}
finally
{
maintenanceMode.DisableMaintenanceMode();
}
}
/// <summary>
/// Synchronizes the schedule using the web parser, configured from the schedule settings.
/// </summary>
/// <remarks>
/// When the pair periods or the term start are not configured, a warning is logged and
/// the method returns without synchronizing.
/// </remarks>
/// <param name="cancellationToken">Token forwarded to the parsing and synchronization steps.</param>
/// <returns>A task that completes when the synchronization has finished.</returns>
public async Task StartSync(CancellationToken cancellationToken)
{
    // ToDictionary is an extension method: without the second "?." a null PairPeriod would
    // throw ArgumentNullException instead of reaching the "not initialized" warning below.
    var pairPeriods = config.Value.ScheduleSettings?.PairPeriod?
        .ToDictionary(x => x.Key, x => (x.Value.Start, x.Value.End));

    var startTerm = config.Value.ScheduleSettings?.StartTerm;

    if (pairPeriods == null || startTerm == null)
    {
        logger.LogWarning("It is not possible to synchronize the schedule due to the fact that the {Arg1} or {Arg2} variable is not initialized.",
            nameof(pairPeriods),
            nameof(startTerm));

        return;
    }

    var parser = new Parser
    {
        // pairPeriods already holds (Start, End) tuples, so it is assigned without another copy.
        Pairs = pairPeriods,
        TermStart = startTerm.Value.ToDateTime(new TimeOnly(0, 0, 0))
    };

    try
    {
        await Sync(parser.ParseAsync, cancellationToken);
    }
    catch (Exception ex)
    {
        // NOTE(review): Sync appears to log and rethrow the same failure, so the error may be logged twice - confirm.
        logger.LogError(ex, "An error occurred during synchronization.");
        throw;
    }
    finally
    {
        maintenanceMode.DisableMaintenanceMode();
    }
}
/// <summary>
/// Synchronizes the schedule from the supplied files instead of the web parser.
/// </summary>
/// <remarks>
/// When the pair periods are not configured, a warning is logged and the method returns
/// without synchronizing.
/// </remarks>
/// <param name="files">
/// Entries to parse. <c>File</c> is passed as-is to the file parser; <c>Campus</c> is used
/// for lecture halls that do not name a campus themselves.
/// </param>
/// <param name="cancellationToken">Token observed by the parallel parsing, the conversion and the synchronization.</param>
/// <returns>A task that completes when the synchronization has finished.</returns>
public async Task StartSync(List<(string File, string Campus)> files, CancellationToken cancellationToken)
{
    // Presumably yields so that the synchronous parsing below does not run inline in the caller - confirm.
    await Task.Yield();

    // ToDictionary is an extension method: without the second "?." a null PairPeriod would
    // throw ArgumentNullException instead of reaching the "not initialized" warning below.
    var pairPeriods = config.Value.ScheduleSettings?.PairPeriod?
        .ToDictionary(x => x.Key, x => (x.Value.Start, x.Value.End));

    if (pairPeriods == null)
    {
        logger.LogWarning("It is not possible to synchronize the schedule due to the fact that the {Arg1} variable is not initialized.",
            nameof(pairPeriods));

        return;
    }

    // Parses every file in parallel and gathers the converted lessons into a single list.
    Task<List<GroupResult>> ParseTask(CancellationToken ctx)
    {
        var mappedData = new ConcurrentBag<GroupResult>();

        ParallelOptions options = new() { CancellationToken = ctx, MaxDegreeOfParallelism = Environment.ProcessorCount };

        Parallel.ForEach(files, options, (file) =>
        {
            var parser = new Tools.Schedule.Parser.Parser();

            // Flow the token so that a cancellation is also honored while a file is being converted,
            // not only between loop iterations.
            var result = ConvertToGroupResults(parser.Parse(file.File, pairPeriods), file.Campus, ctx);

            foreach (var item in result)
            {
                mappedData.Add(item);
            }
        });

        return Task.FromResult(mappedData.ToList());
    }

    try
    {
        await Sync(ParseTask, cancellationToken);
    }
    catch (Exception ex)
    {
        // NOTE(review): Sync appears to log and rethrow the same failure, so the error may be logged twice - confirm.
        logger.LogError(ex, "An error occurred during synchronization.");
        throw;
    }
    finally
    {
        maintenanceMode.DisableMaintenanceMode();
    }
}
/// <summary>
/// Flattens parsed groups into one <see cref="GroupResult"/> per lesson.
/// Lessons whose type of occupation is null, empty or whitespace are left out.
/// </summary>
/// <param name="groups">Parsed groups; their days, pairs and lessons are walked in order.</param>
/// <param name="campusDefault">Campus passed on to the lecture hall parsing as the default campus.</param>
/// <param name="cancellationToken">Checked once at the start of every group.</param>
/// <returns>The flattened lessons in traversal order.</returns>
private static List<GroupResult> ConvertToGroupResults(IEnumerable<GroupInfo> groups, string campusDefault, CancellationToken cancellationToken = default)
{
    var flattened = new List<GroupResult>();

    foreach (var groupInfo in groups)
    {
        cancellationToken.ThrowIfCancellationRequested();

        foreach (var day in groupInfo.Days)
        {
            foreach (var slot in day.Lessons)
            {
                foreach (var lesson in slot.Value)
                {
                    if (!string.IsNullOrWhiteSpace(lesson.TypeOfOccupation))
                    {
                        // The discipline text also carries the week specification.
                        var (specialWeeks, excluded) = ParseWeeks(lesson.Discipline);
                        var (halls, hallCampuses) = ParseLectureHalls(lesson.LectureHall, campusDefault);

                        flattened.Add(new GroupResult
                        {
                            Day = day.DayOfWeek,
                            Pair = slot.Key,
                            IsEven = lesson.IsEven,
                            Group = groupInfo.GroupName,
                            Discipline = NormalizeDiscipline(lesson.Discipline),
                            Professor = ParseProfessors(lesson.Professor),
                            TypeOfOccupation = lesson.TypeOfOccupation,
                            LectureHalls = halls,
                            Campuses = hallCampuses,
                            SpecialWeek = specialWeeks,
                            IsExclude = excluded
                        });
                    }
                }
            }
        }
    }

    return flattened;
}
/// <summary>
/// Extracts professor names ("surname initials") from a raw cell value.
/// </summary>
/// <param name="input">Raw text; line feeds and commas are treated as separators.</param>
/// <returns>
/// <c>null</c> when <paramref name="input"/> is null, empty or whitespace; otherwise the
/// names found, which may be an empty array.
/// </returns>
private static string[]? ParseProfessors(string? input)
{
    if (string.IsNullOrWhiteSpace(input))
    {
        return null;
    }

    // Turn separators into spaces, then collapse every whitespace run into a single space.
    var separated = input.Replace("\n", " ").Replace(",", " ");
    var singleSpaced = Regex.Replace(separated, @"\s+", " ").Trim();

    var names = new List<string>();

    foreach (Match found in ProfessorFullName().Matches(singleSpaced))
    {
        // Initials are optional; Trim drops the trailing space when they are absent.
        var fullName = $"{found.Groups["surname"].Value} {found.Groups["initials"].Value}".Trim();

        if (!string.IsNullOrEmpty(fullName))
        {
            names.Add(fullName);
        }
    }

    return names.ToArray();
}
/// <summary>
/// Extracts the week specification embedded in a discipline string.
/// </summary>
/// <param name="discipline">Discipline text that may contain a week list such as "1,3-5 н.".</param>
/// <returns>
/// <c>(null, null)</c> when no week specification is found. Otherwise the distinct week
/// numbers in ascending order, and a flag that is <c>true</c> when the optional "кр" prefix
/// (regex group 1) is present.
/// </returns>
private static (int[]? weeks, bool? isExclude) ParseWeeks(string discipline)
{
    var match = ParseSpecificWeeks().Match(discipline);

    if (!match.Success)
    {
        return (null, null);
    }

    var numbers = new List<int>();

    // Group 2 holds the comma-separated list of single weeks and "start-end" ranges.
    foreach (var range in match.Groups[2].Value.Split(','))
    {
        if (range.Contains('-'))
        {
            var parts = range.Split('-');

            // Enumerable.Range throws on a negative count, so a descending range such as "5-3"
            // is skipped like any other unparsable entry instead of aborting the whole parse.
            if (int.TryParse(parts[0], out var start) &&
                int.TryParse(parts[1], out var end) &&
                end >= start)
            {
                numbers.AddRange(Enumerable.Range(start, end - start + 1));
            }
        }
        else if (int.TryParse(range, out var num))
        {
            numbers.Add(num);
        }
    }

    return (
        weeks: numbers.Distinct().OrderBy(x => x).ToArray(),
        isExclude: match.Groups[1].Success
    );
}
/// <summary>
/// Cleans up a discipline string: line breaks become spaces, whitespace runs are collapsed,
/// the week specification is removed and the result is trimmed.
/// </summary>
/// <param name="input">Raw discipline text.</param>
/// <returns>The cleaned discipline text.</returns>
private static string NormalizeDiscipline(string input)
{
    var singleLine = input.Replace("\n", " ").Replace("\r", " ");

    // Collapse runs of two or more whitespace characters into one space.
    var collapsed = Regex.Replace(singleLine, @"\s{2,}", " ");

    // Rewrite a single whitespace character in front of a token of three or more characters as a space.
    var respaced = Regex.Replace(collapsed, @"(\S+)\s(\S{3,})", "$1 $2");

    var withoutWeeks = ParseSpecificWeeks().Replace(respaced, "");

    return withoutWeeks.Trim();
}
/// <summary>
/// Splits a raw lecture hall cell into parallel arrays of lecture halls and campuses.
/// </summary>
/// <param name="input">Raw text, e.g. several halls, each optionally followed by "(campus)".</param>
/// <param name="defaultCampus">Campus recorded for halls that carry no campus in parentheses.</param>
/// <returns>
/// <c>(null, null)</c> when <paramref name="input"/> is null, empty or whitespace; otherwise
/// two arrays of equal length where the campus at index i belongs to the hall at index i.
/// </returns>
private static (string[]? lectureHalls, string[]? campuses) ParseLectureHalls(string? input, string defaultCampus)
{
    if (string.IsNullOrWhiteSpace(input))
    {
        return (null, null);
    }

    var halls = new List<string>();
    var hallCampuses = new List<string>();

    foreach (Match entry in ParseLectureCampus().Matches(input))
    {
        if (entry.Groups["lectureWithCampus"].Success)
        {
            // Text before the first '(' is the hall, text after the last '(' is the campus.
            var pieces = entry.Value.Split('(');
            var campus = pieces.LastOrDefault()?.Trim(')').Trim();
            var hall = pieces.FirstOrDefault()?.Trim();

            if (!string.IsNullOrEmpty(campus) && !string.IsNullOrEmpty(hall))
            {
                hallCampuses.Add(campus);
                halls.Add(hall);
            }

            continue;
        }

        if (!entry.Groups["lecture"].Success)
        {
            continue;
        }

        var plainHall = entry.Value.Trim();

        if (string.IsNullOrEmpty(plainHall))
        {
            continue;
        }

        hallCampuses.Add(defaultCampus);
        halls.Add(plainHall);
    }

    return (
        lectureHalls: halls.ToArray(),
        campuses: hallCampuses.ToArray()
    );
}
// Matches a group name shaped as four word characters, a hyphen, two digits, a hyphen and two digits.
// NOTE(review): the trailing lookahead for a "п/г" marker is itself optional ("(?=...)?"), so it does not
// appear to constrain what is matched - confirm the intent.
[GeneratedRegex(@"\w{4}-\d{2}-\d{2}(?=\s?\d?\s?[Пп]/?[Гг]\s?\d?)?")]
private static partial Regex OnlyGroupName();
// Matches a professor name. Named group "surname": an uppercase Cyrillic letter followed by lowercase
// Cyrillic letters, optionally with a hyphenated second part. Optional named group "initials": an
// uppercase letter and a dot, optionally followed by a second uppercase letter and a dot.
// NOTE(review): IgnorePatternWhitespace is set although the pattern contains no literal whitespace.
[GeneratedRegex(@"(?<surname>[А-ЯЁ][а-яё]+(-[А-ЯЁ][а-яё]+)?)\s*(?<initials>[А-ЯЁ]\.[А-ЯЁ]?\.?)?", RegexOptions.IgnorePatternWhitespace)]
private static partial Regex ProfessorFullName();
// Matches a week specification. Group 1 is the optional "кр" prefix (with an optional dot); group 2 is a
// comma-separated list of week numbers and "start-end" ranges; the match ends with "н" and an optional dot.
// Matching is case-insensitive using the ru-RU culture.
// ParseWeeks reads group 1 as the exclude flag and group 2 as the week list.
[GeneratedRegex(@"([Кк]р\.?)?\s*((\d+-\d+|\d+)(,\s*\d+(-\d+)?)*)\s*[Нн]\.?", RegexOptions.IgnoreCase, "ru-RU")]
private static partial Regex ParseSpecificWeeks();
// Matches one lecture hall entry. Named group "lectureWithCampus": a run of characters other than comma,
// period and newline, followed by a parenthesised token made of letters, an optional hyphen and digits.
// Named group "lecture": a run of characters other than comma, period and newline, used when the first
// alternative does not match.
[GeneratedRegex(@"(?<lectureWithCampus>[^,.\n]+\s?\([А-Яа-яA-Za-z]+-?\d+\))|(?<lecture>[^,.\n]+)")]
private static partial Regex ParseLectureCampus();
}