MireaBackend/Endpoint/Sync/ScheduleSynchronizer.cs
Polianin Nikita 885b937b0b
Some checks failed
.NET Test Pipeline / build-and-test (push) Failing after 49s
Build and Deploy Docker Container / build-and-deploy (push) Successful in 1m42s
feat: add parsing from files
2025-02-03 03:44:40 +03:00

518 lines
19 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using Mirea.Api.DataAccess.Domain.Schedule;
using Mirea.Api.DataAccess.Persistence;
using Mirea.Api.Endpoint.Common.Interfaces;
using Mirea.Api.Endpoint.Configuration.Model;
using Mirea.Api.Endpoint.Sync.Common;
using Mirea.Tools.Schedule.Parser.Domain;
using Mirea.Tools.Schedule.WebParser;
using Mirea.Tools.Schedule.WebParser.Common.Domain;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text.RegularExpressions;
using System.Threading;
using System.Threading.Tasks;
using Group = Mirea.Api.DataAccess.Domain.Schedule.Group;
namespace Mirea.Api.Endpoint.Sync;
internal partial class ScheduleSynchronizer(UberDbContext dbContext, IOptionsSnapshot<GeneralConfig> config, ILogger<ScheduleSynchronizer> logger,
IMaintenanceModeService maintenanceMode)
{
// In-memory repositories that the mapping step fills before anything is written to the database.
// Repositories built from a dbContext set are seeded by enumerating that set when the field
// initializer runs; the ones built from [] start empty.
private readonly DataRepository<Campus> _campuses = new([.. dbContext.Campuses]);
private readonly DataRepository<Discipline> _disciplines = new([.. dbContext.Disciplines]);
private readonly DataRepository<Faculty> _faculties = new([.. dbContext.Faculties]);
private readonly DataRepository<Group> _groups = new([.. dbContext.Groups]);
private readonly DataRepository<LectureHall> _lectureHalls = new([.. dbContext.LectureHalls]);
// Starts empty: only lessons created while mapping parsed rows end up here.
private readonly DataRepository<Lesson> _lessons = new([]);
// Starts empty: only associations created while mapping parsed rows end up here.
private readonly DataRepository<LessonAssociation> _lessonAssociation = new([]);
private readonly DataRepository<Professor> _professors = new([.. dbContext.Professors]);
private readonly DataRepository<TypeOfOccupation> _typeOfOccupations = new([.. dbContext.TypeOfOccupations]);
// Starts empty: only week entries created while mapping parsed rows end up here.
private readonly DataRepository<SpecificWeek> _specificWeeks = new([]);
// TODO: transfer this mapping to storage instead of hard-coding it.
/// <summary>
/// Maps the first letter of a group name to the short name of the faculty it belongs to.
/// </summary>
/// <param name="c">Faculty marker letter (the caller passes the first character of the group name).</param>
/// <returns>Short faculty name.</returns>
/// <exception cref="ArgumentOutOfRangeException">The letter is not one of the known markers.</exception>
private static string GetFaculty(char c)
{
    switch (c)
    {
        case 'У':
            return "ИТУ";
        case 'Б':
            return "ИКБ";
        case 'Х':
            return "ИТХТ";
        // Two different letters map to the same faculty.
        case 'Э':
        case 'Т':
            return "ИПТИП";
        case 'Р':
            return "ИРИ";
        case 'К':
            return "ИИИ";
        // Two different letters map to the same faculty.
        case 'И':
        case 'П':
            return "ИИТ";
        default:
            throw new ArgumentOutOfRangeException(nameof(c), c, null);
    }
}
/// <summary>
/// Maps one parsed schedule row onto the in-memory repositories: resolves (or creates) the faculty,
/// group, type of occupation, professors, campuses, lecture halls, discipline and lesson, then
/// records one <see cref="LessonAssociation"/> per professor / lecture hall pair.
/// </summary>
/// <remarks>
/// Called from <c>Parallel.ForEach</c>, so several rows are mapped at the same time.
/// NOTE(review): this relies on <c>DataRepository</c> being safe for concurrent use — confirm.
/// </remarks>
/// <param name="groupInfo">Parsed schedule row.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// The first letter of the group name is not a known faculty marker (thrown by <c>GetFaculty</c>).
/// </exception>
private void ParallelSync(GroupResult groupInfo)
{
    // The group name is upper-cased below, so upper-case the faculty marker as well;
    // otherwise a lower-case group name would be rejected by GetFaculty.
    var facultyName = GetFaculty(char.ToUpperInvariant(groupInfo.Group[0]));
    var faculty = _faculties.GetOrCreate(
        f => f.Name.Equals(facultyName, StringComparison.OrdinalIgnoreCase),
        () => new Faculty
        {
            Name = facultyName
        });

    var groupName = OnlyGroupName().Match(groupInfo.Group.ToUpperInvariant()).Value;
    var group = _groups.GetOrCreate(
        g => g.Name.Equals(groupName, StringComparison.OrdinalIgnoreCase),
        () => new Group
        {
            Name = groupName,
            Faculty = faculty
        });

    // Normalize once so that the lookup key and the stored value agree. Storing the untrimmed
    // value while looking up the trimmed one would create a new entry for every padded row.
    var occupationName = groupInfo.TypeOfOccupation.Trim();
    var typeOfOccupation = _typeOfOccupations.GetOrCreate(
        t => t.ShortName.Equals(occupationName, StringComparison.OrdinalIgnoreCase),
        () => new TypeOfOccupation
        {
            ShortName = occupationName.ToUpperInvariant()
        });

    // null means "the row carries no professor information"; an empty list means
    // "professors were given but none could be abbreviated".
    List<Professor>? professors = null;
    if (groupInfo.Professor != null)
    {
        professors = [];
        foreach (var fullName in groupInfo.Professor)
        {
            var shortName = AbbreviateProfessorName(fullName);
            if (shortName == null)
                continue;

            professors.Add(_professors.GetOrCreate(
                x => (x.AltName == null || x.AltName.Equals(fullName, StringComparison.OrdinalIgnoreCase)) &&
                     x.Name.Equals(shortName, StringComparison.OrdinalIgnoreCase),
                () => new Professor
                {
                    AltName = fullName,
                    Name = shortName
                }));
        }
    }

    List<LectureHall>? halls = null;
    if (groupInfo.Campuses is { Length: > 0 } campusCodes)
    {
        halls = [];
        for (var i = 0; i < campusCodes.Length; i++)
        {
            var campusCode = campusCodes[i];

            // The campus is registered even when no lecture hall is paired with it.
            var campus = _campuses.GetOrCreate(
                c => c.CodeName.Equals(campusCode, StringComparison.OrdinalIgnoreCase),
                () => new Campus
                {
                    CodeName = campusCode.ToUpperInvariant()
                });

            // Lecture halls are paired with campuses by index; there may be fewer halls than campuses.
            if (groupInfo.LectureHalls == null || groupInfo.LectureHalls.Length <= i)
                continue;

            var hallName = groupInfo.LectureHalls[i];
            halls.Add(_lectureHalls.GetOrCreate(
                l => l.Name.Equals(hallName, StringComparison.OrdinalIgnoreCase) &&
                     string.Equals(l.Campus?.CodeName, campus.CodeName, StringComparison.OrdinalIgnoreCase),
                () => new LectureHall
                {
                    Name = hallName,
                    Campus = campus
                }));
        }
    }

    var discipline = _disciplines.GetOrCreate(
        d => d.Name.Equals(groupInfo.Discipline, StringComparison.OrdinalIgnoreCase),
        () => new Discipline
        {
            Name = groupInfo.Discipline
        });

    var lesson = _lessons.GetOrCreate(
        l => l.IsEven == groupInfo.IsEven &&
             l.DayOfWeek == groupInfo.Day &&
             l.PairNumber == groupInfo.Pair &&
             l.Discipline?.Name == discipline.Name &&
             l.Group?.Name == group.Name,
        () =>
        {
            var created = new Lesson
            {
                IsEven = groupInfo.IsEven,
                DayOfWeek = groupInfo.Day,
                PairNumber = groupInfo.Pair,
                Discipline = discipline,
                Group = group,
                IsExcludedWeeks = groupInfo.IsExclude
            };

            // Specific weeks are attached only when the lesson is first created.
            if (groupInfo.SpecialWeek != null)
            {
                foreach (var week in groupInfo.SpecialWeek)
                {
                    _specificWeeks.Create(() => new SpecificWeek
                    {
                        Lesson = created,
                        WeekNumber = week
                    });
                }
            }

            return created;
        });

    // Always emit at least one association, even when there is neither a professor nor a hall.
    var associationCount = int.Max(int.Max(professors?.Count ?? -1, halls?.Count ?? -1), 1);
    for (var i = 0; i < associationCount; i++)
    {
        var professor = professors?.ElementAtOrDefault(i);
        var lectureHall = halls?.ElementAtOrDefault(i);
        _lessonAssociation.Create(() => new LessonAssociation
        {
            Professor = professor,
            Lesson = lesson,
            LectureHall = lectureHall,
            TypeOfOccupation = typeOfOccupation
        });
    }
}

/// <summary>
/// Builds the abbreviated form "Surname F.S." from a space-separated name: the first part is kept
/// as is, the second and (if present) third parts are reduced to their first character plus a dot.
/// </summary>
/// <param name="fullName">Name as produced by the parser.</param>
/// <returns>The abbreviated name, or <c>null</c> when the input has fewer than two parts.</returns>
private static string? AbbreviateProfessorName(string? fullName)
{
    if (string.IsNullOrWhiteSpace(fullName))
        return null;

    // Dropping empty entries keeps repeated or leading spaces from producing empty parts,
    // which would make the [0] indexer below throw.
    var parts = fullName.Split(' ', StringSplitOptions.RemoveEmptyEntries);
    if (parts.Length < 2)
        return null;

    var shortName = $"{parts[0]} {parts[1][0]}.";
    if (parts.Length > 2)
        shortName += $"{parts[2][0]}.";

    return shortName;
}
/// <summary>
/// Writes the mapped repositories to the database through <c>BulkSynchronizeAsync</c>, one set at a time.
/// </summary>
/// <remarks>
/// NOTE(review): groups found in the database are only marked for removal on the context here;
/// no <c>SaveChanges</c> call is visible in this method — confirm where that removal is flushed.
/// NOTE(review): the order of the bulk calls looks like "referenced entities first" — confirm before reordering.
/// </remarks>
/// <param name="cancellationToken">Token passed to every database call.</param>
private async Task SaveChanges(CancellationToken cancellationToken)
{
    foreach (var mapped in _groups.GetAll())
    {
        var stored = await dbContext.Groups.FirstOrDefaultAsync(g => g.Id == mapped.Id, cancellationToken);
        if (stored == null)
            continue;

        dbContext.Remove(stored);
    }

    await dbContext.Disciplines.BulkSynchronizeAsync(_disciplines.GetAll(), op => op.BatchSize = 1000, cancellationToken);
    await dbContext.Professors.BulkSynchronizeAsync(_professors.GetAll(), op => op.BatchSize = 1000, cancellationToken);
    await dbContext.TypeOfOccupations.BulkSynchronizeAsync(_typeOfOccupations.GetAll(), op => op.BatchSize = 100, cancellationToken);
    await dbContext.Faculties.BulkSynchronizeAsync(_faculties.GetAll(), op => op.BatchSize = 100, cancellationToken);
    await dbContext.Campuses.BulkSynchronizeAsync(_campuses.GetAll(), op => op.BatchSize = 10, cancellationToken);
    await dbContext.LectureHalls.BulkSynchronizeAsync(_lectureHalls.GetAll(), op => op.BatchSize = 1000, cancellationToken);
    await dbContext.Groups.BulkSynchronizeAsync(_groups.GetAll(), op => op.BatchSize = 100, cancellationToken);
    await dbContext.Lessons.BulkSynchronizeAsync(_lessons.GetAll(), op => op.BatchSize = 1000, cancellationToken);
    await dbContext.SpecificWeeks.BulkSynchronizeAsync(_specificWeeks.GetAll(), op => op.BatchSize = 1000, cancellationToken);
    await dbContext.LessonAssociations.BulkSynchronizeAsync(_lessonAssociation.GetAll(), op => op.BatchSize = 1000, cancellationToken);
}
/// <summary>
/// Runs one synchronization pass: parses the schedule, maps the parsed rows in parallel,
/// then saves the result while maintenance mode is enabled. Timings of the three phases are logged.
/// </summary>
/// <param name="parseDataAsync">Delegate that produces the parsed schedule rows.</param>
/// <param name="cancellationToken">Token passed to parsing, mapping and saving.</param>
/// <remarks>
/// Any exception is logged and rethrown. Maintenance mode is disabled in <c>finally</c>,
/// whether or not the pass got far enough to enable it.
/// </remarks>
private async Task Sync(Func<CancellationToken, Task<List<GroupResult>>> parseDataAsync, CancellationToken cancellationToken)
{
    var stopwatch = Stopwatch.StartNew();
    try
    {
        // Phase 1: parsing.
        logger.LogDebug("Start parsing schedule");
        var parsedRows = await parseDataAsync(cancellationToken);
        stopwatch.Stop();
        var parsingMs = stopwatch.ElapsedMilliseconds;

        // Phase 2: mapping parsed rows onto the in-memory repositories.
        stopwatch.Restart();
        var parallelOptions = new ParallelOptions
        {
            CancellationToken = cancellationToken,
            MaxDegreeOfParallelism = Environment.ProcessorCount
        };
        logger.LogDebug("Start mapping parsed data");
        Parallel.ForEach(parsedRows, parallelOptions, ParallelSync);
        stopwatch.Stop();
        var mappingMs = stopwatch.ElapsedMilliseconds;

        // Phase 3: saving, with maintenance mode switched on for the duration.
        stopwatch.Restart();
        maintenanceMode.EnableMaintenanceMode();
        logger.LogDebug("Start saving changing");
        await SaveChanges(cancellationToken);
        maintenanceMode.DisableMaintenanceMode();
        stopwatch.Stop();
        var savingMs = stopwatch.ElapsedMilliseconds;

        logger.LogInformation("Parsing time: {ParsingTime}ms Mapping time: {MappingTime}ms Saving time: {SavingTime}ms Total time: {TotalTime}ms",
            parsingMs, mappingMs, savingMs, parsingMs + mappingMs + savingMs);
    }
    catch (Exception ex)
    {
        logger.LogError(ex, "An error occurred during synchronization.");
        throw;
    }
    finally
    {
        maintenanceMode.DisableMaintenanceMode();
    }
}
/// <summary>
/// Synchronizes the schedule using the configured pair periods and term start date.
/// </summary>
/// <remarks>
/// If either setting is missing, a warning is logged and nothing is synchronized.
/// Failures are logged and maintenance mode is switched off inside <c>Sync</c>, so this method
/// does not wrap the call in its own try/catch — doing so logged every exception twice.
/// </remarks>
/// <param name="cancellationToken">Token passed to the synchronization pass.</param>
public async Task StartSync(CancellationToken cancellationToken)
{
    var scheduleSettings = config.Value.ScheduleSettings;
    var pairPeriods = scheduleSettings?.PairPeriod
        .ToDictionary(x => x.Key, x => (x.Value.Start, x.Value.End));
    var startTerm = scheduleSettings?.StartTerm;

    if (pairPeriods == null || startTerm == null)
    {
        logger.LogWarning("It is not possible to synchronize the schedule due to the fact that the {Arg1} or {Arg2} variable is not initialized.",
            nameof(pairPeriods),
            nameof(startTerm));
        return;
    }

    var parser = new Parser
    {
        // pairPeriods is a fresh local dictionary of the required shape; no second copy is needed.
        Pairs = pairPeriods,
        TermStart = startTerm.Value.ToDateTime(new TimeOnly(0, 0, 0))
    };

    await Sync(parser.ParseAsync, cancellationToken);
}
/// <summary>
/// Synchronizes the schedule from local files, each paired with the campus code to use
/// for lecture halls that do not name a campus themselves.
/// </summary>
/// <remarks>
/// If the pair periods are not configured, a warning is logged and nothing is synchronized.
/// Failures are logged and maintenance mode is switched off inside <c>Sync</c>, so this method
/// does not wrap the call in its own try/catch — doing so logged every exception twice.
/// </remarks>
/// <param name="files">Files to parse together with their default campus code.</param>
/// <param name="cancellationToken">Token passed to parsing and to the synchronization pass.</param>
public async Task StartSync(List<(string File, string Campus)> files, CancellationToken cancellationToken)
{
    // Yield first so that the rest of the method runs as a continuation rather than inline in the caller.
    await Task.Yield();

    var pairPeriods = config.Value.ScheduleSettings?.PairPeriod
        .ToDictionary(x => x.Key, x => (x.Value.Start, x.Value.End));
    if (pairPeriods == null)
    {
        logger.LogWarning("It is not possible to synchronize the schedule due to the fact that the {Arg1} variable is not initialized.",
            nameof(pairPeriods));
        return;
    }

    // Parses all files in parallel; the work is synchronous, so the result is wrapped in a completed task.
    Task<List<GroupResult>> ParseTask(CancellationToken ctx)
    {
        var mappedData = new ConcurrentBag<GroupResult>();
        ParallelOptions options = new() { CancellationToken = ctx, MaxDegreeOfParallelism = Environment.ProcessorCount };
        Parallel.ForEach(files, options, file =>
        {
            var parser = new Tools.Schedule.Parser.Parser();
            var parsedGroups = parser.Parse(file.File, pairPeriods);

            // Pass the token on so that converting a large file can be cancelled as well.
            foreach (var item in ConvertToGroupResults(parsedGroups, file.Campus, ctx))
                mappedData.Add(item);
        });
        return Task.FromResult(mappedData.ToList());
    }

    await Sync(ParseTask, cancellationToken);
}
/// <summary>
/// Flattens parsed groups (group → day → pair → lesson) into one <see cref="GroupResult"/> per lesson.
/// Lessons whose type of occupation is blank are skipped.
/// </summary>
/// <param name="groups">Parsed groups.</param>
/// <param name="campusDefault">Campus code used for lecture halls that do not specify one.</param>
/// <param name="cancellationToken">Checked once per group.</param>
/// <returns>The flattened rows, in source order.</returns>
private static List<GroupResult> ConvertToGroupResults(IEnumerable<GroupInfo> groups, string campusDefault, CancellationToken cancellationToken = default)
{
    var rows = new List<GroupResult>();

    foreach (var groupInfo in groups)
    {
        cancellationToken.ThrowIfCancellationRequested();

        var lessons =
            from day in groupInfo.Days
            from pair in day.Lessons
            from lesson in pair.Value
            where !string.IsNullOrWhiteSpace(lesson.TypeOfOccupation)
            select (day, pair, lesson);

        foreach (var (day, pair, lesson) in lessons)
        {
            // The week specification is embedded in the discipline text.
            var (weeks, isExclude) = ParseWeeks(lesson.Discipline);
            var (lectureHalls, campuses) = ParseLectureHalls(lesson.LectureHall, campusDefault);

            rows.Add(new GroupResult
            {
                Day = day.DayOfWeek,
                Pair = pair.Key,
                IsEven = lesson.IsEven,
                Group = groupInfo.GroupName,
                Discipline = NormalizeDiscipline(lesson.Discipline),
                Professor = ParseProfessors(lesson.Professor),
                TypeOfOccupation = lesson.TypeOfOccupation,
                LectureHalls = lectureHalls,
                Campuses = campuses,
                SpecialWeek = weeks,
                IsExclude = isExclude
            });
        }
    }

    return rows;
}
/// <summary>
/// Extracts professor names from a free-text cell.
/// </summary>
/// <param name="input">Raw cell text; names may be separated by commas, spaces or line breaks.</param>
/// <returns>
/// <c>null</c> when the input is blank; otherwise one "surname initials" string per match
/// (an empty array when nothing matches).
/// </returns>
private static string[]? ParseProfessors(string? input)
{
    if (string.IsNullOrWhiteSpace(input))
        return null;

    // Turn separators into spaces, then collapse whitespace runs to a single space.
    var separatorsRemoved = input.Replace("\n", " ").Replace(",", " ");
    var normalized = Regex.Replace(separatorsRemoved, @"\s+", " ").Trim();

    var names = new List<string>();
    foreach (Match match in ProfessorFullName().Matches(normalized))
    {
        // The initials group is optional; Trim drops the trailing space when it is absent.
        var name = $"{match.Groups["surname"].Value} {match.Groups["initials"].Value}".Trim();
        if (!string.IsNullOrEmpty(name))
            names.Add(name);
    }

    return names.ToArray();
}
/// <summary>
/// Extracts the week specification embedded in a discipline string.
/// </summary>
/// <param name="discipline">Discipline text that may contain a week list.</param>
/// <returns>
/// <c>(null, null)</c> when no week specification is found; otherwise the distinct week numbers in
/// ascending order and a flag telling whether the first capture group of the pattern (the optional
/// prefix in front of the numbers) was present.
/// </returns>
private static (int[]? weeks, bool? isExclude) ParseWeeks(string discipline)
{
    var match = ParseSpecificWeeks().Match(discipline);
    if (!match.Success)
        return (null, null);

    // A sorted set gives both de-duplication and ascending order.
    var weeks = new SortedSet<int>();

    // Group 2 holds the comma-separated list of single numbers and "a-b" ranges.
    foreach (var token in match.Groups[2].Value.Split(','))
    {
        if (!token.Contains('-'))
        {
            if (int.TryParse(token, out var single))
                weeks.Add(single);
            continue;
        }

        var bounds = token.Split('-');
        if (!int.TryParse(bounds[0], out var first) || !int.TryParse(bounds[1], out var last))
            continue;

        // A reversed range ("5-3") would hand Enumerable.Range a negative count and throw,
        // aborting the whole parse; read it as the same range written backwards.
        if (last < first)
            (first, last) = (last, first);

        weeks.UnionWith(Enumerable.Range(first, last - first + 1));
    }

    return (
        weeks: weeks.ToArray(),
        isExclude: match.Groups[1].Success
    );
}
/// <summary>
/// Cleans up a discipline string: puts it on one line, collapses whitespace runs
/// and strips the embedded week specification.
/// </summary>
/// <param name="input">Raw discipline text.</param>
/// <returns>The cleaned, trimmed discipline text.</returns>
private static string NormalizeDiscipline(string input)
{
    // Line breaks become spaces, then runs of two or more whitespace characters become one space.
    var singleLine = input.Replace("\n", " ").Replace("\r", " ");
    var collapsed = Regex.Replace(singleLine, @"\s{2,}", " ");

    // Rewrites the single whitespace character between a token and a following token
    // of at least three characters as a plain space.
    var respaced = Regex.Replace(collapsed, @"(\S+)\s(\S{3,})", "$1 $2");

    var withoutWeeks = ParseSpecificWeeks().Replace(respaced, "");
    return withoutWeeks.Trim();
}
/// <summary>
/// Splits a lecture-hall cell into parallel arrays of hall names and campus codes.
/// </summary>
/// <param name="input">Raw cell text.</param>
/// <param name="defaultCampus">Campus code used for halls written without a "(campus)" suffix.</param>
/// <returns>
/// <c>(null, null)</c> when the input is blank; otherwise two arrays of equal length where
/// element <c>i</c> of the second is the campus of element <c>i</c> of the first.
/// </returns>
private static (string[]? lectureHalls, string[]? campuses) ParseLectureHalls(string? input, string defaultCampus)
{
    if (string.IsNullOrWhiteSpace(input))
        return (null, null);

    var hallNames = new List<string>();
    var campusCodes = new List<string>();

    foreach (Match match in ParseLectureCampus().Matches(input))
    {
        if (match.Groups["lectureWithCampus"].Success)
        {
            // "hall (campus)": the hall is the text before the first '(',
            // the campus is the text after the last '(' without the closing ')'.
            var pieces = match.Value.Split('(');
            var campusCode = pieces[^1].Trim(')').Trim();
            var hallName = pieces[0].Trim();
            if (campusCode.Length == 0 || hallName.Length == 0)
                continue;

            campusCodes.Add(campusCode);
            hallNames.Add(hallName);
            continue;
        }

        if (!match.Groups["lecture"].Success)
            continue;

        // Hall without an explicit campus: fall back to the default one.
        var bareHall = match.Value.Trim();
        if (bareHall.Length == 0)
            continue;

        campusCodes.Add(defaultCampus);
        hallNames.Add(bareHall);
    }

    return (
        lectureHalls: hallNames.ToArray(),
        campuses: campusCodes.ToArray()
    );
}
/// <summary>
/// Matches a group code of the form "XXXX-NN-NN": four word characters, a hyphen, two digits,
/// a hyphen and two digits. The trailing lookahead for a sub-group suffix is optional,
/// so it does not change what is matched.
/// </summary>
[GeneratedRegex(@"\w{4}-\d{2}-\d{2}(?=\s?\d?\s?[Пп]/?[Гг]\s?\d?)?")]
private static partial Regex OnlyGroupName();
/// <summary>
/// Matches a professor name: a capitalized Cyrillic surname (optionally double-barrelled with a hyphen)
/// captured as <c>surname</c>, followed by optional initials captured as <c>initials</c>.
/// </summary>
[GeneratedRegex(@"(?<surname>[А-ЯЁ][а-яё]+(-[А-ЯЁ][а-яё]+)?)\s*(?<initials>[А-ЯЁ]\.[А-ЯЁ]?\.?)?", RegexOptions.IgnorePatternWhitespace)]
private static partial Regex ProfessorFullName();
/// <summary>
/// Matches a week specification: an optional prefix (group 1), a comma-separated list of week
/// numbers and "a-b" ranges (group 2), and a trailing marker letter with an optional dot.
/// Matching is case-insensitive using the ru-RU culture.
/// </summary>
[GeneratedRegex(@"([Кк]р\.?)?\s*((\d+-\d+|\d+)(,\s*\d+(-\d+)?)*)\s*[Нн]\.?", RegexOptions.IgnoreCase, "ru-RU")]
private static partial Regex ParseSpecificWeeks();
/// <summary>
/// Matches one lecture-hall entry. <c>lectureWithCampus</c> is a run of characters without comma,
/// dot or newline followed by a parenthesized campus code (letters, optional hyphen, digits);
/// <c>lecture</c> is any run of characters without comma, dot or newline.
/// </summary>
[GeneratedRegex(@"(?<lectureWithCampus>[^,.\n]+\s?\([А-Яа-яA-Za-z]+-?\d+\))|(?<lecture>[^,.\n]+)")]
private static partial Regex ParseLectureCampus();
}