Intermediate commit

This commit is contained in:
pcjones
2024-02-12 21:04:18 +01:00
parent 0071b0c080
commit 4ca89f8bdd
14 changed files with 624 additions and 135 deletions

View File

@@ -1,4 +1,5 @@
using System.Text.RegularExpressions;
using Microsoft.Extensions.Logging.Abstractions;
using System.Text.RegularExpressions;
using UmlautAdaptarr.Utilities;
namespace UmlautAdaptarr.Models
@@ -8,40 +9,68 @@ namespace UmlautAdaptarr.Models
public int ArrId { get; set; }
public string ExternalId { get; set; }
public string Title { get; set; }
public bool HasGermanUmlaut => Title?.HasGermanUmlauts() ?? false;
public bool HasUmlaut => Title?.HasUmlauts() ?? false;
public string ExpectedTitle { get; set; }
public string? ExpectedAuthor { get; set; }
public string? GermanTitle { get; set; }
public string[] TitleSearchVariations { get; set; }
public string[] TitleMatchVariations { get; set; }
public string[] AuthorMatchVariations { get; set; }
public string MediaType { get; set; }
// TODO public MediaType instead of string
public SearchItem(int arrId, string externalId, string title, string expectedTitle, string? germanTitle, string mediaType, string[]? aliases)
public SearchItem(
int arrId,
string externalId,
string title,
string expectedTitle,
string? germanTitle,
string mediaType,
string[]? aliases,
string? expectedAuthor = null)
{
ArrId = arrId;
ExternalId = externalId;
Title = title;
ExpectedTitle = expectedTitle;
ExpectedAuthor = expectedAuthor;
GermanTitle = germanTitle;
TitleSearchVariations = GenerateTitleVariations(germanTitle).ToArray();
MediaType = mediaType;
var allTitleVariations = new List<string>(TitleSearchVariations);
// If aliases are not null, generate variations for each and add them to the list
// TODO (not necessarily here) only use deu and eng alias
if (aliases != null)
if (mediaType == "audio" && expectedAuthor != null)
{
foreach (var alias in aliases)
// e.g. Die Ärzte - best of die Ärzte
if (expectedTitle.Contains(expectedAuthor))
{
allTitleVariations.AddRange(GenerateTitleVariations(alias));
var titleWithoutAuthorName = expectedTitle.Replace(expectedAuthor, string.Empty).RemoveExtraWhitespaces().Trim();
TitleMatchVariations = GenerateVariations(titleWithoutAuthorName, mediaType).ToArray();
}
else
{
TitleMatchVariations = GenerateVariations(expectedTitle, mediaType).ToArray();
}
TitleSearchVariations = GenerateVariations($"{expectedAuthor} {expectedTitle}", mediaType).ToArray();
AuthorMatchVariations = GenerateVariations(expectedAuthor, mediaType).ToArray();
}
else
{
TitleSearchVariations = GenerateVariations(germanTitle, mediaType).ToArray();
var allTitleVariations = new List<string>(TitleSearchVariations);
TitleMatchVariations = allTitleVariations.Distinct().ToArray();
// If aliases are not null, generate variations for each and add them to the list
// TODO (not necessarily here) only use deu and eng alias
if (aliases != null)
{
foreach (var alias in aliases)
{
allTitleVariations.AddRange(GenerateVariations(alias, mediaType));
}
}
TitleMatchVariations = allTitleVariations.Distinct().ToArray();
}
}
private IEnumerable<string> GenerateTitleVariations(string? germanTitle)
private IEnumerable<string> GenerateVariations(string? germanTitle, string mediaType)
{
if (germanTitle == null)
{
@@ -76,13 +105,17 @@ namespace UmlautAdaptarr.Models
});
}
// If a German title starts with Der/Die/Das, also accept variations without that prefix.
// Audio titles are excluded from this. The inner parentheses are required: && binds tighter
// than ||, so without them the audio exclusion would only apply to the "Der" check and
// audio titles starting with "Die" or "Das" would still be stripped.
// NOTE(review): the prefix test has no trailing space, so words such as "Dieser" also match - confirm intended.
if (mediaType != "audio"
    && (cleanTitle.StartsWith("Der", StringComparison.Ordinal)
        || cleanTitle.StartsWith("Die", StringComparison.Ordinal)
        || cleanTitle.StartsWith("Das", StringComparison.Ordinal)))
{
    // The prefix is tested on cleanTitle, but the first three characters are cut from germanTitle.
    var cleanTitleWithoutArticle = germanTitle[3..].Trim();
    baseVariations.AddRange(GenerateVariations(cleanTitleWithoutArticle, mediaType));
}
// Remove multiple spaces
var cleanedVariations = baseVariations.Select(variation => MultipleWhitespaceRegex().Replace(variation, " "));
var cleanedVariations = baseVariations.Select(variation => variation.RemoveExtraWhitespaces());
return cleanedVariations.Distinct();
}
[GeneratedRegex(@"\s+")]
private static partial Regex MultipleWhitespaceRegex();
}
}