8 Commits

Author SHA1 Message Date
pcjones
b741239194 Lidarr optimizations 2024-02-14 23:59:53 +01:00
pcjones
96f8ff9332 Refactor TItleMatchingService 2024-02-14 21:00:24 +01:00
pcjones
e739affb39 Use TitleMatchVariations instead of TitleSearchVariations in SearchItemByTitle 2024-02-14 20:55:13 +01:00
pcjones
92bdf14618 Remove test variable 2024-02-14 20:40:13 +01:00
pcjones
4260b07bc4 Merge branch 'master' of https://github.com/PCJones/UmlautAdaptarr 2024-02-14 11:15:51 +01:00
pcjones
4d2ac194aa Ignore case when filtering distinct title match variations 2024-02-14 11:15:44 +01:00
pcjones
a6f332fd99 Fix hyphen in indexer url not being accepted 2024-02-14 11:14:26 +01:00
Jonas F
9c364cb652 Update README.md 2024-02-13 22:34:49 +01:00
9 changed files with 97 additions and 46 deletions

View File

@@ -55,16 +55,18 @@ Einige Beispiele findet ihr unter Features.
In den Klammern am Ende des Releasenamens (Bild 2 & 4) steht zu Anschauungszwecken der deutsche Titel, der vorher nicht gefunden bzw. akzeptiert wurde. Das bleibt natürlich nicht so ;) In den Klammern am Ende des Releasenamens (Bild 2 & 4) steht zu Anschauungszwecken der deutsche Titel, der vorher nicht gefunden bzw. akzeptiert wurde. Das bleibt natürlich nicht so ;)
**Vorher:** Release wird zwar gefunden, kann aber nicht zugeordnet werden. **Vorher:** Release wird zwar gefunden, kann aber nicht zugeordnet werden.
![Vorherige Suche ohne deutsche Titel](https://i.imgur.com/7pfRzgH.png) ![Vorherige Suche ohne deutsche Titel](https://github.com/PCJones/UmlautAdaptarr/assets/377223/1fce2909-a36c-4f1b-8497-85903357fee3)
**Jetzt:** 2-3 weitere Releases werden gefunden, außerdem meckert Sonarr nicht mehr über den Namen und würde es bei einer automatischen Suche ohne Probleme importieren. **Jetzt:** 2-3 weitere Releases werden gefunden, außerdem meckert Sonarr nicht mehr über den Namen und würde es bei einer automatischen Suche ohne Probleme importieren.
![Jetzige Suche mit deutschen Titeln](https://i.imgur.com/k55YIN9.png) ![Jetzige Suche mit deutschen Titeln](https://github.com/PCJones/UmlautAdaptarr/assets/377223/0edf43ba-2beb-4f22-aaf4-30f9a619dbd6)
**Vorher:** Es werden nur Releases mit dem englischen Titel der Serie gefunden **Vorher:** Es werden nur Releases mit dem englischen Titel der Serie gefunden
![Vorherige Suche, englische Titel](https://i.imgur.com/pbRlOeX.png) ![Vorherige Suche, englische Titel](https://github.com/PCJones/UmlautAdaptarr/assets/377223/ed7ca0fa-ac36-4584-87ac-b29f32dd9ace)
**Jetzt:** Es werden auch Titel mit dem deutschen Namen gefunden :D (haben nicht alle Suchergebnisse auf den Screenshot gepasst) **Jetzt:** Es werden auch Titel mit dem deutschen Namen gefunden :D (haben nicht alle Suchergebnisse auf den Screenshot gepasst)
![Jetzige Suche, deutsche und englische Titel](https://i.imgur.com/eeq0Voj.png) ![Jetzige Suche, deutsche und englische Titel](https://github.com/PCJones/UmlautAdaptarr/assets/377223/1c2dbe1a-5943-4fc4-91ef-29708082900e)
**Vorher:** Die deutsche Produktion `Alone - Überlebe die Wildnis` hat auf [TheTVDB](https://thetvdb.com/series/alone-uberlebe-die-wildnis) den englischen Namen `Alone Germany`. **Vorher:** Die deutsche Produktion `Alone - Überlebe die Wildnis` hat auf [TheTVDB](https://thetvdb.com/series/alone-uberlebe-die-wildnis) den englischen Namen `Alone Germany`.

View File

@@ -184,7 +184,7 @@ namespace UmlautAdaptarr.Controllers
if (categories.Split(',').Any(category => AUDIO_CATEGORY_IDS.Contains(category))) if (categories.Split(',').Any(category => AUDIO_CATEGORY_IDS.Contains(category)))
{ {
var mediaType = "audio"; var mediaType = "audio";
searchItem = await searchItemLookupService.GetOrFetchSearchItemByExternalId(mediaType, title.ToLower()); searchItem = await searchItemLookupService.GetOrFetchSearchItemByExternalId(mediaType, title.GetLidarrTitleForExternalId());
} }
} }
} }

View File

@@ -12,6 +12,7 @@ namespace UmlautAdaptarr.Models
public bool HasUmlaut => Title?.HasUmlauts() ?? false; public bool HasUmlaut => Title?.HasUmlauts() ?? false;
public string ExpectedTitle { get; set; } public string ExpectedTitle { get; set; }
public string? ExpectedAuthor { get; set; } public string? ExpectedAuthor { get; set; }
// TODO rename GermanTitle into Foreign or LocalTitle?
public string? GermanTitle { get; set; } public string? GermanTitle { get; set; }
public string[] TitleSearchVariations { get; set; } public string[] TitleSearchVariations { get; set; }
public string[] TitleMatchVariations { get; set; } public string[] TitleMatchVariations { get; set; }
@@ -42,6 +43,11 @@ namespace UmlautAdaptarr.Models
if (expectedTitle.Contains(expectedAuthor)) if (expectedTitle.Contains(expectedAuthor))
{ {
var titleWithoutAuthorName = expectedTitle.Replace(expectedAuthor, string.Empty).RemoveExtraWhitespaces().Trim(); var titleWithoutAuthorName = expectedTitle.Replace(expectedAuthor, string.Empty).RemoveExtraWhitespaces().Trim();
if (titleWithoutAuthorName.Length < 2)
{
// TODO log warning that this album can't be searched for automatically
}
TitleMatchVariations = GenerateVariations(titleWithoutAuthorName, mediaType).ToArray(); TitleMatchVariations = GenerateVariations(titleWithoutAuthorName, mediaType).ToArray();
} }
else else
@@ -82,17 +88,22 @@ namespace UmlautAdaptarr.Models
} }
TitleMatchVariations = allTitleVariations.Distinct().ToArray(); TitleMatchVariations = allTitleVariations.Distinct(StringComparer.InvariantCultureIgnoreCase).ToArray();
} }
} }
private IEnumerable<string> GenerateVariations(string? germanTitle, string mediaType) private IEnumerable<string> GenerateVariations(string? title, string mediaType)
{ {
if (germanTitle == null) if (title == null)
{
return [];
}
var cleanTitle = title.GetCleanTitle();
if (cleanTitle?.Length == 0)
{ {
return []; return [];
} }
var cleanTitle = germanTitle.RemoveAccentButKeepGermanUmlauts().GetCleanTitle();
// Start with base variations including handling umlauts // Start with base variations including handling umlauts
var baseVariations = new List<string> var baseVariations = new List<string>
@@ -121,11 +132,16 @@ namespace UmlautAdaptarr.Models
}); });
} }
// If a german title starts with der/die/das also accept variations without it // If a title starts with der/die/das also accept variations without it
if (mediaType != "audio" && cleanTitle.StartsWith("Der") || cleanTitle.StartsWith("Die") || cleanTitle.StartsWith("Das")) // Same for english the, an, a
if (cleanTitle.StartsWith("Der ") || cleanTitle.StartsWith("Die ") || cleanTitle.StartsWith("Das ")
|| cleanTitle.StartsWith("The ") || cleanTitle.StartsWith("An "))
{ {
var cleanTitleWithoutArticle = germanTitle[3..].Trim(); var cleanTitleWithoutArticle = title[3..].Trim();
baseVariations.AddRange(GenerateVariations(cleanTitleWithoutArticle, mediaType)); baseVariations.AddRange(GenerateVariations(cleanTitleWithoutArticle, mediaType));
} else if (cleanTitle.StartsWith("A "))
{
var cleanTitleWithoutArticle = title[2..].Trim();
} }
// Remove multiple spaces // Remove multiple spaces

View File

@@ -42,16 +42,17 @@ namespace UmlautAdaptarr.Providers
var lidarrAlbumUrl = $"{_lidarrHost}/api/v1/album?artistId={artistId}&apikey={_lidarrApiKey}"; var lidarrAlbumUrl = $"{_lidarrHost}/api/v1/album?artistId={artistId}&apikey={_lidarrApiKey}";
if (cache.TryGetValue(lidarrAlbumUrl, out List<dynamic>? albums)) // Disable cache for now as it can result in problems when adding new albums that aren't displayed on the artists page initially
{ //if (cache.TryGetValue(lidarrAlbumUrl, out List<dynamic>? albums))
logger.LogInformation($"Using cached albums for {UrlUtilities.RedactApiKey(lidarrAlbumUrl)}"); //{
} // logger.LogInformation($"Using cached albums for {UrlUtilities.RedactApiKey(lidarrAlbumUrl)}");
else //}
{ //else
//{
logger.LogInformation($"Fetching all albums from artistId {artistId} from Lidarr: {UrlUtilities.RedactApiKey(lidarrArtistsUrl)}"); logger.LogInformation($"Fetching all albums from artistId {artistId} from Lidarr: {UrlUtilities.RedactApiKey(lidarrArtistsUrl)}");
var albumApiResponse = await httpClient.GetStringAsync(lidarrAlbumUrl); var albumApiResponse = await httpClient.GetStringAsync(lidarrAlbumUrl);
albums = JsonConvert.DeserializeObject<List<dynamic>>(albumApiResponse); var albums = JsonConvert.DeserializeObject<List<dynamic>>(albumApiResponse);
} //}
if (albums == null) if (albums == null)
{ {
@@ -74,7 +75,7 @@ namespace UmlautAdaptarr.Providers
string[]? aliases = null; string[]? aliases = null;
// Abuse externalId to set the search string Lidarr uses // Abuse externalId to set the search string Lidarr uses
var externalId = expectedTitle.RemoveGermanUmlautDots().RemoveAccent().RemoveSpecialCharacters().RemoveExtraWhitespaces().ToLower(); var externalId = expectedTitle.GetLidarrTitleForExternalId();
var searchItem = new SearchItem var searchItem = new SearchItem
( (

View File

@@ -39,6 +39,7 @@ namespace UmlautAdaptarr.Providers
logger.LogWarning($"Sonarr Show {show.id} doesn't have a tvdbId."); logger.LogWarning($"Sonarr Show {show.id} doesn't have a tvdbId.");
continue; continue;
} }
(var germanTitle, var aliases) = await titleService.FetchGermanTitleAndAliasesByExternalIdAsync(_mediaType, tvdbId); (var germanTitle, var aliases) = await titleService.FetchGermanTitleAndAliasesByExternalIdAsync(_mediaType, tvdbId);
var searchItem = new SearchItem var searchItem = new SearchItem
( (

View File

@@ -84,7 +84,7 @@ namespace UmlautAdaptarr.Services
continue; continue;
} }
// After finding a potential item, compare normalizedTitle with each German title variation // After finding a potential item, compare normalizedTitle with each German title variation
foreach (var variation in item?.TitleSearchVariations ?? []) foreach (var variation in item?.TitleMatchVariations ?? [])
{ {
var normalizedVariation = variation.RemoveAccentButKeepGermanUmlauts().ToLower(); var normalizedVariation = variation.RemoveAccentButKeepGermanUmlauts().ToLower();
if (normalizedTitle.StartsWith(variation, StringComparison.OrdinalIgnoreCase)) if (normalizedTitle.StartsWith(variation, StringComparison.OrdinalIgnoreCase))

View File

@@ -20,7 +20,7 @@ namespace UmlautAdaptarr.Services
if (titleElement != null) if (titleElement != null)
{ {
var originalTitle = titleElement.Value; var originalTitle = titleElement.Value;
var normalizedOriginalTitle = NormalizeTitle(originalTitle); var cleanTitleSeperatedBySpace = ReplaceSeperatorsWithSpace(originalTitle.RemoveAccentButKeepGermanUmlauts());
var categoryElement = item.Element("category"); var categoryElement = item.Element("category");
var category = categoryElement?.Value; var category = categoryElement?.Value;
@@ -34,7 +34,7 @@ namespace UmlautAdaptarr.Services
if (useCacheService) if (useCacheService)
{ {
// Use CacheService to find a matching SearchItem by title // Use CacheService to find a matching SearchItem by title
searchItem = cacheService.SearchItemByTitle(mediaType, normalizedOriginalTitle); searchItem = cacheService.SearchItemByTitle(mediaType, cleanTitleSeperatedBySpace);
} }
if (searchItem == null) if (searchItem == null)
@@ -46,10 +46,10 @@ namespace UmlautAdaptarr.Services
switch (mediaType) switch (mediaType)
{ {
case "tv": case "tv":
FindAndReplaceForMoviesAndTV(logger, searchItem, titleElement, originalTitle, normalizedOriginalTitle!); FindAndReplaceForMoviesAndTV(logger, searchItem, titleElement, originalTitle, cleanTitleSeperatedBySpace!);
break; break;
case "movie": case "movie":
FindAndReplaceForMoviesAndTV(logger, searchItem, titleElement, originalTitle, normalizedOriginalTitle!); FindAndReplaceForMoviesAndTV(logger, searchItem, titleElement, originalTitle, cleanTitleSeperatedBySpace!);
break; break;
case "audio": case "audio":
FindAndReplaceForAudio(searchItem, titleElement, originalTitle!); FindAndReplaceForAudio(searchItem, titleElement, originalTitle!);
@@ -68,13 +68,12 @@ namespace UmlautAdaptarr.Services
var authorMatch = FindBestMatch(searchItem.AuthorMatchVariations, originalTitle.NormalizeForComparison(), originalTitle); var authorMatch = FindBestMatch(searchItem.AuthorMatchVariations, originalTitle.NormalizeForComparison(), originalTitle);
var titleMatch = FindBestMatch(searchItem.TitleMatchVariations, originalTitle.NormalizeForComparison(), originalTitle); var titleMatch = FindBestMatch(searchItem.TitleMatchVariations, originalTitle.NormalizeForComparison(), originalTitle);
if (authorMatch.Item1 && titleMatch.Item1) if (authorMatch.foundMatch && titleMatch.foundMatch)
{ {
int matchEndPositionInOriginal = Math.Max(authorMatch.Item3, titleMatch.Item3); int matchEndPositionInOriginal = Math.Max(authorMatch.bestEndInOriginal, titleMatch.bestEndInOriginal);
var test = originalTitle[matchEndPositionInOriginal];
// Check and adjust for immediate following delimiter // Check and adjust for immediate following delimiter
char[] delimiters = new char[] { ' ', '-', '_', '.' }; char[] delimiters = [' ', '-', '_', '.'];
if (matchEndPositionInOriginal < originalTitle.Length && delimiters.Contains(originalTitle[matchEndPositionInOriginal])) if (matchEndPositionInOriginal < originalTitle.Length && delimiters.Contains(originalTitle[matchEndPositionInOriginal]))
{ {
matchEndPositionInOriginal++; // Skip the delimiter if it's immediately after the match matchEndPositionInOriginal++; // Skip the delimiter if it's immediately after the match
@@ -97,7 +96,7 @@ namespace UmlautAdaptarr.Services
} }
private Tuple<bool, int, int> FindBestMatch(string[] variations, string normalizedOriginal, string originalTitle) private (bool foundMatch, int bestStart, int bestEndInOriginal) FindBestMatch(string[] variations, string normalizedOriginal, string originalTitle)
{ {
bool found = false; bool found = false;
int bestStart = int.MaxValue; int bestStart = int.MaxValue;
@@ -120,8 +119,8 @@ namespace UmlautAdaptarr.Services
} }
} }
if (!found) return Tuple.Create(false, 0, 0); if (!found) return (false, 0, 0);
return Tuple.Create(found, bestStart, bestEndInOriginal); return (found, bestStart, bestEndInOriginal);
} }
// Maps an index from the normalized string back to a corresponding index in the original string // Maps an index from the normalized string back to a corresponding index in the original string
@@ -160,6 +159,7 @@ namespace UmlautAdaptarr.Services
var titleMatchVariations = searchItem.TitleMatchVariations; var titleMatchVariations = searchItem.TitleMatchVariations;
var expectedTitle = searchItem.ExpectedTitle; var expectedTitle = searchItem.ExpectedTitle;
var variationsOrderedByLength = titleMatchVariations!.OrderByDescending(variation => variation.Length); var variationsOrderedByLength = titleMatchVariations!.OrderByDescending(variation => variation.Length);
// Attempt to find a variation that matches the start of the original title // Attempt to find a variation that matches the start of the original title
foreach (var variation in variationsOrderedByLength) foreach (var variation in variationsOrderedByLength)
{ {
@@ -199,8 +199,8 @@ namespace UmlautAdaptarr.Services
} }
} }
// Clean up any leading separators from the suffix // Clean up any leading separator from the suffix
suffix = Regex.Replace(suffix, "^[._ ]+", ""); suffix = Regex.Replace(suffix, "^ +", "");
// TODO EVALUATE! definitely make this optional - this adds GERMAN to the title if the title is German to make sure it's recognized as German // TODO EVALUATE! definitely make this optional - this adds GERMAN to the title if the title is German to make sure it's recognized as German
// can lead to problems with shows such as "dark" that have international dubs // can lead to problems with shows such as "dark" that have international dubs
@@ -226,9 +226,8 @@ namespace UmlautAdaptarr.Services
} }
} }
private static string NormalizeTitle(string title) private static string ReplaceSeperatorsWithSpace(string title)
{ {
title = title.RemoveAccentButKeepGermanUmlauts();
// Replace all known separators with space for normalization // Replace all known separators with space for normalization
return WordSeperationCharRegex().Replace(title, " ".ToString()); return WordSeperationCharRegex().Replace(title, " ".ToString());
} }

View File

@@ -47,20 +47,46 @@ namespace UmlautAdaptarr.Utilities
return stringBuilder.ToString().Normalize(NormalizationForm.FormC); return stringBuilder.ToString().Normalize(NormalizationForm.FormC);
} }
// TODO possibly replace GetCleanTitle with RemoveSpecialCharacters public static string GetLidarrTitleForExternalId(this string text)
{
text = text.RemoveGermanUmlautDots()
.GetCleanTitle()
.ToLower();
// Lidarr removes the, an and a
return TitlePrefixRegex()
.Replace(text, "")
.RemoveExtraWhitespaces()
.Trim();
}
public static string GetCleanTitle(this string text) public static string GetCleanTitle(this string text)
{ {
return text.Replace("(", "").Replace(")", "").Replace("?","").Replace(":", "").Replace("'", ""); return text
.Replace(".", " ")
.Replace(":", " ")
.RemoveAccentButKeepGermanUmlauts()
.RemoveSpecialCharacters(removeUmlauts: false)
.RemoveExtraWhitespaces()
.Trim();
} }
public static string NormalizeForComparison(this string text) public static string NormalizeForComparison(this string text)
{ {
// TODO see if we can replace RemoveGermanUmlautDots() with RemoveSpecialCharacters(removeUmlauts: false);
return text.RemoveGermanUmlautDots().RemoveAccent().RemoveSpecialCharacters().Replace(" ", "").Trim().ToLower(); return text.RemoveGermanUmlautDots().RemoveAccent().RemoveSpecialCharacters().Replace(" ", "").Trim().ToLower();
} }
public static string RemoveSpecialCharacters(this string text) public static string RemoveSpecialCharacters(this string text, bool removeUmlauts = true)
{ {
return SpecialCharactersRegex().Replace(text, ""); if (removeUmlauts)
{
return NoSpecialCharactersRegex().Replace(text, "");
}
else
{
return NoSpecialCharactersExceptUmlautsRegex().Replace(text, "");
}
} }
@@ -101,9 +127,15 @@ namespace UmlautAdaptarr.Utilities
} }
[GeneratedRegex("[^a-zA-Z0-9 ]+", RegexOptions.Compiled)] [GeneratedRegex("[^a-zA-Z0-9 ]+", RegexOptions.Compiled)]
private static partial Regex SpecialCharactersRegex(); private static partial Regex NoSpecialCharactersRegex();
[GeneratedRegex("[^a-zA-Z0-9 öäüßÖÄÜ]+", RegexOptions.Compiled)]
private static partial Regex NoSpecialCharactersExceptUmlautsRegex();
[GeneratedRegex(@"\s+")] [GeneratedRegex(@"\s+")]
private static partial Regex MultipleWhitespaceRegex(); private static partial Regex MultipleWhitespaceRegex();
[GeneratedRegex(@"\b(the|an|a)\b", RegexOptions.IgnoreCase, "de-DE")]
private static partial Regex TitlePrefixRegex();
} }
} }

View File

@@ -5,7 +5,7 @@ namespace UmlautAdaptarr.Utilities
{ {
public partial class UrlUtilities public partial class UrlUtilities
{ {
[GeneratedRegex(@"^(?!http:\/\/)([a-zA-Z0-9]+(\.[a-zA-Z0-9]+)+.*)$")] [GeneratedRegex(@"^(?!http:\/\/)([a-zA-Z0-9-]+(\.[a-zA-Z0-9-]+)+.*)$")]
private static partial Regex UrlMatchingRegex(); private static partial Regex UrlMatchingRegex();
public static bool IsValidDomain(string domain) public static bool IsValidDomain(string domain)
{ {