Files
UmlautAdaptarr/UmlautAdaptarr/Services/TitleMatchingService.cs

331 lines
15 KiB
C#
Raw Normal View History

2024-02-13 01:21:59 +01:00
using Microsoft.Extensions.FileSystemGlobbing.Internal;
using System.Text.RegularExpressions;
2024-02-07 04:50:55 +01:00
using System.Xml.Linq;
2024-02-12 21:04:18 +01:00
using UmlautAdaptarr.Models;
2024-02-07 04:50:55 +01:00
using UmlautAdaptarr.Utilities;
namespace UmlautAdaptarr.Services
{
2024-02-12 01:57:41 +01:00
public partial class TitleMatchingService(CacheService cacheService, ILogger<TitleMatchingService> logger)
2024-02-07 04:50:55 +01:00
{
2024-02-12 21:04:18 +01:00
/// <summary>
/// Parses <paramref name="content"/> as XML and rewrites the <c>title</c> of every <c>item</c>
/// element whose title matches a known variation of a search item.
/// </summary>
/// <param name="content">XML document containing <c>item</c> elements with <c>title</c> and <c>category</c> children.</param>
/// <param name="searchItem">
/// Search item to match all releases against. When <c>null</c>, a matching item is looked up
/// in the cache separately for every release title.
/// </param>
/// <returns>The serialized document (via <see cref="XDocument.ToString()"/>) with renamed titles.</returns>
public string RenameTitlesInContent(string content, SearchItem? searchItem)
{
    var xDoc = XDocument.Parse(content);

    // Without an explicit search item every release is resolved individually through the cache.
    bool useCacheService = searchItem == null;

    foreach (var item in xDoc.Descendants("item"))
    {
        var titleElement = item.Element("title");
        if (titleElement == null)
        {
            continue;
        }

        var originalTitle = titleElement.Value;
        var cleanTitleSeperatedBySpace = ReplaceSeperatorsWithSpace(originalTitle.RemoveAccentButKeepGermanUmlauts());

        var category = item.Element("category")?.Value;
        var mediaType = GetMediaTypeFromCategory(category);
        if (mediaType == null)
        {
            // Unknown or missing category: leave the item untouched
            continue;
        }

        if (useCacheService)
        {
            // Use CacheService to find a matching SearchItem by title
            searchItem = cacheService.SearchItemByTitle(mediaType, cleanTitleSeperatedBySpace);
        }

        if (searchItem == null)
        {
            // Skip processing this item if no matching SearchItem is found
            continue;
        }

        switch (mediaType)
        {
            case "tv":
            case "movie":
            // GetMediaTypeFromCategory reports movie categories as "movies"; without this label
            // every movie release fell through to the default branch and threw.
            case "movies":
                FindAndReplaceForMoviesAndTV(logger, searchItem, titleElement, originalTitle, cleanTitleSeperatedBySpace!);
                break;
            case "audio":
            case "book":
                FindAndReplaceForBooksAndAudio(searchItem, titleElement, originalTitle!);
                break;
            default:
                throw new NotImplementedException();
        }
    }

    return xDoc.ToString();
}
2024-02-19 05:08:24 +01:00
/// <summary>
/// Rewrites a book/audio release title to "<c>ExpectedAuthor - ExpectedTitle</c>" when both an
/// author variation and a title variation of <paramref name="searchItem"/> are found in
/// <paramref name="originalTitle"/>. Whatever follows the later of the two matches is kept as
/// a "<c>-[suffix]</c>" tail if it is at least three characters long.
/// </summary>
/// <param name="searchItem">Search item providing the match variations and the expected author/title.</param>
/// <param name="titleElement">Element whose value is replaced; nothing happens when it is <c>null</c>.</param>
/// <param name="originalTitle">The release title as it appears in the feed.</param>
public void FindAndReplaceForBooksAndAudio(SearchItem searchItem, XElement? titleElement, string originalTitle)
{
    // The parameter is declared nullable; without an element there is nothing to rewrite.
    if (titleElement is null)
    {
        return;
    }

    // Normalize once and reuse it for both lookups
    var normalizedOriginal = originalTitle.NormalizeForComparison();
    var authorMatch = FindBestMatch(searchItem.AuthorMatchVariations, normalizedOriginal, originalTitle);
    var (foundMatch, _, bestEndInOriginal) = FindBestMatch(searchItem.TitleMatchVariations, normalizedOriginal, originalTitle);

    if (!authorMatch.foundMatch || !foundMatch)
    {
        logger.LogDebug($"TitleMatchingService - No satisfactory fuzzy match found for both author and title for {originalTitle}.");
        return;
    }

    // The suffix starts after whichever of the two matches ends later
    int matchEndPositionInOriginal = Math.Max(authorMatch.bestEndInOriginal, bestEndInOriginal);

    // Check and adjust for immediate following delimiter
    char[] delimiters = [' ', '-', '_', '.'];
    if (matchEndPositionInOriginal < originalTitle.Length && delimiters.Contains(originalTitle[matchEndPositionInOriginal]))
    {
        matchEndPositionInOriginal++; // Skip the delimiter if it's immediately after the match
    }

    // Ensure we trim any leading delimiters from the suffix
    string suffix = originalTitle[matchEndPositionInOriginal..].TrimStart(delimiters).Trim();

    // Concatenate the expected title with the remaining suffix
    var updatedTitle = $"{searchItem.ExpectedAuthor} - {searchItem.ExpectedTitle}";
    if (suffix.Length >= 3)
    {
        updatedTitle += $"-[{suffix}]";
    }

    // Update the title element
    titleElement.Value = updatedTitle;
    logger.LogInformation($"TitleMatchingService - Title changed: '{originalTitle}' to '{updatedTitle}'");
}
2024-09-04 19:39:15 +02:00
/// <summary>
/// Searches <paramref name="normalizedOriginal"/> for every normalized entry of
/// <paramref name="variations"/> and reports the span they cover in <paramref name="originalTitle"/>.
/// </summary>
/// <param name="variations">Candidate strings; each one is normalized before searching.</param>
/// <param name="normalizedOriginal">The normalized form of <paramref name="originalTitle"/>.</param>
/// <param name="originalTitle">The unmodified release title that positions are mapped back to.</param>
/// <returns>
/// <c>(true, start, end)</c> where <c>start</c> is the smallest mapped start and <c>end</c> the
/// largest mapped end over all variations that were found; <c>(false, 0, 0)</c> if none was found.
/// </returns>
private static (bool foundMatch, int bestStart, int bestEndInOriginal) FindBestMatch(string[] variations, string normalizedOriginal, string originalTitle)
{
    // NOTE(review): sibling code accesses the variation arrays with '!', so they may be null at runtime - confirm.
    if (variations is null)
    {
        return (false, 0, 0);
    }

    bool found = false;
    int bestStart = int.MaxValue;
    int bestEndInOriginal = -1;

    foreach (var variation in variations)
    {
        var normalizedVariation = variation.NormalizeForComparison();

        // An empty needle is "found" at index 0 of any string and would report a match for every title.
        if (normalizedVariation.Length == 0)
        {
            continue;
        }

        // Ordinal search: the default string.IndexOf(string) is culture-sensitive, which can ignore
        // zero-width characters and report positions that do not line up with char indices.
        int startNormalized = normalizedOriginal.IndexOf(normalizedVariation, StringComparison.Ordinal);
        if (startNormalized < 0)
        {
            continue;
        }

        found = true;

        // Map the match boundaries from the normalized string back to the original string
        int startOriginal = MapNormalizedIndexToOriginal(normalizedOriginal, originalTitle, startNormalized);
        int endOriginal = MapNormalizedIndexToOriginal(normalizedOriginal, originalTitle, startNormalized + normalizedVariation.Length);

        bestStart = Math.Min(bestStart, startOriginal);
        bestEndInOriginal = Math.Max(bestEndInOriginal, endOriginal);
    }

    return found ? (true, bestStart, bestEndInOriginal) : (false, 0, 0);
}
/// <summary>
/// Maps an index from the normalized string back to a corresponding index in the original string.
/// </summary>
/// <remarks>
/// The letters/digits located before <paramref name="normalizedIndex"/> in
/// <paramref name="normalizedOriginal"/> are counted, then <paramref name="originalTitle"/> is
/// walked until that many letters/digits have been passed. The result is the index of the last
/// character visited before the next letter/digit would be consumed; if the title ends first it
/// is the last index of the title, and it is 0 for an empty title.
/// </remarks>
private static int MapNormalizedIndexToOriginal(string normalizedOriginal, string originalTitle, int normalizedIndex)
{
    // How many letters/digits precede the requested position in the normalized text
    var scanLimit = Math.Min(normalizedIndex, normalizedOriginal.Length);
    var remaining = 0;
    for (var pos = 0; pos < scanLimit; pos++)
    {
        if (char.IsLetterOrDigit(normalizedOriginal[pos]))
        {
            remaining++;
        }
    }

    // Walk the original title, consuming one unit of the budget per letter/digit
    var lastVisited = 0;
    for (var pos = 0; pos < originalTitle.Length; pos++)
    {
        if (char.IsLetterOrDigit(originalTitle[pos]))
        {
            if (remaining == 0)
            {
                // This letter/digit lies beyond the requested position
                return lastVisited;
            }

            remaining--;
        }

        lastVisited = pos;
    }

    return lastVisited;
}
/// <summary>
/// Replaces the first variation that starts at the beginning of the release title with the
/// expected title of <paramref name="searchItem"/>. Variations are tried longest first.
/// </summary>
/// <param name="logger">Logger used to report renames and skipped candidates.</param>
/// <param name="searchItem">Search item providing the title variations and the expected title.</param>
/// <param name="titleElement">Element whose value is replaced; nothing happens when it is <c>null</c>.</param>
/// <param name="originalTitle">The release title as it appears in the feed.</param>
/// <param name="normalizedOriginalTitle">The release title the variations are matched against.</param>
private static void FindAndReplaceForMoviesAndTV(ILogger<TitleMatchingService> logger, SearchItem searchItem, XElement? titleElement, string originalTitle, string normalizedOriginalTitle)
{
    // The parameter is declared nullable; without an element there is nothing to rewrite.
    if (titleElement is null)
    {
        return;
    }

    var titleMatchVariations = searchItem.TitleMatchVariations;
    var expectedTitle = searchItem.ExpectedTitle;
    var variationsOrderedByLength = titleMatchVariations!.OrderByDescending(variation => variation.Length);

    // Attempt to find a variation that matches the start of the original title
    foreach (var variation in variationsOrderedByLength)
    {
        // Skip variations that are already the expectedTitle
        if (variation == expectedTitle)
        {
            continue;
        }

        // Variation is already normalized at creation
        var variationMatchPattern = "^" + Regex.Escape(variation).Replace("\\ ", "[._ ]");

        // Check if the originalTitle starts with the variation (ignoring case and separators)
        if (!Regex.IsMatch(normalizedOriginalTitle, variationMatchPattern, RegexOptions.IgnoreCase))
        {
            continue;
        }

        // Find the first separator used in the original title for consistent replacement
        var separator = FindFirstSeparator(originalTitle);

        // Reconstruct the expected title using the original separator
        var newTitlePrefix = expectedTitle!.Replace(" ", separator.ToString());

        // Extract the suffix from the original title starting right after the matched variation length
        var suffix = originalTitle[Math.Min(variation.Length, originalTitle.Length)..];

        // Workaround for the rare case of e.g. "Frieren: Beyond Journey's End" that also has the alias "Frieren"
        if (expectedTitle.StartsWith(variation, StringComparison.OrdinalIgnoreCase))
        {
            // See if we already matched the whole title by checking if S01E01/S2024E123 pattern is coming next to avoid false positives
            // - that won't help with movies but with tv shows
            // The separator is escaped because '.' would otherwise match any character.
            var seasonMatchingPattern = $"^{Regex.Escape(separator.ToString())}S\\d{{1,4}}E\\d{{1,4}}";
            if (!Regex.IsMatch(suffix, seasonMatchingPattern))
            {
                logger.LogWarning($"TitleMatchingService - Didn't rename: '{originalTitle}' because the expected title '{expectedTitle}' starts with the variation '{variation}'");
                continue;
            }
        }

        // Clean up any leading spaces from the suffix
        suffix = Regex.Replace(suffix, "^ +", "");

        // TODO add this when radarr is implemented
        // FixBadReleaseNaming

        // Construct the new title with the original suffix
        var newTitle = newTitlePrefix + (string.IsNullOrEmpty(suffix) ? "" : suffix.StartsWith(separator) ? suffix : $"{separator}{suffix}");

        // Update the title element's value with the new title
        titleElement.Value = newTitle;
        logger.LogInformation($"TitleMatchingService - Title changed: '{originalTitle}' to '{newTitle}'");
        break;
    }
}
2024-02-07 04:50:55 +01:00
2024-09-04 19:30:31 +02:00
// Release groups (lower-case) whose releases are treated specially by FixBadReleaseNaming.
private static readonly string[] MissingGermanTagReleaseGroups = ["tvr"];
private static readonly string[] HEVCInsteadOfx265TagReleaseGroups = ["eisbaer"];
private static readonly string[] WrongTagsReleaseGroups = ["eisbaer"];

/// <summary>
/// Corrects known naming quirks of specific release groups in <paramref name="title"/>.
/// </summary>
/// <param name="title">The release title to inspect.</param>
/// <param name="seperator">The separator that surrounds tags in <paramref name="title"/>.</param>
/// <param name="logger">Logger used to report every detected quirk.</param>
/// <returns>The corrected title, or <paramref name="title"/> unchanged when nothing applies.</returns>
private static string FixBadReleaseNaming(string title, string seperator, ILogger<TitleMatchingService> logger)
{
    var releaseGroup = GetReleaseGroup(title);
    if (releaseGroup is null)
    {
        // No release group in the title, so none of the group-specific fixes can apply
        return title;
    }

    // The group tables are lower-case while the group is taken verbatim from the title,
    // so membership has to be tested case-insensitively.
    var ignoreCase = StringComparer.OrdinalIgnoreCase;

    if (MissingGermanTagReleaseGroups.Contains(releaseGroup, ignoreCase))
    {
        // Check if "german" is not in the title, ignoring case
        if (!Regex.IsMatch(title, "german", RegexOptions.IgnoreCase))
        {
            logger.LogInformation($"FixBadReleaseNaming - found missing GERMAN tag for {title}");
            // TODO not finished
            // Insert "GERMAN" after the newTitlePrefix
        }
    }

    if (HEVCInsteadOfx265TagReleaseGroups.Contains(releaseGroup, ignoreCase))
    {
        if (!title.Contains("REMUX", StringComparison.InvariantCultureIgnoreCase))
        {
            logger.LogInformation($"FixBadReleaseNaming - found HEVC instead of x265 for {title}");
            title = title.Replace("HEVC", "x265");
        }
    }

    if (WrongTagsReleaseGroups.Contains(releaseGroup, ignoreCase))
    {
        if (title.Contains($"{seperator}RM{seperator}"))
        {
            logger.LogInformation($"FixBadReleaseNaming - found bad Tag RM instead of REMASTERED for {title}");
            title = title.Replace($"{seperator}RM{seperator}", $"{seperator}REMASTERED{seperator}");
        }
    }

    // Previously an empty string was returned, which discarded the title together with all fixes.
    return title;
}
/// <summary>
/// Returns the trimmed text after the last '-' in <paramref name="title"/>,
/// or <c>null</c> when the title contains no '-'.
/// </summary>
private static string? GetReleaseGroup(string title)
{
    var lastDash = title.LastIndexOf('-');
    if (lastDash < 0)
    {
        return null;
    }

    return title.Substring(lastDash + 1).Trim();
}
2024-02-14 21:00:24 +01:00
/// <summary>
/// Normalizes <paramref name="title"/> by turning every character matched by
/// <c>WordSeperationCharRegex</c> into a single space.
/// </summary>
private static string ReplaceSeperatorsWithSpace(string title)
    => WordSeperationCharRegex().Replace(title, " ");
/// <summary>
/// Returns the first character of <paramref name="title"/> matched by
/// <c>WordSeperationCharRegex</c>, or a space when there is none.
/// </summary>
private static char FindFirstSeparator(string title)
{
    var firstSeparator = WordSeperationCharRegex().Match(title);
    if (!firstSeparator.Success)
    {
        return ' ';
    }

    return firstSeparator.Value[0];
}
/// <summary>
/// Replaces every space in <paramref name="title"/> with <paramref name="separator"/>.
/// </summary>
/// <param name="title">A title whose words are separated by spaces.</param>
/// <param name="separator">The separator character to use instead of spaces.</param>
/// <returns>The title with spaces replaced; unchanged when the separator is a space.</returns>
private static string ReconstructTitleWithSeparator(string title, char separator)
{
    // The check used to be inverted (separator != ' '), which returned the title unchanged
    // for every real separator and only "replaced" spaces with spaces.
    if (separator == ' ')
    {
        return title;
    }

    return title.Replace(' ', separator);
}
2024-02-12 01:57:41 +01:00
/// <summary>
/// Translates a category id or category name into a media type key.
/// </summary>
/// <param name="category">Category id (e.g. "2000") or category name; may be <c>null</c>.</param>
/// <returns>
/// "book", "movies", "tv" or "audio", or <c>null</c> when the category is <c>null</c> or unknown.
/// The rules are evaluated in order, so an audiobook category yields "book" rather than "audio".
/// </returns>
public string? GetMediaTypeFromCategory(string? category)
{
    if (category is null)
    {
        return null;
    }

    static bool HasPrefix(string value, string prefix)
        => value.StartsWith(prefix, StringComparison.OrdinalIgnoreCase);

    if (category == "7000" || HasPrefix(category, "EBook") || HasPrefix(category, "Book"))
    {
        return "book";
    }

    if (category == "2000" || HasPrefix(category, "Movies"))
    {
        return "movies";
    }

    if (category == "5000" || HasPrefix(category, "TV"))
    {
        return "tv";
    }

    // Must be tested before the generic audio rule below
    if (category == "3030" || category.Contains("Audiobook", StringComparison.OrdinalIgnoreCase))
    {
        return "book";
    }

    if (category == "3000" || HasPrefix(category, "Audio"))
    {
        return "audio";
    }

    return null;
}
2024-02-07 04:50:55 +01:00
/// <summary>
/// Source-generated regex matching a single word separator character: '.', '_' or ' '.
/// </summary>
[GeneratedRegex("[._ ]")]
private static partial Regex WordSeperationCharRegex();
2024-02-12 21:04:18 +01:00
2024-02-07 04:50:55 +01:00
}
}