2024-02-13 01:21:59 +01:00
using Microsoft.Extensions.FileSystemGlobbing.Internal ;
using System.Text.RegularExpressions ;
2024-02-07 04:50:55 +01:00
using System.Xml.Linq ;
2024-02-12 21:04:18 +01:00
using UmlautAdaptarr.Models ;
2024-02-07 04:50:55 +01:00
using UmlautAdaptarr.Utilities ;
namespace UmlautAdaptarr.Services
{
2024-02-12 01:57:41 +01:00
public partial class TitleMatchingService ( CacheService cacheService , ILogger < TitleMatchingService > logger )
2024-02-07 04:50:55 +01:00
{
2024-02-12 21:04:18 +01:00
/// <summary>
/// Parses <paramref name="content"/> as XML and rewrites the <c>title</c> of each
/// <c>item</c> element for which a matching search item is available.
/// </summary>
/// <param name="content">XML text; parsed with <see cref="XDocument.Parse(string)"/>.</param>
/// <param name="searchItem">
/// Search item to apply to every release, or <c>null</c> to look one up per release
/// through the cache service using the cleaned release title.
/// </param>
/// <returns>The document serialized with <see cref="XNode.ToString()"/> after renaming.</returns>
/// <exception cref="NotImplementedException">
/// The media type derived from the item's category has no rename strategy.
/// </exception>
public string RenameTitlesInContent(string content, SearchItem? searchItem)
{
    var xDoc = XDocument.Parse(content);

    // Decided once up front: a caller-supplied search item is used for all releases,
    // otherwise each release is resolved through the cache individually.
    bool useCacheService = searchItem == null;

    foreach (var item in xDoc.Descendants("item"))
    {
        var titleElement = item.Element("title");
        if (titleElement == null)
        {
            continue;
        }

        var originalTitle = titleElement.Value;
        var cleanTitleSeperatedBySpace = ReplaceSeperatorsWithSpace(originalTitle.RemoveAccentButKeepGermanUmlauts());

        var mediaType = GetMediaTypeFromCategory(item.Element("category")?.Value);
        if (mediaType == null)
        {
            // Missing or unrecognized category: leave the release untouched
            continue;
        }

        if (useCacheService)
        {
            // Use CacheService to find a matching SearchItem by title
            searchItem = cacheService.SearchItemByTitle(mediaType, cleanTitleSeperatedBySpace);
        }

        if (searchItem == null)
        {
            // Skip processing this item if no matching SearchItem is found
            continue;
        }

        switch (mediaType)
        {
            case "tv":
            case "movie":
            case "movies": // GetMediaTypeFromCategory reports movie categories as "movies"
                FindAndReplaceForMoviesAndTV(logger, searchItem, titleElement, originalTitle, cleanTitleSeperatedBySpace);
                break;
            case "audio":
            case "book":
                FindAndReplaceForBooksAndAudio(searchItem, titleElement, originalTitle);
                break;
            default:
                throw new NotImplementedException();
        }
    }

    return xDoc.ToString();
}
2024-02-19 05:08:24 +01:00
/// <summary>
/// Renames a book/audio release to "<c>ExpectedAuthor - ExpectedTitle</c>" when both an author
/// variation and a title variation of <paramref name="searchItem"/> are found in
/// <paramref name="originalTitle"/>. Whatever follows the later of the two matches is kept
/// as a bracketed suffix if it is at least three characters long.
/// </summary>
/// <param name="searchItem">Supplies the match variations and the expected author/title.</param>
/// <param name="titleElement">Element whose value is overwritten; nothing happens when it is <c>null</c>.</param>
/// <param name="originalTitle">The release title as found in the feed.</param>
public void FindAndReplaceForBooksAndAudio(SearchItem searchItem, XElement? titleElement, string originalTitle)
{
    // The parameter is declared nullable; without an element there is nothing to rename.
    if (titleElement == null)
    {
        return;
    }

    // Normalize once and reuse for both lookups
    var normalizedOriginal = originalTitle.NormalizeForComparison();
    var authorMatch = FindBestMatch(searchItem.AuthorMatchVariations, normalizedOriginal, originalTitle);
    var titleMatch = FindBestMatch(searchItem.TitleMatchVariations, normalizedOriginal, originalTitle);

    if (!authorMatch.foundMatch || !titleMatch.foundMatch)
    {
        logger.LogDebug($"TitleMatchingService - No satisfactory fuzzy match found for both author and title for {originalTitle}.");
        return;
    }

    // The suffix starts after whichever match ends last
    int matchEndPositionInOriginal = Math.Max(authorMatch.bestEndInOriginal, titleMatch.bestEndInOriginal);

    // Check and adjust for immediate following delimiter
    char[] delimiters = [' ', '-', '_', '.'];
    if (matchEndPositionInOriginal < originalTitle.Length && delimiters.Contains(originalTitle[matchEndPositionInOriginal]))
    {
        matchEndPositionInOriginal++; // Skip the delimiter if it's immediately after the match
    }

    // Ensure we trim any leading delimiters from the suffix
    string suffix = originalTitle[matchEndPositionInOriginal..].TrimStart(delimiters).Trim();

    // Concatenate the expected title with the remaining suffix
    var updatedTitle = $"{searchItem.ExpectedAuthor} - {searchItem.ExpectedTitle}";
    if (suffix.Length >= 3)
    {
        updatedTitle += $"-[{suffix}]";
    }

    // Update the title element
    titleElement.Value = updatedTitle;
    logger.LogInformation($"TitleMatchingService - Title changed: '{originalTitle}' to '{updatedTitle}'");
}
2024-02-14 21:00:24 +01:00
/// <summary>
/// Searches <paramref name="normalizedOriginal"/> for each normalized variation and reports the
/// smallest mapped start and the largest mapped end across all variations that occur in it.
/// </summary>
/// <param name="variations">Candidate spellings; a <c>null</c> array is treated as "no match".</param>
/// <param name="normalizedOriginal">The release title after <c>NormalizeForComparison</c>.</param>
/// <param name="originalTitle">The unmodified release title the indices are mapped back to.</param>
/// <returns>
/// <c>(true, start, end)</c> with positions in <paramref name="originalTitle"/> as produced by
/// <c>MapNormalizedIndexToOriginal</c>, or <c>(false, 0, 0)</c> when no variation occurs.
/// </returns>
private (bool foundMatch, int bestStart, int bestEndInOriginal) FindBestMatch(string[] variations, string normalizedOriginal, string originalTitle)
{
    if (variations is null)
    {
        return (false, 0, 0);
    }

    bool found = false;
    int bestStart = int.MaxValue;
    int bestEndInOriginal = -1;

    foreach (var variation in variations)
    {
        var normalizedVariation = variation.NormalizeForComparison();

        // An empty needle is "found" at index 0 of every string and would report a bogus match
        if (string.IsNullOrEmpty(normalizedVariation))
        {
            continue;
        }

        // Ordinal: both sides are already normalized, so no culture-aware matching is wanted
        int startNormalized = normalizedOriginal.IndexOf(normalizedVariation, StringComparison.Ordinal);
        if (startNormalized < 0)
        {
            continue;
        }

        found = true;

        // Map the match boundaries from the normalized string back to the original string
        int startOriginal = MapNormalizedIndexToOriginal(normalizedOriginal, originalTitle, startNormalized);
        int endOriginal = MapNormalizedIndexToOriginal(normalizedOriginal, originalTitle, startNormalized + normalizedVariation.Length);

        bestStart = Math.Min(bestStart, startOriginal);
        bestEndInOriginal = Math.Max(bestEndInOriginal, endOriginal);
    }

    return found ? (true, bestStart, bestEndInOriginal) : (false, 0, 0);
}
/// <summary>
/// Translates a position in the normalized title into a position in the original title by
/// counting letters and digits.
/// </summary>
/// <param name="normalizedOriginal">The normalized title.</param>
/// <param name="originalTitle">The original title.</param>
/// <param name="normalizedIndex">Position in <paramref name="normalizedOriginal"/> (exclusive upper bound of the counted prefix).</param>
/// <returns>
/// With k letters/digits in the counted prefix: the index of the last character of
/// <paramref name="originalTitle"/> visited before its (k+1)-th letter/digit, or 0 when no
/// character was visited before it.
/// </returns>
private int MapNormalizedIndexToOriginal(string normalizedOriginal, string originalTitle, int normalizedIndex)
{
    // How many letters/digits precede the requested position in the normalized string
    int remaining = 0;
    int scan = 0;
    while (scan < normalizedIndex && scan < normalizedOriginal.Length)
    {
        if (char.IsLetterOrDigit(normalizedOriginal[scan]))
        {
            remaining++;
        }
        scan++;
    }

    // Walk the original title, consuming one unit per letter/digit, and remember the
    // last position reached before the budget is exhausted
    int mapped = 0;
    int cursor = 0;
    while (cursor < originalTitle.Length)
    {
        if (char.IsLetterOrDigit(originalTitle[cursor]))
        {
            remaining--;
            if (remaining < 0)
            {
                return mapped;
            }
        }

        mapped = cursor;
        cursor++;
    }

    return mapped;
}
/// <summary>
/// Replaces the first (longest-first) title variation that the release title starts with by the
/// expected title, re-using the first separator found in the original title, and keeps the rest
/// of the release name as suffix.
/// </summary>
/// <param name="logger">Receives the rename / skip messages.</param>
/// <param name="searchItem">Supplies <c>TitleMatchVariations</c> and <c>ExpectedTitle</c>.</param>
/// <param name="titleElement">Element whose value is overwritten on a match; nothing happens when it is <c>null</c>.</param>
/// <param name="originalTitle">The release title as found in the feed.</param>
/// <param name="normalizedOriginalTitle">The cleaned title the variations are matched against.</param>
private static void FindAndReplaceForMoviesAndTV(ILogger<TitleMatchingService> logger, SearchItem searchItem, XElement? titleElement, string originalTitle, string normalizedOriginalTitle)
{
    var titleMatchVariations = searchItem.TitleMatchVariations;
    var expectedTitle = searchItem.ExpectedTitle;

    // Nothing to rename, nothing to match against, or nothing to rename to
    if (titleElement == null || titleMatchVariations == null || expectedTitle == null)
    {
        return;
    }

    // Longest variations first so the most specific alias wins
    var variationsOrderedByLength = titleMatchVariations.OrderByDescending(variation => variation.Length);

    // Attempt to find a variation that matches the start of the original title
    foreach (var variation in variationsOrderedByLength)
    {
        // Skip variations that are already the expectedTitle
        if (variation == expectedTitle)
        {
            continue;
        }

        // Variation is already normalized at creation; an escaped space may be any separator
        var variationMatchPattern = "^" + Regex.Escape(variation).Replace("\\ ", "[._ ]");

        // Check if the title starts with the variation (ignoring case and separators)
        if (!Regex.IsMatch(normalizedOriginalTitle, variationMatchPattern, RegexOptions.IgnoreCase))
        {
            continue;
        }

        // Find the first separator used in the original title for consistent replacement
        var separator = FindFirstSeparator(originalTitle);

        // Reconstruct the expected title using the original separator
        var newTitlePrefix = expectedTitle.Replace(" ", separator.ToString());

        // Extract the suffix from the original title starting right after the matched variation length
        var suffix = originalTitle[Math.Min(variation.Length, originalTitle.Length)..];

        // Workaround for the rare case of e.g. "Frieren: Beyond Journey's End" that also has the alias "Frieren"
        if (expectedTitle.StartsWith(variation, StringComparison.OrdinalIgnoreCase))
        {
            // See if we already matched the whole title by checking if S01E01 pattern is coming next to avoid false positives
            // - that won't help with movies but with tv shows
            // NOTE: the separator is inserted unescaped, so a '.' separator accepts any single character here.
            var seasonMatchingPattern = $"^{separator}S\\d{{1,2}}E\\d{{1,2}}";
            if (!Regex.IsMatch(suffix, seasonMatchingPattern))
            {
                logger.LogWarning($"TitleMatchingService - Didn't rename: '{originalTitle}' because the expected title '{expectedTitle}' starts with the variation '{variation}'");
                continue;
            }
        }

        // Strip leading spaces from the suffix; other separators are handled when joining below
        suffix = Regex.Replace(suffix, "^ +", "");

        // TODO: evaluate (and make optional) appending "GERMAN" to the new title prefix when the
        // original title does not contain it, so the release is recognized as German. This can
        // cause problems with shows such as "Dark" that have international dubs.

        // Construct the new title with the original suffix
        var newTitle = newTitlePrefix + (string.IsNullOrEmpty(suffix) ? "" : suffix.StartsWith(separator) ? suffix : $"{separator}{suffix}");

        // Update the title element's value with the new title
        titleElement.Value = newTitle;
        logger.LogInformation($"TitleMatchingService - Title changed: '{originalTitle}' to '{newTitle}'");
        break; // Break after the first successful match and modification
    }
}
2024-02-07 04:50:55 +01:00
2024-02-14 21:00:24 +01:00
/// <summary>
/// Returns <paramref name="title"/> with every match of <c>WordSeperationCharRegex</c>
/// replaced by a single space.
/// </summary>
private static string ReplaceSeperatorsWithSpace(string title)
    => WordSeperationCharRegex().Replace(title, " ");
/// <summary>
/// Returns the first character of the first <c>WordSeperationCharRegex</c> match in
/// <paramref name="title"/>, or a space when there is no match.
/// </summary>
private static char FindFirstSeparator(string title)
{
    var firstHit = WordSeperationCharRegex().Match(title);
    if (!firstHit.Success)
    {
        // Default to space if no separator found
        return ' ';
    }

    return firstHit.Value.First();
}
/// <summary>
/// Returns <paramref name="title"/> with every space replaced by <paramref name="separator"/>.
/// </summary>
/// <param name="title">Title whose words are separated by spaces.</param>
/// <param name="separator">Separator to use between words.</param>
/// <returns>The re-separated title; the same string when the separator is a space.</returns>
private static string ReconstructTitleWithSeparator(string title, char separator)
{
    // Only a space separator needs no work; the check was previously inverted,
    // which turned the method into a no-op for every input.
    if (separator == ' ')
    {
        return title;
    }

    return title.Replace(' ', separator);
}
2024-02-12 01:57:41 +01:00
/// <summary>
/// Maps an item category (numeric id or category name) to the media type key used for title matching.
/// </summary>
/// <param name="category">Category value of the item; may be <c>null</c>.</param>
/// <returns>
/// <c>"book"</c>, <c>"movies"</c>, <c>"tv"</c> or <c>"audio"</c>; <c>null</c> when
/// <paramref name="category"/> is <c>null</c> or not recognized. Audiobook categories
/// ("3030" or any name containing "Audiobook") map to <c>"book"</c>.
/// </returns>
public string? GetMediaTypeFromCategory(string? category)
{
    if (category == null)
    {
        return null;
    }

    if (category == "7000" || category.StartsWith("EBook", StringComparison.OrdinalIgnoreCase) || category.StartsWith("Book", StringComparison.OrdinalIgnoreCase))
    {
        return "book";
    }

    if (category == "2000" || category.StartsWith("Movies", StringComparison.OrdinalIgnoreCase))
    {
        return "movies";
    }

    if (category == "5000" || category.StartsWith("TV", StringComparison.OrdinalIgnoreCase))
    {
        return "tv";
    }

    // Must be tested before the generic "Audio" prefix below
    if (category == "3030" || category.Contains("Audiobook", StringComparison.OrdinalIgnoreCase))
    {
        return "book";
    }

    // Case-insensitive ordinal comparison, consistent with the other category names
    if (category == "3000" || category.StartsWith("Audio", StringComparison.OrdinalIgnoreCase))
    {
        return "audio";
    }

    return null;
}
2024-02-07 04:50:55 +01:00
/// <summary>
/// Matches a single word separator of a release title: dot, underscore or space.
/// </summary>
// The character class must not be followed by a literal space, otherwise only
// "separator + space" sequences match and dotted titles are never normalized.
[GeneratedRegex("[._ ]")]
private static partial Regex WordSeperationCharRegex();
2024-02-12 21:04:18 +01:00
2024-02-07 04:50:55 +01:00
}
}