2024-02-12 21:04:18 +01:00
using Microsoft.Extensions.Logging.Abstractions ;
using System.Text.RegularExpressions ;
2024-02-12 01:57:41 +01:00
using UmlautAdaptarr.Utilities ;
namespace UmlautAdaptarr.Models
{
public partial class SearchItem
{
/// <summary>Identifier supplied as <c>arrId</c> to the constructor.</summary>
public int ArrId { get; set; }

/// <summary>Identifier supplied as <c>externalId</c> to the constructor.</summary>
public string ExternalId { get; set; }

/// <summary>Title supplied as <c>title</c> to the constructor.</summary>
public string Title { get; set; }

/// <summary>
/// <c>true</c> when <see cref="Title"/> is not null and its <c>HasUmlauts()</c> extension returns true;
/// <c>false</c> otherwise (including a null title).
/// </summary>
public bool HasUmlaut => Title?.HasUmlauts() ?? false;

/// <summary>Title supplied as <c>expectedTitle</c> to the constructor; basis for book/audio variations.</summary>
public string ExpectedTitle { get; set; }

/// <summary>Author supplied as <c>expectedAuthor</c> to the constructor; may be null.</summary>
public string? ExpectedAuthor { get; set; }

// TODO rename GermanTitle into Foreign or LocalTitle?
/// <summary>
/// German title supplied to the constructor. In the non book/audio path the constructor appends
/// the year found at the end of the expected title when this title does not already contain it.
/// </summary>
public string? GermanTitle { get; set; }

/// <summary>Title variations intended for searching; populated by the constructor.</summary>
public string[] TitleSearchVariations { get; set; }

/// <summary>Title variations intended for matching; populated by the constructor.</summary>
public string[] TitleMatchVariations { get; set; }

/// <summary>Author variations intended for matching; empty for the non book/audio path.</summary>
public string[] AuthorMatchVariations { get; set; }

/// <summary>Media type string; the code in this class compares it against "audio" and "book".</summary>
public string MediaType { get; set; }
// TODO public MediaType instead of string
2024-02-12 21:04:18 +01:00
/// <summary>
/// Creates a search item and pre-computes its title/author search and match variations.
/// </summary>
/// <param name="arrId">Value stored in <see cref="ArrId"/>.</param>
/// <param name="externalId">Value stored in <see cref="ExternalId"/>.</param>
/// <param name="title">Value stored in <see cref="Title"/>.</param>
/// <param name="expectedTitle">Value stored in <see cref="ExpectedTitle"/>; also inspected for a trailing "(yyyy)".</param>
/// <param name="germanTitle">Value stored in <see cref="GermanTitle"/>; may get the year appended.</param>
/// <param name="mediaType">Value stored in <see cref="MediaType"/>; "audio"/"book" with an author take the book/audio path.</param>
/// <param name="aliases">Optional alternative titles. The passed array is never modified.</param>
/// <param name="expectedAuthor">Optional author; required (non-null) for the book/audio path.</param>
public SearchItem(
    int arrId,
    string externalId,
    string title,
    string expectedTitle,
    string? germanTitle,
    string mediaType,
    string[]? aliases,
    string? expectedAuthor = null)
{
    ArrId = arrId;
    ExternalId = externalId;
    Title = title;
    ExpectedTitle = expectedTitle;
    ExpectedAuthor = expectedAuthor;
    GermanTitle = germanTitle;
    MediaType = mediaType;

    if ((mediaType == "audio" || mediaType == "book") && expectedAuthor != null)
    {
        GenerateVariationsForBooksAndAudio(expectedTitle, mediaType, expectedAuthor);
    }
    else
    {
        // if mediatype is movie/tv and the Expected Title ends with a year but the german title doesn't then append the year to the german title and to aliases
        // example: https://thetvdb.com/series/385925-avatar-the-last-airbender -> german Title is without 2024
        var effectiveAliases = aliases;
        var yearAtEndOfTitleMatch = YearAtEndOfTitleRegex().Match(expectedTitle);
        if (yearAtEndOfTitleMatch.Success)
        {
            // The match looks like "(2024)"; strip the surrounding parentheses.
            string year = yearAtEndOfTitleMatch.Value[1..^1];

            if (GermanTitle != null && !GermanTitle.Contains(year))
            {
                GermanTitle = $"{germanTitle} {year}";
            }

            if (aliases != null)
            {
                // Build a new array instead of writing into the caller's array,
                // so the argument passed in is left untouched.
                effectiveAliases = aliases
                    .Select(alias => alias.Contains(year) ? alias : $"{alias} {year}")
                    .ToArray();
            }
        }

        GenerateVariationsForTV(GermanTitle, mediaType, effectiveAliases);
    }
}
2024-02-13 01:21:59 +01:00
2024-02-19 05:08:24 +01:00
/// <summary>
/// Populates <see cref="TitleSearchVariations"/>, <see cref="TitleMatchVariations"/> and an empty
/// <see cref="AuthorMatchVariations"/> from the German title and the optional aliases.
/// </summary>
/// <param name="germanTitle">German title; null yields no search variations.</param>
/// <param name="mediaType">Media type forwarded to <c>GenerateVariations</c>.</param>
/// <param name="aliases">Optional alternative titles that are only added to the match variations.</param>
private void GenerateVariationsForTV(string? germanTitle, string mediaType, string[]? aliases)
{
    const string germanySuffix = "germany";

    TitleSearchVariations = GenerateVariations(germanTitle, mediaType).ToArray();

    var allTitleVariations = new List<string>(TitleSearchVariations);

    // If aliases are not null, generate variations for each and add them to the list
    // TODO (not necessarily here) only use deu and eng alias
    if (aliases != null)
    {
        foreach (var alias in aliases)
        {
            allTitleVariations.AddRange(GenerateVariations(alias, mediaType));

            // If title contains ":" also match for "-"
            if (alias.Contains(':'))
            {
                allTitleVariations.Add(alias.Replace(":", " -"));
            }
        }
    }

    AuthorMatchVariations = [];

    // if a german title ends with (DE) also add a search string that replaces (DE) with GERMAN
    // also add a matching title without (DE)
    if (germanTitle?.EndsWith("(DE)", StringComparison.Ordinal) ?? false)
    {
        TitleSearchVariations =
        [
            .. TitleSearchVariations,
            .. GenerateVariations(germanTitle.Replace("(DE)", " GERMAN").RemoveExtraWhitespaces(), mediaType)
        ];
        allTitleVariations.AddRange(GenerateVariations(germanTitle.Replace("(DE)", "").Trim(), mediaType));
    }

    // if a german title ends with "Germany" (e.g. Good Luck Guys Germany) also add a search string that replaces Germany with GERMAN
    // (e.g. Good Luck Guys GERMAN). This is because reality shows often have different formats in different countries with the same
    // name. Also add a matching title without the "Germany" suffix.
    if (germanTitle?.EndsWith(germanySuffix, StringComparison.OrdinalIgnoreCase) ?? false)
    {
        // Cut exactly the suffix and trim afterwards. Cutting one extra character would throw
        // for a title that is just "Germany" and would drop a non-space character before the suffix.
        var titleWithoutGermany = germanTitle[..^germanySuffix.Length];

        TitleSearchVariations =
        [
            .. TitleSearchVariations,
            .. GenerateVariations((titleWithoutGermany + "GERMAN").RemoveExtraWhitespaces(), mediaType)
        ];
        allTitleVariations.AddRange(GenerateVariations(titleWithoutGermany.Trim(), mediaType));
    }

    // If title contains ":" also match for "-"
    if (germanTitle?.Contains(':') ?? false)
    {
        allTitleVariations.Add(germanTitle.Replace(":", " -"));
    }

    TitleMatchVariations = allTitleVariations.Distinct(StringComparer.InvariantCultureIgnoreCase).ToArray();
}
/// <summary>
/// Populates <see cref="TitleMatchVariations"/>, <see cref="TitleSearchVariations"/> and
/// <see cref="AuthorMatchVariations"/> for book and audio items.
/// </summary>
/// <param name="expectedTitle">Expected title; the author name is removed from it for matching when present.</param>
/// <param name="mediaType">Media type; "book" additionally adds a "Last, First" author variation.</param>
/// <param name="expectedAuthor">Expected author. Null or empty is tolerated and treated as "no author in title".</param>
private void GenerateVariationsForBooksAndAudio(string expectedTitle, string mediaType, string? expectedAuthor)
{
    // e.g. Die Ärzte - best of die Ärzte
    // An empty author must not reach Replace: string.Replace throws on an empty search value.
    if (!string.IsNullOrEmpty(expectedAuthor) && expectedTitle.Contains(expectedAuthor))
    {
        var titleWithoutAuthorName = expectedTitle
            .Replace(expectedAuthor, string.Empty)
            .RemoveExtraWhitespaces()
            .Trim();

        // TODO log warning when titleWithoutAuthorName.Length < 2: this album can't be searched for automatically
        TitleMatchVariations = GenerateVariations(titleWithoutAuthorName, mediaType).ToArray();
    }
    else
    {
        TitleMatchVariations = GenerateVariations(expectedTitle, mediaType).ToArray();
    }

    TitleSearchVariations = GenerateVariations($"{expectedAuthor} {expectedTitle}", mediaType).ToArray();
    AuthorMatchVariations = GenerateVariations(expectedAuthor, mediaType).ToArray();

    if (mediaType == "book" && expectedAuthor != null)
    {
        var nameParts = expectedAuthor.Split(' ', StringSplitOptions.RemoveEmptyEntries);

        // Only a name with at least two parts has a "Last, First" form. This also covers
        // whitespace-only or trailing-space authors, where no separate last name exists.
        if (nameParts.Length > 1)
        {
            var lastName = nameParts[^1];
            var firstNames = string.Join(" ", nameParts[..^1]);

            var alternativeExpectedAuthor = $"{lastName}, {firstNames}";
            AuthorMatchVariations = [.. AuthorMatchVariations, .. GenerateVariations(alternativeExpectedAuthor, mediaType)];
        }
    }
}
2024-09-04 19:39:15 +02:00
/// <summary>Leading articles (including the trailing space) that may be dropped from a title.</summary>
private static readonly string[] LeadingArticles = ["Der ", "Die ", "Das ", "The ", "An ", "A "];

/// <summary>
/// Builds the distinct set of spelling variations for a title: the cleaned title, its umlaut
/// replacements, dash-less forms and, recursively, the title without a leading article.
/// </summary>
/// <param name="title">Raw title; null yields an empty sequence.</param>
/// <param name="mediaType">"book" and "audio" additionally get the <c>RemoveGermanUmlauts()</c> form.</param>
/// <returns>Distinct variations with extra whitespace removed; empty when the cleaned title is null or empty.</returns>
private static IEnumerable<string> GenerateVariations(string? title, string mediaType)
{
    if (title == null)
    {
        return [];
    }

    var cleanTitle = title.GetCleanTitle();
    // IsNullOrEmpty also guards the null case, which would otherwise be dereferenced below.
    if (string.IsNullOrEmpty(cleanTitle))
    {
        return [];
    }

    // Start with base variations including handling umlauts
    var baseVariations = new List<string>
    {
        cleanTitle, // No change
        cleanTitle.ReplaceGermanUmlautsWithLatinEquivalents(),
        cleanTitle.RemoveGermanUmlautDots()
    };

    if (mediaType == "book" || mediaType == "audio")
    {
        baseVariations.Add(cleanTitle.RemoveGermanUmlauts());
    }

    // TODO: determine if this is really needed
    // Additional variations to accommodate titles with "-"
    if (cleanTitle.Contains('-'))
    {
        var withoutDash = cleanTitle.Replace("-", "");
        var withSpaceInsteadOfDash = cleanTitle.Replace("-", " ");

        // Add variations of the title without dash and with space instead of dash
        baseVariations.AddRange(new List<string>
        {
            withoutDash,
            withSpaceInsteadOfDash,
            withoutDash.ReplaceGermanUmlautsWithLatinEquivalents(),
            withoutDash.RemoveGermanUmlautDots(),
            withSpaceInsteadOfDash.ReplaceGermanUmlautsWithLatinEquivalents(),
            withSpaceInsteadOfDash.RemoveGermanUmlautDots()
        });
    }

    // If a title starts with der/die/das also accept variations without it
    // Same for english the, an, a
    var leadingArticle = Array.Find(
        LeadingArticles,
        article => cleanTitle.StartsWith(article, StringComparison.Ordinal));
    if (leadingArticle != null)
    {
        // Slice the same string the prefix was detected on, so the cut stays aligned even
        // when cleaning changed the leading characters of the raw title.
        // NOTE(review): assumes GetCleanTitle() is idempotent, since the recursive call cleans again - confirm.
        var cleanTitleWithoutArticle = cleanTitle[leadingArticle.Length..].Trim();
        baseVariations.AddRange(GenerateVariations(cleanTitleWithoutArticle, mediaType));
    }

    // Remove multiple spaces
    var cleanedVariations = baseVariations.Select(variation => variation.RemoveExtraWhitespaces());

    return cleanedVariations.Distinct();
}
2024-02-23 14:09:20 +01:00
/// <summary>
/// Source-generated regex matching four digits enclosed in parentheses, anchored with <c>$</c>
/// at the end of the input, e.g. the "(2024)" in "Avatar: The Last Airbender (2024)".
/// The constructor strips the parentheses from the match to obtain the year.
/// </summary>
[GeneratedRegex(@"\(\d{4}\)$")]
private static partial Regex YearAtEndOfTitleRegex ( ) ;
2024-02-12 01:57:41 +01:00
}
}