2024-02-13 00:04:50 +01:00
using Microsoft.AspNetCore.Mvc.Formatters ;
using Microsoft.Extensions.Caching.Memory ;
using System.Reflection.Metadata.Ecma335 ;
2024-02-12 21:04:18 +01:00
using System.Text.RegularExpressions ;
2024-02-12 01:57:41 +01:00
using UmlautAdaptarr.Models ;
using UmlautAdaptarr.Utilities ;
namespace UmlautAdaptarr.Services
{
2024-02-12 21:04:18 +01:00
public partial class CacheService ( IMemoryCache cache )
2024-02-12 01:57:41 +01:00
{
// Prefix index for non-book/non-audio items: maps the first VARIATION_LOOKUP_CACHE_LENGTH characters
// of a normalized title variation to the "{mediaType}_var_{variation}" cache keys sharing that prefix.
private readonly Dictionary < string , HashSet < string > > VariationIndex = [ ] ;
2024-02-19 05:08:24 +01:00
// Book index: maps a normalized author variation to the items registered for that author, each entry
// holding the item's normalized title variations and the cache key the item is stored under.
private readonly Dictionary < string , List < ( HashSet < string > TitleVariations , string CacheKey ) > > BookVariationIndex = [ ] ;
2024-02-13 00:04:50 +01:00
// Audio index: maps a normalized author variation to the items registered for that author, each entry
// holding the item's normalized title variations and the cache key the item is stored under.
private readonly Dictionary < string , List < ( HashSet < string > TitleVariations , string CacheKey ) > > AudioVariationIndex = [ ] ;
2024-02-12 01:57:41 +01:00
// Maximum number of leading characters of a normalized title used as the key into VariationIndex.
private const int VARIATION_LOOKUP_CACHE_LENGTH = 5 ;
/// <summary>
/// Stores <paramref name="item"/> in the memory cache and registers it in the lookup index
/// that belongs to its media type.
/// </summary>
/// <remarks>
/// The item is always cached under <c>{MediaType}_extid_{ExternalId}</c>. Items whose media type is
/// "audio" or "book" are then handed to the author/title indexes and nothing else is stored for them.
/// Every other media type is additionally cached under its normalized title and under each normalized
/// title variation, and every variation key is recorded in <c>VariationIndex</c> by its leading characters.
/// </remarks>
/// <param name="item">The search item to cache.</param>
public void CacheSearchItem(SearchItem item)
{
    var prefix = item.MediaType;
    var cacheKey = $"{prefix}_extid_{item.ExternalId}";
    cache.Set(cacheKey, item);

    if (item.MediaType == "audio")
    {
        CacheAudioSearchItem(item, cacheKey);
        return;
    }

    if (item.MediaType == "book")
    {
        CacheBookSearchItem(item, cacheKey);
        return;
    }

    var normalizedTitle = item.Title.RemoveAccentButKeepGermanUmlauts().ToLower();
    cache.Set($"{prefix}_title_{normalizedTitle}", item);

    foreach (var variation in item.TitleMatchVariations)
    {
        var normalizedVariation = variation.RemoveAccentButKeepGermanUmlauts().ToLower();
        var variationKey = $"{prefix}_var_{normalizedVariation}";
        cache.Set(variationKey, item);

        // Bound the slice by the *normalized* length: normalization may shorten the string, and
        // slicing with the raw variation's length would then run past the end and throw.
        // The value is already lower-cased, so no further case conversion is needed.
        var indexPrefix = normalizedVariation[..Math.Min(VARIATION_LOOKUP_CACHE_LENGTH, normalizedVariation.Length)];

        if (!VariationIndex.TryGetValue(indexPrefix, out var keysForPrefix))
        {
            keysForPrefix = [];
            VariationIndex[indexPrefix] = keysForPrefix;
        }

        keysForPrefix.Add(variationKey);
    }
}
2024-02-13 00:04:50 +01:00
/// <summary>
/// Registers an audio item in <c>AudioVariationIndex</c> under each of its normalized author variations.
/// </summary>
/// <remarks>
/// Each index entry pairs the item's normalized title variations with <paramref name="cacheKey"/>.
/// Registering the same <paramref name="cacheKey"/> again replaces its previous entry for an author
/// instead of appending a duplicate, so repeated caching does not grow the index.
/// </remarks>
/// <param name="item">The audio search item whose author and title variations are indexed.</param>
/// <param name="cacheKey">The memory-cache key under which <paramref name="item"/> is stored.</param>
public void CacheAudioSearchItem(SearchItem item, string cacheKey)
{
    // The title set is identical for every author variation; build it once, on first use,
    // so an item without author variations still never touches its title variations.
    HashSet<string>? titleVariations = null;

    foreach (var authorVariation in item.AuthorMatchVariations)
    {
        var normalizedAuthor = authorVariation.NormalizeForComparison();

        if (!AudioVariationIndex.TryGetValue(normalizedAuthor, out var entries))
        {
            entries = [];
            AudioVariationIndex[normalizedAuthor] = entries;
        }

        titleVariations ??= item.TitleMatchVariations
            .Select(titleMatchVariation => titleMatchVariation.NormalizeForComparison())
            .ToHashSet();

        // Drop any earlier registration of this item (stale titles, or a second author
        // variation that normalizes to the same key) before adding the current one.
        entries.RemoveAll(entry => entry.CacheKey == cacheKey);
        entries.Add((titleVariations, cacheKey));
    }
}
2024-02-19 05:08:24 +01:00
/// <summary>
/// Registers a book item in <c>BookVariationIndex</c> under each of its normalized author variations.
/// </summary>
/// <remarks>
/// Each index entry pairs the item's normalized title variations with <paramref name="cacheKey"/>.
/// Registering the same <paramref name="cacheKey"/> again replaces its previous entry for an author
/// instead of appending a duplicate, so repeated caching does not grow the index.
/// </remarks>
/// <param name="item">The book search item whose author and title variations are indexed.</param>
/// <param name="cacheKey">The memory-cache key under which <paramref name="item"/> is stored.</param>
public void CacheBookSearchItem(SearchItem item, string cacheKey)
{
    // The title set is identical for every author variation; build it once, on first use,
    // so an item without author variations still never touches its title variations.
    HashSet<string>? titleVariations = null;

    foreach (var authorVariation in item.AuthorMatchVariations)
    {
        var normalizedAuthor = authorVariation.NormalizeForComparison();

        if (!BookVariationIndex.TryGetValue(normalizedAuthor, out var entries))
        {
            entries = [];
            BookVariationIndex[normalizedAuthor] = entries;
        }

        titleVariations ??= item.TitleMatchVariations
            .Select(titleMatchVariation => titleMatchVariation.NormalizeForComparison())
            .ToHashSet();

        // Drop any earlier registration of this item (stale titles, or a second author
        // variation that normalizes to the same key) before adding the current one.
        entries.RemoveAll(entry => entry.CacheKey == cacheKey);
        entries.Add((titleVariations, cacheKey));
    }
}
2024-02-12 01:57:41 +01:00
/// <summary>
/// Finds the cached item of the given media type whose title variation is the longest prefix of
/// <paramref name="title"/>.
/// </summary>
/// <remarks>
/// "audio" and "book" are delegated to the author/title index lookup. For every other media type the
/// leading characters of the normalized title select candidate cache keys from <c>VariationIndex</c>;
/// each distinct candidate item is then checked variation by variation.
/// </remarks>
/// <param name="mediaType">Media type the returned item must have.</param>
/// <param name="title">The title to look up; it is normalized before matching.</param>
/// <returns>The best matching item, or <c>null</c> when no variation is a prefix of the title.</returns>
public SearchItem? SearchItemByTitle(string mediaType, string title)
{
    var normalizedTitle = title.RemoveAccentButKeepGermanUmlauts().ToLower();

    if (mediaType == "audio" || mediaType == "book")
    {
        return FindBestMatchForBooksAndAudio(normalizedTitle.NormalizeForComparison(), mediaType);
    }

    // Use the first few characters of the normalized title for cache prefix search
    var cacheSearchPrefix = normalizedTitle[..Math.Min(VARIATION_LOOKUP_CACHE_LENGTH, normalizedTitle.Length)];

    if (!VariationIndex.TryGetValue(cacheSearchPrefix, out var cacheKeys))
    {
        return null;
    }

    SearchItem? bestSearchItemMatch = null;
    var bestVariationMatchLength = 0;
    HashSet<string> checkedSearchItems = [];

    foreach (var cacheKey in cacheKeys)
    {
        if (!cache.TryGetValue(cacheKey, out SearchItem? item) || item == null || item.MediaType != mediaType)
        {
            continue;
        }

        // Several variation keys can point at the same item; inspect each item only once.
        if (!checkedSearchItems.Add($"{item.MediaType}_{item.ExternalId}"))
        {
            continue;
        }

        foreach (var variation in item.TitleMatchVariations ?? [])
        {
            // Compare like with like: the title was normalized above and the index keys are built
            // from normalized variations, so the raw variation must be normalized here as well.
            var normalizedVariation = variation.RemoveAccentButKeepGermanUmlauts().ToLower();

            // A longer matching variation is most likely the correct one, so it replaces an
            // earlier, shorter match (which may have come from a different search item).
            if (normalizedVariation.Length > bestVariationMatchLength
                && normalizedTitle.StartsWith(normalizedVariation, StringComparison.OrdinalIgnoreCase))
            {
                bestSearchItemMatch = item;
                bestVariationMatchLength = normalizedVariation.Length;
            }
        }
    }

    return bestSearchItemMatch;
}
/// <summary>
/// Looks up a cached item by media type and external id.
/// </summary>
/// <param name="mediaType">Media type used as the cache-key prefix.</param>
/// <param name="externalId">External id of the item.</param>
/// <returns>The cached item, or <c>null</c> when nothing is stored under that key.</returns>
public SearchItem? GetSearchItemByExternalId(string mediaType, string externalId)
{
    var externalIdKey = $"{mediaType}_extid_{externalId}";
    return cache.TryGetValue(externalIdKey, out SearchItem? cachedItem) ? cachedItem : null;
}
/// <summary>
/// Looks up a cached item by exact normalized title: first among the title-variation keys,
/// then among the title keys.
/// </summary>
/// <param name="mediaType">Media type used as the cache-key prefix.</param>
/// <param name="title">The title to look up; it is normalized before the lookup.</param>
/// <returns>The cached item, or <c>null</c> when neither key holds one.</returns>
public SearchItem? GetSearchItemByTitle(string mediaType, string title)
{
    var lookupTitle = title.RemoveAccentButKeepGermanUmlauts().ToLower();

    // TODO: the "generic" media type has no dedicated handling yet.

    if (cache.TryGetValue($"{mediaType}_var_{lookupTitle}", out SearchItem? variationHit) && variationHit is not null)
    {
        return variationHit;
    }

    // No variation hit: fall back to the entry stored under the item's own title.
    cache.TryGetValue($"{mediaType}_title_{lookupTitle}", out SearchItem? titleHit);
    return titleHit;
}
2024-02-12 21:04:18 +01:00
2024-02-19 05:08:24 +01:00
/// <summary>
/// Finds a cached book or audio item whose author and one of whose title variations both occur
/// in <paramref name="normalizedOriginalTitle"/>.
/// </summary>
/// <remarks>
/// The first indexed author contained in the title decides which entries are considered. Among that
/// author's entries, the one with the longest matching title variation is tried first; an entry whose
/// cache key is no longer present in the memory cache is skipped.
/// </remarks>
/// <param name="normalizedOriginalTitle">The title to search in, already normalized for comparison.</param>
/// <param name="mediaType">"audio" selects the audio index; any other value selects the book index.</param>
/// <returns>The matching item, or <c>null</c> when no author/title combination matches.</returns>
private SearchItem? FindBestMatchForBooksAndAudio(string normalizedOriginalTitle, string mediaType)
{
    var index = mediaType == "audio" ? AudioVariationIndex : BookVariationIndex;

    foreach (var (normalizedAuthor, entries) in index)
    {
        if (!normalizedOriginalTitle.Contains(normalizedAuthor))
        {
            continue;
        }

        // Rank entries by the longest variation that actually occurs in the title. The variations
        // live in a HashSet, which has no defined order, so its "first" element is not a usable key.
        List<(int MatchLength, string CacheKey)> candidates = [];
        foreach (var (titleVariations, entryCacheKey) in entries)
        {
            var longestMatch = -1;
            foreach (var titleVariation in titleVariations)
            {
                if (titleVariation.Length > longestMatch && normalizedOriginalTitle.Contains(titleVariation))
                {
                    longestMatch = titleVariation.Length;
                }
            }

            if (longestMatch >= 0)
            {
                candidates.Add((longestMatch, entryCacheKey));
            }
        }

        foreach (var (_, candidateCacheKey) in candidates.OrderByDescending(candidate => candidate.MatchLength))
        {
            if (cache.TryGetValue(candidateCacheKey, out SearchItem? item))
            {
                return item;
            }
        }
    }

    return null;
}
2024-02-12 21:04:18 +01:00
/// <summary>
/// Source-generated regular expression for the pattern <c>\s</c> (a single whitespace character).
/// </summary>
[GeneratedRegex("\\s")]
private static partial Regex WhiteSpaceRegex ( ) ;
2024-02-12 01:57:41 +01:00
}
}