From ad0487943a17133cdb250abddd16006ece310e20 Mon Sep 17 00:00:00 2001 From: "Maxence G. de Montauzan" Date: Sun, 22 Aug 2021 19:41:58 +0200 Subject: [PATCH] Process for album: adds too many uninteresting results (e.g. all albums for one artist), which prevents finding interesting information --- suggester.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/suggester.py b/suggester.py index 9a3e4e8..42e15cc 100644 --- a/suggester.py +++ b/suggester.py @@ -78,9 +78,11 @@ def process_file(file_name: str, field_name: str, array_file: str = None) -> int try : input = get_tokens(data[field_name]) - if array_file and data[array_file]: + if array_file and array_file in data and data[array_file]: for key in data[array_file]: - input.extend(get_tokens(key)) + if key != data[field_name]: # => Absolutely don't work for album, and block a scoring for artists + input.extend(get_tokens(key)) + # TODO Input have the same value several times ==> use to process a score post_document(name=data[field_name], input=input, field_name=field_name.lower()) count += 1 @@ -95,7 +97,7 @@ def process_file(file_name: str, field_name: str, array_file: str = None) -> int if __name__ == '__main__': # Using readlines() count = 0 - count += process_file('/home/budd/workspace/iTunes/es-albums.json', 'Album') + count += process_file('/home/budd/workspace/iTunes/es-albums.json', 'Album', 'Artist') print('Created documents: ' + str(count)) count += process_file('/home/budd/workspace/iTunes/es-artists.json', 'Artist', 'Album Artist') print('Created documents: ' + str(count))