diff --git a/iTunesParser.py b/iTunesParser.py
index 366e346..5e9062e 100644
--- a/iTunesParser.py
+++ b/iTunesParser.py
@@ -147,7 +147,8 @@ class ITunesParser:
                 'Play Count': 0,
                 'Rating': 0,
                 'Genre': set(),
-                'Album': set()
+                'Album': set(),
+                'Album Artist': set()
             }
 
         # Compute information
@@ -168,6 +169,9 @@ class ITunesParser:
         if 'Album' in track:
             self._artists[akey]['Album'].add(track['Album'])
 
+        if 'Album Artist' in track:  # NOTE(review): stores track['Artist'], not track['Album Artist'] - confirm intended
+            self._artists[akey]['Album Artist'].add(track['Artist'])
+
     def _process_album(self, track):
         """
         Process albums in the track part of library and return a JSON formated for a bulk ELS request
diff --git a/suggester.es b/suggester.es
index c422322..f3b84ed 100644
--- a/suggester.es
+++ b/suggester.es
@@ -3,6 +3,9 @@ DELETE itunes-suggest
 PUT /itunes-suggest
 {
   "settings": {
+    "index": {
+      "number_of_replicas": 0
+    },
     "analysis": {
       "filter": {
         "french_stop": {
diff --git a/suggester.py b/suggester.py
index 288b9f5..9a3e4e8 100644
--- a/suggester.py
+++ b/suggester.py
@@ -49,7 +49,19 @@ def post_document(name: str, input: list, field_name: str) -> bool:
     # print('Post_element - Element created: ' + el_id)
     return el_id
 
-def process_file(file_name: str, field_name: str) -> int:
+def process_file(file_name: str, field_name: str, array_file: str = None) -> int:
+    """
+    Process a JSON-lines file and post a suggest document for each data line
+
+    Parameters
+    ----------
+    file_name: string
+        Path of the file to open and analyze
+    field_name: string
+        Name of the field whose value is tokenized to build the suggest input
+    array_file: string, Default: None
+        Name of a field holding an array of extra values to tokenize; ignored if None or absent
+    """
     print('Process file: ' + file_name)
     with open(file_name, 'r') as o_file:
         lines = o_file.readlines()
@@ -62,9 +74,14 @@ def process_file(file_name: str, field_name: str) -> int:
             sys.stdout.flush()
             sys.stdout.write("\b" * (40+1)) # return to start of line, after '['
             data = json.loads(line)
-            if "Artist" in data:
+            if "index" not in data: # Exclude index line
                 try :
                     input = get_tokens(data[field_name])
+
+                    if array_file and data.get(array_file):  # .get(): the field may be missing from a document
+                        for key in data[array_file]:
+                            input.extend(get_tokens(key))
+                    # TODO: input may hold the same value several times ==> use that to compute a score
                     post_document(name=data[field_name], input=input, field_name=field_name.lower())
                     count += 1
                 except NoGoodDataException:
@@ -79,5 +96,6 @@ if __name__ == '__main__':
     # Using readlines()
     count = 0
     count += process_file('/home/budd/workspace/iTunes/es-albums.json', 'Album')
-    count += process_file('/home/budd/workspace/iTunes/es-artists.json', 'Artist')
+    print('Created documents: ' + str(count))
+    count += process_file('/home/budd/workspace/iTunes/es-artists.json', 'Artist', 'Album Artist')
     print('Created documents: ' + str(count))