From 60e1fb2e74daa4e947e8837d3d59119727c88384 Mon Sep 17 00:00:00 2001
From: "Maxence G. de Montauzan"
Date: Fri, 13 Aug 2021 02:30:39 +0200
Subject: [PATCH] (back) Suggester: Improve processor - more generic

Process album & artist with calculated fields
Separate main
Show progress

(cherry picked from commit fc8407cc6a51fe18b14169b3a3f0e4fc363beb4f)
---
Review notes (ignored by git am, not part of the commit message):
 * process_file tests `field_name in data` instead of the hard-coded
   "Artist" key, so a line missing the requested field is skipped
   rather than raising KeyError on data[field_name].
 * Blank lines in the input file are skipped before json.loads.
 * Progress output ends with '\r' instead of 41 backspaces; the stale
   "after '['" comment is gone.
 * The stale "# Using readlines()" comment is removed from the main block.
 * Line breaks and indentation were restored from a whitespace-collapsed
   copy of this patch; verify context-line whitespace against the tree.
   The post-image blob id on the index line predates these changes.

 suggester.py | 85 ++++++++++++++++++++++++++++++----------------------
 1 file changed, 49 insertions(+), 36 deletions(-)

diff --git a/suggester.py b/suggester.py
index 3cd76e4..288b9f5 100644
--- a/suggester.py
+++ b/suggester.py
@@ -1,5 +1,6 @@
 import requests
 import json
+import sys
 
 ELS_URL ='http://localhost:9200'
 INDEX = 'itunes-suggest'
@@ -26,15 +27,15 @@ def get_tokens(data: str) -> list:
         raise NoGoodDataException('Data is not correct to get tokens')
     return [t['token'] for t in r.json()['tokens']]
 
-def post_document(artist: str = None, artist_sugget: list = None, album: str = None, album_suggest: list = None) -> bool:
+def post_document(name: str, input: list, field_name: str) -> bool:
+    suggest_name = field_name + '_suggest'
     element = {
-        "artist_suggest" : artist_sugget,
-        "artist": artist,
-        "album": album,
-        "album_suggest": album_suggest}
+        field_name: name,
+        suggest_name: input
+    }
 
     # Filter empty keys
-    element = {k: v for k, v in element.items() if v}
+    # element = {k: v for k, v in element.items() if v}
 
     url = '{}/{}/_doc'.format(ELS_URL, INDEX)
     resp = requests.post(url, json=element)
@@ -48,36 +49,48 @@ def post_document(artist: str = None, artist_sugget: list = None, album: str = N
     # print('Post_element - Element created: ' + el_id)
     return el_id
 
+def process_file(file_name: str, field_name: str) -> int:
+    """Post one suggest document per JSON line that holds ``field_name``.
+
+    Every non-blank line of ``file_name`` is parsed as a JSON value. When
+    it contains ``field_name``, that value is tokenized with ``get_tokens``
+    and sent through ``post_document`` under the lower-cased field name.
+
+    Returns the number of lines posted without ``NoGoodDataException``;
+    lines that raise it are printed to stdout and not counted.
+    """
+    print('Process file: ' + file_name)
+    with open(file_name, 'r') as o_file:
+        lines = o_file.readlines()
+
+    count = 0
+    total = len(lines)
+    for i, line in enumerate(lines, start=1):
+        # Trailing '\r' parks the cursor at column 0 so the next write
+        # (percentage or error report) overwrites the current one.
+        sys.stdout.write(str(int((i / total) * 100)) + '%\r')
+        sys.stdout.flush()
+        if not line.strip():
+            # A blank line is not a JSON document; json.loads would raise.
+            continue
+        data = json.loads(line)
+        # Test the requested field, not a hard-coded "Artist" key, so a
+        # line lacking field_name is skipped instead of raising KeyError.
+        if field_name in data:
+            try:
+                tokens = get_tokens(data[field_name])
+                post_document(name=data[field_name], input=tokens, field_name=field_name.lower())
+                count += 1
+            except NoGoodDataException:
+                print('ERROR WITH DATA')
+                print(str(data))
+    print('File processed\n')
+
+    return count
+
+
 if __name__ == '__main__':
-    # Using readlines()
-    with open('/home/budd/workspace/iTunes/es-artists.json', 'r') as artist_file:
-        artists_lines = artist_file.readlines()
-
-    with open('/home/budd/workspace/iTunes/es-albums.json', 'r') as artist_file:
-        albums_lines = artist_file.readlines()
-
-    # Strips the newline character
     count = 0
-    for line in artists_lines:
-        data = json.loads(line)
-        if "Artist" in data:
-            try :
-                artist_input = get_tokens(data['Artist'])
-                post_document(artist=data['Artist'], artist_sugget=artist_input)
-                count += 1
-            except NoGoodDataException:
-                print('ERROR WITH DATA')
-                print(str(data))
-
-    for line in albums_lines:
-        data = json.loads(line)
-        if "Artist" in data:
-            try :
-                album_input = get_tokens(data['Album'])
-                post_document(album=data['Album'], album_suggest=album_input)
-                count += 1
-            except NoGoodDataException:
-                print('ERROR WITH DATA')
-                print(str(data))
-
+    count += process_file('/home/budd/workspace/iTunes/es-albums.json', 'Album')
+    count += process_file('/home/budd/workspace/iTunes/es-artists.json', 'Artist')
     print('Created documents: ' + str(count))