From 520d0be5954ccdc45e4c43001d7becfe9370ede0 Mon Sep 17 00:00:00 2001
From: "Maxence G. de Montauzan"
Date: Sun, 1 Aug 2021 03:42:46 +0200
Subject: [PATCH] (back) Suggester V3: Process album in a separate way

(cherry picked from commit ebbeeccfb8535dbb67240d2c68c7dc9a4da7e7f8)
---
Reviewer notes (this area is not part of the commit message):

* This copy of the patch had lost its line breaks. The line structure was
  rebuilt so that every hunk agrees with the counts in its hunk header and
  with the diffstat below. Leading indentation was inferred from syntax, so
  if context lines do not match the target files exactly, apply with
  `git am --ignore-whitespace` or `git apply --ignore-whitespace`.
* The payload itself is unchanged. Points to confirm with the author:
  - the albums loop guards on the "Artist" key but reads data['Album'];
    presumably the guard should be on "Album" - verify against the layout
    of es-albums.json.
  - post_document is annotated `-> bool` but the visible tail returns el_id.
  - the comment "Strips the newline character" sits above loops that only
    parse each line as JSON and post it.

 suggester.es | 20 +++++++++++---------
 suggester.py | 47 ++++++++++++++++++++++++++++++++---------------
 2 files changed, 43 insertions(+), 24 deletions(-)

diff --git a/suggester.es b/suggester.es
index 53ac473..bcdeca3 100644
--- a/suggester.es
+++ b/suggester.es
@@ -55,22 +55,24 @@ GET itunes-suggest/_analyze
 
 POST itunes-suggest/_search
 {
-  "suggest": {
-    "name-suggest": {
-      "prefix": "sou",
-      "completion": {
-        "field": "artist_suggest"
-      }
+    "_source" : "artist",
+    "suggest": {
+        "name-suggest": {
+            "prefix": "sou",
+            "completion": {
+                "field": "artist_suggest"
+            }
+        }
     }
-  }
 }
 
 POST itunes-suggest/_search
 {
+  "_source" : "album",
   "suggest": {
     "name-suggest": {
-      "prefix": "trip",
-      "completion": {
+      "prefix": "new",
+      "completion": {
         "field": "album_suggest",
         "size": 20
       }
diff --git a/suggester.py b/suggester.py
index feff4c2..3cd76e4 100644
--- a/suggester.py
+++ b/suggester.py
@@ -26,7 +26,7 @@ def get_tokens(data: str) -> list:
         raise NoGoodDataException('Data is not correct to get tokens')
     return [t['token'] for t in r.json()['tokens']]
 
-def post_document(artist: str, artist_sugget: list, album: str, album_suggest: list) -> bool:
+def post_document(artist: str = None, artist_sugget: list = None, album: str = None, album_suggest: list = None) -> bool:
     element = {
         "artist_suggest" : artist_sugget,
         "artist": artist,
@@ -48,19 +48,36 @@ def post_document(artist: str, artist_sugget: list, album: str, album_suggest: l
     # print('Post_element - Element created: ' + el_id)
     return el_id
 
+if __name__ == '__main__':
+    # Using readlines()
+    with open('/home/budd/workspace/iTunes/es-artists.json', 'r') as artist_file:
+        artists_lines = artist_file.readlines()
 
-# Using readlines()
-itunes_file = open('/home/budd/workspace/iTunes/es-artists.json', 'r')
-lines = itunes_file.readlines()
+    with open('/home/budd/workspace/iTunes/es-albums.json', 'r') as artist_file:
+        albums_lines = artist_file.readlines()
 
-# Strips the newline character
-for line in lines:
-    data = json.loads(line)
-    if "Artist" in data:
-        try :
-            artist_input = get_tokens(data['Artist'])
-            album_input = get_tokens(data['Album'])
-            post_document(data['Artist'], artist_input, data['Album'], album_input)
-        except NoGoodDataException:
-            print('ERROR WITH DATA')
-            print(str(data))
+    # Strips the newline character
+    count = 0
+    for line in artists_lines:
+        data = json.loads(line)
+        if "Artist" in data:
+            try :
+                artist_input = get_tokens(data['Artist'])
+                post_document(artist=data['Artist'], artist_sugget=artist_input)
+                count += 1
+            except NoGoodDataException:
+                print('ERROR WITH DATA')
+                print(str(data))
+
+    for line in albums_lines:
+        data = json.loads(line)
+        if "Artist" in data:
+            try :
+                album_input = get_tokens(data['Album'])
+                post_document(album=data['Album'], album_suggest=album_input)
+                count += 1
+            except NoGoodDataException:
+                print('ERROR WITH DATA')
+                print(str(data))
+
+    print('Created documents: ' + str(count))