(back) Suggester V3: Process album in a separate way

(cherry picked from commit ebbeeccfb8535dbb67240d2c68c7dc9a4da7e7f8)
This commit is contained in:
2021-08-01 03:42:46 +02:00
parent 8121f3d751
commit 520d0be595
2 changed files with 43 additions and 24 deletions

View File

@@ -55,6 +55,7 @@ GET itunes-suggest/_analyze
POST itunes-suggest/_search POST itunes-suggest/_search
{ {
"_source" : "artist",
"suggest": { "suggest": {
"name-suggest": { "name-suggest": {
"prefix": "sou", "prefix": "sou",
@@ -67,9 +68,10 @@ POST itunes-suggest/_search
POST itunes-suggest/_search POST itunes-suggest/_search
{ {
"_source" : "album",
"suggest": { "suggest": {
"name-suggest": { "name-suggest": {
"prefix": "trip", "prefix": "new",
"completion": { "completion": {
"field": "album_suggest", "field": "album_suggest",
"size": 20 "size": 20

View File

@@ -26,7 +26,7 @@ def get_tokens(data: str) -> list:
raise NoGoodDataException('Data is not correct to get tokens') raise NoGoodDataException('Data is not correct to get tokens')
return [t['token'] for t in r.json()['tokens']] return [t['token'] for t in r.json()['tokens']]
def post_document(artist: str, artist_sugget: list, album: str, album_suggest: list) -> bool: def post_document(artist: str = None, artist_sugget: list = None, album: str = None, album_suggest: list = None) -> bool:
element = { element = {
"artist_suggest" : artist_sugget, "artist_suggest" : artist_sugget,
"artist": artist, "artist": artist,
@@ -48,19 +48,36 @@ def post_document(artist: str, artist_sugget: list, album: str, album_suggest: l
# print('Post_element - Element created: ' + el_id) # print('Post_element - Element created: ' + el_id)
return el_id return el_id
if __name__ == '__main__':
    # Bulk-load the suggester index in two passes: one document per artist
    # record, then one document per album record. Each input file is read
    # as one JSON object per line.
    artists_path = '/home/budd/workspace/iTunes/es-artists.json'
    albums_path = '/home/budd/workspace/iTunes/es-albums.json'

    count = 0  # documents posted without a NoGoodDataException, both passes

    # Open both files before posting anything so a missing file aborts the
    # run up front, then stream line by line instead of loading both files
    # fully into memory.
    with open(artists_path, 'r') as artists_file, \
            open(albums_path, 'r') as albums_file:
        for line in artists_file:
            if not line.strip():
                continue  # tolerate blank lines between records
            data = json.loads(line)
            if "Artist" not in data:
                continue
            try:
                artist_input = get_tokens(data['Artist'])
                # `artist_sugget` (sic) is the keyword name declared by
                # post_document; it must be spelled this way here.
                post_document(artist=data['Artist'], artist_sugget=artist_input)
            except NoGoodDataException:
                print('ERROR WITH DATA')
                print(str(data))
            else:
                count += 1

        for line in albums_file:
            if not line.strip():
                continue  # tolerate blank lines between records
            data = json.loads(line)
            # Guard on the key that is actually read below. The previous
            # check tested "Artist", which raised KeyError on records
            # without an album and skipped album records without an artist.
            if "Album" not in data:
                continue
            try:
                album_input = get_tokens(data['Album'])
                post_document(album=data['Album'], album_suggest=album_input)
            except NoGoodDataException:
                print('ERROR WITH DATA')
                print(str(data))
            else:
                count += 1

    print('Created documents: ' + str(count))