(back) Suggester: Improve processor - more generic
Process album & artist with a calculated field. Separate main. Show progress. (cherry picked from commit fc8407cc6a51fe18b14169b3a3f0e4fc363beb4f)
This commit is contained in:
70
suggester.py
70
suggester.py
@@ -1,5 +1,6 @@
|
||||
import requests
|
||||
import json
|
||||
import sys
|
||||
|
||||
ELS_URL ='http://localhost:9200'
|
||||
INDEX = 'itunes-suggest'
|
||||
@@ -26,15 +27,15 @@ def get_tokens(data: str) -> list:
|
||||
raise NoGoodDataException('Data is not correct to get tokens')
|
||||
return [t['token'] for t in r.json()['tokens']]
|
||||
|
||||
def post_document(artist: str = None, artist_sugget: list = None, album: str = None, album_suggest: list = None) -> bool:
|
||||
def post_document(name: str, input: list, field_name: str) -> bool:
|
||||
suggest_name = field_name + '_suggest'
|
||||
element = {
|
||||
"artist_suggest" : artist_sugget,
|
||||
"artist": artist,
|
||||
"album": album,
|
||||
"album_suggest": album_suggest}
|
||||
field_name: name,
|
||||
suggest_name: input
|
||||
}
|
||||
|
||||
# Filter empty keys
|
||||
element = {k: v for k, v in element.items() if v}
|
||||
# element = {k: v for k, v in element.items() if v}
|
||||
|
||||
url = '{}/{}/_doc'.format(ELS_URL, INDEX)
|
||||
resp = requests.post(url, json=element)
|
||||
@@ -48,36 +49,35 @@ def post_document(artist: str = None, artist_sugget: list = None, album: str = N
|
||||
# print('Post_element - Element created: ' + el_id)
|
||||
return el_id
|
||||
|
||||
def process_file(file_name: str, field_name: str) -> int:
    """Post one suggest document per usable line of a JSON-lines file.

    Every line of *file_name* is parsed as a JSON object. For each object
    that carries both an ``"Artist"`` key and the *field_name* key, the field
    value is tokenized with ``get_tokens`` and sent with ``post_document``
    under the lower-cased field name. A percentage progress indicator is
    written to stdout while the file is processed.

    Args:
        file_name: Path of the file to read, one JSON object per line.
        field_name: Key to read from each object, e.g. ``'Album'``.

    Returns:
        The number of records for which tokenizing and posting finished
        without raising ``NoGoodDataException``.
    """
    print('Process file: ' + file_name)
    with open(file_name, 'r') as o_file:
        lines = o_file.readlines()

    total = len(lines)
    count = 0
    for position, line in enumerate(lines, start=1):
        # Print the percentage, then queue backspaces so the next percentage
        # overwrites this one instead of scrolling the terminal.
        sys.stdout.write(str(int((position / total) * 100)) + '%')
        sys.stdout.flush()
        sys.stdout.write("\b" * (40 + 1))

        data = json.loads(line)
        # NOTE(review): "Artist" is required even when field_name is another
        # key - presumably intentional; confirm against the export format.
        # The field_name check prevents a KeyError on records missing it.
        if "Artist" not in data or field_name not in data:
            continue

        try:
            tokens = get_tokens(data[field_name])
            post_document(name=data[field_name], input=tokens,
                          field_name=field_name.lower())
            count += 1
        except NoGoodDataException:
            print('ERROR WITH DATA')
            print(str(data))

    print('File processed\n')
    return count
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Index both exports through the generic processor. Each call returns the
    # number of documents it posted, so the sum is the total created.
    # The former per-file loops are dropped: they passed keyword arguments
    # that the generic post_document(name, input, field_name) does not accept,
    # and would have indexed every record a second time.
    count = 0
    count += process_file('/home/budd/workspace/iTunes/es-albums.json', 'Album')
    count += process_file('/home/budd/workspace/iTunes/es-artists.json', 'Artist')
    print('Created documents: ' + str(count))
|
||||
|
||||
Reference in New Issue
Block a user