Files
iTunes/suggester.py
Maxence G. de Montauzan 60e1fb2e74 (back) Suggester: Improve processor - more generic
Process album & artist with a calculated field
Separate main
Show progress

(cherry picked from commit fc8407cc6a51fe18b14169b3a3f0e4fc363beb4f)
2021-08-22 17:01:19 +02:00

84 lines
2.4 KiB
Python

import json
import sys
from typing import Optional

import requests
# Base URL of the search server (presumably Elasticsearch) that every request
# in this module is sent to.
ELS_URL = 'http://localhost:9200'
# Name of the index that is analyzed against and that documents are posted to.
INDEX = 'itunes-suggest'
class NoGoodDataException(Exception):
    """Raised when the analyze response does not contain any token list."""

    def __init__(self, message):
        """Store *message* as the exception text."""
        super().__init__(message)
def get_tokens(data: str) -> list:
    """Return the tokens the index's ``names`` analyzer produces for *data*.

    Args:
        data: Text to analyze. A falsy value (``None`` or ``''``) is not sent
            to the server at all.

    Returns:
        The ``token`` value of every entry in the response's ``tokens`` list,
        or an empty list when *data* is falsy.

    Raises:
        NoGoodDataException: The response body has no ``tokens`` key.
    """
    if not data:
        return []

    url = '{}/{}/_analyze'.format(ELS_URL, INDEX)
    query = {
        "analyzer": "names",
        "text": data,
    }
    # NOTE(review): no timeout is set, so a stalled server blocks forever.
    response = requests.get(url, json=query)

    # Decode the body once; every later check reuses the same object.
    payload = response.json()
    if 'tokens' not in payload:
        print('ERROR: Not tokens in result')
        print('Input: ' + str(data))
        print('Request: ' + str(payload))
        raise NoGoodDataException('Data is not correct to get tokens')

    return [entry['token'] for entry in payload['tokens']]
def post_document(name: str, input: list, field_name: str) -> Optional[str]:
    """Post one suggestion document to the index and return its id.

    The document has two keys: ``field_name`` holding *name*, and
    ``<field_name>_suggest`` holding *input*.

    Args:
        name: Value stored under ``field_name``.
        input: Values stored under the ``_suggest`` key. The parameter name
            shadows the builtin but is kept because callers pass it by
            keyword.
        field_name: Name of the document field; also the prefix of the
            suggest field.

    Returns:
        The ``_id`` reported by the server, or ``None`` when the response
        status is anything other than 201 (the failure is printed).
    """
    element = {
        field_name: name,
        field_name + '_suggest': input,
    }
    url = '{}/{}/_doc'.format(ELS_URL, INDEX)
    resp = requests.post(url, json=element)

    if resp.status_code != 201:
        print('ELS Response KO')
        print(resp.status_code)
        print(resp.text)
        return None

    return resp.json()['_id']
def process_file(file_name: str, field_name: str) -> int:
    """Index every record of a JSON-lines file and count the created documents.

    Each non-blank line of *file_name* is parsed as one JSON object. When the
    object has a *field_name* key, its value is tokenized with ``get_tokens``
    and posted with ``post_document`` under the lower-cased field name. A
    percentage is written to stdout as lines are consumed.

    Args:
        file_name: Path of the file to read, one JSON object per line.
        field_name: Key to read from each record (e.g. ``'Album'``).

    Returns:
        Number of records for which ``post_document`` returned an id.
    """
    print('Process file: ' + file_name)
    # JSON text is UTF-8 by specification; do not rely on the locale default.
    with open(file_name, 'r', encoding='utf-8') as o_file:
        lines = o_file.readlines()

    total = len(lines)
    count = 0
    for position, line in enumerate(lines, start=1):
        # Trailing carriage return puts the cursor back at the line start so
        # the next write overwrites the previous percentage.
        sys.stdout.write(str(int((position / total) * 100)) + '%\r')
        sys.stdout.flush()

        if not line.strip():
            # Blank lines carry no record and would make json.loads fail.
            continue
        data = json.loads(line)
        # Guard on the key that is actually read below, not a fixed one.
        if field_name not in data:
            continue

        try:
            tokens = get_tokens(data[field_name])
            created_id = post_document(
                name=data[field_name],
                input=tokens,
                field_name=field_name.lower(),
            )
        except NoGoodDataException:
            print('ERROR WITH DATA')
            print(str(data))
            continue

        # Only count documents the server confirmed.
        if created_id is not None:
            count += 1

    print('File processed\n')
    return count
if __name__ == '__main__':
    # Each input file is paired with the record field it supplies; the files
    # are processed in this order and their document counts are added up.
    sources = (
        ('/home/budd/workspace/iTunes/es-albums.json', 'Album'),
        ('/home/budd/workspace/iTunes/es-artists.json', 'Artist'),
    )
    count = sum(process_file(path, field) for path, field in sources)
    print('Created documents: ' + str(count))