"""Load iTunes artist and album names into an Elasticsearch suggest index.

Each input file is read as one JSON object per line.  For every record that
carries the relevant name field, the name is run through the index's
``names`` analyzer and a document holding the name plus its tokens is posted
to the index.
"""
import json
from typing import Iterable, Optional

import requests

ELS_URL = 'http://localhost:9200'
INDEX = 'itunes-suggest'

# Seconds to wait on Elasticsearch; requests has no default timeout and would
# otherwise block forever on an unresponsive server.
REQUEST_TIMEOUT = 30

ARTISTS_FILE = '/home/budd/workspace/iTunes/es-artists.json'
ALBUMS_FILE = '/home/budd/workspace/iTunes/es-albums.json'


class NoGoodDataException(Exception):
    """Raised when the analyze response carries no ``tokens`` entry."""


def get_tokens(data: str) -> list:
    """Return the tokens the ``names`` analyzer produces for *data*.

    Args:
        data: Text to analyze.  Empty or ``None`` yields an empty list
            without contacting the server.

    Returns:
        The ``token`` value of every entry in the response's ``tokens`` list.

    Raises:
        NoGoodDataException: The response JSON has no ``tokens`` key.
    """
    if not data:
        return []

    query = {"analyzer": "names", "text": data}
    url = '{}/{}/_analyze'.format(ELS_URL, INDEX)
    response = requests.get(url, json=query, timeout=REQUEST_TIMEOUT)

    # Decode the body once; it is needed for the check, the log and the result.
    body = response.json()
    if 'tokens' not in body:
        print('ERROR: Not tokens in result')
        print('Input: ' + str(data))
        print('Response: ' + str(body))
        raise NoGoodDataException('Data is not correct to get tokens')

    return [entry['token'] for entry in body['tokens']]


def post_document(artist: Optional[str] = None,
                  artist_sugget: Optional[list] = None,
                  album: Optional[str] = None,
                  album_suggest: Optional[list] = None) -> Optional[str]:
    """Post one document to the index and return its id.

    Only arguments with a truthy value are included in the document.

    Args:
        artist: Artist name.
        artist_sugget: Suggestion inputs stored under ``artist_suggest``.
            (The parameter name keeps its original spelling so existing
            keyword callers continue to work.)
        album: Album name.
        album_suggest: Suggestion inputs stored under ``album_suggest``.

    Returns:
        The ``_id`` reported by the server, or ``None`` when there was
        nothing to post or the server did not answer with status 201.
    """
    element = {
        "artist_suggest": artist_sugget,
        "artist": artist,
        "album": album,
        "album_suggest": album_suggest,
    }
    # Filter empty keys
    element = {key: value for key, value in element.items() if value}
    if not element:
        # Nothing left after filtering: do not create an empty document.
        return None

    url = '{}/{}/_doc'.format(ELS_URL, INDEX)
    resp = requests.post(url, json=element, timeout=REQUEST_TIMEOUT)
    if resp.status_code != 201:
        print('ELS Response KO')
        print(resp.status_code)
        print(resp.text)
        return None

    return resp.json()['_id']


def _index_lines(lines: Iterable[str], field: str,
                 name_arg: str, suggest_arg: str) -> int:
    """Index every record in *lines* that has *field*; return how many were created.

    Args:
        lines: Iterable of text lines, each holding one JSON object.
        field: Key of the record whose value is analyzed and indexed.
        name_arg: ``post_document`` keyword that receives the raw name.
        suggest_arg: ``post_document`` keyword that receives the tokens.
    """
    created = 0
    for line in lines:
        if not line.strip():
            # A blank line (e.g. a trailing newline) is not a JSON document.
            continue
        data = json.loads(line)
        if field not in data:
            continue
        try:
            tokens = get_tokens(data[field])
        except NoGoodDataException:
            print('ERROR WITH DATA')
            print(str(data))
            continue
        # Count only documents the server actually created.
        if post_document(**{name_arg: data[field], suggest_arg: tokens}):
            created += 1
    return created


def main() -> None:
    """Index the artist file, then the album file, and report the total."""
    # Open both files up front so a missing file fails before anything is posted.
    with open(ARTISTS_FILE, 'r', encoding='utf-8') as artists_file, \
            open(ALBUMS_FILE, 'r', encoding='utf-8') as albums_file:
        count = _index_lines(artists_file, 'Artist', 'artist', 'artist_sugget')
        # Album records are selected by their own 'Album' key, the field
        # that is actually read and indexed.
        count += _index_lines(albums_file, 'Album', 'album', 'album_suggest')
    print('Created documents: ' + str(count))


if __name__ == '__main__':
    main()