diff --git a/suggester.es b/suggester.es index 4b570b1..53ac473 100644 --- a/suggester.es +++ b/suggester.es @@ -15,7 +15,7 @@ PUT /itunes-suggest } }, "analyzer": { - "artist_name": { + "names": { "tokenizer": "standard", "filter": [ "lowercase", @@ -34,14 +34,22 @@ PUT /itunes-suggest }, "artist": { "type": "keyword" + }, + "album_suggest": { + "type": "completion" + }, + "album": { + "type": "keyword" } } } } +// Problem with word EP, SP + GET itunes-suggest/_analyze { - "analyzer": "artist_name", + "analyzer": "names", "text": "the servent" } @@ -56,3 +64,16 @@ POST itunes-suggest/_search } } } + +POST itunes-suggest/_search +{ + "suggest": { + "name-suggest": { + "prefix": "trip", + "completion": { + "field": "album_suggest", + "size": 20 + } + } + } +} diff --git a/suggester.py b/suggester.py index c06aeb3..feff4c2 100644 --- a/suggester.py +++ b/suggester.py @@ -4,18 +4,37 @@ import json ELS_URL ='http://localhost:9200' INDEX = 'itunes-suggest' +class NoGoodDataException(Exception): + def __init__(self, message): + super().__init__(message) + def get_tokens(data: str) -> list: + if not data: + return [] query = { - "analyzer": "artist_name", + "analyzer": "names", "text" : data } url = '{}/{}/_analyze'.format(ELS_URL, INDEX) r = requests.get(url, json=query) + + if not 'tokens' in r.json(): + print('ERROR: Not tokens in result') + print('Input: ' + str(data)) + print('Request: ' + str(r.json())) + raise NoGoodDataException('Data is not correct to get tokens') return [t['token'] for t in r.json()['tokens']] -def post_artist(artist: str, sugget_input: list) -> bool: - element = { "artist_suggest" : sugget_input, "artist": artist } +def post_document(artist: str, artist_sugget: list, album: str, album_suggest: list) -> bool: + element = { + "artist_suggest" : artist_sugget, + "artist": artist, + "album": album, + "album_suggest": album_suggest} + + # Filter empty keys + element = {k: v for k, v in element.items() if v} url = '{}/{}/_doc'.format(ELS_URL, INDEX) resp = requests.post(url, json=element) @@ -26,7 +45,7 @@ def post_artist(artist: str, sugget_input: list) -> bool: return el_id = resp.json()['_id'] - print('Post_element - Element created: ' + el_id) + # print('Post_element - Element created: ' + el_id) return el_id @@ -38,6 +57,10 @@ lines = itunes_file.readlines() for line in lines: data = json.loads(line) if "Artist" in data: - # print(data) - input = get_tokens(data['Artist']) - post_artist(data['Artist'], input) + try : + artist_input = get_tokens(data['Artist']) + album_input = get_tokens(data['Album']) + post_document(data['Artist'], artist_input, data['Album'], album_input) + except NoGoodDataException: + print('ERROR WITH DATA') + print(str(data))