diff --git a/suggester.es b/suggester.es
new file mode 100644
index 0000000..4b570b1
--- /dev/null
+++ b/suggester.es
@@ -0,0 +1,58 @@
+DELETE itunes-suggest
+
+PUT /itunes-suggest
+{
+  "settings": {
+    "analysis": {
+      "filter": {
+        "french_stop": {
+          "type": "stop",
+          "stopwords": "_french_"
+        },
+        "english_stop": {
+          "type": "stop",
+          "stopwords": "_english_"
+        }
+      },
+      "analyzer": {
+        "artist_name": {
+          "tokenizer": "standard",
+          "filter": [
+            "lowercase",
+            "asciifolding",
+            "french_stop",
+            "english_stop"
+          ]
+        }
+      }
+    }
+  },
+  "mappings": {
+    "properties": {
+      "artist_suggest": {
+        "type": "completion"
+      },
+      "artist": {
+        "type": "keyword"
+      }
+    }
+  }
+}
+
+GET itunes-suggest/_analyze
+{
+  "analyzer": "artist_name",
+  "text": "the servent"
+}
+
+POST itunes-suggest/_search
+{
+  "suggest": {
+    "name-suggest": {
+      "prefix": "sou",
+      "completion": {
+        "field": "artist_suggest"
+      }
+    }
+  }
+}
diff --git a/suggester.py b/suggester.py
new file mode 100644
index 0000000..c06aeb3
--- /dev/null
+++ b/suggester.py
@@ -0,0 +1,60 @@
+import json
+from typing import Optional
+
+import requests
+
+ELS_URL = 'http://localhost:9200'
+INDEX = 'itunes-suggest'
+ARTISTS_FILE = '/home/budd/workspace/iTunes/es-artists.json'
+
+
+def get_tokens(data: str) -> list:
+    """Return the tokens the ``artist_name`` analyzer produces for *data*.
+
+    Raises ``requests.HTTPError`` when the ``_analyze`` request fails.
+    """
+    query = {
+        "analyzer": "artist_name",
+        "text": data,
+    }
+
+    url = '{}/{}/_analyze'.format(ELS_URL, INDEX)
+    r = requests.get(url, json=query)
+    # Surface HTTP errors instead of a KeyError on the missing 'tokens'.
+    r.raise_for_status()
+    return [t['token'] for t in r.json()['tokens']]
+
+
+def post_artist(artist: str, sugget_input: list) -> Optional[str]:
+    """Index one artist document and return its ``_id``.
+
+    *sugget_input* is sent as the ``artist_suggest`` field and *artist* as
+    the ``artist`` field. Returns ``None``, after printing the response,
+    when the server does not answer with status 201.
+    """
+    element = {"artist_suggest": sugget_input, "artist": artist}
+
+    url = '{}/{}/_doc'.format(ELS_URL, INDEX)
+    resp = requests.post(url, json=element)
+    if resp.status_code != 201:
+        print('ELS Response KO')
+        print(resp.status_code)
+        print(resp.text)
+        return None
+
+    el_id = resp.json()['_id']
+    print('Post_element - Element created: ' + el_id)
+    return el_id
+
+
+# Each line of the input file is parsed as JSON. Stream the file so the
+# handle is closed on exit and the whole file is never held in memory.
+with open(ARTISTS_FILE, 'r', encoding='utf-8') as itunes_file:
+    for line in itunes_file:
+        if not line.strip():
+            # json.loads() raises on blank lines (e.g. a trailing newline).
+            continue
+        data = json.loads(line)
+        if "Artist" in data:
+            suggest_input = get_tokens(data['Artist'])
+            post_artist(data['Artist'], suggest_input)