(back) Suggester V1: Process artist data

(cherry picked from commit 02f5705fde37e1aaef5c68de62aafe45fc86d490)
This commit is contained in:
2021-07-30 16:28:07 +02:00
parent 797c88f946
commit 436edaf3f2
2 changed files with 101 additions and 0 deletions

58
suggester.es Normal file
View File

@@ -0,0 +1,58 @@
# Delete the itunes-suggest index so that the PUT request that follows can recreate it.
DELETE itunes-suggest
# Create the itunes-suggest index.
# - settings: defines the custom "artist_name" analyzer (standard tokenizer, then
#   lowercase, asciifolding, French stop words and English stop words).
# - mappings: "artist_suggest" is a completion field, "artist" is a keyword field.
# NOTE(review): no mapped field references the "artist_name" analyzer in this request;
# in this file it is only invoked explicitly through _analyze. Confirm this is intended.
PUT /itunes-suggest
{
"settings": {
"analysis": {
"filter": {
"french_stop": {
"type": "stop",
"stopwords": "_french_"
},
"english_stop": {
"type": "stop",
"stopwords": "_english_"
}
},
"analyzer": {
"artist_name": {
"tokenizer": "standard",
"filter": [
"lowercase",
"asciifolding",
"french_stop",
"english_stop"
]
}
}
}
},
"mappings": {
"properties": {
"artist_suggest": {
"type": "completion"
},
"artist": {
"type": "keyword"
}
}
}
}
# Run the index's "artist_name" analyzer on a sample string to inspect the tokens it emits.
GET itunes-suggest/_analyze
{
"analyzer": "artist_name",
"text": "the servent"
}
# Ask the completion suggester on the "artist_suggest" field for entries matching the
# prefix "sou"; the results are returned under the "name-suggest" key.
POST itunes-suggest/_search
{
"suggest": {
"name-suggest": {
"prefix": "sou",
"completion": {
"field": "artist_suggest"
}
}
}
}

43
suggester.py Normal file
View File

@@ -0,0 +1,43 @@
import json
from typing import Optional

import requests
# Base URL of the Elasticsearch HTTP endpoint every request in this script is sent to.
ELS_URL ='http://localhost:9200'
# Name of the index targeted by the _analyze and _doc requests.
INDEX = 'itunes-suggest'
def get_tokens(data: str) -> list:
    """Return the tokens the ``artist_name`` analyzer produces for *data*.

    Sends *data* to the ``_analyze`` endpoint of ``INDEX`` on ``ELS_URL`` and
    collects the ``token`` value of every entry of the response's ``tokens``
    list, in response order.

    Args:
        data: Text to analyze.

    Returns:
        The list of ``token`` values found in the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    query = {
        "analyzer": "artist_name",
        "text": data,
    }
    url = '{}/{}/_analyze'.format(ELS_URL, INDEX)
    r = requests.get(url, json=query)
    # Surface the real HTTP failure (e.g. missing index or analyzer) instead
    # of an opaque KeyError on 'tokens' when the body is an error document.
    r.raise_for_status()
    return [t['token'] for t in r.json()['tokens']]
def post_artist(artist: str, sugget_input: list) -> Optional[str]:
    """Index one artist document and return the id assigned to it.

    Posts a document with the fields ``artist_suggest`` (set to
    *sugget_input*) and ``artist`` (set to *artist*) to the ``_doc`` endpoint
    of ``INDEX`` on ``ELS_URL``.

    Args:
        artist: Artist name stored in the ``artist`` field.
        sugget_input: Values stored in the ``artist_suggest`` field.

    Returns:
        The ``_id`` from the response when the server answers 201, otherwise
        ``None`` (the status code and response body are printed in that case).
    """
    element = {"artist_suggest": sugget_input, "artist": artist}
    url = '{}/{}/_doc'.format(ELS_URL, INDEX)
    resp = requests.post(url, json=element)
    if resp.status_code != 201:
        # Best effort: report the failure and let the caller carry on with
        # the next document.
        print('ELS Response KO')
        print(resp.status_code)
        print(resp.text)
        return None
    el_id = resp.json()['_id']
    print('Post_element - Element created: ' + el_id)
    return el_id
# Feed the artist export into the index: each non-blank line of the file is
# parsed as one JSON document, and documents carrying an "Artist" key are
# tokenized and posted.
with open('/home/budd/workspace/iTunes/es-artists.json', 'r',
          encoding='utf-8') as itunes_file:
    # Iterate the file lazily instead of loading every line with readlines().
    for line in itunes_file:
        if not line.strip():
            # A blank line holds no JSON document and would make json.loads raise.
            continue
        data = json.loads(line)
        if "Artist" in data:
            tokens = get_tokens(data['Artist'])
            post_artist(data['Artist'], tokens)