(back) Suggester V1: Process artist data
(cherry picked from commit 02f5705fde37e1aaef5c68de62aafe45fc86d490)
This commit is contained in:
58
suggester.es
Normal file
58
suggester.es
Normal file
@@ -0,0 +1,58 @@
|
||||
# Rebuild the index from scratch: drop the existing copy, then re-create it
# with its analysis settings and field mappings.
DELETE itunes-suggest

# Index definition.
#  - analysis: two "stop" token filters (French and English stop-word lists)
#    and the custom "artist_name" analyzer, which runs the standard tokenizer
#    followed by lowercase, asciifolding and both stop filters, in that order.
#  - mappings: "artist_suggest" is a completion field, "artist" is a keyword.
PUT /itunes-suggest
{
  "settings": {
    "analysis": {
      "filter": {
        "french_stop":  { "type": "stop", "stopwords": "_french_" },
        "english_stop": { "type": "stop", "stopwords": "_english_" }
      },
      "analyzer": {
        "artist_name": {
          "tokenizer": "standard",
          "filter": [ "lowercase", "asciifolding", "french_stop", "english_stop" ]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "artist_suggest": { "type": "completion" },
      "artist":         { "type": "keyword" }
    }
  }
}
|
||||
|
||||
# Manual check: run a sample text through the "artist_name" analyzer and
# inspect the tokens it returns.
GET itunes-suggest/_analyze
{
  "analyzer": "artist_name",
  "text": "the servent"
}
|
||||
|
||||
# Manual check: ask the completion suggester (named "name-suggest") for
# entries of the "artist_suggest" field matching the prefix "sou".
POST itunes-suggest/_search
{
  "suggest": {
    "name-suggest": {
      "prefix": "sou",
      "completion": { "field": "artist_suggest" }
    }
  }
}
|
||||
43
suggester.py
Normal file
43
suggester.py
Normal file
@@ -0,0 +1,43 @@
|
||||
import requests
|
||||
import json
|
||||
|
||||
# Base URL of the Elasticsearch node every request is sent to.
ELS_URL = 'http://localhost:9200'
# Name of the index the requests below are addressed to.
INDEX = 'itunes-suggest'
|
||||
|
||||
def get_tokens(data: str) -> list:
    """Return the tokens Elasticsearch produces for ``data``.

    Sends ``data`` to the ``_analyze`` endpoint of ``INDEX``, asking for the
    ``artist_name`` analyzer, and collects the ``token`` value of every entry
    in the ``tokens`` list of the JSON response.

    Args:
        data: Text to analyze.

    Returns:
        The ``token`` values, in the order the response lists them.

    Raises:
        requests.HTTPError: If Elasticsearch answers with a 4xx/5xx status.
    """
    query = {
        "analyzer": "artist_name",
        "text": data,
    }

    url = '{}/{}/_analyze'.format(ELS_URL, INDEX)
    r = requests.get(url, json=query)
    # Surface the HTTP failure itself (missing index, unknown analyzer, ...)
    # instead of an opaque KeyError on 'tokens' further down.
    r.raise_for_status()
    return [t['token'] for t in r.json()['tokens']]
|
||||
|
||||
def post_artist(artist: str, sugget_input: list) -> "str | None":
    """Index one artist document and return the id Elasticsearch gave it.

    The document has two fields: ``artist`` (the name) and ``artist_suggest``
    (the inputs for the suggester). It is POSTed to the ``_doc`` endpoint of
    ``INDEX``.

    Args:
        artist: Artist name stored in the ``artist`` field.
        sugget_input: Values stored in the ``artist_suggest`` field.

    Returns:
        The ``_id`` of the created document, or ``None`` when the response
        status is anything other than 201 (the status code and body are
        printed in that case).
    """
    element = {"artist_suggest": sugget_input, "artist": artist}

    url = '{}/{}/_doc'.format(ELS_URL, INDEX)
    resp = requests.post(url, json=element)
    if resp.status_code != 201:
        print('ELS Response KO')
        print(resp.status_code)
        print(resp.text)
        return None

    el_id = resp.json()['_id']
    print('Post_element - Element created: ' + el_id)
    return el_id
|
||||
|
||||
|
||||
# Import loop: the export file is read as one JSON document per line; every
# document that carries an "Artist" key is analyzed and then indexed.
# The file is streamed inside a `with` block so the handle is always closed
# and the whole file never has to sit in memory.
with open('/home/budd/workspace/iTunes/es-artists.json', 'r',
          encoding='utf-8') as itunes_file:
    for line in itunes_file:
        # Blank lines (e.g. a trailing newline at end of file) are not JSON;
        # skip them instead of crashing in json.loads.
        if not line.strip():
            continue

        data = json.loads(line)
        if "Artist" in data:
            # Named `suggest_input` rather than `input` to avoid shadowing
            # the builtin.
            suggest_input = get_tokens(data['Artist'])
            post_artist(data['Artist'], suggest_input)
|
||||
Reference in New Issue
Block a user