(back) Suggester V2: Process album data
(cherry picked from commit dd322405d047d49e51d528341cbd008d7a98b6ab)
This commit is contained in:
25
suggester.es
25
suggester.es
@@ -15,7 +15,7 @@ PUT /itunes-suggest
|
||||
}
|
||||
},
|
||||
"analyzer": {
|
||||
"artist_name": {
|
||||
"names": {
|
||||
"tokenizer": "standard",
|
||||
"filter": [
|
||||
"lowercase",
|
||||
@@ -34,14 +34,22 @@ PUT /itunes-suggest
|
||||
},
|
||||
"artist": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"album_suggest": {
|
||||
"type": "completion"
|
||||
},
|
||||
"album": {
|
||||
"type": "keyword"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Problem with word EP, SP
|
||||
|
||||
GET itunes-suggest/_analyze
|
||||
{
|
||||
"analyzer": "artist_name",
|
||||
"analyzer": "names",
|
||||
"text": "the servent"
|
||||
}
|
||||
|
||||
@@ -56,3 +64,16 @@ POST itunes-suggest/_search
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
POST itunes-suggest/_search
|
||||
{
|
||||
"suggest": {
|
||||
"name-suggest": {
|
||||
"prefix": "trip",
|
||||
"completion": {
|
||||
"field": "album_suggest",
|
||||
"size": 20
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
37
suggester.py
37
suggester.py
@@ -4,18 +4,37 @@ import json
|
||||
ELS_URL ='http://localhost:9200'
|
||||
INDEX = 'itunes-suggest'
|
||||
|
||||
class NoGoodDataException(Exception):
    """Raised when input data cannot be turned into suggestion tokens.

    The message passed to the constructor is stored by ``Exception`` itself,
    so no custom ``__init__`` is required.
    """
|
||||
|
||||
def get_tokens(data: str) -> list:
    """Return the tokens the index's ``names`` analyzer produces for *data*.

    Sends *data* to the ``_analyze`` endpoint of ``INDEX`` on ``ELS_URL`` and
    collects the ``token`` field of every entry in the response.

    Args:
        data: Text to analyze. Falsy values (``None``, ``""``) are accepted.

    Returns:
        The list of token strings, or an empty list when *data* is falsy
        (no request is made in that case).

    Raises:
        NoGoodDataException: If the response body has no ``tokens`` key.
    """
    if not data:
        return []

    query = {
        "analyzer": "names",
        "text": data,
    }

    url = '{}/{}/_analyze'.format(ELS_URL, INDEX)
    r = requests.get(url, json=query)

    # Decode the response body once and reuse it for the check, the
    # diagnostics and the result.
    body = r.json()
    if 'tokens' not in body:
        print('ERROR: Not tokens in result')
        print('Input: ' + str(data))
        print('Request: ' + str(body))
        raise NoGoodDataException('Data is not correct to get tokens')

    return [t['token'] for t in body['tokens']]
|
||||
|
||||
def post_artist(artist: str, sugget_input: list) -> bool:
|
||||
element = { "artist_suggest" : sugget_input, "artist": artist }
|
||||
def post_document(artist: str, artist_sugget: list, album: str, album_suggest: list) -> bool:
|
||||
element = {
|
||||
"artist_suggest" : artist_sugget,
|
||||
"artist": artist,
|
||||
"album": album,
|
||||
"album_suggest": album_suggest}
|
||||
|
||||
# Filter empty keys
|
||||
element = {k: v for k, v in element.items() if v}
|
||||
|
||||
url = '{}/{}/_doc'.format(ELS_URL, INDEX)
|
||||
resp = requests.post(url, json=element)
|
||||
@@ -26,7 +45,7 @@ def post_artist(artist: str, sugget_input: list) -> bool:
|
||||
return
|
||||
|
||||
el_id = resp.json()['_id']
|
||||
print('Post_element - Element created: ' + el_id)
|
||||
# print('Post_element - Element created: ' + el_id)
|
||||
return el_id
|
||||
|
||||
|
||||
@@ -38,6 +57,10 @@ lines = itunes_file.readlines()
|
||||
# Index one document per input line; each line is a JSON object.
for line in lines:
    data = json.loads(line)
    # Records without an artist are skipped entirely.
    if "Artist" in data:
        try:
            artist_input = get_tokens(data['Artist'])
            # "Album" may be absent: get_tokens returns [] for a falsy value
            # and post_document drops empty fields, so use .get() instead of
            # letting a KeyError abort the whole import.
            album = data.get('Album')
            album_input = get_tokens(album)
            post_document(data['Artist'], artist_input, album, album_input)
        except NoGoodDataException:
            # Report the offending record and carry on with the next line.
            print('ERROR WITH DATA')
            print(str(data))
|
||||
|
||||
Reference in New Issue
Block a user