(back) Suggester V2: Process album data

(cherry picked from commit dd322405d047d49e51d528341cbd008d7a98b6ab)
This commit is contained in:
2021-07-30 16:59:22 +02:00
parent 436edaf3f2
commit 8121f3d751
2 changed files with 53 additions and 9 deletions

View File

@@ -15,7 +15,7 @@ PUT /itunes-suggest
} }
}, },
"analyzer": { "analyzer": {
"artist_name": { "names": {
"tokenizer": "standard", "tokenizer": "standard",
"filter": [ "filter": [
"lowercase", "lowercase",
@@ -34,14 +34,22 @@ PUT /itunes-suggest
}, },
"artist": { "artist": {
"type": "keyword" "type": "keyword"
},
"album_suggest": {
"type": "completion"
},
"album": {
"type": "keyword"
} }
} }
} }
} }
// Problem with word EP, SP
GET itunes-suggest/_analyze GET itunes-suggest/_analyze
{ {
"analyzer": "artist_name", "analyzer": "names",
"text": "the servent" "text": "the servent"
} }
@@ -56,3 +64,16 @@ POST itunes-suggest/_search
} }
} }
} }
POST itunes-suggest/_search
{
"suggest": {
"name-suggest": {
"prefix": "trip",
"completion": {
"field": "album_suggest",
"size": 20
}
}
}
}

View File

@@ -4,18 +4,37 @@ import json
ELS_URL ='http://localhost:9200' ELS_URL ='http://localhost:9200'
INDEX = 'itunes-suggest' INDEX = 'itunes-suggest'
class NoGoodDataException(Exception):
    """Raised when the analyzer response for a value has no ``tokens`` list."""

    def __init__(self, message):
        super().__init__(message)
def get_tokens(data: str) -> list:
    """Return the tokens the index's ``names`` analyzer produces for *data*.

    Sends *data* to the ``_analyze`` endpoint of ``INDEX`` on ``ELS_URL``.

    Args:
        data: Text to analyze. A falsy value (empty string, ``None``) skips
            the request entirely.

    Returns:
        The ``token`` value of every entry in the response's ``tokens``
        list, in response order; ``[]`` when *data* is falsy.

    Raises:
        NoGoodDataException: The response body has no ``tokens`` key.
    """
    if not data:
        return []

    query = {
        "analyzer": "names",
        "text": data,
    }
    url = '{}/{}/_analyze'.format(ELS_URL, INDEX)
    r = requests.get(url, json=query)
    # Decode the body once and reuse it for the check, the log and the result.
    body = r.json()
    if 'tokens' not in body:
        print('ERROR: Not tokens in result')
        print('Input: ' + str(data))
        # What is printed here is the server's reply, not the request sent.
        print('Response: ' + str(body))
        raise NoGoodDataException('Data is not correct to get tokens')
    return [t['token'] for t in body['tokens']]
def post_artist(artist: str, sugget_input: list) -> bool: def post_document(artist: str, artist_sugget: list, album: str, album_suggest: list) -> bool:
element = { "artist_suggest" : sugget_input, "artist": artist } element = {
"artist_suggest" : artist_sugget,
"artist": artist,
"album": album,
"album_suggest": album_suggest}
# Filter empty keys
element = {k: v for k, v in element.items() if v}
url = '{}/{}/_doc'.format(ELS_URL, INDEX) url = '{}/{}/_doc'.format(ELS_URL, INDEX)
resp = requests.post(url, json=element) resp = requests.post(url, json=element)
@@ -26,7 +45,7 @@ def post_artist(artist: str, sugget_input: list) -> bool:
return return
el_id = resp.json()['_id'] el_id = resp.json()['_id']
print('Post_element - Element created: ' + el_id) # print('Post_element - Element created: ' + el_id)
return el_id return el_id
@@ -38,6 +57,10 @@ lines = itunes_file.readlines()
# Index one document for every input line that names an artist.
for line in lines:
    data = json.loads(line)
    if "Artist" not in data:
        continue
    try:
        # A record may have no "Album" key; .get() then yields None, which
        # get_tokens() maps to [] and post_document() drops from the payload.
        album = data.get('Album')
        artist_input = get_tokens(data['Artist'])
        album_input = get_tokens(album)
        post_document(data['Artist'], artist_input, album, album_input)
    except NoGoodDataException:
        # Report the offending record and carry on with the next line.
        print('ERROR WITH DATA')
        print(str(data))