(back) Suggester V3: Process albums separately from artists
(cherry picked from commit ebbeeccfb8535dbb67240d2c68c7dc9a4da7e7f8)
This commit is contained in:
@@ -55,6 +55,7 @@ GET itunes-suggest/_analyze
|
|||||||
|
|
||||||
POST itunes-suggest/_search
|
POST itunes-suggest/_search
|
||||||
{
|
{
|
||||||
|
"_source" : "artist",
|
||||||
"suggest": {
|
"suggest": {
|
||||||
"name-suggest": {
|
"name-suggest": {
|
||||||
"prefix": "sou",
|
"prefix": "sou",
|
||||||
@@ -67,9 +68,10 @@ POST itunes-suggest/_search
|
|||||||
|
|
||||||
POST itunes-suggest/_search
|
POST itunes-suggest/_search
|
||||||
{
|
{
|
||||||
|
"_source" : "album",
|
||||||
"suggest": {
|
"suggest": {
|
||||||
"name-suggest": {
|
"name-suggest": {
|
||||||
"prefix": "trip",
|
"prefix": "new",
|
||||||
"completion": {
|
"completion": {
|
||||||
"field": "album_suggest",
|
"field": "album_suggest",
|
||||||
"size": 20
|
"size": 20
|
||||||
|
|||||||
33
suggester.py
33
suggester.py
@@ -26,7 +26,7 @@ def get_tokens(data: str) -> list:
|
|||||||
raise NoGoodDataException('Data is not correct to get tokens')
|
raise NoGoodDataException('Data is not correct to get tokens')
|
||||||
return [t['token'] for t in r.json()['tokens']]
|
return [t['token'] for t in r.json()['tokens']]
|
||||||
|
|
||||||
def post_document(artist: str, artist_sugget: list, album: str, album_suggest: list) -> bool:
|
def post_document(artist: str = None, artist_sugget: list = None, album: str = None, album_suggest: list = None) -> bool:
|
||||||
element = {
|
element = {
|
||||||
"artist_suggest" : artist_sugget,
|
"artist_suggest" : artist_sugget,
|
||||||
"artist": artist,
|
"artist": artist,
|
||||||
@@ -48,19 +48,36 @@ def post_document(artist: str, artist_sugget: list, album: str, album_suggest: l
|
|||||||
# print('Post_element - Element created: ' + el_id)
|
# print('Post_element - Element created: ' + el_id)
|
||||||
return el_id
|
return el_id
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
# Using readlines()
|
||||||
|
with open('/home/budd/workspace/iTunes/es-artists.json', 'r') as artist_file:
|
||||||
|
artists_lines = artist_file.readlines()
|
||||||
|
|
||||||
# Using readlines()
|
with open('/home/budd/workspace/iTunes/es-albums.json', 'r') as artist_file:
|
||||||
itunes_file = open('/home/budd/workspace/iTunes/es-artists.json', 'r')
|
albums_lines = artist_file.readlines()
|
||||||
lines = itunes_file.readlines()
|
|
||||||
|
|
||||||
# Strips the newline character
|
# Strips the newline character
|
||||||
for line in lines:
|
count = 0
|
||||||
|
for line in artists_lines:
|
||||||
data = json.loads(line)
|
data = json.loads(line)
|
||||||
if "Artist" in data:
|
if "Artist" in data:
|
||||||
try :
|
try :
|
||||||
artist_input = get_tokens(data['Artist'])
|
artist_input = get_tokens(data['Artist'])
|
||||||
album_input = get_tokens(data['Album'])
|
post_document(artist=data['Artist'], artist_sugget=artist_input)
|
||||||
post_document(data['Artist'], artist_input, data['Album'], album_input)
|
count += 1
|
||||||
except NoGoodDataException:
|
except NoGoodDataException:
|
||||||
print('ERROR WITH DATA')
|
print('ERROR WITH DATA')
|
||||||
print(str(data))
|
print(str(data))
|
||||||
|
|
||||||
|
for line in albums_lines:
|
||||||
|
data = json.loads(line)
|
||||||
|
if "Artist" in data:
|
||||||
|
try :
|
||||||
|
album_input = get_tokens(data['Album'])
|
||||||
|
post_document(album=data['Album'], album_suggest=album_input)
|
||||||
|
count += 1
|
||||||
|
except NoGoodDataException:
|
||||||
|
print('ERROR WITH DATA')
|
||||||
|
print(str(data))
|
||||||
|
|
||||||
|
print('Created documents: ' + str(count))
|
||||||
|
|||||||
Reference in New Issue
Block a user