Compare commits
8 Commits
20f4fdbd39
...
feature/im
| Author | SHA1 | Date | |
|---|---|---|---|
| 95534c92b2 | |||
| 0230bf260b | |||
| 928efb659e | |||
| 88025347ec | |||
| 67e1f8bd0c | |||
| 042c2558ae | |||
| ad0487943a | |||
| 56050d0a49 |
@@ -147,7 +147,8 @@ class ITunesParser:
|
||||
'Play Count': 0,
|
||||
'Rating': 0,
|
||||
'Genre': set(),
|
||||
'Album': set()
|
||||
'Album': set(),
|
||||
'Album Artist': set()
|
||||
}
|
||||
|
||||
# Compute information
|
||||
@@ -168,6 +169,9 @@ class ITunesParser:
|
||||
if 'Album' in track:
|
||||
self._artists[akey]['Album'].add(track['Album'])
|
||||
|
||||
if 'Album Artist' in track:
|
||||
self._artists[akey]['Album Artist'].add(track['Artist'])
|
||||
|
||||
def _process_album(self, track):
|
||||
"""
|
||||
Process albums in the track part of the library and return JSON formatted for a bulk ELS request
|
||||
|
||||
48
mapping.suggest.json
Normal file
48
mapping.suggest.json
Normal file
@@ -0,0 +1,48 @@
|
||||
{
|
||||
"settings": {
|
||||
"index": {
|
||||
"number_of_replicas": 0
|
||||
},
|
||||
"analysis": {
|
||||
"filter": {
|
||||
"french_stop": {
|
||||
"type": "stop",
|
||||
"stopwords": "_french_"
|
||||
},
|
||||
"english_stop": {
|
||||
"type": "stop",
|
||||
"stopwords": "_english_"
|
||||
}
|
||||
},
|
||||
"analyzer": {
|
||||
"names": {
|
||||
"tokenizer": "standard",
|
||||
"filter": [
|
||||
"lowercase",
|
||||
"asciifolding",
|
||||
"french_stop",
|
||||
"english_stop"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"mappings": {
|
||||
"properties": {
|
||||
"artist_suggest": {
|
||||
"type": "completion",
|
||||
"search_analyzer": "names"
|
||||
},
|
||||
"artist": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"album_suggest": {
|
||||
"type": "completion",
|
||||
"search_analyzer": "names"
|
||||
},
|
||||
"album": {
|
||||
"type": "keyword"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
114
send_data.py
114
send_data.py
@@ -10,6 +10,8 @@ import json
|
||||
import time
|
||||
import requests
|
||||
|
||||
from suggester import process_file
|
||||
|
||||
class bcolors:
|
||||
HEADER = '\033[95m'
|
||||
OKBLUE = '\033[94m'
|
||||
@@ -22,16 +24,18 @@ class bcolors:
|
||||
UNDERLINE = '\033[4m'
|
||||
|
||||
# Default file names
|
||||
DEFAULT_SONG_FILE = 'es-songs.json'
|
||||
DEFAULT_ALBUM_FILE = 'es-albums.json'
|
||||
DEFAULT_ARTIST_FILE = 'es-artists.json'
|
||||
DEFAULT_MAPPING_SONGS_FILE = 'mapping.songs.json'
|
||||
DEFAULT_MAPPING_ARTISTS_FILE = 'mapping.artists.json'
|
||||
DEFAULT_MAPPING_ALBUMS_FILE = 'mapping.albums.json'
|
||||
SONG_FILE = 'es-songs.json'
|
||||
ALBUM_FILE = 'es-albums.json'
|
||||
ARTIST_FILE = 'es-artists.json'
|
||||
MAPPING_SONGS_FILE = 'mapping.songs.json'
|
||||
MAPPING_ARTISTS_FILE = 'mapping.artists.json'
|
||||
MAPPING_ALBUMS_FILE = 'mapping.albums.json'
|
||||
MAPPING_SUGGEST_FILE = 'mapping.suggest.json'
|
||||
|
||||
SONG_INDEX = 'itunes-songs'
|
||||
ALBUM_INDEX = 'itunes-albums'
|
||||
ARTIST_INDEX = 'itunes-artists'
|
||||
SUGGEST_INDEX = 'itunes-suggest'
|
||||
# TODO Put variables in a config file or in a Python library
|
||||
|
||||
# Global values / set as default values
|
||||
@@ -47,8 +51,8 @@ def main():
|
||||
|
||||
args = create_args_parser().parse_args()
|
||||
|
||||
if not args.song and args.ALL:
|
||||
print(__file__ + ': error: argument -A/--ALL: not allowed with argument -s/--song')
|
||||
if args.ALL and args.no_song:
|
||||
print(__file__ + ': error: argument -A/--ALL: not allowed with argument --no-song')
|
||||
sys.exit(-1)
|
||||
|
||||
# Overloaded setting value
|
||||
@@ -68,16 +72,16 @@ def main():
|
||||
check_is_ok = []
|
||||
|
||||
# Send song data
|
||||
if args.song or args.ALL:
|
||||
if not args.no_song:
|
||||
if args.DELETE:
|
||||
mapping_song = load_file(args.mapping_song, DEFAULT_MAPPING_SONGS_FILE)
|
||||
mapping_song = load_file(args.mapping_song, MAPPING_SONGS_FILE)
|
||||
if not args.quiet:
|
||||
print("Mapping of song index file: '{}'".format(mapping_song.name))
|
||||
|
||||
delete_index(SONG_INDEX, args.quiet)
|
||||
put_mapping(SONG_INDEX, mapping_song, args.quiet)
|
||||
|
||||
song_file = load_file(args.song_file, DEFAULT_SONG_FILE)
|
||||
song_file = load_file(args.song_file, SONG_FILE)
|
||||
if not args.quiet:
|
||||
print("Song file: '{}'".format(song_file.name))
|
||||
|
||||
@@ -96,7 +100,7 @@ def main():
|
||||
# Send artist data
|
||||
if args.artist_file or args.ALL:
|
||||
if args.DELETE:
|
||||
mapping_artist = load_file(args.mapping_artist, DEFAULT_MAPPING_ARTISTS_FILE)
|
||||
mapping_artist = load_file(args.mapping_artist, MAPPING_ARTISTS_FILE)
|
||||
if not args.quiet:
|
||||
print("Mapping of artist index file: '{}'".format(mapping_artist.name))
|
||||
|
||||
@@ -107,7 +111,7 @@ def main():
|
||||
if not artist_file:
|
||||
if not args.quiet:
|
||||
print('No artist file specified, take default file...')
|
||||
artist_file = open(DEFAULT_ARTIST_FILE, 'r')
|
||||
artist_file = open(ARTIST_FILE, 'r')
|
||||
|
||||
if not args.quiet:
|
||||
print("Artist file: '{}'".format(artist_file.name))
|
||||
@@ -122,7 +126,7 @@ def main():
|
||||
|
||||
if args.album_file or args.ALL:
|
||||
if args.DELETE:
|
||||
mapping_album = load_file(args.mapping_album, DEFAULT_MAPPING_ALBUMS_FILE)
|
||||
mapping_album = load_file(args.mapping_album, MAPPING_ALBUMS_FILE)
|
||||
if not args.quiet:
|
||||
print("Mapping of artist index file: '{}'".format(mapping_album.name))
|
||||
|
||||
@@ -133,7 +137,7 @@ def main():
|
||||
if not album_file:
|
||||
if not args.quiet:
|
||||
print('No album file specified, take default file...')
|
||||
album_file = open(DEFAULT_ALBUM_FILE, 'r')
|
||||
album_file = open(ALBUM_FILE, 'r')
|
||||
|
||||
if not args.quiet:
|
||||
print("Take file '{}' to send song data".format(album_file.name))
|
||||
@@ -145,6 +149,28 @@ def main():
|
||||
else:
|
||||
print('Album sent')
|
||||
|
||||
if not args.no_suggest:
|
||||
print("Process suggestion:")
|
||||
if args.DELETE:
|
||||
delete_index(SUGGEST_INDEX, args.quiet)
|
||||
|
||||
if not args.ALL and not args.album_file and not args.artist_file:
|
||||
print('Only song file processed. No suggestion to process.')
|
||||
else:
|
||||
if args.DELETE:
|
||||
mapping_suggest = load_file(args.mapping_suggest, MAPPING_SUGGEST_FILE)
|
||||
if not args.quiet:
|
||||
print("Mapping of suggest index file: '{}'".format(mapping_suggest.name))
|
||||
put_mapping(SUGGEST_INDEX, mapping_suggest, args.quiet)
|
||||
|
||||
suggs_docs = 0
|
||||
if args.album_file or args.ALL:
|
||||
suggs_docs += process_file(ALBUM_FILE, 'Album')
|
||||
print('Created suggestion documents: ' + str(suggs_docs))
|
||||
if args.artist_file or args.ALL:
|
||||
suggs_docs += process_file(ARTIST_FILE, 'Artist', 'Album Artist')
|
||||
print('Created suggestion documents: ' + str(suggs_docs))
|
||||
|
||||
print("I'm done!")
|
||||
if check_is_ok.count(False) > 0:
|
||||
print('Some problems occurs')
|
||||
@@ -173,15 +199,14 @@ def create_args_parser():
|
||||
# TODO rewrite description with multi-index philosophy
|
||||
parser = argparse.ArgumentParser(
|
||||
description='''
|
||||
Send JSON files formated for bulk Elasticsearch operation to an Elasticsearch.
|
||||
Send JSON files formated for bulk Elasticsearch operation to an Elasticsearch.
|
||||
|
||||
By default: send song data enable, send album & artist data disabled.
|
||||
Check that all the data has been sent.
|
||||
By default: send only song data. See option to send album/artist/suggest data.
|
||||
|
||||
Detect if index doesn't exist and create it with a mapping file (see -map and -idx argument).
|
||||
Remeber : it's cumulative! If you want to remove songs/artits/albums,
|
||||
you have to delete and re-create the index (use -D option).
|
||||
'''
|
||||
Create index if -D option activated with a mapping file (see -map).
|
||||
|
||||
It's cumulative! If you want to remove songs/artits/albums, you have to delete and re-create the index (use -D option).''',
|
||||
formatter_class=argparse.RawTextHelpFormatter
|
||||
)
|
||||
# Bulk
|
||||
parser.add_argument('-q', '--quiet', action='store_true',
|
||||
@@ -190,38 +215,43 @@ def create_args_parser():
|
||||
sending_group = parser.add_argument_group("Sending options")
|
||||
song_group = sending_group.add_mutually_exclusive_group()
|
||||
song_group.add_argument('-sf', '--song-file', type=argparse.FileType('r'),
|
||||
help='Song file data to send (default: \'{}\').'.format(DEFAULT_SONG_FILE))
|
||||
sending_group.add_argument('-al', '--album-file', nargs='?', type=argparse.FileType('r'), const=DEFAULT_ALBUM_FILE,
|
||||
help='Song file data to send (default: \'{}\').'.format(SONG_FILE))
|
||||
sending_group.add_argument('-al', '--album-file', nargs='?', type=argparse.FileType('r'), const=ALBUM_FILE,
|
||||
help='Enable sending album data. Optionally, precise the album data file (default: \'{}\')'
|
||||
.format(DEFAULT_ALBUM_FILE))
|
||||
sending_group.add_argument('-ar', '--artist-file', nargs='?', type=argparse.FileType('r'), const=DEFAULT_ARTIST_FILE,
|
||||
.format(ALBUM_FILE))
|
||||
sending_group.add_argument('-ar', '--artist-file', nargs='?', type=argparse.FileType('r'), const=ARTIST_FILE,
|
||||
help='Enable sending artist data. Optionally, precise the artist data file (default: \'{}\')'
|
||||
.format(DEFAULT_ARTIST_FILE))
|
||||
song_group.add_argument('-s', '--song', action='store_false',
|
||||
help='Disable sending song data')
|
||||
.format(ARTIST_FILE))
|
||||
|
||||
# Mode
|
||||
mode_group = parser.add_argument_group('Mode')
|
||||
mode_group.add_argument('-A', '--ALL', action='store_true',
|
||||
help='Send all possible data: song, artist and album')
|
||||
help='Send all possible data: song, artist, album and suggest. Use default file if not specified')
|
||||
mode_group.add_argument('-D', '--DELETE', action='store_true',
|
||||
help='''Delete old index and create a new.
|
||||
See -idx argument to set index name.
|
||||
See -map arguement to set mapping file.''')
|
||||
help='Delete index and create new. See -map arguement to set mapping file')
|
||||
mode_group.add_argument('--no-song', action='store_true',
|
||||
help='''Disable sending song data.
|
||||
Not allowed with -A option.''')
|
||||
mode_group.add_argument('--no-suggest', action='store_true',
|
||||
help='Disable sending suggest data. Allowed with -A option')
|
||||
|
||||
# Mapping
|
||||
mapping_group = parser.add_argument_group('Mapping files')
|
||||
mode_group.add_argument('-ms', '--mapping-song', type=argparse.FileType('r'), const=DEFAULT_MAPPING_SONGS_FILE, nargs='?',
|
||||
help='Mapping file for songs (default: \'{}\')'.format(DEFAULT_MAPPING_SONGS_FILE))
|
||||
mode_group.add_argument('-mr', '--mapping-artist', type=argparse.FileType('r'), const=DEFAULT_ARTIST_FILE, nargs='?',
|
||||
help='Mapping file for artists (default: \'{}\')'.format(DEFAULT_MAPPING_ARTISTS_FILE))
|
||||
mode_group.add_argument('-ml', '--mapping-album', type=argparse.FileType('r'), const=DEFAULT_MAPPING_ALBUMS_FILE, nargs='?',
|
||||
help='Mapping file for albums (default: \'{}\')'.format(DEFAULT_MAPPING_ALBUMS_FILE))
|
||||
# CAUTION default values cannot be used because they necessarily activate the option
|
||||
# QUESTION Use a for with a list of default mapping file?
|
||||
mapping_group.add_argument('-ms', '--mapping-song', type=argparse.FileType('r'), const=MAPPING_SONGS_FILE, nargs='?',
|
||||
help='Mapping file for songs (default: \'{}\')'.format(MAPPING_SONGS_FILE))
|
||||
mapping_group.add_argument('-mr', '--mapping-artist', type=argparse.FileType('r'), const=ARTIST_FILE, nargs='?',
|
||||
help='Mapping file for artists (default: \'{}\')'.format(MAPPING_ARTISTS_FILE))
|
||||
mapping_group.add_argument('-ml', '--mapping-album', type=argparse.FileType('r'), const=MAPPING_ALBUMS_FILE, nargs='?',
|
||||
help='Mapping file for albums (default: \'{}\')'.format(MAPPING_ALBUMS_FILE))
|
||||
mapping_group.add_argument('-mg', '--mapping-suggest', type=argparse.FileType('r'), const=MAPPING_SUGGEST_FILE, nargs='?',
|
||||
help='Mapping file for suggest (default: \'{}\')'.format(MAPPING_SUGGEST_FILE))
|
||||
|
||||
# Global Settings
|
||||
g_settings_group = parser.add_argument_group('Global Settings')
|
||||
g_settings_group.add_argument('-els', '--elasticsearch-url', default=ELASTICSEARCH_URL, nargs='?',
|
||||
help="Elasticsearch URL (default: \'{}\')".format(ELASTICSEARCH_URL))
|
||||
help="Elasticsearch URL.")
|
||||
|
||||
return parser
|
||||
|
||||
@@ -253,7 +283,7 @@ def delete_index(index_name, quiet=False):
|
||||
res = requests.delete(url=ELASTICSEARCH_URL + index_name)
|
||||
if res.status_code == 200:
|
||||
if not quiet:
|
||||
print(bcolors.OKGREEN + "Index deleted!" + bcolors.ENDC)
|
||||
print(bcolors.OKGREEN + "Index '{}' deleted!".format(index_name) + bcolors.ENDC)
|
||||
else:
|
||||
print(bcolors.FAIL + "An error occured" + bcolors.ENDC)
|
||||
if res.json()['error']['type'] == 'index_not_found_exception':
|
||||
@@ -276,7 +306,7 @@ def put_mapping(index_name, mapping_file, quiet=False):
|
||||
print(res.text + bcolors.ENDC)
|
||||
else:
|
||||
if not quiet:
|
||||
print(bcolors.OKGREEN + "Mapping sent" + bcolors.ENDC)
|
||||
print(bcolors.OKGREEN + "Mapping for '{}' sent".format(index_name) + bcolors.ENDC)
|
||||
|
||||
put_setting(index_name, 0, quiet)
|
||||
|
||||
|
||||
56
suggester.es
56
suggester.es
@@ -1,51 +1,7 @@
|
||||
DELETE itunes-suggest
|
||||
|
||||
PUT /itunes-suggest
|
||||
{
|
||||
"settings": {
|
||||
"analysis": {
|
||||
"filter": {
|
||||
"french_stop": {
|
||||
"type": "stop",
|
||||
"stopwords": "_french_"
|
||||
},
|
||||
"english_stop": {
|
||||
"type": "stop",
|
||||
"stopwords": "_english_"
|
||||
}
|
||||
},
|
||||
"analyzer": {
|
||||
"names": {
|
||||
"tokenizer": "standard",
|
||||
"filter": [
|
||||
"lowercase",
|
||||
"asciifolding",
|
||||
"french_stop",
|
||||
"english_stop"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"mappings": {
|
||||
"properties": {
|
||||
"artist_suggest": {
|
||||
"type": "completion",
|
||||
"search_analyzer": "names"
|
||||
},
|
||||
"artist": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"album_suggest": {
|
||||
"type": "completion",
|
||||
"search_analyzer": "names"
|
||||
},
|
||||
"album": {
|
||||
"type": "keyword"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
!./mapping.suggest.json
|
||||
|
||||
// It is also possible to specify an analyzer for indexing => https://stackoverflow.com/questions/48304499/elasticsearch-completion-suggester-not-working-with-whitespace-analyzer
|
||||
|
||||
@@ -59,7 +15,7 @@ GET itunes-suggest/_analyze
|
||||
|
||||
GET itunes-suggest/_search
|
||||
|
||||
POST itunes-suggest/_search
|
||||
GET itunes-suggest/_search
|
||||
{
|
||||
"_source" : "artist",
|
||||
"suggest": {
|
||||
@@ -72,7 +28,7 @@ POST itunes-suggest/_search
|
||||
}
|
||||
}
|
||||
|
||||
POST itunes-suggest/_search
|
||||
GET itunes-suggest/_search
|
||||
{
|
||||
"_source" : "album",
|
||||
"suggest": {
|
||||
@@ -86,7 +42,7 @@ POST itunes-suggest/_search
|
||||
}
|
||||
}
|
||||
|
||||
POST itunes-suggest/_search
|
||||
GET itunes-suggest/_search
|
||||
{
|
||||
"_source": ["album", "artist"],
|
||||
"suggest": {
|
||||
@@ -105,7 +61,7 @@ POST itunes-suggest/_search
|
||||
}
|
||||
}
|
||||
|
||||
POST itunes-suggest/_search
|
||||
GET itunes-suggest/_search
|
||||
{
|
||||
"_source": ["album", "artist"],
|
||||
"suggest": {
|
||||
@@ -124,7 +80,7 @@ POST itunes-suggest/_search
|
||||
}
|
||||
}
|
||||
|
||||
POST itunes-suggest/_search
|
||||
GET itunes-suggest/_search
|
||||
{
|
||||
"suggest": {
|
||||
"ar-suggest": {
|
||||
|
||||
113
suggester.py
113
suggester.py
@@ -1,37 +1,77 @@
|
||||
import requests
|
||||
import json
|
||||
import sys
|
||||
"""
|
||||
Process files generated by iTunesParser to fill a suggester index.
|
||||
Suggester index in ELS must be created before use.
|
||||
|
||||
ELS_URL ='http://localhost:9200'
|
||||
See suggester.es for the queries that create the index.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import json
|
||||
import requests
|
||||
|
||||
ELS_URL = 'http://localhost:9200'
|
||||
INDEX = 'itunes-suggest'
|
||||
|
||||
class NoGoodDataException(Exception):
    """
    Raised when data can't be correctly analyzed.

    Parameters
    ----------
    message: string
        Human-readable description of the problem, forwarded to `Exception`
    """

    def __init__(self, message):
        super().__init__(message)
|
||||
|
||||
def get_tokens(data: str) -> list:
    """
    Query Elasticsearch to get the tokens of a string with the 'names' analyzer.

    Parameters
    ----------
    data: string
        String to be analysed to obtain the tokens
    Returns
    -------
    list
        A list of tokens. Empty if `data` is empty or None (no request is sent).
    Raises
    ------
    NoGoodDataException
        If the ELS response has no 'tokens' key, the data is considered
        not correct for analysis.
    """
    if not data:
        return []

    query = {
        "analyzer": "names",  # TODO Parameterize analyzer ?
        "text": data
    }

    url = '{}/{}/_analyze'.format(ELS_URL, INDEX)
    # Decode the response body once and reuse it for the check, the error
    # report and the result instead of re-parsing it on every access.
    body = requests.get(url, json=query).json()

    if 'tokens' not in body:
        print('ERROR: Not tokens in result')
        print('Input: ' + str(data))
        print('Request: ' + str(body))
        raise NoGoodDataException('Data is not correct to get tokens')

    return [t['token'] for t in body['tokens']]
|
||||
|
||||
def post_document(name: str, input: list, field_name: str) -> bool:
|
||||
suggest_name = field_name + '_suggest'
|
||||
def post_document(main_field_value: str, input_terms: list, main_field_name: str) -> str:
|
||||
"""
|
||||
Create suggestion document in Elasticsearch.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
main_field_value : str
|
||||
Value to put in the main field named by `main_field_name`
|
||||
input_terms : list
|
||||
List of suggestion term to put in document
|
||||
main_field_name : str
|
||||
Name of the main field, to fill with `main_field_value`
|
||||
|
||||
Returns
|
||||
-------
|
||||
str
|
||||
Success: ID of created document
|
||||
Fail (ret. status <> 201): None
|
||||
"""
|
||||
suggest_name = main_field_name + '_suggest'
|
||||
element = {
|
||||
field_name: name,
|
||||
suggest_name: input
|
||||
main_field_name: main_field_value,
|
||||
suggest_name: input_terms
|
||||
}
|
||||
|
||||
# Filter empty keys
|
||||
@@ -43,13 +83,26 @@ def post_document(name: str, input: list, field_name: str) -> bool:
|
||||
print('ELS Response KO')
|
||||
print(resp.status_code)
|
||||
print(resp.text)
|
||||
return
|
||||
return None
|
||||
|
||||
el_id = resp.json()['_id']
|
||||
# print('Post_element - Element created: ' + el_id)
|
||||
return el_id
|
||||
|
||||
def process_file(file_name: str, field_name: str) -> int:
|
||||
def process_file(file_name: str, field_name: str, array_file: str = None) -> int:
|
||||
"""
|
||||
Process a JSON file with data
|
||||
|
||||
Parameters
|
||||
----------
|
||||
file_name: string
|
||||
Path and name of file to analyze
|
||||
field_name: string
|
||||
Name of the field where to find the data to create the suggestion entries
|
||||
array_file: string, Default: None
|
||||
Name of an array field to analyze to create more suggestion entries.
|
||||
Nothing if None
|
||||
"""
|
||||
print('Process file: ' + file_name)
|
||||
with open(file_name, 'r') as o_file:
|
||||
lines = o_file.readlines()
|
||||
@@ -62,22 +115,28 @@ def process_file(file_name: str, field_name: str) -> int:
|
||||
sys.stdout.flush()
|
||||
sys.stdout.write("\b" * (40+1)) # return to start of line, after '['
|
||||
data = json.loads(line)
|
||||
if "Artist" in data:
|
||||
try :
|
||||
input = get_tokens(data[field_name])
|
||||
post_document(name=data[field_name], input=input, field_name=field_name.lower())
|
||||
if not "index" in data: # Exclude index line
|
||||
try:
|
||||
suggests_entries = get_tokens(data[field_name])
|
||||
|
||||
if array_file and array_file in data and data[array_file]:
|
||||
for key in data[array_file]:
|
||||
suggests_entries.extend(get_tokens(key))
|
||||
|
||||
# TODO Input have the same value several times ==> use to process a score
|
||||
post_document(main_field_value=data[field_name], input_terms=suggests_entries, main_field_name=field_name.lower())
|
||||
count += 1
|
||||
except NoGoodDataException:
|
||||
print('ERROR WITH DATA')
|
||||
print(str(data))
|
||||
print('File processed\n')
|
||||
|
||||
return count
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Stand-alone run: build suggestion documents from the album file, then
    # from the artist file (also using the 'Album Artist' array field),
    # printing the running total after each file.
    created_docs = 0
    created_docs += process_file('/home/budd/workspace/iTunes/es-albums.json', 'Album')
    print('Created documents: ' + str(created_docs))
    created_docs += process_file('/home/budd/workspace/iTunes/es-artists.json', 'Artist', 'Album Artist')
    print('Created documents: ' + str(created_docs))
    # TODO Created doc count <> number of docs in ELS
|
||||
|
||||
Reference in New Issue
Block a user