(scripts) Adapt generation & sending scripts

This commit is contained in:
2020-04-12 19:04:08 +02:00
parent 4e5de730c5
commit 0c8a17febe
2 changed files with 84 additions and 42 deletions

View File

@@ -66,7 +66,10 @@ class ITunesParser:
""" """
Parse an iTunes Library and produce JSON - for ELS Parse an iTunes Library and produce JSON - for ELS
""" """
ELS_INDEX_NAME = "itunessongs" SONG_INDEX = 'itunes-songs'
ALBUM_INDEX = 'itunes-albums'
ARTIST_INDEX = 'itunes-artists'
# TODO Put variables in a config file or in a Python library
def __init__(self): def __init__(self):
self._tracks = {} self._tracks = {}
@@ -249,7 +252,7 @@ class WriteElsJson:
artist['Rating'] = round(artist['Rating']) artist['Rating'] = round(artist['Rating'])
json_track_index = { json_track_index = {
"index": {"_index": ITunesParser.ELS_INDEX_NAME, "_type": "artist", "_id": persistent_id} "index": {"_index": ITunesParser.ARTIST_INDEX, "_id": persistent_id}
} }
file_artist.write(bytes(json.dumps(json_track_index, indent=None, cls=SetEncoder), 'UTF-8')) file_artist.write(bytes(json.dumps(json_track_index, indent=None, cls=SetEncoder), 'UTF-8'))
@@ -270,7 +273,7 @@ class WriteElsJson:
album['Rating'] = round(album['Rating']) album['Rating'] = round(album['Rating'])
json_track_index = { json_track_index = {
"index": {"_index": ITunesParser.ELS_INDEX_NAME, "_type": "album", "_id": persistent_id} "index": {"_index": ITunesParser.ALBUM_INDEX, "_id": persistent_id}
} }
file_albums.write(bytes(json.dumps(json_track_index, indent=None, cls=SetEncoder), 'UTF-8')) file_albums.write(bytes(json.dumps(json_track_index, indent=None, cls=SetEncoder), 'UTF-8'))
@@ -287,7 +290,7 @@ class WriteElsJson:
file = io.open(output_file, 'wb') file = io.open(output_file, 'wb')
for persistent_id, song in songs.items(): for persistent_id, song in songs.items():
json_track_index = { json_track_index = {
"index": {"_index": ITunesParser.ELS_INDEX_NAME, "_type": "song", "_id": persistent_id} "index": {"_index": ITunesParser.SONG_INDEX, "_id": persistent_id}
} }
file.write(bytes(json.dumps(json_track_index, indent=None, cls=SetEncoder), 'UTF-8')) file.write(bytes(json.dumps(json_track_index, indent=None, cls=SetEncoder), 'UTF-8'))

View File

@@ -14,11 +14,17 @@ import requests
DEFAULT_SONG_FILE = 'es-songs.json' DEFAULT_SONG_FILE = 'es-songs.json'
DEFAULT_ALBUM_FILE = 'es-albums.json' DEFAULT_ALBUM_FILE = 'es-albums.json'
DEFAULT_ARTIST_FILE = 'es-artists.json' DEFAULT_ARTIST_FILE = 'es-artists.json'
DEFAULT_MAPPING_FILE = 'mapping.json' DEFAULT_MAPPING_SONGS_FILE = 'mapping.songs.json'
DEFAULT_MAPPING_ARTISTS_FILE = 'mapping.artists.json'
DEFAULT_MAPPING_ALBUMS_FILE = 'mapping.albums.json'
SONG_INDEX = 'itunes-songs'
ALBUM_INDEX = 'itunes-albums'
ARTIST_INDEX = 'itunes-artists'
# TODO Put variables in a config file or in a Python library
# Global values / set as default values # Global values / set as default values
ELASTICSEARCH_URL = 'http://localhost:9200/' ELASTICSEARCH_URL = 'http://localhost:9200/'
INDEX_NAME = "itunessongs"
# Why global variable ? # Why global variable ?
# Because if I want to use a class to do post/put operation, I will use class or instance variables # Because if I want to use a class to do post/put operation, I will use class or instance variables
@@ -26,9 +32,9 @@ def main():
""" """
Main function Main function
""" """
global ELASTICSEARCH_URL, INDEX_NAME global ELASTICSEARCH_URL
args = create_parser().parse_args() args = create_args_parser().parse_args()
if not args.song and args.ALL: if not args.song and args.ALL:
print(__file__ + ': error: argument -A/--ALL: not allowed with argument -s/--song') print(__file__ + ': error: argument -A/--ALL: not allowed with argument -s/--song')
@@ -36,41 +42,37 @@ def main():
# Overloaded setting value # Overloaded setting value
ELASTICSEARCH_URL = args.elasticsearch_url ELASTICSEARCH_URL = args.elasticsearch_url
INDEX_NAME = args.index_name # Used for deletion / creation
# TODO Improvement: check first line of file(s) to get index # TODO Improvement: check first line of file(s) to get index
if not args.quiet: if not args.quiet:
print("*** Settings values ***") print("*** Settings values ***")
print("Elasticsearch URL:\t" + ELASTICSEARCH_URL) print("Elasticsearch URL:\t" + ELASTICSEARCH_URL)
print("Index name:\t\t" + INDEX_NAME)
print() print()
else: else:
print("Processing...") print("Processing...")
if args.DELETE:
delete_index(INDEX_NAME, args.quiet)
put_mapping(INDEX_NAME, args.mapping_file, args.quiet)
print()
#TODO Detect if index doesn't exist #TODO Detect if index doesn't exist
#TODO Use log instead print #TODO Use log instead print
check_is_ok = []
# Send song data # Send song data
if args.song or args.ALL: if args.song or args.ALL:
if not args.song_file: if args.DELETE:
try: mapping_song = load_file(args.mapping_song, DEFAULT_MAPPING_SONGS_FILE)
song_file = open(DEFAULT_SONG_FILE, 'r') if not args.quiet:
except FileNotFoundError as error: print("Mapping of song index file: '{}'".format(mapping_song.name))
print(error)
print("Default file not found.\nUse -sf argument, or -h for more help") delete_index(SONG_INDEX, args.quiet)
sys.exit(2) put_mapping(SONG_INDEX, mapping_song, args.quiet)
else:
song_file = args.song_file song_file = load_file(args.song_file, DEFAULT_SONG_FILE)
if not args.quiet: if not args.quiet:
print("Song file: '{}'".format(song_file.name)) print("Song file: '{}'".format(song_file.name))
send_data(song_file, args.quiet) send_data(song_file, args.quiet)
check_all_data_is_saved(song_file, args.quiet) check = check_all_data_is_saved(song_file, SONG_INDEX, args.quiet)
check_is_ok.append(check)
# ? Improvement: allow to stop script if all data not sent? # ? Improvement: allow to stop script if all data not sent?
if not args.quiet: if not args.quiet:
print() print()
@@ -82,6 +84,14 @@ def main():
# Send artist data # Send artist data
if args.artist_file or args.ALL: if args.artist_file or args.ALL:
if args.DELETE:
mapping_artist = load_file(args.mapping_artist, DEFAULT_MAPPING_ARTISTS_FILE)
if not args.quiet:
print("Mapping of artist index file: '{}'".format(mapping_artist.name))
delete_index(ARTIST_INDEX, args.quiet)
put_mapping(ARTIST_INDEX, mapping_artist, args.quiet)
artist_file = args.artist_file artist_file = args.artist_file
if not artist_file: if not artist_file:
if not args.quiet: if not args.quiet:
@@ -92,13 +102,22 @@ def main():
print("Artist file: '{}'".format(artist_file.name)) print("Artist file: '{}'".format(artist_file.name))
send_data(artist_file, args.quiet) send_data(artist_file, args.quiet)
check_all_data_is_saved(artist_file, args.quiet) check = check_all_data_is_saved(artist_file, ARTIST_INDEX, args.quiet)
check_is_ok.append(check)
if not args.quiet: if not args.quiet:
print() print()
else: else:
print('Artist sent') print('Artist sent')
if args.album_file or args.ALL: if args.album_file or args.ALL:
if args.DELETE:
mapping_album = load_file(args.mapping_album, DEFAULT_MAPPING_ALBUMS_FILE)
if not args.quiet:
print("Mapping of artist index file: '{}'".format(mapping_album.name))
delete_index(ALBUM_INDEX, args.quiet)
put_mapping(ALBUM_INDEX, mapping_album, args.quiet)
album_file = args.album_file album_file = args.album_file
if not album_file: if not album_file:
if not args.quiet: if not args.quiet:
@@ -108,19 +127,39 @@ def main():
if not args.quiet: if not args.quiet:
print("Take file '{}' to send song data".format(album_file.name)) print("Take file '{}' to send song data".format(album_file.name))
send_data(album_file, args.quiet) send_data(album_file, args.quiet)
check_all_data_is_saved(album_file, args.quiet) check = check_all_data_is_saved(album_file, ALBUM_INDEX, args.quiet)
check_is_ok.append(check)
if not args.quiet: if not args.quiet:
print() print()
else: else:
print('Album sent') print('Album sent')
print("I'm done!") print("I'm done!")
if check_is_ok.count(False) > 0:
print('Some problems occurs')
sys.exit(check_is_ok.count(False))
def load_file(args_file, default_file):
    """
    Return the file to read: the one given by the caller, or the default one.

    args_file: value coming from the command-line arguments. When it is truthy
        it is returned unchanged (the mapping options are declared with
        argparse.FileType('r'), so this is expected to be an open file object).
    default_file: path opened in text read mode when args_file is falsy
        (for instance None because the option was not given).

    Return args_file itself, or a newly opened file object for default_file.
    Exit the script with status 2 when the default file cannot be found.
    """
    if args_file:
        return args_file

    try:
        return open(default_file, 'r')
    except FileNotFoundError as error:
        print(error)
        # This helper loads song files as well as mapping files, so the hint
        # names the missing file instead of pointing to the song option only.
        print("Default file '{}' not found.\nUse the matching file argument, or -h for more help"
              .format(default_file))
        sys.exit(2)
def create_args_parser():
""" """
Create parser with all options, default values, etc. Create parser with all options, default values, etc.
Return the parser ready to parse args Return the parser ready to parse args
""" """
# TODO rewrite description with multi-index philosophy
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description=''' description='''
Send JSON files formated for bulk Elasticsearch operation to an Elasticsearch. Send JSON files formated for bulk Elasticsearch operation to an Elasticsearch.
@@ -159,17 +198,19 @@ def create_parser():
See -idx argument to set index name. See -idx argument to set index name.
See -map arguement to set mapping file.''') See -map arguement to set mapping file.''')
# Mapping
mapping_group = parser.add_argument_group('Mapping files')
mode_group.add_argument('-ms', '--mapping-song', type=argparse.FileType('r'), const=DEFAULT_MAPPING_SONGS_FILE, nargs='?',
help='Mapping file for songs (default: \'{}\')'.format(DEFAULT_MAPPING_SONGS_FILE))
mode_group.add_argument('-mr', '--mapping-artist', type=argparse.FileType('r'), const=DEFAULT_ARTIST_FILE, nargs='?',
help='Mapping file for artists (default: \'{}\')'.format(DEFAULT_MAPPING_ARTISTS_FILE))
mode_group.add_argument('-ml', '--mapping-album', type=argparse.FileType('r'), const=DEFAULT_MAPPING_ALBUMS_FILE, nargs='?',
help='Mapping file for albums (default: \'{}\')'.format(DEFAULT_MAPPING_ALBUMS_FILE))
# Global Settings # Global Settings
g_settings_group = parser.add_argument_group('Global Settings') g_settings_group = parser.add_argument_group('Global Settings')
g_settings_group.add_argument('-els', '--elasticsearch-url', default=ELASTICSEARCH_URL, nargs='?', g_settings_group.add_argument('-els', '--elasticsearch-url', default=ELASTICSEARCH_URL, nargs='?',
help="Elasticsearch URL (default: \'{}\')".format(ELASTICSEARCH_URL)) help="Elasticsearch URL (default: \'{}\')".format(ELASTICSEARCH_URL))
g_settings_group.add_argument('-idx', '--index-name', default=INDEX_NAME, nargs='?',
help="""Index name in Elasticsearch (default: \'{}\').
Used when creating the index: if it does not exist or after deletion.
When sending data, the index name is specified in JSON files.""".format(INDEX_NAME))
g_settings_group.add_argument('-map', '--mapping-file', type=argparse.FileType('r'), default=DEFAULT_MAPPING_FILE, nargs='?',
help='If deleting index or if index does not exist, mapping file to use (default: \'{}\')'
.format(DEFAULT_MAPPING_FILE))
return parser return parser
@@ -198,7 +239,7 @@ def delete_index(index_name, quiet=False):
""" """
if not quiet: if not quiet:
print('Deleting index \'{}\'...'.format(index_name)) print('Deleting index \'{}\'...'.format(index_name))
res = requests.delete(url=ELASTICSEARCH_URL + INDEX_NAME) res = requests.delete(url=ELASTICSEARCH_URL + index_name)
if res.status_code == 200: if res.status_code == 200:
if not quiet: if not quiet:
print("Deleted!") print("Deleted!")
@@ -228,7 +269,7 @@ def put_mapping(index_name, mapping_file, quiet=False):
put_setting(index_name, 0, quiet) put_setting(index_name, 0, quiet)
def check_all_data_is_saved(data_file, quiet=False): def check_all_data_is_saved(data_file, index_name, quiet=False):
""" """
Check if found same number of documents in ELS as number of line in file. Check if found same number of documents in ELS as number of line in file.
Detect type of data to be searched in ELS. Detect type of data to be searched in ELS.
@@ -241,15 +282,13 @@ def check_all_data_is_saved(data_file, quiet=False):
data_file.seek(0) data_file.seek(0)
lines = data_file.readlines() lines = data_file.readlines()
file_nb_line = int(len(lines) / 2) file_nb_line = int(len(lines) / 2)
extract = json.loads(lines[1])
data_type = extract['type']
if not quiet: if not quiet:
print("\tFound: {} lines in '{}' file".format(file_nb_line, data_file.name)) print("\tFound: {} lines in '{}' file".format(file_nb_line, data_file.name))
payload = {"track_total_hits": "true", "query": {"constant_score": {"filter": {"term": {"type": data_type}}}}} payload = {"track_total_hits": "true"}
res = requests.get(url=ELASTICSEARCH_URL + INDEX_NAME + '/_search?size=0', res = requests.get(url=ELASTICSEARCH_URL + index_name + '/_search?size=0',
data=json.dumps(payload), data=json.dumps(payload),
headers={'Content-Type': 'application/x-ndjson'}) headers={'Content-Type': 'application/x-ndjson'})
if res.status_code != 200: if res.status_code != 200:
@@ -259,7 +298,7 @@ def check_all_data_is_saved(data_file, quiet=False):
els_nb_doc = res.json()['hits']['total']['value'] els_nb_doc = res.json()['hits']['total']['value']
if not quiet: if not quiet:
print("\tFound: {} documents with '{}' type in ELS".format(els_nb_doc, data_type)) print("\tFound: {} documents in index '{}' in ELS".format(els_nb_doc, index_name))
if file_nb_line != els_nb_doc: if file_nb_line != els_nb_doc:
print('Look out! Not all the data has been found in ELS') print('Look out! Not all the data has been found in ELS')