diff --git a/iTunesParser.py b/iTunesParser.py
index f1b27bf..c439f95 100644
--- a/iTunesParser.py
+++ b/iTunesParser.py
@@ -66,7 +66,10 @@ class ITunesParser:
     """
     Parse an iTunes Library and produce JSON - for ELS
     """
-    ELS_INDEX_NAME = "itunessongs"
+    SONG_INDEX = 'itunes-songs'
+    ALBUM_INDEX = 'itunes-albums'
+    ARTIST_INDEX = 'itunes-artists'
+    # TODO Put variables in a config file or in a python library
 
     def __init__(self):
         self._tracks = {}
@@ -249,7 +252,7 @@ class WriteElsJson:
                 artist['Rating'] = round(artist['Rating'])
 
             json_track_index = {
-                "index": {"_index": ITunesParser.ELS_INDEX_NAME, "_type": "artist", "_id": persistent_id}
+                "index": {"_index": ITunesParser.ARTIST_INDEX, "_id": persistent_id}
             }
 
             file_artist.write(bytes(json.dumps(json_track_index, indent=None, cls=SetEncoder), 'UTF-8'))
@@ -270,7 +273,7 @@ class WriteElsJson:
                 album['Rating'] = round(album['Rating'])
 
             json_track_index = {
-                "index": {"_index": ITunesParser.ELS_INDEX_NAME, "_type": "album", "_id": persistent_id}
+                "index": {"_index": ITunesParser.ALBUM_INDEX, "_id": persistent_id}
             }
 
             file_albums.write(bytes(json.dumps(json_track_index, indent=None, cls=SetEncoder), 'UTF-8'))
@@ -287,7 +290,7 @@ class WriteElsJson:
         file = io.open(output_file, 'wb')
         for persistent_id, song in songs.items():
             json_track_index = {
-                "index": {"_index": ITunesParser.ELS_INDEX_NAME, "_type": "song", "_id": persistent_id}
+                "index": {"_index": ITunesParser.SONG_INDEX, "_id": persistent_id}
             }
 
             file.write(bytes(json.dumps(json_track_index, indent=None, cls=SetEncoder), 'UTF-8'))
diff --git a/send_data.py b/send_data.py
index 6adb6b1..b40ed6f 100644
--- a/send_data.py
+++ b/send_data.py
@@ -14,11 +14,17 @@ import requests
 DEFAULT_SONG_FILE = 'es-songs.json'
 DEFAULT_ALBUM_FILE = 'es-albums.json'
 DEFAULT_ARTIST_FILE = 'es-artists.json'
-DEFAULT_MAPPING_FILE = 'mapping.json'
+DEFAULT_MAPPING_SONGS_FILE = 'mapping.songs.json'
+DEFAULT_MAPPING_ARTISTS_FILE = 'mapping.artists.json'
+DEFAULT_MAPPING_ALBUMS_FILE = 'mapping.albums.json'
+
+SONG_INDEX = 'itunes-songs'
+ALBUM_INDEX = 'itunes-albums'
+ARTIST_INDEX = 'itunes-artists'
+# TODO Put variables in a config file or in a python library
 
 # Global values / set as default values
 ELASTICSEARCH_URL = 'http://localhost:9200/'
-INDEX_NAME = "itunessongs"
 
 # Why global variable ?
 # Because if I want to use a class to do post/put operation, I will use class or instance variables
@@ -26,9 +32,9 @@ def main():
     """
     Main function
     """
-    global ELASTICSEARCH_URL, INDEX_NAME
+    global ELASTICSEARCH_URL
 
-    args = create_parser().parse_args()
+    args = create_args_parser().parse_args()
 
     if not args.song and args.ALL:
         print(__file__ + ': error: argument -A/--ALL: not allowed with argument -s/--song')
@@ -36,41 +42,37 @@ def main():
 
     # Overloaded setting value
     ELASTICSEARCH_URL = args.elasticsearch_url
-    INDEX_NAME = args.index_name # Used for deletion / creation
     # TODO Improvement: check first line of file(s) to get index
 
     if not args.quiet:
         print("*** Settings values ***")
         print("Elasticsearch URL:\t" + ELASTICSEARCH_URL)
-        print("Index name:\t\t" + INDEX_NAME)
         print()
     else:
         print("Processing...")
-
-    if args.DELETE:
-        delete_index(INDEX_NAME, args.quiet)
-        put_mapping(INDEX_NAME, args.mapping_file, args.quiet)
-        print()
 
     #TODO Detect if index doesn't exist
     #TODO Use log instead print
 
+    check_is_ok = []
+
     # Send song data
     if args.song or args.ALL:
-        if not args.song_file:
-            try:
-                song_file = open(DEFAULT_SONG_FILE, 'r')
-            except FileNotFoundError as error:
-                print(error)
-                print("Default file not found.\nUse -sf argument, or -h for more help")
-                sys.exit(2)
-        else:
-            song_file = args.song_file
+        if args.DELETE:
+            mapping_song = load_file(args.mapping_song, DEFAULT_MAPPING_SONGS_FILE)
+            if not args.quiet:
+                print("Mapping of song index file: '{}'".format(mapping_song.name))
+
+            delete_index(SONG_INDEX, args.quiet)
+            put_mapping(SONG_INDEX, mapping_song, args.quiet)
+
+        song_file = load_file(args.song_file, DEFAULT_SONG_FILE)
         if not args.quiet:
             print("Song file: '{}'".format(song_file.name))
 
         send_data(song_file, args.quiet)
-        check_all_data_is_saved(song_file, args.quiet)
+        check = check_all_data_is_saved(song_file, SONG_INDEX, args.quiet)
+        check_is_ok.append(check)
         # ? Improvment: allow to stop script if all data not sent?
         if not args.quiet:
             print()
@@ -82,6 +84,14 @@ def main():
 
     # Send artist data
     if args.artist_file or args.ALL:
+        if args.DELETE:
+            mapping_artist = load_file(args.mapping_artist, DEFAULT_MAPPING_ARTISTS_FILE)
+            if not args.quiet:
+                print("Mapping of artist index file: '{}'".format(mapping_artist.name))
+
+            delete_index(ARTIST_INDEX, args.quiet)
+            put_mapping(ARTIST_INDEX, mapping_artist, args.quiet)
+
         artist_file = args.artist_file
         if not artist_file:
             if not args.quiet:
@@ -92,13 +102,22 @@ def main():
             print("Artist file: '{}'".format(artist_file.name))
 
         send_data(artist_file, args.quiet)
-        check_all_data_is_saved(artist_file, args.quiet)
+        check = check_all_data_is_saved(artist_file, ARTIST_INDEX, args.quiet)
+        check_is_ok.append(check)
         if not args.quiet:
             print()
         else:
             print('Artist sent')
 
     if args.album_file or args.ALL:
+        if args.DELETE:
+            mapping_album = load_file(args.mapping_album, DEFAULT_MAPPING_ALBUMS_FILE)
+            if not args.quiet:
+                print("Mapping of album index file: '{}'".format(mapping_album.name))
+
+            delete_index(ALBUM_INDEX, args.quiet)
+            put_mapping(ALBUM_INDEX, mapping_album, args.quiet)
+
         album_file = args.album_file
         if not album_file:
             if not args.quiet:
@@ -108,19 +127,39 @@ def main():
         if not args.quiet:
             print("Take file '{}' to send song data".format(album_file.name))
         send_data(album_file, args.quiet)
-        check_all_data_is_saved(album_file, args.quiet)
+        check = check_all_data_is_saved(album_file, ALBUM_INDEX, args.quiet)
+        check_is_ok.append(check)
         if not args.quiet:
             print()
         else:
             print('Album sent')
 
     print("I'm done!")
+    if check_is_ok.count(False) > 0:
+        print('Some problems occurs')
+        sys.exit(check_is_ok.count(False))
 
-def create_parser():
+def load_file(args_file, default_file):
+    """
+    Return args_file if given; otherwise open and return default_file (exit with code 2 if it is missing)
+    """
+    if not args_file:
+        try:
+            final_file = open(default_file, 'r')
+        except FileNotFoundError as error:
+            print(error)
+            print("Default file '{}' not found.\nUse the matching file argument, or -h for more help".format(default_file))
+            sys.exit(2)
+    else:
+        final_file = args_file
+    return final_file
+
+def create_args_parser():
     """
     Create parser with all options, default values, etc.
     Return the parser ready to parse args
     """
+    # TODO rewrite description with multi-index philosophy
     parser = argparse.ArgumentParser(
         description='''
         Send JSON files formated for bulk Elasticsearch operation to an Elasticsearch.
@@ -159,17 +198,19 @@ def create_parser():
         See -idx argument to set index name.
         See -map arguement to set mapping file.''')
 
+    # Mapping
+    mapping_group = parser.add_argument_group('Mapping files')
+    mapping_group.add_argument('-ms', '--mapping-song', type=argparse.FileType('r'), const=DEFAULT_MAPPING_SONGS_FILE, nargs='?',
+                               help='Mapping file for songs (default: \'{}\')'.format(DEFAULT_MAPPING_SONGS_FILE))
+    mapping_group.add_argument('-mr', '--mapping-artist', type=argparse.FileType('r'), const=DEFAULT_MAPPING_ARTISTS_FILE, nargs='?',
+                               help='Mapping file for artists (default: \'{}\')'.format(DEFAULT_MAPPING_ARTISTS_FILE))
+    mapping_group.add_argument('-ml', '--mapping-album', type=argparse.FileType('r'), const=DEFAULT_MAPPING_ALBUMS_FILE, nargs='?',
+                               help='Mapping file for albums (default: \'{}\')'.format(DEFAULT_MAPPING_ALBUMS_FILE))
+
     # Global Settings
     g_settings_group = parser.add_argument_group('Global Settings')
     g_settings_group.add_argument('-els', '--elasticsearch-url', default=ELASTICSEARCH_URL, nargs='?',
                                   help="Elasticsearch URL (default: \'{}\')".format(ELASTICSEARCH_URL))
-    g_settings_group.add_argument('-idx', '--index-name', default=INDEX_NAME, nargs='?',
-                                  help="""Index name in Elasticsearch (default: \'{}\').
-                                  Used when creating the index: if it does not exist or after deletion.
-                                  When sending data, the index name is specified in JSON files.""".format(INDEX_NAME))
-    g_settings_group.add_argument('-map', '--mapping-file', type=argparse.FileType('r'), default=DEFAULT_MAPPING_FILE, nargs='?',
-                                  help='If deleting index or if index does not exist, mapping file to use (default: \'{}\')'
-                                  .format(DEFAULT_MAPPING_FILE))
 
     return parser
 
@@ -198,7 +239,7 @@ def delete_index(index_name, quiet=False):
     """
     if not quiet:
         print('Deleting index \'{}\'...'.format(index_name))
-    res = requests.delete(url=ELASTICSEARCH_URL + INDEX_NAME)
+    res = requests.delete(url=ELASTICSEARCH_URL + index_name)
     if res.status_code == 200:
         if not quiet:
             print("Deleted!")
@@ -228,7 +269,7 @@ def put_mapping(index_name, mapping_file, quiet=False):
 
     put_setting(index_name, 0, quiet)
 
-def check_all_data_is_saved(data_file, quiet=False):
+def check_all_data_is_saved(data_file, index_name, quiet=False):
     """
     Check if found same number of documents in ELS as number of line in file.
     Detect type of data to be searched in ELS.
@@ -241,15 +282,13 @@ def check_all_data_is_saved(data_file, quiet=False):
     data_file.seek(0)
     lines = data_file.readlines()
     file_nb_line = int(len(lines) / 2)
-    extract = json.loads(lines[1])
-    data_type = extract['type']
 
     if not quiet:
         print("\tFound: {} lines in '{}' file".format(file_nb_line, data_file.name))
 
-    payload = {"track_total_hits": "true", "query": {"constant_score": {"filter": {"term": {"type": data_type}}}}}
+    payload = {"track_total_hits": "true"}
 
-    res = requests.get(url=ELASTICSEARCH_URL + INDEX_NAME + '/_search?size=0',
+    res = requests.get(url=ELASTICSEARCH_URL + index_name + '/_search?size=0',
                        data=json.dumps(payload),
                        headers={'Content-Type': 'application/x-ndjson'})
     if res.status_code != 200:
@@ -259,7 +298,7 @@ def check_all_data_is_saved(data_file, quiet=False):
 
     els_nb_doc = res.json()['hits']['total']['value']
     if not quiet:
-        print("\tFound: {} documents with '{}' type in ELS".format(els_nb_doc, data_type))
+        print("\tFound: {} documents in index '{}' in ELS".format(els_nb_doc, index_name))
 
     if file_nb_line != els_nb_doc:
         print('Look out! Not all the data has been found in ELS')