(scripts) Adapt generation & sending scripts
This commit is contained in:
@@ -66,7 +66,10 @@ class ITunesParser:
|
||||
"""
|
||||
Parse an iTunes Library and produce JSON - for ELS
|
||||
"""
|
||||
ELS_INDEX_NAME = "itunessongs"
|
||||
SONG_INDEX = 'itunes-songs'
|
||||
ALBUM_INDEX = 'itunes-albums'
|
||||
ARTIST_INDEX = 'itunes-artists'
|
||||
# TODO Put variables in a config files or in a python library
|
||||
|
||||
def __init__(self):
|
||||
self._tracks = {}
|
||||
@@ -249,7 +252,7 @@ class WriteElsJson:
|
||||
artist['Rating'] = round(artist['Rating'])
|
||||
|
||||
json_track_index = {
|
||||
"index": {"_index": ITunesParser.ELS_INDEX_NAME, "_type": "artist", "_id": persistent_id}
|
||||
"index": {"_index": ITunesParser.ARTIST_INDEX, "_id": persistent_id}
|
||||
}
|
||||
|
||||
file_artist.write(bytes(json.dumps(json_track_index, indent=None, cls=SetEncoder), 'UTF-8'))
|
||||
@@ -270,7 +273,7 @@ class WriteElsJson:
|
||||
album['Rating'] = round(album['Rating'])
|
||||
|
||||
json_track_index = {
|
||||
"index": {"_index": ITunesParser.ELS_INDEX_NAME, "_type": "album", "_id": persistent_id}
|
||||
"index": {"_index": ITunesParser.ALBUM_INDEX, "_id": persistent_id}
|
||||
}
|
||||
|
||||
file_albums.write(bytes(json.dumps(json_track_index, indent=None, cls=SetEncoder), 'UTF-8'))
|
||||
@@ -287,7 +290,7 @@ class WriteElsJson:
|
||||
file = io.open(output_file, 'wb')
|
||||
for persistent_id, song in songs.items():
|
||||
json_track_index = {
|
||||
"index": {"_index": ITunesParser.ELS_INDEX_NAME, "_type": "song", "_id": persistent_id}
|
||||
"index": {"_index": ITunesParser.SONG_INDEX, "_id": persistent_id}
|
||||
}
|
||||
|
||||
file.write(bytes(json.dumps(json_track_index, indent=None, cls=SetEncoder), 'UTF-8'))
|
||||
|
||||
115
send_data.py
115
send_data.py
@@ -14,11 +14,17 @@ import requests
|
||||
DEFAULT_SONG_FILE = 'es-songs.json'
|
||||
DEFAULT_ALBUM_FILE = 'es-albums.json'
|
||||
DEFAULT_ARTIST_FILE = 'es-artists.json'
|
||||
DEFAULT_MAPPING_FILE = 'mapping.json'
|
||||
DEFAULT_MAPPING_SONGS_FILE = 'mapping.songs.json'
|
||||
DEFAULT_MAPPING_ARTISTS_FILE = 'mapping.artists.json'
|
||||
DEFAULT_MAPPING_ALBUMS_FILE = 'mapping.albums.json'
|
||||
|
||||
SONG_INDEX = 'itunes-songs'
|
||||
ALBUM_INDEX = 'itunes-albums'
|
||||
ARTIST_INDEX = 'itunes-artists'
|
||||
# TODO Put variables in a config files or in a python library
|
||||
|
||||
# Global values / set as default values
|
||||
ELASTICSEARCH_URL = 'http://localhost:9200/'
|
||||
INDEX_NAME = "itunessongs"
|
||||
# Why global variable ?
|
||||
# Because if I want to use a class to do post/put operation, I will use class or instance variables
|
||||
|
||||
@@ -26,9 +32,9 @@ def main():
|
||||
"""
|
||||
Main function
|
||||
"""
|
||||
global ELASTICSEARCH_URL, INDEX_NAME
|
||||
global ELASTICSEARCH_URL
|
||||
|
||||
args = create_parser().parse_args()
|
||||
args = create_args_parser().parse_args()
|
||||
|
||||
if not args.song and args.ALL:
|
||||
print(__file__ + ': error: argument -A/--ALL: not allowed with argument -s/--song')
|
||||
@@ -36,41 +42,37 @@ def main():
|
||||
|
||||
# Overloaded setting value
|
||||
ELASTICSEARCH_URL = args.elasticsearch_url
|
||||
INDEX_NAME = args.index_name # Used for deletion / creation
|
||||
# TODO Improvement: check first line of file(s) to get index
|
||||
|
||||
if not args.quiet:
|
||||
print("*** Settings values ***")
|
||||
print("Elasticsearch URL:\t" + ELASTICSEARCH_URL)
|
||||
print("Index name:\t\t" + INDEX_NAME)
|
||||
print()
|
||||
else:
|
||||
print("Processing...")
|
||||
|
||||
if args.DELETE:
|
||||
delete_index(INDEX_NAME, args.quiet)
|
||||
put_mapping(INDEX_NAME, args.mapping_file, args.quiet)
|
||||
print()
|
||||
#TODO Detect if index doesn't exist
|
||||
|
||||
#TODO Use log instead print
|
||||
|
||||
check_is_ok = []
|
||||
|
||||
# Send song data
|
||||
if args.song or args.ALL:
|
||||
if not args.song_file:
|
||||
try:
|
||||
song_file = open(DEFAULT_SONG_FILE, 'r')
|
||||
except FileNotFoundError as error:
|
||||
print(error)
|
||||
print("Default file not found.\nUse -sf argument, or -h for more help")
|
||||
sys.exit(2)
|
||||
else:
|
||||
song_file = args.song_file
|
||||
if args.DELETE:
|
||||
mapping_song = load_file(args.mapping_song, DEFAULT_MAPPING_SONGS_FILE)
|
||||
if not args.quiet:
|
||||
print("Mapping of song index file: '{}'".format(mapping_song.name))
|
||||
|
||||
delete_index(SONG_INDEX, args.quiet)
|
||||
put_mapping(SONG_INDEX, mapping_song, args.quiet)
|
||||
|
||||
song_file = load_file(args.song_file, DEFAULT_SONG_FILE)
|
||||
if not args.quiet:
|
||||
print("Song file: '{}'".format(song_file.name))
|
||||
|
||||
send_data(song_file, args.quiet)
|
||||
check_all_data_is_saved(song_file, args.quiet)
|
||||
check = check_all_data_is_saved(song_file, SONG_INDEX, args.quiet)
|
||||
check_is_ok.append(check)
|
||||
# ? Improvement: allow stopping the script if not all data was sent?
|
||||
if not args.quiet:
|
||||
print()
|
||||
@@ -82,6 +84,14 @@ def main():
|
||||
|
||||
# Send artist data
|
||||
if args.artist_file or args.ALL:
|
||||
if args.DELETE:
|
||||
mapping_artist = load_file(args.mapping_artist, DEFAULT_MAPPING_ARTISTS_FILE)
|
||||
if not args.quiet:
|
||||
print("Mapping of artist index file: '{}'".format(mapping_artist.name))
|
||||
|
||||
delete_index(ARTIST_INDEX, args.quiet)
|
||||
put_mapping(ARTIST_INDEX, mapping_artist, args.quiet)
|
||||
|
||||
artist_file = args.artist_file
|
||||
if not artist_file:
|
||||
if not args.quiet:
|
||||
@@ -92,13 +102,22 @@ def main():
|
||||
print("Artist file: '{}'".format(artist_file.name))
|
||||
|
||||
send_data(artist_file, args.quiet)
|
||||
check_all_data_is_saved(artist_file, args.quiet)
|
||||
check = check_all_data_is_saved(artist_file, ARTIST_INDEX, args.quiet)
|
||||
check_is_ok.append(check)
|
||||
if not args.quiet:
|
||||
print()
|
||||
else:
|
||||
print('Artist sent')
|
||||
|
||||
if args.album_file or args.ALL:
|
||||
if args.DELETE:
|
||||
mapping_album = load_file(args.mapping_album, DEFAULT_MAPPING_ALBUMS_FILE)
|
||||
if not args.quiet:
|
||||
print("Mapping of artist index file: '{}'".format(mapping_album.name))
|
||||
|
||||
delete_index(ALBUM_INDEX, args.quiet)
|
||||
put_mapping(ALBUM_INDEX, mapping_album, args.quiet)
|
||||
|
||||
album_file = args.album_file
|
||||
if not album_file:
|
||||
if not args.quiet:
|
||||
@@ -108,19 +127,39 @@ def main():
|
||||
if not args.quiet:
|
||||
print("Take file '{}' to send song data".format(album_file.name))
|
||||
send_data(album_file, args.quiet)
|
||||
check_all_data_is_saved(album_file, args.quiet)
|
||||
check = check_all_data_is_saved(album_file, ALBUM_INDEX, args.quiet)
|
||||
check_is_ok.append(check)
|
||||
if not args.quiet:
|
||||
print()
|
||||
else:
|
||||
print('Album sent')
|
||||
|
||||
print("I'm done!")
|
||||
if check_is_ok.count(False) > 0:
|
||||
print('Some problems occurs')
|
||||
sys.exit(check_is_ok.count(False))
|
||||
|
||||
def create_parser():
|
||||
def load_file(args_file, default_file):
    """
    Return the file to use: the one given on the command line, or the default.

    :param args_file: already-open file object coming from the parsed
        arguments, or a falsy value (e.g. None) when the option was not given.
    :param default_file: path of the file to open (text mode, read-only)
        when ``args_file`` is falsy.
    :return: ``args_file`` unchanged when it is truthy, otherwise the newly
        opened default file. The caller is responsible for closing it.

    Exits the script with status 2 when the default file has to be used
    but does not exist.
    """
    # An explicitly provided file always wins over the default one.
    if args_file:
        return args_file

    try:
        return open(default_file, 'r')
    except FileNotFoundError as error:
        print(error)
        # This helper serves data files and mapping files alike, so the hint
        # must not point at one specific option (it used to say '-sf').
        print("Default file '{}' not found.\n"
              "Pass the file explicitly with the matching argument, "
              "or use -h for more help".format(default_file))
        sys.exit(2)
|
||||
|
||||
def create_args_parser():
|
||||
"""
|
||||
Create parser with all options, default values, etc.
|
||||
Return the parser ready to parse args
|
||||
"""
|
||||
# TODO rewrite description with multi-index philosophy
|
||||
parser = argparse.ArgumentParser(
|
||||
description='''
|
||||
Send JSON files formated for bulk Elasticsearch operation to an Elasticsearch.
|
||||
@@ -159,17 +198,19 @@ def create_parser():
|
||||
See -idx argument to set index name.
|
||||
See -map arguement to set mapping file.''')
|
||||
|
||||
# Mapping
|
||||
mapping_group = parser.add_argument_group('Mapping files')
|
||||
mode_group.add_argument('-ms', '--mapping-song', type=argparse.FileType('r'), const=DEFAULT_MAPPING_SONGS_FILE, nargs='?',
|
||||
help='Mapping file for songs (default: \'{}\')'.format(DEFAULT_MAPPING_SONGS_FILE))
|
||||
mode_group.add_argument('-mr', '--mapping-artist', type=argparse.FileType('r'), const=DEFAULT_ARTIST_FILE, nargs='?',
|
||||
help='Mapping file for artists (default: \'{}\')'.format(DEFAULT_MAPPING_ARTISTS_FILE))
|
||||
mode_group.add_argument('-ml', '--mapping-album', type=argparse.FileType('r'), const=DEFAULT_MAPPING_ALBUMS_FILE, nargs='?',
|
||||
help='Mapping file for albums (default: \'{}\')'.format(DEFAULT_MAPPING_ALBUMS_FILE))
|
||||
|
||||
# Global Settings
|
||||
g_settings_group = parser.add_argument_group('Global Settings')
|
||||
g_settings_group.add_argument('-els', '--elasticsearch-url', default=ELASTICSEARCH_URL, nargs='?',
|
||||
help="Elasticsearch URL (default: \'{}\')".format(ELASTICSEARCH_URL))
|
||||
g_settings_group.add_argument('-idx', '--index-name', default=INDEX_NAME, nargs='?',
|
||||
help="""Index name in Elasticsearch (default: \'{}\').
|
||||
Used when creating the index: if it does not exist or after deletion.
|
||||
When sending data, the index name is specified in JSON files.""".format(INDEX_NAME))
|
||||
g_settings_group.add_argument('-map', '--mapping-file', type=argparse.FileType('r'), default=DEFAULT_MAPPING_FILE, nargs='?',
|
||||
help='If deleting index or if index does not exist, mapping file to use (default: \'{}\')'
|
||||
.format(DEFAULT_MAPPING_FILE))
|
||||
|
||||
return parser
|
||||
|
||||
@@ -198,7 +239,7 @@ def delete_index(index_name, quiet=False):
|
||||
"""
|
||||
if not quiet:
|
||||
print('Deleting index \'{}\'...'.format(index_name))
|
||||
res = requests.delete(url=ELASTICSEARCH_URL + INDEX_NAME)
|
||||
res = requests.delete(url=ELASTICSEARCH_URL + index_name)
|
||||
if res.status_code == 200:
|
||||
if not quiet:
|
||||
print("Deleted!")
|
||||
@@ -228,7 +269,7 @@ def put_mapping(index_name, mapping_file, quiet=False):
|
||||
|
||||
put_setting(index_name, 0, quiet)
|
||||
|
||||
def check_all_data_is_saved(data_file, quiet=False):
|
||||
def check_all_data_is_saved(data_file, index_name, quiet=False):
|
||||
"""
|
||||
Check if found same number of documents in ELS as number of line in file.
|
||||
Detect type of data to be searched in ELS.
|
||||
@@ -241,15 +282,13 @@ def check_all_data_is_saved(data_file, quiet=False):
|
||||
data_file.seek(0)
|
||||
lines = data_file.readlines()
|
||||
file_nb_line = int(len(lines) / 2)
|
||||
extract = json.loads(lines[1])
|
||||
data_type = extract['type']
|
||||
|
||||
if not quiet:
|
||||
print("\tFound: {} lines in '{}' file".format(file_nb_line, data_file.name))
|
||||
|
||||
payload = {"track_total_hits": "true", "query": {"constant_score": {"filter": {"term": {"type": data_type}}}}}
|
||||
payload = {"track_total_hits": "true"}
|
||||
|
||||
res = requests.get(url=ELASTICSEARCH_URL + INDEX_NAME + '/_search?size=0',
|
||||
res = requests.get(url=ELASTICSEARCH_URL + index_name + '/_search?size=0',
|
||||
data=json.dumps(payload),
|
||||
headers={'Content-Type': 'application/x-ndjson'})
|
||||
if res.status_code != 200:
|
||||
@@ -259,7 +298,7 @@ def check_all_data_is_saved(data_file, quiet=False):
|
||||
els_nb_doc = res.json()['hits']['total']['value']
|
||||
|
||||
if not quiet:
|
||||
print("\tFound: {} documents with '{}' type in ELS".format(els_nb_doc, data_type))
|
||||
print("\tFound: {} documents in index '{}' in ELS".format(els_nb_doc, index_name))
|
||||
|
||||
if file_nb_line != els_nb_doc:
|
||||
print('Look out! Not all the data has been found in ELS')
|
||||
|
||||
Reference in New Issue
Block a user