Files
iTunes/send_data.py
Maxence G. de Montauzan fc35397883 (send_data) POC: ensure all documents are in ELS
Create a script to check ID's to help find problems
2020-04-10 23:29:55 +02:00

187 lines
7.0 KiB
Python

#!/usr/bin/env python
"""
Send JSON files to ELS
"""
import sys
import argparse
import requests
import json
import time
def _bulk_item_errors(response):
    """Return the per-item ``error`` objects found in a ``_bulk`` response.

    Elasticsearch answers a bulk request with HTTP 200 even when some of the
    operations fail; those failures are only reported in the JSON body
    (``"errors": true`` plus an ``error`` object on each failed item).
    """
    try:
        body = response.json()
    except ValueError:  # body is not JSON: nothing to inspect
        return []
    if not isinstance(body, dict) or not body.get('errors'):
        return []
    return [
        result['error']
        for item in body.get('items', [])
        for result in item.values()
        if isinstance(result, dict) and 'error' in result
    ]


def send_data(file, quiet=False):
    """
    Send a data bulk file to ELS.

    'file' should be a (readable) file object exposing a ``name`` attribute.
    'quiet' disables the progress messages; errors are always printed.

    After an HTTP 200 answer the upload is verified with
    check_all_data_is_saved(), even if some bulk items were rejected.
    """
    if not quiet:
        print("Sending '{}' data file...".format(file.name))

    # Stream raw bytes: Requests derives Content-Length from the on-disk size,
    # so a text-mode handle can send a wrong length and mis-encode non-ASCII
    # characters. Text files expose their binary stream as `.buffer`.
    payload = getattr(file, 'buffer', file)
    res = requests.post(url=ELASTICSEARCH_URL + '_bulk',
                        data=payload,
                        headers={'Content-Type': 'application/x-ndjson'})

    if res.status_code != 200:
        print("An error occured")
        print(res.text)
        return

    item_errors = _bulk_item_errors(res)
    if item_errors:
        # HTTP 200 does not mean every document was indexed.
        print("An error occured")
        print("{} bulk operation(s) failed, first error: {}".format(
            len(item_errors), item_errors[0]))
    elif not quiet:
        print("File '{}' sended to Elasticsearch!".format(file.name))

    check_all_data_is_saved(file)
def delete_index(index_name, quiet=False):
    """
    Delete an index in ELS.

    'index_name' is the name of the index to delete.
    'quiet' disables the progress messages; errors are always printed.
    """
    if not quiet:
        print('Deleting index \'{}\'...'.format(index_name))

    # Use the requested index, not the module-level default.
    res = requests.delete(url=ELASTICSEARCH_URL + index_name)

    if res.status_code == 200:
        if not quiet:
            print("Deleted!")
        return

    print("An error occured")
    # The error body is not guaranteed to be JSON, nor to carry a structured
    # 'error' object, so extract the error type defensively.
    try:
        body = res.json()
    except ValueError:
        body = None
    error = body.get('error') if isinstance(body, dict) else None
    error_type = error.get('type') if isinstance(error, dict) else None

    if error_type == 'index_not_found_exception':
        print("Index '{}' doesn't exist and can't be deleted".format(index_name))
    else:
        print(res.text)
def put_mapping(index_name, mapping_file, quiet=False):
    """
    Send a mapping file for an index to ELS.

    'index_name' is the index to create with that mapping.
    'mapping_file' should be a (readable) file object exposing ``name``.
    'quiet' disables the progress messages; errors are always printed.
    """
    if not quiet:
        print("Put '{}' mapping file...".format(mapping_file.name))

    # Stream raw bytes: Requests derives Content-Length from the on-disk size,
    # so a text-mode handle can send a wrong length and mis-encode non-ASCII
    # characters. Text files expose their binary stream as `.buffer`.
    payload = getattr(mapping_file, 'buffer', mapping_file)
    res = requests.put(url=ELASTICSEARCH_URL + index_name,
                       data=payload,
                       headers={'Content-Type': 'application/json'})

    if res.status_code != 200:
        print("An error occured")
        print(res.text)
    elif not quiet:
        print("File '{}' sended to Elasticsearch!".format(mapping_file.name))
def check_all_data_is_saved(file):
    """
    Compare the number of documents in a bulk file with what ELS holds.

    'file' is a file object (only its ``name`` is used; the file is re-read
    from disk). The bulk file is assumed to contain one action line followed
    by one source line per document, and every source line is assumed to
    carry a 'type' field shared by the whole file.

    Prints the count found in ELS, then the count expected from the file.
    Returns True when both counts match, False otherwise.
    """
    # Presumably gives Elasticsearch time to make fresh documents searchable.
    time.sleep(2)

    with open(file.name, 'r', encoding='utf-8') as bulk_file:
        # Ignore blank lines (e.g. a trailing one) so they don't skew the count.
        lines = [line for line in bulk_file if line.strip()]

    if len(lines) < 2:
        print("File '{}' contains no document to check".format(file.name))
        return False

    expected_count = len(lines) // 2
    doc_type = json.loads(lines[1])['type']

    # Build the query with json.dumps so the type value is properly escaped.
    query = {
        "track_total_hits": True,
        "query": {"constant_score": {"filter": {"term": {"type": doc_type}}}},
    }
    # NOTE(review): the index queried is INDEX_NAME, while the bulk file names
    # its own index in the action lines - confirm they always agree.
    res = requests.get(url=ELASTICSEARCH_URL + INDEX_NAME + '/_search?size=0',
                       data=json.dumps(query),
                       headers={'Content-Type': 'application/json'})
    if res.status_code != 200:
        print("An error occured")
        print(res.text)
        return False

    element_in_els = res.json()['hits']['total']['value']
    print(element_in_els)
    print(str(expected_count))
    return element_in_els == expected_count
#### main block ####

# Settings var (can be overloaded from the command line)
ELASTICSEARCH_URL = 'http://localhost:9200/'
INDEX_NAME = "itunessongs"

# Default file names
DEFAULT_SONG_FILE = 'es-songs.json'
DEFAULT_ALBUM_FILE = 'es-albums.json'
DEFAULT_ARTIST_FILE = 'es-artists.json'
DEFAULT_MAPPING_FILE = 'mapping.json'

# Get options
parser = argparse.ArgumentParser(
    description='''
    Send JSON files formated for bulk Elasticsearch operation to an Elasticsearch.
    By default: send song data enable, send album & artist data disabled.
    '''
)

# Bulk
parser.add_argument('-q', '--quiet', action='store_true',
                    help="Disable main output")

# Choose what to enable for send and files
sending_group = parser.add_argument_group("Sending options")

# -s (disable songs) and -sf (choose the song file) cannot be combined.
song_group = sending_group.add_mutually_exclusive_group()
song_group.add_argument('-s', '--song', action='store_false',
                        help='Disable send song data')
song_group.add_argument('-sf', '--song-file', type=argparse.FileType('r'),
                        help='Song file data to send (default: \'{}\').'.format(DEFAULT_SONG_FILE))

# Album/artist sending is opt-in: the bare flag selects the default file
# (argparse runs the string `const` through `type`, i.e. opens it).
sending_group.add_argument('-al', '--album-file', nargs='?', type=argparse.FileType('r'), const=DEFAULT_ALBUM_FILE,
                           help='Enable send album data. Optionally, precise the album file (default: \'{}\')'.format(DEFAULT_ALBUM_FILE))
sending_group.add_argument('-ar', '--artist-file', nargs='?', type=argparse.FileType('r'), const=DEFAULT_ARTIST_FILE,
                           help='Enable send artist data. Optionally, precise the artist file (default: \'{}\')'.format(DEFAULT_ARTIST_FILE))

# NOTE: argparse also runs a string `default` through `type`, so the default
# mapping file is opened at parse time even when -D is not requested.
sending_group.add_argument('-m', '--mapping-file', type=argparse.FileType('r'), default=DEFAULT_MAPPING_FILE,
                           help='If deleting index, mapping file to send (default: \'{}\')'.format(DEFAULT_MAPPING_FILE))

# Mode
mode_group = parser.add_argument_group('Mode')
mode_group.add_argument('-A', '--ALL', action='store_true',
                        help='Send all possible data: song, artist and album')
mode_group.add_argument('-D', '--DELETE', action='store_true',
                        help='Delete old index (precise name with -idx argument). This will send a mapping to ELS.')

# Settings
g_settings_group = parser.add_argument_group('Global Settings')
# `const` makes a bare `-els` fall back to the default URL; without it,
# nargs='?' would store None and later string concatenations would fail.
g_settings_group.add_argument('-els', '--elasticsearch-url', default=ELASTICSEARCH_URL, nargs='?',
                              const=ELASTICSEARCH_URL,
                              help="Elasticsearch URL to send data (default: \'{}\')".format(ELASTICSEARCH_URL))
g_settings_group.add_argument('-idx', '--index-name', default=INDEX_NAME,
                              help="Index name in Elasticsearch ONLY FOR DELETING! (default: \'{}\'). ".format(INDEX_NAME) +
                                   "When sending data, index name is specified in JSON files.")
if __name__ == '__main__':
    args = parser.parse_args()

    # Overloaded setting value
    INDEX_NAME = args.index_name
    # TODO Critical: index is in the file!
    # Every endpoint is built by plain concatenation, so guarantee exactly one
    # trailing slash; keep the default when no URL value was supplied.
    ELASTICSEARCH_URL = (args.elasticsearch_url or ELASTICSEARCH_URL).rstrip('/') + '/'

    if not args.quiet:
        print("*** Settings values ***")
        print("Elasticsearch URL:\t" + ELASTICSEARCH_URL)
        print("Index name:\t\t" + INDEX_NAME)
        print("")
    else:
        print("Processing...")

    def _open_default(path, kind, option):
        """Open the default data file 'path', or exit with status 2 if missing."""
        try:
            return open(path, 'r')
        except FileNotFoundError:
            print("Error: can't open default {} file: [Errno 2] No such file or directory: '{}'.".format(kind, path))
            print("Use {} argument, or -h for more help".format(option))
            sys.exit(2)

    if args.DELETE:
        # Recreate the index from scratch: drop it, then push the mapping.
        delete_index(INDEX_NAME, args.quiet)
        put_mapping(INDEX_NAME, args.mapping_file, args.quiet)

    # Stays None for every kind of data that is not sent during this run.
    song_file = artist_file = album_file = None

    if args.song or args.ALL:
        # Retrieve default song file if not specified
        song_file = args.song_file or _open_default(DEFAULT_SONG_FILE, 'music', '-sf')
        if not args.quiet:
            print("Take file '{}' to send song data".format(song_file.name))
        send_data(song_file, args.quiet)

    if args.artist_file or args.ALL:
        # With -A alone no artist file is given: fall back to the default one.
        artist_file = args.artist_file or _open_default(DEFAULT_ARTIST_FILE, 'artist', '-ar')
        if not args.quiet:
            print("Take file '{}' to send artist data".format(artist_file.name))
        send_data(artist_file, args.quiet)

    if args.album_file or args.ALL:
        # With -A alone no album file is given: fall back to the default one.
        album_file = args.album_file or _open_default(DEFAULT_ALBUM_FILE, 'album', '-al')
        if not args.quiet:
            print("Take file '{}' to send album data".format(album_file.name))
        send_data(album_file, args.quiet)

    # Final verification pass, limited to the files actually sent.
    for sent_file in (artist_file, song_file, album_file):
        if sent_file is not None:
            check_all_data_is_saved(sent_file)