diff --git a/check_id.py b/check_id.py
new file mode 100644
--- /dev/null
+++ b/check_id.py
@@ -0,0 +1,42 @@
+"""Check the Elasticsearch bulk files for duplicated document IDs."""
+
+import json
+
+FILES = ['es-songs.json', 'es-artists.json', 'es-albums.json']
+
+
+def find_duplicate_ids(files):
+    """
+    Collect the bulk action lines whose '_id' was already used by an earlier line.
+
+    :param files: paths of the newline-delimited bulk files to scan
+    :return: dict mapping each duplicated '_id' to the last action line using it
+    """
+    seen_ids = set()  # set lookups are O(1); a list made the whole scan quadratic
+    duplicates = {}
+
+    for path in files:
+        with open(path, encoding='utf-8') as fp:
+            for line in fp:
+                if not line.strip():
+                    # A blank line is not valid JSON: skip it instead of crashing
+                    continue
+                content = json.loads(line)
+                if 'index' not in content:
+                    continue
+                doc_id = content['index']['_id']
+                if doc_id in seen_ids:
+                    duplicates[doc_id] = content
+                else:
+                    seen_ids.add(doc_id)
+
+    return duplicates
+
+
+if __name__ == '__main__':
+    bad_lines = find_duplicate_ids(FILES)
+    if not bad_lines:
+        print("No duplicate ID's found, everything's fine!!")
+    else:
+        print('KO')
+        print(bad_lines)
diff --git a/send_data.py b/send_data.py
--- a/send_data.py
+++ b/send_data.py
@@ -7,6 +7,8 @@
 import sys
 import argparse
 import requests
+import json
+import time
 
 def send_data(file, quiet=False):
     """
@@ -27,6 +29,8 @@ def send_data(file, quiet=False):
     if not quiet:
         print("File '{} sended to Elasticsearch!".format(file.name))
 
+    check_all_data_is_saved(file, quiet)
+
 
 def delete_index(index_name, quiet=False):
     """
@@ -61,6 +65,55 @@ def put_mapping(index_name, mapping_file, quiet=False):
     if not quiet:
         print("File '{} sended to Elasticsearch!".format(mapping_file.name))
 
+
+def check_all_data_is_saved(file, quiet=False, wait=2):
+    """
+    Compare the document count of a bulk file with the count held by Elasticsearch.
+
+    The document type is read from the 'type' field of the first document of the
+    file; Elasticsearch is then asked how many documents of that type it holds.
+
+    :param file: file object of the bulk file that was sent (only its name is used)
+    :param quiet: if True, print nothing when the two counts match
+    :param wait: seconds to sleep before counting, so that freshly indexed
+                 documents have time to become searchable
+    :return: True if the two counts match, False otherwise
+    """
+    time.sleep(wait)
+
+    with open(file.name, 'r', encoding='utf-8') as bulk_file:
+        # Ignore blank lines so that a trailing newline does not skew the count
+        lines = [line for line in bulk_file if line.strip()]
+
+    if len(lines) < 2:
+        if not quiet:
+            print("File '{}' contains no document to check".format(file.name))
+        return True
+
+    # A bulk file alternates one action line and one document line
+    expected = len(lines) // 2
+    doc_type = json.loads(lines[1])['type']
+
+    # Let requests serialize the query so that the type value is always escaped
+    query = {
+        'track_total_hits': True,
+        'query': {'constant_score': {'filter': {'term': {'type': doc_type}}}},
+    }
+    res = requests.get(url=ELASTICSEARCH_URL + INDEX_NAME + '/_search?size=0',
+                       json=query)
+    res.raise_for_status()
+
+    found = res.json()['hits']['total']['value']
+    if found != expected:
+        print("File '{}': {} documents expected but {} found in Elasticsearch"
+              .format(file.name, expected, found))
+        return False
+
+    if not quiet:
+        print("File '{}': all {} documents found in Elasticsearch"
+              .format(file.name, found))
+    return True
+
 
 #### main block ####
 