(send_data) POC: ensure all documents are in ELS

Create a script to check IDs to help find problems
This commit is contained in:
2020-04-02 01:51:37 +02:00
parent 1728b2a922
commit fc35397883
2 changed files with 53 additions and 0 deletions

26
check_id.py Normal file
View File

@@ -0,0 +1,26 @@
import json
# Bulk files scanned for duplicate document IDs when run as a script.
files = ['es-songs.json', 'es-artists.json', 'es-albums.json']


def find_duplicate_ids(paths):
    """Return the action lines whose ``_id`` was already seen.

    Every non-blank line of every file is parsed as JSON.  Lines containing
    an ``index`` key are treated as action lines and their
    ``['index']['_id']`` value is tracked; all other lines are ignored.
    IDs are tracked across all files, so the same ID appearing in two
    different files counts as a duplicate.

    Args:
        paths: iterable of paths to line-delimited JSON files.

    Returns:
        A dict mapping each duplicated ID to the parsed content of the last
        line that repeated it.  Empty when every ID is unique.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    # A set keeps the membership test O(1); a list makes the whole scan
    # quadratic in the number of documents.
    seen_ids = set()
    duplicates = {}
    for path in paths:
        with open(path) as fp:
            for line in fp:
                # Skip blank lines instead of crashing in json.loads.
                if not line.strip():
                    continue
                content = json.loads(line)
                if 'index' not in content:
                    continue
                doc_id = content['index']['_id']
                if doc_id in seen_ids:
                    duplicates[doc_id] = content
                else:
                    seen_ids.add(doc_id)
    return duplicates


def main():
    """Scan the default bulk files and print the outcome."""
    bad_lines = find_duplicate_ids(files)
    if not bad_lines:
        print("No duplicate ID's found, everything's fine!!")
    else:
        print('KO')
        print(bad_lines)


if __name__ == '__main__':
    main()

View File

@@ -7,6 +7,8 @@
import sys import sys
import argparse import argparse
import requests import requests
import json
import time
def send_data(file, quiet=False): def send_data(file, quiet=False):
""" """
@@ -27,6 +29,8 @@ def send_data(file, quiet=False):
if not quiet: if not quiet:
print("File '{} sended to Elasticsearch!".format(file.name)) print("File '{} sended to Elasticsearch!".format(file.name))
check_all_data_is_saved(file)
def delete_index(index_name, quiet=False): def delete_index(index_name, quiet=False):
""" """
@@ -61,6 +65,25 @@ def put_mapping(index_name, mapping_file, quiet=False):
if not quiet: if not quiet:
print("File '{} sended to Elasticsearch!".format(mapping_file.name)) print("File '{} sended to Elasticsearch!".format(mapping_file.name))
def check_all_data_is_saved(file):
    """Print the Elasticsearch document count next to the count expected from a bulk file.

    The file is re-opened by ``file.name`` and its non-blank lines are
    counted; the expected number of documents is half that count (assumes
    the file holds one action line followed by one source line per
    document -- TODO confirm against the file generator).  The ``type``
    field of the second line selects which documents are counted in the
    index via a ``term`` filter.

    Args:
        file: object exposing a ``name`` attribute with the path of the
            bulk file to verify.

    Returns:
        True when the number of hits reported by Elasticsearch equals the
        number of documents expected from the file, False otherwise.

    Raises:
        ValueError: if the file has fewer than two non-blank lines, so no
            document can be inspected.
        KeyError: if the inspected line has no ``type`` field.
        requests.HTTPError: if Elasticsearch answers with an error status.
    """
    with open(file.name, 'r') as bulk_file:
        lines = [line for line in bulk_file if line.strip()]
    if len(lines) < 2:
        raise ValueError(
            "File '{}' holds no document to check".format(file.name))

    expected_count = len(lines) // 2
    doc_type = json.loads(lines[1])['type']

    # Serialise with json.dumps so the type value is always escaped
    # correctly instead of being spliced into a hand-written string.
    payload = json.dumps({
        "track_total_hits": True,
        "query": {
            "constant_score": {
                "filter": {"term": {"type": doc_type}},
            },
        },
    })

    # Fixed pause kept from the original; presumably it gives Elasticsearch
    # time to make freshly indexed documents searchable -- TODO confirm.
    time.sleep(2)

    # NOTE(review): the body is a single JSON document; 'application/json'
    # may be the more accurate content type -- confirm before changing.
    res = requests.get(url=ELASTICSEARCH_URL + INDEX_NAME + '/_search?size=0',
                       data=payload,
                       headers={'Content-Type': 'application/x-ndjson'})
    # Surface HTTP errors directly rather than as a KeyError on 'hits'.
    res.raise_for_status()
    element_in_els = res.json()['hits']['total']['value']

    print(element_in_els)
    print(str(expected_count))
    return element_in_els == expected_count
#### main block #### #### main block ####
@@ -157,3 +180,7 @@ if __name__ == '__main__':
if not args.quiet: if not args.quiet:
print("Take file '{}' to send song data".format(album_file.name)) print("Take file '{}' to send song data".format(album_file.name))
send_data(album_file, args.quiet) send_data(album_file, args.quiet)
check_all_data_is_saved(artist_file)
check_all_data_is_saved(song_file)
check_all_data_is_saved(album_file)