(send_data) POC: ensure all documents are in ELS

Create a script to check IDs to help find problems
This commit is contained in:
2020-04-02 01:51:37 +02:00
parent 1728b2a922
commit fc35397883
2 changed files with 53 additions and 0 deletions

View File

@@ -7,6 +7,8 @@
import sys
import argparse
import requests
import json
import time
def send_data(file, quiet=False):
"""
@@ -27,6 +29,8 @@ def send_data(file, quiet=False):
if not quiet:
print("File '{} sended to Elasticsearch!".format(file.name))
check_all_data_is_saved(file)
def delete_index(index_name, quiet=False):
"""
@@ -61,6 +65,25 @@ def put_mapping(index_name, mapping_file, quiet=False):
if not quiet:
print("File '{} sended to Elasticsearch!".format(mapping_file.name))
def check_all_data_is_saved(file):
    """
    Print the number of documents Elasticsearch reports for the type found in
    `file`, then the number of documents the file is expected to contain.

    The file is re-opened by name and read in full. The expected count is half
    the number of lines (presumably one action line plus one document line per
    entry, as in the bulk format -- confirm against the data files). The
    document type is read from the `type` key of the JSON object on the second
    line.

    :param file: object with a `name` attribute giving the path of the data file
    :raises IndexError: if the file has fewer than two lines
    :raises KeyError: if the second line has no `type` key, or the search
        response lacks `hits.total.value`
    """
    # Fixed pause before counting -- presumably gives Elasticsearch time to
    # make the documents that were just sent searchable.
    time.sleep(2)

    # Use a distinct local name so the `file` parameter is not rebound.
    with open(file.name, 'r') as data_file:
        lines = data_file.readlines()

    expected_count = len(lines) // 2

    doc_type = json.loads(lines[1])['type']

    # Serialise with json.dumps so that quotes, backslashes or non-ASCII
    # characters in the type value cannot produce an invalid request body.
    payload = json.dumps({
        "track_total_hits": True,
        "query": {
            "constant_score": {
                "filter": {"term": {"type": doc_type}}
            }
        }
    })

    # size=0: only the hit count is needed, not the documents themselves.
    res = requests.get(url=ELASTICSEARCH_URL + INDEX_NAME + '/_search?size=0',
                       data=payload,
                       headers={'Content-Type': 'application/x-ndjson'})

    element_in_els = res.json()['hits']['total']['value']
    print(element_in_els)
    print(str(expected_count))
#### main block ####
@@ -157,3 +180,7 @@ if __name__ == '__main__':
if not args.quiet:
print("Take file '{}' to send song data".format(album_file.name))
send_data(album_file, args.quiet)
check_all_data_is_saved(artist_file)
check_all_data_is_saved(song_file)
check_all_data_is_saved(album_file)