(send_data) POC: ensure all documents are in ELS
Create a script that checks for duplicate IDs to help find problems
This commit is contained in:
26
check_id.py
Normal file
26
check_id.py
Normal file
@@ -0,0 +1,26 @@
|
||||
import json
|
||||
|
||||
files = ['es-songs.json', 'es-artists.json', 'es-albums.json']


def find_duplicate_ids(paths):
    """Return the lines whose ``index._id`` was already seen earlier.

    Every file is read line by line and each non-blank line is parsed as
    JSON. Only lines holding an ``index`` key are inspected. The first
    occurrence of an ``_id`` is remembered; any later line reusing it
    (in the same file or in a following one) is reported.

    Args:
        paths: Iterable of file paths, scanned in the given order.

    Returns:
        A dict mapping each duplicated ``_id`` to the parsed content of
        the last line that repeated it. Empty when every ``_id`` is unique.

    Raises:
        OSError: If one of the files cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # A set keeps the membership test O(1); with a list the whole scan is
    # quadratic in the number of documents.
    seen_ids = set()
    duplicates = {}

    for path in paths:
        with open(path) as fp:
            for line in fp:
                # A stray blank line would make json.loads raise; skip it.
                if not line.strip():
                    continue

                content = json.loads(line)
                if 'index' not in content:
                    continue

                doc_id = content['index']['_id']
                if doc_id in seen_ids:
                    duplicates[doc_id] = content
                else:
                    seen_ids.add(doc_id)

    return duplicates


def main():
    """Scan the default files and print whether duplicate IDs were found."""
    bad_lines = find_duplicate_ids(files)

    if not bad_lines:
        print("No duplicate ID's found, everything's fine!!")
    else:
        print('KO')
        print(bad_lines)


if __name__ == '__main__':
    main()
|
||||
27
send_data.py
27
send_data.py
@@ -7,6 +7,8 @@
|
||||
import sys
|
||||
import argparse
|
||||
import requests
|
||||
import json
|
||||
import time
|
||||
|
||||
def send_data(file, quiet=False):
|
||||
"""
|
||||
@@ -27,6 +29,8 @@ def send_data(file, quiet=False):
|
||||
if not quiet:
|
||||
print("File '{} sended to Elasticsearch!".format(file.name))
|
||||
|
||||
check_all_data_is_saved(file)
|
||||
|
||||
|
||||
def delete_index(index_name, quiet=False):
|
||||
"""
|
||||
@@ -61,6 +65,25 @@ def put_mapping(index_name, mapping_file, quiet=False):
|
||||
if not quiet:
|
||||
print("File '{} sended to Elasticsearch!".format(mapping_file.name))
|
||||
|
||||
def check_all_data_is_saved(file):
    """Print how many documents Elasticsearch holds next to the expected count.

    The file named by ``file.name`` is re-read from disk. The expected
    number of documents is half its line count, and the document type is
    taken from the ``type`` key of its second line. Elasticsearch is then
    asked how many documents have that ``type``, and both numbers are
    printed (Elasticsearch count first, file count second).

    Args:
        file: Object exposing a ``name`` attribute holding the path of the
            file that was sent.

    Raises:
        IndexError: If the file has fewer than two lines.
        KeyError: If the second line has no ``type`` key, or the response
            lacks ``hits.total.value``.
    """
    # NOTE(review): presumably gives Elasticsearch time to make the freshly
    # sent documents searchable -- confirm; an explicit refresh may be safer.
    time.sleep(2)

    # Use a distinct name so the ``file`` argument is not rebound.
    with open(file.name, 'r') as sent_file:
        lines = sent_file.readlines()

    # Assumes two lines per document (action line + source line) -- the
    # original halves the line count the same way.
    file_nb_line = len(lines) // 2
    doc_type = json.loads(lines[1])['type']

    # Build the query with json.dumps so the type value is always escaped
    # correctly instead of being spliced into a hand-written JSON string.
    payload = json.dumps({
        'track_total_hits': True,
        'query': {
            'constant_score': {
                'filter': {
                    'term': {'type': doc_type},
                },
            },
        },
    })

    res = requests.get(url=ELASTICSEARCH_URL + INDEX_NAME + '/_search?size=0',
                       data=payload,
                       headers={'Content-Type': 'application/x-ndjson'})

    element_in_els = res.json()['hits']['total']['value']

    print(element_in_els)
    print(str(file_nb_line))
|
||||
|
||||
|
||||
#### main block ####
|
||||
|
||||
@@ -157,3 +180,7 @@ if __name__ == '__main__':
|
||||
if not args.quiet:
|
||||
print("Take file '{}' to send song data".format(album_file.name))
|
||||
send_data(album_file, args.quiet)
|
||||
|
||||
check_all_data_is_saved(artist_file)
|
||||
check_all_data_is_saved(song_file)
|
||||
check_all_data_is_saved(album_file)
|
||||
|
||||
Reference in New Issue
Block a user