(send_data) POC: ensure all documents are in ELS
Create a script that checks for duplicate IDs to help find problems
This commit is contained in:
26
check_id.py
Normal file
26
check_id.py
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
import json

# Bulk files that are scanned for duplicated document ids.
files = ['es-songs.json', 'es-artists.json', 'es-albums.json']


def find_duplicate_ids(paths):
    """Return the action lines whose ``_id`` was already seen in an earlier line.

    Every file in *paths* is read line by line; each non-blank line is parsed
    as JSON. Lines that are objects containing an ``index`` key are treated as
    action lines and their ``index._id`` value is tracked across ALL files, so
    an id reused in a different file is reported as well.

    Args:
        paths: iterable of file paths to scan.

    Returns:
        dict mapping each duplicated id to the parsed content of the last
        line that reused it. Empty when every id is unique.

    Raises:
        OSError: if a file cannot be opened.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
        KeyError: if an ``index`` object has no ``_id``.
    """
    # A set keeps the "already seen?" test O(1) instead of scanning a list.
    seen_ids = set()
    duplicates = {}

    for path in paths:
        with open(path, encoding='utf-8') as fp:
            for line in fp:
                # Blank lines (e.g. a trailing empty line) are not JSON.
                if not line.strip():
                    continue

                content = json.loads(line)
                if not isinstance(content, dict) or 'index' not in content:
                    continue

                doc_id = content['index']['_id']
                if doc_id in seen_ids:
                    duplicates[doc_id] = content
                else:
                    seen_ids.add(doc_id)

    return duplicates


def main():
    """Scan the configured files and print the result of the duplicate check."""
    bad_lines = find_duplicate_ids(files)

    if not bad_lines:
        print("No duplicate ID's found, everything's fine!!")
    else:
        print('KO')
        print(bad_lines)


if __name__ == '__main__':
    main()
|
||||||
27
send_data.py
27
send_data.py
@@ -7,6 +7,8 @@
|
|||||||
import sys
|
import sys
|
||||||
import argparse
|
import argparse
|
||||||
import requests
|
import requests
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
|
||||||
def send_data(file, quiet=False):
|
def send_data(file, quiet=False):
|
||||||
"""
|
"""
|
||||||
@@ -27,6 +29,8 @@ def send_data(file, quiet=False):
|
|||||||
if not quiet:
|
if not quiet:
|
||||||
print("File '{} sended to Elasticsearch!".format(file.name))
|
print("File '{} sended to Elasticsearch!".format(file.name))
|
||||||
|
|
||||||
|
check_all_data_is_saved(file)
|
||||||
|
|
||||||
|
|
||||||
def delete_index(index_name, quiet=False):
|
def delete_index(index_name, quiet=False):
|
||||||
"""
|
"""
|
||||||
@@ -61,6 +65,25 @@ def put_mapping(index_name, mapping_file, quiet=False):
|
|||||||
if not quiet:
|
if not quiet:
|
||||||
print("File '{} sended to Elasticsearch!".format(mapping_file.name))
|
print("File '{} sended to Elasticsearch!".format(mapping_file.name))
|
||||||
|
|
||||||
|
def check_all_data_is_saved(file):
    """Print the Elasticsearch document count next to the count expected from a file.

    The file is re-read from ``file.name``. The expected number of documents
    is half the number of non-blank lines (the code assumes lines come in
    pairs, with the document on the second line of each pair). The ``type``
    field of the second line selects which documents are counted in the
    index.

    Two lines are printed: first the number of matching documents reported by
    Elasticsearch, then the number of documents expected from the file.

    Args:
        file: an object with a ``name`` attribute holding the path of the
            file that was sent.

    Raises:
        requests.HTTPError: if Elasticsearch answers with an error status.
        KeyError: if the second line has no ``type`` field.
    """
    # NOTE(review): presumably leaves Elasticsearch time to make the freshly
    # sent documents searchable - confirm whether an explicit refresh is better.
    time.sleep(2)

    with open(file.name, 'r', encoding='utf-8') as bulk_file:
        # Blank lines carry no document and must not inflate the count.
        lines = [line for line in bulk_file if line.strip()]

    if len(lines) < 2:
        print("File '{}' contains no document to check".format(file.name))
        return

    file_nb_line = len(lines) // 2
    doc_type = json.loads(lines[1])['type']

    # json.dumps escapes the type value correctly, unlike string concatenation.
    payload = json.dumps({
        'track_total_hits': True,
        'query': {
            'constant_score': {
                'filter': {'term': {'type': doc_type}},
            },
        },
    })

    # size=0: only the hit count is needed, not the documents themselves.
    res = requests.get(url=ELASTICSEARCH_URL + INDEX_NAME + '/_search?size=0',
                       data=payload,
                       headers={'Content-Type': 'application/json'})
    # Fail with the HTTP error instead of an opaque KeyError further down.
    res.raise_for_status()

    element_in_els = res.json()['hits']['total']['value']

    print(element_in_els)
    print(file_nb_line)
|
||||||
|
|
||||||
|
|
||||||
#### main block ####
|
#### main block ####
|
||||||
|
|
||||||
@@ -157,3 +180,7 @@ if __name__ == '__main__':
|
|||||||
if not args.quiet:
|
if not args.quiet:
|
||||||
print("Take file '{}' to send song data".format(album_file.name))
|
print("Take file '{}' to send song data".format(album_file.name))
|
||||||
send_data(album_file, args.quiet)
|
send_data(album_file, args.quiet)
|
||||||
|
|
||||||
|
check_all_data_is_saved(artist_file)
|
||||||
|
check_all_data_is_saved(song_file)
|
||||||
|
check_all_data_is_saved(album_file)
|
||||||
|
|||||||
Reference in New Issue
Block a user