(send_data) POC: ensure all documents are in ELS
Create a script to check for duplicate IDs to help find problems
This commit is contained in:
26
check_id.py
Normal file
26
check_id.py
Normal file
@@ -0,0 +1,26 @@
|
||||
import json
|
||||
|
||||
# Files scanned when this module is run as a script. Judging by their names
# and the 'index'/'_id' keys read below, these are presumably Elasticsearch
# bulk-format dumps (one JSON object per line) -- confirm against the producer.
files = ['es-songs.json', 'es-artists.json', 'es-albums.json']


def find_duplicate_ids(paths):
    """Return the action lines whose ``_id`` was already seen earlier.

    Every file in *paths* is read line by line and each non-blank line is
    parsed as JSON.  Lines whose parsed object contains an ``'index'`` key
    are treated as action lines and their ``['index']['_id']`` is recorded.
    IDs are tracked across all files, so an ID that appears in two
    different files counts as a duplicate as well.

    Args:
        paths: Iterable of paths to newline-delimited JSON files.

    Returns:
        A dict mapping each duplicated ID to the parsed content of the last
        line on which it re-appeared.  Empty when every ID is unique.

    Raises:
        OSError: If a file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If an ``'index'`` entry has no ``'_id'``.
    """
    # A set gives O(1) membership tests; the IDs are also used as dict keys
    # below, so they are necessarily hashable.
    seen_ids = set()
    duplicates = {}

    for path in paths:
        # JSON text is UTF-8; do not depend on the platform's locale encoding.
        with open(path, encoding='utf-8') as fp:
            for line in fp:
                # Blank lines are not JSON documents; skip them instead of
                # letting json.loads fail on them.
                if not line.strip():
                    continue

                content = json.loads(line)
                if 'index' not in content:
                    continue

                doc_id = content['index']['_id']
                if doc_id in seen_ids:
                    duplicates[doc_id] = content
                else:
                    seen_ids.add(doc_id)

    return duplicates


def main():
    """Scan the default ``files`` and print a duplicate-ID report."""
    bad_lines = find_duplicate_ids(files)

    if not bad_lines:
        print("No duplicate ID's found, everything's fine!!")
    else:
        print('KO')
        print(bad_lines)


if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user