Process for album
Adds too many uninteresting results, e.g. all albums for one artist, which prevents finding interesting information.
This commit is contained in:
@@ -78,9 +78,11 @@ def process_file(file_name: str, field_name: str, array_file: str = None) -> int
|
|||||||
try :
|
try :
|
||||||
input = get_tokens(data[field_name])
|
input = get_tokens(data[field_name])
|
||||||
|
|
||||||
if array_file and data[array_file]:
|
if array_file and array_file in data and data[array_file]:
|
||||||
for key in data[array_file]:
|
for key in data[array_file]:
|
||||||
input.extend(get_tokens(key))
|
if key != data[field_name]: # => Doesn't work at all for albums, and blocks scoring for artists
|
||||||
|
input.extend(get_tokens(key))
|
||||||
|
|
||||||
# TODO Input has the same value several times ==> use it to compute a score
|
# TODO Input has the same value several times ==> use it to compute a score
|
||||||
post_document(name=data[field_name], input=input, field_name=field_name.lower())
|
post_document(name=data[field_name], input=input, field_name=field_name.lower())
|
||||||
count += 1
|
count += 1
|
||||||
@@ -95,7 +97,7 @@ def process_file(file_name: str, field_name: str, array_file: str = None) -> int
|
|||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
# Using readlines()
|
# Using readlines()
|
||||||
count = 0
|
count = 0
|
||||||
count += process_file('/home/budd/workspace/iTunes/es-albums.json', 'Album')
|
count += process_file('/home/budd/workspace/iTunes/es-albums.json', 'Album', 'Artist')
|
||||||
print('Created documents: ' + str(count))
|
print('Created documents: ' + str(count))
|
||||||
count += process_file('/home/budd/workspace/iTunes/es-artists.json', 'Artist', 'Album Artist')
|
count += process_file('/home/budd/workspace/iTunes/es-artists.json', 'Artist', 'Album Artist')
|
||||||
print('Created documents: ' + str(count))
|
print('Created documents: ' + str(count))
|
||||||
|
|||||||
Reference in New Issue
Block a user