Choose JSON style + simplify JSON writing

Refactor dict construction.
It doesn't take much more time...
This commit is contained in:
2025-10-22 14:19:47 +02:00
parent ce680beeb3
commit e71346e8f3
3 changed files with 85 additions and 96 deletions

7
.gitignore vendored
View File

@@ -1,8 +1,8 @@
node_modules/ node_modules/
.vscode/ .vscode/
/es-albums.json /es-albums.*
/es-artists.json /es-artists.*
/es-songs.json /es-songs.*
/iTunesLibrary.xml /iTunesLibrary.xml
# Working files # Working files
@@ -17,4 +17,3 @@ sand_box.py
rating_test.py rating_test.py
iTunesGraphParser.my.py iTunesGraphParser.my.py
iTunes Library.xml iTunes Library.xml
*.jsonl

View File

@@ -1,6 +1,6 @@
parse: parse:
rm -v es-* -rm -v es-*
python3 iTunesParser.py -f iTunes\ Library.xml python3 iTunesParser.py -f iTunes\ Library.xml -F json
send-all: send-all:
just send songs just send songs

View File

@@ -130,13 +130,13 @@ class ITunesParser:
return return
akey = track["Album Artist"] if "Album Artist" in track else track["Artist"] akey = track["Album Artist"] if "Album Artist" in track else track["Artist"]
# Add artist persistent_id = self.calc_id(akey)
if akey not in self._artists:
a_id = self.calc_id(akey) if persistent_id not in self._artists:
# Key is used to increment/precise some information # Key is used to increment/precise some information
# So we use artist name as a key to avoid calculating an ID for each track # So we use artist name as a key to avoid calculating an ID for each track
self._artists[akey] = { self._artists[persistent_id] = {
"Persistent ID": a_id, "Persistent ID": persistent_id,
"Name": akey, "Name": akey,
"Artist": akey, "Artist": akey,
"Track Count": 0, "Track Count": 0,
@@ -151,36 +151,38 @@ class ITunesParser:
rating = track["Rating"] if "Rating" in track else 0 rating = track["Rating"] if "Rating" in track else 0
rating = self.calc_average( rating = self.calc_average(
rating, self._artists[akey]["Rating"], self._artists[akey]["Track Count"] rating,
self._artists[persistent_id]["Rating"],
self._artists[persistent_id]["Track Count"],
) )
self._artists[akey]["Track Count"] += 1 self._artists[persistent_id]["Track Count"] += 1
self._artists[akey]["Rating"] = rating self._artists[persistent_id]["Rating"] = rating
self._artists[akey]["Play Count"] += play_count self._artists[persistent_id]["Play Count"] += play_count
if "Genre" in track: if "Genre" in track:
# Split up the Genres # Split up the Genres
genre_parts = track["Genre"].split("/") genre_parts = track["Genre"].split("/")
self._artists[akey]["Genre"] |= set(genre_parts) self._artists[persistent_id]["Genre"] |= set(genre_parts)
if "Album" in track: if "Album" in track:
self._artists[akey]["Album"].add(track["Album"]) self._artists[persistent_id]["Album"].add(track["Album"])
def _process_album(self, track): def _process_album(self, track):
""" """
Process albums in the track part of library and return a JSON formated for a bulk ELS request Process albums in the track part of library and return a JSON formated for a bulk ELS request
""" """
if "Album" not in track: if "Album" not in track:
return return
akey = track["Album"] akey = track["Album"]
if akey not in self._albums: persistent_id = self.calc_id(akey)
a_id = self.calc_id(akey)
if persistent_id not in self._albums:
# Key is used to increment/precise some information # Key is used to increment/precise some information
# So we use album name as a key to avoid calculating an ID for each track # So we use album name as a key to avoid calculating an ID for each track
self._albums[akey] = { self._albums[persistent_id] = {
"Persistent ID": a_id, "Persistent ID": persistent_id,
"Name": akey, "Name": akey,
"Album": akey, "Album": akey,
"Track Count": 0, "Track Count": 0,
@@ -201,36 +203,38 @@ class ITunesParser:
avg_bitrate = self.calc_average( avg_bitrate = self.calc_average(
track["Bit Rate"], track["Bit Rate"],
self._albums[akey]["Avg Bit Rate"], self._albums[persistent_id]["Avg Bit Rate"],
self._albums[akey]["Track Count"], self._albums[persistent_id]["Track Count"],
) )
self._albums[akey]["Avg Bit Rate"] = avg_bitrate self._albums[persistent_id]["Avg Bit Rate"] = avg_bitrate
self._albums[akey]["Track Count"] += 1 self._albums[persistent_id]["Track Count"] += 1
self._albums[akey]["Play Count"] += play_count self._albums[persistent_id]["Play Count"] += play_count
self._albums[akey]["Total Time"] += total_time self._albums[persistent_id]["Total Time"] += total_time
self._albums[akey]["Location"] = os.path.dirname(track["Location"]) self._albums[persistent_id]["Location"] = os.path.dirname(track["Location"])
if self._albums[akey]["Min Bit Rate"] > track["Bit Rate"]: if self._albums[persistent_id]["Min Bit Rate"] > track["Bit Rate"]:
self._albums[akey]["Min Bit Rate"] = track["Bit Rate"] self._albums[persistent_id]["Min Bit Rate"] = track["Bit Rate"]
if "Genre" in track: if "Genre" in track:
# Split up the Genres # Split up the Genres
genre_parts = track["Genre"].split("/") genre_parts = track["Genre"].split("/")
self._albums[akey]["Genre"] |= set(genre_parts) self._albums[persistent_id]["Genre"] |= set(genre_parts)
if "Artist" in track: if "Artist" in track:
self._albums[akey]["Artist"].add(track["Artist"]) self._albums[persistent_id]["Artist"].add(track["Artist"])
if "Album Rating" in track: if "Album Rating" in track:
self._albums[akey]["Album Rating"] = track["Album Rating"] self._albums[persistent_id]["Album Rating"] = track["Album Rating"]
if "Album Rating Computed" in track: if "Album Rating Computed" in track:
self._albums[akey]["Album Rating Computed"] = track["Album Rating Computed"] self._albums[persistent_id]["Album Rating Computed"] = track[
"Album Rating Computed"
]
if "Album Artist" in track: if "Album Artist" in track:
self._albums[akey]["Album Artist"] = track["Album Artist"] self._albums[persistent_id]["Album Artist"] = track["Album Artist"]
@classmethod @classmethod
def calc_average(cls, added_value, current_value, nb_values): def calc_average(cls, added_value, current_value, nb_values):
@@ -251,67 +255,42 @@ class ITunesParser:
class WriteElsJson: class WriteElsJson:
@staticmethod @staticmethod
def write_artists(artists, output_file): def write_elements(
""" elements: dict,
Write artists data to another JSON file o_name: str,
""" json_style: str,
els_index=ITunesParser.SONG_INDEX,
file_artist = io.open(output_file, "wb") els=False,
for _, artist in artists.items(): ):
persistent_id = artist["Persistent ID"]
artist["Rating"] = round(artist["Rating"])
json_track_index = {
"index": {"_index": ITunesParser.ARTIST_INDEX, "_id": persistent_id}
}
# file_artist.write(bytes(json.dumps(json_track_index, indent=None, cls=SetEncoder), 'UTF-8'))
# file_artist.write(bytes("\n", 'UTF-8'))
file_artist.write(
bytes(json.dumps(artist, indent=None, cls=SetEncoder), "UTF-8")
)
file_artist.write(bytes("\n", "UTF-8"))
file_artist.close()
@staticmethod
def write_albums(albums, output_file):
"""
Write albums data to another JSON file
"""
file_albums = io.open(output_file, "wb")
for _, album in albums.items():
persistent_id = album["Persistent ID"]
album["Avg Bit Rate"] = round(album["Avg Bit Rate"])
json_track_index = {
"index": {"_index": ITunesParser.ALBUM_INDEX, "_id": persistent_id}
}
# file_albums.write(bytes(json.dumps(json_track_index, indent=None, cls=SetEncoder), 'UTF-8'))
# file_albums.write(bytes("\n", 'UTF-8'))
file_albums.write(
bytes(json.dumps(album, indent=None, cls=SetEncoder), "UTF-8")
)
file_albums.write(bytes("\n", "UTF-8"))
file_albums.close()
@staticmethod
def write_songs(songs, output_file):
""" """
Write songs to a JSON Write songs to a JSON
""" """
file = io.open(output_file, "wb") output_filename = f"{o_name}.{json_style}"
for persistent_id, song in songs.items():
json_track_index = {
"index": {"_index": ITunesParser.SONG_INDEX, "_id": persistent_id}
}
with io.open(output_filename, "wb") as ofile:
if json_style == "json":
ofile.write(bytes("[\n", "UTF-8"))
for persistent_id, song in elements.items():
if els:
json_track_index = {
"index": {
"_index": els_index,
"_id": persistent_id,
}
}
# file.write(bytes(json.dumps(json_track_index, indent=None, cls=SetEncoder), 'UTF-8')) # file.write(bytes(json.dumps(json_track_index, indent=None, cls=SetEncoder), 'UTF-8'))
# file.write(bytes("\n", 'UTF-8')) # file.write(bytes("\n", 'UTF-8'))
file.write(bytes(json.dumps(song, indent=None, cls=SetEncoder), "UTF-8"))
file.write(bytes("\n", "UTF-8")) ofile.write(
file.close() bytes(json.dumps(song, indent=None, cls=SetEncoder), "UTF-8")
)
if json_style == "json":
ofile.write(
bytes(",", "UTF-8")
) # TODO Doesn't work -> last line...
ofile.write(bytes("\n", "UTF-8"))
if json_style == "json":
ofile.write(bytes("]\n", "UTF-8"))
#### main block #### #### main block ####
@@ -345,6 +324,13 @@ parser.add_argument(
parser.add_argument( parser.add_argument(
"-c", "--console", action="store_true", help="Output to console instead of file" "-c", "--console", action="store_true", help="Output to console instead of file"
) )
parser.add_argument(
"-F",
"--format",
choices=["json", "jsonl"],
default="json",
help="Choose JSON style",
)
# parser.add_argument('-v', '--verbose', action='store_true', # parser.add_argument('-v', '--verbose', action='store_true',
# help='Verbose output') # help='Verbose output')
@@ -355,9 +341,13 @@ if __name__ == "__main__":
itunes_parser = ITunesParser().parse(args.file) itunes_parser = ITunesParser().parse(args.file)
print("Writing JSON files...") print("Writing JSON files...")
WriteElsJson.write_songs(itunes_parser["songs"], "es-songs.jsonl") WriteElsJson.write_elements(itunes_parser["songs"], "es-songs", args.format)
WriteElsJson.write_artists(itunes_parser["artists"], "es-artists.jsonl") WriteElsJson.write_elements(
WriteElsJson.write_albums(itunes_parser["albums"], "es-albums.jsonl") itunes_parser["artists"], "es-artists", args.format, ITunesParser.ARTIST_INDEX
)
WriteElsJson.write_elements(
itunes_parser["albums"], "es-albums", args.format, ITunesParser.ARTIST_INDEX
)
print("Done!") print("Done!")