Separate parser and writer

2017-04-21 12:42:23 +02:00
parent 92982fb042
commit 06af68bf5a
1 changed files with 119 additions and 78 deletions
--- a/iTunesParser.py
+++ b/iTunesParser.py
@@ -66,42 +66,37 @@ class ITunesParser:
    """
        Parse an iTunes Library and produce JSON - for ELS
    """
-    def __init__(self, library_file):
+    def __init__(self):
+        self._tracks = {}
        self._albums = {}
        self._artists = {}
-        self.library_file = library_file

-    def to_json(self):
+    def _read_tracks(self, library_file):
        """
-            Just do processSong()
-            or do process_songs, then _write_artists and _write_albums.
-            Note: process_songs do a process_artists and process_albums...
-            This method suck.
+            Read the library file and return the 'Tracks' entry of the parsed dict.
+            The parsed dict may contain
+                - Major Version
+                - Minor Version
+                - Date
+                - Application Version
+                - Features
+                - Show Content Ratings
+                - Music Folder
+                - Library Persistent ID
+                - Tracks
+                - ...
        """
-        ret = self._process_songs()
-
-        self._write_artists()
-        self._write_albums()
-
-        # return json.dumps(jsonObj, indent=indent, cls=SetEncoder)
-        return ret
-
-    def _read_tracks(self):
-        """
-            Read library and return Tracks key of dict
-        """
-        plist = plistlib.load(open(self.library_file, 'rb'))
+        plist = plistlib.load(open(library_file, 'rb'))
        return plist['Tracks']

-    def _process_songs(self):
+    def parse(self, library_file):
        """
            Return an output JSON for an ELS Bulk request - Not a correct format
            This method call process_album & process_artist
            TODO Just return a _correct_ JSON and treat in another place/class
        """

-        tracks = self._read_tracks()
-        ret = ""
+        tracks = self._read_tracks(library_file)

        for _, track in tracks.items():
            # Filter out any non-music
@@ -110,20 +105,19 @@ class ITunesParser:
            if 'Podcast' in track or 'Has Video' in track:
                continue

-            persistent_id = track['Persistent ID']
-            json_track_index = {
-                "index": {"_index": "itunessongs", "_type": "song", "_id": persistent_id}
-            }
+            # Each track that is kept is stored under its Persistent ID
+            self._tracks[track['Persistent ID']] = track

            # Retrieve for each track artist information
            self._process_artist(track)
            # Retrieve for each track album information
            self._process_album(track)

-            ret += json.dumps(json_track_index, indent=None, cls=SetEncoder)
-            ret += "\n"
-            ret += json.dumps(track, indent=None, cls=SetEncoder)
-            ret += "\n"
+        ret = {
+            'songs': self._tracks,
+            'albums': self._albums,
+            'artists': self._artists
+        }
        return ret

    def _process_artist(self, track):
@@ -138,6 +132,8 @@ class ITunesParser:
        # Add artist
        if akey not in self._artists:
            a_id = self.calc_id(akey)
+            # The artist name is the dict key, so later tracks can update the entry
+            # without recalculating the ID for each track
            self._artists[akey] = {
                'Persistent ID': a_id,
                'Name': akey,
@@ -173,6 +169,8 @@ class ITunesParser:
        akey = track['Album']
        if akey not in self._albums:
            a_id = self.calc_id(akey)
+            # The album name is the dict key, so later tracks can update the entry
+            # without recalculating the ID for each track
            self._albums[akey] = {
                'Persistent ID': a_id,
                'Name': akey,
@@ -205,42 +203,6 @@ class ITunesParser:
            self._albums[akey]['Album Rating'] = track['Album Rating']
            self._albums[akey]['Album Rating Computed'] = True

-    def _write_artists(self):
-        """
-            Write artists data to another JSON file
-        """
-
-        file_artist = io.open('es-artist-data.json', 'wb')
-        for artist in self._artists:
-            persistent_id = self._artists[artist]['Persistent ID']
-            self._artists[artist]['Rating'] = round(self._artists[artist]['Rating'])
-            json_track_index = {
-                "index": {"_index": "itunessongs", "_type": "artist", "_id": persistent_id}
-            }
-            file_artist.write(bytes(json.dumps(json_track_index, indent=None, cls=SetEncoder), 'UTF-8'))
-            file_artist.write(bytes("\n", 'UTF-8'))
-            file_artist.write(bytes(json.dumps(self._artists[artist], indent=None, cls=SetEncoder), 'UTF-8'))
-            file_artist.write(bytes("\n", 'UTF-8'))
-        file_artist.close()
-
-    def _write_albums(self):
-        """
-            Write albums data to another JSON file
-        """
-
-        file_albums = io.open('es-albums-data.json', 'wb')
-        for album in self._albums:
-            persistent_id = self._albums[album]['Persistent ID']
-            self._albums[album]['Rating'] = round(self._albums[album]['Rating'])
-            json_track_index = {
-                "index": {"_index": "itunessongs", "_type": "album", "_id": persistent_id}
-            }
-            file_albums.write(bytes(json.dumps(json_track_index, indent=None, cls=SetEncoder), 'UTF-8'))
-            file_albums.write(bytes("\n", 'UTF-8'))
-            file_albums.write(bytes(json.dumps(self._albums[album], indent=None, cls=SetEncoder), 'UTF-8'))
-            file_albums.write(bytes("\n", 'UTF-8'))
-        file_albums.close()
-
    @classmethod
    def calc_rating(cls, added_value, current_rating, count):
        """
@@ -257,6 +219,82 @@ class ITunesParser:
        md5.update(key.encode('UTF-8'))
        return md5.hexdigest()

+
+class WriteElsJson:
+    """Write parsed iTunes data to files as ELS bulk-request JSON lines."""
+    def to_json(self):
+        """
+            Leftover from ITunesParser, not usable on this class.
+            NOTE: _process_songs, _write_artists and _write_albums are not
+            defined here, so calling this method fails with AttributeError.
+            Use write_songs, write_artists and write_albums instead.
+        """
+        ret = self._process_songs()
+
+        self._write_artists()
+        self._write_albums()
+
+        return ret
+
+    @staticmethod
+    def write_artists(artists, output_file):
+        """
+            Write artists to output_file as ELS bulk JSON lines (UTF-8).
+            Each artist's 'Rating' is rounded in place before it is written.
+        """
+        # The with block closes the file even if a write fails
+        with io.open(output_file, 'wb') as file_artist:
+            for artist in artists.values():
+                persistent_id = artist['Persistent ID']
+                artist['Rating'] = round(artist['Rating'])
+
+                json_track_index = {
+                    "index": {"_index": "itunessongs", "_type": "artist", "_id": persistent_id}
+                }
+
+                file_artist.write(bytes(json.dumps(json_track_index, indent=None, cls=SetEncoder), 'UTF-8'))
+                file_artist.write(bytes("\n", 'UTF-8'))
+                file_artist.write(bytes(json.dumps(artist, indent=None, cls=SetEncoder), 'UTF-8'))
+                file_artist.write(bytes("\n", 'UTF-8'))
+
+    @staticmethod
+    def write_albums(albums, output_file):
+        """
+            Write albums to output_file as ELS bulk JSON lines (UTF-8).
+            Each album's 'Rating' is rounded in place before it is written.
+        """
+        # The with block closes the file even if a write fails
+        with io.open(output_file, 'wb') as file_albums:
+            for album in albums.values():
+                persistent_id = album['Persistent ID']
+                album['Rating'] = round(album['Rating'])
+
+                json_track_index = {
+                    "index": {"_index": "itunessongs", "_type": "album", "_id": persistent_id}
+                }
+
+                file_albums.write(bytes(json.dumps(json_track_index, indent=None, cls=SetEncoder), 'UTF-8'))
+                file_albums.write(bytes("\n", 'UTF-8'))
+                file_albums.write(bytes(json.dumps(album, indent=None, cls=SetEncoder), 'UTF-8'))
+                file_albums.write(bytes("\n", 'UTF-8'))
+
+    @staticmethod
+    def write_songs(songs, output_file):
+        """
+            Write songs to output_file as ELS bulk JSON lines (UTF-8).
+            songs is a dict; each key is used as the document "_id".
+        """
+        with io.open(output_file, 'wb') as file_songs:
+            for persistent_id, song in songs.items():
+                json_track_index = {
+                    "index": {"_index": "itunessongs", "_type": "song", "_id": persistent_id}
+                }
+
+                file_songs.write(bytes(json.dumps(json_track_index, indent=None, cls=SetEncoder), 'UTF-8'))
+                file_songs.write(bytes("\n", 'UTF-8'))
+                file_songs.write(bytes(json.dumps(song, indent=None, cls=SetEncoder), 'UTF-8'))
+                file_songs.write(bytes("\n", 'UTF-8'))
+
 #### main block ####

 # Default input & output files
@@ -279,15 +317,18 @@ parser.add_argument('-c', '--console', action='store_true',
 if __name__ == '__main__':
    args = parser.parse_args()

-    itunes_parser = ITunesParser(args.file)
-    output = itunes_parser.to_json()
+    itunes_parser = ITunesParser().parse(args.file)

-    if args.console:
-        print(output)
-    else:
-        with io.open(args.output, 'wb') as outfile:
-            if sys.version_info.major == 2:
-                outfile.write(bytes(output))
-            elif sys.version_info.major == 3:
-                outfile.write(bytes(output, 'UTF-8'))
-        print('JSON data written to: ' + args.output)
+    WriteElsJson.write_songs(itunes_parser['songs'], "es-songs.json")
+    WriteElsJson.write_artists(itunes_parser['artists'], "es-artists.json")
+    WriteElsJson.write_albums(itunes_parser['albums'], "es-albums.json")
+
+    # if args.console:
+    #     print(output)
+    # else:
+    #     with io.open(args.output, 'wb') as outfile:
+    #         if sys.version_info.major == 2:
+    #             outfile.write(bytes(output))
+    #         elif sys.version_info.major == 3:
+    #             outfile.write(bytes(output, 'UTF-8'))
+    #     print('JSON data written to: ' + args.output)