All pastes #322768 Raw Edit

Sans titre

public text v1 · immutable
#322768 ·published 2007-01-20 18:55 UTC
rendered paste body
""" ID3v2 Frames """

__author__ = "Alastair Tse <alastair@tse.id.au>"
__license__ = "BSD"
__copyright__ = "Copyright (c) 2004, Alastair Tse" 

__revision__ = "$Id: id3v2frame.py,v 1.3 2006/02/15 14:46:59 sylvinus Exp $"

from pytagger.constants import *
from pytagger.exceptions import *
from pytagger.utility import *
from pytagger.debug import *
from pytagger.encoding import *

from encodings import normalize_encoding

import struct, types, tempfile

class ID3v2BaseFrame:
    """ Base ID3v2 Frame for 2.2, 2.3 and 2.4

    Abstract class that defines basic functions that are common for
    2.2, 2.3 and 2.4. Subclasses provide the header layout by
    overriding parse_frame_header() and output().

    o_* functions means output_*, they output a bytestring encoding
    the given data

    x_* functions means extract_*, they extract data into accessible
    structures when given a suitable length bytestream

    Which x_*/o_* pair handles a frame is looked up through
    supported[fid][0].

    @cvar header_length: header portion length
    @cvar supported: supported frame ids
    @cvar status_flags: status flags required, as (name, bit) pairs
    @cvar format_flags: format flags required, as (name, bit) pairs

    @ivar fid: frame id code
    @ivar rawdata: rawdata of the rest of the frame minus the header
    @ivar length: length of the frame in bytes
    @ivar flags: dictionary of flags for this frame

    @ivar encoding: optional - for text fields we have the encoding name
    @ivar strings: a list of strings for text fields

    @ivar shortcomment: set if this frame is a comment
    @ivar longcomment: set if this frame is a comment (optional)
    @ivar language: set if this frame is a comment (3 character code)

    @ivar mimetype: mimetype for GEOB, APIC
    @ivar filename: filename for GEOB
    @ivar obj: data for GEOB
    @ivar desc: for geob and URL
    @ivar url: for URL
    @ivar pict: picture data for APIC
    @ivar picttype: picture type byte for APIC

    @ivar counter: for playcount (PCNT)
    """
    supported = {}
    header_length = 0
    # Every loop in this module iterates these as (flag name, bit) pairs
    # and new_frame_header() concatenates them, so the defaults are
    # lists like the ones the subclasses assign.
    status_flags = []
    format_flags = []

    fid = None
    rawdata = None
    length = 0
    flags = 0
    encoding = ''
    strings = []
    shortcomment = ''
    longcomment = ''
    language = ''
    mimetype = ''
    filename = ''
    obj = None
    desc = ''
    url = ''
    # Defaults so that o_apic()/o_pcnt() work on a freshly created frame.
    pict = ''
    picttype = 0
    counter = 0

    def __init__(self, frame=None, fid=None):
        """
        creates an ID3v2BaseFrame structure. If you specify frame,
        then it will go into parse mode. If you specify the fid,
        then it will create a new frame.

        @param frame: frame bytestring
        @param fid: frame id for creating a new frame
        @raise ID3ParameterException: fid given (without frame) but it
            is not a key of supported
        """
        # Give each frame its own list so that appending to strings
        # never mutates the class-level default shared by all frames.
        self.strings = []

        if frame:
            self.parse_frame_header(frame)
            self.parse_field()
        elif fid:
            if fid not in self.supported:
                raise ID3ParameterException(
                    "Unsupported ID3v2 Field: %s" % fid)
            self.fid = fid
            self.new_frame_header()

    def parse_frame_header(self, frame):
        """
        Parse the frame header from a bytestring

        The base implementation only resets rawdata and length and then
        raises; subclasses override it.

        @param frame: bytestring of the frame
        @type frame: string
        @raise ID3NotImplementedException: always, in the base class

        @todo: apple's id3 tags doesn't seem to follow the unsync safe format
        """
        self.rawdata = ''
        self.length = 0
        raise ID3NotImplementedException("parse_frame_header")

    def new_frame_header(self):
        """
        creates a new frame header

        Resets flags to a dictionary with every status and format flag
        name set to 0.
        """
        self.flags = {}
        # list() keeps the concatenation valid whatever sequence type a
        # subclass uses for its flag tables.
        flag_table = list(self.status_flags) + list(self.format_flags)
        for flagname, bit in flag_table:
            self.flags[flagname] = 0

    def output(self):
        """
        Create a bytestring representing the frame contents
        and the field

        @raise ID3NotImplementedException: always, in the base class

        @todo: no syncsafing
        @todo: no status format flags used
        """
        raise ID3NotImplementedException("output")

    def _field_handler(self, prefix):
        """
        Return the bound method that handles this frame id.

        @param prefix: 'x_' for the extractor, 'o_' for the writer
        @raise ID3FrameException: the frame id is not in supported
        """
        if self.fid not in self.supported:
            raise ID3FrameException("Unsupported ID3v2 Field: %s" % self.fid)
        # Attribute lookup instead of eval(): the handler name is built
        # from table data and must never be executed as an expression.
        return getattr(self, prefix + self.supported[self.fid][0])

    def parse_field(self):
        """
        Run the x_* extractor registered for this frame id.

        @raise ID3FrameException: the frame id is not in supported
        """
        self._field_handler('x_')()

    def output_field(self):
        """
        Run the o_* writer registered for this frame id.

        @return: bytestring of the frame body (without header)
        @raise ID3FrameException: the frame id is not in supported
        """
        return self._field_handler('o_')()

    def _terminator(self):
        """
        Return the string terminator for the current encoding.
        """
        if is_double_byte(self.encoding):
            return '\x00\x00'
        return '\x00'

    def _split_terminated(self, data, start):
        """
        Split data at the first string terminator at or after start.

        For double byte encodings the search advances two bytes at a
        time from start, so a zero byte belonging to one character
        followed by a zero byte belonging to the next one is never
        mistaken for a terminator.

        @param data: raw frame bytestring
        @param start: offset of the first byte of the string
        @return: (string, remainder after the terminator), or None when
            no terminator is found
        """
        sep = self._terminator()
        if len(sep) == 1:
            pos = data.find(sep, start)
        else:
            pos = -1
            for candidate in range(start, len(data) - 1, 2):
                if data[candidate:candidate + 2] == sep:
                    pos = candidate
                    break
        if pos < 0:
            return None
        return data[start:pos], data[pos + len(sep):]

    def _strip_nul(self, raw):
        """
        Remove leading/trailing terminators from an encoded string.

        Double byte strings are stripped in whole two byte units only;
        stripping single bytes would eat half of a character.
        """
        if not is_double_byte(self.encoding):
            return raw.strip('\x00')
        while raw[:2] == '\x00\x00':
            raw = raw[2:]
        # Only strip from the end while the length is even, otherwise
        # the last two bytes straddle a character boundary.
        while len(raw) % 2 == 0 and raw[-2:] == '\x00\x00':
            raw = raw[:-2]
        return raw

    def o_string(self, s, toenc, fromenc='latin_1'):
        """
        Converts a String or Unicode String to a byte string of specified encoding.

        @param s: value to convert; anything that is not a string is
            passed through unicode() first
        @param toenc: Encoding which we wish to convert to. This can be either ID3V2_FIELD_ENC_* or the actual python encoding type
        @param fromenc: converting from encoding specified
        @return: the encoded bytestring. A byte string that cannot be
            converted is returned unchanged; a unicode string that
            cannot be encoded yields ''. Both cases emit a warning.
        """
        import sys

        # sanitise input - convert to string repr
        try:
            if isinstance(encodings[toenc], types.StringType):
                toenc = encodings[toenc]
        except KeyError:
            toenc = 'latin_1'

        # make sure string is of a type we understand
        if not isinstance(s, (types.StringType, types.UnicodeType)):
            s = unicode(s)

        if isinstance(s, types.UnicodeType):
            try:
                return s.encode(toenc)
            except UnicodeEncodeError:
                # sys.exc_info() instead of "except X, err" so that the
                # module parses on every Python version.
                warn("o_string: frame conversion failed - leaving empty. %s" %
                     sys.exc_info()[1])
                return ''

        if toenc == fromenc:
            # don't need any conversion here
            return s

        try:
            return s.decode(fromenc).encode(toenc)
        except (UnicodeEncodeError, UnicodeDecodeError):
            warn("o_string: frame conversion failed. leaving as is.")
            return s

    def o_text(self):
        """
        Output text bytestring

        @return: encoding byte followed by every entry of strings,
            each one passed through null_terminate()
        """
        pieces = [chr(encodings[self.encoding])]
        for s in self.strings:
            encoded = self.o_string(s, self.encoding)
            # The terminator of the last string is deliberately kept.
            pieces.append(null_terminate(self.encoding, encoded))
        return ''.join(pieces)

    def x_text(self):
        """
        Extract Text Fields

        latin_1 text is kept as a byte string, every other encoding is
        decoded to unicode before it is split into strings.

        sets: encoding, strings
        """
        data = self.rawdata
        self.encoding = encodings[ord(data[0])]
        rawtext = data[1:]

        if normalize_encoding(self.encoding) == 'latin_1':
            text = rawtext
        else:
            text = rawtext.decode(self.encoding)

        # Once decoded, a terminator is a single NUL character whatever
        # the width of the byte encoding was.
        self.strings = text.split('\x00')

        try:
            # Probe only: a non-ascii byte string raises here and gets
            # flagged in the debug line below.
            text.encode('utf_8')
            debug('Read Field: %s Len: %d Enc: %s Text: %s' %
                   (self.fid, self.length, self.encoding, str([text])))
        except UnicodeDecodeError:
            debug('Read Field: %s Len: %d Enc: %s Text: %s (Err)' %
                   (self.fid, self.length, self.encoding, str([text])))

    def set_text(self, s, encoding = 'utf_16'):
        """
        Replace the text of this frame with a single string.

        @param s: the new text
        @param encoding: encoding name used when the frame is written
        """
        self.strings = [s]
        self.encoding = encoding

    def o_comm(self):
        """
        Output comment bytestring

        @return: encoding byte, language, short comment and long
            comment, both comments followed by a terminator
        """
        sep = self._terminator()
        return ''.join([
            chr(encodings[self.encoding]),
            self.language,
            self.o_string(self.shortcomment, self.encoding), sep,
            self.o_string(self.longcomment, self.encoding), sep,
        ])

    def x_comm(self):
        """
        extract comment field

        Both comments stay empty when no terminator separates them.

        sets: encoding, language, shortcomment, longcomment
        """
        data = self.rawdata
        self.encoding = encodings[ord(data[0])]
        self.language = data[1:4]
        self.shortcomment = ''
        self.longcomment = ''

        parts = self._split_terminated(data, 4)
        if parts is not None:
            self.shortcomment = self._strip_nul(parts[0])
            self.longcomment = self._strip_nul(parts[1])

        debug('Read Field: %s Len: %d Enc: %s Lang: %s Comm: %s' %
              (self.fid, self.length, self.encoding, self.language,
               str([self.shortcomment, self.longcomment])))

    def o_pcnt(self):
        """
        Output Play Count

        The counter is written big-endian, at least four bytes wide and
        never narrower than length; it grows when the value needs more
        room.

        @return: bytestring of the counter
        @raise ID3ParameterException: the counter is negative
        """
        counter = self.counter
        if counter < 0:
            raise ID3ParameterException("Play counter must not be negative")

        width = max(4, self.length)
        while counter >> (8 * width):
            width += 1

        octets = [(counter >> (8 * shift)) & 0xff
                  for shift in range(width - 1, -1, -1)]
        return struct.pack('!%dB' % width, *octets)

    def x_pcnt(self):
        """
        Extract Play Count

        Reads length bytes of rawdata as one big-endian integer.

        sets: counter
        """
        data = self.rawdata
        nbytes = self.length
        counter = 0
        for offset in range(nbytes):
            octet = struct.unpack('B', data[offset:offset + 1])[0]
            counter = (counter << 8) | octet

        debug('Read Field: %s Len: %d Count: %d' % (self.fid, nbytes, counter))
        self.counter = counter

    def o_bin(self):
        """
        Output an opaque frame: rawdata is returned untouched.
        """
        return self.rawdata

    def x_bin(self):
        """
        Extract an opaque frame: nothing to do, rawdata is kept as is.
        """
        pass

    def o_wxxx(self):
        """
        Output URL bytestring

        @return: encoding byte, description and url, both followed by a
            terminator
        """
        # NOTE(review): the url goes through o_string() with the frame
        # encoding, exactly as before - confirm against the ID3v2 spec
        # whether the URL part should be encoded at all.
        sep = self._terminator()
        return ''.join([
            chr(encodings[self.encoding]),
            self.o_string(self.desc, self.encoding), sep,
            self.o_string(self.url, self.encoding), sep,
        ])

    def x_wxxx(self):
        """
        Extract URL

        desc and url are left unchanged when no terminator is found.

        set: encoding, desc, url
        """
        data = self.rawdata
        self.encoding = encodings[ord(data[0])]

        parts = self._split_terminated(data, 1)
        if parts is not None:
            self.desc, self.url = parts

        debug("Read field: %s Len: %s Enc: %s Desc: %s URL: %s" %
               (self.fid, self.length, self.encoding,
                self.desc, str([self.url])))

    def o_apic(self):
        """
        Output APIC bytestring

        @return: encoding byte, mimetype, NUL, picture type byte,
            description, terminator and the picture data
        """
        return ''.join([
            chr(encodings[self.encoding]),
            self.mimetype, '\x00',
            chr(self.picttype),
            self.o_string(self.desc, self.encoding),
            self._terminator(),
            self.pict,
        ])

    def x_apic(self):
        """
        Extract APIC

        set: encoding, mimetype, desc, pict, picttype

        @raise ID3FrameException: the mimetype is empty or unterminated
        """
        data = self.rawdata
        self.encoding = encodings[ord(data[0])]
        self.mimetype = ''
        self.desc = ''
        self.pict = ''
        self.picttype = 0

        # get mime type (must be latin-1)
        mime_end = data.find('\x00', 1)
        if mime_end > 0:
            self.mimetype = data[1:mime_end]

        if not self.mimetype:
            raise ID3FrameException("APIC extraction failed. Missing mimetype")

        # the picture type byte follows the mimetype terminator
        type_pos = mime_end + 1
        self.picttype = ord(data[type_pos])

        # get picture description; it starts after the picture type byte
        parts = self._split_terminated(data, type_pos + 1)
        if parts is not None:
            self.desc, self.pict = parts

        debug('Read Field: %s Len: %d PicType: %d Mime: %s Desc: %s PicLen: %d' %
               (self.fid, self.length, self.picttype, self.mimetype,
                self.desc, len(self.pict)))

    def o_url(self):
        """
        Output a plain URL frame: rawdata is returned untouched.
        """
        return self.rawdata

    def x_url(self):
        """
        Extract a plain URL frame: only logs, rawdata is kept as is.
        """
        debug("Read Field: %s Len: %d Data: %s" %
               (self.fid, self.length, [self.rawdata]))
        return

    def o_geob(self):
        """
        Output GEOB bytestring

        filename and desc are written as stored, without conversion.

        @return: encoding byte, mimetype, NUL, filename, terminator,
            description, terminator and the object data
        """
        sep = self._terminator()
        return ''.join([
            chr(encodings[self.encoding]),
            self.mimetype, '\x00',
            self.filename, sep,
            self.desc, sep,
            self.obj,
        ])

    def x_geob(self):
        """
        Extract GEOB

        set: encoding, mimetype, filename, desc, obj

        @raise ID3FrameException: the mimetype is empty or the filename
            is not terminated
        """
        data = self.rawdata
        self.encoding = encodings[ord(data[0])]
        self.mimetype = ''
        self.filename = ''
        self.desc = ''
        self.obj = ''

        mime_end = data.find('\x00', 1)
        if mime_end > 0:
            self.mimetype = data[1:mime_end]

        if not self.mimetype:
            raise ID3FrameException("Unable to extract GEOB. Missing mimetype")

        # FIXME: because filename and desc are optional, we should be
        #        smarter about splitting
        name_parts = self._split_terminated(data, mime_end + 1)
        if name_parts is None:
            raise ID3FrameException("Unable to extract GEOB. Missing filename")
        self.filename, remainder = name_parts

        desc_parts = self._split_terminated(remainder, 0)
        if desc_parts is not None:
            self.desc, self.obj = desc_parts

        debug("Read Field: %s Len: %d Enc: %s Mime: %s Filename: %s Desc: %s ObjLen: %d" %
               (self.fid, self.length, self.encoding, self.mimetype,
                self.filename, self.desc, len(self.obj)))


class ID3v2_2_Frame(ID3v2BaseFrame):
    """ ID3v2.2 Frame

    Frames have a three character id followed by a three byte size and
    carry no flags. Pictures use a three character image format where
    later versions use a mimetype.
    """
    supported = ID3V2_2_FRAME_SUPPORTED_IDS
    header_length = ID3V2_2_FRAME_HEADER_LENGTH
    version = 2.2
    status_flags = []
    format_flags = []

    def parse_frame_header(self, frame):
        """
        Parse the frame header from a bytestring

        sets: fid, rawdata, length

        @param frame: bytestring of the frame
        """
        header = frame[:self.header_length]

        self.fid = header[0:3]
        self.rawdata = frame[self.header_length:]
        # The size is three bytes wide; pad it to four for struct.
        self.length = struct.unpack('!I', '\x00' + header[3:6])[0]

    def output(self):
        """
        Create a bytestring representing the frame header and the field

        @return: frame id, three byte size and the frame body
        """
        fieldstr = self.output_field()
        # FIXME: no syncsafe
        # NOTE: ID3v2 uses only 3 bytes for size, so we strip of MSB
        header = self.fid + struct.pack('!I', len(fieldstr))[1:]
        return header + fieldstr

    def o_text(self):
        """
        Output Text Field

        ID3v2.2 text fields do not support multiple fields, so only the
        first entry of strings is written. An empty list is written as
        an empty string.
        """
        text = ''
        if self.strings:
            text = self.strings[0]
        newstring = self.o_string(text, self.encoding)
        enc = encodings[self.encoding]
        return chr(enc) + null_terminate(self.encoding, newstring)

    def o_apic(self):
        """
        Output picture bytestring

        @return: encoding byte, three character image format, picture
            type byte, description, terminator and the picture data
        @raise ID3FrameException: mimetype is neither three characters
            long nor convertible to an image format
        """
        enc = encodings[self.encoding]
        if is_double_byte(self.encoding):
            sep = '\x00\x00'
        else:
            sep = '\x00'

        imgtype = self.mimetype
        if len(imgtype) != 3:
            #attempt conversion
            if imgtype in ID3V2_2_FRAME_MIME_TYPE_TO_IMAGE_FORMAT:
                imgtype = ID3V2_2_FRAME_MIME_TYPE_TO_IMAGE_FORMAT[imgtype]
            else:
                raise ID3FrameException("ID3v2.2 picture format must be three characters")

        # The image format is fixed width, so nothing separates it from
        # the picture type byte; x_apic() reads that byte directly
        # after the three format characters.
        return chr(enc) + imgtype + \
               chr(self.picttype) + \
               self.o_string(self.desc, self.encoding) + \
               sep + self.pict

    def x_apic(self):
        """
        Extract APIC

        set: encoding, mimetype, desc, pict, picttype

        @raise ID3FrameException: the image format is missing or has no
            known mimetype
        """
        data = self.rawdata
        self.encoding = encodings[ord(data[0])]
        self.mimetype = ''
        self.desc = ''
        self.pict = ''
        self.picttype = 0

        # get mime type (must be latin-1)
        imgtype = data[1:4]
        if not imgtype:
            raise ID3FrameException("APIC extraction failed. Missing mimetype")

        if imgtype not in ID3V2_2_FRAME_IMAGE_FORMAT_TO_MIME_TYPE:
            raise ID3FrameException("Unrecognised mime-type")
        self.mimetype = ID3V2_2_FRAME_IMAGE_FORMAT_TO_MIME_TYPE[imgtype]

        self.picttype = ord(data[len(imgtype) + 1])

        # get picture description
        desc_start = len(imgtype) + 2
        if is_double_byte(self.encoding):
            # Step in whole code units so that two zero bytes belonging
            # to neighbouring characters are not taken for a terminator.
            sep = '\x00\x00'
            candidates = range(desc_start, len(data) - 1, 2)
        else:
            sep = '\x00'
            candidates = range(desc_start, len(data))

        for i in candidates:
            if data[i:i + len(sep)] == sep:
                self.desc = data[desc_start:i]
                self.pict = data[i + len(sep):]
                break

        debug('Read Field: %s Len: %d PicType: %d Mime: %s Desc: %s PicLen: %d' %
               (self.fid, self.length, self.picttype, self.mimetype,
                self.desc, len(self.pict)))

class ID3v2_3_Frame(ID3v2BaseFrame):
    """ ID3v2.3 Frame

    The header is a four character id, a four byte size, one status
    flag byte and one format flag byte.
    """
    supported = ID3V2_3_ABOVE_SUPPORTED_IDS
    header_length = ID3V2_3_FRAME_HEADER_LENGTH
    status_flags = ID3V2_3_FRAME_STATUS_FLAGS
    format_flags = ID3V2_3_FRAME_FORMAT_FLAGS
    version = 2.3

    def parse_frame_header(self, frame):
        """
        Parse the frame header from a bytestring

        sets: fid, rawdata, length, flags

        @param frame: bytestring of the frame
        """
        frame_header = frame[:self.header_length]

        (fid, rawsize, status_byte, format_byte) = \
            struct.unpack("!4sIBB", frame_header)

        self.fid = fid
        self.rawdata = frame[self.header_length:]
        self.length = rawsize
        self.flags = {}

        for flagname, bit in self.status_flags:
            self.flags[flagname] = (status_byte >> bit) & 0x01

        for flagname, bit in self.format_flags:
            self.flags[flagname] = (format_byte >> bit) & 0x01

    def output(self):
        """
        Create a bytestring representing the frame header and the field

        @return: frame id, size, status byte, format byte and the body
        """
        fieldstr = self.output_field()
        header = self.fid + struct.pack('!IBB', len(fieldstr),
                                        self.getstatus(),
                                        self.getformat())
        return header + fieldstr

    def _pack_flags(self, flag_table):
        """
        Fold the flags named in flag_table into one header byte.

        @param flag_table: sequence of (flag name, bit position) pairs
        @return: integer with a bit set for every flag that is true
        """
        word = 0
        # flags is only a dictionary once a header was parsed or
        # created; anything else means no flag is set.
        if not isinstance(self.flags, dict):
            return word
        for flag, bit in flag_table:
            # Test the value, not the key: parsing and creating a
            # header both store every flag name, set or not.
            if self.flags.get(flag):
                word |= 0x01 << bit
        return word

    def getstatus(self):
        """
        @return: the status flag byte for the frame header
        """
        return self._pack_flags(self.status_flags)

    def getformat(self):
        """
        @return: the format flag byte for the frame header
        """
        return self._pack_flags(self.format_flags)
            

class ID3v2_4_Frame(ID3v2_3_Frame):
    """ ID3v2.4 Frame

    Inherits header parsing and output from ID3v2_3_Frame; only the
    class attributes below are set here.
    """
    version = 2.4
    flags = ID3V2_3_FRAME_FLAGS
    header_length = ID3V2_3_FRAME_HEADER_LENGTH
    supported = ID3V2_3_ABOVE_SUPPORTED_IDS


# Version-neutral alias: ID3v2Frame is the ID3v2.4 frame class.
ID3v2Frame = ID3v2_4_Frame