-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathaudio.py
1792 lines (1572 loc) · 66.5 KB
/
audio.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""
The main `Echo Nest`_ `Remix API`_ module for manipulating audio files and
their associated `Echo Nest`_ `Analyze API`_ analyses.
AudioData, and getpieces by Robert Ochshorn on 2008-06-06.
Some refactoring and everything else by Joshua Lifton 2008-09-07.
Refactoring by Ben Lacker 2009-02-11.
Other contributions by Adam Lindsay.
Additional functions and cleanup by Peter Sobot on 2012-11-01.
:group Base Classes: AudioAnalysis, AudioRenderable, AudioData, AudioData32
:group Audio-plus-Analysis Classes: AudioFile, LocalAudioFile, LocalAnalysis
:group Building Blocks: AudioQuantum, AudioSegment, AudioQuantumList, ModifiedRenderable
:group Effects: AudioEffect, LevelDB, AmplitudeFactor, TimeTruncateFactor, TimeTruncateLength, Simultaneous
:group Exception Classes: FileTypeError, EchoNestRemixError
:group Audio helper functions: getpieces, mix, assemble, megamix
:group Utility functions: _dataParser, _attributeParser, _segmentsParser
.. _Analyze API: http://developer.echonest.com/
.. _Remix API: https://github.com/echonest/remix
.. _Echo Nest: http://the.echonest.com/
"""
__version__ = "$Revision: 0 $"
# $Source$
import hashlib
import numpy
import os
import sys
import errno
import cPickle
import shutil
import struct
import tempfile
import logging
import wave
import time
import traceback
import cStringIO
import xml.etree.ElementTree as etree
import xml.dom.minidom as minidom
import weakref
from pyechonest import track
from pyechonest.util import EchoNestAPIError
import pyechonest.util
import pyechonest.config as config
from ffmpeg import ffmpeg, ffmpeg_downconvert
MP3_BITRATE = 128
log = logging.getLogger(__name__)
class AudioAnalysis(object):
    """
    This class uses (but does not wrap) `pyechonest.track` to allow
    transparent caching of the audio analysis of an audio file.
    For example, the following script will display the bars of a track
    twice::
        from echonest import *
        a = audio.AudioAnalysis('YOUR_TRACK_ID_HERE')
        a.bars
        a.bars
    The first time `a.bars` is called, a network request is made of the
    `Echo Nest`_ `Analyze API`_. The second time `a.bars` is called, the
    cached value is returned immediately.
    An `AudioAnalysis` object can be created using an existing ID, as in
    the example above, or by specifying the audio file to upload in
    order to create the ID, as in::
        a = audio.AudioAnalysis('FULL_PATH_TO_AUDIO_FILE')
    .. _Analyze API: http://developer.echonest.com/pages/overview?version=2
    .. _Echo Nest: http://the.echonest.com/
    """

    @classmethod
    def __get_cache_path(cls, identifier):
        # On-disk pickle cache, keyed by the 32-character MD5 identifier.
        return "cache/%s.pickle" % identifier

    def __new__(cls, *args, **kwargs):
        # Short-circuit construction: if the first argument looks like an
        # MD5 (a 32-character str) and a cached pickle exists for it,
        # return the unpickled instance instead of building a new one.
        if len(args):
            initializer = args[0]
            if type(initializer) is str and len(initializer) == 32:
                path = cls.__get_cache_path(initializer)
                if os.path.exists(path):
                    return cPickle.load(open(path, 'r'))
        return object.__new__(cls, *args, **kwargs)

    def __init__(self, initializer, filetype = None, lastTry = False):
        """
        Constructor. If the argument is a valid local path or a URL,
        the track ID is generated by uploading the file to the `Echo Nest`_
        `Analyze API`_\. Otherwise, the argument is assumed to be
        the track ID.
        :param initializer:
            A string representing either a path to a local
            file, or the ID of a file that has already
            been uploaded for analysis, or an open file-like object.
        :param filetype: file extension of the data in `initializer`;
            required when `initializer` is a file-like object, ignored
            otherwise.
        :param lastTry: internal flag used by the retry logic below; when
            True, failures are re-raised instead of retried.
        .. _Analyze API: http://developer.echonest.com/docs/v4/track.html
        .. _Echo Nest: http://the.echonest.com/
        """
        if type(initializer) not in [str, unicode] and not hasattr(initializer, 'read'):
            # Argument is invalid.
            raise TypeError("Argument 'initializer' must be a string \
representing either a filename, track ID, or MD5, or \
instead, a file-like object.")
        # Only analyses looked up by MD5 are written to the pickle cache.
        __save_to_cache = False
        try:
            if isinstance(initializer, basestring):
                # see if path_or_identifier is a path or an ID
                if os.path.isfile(initializer):
                    # it's a filename
                    self.pyechonest_track = track.track_from_filename(initializer)
                    self.pyechonest_track.get_analysis()
                else:
                    if initializer.startswith('music://') or \
                       (initializer.startswith('TR') and
                        len(initializer) == 18):
                        # it's an id
                        self.pyechonest_track = track.track_from_id(initializer)
                        self.pyechonest_track.get_analysis()
                    elif len(initializer) == 32:
                        # it's an md5
                        self.pyechonest_track = track.track_from_md5(initializer)
                        self.pyechonest_track.get_analysis()
                        __save_to_cache = True
            else:
                # File-like object: upload its contents for analysis.
                assert(filetype is not None)
                initializer.seek(0)
                try:
                    self.pyechonest_track = track.track_from_file(initializer, filetype)
                    self.pyechonest_track.get_analysis()
                except (IOError, pyechonest.util.EchoNestAPIError) as e:
                    if lastTry:
                        raise
                    # Broken pipe / connection reset, or EN errors -1, 5, 6:
                    # transcode down to MP3 and retry exactly once.
                    if (isinstance(e, IOError)
                        and (e.errno in [errno.EPIPE, errno.ECONNRESET]))\
                        or (isinstance(e, pyechonest.util.EchoNestAPIError)
                        and any([("Error %s" % x) in str(e) for x in [-1, 5, 6]])):
                        logging.getLogger(__name__).warning("Upload to EN failed - transcoding and reattempting.")
                        self.__init__(ffmpeg_downconvert(initializer, filetype), 'mp3', lastTry=True)
                        return
                    # EN error 3 is the API rate limit: wait, retry once.
                    elif (isinstance(e, pyechonest.util.EchoNestAPIError)
                          and any([("Error %s" % x) in str(e) for x in [3]])):
                        logging.getLogger(__name__).warning("EN API limit hit. Waiting 10 seconds.")
                        time.sleep(10)
                        self.__init__(initializer, filetype, lastTry=True)
                        return
                    else:
                        logging.getLogger(__name__).warning("Got unhandlable EN exception. Raising:\n%s",
                                                            traceback.format_exc())
                        raise
        except Exception as e:
            if lastTry or type(initializer) is str:
                raise
            # Some uploads fail because of a trailing garbage byte; chop
            # the last byte off the stream and retry exactly once.
            if "the track is still being analyzed" in str(e)\
               or "there was an error analyzing the track" in str(e):
                logging.getLogger(__name__).warning("Could not analyze track - truncating last byte and trying again.")
                try:
                    initializer.seek(-1, os.SEEK_END)
                    initializer.truncate()
                    initializer.seek(0)
                except IOError:
                    # Stream is not truncatable in place: copy everything
                    # except the last byte into an in-memory buffer.
                    initializer.seek(-1, os.SEEK_END)
                    new_len = initializer.tell()
                    initializer.seek(0)
                    initializer = cStringIO.StringIO(initializer.read(new_len))
                self.__init__(initializer, filetype, lastTry=True)
                return
            else:
                logging.getLogger(__name__).warning("Got a further unhandlable EN exception. Raising:\n%s",
                                                    traceback.format_exc())
                raise
        if self.pyechonest_track is None:
            # This is an EN-side error that will *not* be solved by repeated calls
            if type(initializer) is str:
                raise EchoNestRemixError('Could not find track %s' % initializer)
            else:
                raise EchoNestRemixError('Could not find analysis for track!')
        self.source = None
        # Lazily-parsed quantum lists; see the properties below.
        self._bars = None
        self._beats = None
        self._tatums = None
        self._sections = None
        self._segments = None
        self.identifier = self.pyechonest_track.id
        # Patching around the fact that sometimes pyechonest doesn't give back metadata
        # As of 11/2012, metadata is not used by remix
        try:
            self.metadata = self.pyechonest_track.meta
        except AttributeError:
            self.metadata = None
            print >> sys.stderr, "Warning: no metadata returned for track."
        # Attributes that come with a confidence value are stored as dicts.
        for attribute in ('time_signature', 'mode', 'tempo', 'key'):
            d = {'value': getattr(self.pyechonest_track, attribute),
                 'confidence': getattr(self.pyechonest_track, attribute + '_confidence')}
            setattr(self, attribute, d)
        # Plain scalar attributes copied straight through.
        for attribute in ('end_of_fade_in', 'start_of_fade_out', 'duration', 'loudness'):
            setattr(self, attribute, getattr(self.pyechonest_track, attribute))
        if __save_to_cache:
            # Cache only if a "cache" directory exists and no copy is
            # present yet; `initializer` is the MD5 here.
            path = self.__get_cache_path(initializer)
            if not os.path.isfile(path) and os.path.isdir(os.path.dirname(path)):
                cPickle.dump(self, open(path, 'w'), 2)

    @property
    def bars(self):
        # Parsed lazily on first access, then memoized on the instance.
        if self._bars is None:
            self._bars = _dataParser('bar', self.pyechonest_track.bars)
            self._bars.attach(self)
        return self._bars

    @property
    def beats(self):
        if self._beats is None:
            self._beats = _dataParser('beat', self.pyechonest_track.beats)
            self._beats.attach(self)
        return self._beats

    @property
    def tatums(self):
        if self._tatums is None:
            self._tatums = _dataParser('tatum', self.pyechonest_track.tatums)
            self._tatums.attach(self)
        return self._tatums

    @property
    def sections(self):
        if self._sections is None:
            self._sections = _attributeParser('section', self.pyechonest_track.sections)
            self._sections.attach(self)
        return self._sections

    @property
    def segments(self):
        if self._segments is None:
            self._segments = _segmentsParser(self.pyechonest_track.segments)
            self._segments.attach(self)
        return self._segments

    def __getstate__(self):
        """
        Eliminates the circular reference for pickling.
        """
        dictclone = self.__dict__.copy()
        del dictclone['source']
        return dictclone

    def __setstate__(self, state):
        """
        Recreates circular references after unpickling.
        """
        self.__dict__.update(state)
        if hasattr(AudioAnalysis, 'CACHED_VARIABLES'):
            for cached_var in AudioAnalysis.CACHED_VARIABLES:
                if type(object.__getattribute__(self, cached_var)) == AudioQuantumList:
                    object.__getattribute__(self, cached_var).attach(self)
class AudioRenderable(object):
    """
    An object that gives an `AudioData` in response to a call to its `render`\()
    method.
    Intended to be an abstract class that helps enforce the `AudioRenderable`
    protocol. Picked up a couple of convenience methods common to many descendants.
    Every `AudioRenderable` must provide three things:
    render()
        A method returning the `AudioData` for the object. The rhythmic duration (point
        at which any following audio is appended) is signified by the `endindex` accessor,
        measured in samples.
    source
        An accessor pointing to the `AudioData` that contains the original sample data of
        (a superset of) this audio object.
    duration
        An accessor returning the rhythmic duration (in seconds) of the audio object.
    """

    def resolve_source(self, alt):
        """
        Given an alternative, fallback `alt` source, return either `self`'s
        source or the alternative. Throw an informative error if no source
        is found.
        Utility code that ended up being replicated in several places, so
        it ended up here. Not necessary for use in the RenderableAudioObject
        protocol.
        """
        if hasattr(self, 'source'):
            source = self.source
        else:
            # Fall back to `alt`, but only if it really is an AudioData.
            if isinstance(alt, AudioData):
                source = alt
            else:
                print >> sys.stderr, self.__repr__()
                raise EchoNestRemixError("%s has no implicit or explicit source \
during rendering." %
                                         (self.__class__.__name__, ))
        return source

    @staticmethod
    def init_audio_data(source, num_samples):
        """
        Convenience function for rendering: return a pre-allocated, zeroed
        `AudioData`.
        """
        # Mirror the channel layout of `source` in the new buffer.
        if source.numChannels > 1:
            newchans = source.numChannels
            newshape = (num_samples, newchans)
        else:
            newchans = 1
            newshape = (num_samples,)
        # 32-bit accumulator gives headroom when summing during rendering.
        return AudioData32(shape=newshape, sampleRate=source.sampleRate,
                           numChannels=newchans, defer=False)

    def sources(self):
        # Default: a renderable has exactly one backing source.
        return set([self.source])

    def encode(self, filename):
        """
        Shortcut function that takes care of the need to obtain an `AudioData`
        object first, through `render`.
        """
        self.render().encode(filename)
class AudioData(AudioRenderable):
    """
    Handles audio data transparently. A smart audio container
    with accessors that include:
    sampleRate
        samples per second
    numChannels
        number of channels
    data
        a `numpy.array`_
    .. _numpy.array: http://docs.scipy.org/doc/numpy/reference/generated/numpy.array.html
    """

    def __init__(self, filename=None, ndarray = None, shape=None, sampleRate=None, numChannels=None, defer=False, verbose=True):
        """
        Given an input `ndarray`, import the sample values and shape
        (if none is specified) of the input `numpy.array`.
        Given a `filename` (and an input ndarray), use ffmpeg to convert
        the file to wave, then load the file into the data,
        auto-detecting the sample rate, and number of channels.
        :param filename: a path to an audio file for loading its sample
            data into the AudioData.data
        :param ndarray: a `numpy.array`_ instance with sample data
        :param shape: a tuple of array dimensions
        :param sampleRate: sample rate, in Hz
        :param numChannels: number of channels
        :param defer: when True, postpone decoding until `load` is called
        :param verbose: when True, status messages are printed to stderr
        .. _numpy.array: http://docs.scipy.org/doc/numpy/reference/generated/numpy.array.html
        """
        self.verbose = verbose
        self.defer = defer
        self.filename = filename
        self.sampleRate = sampleRate
        self.numChannels = numChannels
        # Path of the temporary .wav produced by ffmpeg, if one is made.
        self.convertedfile = None
        # Index one past the last written sample (rhythmic length).
        self.endindex = 0
        # Allocate (or load) the 16-bit buffer unless loading is deferred.
        if shape is None and isinstance(ndarray, numpy.ndarray) and not self.defer:
            self.data = numpy.zeros(ndarray.shape, dtype=numpy.int16)
        elif shape is not None and not self.defer:
            self.data = numpy.zeros(shape, dtype=numpy.int16)
        elif not self.defer and self.filename:
            self.data = None
            self.load()
        else:
            self.data = None
        # Copy any provided samples into the freshly-zeroed buffer.
        if ndarray is not None and self.data is not None:
            self.endindex = len(ndarray)
            self.data[0:self.endindex] = ndarray

    def load(self):
        """Decode `self.filename` (converting via ffmpeg when needed) into
        `self.data` as 16-bit samples."""
        if isinstance(self.data, numpy.ndarray):
            # Already loaded.
            return
        temp_file_handle = None
        # A 44.1kHz stereo .wav can be read directly; anything else is
        # first converted to a temporary .wav by ffmpeg.
        if self.filename.lower().endswith(".wav") and (self.sampleRate, self.numChannels) == (44100, 2):
            file_to_read = self.filename
        elif self.convertedfile:
            file_to_read = self.convertedfile
        else:
            temp_file_handle, self.convertedfile = tempfile.mkstemp(".wav")
            self.sampleRate, self.numChannels = ffmpeg(self.filename, self.convertedfile, overwrite=True,
                                                       numChannels=self.numChannels, sampleRate=self.sampleRate, verbose=self.verbose)
            file_to_read = self.convertedfile
        w = wave.open(file_to_read, 'r')
        numFrames = w.getnframes()
        raw = w.readframes(numFrames)
        # "<h" = little-endian signed 16-bit, interleaved across channels.
        sampleSize = numFrames * self.numChannels
        data = numpy.frombuffer(raw, dtype="<h", count=sampleSize)
        ndarray = numpy.array(data, dtype=numpy.int16)
        if self.numChannels > 1:
            # Reshape the interleaved stream into (frames, channels).
            ndarray.resize((numFrames, self.numChannels))
        self.data = numpy.zeros(ndarray.shape, dtype=numpy.int16)
        self.endindex = 0
        if ndarray is not None:
            self.endindex = len(ndarray)
            self.data = ndarray
        if temp_file_handle is not None:
            os.close(temp_file_handle)
        w.close()

    def __getitem__(self, index):
        """
        Fetches a frame or slice. Returns an individual frame (if the index
        is a time offset float or an integer sample number) or a slice if
        the index is an `AudioQuantum` (or quacks like one).
        """
        if not isinstance(self.data, numpy.ndarray) and self.defer:
            self.load()
        if isinstance(index, float):
            # Seconds -> sample index.
            index = int(index * self.sampleRate)
        elif hasattr(index, "start") and hasattr(index, "duration"):
            # AudioQuantum-like object -> time slice in seconds.
            index = slice(float(index.start), index.start + index.duration)
        if isinstance(index, slice):
            # A slice whose endpoints are AudioQuantum-like -> span from the
            # first quantum's start to the end of the last quantum.
            if (hasattr(index.start, "start") and
                hasattr(index.stop, "duration") and
                hasattr(index.stop, "start")):
                index = slice(index.start.start, index.stop.start + index.stop.duration)
        if isinstance(index, slice):
            return self.getslice(index)
        else:
            return self.getsample(index)

    def getslice(self, index):
        "Help `__getitem__` return a new AudioData for a given slice"
        if not isinstance(self.data, numpy.ndarray) and self.defer:
            self.load()
        if isinstance(index.start, float):
            # Seconds -> sample indices.
            index = slice(int(index.start * self.sampleRate),
                          int(index.stop * self.sampleRate), index.step)
        return AudioData(None, self.data[index], sampleRate=self.sampleRate,
                         numChannels=self.numChannels, defer=False)

    def getsample(self, index):
        """
        Help `__getitem__` return a frame (all channels for a given
        sample index)
        """
        if not isinstance(self.data, numpy.ndarray) and self.defer:
            self.load()
        if isinstance(index, int):
            return self.data[index]
        else:
            #let the numpy array interface be clever
            return AudioData(None, self.data[index], defer=False)

    def pad_with_zeros(self, num_samples):
        # Grow the buffer by `num_samples` zeroed frames (no-op if <= 0).
        if num_samples > 0:
            if self.numChannels == 1:
                extra_shape = (num_samples,)
            else:
                extra_shape = (num_samples, self.numChannels)
            self.data = numpy.append(self.data,
                                     numpy.zeros(extra_shape, dtype=numpy.int16), axis=0)

    def append(self, another_audio_data):
        "Appends the input to the end of this `AudioData`."
        # Grow only by what won't fit in the remaining headroom.
        extra = len(another_audio_data.data) - (len(self.data) - self.endindex)
        self.pad_with_zeros(extra)
        self.data[self.endindex : self.endindex + len(another_audio_data)] += another_audio_data.data
        self.endindex += another_audio_data.endindex

    def sum(self, another_audio_data):
        # Mix `another_audio_data` into this buffer starting at sample 0.
        extra = len(another_audio_data.data) - len(self.data)
        self.pad_with_zeros(extra)
        compare_limit = min(len(another_audio_data.data), len(self.data)) - 1
        self.data[: compare_limit] += another_audio_data.data[: compare_limit]

    def add_at(self, time, another_audio_data):
        """
        Adds the input `another_audio_data` to this `AudioData`
        at the `time` specified in seconds. If `another_audio_data` has fewer channels than
        this `AudioData`, the `another_audio_data` will be resampled to match.
        In this case, this method will modify `another_audio_data`.
        """
        offset = int(time * self.sampleRate)
        extra = offset + len(another_audio_data.data) - len(self.data)
        self.pad_with_zeros(extra)
        if another_audio_data.numChannels < self.numChannels:
            # Resample another_audio_data
            another_audio_data.data = numpy.repeat(another_audio_data.data, self.numChannels).reshape(len(another_audio_data), self.numChannels)
            another_audio_data.numChannels = self.numChannels
        self.data[offset : offset + len(another_audio_data.data)] += another_audio_data.data

    def __len__(self):
        # Allocated length in samples (may exceed endindex).
        if self.data is not None:
            return len(self.data)
        else:
            return 0

    def __add__(self, other):
        """Supports stuff like this: sound3 = sound1 + sound2"""
        return assemble([self, other], numChannels=self.numChannels,
                        sampleRate=self.sampleRate)

    def encode(self, filename=None, mp3=None):
        """
        Outputs an MP3 or WAVE file to `filename`.
        Format is determined by `mp3` parameter.
        """
        # When `mp3` is not explicitly set, a ".wav" target means WAVE;
        # everything else defaults to MP3.
        if not mp3 and filename.lower().endswith('.wav'):
            mp3 = False
        else:
            mp3 = True
        if mp3:
            foo, tempfilename = tempfile.mkstemp(".wav")
            os.close(foo)
        else:
            tempfilename = filename
        fid = open(tempfilename, 'wb')
        # Based on Scipy svn
        # http://projects.scipy.org/pipermail/scipy-svn/2007-August/001189.html
        fid.write('RIFF')
        fid.write(struct.pack('<i', 0)) # write a 0 for length now, we'll go back and add it later
        fid.write('WAVE')
        # fmt chunk
        fid.write('fmt ')
        if self.data.ndim == 1:
            noc = 1
        else:
            noc = self.data.shape[1]
        bits = self.data.dtype.itemsize * 8
        # Byte rate and block align for the fmt chunk.
        sbytes = self.sampleRate * (bits / 8) * noc
        ba = noc * (bits / 8)
        fid.write(struct.pack('<ihHiiHH', 16, 1, noc, self.sampleRate, sbytes, ba, bits))
        # data chunk
        fid.write('data')
        fid.write(struct.pack('<i', self.data.nbytes))
        self.data.tofile(fid)
        # Determine file size and place it in correct
        # position at start of the file.
        size = fid.tell()
        fid.seek(4)
        fid.write(struct.pack('<i', size - 8))
        fid.close()
        if not mp3:
            return tempfilename
        # now convert it to mp3
        if not filename.lower().endswith('.mp3'):
            filename = filename + '.mp3'
        try:
            bitRate = MP3_BITRATE
        except NameError:
            bitRate = 128
        ffmpeg(tempfilename, filename, bitRate=bitRate, verbose=self.verbose)
        if tempfilename != filename:
            if self.verbose:
                print >> sys.stderr, "Deleting: %s" % tempfilename
            os.remove(tempfilename)
        return filename

    def unload(self):
        # Drop the sample buffer and delete any temporary converted .wav.
        self.data = None
        if self.convertedfile:
            if self.verbose:
                print >> sys.stderr, "Deleting: %s" % self.convertedfile
            os.remove(self.convertedfile)
            self.convertedfile = None

    def render(self, start=0.0, to_audio=None, with_source=None):
        # AudioData renders as itself; when given a target buffer, mix
        # this object into it at `start` seconds instead.
        if not to_audio:
            return self
        if with_source != self:
            return
        to_audio.add_at(start, self)
        return

    @property
    def duration(self):
        # Rhythmic length in seconds, derived from endindex.
        return float(self.endindex) / self.sampleRate

    @property
    def source(self):
        # An AudioData is its own source.
        return self
class AudioData32(AudioData):
    """A 32-bit variant of AudioData, intended for data collection on
    audio rendering with headroom."""

    def __init__(self, filename=None, ndarray = None, shape=None, sampleRate=None, numChannels=None, defer=False, verbose=True):
        """
        Special form of AudioData to allow for headroom when collecting samples.
        Same parameters as `AudioData.__init__`, but the backing buffer is
        int32 instead of int16 so summed samples do not clip.
        """
        self.verbose = verbose
        self.defer = defer
        self.filename = filename
        self.sampleRate = sampleRate
        self.numChannels = numChannels
        self.convertedfile = None
        # int32 buffer: extra headroom for accumulating renders.
        if shape is None and isinstance(ndarray, numpy.ndarray) and not self.defer:
            self.data = numpy.zeros(ndarray.shape, dtype=numpy.int32)
        elif shape is not None and not self.defer:
            self.data = numpy.zeros(shape, dtype=numpy.int32)
        elif not self.defer and self.filename:
            self.load()
        else:
            self.data = None
        self.endindex = 0
        if ndarray is not None and self.data is not None:
            self.endindex = len(ndarray)
            self.data[0:self.endindex] = ndarray

    def load(self):
        """Decode `self.filename` into a 32-bit `self.data` buffer,
        converting via ffmpeg when the source is not 44.1kHz stereo wav."""
        if isinstance(self.data, numpy.ndarray):
            # Already loaded.
            return
        temp_file_handle = None
        if self.filename.lower().endswith(".wav") and (self.sampleRate, self.numChannels) == (44100, 2):
            file_to_read = self.filename
        elif self.convertedfile:
            file_to_read = self.convertedfile
        else:
            temp_file_handle, self.convertedfile = tempfile.mkstemp(".wav")
            self.sampleRate, self.numChannels = ffmpeg(self.filename, self.convertedfile, overwrite=True,
                                                       numChannels=self.numChannels, sampleRate=self.sampleRate, verbose=self.verbose)
            file_to_read = self.convertedfile
        w = wave.open(file_to_read, 'r')
        numFrames = w.getnframes()
        raw = w.readframes(numFrames)
        sampleSize = numFrames * self.numChannels
        data = numpy.frombuffer(raw, dtype="<h", count=sampleSize)
        ndarray = numpy.array(data, dtype=numpy.int16)
        if self.numChannels > 1:
            ndarray.resize((numFrames, self.numChannels))
        # Unlike AudioData.load, the 16-bit samples are copied into a
        # zeroed 32-bit buffer (widening them) rather than adopted as-is.
        self.data = numpy.zeros(ndarray.shape, dtype=numpy.int32)
        self.endindex = 0
        if ndarray is not None:
            self.endindex = len(ndarray)
            self.data[0:self.endindex] = ndarray
        if temp_file_handle is not None:
            os.close(temp_file_handle)
        w.close()

    def encode(self, filename=None, mp3=None):
        """
        Outputs an MP3 or WAVE file to `filename`.
        Format is determined by `mp3` parameter.
        The 32-bit buffer is first scaled back to 16-bit via `normalized`.
        """
        normalized = self.normalized()
        temp_file_handle = None
        if not mp3 and filename.lower().endswith('.wav'):
            mp3 = False
        else:
            mp3 = True
        if mp3:
            temp_file_handle, tempfilename = tempfile.mkstemp(".wav")
        else:
            tempfilename = filename
        fid = open(tempfilename, 'wb')
        # Based on Scipy svn
        # http://projects.scipy.org/pipermail/scipy-svn/2007-August/001189.html
        fid.write('RIFF')
        fid.write(struct.pack('<i', 0)) # write a 0 for length now, we'll go back and add it later
        fid.write('WAVE')
        # fmt chunk
        fid.write('fmt ')
        if normalized.ndim == 1:
            noc = 1
        else:
            noc = normalized.shape[1]
        bits = normalized.dtype.itemsize * 8
        # Byte rate and block align for the fmt chunk.
        sbytes = self.sampleRate * (bits / 8) * noc
        ba = noc * (bits / 8)
        fid.write(struct.pack('<ihHiiHH', 16, 1, noc, self.sampleRate, sbytes, ba, bits))
        # data chunk
        fid.write('data')
        fid.write(struct.pack('<i', normalized.nbytes))
        normalized.tofile(fid)
        # Determine file size and place it in correct
        # position at start of the file.
        size = fid.tell()
        fid.seek(4)
        fid.write(struct.pack('<i', size - 8))
        fid.close()
        if not mp3:
            return tempfilename
        # now convert it to mp3
        if not filename.lower().endswith('.mp3'):
            filename = filename + '.mp3'
        try:
            bitRate = MP3_BITRATE
        except NameError:
            bitRate = 128
        ffmpeg(tempfilename, filename, bitRate=bitRate, verbose=self.verbose)
        if tempfilename != filename:
            if self.verbose:
                print >> sys.stderr, "Deleting: %s" % tempfilename
            os.remove(tempfilename)
        if temp_file_handle is not None:
            os.close(temp_file_handle)
        return filename

    def normalized(self):
        """Return to 16-bit for encoding."""
        # Scale so the loudest sample maps to 32767.
        factor = 32767.0 / numpy.max(numpy.absolute(self.data.flatten()))
        # If the max was 32768, don't bother scaling:
        if factor < 1.000031:
            return (self.data * factor).astype(numpy.int16)
        else:
            return self.data.astype(numpy.int16)

    def pad_with_zeros(self, num_samples):
        # Same as AudioData.pad_with_zeros but keeps the int32 dtype.
        if num_samples > 0:
            if self.numChannels == 1:
                extra_shape = (num_samples,)
            else:
                extra_shape = (num_samples, self.numChannels)
            self.data = numpy.append(self.data,
                                     numpy.zeros(extra_shape, dtype=numpy.int32), axis=0)
def getpieces(audioData, segs):
    """
    Collects audio samples for output.
    Returns a new `AudioData` where the new sample data is assembled
    from the input audioData according to the time offsets in each
    of the elements of the input segs (commonly an `AudioQuantumList`).
    :param audioData: an `AudioData` object
    :param segs: an iterable containing objects that may be accessed
        as slices or indices for an `AudioData`
    :return: a freshly-allocated `AudioData` containing the concatenated
        slices
    """
    # Ensure that we have data.
    # Fix: `audioData.data == None` is an *elementwise* comparison when
    # data is a numpy array, and its truth value is ambiguous (numpy
    # raises ValueError); identity with `is None` is the correct test.
    if audioData.data is None or audioData.defer:
        audioData.data = None
        audioData.load()
    # Total output length, in samples.
    dur = 0
    for s in segs:
        dur += int(s.duration * audioData.sampleRate)
    # if I wanted to add some padding to the length, I'd do it here
    #determine shape of new array
    if len(audioData.data.shape) > 1:
        newshape = (dur, audioData.data.shape[1])
        newchans = audioData.data.shape[1]
    else:
        newshape = (dur,)
        newchans = 1
    #make accumulator segment
    newAD = AudioData(shape=newshape, sampleRate=audioData.sampleRate,
                      numChannels=newchans, defer=False, verbose=audioData.verbose)
    #concatenate segs to the new segment
    for s in segs:
        newAD.append(audioData[s])
    # audioData.unload()
    return newAD
def assemble(audioDataList, numChannels=1, sampleRate=44100, verbose=True):
    """
    Concatenate several `AudioData` objects into one.
    The sample buffers of every element of `audioDataList` are joined
    end-to-end and wrapped in a freshly-constructed `AudioData`.
    :param audioDataList: a list of `AudioData` objects
    :param numChannels: channel count recorded on the result
    :param sampleRate: sample rate (Hz) recorded on the result
    :param verbose: passed through to the new `AudioData`
    :return: a new `AudioData` holding the concatenated samples
    """
    buffers = [segment.data for segment in audioDataList]
    combined = numpy.concatenate(buffers)
    return AudioData(ndarray=combined,
                     numChannels=numChannels,
                     sampleRate=sampleRate, defer=False, verbose=verbose)
def mix(dataA, dataB, mix=0.5):
    """
    Mixes two `AudioData` objects. Assumes they have the same sample rate
    and number of channels.
    `mix` is a float in [0, 1] giving the relative level of `dataA` in the
    result; e.g. mix=0.9 yields greater presence of dataA in the final mix.
    The output is as long as the longer of the two inputs.
    """
    level_a = float(mix)
    # Copy the longer input into a fresh buffer, scale it by its own
    # gain, then add the scaled shorter input on top.
    if dataA.endindex > dataB.endindex:
        base, overlay = dataA, dataB
        base_gain, overlay_gain = level_a, 1 - level_a
    else:
        base, overlay = dataB, dataA
        base_gain, overlay_gain = 1 - level_a, level_a
    newdata = AudioData(ndarray=base.data, sampleRate=base.sampleRate,
                        numChannels=base.numChannels, defer=False)
    newdata.data *= base_gain
    newdata.data[:overlay.endindex] += overlay.data[:] * overlay_gain
    return newdata
def normalize(audio):
    """
    Thin wrapper that delegates to `audio.normalized()`.
    Kept for compatibility with some legacy Wub Machine calls.
    :param audio: any object exposing a `normalized()` method
        (typically an `AudioData32`)
    :return: whatever `audio.normalized()` returns
    """
    return audio.normalized()
def __genFade(fadeLength, dimensions=1):
    """
    Internal helper for fadeEdges().
    Builds a squared-linear fade-out envelope running from 1.0 down to
    0.0 over `fadeLength` samples. When `dimensions` is 2 the envelope
    gains a trailing axis so it broadcasts against (frames, channels)
    sample arrays.
    """
    envelope = numpy.linspace(1.0, 0.0, fadeLength) ** 2
    if dimensions == 2:
        envelope = envelope[:, numpy.newaxis]
    return envelope
def fadeEdges(input_, fadeLength=50):
    """
    Fade in/out the ends of an audioData to prevent clicks/pops at edges.
    Operates in place on the underlying sample buffer and also returns
    `input_` for convenience.
    :param input_: an `AudioData` or a bare `numpy.ndarray` of samples
    :param fadeLength: number of samples to fade in/out (clamped to the
        buffer length)
    :raises Exception: if `input_` is neither an AudioData nor an ndarray
    """
    if isinstance(input_, AudioData):
        ad = input_.data
    elif isinstance(input_, numpy.ndarray):
        ad = input_
    else:
        raise Exception("Cannot fade edges of unknown datatype.")
    # Clamp the fade to the available samples and actually use the
    # clamped value in the slices below (the original computed the min
    # but still sliced with the unclamped fadeLength).
    fadeLength = min(fadeLength, len(ad))
    # Fix: pass the array's rank. `ad.shape[1]` raised IndexError for
    # mono, 1-D buffers; __genFade expects 1 or 2 "dimensions".
    fadeOut = __genFade(fadeLength, ad.ndim)
    ad[0:fadeLength] *= fadeOut[::-1]    # fade in: reversed fade-out ramp
    ad[-1 * fadeLength:] *= fadeOut      # fade out
    return input_
def truncatemix(dataA, dataB, mix=0.5):
    """
    Mixes two `AudioData` objects. Assumes they have the same sample rate
    and number of channels.
    `mix` is a float in [0, 1] giving the relative level of `dataA` in the
    result; e.g. mix=0.9 yields greater presence of dataA in the final mix.
    If dataB is longer than dataA, dataB is truncated to dataA's length.
    Note that if dataA is longer than dataB, dataA will not be truncated.
    """
    dry_gain = float(mix)
    wet_gain = 1 - dry_gain
    # Start from a copy of dataA scaled by its own gain.
    newdata = AudioData(ndarray=dataA.data, sampleRate=dataA.sampleRate,
                        numChannels=dataA.numChannels, verbose=False)
    newdata.data *= dry_gain
    # Layer in dataB, truncated to dataA's rhythmic length if necessary.
    if dataB.endindex > dataA.endindex:
        newdata.data[:] += dataB.data[:dataA.endindex] * wet_gain
    else:
        newdata.data[:dataB.endindex] += dataB.data[:] * wet_gain
    return newdata
def megamix(dataList):
    """
    Mix together any number of `AudioData` objects. Keep the shape of
    the first one in the list. Assume they all have the same sample rate
    and number of channels.
    :param dataList: a list of `AudioData` objects
    :return: a new `AudioData` containing the equal-weight mix
    :raises TypeError: if `dataList` (or any element) is not AudioData
    """
    if not isinstance(dataList, list):
        raise TypeError('input must be a list of AudioData objects')
    # Accumulator shaped like the first input.
    newdata = AudioData(shape=dataList[0].data.shape, sampleRate=dataList[0].sampleRate,
                        numChannels=dataList[0].numChannels, defer=False)
    for adata in dataList:
        if not isinstance(adata, AudioData):
            raise TypeError('input must be a list of AudioData objects')
        # Inputs longer than the accumulator are truncated to its length;
        # shorter ones are used whole.
        if len(adata) > len(newdata):
            newseg = AudioData(ndarray=adata[:newdata.endindex].data,
                               numChannels=newdata.numChannels,
                               sampleRate=newdata.sampleRate, defer=False)
            newseg.endindex = newdata.endindex
        else:
            newseg = AudioData(ndarray=adata.data,
                               numChannels=newdata.numChannels,
                               sampleRate=newdata.sampleRate, defer=False)
            newseg.endindex = adata.endindex
        # Equal weighting: each input contributes 1/len(dataList).
        newdata.data[:newseg.endindex] += (newseg.data / float(len(dataList))).astype(newdata.data.dtype)
    newdata.endindex = len(newdata)
    return newdata
class LocalAudioFile(AudioData):
    """
    The basic do-everything class for remixing. Acts as an `AudioData`
    object, but with an added `analysis` selector which is an
    `AudioAnalysis` object. It conditionally uploads the file
    it was initialized with. If the file is already known to the
    Analyze API, then it does not bother uploading the file.
    """

    def __new__(cls, filename, verbose=True, defer=False, sampleRate=None, numChannels=None):
        # There must be a better way to avoid collisions between analysis files and .wav files
        if filename is not None and '.analysis.en' in filename:
            # A previously-pickled LocalAudioFile: unpickle it and return
            # it directly (its __init__ then detects the same suffix and
            # skips re-initialization).
            print >> sys.stderr, "Reading analysis from local file " + filename
            f = open(filename, 'rb')
            audiofile = cPickle.load(f)
            f.close()
            return audiofile
        else:
            # This just creates the object and goes straight on to initializing it
            # NOTE(review): numChannels is not forwarded here, only in
            # __init__ — confirm whether that is intentional.
            return AudioData.__new__(cls, filename=filename, verbose=verbose, defer=defer, sampleRate=sampleRate)

    def __init__(self, filename, verbose=True, defer=False, sampleRate=None, numChannels=None):
        """
        :param filename: path to a local MP3 file, or to a pickled
            ".analysis.en" file previously written by `save`
        :param verbose: when True, progress messages go to stderr
        :param defer: passed to `AudioData.__init__` (defer decoding)
        :param sampleRate: optional sample rate hint for decoding
        :param numChannels: optional channel-count hint for decoding
        """
        # We have to skip the initialization here as the local file is already a complete object
        if '.analysis.en' in filename:
            self.is_local = True
        else:
            AudioData.__init__(self, filename=filename, verbose=verbose, defer=defer,
                               sampleRate=sampleRate, numChannels=numChannels)
            # Probe the Analyze API by MD5 first to avoid re-uploading a
            # file it has already seen.
            track_md5 = hashlib.md5(file(self.filename, 'rb').read()).hexdigest()
            if verbose:
                print >> sys.stderr, "Computed MD5 of file is " + track_md5
            try:
                if verbose:
                    print >> sys.stderr, "Probing for existing analysis"
                tempanalysis = AudioAnalysis(track_md5)
            except Exception:
                if verbose:
                    print >> sys.stderr, "Analysis not found. Uploading..."
                tempanalysis = AudioAnalysis(filename)
            self.analysis = tempanalysis
            self.analysis.source = self
            self.is_local = False

    # Save out as a pickled file.
    def save(self):
        """Persist this object (and its converted .wav) next to the input
        file so later runs can skip both decoding and analysis."""
        # If we loaded from a local file, there's no need to save
        if self.is_local is True:
            print >> sys.stderr, "Analysis was loaded from local file, not saving"
        else:
            input_path = os.path.split(self.filename)[0]
            input_file = os.path.split(self.filename)[1]
            path_to_wave = self.convertedfile
            wav_filename = input_file + '.wav'
            new_path = os.path.abspath(input_path) + os.path.sep
            wav_path = new_path + wav_filename
            try:
                # Keep the decoded .wav alongside the pickle so the pickle
                # remains loadable after the temp file is gone.
                shutil.copyfile(path_to_wave, wav_path)
            except shutil.Error:
                print >> sys.stderr, "Error when moving .wav file: the same file may already exist in this folder"
                return
            self.convertedfile = wav_path
            analysis_filename = input_file + '.analysis.en'
            analysis_path = new_path + analysis_filename
            print >> sys.stderr, "Saving analysis to local file " + analysis_path
            f = open(analysis_path, 'wb')
            cPickle.dump(self, f)
            f.close()

    def toxml(self, context=None):
        # XML serialization is not supported for whole files.
        raise NotImplementedError

    @property
    def duration(self):
        """
        Since we consider `AudioFile` to be an evolved version of
        `AudioData`, we return the measured duration from the analysis.
        """
        return self.analysis.duration

    def __setstate__(self, state):
        """
        Recreates circular reference after unpickling.
        """
        self.__dict__.update(state)
        # weakref avoids a reference cycle between the file and analysis.
        self.analysis.source = weakref.proxy(self)
class LocalAnalysis(object):
"""
Like `LocalAudioFile`, it conditionally uploads the file with which
it was initialized. Unlike `LocalAudioFile`, it is not a subclass of
`AudioData`, so contains no sample data.
"""
def __init__(self, filename, verbose=True):
"""
:param filename: path to a local MP3 file
"""