This repository has been archived by the owner on Feb 14, 2019. It is now read-only.

Added LPC, LPCC and LSF/LSP plus some delta computation for common features #8

Open: wants to merge 5 commits into master
README.md: 14 additions, 1 deletion
@@ -18,6 +18,9 @@ PyMIR is a Python library for common tasks in Music Information Retrieval (MIR)
* RMS
* Spectrum (FFT)
* Zero-crossing rate
* Linear Predictive Components (LPC)
* Linear Predictive Cepstral Components (LPCC) from LPC
* Line Spectrum Pairs (LSP) / Line Spectrum Frequencies (LSF) from LPC
* Spectral feature extraction (Spectrum class)
* Spectral Centroid
* Spectral Flatness
@@ -34,6 +37,7 @@ PyMIR is a Python library for common tasks in Music Information Retrieval (MIR)
* Naive pitch estimation
* Onset detectors (energy, flux)
* Spectral Flux
* Delta computation of features (useful for speech processing)

## Examples

@@ -72,6 +76,9 @@ The standard workflow for working with PyMIR is:
fixedFrames[0].plot() # Plot using matplotlib
fixedFrames[0].rms() # Root-mean-squared amplitude
    fixedFrames[0].zcr()             # Zero-crossing rate
fixedFrames[0].lpc() # LPC, with order = len(fixedFrames[0])-1
fixedFrames[0].lpcc() # LPCC, with order = len(fixedFrames[0])-1
fixedFrames[0].lsp() # LSP/LSF, with order = len(fixedFrames[0])-1
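
The analysis orders can also be set explicitly via the keyword arguments shown in the diff below (the values here are illustrative only; a 12th-order analysis is a common choice for speech):

    fixedFrames[0].lpc(order=12)                    # 13 LPC coefficients
    fixedFrames[0].lpcc(lpcorder=12, cepsorder=12)  # 13 LPCC components
    fixedFrames[0].lsp(order=12, rectify=False)     # also return the negative frequencies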

### Extracting spectral features
# Compute the spectra of each frame
@@ -96,6 +103,12 @@ The standard workflow for working with PyMIR is:
# Compute the spectral flux
flux = SpectralFlux.spectralFlux(spectra, rectify = True)

from pymir.Deltas import getDeltas
# Compute deltas and delta-deltas
deltas = getDeltas([1, 2, 3, 4, 5])
print deltas # [1, 2, 3, 4, 5, 0.5, 0.8, 1.0, 0.8, 0.5, 0.13, 0.11, 0.0, -0.11, -0.13] (rounded)
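
getDeltas works on any per-frame feature trajectory. For example (a sketch reusing fixedFrames from the workflow above), deltas of the RMS track:

    rmsTrack = [frame.rms() for frame in fixedFrames]
    rmsDeltas = getDeltas(rmsTrack)  # static + delta + delta-delta, 3x the input length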

### Audio playback

Playback is provided on all AudioFile and Frame objects. Internal representation is 32-bit floating point.
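
For example (assuming the fixedFrames from the workflow above):

    fixedFrames[0].play()  # plays this frame through the default output device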
@@ -107,7 +120,7 @@ Playback is provided on all AudioFile and Frame objects. Internal representation

Naive chord estimation using a dictionary of the 24 major and minor triads only, represented as
normalized chroma vectors. Similarity is measured using the cosine similarity function. The closest
match is returned (as a string).

This is called a naive approach because it does not consider preceding chords, which could improve
chord estimation accuracy.
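
For illustration, the matching step amounts to the following (a minimal sketch, not the library's actual code; `chroma` is a 12-bin chroma vector and `chordDict` a hypothetical name-to-template dictionary):

    import numpy as np

    def closestChord(chroma, chordDict):
        # chordDict maps chord names (e.g. 'Cmaj') to normalized
        # 12-bin chroma templates
        bestName, bestSim = None, -1.0
        for name, template in chordDict.items():
            # Cosine similarity between the frame's chroma and the template
            sim = np.dot(chroma, template) / \
                (np.linalg.norm(chroma) * np.linalg.norm(template))
            if sim > bestSim:
                bestName, bestSim = name, sim
        return bestName  # the closest match, as a string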
pymir/Deltas.py: 44 additions, 0 deletions
@@ -0,0 +1,44 @@
import numpy as np


def getDeltas(seq, derivative=2, winsize=2):
    '''
    Stacks the given static features with their deltas, computed
    recursively up to the given derivative order (2 = deltas and
    delta-deltas). Expects a list and returns a list.
    '''
    # First stack the static features
    ret = seq[:]
    for i in xrange(derivative):
        seq = _getSingleDeltas(seq, winsize)
        ret.extend(seq)
    return ret


def _getSingleDeltas(feature, winsize=2):
    '''
    Calculates a single delta pass for the given feature sequence.
    Returns only the deltas; stacking them onto the static features
    is done in getDeltas.
    '''
    ret = []
    # Calculate the denominator: 2 * \sum_{n=1}^{N} n^2
    denom = 2. * sum(x**2 for x in xrange(1, winsize + 1))
    # Iterate over all frames
    for frameindex in xrange(len(feature)):
        # We calculate the weighted difference between the frames on
        # either side of the current frame. At the borders, where
        # frameindex +- k falls out of range, the most recent in-range
        # value is reused (initially the current frame itself),
        # effectively clamping the sequence at its edges.
        fwd = bwd = feature[frameindex]
        innersum = 0
        # k ranges from 1 to winsize, covering the adjacent frames on
        # each side
        for k in xrange(1, winsize + 1):
            # Check whether the neighbouring indices are in range; if
            # not, keep the clamped values. Since at least one side is
            # always in range (except for zero- or one-frame inputs),
            # the result does not degenerate to zero.
            if frameindex + k < len(feature):
                fwd = feature[frameindex + k]
            if frameindex - k >= 0:
                bwd = feature[frameindex - k]
            innersum += k * (fwd - bwd)
        ret.append(innersum / denom)
    return ret
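
For reference, this implements the standard delta-regression formula used in HTK-style speech front ends, with W the winsize parameter above and out-of-range neighbours clamped to the nearest in-range frame:

    d_t = \frac{\sum_{k=1}^{W} k \, (x_{t+k} - x_{t-k})}{2 \sum_{k=1}^{W} k^2}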
pymir/Frame.py: 111 additions, 37 deletions
@@ -11,32 +11,32 @@
from numpy import *
from numpy.lib import stride_tricks

import scipy

import matplotlib.pyplot as plt

import pymir
from pymir import Spectrum, Transforms, LinearPredictiveAnalysis
import pyaudio


class Frame(numpy.ndarray):

def __new__(subtype, shape, dtype=float, buffer=None, offset=0,
                strides=None, order=None):
# Create the ndarray instance of our type, given the usual
# ndarray input arguments. This will call the standard
# ndarray constructor, but return an object of our type.
# It also triggers a call to InfoArray.__array_finalize__
obj = numpy.ndarray.__new__(subtype, shape, dtype, buffer, offset, strides,
                                    order)

obj.sampleRate = 0
obj.channels = 1
obj.format = pyaudio.paFloat32

# Finally, we must return the newly created object:
return obj

def __array_finalize__(self, obj):
# ``self`` is a new object resulting from
# ndarray.__new__(InfoArray, ...), therefore it only has
@@ -49,7 +49,8 @@ def __array_finalize__(self, obj):
# (we're in the middle of the InfoArray.__new__
# constructor, and self.info will be set when we return to
# InfoArray.__new__)
        if obj is None:
            return
# From view casting - e.g arr.view(InfoArray):
# obj is arr
# (type(obj) can be InfoArray)
@@ -61,62 +62,134 @@ def __array_finalize__(self, obj):
# method sees all creation of default objects - with the
# InfoArray.__new__ constructor, but also with
# arr.view(InfoArray).

self.sampleRate = getattr(obj, 'sampleRate', None)
self.channels = getattr(obj, 'channels', None)
self.format = getattr(obj, 'format', None)

# We do not need to return anything

#####################
# Frame methods
#####################

def cqt(self):
"""
Compute the Constant Q Transform (CQT)
"""
return Transforms.cqt(self)

def dct(self):
"""
Compute the Discrete Cosine Transform (DCT)
"""
return Transforms.dct(self)

    def energy(self, windowSize=256):
"""
Compute the energy of this frame
"""
N = len(self)

window = numpy.hamming(windowSize)
window.shape = (windowSize, 1)

        n = N - windowSize  # number of windowed samples

        # Create a view of the signal whose shape is (n, windowSize). Use
        # stride_tricks such that each stride jumps only one item.
        p = numpy.power(self, 2)
        s = stride_tricks.as_strided(
            p, shape=(n, windowSize), strides=(self.itemsize, self.itemsize))
e = numpy.dot(s, window) / windowSize
e.shape = (e.shape[0], )
return e

def lpcc(self, lpcorder=None, cepsorder=None):
        '''
        Function: lpcc
        Summary: Computes the linear predictive cepstral components (LPCC).
                 LPCC is computed from the LPC coefficients and their error
                 term; the returned values live in the cepstral domain.
        Examples: audiofile = AudioFile.open('file.wav', 16000)
                  frames = audiofile.frames(512, np.hamming)
                  for frame in frames:
                      frame.lpcc()
        Attributes:
            @param (self):
            @param (lpcorder) default=None: Input order for computing the LPC coefficients.
            @param (cepsorder) default=None: Output order for computing the LPCC components.
        Returns: A list of LPCC components of size cepsorder + 1, or len(self) if cepsorder is None
        '''
coefs, err_term = LinearPredictiveAnalysis.lpc(self, lpcorder)
return LinearPredictiveAnalysis.lpcc(coefs, err_term, cepsorder)

def lpc(self, order=None):
        '''
        Function: lpc
        Summary: Computes the LPC (linear predictive coding) coefficients for this frame.
        Examples: audiofile = AudioFile.open('file.wav', 16000)
                  frames = audiofile.frames(512, np.hamming)
                  for frame in frames:
                      frame.lpc()
        Attributes:
            @param (self): A time-domain frame, usually obtained via .frames()
            @param (order) default=None: Order of the LPC analysis. If None is given,
                           len(self) - 1 is used, so the returned list has len(self)
                           entries; otherwise it has order + 1 entries.
        Returns: A list of LPC coefficients
        '''
        # Only return the coefficients, not the error term (held at index [1])
        return LinearPredictiveAnalysis.lpc(self, order)[0]

    def lsp(self, order=None, rectify=True):
        '''
        Function: lsp
        Summary: Computes the line spectrum pairs (also called line spectral
                 frequencies, LSF). Does not use any fancy algorithm, just
                 np.roots to solve for the zeros of the polynomial
                 A(z) = 0.5 * (P(z) + Q(z))
        Examples: audiofile = AudioFile.open('file.wav', 16000)
                  frames = audiofile.frames(512, np.hamming)
                  for frame in frames:
                      frame.lsp()
        Attributes:
            @param (self):
            @param (order) default=None: Order of the underlying LPC analysis.
                           Default is the length of the current frame.
            @param (rectify) default=True: If True, only the positive frequencies
                             are returned; if False, the (symmetric) negative
                             values are returned as well.
        Returns: A list of size order (or len(self) if no order is specified)
                 representing the line spectrum pairs.
        '''
        coefs, _ = LinearPredictiveAnalysis.lpc(self, order)
        return LinearPredictiveAnalysis.lsp(coefs, rectify)
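
    # For reference (standard LSP construction; the polynomials themselves
    # live in LinearPredictiveAnalysis, which is not shown in this diff):
    # from the LPC polynomial A(z) of order p, form
    #   P(z) = A(z) + z^-(p+1) * A(1/z)
    #   Q(z) = A(z) - z^-(p+1) * A(1/z)
    # The roots of P and Q lie on the unit circle and interleave; their
    # angles are the line spectral frequencies, and A(z) = (P(z) + Q(z)) / 2.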

def autocorr(self, order=None):
        '''
        Function: autocorr
        Summary: Calculates the autocorrelation of this frame up to the given order
        Examples: f = AudioFile.open('audiofile.wav', 16000)
                  for frame in f.frames(512, numpy.hamming):
                      frame.autocorr()
        Attributes:
            @param (self):
            @param (order) default=None: Order of the autocorrelation; the returned
                           array has length order + 1. If order is None,
                           len(self) - 1 is used as the default.
        Returns: Array of length order + 1 with the autocorrelation coefficients
        '''
if order is None:
order = len(self) - 1
return [sum(self[n] * self[n + tau] for n in xrange(len(self) - tau))
for tau in xrange(order + 1)]
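
    # A quick sanity check of the definition above, worked by hand
    # (illustrative input, not from the library's tests):
    #   autocorr([1, 2, 3], order=2)
    #   tau = 0: 1*1 + 2*2 + 3*3 = 14
    #   tau = 1: 1*2 + 2*3       = 8
    #   tau = 2: 1*3             = 3
    # so the result is [14, 8, 3].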

def frames(self, frameSize, windowFunction=None):
"""
Decompose this frame into smaller frames of size frameSize
"""
frames = []
start = 0
end = frameSize
while start < len(self):

            if windowFunction is None:
                frames.append(self[start:end])
            else:
                window = windowFunction(frameSize)
                window.shape = (frameSize, 1)
                window = numpy.squeeze(window)
frame = self[start:end]
if len(frame) < len(window):
# Zero pad
@@ -128,7 +201,7 @@ def frames(self, frameSize, windowFunction=None):

diff = len(window) - len(frame)
frame = numpy.append(frame, [0] * diff)

if frameType == "AudioFile":
frame = frame.view(pymir.AudioFile)
else:
Expand All @@ -138,22 +211,22 @@ def frames(self, frameSize, windowFunction = None):
frame.sampleRate = sampleRate
frame.channels = channels
frame.format = format

windowedFrame = frame * window
frames.append(windowedFrame)

start = start + frameSize
end = end + frameSize

return frames

def framesFromOnsets(self, onsets):
"""
Decompose into frames based on onset start time-series
"""
frames = []
for i in range(0, len(onsets) - 1):
            frames.append(self[onsets[i]:onsets[i + 1]])

return frames

@@ -164,7 +237,8 @@ def play(self):
"""
# Create the stream
p = pyaudio.PyAudio()
        stream = p.open(
            format=self.format, channels=self.channels, rate=self.sampleRate, output=True)

# Write the audio data to the stream
audioData = self.tostring()
@@ -191,11 +265,11 @@ def rms(self):
sum = 0
for i in range(0, len(self)):
sum = sum + self[i] ** 2

sum = sum / (1.0 * len(self))

return math.sqrt(sum)

# Spectrum
def spectrum(self):
"""
@@ -212,5 +286,5 @@ def zcr(self):
for i in range(1, len(self)):
if (self[i - 1] * self[i]) < 0:
zcr = zcr + 1
        return zcr / (1.0 * len(self))