This repository has been archived by the owner on Feb 14, 2019. It is now read-only.

Added LPC, LPCC and LSF/LSP plus some delta computation for common features #8

Open: wants to merge 5 commits into master
README.md: 14 additions, 1 deletion
@@ -18,6 +18,9 @@ PyMIR is a Python library for common tasks in Music Information Retrieval (MIR)
* RMS
* Spectrum (FFT)
* Zero-crossing rate
* Linear Predictive Components (LPC)
* Linear Predictive Cepstral Components (LPCC) from LPC
* Line Spectrum Pairs (LSP) / Line Spectrum Frequencies (LSF) from LPC
* Spectral feature extraction (Spectrum class)
* Spectral Centroid
* Spectral Flatness
@@ -34,6 +37,7 @@ PyMIR is a Python library for common tasks in Music Information Retrieval (MIR)
* Naive pitch estimation
* Onset detectors (energy, flux)
* Spectral Flux
* Delta computation of features (useful for speech processing)

## Examples

@@ -72,6 +76,9 @@ The standard workflow for working with PyMIR is:
fixedFrames[0].plot() # Plot using matplotlib
fixedFrames[0].rms() # Root-mean-squared amplitude
    fixedFrames[0].zcr()             # Zero-crossing rate
fixedFrames[0].lpc() # LPC, with order = len(fixedFrames[0])-1
fixedFrames[0].lpcc() # LPCC, with order = len(fixedFrames[0])-1
fixedFrames[0].lsp() # LSP/LSF, with order = len(fixedFrames[0])-1
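
The analysis orders can also be set explicitly via the keyword arguments shown in the diff below (the values here are illustrative only; a 12th-order analysis is a common choice for speech):

    fixedFrames[0].lpc(order=12)                    # 13 LPC coefficients
    fixedFrames[0].lpcc(lpcorder=12, cepsorder=12)  # 13 LPCC components
    fixedFrames[0].lsp(order=12, rectify=False)     # also return the negative frequencies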

### Extracting spectral features
# Compute the spectra of each frame
@@ -96,6 +103,12 @@ The standard workflow for working with PyMIR is:
# Compute the spectral flux
flux = SpectralFlux.spectralFlux(spectra, rectify = True)

from pymir.Deltas import getDeltas
# Compute deltas and delta-deltas
deltas = getDeltas([1, 2, 3, 4, 5])
print deltas # [1, 2, 3, 4, 5, 0.5, 0.8, 1.0, 0.8, 0.5, 0.13, 0.11, 0.0, -0.11, -0.13] (rounded)
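
getDeltas works on any per-frame feature trajectory. For example (a sketch reusing fixedFrames from the workflow above), deltas of the RMS track:

    rmsTrack = [frame.rms() for frame in fixedFrames]
    rmsDeltas = getDeltas(rmsTrack)  # static + delta + delta-delta, 3x the input length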

### Audio playback

Playback is provided on all AudioFile and Frame objects. Internal representation is 32-bit floating point.
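
For example (assuming the fixedFrames from the workflow above):

    fixedFrames[0].play()  # plays this frame through the default output device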
@@ -107,7 +120,7 @@ Playback is provided on all AudioFile and Frame objects. Internal representation

Naive chord estimation using a dictionary of the 24 major and minor triads only, represented as
normalized chroma vectors. Similarity is measured using the cosine similarity function. The closest
match is returned (as a string).

This is called a naive approach because it does not consider preceding chords, which could improve
chord estimation accuracy.
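
For illustration, the matching step amounts to the following (a minimal sketch, not the library's actual code; `chroma` is a 12-bin chroma vector and `chordDict` a hypothetical name-to-template dictionary):

    import numpy as np

    def closestChord(chroma, chordDict):
        # chordDict maps chord names (e.g. 'Cmaj') to normalized
        # 12-bin chroma templates
        bestName, bestSim = None, -1.0
        for name, template in chordDict.items():
            # Cosine similarity between the frame's chroma and the template
            sim = np.dot(chroma, template) / \
                (np.linalg.norm(chroma) * np.linalg.norm(template))
            if sim > bestSim:
                bestName, bestSim = name, sim
        return bestName  # the closest match, as a string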
pymir/Deltas.py: 44 additions, 0 deletions
@@ -0,0 +1,44 @@
import numpy as np


def getDeltas(seq, derivative=2, winsize=2):
    '''
    Stacks the given static features with their deltas, computed
    recursively up to the given derivative order (2 = deltas and
    delta-deltas). Expects a list and returns a list.
    '''
    # First stack the static features
    ret = seq[:]
    for i in xrange(derivative):
        seq = _getSingleDeltas(seq, winsize)
        ret.extend(seq)
    return ret


def _getSingleDeltas(feature, winsize=2):
    '''
    Calculates a single delta pass for the given feature sequence.
    Returns only the deltas; stacking them onto the static features
    is done in getDeltas.
    '''
    ret = []
    # Calculate the denominator: 2 * \sum_{n=1}^{N} n^2
    denom = 2. * sum(x**2 for x in xrange(1, winsize + 1))
    # Iterate over all frames
    for frameindex in xrange(len(feature)):
        # We calculate the weighted difference between the frames on
        # either side of the current frame. At the borders, where
        # frameindex +- k falls out of range, the most recent in-range
        # value is reused (initially the current frame itself),
        # effectively clamping the sequence at its edges.
        fwd = bwd = feature[frameindex]
        innersum = 0
        # k ranges from 1 to winsize, covering the adjacent frames on
        # each side
        for k in xrange(1, winsize + 1):
            # Check whether the neighbouring indices are in range; if
            # not, keep the clamped values. Since at least one side is
            # always in range (except for zero- or one-frame inputs),
            # the result does not degenerate to zero.
            if frameindex + k < len(feature):
                fwd = feature[frameindex + k]
            if frameindex - k >= 0:
                bwd = feature[frameindex - k]
            innersum += k * (fwd - bwd)
        ret.append(innersum / denom)
    return ret
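
For reference, this implements the standard delta-regression formula used in HTK-style speech front ends, with W the winsize parameter above and out-of-range neighbours clamped to the nearest in-range frame:

    d_t = \frac{\sum_{k=1}^{W} k \, (x_{t+k} - x_{t-k})}{2 \sum_{k=1}^{W} k^2}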
pymir/Frame.py: 111 additions, 37 deletions
@@ -11,32 +11,32 @@
from numpy import *
from numpy.lib import stride_tricks

import scipy

import matplotlib.pyplot as plt

import pymir
from pymir import Spectrum, Transforms, LinearPredictiveAnalysis
import pyaudio


class Frame(numpy.ndarray):

def __new__(subtype, shape, dtype=float, buffer=None, offset=0,
                strides=None, order=None):
# Create the ndarray instance of our type, given the usual
# ndarray input arguments. This will call the standard
# ndarray constructor, but return an object of our type.
# It also triggers a call to InfoArray.__array_finalize__
obj = numpy.ndarray.__new__(subtype, shape, dtype, buffer, offset, strides,
                                    order)

obj.sampleRate = 0
obj.channels = 1
obj.format = pyaudio.paFloat32

# Finally, we must return the newly created object:
return obj

def __array_finalize__(self, obj):
# ``self`` is a new object resulting from
# ndarray.__new__(InfoArray, ...), therefore it only has
@@ -49,7 +49,8 @@ def __array_finalize__(self, obj):
# (we're in the middle of the InfoArray.__new__
# constructor, and self.info will be set when we return to
# InfoArray.__new__)
        if obj is None:
            return
# From view casting - e.g arr.view(InfoArray):
# obj is arr
# (type(obj) can be InfoArray)
@@ -61,62 +62,134 @@ def __array_finalize__(self, obj):
# method sees all creation of default objects - with the
# InfoArray.__new__ constructor, but also with
# arr.view(InfoArray).

self.sampleRate = getattr(obj, 'sampleRate', None)
self.channels = getattr(obj, 'channels', None)
self.format = getattr(obj, 'format', None)

# We do not need to return anything

#####################
# Frame methods
#####################

def cqt(self):
"""
Compute the Constant Q Transform (CQT)
"""
return Transforms.cqt(self)

def dct(self):
"""
Compute the Discrete Cosine Transform (DCT)
"""
return Transforms.dct(self)

    def energy(self, windowSize=256):
"""
Compute the energy of this frame
"""
N = len(self)

window = numpy.hamming(windowSize)
window.shape = (windowSize, 1)

        n = N - windowSize  # number of windowed samples

        # Create a view of the signal whose shape is (n, windowSize). Use
        # stride_tricks such that each stride jumps only one item.
        p = numpy.power(self, 2)
        s = stride_tricks.as_strided(
            p, shape=(n, windowSize), strides=(self.itemsize, self.itemsize))
e = numpy.dot(s, window) / windowSize
e.shape = (e.shape[0], )
return e

def lpcc(self, lpcorder=None, cepsorder=None):
        '''
        Function: lpcc
        Summary: Computes the linear predictive cepstral components (LPCC).
                 LPCC is computed from the LPC coefficients and their error
                 term; the returned values live in the cepstral domain.
        Examples: audiofile = AudioFile.open('file.wav', 16000)
                  frames = audiofile.frames(512, np.hamming)
                  for frame in frames:
                      frame.lpcc()
        Attributes:
            @param (self):
            @param (lpcorder) default=None: Input order for computing the LPC coefficients.
            @param (cepsorder) default=None: Output order for computing the LPCC components.
        Returns: A list of LPCC components of size cepsorder + 1, or len(self) if cepsorder is None
        '''
coefs, err_term = LinearPredictiveAnalysis.lpc(self, lpcorder)
return LinearPredictiveAnalysis.lpcc(coefs, err_term, cepsorder)

def lpc(self, order=None):
        '''
        Function: lpc
        Summary: Computes the LPC (linear predictive coding) coefficients for this frame.
        Examples: audiofile = AudioFile.open('file.wav', 16000)
                  frames = audiofile.frames(512, np.hamming)
                  for frame in frames:
                      frame.lpc()
        Attributes:
            @param (self): A time-domain frame, usually obtained via .frames()
            @param (order) default=None: Order of the LPC analysis. If None is given,
                           len(self) - 1 is used, so the returned list has len(self)
                           entries; otherwise it has order + 1 entries.
        Returns: A list of LPC coefficients
        '''
        # Only return the coefficients, not the error term (held at index [1])
        return LinearPredictiveAnalysis.lpc(self, order)[0]

    def lsp(self, order=None, rectify=True):
        '''
        Function: lsp
        Summary: Computes the line spectrum pairs (also called line spectral
                 frequencies, LSF). Does not use any fancy algorithm, just
                 np.roots to solve for the zeros of the polynomial
                 A(z) = 0.5 * (P(z) + Q(z))
        Examples: audiofile = AudioFile.open('file.wav', 16000)
                  frames = audiofile.frames(512, np.hamming)
                  for frame in frames:
                      frame.lsp()
        Attributes:
            @param (self):
            @param (order) default=None: Order of the underlying LPC analysis.
                           Default is the length of the current frame.
            @param (rectify) default=True: If True, only the positive frequencies
                             are returned; if False, the (symmetric) negative
                             values are returned as well.
        Returns: A list of size order (or len(self) if no order is specified)
                 representing the line spectrum pairs.
        '''
        coefs, _ = LinearPredictiveAnalysis.lpc(self, order)
        return LinearPredictiveAnalysis.lsp(coefs, rectify)
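
    # For reference (standard LSP construction; the polynomials themselves
    # live in LinearPredictiveAnalysis, which is not shown in this diff):
    # from the LPC polynomial A(z) of order p, form
    #   P(z) = A(z) + z^-(p+1) * A(1/z)
    #   Q(z) = A(z) - z^-(p+1) * A(1/z)
    # The roots of P and Q lie on the unit circle and interleave; their
    # angles are the line spectral frequencies, and A(z) = (P(z) + Q(z)) / 2.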

def autocorr(self, order=None):
        '''
        Function: autocorr
        Summary: Calculates the autocorrelation of this frame up to the given order
        Examples: f = AudioFile.open('audiofile.wav', 16000)
                  for frame in f.frames(512, numpy.hamming):
                      frame.autocorr()
        Attributes:
            @param (self):
            @param (order) default=None: Order of the autocorrelation; the returned
                           array has length order + 1. If order is None,
                           len(self) - 1 is used as the default.
        Returns: Array of length order + 1 with the autocorrelation coefficients
        '''
if order is None:
order = len(self) - 1
return [sum(self[n] * self[n + tau] for n in xrange(len(self) - tau))
for tau in xrange(order + 1)]
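
    # A quick sanity check of the definition above, worked by hand
    # (illustrative input, not from the library's tests):
    #   autocorr([1, 2, 3], order=2)
    #   tau = 0: 1*1 + 2*2 + 3*3 = 14
    #   tau = 1: 1*2 + 2*3       = 8
    #   tau = 2: 1*3             = 3
    # so the result is [14, 8, 3].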

def frames(self, frameSize, windowFunction=None):
"""
Decompose this frame into smaller frames of size frameSize
"""
frames = []
start = 0
end = frameSize
while start < len(self):

            if windowFunction is None:
                frames.append(self[start:end])
            else:
                window = windowFunction(frameSize)
                window.shape = (frameSize, 1)
                window = numpy.squeeze(window)
frame = self[start:end]
if len(frame) < len(window):
# Zero pad
@@ -128,7 +201,7 @@ def frames(self, frameSize, windowFunction=None):

diff = len(window) - len(frame)
frame = numpy.append(frame, [0] * diff)

if frameType == "AudioFile":
frame = frame.view(pymir.AudioFile)
else:
Expand All @@ -138,22 +211,22 @@ def frames(self, frameSize, windowFunction = None):
frame.sampleRate = sampleRate
frame.channels = channels
frame.format = format

windowedFrame = frame * window
frames.append(windowedFrame)

start = start + frameSize
end = end + frameSize

return frames

def framesFromOnsets(self, onsets):
"""
Decompose into frames based on onset start time-series
"""
frames = []
for i in range(0, len(onsets) - 1):
            frames.append(self[onsets[i]:onsets[i + 1]])

return frames

@@ -164,7 +237,8 @@ def play(self):
"""
# Create the stream
p = pyaudio.PyAudio()
        stream = p.open(
            format=self.format, channels=self.channels, rate=self.sampleRate, output=True)

# Write the audio data to the stream
audioData = self.tostring()
@@ -191,11 +265,11 @@ def rms(self):
sum = 0
for i in range(0, len(self)):
sum = sum + self[i] ** 2

sum = sum / (1.0 * len(self))

return math.sqrt(sum)

# Spectrum
def spectrum(self):
"""
@@ -212,5 +286,5 @@ def zcr(self):
for i in range(1, len(self)):
if (self[i - 1] * self[i]) < 0:
zcr = zcr + 1
        return zcr / (1.0 * len(self))