
Some improvements to UI
Stephen Marsland committed Jun 13, 2016
1 parent 84b0546 commit f616238
Showing 6 changed files with 423 additions and 371 deletions.
55 changes: 52 additions & 3 deletions Features.py
@@ -10,9 +10,24 @@

# TODO:
# Put some more stuff in here and use it!
# So what are good features? MFCC is a start, what else? Wavelets?
# Add chroma features and Tonnetz
# Prosodic features (pitch, duration, intensity)
# Spectral statistics
# Frequency modulation
# Linear Predictive Coding? -> from scikits.talkbox import lpc (see also audiolazy)
# Frechet distance for DTW?
# Pick things from spectrogram

# Add something that plots some of these features for exploration, so that I can understand what the librosa functions produce, etc.

# And assemble a decent dataset of birdcalls to play with.
# Or in fact, two: kiwi, ruru, bittern, and then a much bigger one

class Features:
# This class implements various feature extraction algorithms for the AviaNZ interface
# In essence, it will be given a segment as a region of audiodata (between start and stop points)
# Classifiers will then be called on the features
# Currently it's just MFCC. DTW is in here too.
# TODO: test what there is so far!

@@ -54,8 +69,8 @@ def dtw_path(self,d):
return xpath, ypath
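The dtw() routine that builds the cost matrix passed to dtw_path() is not visible in this hunk; as a rough sketch of the standard cumulative-cost recurrence (illustrative names and a simple absolute-difference local cost, not necessarily what Features.py does):

import numpy as np

def dtw_matrix(x, y):
    # Cumulative cost d[i, j] of aligning the first i samples of x with the first j samples of y
    d = np.zeros((len(x) + 1, len(y) + 1))
    d[1:, 0] = np.inf
    d[0, 1:] = np.inf
    for i in range(1, len(x) + 1):
        for j in range(1, len(y) + 1):
            cost = abs(x[i - 1] - y[j - 1])
            # Cheapest way to reach (i, j): match, insertion, or deletion
            d[i, j] = cost + min(d[i - 1, j], d[i, j - 1], d[i - 1, j - 1])
    # Drop the padding row and column
    return d[1:, 1:]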

def get_mfcc(self):
# Use a library to get the MFCC coefficients
mfcc = librosa.feature.mfcc(data, sampleRate)
# Use librosa to get the MFCC coefficients
mfcc = librosa.feature.mfcc(self.data, self.sampleRate)
librosa.display.specshow(mfcc)

# Normalise
@@ -64,6 +79,40 @@ def get_mfcc(self):

return mfcc

def get_chroma(self):
# Use librosa to get the chroma coefficients from both the STFT and the constant-Q transform
cstft = librosa.feature.chroma_stft(self.data,self.sampleRate)
ccqt = librosa.feature.chroma_cqt(self.data,self.sampleRate)
return [cstft, ccqt]

def get_tonnetz(self):
# Use librosa to get the tonnetz (tonal centroid) features
tonnetz = librosa.feature.tonnetz(self.data,self.sampleRate)
return tonnetz

def get_spectral_features(self):
# Spectral statistics and the zero-crossing rate from librosa
s1 = librosa.feature.spectral_bandwidth(self.data,self.sampleRate)
s2 = librosa.feature.spectral_centroid(self.data,self.sampleRate)
s3 = librosa.feature.spectral_contrast(self.data,self.sampleRate)
s4 = librosa.feature.spectral_rolloff(self.data,self.sampleRate)

zcr = librosa.feature.zero_crossing_rate(self.data)
return [s1, s2, s3, s4, zcr]

def other_features(self):
# Notes on other librosa functions that might be useful; not wired up yet
librosa.fft_frequencies(self.sampleRate)
librosa.cqt_frequencies()
librosa.audio.get_duration()

# Estimate dominant frequency of STFT bins by parabolic interpolation
librosa.piptrack()

# Adaptive noise floor -> read up
librosa.feature.logamplitude()

librosa.onset.onset_detect()
librosa.onset.onset_strength()

def get_lpc(self,order=8):
# Linear Predictive Coding coefficients via scikits.talkbox
from scikits.talkbox import lpc
return lpc(self.data,order)
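scikits.talkbox may not be available; as a hedged alternative sketch, LPC coefficients can be estimated with the autocorrelation (Yule-Walker) method using scipy (illustrative only, not part of this commit):

import numpy as np
from scipy.linalg import solve_toeplitz

def lpc_autocorr(x, order):
    # Autocorrelation method: solve the Yule-Walker equations R a = r for the predictor a
    r = np.correlate(x, x, mode='full')[len(x) - 1:len(x) + order]
    a = solve_toeplitz(r[:order], r[1:order + 1])
    # Return the prediction-error filter: leading 1, then the negated predictor coefficients
    return np.concatenate(([1.0], -a))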

def testDTW(self):
x = [0, 0, 1, 1, 2, 4, 2, 1, 2, 0]
y = [1, 1, 1, 2, 2, 2, 2, 3, 2, 0]
@@ -75,4 +124,4 @@ def test():
a = Features()
a.testDTW()
pl.show()
test()
#test()
7 changes: 6 additions & 1 deletion Learning.py
@@ -10,10 +10,15 @@

# TODO:
# Put some stuff in here!
# Needs decision trees and recurrent NN as comparisons to Digby and Bagnall
# Plus whatever scikit-learn has :)
# Some deep learning stuff?

# Also, consider HMMs -> syllable ordering, etc.

class Learning:
# This class implements various learning algorithms for the AviaNZ interface
# Based on scikit-learn

def __init__(self,data):
def __init__(self,features):
pass
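The TODO above mentions decision trees and whatever scikit-learn provides; a minimal sketch of that direction, assuming features is an (n_samples, n_features) array and labels is a matching label vector (neither exists yet in this commit):

from sklearn.model_selection import train_test_split   # sklearn.cross_validation in older releases
from sklearn.tree import DecisionTreeClassifier

def train_tree(features, labels):
    # Hold out 30% of the segments, fit a decision tree, report accuracy on the held-out set
    trainX, testX, trainY, testY = train_test_split(features, labels, test_size=0.3)
    clf = DecisionTreeClassifier()
    clf.fit(trainX, trainY)
    return clf, clf.score(testX, testY)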
35 changes: 34 additions & 1 deletion Segment.py
@@ -10,9 +10,42 @@
# TODO:
# Put some stuff in here!
# Should at least do power, wavelets, then think about more
# Add Nirosha's approach of simultaneous segmentation and recognition using wavelets
# Try onset_detect from librosa
# Want to take each second or so and say yes or no for presence
# Should compute SNR
# Use spectrogram instead/as well

class Segment:
# This class implements various signal processing algorithms for the AviaNZ interface
def __init__(self,data):
pass
self.data = data
# This is the length of a window to average to get the power
self.length = 100
self.segments = []

def segmentByAmplitude(self,threshold):
# Mark samples above the amplitude threshold, then collect consecutive runs of them as segments
self.seg = np.where(self.data>threshold,1,0)
inSegment=False
for i in range(len(self.data)):
if self.seg[i] > 0:
if not inSegment:
inSegment = True
start = i
else:
if inSegment:
self.segments.append([start, i])
inSegment = False
# Close a segment that runs to the end of the data
if inSegment:
self.segments.append([start, len(self.data)])
return self.segments

def segmentByWavelet(self,threshold):
# Need to think about this. Basically should play with it (without the interface) and do some computations
# and plot the wavelet packets
pass

def SnNR(self,startSignal,startNoise):
pS = np.sum(self.data[startSignal:startSignal+self.length]**2)/self.length
pN = np.sum(self.data[startNoise:startNoise+self.length]**2)/self.length
return 10.*np.log10(pS/pN)
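A quick usage sketch for the two methods above on a synthetic amplitude track (thresholds and indices are illustrative, not from this commit):

import numpy as np

# Quiet, then a loud block of 2000 samples, then quiet again
data = np.concatenate([0.01 * np.ones(3000), 0.8 * np.ones(2000), 0.01 * np.ones(3000)])

seg = Segment(data)
print(seg.segmentByAmplitude(0.5))   # [[3000, 5000]]
print(seg.SnNR(3500, 0))             # about 38 dB: loud block vs. leading quiet section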
49 changes: 39 additions & 10 deletions SignalProc.py
@@ -10,15 +10,18 @@
# TODO:
# Denoising needs work
# Add in bandpass filtering
# Also downsampling (use librosa) -- a rough sketch of both is below this TODO list
# Some tidying needed
# Test the different windows, play with threshold multiplier -> how to set? Look up log amplitude scaling
# What else should be added into here?
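For the bandpass filtering and downsampling items above, a rough sketch using a scipy Butterworth filter plus librosa.resample (the librosa call signature varies between versions, so treat the positional call as an assumption):

import librosa
from scipy import signal

def bandpass(data, sampleRate, low, high, order=4):
    # Butterworth bandpass between low and high Hz, applied forwards and backwards (zero phase)
    nyquist = 0.5 * sampleRate
    b, a = signal.butter(order, [low / nyquist, high / nyquist], btype='band')
    return signal.filtfilt(b, a, data)

def downsample(data, sampleRate, targetRate=16000):
    # Resample the audio to targetRate with librosa (older versions take positional arguments)
    return librosa.resample(data, sampleRate, targetRate), targetRate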

class SignalProc:
# This class implements various signal processing algorithms for the AviaNZ interface
def __init__(self,data=[],sampleRate=0,window_width=256,incr=128,maxSearchDepth=20):
def __init__(self,data=[],sampleRate=0,window_width=256,incr=128,maxSearchDepth=20,thresholdMultiplier=4.5):
self.window_width=window_width
self.incr=incr
self.maxsearch=maxSearchDepth
self.thresholdMultiplier = thresholdMultiplier
if data != []:
self.data = data
self.sampleRate = sampleRate
@@ -99,13 +102,11 @@ def BestLevel(self):

return level-1

def denoise(self,threshold='soft'):
def denoise(self,thresholdType='soft'):
# Perform wavelet denoising. Can use soft or hard thresholding
level = 0
self.maxlevel = self.BestLevel()
print self.maxlevel

# TODO: reuse previous tree instead of making new one!
self.wp = pywt.WaveletPacket(data=self.data, wavelet='dmey', mode='symmetric',maxlevel=self.maxlevel)

# nlevels = self.maxsearch
@@ -117,10 +118,10 @@ def denoise(self,threshold='soft'):
det1 = self.wp['d'].data
# Estimate the noise sigma from the median absolute deviation of the finest detail coefficients; 0.6745 converts MAD to a standard deviation for Gaussian noise
sigma = np.median(np.abs(det1)) / 0.6745
threshold = 4.5*sigma
threshold = self.thresholdMultiplier*sigma
for level in range(self.maxlevel):
for n in self.wp.get_level(level, 'natural'):
if threshold = 'hard':
if thresholdType == 'hard':
# Hard thresholding
n.data = np.where(np.abs(n.data)<threshold,0.0,n.data)
else:
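The soft-thresholding branch is cut off by the diff view; as a generic sketch of what soft thresholding does to wavelet coefficients (shrink everything toward zero by the threshold, rather than only zeroing small values as hard thresholding does; not necessarily the exact line in this file):

import numpy as np

def soft_threshold(coeffs, threshold):
    # Zero coefficients whose magnitude is below the threshold and shrink the rest toward zero
    return np.sign(coeffs) * np.maximum(np.abs(coeffs) - threshold, 0.0)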
@@ -142,23 +143,30 @@ def denoise(self,threshold='soft'):

return self.wData

def writefile(self,name):
def writeFile(self,name):
# Save a sound file for after denoising
# Need them to be 16 bit integers
self.wData *= 32768.0
self.wData = self.wData.astype('int16')
wavfile.write(name,self.sampleRate, self.wData)

def loadData(self):
def loadData(self,fileName):
# Load a sound file and normalise it
self.sampleRate, self.data = wavfile.read(fileName)
# self.sampleRate, self.data = wavfile.read('../Birdsong/more1.wav')
# self.sampleRate, self.data = wavfile.read('../Birdsong/Denoise/Primary dataset/kiwi/female/female1.wav')
#self.sampleRate, self.data = wavfile.read('ruru.wav')
self.sampleRate, self.data = wavfile.read('tril1.wav')
#self.sampleRate, self.data = wavfile.read('tril1.wav')
# self.sampleRate, self.data = wavfile.read('male1.wav')
# The constant is for normalisation (2^15, as 16 bit numbers)
self.data = self.data.astype('float') / 32768.0

def denoiseFile(fileName,thresholdMultiplier):
sp = SignalProc(thresholdMultiplier=thresholdMultiplier)
sp.loadData(fileName)
sp.denoise()
sp.writeFile(fileName[:-4]+'denoised'+str(sp.thresholdMultiplier)+fileName[-4:])

def test():
#pl.ion()
a = SignalProc()
@@ -180,7 +188,28 @@ def test():
#a.play()
a.writeFile('out.wav')
pl.show()

def show():
#pl.ion()
a = SignalProc()
a.loadData('Sound Files/male1.wav')
sg = a.spectrogram(a.data)
pl.figure()
pl.plot(a.data)
pl.figure()
pl.imshow(sg,cmap='gray_r')
pl.show()

#show()
#pl.show()
#test()
#pl.show()

#pl.ion()
#pl.ion()

#denoiseFile('tril1.wav',1.5)
#denoiseFile('tril1.wav',2.5)
#denoiseFile('tril1.wav',3.5)
#denoiseFile('tril1.wav',4.0)
#denoiseFile('tril1.wav',4.5)
#denoiseFile('tril1.wav',5.0)
