
Some improvements to UI
Stephen Marsland committed Jun 13, 2016
1 parent 84b0546 commit f616238
Showing 6 changed files with 423 additions and 371 deletions.
55 changes: 52 additions & 3 deletions Features.py
@@ -10,9 +10,24 @@

# TODO:
# Put some more stuff in here and use it!
# So what are good features? MFCC is a start, what else? Wavelets?
# Add chroma features and Tonnetz
# Prosodic features (pitch, duration, intensity)
# Spectral statistics
# Frequency modulation
# Linear Predictive Coding? -> from scikits.talkbox import lpc (see also audiolazy)
# Frechet distance for DTW?
# Pick things from spectrogram

# Add something that plots some of these features for exploration, so that I can understand what the librosa functions produce, etc.

# And assemble a decent dataset of birdcalls to play with.
# Or in fact, two: kiwi, ruru, bittern, and then a much bigger one

class Features:
# This class implements various feature extraction algorithms for the AviaNZ interface
# In essence, it will be given a segment as a region of audiodata (between start and stop points)
# Classifiers will then be called on the features
# Currently it's just MFCC. DTW is in here too.
# TODO: test what there is so far!

@@ -54,8 +69,8 @@ def dtw_path(self,d):
return xpath, ypath
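The dtw() routine that builds the cost matrix passed to dtw_path() is not visible in this hunk; as a rough sketch of the standard cumulative-cost recurrence (illustrative names and a simple absolute-difference local cost, not necessarily what Features.py does):

import numpy as np

def dtw_matrix(x, y):
    # Cumulative cost d[i, j] of aligning the first i samples of x with the first j samples of y
    d = np.zeros((len(x) + 1, len(y) + 1))
    d[1:, 0] = np.inf
    d[0, 1:] = np.inf
    for i in range(1, len(x) + 1):
        for j in range(1, len(y) + 1):
            cost = abs(x[i - 1] - y[j - 1])
            # Cheapest way to reach (i, j): match, insertion, or deletion
            d[i, j] = cost + min(d[i - 1, j], d[i, j - 1], d[i - 1, j - 1])
    # Drop the padding row and column
    return d[1:, 1:]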

def get_mfcc(self):
# Use a library to get the MFCC coefficients
mfcc = librosa.feature.mfcc(data, sampleRate)
# Use librosa to get the MFCC coefficients
mfcc = librosa.feature.mfcc(self.data, self.sampleRate)
librosa.display.specshow(mfcc)

# Normalise
@@ -64,6 +79,40 @@ def get_mfcc(self):

return mfcc

def get_chroma(self):
# Use librosa to get the chroma coefficients from both the STFT and the constant-Q transform
cstft = librosa.feature.chroma_stft(self.data,self.sampleRate)
ccqt = librosa.feature.chroma_cqt(self.data,self.sampleRate)
return [cstft, ccqt]

def get_tonnetz(self):
# Use librosa to get the tonnetz (tonal centroid) features
tonnetz = librosa.feature.tonnetz(self.data,self.sampleRate)
return tonnetz

def get_spectral_features(self):
# Spectral statistics and the zero-crossing rate from librosa
s1 = librosa.feature.spectral_bandwidth(self.data,self.sampleRate)
s2 = librosa.feature.spectral_centroid(self.data,self.sampleRate)
s3 = librosa.feature.spectral_contrast(self.data,self.sampleRate)
s4 = librosa.feature.spectral_rolloff(self.data,self.sampleRate)

zcr = librosa.feature.zero_crossing_rate(self.data)
return [s1, s2, s3, s4, zcr]

def other_features(self):
# Notes on other librosa functions that might be useful; not wired up yet
librosa.fft_frequencies(self.sampleRate)
librosa.cqt_frequencies()
librosa.audio.get_duration()

# Estimate dominant frequency of STFT bins by parabolic interpolation
librosa.piptrack()

# Adaptive noise floor -> read up
librosa.feature.logamplitude()

librosa.onset.onset_detect()
librosa.onset.onset_strength()

def get_lpc(self,order=8):
# Linear Predictive Coding coefficients via scikits.talkbox
from scikits.talkbox import lpc
return lpc(self.data,order)
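scikits.talkbox may not be available; as a hedged alternative sketch, LPC coefficients can be estimated with the autocorrelation (Yule-Walker) method using scipy (illustrative only, not part of this commit):

import numpy as np
from scipy.linalg import solve_toeplitz

def lpc_autocorr(x, order):
    # Autocorrelation method: solve the Yule-Walker equations R a = r for the predictor a
    r = np.correlate(x, x, mode='full')[len(x) - 1:len(x) + order]
    a = solve_toeplitz(r[:order], r[1:order + 1])
    # Return the prediction-error filter: leading 1, then the negated predictor coefficients
    return np.concatenate(([1.0], -a))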

def testDTW(self):
x = [0, 0, 1, 1, 2, 4, 2, 1, 2, 0]
y = [1, 1, 1, 2, 2, 2, 2, 3, 2, 0]
@@ -75,4 +124,4 @@ def test():
a = Features()
a.testDTW()
pl.show()
test()
#test()
7 changes: 6 additions & 1 deletion Learning.py
@@ -10,10 +10,15 @@

# TODO:
# Put some stuff in here!
# Needs decision trees and recurrent NN as comparisons to Digby and Bagnall
# Plus whatever scikit-learn has :)
# Some deep learning stuff?

# Also, consider HMMs -> syllable ordering, etc.

class Learning:
# This class implements various learning algorithms for the AviaNZ interface
# Based on scikit-learn

def __init__(self,data):
def __init__(self,features):
pass
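The TODO above mentions decision trees and whatever scikit-learn provides; a minimal sketch of that direction, assuming features is an (n_samples, n_features) array and labels is a matching label vector (neither exists yet in this commit):

from sklearn.model_selection import train_test_split   # sklearn.cross_validation in older releases
from sklearn.tree import DecisionTreeClassifier

def train_tree(features, labels):
    # Hold out 30% of the segments, fit a decision tree, report accuracy on the held-out set
    trainX, testX, trainY, testY = train_test_split(features, labels, test_size=0.3)
    clf = DecisionTreeClassifier()
    clf.fit(trainX, trainY)
    return clf, clf.score(testX, testY)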
35 changes: 34 additions & 1 deletion Segment.py
@@ -10,9 +10,42 @@
# TODO:
# Put some stuff in here!
# Should at least do power, wavelets, then think about more
# Add Nirosha's approach of simultaneous segmentation and recognition using wavelets
# Try onset_detect from librosa
# Want to take each second or so and say yes or no for presence
# Should compute SNR
# Use spectrogram instead/as well

class Segment:
# This class implements various signal processing algorithms for the AviaNZ interface
def __init__(self,data):
pass
self.data = data
# This is the length of a window to average to get the power
self.length = 100
self.segments = []

def segmentByAmplitude(self,threshold):
# Mark samples above the amplitude threshold, then collect consecutive runs of them as segments
self.seg = np.where(self.data>threshold,1,0)
inSegment=False
for i in range(len(self.data)):
if self.seg[i] > 0:
if not inSegment:
inSegment = True
start = i
else:
if inSegment:
self.segments.append([start, i])
inSegment = False
# Close a segment that runs to the end of the data
if inSegment:
self.segments.append([start, len(self.data)])
return self.segments

def segmentByWavelet(self,threshold):
# Need to think about this. Basically should play with it (without the interface) and do some computations
# and plot the wavelet packets
pass

def SnNR(self,startSignal,startNoise):
pS = np.sum(self.data[startSignal:startSignal+self.length]**2)/self.length
pN = np.sum(self.data[startNoise:startNoise+self.length]**2)/self.length
return 10.*np.log10(pS/pN)
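A quick usage sketch for the two methods above on a synthetic amplitude track (thresholds and indices are illustrative, not from this commit):

import numpy as np

# Quiet, then a loud block of 2000 samples, then quiet again
data = np.concatenate([0.01 * np.ones(3000), 0.8 * np.ones(2000), 0.01 * np.ones(3000)])

seg = Segment(data)
print(seg.segmentByAmplitude(0.5))   # [[3000, 5000]]
print(seg.SnNR(3500, 0))             # about 38 dB: loud block vs. leading quiet section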
49 changes: 39 additions & 10 deletions SignalProc.py
@@ -10,15 +10,18 @@
# TODO:
# Denoising needs work
# Add in bandpass filtering
# Also downsampling (use librosa) -- a rough sketch of both is below this TODO list
# Some tidying needed
# Test the different windows, play with threshold multiplier -> how to set? Look up log amplitude scaling
# What else should be added into here?
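For the bandpass filtering and downsampling items above, a rough sketch using a scipy Butterworth filter plus librosa.resample (the librosa call signature varies between versions, so treat the positional call as an assumption):

import librosa
from scipy import signal

def bandpass(data, sampleRate, low, high, order=4):
    # Butterworth bandpass between low and high Hz, applied forwards and backwards (zero phase)
    nyquist = 0.5 * sampleRate
    b, a = signal.butter(order, [low / nyquist, high / nyquist], btype='band')
    return signal.filtfilt(b, a, data)

def downsample(data, sampleRate, targetRate=16000):
    # Resample the audio to targetRate with librosa (older versions take positional arguments)
    return librosa.resample(data, sampleRate, targetRate), targetRate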

class SignalProc:
# This class implements various signal processing algorithms for the AviaNZ interface
def __init__(self,data=[],sampleRate=0,window_width=256,incr=128,maxSearchDepth=20):
def __init__(self,data=[],sampleRate=0,window_width=256,incr=128,maxSearchDepth=20,thresholdMultiplier=4.5):
self.window_width=window_width
self.incr=incr
self.maxsearch=maxSearchDepth
self.thresholdMultiplier = thresholdMultiplier
if data != []:
self.data = data
self.sampleRate = sampleRate
@@ -99,13 +102,11 @@ def BestLevel(self):

return level-1

def denoise(self,threshold='soft'):
def denoise(self,thresholdType='soft'):
# Perform wavelet denoising. Can use soft or hard thresholding
level = 0
self.maxlevel = self.BestLevel()
print self.maxlevel

# TODO: reuse previous tree instead of making new one!
self.wp = pywt.WaveletPacket(data=self.data, wavelet='dmey', mode='symmetric',maxlevel=self.maxlevel)

# nlevels = self.maxsearch
@@ -117,10 +118,10 @@ def denoise(self,threshold='soft'):
det1 = self.wp['d'].data
# Estimate the noise sigma from the median absolute deviation of the finest detail coefficients; 0.6745 converts MAD to a standard deviation for Gaussian noise
sigma = np.median(np.abs(det1)) / 0.6745
threshold = 4.5*sigma
threshold = self.thresholdMultiplier*sigma
for level in range(self.maxlevel):
for n in self.wp.get_level(level, 'natural'):
if threshold = 'hard':
if thresholdType == 'hard':
# Hard thresholding
n.data = np.where(np.abs(n.data)<threshold,0.0,n.data)
else:
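The soft-thresholding branch is cut off by the diff view; as a generic sketch of what soft thresholding does to wavelet coefficients (shrink everything toward zero by the threshold, rather than only zeroing small values as hard thresholding does; not necessarily the exact line in this file):

import numpy as np

def soft_threshold(coeffs, threshold):
    # Zero coefficients whose magnitude is below the threshold and shrink the rest toward zero
    return np.sign(coeffs) * np.maximum(np.abs(coeffs) - threshold, 0.0)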
@@ -142,23 +143,30 @@ def denoise(self,threshold='soft'):

return self.wData

def writefile(self,name):
def writeFile(self,name):
# Save a sound file for after denoising
# Need them to be 16 bit integers
self.wData *= 32768.0
self.wData = self.wData.astype('int16')
wavfile.write(name,self.sampleRate, self.wData)

def loadData(self):
def loadData(self,fileName):
# Load a sound file and normalise it
self.sampleRate, self.data = wavfile.read(fileName)
# self.sampleRate, self.data = wavfile.read('../Birdsong/more1.wav')
# self.sampleRate, self.data = wavfile.read('../Birdsong/Denoise/Primary dataset/kiwi/female/female1.wav')
#self.sampleRate, self.data = wavfile.read('ruru.wav')
self.sampleRate, self.data = wavfile.read('tril1.wav')
#self.sampleRate, self.data = wavfile.read('tril1.wav')
# self.sampleRate, self.data = wavfile.read('male1.wav')
# The constant is for normalisation (2^15, as 16 bit numbers)
self.data = self.data.astype('float') / 32768.0

def denoiseFile(fileName,thresholdMultiplier):
sp = SignalProc(thresholdMultiplier=thresholdMultiplier)
sp.loadData(fileName)
sp.denoise()
sp.writeFile(fileName[:-4]+'denoised'+str(sp.thresholdMultiplier)+fileName[-4:])

def test():
#pl.ion()
a = SignalProc()
@@ -180,7 +188,28 @@ def test():
#a.play()
a.writeFile('out.wav')
pl.show()

def show():
#pl.ion()
a = SignalProc()
a.loadData('Sound Files/male1.wav')
sg = a.spectrogram(a.data)
pl.figure()
pl.plot(a.data)
pl.figure()
pl.imshow(sg,cmap='gray_r')
pl.show()

#show()
#pl.show()
#test()
#pl.show()

#pl.ion()
#pl.ion()

#denoiseFile('tril1.wav',1.5)
#denoiseFile('tril1.wav',2.5)
#denoiseFile('tril1.wav',3.5)
#denoiseFile('tril1.wav',4.0)
#denoiseFile('tril1.wav',4.5)
#denoiseFile('tril1.wav',5.0)
