This paste brought to you by Paste Code. View Raw

  1.  
  2. import numpy as np
  3. from matplotlib import pyplot as plt
  4. import scipy.io.wavfile as wav
  5. from numpy.lib import stride_tricks
  6.  
  7. """ short time fourier transform of audio signal """
  8. def stft(sig, frameSize, overlapFac=0.5, window=np.hanning):
  9.     win = window(frameSize)
  10.     hopSize = int(frameSize - np.floor(overlapFac * frameSize))
  11.  
  12.     # zeros at beginning (thus center of 1st window should be for sample nr. 0)  
  13.     samples = np.append(np.zeros(int(np.floor(frameSize/2.0))), sig)    
  14.     # cols for windowing
  15.     cols = np.ceil( (len(samples) - frameSize) / float(hopSize)) + 1
  16.     # zeros at end (thus samples can be fully covered by frames)
  17.     samples = np.append(samples, np.zeros(frameSize))
  18.  
  19.     frames = stride_tricks.as_strided(samples, shape=(int(cols), frameSize), strides=(samples.strides[0]*hopSize, samples.strides[0])).copy()
  20.     frames *= win
  21.  
  22.     return np.fft.rfft(frames)    
  23.  
  24. """ scale frequency axis logarithmically """    
  25. def logscale_spec(spec, sr=44100, factor=20.):
  26.     timebins, freqbins = np.shape(spec)
  27.  
  28.     scale = np.linspace(0, 1, freqbins) ** factor
  29.     scale *= (freqbins-1)/max(scale)
  30.     scale = np.unique(np.round(scale))
  31.  
  32.     # create spectrogram with new freq bins
  33.     newspec = np.complex128(np.zeros([timebins, len(scale)]))
  34.     for i in range(0, len(scale)):        
  35.         if i == len(scale)-1:
  36.             newspec[:,i] = np.sum(spec[:,int(scale[i]):], axis=1)
  37.         else:        
  38.             newspec[:,i] = np.sum(spec[:,int(scale[i]):int(scale[i+1])], axis=1)
  39.  
  40.     # list center freq of bins
  41.     allfreqs = np.abs(np.fft.fftfreq(freqbins*2, 1./sr)[:freqbins+1])
  42.     freqs = []
  43.     for i in range(0, len(scale)):
  44.         if i == len(scale)-1:
  45.             freqs += [np.mean(allfreqs[int(scale[i]):])]
  46.         else:
  47.             freqs += [np.mean(allfreqs[int(scale[i]):int(scale[i+1])])]
  48.  
  49.     return newspec, freqs
  50.  
  51. """ plot spectrogram"""
  52. def plotstft(audiopath, binsize=2**10, plotpath=None, colormap="jet"):
  53.     samplerate, samples = wav.read(audiopath)
  54.  
  55.     s = stft(samples, binsize)
  56.  
  57.     sshow, freq = logscale_spec(s, factor=1.0, sr=samplerate)
  58.  
  59.     ims = 20.*np.log10(np.abs(sshow)/10e-6) # amplitude to decibel
  60.  
  61.     timebins, freqbins = np.shape(ims)
  62.  
  63.     print("timebins: ", timebins)
  64.     print("freqbins: ", freqbins)
  65.  
  66.     plt.figure(figsize=(15, 7.5))
  67.     plt.imshow(np.transpose(ims), origin="lower", aspect="auto", cmap=colormap, interpolation="none")
  68.     plt.colorbar()
  69.  
  70.     plt.xlabel("time (s)")
  71.     plt.ylabel("frequency (hz)")
  72.     plt.xlim([0, timebins-1])
  73.     plt.ylim([0, freqbins])
  74.  
  75.     xlocs = np.float32(np.linspace(0, timebins-1, 5))
  76.     plt.xticks(xlocs, ["%.02f" % l for l in ((xlocs*len(samples)/timebins)+(0.5*binsize))/samplerate])
  77.     ylocs = np.int16(np.round(np.linspace(0, freqbins-1, 10)))
  78.     plt.yticks(ylocs, ["%.02f" % freq[i] for i in ylocs])
  79.  
  80.     if plotpath:
  81.         plt.savefig(plotpath, bbox_inches="tight")
  82.     else:
  83.         plt.show()
  84.  
  85.     plt.clf()
  86.  
  87.     return ims
  88.  
  89. ims = plotstft("testvoice.wav")