From Murat, 3 Months ago, written in Python.
Embed
  1.  
  2. import numpy as np
  3. from matplotlib import pyplot as plt
  4. import scipy.io.wavfile as wav
  5. from numpy.lib import stride_tricks
  6.  
  7. """ short time fourier transform of audio signal """
  8. def stft(sig, frameSize, overlapFac=0.5, window=np.hanning):
  9.     win = window(frameSize)
  10.     hopSize = int(frameSize - np.floor(overlapFac * frameSize))
  11.  
  12.     # zeros at beginning (thus center of 1st window should be for sample nr. 0)  
  13.     samples = np.append(np.zeros(int(np.floor(frameSize/2.0))), sig)    
  14.     # cols for windowing
  15.     cols = np.ceil( (len(samples) - frameSize) / float(hopSize)) + 1
  16.     # zeros at end (thus samples can be fully covered by frames)
  17.     samples = np.append(samples, np.zeros(frameSize))
  18.  
  19.     frames = stride_tricks.as_strided(samples, shape=(int(cols), frameSize), strides=(samples.strides[0]*hopSize, samples.strides[0])).copy()
  20.     frames *= win
  21.  
  22.     return np.fft.rfft(frames)    
  23.  
  24. """ scale frequency axis logarithmically """    
  25. def logscale_spec(spec, sr=44100, factor=20.):
  26.     timebins, freqbins = np.shape(spec)
  27.  
  28.     scale = np.linspace(0, 1, freqbins) ** factor
  29.     scale *= (freqbins-1)/max(scale)
  30.     scale = np.unique(np.round(scale))
  31.  
  32.     # create spectrogram with new freq bins
  33.     newspec = np.complex128(np.zeros([timebins, len(scale)]))
  34.     for i in range(0, len(scale)):        
  35.         if i == len(scale)-1:
  36.             newspec[:,i] = np.sum(spec[:,int(scale[i]):], axis=1)
  37.         else:        
  38.             newspec[:,i] = np.sum(spec[:,int(scale[i]):int(scale[i+1])], axis=1)
  39.  
  40.     # list center freq of bins
  41.     allfreqs = np.abs(np.fft.fftfreq(freqbins*2, 1./sr)[:freqbins+1])
  42.     freqs = []
  43.     for i in range(0, len(scale)):
  44.         if i == len(scale)-1:
  45.             freqs += [np.mean(allfreqs[int(scale[i]):])]
  46.         else:
  47.             freqs += [np.mean(allfreqs[int(scale[i]):int(scale[i+1])])]
  48.  
  49.     return newspec, freqs
  50.  
  51. """ plot spectrogram"""
  52. def plotstft(audiopath, binsize=2**10, plotpath=None, colormap="jet"):
  53.     samplerate, samples = wav.read(audiopath)
  54.  
  55.     s = stft(samples, binsize)
  56.  
  57.     sshow, freq = logscale_spec(s, factor=1.0, sr=samplerate)
  58.  
  59.     ims = 20.*np.log10(np.abs(sshow)/10e-6) # amplitude to decibel
  60.  
  61.     timebins, freqbins = np.shape(ims)
  62.  
  63.     print("timebins: ", timebins)
  64.     print("freqbins: ", freqbins)
  65.  
  66.     plt.figure(figsize=(15, 7.5))
  67.     plt.imshow(np.transpose(ims), origin="lower", aspect="auto", cmap=colormap, interpolation="none")
  68.     plt.colorbar()
  69.  
  70.     plt.xlabel("time (s)")
  71.     plt.ylabel("frequency (hz)")
  72.     plt.xlim([0, timebins-1])
  73.     plt.ylim([0, freqbins])
  74.  
  75.     xlocs = np.float32(np.linspace(0, timebins-1, 5))
  76.     plt.xticks(xlocs, ["%.02f" % l for l in ((xlocs*len(samples)/timebins)+(0.5*binsize))/samplerate])
  77.     ylocs = np.int16(np.round(np.linspace(0, freqbins-1, 10)))
  78.     plt.yticks(ylocs, ["%.02f" % freq[i] for i in ylocs])
  79.  
  80.     if plotpath:
  81.         plt.savefig(plotpath, bbox_inches="tight")
  82.     else:
  83.         plt.show()
  84.  
  85.     plt.clf()
  86.  
  87.     return ims
  88.  
  89. ims = plotstft("testvoice.wav")