マイクの音声をリアルタイムでスペクトラム表示！Pythonで音の世界を可視化しよう

音は目に見えないものですが、それを可視化できたら面白いですよね？

この記事では、Pythonを使ってマイクの音声をリアルタイムでスペクトラム表示するプログラムをご紹介します。音の周波数特性を視覚的に捉えることで、音響解析や音楽処理など、様々な応用が考えられます。

スペクトラムって何？
使用ライブラリ
プログラムの概要
プログラムの構成
- micspectrum_hmi.py
- micspectrum_hmi_support.py
実行結果
本プログラムの改良ポイント
まとめ

スペクトラムって何？

そもそもスペクトラムとは、複雑な波形を構成する様々な周波数の成分を、それぞれの強さと共に視覚的に表現したものです。

虹を思い浮かべてみてください。太陽光は一見白色ですが、プリズムを通すと様々な色の光に分かれますよね？それと同じように、音も様々な周波数の波が組み合わさってできています。スペクトラム表示を使うことで、どの周波数の音がどれくらいの強さで含まれているのかを分析することができるのです。

使用ライブラリ

今回ご紹介するプログラムは、以下のライブラリを使用しています。
TkinterはPythonに標準で付属しているため、それ以外のライブラリをそれぞれpipでインストールしてください。

Tkinter: Pythonの標準GUIライブラリ。ユーザーインターフェースの作成に使用します。
pyaudio: Pythonでオーディオ入出力を行うためのライブラリ。マイクからの音声入力に使用します。
matplotlib: Pythonのグラフ描画ライブラリ。スペクトラム表示に使用します。
NumPy: Pythonの数値計算ライブラリ。音声データを処理するために使用します。
librosa: Pythonの音声解析ライブラリ。スペクトログラムの計算などに使用します。

プログラムの概要

プログラムは、大きく分けて以下の機能を持ちます。

マイクデバイスの選択: 使用可能なマイクデバイスをリスト表示し、選択できるようにします。
音声入力: 選択したマイクデバイスから音声を入力します。
スペクトラム変換: 入力された音声をリアルタイムでスペクトラムに変換します。
スペクトラム表示: 変換されたスペクトラムを、横軸が時間・縦軸が周波数（対数スケール）のスペクトログラムとしてグラフ表示します。

プログラムの構成

プログラムは、micspectrum_hmi.pyとmicspectrum_hmi_support.pyの2つのファイルから構成されています。

micspectrum_hmi.py

GUIのレイアウトやウィジェットの配置などを定義します。

#! /usr/bin/env python3
#  -*- coding: utf-8 -*-
#
# GUI module generated by PAGE version 8.0
#  in conjunction with Tcl version 8.6
#    Oct 12, 2024 11:01:57 PM JST  platform: Windows NT

import sys
import tkinter as tk
import tkinter.ttk as ttk
from tkinter.constants import *
import os.path
import librosa
import librosa.display

_location = os.path.dirname(__file__)

import micspectrum_hmi_support

_bgcolor = '#d9d9d9'
_fgcolor = '#000000'
_tabfg1 = 'black' 
_tabfg2 = 'white' 
_bgmode = 'light' 
_tabbg1 = '#d9d9d9' 
_tabbg2 = 'gray40' 

_style_code_ran = 0
def _style_code():
    '''Set up the ttk theme and default font, at most once per process.

    Tries to source ``themes/default.tcl`` located next to this module,
    then selects the 'default' ttk theme (or 'winnative' on Windows).
    The module-level flag ``_style_code_ran`` makes later calls no-ops.
    '''
    global _style_code_ran
    if _style_code_ran:
        return
    try:
        micspectrum_hmi_support.root.tk.call(
            'source', os.path.join(_location, 'themes', 'default.tcl'))
    except Exception:
        # Best effort: the theme file is optional, so any failure to load
        # it is ignored.  Unlike a bare ``except`` this no longer swallows
        # KeyboardInterrupt / SystemExit.
        pass
    style = ttk.Style()
    style.theme_use('default')
    style.configure('.', font="TkDefaultFont")
    if sys.platform == "win32":
        style.theme_use('winnative')
    _style_code_ran = 1

class Toplevel1:
    '''Main window layout: a plot canvas, a device combobox, a Connect
    button and a scrolled text area.'''

    def __init__(self, top=None):
        '''Configure the toplevel window and create its child widgets.

        top is the toplevel containing window; its geometry, title and
        colours are set here and all widgets are placed inside it.
        '''
        # --- toplevel window -------------------------------------------
        top.geometry("843x450+516+208")
        top.minsize(120, 1)
        top.maxsize(3844, 1061)
        top.resizable(1, 1)
        top.title("Toplevel 0")
        top.configure(
            background="#d9d9d9",
            highlightbackground="#d9d9d9",
            highlightcolor="#000000",
        )

        self.top = top
        # Text variable bound to the device combobox below.
        self.combobox = tk.StringVar()

        # --- drawing area (right side) ---------------------------------
        plot_area = tk.Canvas(self.top)
        plot_area.place(relx=0.356, rely=0.044,
                        relheight=0.918, relwidth=0.625)
        plot_area.configure(
            background="#d9d9d9",
            borderwidth="2",
            cursor="fleur",
            highlightbackground="#d9d9d9",
            highlightcolor="#000000",
            insertbackground="#000000",
            relief="ridge",
            selectbackground="#d9d9d9",
            selectforeground="black",
        )
        self.Canvas1 = plot_area

        # ttk styling is initialised before the first ttk widget is made.
        _style_code()

        # --- device selector -------------------------------------------
        device_box = ttk.Combobox(self.top)
        device_box.place(relx=0.013, rely=0.044,
                         relheight=0.064, relwidth=0.235)
        device_box.configure(
            font="-family {Yu Gothic UI} -size 9",
            textvariable=self.combobox,
        )
        self.TCombobox1 = device_box

        # --- connect / disconnect button -------------------------------
        connect_btn = ttk.Button(self.top)
        connect_btn.place(relx=0.262, rely=0.053, height=26, width=65)
        connect_btn.configure(
            command=micspectrum_hmi_support.connect_button_on_click,
            text='''Connect''',
            compound='left',
        )
        self.TButton1 = connect_btn

        # --- text area (left side) -------------------------------------
        info_text = ScrolledText(self.top)
        info_text.place(relx=0.012, rely=0.133,
                        relheight=0.829, relwidth=0.336)
        info_text.configure(
            background="white",
            font="TkTextFont",
            foreground="black",
            highlightbackground="#d9d9d9",
            highlightcolor="#000000",
            insertbackground="#000000",
            insertborderwidth="3",
            selectbackground="#d9d9d9",
            selectforeground="black",
            wrap="none",
        )
        self.Scrolledtext1 = info_text

# The following code is added to facilitate the Scrolled widgets you specified.
class AutoScroll(object):
    '''Configure the scrollbars for a widget.

    Mixin meant to be combined with a scrollable Tk widget class: it
    creates scrollbars inside ``master``, wires them to the widget's
    xview/yview, and grids the widget and scrollbars into ``master``.
    '''
    def __init__(self, master):
        '''Create and attach the scrollbars inside the container ``master``.'''
        #  Rozen. Added the try-except clauses so that this class
        #  could be used for scrolled entry widget for which vertical
        #  scrolling is not supported. 5/7/14.
        try:
            vsb = ttk.Scrollbar(master, orient='vertical', command=self.yview)
        except:
            pass
        hsb = ttk.Scrollbar(master, orient='horizontal', command=self.xview)
        # If the vertical scrollbar could not be created above, ``vsb`` is
        # unbound; the guarded statements below then raise and are skipped.
        try:
            self.configure(yscrollcommand=self._autoscroll(vsb))
        except:
            pass
        self.configure(xscrollcommand=self._autoscroll(hsb))
        # Widget in cell (0, 0); scrollbars to the right and underneath.
        self.grid(column=0, row=0, sticky='nsew')
        try:
            vsb.grid(column=1, row=0, sticky='ns')
        except:
            pass
        hsb.grid(column=0, row=1, sticky='ew')
        # Let the widget cell absorb any extra space in the container.
        master.grid_columnconfigure(0, weight=1)
        master.grid_rowconfigure(0, weight=1)
        # Copy geometry methods of master  (taken from ScrolledText.py)
        # After this, pack/grid/place calls on the widget are forwarded to
        # the container, so callers position the container as a whole.
        methods = tk.Pack.__dict__.keys() | tk.Grid.__dict__.keys() \
                  | tk.Place.__dict__.keys()
        for meth in methods:
            if meth[0] != '_' and meth not in ('config', 'configure'):
                setattr(self, meth, getattr(master, meth))

    @staticmethod
    def _autoscroll(sbar):
        '''Hide and show scrollbar as needed.

        Returns a callback taking (first, last) that removes ``sbar`` from
        the grid when the whole range [0, 1] is visible, shows it
        otherwise, and then forwards the range to ``sbar.set``.
        '''
        def wrapped(first, last):
            first, last = float(first), float(last)
            if first <= 0 and last >= 1:
                sbar.grid_remove()
            else:
                sbar.grid()
            sbar.set(first, last)
        return wrapped

    def __str__(self):
        '''Return the string form of the container (``self.master``).'''
        return str(self.master)

def _create_container(func):
    '''Decorator for a scrolled widget's __init__.

    The decorated initialiser receives a freshly created ttk Frame (a child
    of the requested master) instead of the master itself, so the widget
    and its scrollbars live together in that frame.  Entering/leaving the
    frame with the pointer turns the mouse-wheel bindings on/off.
    '''
    def wrapped(cls, master, **kw):
        frame = ttk.Frame(master)

        def on_enter(event):
            _bound_to_mousewheel(event, frame)

        def on_leave(event):
            _unbound_to_mousewheel(event, frame)

        frame.bind('<Enter>', on_enter)
        frame.bind('<Leave>', on_leave)
        return func(cls, frame, **kw)
    return wrapped

class ScrolledText(AutoScroll, tk.Text):
    '''A standard Tkinter Text widget with scrollbars that will
    automatically show/hide as needed.'''
    # _create_container replaces ``master`` with a new ttk.Frame created
    # inside the caller's master before this body runs.
    @_create_container
    def __init__(self, master, **kw):
        # The Text widget is initialised first; AutoScroll.__init__ then
        # uses the widget's configure/xview/yview/grid methods.
        tk.Text.__init__(self, master, **kw)
        AutoScroll.__init__(self, master)

import platform
def _bound_to_mousewheel(event, widget):
    '''Install application-wide mouse-wheel bindings for a container.

    The scroll target is the first child of ``widget``.  Windows and macOS
    use <MouseWheel> events; other platforms use <Button-4>/<Button-5>.
    The Shift variants scroll horizontally.
    '''
    target = widget.winfo_children()[0]

    def scroll_vertical(e):
        _on_mousewheel(e, target)

    def scroll_horizontal(e):
        _on_shiftmouse(e, target)

    if platform.system() in ('Windows', 'Darwin'):
        bindings = (
            ('<MouseWheel>', scroll_vertical),
            ('<Shift-MouseWheel>', scroll_horizontal),
        )
    else:
        bindings = (
            ('<Button-4>', scroll_vertical),
            ('<Button-5>', scroll_vertical),
            ('<Shift-Button-4>', scroll_horizontal),
            ('<Shift-Button-5>', scroll_horizontal),
        )
    for sequence, handler in bindings:
        target.bind_all(sequence, handler)

def _unbound_to_mousewheel(event, widget):
    '''Remove the application-wide mouse-wheel bindings again.

    Unbinds the same event sequences that _bound_to_mousewheel installs
    for the current platform.
    '''
    if platform.system() in ('Windows', 'Darwin'):
        sequences = ('<MouseWheel>', '<Shift-MouseWheel>')
    else:
        sequences = ('<Button-4>', '<Button-5>',
                     '<Shift-Button-4>', '<Shift-Button-5>')
    for sequence in sequences:
        widget.unbind_all(sequence)

def _on_mousewheel(event, widget):
    '''Scroll ``widget`` vertically in response to a mouse-wheel event.

    Windows: event.delta is divided by 120; macOS: event.delta is used
    directly; elsewhere button 4 scrolls up one unit and button 5 down one
    unit (other buttons are ignored).
    '''
    system = platform.system()
    if system == 'Windows':
        units = -1*int(event.delta/120)
    elif system == 'Darwin':
        units = -1*int(event.delta)
    elif event.num == 4:
        units = -1
    elif event.num == 5:
        units = 1
    else:
        return
    widget.yview_scroll(units, 'units')

def _on_shiftmouse(event, widget):
    '''Scroll ``widget`` horizontally in response to a Shift+wheel event.

    Windows: event.delta is divided by 120; macOS: event.delta is used
    directly; elsewhere button 4 scrolls left one unit and button 5 right
    one unit (other buttons are ignored).
    '''
    system = platform.system()
    if system == 'Windows':
        units = -1*int(event.delta/120)
    elif system == 'Darwin':
        units = -1*int(event.delta)
    elif event.num == 4:
        units = -1
    elif event.num == 5:
        units = 1
    else:
        return
    widget.xview_scroll(units, 'units')
def start_up():
    '''Launch the application by running the support module's main().'''
    micspectrum_hmi_support.main()


if __name__ == '__main__':
    # Running this file directly starts the same entry point.
    start_up()

micspectrum_hmi_support.py

マイク入力、スペクトラム変換、グラフ表示などのロジックを記述します。

#! /usr/bin/env python3
#  -*- coding: utf-8 -*-
#
# Support module generated by PAGE version 8.0
#  in conjunction with Tcl version 8.6
#    Oct 12, 2024 10:06:52 PM JST  platform: Windows NT

import sys
import tkinter as tk
import tkinter.ttk as ttk
from tkinter.constants import *

import micspectrum_hmi

import pyaudio
import threading
import matplotlib.pyplot as plot
import numpy
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
import librosa
import librosa.display

devices = []
audio = None
stream = None
fig = None
ax = None
canvas = None

def main(*args):
    '''Main entry point for the application.

    Builds the Tk root and the Toplevel1 layout, fills the device
    combobox, embeds an (initially empty) matplotlib figure in the canvas
    area and then runs the Tk main loop until the window is closed.
    '''
    global root, _top1, _w1, fig, ax, canvas

    root = tk.Tk()
    root.protocol('WM_DELETE_WINDOW', root.destroy)

    # Creates a toplevel widget.
    _top1 = root
    _w1 = micspectrum_hmi.Toplevel1(_top1)

    # Populate the device list and react to selection changes.
    device_combo = _w1.TCombobox1
    list_microphones(device_combo)
    device_combo.bind("<<ComboboxSelected>>", device_on_selected)

    # Embed a matplotlib figure inside the Tk canvas area.
    fig, ax = plot.subplots()
    canvas = FigureCanvasTkAgg(fig, master=_w1.Canvas1)
    figure_widget = canvas.get_tk_widget()
    figure_widget.pack(fill=BOTH, expand=True)

    # Start with an empty plot: no ticks or tick labels, just a thin
    # horizontal line at y=0.
    for clear_axis in (ax.set_xticks, ax.set_yticks,
                       ax.set_xticklabels, ax.set_yticklabels):
        clear_axis([])
    ax.axhline(y=0, color='black', linewidth=0.5)

    canvas.draw()

    root.mainloop()


def list_microphones( combo ):
    '''Enumerate input-capable audio devices and list them in ``combo``.

    Queries the devices of host API 0 through PyAudio.  Every device with
    at least one input channel is recorded in the module-level ``devices``
    list (index, name, channel counts, default sample rate) and gets one
    entry "Input Device  <index>" in the combobox, in the same order.

    The ``devices`` list is replaced in place on every call, so calling
    this function again refreshes the list instead of appending
    duplicates.  The temporary PyAudio instance is always terminated, even
    if enumeration fails.
    '''
    global devices

    found = []
    p = pyaudio.PyAudio()
    try:
        host_api = p.get_host_api_info_by_index(0)
        device_count = host_api.get('deviceCount') or 0
        for i in range(device_count):
            # Query each device once and reuse the result.
            info = p.get_device_info_by_host_api_device_index(0, i)
            if (info.get('maxInputChannels') or 0) > 0:
                found.append({
                    "index": i,
                    "name": info.get('name'),
                    "maxInputChannels": info.get('maxInputChannels'),
                    "maxOutputChannels": info.get('maxOutputChannels'),
                    "defaultSampleRate": info.get('defaultSampleRate')
                })
    finally:
        p.terminate()

    # Keep ``devices`` and the combobox values aligned position by position.
    devices[:] = found
    combo["values"] = [f"Input Device  {entry['index']}" for entry in found]
    
    
def device_on_selected(event):
    '''Show details of the device chosen in the combobox.

    Bound to <<ComboboxSelected>>.  The entry is looked up by the
    combobox's selected *position*, which matches the order of the
    module-level ``devices`` list (the same mapping that
    connect_button_on_click uses).  The number shown in the label is the
    PyAudio device index, which is not a valid position in ``devices``
    whenever some devices have no input channels.

    Does nothing if the current selection does not correspond to a known
    device.
    '''
    global _w1
    # Widget that fired the event (the device combobox).
    widget = event.widget
    position = widget.current()
    if not 0 <= position < len(devices):
        return

    device = devices[position]
    device_info = (
            f"Index: {device['index']}, \n"
            f"name: {device['name']}, \n"
            f"maxInputChannels: {device['maxInputChannels']}, \n"
            f"maxOutputChannels: {device['maxOutputChannels']}, \n"
            f"defaultSampleRate: {device['defaultSampleRate']}, \n"
        )
    # Replace the whole text area content with the new description.
    _w1.Scrolledtext1.delete('1.0', 'end')
    _w1.Scrolledtext1.insert('end', device_info)
    
def connect_button_on_click(*args):
    '''Toggle between capturing from the selected device and being idle.

    When the button reads "Connect": open the device selected in the
    combobox and start a daemon thread running read_plot_data.  The button
    label only changes to "Disconnect" once the stream was actually
    opened; with no device selected, or if opening fails, the label stays
    "Connect" so the UI does not claim a connection that does not exist.

    When the button reads "Disconnect": stop the stream and restore the
    "Connect" label.
    '''
    global _w1
    button = _w1.TButton1
    if button['text'] == "Connect":
        # Position of the chosen entry; -1 when nothing is selected.
        selected_index = _w1.TCombobox1.current()
        if not 0 <= selected_index < len(devices):
            return
        # Pass the PyAudio device index of the selected entry.
        audiostart(devices[selected_index]["index"])
        if stream is None:
            # audiostart reports failure by leaving ``stream`` as None.
            return
        button['text'] = "Disconnect"
        # Daemon thread so it never keeps the process alive on exit.
        plotting_thread = threading.Thread(target=read_plot_data, daemon=True)
        plotting_thread.start()
    else:
        button['text'] = "Connect"
        audiostop()
    

def audiostart( device_index ):
    '''Open a mono 16-bit, 44.1 kHz input stream on ``device_index``.

    On success the module-level ``audio`` (PyAudio instance) and
    ``stream`` are set.  On failure (OSError) the error is printed, the
    PyAudio instance created here is terminated so it does not leak, and
    both ``audio`` and ``stream`` are left as None; callers detect failure
    by checking ``stream is None``.
    '''
    global audio, stream
    pa = None
    try:
        pa = pyaudio.PyAudio()
        opened = pa.open(format=pyaudio.paInt16,
                         rate=44100,
                         channels=1,
                         input_device_index=device_index,  # device to capture from
                         input=True,
                         frames_per_buffer=10240)
    except OSError as e:
        print(f"Error opening stream: {e}")
        if pa is not None:
            pa.terminate()
        audio = None
        stream = None  # signals failure to the caller
        # An error message could additionally be shown in the GUI here.
    else:
        audio = pa
        stream = opened
        # 必要に応じてエラーメッセージをGUIに表示する処理を追加
                
def audiostop():
    '''Stop and release the current input stream and PyAudio instance.

    The module-level ``stream`` and ``audio`` are reset to None *before*
    the underlying objects are closed, so a reader loop that checks
    ``stream`` sees the stop request as early as possible.  Resetting
    ``audio`` as well makes the function safe to call repeatedly (the
    PyAudio instance is terminated only once).
    '''
    global audio, stream
    active_stream, active_audio = stream, audio
    stream = None
    audio = None

    if active_stream is not None:
        try:
            active_stream.stop_stream()
            active_stream.close()
        except OSError as e:
            # The stream may already be broken; still release PyAudio below.
            print(f"Error closing stream: {e}")
    if active_audio is not None:
        active_audio.terminate()

def read_plot_data():
    '''Worker loop: read audio blocks and redraw the spectrogram.

    Runs until the stream this thread started with is no longer the
    current module-level ``stream`` (i.e. after audiostop() or a
    reconnect), or until reading fails.  Each iteration reads 10240
    frames, computes an STFT power spectrogram in dB and draws it with a
    time x-axis and a log-frequency y-axis.

    The block size (10240) and sample rate (44100) must match the values
    used in audiostart.

    NOTE(review): the figure is updated from this worker thread while the
    Tk main loop runs in the main thread; confirm this is acceptable for
    the Tk/matplotlib versions in use.
    '''
    global stream, canvas, fig, ax
    # Bind to the stream that exists at thread start so that a stream
    # opened later (reconnect) is never picked up by this old thread.
    active = stream
    if active is None or canvas is None or fig is None or ax is None:
        return

    while stream is active:
        try:
            # Drawing is slow compared to capture; dropped input frames
            # must not abort the display, so overflow is not an error.
            data = active.read(10240, exception_on_overflow=False)
        except OSError as e:
            print(f"Error reading from stream: {e}")
            # Only tear down if our stream is still the current one; after
            # a manual disconnect there is nothing left to stop.
            if stream is active:
                audiostop()
            break

        audiodata = numpy.frombuffer(data, dtype='int16')
        # Compute the power spectrogram and convert it to a dB scale.
        stft_result = librosa.stft(audiodata.astype(numpy.float32), n_fft=2048, hop_length=512)
        spectrogram = numpy.abs(stft_result)**2
        log_spectrogram = librosa.power_to_db(spectrogram)

        ax.cla()  # clear the previous frame

        # Draw the spectrogram for this block.
        librosa.display.specshow(log_spectrogram, sr=44100, hop_length=512, x_axis='time', y_axis='log', ax=ax)
        canvas.draw()
    
if __name__ == '__main__':
    # The GUI module is imported above as ``micspectrum_hmi``; the previous
    # name ``mic_hmi`` was never defined and raised NameError on start.
    micspectrum_hmi.start_up()