Wake Word Plugins

WakeWord plugins classify audio and report if a certain word or sound is present or not

These plugins usually correspond to the name of the voice assistant, "hey mycroft", but can also be used for other purposes

List of Wake Word plugins

Plugin	Type
ovos-ww-plugin-pocketsphinx	phonemes
ovos-ww-plugin-vosk	text samples
ovos-ww-plugin-snowboy	model
ovos-ww-plugin-precise	model
ovos-ww-plugin-precise-lite	model
ovos-ww-plugin-nyumaya	model
ovos-ww-plugin-nyumaya-legacy	model
neon_ww_plugin_efficientwordnet	model
mycroft-porcupine-plugin	model
ovos-ww-plugin-hotkeys	keyboard

Standalone Usage

first lets get some boilerplate ouf of the way for the microphone handling logic

import pyaudio

# helper class
class CyclicAudioBuffer:
    def __init__(self, duration=0.98, initial_data=None,
                 sample_rate=16000, sample_width=2):
        self.size = self.duration_to_bytes(duration, sample_rate, sample_width)
        initial_data = initial_data or self.get_silence(self.size)
        # Get at most size bytes from the end of the initial data
        self._buffer = initial_data[-self.size:]

    @staticmethod
    def duration_to_bytes(duration, sample_rate=16000, sample_width=2):
        return int(duration * sample_rate) * sample_width

    @staticmethod
    def get_silence(num_bytes):
        return b'\0' * num_bytes

    def append(self, data):
        """Add new data to the buffer, and slide out data if the buffer is full
        Arguments:
            data (bytes): binary data to append to the buffer. If buffer size
                          is exceeded the oldest data will be dropped.
        """
        buff = self._buffer + data
        if len(buff) > self.size:
            buff = buff[-self.size:]
        self._buffer = buff

    def get(self):
        """Get the binary data."""
        return self._buffer

# pyaudio params
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
CHUNK = 1024
MAX_RECORD_SECONDS = 20
SAMPLE_WIDTH = pyaudio.get_sample_size(FORMAT)
audio = pyaudio.PyAudio()

# start Recording
stream = audio.open(channels=CHANNELS, format=FORMAT,
    rate=RATE, frames_per_buffer=CHUNK, input=True)


def load_plugin():
    # Wake word initialization
    config = {"model": "path/to/hey_computer.model"}
    return MyHotWord("hey computer", config=config)


def listen_for_ww(plug):
    # TODO - see examples below
    return False

plug = load_plugin()

print(f"Waiting for wake word {MAX_RECORD_SECONDS} seconds")
found = listen_for_ww(plug)

if found:
    print("Found wake word!")
else:
    print("No wake word found")

# stop everything
plug.stop()
stream.stop_stream()
stream.close()
audio.terminate()

new style plugins

New style plugins expect to receive live audio, they may keep their own cyclic buffers internally


def listen_for_ww(plug):
    for i in range(0, int(RATE / CHUNK * MAX_RECORD_SECONDS)):
        data = stream.read(CHUNK)
        # feed data directly to streaming prediction engines
        plug.update(data)
        # streaming engines return result here
        found = plug.found_wake_word(data)
        if found:
            return True

old style plugins (DEPRECATED)

Old style plugins expect to receive ~3 seconds of audio data at once

def listen_for_ww(plug):
    # used for old style non-streaming wakeword (deprecated)
    audio_buffer = CyclicAudioBuffer(plug.expected_duration,
                                     sample_rate=RATE, sample_width=SAMPLE_WIDTH)
    for i in range(0, int(RATE / CHUNK * MAX_RECORD_SECONDS)):
        data = stream.read(CHUNK)
        # add data to rolling buffer, used by non-streaming engines
        audio_buffer.append(data)
        # non-streaming engines check the byte_data in audio_buffer
        audio_data = audio_buffer.get()
        found = plug.found_wake_word(audio_data)
        if found:
            return True

new + old style plugins (backwards compatibility)

if you are unsure what kind of plugin you will be using you can be compatible with both approaches like ovos-core

def listen_for_ww(plug):
    # used for old style non-streaming wakeword (deprecated)
    audio_buffer = CyclicAudioBuffer(plug.expected_duration,
                                     sample_rate=RATE, sample_width=SAMPLE_WIDTH)
    for i in range(0, int(RATE / CHUNK * MAX_RECORD_SECONDS)):
        data = stream.read(CHUNK)
        # old style engines will ignore the update
        plug.update(data)
        # streaming engines will ignore the byte_data
        audio_buffer.append(data)
        audio_data = audio_buffer.get()
        found = plug.found_wake_word(audio_data)
        if found:
            return True

Plugin Template

from ovos_plugin_manager.templates.hotwords import HotWordEngine
from threading import Event


class MyWWPlugin(HotWordEngine):
    def __init__(self, key_phrase="hey mycroft", config=None, lang="en-us"):
        super().__init__(key_phrase, config, lang)
        self.detection = Event()
        # read config settings for your plugin
        self.sensitivity = self.config.get("sensitivity", 0.5)
        # TODO - plugin stuff
        # how does your plugin work? phonemes? text? models?
        self.engine = MyWW(key_phrase) 

    def found_wake_word(self, frame_data):
        """Check if wake word has been found.

        Checks if the wake word has been found. Should reset any internal
        tracking of the wake word state.

        Arguments:
            frame_data (binary data): Deprecated. Audio data for large chunk
                                      of audio to be processed. This should not
                                      be used to detect audio data instead
                                      use update() to incrementally update audio
        Returns:
            bool: True if a wake word was detected, else False
        """
        detected = self.detection.is_set()
        if detected:
            self.detection.clear()
        return detected

    def update(self, chunk):
        """Updates the hotword engine with new audio data.

        The engine should process the data and update internal trigger state.

        Arguments:
            chunk (bytes): Chunk of audio data to process
        """
        if self.engine.found_it(chunk): # TODO - check for wake word
            self.detection.set()

    def stop(self):
        """Perform any actions needed to shut down the wake word engine.

        This may include things such as unloading data or shutdown
        external processess.
        """
        self.engine.bye()  # TODO - plugin specific shutdown