Added Initial Voice Command Handler.

This commit is contained in:
Michael Mainguy 2023-08-11 10:54:10 -05:00
parent 720e348aa9
commit 8251681035
5 changed files with 146 additions and 25 deletions

View File

@ -12,8 +12,7 @@ export const handler: Handler = async (event: HandlerEvent, context: HandlerCont
headers: {'Content-Type': 'application/json'}, headers: {'Content-Type': 'application/json'},
statusCode: 200, statusCode: 200,
body: JSON.stringify(data) body: JSON.stringify(data)
}; }
} catch (error) { } catch (error) {
return { return {
statusCode: 500, statusCode: 500,

View File

@ -18,6 +18,8 @@ import {InputTextView} from "./information/inputTextView";
import {GamepadManager} from "./controllers/gamepadManager"; import {GamepadManager} from "./controllers/gamepadManager";
import {CustomEnvironment} from "./util/customEnvironment"; import {CustomEnvironment} from "./util/customEnvironment";
import {DrawioManager} from "./integration/drawioManager"; import {DrawioManager} from "./integration/drawioManager";
import {VoiceManager} from "./integration/voiceManager";
import {TranscriptType} from "./integration/voiceTranscript";
export class App { export class App {
@ -32,10 +34,10 @@ export class App {
//log.getLogger('IndexdbPersistenceManager').setLevel('info'); //log.getLogger('IndexdbPersistenceManager').setLevel('info');
//log.getLogger('DiagramManager').setLevel('info'); //log.getLogger('DiagramManager').setLevel('info');
//log.getLogger('DiagramConnection').setLevel('debug'); //log.getLogger('DiagramConnection').setLevel('debug');
log.getLogger('DrawioManager').setLevel('debug'); log.getLogger('DrawioManager').setLevel('warn');
log.getLogger('VoiceManager').setLevel('debug');
log.getLogger('EntityTree').setLevel('debug'); log.getLogger('EntityTree').setLevel('warn');
log.getLogger('EditMenu').setLevel('debug'); log.getLogger('EditMenu').setLevel('warn');
const canvas = document.createElement("canvas"); const canvas = document.createElement("canvas");
canvas.style.width = "100%"; canvas.style.width = "100%";
canvas.style.height = "100%"; canvas.style.height = "100%";
@ -53,6 +55,7 @@ export class App {
const engine = new Engine(canvas, true); const engine = new Engine(canvas, true);
const scene = new Scene(engine); const scene = new Scene(engine);
const environment = new CustomEnvironment(scene); const environment = new CustomEnvironment(scene);
const query = Object.fromEntries(new URLSearchParams(window.location.search)); const query = Object.fromEntries(new URLSearchParams(window.location.search));
logger.debug('Query', query); logger.debug('Query', query);
if (query.shareCode) { if (query.shareCode) {
@ -119,8 +122,37 @@ export class App {
const gamepadManager = new GamepadManager(scene); const gamepadManager = new GamepadManager(scene);
const voiceManager = new VoiceManager();
voiceManager.transcriptionObserver.add((text) => {
logger.info('Transcription', text);
switch (text.type) {
case TranscriptType.PartialTranscript:
if (text.words.length > 0 &&
text.words[0].text.toLowerCase() == 'meta') {
logger.info('Meta command', text.text);
}
break;
case TranscriptType.FinalTranscript:
logger.info('Final', text.words[0].text.toLowerCase().substring(0, 4));
if (text.words.length > 0 &&
text.words[0].text.toLowerCase().substring(0, 4) == 'meta' &&
text.words[0].confidence > .8) {
logger.info('Meta Final command',
text.words.map((e) => {
return e.text
}).slice(1).join(' '));
}
}
});
window.addEventListener("keydown", (ev) => { window.addEventListener("keydown", (ev) => {
// Shift+Ctrl+Alt+I if (ev.key == "z") {
voiceManager.startRecording();
}
if (ev.key == "x") {
voiceManager.stopRecording();
}
if (ev.shiftKey && ev.ctrlKey && ev.altKey && ev.keyCode === 73) { if (ev.shiftKey && ev.ctrlKey && ev.altKey && ev.keyCode === 73) {
import("@babylonjs/core/Debug/debugLayer").then(() => { import("@babylonjs/core/Debug/debugLayer").then(() => {
import("@babylonjs/inspector").then(() => { import("@babylonjs/inspector").then(() => {
@ -134,6 +166,7 @@ export class App {
} }
}); });
logger.info('keydown event listener added, use Ctrl+Shift+Alt+I to toggle debug layer'); logger.info('keydown event listener added, use Ctrl+Shift+Alt+I to toggle debug layer');
engine.runRenderLoop(() => { engine.runRenderLoop(() => {
scene.render(); scene.render();
@ -141,6 +174,7 @@ export class App {
logger.info('Render loop started'); logger.info('Render loop started');
} }
} }
const app = new App(); const app = new App();

View File

@ -1,53 +1,76 @@
import RecordRTC from 'recordrtc'; import RecordRTC from 'recordrtc';
import log from "loglevel";
import {Observable} from "@babylonjs/core";
import {TranscriptType, VoiceTranscript} from "./voiceTranscript";
// Raw payload of one websocket message from the realtime transcription
// service. Every field is optional because different message_type values
// carry different subsets (e.g. a session-start message has no text/words).
type VoiceManagerEvent = {
// assumes millisecond offsets for the audio span — TODO confirm against service docs
audio_start?: number;
audio_end?: number;
// overall confidence for the transcript text in this message
confidence?: number;
// full transcript text for this update
text?: string;
// per-word entries; shape not pinned down here — presumably {text, confidence, ...}; verify
words?: Array<any>;
created?: string;
// discriminator, e.g. 'PartialTranscript' | 'FinalTranscript' | 'SessionBegins'
message_type?: string
}
export class VoiceManager { export class VoiceManager {
private socket: WebSocket; private socket: WebSocket;
private token: string; private token: string;
public readonly transcriptionObserver: Observable<VoiceTranscript> = new Observable<VoiceTranscript>();
private recorder: RecordRTC; private recorder: RecordRTC;
private data: any[] = []; private data: any[] = [];
private logger = log.getLogger('VoiceManager');
constructor() { constructor() {
this.setupRecorder();
} }
public async setupConnection() { public startRecording() {
const response = await fetch('/api/voice/token'); this.connectToVoice();
}
/**
 * Stops capturing audio and tears down the realtime transcription session.
 * Safe to call even if recording never started or the socket already closed.
 */
public stopRecording() {
    // Recorder may not exist yet: it is created asynchronously elsewhere.
    this.recorder?.reset();
    // Only signal termination on an OPEN socket (readyState 1);
    // send() on a closed/connecting socket throws.
    if (this.socket && this.socket.readyState === 1) {
        this.socket.send('{"terminate_session": true}');
    }
    this.socket = null;
}
public async connectToVoice() {
const response = await fetch('/.netlify/functions/voice');
const data = await response.json(); const data = await response.json();
this.token = data.token; this.token = data.token;
if (!this.socket) { if (!this.socket) {
this.socket = new WebSocket(`wss://api.assemblyai.com/v2/realtime/ws?sample_rate=16000&token=${this.token}`); this.socket = new WebSocket(`wss://api.assemblyai.com/v2/realtime/ws?sample_rate=16000&token=${this.token}`);
this.socket.onmessage = (message) => { this.socket.onmessage = this.messageRecieved;
const res = JSON.parse(message.data);
if (this.data) {
this.data.push(res);
//this.target.emit('transcriptiondata', {data: res});
}
}
this.socket.onopen = this.socketOpen; this.socket.onopen = this.socketOpen;
this.socket.onclose = this.socketClose;
} else { } else {
switch (this.socket.readyState) { switch (this.socket.readyState) {
case 0: case 0:
console.log('socket opening'); this.logger.debug('socket opening');
break; break;
case 1: case 1:
console.log('socket already open'); this.logger.debug('socket already open');
//await this.recorder.startRecording();
break; break;
case 2: case 2:
console.log('dang, socket is closing'); this.logger.debug('socket is closing');
this.socket = null; this.socket = null;
//await this.setupConnection();
break; break;
case 3: case 3:
console.log('Socket is closed'); this.logger.debug('Socket is closed');
this.socket = null; this.socket = null;
//await this.setupConnection();
break break
default: default:
console.log(`socket state is unknown: ${this.socket.readyState}`); this.logger.debug(`socket state is unknown: ${this.socket.readyState}`);
} }
} }
} }
private async socketOpen() { private async setupRecorder() {
if (!this.recorder) { if (!this.recorder) {
const stream = await navigator.mediaDevices.getUserMedia({audio: true}); const stream = await navigator.mediaDevices.getUserMedia({audio: true});
this.recorder = new RecordRTC(stream, { this.recorder = new RecordRTC(stream, {
@ -62,7 +85,7 @@ export class VoiceManager {
if (this.socket && (this.socket.readyState === 1)) { if (this.socket && (this.socket.readyState === 1)) {
this.socket.send(JSON.stringify({audio_data: base64data.split('base64,')[1]})); this.socket.send(JSON.stringify({audio_data: base64data.split('base64,')[1]}));
} else { } else {
console.log('no socket available'); this.logger.warn('no socket available');
} }
}; };
reader.readAsDataURL(blob); reader.readAsDataURL(blob);
@ -70,4 +93,53 @@ export class VoiceManager {
}); });
} }
} }
/**
 * Websocket message handler: parses the service payload and forwards
 * Partial/Final transcripts (with at least one word) to observers.
 * NOTE(review): the "Recieved" misspelling is kept intentionally — this
 * property name is referenced where the socket handlers are assigned.
 */
private messageRecieved = (message: any) => {
    const res = (JSON.parse(message.data) as VoiceManagerEvent);
    if (this.data) {
        switch (res.message_type) {
            // Both transcript kinds notify observers with the same payload;
            // only the `type` discriminator differs.
            case 'PartialTranscript':
            case 'FinalTranscript':
                // Guard: `words` is optional on the wire format.
                if (res.words && res.words.length > 0) {
                    if (res.message_type === 'PartialTranscript') {
                        this.logger.debug(`PartialTranscript: ${res.text}`);
                    }
                    this.transcriptionObserver.notifyObservers({
                        text: res.text,
                        words: res.words,
                        confidence: res.confidence,
                        // Safe cast: the wire strings match the enum's string values.
                        type: res.message_type as TranscriptType
                    });
                }
                break;
            case 'SessionBegins':
                // Stringify explicitly: `${res}` would log "[object Object]".
                this.logger.debug(`SessionBegins: ${JSON.stringify(res)}`);
                break;
        }
    }
}
/**
 * Websocket close handler: drops the socket reference and resets the
 * recorder so a later recording attempt reconnects with fresh state.
 */
private socketClose = async () => {
    this.logger.debug('Socket closed');
    this.socket = null;
    // Optional chaining: the recorder is created asynchronously and may
    // never have been initialized (e.g. mic permission denied) — TODO confirm.
    this.recorder?.reset();
}
/**
 * Websocket open handler: starts streaming microphone audio once the
 * realtime session is live; logs an error if no recorder is available.
 */
private socketOpen = async () => {
    this.logger.debug('voice socket opened');
    if (this.recorder) {
        this.recorder.startRecording();
    } else {
        this.logger.error('recorder not initialized');
    }
}
} }

View File

@ -0,0 +1,11 @@
// A single recognized word as delivered by the transcription service.
// Replaces the previous self-referential element type (VoiceTranscript[]):
// consumers only ever read `text` and `confidence` per word, so this is
// backward-compatible and no longer claims each word nests a full transcript.
export type VoiceWord = {
    text: string;
    confidence: number;
    // assumes the service also sends per-word timing — TODO confirm
    start?: number;
    end?: number;
}

// One transcript update (partial or final) pushed to observers.
export type VoiceTranscript = {
    words: VoiceWord[];
    text: string;
    type: TranscriptType;
    confidence: number;
}

// Discriminator for transcript updates; values mirror the service's
// message_type strings so wire values can be used directly.
export enum TranscriptType {
    PartialTranscript = 'PartialTranscript',
    FinalTranscript = 'FinalTranscript'
}

View File

@ -3,7 +3,12 @@ import {defineConfig} from "vite";
/** @type {import('vite').UserConfig} */ /** @type {import('vite').UserConfig} */
export default defineConfig({ export default defineConfig({
server: { server: {
port: 3001, port: 3001,
proxy: {
'/.netlify': {
target: 'http://localhost:9999/',
}
}
}, },
base: "/" base: "/"