Added Initial Voice Command Handler.
This commit is contained in:
parent
720e348aa9
commit
8251681035
@ -12,8 +12,7 @@ export const handler: Handler = async (event: HandlerEvent, context: HandlerCont
|
|||||||
headers: {'Content-Type': 'application/json'},
|
headers: {'Content-Type': 'application/json'},
|
||||||
statusCode: 200,
|
statusCode: 200,
|
||||||
body: JSON.stringify(data)
|
body: JSON.stringify(data)
|
||||||
};
|
}
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
return {
|
return {
|
||||||
statusCode: 500,
|
statusCode: 500,
|
||||||
|
|||||||
44
src/app.ts
44
src/app.ts
@ -18,6 +18,8 @@ import {InputTextView} from "./information/inputTextView";
|
|||||||
import {GamepadManager} from "./controllers/gamepadManager";
|
import {GamepadManager} from "./controllers/gamepadManager";
|
||||||
import {CustomEnvironment} from "./util/customEnvironment";
|
import {CustomEnvironment} from "./util/customEnvironment";
|
||||||
import {DrawioManager} from "./integration/drawioManager";
|
import {DrawioManager} from "./integration/drawioManager";
|
||||||
|
import {VoiceManager} from "./integration/voiceManager";
|
||||||
|
import {TranscriptType} from "./integration/voiceTranscript";
|
||||||
|
|
||||||
|
|
||||||
export class App {
|
export class App {
|
||||||
@ -32,10 +34,10 @@ export class App {
|
|||||||
//log.getLogger('IndexdbPersistenceManager').setLevel('info');
|
//log.getLogger('IndexdbPersistenceManager').setLevel('info');
|
||||||
//log.getLogger('DiagramManager').setLevel('info');
|
//log.getLogger('DiagramManager').setLevel('info');
|
||||||
//log.getLogger('DiagramConnection').setLevel('debug');
|
//log.getLogger('DiagramConnection').setLevel('debug');
|
||||||
log.getLogger('DrawioManager').setLevel('debug');
|
log.getLogger('DrawioManager').setLevel('warn');
|
||||||
|
log.getLogger('VoiceManager').setLevel('debug');
|
||||||
log.getLogger('EntityTree').setLevel('debug');
|
log.getLogger('EntityTree').setLevel('warn');
|
||||||
log.getLogger('EditMenu').setLevel('debug');
|
log.getLogger('EditMenu').setLevel('warn');
|
||||||
const canvas = document.createElement("canvas");
|
const canvas = document.createElement("canvas");
|
||||||
canvas.style.width = "100%";
|
canvas.style.width = "100%";
|
||||||
canvas.style.height = "100%";
|
canvas.style.height = "100%";
|
||||||
@ -53,6 +55,7 @@ export class App {
|
|||||||
const engine = new Engine(canvas, true);
|
const engine = new Engine(canvas, true);
|
||||||
const scene = new Scene(engine);
|
const scene = new Scene(engine);
|
||||||
const environment = new CustomEnvironment(scene);
|
const environment = new CustomEnvironment(scene);
|
||||||
|
|
||||||
const query = Object.fromEntries(new URLSearchParams(window.location.search));
|
const query = Object.fromEntries(new URLSearchParams(window.location.search));
|
||||||
logger.debug('Query', query);
|
logger.debug('Query', query);
|
||||||
if (query.shareCode) {
|
if (query.shareCode) {
|
||||||
@ -119,8 +122,37 @@ export class App {
|
|||||||
|
|
||||||
|
|
||||||
const gamepadManager = new GamepadManager(scene);
|
const gamepadManager = new GamepadManager(scene);
|
||||||
|
const voiceManager = new VoiceManager();
|
||||||
|
voiceManager.transcriptionObserver.add((text) => {
|
||||||
|
logger.info('Transcription', text);
|
||||||
|
switch (text.type) {
|
||||||
|
case TranscriptType.PartialTranscript:
|
||||||
|
if (text.words.length > 0 &&
|
||||||
|
text.words[0].text.toLowerCase() == 'meta') {
|
||||||
|
logger.info('Meta command', text.text);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case TranscriptType.FinalTranscript:
|
||||||
|
logger.info('Final', text.words[0].text.toLowerCase().substring(0, 4));
|
||||||
|
if (text.words.length > 0 &&
|
||||||
|
text.words[0].text.toLowerCase().substring(0, 4) == 'meta' &&
|
||||||
|
text.words[0].confidence > .8) {
|
||||||
|
logger.info('Meta Final command',
|
||||||
|
text.words.map((e) => {
|
||||||
|
return e.text
|
||||||
|
}).slice(1).join(' '));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
});
|
||||||
window.addEventListener("keydown", (ev) => {
|
window.addEventListener("keydown", (ev) => {
|
||||||
// Shift+Ctrl+Alt+I
|
if (ev.key == "z") {
|
||||||
|
voiceManager.startRecording();
|
||||||
|
}
|
||||||
|
if (ev.key == "x") {
|
||||||
|
voiceManager.stopRecording();
|
||||||
|
}
|
||||||
if (ev.shiftKey && ev.ctrlKey && ev.altKey && ev.keyCode === 73) {
|
if (ev.shiftKey && ev.ctrlKey && ev.altKey && ev.keyCode === 73) {
|
||||||
import("@babylonjs/core/Debug/debugLayer").then(() => {
|
import("@babylonjs/core/Debug/debugLayer").then(() => {
|
||||||
import("@babylonjs/inspector").then(() => {
|
import("@babylonjs/inspector").then(() => {
|
||||||
@ -134,6 +166,7 @@ export class App {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
||||||
logger.info('keydown event listener added, use Ctrl+Shift+Alt+I to toggle debug layer');
|
logger.info('keydown event listener added, use Ctrl+Shift+Alt+I to toggle debug layer');
|
||||||
engine.runRenderLoop(() => {
|
engine.runRenderLoop(() => {
|
||||||
scene.render();
|
scene.render();
|
||||||
@ -141,6 +174,7 @@ export class App {
|
|||||||
logger.info('Render loop started');
|
logger.info('Render loop started');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const app = new App();
|
const app = new App();
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -1,53 +1,76 @@
|
|||||||
import RecordRTC from 'recordrtc';
|
import RecordRTC from 'recordrtc';
|
||||||
|
import log from "loglevel";
|
||||||
|
import {Observable} from "@babylonjs/core";
|
||||||
|
import {TranscriptType, VoiceTranscript} from "./voiceTranscript";
|
||||||
|
|
||||||
|
type VoiceManagerEvent = {
|
||||||
|
audio_start?: number;
|
||||||
|
audio_end?: number;
|
||||||
|
confidence?: number;
|
||||||
|
text?: string;
|
||||||
|
words?: Array<any>;
|
||||||
|
created?: string;
|
||||||
|
message_type?: string
|
||||||
|
}
|
||||||
|
|
||||||
export class VoiceManager {
|
export class VoiceManager {
|
||||||
private socket: WebSocket;
|
private socket: WebSocket;
|
||||||
private token: string;
|
private token: string;
|
||||||
|
public readonly transcriptionObserver: Observable<VoiceTranscript> = new Observable<VoiceTranscript>();
|
||||||
private recorder: RecordRTC;
|
private recorder: RecordRTC;
|
||||||
private data: any[] = [];
|
private data: any[] = [];
|
||||||
|
private logger = log.getLogger('VoiceManager');
|
||||||
|
|
||||||
constructor() {
|
constructor() {
|
||||||
|
this.setupRecorder();
|
||||||
}
|
}
|
||||||
|
|
||||||
public async setupConnection() {
|
public startRecording() {
|
||||||
const response = await fetch('/api/voice/token');
|
this.connectToVoice();
|
||||||
|
}
|
||||||
|
|
||||||
|
public stopRecording() {
|
||||||
|
this.recorder.reset();
|
||||||
|
this.socket.send('{"terminate_session": true}');
|
||||||
|
this.socket = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public async connectToVoice() {
|
||||||
|
const response = await fetch('/.netlify/functions/voice');
|
||||||
const data = await response.json();
|
const data = await response.json();
|
||||||
this.token = data.token;
|
this.token = data.token;
|
||||||
if (!this.socket) {
|
if (!this.socket) {
|
||||||
this.socket = new WebSocket(`wss://api.assemblyai.com/v2/realtime/ws?sample_rate=16000&token=${this.token}`);
|
this.socket = new WebSocket(`wss://api.assemblyai.com/v2/realtime/ws?sample_rate=16000&token=${this.token}`);
|
||||||
this.socket.onmessage = (message) => {
|
this.socket.onmessage = this.messageRecieved;
|
||||||
const res = JSON.parse(message.data);
|
|
||||||
if (this.data) {
|
|
||||||
this.data.push(res);
|
|
||||||
//this.target.emit('transcriptiondata', {data: res});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
this.socket.onopen = this.socketOpen;
|
this.socket.onopen = this.socketOpen;
|
||||||
|
this.socket.onclose = this.socketClose;
|
||||||
} else {
|
} else {
|
||||||
switch (this.socket.readyState) {
|
switch (this.socket.readyState) {
|
||||||
case 0:
|
case 0:
|
||||||
console.log('socket opening');
|
this.logger.debug('socket opening');
|
||||||
break;
|
break;
|
||||||
case 1:
|
case 1:
|
||||||
console.log('socket already open');
|
this.logger.debug('socket already open');
|
||||||
|
//await this.recorder.startRecording();
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
console.log('dang, socket is closing');
|
this.logger.debug('socket is closing');
|
||||||
this.socket = null;
|
this.socket = null;
|
||||||
|
//await this.setupConnection();
|
||||||
break;
|
break;
|
||||||
case 3:
|
case 3:
|
||||||
console.log('Socket is closed');
|
this.logger.debug('Socket is closed');
|
||||||
this.socket = null;
|
this.socket = null;
|
||||||
|
//await this.setupConnection();
|
||||||
break
|
break
|
||||||
default:
|
default:
|
||||||
console.log(`socket state is unknown: ${this.socket.readyState}`);
|
this.logger.debug(`socket state is unknown: ${this.socket.readyState}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private async socketOpen() {
|
private async setupRecorder() {
|
||||||
if (!this.recorder) {
|
if (!this.recorder) {
|
||||||
const stream = await navigator.mediaDevices.getUserMedia({audio: true});
|
const stream = await navigator.mediaDevices.getUserMedia({audio: true});
|
||||||
this.recorder = new RecordRTC(stream, {
|
this.recorder = new RecordRTC(stream, {
|
||||||
@ -62,7 +85,7 @@ export class VoiceManager {
|
|||||||
if (this.socket && (this.socket.readyState === 1)) {
|
if (this.socket && (this.socket.readyState === 1)) {
|
||||||
this.socket.send(JSON.stringify({audio_data: base64data.split('base64,')[1]}));
|
this.socket.send(JSON.stringify({audio_data: base64data.split('base64,')[1]}));
|
||||||
} else {
|
} else {
|
||||||
console.log('no socket available');
|
this.logger.warn('no socket available');
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
reader.readAsDataURL(blob);
|
reader.readAsDataURL(blob);
|
||||||
@ -70,4 +93,53 @@ export class VoiceManager {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private messageRecieved = (message: any) => {
|
||||||
|
const res = (JSON.parse(message.data) as VoiceManagerEvent);
|
||||||
|
if (this.data) {
|
||||||
|
//this.logger.debug(`Received data: ${JSON.stringify(res)}`);
|
||||||
|
switch (res.message_type) {
|
||||||
|
case 'PartialTranscript':
|
||||||
|
if (res.words.length > 0) {
|
||||||
|
this.logger.debug(`PartialTranscript: ${res.text}`);
|
||||||
|
this.transcriptionObserver.notifyObservers(
|
||||||
|
{
|
||||||
|
text: res.text, words: res.words, confidence: res.confidence,
|
||||||
|
type: TranscriptType.PartialTranscript
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
case 'FinalTranscript':
|
||||||
|
if (res.words.length > 0) {
|
||||||
|
this.transcriptionObserver.notifyObservers(
|
||||||
|
{
|
||||||
|
text: res.text, words: res.words, confidence: res.confidence,
|
||||||
|
type: TranscriptType.FinalTranscript
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
case 'SessionBegins':
|
||||||
|
this.logger.debug(`SessionBegins: ${res}`);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private socketClose = async () => {
|
||||||
|
this.logger.debug('Socket closed');
|
||||||
|
this.socket = null;
|
||||||
|
this.recorder.reset();
|
||||||
|
}
|
||||||
|
|
||||||
|
private socketOpen = async () => {
|
||||||
|
this.logger.debug('voice socket opened');
|
||||||
|
if (!this.recorder) {
|
||||||
|
this.logger.error('recorder not initialized');
|
||||||
|
} else {
|
||||||
|
this.recorder.startRecording();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
11
src/integration/voiceTranscript.ts
Normal file
11
src/integration/voiceTranscript.ts
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
export type VoiceTranscript = {
|
||||||
|
words: VoiceTranscript[];
|
||||||
|
text: string;
|
||||||
|
type: TranscriptType;
|
||||||
|
confidence: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
export enum TranscriptType {
|
||||||
|
PartialTranscript = 'PartialTranscript',
|
||||||
|
FinalTranscript = 'FinalTranscript'
|
||||||
|
}
|
||||||
@ -3,7 +3,12 @@ import {defineConfig} from "vite";
|
|||||||
/** @type {import('vite').UserConfig} */
|
/** @type {import('vite').UserConfig} */
|
||||||
export default defineConfig({
|
export default defineConfig({
|
||||||
server: {
|
server: {
|
||||||
port: 3001,
|
port: 3001,
|
||||||
|
proxy: {
|
||||||
|
'/.netlify': {
|
||||||
|
target: 'http://localhost:9999/',
|
||||||
|
}
|
||||||
|
}
|
||||||
},
|
},
|
||||||
base: "/"
|
base: "/"
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user