babylon-mcp/scripts/index-editor-source.ts
Michael Mainguy e6a3329c9b Add Editor source code tools and expand indexed Babylon.js packages
Adds search_babylon_editor_source and get_babylon_editor_source MCP tools
for searching and retrieving Editor source code. Expands source indexing
to include inspector, viewer, addons, accessibility, node-editor, and
procedural-textures packages. Improves pathToDocId to handle Editor paths
and adds Editor URL construction fallback in getDocumentByPath.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-03 09:02:20 -06:00

230 lines
6.9 KiB
TypeScript

// Backend selection for @xenova/transformers: when TRANSFORMERS_BACKEND asks for a
// WebAssembly runtime, force ONNXRUNTIME_BACKEND to 'wasm' so that onnxruntime-node
// is not loaded on Alpine Linux (musl libc).
// NOTE(review): this is meant to take effect before @xenova/transformers is loaded, but
// static `import` declarations are hoisted in ES modules, so the imported modules may
// already be evaluated when this runs — confirm the loader/transpiler in use preserves
// the intended ordering (or switch to a dynamic import()).
switch (process.env.TRANSFORMERS_BACKEND) {
  case 'wasm':
  case 'onnxruntime-web':
    process.env.ONNXRUNTIME_BACKEND = 'wasm';
    break;
  default:
    // Any other (or missing) value leaves the environment untouched.
    break;
}
import { connect } from '@lancedb/lancedb';
import { pipeline } from '@xenova/transformers';
import fs from 'fs/promises';
import path from 'path';
import { fileURLToPath } from 'url';
// ES modules do not define __filename/__dirname, so both are rebuilt from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
// Directory containing this script; main() resolves the project root relative to it.
const __dirname = path.dirname(__filename);
/**
 * One row of the source-code table: a contiguous window of lines taken from a
 * single file, together with file-level metadata and its embedding.
 */
interface SourceCodeChunk {
  /** Row identifier, built as `<filePath>:<startLine>-<endLine>`. */
  id: string;
  /** Path of the file relative to the repository checkout. */
  filePath: string;
  /** Name of the package entry the file was found under. */
  package: string;
  /** Text of the chunk (its lines joined with `\n`). */
  content: string;
  /** 1-based number of the first line in the chunk. */
  startLine: number;
  /** 1-based number of the last line in the chunk (inclusive). */
  endLine: number;
  /** `'typescript'` for .ts/.tsx files, `'javascript'` otherwise. */
  language: string;
  /** Comma-separated module specifiers imported by the whole file. */
  imports: string;
  /** Comma-separated names exported by the whole file. */
  exports: string;
  /** GitHub link to the chunk's line range. */
  url: string;
  /** Embedding of the chunk's embedding text. */
  vector: number[];
}
// Maximum number of lines placed in a single chunk.
const CHUNK_SIZE = 200;
// Lines shared by consecutive chunks: the chunk window advances by
// CHUNK_SIZE - CHUNK_OVERLAP lines on each step.
const CHUNK_OVERLAP = 20;
/**
 * Recursively collects the TypeScript/JavaScript source files beneath `dir`.
 *
 * Sub-directories named `node_modules`, `dist`, `build`, `lib`, `.git` or
 * `declaration` are not descended into. Only regular files whose name ends in
 * `.ts`, `.tsx`, `.js` or `.jsx` are returned.
 *
 * Scanning is best-effort: a directory that cannot be read contributes no
 * files and never rejects, but the failure is reported with `console.warn`
 * so a missing or unreadable directory is not mistaken for an empty one.
 *
 * @param dir Directory to scan.
 * @returns Paths of the matching files, each built by joining `dir` with the
 *          entry names below it.
 */
async function getAllSourceFiles(dir: string): Promise<string[]> {
  const skippedDirectories = new Set(['node_modules', 'dist', 'build', 'lib', '.git', 'declaration']);
  const sourceFilePattern = /\.(ts|tsx|js|jsx)$/;
  const files: string[] = [];

  try {
    const entries = await fs.readdir(dir, { withFileTypes: true });
    for (const entry of entries) {
      const fullPath = path.join(dir, entry.name);
      if (entry.isDirectory()) {
        if (!skippedDirectories.has(entry.name)) {
          // Nested failures are handled (and reported) by the recursive call itself.
          for (const nested of await getAllSourceFiles(fullPath)) {
            files.push(nested);
          }
        }
      } else if (entry.isFile() && sourceFilePattern.test(entry.name)) {
        files.push(fullPath);
      }
    }
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    console.warn(`Skipping directory ${dir}: ${reason}`);
    return [];
  }

  return files;
}
/**
 * Lists the module specifiers named by `import … from '…'` statements.
 *
 * @param content Full text of a source file.
 * @returns The first 20 specifiers, in order of appearance, joined with `, `
 *          (an empty string when none are found).
 */
function extractImports(content: string): string {
  // The binding list is matched lazily (`[^;]+?`) so that, in code written without
  // semicolons, each statement stops at its own `from` instead of swallowing every
  // following import up to the last `from` in the file.
  const importRegex = /import\s+(?:{[^}]+}|[^;]+?)\s+from\s+['"]([^'"]+)['"]/g;
  const specifiers: string[] = [];
  for (const match of content.matchAll(importRegex)) {
    if (match[1]) {
      specifiers.push(match[1]);
    }
  }
  return specifiers.slice(0, 20).join(', ');
}
/**
 * Lists the names declared by `export` statements.
 *
 * Recognised forms are `export [default] [declare] [abstract] [async]`
 * followed by `class`, `function`, `function*`, `interface`, `type`, `const`,
 * `const enum`, `let`, `var`, `enum` or `namespace` and a name, plus
 * `export default <identifier>`. Anonymous default exports contribute nothing.
 *
 * @param content Full text of a source file.
 * @returns The first 20 names, in order of appearance, joined with `, `
 *          (an empty string when none are found).
 */
function extractExports(content: string): string {
  // Alternative 1 (group 1): a declaration, with optional modifiers skipped so the
  // captured word is the declared name rather than `class`, `function`, `enum`, …
  // Alternative 2 (group 2): `export default Name`, excluding keywords that start an
  // anonymous declaration or expression.
  const exportRegex =
    /export\s+(?:default\s+)?(?:declare\s+)?(?:abstract\s+)?(?:async\s+)?(?:function\s*\*\s*|(?:class|function|interface|type|const\s+enum|const|let|var|enum|namespace)\s+)([A-Za-z_$][A-Za-z0-9_$]*)|export\s+default\s+(?!(?:abstract|async|class|function|interface|new)\b)([A-Za-z_$][A-Za-z0-9_$]*)/g;
  const names: string[] = [];
  for (const match of content.matchAll(exportRegex)) {
    const name = match[1] ?? match[2];
    if (name) {
      names.push(name);
    }
  }
  return names.slice(0, 20).join(', ');
}
/**
 * Pulls human-written comment text out of a piece of code.
 *
 * All `//` comments are collected first, then all block comments, and the
 * first five non-empty entries are returned.
 *
 * @param code Source text to scan.
 * @returns Up to five comment texts joined with single spaces (an empty string
 *          when there are none).
 */
function extractComments(code: string): string {
  const comments: string[] = [];

  // `(?<!:)` keeps the `//` of URL schemes such as `https://` from being read as a
  // comment. `[ \t]*` (rather than `\s*`) keeps an empty `//` line from running on
  // into the next line and capturing code as comment text.
  const singleLineRegex = /(?<!:)\/\/[ \t]*(.+)$/gm;
  for (const match of code.matchAll(singleLineRegex)) {
    const text = match[1]?.trim();
    if (text) {
      comments.push(text);
    }
  }

  // Leading `*` gutter that JSDoc-style comments put at the start of each line.
  const gutterRegex = /^\s*\*+\s?/;
  const multiLineRegex = /\/\*\*?([\s\S]*?)\*\//g;
  for (const match of code.matchAll(multiLineRegex)) {
    const text = (match[1] ?? '')
      .split('\n')
      .map((line) => line.replace(gutterRegex, '').trimEnd())
      .join('\n')
      .trim();
    // Blank comments are skipped so they do not use up one of the five slots.
    if (text) {
      comments.push(text);
    }
  }

  return comments.slice(0, 5).join(' ');
}
/**
 * Rebuilds the `babylon_editor_source` LanceDB table from the Editor checkout.
 *
 * For each configured package the source files are read, split into windows of
 * CHUNK_SIZE lines that advance by CHUNK_SIZE - CHUNK_OVERLAP lines, and every
 * non-blank window is embedded and stored with file-level metadata and a
 * GitHub link. A file or package that fails is logged and skipped.
 *
 * The existing table is only replaced once at least one chunk has been
 * produced, so a run against a missing or empty checkout leaves the previous
 * index in place.
 *
 * @throws Error when no chunks were produced, or when connecting to the
 *         database, loading the model or writing the table fails.
 */
async function main(): Promise<void> {
  const projectRoot = path.join(__dirname, '..');
  const dbPath = path.join(projectRoot, 'data', 'lancedb');
  const repositoryPath = path.join(projectRoot, 'data', 'repositories', 'Editor');
  const tableName = 'babylon_editor_source';

  // Editor packages with their source paths (relative to repo root)
  const packages = [
    { name: 'editor', srcPath: 'editor/src' },
    { name: 'tools', srcPath: 'tools/src' },
    { name: 'website', srcPath: 'website/src' },
  ];

  console.log('Starting Editor source code indexing...');
  console.log(`Database path: ${dbPath}`);
  console.log(`Repository path: ${repositoryPath}`);
  console.log(`Packages: ${packages.map(p => p.name).join(', ')}`);
  console.log();

  // Initialize LanceDB
  console.log('Initializing LanceDB connection...');
  const db = await connect(dbPath);

  // Load embedding model
  console.log('Loading embedding model...');
  const embedder = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
  console.log('Embedding model loaded');

  const chunks: SourceCodeChunk[] = [];
  let totalFiles = 0;

  for (const pkg of packages) {
    console.log(`\nIndexing package: ${pkg.name}...`);
    const packagePath = path.join(repositoryPath, pkg.srcPath);

    try {
      const files = await getAllSourceFiles(packagePath);
      console.log(`Found ${files.length} source files in ${pkg.name}`);

      for (const file of files) {
        try {
          const content = await fs.readFile(file, 'utf-8');
          const lines = content.split('\n');
          // Imports/exports describe the whole file and are repeated on each of its chunks.
          const imports = extractImports(content);
          const exports = extractExports(content);
          const language = file.endsWith('.ts') || file.endsWith('.tsx') ? 'typescript' : 'javascript';

          // Use forward slashes regardless of platform so ids, stored paths and the
          // GitHub URL come out the same on Windows as on POSIX systems.
          const relativePath = path.relative(repositoryPath, file).split(path.sep).join('/');
          const fileName = path.basename(file);
          const dirName = path.posix.basename(path.posix.dirname(relativePath));

          // Chunk the file
          for (let j = 0; j < lines.length; j += CHUNK_SIZE - CHUNK_OVERLAP) {
            const startLine = j + 1;
            const endLine = Math.min(j + CHUNK_SIZE, lines.length);
            const chunkContent = lines.slice(j, endLine).join('\n');

            if (chunkContent.trim().length === 0) {
              continue;
            }

            // Create embedding text: names and comments first, then the start of the code.
            const comments = extractComments(chunkContent);
            const embeddingText = `${fileName} ${dirName} ${comments} ${chunkContent.substring(0, 1000)}`;

            // Generate embedding
            const result = await embedder(embeddingText, {
              pooling: 'mean',
              normalize: true,
            });
            const vector = Array.from(result.data) as number[];

            // Generate GitHub URL for Editor repo
            const url = `https://github.com/BabylonJS/Editor/blob/master/${relativePath}#L${startLine}-L${endLine}`;

            chunks.push({
              id: `${relativePath}:${startLine}-${endLine}`,
              filePath: relativePath,
              package: pkg.name,
              content: chunkContent,
              startLine,
              endLine,
              language,
              imports,
              exports,
              url,
              vector,
            });
          }

          totalFiles++;
          if (totalFiles % 50 === 0) {
            console.log(`Processed ${totalFiles} files, ${chunks.length} chunks...`);
          }
        } catch (error) {
          console.error(`Error processing ${file}:`, error);
        }
      }
    } catch (error) {
      console.error(`Error indexing package ${pkg.name}:`, error);
    }
  }

  console.log(`\nTotal files processed: ${totalFiles}`);
  console.log(`Total source code chunks: ${chunks.length}`);

  // Bail out before touching the database: dropping the old table and then
  // failing to create a table from zero rows would destroy the existing index.
  if (chunks.length === 0) {
    throw new Error(
      `No source code chunks were produced from ${repositoryPath}; the existing "${tableName}" table was left untouched.`,
    );
  }

  console.log('Creating LanceDB table...');

  // Drop existing table if it exists
  const tableNames = await db.tableNames();
  if (tableNames.includes(tableName)) {
    await db.dropTable(tableName);
  }

  // Create new table
  await db.createTable(tableName, chunks);

  console.log('\n✓ Editor source code indexing completed successfully!');
}
// Script entry point: report a failed run and make the process exit with a
// non-zero status so shells, CI jobs and container builds can detect it.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});