Adds search_babylon_editor_source and get_babylon_editor_source MCP tools for searching and retrieving Editor source code. Expands source indexing to include inspector, viewer, addons, accessibility, node-editor, and procedural-textures packages. Improves pathToDocId to handle Editor paths and adds Editor URL construction fallback in getDocumentByPath. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
230 lines
6.9 KiB
TypeScript
// MUST set environment variable before any imports that use @xenova/transformers
|
|
// This prevents onnxruntime-node from being loaded on Alpine Linux (musl libc)
|
|
if (process.env.TRANSFORMERS_BACKEND === 'wasm' || process.env.TRANSFORMERS_BACKEND === 'onnxruntime-web') {
|
|
process.env.ONNXRUNTIME_BACKEND = 'wasm';
|
|
}
|
|
|
|
import { connect } from '@lancedb/lancedb';
|
|
import { pipeline } from '@xenova/transformers';
|
|
import fs from 'fs/promises';
|
|
import path from 'path';
|
|
import { fileURLToPath } from 'url';
|
|
|
|
// ESM replacement for the CommonJS __filename/__dirname globals, derived
// from this module's own URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
|
|
|
|
/**
 * One embedded chunk of Editor source code, as stored in the
 * `babylon_editor_source` LanceDB table. Field names double as the table's
 * column names when the chunk array is passed to `db.createTable`.
 */
interface SourceCodeChunk {
  /** Unique row id: `<relativePath>:<startLine>-<endLine>`. */
  id: string;
  /** File path relative to the Editor repository root. */
  filePath: string;
  /** Package the file belongs to ("editor", "tools", or "website"). */
  package: string;
  /** Raw chunk text (up to CHUNK_SIZE lines of the file). */
  content: string;
  /** 1-based first line of the chunk within the file. */
  startLine: number;
  /** 1-based last line (inclusive) of the chunk within the file. */
  endLine: number;
  /** "typescript" or "javascript", derived from the file extension. */
  language: string;
  /** Comma-separated module specifiers imported by the file (first 20). */
  imports: string;
  /** Comma-separated exported declaration names from the file (first 20). */
  exports: string;
  /** GitHub deep link to the chunk's line range on the master branch. */
  url: string;
  /** Embedding from Xenova/all-MiniLM-L6-v2 (presumably 384 dims — not enforced here). */
  vector: number[];
}
|
|
|
|
// Number of lines per chunk when splitting a source file.
const CHUNK_SIZE = 200;
// Lines shared between consecutive chunks so context spanning a chunk
// boundary is not lost (effective stride = CHUNK_SIZE - CHUNK_OVERLAP = 180).
const CHUNK_OVERLAP = 20;
|
|
|
|
async function getAllSourceFiles(dir: string): Promise<string[]> {
|
|
const files: string[] = [];
|
|
|
|
try {
|
|
const entries = await fs.readdir(dir, { withFileTypes: true });
|
|
|
|
for (const entry of entries) {
|
|
const fullPath = path.join(dir, entry.name);
|
|
|
|
if (entry.isDirectory()) {
|
|
if (!['node_modules', 'dist', 'build', 'lib', '.git', 'declaration'].includes(entry.name)) {
|
|
const subFiles = await getAllSourceFiles(fullPath);
|
|
files.push(...subFiles);
|
|
}
|
|
} else if (entry.isFile()) {
|
|
if (/\.(ts|tsx|js|jsx)$/.test(entry.name)) {
|
|
files.push(fullPath);
|
|
}
|
|
}
|
|
}
|
|
} catch {
|
|
return [];
|
|
}
|
|
|
|
return files;
|
|
}
|
|
|
|
function extractImports(content: string): string {
|
|
const imports: string[] = [];
|
|
const importRegex = /import\s+(?:{[^}]+}|[^;]+)\s+from\s+['"]([^'"]+)['"]/g;
|
|
let match;
|
|
|
|
while ((match = importRegex.exec(content)) !== null) {
|
|
if (match[1]) {
|
|
imports.push(match[1]);
|
|
}
|
|
}
|
|
|
|
return imports.slice(0, 20).join(', ');
|
|
}
|
|
|
|
function extractExports(content: string): string {
|
|
const exports: string[] = [];
|
|
const exportRegex = /export\s+(?:class|function|interface|type|const|let|var|enum|default)\s+([A-Za-z_$][A-Za-z0-9_$]*)/g;
|
|
let match;
|
|
|
|
while ((match = exportRegex.exec(content)) !== null) {
|
|
if (match[1]) {
|
|
exports.push(match[1]);
|
|
}
|
|
}
|
|
|
|
return exports.slice(0, 20).join(', ');
|
|
}
|
|
|
|
function extractComments(code: string): string {
|
|
const comments: string[] = [];
|
|
|
|
const singleLineRegex = /\/\/\s*(.+)$/gm;
|
|
let match;
|
|
while ((match = singleLineRegex.exec(code)) !== null) {
|
|
if (match[1]) {
|
|
comments.push(match[1].trim());
|
|
}
|
|
}
|
|
|
|
const multiLineRegex = /\/\*\*?([\s\S]*?)\*\//g;
|
|
while ((match = multiLineRegex.exec(code)) !== null) {
|
|
if (match[1]) {
|
|
comments.push(match[1].trim());
|
|
}
|
|
}
|
|
|
|
return comments.slice(0, 5).join(' ');
|
|
}
|
|
|
|
async function main() {
|
|
const projectRoot = path.join(__dirname, '..');
|
|
const dbPath = path.join(projectRoot, 'data', 'lancedb');
|
|
const repositoryPath = path.join(projectRoot, 'data', 'repositories', 'Editor');
|
|
const tableName = 'babylon_editor_source';
|
|
|
|
// Editor packages with their source paths (relative to repo root)
|
|
const packages = [
|
|
{ name: 'editor', srcPath: 'editor/src' },
|
|
{ name: 'tools', srcPath: 'tools/src' },
|
|
{ name: 'website', srcPath: 'website/src' },
|
|
];
|
|
|
|
console.log('Starting Editor source code indexing...');
|
|
console.log(`Database path: ${dbPath}`);
|
|
console.log(`Repository path: ${repositoryPath}`);
|
|
console.log(`Packages: ${packages.map(p => p.name).join(', ')}`);
|
|
console.log();
|
|
|
|
// Initialize LanceDB
|
|
console.log('Initializing LanceDB connection...');
|
|
const db = await connect(dbPath);
|
|
|
|
// Load embedding model
|
|
console.log('Loading embedding model...');
|
|
const embedder = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
|
|
console.log('Embedding model loaded');
|
|
|
|
const chunks: SourceCodeChunk[] = [];
|
|
let totalFiles = 0;
|
|
|
|
for (const pkg of packages) {
|
|
console.log(`\nIndexing package: ${pkg.name}...`);
|
|
const packagePath = path.join(repositoryPath, pkg.srcPath);
|
|
|
|
try {
|
|
const files = await getAllSourceFiles(packagePath);
|
|
console.log(`Found ${files.length} source files in ${pkg.name}`);
|
|
|
|
for (let i = 0; i < files.length; i++) {
|
|
const file = files[i]!;
|
|
try {
|
|
const content = await fs.readFile(file, 'utf-8');
|
|
const lines = content.split('\n');
|
|
|
|
const imports = extractImports(content);
|
|
const exports = extractExports(content);
|
|
const language = file.endsWith('.ts') || file.endsWith('.tsx') ? 'typescript' : 'javascript';
|
|
const relativePath = path.relative(repositoryPath, file);
|
|
|
|
// Chunk the file
|
|
for (let j = 0; j < lines.length; j += CHUNK_SIZE - CHUNK_OVERLAP) {
|
|
const startLine = j + 1;
|
|
const endLine = Math.min(j + CHUNK_SIZE, lines.length);
|
|
const chunkLines = lines.slice(j, endLine);
|
|
const chunkContent = chunkLines.join('\n');
|
|
|
|
if (chunkContent.trim().length === 0) {
|
|
continue;
|
|
}
|
|
|
|
// Create embedding text
|
|
const fileName = path.basename(file);
|
|
const dirName = path.dirname(relativePath).split('/').pop() || '';
|
|
const comments = extractComments(chunkContent);
|
|
const embeddingText = `${fileName} ${dirName} ${comments} ${chunkContent.substring(0, 1000)}`;
|
|
|
|
// Generate embedding
|
|
const result = await embedder(embeddingText, {
|
|
pooling: 'mean',
|
|
normalize: true,
|
|
});
|
|
const vector = Array.from(result.data) as number[];
|
|
|
|
// Generate GitHub URL for Editor repo
|
|
const url = `https://github.com/BabylonJS/Editor/blob/master/${relativePath}#L${startLine}-L${endLine}`;
|
|
|
|
chunks.push({
|
|
id: `${relativePath}:${startLine}-${endLine}`,
|
|
filePath: relativePath,
|
|
package: pkg.name,
|
|
content: chunkContent,
|
|
startLine,
|
|
endLine,
|
|
language,
|
|
imports,
|
|
exports,
|
|
url,
|
|
vector,
|
|
});
|
|
}
|
|
|
|
totalFiles++;
|
|
if (totalFiles % 50 === 0) {
|
|
console.log(`Processed ${totalFiles} files, ${chunks.length} chunks...`);
|
|
}
|
|
} catch (error) {
|
|
console.error(`Error processing ${file}:`, error);
|
|
}
|
|
}
|
|
} catch (error) {
|
|
console.error(`Error indexing package ${pkg.name}:`, error);
|
|
}
|
|
}
|
|
|
|
console.log(`\nTotal files processed: ${totalFiles}`);
|
|
console.log(`Total source code chunks: ${chunks.length}`);
|
|
console.log('Creating LanceDB table...');
|
|
|
|
// Drop existing table if it exists
|
|
const tableNames = await db.tableNames();
|
|
if (tableNames.includes(tableName)) {
|
|
await db.dropTable(tableName);
|
|
}
|
|
|
|
// Create new table
|
|
await db.createTable(tableName, chunks);
|
|
console.log('\n✓ Editor source code indexing completed successfully!');
|
|
}
|
|
|
|
// Run the indexer; report failure AND exit non-zero so CI/build scripts can
// detect a broken index run instead of it silently succeeding.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
|