diff --git a/README.md b/README.md new file mode 100644 index 0000000..5e7b6c7 --- /dev/null +++ b/README.md @@ -0,0 +1,322 @@ +# Babylon MCP Server + +A Model Context Protocol (MCP) server that provides AI agents with access to Babylon.js documentation, API references, and source code through semantic search. + +## Overview + +The Babylon MCP server enables AI assistants to: +- Search and retrieve Babylon.js documentation +- Query API documentation for classes, methods, and properties +- Search through Babylon.js source code +- Retrieve specific source code files or line ranges + +This provides a canonical source for Babylon.js framework information, reducing token usage and improving accuracy when working with AI agents. + +## Features + +- **Documentation Search**: Semantic search across Babylon.js documentation +- **API Documentation**: Search TypeScript API documentation with full TSDoc details +- **Source Code Search**: Vector-based semantic search through Babylon.js source code +- **Source Code Retrieval**: Fetch specific files or line ranges from the repository +- **Local Repository Management**: Automatically clones and updates Babylon.js repositories + +## Prerequisites + +- Node.js 18 or higher +- npm or yarn +- ~2GB disk space for repositories and vector database + +## Installation + +1. Clone this repository: +```bash +git clone <repository-url> +cd babylon-mcp +``` + +2. Install dependencies: +```bash +npm install +``` + +3. Build the project: +```bash +npm run build +``` + +## Initial Setup + +Before using the MCP server, you need to index the Babylon.js repositories. This is a one-time setup process. + +### Index All Data (Recommended) + +Run the master indexing script to index documentation, API, and source code: + +```bash +npm run index:all +``` + +This will: +1. Clone the required repositories (Documentation, Babylon.js, havok) +2. Index all documentation files (~5-10 minutes) +3. Index API documentation from TypeScript source (~10-15 minutes) +4.
Index source code from core packages (~15-20 minutes) + +Total indexing time: **30-45 minutes** depending on your system. + +### Index Individual Components + +You can also index components separately: + +```bash +# Index documentation only +npm run index:docs + +# Index API documentation only +npm run index:api + +# Index source code only +npm run index:source +``` + +## Running the Server + +### Development Mode + +Run the server with hot reload: + +```bash +npm run dev +``` + +### Production Mode + +```bash +npm start +``` + +The server runs on **port 4000** by default. + +## Integration with Claude Desktop + +To use this MCP server with Claude Desktop, add it to your Claude configuration file. + +### Configuration File Location + +**macOS**: `~/Library/Application Support/Claude/claude_desktop_config.json` +**Windows**: `%APPDATA%\Claude\claude_desktop_config.json` + +### Configuration + +Add the following to your `claude_desktop_config.json`: + +```json +{ + "mcpServers": { + "babylon-mcp": { + "command": "node", + "args": [ + "/absolute/path/to/babylon-mcp/dist/mcp/index.js" + ], + "env": {} + } + } +} +``` + +Replace `/absolute/path/to/babylon-mcp` with the actual path to your babylon-mcp directory. + +### Restart Claude Desktop + +After updating the configuration, restart Claude Desktop for the changes to take effect. + +## Available MCP Tools + +Once configured, Claude will have access to these tools: + +### 1. search_babylon_docs +Search Babylon.js documentation with semantic search. + +**Parameters:** +- `query` (string, required): Search query +- `category` (string, optional): Filter by category (e.g., "api", "tutorial") +- `limit` (number, optional): Maximum results (default: 5) + +**Example:** +``` +Search for "how to create a mesh" in Babylon.js documentation +``` + +### 2. get_babylon_doc +Retrieve full content of a specific documentation page. 
+ +**Parameters:** +- `path` (string, required): Documentation file path or identifier + +**Example:** +``` +Get the full documentation for "features/featuresDeepDive/mesh/creation" +``` + +### 3. search_babylon_api +Search Babylon.js API documentation (classes, methods, properties). + +**Parameters:** +- `query` (string, required): API search query (e.g., "getMeshByName", "Scene") +- `limit` (number, optional): Maximum results (default: 5) + +**Example:** +``` +Search the API for "getMeshByName" +``` + +### 4. search_babylon_source +Search Babylon.js source code using semantic search. + +**Parameters:** +- `query` (string, required): Search query for source code +- `package` (string, optional): Filter by package (e.g., "core", "gui") +- `limit` (number, optional): Maximum results (default: 5) + +**Example:** +``` +Search the source code for "mesh rendering implementation" +``` + +### 5. get_babylon_source +Retrieve full source code file or specific line range. + +**Parameters:** +- `filePath` (string, required): Relative path from repository root +- `startLine` (number, optional): Start line number (1-indexed) +- `endLine` (number, optional): End line number (1-indexed) + +**Example:** +``` +Get the source code from "packages/dev/core/src/scene.ts" lines 4100-4110 +``` + +## Project Structure + +``` +babylon-mcp/ +├── src/ +│ ├── mcp/ # MCP server implementation +│ │ ├── index.ts # Server entry point +│ │ ├── server.ts # BabylonMCPServer class +│ │ ├── handlers.ts # MCP tool handlers +│ │ └── ... +│ └── search/ # Search and indexing +│ ├── lancedb-search.ts # Search implementation +│ ├── lancedb-indexer.ts # Documentation indexer +│ ├── api-indexer.ts # API indexer +│ ├── source-code-indexer.ts # Source code indexer +│ └── ... 
+├── scripts/ # Indexing scripts +│ ├── index-docs.ts # Index documentation +│ ├── index-api.ts # Index API docs +│ └── index-source.ts # Index source code +├── data/ # Data directory (created during indexing) +│ ├── repositories/ # Cloned repositories +│ └── lancedb/ # Vector database +└── dist/ # Compiled output +``` + +## Development + +### Running Tests + +```bash +# Run tests in watch mode +npm test + +# Run tests once +npm run test:run + +# Run tests with UI +npm run test:ui + +# Run tests with coverage +npm run test:coverage +``` + +### Type Checking + +```bash +npm run typecheck +``` + +### Building + +```bash +npm run build +``` + +## Data Storage + +The server stores data in the `./data` directory: + +- **`./data/repositories/`**: Cloned Git repositories (Documentation, Babylon.js, havok) +- **`./data/lancedb/`**: Vector database containing indexed content + +This directory will be approximately **1.5-2GB** after full indexing. + +## Updating Data + +To update the indexed data with the latest Babylon.js content: + +1. The repositories are automatically updated during indexing +2. 
Re-run the indexing scripts: + +```bash +npm run index:all +``` + +## Troubleshooting + +### Server won't start +- Ensure port 4000 is available +- Check that the project has been built: `npm run build` +- Verify Node.js version is 18 or higher + +### Indexing fails +- Ensure you have internet connectivity (for cloning repositories) +- Check disk space (~2GB required) +- Try indexing components individually to isolate the issue + +### Claude Desktop doesn't see the tools +- Verify the path in `claude_desktop_config.json` is absolute +- Restart Claude Desktop after configuration changes +- Check that the server builds without errors: `npm run build` + +### Search returns no results +- Ensure indexing has completed successfully +- Check that the `./data/lancedb` directory exists and contains data +- Try re-indexing: `npm run index:all` + +## Architecture + +The server uses: +- **LanceDB**: Vector database for semantic search +- **Xenova/all-MiniLM-L6-v2**: Transformer model for embeddings +- **TypeDoc**: For extracting TypeScript API documentation +- **Express.js**: Web server framework +- **MCP SDK**: Model Context Protocol implementation + +## Contributing + +Contributions are welcome! 
Please ensure: +- All tests pass: `npm test` +- Type checking passes: `npm run typecheck` +- Code follows the project style + +## License + +ISC + +## Resources + +- [Babylon.js Documentation](https://doc.babylonjs.com/) +- [Babylon.js Repository](https://github.com/BabylonJS/Babylon.js) +- [Model Context Protocol](https://modelcontextprotocol.io/) +- [Claude Desktop](https://claude.ai/download) diff --git a/package.json b/package.json index 37e17f7..0b410d0 100644 --- a/package.json +++ b/package.json @@ -13,8 +13,13 @@ "test:ui": "vitest --ui", "test:run": "vitest run", "test:coverage": "vitest run --coverage", - "index-docs": "tsx scripts/index-docs.ts", - "index-api": "tsx scripts/index-api.ts" + "index:docs": "tsx scripts/index-docs.ts", + "index:api": "tsx scripts/index-api.ts", + "index:source": "tsx scripts/index-source.ts", + "index:all": "npm run index:docs && npm run index:api && npm run index:source", + "index-docs": "npm run index:docs", + "index-api": "npm run index:api", + "index-source": "npm run index:source" }, "keywords": [], "author": "", diff --git a/scripts/index-source.ts b/scripts/index-source.ts new file mode 100644 index 0000000..a74c1f8 --- /dev/null +++ b/scripts/index-source.ts @@ -0,0 +1,39 @@ +import { SourceCodeIndexer } from '../src/search/source-code-indexer.js'; + +async function main() { + // Define packages to index + const packages = [ + 'core', + 'gui', + 'materials', + 'loaders', + 'serializers', + ]; + + console.log('Starting source code indexing for Babylon.js packages...'); + console.log(`Indexing ${packages.length} packages:`, packages.join(', ')); + console.log(); + + const indexer = new SourceCodeIndexer( + './data/lancedb', + 'babylon_source_code', + './data/repositories/Babylon.js', + 200, // chunk size (lines) + 20 // chunk overlap (lines) + ); + + try { + await indexer.initialize(); + await indexer.indexSourceCode(packages); + await indexer.close(); + console.log('\n✓ Source code indexing completed successfully!'); 
/**
 * Smoke-test entry point (scripts/test-source-indexing.ts): indexes only the
 * `core` package into the throw-away `babylon_source_test` table, using smaller
 * chunks than the production indexing script.
 *
 * The indexer is always closed, even when indexing fails, and a failure ends
 * the process with exit code 1.
 */
async function main(): Promise<void> {
  // Start with just core package for testing
  const packages = ['core'];

  console.log('Testing source code indexing with core package...');
  console.log();

  const indexer = new SourceCodeIndexer(
    './data/lancedb',
    'babylon_source_test',
    './data/repositories/Babylon.js',
    100, // smaller chunk size for testing
    10 // smaller overlap for testing
  );

  let failed = false;
  try {
    await indexer.initialize();
    await indexer.indexSourceCode(packages);
    console.log('\n✓ Test source code indexing completed successfully!');
  } catch (error) {
    failed = true;
    console.error('Error during test indexing:', error);
    if (error instanceof Error) {
      console.error('Stack trace:', error.stack);
    }
  } finally {
    // Runs on both paths; previously close() was skipped when indexing threw.
    await indexer.close();
  }

  if (failed) {
    process.exit(1);
  }
}

// An unexpected rejection must not leave the process with a success exit code.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
/** Prints a section title framed by two 80-character rules. */
function printBanner(title: string): void {
  const rule = '='.repeat(80);
  console.log(rule);
  console.log(title);
  console.log(rule);
}

/**
 * Manual smoke test (scripts/test-source-search.ts) for source-code search.
 *
 * Runs two semantic searches against the `babylon_source_test` table and one
 * direct file read, printing the results. The search instance is always
 * closed, and any failure sets a non-zero exit code instead of being swallowed.
 */
async function main(): Promise<void> {
  console.log('Testing source code search...\n');

  // Note: We use babylon_docs as the main table, but specify babylon_source_test for source code search
  const search = new LanceDBSearch('./data/lancedb', 'babylon_docs');

  try {
    // initialize() is inside the try so a failure here still reaches close().
    await search.initialize();

    // Test 1: Search for getMeshByName implementation
    printBanner('Test 1: Searching for "getMeshByName implementation"');
    const results1 = await search.searchSourceCode('getMeshByName implementation', {
      limit: 3,
      tableName: 'babylon_source_test',
    });
    console.log(`Found ${results1.length} results:\n`);

    for (const result of results1) {
      console.log(`File: ${result.filePath}`);
      console.log(`Lines: ${result.startLine}-${result.endLine}`);
      console.log(`Score: ${(result.score * 100).toFixed(1)}%`);
      console.log(`Preview: ${result.content.substring(0, 200)}...`);
      console.log(`URL: ${result.url}`);
      console.log('-'.repeat(80));
    }

    // Test 2: Get specific source file
    console.log('\n');
    printBanner('Test 2: Getting source file scene.ts lines 4100-4110');
    const sourceCode = await search.getSourceFile('packages/dev/core/src/scene.ts', 4100, 4110);
    if (sourceCode) {
      console.log(sourceCode);
    } else {
      console.log('File not found');
    }

    // Test 3: Search for mesh management
    console.log('\n');
    printBanner('Test 3: Searching for "mesh management scene"');
    const results3 = await search.searchSourceCode('mesh management scene', {
      limit: 2,
      tableName: 'babylon_source_test',
    });
    console.log(`Found ${results3.length} results:\n`);

    for (const result of results3) {
      console.log(`File: ${result.filePath}`);
      console.log(`Lines: ${result.startLine}-${result.endLine}`);
      console.log(`Exports: ${result.exports}`);
      console.log(`Score: ${(result.score * 100).toFixed(1)}%`);
      console.log('-'.repeat(80));
    }
  } catch (error) {
    console.error('Error during search:', error);
    if (error instanceof Error) {
      console.error('Stack:', error.stack);
    }
    // Report the failure to the shell; previously the script exited with 0.
    process.exitCode = 1;
  } finally {
    await search.close();
  }
}

main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
/**
 * Registers the `search_babylon_source` MCP tool.
 *
 * The tool runs a semantic search over the indexed source code, optionally
 * restricted to one package, and answers with a JSON document listing ranked
 * hits. Each hit carries at most the first 500 characters of its content.
 * Failures are reported as plain text content rather than thrown.
 */
function registerSearchSourceTool(server: McpServer): void {
  const inputSchema = {
    query: z
      .string()
      .describe('Search query for source code (e.g., "getMeshByName implementation", "scene rendering")'),
    package: z
      .string()
      .optional()
      .describe('Optional package filter (e.g., "core", "gui", "materials")'),
    limit: z
      .number()
      .optional()
      .default(5)
      .describe('Maximum number of results to return (default: 5)'),
  };

  server.registerTool(
    'search_babylon_source',
    { description: 'Search Babylon.js source code files', inputSchema },
    async ({ query, package: packageFilter, limit = 5 }) => {
      try {
        const search = await getSearchInstance();

        // Only forward the package filter when the caller actually supplied one.
        const hits = packageFilter
          ? await search.searchSourceCode(query, { package: packageFilter, limit })
          : await search.searchSourceCode(query, { limit });

        if (hits.length === 0) {
          return {
            content: [
              {
                type: 'text',
                text: `No source code found for "${query}". Try different search terms or check if the source code has been indexed.`,
              },
            ],
          };
        }

        // Format results for better readability
        const formattedResults = hits.map((hit, position) => {
          const codeSnippet =
            hit.content.length > 500 ? hit.content.substring(0, 500) + '...' : hit.content;

          return {
            rank: position + 1,
            filePath: hit.filePath,
            package: hit.package,
            startLine: hit.startLine,
            endLine: hit.endLine,
            language: hit.language,
            codeSnippet,
            imports: hit.imports,
            exports: hit.exports,
            url: hit.url,
            relevance: (hit.score * 100).toFixed(1) + '%',
          };
        });

        const payload = {
          query,
          totalResults: hits.length,
          results: formattedResults,
        };

        return {
          content: [{ type: 'text', text: JSON.stringify(payload, null, 2) }],
        };
      } catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        return {
          content: [{ type: 'text', text: `Error searching source code: ${reason}` }],
        };
      }
    }
  );
}
/**
 * Describes which file lines a returned snippet covers.
 *
 * @param sourceCode Text returned for the request (whole file or a slice).
 * @param firstLine 1-indexed line number of the first line in `sourceCode`.
 * @returns `startLine`/`endLine` of the text actually returned, and
 *          `totalLines`, the number of lines contained in `sourceCode`.
 */
function describeReturnedRange(
  sourceCode: string,
  firstLine: number
): { startLine: number; endLine: number; totalLines: number } {
  const totalLines = sourceCode.split('\n').length;
  return { startLine: firstLine, endLine: firstLine + totalLines - 1, totalLines };
}

/**
 * Registers the `get_babylon_source` MCP tool.
 *
 * The tool returns a whole source file or a 1-indexed, inclusive line range.
 * A one-sided range is honoured: only `startLine` reads to the end of the
 * file, only `endLine` reads from line 1. The reported `startLine`/`endLine`
 * always describe the text in `content`, so an `endLine` past the end of the
 * file is reported as the real last line. Failures are reported as plain text
 * content rather than thrown.
 */
function registerGetSourceTool(server: McpServer): void {
  server.registerTool(
    'get_babylon_source',
    {
      description: 'Retrieve full Babylon.js source code file or specific line range',
      inputSchema: {
        filePath: z.string().describe('Relative file path from repository root (e.g., "packages/dev/core/src/scene.ts")'),
        startLine: z
          .number()
          .int()
          .min(1)
          .optional()
          .describe('Optional start line number (1-indexed)'),
        endLine: z
          .number()
          .int()
          .min(1)
          .optional()
          .describe('Optional end line number (1-indexed)'),
      },
    },
    async ({ filePath, startLine, endLine }) => {
      try {
        if (startLine !== undefined && endLine !== undefined && endLine < startLine) {
          return {
            content: [
              {
                type: 'text',
                text: `Invalid line range for ${filePath}: endLine (${endLine}) is before startLine (${startLine}).`,
              },
            ],
          };
        }

        const search = await getSearchInstance();
        const hasRange = startLine !== undefined || endLine !== undefined;
        const firstLine = startLine ?? 1;

        // Always pass both bounds when a range is requested, so a one-sided
        // range is sliced instead of silently returning the whole file.
        const sourceCode = hasRange
          ? await search.getSourceFile(filePath, firstLine, endLine ?? Number.MAX_SAFE_INTEGER)
          : await search.getSourceFile(filePath);

        if (!sourceCode) {
          return {
            content: [
              {
                type: 'text',
                text: `Source file not found: ${filePath}. The path may be incorrect or the file does not exist in the repository.`,
              },
            ],
          };
        }

        return {
          content: [
            {
              type: 'text',
              text: JSON.stringify(
                {
                  filePath,
                  ...describeReturnedRange(sourceCode, firstLine),
                  language: filePath.endsWith('.ts') || filePath.endsWith('.tsx') ? 'typescript' : 'javascript',
                  content: sourceCode,
                },
                null,
                2
              ),
            },
          ],
        };
      } catch (error) {
        return {
          content: [
            {
              type: 'text',
              text: `Error retrieving source file: ${error instanceof Error ? error.message : String(error)}`,
            },
          ],
        };
      }
    }
  );
}
/**
 * Runs a semantic (vector) search over indexed source-code chunks.
 *
 * @param query Natural-language or identifier query; it is embedded and
 *              compared against the stored chunk vectors.
 * @param options.package Restricts hits to rows whose `package` column equals
 *                        this value.
 * @param options.limit Maximum number of hits; values that are missing or
 *                      not positive fall back to 5.
 * @param options.tableName Table to search; defaults to `babylon_source_code`.
 * @returns The matching rows, each extended with a `score` in `[0, 1]`
 *          computed as `1 - _distance` (0 when no distance is reported).
 * @throws Error when `initialize()` has not been called.
 */
async searchSourceCode(
  query: string,
  options: { package?: string; limit?: number; tableName?: string } = {}
): Promise<
  Array<{
    filePath: string;
    package: string;
    content: string;
    startLine: number;
    endLine: number;
    language: string;
    imports: string;
    exports: string;
    url: string;
    score: number;
  }>
> {
  if (!this.db || !this.embedder) {
    throw new Error('Search not initialized');
  }

  const limit =
    options.limit !== undefined && options.limit > 0
      ? Math.max(1, Math.floor(options.limit))
      : 5;
  const tableName = options.tableName || 'babylon_source_code';
  const queryVector = await this.generateEmbedding(query);

  const sourceTable = await this.db.openTable(tableName);
  let searchQuery = sourceTable.vectorSearch(queryVector).limit(limit);

  if (options.package) {
    // The filter is a SQL expression and the value comes from tool input:
    // double any single quote so it cannot terminate the string literal.
    const escapedPackage = options.package.replace(/'/g, "''");
    searchQuery = searchQuery.where(`package = '${escapedPackage}'`);
  }

  const results = await searchQuery.toArray();
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- rows come back untyped from LanceDB
  return results.map((doc: any) => ({
    ...doc,
    // A distance of exactly 0 is the best possible match, so test the type
    // rather than truthiness (which used to turn a perfect hit into score 0).
    score: typeof doc._distance === 'number' ? Math.max(0, 1 - doc._distance) : 0,
  }));
}
/**
 * Reads a file from the local Babylon.js checkout, optionally limited to a
 * 1-indexed, inclusive line range.
 *
 * @param filePath Path relative to the repository root. Paths that resolve
 *                 outside the repository (e.g. via `..` or an absolute path)
 *                 are rejected.
 * @param startLine First line to return; defaults to 1 and is clamped to 1.
 * @param endLine Last line to return; defaults to, and is clamped to, the
 *                last line of the file.
 * @returns The requested text, or `null` when the path is rejected or the
 *          file cannot be read (the reason is logged).
 */
async getSourceFile(
  filePath: string,
  startLine?: number,
  endLine?: number
): Promise<string | null> {
  try {
    const repositoryRoot = path.resolve('./data/repositories/Babylon.js');
    const fullPath = path.resolve(repositoryRoot, filePath);

    // filePath comes from tool input; never read outside the checkout.
    // NOTE(review): symlinks inside the checkout are not resolved here.
    const relativeToRoot = path.relative(repositoryRoot, fullPath);
    const escapesRoot =
      relativeToRoot === '' ||
      relativeToRoot === '..' ||
      relativeToRoot.startsWith('..' + path.sep) ||
      path.isAbsolute(relativeToRoot);
    if (escapesRoot) {
      console.error(`Refusing to read source file outside the repository: ${filePath}`);
      return null;
    }

    const content = await fs.readFile(fullPath, 'utf-8');
    if (startLine === undefined && endLine === undefined) {
      return content;
    }

    // Honour one-sided ranges and keep both bounds inside the file, so a
    // start below 1 cannot turn into a negative slice index.
    const lines = content.split('\n');
    const firstLine = Math.max(1, Math.floor(startLine ?? 1));
    const lastLine = Math.min(lines.length, Math.floor(endLine ?? lines.length));
    return lines.slice(firstLine - 1, lastLine).join('\n');
  } catch (error) {
    console.error(`Error reading source file ${filePath}:`, error);
    return null;
  }
}
/** One indexed slice of a source file, stored as a row in the LanceDB table. */
export interface SourceCodeChunk {
  /** Row id in the form `<filePath>:<startLine>-<endLine>`. */
  id: string;
  /** Path relative to the repository root, with `/` separators. */
  filePath: string;
  /** Package the file was indexed under (e.g. `core`). */
  package: string;
  /** Text of the chunk. */
  content: string;
  /** 1-indexed first line of the chunk. */
  startLine: number;
  /** 1-indexed last line of the chunk (inclusive). */
  endLine: number;
  /** `typescript` for `.ts`/`.tsx` files, otherwise `javascript`. */
  language: string;
  /** Comma-separated module specifiers imported by the whole file (max 20). */
  imports: string;
  /** Comma-separated names exported by the whole file (max 20). */
  exports: string;
  /** GitHub link to the chunk's line range. */
  url: string;
  /** Embedding of the chunk. */
  vector: number[];
}

/**
 * Splits Babylon.js source files into overlapping line chunks, embeds each
 * chunk and stores the result in a LanceDB table.
 *
 * Usage: `initialize()` → `indexSourceCode(packages)` → `close()`.
 */
export class SourceCodeIndexer {
  /** Directory names that are never descended into while collecting files. */
  private static readonly SKIPPED_DIRECTORIES: ReadonlySet<string> = new Set([
    'node_modules',
    'dist',
    'build',
    'lib',
    '.git',
  ]);

  /** File extensions that are indexed. */
  private static readonly SOURCE_FILE_PATTERN = /\.(ts|tsx|js|jsx)$/;

  /** Keywords the export pattern can capture by backtracking; never real export names. */
  private static readonly NON_NAME_KEYWORDS: ReadonlySet<string> = new Set([
    'class',
    'function',
    'interface',
    'type',
    'const',
    'let',
    'var',
    'enum',
    'namespace',
    'default',
    'declare',
    'abstract',
    'async',
    'new',
  ]);

  // The LanceDB connection and the transformers pipeline are used untyped here.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  private db: any;
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  private embedder: any;
  private readonly dbPath: string;
  private readonly tableName: string;
  private readonly repositoryPath: string;
  private readonly chunkSize: number;
  private readonly chunkOverlap: number;

  /**
   * @param dbPath Directory of the LanceDB database.
   * @param tableName Table that receives the chunks (replaced on each run).
   * @param repositoryPath Root of the local Babylon.js checkout.
   * @param chunkSize Lines per chunk; must be a positive integer.
   * @param chunkOverlap Lines shared by consecutive chunks; must be an integer
   *                     in `[0, chunkSize)`.
   * @throws RangeError when the chunk settings could never advance through a file.
   */
  constructor(
    dbPath: string = './data/lancedb',
    tableName: string = 'babylon_source_code',
    repositoryPath: string = './data/repositories/Babylon.js',
    chunkSize: number = 200,
    chunkOverlap: number = 20
  ) {
    if (!Number.isInteger(chunkSize) || chunkSize <= 0) {
      throw new RangeError(`chunkSize must be a positive integer, got ${chunkSize}`);
    }
    // An overlap >= chunkSize gives a non-positive step, i.e. an endless loop.
    if (!Number.isInteger(chunkOverlap) || chunkOverlap < 0 || chunkOverlap >= chunkSize) {
      throw new RangeError(
        `chunkOverlap must be an integer in [0, ${chunkSize}), got ${chunkOverlap}`
      );
    }

    this.dbPath = dbPath;
    this.tableName = tableName;
    this.repositoryPath = repositoryPath;
    this.chunkSize = chunkSize;
    this.chunkOverlap = chunkOverlap;
  }

  /** Opens the LanceDB connection and loads the embedding model. */
  async initialize(): Promise<void> {
    console.log('Initializing LanceDB connection...');
    this.db = await connect(this.dbPath);

    console.log('Loading embedding model...');
    this.embedder = await pipeline(
      'feature-extraction',
      'Xenova/all-MiniLM-L6-v2'
    );
    console.log('Embedding model loaded');
  }

  /**
   * Indexes `packages/dev/<pkg>/src` for every given package and replaces the
   * target table with the resulting chunks.
   *
   * A file or package that fails is logged and skipped. The existing table is
   * only dropped once there is something to replace it with.
   *
   * @throws Error when the indexer is not initialized or no chunk was produced.
   */
  async indexSourceCode(packages: string[] = ['core']): Promise<void> {
    if (!this.db || !this.embedder) {
      throw new Error('Indexer not initialized. Call initialize() first.');
    }

    const chunks: SourceCodeChunk[] = [];

    for (const pkg of packages) {
      console.log(`\nIndexing package: ${pkg}...`);
      const packagePath = path.join(this.repositoryPath, 'packages/dev', pkg, 'src');

      try {
        const files = await this.getAllSourceFiles(packagePath);
        console.log(`Found ${files.length} source files in ${pkg}`);

        // Counted per package so the progress line compares like with like.
        let processedInPackage = 0;
        for (const file of files) {
          try {
            const fileChunks = await this.processFile(file, pkg);
            chunks.push(...fileChunks);
          } catch (error) {
            console.error(`Error processing ${file}:`, error);
          }

          processedInPackage++;
          if (processedInPackage % 50 === 0) {
            console.log(`Processed ${processedInPackage}/${files.length} files...`);
          }
        }
      } catch (error) {
        console.error(`Error indexing package ${pkg}:`, error);
      }
    }

    console.log(`\nTotal source code chunks: ${chunks.length}`);

    if (chunks.length === 0) {
      // Fail before dropping anything: an empty run must not wipe a good index.
      throw new Error(
        `No source code chunks were produced for packages [${packages.join(', ')}]; ` +
          `table "${this.tableName}" was left untouched.`
      );
    }

    console.log('Creating LanceDB table...');

    // Drop existing table if it exists
    const tableNames: string[] = await this.db.tableNames();
    if (tableNames.includes(this.tableName)) {
      await this.db.dropTable(this.tableName);
    }

    // Create new table
    await this.db.createTable(this.tableName, chunks);
    console.log('Source code indexing complete!');
  }

  /**
   * Recursively collects `.ts`, `.tsx`, `.js` and `.jsx` files below `dir`,
   * skipping dependency, build-output and VCS directories. A directory that
   * cannot be read is reported and contributes no files.
   */
  private async getAllSourceFiles(dir: string): Promise<string[]> {
    const files: string[] = [];

    try {
      const entries = await fs.readdir(dir, { withFileTypes: true });

      for (const entry of entries) {
        const fullPath = path.join(dir, entry.name);

        if (entry.isDirectory()) {
          if (!SourceCodeIndexer.SKIPPED_DIRECTORIES.has(entry.name)) {
            files.push(...(await this.getAllSourceFiles(fullPath)));
          }
        } else if (entry.isFile() && SourceCodeIndexer.SOURCE_FILE_PATTERN.test(entry.name)) {
          files.push(fullPath);
        }
      }
    } catch (error) {
      // Best effort: a missing directory (e.g. a mistyped package name) yields
      // nothing, but say so instead of failing silently.
      console.warn(
        `Skipping unreadable directory ${dir}:`,
        error instanceof Error ? error.message : error
      );
    }

    return files;
  }

  /**
   * Computes the 1-indexed, inclusive line ranges of the chunks for a file.
   * Consecutive ranges overlap by `chunkOverlap` lines, and the walk stops at
   * the first range that reaches the end of the file.
   */
  private computeChunkRanges(lineCount: number): Array<{ startLine: number; endLine: number }> {
    const step = this.chunkSize - this.chunkOverlap;
    const ranges: Array<{ startLine: number; endLine: number }> = [];

    for (let offset = 0; offset < lineCount; offset += step) {
      const endLine = Math.min(offset + this.chunkSize, lineCount);
      ranges.push({ startLine: offset + 1, endLine });

      // A later range would only repeat lines already covered by this one.
      if (endLine === lineCount) {
        break;
      }
    }

    return ranges;
  }

  /** Reads one file and turns it into embedded chunks; blank chunks are skipped. */
  private async processFile(filePath: string, pkg: string): Promise<SourceCodeChunk[]> {
    const content = await fs.readFile(filePath, 'utf-8');
    const lines = content.split(/\r?\n/);
    const chunks: SourceCodeChunk[] = [];

    // File-level metadata, repeated on every chunk of the file.
    const imports = this.extractImports(content);
    const exports = this.extractExports(content);
    const language = /\.tsx?$/.test(filePath) ? 'typescript' : 'javascript';

    // Ids and URLs must use `/` regardless of the OS running the indexer.
    const relativePath = this.toPosixPath(path.relative(this.repositoryPath, filePath));

    for (const { startLine, endLine } of this.computeChunkRanges(lines.length)) {
      const chunkContent = lines.slice(startLine - 1, endLine).join('\n');

      // Skip empty chunks
      if (chunkContent.trim().length === 0) {
        continue;
      }

      const embeddingText = this.createEmbeddingText(chunkContent, relativePath);
      const vector = await this.generateEmbedding(embeddingText);

      chunks.push({
        id: `${relativePath}:${startLine}-${endLine}`,
        filePath: relativePath,
        package: pkg,
        content: chunkContent,
        startLine,
        endLine,
        language,
        imports,
        exports,
        url: this.generateGitHubUrl(relativePath, startLine, endLine),
        vector,
      });
    }

    return chunks;
  }

  /** Converts OS-specific path separators to `/`. */
  private toPosixPath(filePath: string): string {
    return filePath.split(path.sep).join('/');
  }

  /**
   * Lists the distinct module specifiers of static `import` statements, in
   * order of first appearance, limited to 20. Side-effect imports
   * (`import 'x'`) and files written without semicolons are handled.
   */
  private extractImports(content: string): string {
    // The clause before `from` may not contain a quote or `;`, so one match
    // can never run across a previous import's module string.
    const importRegex = /\bimport\s+(?:type\s+)?(?:[^'";]*?\s+from\s+)?['"]([^'"]+)['"]/g;
    const modules = new Set<string>();

    for (const match of content.matchAll(importRegex)) {
      if (match[1]) {
        modules.add(match[1]);
      }
    }

    return [...modules].slice(0, 20).join(', '); // Limit to first 20 imports
  }

  /**
   * Lists the distinct names of exported declarations, in order of first
   * appearance, limited to 20. Handles `export default class Foo`,
   * `export abstract class Foo`, `export async function foo` and
   * `export default foo`.
   */
  private extractExports(content: string): string {
    const exportRegex =
      /\bexport\s+(?:default\s+)?(?:(?:declare|abstract|async)\s+)*(?:(?:class|function\*?|interface|type|const\s+enum|const|let|var|enum|namespace)\s+)?([A-Za-z_$][A-Za-z0-9_$]*)/g;
    const names = new Set<string>();

    for (const match of content.matchAll(exportRegex)) {
      const name = match[1];
      // Anonymous or destructured exports make the pattern fall back to a
      // keyword (e.g. `export default function () {}`); those are not names.
      if (name && !SourceCodeIndexer.NON_NAME_KEYWORDS.has(name)) {
        names.add(name);
      }
    }

    return [...names].slice(0, 20).join(', '); // Limit to first 20 exports
  }

  /**
   * Builds the text that is embedded for a chunk: file name, parent directory
   * name, up to five comments, and the first 1000 characters of the code.
   */
  private createEmbeddingText(code: string, filePath: string): string {
    const fileName = path.basename(filePath);
    const dirName = path.basename(path.dirname(filePath));

    // Extract comments for context
    const comments = this.extractComments(code);

    return `${fileName} ${dirName} ${comments} ${code.substring(0, 1000)}`;
  }

  /**
   * Returns up to five comments joined by spaces: single-line comments first,
   * then block comments with their leading `*` gutters removed.
   */
  private extractComments(code: string): string {
    const comments: string[] = [];

    // Single-line comments; `(?<!:)` skips the `//` of URLs such as `https://`.
    for (const match of code.matchAll(/(?<!:)\/\/\s*(.+)$/gm)) {
      if (match[1]) {
        comments.push(match[1].trim());
      }
    }

    // Multi-line comments
    for (const match of code.matchAll(/\/\*\*?([\s\S]*?)\*\//g)) {
      const text = (match[1] ?? '')
        .replace(/^[ \t]*\*+[ \t]?/gm, '')
        .replace(/\s+/g, ' ')
        .trim();
      if (text) {
        comments.push(text);
      }
    }

    return comments.slice(0, 5).join(' ');
  }

  /**
   * Embeds `text` with mean pooling and normalization.
   *
   * @throws Error when the embedding model has not been loaded.
   */
  private async generateEmbedding(text: string): Promise<number[]> {
    if (!this.embedder) {
      throw new Error('Embedder not initialized');
    }

    const result = await this.embedder(text, {
      pooling: 'mean',
      normalize: true,
    });

    return Array.from(result.data as ArrayLike<number>);
  }

  /** Builds the GitHub URL of a line range on the `master` branch. */
  private generateGitHubUrl(relativePath: string, startLine: number, endLine: number): string {
    const urlPath = this.toPosixPath(relativePath);
    return `https://github.com/BabylonJS/Babylon.js/blob/master/${urlPath}#L${startLine}-L${endLine}`;
  }

  /** Logs that the indexer is done; nothing is released here. */
  async close(): Promise<void> {
    console.log('Source code indexer closed');
  }
}