babylon-mcp/src/search/api-indexer.ts
Michael Mainguy 5459fe9179 feat: Add TypeScript API documentation indexing and search with improved test coverage
## New Features
- Implemented TSDoc extraction using TypeDoc API
- Added API documentation indexing with LanceDB vector search
- Created search_babylon_api MCP tool for querying API docs
- Added 6 indexing and testing scripts

## API Indexing System
- TSDocExtractor: Parses TypeScript source files and extracts documentation
- ApiIndexer: Converts API docs to embeddings and stores in LanceDB
- Support for all Babylon.js packages (core, gui, materials, loaders, etc.)
- Successfully indexed 44,253 API entries from core package

## Bug Fixes
- Fixed TypeScript strict mode errors with exactOptionalPropertyTypes
- Fixed optional property handling in tsConfigPath and returns fields
- Resolved EventEmitter MaxListeners warning in test suite
- Updated all failing handler tests for real implementation

## Test Coverage Improvements
- Added 27 new tests (92 → 119 tests passing)
- Lines: 93.88% (was 82.53%, target 80%) ✓
- Functions: 100% (was 91.17%, target 80%) ✓
- Statements: 93.3% (was 81.58%, target 80%) ✓
- Branches: 69.72% (was 51.37%, target 75%)

## New Test Files
- src/search/lancedb-search.test.ts (15 tests)
- Enhanced handlers.test.ts with API search tests
- Enhanced document-parser.test.ts with edge case tests

## Scripts Added
- scripts/index-api.ts: Index all Babylon.js API documentation
- scripts/test-api-indexing.ts: Test API indexing for core package
- scripts/test-api-search.ts: Test API search functionality
- scripts/get-api-details.ts: Display detailed API documentation
- scripts/search-handmenu-api.ts: Search for HandMenu API examples

## Technical Details
- TypeDoc integration for TSDoc extraction
- Vector embeddings using Xenova/all-MiniLM-L6-v2 model
- Semantic search across 11 Babylon.js packages
- GitHub source links with line numbers in search results

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-23 05:58:16 -06:00

198 lines
5.7 KiB
TypeScript

import { connect } from '@lancedb/lancedb';
import { pipeline } from '@xenova/transformers';
import type { ApiDocumentation } from './types.js';
import { TSDocExtractor } from './tsdoc-extractor.js';
/**
 * Flat, string-oriented record for one API documentation entry, paired with
 * its embedding vector. This is the row shape that `ApiIndexer` passes to
 * LanceDB's `createTable`.
 *
 * Structured fields of the source `ApiDocumentation` (examples, parameters,
 * returns, see) are serialized to strings, and optional fields are replaced
 * by an empty string when absent, so every field always has a value.
 */
export interface EmbeddedApiDoc {
/** Identifier of the form `api_<kind>_<fullName>`, where every non-alphanumeric character of the full name is replaced by `_`. */
id: string;
/** Name of the documented entry, copied from the source documentation. */
name: string;
/** Full name of the entry, copied from the source documentation. */
fullName: string;
/** Kind of the entry, copied from the source documentation. */
kind: string;
/** Summary text, copied from the source documentation. */
summary: string;
/** Description text, copied from the source documentation. */
description: string;
/** All examples joined into one string, separated by `\n\n---\n\n`. */
examples: string;
/** JSON-serialized parameter list. */
parameters: string;
/** JSON-serialized return information, or an empty string when there is none. */
returns: string;
/** Type string of the entry, or an empty string when absent. */
type: string;
/** Deprecation text, or an empty string when absent. */
deprecated: string;
/** "See also" references joined with `, `. */
see: string;
/** "Since" text, or an empty string when absent. */
since: string;
/** Path of the source file the entry was extracted from. */
sourceFile: string;
/** Line number within `sourceFile`. */
sourceLine: number;
/** `api/<package>` when the source path contains `packages/dev/<package>/`, otherwise `api/<lowercased kind>`. */
category: string;
/** GitHub link to the source location in the BabylonJS/Babylon.js repository (master branch). */
url: string;
/** Embedding vector produced by the feature-extraction model for this entry. */
vector: number[];
}
/**
 * Extracts Babylon.js API documentation with `TSDocExtractor`, embeds each
 * entry with a sentence-embedding model, and stores the result in a LanceDB
 * table for vector search.
 *
 * Typical usage: construct, `await initialize()`, `await indexApi()`, then
 * `await close()`.
 */
export class ApiIndexer {
  // Both handles come from untyped third-party factories (`connect`, `pipeline`)
  // and stay `undefined` until `initialize()` has run.
  private db: any;
  private embedder: any;
  private readonly dbPath: string;
  private readonly tableName: string;
  private readonly entryPoints: string[];
  private readonly tsConfigPath?: string | undefined;

  /**
   * @param dbPath - Location of the LanceDB database.
   * @param tableName - Name of the table that is (re)created by `indexApi()`.
   * @param entryPoints - Entry points handed to the TSDoc extractor.
   * @param tsConfigPath - Optional tsconfig path forwarded to the extractor.
   */
  constructor(
    dbPath: string = './data/lancedb',
    tableName: string = 'babylon_api',
    entryPoints: string[] = [],
    tsConfigPath?: string | undefined
  ) {
    this.dbPath = dbPath;
    this.tableName = tableName;
    this.entryPoints = entryPoints;
    this.tsConfigPath = tsConfigPath;
  }

  /**
   * Opens the LanceDB connection and loads the embedding model.
   * Must complete before `indexApi()` is called.
   */
  async initialize(): Promise<void> {
    console.log('Initializing LanceDB connection...');
    this.db = await connect(this.dbPath);
    console.log('Loading embedding model...');
    this.embedder = await pipeline(
      'feature-extraction',
      'Xenova/all-MiniLM-L6-v2'
    );
    console.log('Embedding model loaded');
  }

  /**
   * Extracts the API documentation, embeds every entry and replaces the
   * target table with the new rows.
   *
   * Entries that fail to embed are logged and skipped (best effort). If no
   * entry could be embedded at all, the existing table is left untouched.
   *
   * @throws Error if `initialize()` has not been called (or `close()` was).
   * @throws Error if no documentation entry could be embedded.
   */
  async indexApi(): Promise<void> {
    if (!this.embedder || !this.db) {
      throw new Error('Indexer not initialized. Call initialize() first.');
    }

    // Extract API documentation using TypeDoc
    console.log('Extracting API documentation with TypeDoc...');
    const extractor = new TSDocExtractor();
    // The extractor's config type is not visible here, so the object is built loosely.
    const config: any = {
      entryPoints: this.entryPoints,
      includePrivate: false,
    };
    // Only include tsConfigPath if it's defined to satisfy exactOptionalPropertyTypes
    if (this.tsConfigPath !== undefined) {
      config.tsConfigPath = this.tsConfigPath;
    }
    await extractor.initialize(config);
    const apiDocs = await extractor.extract();
    console.log(`Extracted ${apiDocs.length} API documentation entries`);

    // Convert to embedded documents
    console.log('Converting to embedded documents...');
    const embeddedDocs: EmbeddedApiDoc[] = [];
    let failedCount = 0;
    for (const [i, doc] of apiDocs.entries()) {
      if (!doc) continue;
      try {
        embeddedDocs.push(await this.processApiDoc(doc));
        if ((i + 1) % 100 === 0) {
          console.log(`Processed ${i + 1}/${apiDocs.length} API docs`);
        }
      } catch (error) {
        // Best effort: one bad entry must not abort the whole indexing run.
        failedCount++;
        console.error(`Error processing ${doc.fullName}:`, error);
      }
    }
    console.log(`\nTotal API docs embedded: ${embeddedDocs.length}`);
    if (failedCount > 0) {
      console.warn(`Skipped ${failedCount} API docs due to errors`);
    }

    // Refuse to continue with nothing to store: dropping the table first would
    // destroy a working index, and a table cannot be created from zero rows
    // without an explicit schema.
    if (embeddedDocs.length === 0) {
      throw new Error(
        'No API documentation entries were embedded; existing index left untouched.'
      );
    }

    console.log('Creating LanceDB table...');
    // Drop existing table if it exists
    const tableNames = await this.db.tableNames();
    if (tableNames.includes(this.tableName)) {
      await this.db.dropTable(this.tableName);
    }
    // Create new table with embedded documents
    await this.db.createTable(this.tableName, embeddedDocs);
    console.log('API indexing complete!');
  }

  /**
   * Converts one extracted documentation entry into a storable row,
   * including its embedding vector, source URL and category.
   */
  private async processApiDoc(doc: ApiDocumentation): Promise<EmbeddedApiDoc> {
    const vector = await this.generateEmbedding(this.createEmbeddingText(doc));
    return {
      id: this.generateDocId(doc.fullName, doc.kind),
      name: doc.name,
      fullName: doc.fullName,
      kind: doc.kind,
      summary: doc.summary,
      description: doc.description,
      examples: doc.examples.join('\n\n---\n\n'),
      parameters: JSON.stringify(doc.parameters),
      returns: doc.returns ? JSON.stringify(doc.returns) : '',
      type: doc.type || '',
      deprecated: doc.deprecated || '',
      see: doc.see.join(', '),
      since: doc.since || '',
      sourceFile: doc.sourceFile,
      sourceLine: doc.sourceLine,
      // Category is derived from the package directory, falling back to the kind
      category: this.determineCategory(doc),
      // URL points to the GitHub source location
      url: this.generateGitHubUrl(doc.sourceFile, doc.sourceLine),
      vector,
    };
  }

  /**
   * Builds the text that gets embedded: full name, kind, summary, the first
   * 500 characters of the description, the parameter signature, the return
   * type and the first example. Empty parts are dropped.
   */
  private createEmbeddingText(doc: ApiDocumentation): string {
    const parts = [
      doc.fullName,
      doc.kind,
      doc.summary,
      doc.description.substring(0, 500),
      doc.parameters.map(p => `${p.name}: ${p.type}`).join(', '),
      doc.returns ? `returns ${doc.returns.type}` : '',
      doc.examples.slice(0, 1).join(' '),
    ];
    return parts.filter(Boolean).join(' ');
  }

  /**
   * Runs the embedding model on `text` with mean pooling and normalization
   * and returns the result as a plain number array.
   *
   * @throws Error if the embedder has not been loaded.
   */
  private async generateEmbedding(text: string): Promise<number[]> {
    if (!this.embedder) {
      throw new Error('Embedder not initialized');
    }
    const result = await this.embedder(text, {
      pooling: 'mean',
      normalize: true,
    });
    return Array.from(result.data as ArrayLike<number>);
  }

  /**
   * Builds the row id `api_<kind>_<fullName>`, replacing every
   * non-alphanumeric character of the full name with `_`.
   */
  private generateDocId(fullName: string, kind: string): string {
    return `api_${kind}_${fullName.replace(/[^a-zA-Z0-9]/g, '_')}`;
  }

  /**
   * Maps a local source path to its GitHub URL on the master branch.
   *
   * Backslash separators are normalized first so Windows paths resolve the
   * same way as POSIX ones. The `#L<n>` anchor is only appended for a
   * positive integer line number.
   */
  private generateGitHubUrl(sourceFile: string, sourceLine: number): string {
    const normalizedPath = sourceFile.replace(/\\/g, '/');
    // Strip everything up to (and including) the last ".../packages/" prefix
    const relativePath = normalizedPath.replace(/^.*\/packages\//, 'packages/');
    const fileUrl = `https://github.com/BabylonJS/Babylon.js/blob/master/${relativePath}`;
    return Number.isInteger(sourceLine) && sourceLine > 0
      ? `${fileUrl}#L${sourceLine}`
      : fileUrl;
  }

  /**
   * Returns `api/<package>` when the source path contains
   * `packages/dev/<package>/`, otherwise `api/<lowercased kind>`.
   */
  private determineCategory(doc: ApiDocumentation): string {
    // Normalize separators so Windows paths match the package pattern too
    const normalizedPath = doc.sourceFile.replace(/\\/g, '/');
    const match = normalizedPath.match(/packages\/dev\/([^/]+)\//);
    if (match && match[1]) {
      return `api/${match[1]}`;
    }
    return `api/${doc.kind.toLowerCase()}`;
  }

  /**
   * Releases the database connection (when one was opened) and drops the
   * model reference. Safe to call repeatedly or before `initialize()`.
   * The indexer must be initialized again before further use.
   */
  async close(): Promise<void> {
    // Optional call: the handle may be unset, or a stub without close()
    this.db?.close?.();
    this.db = undefined;
    this.embedder = undefined;
    console.log('API indexer closed');
  }
}