babylon-mcp/scripts/index-docs.ts
Michael Mainguy 005a17f345 Add Babylon.js Editor documentation integration with TSX parser
Implemented comprehensive Editor documentation indexing using TypeScript Compiler API
to parse React/Next.js TSX files from the Babylon.js Editor repository.

Key changes:
- Added Editor repository (4th repo) to repository-config.ts
- Created tsx-parser.ts using TypeScript Compiler API (zero new dependencies)
- Extended document-parser.ts to route .tsx files to TSX parser
- Updated lancedb-indexer.ts to discover page.tsx files
- Added editor-docs source to index-docs.ts script

Features:
- Parses TSX/JSX files to extract text content, headings, and code blocks
- Filters out className values and non-content text
- Extracts categories from file paths (editor/adding-scripts, etc.)
- Handles Editor-specific documentation structure

Test coverage:
- Added tsx-parser.test.ts (11 tests, 10 passing)
- Extended document-parser.test.ts with TSX coverage (5 new tests)
- Fixed repository-manager.test.ts for 4 repositories
- Total: 167 tests passing, 1 skipped

Results:
- 902 documents now indexed (745 docs + 144 source + 13 editor)
- Editor documentation appears in search results
- Verified with Editor-specific queries (onStart, decorators, etc.)

Updated ROADMAP.md with completion status for Editor integration phases 1-3.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-24 09:20:56 -06:00

63 lines
2.0 KiB
TypeScript

#!/usr/bin/env tsx
// Backend selection for @xenova/transformers: when TRANSFORMERS_BACKEND asks for
// a WASM-based runtime ('wasm' or 'onnxruntime-web'), force ONNXRUNTIME_BACKEND to
// 'wasm'. The intent is to keep onnxruntime-node from being loaded on Alpine Linux
// (musl libc).
// NOTE(review): static `import` declarations are hoisted in ES modules, so the
// imports below are evaluated BEFORE this statement runs. The variable is only
// visible to code that reads it after module loading — confirm that is sufficient,
// or switch to a dynamic `import()` placed after this block.
switch (process.env.TRANSFORMERS_BACKEND) {
  case 'wasm':
  case 'onnxruntime-web':
    process.env.ONNXRUNTIME_BACKEND = 'wasm';
    break;
  default:
    // Any other (or unset) backend: leave the environment untouched.
    break;
}
import { LanceDBIndexer, DocumentSource } from '../src/search/lancedb-indexer.js';
import path from 'path';
import { fileURLToPath } from 'url';
// ES modules do not provide the CommonJS `__filename` / `__dirname` globals,
// so both are rebuilt here from this module's own URL.
const __filename = fileURLToPath(import.meta.url),
  __dirname = path.dirname(__filename);
/**
 * Indexes the configured documentation sources into the LanceDB database.
 *
 * Resolves the project root as the parent of this script's directory, builds
 * the database path (`data/lancedb`) and the three source descriptors
 * (`documentation`, `source-repo`, `editor-docs`), logs them, then runs the
 * indexer: `initialize()` followed by `indexDocuments()`.
 *
 * Errors thrown while initializing or indexing are logged and turned into a
 * non-zero exit code. The indexer is closed afterwards whether indexing
 * succeeded or failed.
 *
 * @returns A promise that settles once indexing has finished and the indexer
 *          has been closed.
 */
async function main(): Promise<void> {
  const projectRoot = path.join(__dirname, '..');
  const dbPath = path.join(projectRoot, 'data', 'lancedb');

  // Define documentation sources
  const sources: DocumentSource[] = [
    {
      name: 'documentation',
      path: path.join(projectRoot, 'data', 'repositories', 'Documentation', 'content'),
      urlPrefix: 'https://doc.babylonjs.com',
    },
    {
      name: 'source-repo',
      path: path.join(projectRoot, 'data', 'repositories', 'Babylon.js'),
      urlPrefix: 'https://github.com/BabylonJS/Babylon.js/blob/master',
    },
    {
      name: 'editor-docs',
      path: path.join(projectRoot, 'data', 'repositories', 'Editor', 'website', 'src', 'app', 'documentation'),
      urlPrefix: 'https://editor.babylonjs.com/documentation',
    },
  ];

  console.log('Starting Babylon.js documentation indexing...');
  console.log(`Database path: ${dbPath}`);
  console.log(`\nDocumentation sources:`);
  sources.forEach((source, index) => {
    console.log(`  ${index + 1}. ${source.name}: ${source.path}`);
  });
  console.log('');

  const indexer = new LanceDBIndexer(dbPath, sources);
  try {
    await indexer.initialize();
    await indexer.indexDocuments();
    console.log('');
    console.log('✓ Documentation indexing completed successfully!');
  } catch (error) {
    console.error('Error during indexing:', error);
    // Do not call process.exit(1) here: it terminates the process synchronously,
    // so the `finally` block below would never run and the indexer would not be
    // closed. Setting exitCode keeps the failing status while letting cleanup run.
    process.exitCode = 1;
  } finally {
    await indexer.close();
  }
}
// Script entry point. main() already reports errors raised while indexing; this
// handler covers rejections that escape it (for example from constructing or
// closing the indexer) so the promise is never left floating and the process
// exits with a failing status.
main().catch((error: unknown) => {
  console.error('Unhandled error during indexing:', error);
  process.exitCode = 1;
});