babylon-mcp/src/search/document-parser.test.ts
Michael Mainguy f56b92e76e Implement LanceDB-based search and document retrieval
- Add LanceDBSearch class for vector-based documentation search
- Implement search() method with category filtering and relevance scoring
- Add getDocumentByPath() with URL lookup and local file fetching
- Fix getDocument() to use .query() instead of .search() for non-vector queries
- Update handlers.ts to integrate LanceDBSearch with MCP tools
- Parse stringified array fields (breadcrumbs, headings, keywords, playgroundIds) in get_babylon_doc
- Fetch fresh content from local repositories (Documentation, Babylon.js, havok)
- Add DocumentParser, LanceDBIndexer and related types for document processing

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-23 04:57:29 -06:00

50 lines
1.5 KiB
TypeScript

import { describe, it, expect } from 'vitest';
import { DocumentParser } from './document-parser.js';
import path from 'path';
/**
 * Checks DocumentParser.parseFile against a single markdown file read from disk.
 *
 * NOTE(review): the fixture path is built from process.cwd(), so the suite
 * presumably has to be launched from the project root with the Documentation
 * repository already present under data/repositories — confirm.
 */
describe('DocumentParser', () => {
  // Absolute location of the markdown page every test case parses.
  const fixturePath = path.join(
    process.cwd(),
    'data/repositories/Documentation/content/features.md'
  );
  const parser = new DocumentParser();

  // Each test parses the fixture afresh; no parsed result is shared between cases.
  const parseFixture = () => parser.parseFile(fixturePath);

  it('should parse YAML front matter', async () => {
    const parsed = await parseFixture();

    expect(parsed.title).toBe('Babylon.js Features');
    expect(parsed.description).toContain('breadth and depth');
    // Both keywords must be present in the parsed keyword list.
    for (const keyword of ['features', 'capabilities']) {
      expect(parsed.keywords).toContain(keyword);
    }
  });

  it('should extract category from file path', async () => {
    const parsed = await parseFixture();

    expect(parsed.category).toBe('features');
    expect(parsed.breadcrumbs).toEqual(['features']);
  });

  it('should extract headings', async () => {
    const parsed = await parseFixture();

    expect(parsed.headings.length).toBeGreaterThan(0);

    // The first heading is expected to be the level-1 page title.
    const firstHeading = parsed.headings[0];
    expect(firstHeading?.text).toBe('Babylon.js Features');
    expect(firstHeading?.level).toBe(1);
  });

  it('should have markdown content', async () => {
    const parsed = await parseFixture();

    expect(parsed.content).toContain('Babylon.js Features');
    expect(parsed.content.length).toBeGreaterThan(0);
  });

  it('should extract file path and modified date', async () => {
    const parsed = await parseFixture();

    // The parser is expected to report back the exact path it was given.
    expect(parsed.filePath).toBe(fixturePath);
    expect(parsed.lastModified).toBeInstanceOf(Date);
  });
});