Fix document path lookup in get_babylon_doc

Fixed pathToDocId to properly match document IDs generated during indexing.

Problem:
- During indexing, doc IDs are generated as: Documentation_content_<path>
- During retrieval, pathToDocId was not prepending the "Documentation_content_" prefix
- This caused get_babylon_doc to fail with "Document not found" errors

Solution:
- Updated pathToDocId to prepend "Documentation_content_" prefix
- Now handles paths with or without a "/content/" segment: everything up to and including "/content/" is stripped before the prefix is added
- Matches the ID format used during indexing

Example:
- User provides: "features/featuresDeepDive/audio/v2/migrateFromV1"
- Now correctly converts to: "Documentation_content_features_featuresDeepDive_audio_v2_migrateFromV1"

All 152 tests passing.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Michael Mainguy 2025-11-23 07:52:40 -06:00
parent 210ceb7d24
commit 99259efc4b

View File

@@ -197,10 +197,16 @@ export class LanceDBSearch {
} }
private pathToDocId(filePath: string): string { private pathToDocId(filePath: string): string {
return filePath // Remove .md extension if present
.replace(/^.*\/content\//, '') let normalizedPath = filePath.replace(/\.md$/, '');
.replace(/\.md$/, '')
.replace(/\//g, '_'); // If path already starts with content/, strip everything before it
normalizedPath = normalizedPath.replace(/^.*\/content\//, '');
// If path doesn't have content/ prefix, assume it's relative to content/
// and prepend Documentation_content_ to match indexing
const pathWithUnderscores = normalizedPath.replace(/\//g, '_');
return `Documentation_content_${pathWithUnderscores}`;
} }
async searchSourceCode( async searchSourceCode(