immersive2/server/api/cloudflare.js
Michael Mainguy eef2dcd5a5 Add context window tracking and usage display
- Track context window usage per AI model in usage tracker
- Add context limits for Claude (200K) and Cloudflare models
- Display context percentage badge in ChatPanel header
- Add context warnings at 80% and 95% usage levels
- Fix entity label generation to allow explicit empty labels
- Auto-refocus input after message completion

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-14 14:46:56 -06:00

150 lines
6.6 KiB
JavaScript

import { Router } from "express";
import { getSession, addMessage } from "../services/sessionStore.js";
import { trackUsage, getUsageSummary, formatCost, getSessionUsage } from "../services/usageTracker.js";
import { getCloudflareAccountId, getCloudflareApiToken } from "../services/providerConfig.js";
import { getCloudflareModel } from "../services/ChatCloudflare.js";
import { buildLangChainMessages, aiMessageToClaudeResponse } from "../services/langchainModels.js";
// Express router for this module: the Cloudflare POST handler is registered on
// it below, and it is this module's default export.
const router = Router();
// Context limits for Cloudflare models, keyed by model ID
// (presumably measured in tokens - confirm against the usage tracker).
//
// The table is built on a null prototype because it is indexed with the model
// ID taken from the request body: on a plain object literal, a key such as
// "constructor" or "toString" would resolve to an inherited Object.prototype
// member instead of `undefined`, defeating any `|| fallback` at the call site.
// It is frozen because it is shared, read-only configuration.
const CLOUDFLARE_CONTEXT_LIMITS = Object.freeze(Object.assign(Object.create(null), {
    '@cf/mistralai/mistral-small-3.1-24b-instruct': 32000,
    '@hf/nousresearch/hermes-2-pro-mistral-7b': 8000,
    '@cf/meta/llama-3.3-70b-instruct-fp8-fast': 128000,
    '@cf/meta/llama-3.1-8b-instruct': 128000,
    '@cf/deepseek-ai/deepseek-r1-distill-qwen-32b': 32000,
    '@cf/qwen/qwen2.5-coder-32b-instruct': 32000
}));
// Context limit used when the requested model has no entry in CLOUDFLARE_CONTEXT_LIMITS.
const DEFAULT_CLOUDFLARE_CONTEXT_LIMIT = 32000;

/**
 * Resolve the context limit for a model ID.
 *
 * Only the table's own entries are consulted: the model ID comes from the
 * request body, so a bare `table[modelId]` lookup could otherwise return an
 * inherited Object.prototype member (e.g. for "constructor").
 *
 * @param {*} modelId - Model ID as received in the request body.
 * @returns {number} The configured limit, or the default for unknown models.
 */
function resolveContextLimit(modelId) {
    const isKnownModel = typeof modelId === 'string'
        && Object.prototype.hasOwnProperty.call(CLOUDFLARE_CONTEXT_LIMITS, modelId);
    return (isKnownModel && CLOUDFLARE_CONTEXT_LIMITS[modelId]) || DEFAULT_CLOUDFLARE_CONTEXT_LIMIT;
}

/**
 * Concatenate the text of every `text` block in a response content array.
 *
 * @param {Array<{type: string, text?: string}>} blocks - Response content blocks.
 * @returns {string} The text blocks joined with newlines ('' when there are none).
 */
function joinTextBlocks(blocks) {
    return blocks
        .filter((block) => block.type === 'text')
        .map((block) => block.text)
        .join('\n');
}

/**
 * Log the cumulative usage reported by getSessionUsage() for a session,
 * including context-window warnings at 80% and 95%.
 *
 * This is diagnostics only, so it must never throw: the context lines are
 * skipped when the stats carry no numeric `contextPercent` instead of letting
 * `.toFixed()` raise and fail an otherwise successful request.
 *
 * @param {string} sessionId - Session whose totals should be logged.
 */
function logSessionUsage(sessionId) {
    const stats = getSessionUsage(sessionId);
    if (!stats) {
        return;
    }

    console.log(`[Cloudflare API] SESSION TOTALS (${stats.requestCount} requests):`);
    console.log(`[Cloudflare API] Total input: ${stats.totalInputTokens} tokens`);
    console.log(`[Cloudflare API] Total output: ${stats.totalOutputTokens} tokens`);
    console.log(`[Cloudflare API] Total cost: ${formatCost(stats.totalCost)}`);

    const { contextPercent } = stats;
    if (typeof contextPercent !== 'number') {
        return;
    }

    console.log(`[Cloudflare API] Context: ${stats.contextUsed}/${stats.contextLimit} (${contextPercent.toFixed(1)}%)`);
    // Context warnings
    if (contextPercent >= 95) {
        console.error(`[Cloudflare API] ⚠️ CONTEXT CRITICAL: ${contextPercent.toFixed(0)}% used! Consider clearing conversation.`);
    } else if (contextPercent >= 80) {
        console.warn(`[Cloudflare API] ⚠️ CONTEXT WARNING: ${contextPercent.toFixed(0)}% of context window used`);
    }
}

/**
 * POST handler for Cloudflare-backed chat requests.
 *
 * Reads `sessionId`, `model`, `system` and `messages` from the request body,
 * invokes the Cloudflare model through LangChain, converts the reply to the
 * Claude API response format, records token usage, appends the exchange to the
 * session (when the session exists) and returns the converted response as JSON.
 *
 * Responds with HTTP 500 when the Cloudflare account ID or API token is not
 * configured, or when anything inside the request pipeline throws.
 */
router.post("/*path", async (req, res) => {
    const requestStart = Date.now();
    console.log(`[Cloudflare API] ========== REQUEST START ==========`);

    const accountId = getCloudflareAccountId();
    const apiToken = getCloudflareApiToken();

    if (!accountId) {
        console.error(`[Cloudflare API] ERROR: Account ID not configured`);
        return res.status(500).json({ error: "Cloudflare account ID not configured" });
    }
    if (!apiToken) {
        console.error(`[Cloudflare API] ERROR: API token not configured`);
        return res.status(500).json({ error: "Cloudflare API token not configured" });
    }

    const { sessionId, model: modelId, system: systemPrompt, messages } = req.body;

    console.log(`[Cloudflare API] Session ID: ${sessionId || 'none'}`);
    console.log(`[Cloudflare API] Model: ${modelId}`);
    console.log(`[Cloudflare API] Messages count: ${messages?.length || 0}`);

    try {
        // Get LangChain-compatible Cloudflare model with tools bound
        const model = getCloudflareModel(modelId);

        // Build messages with entity context and history
        const langChainMessages = buildLangChainMessages(sessionId, messages, systemPrompt);

        console.log(`[Cloudflare API] Sending request via LangChain ChatCloudflare...`);
        const fetchStart = Date.now();

        // Invoke model
        const response = await model.invoke(langChainMessages);

        const fetchDuration = Date.now() - fetchStart;
        console.log(`[Cloudflare API] Response received in ${fetchDuration}ms`);

        // Convert to Claude API format for client compatibility
        const data = aiMessageToClaudeResponse(response, modelId);
        console.log(`[Cloudflare API] Response converted. Stop reason: ${data.stop_reason}, content blocks: ${data.content?.length || 0}`);

        // Derive the pieces shared by usage tracking and session storage once.
        const contentBlocks = Array.isArray(data.content) ? data.content : [];
        const assistantText = joinTextBlocks(contentBlocks);
        const lastMessage = Array.isArray(messages) ? messages[messages.length - 1] : undefined;
        // Only plain-string message content is tracked/stored; anything else is null.
        const lastMessageText = typeof lastMessage?.content === 'string' ? lastMessage.content : null;

        // Track and log token usage
        if (data.usage) {
            const toolCalls = contentBlocks
                .filter((block) => block.type === 'tool_use')
                .map((block) => ({ name: block.name, input: block.input }));

            const usageRecord = trackUsage(sessionId, modelId, data.usage, {
                inputText: lastMessageText,
                outputText: assistantText || null,
                toolCalls,
                contextLimit: resolveContextLimit(modelId)
            });
            console.log(`[Cloudflare API] REQUEST USAGE: ${getUsageSummary(usageRecord)}`);

            if (sessionId) {
                logSessionUsage(sessionId);
            }
        }

        // Store messages to session
        if (sessionId && data.content) {
            const session = getSession(sessionId);
            if (session) {
                if (lastMessage && lastMessage.role === 'user' && lastMessageText !== null) {
                    addMessage(sessionId, {
                        role: 'user',
                        content: lastMessageText
                    });
                    console.log(`[Cloudflare API] Stored user message to session`);
                }

                if (assistantText) {
                    addMessage(sessionId, {
                        role: 'assistant',
                        content: assistantText
                    });
                    console.log(`[Cloudflare API] Stored assistant response to session (${assistantText.length} chars)`);
                }
            }
        }

        const totalDuration = Date.now() - requestStart;
        console.log(`[Cloudflare API] ========== REQUEST COMPLETE (${totalDuration}ms) ==========`);
        res.json(data);
    } catch (error) {
        const totalDuration = Date.now() - requestStart;
        // Optional chaining: a thrown null/undefined must not make the error
        // handler itself throw before a response is sent.
        const errorMessage = error?.message;
        console.error(`[Cloudflare API] ========== REQUEST FAILED (${totalDuration}ms) ==========`);
        console.error(`[Cloudflare API] Error:`, error);
        console.error(`[Cloudflare API] Error message:`, errorMessage);
        res.status(500).json({ error: "Failed to call Cloudflare API", details: errorMessage });
    }
});
// Default export: the router carrying the Cloudflare POST handler defined above.
export default router;