Add context window tracking and usage display
- Track context window usage per AI model in usage tracker
- Add context limits for Claude (200K) and Cloudflare models
- Display context percentage badge in ChatPanel header
- Add context warnings at 80% and 95% usage levels
- Fix entity label generation to allow explicit empty labels
- Auto-refocus input after message completion

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
719c969f72
commit
eef2dcd5a5
@ -9,6 +9,9 @@ import {
|
|||||||
|
|
||||||
const router = Router();
|
const router = Router();
|
||||||
|
|
||||||
|
// Context limits for Claude models (all have 200K)
|
||||||
|
const CLAUDE_CONTEXT_LIMIT = 200000;
|
||||||
|
|
||||||
router.post("/*path", async (req, res) => {
|
router.post("/*path", async (req, res) => {
|
||||||
const requestStart = Date.now();
|
const requestStart = Date.now();
|
||||||
console.log(`[Claude API] ========== REQUEST START ==========`);
|
console.log(`[Claude API] ========== REQUEST START ==========`);
|
||||||
@ -66,7 +69,8 @@ router.post("/*path", async (req, res) => {
|
|||||||
const usageRecord = trackUsage(sessionId, modelId, data.usage, {
|
const usageRecord = trackUsage(sessionId, modelId, data.usage, {
|
||||||
inputText,
|
inputText,
|
||||||
outputText,
|
outputText,
|
||||||
toolCalls
|
toolCalls,
|
||||||
|
contextLimit: CLAUDE_CONTEXT_LIMIT
|
||||||
});
|
});
|
||||||
console.log(`[Claude API] REQUEST USAGE: ${getUsageSummary(usageRecord)}`);
|
console.log(`[Claude API] REQUEST USAGE: ${getUsageSummary(usageRecord)}`);
|
||||||
|
|
||||||
@ -77,6 +81,14 @@ router.post("/*path", async (req, res) => {
|
|||||||
console.log(`[Claude API] Total input: ${sessionStats.totalInputTokens} tokens`);
|
console.log(`[Claude API] Total input: ${sessionStats.totalInputTokens} tokens`);
|
||||||
console.log(`[Claude API] Total output: ${sessionStats.totalOutputTokens} tokens`);
|
console.log(`[Claude API] Total output: ${sessionStats.totalOutputTokens} tokens`);
|
||||||
console.log(`[Claude API] Total cost: ${formatCost(sessionStats.totalCost)}`);
|
console.log(`[Claude API] Total cost: ${formatCost(sessionStats.totalCost)}`);
|
||||||
|
console.log(`[Claude API] Context: ${sessionStats.contextUsed}/${sessionStats.contextLimit} (${sessionStats.contextPercent.toFixed(1)}%)`);
|
||||||
|
|
||||||
|
// Context warnings
|
||||||
|
if (sessionStats.contextPercent >= 95) {
|
||||||
|
console.error(`[Claude API] ⚠️ CONTEXT CRITICAL: ${sessionStats.contextPercent.toFixed(0)}% used! Consider clearing conversation.`);
|
||||||
|
} else if (sessionStats.contextPercent >= 80) {
|
||||||
|
console.warn(`[Claude API] ⚠️ CONTEXT WARNING: ${sessionStats.contextPercent.toFixed(0)}% of context window used`);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -7,6 +7,16 @@ import { buildLangChainMessages, aiMessageToClaudeResponse } from "../services/l
|
|||||||
|
|
||||||
const router = Router();
|
const router = Router();
|
||||||
|
|
||||||
|
// Context limits for Cloudflare models
|
||||||
|
const CLOUDFLARE_CONTEXT_LIMITS = {
|
||||||
|
'@cf/mistralai/mistral-small-3.1-24b-instruct': 32000,
|
||||||
|
'@hf/nousresearch/hermes-2-pro-mistral-7b': 8000,
|
||||||
|
'@cf/meta/llama-3.3-70b-instruct-fp8-fast': 128000,
|
||||||
|
'@cf/meta/llama-3.1-8b-instruct': 128000,
|
||||||
|
'@cf/deepseek-ai/deepseek-r1-distill-qwen-32b': 32000,
|
||||||
|
'@cf/qwen/qwen2.5-coder-32b-instruct': 32000
|
||||||
|
};
|
||||||
|
|
||||||
router.post("/*path", async (req, res) => {
|
router.post("/*path", async (req, res) => {
|
||||||
const requestStart = Date.now();
|
const requestStart = Date.now();
|
||||||
console.log(`[Cloudflare API] ========== REQUEST START ==========`);
|
console.log(`[Cloudflare API] ========== REQUEST START ==========`);
|
||||||
@ -68,10 +78,12 @@ router.post("/*path", async (req, res) => {
|
|||||||
?.filter(c => c.type === 'tool_use')
|
?.filter(c => c.type === 'tool_use')
|
||||||
.map(c => ({ name: c.name, input: c.input })) || [];
|
.map(c => ({ name: c.name, input: c.input })) || [];
|
||||||
|
|
||||||
|
const contextLimit = CLOUDFLARE_CONTEXT_LIMITS[modelId] || 32000;
|
||||||
const usageRecord = trackUsage(sessionId, modelId, data.usage, {
|
const usageRecord = trackUsage(sessionId, modelId, data.usage, {
|
||||||
inputText,
|
inputText,
|
||||||
outputText,
|
outputText,
|
||||||
toolCalls
|
toolCalls,
|
||||||
|
contextLimit
|
||||||
});
|
});
|
||||||
console.log(`[Cloudflare API] REQUEST USAGE: ${getUsageSummary(usageRecord)}`);
|
console.log(`[Cloudflare API] REQUEST USAGE: ${getUsageSummary(usageRecord)}`);
|
||||||
|
|
||||||
@ -82,6 +94,14 @@ router.post("/*path", async (req, res) => {
|
|||||||
console.log(`[Cloudflare API] Total input: ${sessionStats.totalInputTokens} tokens`);
|
console.log(`[Cloudflare API] Total input: ${sessionStats.totalInputTokens} tokens`);
|
||||||
console.log(`[Cloudflare API] Total output: ${sessionStats.totalOutputTokens} tokens`);
|
console.log(`[Cloudflare API] Total output: ${sessionStats.totalOutputTokens} tokens`);
|
||||||
console.log(`[Cloudflare API] Total cost: ${formatCost(sessionStats.totalCost)}`);
|
console.log(`[Cloudflare API] Total cost: ${formatCost(sessionStats.totalCost)}`);
|
||||||
|
console.log(`[Cloudflare API] Context: ${sessionStats.contextUsed}/${sessionStats.contextLimit} (${sessionStats.contextPercent.toFixed(1)}%)`);
|
||||||
|
|
||||||
|
// Context warnings
|
||||||
|
if (sessionStats.contextPercent >= 95) {
|
||||||
|
console.error(`[Cloudflare API] ⚠️ CONTEXT CRITICAL: ${sessionStats.contextPercent.toFixed(0)}% used! Consider clearing conversation.`);
|
||||||
|
} else if (sessionStats.contextPercent >= 80) {
|
||||||
|
console.warn(`[Cloudflare API] ⚠️ CONTEXT WARNING: ${sessionStats.contextPercent.toFixed(0)}% of context window used`);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -114,6 +114,7 @@ function calculateCost(model, usage) {
|
|||||||
* @param {string} content.inputText - User input text
|
* @param {string} content.inputText - User input text
|
||||||
* @param {string} content.outputText - Assistant output text
|
* @param {string} content.outputText - Assistant output text
|
||||||
* @param {array} content.toolCalls - Tool calls made
|
* @param {array} content.toolCalls - Tool calls made
|
||||||
|
* @param {number} content.contextLimit - Model's context window limit
|
||||||
*/
|
*/
|
||||||
export function trackUsage(sessionId, model, usage, content = {}) {
|
export function trackUsage(sessionId, model, usage, content = {}) {
|
||||||
if (!usage) return null;
|
if (!usage) return null;
|
||||||
@ -150,7 +151,10 @@ export function trackUsage(sessionId, model, usage, content = {}) {
|
|||||||
totalCost: 0,
|
totalCost: 0,
|
||||||
requestCount: 0,
|
requestCount: 0,
|
||||||
requests: [],
|
requests: [],
|
||||||
startTime: Date.now()
|
startTime: Date.now(),
|
||||||
|
// Context tracking
|
||||||
|
contextUsed: 0,
|
||||||
|
contextLimit: content.contextLimit || 32000
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -163,6 +167,12 @@ export function trackUsage(sessionId, model, usage, content = {}) {
|
|||||||
session.requestCount += 1;
|
session.requestCount += 1;
|
||||||
session.requests.push(usageRecord);
|
session.requests.push(usageRecord);
|
||||||
|
|
||||||
|
// Update context tracking - input tokens represent current context size
|
||||||
|
session.contextUsed = usageRecord.inputTokens;
|
||||||
|
if (content.contextLimit) {
|
||||||
|
session.contextLimit = content.contextLimit;
|
||||||
|
}
|
||||||
|
|
||||||
// Keep only last 100 requests per session to limit memory
|
// Keep only last 100 requests per session to limit memory
|
||||||
if (session.requests.length > 100) {
|
if (session.requests.length > 100) {
|
||||||
session.requests.shift();
|
session.requests.shift();
|
||||||
@ -198,7 +208,18 @@ export function trackUsage(sessionId, model, usage, content = {}) {
|
|||||||
* Get usage for a session
|
* Get usage for a session
|
||||||
*/
|
*/
|
||||||
export function getSessionUsage(sessionId) {
|
export function getSessionUsage(sessionId) {
|
||||||
return sessionUsage.get(sessionId) || null;
|
const session = sessionUsage.get(sessionId);
|
||||||
|
if (!session) return null;
|
||||||
|
|
||||||
|
// Calculate context percentage
|
||||||
|
const contextPercent = session.contextLimit > 0
|
||||||
|
? (session.contextUsed / session.contextLimit) * 100
|
||||||
|
: 0;
|
||||||
|
|
||||||
|
return {
|
||||||
|
...session,
|
||||||
|
contextPercent
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@ -110,7 +110,9 @@ export class DiagramManager {
|
|||||||
this._logger.debug('chatCreateEntity', entity);
|
this._logger.debug('chatCreateEntity', entity);
|
||||||
|
|
||||||
// Generate a default label if none is provided
|
// Generate a default label if none is provided
|
||||||
if (!entity.text) {
|
// Use strict check to allow empty string "" (explicit no label) while still
|
||||||
|
// generating labels for undefined/null (user didn't specify)
|
||||||
|
if (entity.text === undefined || entity.text === null) {
|
||||||
entity.text = this.generateDefaultLabel(entity);
|
entity.text = this.generateDefaultLabel(entity);
|
||||||
this._logger.debug('Generated default label:', entity.text);
|
this._logger.debug('Generated default label:', entity.text);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
import React, {useEffect, useRef, useState} from "react";
|
import React, {useEffect, useRef, useState} from "react";
|
||||||
import {ActionIcon, Alert, Box, CloseButton, Group, Paper, ScrollArea, Text, Textarea, Tooltip, UnstyledButton} from "@mantine/core";
|
import {ActionIcon, Alert, Badge, Box, CloseButton, Group, Paper, ScrollArea, Text, Textarea, Tooltip, UnstyledButton} from "@mantine/core";
|
||||||
import {IconAlertCircle, IconCoins, IconRobot, IconSend, IconTrash} from "@tabler/icons-react";
|
import {IconAlertCircle, IconCoins, IconRobot, IconSend, IconTrash} from "@tabler/icons-react";
|
||||||
import ChatMessage from "./ChatMessage";
|
import ChatMessage from "./ChatMessage";
|
||||||
import UsageDetailModal from "./UsageDetailModal";
|
import UsageDetailModal from "./UsageDetailModal";
|
||||||
@ -177,6 +177,8 @@ export default function ChatPanel({width = 400, onClose}: ChatPanelProps) {
|
|||||||
setError(err instanceof Error ? err.message : 'Failed to send message');
|
setError(err instanceof Error ? err.message : 'Failed to send message');
|
||||||
} finally {
|
} finally {
|
||||||
setIsLoading(false);
|
setIsLoading(false);
|
||||||
|
// Refocus the input after message completes
|
||||||
|
textareaRef.current?.focus();
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -226,6 +228,20 @@ export default function ChatPanel({width = 400, onClose}: ChatPanelProps) {
|
|||||||
</UnstyledButton>
|
</UnstyledButton>
|
||||||
</Tooltip>
|
</Tooltip>
|
||||||
)}
|
)}
|
||||||
|
{usage && usage.contextPercent !== undefined && (
|
||||||
|
<Tooltip
|
||||||
|
label={`${(usage.contextUsed || 0).toLocaleString()} / ${(usage.contextLimit || 0).toLocaleString()} tokens`}
|
||||||
|
position="bottom"
|
||||||
|
>
|
||||||
|
<Badge
|
||||||
|
size="sm"
|
||||||
|
variant="light"
|
||||||
|
color={usage.contextPercent > 80 ? 'red' : usage.contextPercent > 50 ? 'yellow' : 'green'}
|
||||||
|
>
|
||||||
|
{usage.contextPercent.toFixed(0)}%
|
||||||
|
</Badge>
|
||||||
|
</Tooltip>
|
||||||
|
)}
|
||||||
<Tooltip label="Clear conversation" position="bottom">
|
<Tooltip label="Clear conversation" position="bottom">
|
||||||
<ActionIcon
|
<ActionIcon
|
||||||
variant="subtle"
|
variant="subtle"
|
||||||
|
|||||||
@ -188,4 +188,8 @@ export interface SessionUsage {
|
|||||||
requestCount: number;
|
requestCount: number;
|
||||||
startTime: number;
|
startTime: number;
|
||||||
requests?: UsageRequestDetail[];
|
requests?: UsageRequestDetail[];
|
||||||
|
// Context tracking
|
||||||
|
contextUsed?: number;
|
||||||
|
contextLimit?: number;
|
||||||
|
contextPercent?: number;
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user