- Add Cloudflare Workers AI as third provider alongside Claude and Ollama - New cloudflare.js API handler with format conversion - Tool converter functions for Cloudflare's OpenAI-compatible format - Handle [TOOL_CALLS] and [Called tool:] text formats from Mistral - Robust parser that handles truncated JSON responses - Add usage tracking with cost display - New usageTracker.js service for tracking token usage per session - UsageDetailModal component showing per-request breakdown - Cost display in ChatPanel header - Add new diagram manipulation features - Entity scale and rotation support via modify_entity tool - Wikipedia search tool for researching topics before diagramming - Clear conversation tool to reset chat history - JSON import from hamburger menu (moved from ChatPanel) - Fix connection label rotation in billboard mode - Labels no longer have conflicting local rotation when billboard enabled - Update rotation when rendering mode changes - Improve tool calling reliability - Add MAX_TOOL_ITERATIONS safety limit - Break loop after model switch to prevent context issues - Increase max_tokens to 4096 to prevent truncation 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
242 lines
7.3 KiB
JavaScript
242 lines
7.3 KiB
JavaScript
/**
|
|
* Token usage tracking and cost estimation service
|
|
*/
|
|
|
|
// Pricing in USD per million tokens (as of Dec 2025), keyed by exact model id.
// Models absent from this table are priced by the per-family fallback in
// getModelPricing(), so any model whose rate differs from its family's
// fallback entry must be listed here explicitly.
const MODEL_PRICING = {
  // Claude 4.5 models
  "claude-opus-4-5-20251101": { input: 5.00, output: 25.00 },
  "claude-sonnet-4-5-20250929": { input: 3.00, output: 15.00 },
  "claude-haiku-4-5-20251001": { input: 1.00, output: 5.00 },

  // Claude 4 models
  "claude-opus-4-1-20250805": { input: 15.00, output: 75.00 },
  // Opus 4 is priced like Opus 4.1. Without an explicit entry the "opus"
  // family fallback would bill it at the much cheaper Opus 4.5 rate.
  "claude-opus-4-20250514": { input: 15.00, output: 75.00 },
  "claude-sonnet-4-20250514": { input: 3.00, output: 15.00 },

  // Claude 3.7/3.5 models
  "claude-3-7-sonnet-20250219": { input: 3.00, output: 15.00 },
  "claude-3-5-sonnet-20241022": { input: 3.00, output: 15.00 },
  "claude-3-5-haiku-20241022": { input: 0.80, output: 4.00 },

  // Claude 3 models
  "claude-3-opus-20240229": { input: 15.00, output: 75.00 },
  "claude-3-sonnet-20240229": { input: 3.00, output: 15.00 },
  "claude-3-haiku-20240307": { input: 0.25, output: 1.25 },

  // Cloudflare Workers AI models (approximate - based on neuron costs)
  "@cf/mistralai/mistral-small-3.1-24b-instruct": { input: 0.30, output: 0.30 },
  "@hf/nousresearch/hermes-2-pro-mistral-7b": { input: 0.10, output: 0.10 },
  "@cf/meta/llama-3.3-70b-instruct-fp8-fast": { input: 0.20, output: 0.20 },
  "@cf/meta/llama-3.1-70b-instruct": { input: 0.20, output: 0.20 },
  "@cf/meta/llama-3.1-8b-instruct": { input: 0.05, output: 0.05 },
  "@cf/deepseek-ai/deepseek-r1-distill-qwen-32b": { input: 0.15, output: 0.15 },
  "@cf/qwen/qwen2.5-coder-32b-instruct": { input: 0.15, output: 0.15 },
};
|
|
|
|
// Prompt-cache pricing, expressed as factors applied to a model's base input rate.
const CACHE_WRITE_MULTIPLIER = 1.25; // writing to the cache costs 25% more than plain input
const CACHE_READ_MULTIPLIER = 0.1; // reading from the cache costs 10% of plain input
|
|
|
|
// Usage state lives in module memory only.

// Per-session totals, keyed by session id.
const sessionUsage = new Map();

// Totals across every tracked request, regardless of session.
const globalUsage = {
  totalInputTokens: 0, // sum of input tokens
  totalOutputTokens: 0, // sum of output tokens
  totalCacheCreationTokens: 0, // sum of tokens written to the prompt cache
  totalCacheReadTokens: 0, // sum of tokens read from the prompt cache
  totalCost: 0, // accumulated estimated cost
  requestCount: 0, // number of tracked requests
  byModel: {}, // per-model breakdown, keyed by model id
  startTime: Date.now(), // moment this module was loaded (ms since epoch)
};
|
|
|
|
/**
 * Resolve the per-million-token pricing for a model.
 *
 * Resolution order: exact id in MODEL_PRICING, then the Workers AI id
 * prefixes, then the Claude family name ("opus" / "haiku"), and finally
 * Sonnet pricing as the default.
 *
 * @param {string} model - Model identifier; missing or non-string values are treated as unknown
 * @returns {{input: number, output: number}} Price per million input/output tokens
 */
function getModelPricing(model) {
  // A missing or non-string id cannot match anything; fall through to the
  // default instead of throwing on `.includes` / `.startsWith`.
  const modelId = typeof model === "string" ? model : "";

  // Exact match. The own-property check keeps ids such as "constructor" or
  // "toString" from resolving to inherited Object.prototype members, which
  // would produce NaN costs downstream.
  if (Object.prototype.hasOwnProperty.call(MODEL_PRICING, modelId)) {
    return MODEL_PRICING[modelId];
  }

  // Cloudflare models - default to cheap pricing. Checked before the Claude
  // family names so an "@cf/..." id that happens to contain "opus" or "haiku"
  // is not billed at Claude rates.
  if (modelId.startsWith("@cf/") || modelId.startsWith("@hf/")) {
    return { input: 0.10, output: 0.10 };
  }

  // Unknown Claude ids: match by model family.
  if (modelId.includes("opus")) {
    return MODEL_PRICING["claude-opus-4-5-20251101"];
  }
  if (modelId.includes("haiku")) {
    return MODEL_PRICING["claude-haiku-4-5-20251001"];
  }

  // Default to sonnet pricing
  return MODEL_PRICING["claude-sonnet-4-5-20250929"];
}
|
|
|
|
/**
 * Estimate the cost of a single request from its token usage.
 *
 * The usage field names follow the Anthropic Messages API, where
 * `input_tokens`, `cache_read_input_tokens` and `cache_creation_input_tokens`
 * are disjoint counts: `input_tokens` already excludes anything read from or
 * written to the prompt cache. Each bucket is therefore billed on its own and
 * nothing is subtracted from `input_tokens` (subtracting cache reads would
 * under-bill cached requests and can drive the result negative).
 *
 * NOTE(review): assumes every provider's usage is normalized to that shape
 * before it reaches this function - a provider whose prompt count already
 * includes cached tokens would need to be adjusted by the caller.
 *
 * @param {string} model - Model identifier used to look up pricing
 * @param {object} usage - Token usage from the API response
 * @returns {number} Estimated cost in the same currency unit as the pricing table
 */
function calculateCost(model, usage) {
  const pricing = getModelPricing(model);
  const perMillionDivisor = 1_000_000;

  const {
    input_tokens: inputTokens,
    output_tokens: outputTokens,
    cache_read_input_tokens: cacheReadTokens,
    cache_creation_input_tokens: cacheCreationTokens,
  } = usage ?? {};

  // Standard (uncached) input tokens at the base input rate
  const inputCost = ((inputTokens || 0) / perMillionDivisor) * pricing.input;

  // Cache read tokens (90% cheaper)
  const cacheReadCost =
    ((cacheReadTokens || 0) / perMillionDivisor) * pricing.input * CACHE_READ_MULTIPLIER;

  // Cache creation tokens (25% more expensive)
  const cacheWriteCost =
    ((cacheCreationTokens || 0) / perMillionDivisor) * pricing.input * CACHE_WRITE_MULTIPLIER;

  // Output tokens
  const outputCost = ((outputTokens || 0) / perMillionDivisor) * pricing.output;

  return inputCost + cacheReadCost + cacheWriteCost + outputCost;
}
|
|
|
|
/**
 * Record the token usage and estimated cost of one request.
 *
 * The request is added to the per-session totals (when a session id is
 * given), to the global totals, and to the per-model breakdown.
 *
 * @param {string} sessionId - Session identifier; a falsy value skips per-session tracking
 * @param {string} model - Model used
 * @param {object} usage - Token usage from API response
 * @param {object} [content] - Optional input/output content for detailed tracking
 * @param {string} [content.inputText] - User input text
 * @param {string} [content.outputText] - Assistant output text
 * @param {array} [content.toolCalls] - Tool calls made
 * @returns {object|null} The usage record that was stored, or null when `usage` is missing
 */
export function trackUsage(sessionId, model, usage, content = {}) {
  if (!usage) return null;

  const MAX_TEXT_LENGTH = 500;
  const MAX_SESSION_REQUESTS = 100;

  // The default parameter only covers `undefined`; an explicit `null` must not crash.
  const details = content ?? {};
  const cost = calculateCost(model, usage);

  // Non-string content (e.g. an array of content blocks) is serialized so it
  // can be truncated like plain text instead of throwing on `.substring`.
  const toText = (value) => {
    if (typeof value === "string") return value;
    try {
      return JSON.stringify(value) ?? String(value);
    } catch {
      // Cyclic structures or BigInt values cannot be serialized as JSON.
      return String(value);
    }
  };

  // Truncate text for storage (keep first 500 chars)
  const truncate = (text, maxLen = MAX_TEXT_LENGTH) => {
    if (!text) return null;
    const str = toText(text);
    return str.length > maxLen ? `${str.substring(0, maxLen)}...` : str;
  };

  const usageRecord = {
    timestamp: Date.now(),
    model,
    inputTokens: usage.input_tokens || 0,
    outputTokens: usage.output_tokens || 0,
    cacheCreationTokens: usage.cache_creation_input_tokens || 0,
    cacheReadTokens: usage.cache_read_input_tokens || 0,
    cost,
    inputText: truncate(details.inputText),
    outputText: truncate(details.outputText),
    toolCalls: details.toolCalls || [],
  };

  // Update session usage
  if (sessionId) {
    if (!sessionUsage.has(sessionId)) {
      sessionUsage.set(sessionId, {
        totalInputTokens: 0,
        totalOutputTokens: 0,
        totalCacheCreationTokens: 0,
        totalCacheReadTokens: 0,
        totalCost: 0,
        requestCount: 0,
        requests: [],
        startTime: Date.now(),
      });
    }

    const session = sessionUsage.get(sessionId);
    session.totalInputTokens += usageRecord.inputTokens;
    session.totalOutputTokens += usageRecord.outputTokens;
    session.totalCacheCreationTokens += usageRecord.cacheCreationTokens;
    session.totalCacheReadTokens += usageRecord.cacheReadTokens;
    session.totalCost += cost;
    session.requestCount += 1;
    session.requests.push(usageRecord);

    // Keep only last 100 requests per session to limit memory; the totals
    // above still cover every request.
    if (session.requests.length > MAX_SESSION_REQUESTS) {
      session.requests.shift();
    }
  }

  // Update global usage
  globalUsage.totalInputTokens += usageRecord.inputTokens;
  globalUsage.totalOutputTokens += usageRecord.outputTokens;
  globalUsage.totalCacheCreationTokens += usageRecord.cacheCreationTokens;
  globalUsage.totalCacheReadTokens += usageRecord.cacheReadTokens;
  globalUsage.totalCost += cost;
  globalUsage.requestCount += 1;

  // Track by model. The own-property check prevents a model id such as
  // "constructor" from resolving to an inherited member and having counters
  // written onto it.
  if (!Object.prototype.hasOwnProperty.call(globalUsage.byModel, model)) {
    globalUsage.byModel[model] = {
      inputTokens: 0,
      outputTokens: 0,
      cost: 0,
      requestCount: 0,
    };
  }
  const modelStats = globalUsage.byModel[model];
  modelStats.inputTokens += usageRecord.inputTokens;
  modelStats.outputTokens += usageRecord.outputTokens;
  modelStats.cost += cost;
  modelStats.requestCount += 1;

  return usageRecord;
}
|
|
|
|
/**
 * Look up the accumulated usage entry for one session.
 *
 * @param {string} sessionId - Session identifier
 * @returns {object|null} The stored entry for the session, or null when there is none
 */
export function getSessionUsage(sessionId) {
  const entry = sessionUsage.get(sessionId);
  if (entry) {
    return entry;
  }
  return null;
}
|
|
|
|
/**
 * Get a snapshot of the global usage stats.
 *
 * The per-model breakdown is copied entry by entry so that callers who
 * modify the returned object cannot alter the tracker's internal counters.
 *
 * @returns {object} Copy of the global totals plus `uptime`, the number of
 *   milliseconds elapsed since `globalUsage.startTime`
 */
export function getGlobalUsage() {
  const byModel = Object.fromEntries(
    Object.entries(globalUsage.byModel).map(([model, stats]) => [model, { ...stats }]),
  );

  return {
    ...globalUsage,
    byModel,
    uptime: Date.now() - globalUsage.startTime,
  };
}
|
|
|
|
/**
 * Format cost as a currency string with six decimal places.
 *
 * Numeric strings are accepted. Values that are missing or do not convert to
 * a finite number (undefined, null, NaN, Infinity, non-numeric strings) are
 * rendered as zero rather than throwing or printing "$NaN".
 *
 * @param {number|string|null|undefined} cost - Amount to format
 * @returns {string} The amount prefixed with "$", e.g. "$0.012346"
 */
export function formatCost(cost) {
  const amount = Number(cost);
  const safeAmount = Number.isFinite(amount) ? amount : 0;
  return `$${safeAmount.toFixed(6)}`;
}
|
|
|
|
/**
 * Drop the stored usage entry for a session (call when the session ends).
 * Clearing a session that has no entry is a no-op.
 *
 * @param {string} sessionId - Session identifier
 * @returns {void}
 */
export function clearSessionUsage(sessionId) {
  if (sessionUsage.has(sessionId)) {
    sessionUsage.delete(sessionId);
  }
}
|
|
|
|
/**
 * Build a one-line, pipe-separated usage summary suitable for logging.
 *
 * Input, output and cost are always listed; the cache read / cache write
 * parts appear only when their token counts are non-zero.
 *
 * @param {object} usageRecord - Usage record as returned by trackUsage
 * @returns {string} The summary line, or "No usage data" when no record is given
 */
export function getUsageSummary(usageRecord) {
  if (!usageRecord) {
    return "No usage data";
  }

  const { inputTokens, outputTokens, cacheReadTokens, cacheCreationTokens, cost } = usageRecord;

  const parts = [`Input: ${inputTokens}`, `Output: ${outputTokens}`];
  if (cacheReadTokens) {
    parts.push(`Cache read: ${cacheReadTokens}`);
  }
  if (cacheCreationTokens) {
    parts.push(`Cache write: ${cacheCreationTokens}`);
  }
  parts.push(`Cost: ${formatCost(cost)}`);

  return parts.join(" | ");
}
|