immersive2/server/services/usageTracker.js
Michael Mainguy 03217f3e65 Add Cloudflare Workers AI provider and multiple AI chat improvements
- Add Cloudflare Workers AI as third provider alongside Claude and Ollama
  - New cloudflare.js API handler with format conversion
  - Tool converter functions for Cloudflare's OpenAI-compatible format
  - Handle [TOOL_CALLS] and [Called tool:] text formats from Mistral
  - Robust parser that handles truncated JSON responses

- Add usage tracking with cost display
  - New usageTracker.js service for tracking token usage per session
  - UsageDetailModal component showing per-request breakdown
  - Cost display in ChatPanel header

- Add new diagram manipulation features
  - Entity scale and rotation support via modify_entity tool
  - Wikipedia search tool for researching topics before diagramming
  - Clear conversation tool to reset chat history
  - JSON import from hamburger menu (moved from ChatPanel)

- Fix connection label rotation in billboard mode
  - Labels no longer have conflicting local rotation when billboard enabled
  - Update rotation when rendering mode changes

- Improve tool calling reliability
  - Add MAX_TOOL_ITERATIONS safety limit
  - Break loop after model switch to prevent context issues
  - Increase max_tokens to 4096 to prevent truncation

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-03 06:31:43 -06:00

242 lines
7.3 KiB
JavaScript

/**
* Token usage tracking and cost estimation service
*/
// Pricing per million tokens (as of Dec 2025)
// Keys are provider model ids; each entry holds an `input` and an `output` rate.
// calculateCost multiplies these rates by (token count / 1,000,000).
// NOTE(review): currency is presumably USD (formatCost prefixes "$") — confirm.
// Model ids missing from this table are priced by the fallbacks in getModelPricing.
const MODEL_PRICING = {
// Claude 4.5 models
"claude-opus-4-5-20251101": { input: 5.00, output: 25.00 },
"claude-sonnet-4-5-20250929": { input: 3.00, output: 15.00 },
"claude-haiku-4-5-20251001": { input: 1.00, output: 5.00 },
// Claude 4 models
"claude-opus-4-1-20250805": { input: 15.00, output: 75.00 },
"claude-sonnet-4-20250514": { input: 3.00, output: 15.00 },
// Claude 3.7/3.5 models
"claude-3-7-sonnet-20250219": { input: 3.00, output: 15.00 },
"claude-3-5-sonnet-20241022": { input: 3.00, output: 15.00 },
"claude-3-5-haiku-20241022": { input: 0.80, output: 4.00 },
// Claude 3 models
"claude-3-opus-20240229": { input: 15.00, output: 75.00 },
"claude-3-sonnet-20240229": { input: 3.00, output: 15.00 },
"claude-3-haiku-20240307": { input: 0.25, output: 1.25 },
// Cloudflare Workers AI models (approximate - based on neuron costs)
// These entries use the same rate for input and output tokens.
"@cf/mistralai/mistral-small-3.1-24b-instruct": { input: 0.30, output: 0.30 },
"@hf/nousresearch/hermes-2-pro-mistral-7b": { input: 0.10, output: 0.10 },
"@cf/meta/llama-3.3-70b-instruct-fp8-fast": { input: 0.20, output: 0.20 },
"@cf/meta/llama-3.1-70b-instruct": { input: 0.20, output: 0.20 },
"@cf/meta/llama-3.1-8b-instruct": { input: 0.05, output: 0.05 },
"@cf/deepseek-ai/deepseek-r1-distill-qwen-32b": { input: 0.15, output: 0.15 },
"@cf/qwen/qwen2.5-coder-32b-instruct": { input: 0.15, output: 0.15 },
};
// Cache pricing multipliers.
// calculateCost applies these to a model's `input` rate for cache-creation and
// cache-read tokens respectively.
const CACHE_WRITE_MULTIPLIER = 1.25; // cache-creation tokens cost 1.25x the input rate
const CACHE_READ_MULTIPLIER = 0.10; // cache-read tokens cost 0.10x the input rate
// In-memory storage for usage tracking.
// Both structures are created fresh when this module is loaded and are only
// mutated by trackUsage / clearSessionUsage below.
// sessionUsage maps a session id to that session's running totals and its
// list of per-request records.
const sessionUsage = new Map();
// globalUsage accumulates totals across every tracked request, regardless of
// session; byModel holds a per-model breakdown keyed by model id.
const globalUsage = {
totalInputTokens: 0,
totalOutputTokens: 0,
totalCacheCreationTokens: 0,
totalCacheReadTokens: 0,
totalCost: 0,
requestCount: 0,
byModel: {},
// Captured once at module load; getGlobalUsage derives `uptime` from it
startTime: Date.now()
};
/**
 * Resolve the per-million-token pricing entry for a model.
 *
 * Lookup order:
 *   1. exact match on an own key of MODEL_PRICING;
 *   2. flat cheap rate for Cloudflare Workers AI ids ("@cf/" or "@hf/" prefix);
 *   3. Claude family match by substring ("opus", then "haiku");
 *   4. Claude Sonnet 4.5 pricing as the final default.
 *
 * A missing or non-string model id never throws; it resolves to the default.
 *
 * @param {string} model - Model identifier as sent to the provider.
 * @returns {{input: number, output: number}} Rates per million input/output tokens.
 */
function getModelPricing(model) {
  const defaultPricing = MODEL_PRICING["claude-sonnet-4-5-20250929"];

  // Family and prefix matching need a string; anything else gets the default
  if (typeof model !== "string") {
    return defaultPricing;
  }

  // Only own keys count as an exact match. A bare MODEL_PRICING[model] lookup
  // would return inherited members for ids such as "__proto__" or
  // "constructor", producing a bogus pricing object and NaN costs.
  if (Object.prototype.hasOwnProperty.call(MODEL_PRICING, model)) {
    return MODEL_PRICING[model];
  }

  // The provider prefix is checked before the family substrings so that an
  // unlisted Workers AI model whose name happens to contain "opus" or "haiku"
  // is not billed at Claude rates.
  if (model.startsWith("@cf/") || model.startsWith("@hf/")) {
    return { input: 0.10, output: 0.10 };
  }

  // Unlisted Claude ids: price by family using the newest model of that family
  if (model.includes("opus")) {
    return MODEL_PRICING["claude-opus-4-5-20251101"];
  }
  if (model.includes("haiku")) {
    return MODEL_PRICING["claude-haiku-4-5-20251001"];
  }

  return defaultPricing;
}
/**
 * Calculate the estimated cost of a single request.
 *
 * The four token counters are treated as disjoint buckets and priced
 * independently:
 *   - input_tokens                -> input rate
 *   - cache_read_input_tokens     -> input rate * CACHE_READ_MULTIPLIER
 *   - cache_creation_input_tokens -> input rate * CACHE_WRITE_MULTIPLIER
 *   - output_tokens               -> output rate
 *
 * In the Anthropic usage format, `input_tokens` already excludes tokens that
 * were read from or written to the cache, so cache reads are NOT subtracted
 * from it. Subtracting them under-billed cached requests and could make the
 * input component negative.
 *
 * @param {string} model - Model identifier used to look up pricing.
 * @param {object} usage - Token usage from the API response; missing counters count as 0.
 * @returns {number} Estimated cost in the pricing table's currency unit.
 */
function calculateCost(model, usage) {
  const pricing = getModelPricing(model);
  const tokensPerMillion = 1_000_000;
  const counts = usage ?? {};

  const uncachedInputTokens = counts.input_tokens || 0;
  const cacheReadTokens = counts.cache_read_input_tokens || 0;
  const cacheCreationTokens = counts.cache_creation_input_tokens || 0;
  const outputTokens = counts.output_tokens || 0;

  const inputCost = (uncachedInputTokens / tokensPerMillion) * pricing.input;
  const cacheReadCost =
    (cacheReadTokens / tokensPerMillion) * pricing.input * CACHE_READ_MULTIPLIER;
  const cacheWriteCost =
    (cacheCreationTokens / tokensPerMillion) * pricing.input * CACHE_WRITE_MULTIPLIER;
  const outputCost = (outputTokens / tokensPerMillion) * pricing.output;

  return inputCost + cacheReadCost + cacheWriteCost + outputCost;
}
/**
 * Track usage for a request.
 *
 * Builds a usage record, adds it to the session's totals (when a session id is
 * given) and to the process-wide totals, and returns the record.
 *
 * Per session only the most recent 100 request records are retained; the
 * session totals keep counting every request even after old records are
 * dropped. Stored input/output text is truncated to 500 characters plus "...".
 *
 * @param {string} sessionId - Session identifier; falsy skips per-session tracking
 * @param {string} model - Model used
 * @param {object} usage - Token usage from API response
 * @param {object} content - Optional input/output content for detailed tracking
 * @param {string} content.inputText - User input text
 * @param {string} content.outputText - Assistant output text
 * @param {array} content.toolCalls - Tool calls made
 * @returns {object|null} The stored usage record, or null when `usage` is falsy
 */
export function trackUsage(sessionId, model, usage, content = {}) {
  if (!usage) return null;

  const maxStoredTextLength = 500;
  const maxRequestsPerSession = 100;

  // The default parameter only covers `undefined`; an explicit null must not crash
  const details = content ?? {};
  const cost = calculateCost(model, usage);

  // Truncate text for storage (keep first 500 chars)
  const truncate = (text, maxLen = maxStoredTextLength) => {
    if (!text) return null;
    return text.length > maxLen ? `${text.substring(0, maxLen)}...` : text;
  };

  const usageRecord = {
    timestamp: Date.now(),
    model,
    inputTokens: usage.input_tokens || 0,
    outputTokens: usage.output_tokens || 0,
    cacheCreationTokens: usage.cache_creation_input_tokens || 0,
    cacheReadTokens: usage.cache_read_input_tokens || 0,
    cost,
    inputText: truncate(details.inputText),
    outputText: truncate(details.outputText),
    toolCalls: details.toolCalls || [],
  };

  // Update session usage
  if (sessionId) {
    let session = sessionUsage.get(sessionId);
    if (!session) {
      session = {
        totalInputTokens: 0,
        totalOutputTokens: 0,
        totalCacheCreationTokens: 0,
        totalCacheReadTokens: 0,
        totalCost: 0,
        requestCount: 0,
        requests: [],
        startTime: Date.now(),
      };
      sessionUsage.set(sessionId, session);
    }

    session.totalInputTokens += usageRecord.inputTokens;
    session.totalOutputTokens += usageRecord.outputTokens;
    session.totalCacheCreationTokens += usageRecord.cacheCreationTokens;
    session.totalCacheReadTokens += usageRecord.cacheReadTokens;
    session.totalCost += cost;
    session.requestCount += 1;
    session.requests.push(usageRecord);

    // Keep only the most recent records per session to limit memory
    if (session.requests.length > maxRequestsPerSession) {
      session.requests.shift();
    }
  }

  // Update global usage
  globalUsage.totalInputTokens += usageRecord.inputTokens;
  globalUsage.totalOutputTokens += usageRecord.outputTokens;
  globalUsage.totalCacheCreationTokens += usageRecord.cacheCreationTokens;
  globalUsage.totalCacheReadTokens += usageRecord.cacheReadTokens;
  globalUsage.totalCost += cost;
  globalUsage.requestCount += 1;

  // Track by model. byModel is a plain object keyed by a caller-supplied
  // string, so a bare `byModel[model]` lookup would resolve ids like
  // "__proto__" or "constructor" to inherited members and the `+=` lines would
  // then write counters onto Object.prototype. Only own properties count, and
  // new entries are defined as own data properties.
  const { byModel } = globalUsage;
  if (!Object.prototype.hasOwnProperty.call(byModel, model)) {
    Object.defineProperty(byModel, model, {
      value: { inputTokens: 0, outputTokens: 0, cost: 0, requestCount: 0 },
      writable: true,
      enumerable: true,
      configurable: true,
    });
  }
  const modelTotals = byModel[model];
  modelTotals.inputTokens += usageRecord.inputTokens;
  modelTotals.outputTokens += usageRecord.outputTokens;
  modelTotals.cost += cost;
  modelTotals.requestCount += 1;

  return usageRecord;
}
/**
 * Look up the accumulated usage for one session.
 *
 * @param {string} sessionId - Session identifier
 * @returns {object|null} The session's usage entry (the stored object itself,
 *   not a copy), or null when nothing is stored for that session
 */
export function getSessionUsage(sessionId) {
  const entry = sessionUsage.get(sessionId);
  return entry ? entry : null;
}
/**
 * Snapshot the process-wide usage totals.
 *
 * @returns {object} A shallow copy of the global totals with an added
 *   `uptime` field: milliseconds elapsed since the tracker's startTime.
 *   Nested values such as `byModel` are shared with the live state.
 */
export function getGlobalUsage() {
  const snapshot = Object.assign({}, globalUsage);
  snapshot.uptime = Date.now() - globalUsage.startTime;
  return snapshot;
}
/**
 * Format cost as currency string.
 *
 * The amount is rendered with a leading "$" and exactly six decimal places.
 * A missing cost (null or undefined, e.g. nothing has been tracked yet) is
 * shown as zero instead of throwing, and numeric strings are accepted.
 *
 * @param {number|string|null|undefined} cost - Cost amount
 * @returns {string} Formatted amount, e.g. "$0.012346"
 */
export function formatCost(cost) {
  const amount = Number(cost ?? 0);
  return `$${amount.toFixed(6)}`;
}
/**
 * Drop everything stored for a session (call when the session ends).
 * Global totals are left untouched.
 *
 * @param {string} sessionId - Session identifier
 * @returns {void}
 */
export function clearSessionUsage(sessionId) {
  // Map#delete is a no-op for unknown keys, so no existence check is needed
  sessionUsage.delete(sessionId);
}
/**
 * Build a one-line, pipe-separated usage summary for logging.
 *
 * Cache read/write segments are included only when their counts are non-zero.
 *
 * @param {object} usageRecord - Record as returned by trackUsage
 * @returns {string} e.g. "Input: 10 | Output: 5 | Cost: $0.500000", or
 *   "No usage data" when no record is given
 */
export function getUsageSummary(usageRecord) {
  if (!usageRecord) {
    return "No usage data";
  }

  const {
    inputTokens,
    outputTokens,
    cacheReadTokens,
    cacheCreationTokens,
    cost,
  } = usageRecord;

  const segments = [`Input: ${inputTokens}`, `Output: ${outputTokens}`];
  if (cacheReadTokens) {
    segments.push(`Cache read: ${cacheReadTokens}`);
  }
  if (cacheCreationTokens) {
    segments.push(`Cache write: ${cacheCreationTokens}`);
  }
  segments.push(`Cost: ${formatCost(cost)}`);

  return segments.join(" | ");
}