/**
 * Token usage tracking and cost estimation service.
 *
 * Keeps in-memory, per-session and process-wide counters of token usage and
 * an estimated dollar cost derived from the static price table below.
 */

// Pricing per million tokens (as of Dec 2025)
const MODEL_PRICING = Object.freeze({
  // Claude 4.5 models
  "claude-opus-4-5-20251101": { input: 5.00, output: 25.00 },
  "claude-sonnet-4-5-20250929": { input: 3.00, output: 15.00 },
  "claude-haiku-4-5-20251001": { input: 1.00, output: 5.00 },

  // Claude 4 models
  "claude-opus-4-1-20250805": { input: 15.00, output: 75.00 },
  "claude-sonnet-4-20250514": { input: 3.00, output: 15.00 },

  // Claude 3.7/3.5 models
  "claude-3-7-sonnet-20250219": { input: 3.00, output: 15.00 },
  "claude-3-5-sonnet-20241022": { input: 3.00, output: 15.00 },
  "claude-3-5-haiku-20241022": { input: 0.80, output: 4.00 },

  // Claude 3 models
  "claude-3-opus-20240229": { input: 15.00, output: 75.00 },
  "claude-3-sonnet-20240229": { input: 3.00, output: 15.00 },
  "claude-3-haiku-20240307": { input: 0.25, output: 1.25 },

  // Cloudflare Workers AI models (approximate - based on neuron costs)
  "@cf/mistralai/mistral-small-3.1-24b-instruct": { input: 0.30, output: 0.30 },
  "@hf/nousresearch/hermes-2-pro-mistral-7b": { input: 0.10, output: 0.10 },
  "@cf/meta/llama-3.3-70b-instruct-fp8-fast": { input: 0.20, output: 0.20 },
  "@cf/meta/llama-3.1-70b-instruct": { input: 0.20, output: 0.20 },
  "@cf/meta/llama-3.1-8b-instruct": { input: 0.05, output: 0.05 },
  "@cf/deepseek-ai/deepseek-r1-distill-qwen-32b": { input: 0.15, output: 0.15 },
  "@cf/qwen/qwen2.5-coder-32b-instruct": { input: 0.15, output: 0.15 },
});

// Model ids used when an unknown model is priced by family name.
const FALLBACK_OPUS_MODEL = "claude-opus-4-5-20251101";
const FALLBACK_HAIKU_MODEL = "claude-haiku-4-5-20251001";
const FALLBACK_DEFAULT_MODEL = "claude-sonnet-4-5-20250929";

// Pricing applied to "@cf/" / "@hf/" models that are not in the table.
const DEFAULT_CLOUDFLARE_PRICING = Object.freeze({ input: 0.10, output: 0.10 });

// Cache pricing multipliers (applied to the model's input price)
const CACHE_WRITE_MULTIPLIER = 1.25; // 25% more expensive to write cache
const CACHE_READ_MULTIPLIER = 0.10; // 90% cheaper to read from cache

const TOKENS_PER_MILLION = 1_000_000;

// Stored input/output text is cut to this many characters (plus "...").
const MAX_STORED_TEXT_LENGTH = 500;

// Only the most recent N request records are kept per session to limit memory.
const MAX_REQUESTS_PER_SESSION = 100;

// In-memory storage for usage tracking
const sessionUsage = new Map();
const globalUsage = {
  totalInputTokens: 0,
  totalOutputTokens: 0,
  totalCacheCreationTokens: 0,
  totalCacheReadTokens: 0,
  totalCost: 0,
  requestCount: 0,
  // Null-prototype dictionary: it is keyed by arbitrary model strings, and a
  // key such as "__proto__" or "constructor" must never resolve to (or write
  // through to) Object.prototype.
  byModel: Object.create(null),
  startTime: Date.now(),
};

/**
 * Get pricing for a model, with fallback to sonnet pricing.
 *
 * Resolution order: exact table entry, then family match on "opus" / "haiku",
 * then the flat Cloudflare default for "@cf/" / "@hf/" prefixes, then sonnet.
 *
 * @param {string} model - Model identifier; non-string values get the default pricing
 * @returns {{input: number, output: number}} Price per million tokens
 */
function getModelPricing(model) {
  // A missing or malformed model id must not break usage tracking.
  if (typeof model !== "string") {
    return MODEL_PRICING[FALLBACK_DEFAULT_MODEL];
  }

  // Exact match first. Own-property check so inherited keys ("constructor",
  // "__proto__", ...) are not mistaken for pricing entries.
  if (Object.prototype.hasOwnProperty.call(MODEL_PRICING, model)) {
    return MODEL_PRICING[model];
  }

  // Try to match by model family
  if (model.includes("opus")) {
    return MODEL_PRICING[FALLBACK_OPUS_MODEL];
  }
  if (model.includes("haiku")) {
    return MODEL_PRICING[FALLBACK_HAIKU_MODEL];
  }

  // Cloudflare models - default to cheap pricing
  if (model.startsWith("@cf/") || model.startsWith("@hf/")) {
    return DEFAULT_CLOUDFLARE_PRICING;
  }

  // Default to sonnet pricing
  return MODEL_PRICING[FALLBACK_DEFAULT_MODEL];
}

/**
 * Calculate the estimated cost of a single request.
 *
 * The Anthropic usage object reports `input_tokens` exclusive of cached
 * tokens: cache reads and cache writes are counted separately in
 * `cache_read_input_tokens` and `cache_creation_input_tokens`. The three
 * counters are therefore billed additively; subtracting cache reads from
 * `input_tokens` would under-charge and can produce a negative cost.
 *
 * @param {string} model - Model used for the request
 * @param {object} usage - Token usage from the API response; missing counters count as 0
 * @returns {number} Estimated cost in dollars
 */
function calculateCost(model, usage) {
  const pricing = getModelPricing(model);

  const inputTokens = usage.input_tokens || 0;
  const cacheReadTokens = usage.cache_read_input_tokens || 0;
  const cacheCreationTokens = usage.cache_creation_input_tokens || 0;
  const outputTokens = usage.output_tokens || 0;

  // Express every input-side counter in "full price input token" units.
  const billableInputTokens =
    inputTokens +
    cacheReadTokens * CACHE_READ_MULTIPLIER +
    cacheCreationTokens * CACHE_WRITE_MULTIPLIER;

  return (
    (billableInputTokens * pricing.input + outputTokens * pricing.output) /
    TOKENS_PER_MILLION
  );
}

/**
 * Truncate text for storage.
 *
 * @param {string} text - Text to truncate
 * @param {number} [maxLen] - Maximum number of characters kept before "..." is appended
 * @returns {string|null} The (possibly shortened) text, or null for empty/missing text
 */
function truncateText(text, maxLen = MAX_STORED_TEXT_LENGTH) {
  if (!text) return null;
  return text.length > maxLen ? `${text.substring(0, maxLen)}...` : text;
}

/**
 * Create the zeroed aggregate stored for a new session.
 *
 * @returns {object} Fresh session aggregate
 */
function createSessionAggregate() {
  return {
    totalInputTokens: 0,
    totalOutputTokens: 0,
    totalCacheCreationTokens: 0,
    totalCacheReadTokens: 0,
    totalCost: 0,
    requestCount: 0,
    requests: [],
    startTime: Date.now(),
  };
}

/**
 * Add one request record to a totals object (session or global).
 *
 * @param {object} totals - Aggregate with total* counters, totalCost and requestCount
 * @param {object} record - Usage record produced by trackUsage
 */
function addToTotals(totals, record) {
  totals.totalInputTokens += record.inputTokens;
  totals.totalOutputTokens += record.outputTokens;
  totals.totalCacheCreationTokens += record.cacheCreationTokens;
  totals.totalCacheReadTokens += record.cacheReadTokens;
  totals.totalCost += record.cost;
  totals.requestCount += 1;
}

/**
 * Track usage for a request
 * @param {string} sessionId - Session identifier; when falsy only global stats are updated
 * @param {string} model - Model used
 * @param {object} usage - Token usage from API response
 * @param {object} content - Optional input/output content for detailed tracking
 * @param {string} content.inputText - User input text
 * @param {string} content.outputText - Assistant output text
 * @param {array} content.toolCalls - Tool calls made
 * @returns {object|null} The stored usage record, or null when no usage was supplied
 */
export function trackUsage(sessionId, model, usage, content = {}) {
  if (!usage) return null;

  // The parameter default only covers `undefined`; tolerate an explicit null too.
  const { inputText, outputText, toolCalls } = content || {};

  const usageRecord = {
    timestamp: Date.now(),
    model,
    inputTokens: usage.input_tokens || 0,
    outputTokens: usage.output_tokens || 0,
    cacheCreationTokens: usage.cache_creation_input_tokens || 0,
    cacheReadTokens: usage.cache_read_input_tokens || 0,
    cost: calculateCost(model, usage),
    inputText: truncateText(inputText),
    outputText: truncateText(outputText),
    toolCalls: toolCalls || [],
  };

  // Update session usage
  if (sessionId) {
    let session = sessionUsage.get(sessionId);
    if (!session) {
      session = createSessionAggregate();
      sessionUsage.set(sessionId, session);
    }

    addToTotals(session, usageRecord);
    session.requests.push(usageRecord);

    // Drop the oldest record once the per-session cap is exceeded. The totals
    // above still cover every request ever tracked for the session.
    if (session.requests.length > MAX_REQUESTS_PER_SESSION) {
      session.requests.shift();
    }
  }

  // Update global usage
  addToTotals(globalUsage, usageRecord);

  // Track by model
  let modelStats = globalUsage.byModel[model];
  if (!modelStats) {
    modelStats = { inputTokens: 0, outputTokens: 0, cost: 0, requestCount: 0 };
    globalUsage.byModel[model] = modelStats;
  }
  modelStats.inputTokens += usageRecord.inputTokens;
  modelStats.outputTokens += usageRecord.outputTokens;
  modelStats.cost += usageRecord.cost;
  modelStats.requestCount += 1;

  return usageRecord;
}

/**
 * Get usage for a session
 * @param {string} sessionId - Session identifier
 * @returns {object|null} The session aggregate, or null if the session is unknown
 */
export function getSessionUsage(sessionId) {
  return sessionUsage.get(sessionId) || null;
}

/**
 * Get global usage stats
 * @returns {object} Shallow copy of the global counters plus `uptime` in milliseconds
 */
export function getGlobalUsage() {
  return {
    ...globalUsage,
    uptime: Date.now() - globalUsage.startTime,
  };
}

/**
 * Format cost as currency string
 * @param {number} cost - Cost in dollars
 * @returns {string} Dollar amount with six decimal places, e.g. "$0.003030"
 */
export function formatCost(cost) {
  return `$${cost.toFixed(6)}`;
}

/**
 * Clear session usage (call when session ends)
 * @param {string} sessionId - Session identifier
 */
export function clearSessionUsage(sessionId) {
  sessionUsage.delete(sessionId);
}

/**
 * Get a formatted usage summary for logging
 * @param {object} usageRecord - Record returned by trackUsage
 * @returns {string} Pipe-separated summary; cache fields are omitted when zero
 */
export function getUsageSummary(usageRecord) {
  if (!usageRecord) return "No usage data";

  return [
    `Input: ${usageRecord.inputTokens}`,
    `Output: ${usageRecord.outputTokens}`,
    usageRecord.cacheReadTokens ? `Cache read: ${usageRecord.cacheReadTokens}` : null,
    usageRecord.cacheCreationTokens ? `Cache write: ${usageRecord.cacheCreationTokens}` : null,
    `Cost: ${formatCost(usageRecord.cost)}`,
  ].filter(Boolean).join(" | ");
}