
Performance Optimization

This guide covers performance optimization techniques for Earna AI Console across all components, focusing on GPT-4o streaming, Supabase queries, and real-time features.

Overview

Performance optimization in Earna AI Console focuses on:

  • Real-time chat: Sub-200ms time to first token with GPT-4o streaming
  • Scalability: Handle thousands of concurrent users with edge functions
  • Resource efficiency: Optimize API calls and database queries
  • Cost optimization: Balance performance with API and infrastructure costs

GPT-4o Optimization

Streaming Response Pipeline

// lib/ai/streaming.ts
import { openai } from '@ai-sdk/openai';
import { streamText } from 'ai';
import { supabase } from '@/lib/supabase/client'; // shared Supabase client (import path illustrative)

export class StreamProcessor {
  async streamGPT4oResponse(
    messages: any[],
    onChunk: (text: string) => void
  ) {
    const result = await streamText({
      model: openai('gpt-4o'),
      messages,
      temperature: 0.7,
      maxTokens: 4096,
      onFinish: async ({ text, usage }) => {
        // Save to Supabase after completion
        await this.saveToDatabase({
          content: text,
          tokens: usage.totalTokens,
          model: 'gpt-4o'
        });
      }
    });

    // Process stream chunks
    for await (const chunk of result.textStream) {
      onChunk(chunk);
    }

    return result;
  }

  // Optimized streaming with Server-Sent Events
  async createSSEStream(
    messages: any[],
    chatId: string
  ): Promise<ReadableStream> {
    const encoder = new TextEncoder();

    return new ReadableStream({
      async start(controller) {
        try {
          const result = await streamText({
            model: openai('gpt-4o'),
            messages,
            temperature: 0.7
          });

          for await (const chunk of result.textStream) {
            controller.enqueue(
              encoder.encode(`data: ${JSON.stringify({ type: 'text', content: chunk })}\n\n`)
            );
          }

          controller.enqueue(
            encoder.encode(`data: ${JSON.stringify({ type: 'done' })}\n\n`)
          );
        } catch (error: any) {
          controller.enqueue(
            encoder.encode(`data: ${JSON.stringify({ type: 'error', error: error.message })}\n\n`)
          );
        } finally {
          controller.close();
        }
      }
    });
  }

  private async saveToDatabase(data: any) {
    const { error } = await supabase
      .from('messages')
      .insert(data);

    if (error) console.error('Failed to save message:', error);
  }
}

Supabase Optimization

Database Query Optimization

// lib/db/connection-pool.ts
import { createClient } from '@supabase/supabase-js';
import { Pool } from '@neondatabase/serverless';

export class DatabasePool {
  private supabase: any;
  private pgPool: Pool;

  constructor() {
    // Supabase client with connection pooling
    this.supabase = createClient(
      process.env.NEXT_PUBLIC_SUPABASE_URL!,
      process.env.SUPABASE_SERVICE_ROLE_KEY!,
      {
        auth: { persistSession: false },
        db: { schema: 'public' },
        global: { headers: { 'x-connection-pool': 'true' } }
      }
    );

    // Direct PostgreSQL pool for complex queries
    this.pgPool = new Pool({
      connectionString: process.env.DATABASE_URL,
      max: 20, // Maximum connections
      idleTimeoutMillis: 30000,
      connectionTimeoutMillis: 2000
    });
  }

  async executeQuery<T>(query: string, params?: any[]): Promise<T[]> {
    const client = await this.pgPool.connect();
    try {
      const result = await client.query(query, params);
      return result.rows;
    } finally {
      client.release();
    }
  }

  async batchInsert(table: string, records: any[], batchSize = 1000) {
    for (let i = 0; i < records.length; i += batchSize) {
      const batch = records.slice(i, i + batchSize);
      const { error } = await this.supabase
        .from(table)
        .insert(batch);

      if (error) throw error;
    }
  }

  async getOptimizedChatHistory(chatId: string, limit = 50) {
    // Use RPC for optimized query
    const { data, error } = await this.supabase
      .rpc('get_chat_history_optimized', {
        chat_id: chatId,
        message_limit: limit
      });

    if (error) throw error;
    return data;
  }
}

// Supabase RPC function (create in SQL Editor)
/*
CREATE OR REPLACE FUNCTION get_chat_history_optimized(
  chat_id UUID,
  message_limit INT DEFAULT 50
)
RETURNS TABLE (
  id UUID,
  role TEXT,
  content TEXT,
  model TEXT,
  created_at TIMESTAMPTZ
) AS $$
BEGIN
  RETURN QUERY
  SELECT m.id, m.role, m.content, m.model, m.created_at
  FROM messages m
  WHERE m.chat_id = $1
  ORDER BY m.created_at DESC
  LIMIT $2;
END;
$$ LANGUAGE plpgsql;

CREATE INDEX IF NOT EXISTS idx_messages_chat_created
ON messages(chat_id, created_at DESC);
*/

Edge Function Optimization

Vercel Edge Functions

// app/api/chat/route.ts
import { NextRequest } from 'next/server';
import { streamText } from 'ai';
// Cache and provider helpers defined elsewhere in the app (import path illustrative)
import { generateCacheKey, getCachedResponse, getModelProvider } from '@/lib/cache';

export const runtime = 'edge'; // Use edge runtime
export const maxDuration = 25; // Maximum 25 seconds

export async function POST(request: NextRequest) {
  // Parse request efficiently
  const { messages, model = 'gpt-4o' } = await request.json();

  // Return cached response if available
  const cacheKey = generateCacheKey(messages);
  const cached = await getCachedResponse(cacheKey);

  if (cached) {
    return new Response(cached, {
      headers: {
        'Content-Type': 'application/json',
        'X-Cache': 'HIT'
      }
    });
  }

  // Stream response
  const stream = await createOptimizedStream(messages, model);

  return new Response(stream, {
    headers: {
      'Content-Type': 'text/event-stream',
      'Cache-Control': 'no-cache',
      'Connection': 'keep-alive',
      'X-Cache': 'MISS'
    }
  });
}

async function createOptimizedStream(messages: any[], model: string) {
  const encoder = new TextEncoder();

  return new ReadableStream({
    async start(controller) {
      // Resolve a pooled provider for the requested model
      const modelProvider = await getModelProvider(model);

      try {
        const result = await streamText({
          model: modelProvider,
          messages,
          temperature: 0.7
        });

        for await (const chunk of result.textStream) {
          // Send chunk immediately
          controller.enqueue(
            encoder.encode(`data: ${JSON.stringify({ text: chunk })}\n\n`)
          );
        }
      } catch (error: any) {
        controller.enqueue(
          encoder.encode(`data: ${JSON.stringify({ error: error.message })}\n\n`)
        );
      } finally {
        controller.close();
      }
    }
  });
}

Frontend Performance

React Optimization

// components/chat/optimized-chat.tsx
'use client';

import { ChangeEvent, memo, useCallback, useMemo, useRef, useState } from 'react';
import { useVirtualizer } from '@tanstack/react-virtual';
import debounce from 'lodash/debounce'; // or any equivalent debounce utility

const MessageList = memo(({ messages }: { messages: any[] }) => {
  const parentRef = useRef<HTMLDivElement>(null);

  // Virtualize large message lists
  const virtualizer = useVirtualizer({
    count: messages.length,
    getScrollElement: () => parentRef.current,
    estimateSize: () => 100,
    overscan: 5
  });

  return (
    <div ref={parentRef} className="h-full overflow-auto">
      <div
        style={{
          height: `${virtualizer.getTotalSize()}px`,
          width: '100%',
          position: 'relative'
        }}
      >
        {virtualizer.getVirtualItems().map((virtualItem) => (
          <div
            key={virtualItem.key}
            style={{
              position: 'absolute',
              top: 0,
              left: 0,
              width: '100%',
              height: `${virtualItem.size}px`,
              transform: `translateY(${virtualItem.start}px)`
            }}
          >
            <Message message={messages[virtualItem.index]} />
          </div>
        ))}
      </div>
    </div>
  );
});

// Memoized message component
const Message = memo(({ message }: { message: any }) => {
  return (
    <div className="px-4 py-2">
      <div className={`message ${message.role}`}>
        {message.content}
      </div>
    </div>
  );
}, (prevProps, nextProps) => {
  // Custom comparison for better performance
  return prevProps.message.id === nextProps.message.id &&
    prevProps.message.content === nextProps.message.content;
});

// Optimized chat input with debouncing
export function ChatInput({ onSend }: { onSend: (text: string) => void }) {
  const [input, setInput] = useState('');

  // Debounce typing indicator
  const sendTypingIndicator = useMemo(
    () => debounce((isTyping: boolean) => {
      // Send typing status
    }, 500),
    []
  );

  const handleChange = useCallback((e: ChangeEvent<HTMLTextAreaElement>) => {
    setInput(e.target.value);
    sendTypingIndicator(true);
  }, [sendTypingIndicator]);

  const handleSubmit = useCallback(() => {
    if (input.trim()) {
      onSend(input);
      setInput('');
    }
  }, [input, onSend]);

  return (
    <div className="chat-input">
      <textarea
        value={input}
        onChange={handleChange}
        onKeyDown={(e) => {
          if (e.key === 'Enter' && !e.shiftKey) {
            e.preventDefault();
            handleSubmit();
          }
        }}
        placeholder="Type a message..."
      />
    </div>
  );
}

Memory Management

// lib/performance/memory-manager.ts
export class MemoryManager {
  private sessionPool: Map<string, any> = new Map();
  private maxSessions = 1000;
  private sessionTTL = 30 * 60 * 1000; // 30 minutes

  async getSession(sessionId: string): Promise<any> {
    // Check pool
    if (this.sessionPool.has(sessionId)) {
      const session = this.sessionPool.get(sessionId);
      session.lastAccessed = Date.now();
      return session;
    }

    // Evict LRU if full
    if (this.sessionPool.size >= this.maxSessions) {
      this.evictLRUSession();
    }

    // Load from database
    const session = await this.loadSession(sessionId);

    // Add to pool
    this.sessionPool.set(sessionId, {
      ...session,
      lastAccessed: Date.now()
    });

    return session;
  }

  private evictLRUSession(): void {
    let oldestTime = Date.now();
    let oldestId: string | null = null;

    for (const [id, session] of this.sessionPool) {
      if (session.lastAccessed < oldestTime) {
        oldestTime = session.lastAccessed;
        oldestId = id;
      }
    }

    if (oldestId) {
      this.sessionPool.delete(oldestId);
    }
  }

  // Monitor memory usage
  monitorMemory(): void {
    setInterval(() => {
      const usage = process.memoryUsage();
      const heapUsedMB = Math.round(usage.heapUsed / 1024 / 1024);

      if (heapUsedMB > 400) { // Alert if > 400MB
        console.warn('High memory usage:', heapUsedMB, 'MB');

        // Force garbage collection if available
        if (global.gc) {
          global.gc();
        }

        // Reduce pool size
        this.maxSessions = Math.floor(this.maxSessions * 0.8);
        this.trimPool();
      }
    }, 30000);
  }

  private trimPool(): void {
    while (this.sessionPool.size > this.maxSessions) {
      this.evictLRUSession();
    }
  }

  // Fetch a session from persistent storage; the actual query belongs to the
  // application's data layer and is omitted here (placeholder implementation).
  private async loadSession(sessionId: string): Promise<any> {
    return {};
  }
}

Performance Monitoring

// lib/monitoring/performance.ts
export class PerformanceMonitor {
  // Track API latency
  async trackAPICall(
    endpoint: string,
    method: string,
    startTime: number
  ) {
    const duration = Date.now() - startTime;

    // Send to the browser analytics client when one is attached to window
    if (typeof window !== 'undefined') {
      (window as any).analytics?.track('api_call', {
        endpoint,
        method,
        duration,
        timestamp: new Date().toISOString()
      });
    }

    // Log slow requests
    if (duration > 1000) {
      console.warn(`Slow API call: ${method} ${endpoint} took ${duration}ms`);
    }
  }

  // Track GPT-4o streaming performance
  trackStreamingPerformance(
    model: string,
    timeToFirstToken: number,
    totalTime: number,
    tokenCount: number
  ) {
    const metrics = {
      model,
      timeToFirstToken,
      totalTime,
      tokenCount,
      tokensPerSecond: tokenCount / (totalTime / 1000)
    };

    // Send metrics
    this.sendMetrics('streaming_performance', metrics);
  }

  // Track database performance
  async trackDatabaseQuery(
    query: string,
    startTime: number,
    rowCount: number
  ) {
    const duration = Date.now() - startTime;

    if (duration > 100) { // Log slow queries
      console.warn(`Slow query (${duration}ms): ${query.substring(0, 100)}...`);
    }

    this.sendMetrics('database_query', {
      duration,
      rowCount,
      timestamp: new Date().toISOString()
    });
  }

  private sendMetrics(event: string, data: any) {
    // Send to monitoring service
    fetch('/api/metrics', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ event, data })
    }).catch(console.error);
  }
}

Performance Checklist

GPT-4o Optimization

  • Enable streaming responses for all chat interactions
  • Implement response caching with Redis (1 hour TTL), as sketched after this list
  • Use batch processing for multiple requests
  • Monitor token usage and costs
  • Implement fallback to cached responses on rate limits
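
A minimal sketch of the Redis response cache and rate-limit fallback described above, assuming an ioredis client reachable through a REDIS_URL environment variable; the respondWithFallback helper and its generate callback are illustrative, not existing project code.

// lib/cache/response-cache.ts (illustrative sketch)
import Redis from 'ioredis';
import { createHash } from 'node:crypto';

const redis = new Redis(process.env.REDIS_URL!); // assumes a standard Redis instance

const RESPONSE_TTL_SECONDS = 60 * 60; // 1 hour, matching the checklist

// Derive a deterministic cache key from the model and conversation
function cacheKey(model: string, messages: { role: string; content: string }[]): string {
  const digest = createHash('sha256')
    .update(JSON.stringify({ model, messages }))
    .digest('hex');
  return `chat:response:${digest}`;
}

export async function getCachedResponse(model: string, messages: any[]): Promise<string | null> {
  return redis.get(cacheKey(model, messages));
}

export async function cacheResponse(model: string, messages: any[], text: string): Promise<void> {
  await redis.set(cacheKey(model, messages), text, 'EX', RESPONSE_TTL_SECONDS);
}

// Fallback on rate limits: serve the cached answer if the provider rejects the call
export async function respondWithFallback(
  model: string,
  messages: any[],
  generate: () => Promise<string> // hypothetical generator, e.g. a non-streaming GPT-4o call
): Promise<string> {
  try {
    const text = await generate();
    await cacheResponse(model, messages, text);
    return text;
  } catch (error: any) {
    if (error?.status === 429) {
      const cached = await getCachedResponse(model, messages);
      if (cached) return cached;
    }
    throw error;
  }
}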

Supabase Optimization

  • Enable connection pooling with pgBouncer
  • Create indexes on frequently queried columns
  • Use materialized views for aggregations
  • Implement query result caching
  • Optimize real-time subscriptions, as sketched after this list
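
For the real-time subscription point above, a sketch of a chat-scoped subscription that lets Supabase filter messages rows server-side by chat_id instead of pushing every change to every client; the channel name and anon-key client setup are assumptions.

// lib/realtime/chat-subscription.ts (illustrative sketch)
import { createClient } from '@supabase/supabase-js';

const supabase = createClient(
  process.env.NEXT_PUBLIC_SUPABASE_URL!,
  process.env.NEXT_PUBLIC_SUPABASE_ANON_KEY!
);

// Subscribe only to INSERTs for a single chat so rows are filtered on the
// server before they reach the client, instead of one broad table-wide channel.
export function subscribeToChat(
  chatId: string,
  onMessage: (row: Record<string, any>) => void
) {
  const channel = supabase
    .channel(`chat-${chatId}`)
    .on(
      'postgres_changes',
      {
        event: 'INSERT',
        schema: 'public',
        table: 'messages',
        filter: `chat_id=eq.${chatId}`
      },
      (payload) => onMessage(payload.new)
    )
    .subscribe();

  // Return a cleanup function so components can unsubscribe on unmount
  return () => supabase.removeChannel(channel);
}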

Edge Function Optimization

  • Use Vercel Edge Runtime for API routes
  • Enable response caching with proper headers
  • Implement request deduplication, as sketched after this list
  • Use regional edge functions for lower latency
  • Monitor function execution time
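
Request deduplication can be as simple as coalescing identical in-flight calls behind a shared promise. The helper below is a sketch rather than an existing module; the chat-history call in the usage comment is illustrative.

// lib/performance/dedupe.ts (illustrative sketch)
// Coalesce identical concurrent requests so only one upstream call is made;
// callers that arrive while it is pending share the same promise.
const inFlight = new Map<string, Promise<unknown>>();

export function dedupe<T>(key: string, work: () => Promise<T>): Promise<T> {
  const existing = inFlight.get(key);
  if (existing) return existing as Promise<T>;

  const pending = work().finally(() => {
    // Clear the slot once settled so later calls trigger a fresh request
    inFlight.delete(key);
  });

  inFlight.set(key, pending);
  return pending;
}

// Usage (illustrative): concurrent renders share one history query
// const history = await dedupe(`history:${chatId}`, () =>
//   db.getOptimizedChatHistory(chatId)
// );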

Frontend Optimization

  • Implement virtual scrolling for long lists
  • Use React.memo and useMemo appropriately
  • Enable code splitting with dynamic imports, as sketched after this list
  • Optimize bundle size with tree shaking
  • Implement progressive image loading
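
Code splitting with dynamic imports in Next.js might look like the sketch below; the panel components and their paths are hypothetical.

// components/chat/lazy-panels.tsx (illustrative sketch)
import dynamic from 'next/dynamic';

// Heavy, below-the-fold panels are split out of the main bundle and only
// fetched when they are actually rendered.
const ModelSettingsPanel = dynamic(
  () => import('./model-settings-panel'), // hypothetical component
  { loading: () => <div className="p-4 text-sm">Loading settings…</div> }
);

// Client-only widgets (e.g. charts) can also skip server rendering entirely
const UsageChart = dynamic(() => import('./usage-chart'), { ssr: false }); // hypothetical component

export function ChatSidebar() {
  return (
    <aside className="w-80 space-y-4">
      <ModelSettingsPanel />
      <UsageChart />
    </aside>
  );
}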

Caching Strategy

  • L1: In-memory LRU cache (5 minute TTL)
  • L2: Redis cache (1 hour TTL)
  • L3: CDN edge caching for static assets
  • Implement cache warming for active users
  • Use the stale-while-revalidate pattern, as sketched after this list
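
A compact sketch of the L1 layer with stale-while-revalidate semantics; the load callback would typically fall through to the Redis L2 layer or Supabase. The swrGet helper and its freshness window are illustrative.

// lib/cache/layered-cache.ts (illustrative sketch)
type Entry<T> = { value: T; storedAt: number };

const FRESH_MS = 5 * 60 * 1000; // L1 freshness window (5 minutes)
const l1 = new Map<string, Entry<unknown>>();

// Stale-while-revalidate: return the cached value immediately and, if it is
// past its freshness window, refresh it in the background for the next caller.
export async function swrGet<T>(key: string, load: () => Promise<T>): Promise<T> {
  const entry = l1.get(key) as Entry<T> | undefined;

  if (entry) {
    const isStale = Date.now() - entry.storedAt > FRESH_MS;
    if (isStale) {
      // Fire-and-forget revalidation; failures only affect the background refresh
      load()
        .then((value) => l1.set(key, { value, storedAt: Date.now() }))
        .catch(() => {});
    }
    return entry.value;
  }

  const value = await load();
  l1.set(key, { value, storedAt: Date.now() });
  return value;
}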

Applied together, these strategies can deliver sub-200ms time to first token with GPT-4o streaming, support thousands of concurrent users, and reduce API costs by up to 50% through intelligent caching.
