# Performance Optimization
This guide covers performance optimization techniques for Earna AI Console across all components, focusing on GPT-4o streaming, Supabase queries, and real-time features.
## Overview
Performance optimization in Earna AI Console focuses on:
- Real-time chat: Sub-200ms time to first token with GPT-4o streaming
- Scalability: Handle thousands of concurrent users with edge functions
- Resource efficiency: Optimize API calls and database queries
- Cost optimization: Balance performance with API and infrastructure costs
## GPT-4o Optimization

### Streaming Response Pipeline

#### Stream Processing
```typescript
// lib/ai/streaming.ts
import { openai } from '@ai-sdk/openai';
import { streamText } from 'ai';
import { createClient } from '@supabase/supabase-js';

// Server-side Supabase client used to persist completed messages
const supabase = createClient(
  process.env.NEXT_PUBLIC_SUPABASE_URL!,
  process.env.SUPABASE_SERVICE_ROLE_KEY!
);

export class StreamProcessor {
  async streamGPT4oResponse(
    messages: any[],
    onChunk: (text: string) => void
  ) {
    const result = await streamText({
      model: openai('gpt-4o'),
      messages,
      temperature: 0.7,
      maxTokens: 4096,
      onFinish: async ({ text, usage }) => {
        // Save to Supabase after completion
        await this.saveToDatabase({
          content: text,
          tokens: usage.totalTokens,
          model: 'gpt-4o'
        });
      }
    });

    // Process stream chunks as they arrive
    for await (const chunk of result.textStream) {
      onChunk(chunk);
    }

    return result;
  }

  // Optimized streaming with Server-Sent Events
  async createSSEStream(
    messages: any[],
    chatId: string
  ): Promise<ReadableStream> {
    const encoder = new TextEncoder();

    return new ReadableStream({
      async start(controller) {
        try {
          const result = await streamText({
            model: openai('gpt-4o'),
            messages,
            temperature: 0.7
          });

          for await (const chunk of result.textStream) {
            controller.enqueue(
              encoder.encode(`data: ${JSON.stringify({
                type: 'text',
                content: chunk
              })}\n\n`)
            );
          }

          controller.enqueue(
            encoder.encode(`data: ${JSON.stringify({ type: 'done' })}\n\n`)
          );
        } catch (error) {
          controller.enqueue(
            encoder.encode(`data: ${JSON.stringify({
              type: 'error',
              error: error instanceof Error ? error.message : 'Unknown error'
            })}\n\n`)
          );
        } finally {
          controller.close();
        }
      }
    });
  }

  private async saveToDatabase(data: { content: string; tokens: number; model: string }) {
    const { error } = await supabase
      .from('messages')
      .insert(data);

    if (error) console.error('Failed to save message:', error);
  }
}
```
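For completeness, here is a minimal sketch of consuming this SSE stream in the browser with `fetch` and a stream reader. It assumes the `/api/chat` route and the `type`/`content` event shape shown above; the `appendToChat` callback is a hypothetical UI hook.

```typescript
// Hypothetical client-side consumer for the SSE stream above
export async function consumeChatStream(
  messages: unknown[],
  appendToChat: (text: string) => void // assumed UI callback
) {
  const response = await fetch('/api/chat', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ messages })
  });

  const reader = response.body!.getReader();
  const decoder = new TextDecoder();

  while (true) {
    const { done, value } = await reader.read();
    if (done) break;

    // Each SSE frame is "data: {...}\n\n"; a production parser should also
    // buffer frames that are split across reads.
    for (const line of decoder.decode(value, { stream: true }).split('\n')) {
      if (!line.startsWith('data: ')) continue;
      const event = JSON.parse(line.slice(6));
      if (event.type === 'text') appendToChat(event.content);
      if (event.type === 'error') throw new Error(event.error);
    }
  }
}
```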
## Supabase Optimization

### Database Query Optimization

#### Connection Pooling
```typescript
// lib/db/connection-pool.ts
import { createClient, SupabaseClient } from '@supabase/supabase-js';
import { Pool } from '@neondatabase/serverless';

export class DatabasePool {
  private supabase: SupabaseClient;
  private pgPool: Pool;

  constructor() {
    // Supabase client with connection pooling
    this.supabase = createClient(
      process.env.NEXT_PUBLIC_SUPABASE_URL!,
      process.env.SUPABASE_SERVICE_ROLE_KEY!,
      {
        auth: {
          persistSession: false
        },
        db: {
          schema: 'public'
        },
        global: {
          headers: {
            'x-connection-pool': 'true'
          }
        }
      }
    );

    // Direct PostgreSQL pool for complex queries
    this.pgPool = new Pool({
      connectionString: process.env.DATABASE_URL,
      max: 20, // Maximum connections
      idleTimeoutMillis: 30000,
      connectionTimeoutMillis: 2000
    });
  }

  async executeQuery<T>(query: string, params?: any[]): Promise<T[]> {
    const client = await this.pgPool.connect();
    try {
      const result = await client.query(query, params);
      return result.rows;
    } finally {
      client.release();
    }
  }

  async batchInsert(table: string, records: any[], batchSize = 1000) {
    // Insert in fixed-size batches to stay under payload and statement limits
    for (let i = 0; i < records.length; i += batchSize) {
      const batch = records.slice(i, i + batchSize);
      const { error } = await this.supabase
        .from(table)
        .insert(batch);

      if (error) throw error;
    }
  }

  async getOptimizedChatHistory(chatId: string, limit = 50) {
    // Use an RPC for an index-backed, server-side query
    const { data, error } = await this.supabase
      .rpc('get_chat_history_optimized', {
        chat_id: chatId,
        message_limit: limit
      });

    if (error) throw error;
    return data;
  }
}

// Supabase RPC function (create in SQL Editor)
/*
CREATE OR REPLACE FUNCTION get_chat_history_optimized(
  chat_id UUID,
  message_limit INT DEFAULT 50
)
RETURNS TABLE (
  id UUID,
  role TEXT,
  content TEXT,
  model TEXT,
  created_at TIMESTAMPTZ
) AS $$
BEGIN
  RETURN QUERY
  SELECT m.id, m.role, m.content, m.model, m.created_at
  FROM messages m
  WHERE m.chat_id = $1
  ORDER BY m.created_at DESC
  LIMIT $2;
END;
$$ LANGUAGE plpgsql;

CREATE INDEX IF NOT EXISTS idx_messages_chat_created
  ON messages(chat_id, created_at DESC);
*/
```
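A brief usage sketch of the pool above. The table and column names mirror the `messages` schema used in the RPC; treating the pool as a module-level singleton so connections are reused across requests is an assumption, not a requirement.

```typescript
// Reuse a single pool per server instance (assumed module-level singleton)
const db = new DatabasePool();

export async function loadRecentMessages(chatId: string) {
  // Index-backed RPC defined above
  return db.getOptimizedChatHistory(chatId, 50);
}

export async function importMessages(
  rows: { chat_id: string; role: string; content: string }[]
) {
  // Batched insert to avoid oversized payloads
  await db.batchInsert('messages', rows);
}
```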
## Edge Function Optimization

### Vercel Edge Functions
```typescript
// app/api/chat/route.ts
import { NextRequest } from 'next/server';
import { streamText } from 'ai';
// generateCacheKey, getCachedResponse, and getModelProvider are
// project-specific helpers (sketched after this block).

export const runtime = 'edge'; // Use the edge runtime
export const maxDuration = 25; // Maximum 25 seconds

export async function POST(request: NextRequest) {
  // Parse the request body once
  const { messages, model = 'gpt-4o' } = await request.json();

  // Return a cached response if available
  const cacheKey = await generateCacheKey(messages);
  const cached = await getCachedResponse(cacheKey);

  if (cached) {
    return new Response(cached, {
      headers: {
        'Content-Type': 'application/json',
        'X-Cache': 'HIT'
      }
    });
  }

  // Otherwise stream a fresh response
  const stream = await createOptimizedStream(messages, model);

  return new Response(stream, {
    headers: {
      'Content-Type': 'text/event-stream',
      'Cache-Control': 'no-cache',
      'Connection': 'keep-alive',
      'X-Cache': 'MISS'
    }
  });
}

async function createOptimizedStream(messages: any[], model: string) {
  const encoder = new TextEncoder();

  return new ReadableStream({
    async start(controller) {
      // Resolve the provider for the requested model
      const modelProvider = await getModelProvider(model);

      try {
        const result = await streamText({
          model: modelProvider,
          messages,
          temperature: 0.7
        });

        for await (const chunk of result.textStream) {
          // Forward each chunk immediately
          controller.enqueue(
            encoder.encode(`data: ${JSON.stringify({ text: chunk })}\n\n`)
          );
        }
      } catch (error) {
        controller.enqueue(
          encoder.encode(`data: ${JSON.stringify({
            error: error instanceof Error ? error.message : 'Unknown error'
          })}\n\n`)
        );
      } finally {
        controller.close();
      }
    }
  });
}
```
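The route above references `generateCacheKey`, `getCachedResponse`, and `getModelProvider` without defining them. Below is a minimal sketch of what they might look like, assuming Upstash Redis as the edge-compatible cache and the AI SDK OpenAI provider used earlier; the module path, helper names, and one-hour TTL are illustrative.

```typescript
// lib/cache/response-cache.ts (illustrative helpers for the route above)
import { Redis } from '@upstash/redis';
import { openai } from '@ai-sdk/openai';

const redis = Redis.fromEnv(); // requires UPSTASH_REDIS_REST_URL / _TOKEN

// Hash the normalized message list so identical prompts share a cache entry
export async function generateCacheKey(messages: unknown[]): Promise<string> {
  const data = new TextEncoder().encode(JSON.stringify(messages));
  const digest = await crypto.subtle.digest('SHA-256', data);
  const hex = Array.from(new Uint8Array(digest))
    .map((b) => b.toString(16).padStart(2, '0'))
    .join('');
  return `chat:response:${hex}`;
}

export async function getCachedResponse(key: string): Promise<string | null> {
  return redis.get<string>(key);
}

export async function cacheResponse(key: string, value: string): Promise<void> {
  await redis.set(key, value, { ex: 3600 }); // 1 hour TTL
}

// Map a model name to an AI SDK provider instance
export async function getModelProvider(model: string) {
  return openai(model); // e.g. 'gpt-4o'
}
```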
## Frontend Performance

### React Optimization

#### Component Optimization
```tsx
// components/chat/optimized-chat.tsx
'use client';

import { memo, useCallback, useMemo, useRef, useState, type ChangeEvent } from 'react';
import { useVirtualizer } from '@tanstack/react-virtual';
import debounce from 'lodash.debounce'; // any debounce utility works

const MessageList = memo(({ messages }: { messages: any[] }) => {
  const parentRef = useRef<HTMLDivElement>(null);

  // Virtualize large message lists so only visible rows render
  const virtualizer = useVirtualizer({
    count: messages.length,
    getScrollElement: () => parentRef.current,
    estimateSize: () => 100,
    overscan: 5
  });

  return (
    <div ref={parentRef} className="h-full overflow-auto">
      <div
        style={{
          height: `${virtualizer.getTotalSize()}px`,
          width: '100%',
          position: 'relative'
        }}
      >
        {virtualizer.getVirtualItems().map((virtualItem) => (
          <div
            key={virtualItem.key}
            style={{
              position: 'absolute',
              top: 0,
              left: 0,
              width: '100%',
              height: `${virtualItem.size}px`,
              transform: `translateY(${virtualItem.start}px)`
            }}
          >
            <Message message={messages[virtualItem.index]} />
          </div>
        ))}
      </div>
    </div>
  );
});

// Memoized message component
const Message = memo(({ message }: { message: any }) => {
  return (
    <div className="px-4 py-2">
      <div className={`message ${message.role}`}>
        {message.content}
      </div>
    </div>
  );
}, (prevProps, nextProps) => {
  // Custom comparison: re-render only when id or content changes
  return prevProps.message.id === nextProps.message.id &&
    prevProps.message.content === nextProps.message.content;
});

// Optimized chat input with a debounced typing indicator
export function ChatInput({ onSend }: { onSend: (text: string) => void }) {
  const [input, setInput] = useState('');

  // Debounce the typing indicator so it fires at most every 500ms
  const sendTypingIndicator = useMemo(
    () => debounce((isTyping: boolean) => {
      // Send typing status to the realtime channel
    }, 500),
    []
  );

  const handleChange = useCallback((e: ChangeEvent<HTMLTextAreaElement>) => {
    setInput(e.target.value);
    sendTypingIndicator(true);
  }, [sendTypingIndicator]);

  const handleSubmit = useCallback(() => {
    if (input.trim()) {
      onSend(input);
      setInput('');
    }
  }, [input, onSend]);

  return (
    <div className="chat-input">
      <textarea
        value={input}
        onChange={handleChange}
        onKeyDown={(e) => {
          if (e.key === 'Enter' && !e.shiftKey) {
            e.preventDefault();
            handleSubmit();
          }
        }}
        placeholder="Type a message..."
      />
    </div>
  );
}
```
### Memory Management
```typescript
// lib/performance/memory-manager.ts
export class MemoryManager {
  private sessionPool: Map<string, any> = new Map();
  private maxSessions = 1000;
  private sessionTTL = 30 * 60 * 1000; // 30 minutes

  async getSession(sessionId: string): Promise<any> {
    // Serve from the in-memory pool when possible
    if (this.sessionPool.has(sessionId)) {
      const session = this.sessionPool.get(sessionId);
      session.lastAccessed = Date.now();
      return session;
    }

    // Evict the least recently used entry if the pool is full
    if (this.sessionPool.size >= this.maxSessions) {
      this.evictLRUSession();
    }

    // Load from the database on a pool miss
    const session = await this.loadSession(sessionId);

    // Add to the pool
    this.sessionPool.set(sessionId, {
      ...session,
      lastAccessed: Date.now()
    });

    return session;
  }

  private evictLRUSession(): void {
    let oldestTime = Date.now();
    let oldestId: string | null = null;

    for (const [id, session] of this.sessionPool) {
      if (session.lastAccessed < oldestTime) {
        oldestTime = session.lastAccessed;
        oldestId = id;
      }
    }

    if (oldestId) {
      this.sessionPool.delete(oldestId);
    }
  }

  // Monitor memory usage and shrink the pool under pressure
  monitorMemory(): void {
    setInterval(() => {
      const usage = process.memoryUsage();
      const heapUsedMB = Math.round(usage.heapUsed / 1024 / 1024);

      if (heapUsedMB > 400) { // Alert if heap exceeds 400MB
        console.warn('High memory usage:', heapUsedMB, 'MB');

        // Force garbage collection if the process was started with --expose-gc
        if (global.gc) {
          global.gc();
        }

        // Reduce the pool size and trim it down
        this.maxSessions = Math.floor(this.maxSessions * 0.8);
        this.trimPool();
      }
    }, 30000);
  }

  private trimPool(): void {
    while (this.sessionPool.size > this.maxSessions) {
      this.evictLRUSession();
    }
  }

  // Fetch the session record from storage (project-specific; stubbed here)
  private async loadSession(sessionId: string): Promise<any> {
    return { id: sessionId };
  }
}
```
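A short usage sketch, assuming a single long-lived manager per server process; the `handleChatRequest` wrapper is illustrative.

```typescript
// Hypothetical wiring: one manager per process, started at boot
const memoryManager = new MemoryManager();
memoryManager.monitorMemory();

export async function handleChatRequest(sessionId: string) {
  // A pool hit avoids a database round trip on repeat requests
  return memoryManager.getSession(sessionId);
}
```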
## Performance Monitoring
```typescript
// lib/monitoring/performance.ts
import { track } from '@vercel/analytics';

export class PerformanceMonitor {
  // Track API latency
  async trackAPICall(
    endpoint: string,
    method: string,
    startTime: number
  ) {
    const duration = Date.now() - startTime;

    // Send to Vercel Analytics (client-side only)
    if (typeof window !== 'undefined') {
      track('api_call', {
        endpoint,
        method,
        duration,
        timestamp: new Date().toISOString()
      });
    }

    // Log slow requests
    if (duration > 1000) {
      console.warn(`Slow API call: ${method} ${endpoint} took ${duration}ms`);
    }
  }

  // Track GPT-4o streaming performance
  trackStreamingPerformance(
    model: string,
    timeToFirstToken: number,
    totalTime: number,
    tokenCount: number
  ) {
    const metrics = {
      model,
      timeToFirstToken,
      totalTime,
      tokenCount,
      tokensPerSecond: tokenCount / (totalTime / 1000)
    };

    // Send metrics
    this.sendMetrics('streaming_performance', metrics);
  }

  // Track database performance
  async trackDatabaseQuery(
    query: string,
    startTime: number,
    rowCount: number
  ) {
    const duration = Date.now() - startTime;

    if (duration > 100) { // Log slow queries
      console.warn(`Slow query (${duration}ms): ${query.substring(0, 100)}...`);
    }

    this.sendMetrics('database_query', {
      duration,
      rowCount,
      timestamp: new Date().toISOString()
    });
  }

  private sendMetrics(event: string, data: any) {
    // Forward to the internal metrics endpoint
    fetch('/api/metrics', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ event, data })
    }).catch(console.error);
  }
}
```
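As a usage sketch, time to first token can be measured by timestamping the first chunk of the stream. The monitor instance and the use of chunk count as a proxy for tokens are illustrative assumptions.

```typescript
// Hypothetical instrumentation around a streaming call
const monitor = new PerformanceMonitor();

export async function measuredStream(result: { textStream: AsyncIterable<string> }) {
  const start = Date.now();
  let firstTokenAt = 0;
  let chunkCount = 0;

  for await (const chunk of result.textStream) {
    if (!firstTokenAt) firstTokenAt = Date.now(); // first chunk ≈ first token
    chunkCount++;
    // ...forward chunk to the client...
  }

  monitor.trackStreamingPerformance(
    'gpt-4o',
    firstTokenAt ? firstTokenAt - start : 0, // time to first token (ms)
    Date.now() - start,                      // total time (ms)
    chunkCount                               // chunks as a proxy for tokens
  );
}
```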
## Performance Checklist

### GPT-4o Optimization
- Enable streaming responses for all chat interactions
- Implement response caching with Redis (1 hour TTL)
- Use batch processing for multiple requests
- Monitor token usage and costs
- Implement fallback to cached responses on rate limits
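The last item, falling back to cached responses when the provider rate-limits, might look like the sketch below. The cache helpers come from the illustrative module sketched earlier, and the `status === 429` check is an assumption about the error shape.

```typescript
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
// Illustrative cache helpers from the earlier sketch (hypothetical module path)
import { generateCacheKey, getCachedResponse, cacheResponse } from '@/lib/cache/response-cache';

export async function chatWithFallback(messages: any[]): Promise<string> {
  const cacheKey = await generateCacheKey(messages);

  try {
    const { text } = await generateText({ model: openai('gpt-4o'), messages });
    await cacheResponse(cacheKey, text);
    return text;
  } catch (error: any) {
    // On a rate limit, prefer a previously cached answer over a hard failure
    if (error?.status === 429 || error?.statusCode === 429) {
      const cached = await getCachedResponse(cacheKey);
      if (cached) return cached;
    }
    throw error;
  }
}
```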
### Supabase Optimization
- Enable connection pooling with pgBouncer
- Create indexes on frequently queried columns
- Use materialized views for aggregations
- Implement query result caching (see the sketch after this list)
- Optimize real-time subscriptions
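One way to cache query results is a small TTL map in front of the chat-history RPC shown earlier; the key format and one-minute TTL here are illustrative.

```typescript
// Minimal in-memory TTL cache in front of the chat-history RPC (illustrative)
const queryCache = new Map<string, { data: unknown; expiresAt: number }>();
const QUERY_TTL_MS = 60_000; // 1 minute

export async function getCachedChatHistory(db: DatabasePool, chatId: string) {
  const key = `chat-history:${chatId}`;
  const hit = queryCache.get(key);

  if (hit && hit.expiresAt > Date.now()) {
    return hit.data; // fresh cache hit, no database round trip
  }

  const data = await db.getOptimizedChatHistory(chatId);
  queryCache.set(key, { data, expiresAt: Date.now() + QUERY_TTL_MS });
  return data;
}
```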
### Edge Function Optimization
- Use Vercel Edge Runtime for API routes
- Enable response caching with proper headers
- Implement request deduplication (see the sketch after this list)
- Use regional edge functions for lower latency
- Monitor function execution time
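Request deduplication can be as simple as sharing one in-flight promise per key so concurrent identical requests hit the upstream API once; the helper name and key format are illustrative.

```typescript
// Share one in-flight promise per key so concurrent identical requests
// result in a single upstream call (illustrative helper)
const inFlight = new Map<string, Promise<unknown>>();

export function dedupe<T>(key: string, factory: () => Promise<T>): Promise<T> {
  const existing = inFlight.get(key);
  if (existing) return existing as Promise<T>;

  const promise = factory().finally(() => inFlight.delete(key));
  inFlight.set(key, promise);
  return promise;
}

// Usage: concurrent calls with the same key share one request
// const history = await dedupe(`history:${chatId}`, () => db.getOptimizedChatHistory(chatId));
```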
### Frontend Optimization
- Implement virtual scrolling for long lists
- Use React.memo and useMemo appropriately
- Enable code splitting with dynamic imports (see the sketch after this list)
- Optimize bundle size with tree shaking
- Implement progressive image loading
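Code splitting with dynamic imports typically uses `next/dynamic` so heavy, client-only components ship in their own chunk; the component path and placeholder are illustrative.

```tsx
// components/chat/lazy-panels.tsx (illustrative paths)
import dynamic from 'next/dynamic';

// Load the heavy renderer in its own chunk, client-side only,
// with a lightweight placeholder while it loads.
const MessageRenderer = dynamic(() => import('./message-renderer'), {
  ssr: false,
  loading: () => <div className="animate-pulse h-24" />
});

export function ChatPanel({ content }: { content: string }) {
  return <MessageRenderer content={content} />;
}
```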
### Caching Strategy
- L1: In-memory LRU cache (5 minute TTL)
- L2: Redis cache (1 hour TTL)
- L3: CDN edge caching for static assets
- Implement cache warming for active users
- Use stale-while-revalidate pattern
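A minimal sketch of the stale-while-revalidate pattern from the last item: serve whatever is cached immediately and refresh the entry in the background. The cache shape and TTL are illustrative.

```typescript
// Illustrative stale-while-revalidate wrapper over an async loader
type Entry<T> = { value: T; staleAt: number };
const swrCache = new Map<string, Entry<unknown>>();

export async function staleWhileRevalidate<T>(
  key: string,
  loader: () => Promise<T>,
  ttlMs = 60_000
): Promise<T> {
  const entry = swrCache.get(key) as Entry<T> | undefined;

  if (entry) {
    if (entry.staleAt < Date.now()) {
      // Stale: return immediately, refresh in the background
      loader()
        .then((value) => swrCache.set(key, { value, staleAt: Date.now() + ttlMs }))
        .catch(() => { /* keep serving the stale value on refresh failure */ });
    }
    return entry.value;
  }

  // Cold cache: fetch once before returning
  const value = await loader();
  swrCache.set(key, { value, staleAt: Date.now() + ttlMs });
  return value;
}
```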
Following these optimization strategies, the Earna AI Console can achieve sub-200ms time to first token with GPT-4o streaming, handle thousands of concurrent users, and reduce API costs by up to 50% through intelligent caching.