# Performance Optimization
This guide covers performance optimization techniques for Earna AI Console across all components, focusing on GPT-4o streaming, Supabase queries, and real-time features.
## Overview
Performance optimization in Earna AI Console focuses on:
- Real-time chat: Sub-200ms time to first token with GPT-4o streaming
- Scalability: Handle thousands of concurrent users with edge functions
- Resource efficiency: Optimize API calls and database queries
- Cost optimization: Balance performance with API and infrastructure costs
## GPT-4o Optimization

### Streaming Response Pipeline

#### Stream Processing
```typescript
// lib/ai/streaming.ts
import { openai } from '@ai-sdk/openai';
import { streamText } from 'ai';
import { createClient } from '@supabase/supabase-js';

// Server-side Supabase client used to persist completed messages
const supabase = createClient(
  process.env.NEXT_PUBLIC_SUPABASE_URL!,
  process.env.SUPABASE_SERVICE_ROLE_KEY!
);

export class StreamProcessor {
  async streamGPT4oResponse(
    messages: any[],
    onChunk: (text: string) => void
  ) {
    const result = await streamText({
      model: openai('gpt-4o'),
      messages,
      temperature: 0.7,
      maxTokens: 4096,
      onFinish: async ({ text, usage }) => {
        // Save to Supabase after completion
        await this.saveToDatabase({
          content: text,
          tokens: usage.totalTokens,
          model: 'gpt-4o'
        });
      }
    });

    // Process stream chunks as they arrive
    for await (const chunk of result.textStream) {
      onChunk(chunk);
    }

    return result;
  }

  // Optimized streaming with Server-Sent Events
  async createSSEStream(
    messages: any[],
    chatId: string
  ): Promise<ReadableStream> {
    const encoder = new TextEncoder();

    return new ReadableStream({
      async start(controller) {
        try {
          const result = await streamText({
            model: openai('gpt-4o'),
            messages,
            temperature: 0.7
          });

          for await (const chunk of result.textStream) {
            controller.enqueue(
              encoder.encode(`data: ${JSON.stringify({
                type: 'text',
                content: chunk
              })}\n\n`)
            );
          }

          controller.enqueue(
            encoder.encode(`data: ${JSON.stringify({ type: 'done' })}\n\n`)
          );
        } catch (error) {
          controller.enqueue(
            encoder.encode(`data: ${JSON.stringify({
              type: 'error',
              error: error instanceof Error ? error.message : 'Unknown error'
            })}\n\n`)
          );
        } finally {
          controller.close();
        }
      }
    });
  }

  private async saveToDatabase(data: { content: string; tokens: number; model: string }) {
    const { error } = await supabase
      .from('messages')
      .insert(data);

    if (error) console.error('Failed to save message:', error);
  }
}
```
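For completeness, here is a minimal sketch of consuming this SSE stream in the browser with `fetch` and a stream reader. It assumes the `/api/chat` route and the `type`/`content` event shape shown above; the `appendToChat` callback is a hypothetical UI hook.

```typescript
// Hypothetical client-side consumer for the SSE stream above
export async function consumeChatStream(
  messages: unknown[],
  appendToChat: (text: string) => void // assumed UI callback
) {
  const response = await fetch('/api/chat', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ messages })
  });

  const reader = response.body!.getReader();
  const decoder = new TextDecoder();

  while (true) {
    const { done, value } = await reader.read();
    if (done) break;

    // Each SSE frame is "data: {...}\n\n"; a production parser should also
    // buffer frames that are split across reads.
    for (const line of decoder.decode(value, { stream: true }).split('\n')) {
      if (!line.startsWith('data: ')) continue;
      const event = JSON.parse(line.slice(6));
      if (event.type === 'text') appendToChat(event.content);
      if (event.type === 'error') throw new Error(event.error);
    }
  }
}
```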
## Supabase Optimization

### Database Query Optimization

#### Connection Pooling
```typescript
// lib/db/connection-pool.ts
import { createClient, SupabaseClient } from '@supabase/supabase-js';
import { Pool } from '@neondatabase/serverless';

export class DatabasePool {
  private supabase: SupabaseClient;
  private pgPool: Pool;

  constructor() {
    // Supabase client with connection pooling
    this.supabase = createClient(
      process.env.NEXT_PUBLIC_SUPABASE_URL!,
      process.env.SUPABASE_SERVICE_ROLE_KEY!,
      {
        auth: {
          persistSession: false
        },
        db: {
          schema: 'public'
        },
        global: {
          headers: {
            'x-connection-pool': 'true'
          }
        }
      }
    );

    // Direct PostgreSQL pool for complex queries
    this.pgPool = new Pool({
      connectionString: process.env.DATABASE_URL,
      max: 20, // Maximum connections
      idleTimeoutMillis: 30000,
      connectionTimeoutMillis: 2000
    });
  }

  async executeQuery<T>(query: string, params?: any[]): Promise<T[]> {
    const client = await this.pgPool.connect();
    try {
      const result = await client.query(query, params);
      return result.rows;
    } finally {
      client.release();
    }
  }

  async batchInsert(table: string, records: any[], batchSize = 1000) {
    // Insert in fixed-size batches to stay under payload and statement limits
    for (let i = 0; i < records.length; i += batchSize) {
      const batch = records.slice(i, i + batchSize);
      const { error } = await this.supabase
        .from(table)
        .insert(batch);

      if (error) throw error;
    }
  }

  async getOptimizedChatHistory(chatId: string, limit = 50) {
    // Use an RPC for an index-backed, server-side query
    const { data, error } = await this.supabase
      .rpc('get_chat_history_optimized', {
        chat_id: chatId,
        message_limit: limit
      });

    if (error) throw error;
    return data;
  }
}

// Supabase RPC function (create in SQL Editor)
/*
CREATE OR REPLACE FUNCTION get_chat_history_optimized(
  chat_id UUID,
  message_limit INT DEFAULT 50
)
RETURNS TABLE (
  id UUID,
  role TEXT,
  content TEXT,
  model TEXT,
  created_at TIMESTAMPTZ
) AS $$
BEGIN
  RETURN QUERY
  SELECT m.id, m.role, m.content, m.model, m.created_at
  FROM messages m
  WHERE m.chat_id = $1
  ORDER BY m.created_at DESC
  LIMIT $2;
END;
$$ LANGUAGE plpgsql;

CREATE INDEX IF NOT EXISTS idx_messages_chat_created
  ON messages(chat_id, created_at DESC);
*/
```
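A brief usage sketch of the pool above. The table and column names mirror the `messages` schema used in the RPC; treating the pool as a module-level singleton so connections are reused across requests is an assumption, not a requirement.

```typescript
// Reuse a single pool per server instance (assumed module-level singleton)
const db = new DatabasePool();

export async function loadRecentMessages(chatId: string) {
  // Index-backed RPC defined above
  return db.getOptimizedChatHistory(chatId, 50);
}

export async function importMessages(
  rows: { chat_id: string; role: string; content: string }[]
) {
  // Batched insert to avoid oversized payloads
  await db.batchInsert('messages', rows);
}
```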
## Edge Function Optimization

### Vercel Edge Functions
```typescript
// app/api/chat/route.ts
import { NextRequest } from 'next/server';
import { streamText } from 'ai';
// generateCacheKey, getCachedResponse, and getModelProvider are
// project-specific helpers (sketched after this block).

export const runtime = 'edge'; // Use the edge runtime
export const maxDuration = 25; // Maximum 25 seconds

export async function POST(request: NextRequest) {
  // Parse the request body once
  const { messages, model = 'gpt-4o' } = await request.json();

  // Return a cached response if available
  const cacheKey = await generateCacheKey(messages);
  const cached = await getCachedResponse(cacheKey);

  if (cached) {
    return new Response(cached, {
      headers: {
        'Content-Type': 'application/json',
        'X-Cache': 'HIT'
      }
    });
  }

  // Otherwise stream a fresh response
  const stream = await createOptimizedStream(messages, model);

  return new Response(stream, {
    headers: {
      'Content-Type': 'text/event-stream',
      'Cache-Control': 'no-cache',
      'Connection': 'keep-alive',
      'X-Cache': 'MISS'
    }
  });
}

async function createOptimizedStream(messages: any[], model: string) {
  const encoder = new TextEncoder();

  return new ReadableStream({
    async start(controller) {
      // Resolve the provider for the requested model
      const modelProvider = await getModelProvider(model);

      try {
        const result = await streamText({
          model: modelProvider,
          messages,
          temperature: 0.7
        });

        for await (const chunk of result.textStream) {
          // Forward each chunk immediately
          controller.enqueue(
            encoder.encode(`data: ${JSON.stringify({ text: chunk })}\n\n`)
          );
        }
      } catch (error) {
        controller.enqueue(
          encoder.encode(`data: ${JSON.stringify({
            error: error instanceof Error ? error.message : 'Unknown error'
          })}\n\n`)
        );
      } finally {
        controller.close();
      }
    }
  });
}
```
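The route above references `generateCacheKey`, `getCachedResponse`, and `getModelProvider` without defining them. Below is a minimal sketch of what they might look like, assuming Upstash Redis as the edge-compatible cache and the AI SDK OpenAI provider used earlier; the module path, helper names, and one-hour TTL are illustrative.

```typescript
// lib/cache/response-cache.ts (illustrative helpers for the route above)
import { Redis } from '@upstash/redis';
import { openai } from '@ai-sdk/openai';

const redis = Redis.fromEnv(); // requires UPSTASH_REDIS_REST_URL / _TOKEN

// Hash the normalized message list so identical prompts share a cache entry
export async function generateCacheKey(messages: unknown[]): Promise<string> {
  const data = new TextEncoder().encode(JSON.stringify(messages));
  const digest = await crypto.subtle.digest('SHA-256', data);
  const hex = Array.from(new Uint8Array(digest))
    .map((b) => b.toString(16).padStart(2, '0'))
    .join('');
  return `chat:response:${hex}`;
}

export async function getCachedResponse(key: string): Promise<string | null> {
  return redis.get<string>(key);
}

export async function cacheResponse(key: string, value: string): Promise<void> {
  await redis.set(key, value, { ex: 3600 }); // 1 hour TTL
}

// Map a model name to an AI SDK provider instance
export async function getModelProvider(model: string) {
  return openai(model); // e.g. 'gpt-4o'
}
```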
## Frontend Performance

### React Optimization

#### Component Optimization
```tsx
// components/chat/optimized-chat.tsx
'use client';

import { memo, useCallback, useMemo, useRef, useState, type ChangeEvent } from 'react';
import { useVirtualizer } from '@tanstack/react-virtual';
import debounce from 'lodash.debounce'; // any debounce utility works

const MessageList = memo(({ messages }: { messages: any[] }) => {
  const parentRef = useRef<HTMLDivElement>(null);

  // Virtualize large message lists so only visible rows render
  const virtualizer = useVirtualizer({
    count: messages.length,
    getScrollElement: () => parentRef.current,
    estimateSize: () => 100,
    overscan: 5
  });

  return (
    <div ref={parentRef} className="h-full overflow-auto">
      <div
        style={{
          height: `${virtualizer.getTotalSize()}px`,
          width: '100%',
          position: 'relative'
        }}
      >
        {virtualizer.getVirtualItems().map((virtualItem) => (
          <div
            key={virtualItem.key}
            style={{
              position: 'absolute',
              top: 0,
              left: 0,
              width: '100%',
              height: `${virtualItem.size}px`,
              transform: `translateY(${virtualItem.start}px)`
            }}
          >
            <Message message={messages[virtualItem.index]} />
          </div>
        ))}
      </div>
    </div>
  );
});

// Memoized message component
const Message = memo(({ message }: { message: any }) => {
  return (
    <div className="px-4 py-2">
      <div className={`message ${message.role}`}>
        {message.content}
      </div>
    </div>
  );
}, (prevProps, nextProps) => {
  // Custom comparison: re-render only when id or content changes
  return prevProps.message.id === nextProps.message.id &&
    prevProps.message.content === nextProps.message.content;
});

// Optimized chat input with a debounced typing indicator
export function ChatInput({ onSend }: { onSend: (text: string) => void }) {
  const [input, setInput] = useState('');

  // Debounce the typing indicator so it fires at most every 500ms
  const sendTypingIndicator = useMemo(
    () => debounce((isTyping: boolean) => {
      // Send typing status to the realtime channel
    }, 500),
    []
  );

  const handleChange = useCallback((e: ChangeEvent<HTMLTextAreaElement>) => {
    setInput(e.target.value);
    sendTypingIndicator(true);
  }, [sendTypingIndicator]);

  const handleSubmit = useCallback(() => {
    if (input.trim()) {
      onSend(input);
      setInput('');
    }
  }, [input, onSend]);

  return (
    <div className="chat-input">
      <textarea
        value={input}
        onChange={handleChange}
        onKeyDown={(e) => {
          if (e.key === 'Enter' && !e.shiftKey) {
            e.preventDefault();
            handleSubmit();
          }
        }}
        placeholder="Type a message..."
      />
    </div>
  );
}
```
### Memory Management
```typescript
// lib/performance/memory-manager.ts
export class MemoryManager {
  private sessionPool: Map<string, any> = new Map();
  private maxSessions = 1000;
  private sessionTTL = 30 * 60 * 1000; // 30 minutes

  async getSession(sessionId: string): Promise<any> {
    // Serve from the in-memory pool when possible
    if (this.sessionPool.has(sessionId)) {
      const session = this.sessionPool.get(sessionId);
      session.lastAccessed = Date.now();
      return session;
    }

    // Evict the least recently used entry if the pool is full
    if (this.sessionPool.size >= this.maxSessions) {
      this.evictLRUSession();
    }

    // Load from the database on a pool miss
    const session = await this.loadSession(sessionId);

    // Add to the pool
    this.sessionPool.set(sessionId, {
      ...session,
      lastAccessed: Date.now()
    });

    return session;
  }

  private evictLRUSession(): void {
    let oldestTime = Date.now();
    let oldestId: string | null = null;

    for (const [id, session] of this.sessionPool) {
      if (session.lastAccessed < oldestTime) {
        oldestTime = session.lastAccessed;
        oldestId = id;
      }
    }

    if (oldestId) {
      this.sessionPool.delete(oldestId);
    }
  }

  // Monitor memory usage and shrink the pool under pressure
  monitorMemory(): void {
    setInterval(() => {
      const usage = process.memoryUsage();
      const heapUsedMB = Math.round(usage.heapUsed / 1024 / 1024);

      if (heapUsedMB > 400) { // Alert if heap exceeds 400MB
        console.warn('High memory usage:', heapUsedMB, 'MB');

        // Force garbage collection if the process was started with --expose-gc
        if (global.gc) {
          global.gc();
        }

        // Reduce the pool size and trim it down
        this.maxSessions = Math.floor(this.maxSessions * 0.8);
        this.trimPool();
      }
    }, 30000);
  }

  private trimPool(): void {
    while (this.sessionPool.size > this.maxSessions) {
      this.evictLRUSession();
    }
  }

  // Fetch the session record from storage (project-specific; stubbed here)
  private async loadSession(sessionId: string): Promise<any> {
    return { id: sessionId };
  }
}
```
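A short usage sketch, assuming a single long-lived manager per server process; the `handleChatRequest` wrapper is illustrative.

```typescript
// Hypothetical wiring: one manager per process, started at boot
const memoryManager = new MemoryManager();
memoryManager.monitorMemory();

export async function handleChatRequest(sessionId: string) {
  // A pool hit avoids a database round trip on repeat requests
  return memoryManager.getSession(sessionId);
}
```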
## Performance Monitoring
```typescript
// lib/monitoring/performance.ts
import { track } from '@vercel/analytics';

export class PerformanceMonitor {
  // Track API latency
  async trackAPICall(
    endpoint: string,
    method: string,
    startTime: number
  ) {
    const duration = Date.now() - startTime;

    // Send to Vercel Analytics (client-side only)
    if (typeof window !== 'undefined') {
      track('api_call', {
        endpoint,
        method,
        duration,
        timestamp: new Date().toISOString()
      });
    }

    // Log slow requests
    if (duration > 1000) {
      console.warn(`Slow API call: ${method} ${endpoint} took ${duration}ms`);
    }
  }

  // Track GPT-4o streaming performance
  trackStreamingPerformance(
    model: string,
    timeToFirstToken: number,
    totalTime: number,
    tokenCount: number
  ) {
    const metrics = {
      model,
      timeToFirstToken,
      totalTime,
      tokenCount,
      tokensPerSecond: tokenCount / (totalTime / 1000)
    };

    // Send metrics
    this.sendMetrics('streaming_performance', metrics);
  }

  // Track database performance
  async trackDatabaseQuery(
    query: string,
    startTime: number,
    rowCount: number
  ) {
    const duration = Date.now() - startTime;

    if (duration > 100) { // Log slow queries
      console.warn(`Slow query (${duration}ms): ${query.substring(0, 100)}...`);
    }

    this.sendMetrics('database_query', {
      duration,
      rowCount,
      timestamp: new Date().toISOString()
    });
  }

  private sendMetrics(event: string, data: any) {
    // Forward to the internal metrics endpoint
    fetch('/api/metrics', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ event, data })
    }).catch(console.error);
  }
}
```
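As a usage sketch, time to first token can be measured by timestamping the first chunk of the stream. The monitor instance and the use of chunk count as a proxy for tokens are illustrative assumptions.

```typescript
// Hypothetical instrumentation around a streaming call
const monitor = new PerformanceMonitor();

export async function measuredStream(result: { textStream: AsyncIterable<string> }) {
  const start = Date.now();
  let firstTokenAt = 0;
  let chunkCount = 0;

  for await (const chunk of result.textStream) {
    if (!firstTokenAt) firstTokenAt = Date.now(); // first chunk ≈ first token
    chunkCount++;
    // ...forward chunk to the client...
  }

  monitor.trackStreamingPerformance(
    'gpt-4o',
    firstTokenAt ? firstTokenAt - start : 0, // time to first token (ms)
    Date.now() - start,                      // total time (ms)
    chunkCount                               // chunks as a proxy for tokens
  );
}
```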
## Performance Checklist

### GPT-4o Optimization
- Enable streaming responses for all chat interactions
- Implement response caching with Redis (1 hour TTL)
- Use batch processing for multiple requests
- Monitor token usage and costs
- Implement fallback to cached responses on rate limits
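The last item, falling back to cached responses when the provider rate-limits, might look like the sketch below. The cache helpers come from the illustrative module sketched earlier, and the `status === 429` check is an assumption about the error shape.

```typescript
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
// Illustrative cache helpers from the earlier sketch (hypothetical module path)
import { generateCacheKey, getCachedResponse, cacheResponse } from '@/lib/cache/response-cache';

export async function chatWithFallback(messages: any[]): Promise<string> {
  const cacheKey = await generateCacheKey(messages);

  try {
    const { text } = await generateText({ model: openai('gpt-4o'), messages });
    await cacheResponse(cacheKey, text);
    return text;
  } catch (error: any) {
    // On a rate limit, prefer a previously cached answer over a hard failure
    if (error?.status === 429 || error?.statusCode === 429) {
      const cached = await getCachedResponse(cacheKey);
      if (cached) return cached;
    }
    throw error;
  }
}
```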
### Supabase Optimization
- Enable connection pooling with pgBouncer
- Create indexes on frequently queried columns
- Use materialized views for aggregations
- Implement query result caching (see the sketch after this list)
- Optimize real-time subscriptions
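One way to cache query results is a small TTL map in front of the chat-history RPC shown earlier; the key format and one-minute TTL here are illustrative.

```typescript
// Minimal in-memory TTL cache in front of the chat-history RPC (illustrative)
const queryCache = new Map<string, { data: unknown; expiresAt: number }>();
const QUERY_TTL_MS = 60_000; // 1 minute

export async function getCachedChatHistory(db: DatabasePool, chatId: string) {
  const key = `chat-history:${chatId}`;
  const hit = queryCache.get(key);

  if (hit && hit.expiresAt > Date.now()) {
    return hit.data; // fresh cache hit, no database round trip
  }

  const data = await db.getOptimizedChatHistory(chatId);
  queryCache.set(key, { data, expiresAt: Date.now() + QUERY_TTL_MS });
  return data;
}
```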
### Edge Function Optimization
- Use Vercel Edge Runtime for API routes
- Enable response caching with proper headers
- Implement request deduplication (see the sketch after this list)
- Use regional edge functions for lower latency
- Monitor function execution time
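Request deduplication can be as simple as sharing one in-flight promise per key so concurrent identical requests hit the upstream API once; the helper name and key format are illustrative.

```typescript
// Share one in-flight promise per key so concurrent identical requests
// result in a single upstream call (illustrative helper)
const inFlight = new Map<string, Promise<unknown>>();

export function dedupe<T>(key: string, factory: () => Promise<T>): Promise<T> {
  const existing = inFlight.get(key);
  if (existing) return existing as Promise<T>;

  const promise = factory().finally(() => inFlight.delete(key));
  inFlight.set(key, promise);
  return promise;
}

// Usage: concurrent calls with the same key share one request
// const history = await dedupe(`history:${chatId}`, () => db.getOptimizedChatHistory(chatId));
```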
### Frontend Optimization
- Implement virtual scrolling for long lists
- Use React.memo and useMemo appropriately
- Enable code splitting with dynamic imports (see the sketch after this list)
- Optimize bundle size with tree shaking
- Implement progressive image loading
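Code splitting with dynamic imports typically uses `next/dynamic` so heavy, client-only components ship in their own chunk; the component path and placeholder are illustrative.

```tsx
// components/chat/lazy-panels.tsx (illustrative paths)
import dynamic from 'next/dynamic';

// Load the heavy renderer in its own chunk, client-side only,
// with a lightweight placeholder while it loads.
const MessageRenderer = dynamic(() => import('./message-renderer'), {
  ssr: false,
  loading: () => <div className="animate-pulse h-24" />
});

export function ChatPanel({ content }: { content: string }) {
  return <MessageRenderer content={content} />;
}
```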
### Caching Strategy
- L1: In-memory LRU cache (5 minute TTL)
- L2: Redis cache (1 hour TTL)
- L3: CDN edge caching for static assets
- Implement cache warming for active users
- Use stale-while-revalidate pattern
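A minimal sketch of the stale-while-revalidate pattern from the last item: serve whatever is cached immediately and refresh the entry in the background. The cache shape and TTL are illustrative.

```typescript
// Illustrative stale-while-revalidate wrapper over an async loader
type Entry<T> = { value: T; staleAt: number };
const swrCache = new Map<string, Entry<unknown>>();

export async function staleWhileRevalidate<T>(
  key: string,
  loader: () => Promise<T>,
  ttlMs = 60_000
): Promise<T> {
  const entry = swrCache.get(key) as Entry<T> | undefined;

  if (entry) {
    if (entry.staleAt < Date.now()) {
      // Stale: return immediately, refresh in the background
      loader()
        .then((value) => swrCache.set(key, { value, staleAt: Date.now() + ttlMs }))
        .catch(() => { /* keep serving the stale value on refresh failure */ });
    }
    return entry.value;
  }

  // Cold cache: fetch once before returning
  const value = await loader();
  swrCache.set(key, { value, staleAt: Date.now() + ttlMs });
  return value;
}
```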
Following these optimization strategies, the Earna AI Console can achieve sub-200ms time to first token with GPT-4o streaming, handle thousands of concurrent users, and reduce API costs by up to 50% through intelligent caching.