Files
cim_summary/backend/sql/minimal_setup.sql
admin 9c916d12f4 feat: Production release v2.0.0 - Simple Document Processor
Major release with significant performance improvements and new processing strategy.

## Core Changes
- Implemented simple_full_document processing strategy (default)
- Full document → LLM approach: 1-2 passes, ~5-6 minutes processing time
- Achieved 100% completeness with 2 API calls (down from 5+)
- Removed redundant Document AI passes for faster processing

## Financial Data Extraction
- Enhanced deterministic financial table parser
- Improved FY3/FY2/FY1/LTM identification from varying CIM formats
- Automatic merging of parser results with LLM extraction

## Code Quality & Infrastructure
- Cleaned up debug logging (removed emoji markers from production code)
- Fixed Firebase Secrets configuration (using modern defineSecret approach)
- Updated OpenAI API key
- Resolved deployment conflicts (secrets vs environment variables)
- Added .env files to Firebase ignore list

## Deployment
- Firebase Functions v2 deployment successful
- All 7 required secrets verified and configured
- Function URL: https://api-y56ccs6wva-uc.a.run.app

## Performance Improvements
- Processing time: ~5-6 minutes (down from 23+ minutes)
- API calls: 1-2 (down from 5+)
- Completeness: 100% achievable
- LLM Model: claude-3-7-sonnet-latest

## Breaking Changes
- Default processing strategy changed to 'simple_full_document'
- RAG processor available as alternative strategy 'document_ai_agentic_rag'

## Files Changed
- 36 files changed, 5642 insertions(+), 4451 deletions(-)
- Removed deprecated documentation files
- Cleaned up unused services and models

This release represents a major refactoring focused on speed, accuracy, and maintainability.
2025-11-09 21:07:22 -05:00

85 lines
3.2 KiB
PL/PgSQL

-- Minimal Database Setup - Just what's needed for uploads to work
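-- WARNING: destructive. This script drops `documents` and `processing_jobs`
-- (with CASCADE) before recreating them, so any existing data in those two
-- tables is lost. Other tables are left in place, although CASCADE may remove
-- foreign keys or views of theirs that depend on these two tables.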
-- 1. Create (or replace) the shared trigger function that stamps updated_at.
--    Meant to be attached as a BEFORE UPDATE ... FOR EACH ROW trigger on any
--    table that has an updated_at column: it overwrites NEW.updated_at and
--    returns the modified row so the update proceeds with the new timestamp.
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $$
BEGIN
    -- CURRENT_TIMESTAMP is the start time of the current transaction, so all
    -- rows updated within one transaction receive the same value.
    NEW.updated_at := CURRENT_TIMESTAMP;
    RETURN NEW;
END;
$$;
-- 2. Reset to a clean state by removing both tables before they are recreated.
--    DESTRUCTIVE: every row in these tables is lost, and CASCADE also removes
--    objects that depend on them (for example foreign keys or views).
DROP TABLE IF EXISTS
    processing_jobs,
    documents
CASCADE;
-- 3. documents: one row per uploaded file.
--    user_id is a string rather than a UUID so it can hold a Firebase UID.
--    Constraint names below are the ones PostgreSQL would generate by default.
CREATE TABLE documents (
    id                      UUID          DEFAULT gen_random_uuid(),
    user_id                 VARCHAR(255)  NOT NULL,

    -- File as uploaded
    original_file_name      VARCHAR(500)  NOT NULL,
    file_path               VARCHAR(1000) NOT NULL,
    file_size               BIGINT        NOT NULL,  -- must be > 0, see CHECK below
    uploaded_at             TIMESTAMPTZ   DEFAULT CURRENT_TIMESTAMP,

    -- New rows start out as 'uploaded'
    status                  VARCHAR(50)   NOT NULL DEFAULT 'uploaded',

    -- Optional outputs; all nullable
    extracted_text          TEXT,
    generated_summary       TEXT,
    summary_markdown_path   VARCHAR(1000),
    summary_pdf_path        VARCHAR(1000),
    processing_started_at   TIMESTAMPTZ,
    processing_completed_at TIMESTAMPTZ,
    error_message           TEXT,
    analysis_data           JSONB,

    -- Row bookkeeping; updated_at is refreshed by the
    -- update_documents_updated_at trigger on every UPDATE
    created_at              TIMESTAMPTZ   DEFAULT CURRENT_TIMESTAMP,
    updated_at              TIMESTAMPTZ   DEFAULT CURRENT_TIMESTAMP,

    CONSTRAINT documents_pkey PRIMARY KEY (id),
    CONSTRAINT documents_file_size_check CHECK (file_size > 0)
);
-- Lookup indexes on documents (btree is the default access method).
-- NOTE(review): idx_documents_user_id is a leading-column prefix of
-- idx_documents_user_status and may be redundant; confirm before removing.
CREATE INDEX idx_documents_user_id
    ON documents USING btree (user_id);

CREATE INDEX idx_documents_status
    ON documents USING btree (status);

CREATE INDEX idx_documents_uploaded_at
    ON documents USING btree (uploaded_at);

CREATE INDEX idx_documents_user_status
    ON documents USING btree (user_id, status);
-- Refresh documents.updated_at whenever a row is updated, by running
-- update_updated_at_column() once per affected row before the write.
CREATE TRIGGER update_documents_updated_at
    BEFORE UPDATE
    ON documents
    FOR EACH ROW
    EXECUTE FUNCTION update_updated_at_column();
-- 4. processing_jobs: jobs attached to a document.
--    Deleting a document deletes its jobs (ON DELETE CASCADE).
--    Constraint names below are the ones PostgreSQL would generate by default.
CREATE TABLE processing_jobs (
    id            UUID         DEFAULT gen_random_uuid(),
    document_id   UUID         NOT NULL,
    user_id       VARCHAR(255) NOT NULL,

    -- New jobs start as 'pending' with zero attempts out of three
    status        VARCHAR(50)  NOT NULL DEFAULT 'pending',
    attempts      INTEGER      NOT NULL DEFAULT 0,
    max_attempts  INTEGER      NOT NULL DEFAULT 3,
    options       JSONB,

    -- Timestamps; updated_at is refreshed by the
    -- update_processing_jobs_updated_at trigger on every UPDATE
    created_at    TIMESTAMPTZ  DEFAULT CURRENT_TIMESTAMP,
    started_at    TIMESTAMPTZ,
    completed_at  TIMESTAMPTZ,
    updated_at    TIMESTAMPTZ  DEFAULT CURRENT_TIMESTAMP,

    -- Outcome; all nullable
    error         TEXT,
    last_error_at TIMESTAMPTZ,
    result        JSONB,

    CONSTRAINT processing_jobs_pkey PRIMARY KEY (id),
    CONSTRAINT processing_jobs_document_id_fkey
        FOREIGN KEY (document_id)
        REFERENCES documents (id)
        ON DELETE CASCADE
);
-- Lookup indexes on processing_jobs (btree is the default access method).
CREATE INDEX idx_processing_jobs_status
    ON processing_jobs USING btree (status);

CREATE INDEX idx_processing_jobs_created_at
    ON processing_jobs USING btree (created_at);

CREATE INDEX idx_processing_jobs_document_id
    ON processing_jobs USING btree (document_id);

CREATE INDEX idx_processing_jobs_user_id
    ON processing_jobs USING btree (user_id);

-- Partial index covering only rows still waiting to run.
-- NOTE(review): status is constant ('pending') for every row in this index,
-- so the leading key column adds no selectivity; confirm whether
-- (created_at) alone would suffice before changing it.
CREATE INDEX idx_processing_jobs_pending
    ON processing_jobs USING btree (status, created_at)
    WHERE status = 'pending';
-- Refresh processing_jobs.updated_at whenever a row is updated, by running
-- update_updated_at_column() once per affected row before the write.
CREATE TRIGGER update_processing_jobs_updated_at
    BEFORE UPDATE
    ON processing_jobs
    FOR EACH ROW
    EXECUTE FUNCTION update_updated_at_column();
-- 5. Verify tables were created: one row per expected table found in schema
--    public, with the number of columns it has. A table that is missing
--    produces no row. Columns are matched on schema as well as table name so
--    that a same-named table in another schema cannot inflate the count.
SELECT
    t.table_name,
    COUNT(c.column_name) AS column_count
FROM information_schema.tables AS t
LEFT JOIN information_schema.columns AS c
    ON c.table_schema = t.table_schema
    AND c.table_name = t.table_name
WHERE t.table_schema = 'public'
    AND t.table_name IN ('documents', 'processing_jobs')
GROUP BY t.table_name
ORDER BY t.table_name;