---
# Knative Service manifest for the cim-processor-backend container.
# The run.googleapis.com/* annotations indicate a Cloud Run deployment target.
apiVersion: serving.knative.dev/v1
kind: Service
metadata:
  name: cim-processor-backend
  annotations:
    # Service-level settings.
    run.googleapis.com/ingress: all
    # NOTE(review): execution-environment is also set on the revision template
    # below; confirm the platform accepts it at Service level, otherwise drop
    # this copy and keep the template one.
    run.googleapis.com/execution-environment: gen2
spec:
  template:
    metadata:
      annotations:
        # Revision-level settings. Annotation values must be strings, hence
        # the quoting of boolean- and number-looking values.
        run.googleapis.com/execution-environment: gen2
        run.googleapis.com/cpu-throttling: "false"
        run.googleapis.com/startup-cpu-boost: "true"
        # Instance-count bounds for the autoscaler (0 to 100).
        autoscaling.knative.dev/minScale: "0"
        autoscaling.knative.dev/maxScale: "100"
        # NOTE(review): this does not look like a standard Knative autoscaling
        # annotation (Knative documents `metric` + `target`); confirm it is
        # honored by the platform rather than silently ignored.
        autoscaling.knative.dev/targetCPUUtilization: "60"
    spec:
      containerConcurrency: 80
      timeoutSeconds: 300
      containers:
        # NOTE(review): `:latest` is a mutable tag; consider pinning a version
        # tag or digest so a given manifest always deploys the same image.
        - image: gcr.io/cim-summarizer/cim-processor-backend:latest
          ports:
            - containerPort: 8080
          # Environment values are quoted so that "8080" and "true" stay
          # strings instead of being typed as int/bool by the YAML parser.
          env:
            - name: NODE_ENV
              value: "production"
            # Matches containerPort above and the probe ports below.
            - name: PORT
              value: "8080"
            - name: PROCESSING_STRATEGY
              value: "agentic_rag"
            - name: GCLOUD_PROJECT_ID
              value: "cim-summarizer"
            - name: DOCUMENT_AI_LOCATION
              value: "us"
            - name: DOCUMENT_AI_PROCESSOR_ID
              value: "add30c555ea0ff89"
            - name: GCS_BUCKET_NAME
              value: "cim-summarizer-uploads"
            - name: DOCUMENT_AI_OUTPUT_BUCKET_NAME
              value: "cim-summarizer-document-ai-output"
            - name: LLM_PROVIDER
              value: "anthropic"
            - name: VECTOR_PROVIDER
              value: "supabase"
            - name: AGENTIC_RAG_ENABLED
              value: "true"
            - name: ENABLE_RAG_PROCESSING
              value: "true"
          resources:
            limits:
              cpu: "2"
              memory: "4Gi"
            # NOTE(review): confirm the target platform honors `requests`;
            # Cloud Run's documentation describes only `limits`.
            requests:
              cpu: "1"
              memory: "2Gi"
          # All three probes hit GET /health on the container port.
          # Startup: first check after 10s, then every 5s, 3 failures allowed.
          startupProbe:
            httpGet:
              path: /health
              port: 8080
            initialDelaySeconds: 10
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3
          # Liveness: checked every 30s, 3 failures allowed.
          livenessProbe:
            httpGet:
              path: /health
              port: 8080
            periodSeconds: 30
            timeoutSeconds: 5
            failureThreshold: 3
          # Readiness: checked every 10s, 3 failures allowed.
          readinessProbe:
            httpGet:
              path: /health
              port: 8080
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3