Files
HomeAudit/scripts/diagnose_paperless_issues.sh
admin 45363040f3 feat: Complete infrastructure cleanup phase documentation and status updates
## Major Infrastructure Milestones Achieved

### Service Migrations Completed
- Jellyfin: Successfully migrated to Docker Swarm with latest version
- Vaultwarden: Running in Docker Swarm on OMV800 (eliminated duplicate)
- Nextcloud: Operational with database optimization and cron setup
- Paperless services: Both NGX and AI running successfully

### 🚨 Duplicate Service Analysis Complete
- Identified MariaDB conflict (OMV800 Swarm vs lenovo410 standalone)
- Identified Vaultwarden duplication (now resolved)
- Documented PostgreSQL and Redis consolidation opportunities
- Mapped monitoring stack optimization needs

### 🏗️ Infrastructure Status Documentation
- Updated README with current cleanup phase status
- Enhanced Service Analysis with duplicate service inventory
- Updated Quick Start guide with immediate action items
- Documented current container distribution across 6 nodes

### 📋 Action Plan Documentation
- Phase 1: Immediate service conflict resolution (this week)
- Phase 2: Service migration and load balancing (next 2 weeks)
- Phase 3: Database consolidation and optimization (future)

### 🔧 Current Infrastructure Health
- Docker Swarm: All 6 nodes operational and healthy
- Caddy Reverse Proxy: Fully operational with SSL certificates
- Storage: MergerFS healthy, local storage for databases
- Monitoring: Prometheus + Grafana + Uptime Kuma operational

### 📊 Container Distribution Status
- OMV800: 25+ containers (needs load balancing)
- lenovo410: 9 containers (cleanup in progress)
- fedora: 1 container (ready for additional services)
- audrey: 4 containers (well-balanced, monitoring hub)
- lenovo420: 7 containers (balanced, can assist)
- surface: 9 containers (specialized, reverse proxy)

### 🎯 Next Steps
1. Remove lenovo410 MariaDB (eliminate port 3306 conflict)
2. Clean up lenovo410 Vaultwarden (256MB space savings)
3. Verify no service conflicts exist
4. Begin service migration from OMV800 to fedora/audrey

Status: Infrastructure 99% complete, entering cleanup and optimization phase
2025-09-01 16:50:37 -04:00

294 lines
11 KiB
Bash
Executable File

#!/bin/bash
#
# Paperless Database Issue Diagnostic Script
#
# Inspects a Paperless-ngx / Paperless AI deployment through the Docker CLI
# (containers, database, networks, environment, APIs, logs) and prints a
# report with likely configuration problems.

# Abort on the first unhandled command failure.
set -e

printf '%s\n' \
  "🔍 Paperless Database Issue Diagnostic" \
  "======================================"

# ANSI colour sequences, stored unexpanded; the print_* helpers emit them
# with backslash-escape processing enabled.
NC='\033[0m' # No Color
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
RED='\033[0;31m'
# Write an informational line to stdout, tagged "[INFO]" in blue.
# Globals:   BLUE, NC (read)
# Arguments: $1 - message text; backslash escapes in it are interpreted
print_status() {
  local text=$1
  printf '%b\n' "${BLUE}[INFO]${NC} ${text}"
}
# Write a success line to stdout, tagged "[SUCCESS]" in green.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text; backslash escapes in it are interpreted
print_success() {
  local text=$1
  printf '%b\n' "${GREEN}[SUCCESS]${NC} ${text}"
}
# Write a warning line to stdout, tagged "[WARNING]" in yellow.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message text; backslash escapes in it are interpreted
print_warning() {
  local text=$1
  printf '%b\n' "${YELLOW}[WARNING]${NC} ${text}"
}
# Write an error line to stdout, tagged "[ERROR]" in red.
# Globals:   RED, NC (read)
# Arguments: $1 - message text; backslash escapes in it are interpreted
print_error() {
  local text=$1
  printf '%b\n' "${RED}[ERROR]${NC} ${text}"
}
# Every later check goes through the Docker CLI, so stop right away when the
# daemon does not answer `docker info`.
docker info > /dev/null 2>&1 || {
  print_error "Docker is not running. Please start Docker first."
  exit 1
}
echo ""
print_status "1. Checking container status..."

# Docker's name filter matches substrings, so "name=paperless" also returns the
# paperless-ai containers. Query once and split the rows by container name.
# The "table" directive always prints a header row, even with zero matches, so
# "nothing found" has to be decided on the data rows only (NR > 1).
CONTAINER_TABLE=$(docker ps -a --filter "name=paperless" \
  --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}")
TABLE_HEADER=$(awk 'NR == 1' <<< "$CONTAINER_TABLE")

# Check Paperless-ngx containers (every "paperless" row that is not paperless-ai)
PAPERLESS_CONTAINERS=$(awk 'NR > 1 && $1 !~ /paperless-ai/' <<< "$CONTAINER_TABLE")
if [[ -n "$PAPERLESS_CONTAINERS" ]]; then
  echo "📄 Paperless-ngx containers:"
  echo "$TABLE_HEADER"
  echo "$PAPERLESS_CONTAINERS"
else
  print_warning "No Paperless-ngx containers found"
fi

# Check Paperless AI containers
PAPERLESS_AI_CONTAINERS=$(awk 'NR > 1 && $1 ~ /paperless-ai/' <<< "$CONTAINER_TABLE")
if [[ -n "$PAPERLESS_AI_CONTAINERS" ]]; then
  echo ""
  echo "🤖 Paperless AI containers:"
  echo "$TABLE_HEADER"
  echo "$PAPERLESS_AI_CONTAINERS"
else
  print_warning "No Paperless AI containers found"
fi
echo ""
print_status "2. Checking database configurations..."

# Role and database used for the checks. The defaults are the values that were
# previously hard-coded; export PAPERLESS_PG_USER / PAPERLESS_PG_DB to override.
PAPERLESS_PG_USER="${PAPERLESS_PG_USER:-postgres}"
PAPERLESS_PG_DB="${PAPERLESS_PG_DB:-paperless}"

# Locate the running PostgreSQL container: prefer a container whose name
# contains "postgres", otherwise fall back to one whose image does. Without the
# fallback a container such as "paperless-db" (image postgres:NN) would be
# announced as running and then silently skipped.
PG_CONTAINER=$(docker ps --filter "name=postgres" --format "{{.Names}}" | head -1)
if [[ -z "$PG_CONTAINER" ]]; then
  PG_CONTAINER=$(docker ps --format '{{.Names}}\t{{.Image}}' \
    | awk -F'\t' '$2 ~ /postgres/ { print $1; exit }')
fi

if [[ -n "$PG_CONTAINER" ]]; then
  print_success "PostgreSQL container is running"
  echo " Container: $PG_CONTAINER"

  # Check database connectivity
  if docker exec "$PG_CONTAINER" pg_isready -U "$PAPERLESS_PG_USER" > /dev/null 2>&1; then
    print_success " Database is accepting connections"
  else
    print_error " Database is not accepting connections"
  fi

  # Check if paperless database exists. `psql -lqt` prints one "|"-separated
  # row per database; compare the trimmed first column exactly so that names
  # like "paperless-ai" do not count as a match.
  if docker exec "$PG_CONTAINER" psql -U "$PAPERLESS_PG_USER" -lqt \
    | awk -F'|' -v db="$PAPERLESS_PG_DB" '
        { name = $1; gsub(/^[[:space:]]+|[[:space:]]+$/, "", name) }
        name == db { found = 1 }
        END { exit !found }'; then
    print_success " Paperless database exists"
  else
    print_error " Paperless database does not exist"
  fi
else
  print_warning "PostgreSQL container not found"
fi
echo ""
print_status "3. Checking network connectivity..."

# Resolve one running container per service. Docker's name filter is a
# substring match, so "name=paperless" also returns paperless-ai containers;
# they are excluded explicitly, otherwise the AI container could end up being
# compared with itself.
PAPERLESS_AI_CONTAINER=$(docker ps --filter "name=paperless-ai" --format "{{.Names}}" | head -1)
PAPERLESS_CONTAINER=$(docker ps --filter "name=paperless" --format "{{.Names}}" \
  | grep -v "paperless-ai" | head -1)

# Check if containers can communicate
if [[ -n "$PAPERLESS_CONTAINER" && -n "$PAPERLESS_AI_CONTAINER" ]]; then
  # Space-separated list of the Docker networks each container is attached to
  PAPERLESS_NETWORKS=$(docker inspect "$PAPERLESS_CONTAINER" --format '{{range $net, $config := .NetworkSettings.Networks}}{{$net}} {{end}}')
  PAPERLESS_AI_NETWORKS=$(docker inspect "$PAPERLESS_AI_CONTAINER" --format '{{range $net, $config := .NetworkSettings.Networks}}{{$net}} {{end}}')
  echo " Paperless-ngx networks: $PAPERLESS_NETWORKS"
  echo " Paperless AI networks: $PAPERLESS_AI_NETWORKS"

  # Check for common networks. Names are compared for exact equality; a
  # substring test would treat "paperless" and "paperless_backend" as shared.
  read -r -a ngx_networks <<< "$PAPERLESS_NETWORKS"
  read -r -a ai_networks <<< "$PAPERLESS_AI_NETWORKS"
  SHARED_NETWORK=""
  for net in "${ngx_networks[@]}"; do
    for ai_net in "${ai_networks[@]}"; do
      if [[ "$net" == "$ai_net" ]]; then
        SHARED_NETWORK=$net
        break 2
      fi
    done
  done

  if [[ -n "$SHARED_NETWORK" ]]; then
    print_success " Both containers are on network: $SHARED_NETWORK"
  else
    # Previously this case produced no output at all.
    print_warning " Containers do not share a Docker network"
  fi
else
  print_warning "Cannot check network connectivity - containers not running"
fi
echo ""
print_status "4. Checking environment variables..."

# Replace the value of any NAME=value line whose NAME contains PASS, TOKEN,
# KEY or SECRET, so that the report can be shared without leaking credentials.
# Reads stdin, writes stdout.
mask_secrets() {
  sed -E 's/^([^=]*(PASS|TOKEN|KEY|SECRET)[^=]*)=.+$/\1=********/'
}

# Check Paperless-ngx environment. "name=paperless" is a substring filter and
# also matches paperless-ai containers, so those are excluded here.
PAPERLESS_CONTAINER=$(docker ps --filter "name=paperless" --format "{{.Names}}" \
  | grep -v "paperless-ai" | head -1)
if [[ -n "$PAPERLESS_CONTAINER" ]]; then
  echo "📄 Paperless-ngx environment:"
  docker exec "$PAPERLESS_CONTAINER" env \
    | grep -E "(PAPERLESS_DB|PAPERLESS_REDIS|PAPERLESS_URL)" | sort | mask_secrets
fi

# Check Paperless AI environment
PAPERLESS_AI_CONTAINER=$(docker ps --filter "name=paperless-ai" --format "{{.Names}}" | head -1)
if [[ -n "$PAPERLESS_AI_CONTAINER" ]]; then
  echo ""
  echo "🤖 Paperless AI environment:"
  docker exec "$PAPERLESS_AI_CONTAINER" env \
    | grep -E "(PAPERLESS_|OPENAI_|OLLAMA_|DEEPSEEK_|AZURE_)" | sort | mask_secrets
fi
echo ""
print_status "5. Checking API connectivity..."

# Endpoints to probe. The defaults are the previously hard-coded URLs; export
# PAPERLESS_NGX_API_URL / PAPERLESS_AI_HEALTH_URL to test another deployment.
PAPERLESS_NGX_API_URL="${PAPERLESS_NGX_API_URL:-https://paperless.pressmess.duckdns.org/api/}"
PAPERLESS_AI_HEALTH_URL="${PAPERLESS_AI_HEALTH_URL:-http://localhost:3000/health}"

# -f: treat HTTP errors (>= 400) as failure, -s: no progress output.
# The timeouts keep the diagnostic from hanging on an unreachable host.
API_CURL_OPTS=(-f -s --connect-timeout 5 --max-time 15)

# Check Paperless-ngx API
if curl "${API_CURL_OPTS[@]}" "$PAPERLESS_NGX_API_URL" > /dev/null 2>&1; then
  print_success "Paperless-ngx API is accessible"
else
  print_error "Paperless-ngx API is not accessible"
fi

# Check Paperless AI API
if curl "${API_CURL_OPTS[@]}" "$PAPERLESS_AI_HEALTH_URL" > /dev/null 2>&1; then
  print_success "Paperless AI API is accessible"
else
  print_error "Paperless AI API is not accessible"
fi
echo ""
print_status "6. Checking recent logs for errors..."

# Check Paperless-ngx logs. "name=paperless" is a substring filter that also
# matches paperless-ai containers; exclude them so the Paperless-ngx section
# does not end up showing the AI container's log.
PAPERLESS_CONTAINER=$(docker ps --filter "name=paperless" --format "{{.Names}}" \
  | grep -v "paperless-ai" | head -1)
if [[ -n "$PAPERLESS_CONTAINER" ]]; then
  echo "📄 Recent Paperless-ngx errors:"
  # Only the last 20 log lines are scanned; grep exits non-zero when nothing
  # matches, which triggers the fallback message.
  docker logs "$PAPERLESS_CONTAINER" --tail 20 2>&1 \
    | grep -i "error\|exception\|failed" || echo " No recent errors found"
fi

# Check Paperless AI logs
PAPERLESS_AI_CONTAINER=$(docker ps --filter "name=paperless-ai" --format "{{.Names}}" | head -1)
if [[ -n "$PAPERLESS_AI_CONTAINER" ]]; then
  echo ""
  echo "🤖 Recent Paperless AI errors:"
  docker logs "$PAPERLESS_AI_CONTAINER" --tail 20 2>&1 \
    | grep -i "error\|exception\|failed" || echo " No recent errors found"
fi
echo ""
print_status "7. Database schema analysis..."

# Role and database used for the queries. The defaults are the values that
# were previously hard-coded; export PAPERLESS_PG_USER / PAPERLESS_PG_DB to
# override them.
PAPERLESS_PG_USER="${PAPERLESS_PG_USER:-postgres}"
PAPERLESS_PG_DB="${PAPERLESS_PG_DB:-paperless}"

# Locate the running PostgreSQL container: by name first, then by image, so a
# container like "paperless-db" running a postgres image is still analysed.
PG_CONTAINER=$(docker ps --filter "name=postgres" --format "{{.Names}}" | head -1)
if [[ -z "$PG_CONTAINER" ]]; then
  PG_CONTAINER=$(docker ps --format '{{.Names}}\t{{.Image}}' \
    | awk -F'\t' '$2 ~ /postgres/ { print $1; exit }')
fi

# Run one SQL statement in the Paperless database and print the bare result
# (-t: tuples only, -A: unaligned). Errors are discarded; callers detect a
# failed query by its non-zero status or empty output.
# Arguments: $1 - SQL text
paperless_sql() {
  docker exec "$PG_CONTAINER" psql -U "$PAPERLESS_PG_USER" -d "$PAPERLESS_PG_DB" \
    -tA -c "$1" 2>/dev/null
}

# Check if we can connect to the database and analyze the schema
if [[ -n "$PG_CONTAINER" ]]; then
  echo "📊 Checking Paperless database schema..."

  # Check for core tables. The catalog is queried directly: `\dt <name>` only
  # prints a notice when nothing matches, so its exit status cannot be used to
  # tell an existing table from a missing one.
  CORE_TABLES=("documents_document" "documents_tag" "documents_correspondent" "documents_documenttype")
  for table in "${CORE_TABLES[@]}"; do
    table_hit=$(paperless_sql "SELECT 1 FROM information_schema.tables WHERE table_name = '$table' LIMIT 1;") || table_hit=""
    if [[ "$table_hit" == "1" ]]; then
      print_success " Table exists: $table"
    else
      print_error " Table missing: $table"
    fi
  done

  # Check document count; an empty or non-numeric result means the query
  # itself failed, which is reported separately from a genuine zero.
  DOC_COUNT=$(paperless_sql "SELECT COUNT(*) FROM documents_document;") || DOC_COUNT=""
  if [[ "$DOC_COUNT" =~ ^[0-9]+$ && "$DOC_COUNT" != "0" ]]; then
    print_success " Documents in database: $DOC_COUNT"
  elif [[ "$DOC_COUNT" == "0" ]]; then
    print_warning " No documents found in database"
  else
    print_error " Could not query document count"
  fi

  # Check tag count
  TAG_COUNT=$(paperless_sql "SELECT COUNT(*) FROM documents_tag;") || TAG_COUNT=""
  if [[ "$TAG_COUNT" =~ ^[0-9]+$ && "$TAG_COUNT" != "0" ]]; then
    print_success " Tags in database: $TAG_COUNT"
  elif [[ "$TAG_COUNT" == "0" ]]; then
    print_warning " No tags found in database"
  else
    print_error " Could not query tag count"
  fi
fi
# Section 8 heading: introduces the list of detected configuration issues.
printf '\n'
print_status "8. Recommendations..."
printf '%s\n' \
  "🔧 Based on the analysis, here are the likely issues and solutions:" \
  ""
# Check for common issues
# Running total of problems reported by the issue checks below.
ISSUES_FOUND=0

# Issue 1: Different databases
# Resolve one running container per service. "name=paperless" is a substring
# filter that also matches paperless-ai containers, so they are excluded;
# otherwise the AI container could be compared with itself and the check
# would never fire.
PAPERLESS_AI_CONTAINER=$(docker ps --filter "name=paperless-ai" --format "{{.Names}}" | head -1)
PAPERLESS_CONTAINER=$(docker ps --filter "name=paperless" --format "{{.Names}}" \
  | grep -v "paperless-ai" | head -1)

if [[ -n "$PAPERLESS_CONTAINER" && -n "$PAPERLESS_AI_CONTAINER" ]]; then
  # Read the exact PAPERLESS_DBHOST variable and keep the whole value, even if
  # it contains "=" characters.
  PAPERLESS_DB=$(docker exec "$PAPERLESS_CONTAINER" env \
    | sed -n 's/^PAPERLESS_DBHOST=//p' | head -1)
  PAPERLESS_AI_DB=$(docker exec "$PAPERLESS_AI_CONTAINER" env \
    | sed -n 's/^PAPERLESS_DBHOST=//p' | head -1)

  # NOTE(review): this also fires when only one of the containers defines
  # PAPERLESS_DBHOST - confirm that Paperless AI is expected to set it.
  if [[ "$PAPERLESS_DB" != "$PAPERLESS_AI_DB" ]]; then
    echo "❌ ISSUE 1: Different database hosts"
    echo " Paperless-ngx: $PAPERLESS_DB"
    echo " Paperless AI: $PAPERLESS_AI_DB"
    echo " SOLUTION: Configure both to use the same database"
    echo ""
    ISSUES_FOUND=$((ISSUES_FOUND + 1))
  fi
fi
# Issues 2 and 3: Paperless AI needs to know where Paperless-ngx lives and how
# to authenticate against it. Both checks look for the variable names in the
# environment of the first running paperless-ai container.
if docker ps | grep -q "paperless-ai" \
  && PAPERLESS_AI_CONTAINER=$(docker ps --filter "name=paperless-ai" --format "{{.Names}}" | head -1) \
  && [[ -n "$PAPERLESS_AI_CONTAINER" ]]; then

  # Issue 2: no PAPERLESS_URL in the container environment
  docker exec "$PAPERLESS_AI_CONTAINER" env | grep -q "PAPERLESS_URL" || {
    printf '%s\n' \
      "❌ ISSUE 2: Missing Paperless-ngx URL configuration" \
      " SOLUTION: Set PAPERLESS_URL environment variable" \
      ""
    ISSUES_FOUND=$(( ISSUES_FOUND + 1 ))
  }

  # Issue 3: neither an API token nor a username is present
  docker exec "$PAPERLESS_AI_CONTAINER" env | grep -q "PAPERLESS_API_TOKEN\|PAPERLESS_USERNAME" || {
    printf '%s\n' \
      "❌ ISSUE 3: Missing authentication configuration" \
      " SOLUTION: Configure PAPERLESS_API_TOKEN or PAPERLESS_USERNAME/PAPERLESS_PASSWORD" \
      ""
    ISSUES_FOUND=$(( ISSUES_FOUND + 1 ))
  }
fi
# Issue 4: No AI providers configured
PAPERLESS_AI_CONTAINER=$(docker ps --filter "name=paperless-ai" --format "{{.Names}}" | head -1)
if [[ -n "$PAPERLESS_AI_CONTAINER" ]]; then
  # A provider counts as configured when one of its variables is present with
  # a non-empty value. The earlier `grep -v "="` filter could never pass a
  # line of `env` output (every line contains "="), so this issue was
  # reported on every run regardless of the configuration.
  if ! docker exec "$PAPERLESS_AI_CONTAINER" env \
    | grep -Eq '^(OPENAI_API_KEY|OLLAMA_BASE_URL|DEEPSEEK_API_KEY|AZURE_OPENAI_API_KEY)=.+'; then
    echo "❌ ISSUE 4: No AI providers configured"
    echo " SOLUTION: Configure at least one AI provider (OpenAI, Ollama, DeepSeek, or Azure)"
    echo ""
    ISSUES_FOUND=$((ISSUES_FOUND + 1))
  fi
fi
# Final summary: state explicitly when none of the issue checks fired, then
# always print the pointers to the setup script and further documentation.
if (( ISSUES_FOUND == 0 )); then
  printf '%s\n' \
    "✅ No obvious configuration issues found" \
    " The problem might be in the application logic or data processing"
fi

cat <<'EOF'

🚀 To fix these issues, run:
 ./scripts/setup_paperless_ai_integration.sh

📚 For more information, see:
 - stacks/ai/paperless-ai.yml (new configuration)
 - scripts/setup_paperless_ai_integration.sh (setup script)
 - https://github.com/clusterzx/paperless-ai (documentation)
EOF