## Major Infrastructure Milestones Achieved ### ✅ Service Migrations Completed - Jellyfin: Successfully migrated to Docker Swarm with latest version - Vaultwarden: Running in Docker Swarm on OMV800 (eliminated duplicate) - Nextcloud: Operational with database optimization and cron setup - Paperless services: Both NGX and AI running successfully ### 🚨 Duplicate Service Analysis Complete - Identified MariaDB conflict (OMV800 Swarm vs lenovo410 standalone) - Identified Vaultwarden duplication (now resolved) - Documented PostgreSQL and Redis consolidation opportunities - Mapped monitoring stack optimization needs ### 🏗️ Infrastructure Status Documentation - Updated README with current cleanup phase status - Enhanced Service Analysis with duplicate service inventory - Updated Quick Start guide with immediate action items - Documented current container distribution across 6 nodes ### 📋 Action Plan Documentation - Phase 1: Immediate service conflict resolution (this week) - Phase 2: Service migration and load balancing (next 2 weeks) - Phase 3: Database consolidation and optimization (future) ### 🔧 Current Infrastructure Health - Docker Swarm: All 6 nodes operational and healthy - Caddy Reverse Proxy: Fully operational with SSL certificates - Storage: MergerFS healthy, local storage for databases - Monitoring: Prometheus + Grafana + Uptime Kuma operational ### 📊 Container Distribution Status - OMV800: 25+ containers (needs load balancing) - lenovo410: 9 containers (cleanup in progress) - fedora: 1 container (ready for additional services) - audrey: 4 containers (well-balanced, monitoring hub) - lenovo420: 7 containers (balanced, can assist) - surface: 9 containers (specialized, reverse proxy) ### 🎯 Next Steps 1. Remove lenovo410 MariaDB (eliminate port 3306 conflict) 2. Clean up lenovo410 Vaultwarden (256MB space savings) 3. Verify no service conflicts exist 4. 
Begin service migration from OMV800 to fedora/audrey. Status: Infrastructure 99% complete; entering cleanup and optimization phase.
186 lines
5.4 KiB
Bash
Executable File
186 lines
5.4 KiB
Bash
Executable File
#!/bin/bash
#
# Test Re-processing Script
#
# Tests the re-processing approach on a small sample of Paperless documents:
# clears tags/correspondent/document_type so they can be re-classified.

# Exit on error, on unset variables, and on failures anywhere in a pipeline
# (plain `set -e` would let `curl | jq` succeed even when curl fails).
set -euo pipefail

echo "🧪 Testing Document Re-processing (Sample)"
echo "=========================================="

# ANSI color codes used by the print_* helpers; readonly since they are constants.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m' # No Color
# Emit an informational line tagged [INFO] in blue.
print_status() {
    printf '%b[INFO]%b %b\n' "${BLUE}" "${NC}" "$1"
}
# Emit a success line tagged [SUCCESS] in green.
print_success() {
    printf '%b[SUCCESS]%b %b\n' "${GREEN}" "${NC}" "$1"
}
# Emit a warning line tagged [WARNING] in yellow.
print_warning() {
    printf '%b[WARNING]%b %b\n' "${YELLOW}" "${NC}" "$1"
}
# Emit an error line tagged [ERROR] in red.
# NOTE(review): output goes to stdout (matching the original); callers that
# want stderr would need to redirect themselves.
print_error() {
    printf '%b[ERROR]%b %b\n' "${RED}" "${NC}" "$1"
}
# Configuration — every value can be overridden via environment variables, so
# the script works against other Paperless instances without editing the file.
PAPERLESS_HOST="${PAPERLESS_HOST:-192.168.50.229}"
PAPERLESS_PORT="${PAPERLESS_PORT:-8000}"
# SECURITY: avoid keeping a live token in the script; prefer exporting
# PAPERLESS_API_TOKEN in the environment. The default preserves current behavior.
API_TOKEN="${PAPERLESS_API_TOKEN:-e10c341c7c67b9bce7a968e1a3349963a70f800c}"
API_BASE_URL="http://${PAPERLESS_HOST}:${PAPERLESS_PORT}/api"
# Number of documents to sample in this test run.
TEST_COUNT="${TEST_COUNT:-5}"
echo ""
print_status "Step 1: Testing API connectivity..."

# Test API connection. -f makes curl fail on HTTP 4xx/5xx (plain -s exits 0
# for them), and capture-and-test in one step means the failure branch is
# actually reachable — with `set -e`, checking `$?` after a bare assignment
# never runs because the script has already exited.
if API_RESPONSE=$(curl -sf -H "Authorization: Token ${API_TOKEN}" "${API_BASE_URL}/"); then
    print_success "API connection successful"
else
    print_error "API connection failed"
    exit 1
fi

echo ""
print_status "Step 2: Getting total document count..."

# jq is required for all JSON parsing below — fail early with a clear message.
if ! command -v jq >/dev/null 2>&1; then
    print_error "jq is required but not installed"
    exit 1
fi

# Get total document count
TOTAL_DOCS=$(curl -sf -H "Authorization: Token ${API_TOKEN}" "${API_BASE_URL}/documents/" | jq -r '.count')
print_success "Found ${TOTAL_DOCS} total documents"

echo ""
print_status "Step 3: Testing with first ${TEST_COUNT} documents..."

# Fetch the first TEST_COUNT documents and extract their IDs (one per line).
DOCS_RESPONSE=$(curl -sf -H "Authorization: Token ${API_TOKEN}" "${API_BASE_URL}/documents/?page_size=${TEST_COUNT}")
DOC_IDS=$(echo "$DOCS_RESPONSE" | jq -r '.results[].id')

echo "Document IDs to test: $DOC_IDS"
#######################################
# Clear the classification metadata (tags, correspondent, document type) of
# one document while preserving its title, then re-read it to verify.
# Globals:   API_TOKEN, API_BASE_URL (read)
# Arguments: $1 - numeric Paperless document ID
# Outputs:   progress and verification lines via print_* helpers
# Returns:   0 on success, 1 if any API call fails
#######################################
clear_document_metadata() {
    local doc_id=$1
    local doc_data title tags correspondent document_type
    local update_payload response
    print_status "Processing document ${doc_id}..."

    # Fetch current document data. Declaration is separated from assignment so
    # curl's exit status is not masked by `local` (SC2155) — in the original,
    # the `$?` check always saw local's status (0) and the error branch was
    # unreachable. -f also turns HTTP errors into failures.
    if ! doc_data=$(curl -sf -H "Authorization: Token ${API_TOKEN}" "${API_BASE_URL}/documents/${doc_id}/"); then
        print_error "Failed to get document ${doc_id}"
        return 1
    fi

    # Extract current metadata for display.
    title=$(echo "$doc_data" | jq -r '.title // empty')
    tags=$(echo "$doc_data" | jq -r '.tags // empty')
    correspondent=$(echo "$doc_data" | jq -r '.correspondent // empty')
    document_type=$(echo "$doc_data" | jq -r '.document_type // empty')

    print_status " Current title: ${title}"
    print_status " Current tags: ${tags}"
    print_status " Current correspondent: ${correspondent}"
    print_status " Current document_type: ${document_type}"

    # Build the update payload with jq so the title is JSON-escaped correctly —
    # a hand-built heredoc breaks on titles containing quotes or backslashes.
    # Tags/correspondent/document_type are cleared; the title is kept.
    update_payload=$(jq -n --arg title "$title" \
        '{title: $title, tags: [], correspondent: null, document_type: null}')

    print_status " Clearing metadata..."

    # PATCH the document; again capture-and-test in one step so a failed
    # request actually reaches the error branch.
    if response=$(curl -sf -X PATCH \
        -H "Authorization: Token ${API_TOKEN}" \
        -H "Content-Type: application/json" \
        -d "$update_payload" \
        "${API_BASE_URL}/documents/${doc_id}/"); then
        print_success " ✅ Successfully cleared metadata for document ${doc_id}"

        # Re-fetch the document to confirm the fields were actually cleared.
        local updated_data updated_tags updated_correspondent updated_document_type
        updated_data=$(curl -sf -H "Authorization: Token ${API_TOKEN}" "${API_BASE_URL}/documents/${doc_id}/")
        updated_tags=$(echo "$updated_data" | jq -r '.tags // empty')
        updated_correspondent=$(echo "$updated_data" | jq -r '.correspondent // empty')
        updated_document_type=$(echo "$updated_data" | jq -r '.document_type // empty')

        print_status " Verification:"
        print_status " Tags: ${updated_tags}"
        print_status " Correspondent: ${updated_correspondent}"
        print_status " Document Type: ${updated_document_type}"

        return 0
    else
        print_error " ❌ Failed to clear metadata for document ${doc_id}"
        print_error " Response: ${response}"
        return 1
    fi
}
# Walk the sampled document IDs, clearing metadata on each and tallying
# successes vs failures for the summary below.
SUCCESS_COUNT=0
FAILED_COUNT=0

for doc_id in $DOC_IDS; do
    # Guard clause: skip blank or "null" IDs instead of nesting the body.
    [[ -z "$doc_id" || "$doc_id" == "null" ]] && continue
    if clear_document_metadata "$doc_id"; then
        SUCCESS_COUNT=$((SUCCESS_COUNT + 1))
    else
        FAILED_COUNT=$((FAILED_COUNT + 1))
    fi
    echo ""
done
echo ""
print_status "Step 4: Test Results Summary"
echo "=================================="
print_success "Successfully processed: ${SUCCESS_COUNT} documents"

# Report failures if any occurred, otherwise celebrate the clean run.
if (( FAILED_COUNT > 0 )); then
    print_error "Failed to process: ${FAILED_COUNT} documents"
else
    print_success "All test documents processed successfully!"
fi
echo ""
print_status "Step 5: Testing Paperless AI connection..."

# Verify the paperless-ai container is up on the Paperless host; if so, show
# its most recent log lines for a quick health impression.
if ssh "root@${PAPERLESS_HOST}" "docker ps | grep -q paperless-ai"; then
    print_success "Paperless AI is running"

    print_status "Checking Paperless AI logs..."
    ssh "root@${PAPERLESS_HOST}" "docker logs paperless-ai --tail 5"
else
    print_error "Paperless AI is not running!"
fi
echo ""
print_status "Test Complete!"
echo ""

# Recommend the full run only when every sampled document succeeded.
if (( SUCCESS_COUNT == TEST_COUNT )); then
    print_success "✅ All tests passed! The re-processing approach is working correctly."
    echo ""
    echo "🚀 You can now run the full re-processing script:"
    echo " ./scripts/reprocess_all_documents.sh"
else
    print_warning "⚠️ Some tests failed. Please review the errors before running the full script."
fi