#!/bin/bash
#
# Re-process All Documents Script
#
# This script clears existing tags/titles and triggers Paperless AI to
# re-process all documents.
#
# Requires: curl, jq.
# Optional environment overrides (defaults are the values below):
#   PAPERLESS_HOST, PAPERLESS_PORT, API_TOKEN

set -e

echo "🔄 Re-processing All Documents in Paperless-ngx"
echo "=============================================="

# Colors for output (backslash escapes are interpreted by `echo -e` below)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Print an informational message ($1) with a blue [INFO] prefix.
print_status() {
  echo -e "${BLUE}[INFO]${NC} $1"
}

# Print a success message ($1) with a green [SUCCESS] prefix.
print_success() {
  echo -e "${GREEN}[SUCCESS]${NC} $1"
}

# Print a warning message ($1) with a yellow [WARNING] prefix.
print_warning() {
  echo -e "${YELLOW}[WARNING]${NC} $1"
}

# Print an error message ($1) with a red [ERROR] prefix, on stderr so that
# it is not mixed into captured stdout.
print_error() {
  echo -e "${RED}[ERROR]${NC} $1" >&2
}

# Configuration
# Each value can be overridden from the environment; the defaults are the
# values this script has always used.
PAPERLESS_HOST="${PAPERLESS_HOST:-192.168.50.229}"
PAPERLESS_PORT="${PAPERLESS_PORT:-8000}"
# NOTE(review): this default token is a secret committed to the file. Prefer
# supplying API_TOKEN via the environment, and consider rotating this one.
API_TOKEN="${API_TOKEN:-e10c341c7c67b9bce7a968e1a3349963a70f800c}"
API_BASE_URL="http://${PAPERLESS_HOST}:${PAPERLESS_PORT}/api"

echo ""
print_status "Step 1: Checking current document count..."

# Get total document count.
# The request and the JSON parsing are done separately: in a `curl | jq`
# pipeline without pipefail, a curl failure is masked by jq's exit status.
# `-f` makes curl fail on HTTP errors (e.g. a rejected token).
if ! documents_response=$(curl -sf -H "Authorization: Token ${API_TOKEN}" "${API_BASE_URL}/documents/"); then
  print_error "Could not query ${API_BASE_URL}/documents/ (unreachable host or rejected token)"
  exit 1
fi

# A jq failure is turned into an empty value so the check below reports it
# instead of `set -e` aborting silently.
TOTAL_DOCS=$(jq -r '.count' <<<"$documents_response") || TOTAL_DOCS=""
if ! [[ "$TOTAL_DOCS" =~ ^[0-9]+$ ]]; then
  print_error "Unexpected document count in API response: '${TOTAL_DOCS}'"
  exit 1
fi
print_success "Found ${TOTAL_DOCS} documents to re-process"

echo ""
print_status "Step 2: Clearing existing tags and titles from all documents..."

# Function to clear tags and titles for a document
#
# NOTE(review): the definition of clear_document_metadata is cut off in this
# copy of the file: it stops inside the `$(cat <` here-document opener. The
# visible part is kept verbatim below, commented out so that the script still
# parses. Restore the complete body before relying on Step 2.
#
# clear_document_metadata() {
#   local doc_id=$1
#   local doc_data=$(curl -s -H "Authorization: Token ${API_TOKEN}" "${API_BASE_URL}/documents/${doc_id}/")
#
#   # Extract current data
#   local title=$(echo "$doc_data" | jq -r '.title // empty')
#   local tags=$(echo "$doc_data" | jq -r '.tags // empty')
#   local correspondent=$(echo "$doc_data" | jq -r '.correspondent // empty')
#   local document_type=$(echo "$doc_data" | jq -r '.document_type // empty')
#
#   # Create update payload - clear tags, correspondent, document_type, and reset title
#   local update_payload=$(cat <