#!/bin/bash # Test Re-processing Script # This script tests the re-processing approach on a small sample of documents set -e echo "๐Ÿงช Testing Document Re-processing (Sample)" echo "==========================================" # Colors for output RED='\033[0;31m' GREEN='\033[0;32m' YELLOW='\033[1;33m' BLUE='\033[0;34m' NC='\033[0m' # No Color print_status() { echo -e "${BLUE}[INFO]${NC} $1" } print_success() { echo -e "${GREEN}[SUCCESS]${NC} $1" } print_warning() { echo -e "${YELLOW}[WARNING]${NC} $1" } print_error() { echo -e "${RED}[ERROR]${NC} $1" } # Configuration PAPERLESS_HOST="192.168.50.229" PAPERLESS_PORT="8000" API_TOKEN="e10c341c7c67b9bce7a968e1a3349963a70f800c" API_BASE_URL="http://${PAPERLESS_HOST}:${PAPERLESS_PORT}/api" TEST_COUNT=5 echo "" print_status "Step 1: Testing API connectivity..." # Test API connection API_RESPONSE=$(curl -s -H "Authorization: Token ${API_TOKEN}" "${API_BASE_URL}/") if [[ $? -eq 0 ]]; then print_success "API connection successful" else print_error "API connection failed" exit 1 fi echo "" print_status "Step 2: Getting total document count..." # Get total document count TOTAL_DOCS=$(curl -s -H "Authorization: Token ${API_TOKEN}" "${API_BASE_URL}/documents/" | jq -r '.count') print_success "Found ${TOTAL_DOCS} total documents" echo "" print_status "Step 3: Testing with first ${TEST_COUNT} documents..." # Get first few documents DOCS_RESPONSE=$(curl -s -H "Authorization: Token ${API_TOKEN}" "${API_BASE_URL}/documents/?page_size=${TEST_COUNT}") DOC_IDS=$(echo "$DOCS_RESPONSE" | jq -r '.results[].id') echo "Document IDs to test: $DOC_IDS" # Function to clear tags and titles for a document clear_document_metadata() { local doc_id=$1 print_status "Processing document ${doc_id}..." # Get current document data local doc_data=$(curl -s -H "Authorization: Token ${API_TOKEN}" "${API_BASE_URL}/documents/${doc_id}/") if [[ $? -ne 0 ]]; then print_error "Failed to get document ${doc_id}" return 1 fi # Extract current data local title=$(echo "$doc_data" | jq -r '.title // empty') local tags=$(echo "$doc_data" | jq -r '.tags // empty') local correspondent=$(echo "$doc_data" | jq -r '.correspondent // empty') local document_type=$(echo "$doc_data" | jq -r '.document_type // empty') print_status " Current title: ${title}" print_status " Current tags: ${tags}" print_status " Current correspondent: ${correspondent}" print_status " Current document_type: ${document_type}" # Create update payload - clear tags, correspondent, document_type, but keep title local update_payload=$(cat <