cim_summary/check_gcf_bucket.sh

#!/bin/bash

# Script to check Google Cloud Functions bucket contents
BUCKET_NAME="gcf-v2-uploads-245796323861.us-central1.cloudfunctions.appspot.com"
PROJECT_ID="cim-summarizer"

echo "=== Google Cloud Functions Bucket Analysis ==="
echo "Bucket: $BUCKET_NAME"
echo "Project: $PROJECT_ID"
echo "Date: $(date)"
echo ""

# Check if gcloud is authenticated
if ! gcloud auth list --filter=status:ACTIVE --format="value(account)" | grep -q .; then
    echo "❌ Not authenticated with gcloud. Please run: gcloud auth login"
    exit 1
fi

# Check if we have access to the bucket
echo "🔍 Checking bucket access..."
if ! gsutil ls -b "gs://$BUCKET_NAME" > /dev/null 2>&1; then
    echo "❌ Cannot access bucket. This might be a system-managed bucket."
    echo "   Cloud Functions v2 buckets are typically managed by Google Cloud."
    exit 1
fi

echo "✅ Bucket accessible"
echo ""

# List bucket contents with sizes
echo "📋 Bucket contents:"
echo "=================="
gsutil ls -lh "gs://$BUCKET_NAME" | head -20

echo ""
echo "📊 Size breakdown by prefix:"
echo "============================"

# Get all objects and group by prefix
gsutil ls -r "gs://$BUCKET_NAME" | while read -r object; do
    if [[ $object == gs://* ]]; then
        # Extract prefix (everything after bucket name)
        prefix=$(echo "$object" | sed "s|gs://$BUCKET_NAME/||")
        if [[ -n "$prefix" ]]; then
            # Get size of this object
            size=$(gsutil ls -lh "$object" | awk '{print $1}' | tail -1)
            echo "$size - $prefix"
        fi
    fi
done | sort -hr | head -10

echo ""
echo "🔍 Checking for large files (>100MB):"
echo "====================================="
gsutil ls -lh "gs://$BUCKET_NAME" | grep -E "([0-9]+\.?[0-9]*G|[0-9]+\.?[0-9]*M)" | head -10

echo ""
echo "📈 Total bucket size:"
echo "===================="
gsutil du -sh "gs://$BUCKET_NAME"

echo ""
echo "💡 Recommendations:"
echo "=================="
echo "1. This is a Google Cloud Functions v2 system bucket"
echo "2. It contains function source code, dependencies, and runtime files"
echo "3. Google manages cleanup automatically for old deployments"
echo "4. Manual cleanup is not recommended as it may break function deployments"
echo "5. Large size is likely due to Puppeteer/Chromium dependencies"
echo ""
echo "🔧 To reduce future deployment sizes:"
echo "   - Review .gcloudignore file to exclude unnecessary files"
echo "   - Consider using container-based functions for large dependencies"
echo "   - Use .gcloudignore to exclude node_modules (let Cloud Functions install deps)"