Files

admin 2a3dedde11

2026-01-30 03:04:10 +00:00

12 KiB

Raw Permalink Blame History

File Security Reference

Overview

File operations present multiple security risks: path traversal attacks, malicious file uploads, XML External Entity (XXE) attacks, and insecure file permissions. This reference covers secure patterns for handling files.

Path Traversal Prevention

The Vulnerability

# VULNERABLE: User-controlled path
@app.route('/download')
def download():
    """Intentionally vulnerable example: serve a path built from the raw ``file`` query parameter."""
    filename = request.args.get('file')
    # No validation or canonicalization happens here: the user-supplied value
    # is interpolated into the path as-is, so '../' sequences are honoured.
    return send_file(f'/uploads/{filename}')

# Attack: ?file=../../../etc/passwd
# Results in: /uploads/../../../etc/passwd → /etc/passwd

Prevention Techniques

import os
from pathlib import Path

# Method 1: Validate and canonicalize path
def safe_join(base_directory, user_path):
    """Join ``user_path`` onto ``base_directory`` and refuse to leave the base.

    Both paths are canonicalized with ``Path.resolve()`` so that ``..``
    segments and symlinks are collapsed before the containment check.

    Args:
        base_directory: Directory that every returned path must live under.
        user_path: Untrusted relative path supplied by the caller.

    Returns:
        The resolved absolute path as a string.

    Raises:
        ValueError: If the resolved target is not inside ``base_directory``.
    """
    base = Path(base_directory).resolve()
    # An absolute user_path replaces `base` entirely in the join; the
    # containment check below rejects that case as well.
    target = (base / user_path).resolve()

    # Compare path components, not raw strings: a string prefix test would
    # accept a sibling such as "/uploads_evil" for base "/uploads".
    try:
        target.relative_to(base)
    except ValueError:
        raise ValueError("Path traversal detected") from None

    return str(target)

# Method 2: Use allowlist of files
ALLOWED_FILES = {'report.pdf', 'manual.pdf', 'readme.txt'}

def download_file(filename):
    """Send ``filename`` from UPLOAD_DIR, but only if it is on the allowlist.

    Raises:
        ValueError: If ``filename`` is not a member of ALLOWED_FILES.
    """
    # Happy path first: only exact allowlist members ever reach the join.
    if filename in ALLOWED_FILES:
        allowed_path = os.path.join(UPLOAD_DIR, filename)
        return send_file(allowed_path)
    raise ValueError("File not allowed")

# Method 3: Use indirect references
def get_file_by_id(file_id):
    """Send the stored file for ``file_id`` if it belongs to the current user.

    The caller only ever supplies an ID; the path on disk comes from the
    looked-up record's ``storage_path``.

    Raises:
        PermissionError: If no record is found for ``file_id`` or the
            record's ``user_id`` differs from ``current_user.id``.
    """
    # Map ID to filename in database
    record = File.query.get(file_id)
    if record and record.user_id == current_user.id:
        return send_file(record.storage_path)
    raise PermissionError()

Characters to Block

# Dangerous path patterns
BLOCKED_PATTERNS = [
    '..',           # Parent directory
    '~',            # Home directory
    '%2e%2e',       # URL-encoded ..
    '%252e%252e',   # Double-encoded ..
    '..\\',         # Windows backslash
    '..%5c',        # URL-encoded Windows
    '%00',          # Null byte (older systems)
]

def contains_traversal(path):
    """Return True if ``path`` contains a blocked traversal pattern.

    The path is lowercased and matched against BLOCKED_PATTERNS, then
    percent-decoded and matched again, repeating until decoding no longer
    changes it. This catches mixed and multiply-encoded forms such as
    ``.%2e`` or ``%2e.`` that a literal substring match misses.

    Args:
        path: Untrusted path string.

    Returns:
        True if any decoding layer contains a blocked pattern, or if the
        value is still changing after the decode-pass limit; False otherwise.
    """
    from urllib.parse import unquote

    candidate = path.lower()
    # Bounded loop: legitimate input never needs many decode passes, so a
    # value that is still encoded after five is treated as hostile.
    for _ in range(5):
        if any(pattern in candidate for pattern in BLOCKED_PATTERNS):
            return True
        decoded = unquote(candidate).lower()
        if decoded == candidate:
            return False
        candidate = decoded
    return True

File Upload Security

Defense in Depth Approach

import magic
import hashlib
import uuid
from pathlib import Path

# Configuration
ALLOWED_EXTENSIONS = {'pdf', 'png', 'jpg', 'jpeg', 'gif'}
ALLOWED_MIMETYPES = {
    'application/pdf',
    'image/png',
    'image/jpeg',
    'image/gif'
}
MAX_FILE_SIZE = 10 * 1024 * 1024  # 10MB
UPLOAD_DIR = '/var/uploads'  # Outside webroot

def secure_upload(file):
    """Validate an uploaded file and store it under a generated name.

    Checks run in this order: size, extension allowlist, sniffed MIME type,
    extension/MIME agreement. The stored name is a fresh UUID plus the
    validated extension; the client-supplied name is returned only as
    metadata.

    Args:
        file: Upload object exposing ``seek``/``tell``/``read``, a
            ``filename`` attribute and ``save(path)``.

    Returns:
        dict with keys ``original_name``, ``stored_name``, ``storage_path``,
        ``size`` and ``mime_type``.

    Raises:
        ValueError: If the file is too large, has no filename or a
            disallowed extension, has a disallowed MIME type, or its
            extension does not match its content.
    """

    # 1. Check file size
    file.seek(0, os.SEEK_END)
    size = file.tell()
    file.seek(0)  # Reset so later reads start at the beginning
    if size > MAX_FILE_SIZE:
        raise ValueError(f"File too large: {size} bytes")

    # 2. Validate extension
    # A missing filename (None) is treated like an empty one so it is
    # rejected by the allowlist below instead of crashing in Path().
    original_filename = file.filename or ''
    extension = Path(original_filename).suffix.lower().lstrip('.')
    if extension not in ALLOWED_EXTENSIONS:
        raise ValueError(f"Extension not allowed: {extension}")

    # 3. Validate MIME type (don't trust Content-Type header)
    mime = magic.from_buffer(file.read(2048), mime=True)
    file.seek(0)
    if mime not in ALLOWED_MIMETYPES:
        raise ValueError(f"MIME type not allowed: {mime}")

    # 4. Validate extension matches content
    expected_extensions = get_extensions_for_mime(mime)
    if extension not in expected_extensions:
        raise ValueError("Extension doesn't match content type")

    # 5. Generate safe filename (ignore user input)
    safe_filename = f"{uuid.uuid4().hex}.{extension}"

    # 6. Store outside webroot
    storage_path = os.path.join(UPLOAD_DIR, safe_filename)
    file.save(storage_path)

    # 7. Set restrictive permissions
    # NOTE(review): the file exists with whatever mode save() produced until
    # this chmod runs — confirm the process umask is already restrictive.
    os.chmod(storage_path, 0o640)

    return {
        'original_name': original_filename,
        'stored_name': safe_filename,
        'storage_path': storage_path,
        'size': size,
        'mime_type': mime
    }

Filename Sanitization

import re
import unicodedata

def sanitize_filename(filename):
    """Reduce an untrusted filename to a safe, bounded basename.

    Args:
        filename: Client-supplied name, possibly containing path components.

    Returns:
        A name of at most 255 characters made only of ``a-z A-Z 0-9 . _ -``
        with no leading dot, or ``'unnamed'`` if nothing usable remains.
    """
    # Normalize unicode first so compatibility forms of '.', '/' and '\'
    # become their ASCII equivalents before path handling.
    filename = unicodedata.normalize('NFKD', filename)

    # Remove path components. Backslashes are converted first because
    # os.path.basename only splits on the host platform's separator.
    filename = os.path.basename(filename.replace('\\', '/'))

    # Remove null bytes
    filename = filename.replace('\x00', '')

    # Allow only safe characters
    filename = re.sub(r'[^a-zA-Z0-9._-]', '_', filename)

    # Prevent hidden files
    filename = filename.lstrip('.')

    # Limit length, keeping the extension when it fits in the budget
    max_length = 255
    if len(filename) > max_length:
        name, ext = os.path.splitext(filename)
        if len(ext) >= max_length:
            # The extension alone exceeds the limit; a plain truncation is
            # the only way to honour it.
            filename = filename[:max_length]
        else:
            filename = name[:max_length - len(ext)] + ext

    return filename or 'unnamed'

Image Validation

from PIL import Image
import io

def validate_image(file_data):
    """Check that ``file_data`` is a decodable image and return a re-encoded copy.

    Args:
        file_data: Raw bytes of the candidate image.

    Returns:
        The bytes produced by saving the decoded image again in its
        detected format.

    Raises:
        ValueError: If any step of opening, verifying or re-saving fails.
    """
    try:
        # Integrity pass; verify() leaves the image object unusable, hence
        # the second open below.
        Image.open(io.BytesIO(file_data)).verify()

        decoded = Image.open(io.BytesIO(file_data))

        # Re-encode into a fresh buffer rather than passing the upload through.
        buffer = io.BytesIO()
        decoded.save(buffer, format=decoded.format)
        return buffer.getvalue()

    except Exception as e:
        raise ValueError(f"Invalid image: {e}")

Dangerous File Types

# Never allow execution
# Blocklist of file extensions, written lowercase and without the leading dot.
# NOTE(review): nothing in this reference consumes the set; callers presumably
# lowercase the extension and strip the dot before testing membership — confirm.
# NOTE(review): a dotfile such as '.htaccess' has an empty Path.suffix, so a
# suffix-based lookup would not match 'htaccess' — confirm how callers derive
# the extension.
DANGEROUS_EXTENSIONS = {
    # Executables
    'exe', 'dll', 'so', 'dylib', 'bin',
    # Scripts
    'php', 'php3', 'php4', 'php5', 'phtml',
    'asp', 'aspx', 'ascx', 'ashx',
    'jsp', 'jspx',
    'cgi', 'pl', 'py', 'rb', 'sh', 'bash',
    # Server config
    'htaccess', 'htpasswd',
    'config', 'ini',
    # HTML (XSS risk)
    'html', 'htm', 'xhtml', 'svg',
    # Office macros
    'docm', 'xlsm', 'pptm',
}

# Dangerous MIME types
# Companion blocklist keyed by MIME type string rather than extension.
DANGEROUS_MIMETYPES = {
    'application/x-executable',
    'application/x-msdownload',
    'application/x-php',
    'text/html',
    'image/svg+xml',  # Can contain scripts
}

XML External Entity (XXE) Prevention

The Vulnerability

<!-- Malicious XML -->
<?xml version="1.0"?>
<!DOCTYPE foo [
  <!ENTITY xxe SYSTEM "file:///etc/passwd">
]>
<data>&xxe;</data>

Python Prevention

# VULNERABLE: Default lxml settings
from lxml import etree
doc = etree.parse(untrusted_file)  # XXE enabled by default

# SAFE: Disable external entities
from lxml import etree
# Hardened parser: every option below switches off a DTD/entity feature.
parser = etree.XMLParser(
    resolve_entities=False,  # leave entity references such as &xxe; unexpanded
    no_network=True,         # no network access while parsing
    dtd_validation=False,    # do not validate against a DTD
    load_dtd=False           # do not load a DTD
)
# The hardened parser must be passed explicitly; etree.parse() without it
# uses the library defaults.
doc = etree.parse(untrusted_file, parser)

# SAFE: defusedxml library (recommended)
import defusedxml.ElementTree as ET
doc = ET.parse(untrusted_file)  # XXE disabled by default

Java Prevention

// VULNERABLE: Default DocumentBuilder
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilder db = dbf.newDocumentBuilder();
Document doc = db.parse(untrustedFile);

// SAFE: Disable dangerous features
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
// Reject any document that carries a DOCTYPE declaration.
dbf.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
// Turn off external general entities.
dbf.setFeature("http://xml.org/sax/features/external-general-entities", false);
// Turn off external parameter entities.
dbf.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
// Do not load external DTDs.
dbf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
// Disable XInclude processing and entity-reference expansion.
dbf.setXIncludeAware(false);
dbf.setExpandEntityReferences(false);
// Builders created from this factory inherit the settings above.
DocumentBuilder db = dbf.newDocumentBuilder();

.NET Prevention

// SAFE in .NET 4.5.2+: XmlReader is safe by default
XmlReader reader = XmlReader.Create(stream);

// For older versions, explicitly disable
XmlReaderSettings settings = new XmlReaderSettings();
// Prohibit DTD processing.
settings.DtdProcessing = DtdProcessing.Prohibit;
// No resolver: external resources referenced by the document are not fetched.
settings.XmlResolver = null;
// The settings only take effect when passed to Create().
XmlReader reader = XmlReader.Create(stream, settings);

Archive (ZIP) Handling

Zip Slip Prevention

import zipfile
import os

def safe_extract(zip_path, extract_dir):
    """Extract a ZIP archive, refusing any member that would escape ``extract_dir``.

    Every member is validated before anything is written, so a malicious
    entry late in the archive cannot leave a partial extraction behind.

    Args:
        zip_path: Path to the ZIP archive.
        extract_dir: Destination directory; created as needed.

    Raises:
        ValueError: If any member resolves to a path outside ``extract_dir``.
    """
    import shutil

    # realpath (not abspath) so symlinks already present on disk are
    # resolved before the containment check.
    extract_dir = os.path.realpath(extract_dir)
    # Avoid a doubled separator when the destination is the filesystem root.
    prefix = extract_dir if extract_dir.endswith(os.sep) else extract_dir + os.sep

    with zipfile.ZipFile(zip_path, 'r') as zf:
        # Pass 1: validate every member name.
        members = []
        for member in zf.namelist():
            member_path = os.path.realpath(os.path.join(extract_dir, member))
            if not member_path.startswith(prefix):
                raise ValueError(f"Path traversal in ZIP: {member}")
            members.append((member, member_path))

        # Pass 2: write. Entries are always materialised as directories or
        # regular files via open(); symlink entries are never recreated as
        # links.
        for member, member_path in members:
            if member.endswith('/'):
                os.makedirs(member_path, exist_ok=True)
            else:
                os.makedirs(os.path.dirname(member_path), exist_ok=True)
                with zf.open(member) as source, open(member_path, 'wb') as target:
                    # Stream in chunks instead of loading the whole member.
                    shutil.copyfileobj(source, target)

Zip Bomb Prevention

MAX_UNCOMPRESSED_SIZE = 100 * 1024 * 1024  # 100MB
MAX_COMPRESSION_RATIO = 100

def check_zip_bomb(zip_path):
    """Reject archives whose declared contents look like a decompression bomb.

    Compares the sum of the per-entry ``file_size`` values against
    MAX_UNCOMPRESSED_SIZE, and against the archive's size on disk via
    MAX_COMPRESSION_RATIO.

    Args:
        zip_path: Path to the ZIP archive on disk.

    Returns:
        True when both checks pass.

    Raises:
        ValueError: If the total uncompressed size or the compression ratio
            exceeds its limit.
    """
    archive_bytes = os.path.getsize(zip_path)

    # Total up the sizes recorded in the archive's entry metadata.
    with zipfile.ZipFile(zip_path, 'r') as zf:
        uncompressed_size = 0
        for entry in zf.infolist():
            uncompressed_size += entry.file_size

    if uncompressed_size > MAX_UNCOMPRESSED_SIZE:
        raise ValueError(f"Uncompressed size too large: {uncompressed_size}")

    # Guard the division; the ratio test is skipped for a zero-byte archive.
    if archive_bytes > 0:
        ratio = uncompressed_size / archive_bytes
        if ratio > MAX_COMPRESSION_RATIO:
            raise ValueError(f"Suspicious compression ratio: {ratio}")

    return True

File Permissions

Secure Defaults

import os
import stat

# Uploaded files: readable by app, not executable
def secure_file_permissions(path):
    """Set ``path`` to mode 640: owner read/write, group read, nothing for others."""
    owner_rw_group_r = 0o640
    os.chmod(path, owner_rw_group_r)

# Directories: accessible by app
def secure_directory_permissions(path):
    """Set ``path`` to mode 750: full access for owner, read/traverse for group."""
    owner_rwx_group_rx = 0o750
    os.chmod(path, owner_rwx_group_rx)

# Sensitive files: only owner
def sensitive_file_permissions(path):
    """Set ``path`` to mode 600: owner read/write only."""
    owner_rw_only = 0o600
    os.chmod(path, owner_rw_only)

Temporary Files

import tempfile
import os

# VULNERABLE: Predictable temp file
with open('/tmp/myapp_temp.txt', 'w') as f:
    f.write(sensitive_data)

# SAFE: Secure temp file
with tempfile.NamedTemporaryFile(mode='w', delete=False) as f:
    f.write(sensitive_data)
    temp_path = f.name
    # File has restrictive permissions automatically
    # delete=False: the file is NOT removed when this block exits. The caller
    # owns temp_path and must delete it (e.g. os.remove) once finished.

# SAFE: Temp directory
with tempfile.TemporaryDirectory() as tmpdir:
    # Directory and contents deleted on exit
    pass

Grep Patterns for Detection

# Path traversal risks
grep -rn "open(.*request\|send_file(.*request" --include="*.py"
grep -rn "fs\.readFile.*req\|fs\.writeFile.*req" --include="*.js"

# Dangerous file operations
grep -rn "os\.system.*file\|subprocess.*file" --include="*.py"

# XML parsing (XXE risk)
grep -rn "etree\.parse\|xml\.parse\|DOM\.parse" --include="*.py" --include="*.java"
grep -rn "XMLReader\|DocumentBuilder" --include="*.java"

# ZIP handling
grep -rn "zipfile\|ZipFile\|extractall" --include="*.py" --include="*.java"

# File permissions
grep -rn "chmod 777\|chmod 666\|chmod 755" --include="*.py" --include="*.sh"

Testing Checklist

Path traversal prevented (canonicalization + validation)
File extensions validated against allowlist
MIME types validated (not just Content-Type header)
Filenames sanitized (don't use user input directly)
Files stored outside webroot
Restrictive file permissions set
Upload size limits enforced
Dangerous file types blocked
XML parsing has XXE disabled
ZIP extraction validates paths
ZIP bomb detection in place
Temporary files handled securely

12 KiB Raw Permalink Blame History

File Security Reference

Overview

Path Traversal Prevention

The Vulnerability

Prevention Techniques

Characters to Block

File Upload Security

Defense in Depth Approach

Filename Sanitization

Image Validation

Dangerous File Types

XML External Entity (XXE) Prevention

The Vulnerability

Python Prevention

Java Prevention

.NET Prevention

Archive (ZIP) Handling

Zip Slip Prevention

Zip Bomb Prevention

File Permissions

Secure Defaults

Temporary Files

Grep Patterns for Detection

Testing Checklist

References

12 KiB

Raw Permalink Blame History