Skip to content
AI Detector

Detect AI-Generated Images in PDFs

The Copyleaks AI Image Detection API is a powerful tool to determine if a given image was generated or partially generated by an AI.

As a consumer, you might have a PDF with images. If you want to scan those images separately from the PDF, this guide is for you.

This guide will walk you through extracting images from a PDF file, submitting them for AI detection, and understanding the results.

  1. Before you start, ensure you have the following:

  2. Install the relevant packages using pip install -U PyMuPDF Pillow copyleaks.

  3. To perform a scan, we first need to generate an access token. For that, we will use the login endpoint. The API key can be found on the Copyleaks API Dashboard.

    Upon successful authentication, you will receive a token that must be attached to subsequent API calls via the Authorization: Bearer <TOKEN> header. This token remains valid for 48 hours.

    POST https://id.copyleaks.com/v3/account/login/api
    Headers
    Content-Type: application/json
    Body
    {
    "email": "you@example.com",
    "key": "00000000-0000-0000-0000-000000000000"
    }

    Response

    {
    "access_token": "<ACCESS_TOKEN>",
    ".issued": "2025-07-31T10:19:40.0690015Z",
    ".expires": "2025-08-02T10:19:40.0690016Z"
    }

    Save this token! It’s valid for 48 hours and can be reused for subsequent API calls.

  4. Next, we are going to extract all the images from the PDF. The function below will take a pdf file path and extract all its images to a specified directory.

    The following example takes the input PDF file and outputs all its nested images to the output_folder directory

    import os
    import fitz  # module provided by the PyMuPDF package
    from pathlib import Path
    def extract_images(pdf_path: str, output_folder: str = "Extracted-Images") -> list[str]:
        """
        Extract all images from a PDF file.

        Each image reference reported by a page is written to
        ``output_folder`` as ``<pdf stem>_page<N>_img<M>.<ext>``. An image
        that cannot be extracted or written is reported and skipped, so the
        returned list only contains files that were actually saved.

        Args:
            pdf_path: Path to PDF file
            output_folder: Output folder for images (created if missing)

        Returns:
            List of extracted image paths as strings; an empty list when the
            PDF could not be opened or processed.
        """
        os.makedirs(output_folder, exist_ok=True)
        extracted = []
        pdf_name = Path(pdf_path).stem
        pdf = None
        try:
            pdf = fitz.open(pdf_path)
            print(f"Processing: {pdf_path}")
            print(f"Pages: {len(pdf)}")
            for page_num, page in enumerate(pdf, start=1):
                images = page.get_images(full=True)
                print(f"Page {page_num}: {len(images)} image(s)")
                for img_index, img in enumerate(images, start=1):
                    try:
                        # The first item of each entry is the image xref.
                        base_image = pdf.extract_image(img[0])
                        ext = base_image["ext"]
                        filename = f"{pdf_name}_page{page_num}_img{img_index}.{ext}"
                        path = os.path.join(output_folder, filename)
                        with open(path, "wb") as f:
                            f.write(base_image["image"])
                    except Exception as e:
                        # Best effort: one bad image must not discard the rest,
                        # and a failed write must not be reported as extracted.
                        print(f" ✗ Error: {e}")
                        continue
                    extracted.append(path)
                    print(f" ✓ {filename}")
        except Exception as e:
            print(f"✗ Error: {e}")
            return []
        finally:
            if pdf is not None:
                pdf.close()
        # Count what was really saved rather than what was attempted.
        print(f"\n✓ Extracted {len(extracted)} images")
        return extracted
    if __name__ == "__main__":
        # Demo run: pull every image out of my_file.pdf into output_dir.
        extract_images(pdf_path="my_file.pdf", output_folder="output_dir")
  5. Once we have the extracted images, you can submit them for analysis.

    We are going to use the AI Image Detector Endpoint to send an image for analysis.

    This function takes your image, converts it to base64, and submits it via the SDK’s ImageDetectionClient. The SDK handles authentication and HTTP transport.

    import os
    import base64
    import uuid
    from copyleaks.clients.image_detection_client import ImageDetectionClient
    from copyleaks.models.ai_image_detection import (
    CopyleaksAiImageDetectionRequestModel,
    CopyleaksAiImageDetectionModels,
    )
    def detect(image_path: str, auth_token: str):
        """Submit one image file for AI detection through the Copyleaks SDK.

        The file is read, base64-encoded and wrapped in a
        ``CopyleaksAiImageDetectionRequestModel`` that is submitted under a
        freshly generated UUID scan id.

        Args:
            image_path: Path of the image file to read.
            auth_token: Passed through as the first argument of
                ``ImageDetectionClient.submit``.

        Returns:
            Whatever ``ImageDetectionClient.submit`` returns, or ``None`` when
            the image could not be read.
        """
        try:
            with open(image_path, 'rb') as image_file:
                raw_bytes = image_file.read()
            encoded_image = base64.b64encode(raw_bytes).decode('utf-8')
        except Exception as e:
            print(f" ✗ Error reading image: {e}")
            return None

        scan_id = str(uuid.uuid4())
        request = CopyleaksAiImageDetectionRequestModel(
            base64=encoded_image,
            filename=os.path.basename(image_path),
            model=CopyleaksAiImageDetectionModels.AI_IMAGE_1_ULTRA,
            sandbox=False,
        )
        return ImageDetectionClient().submit(auth_token, scan_id, request)
    if __name__ == "__main__":
        from pathlib import Path

        # Submit every regular file found directly inside the folder.
        path = Path('directory_path')
        for entry in path.iterdir():
            if entry.is_file():
                # Pass the full path: entry.name is only the bare file name and
                # would resolve only if the working directory were the folder.
                detect(str(entry), 'auth_token')
  6. This function takes an image_path, detection_result, and a directory name.

    It parses the output from the API to produce an image with an overlay in red and green tint.

    from pathlib import Path
    from PIL import Image, ImageDraw
    def create_overlay(image_path, detection_result, output_folder):
        """Create image with AI detection overlay.

        The image is tinted green as a base and red over every pixel run
        listed in ``detection_result["result"]``, then saved to
        ``output_folder`` as ``detected_<original file name>``.

        Args:
            image_path: Path of the image to annotate.
            detection_result: Mapping read via ``.get("result")``, whose
                ``"starts"`` and ``"lengths"`` lists describe pixel runs as
                flat offsets (row = offset // width, column = offset % width).
                A falsy value produces an untinted copy.
            output_folder: Folder for the annotated image (created if missing).

        Returns:
            Path of the saved image as a string, or ``None`` if anything
            failed (the error is printed).
        """
        # Local import: this function needs os, which the imports next to it
        # (pathlib and PIL only) do not provide.
        import os

        # Colors
        ai_color = (255, 0, 0, 60)  # Red for AI
        human_color = (0, 255, 0, 60)  # Green for human

        try:
            # Convert inside the context manager so the file handle is released;
            # convert() returns an independent image object.
            with Image.open(image_path) as source:
                img = source.convert('RGB')
            width, height = img.size
            overlay = Image.new('RGBA', img.size, (0, 0, 0, 0))
            draw = ImageDraw.Draw(overlay)

            if detection_result:
                result_data = detection_result.get("result", {})
                starts = result_data.get("starts") or []
                lengths = result_data.get("lengths") or []
                # Draw green (human) as base
                draw.rectangle([0, 0, width - 1, height - 1], fill=human_color)
                # Draw red (AI) on detected regions
                for start, length in zip(starts, lengths):
                    if length <= 0:
                        # An empty run would give inverted rectangle coordinates.
                        continue
                    _fill_pixel_run(draw, start, length, width, ai_color)

            # Composite and save
            result_img = Image.alpha_composite(
                img.convert('RGBA'), overlay).convert('RGB')
            os.makedirs(output_folder, exist_ok=True)
            output_path = os.path.join(
                output_folder,
                f"detected_{Path(image_path).name}"
            )
            result_img.save(output_path, quality=95)
            return output_path
        except Exception as e:
            print(f" ✗ Overlay error: {e}")
            return None


    def _fill_pixel_run(draw, start, length, width, color):
        """Fill ``length`` consecutive pixels beginning at flat offset ``start``."""
        start_row, start_col = divmod(start, width)
        end_row, end_col = divmod(start + length - 1, width)
        if start_row == end_row:
            draw.rectangle([start_col, start_row, end_col, start_row], fill=color)
            return
        # First (partial) row: from the start column to the right edge.
        draw.rectangle([start_col, start_row, width - 1, start_row], fill=color)
        # All complete rows in between, drawn as a single rectangle.
        if end_row - start_row > 1:
            draw.rectangle([0, start_row + 1, width - 1, end_row - 1], fill=color)
        # Last (partial) row: from the left edge to the end column.
        draw.rectangle([0, end_row, end_col, end_row], fill=color)
    if __name__ == "__main__":
        import json

        # create_overlay reads the detection result with .get(), so it needs the
        # parsed JSON content rather than the name of the JSON file.
        with open('detection_result.json', encoding='utf-8') as f:
            detection_result = json.load(f)
        create_overlay('image_path', detection_result, 'overlay_output_dir')
  7. After following all the steps in this guide, you will have the directory you specified as output_folder with two types of files:

    Firstly, the images. They will have an overlay of red and green colors, red indicating suspected AI manipulation.

    Secondly, JSON responses. For a complete breakdown of all fields in the response, see the AI Image Detection Response documentation.

  8. You have successfully submitted the images from a PDF for AI detection.

    Of course, you are free to fit this code to your needs.