Advanced

Step 4: Advanced Features

Take your image generator from a basic text-to-image tool to a production-grade creative suite. You will add image-to-image transformation, inpainting (editing parts of images), AI upscaling, and batch generation.

Image-to-Image Generation

Image-to-image (img2img) takes an existing image plus a prompt and generates a new image that follows the composition and structure of the original while applying the prompt's style and content changes. This is useful for style transfer, concept variations, and iterative refinement.

Backend: img2img Endpoint

Add this method to services/image_service.py:

async def img2img_stability(
    self,
    image_path: str,
    prompt: str,
    negative_prompt: str = "",
    strength: float = 0.7,
    steps: int = 30,
    cfg_scale: float = 7.0,
    seed: int = 0,
) -> dict:
    """Generate an image based on an existing image + prompt.

    Args:
        image_path: Path to the source image on disk.
        prompt: Text description of the desired output.
        negative_prompt: Concepts to steer the generation away from.
        strength: How much to change (0.0 = identical, 1.0 = ignore source).
        steps: Number of diffusion steps.
        cfg_scale: How strictly the model follows the prompt.
        seed: Fixed seed for reproducibility; 0 means omit it (random).

    Returns:
        The metadata dict produced by self._save_image for the saved result.

    Raises:
        httpx.HTTPStatusError: If the Stability API returns an error status.
        OSError: If image_path cannot be read.
    """
    import base64  # hoisted to the top of the function instead of mid-body

    url = "https://api.stability.ai/v2beta/stable-image/generate/sd3"
    headers = {
        "Authorization": f"Bearer {self.stability_key}",
        "Accept": "application/json",
    }

    # Read and base64-encode the source image for the JSON payload.
    with open(image_path, "rb") as f:
        image_b64 = base64.b64encode(f.read()).decode("utf-8")

    payload = {
        "prompt": prompt,
        "negative_prompt": negative_prompt,
        "image": image_b64,
        "strength": strength,
        "steps": steps,
        "cfg_scale": cfg_scale,
        "output_format": "png",
    }
    # Only send a seed when the caller fixed one; 0 lets the API randomize.
    if seed:
        payload["seed"] = seed

    async with httpx.AsyncClient(timeout=120) as client:
        response = await client.post(url, headers=headers, json=payload)
        response.raise_for_status()
        data = response.json()

    image_data = base64.b64decode(data["image"])
    return self._save_image(image_data, prompt, "stability-img2img", seed)

Add the route in routers/generate.py:

from fastapi import UploadFile, File, Form
import shutil

@router.post("/img2img")
async def img2img(
    image: UploadFile = File(...),
    prompt: str = Form(...),
    negative_prompt: str = Form(default=""),
    strength: float = Form(default=0.7),
    steps: int = Form(default=30),
    cfg_scale: float = Form(default=7.0),
    seed: int = Form(default=0),
):
    """Generate a new image based on an uploaded image and prompt."""
    import uuid
    from pathlib import Path

    # Never trust the client-supplied filename: Path(...).name strips any
    # directory components (blocks "../" path traversal), and the UUID
    # prefix prevents concurrent uploads with the same name from
    # clobbering each other's temp files.
    safe_name = Path(image.filename or "upload.png").name
    temp_path = IMAGES_DIR / f"temp_{uuid.uuid4().hex}_{safe_name}"
    with open(temp_path, "wb") as f:
        shutil.copyfileobj(image.file, f)

    try:
        result = await image_service.img2img_stability(
            image_path=str(temp_path),
            prompt=prompt,
            negative_prompt=negative_prompt,
            strength=strength,
            steps=steps,
            cfg_scale=cfg_scale,
            seed=seed,
        )
        generated_images.append(result)
        return result
    finally:
        # Always remove the temp file, even if generation raised.
        temp_path.unlink(missing_ok=True)

Understanding the Strength Parameter

| Strength  | Behavior                                   | Use Case                                 |
|-----------|--------------------------------------------|------------------------------------------|
| 0.1 – 0.3 | Subtle changes, keeps most of the original | Color correction, minor style tweaks     |
| 0.4 – 0.6 | Moderate changes, keeps composition        | Style transfer, mood changes             |
| 0.7 – 0.8 | Major changes, loosely follows original    | Reimagining scenes, concept exploration  |
| 0.9 – 1.0 | Almost completely new image                | Using source only as rough layout guide  |

Inpainting

Inpainting lets users edit specific regions of an image while keeping the rest unchanged. The user provides a mask (white = edit, black = keep) and a prompt describing what should appear in the masked area.

async def inpaint_stability(
    self,
    image_path: str,
    mask_path: str,
    prompt: str,
    negative_prompt: str = "",
    steps: int = 30,
    cfg_scale: float = 7.0,
    seed: int = 0,
) -> dict:
    """Regenerate the masked region of an image from a text prompt.

    Args:
        image_path: Path to the original image.
        mask_path: Path to the mask image (white = area to edit,
            black = area to keep untouched).
        prompt: Description of what to generate inside the masked area.
        negative_prompt: Concepts to steer away from.
        steps: Number of diffusion steps.
        cfg_scale: How strictly the model follows the prompt.
        seed: Fixed seed; 0 means let the API pick a random one.
    """
    def _encode(path: str) -> str:
        # Base64-encode a file's bytes for the JSON payload.
        with open(path, "rb") as fh:
            return base64.b64encode(fh.read()).decode("utf-8")

    endpoint = "https://api.stability.ai/v2beta/stable-image/edit/inpaint"
    request_headers = {
        "Authorization": f"Bearer {self.stability_key}",
        "Accept": "application/json",
    }

    payload = {
        "prompt": prompt,
        "negative_prompt": negative_prompt,
        "image": _encode(image_path),
        "mask": _encode(mask_path),
        "steps": steps,
        "cfg_scale": cfg_scale,
        "output_format": "png",
    }
    if seed:
        payload["seed"] = seed

    async with httpx.AsyncClient(timeout=120) as client:
        response = await client.post(
            endpoint, headers=request_headers, json=payload
        )
        response.raise_for_status()
        data = response.json()

    return self._save_image(
        base64.b64decode(data["image"]), prompt, "stability-inpaint", seed
    )

Creating a Canvas-Based Mask Editor

Add a simple mask painter to the frontend so users can draw masks directly on their images:

// Mask editor component for inpainting. The user paints over the visible
// image; each stroke is mirrored as white onto a hidden mask canvas
// (white = edit, black = keep) which is exported as a PNG blob.
class MaskEditor {
  constructor(canvasId, imageUrl) {
    this.canvas = document.getElementById(canvasId);
    this.ctx = this.canvas.getContext("2d");
    this.painting = false;
    this.brushSize = 30;

    // Create the hidden mask canvas up front.
    this.maskCanvas = document.createElement("canvas");
    this.maskCtx = this.maskCanvas.getContext("2d");

    // Load the image, then size BOTH canvases to it. The mask canvas must
    // be sized and filled black HERE, before any painting happens: setting
    // a canvas's width/height clears its contents, so doing this lazily in
    // getMaskBlob() would wipe out every painted stroke and always export
    // an all-black mask.
    this.image = new Image();
    this.image.crossOrigin = "anonymous";
    this.image.onload = () => {
      this.canvas.width = this.image.width;
      this.canvas.height = this.image.height;
      this.ctx.drawImage(this.image, 0, 0);

      this.maskCanvas.width = this.image.width;
      this.maskCanvas.height = this.image.height;
      this.fillMaskBlack();
    };
    this.image.src = imageUrl;

    this.setupEvents();
  }

  // Paint the whole mask black ("keep everything").
  fillMaskBlack() {
    this.maskCtx.fillStyle = "black";
    this.maskCtx.fillRect(0, 0, this.maskCanvas.width, this.maskCanvas.height);
  }

  setupEvents() {
    this.canvas.addEventListener("mousedown", (e) => {
      this.painting = true;
      this.paint(e);
    });
    this.canvas.addEventListener("mousemove", (e) => {
      if (this.painting) this.paint(e);
    });
    this.canvas.addEventListener("mouseup", () => {
      this.painting = false;
    });
    this.canvas.addEventListener("mouseleave", () => {
      this.painting = false;
    });

    // Touch support
    this.canvas.addEventListener("touchstart", (e) => {
      e.preventDefault();
      this.painting = true;
      this.paintTouch(e);
    });
    this.canvas.addEventListener("touchmove", (e) => {
      e.preventDefault();
      if (this.painting) this.paintTouch(e);
    });
    this.canvas.addEventListener("touchend", () => {
      this.painting = false;
    });
  }

  paint(e) {
    // Convert client coordinates to canvas pixel coordinates (the canvas
    // may be displayed at a different size than its pixel dimensions).
    const rect = this.canvas.getBoundingClientRect();
    const scaleX = this.canvas.width / rect.width;
    const scaleY = this.canvas.height / rect.height;
    const x = (e.clientX - rect.left) * scaleX;
    const y = (e.clientY - rect.top) * scaleY;

    // Draw semi-transparent red on the visible canvas as user feedback.
    this.ctx.fillStyle = "rgba(255, 0, 0, 0.4)";
    this.ctx.beginPath();
    this.ctx.arc(x, y, this.brushSize, 0, Math.PI * 2);
    this.ctx.fill();

    // Mirror the stroke as white on the mask canvas (white = edit).
    this.maskCtx.fillStyle = "white";
    this.maskCtx.beginPath();
    this.maskCtx.arc(x, y, this.brushSize, 0, Math.PI * 2);
    this.maskCtx.fill();
  }

  paintTouch(e) {
    // Reuse the mouse handler by adapting the first touch point.
    const touch = e.touches[0];
    this.paint({
      clientX: touch.clientX,
      clientY: touch.clientY,
    });
  }

  getMaskBlob() {
    // The mask canvas was sized and blacked out at load time and has been
    // accumulating white strokes ever since — just export it.
    return new Promise((resolve) => {
      this.maskCanvas.toBlob(resolve, "image/png");
    });
  }

  reset() {
    // Restore the visible image and wipe the mask back to all-black
    // (clearRect would leave it transparent, not "keep everything").
    this.ctx.drawImage(this.image, 0, 0);
    this.fillMaskBlack();
  }
}

Image Upscaling

Upscaling uses AI to increase image resolution while adding realistic detail. This is essential when users want to print or use images at larger sizes.

async def upscale_stability(
    self,
    image_path: str,
    scale: int = 2,
) -> dict:
    """Upscale an image via Stability's fast super-resolution endpoint.

    Args:
        image_path: Path to the image to upscale.
        scale: Requested upscale factor (2 or 4).

    NOTE(review): `scale` is never sent to the API — it only appears in
    the saved image's label. The /upscale/fast endpoint presumably uses a
    fixed factor; confirm against the Stability API docs if per-factor
    control is actually needed.
    """
    endpoint = "https://api.stability.ai/v2beta/stable-image/upscale/fast"
    request_headers = {
        "Authorization": f"Bearer {self.stability_key}",
        "Accept": "application/json",
    }

    with open(image_path, "rb") as source:
        encoded = base64.b64encode(source.read()).decode("utf-8")

    payload = {"image": encoded, "output_format": "png"}

    async with httpx.AsyncClient(timeout=120) as client:
        response = await client.post(
            endpoint, headers=request_headers, json=payload
        )
        response.raise_for_status()
        data = response.json()

    return self._save_image(
        base64.b64decode(data["image"]), f"Upscaled {scale}x", "stability-upscale", 0
    )

Alternative: Use Replicate with a Real-ESRGAN model as a lower-cost upscaling option (Replicate bills per run, but community models like Real-ESRGAN are typically much cheaper than dedicated upscale APIs):

async def upscale_replicate(
    self,
    image_path: str,
    scale: int = 4,
) -> dict:
    """Upscale using Real-ESRGAN via Replicate.

    Args:
        image_path: Path to the image to upscale.
        scale: Upscale factor passed to the model.

    Returns:
        The metadata dict produced by self._save_image.
    """
    import asyncio
    import replicate

    def _run_model():
        # replicate.run() is a blocking HTTP call; running it in a worker
        # thread (below) keeps it from stalling the event loop. The file
        # must stay open for the duration of the call, so the with-block
        # lives inside the thread too.
        with open(image_path, "rb") as f:
            return replicate.run(
                "nightmareai/real-esrgan:f121d640bd286e1fdc67f9799164c1d5be36ff74576ee11c803ae5b665dd46aa",
                input={
                    "image": f,
                    "scale": scale,
                    "face_enhance": True,
                },
            )

    output = await asyncio.to_thread(_run_model)

    # Download the upscaled image from the URL Replicate returns.
    async with httpx.AsyncClient(timeout=60) as client:
        response = await client.get(str(output))
        # Fail loudly instead of silently saving an error body as an image.
        response.raise_for_status()
        image_data = response.content

    return self._save_image(image_data, f"Upscaled {scale}x", "replicate-upscale", 0)

Batch Generation

Batch generation creates multiple variations of the same prompt in parallel. This is valuable for exploring different possibilities from a single idea.

import asyncio

@router.post("/batch-generate")
async def batch_generate(
    prompt: str = Form(...),
    negative_prompt: str = Form(default=""),
    count: int = Form(default=4, ge=1, le=8),
    width: int = Form(default=1024),
    height: int = Form(default=1024),
    steps: int = Form(default=30),
    cfg_scale: float = Form(default=7.0),
):
    """Generate `count` variations of one prompt concurrently.

    Every variation is launched with seed=0 so each generation gets its
    own random seed. Individual failures are collected and reported in
    the response; successful generations still come through.
    """
    # Build one coroutine per variation; gather runs them all concurrently.
    tasks = [
        image_service.generate_stability(
            prompt=prompt,
            negative_prompt=negative_prompt,
            width=width,
            height=height,
            steps=steps,
            cfg_scale=cfg_scale,
            seed=0,  # 0 = fresh random seed for each variation
        )
        for _ in range(count)
    ]
    outcomes = await asyncio.gather(*tasks, return_exceptions=True)

    # Partition outcomes: exceptions become error strings, results are
    # recorded in the gallery and returned.
    successful, errors = [], []
    for outcome in outcomes:
        if isinstance(outcome, Exception):
            errors.append(str(outcome))
        else:
            generated_images.append(outcome)
            successful.append(outcome)

    return {
        "images": successful,
        "total_requested": count,
        "total_generated": len(successful),
        "errors": errors,
    }
Watch your API costs with batch generation. Each image in a batch costs the same as a single generation. A batch of 8 images costs 8x. Add a confirmation step in the UI before allowing large batches, and consider implementing daily limits per user.

Frontend: Advanced Features Panel

Add tabs to the UI so users can switch between text-to-image, img2img, inpainting, and upscaling:

<!-- Add to templates/index.html, inside the prompt section -->
<div class="mode-tabs">
  <button class="mode-tab active" data-mode="txt2img">Text to Image</button>
  <button class="mode-tab" data-mode="img2img">Image to Image</button>
  <button class="mode-tab" data-mode="inpaint">Inpainting</button>
  <button class="mode-tab" data-mode="upscale">Upscale</button>
  <button class="mode-tab" data-mode="batch">Batch</button>
</div>

<!-- Image upload area (shown for img2img, inpaint, upscale) -->
<div id="uploadArea" class="upload-area hidden">
  <input type="file" id="imageUpload" accept="image/*">
  <label for="imageUpload" class="upload-label">
    Drop an image here or click to upload
  </label>
  <!-- alt attribute required for accessibility/validation; the preview is
       decorative until a file is chosen, so an empty alt is appropriate -->
  <img id="uploadPreview" class="upload-preview hidden" alt="">
</div>

<!-- Canvas for inpainting mask -->
<div id="inpaintArea" class="inpaint-area hidden">
  <canvas id="maskCanvas"></canvas>
  <div class="brush-controls">
    <label>Brush Size:
      <input type="range" id="brushSize" min="5" max="100" value="30">
    </label>
    <button id="clearMask" class="btn-secondary">Clear Mask</button>
  </div>
</div>
// Mode tab switching: move the "active" highlight to the clicked tab and
// show only the input sections that the chosen mode needs.
const modeTabs = document.querySelectorAll(".mode-tab");
const uploadArea = document.getElementById("uploadArea");
const inpaintArea = document.getElementById("inpaintArea");
let currentMode = "txt2img";

// Modes that start from an existing image and therefore need the uploader.
const MODES_WITH_UPLOAD = ["img2img", "inpaint", "upscale"];

modeTabs.forEach((tab) => {
  tab.addEventListener("click", () => {
    for (const other of modeTabs) other.classList.remove("active");
    tab.classList.add("active");
    currentMode = tab.dataset.mode;

    // Upload box for image-based modes; mask canvas only for inpainting.
    uploadArea.classList.toggle(
      "hidden",
      !MODES_WITH_UPLOAD.includes(currentMode)
    );
    inpaintArea.classList.toggle("hidden", currentMode !== "inpaint");

    // Upscaling takes no prompt, so hide the prompt input for it.
    // (`promptInput` is defined elsewhere in the page script.)
    promptInput
      .closest(".prompt-input-group")
      .classList.toggle("hidden", currentMode === "upscale");
  });
});
📌
Checkpoint: Your app now supports five generation modes: text-to-image, image-to-image, inpainting, upscaling, and batch generation. Each mode has its own UI panel and backend endpoint. In the next lesson, you will containerize everything with Docker and deploy to production.