Step 4: Advanced Features
Take your image generator from a basic text-to-image tool to a production-grade creative suite. You will add image-to-image transformation, inpainting (editing parts of images), AI upscaling, and batch generation.
Image-to-Image Generation
Image-to-image (img2img) takes an existing image plus a prompt and generates a new image that follows the composition and structure of the original while applying the prompt's style and content changes. This is useful for style transfer, concept variations, and iterative refinement.
Backend: img2img Endpoint
Add this method to services/image_service.py:
async def img2img_stability(
    self,
    image_path: str,
    prompt: str,
    negative_prompt: str = "",
    strength: float = 0.7,
    steps: int = 30,
    cfg_scale: float = 7.0,
    seed: int = 0,
) -> dict:
    """Generate an image based on an existing image + prompt.

    Args:
        image_path: Path to the source image on disk.
        prompt: Text description of the desired output.
        negative_prompt: Concepts to steer away from ("" = none).
        strength: How much to change (0.0 = identical, 1.0 = ignore source).
        steps: Number of diffusion steps.
        cfg_scale: Classifier-free guidance (prompt adherence) scale.
        seed: Reproducibility seed; 0 means "let the API pick a random one".

    Returns:
        The dict produced by self._save_image for the generated image.

    Raises:
        httpx.HTTPStatusError: If the Stability API returns an error status.
        OSError: If the source image cannot be read.
    """
    # Hoisted to the top of the function (was previously buried between
    # the file read and the encode call). Local import keeps the module's
    # import surface unchanged.
    import base64

    url = "https://api.stability.ai/v2beta/stable-image/generate/sd3"
    headers = {
        "Authorization": f"Bearer {self.stability_key}",
        "Accept": "application/json",
    }
    # Read and base64-encode the source image for the JSON payload.
    with open(image_path, "rb") as f:
        image_b64 = base64.b64encode(f.read()).decode("utf-8")
    payload = {
        "prompt": prompt,
        "negative_prompt": negative_prompt,
        "image": image_b64,
        "strength": strength,
        "steps": steps,
        "cfg_scale": cfg_scale,
        "output_format": "png",
    }
    # Only send an explicit seed; omitting it lets the API randomize.
    if seed:
        payload["seed"] = seed
    # NOTE(review): Stability's v2beta endpoints generally expect
    # multipart/form-data rather than a JSON body — confirm against the
    # current Stability REST API docs.
    async with httpx.AsyncClient(timeout=120) as client:
        response = await client.post(url, headers=headers, json=payload)
        response.raise_for_status()
        data = response.json()
    image_data = base64.b64decode(data["image"])
    return self._save_image(image_data, prompt, "stability-img2img", seed)
Add the route in routers/generate.py:
from fastapi import UploadFile, File, Form
import shutil
@router.post("/img2img")
async def img2img(
    image: UploadFile = File(...),
    prompt: str = Form(...),
    negative_prompt: str = Form(default=""),
    strength: float = Form(default=0.7),
    steps: int = Form(default=30),
    cfg_scale: float = Form(default=7.0),
    seed: int = Form(default=0),
):
    """Generate a new image based on an uploaded image and prompt.

    The upload is spooled to a temporary file inside IMAGES_DIR, handed to
    the image service, and always deleted afterwards.
    """
    import uuid
    from pathlib import Path

    # SECURITY: never build a filesystem path from the raw client-supplied
    # filename — a name like "../../etc/passwd" would escape IMAGES_DIR.
    # A random name also prevents collisions (and premature deletion in the
    # finally block) when concurrent uploads share a filename.
    suffix = Path(image.filename or "upload").suffix or ".png"
    temp_path = IMAGES_DIR / f"temp_{uuid.uuid4().hex}{suffix}"
    with open(temp_path, "wb") as f:
        shutil.copyfileobj(image.file, f)
    try:
        result = await image_service.img2img_stability(
            image_path=str(temp_path),
            prompt=prompt,
            negative_prompt=negative_prompt,
            strength=strength,
            steps=steps,
            cfg_scale=cfg_scale,
            seed=seed,
        )
        generated_images.append(result)
        return result
    finally:
        # Clean up the temp file even when generation fails.
        temp_path.unlink(missing_ok=True)
Understanding the Strength Parameter
| Strength | Behavior | Use Case |
|---|---|---|
| 0.1 - 0.3 | Subtle changes, keeps most of the original | Color correction, minor style tweaks |
| 0.4 - 0.6 | Moderate changes, keeps composition | Style transfer, mood changes |
| 0.7 - 0.8 | Major changes, loosely follows original | Reimagining scenes, concept exploration |
| 0.9 - 1.0 | Almost completely new image | Using source only as rough layout guide |
Inpainting
Inpainting lets users edit specific regions of an image while keeping the rest unchanged. The user provides a mask (white = edit, black = keep) and a prompt describing what should appear in the masked area.
async def inpaint_stability(
    self,
    image_path: str,
    mask_path: str,
    prompt: str,
    negative_prompt: str = "",
    steps: int = 30,
    cfg_scale: float = 7.0,
    seed: int = 0,
) -> dict:
    """Edit a specific region of an image using inpainting.

    Args:
        image_path: Path to the original image.
        mask_path: Path to the mask image (white = area to edit,
            black = area to keep).
        prompt: Description of what to generate in the masked area.
        negative_prompt: Concepts to avoid in the edited region.
        steps: Number of diffusion steps.
        cfg_scale: Prompt-adherence scale.
        seed: Reproducibility seed; 0 = random on the API side.
    """
    def _b64(path: str) -> str:
        # Base64-encode a file's bytes for the JSON payload.
        with open(path, "rb") as fh:
            return base64.b64encode(fh.read()).decode("utf-8")

    payload = {
        "prompt": prompt,
        "negative_prompt": negative_prompt,
        "image": _b64(image_path),
        "mask": _b64(mask_path),
        "steps": steps,
        "cfg_scale": cfg_scale,
        "output_format": "png",
    }
    # A zero seed is simply omitted so the API picks one.
    if seed:
        payload["seed"] = seed

    headers = {
        "Authorization": f"Bearer {self.stability_key}",
        "Accept": "application/json",
    }
    async with httpx.AsyncClient(timeout=120) as client:
        response = await client.post(
            "https://api.stability.ai/v2beta/stable-image/edit/inpaint",
            headers=headers,
            json=payload,
        )
        response.raise_for_status()
        data = response.json()
    return self._save_image(
        base64.b64decode(data["image"]), prompt, "stability-inpaint", seed
    )
Creating a Canvas-Based Mask Editor
Add a simple mask painter to the frontend so users can draw masks directly on their images:
// Mask editor component for inpainting.
// The user paints on the visible canvas (shown as semi-transparent red)
// while an offscreen canvas accumulates the actual mask sent to the API:
// white = area to edit, black = area to keep.
class MaskEditor {
  constructor(canvasId, imageUrl) {
    this.canvas = document.getElementById(canvasId);
    this.ctx = this.canvas.getContext("2d");
    this.painting = false;
    this.brushSize = 30;
    // Offscreen canvas that holds the black/white mask.
    this.maskCanvas = document.createElement("canvas");
    this.maskCtx = this.maskCanvas.getContext("2d");
    // Load the image, then size BOTH canvases to match it.
    // BUGFIX: the mask canvas must be sized and black-filled here, up
    // front. Assigning a canvas's width/height erases its contents, so
    // doing it at export time (as getMaskBlob previously did) wiped every
    // painted stroke and always produced a solid-black mask.
    this.image = new Image();
    this.image.crossOrigin = "anonymous";
    this.image.onload = () => {
      this.canvas.width = this.image.width;
      this.canvas.height = this.image.height;
      this.ctx.drawImage(this.image, 0, 0);
      this.maskCanvas.width = this.image.width;
      this.maskCanvas.height = this.image.height;
      this.clearMaskToBlack();
    };
    this.image.src = imageUrl;
    this.setupEvents();
  }
  // Fill the entire mask with black ("keep everything").
  clearMaskToBlack() {
    this.maskCtx.fillStyle = "black";
    this.maskCtx.fillRect(0, 0, this.maskCanvas.width, this.maskCanvas.height);
  }
  setupEvents() {
    this.canvas.addEventListener("mousedown", (e) => {
      this.painting = true;
      this.paint(e);
    });
    this.canvas.addEventListener("mousemove", (e) => {
      if (this.painting) this.paint(e);
    });
    this.canvas.addEventListener("mouseup", () => {
      this.painting = false;
    });
    this.canvas.addEventListener("mouseleave", () => {
      this.painting = false;
    });
    // Touch support for mobile/tablet users.
    this.canvas.addEventListener("touchstart", (e) => {
      e.preventDefault();
      this.painting = true;
      this.paintTouch(e);
    });
    this.canvas.addEventListener("touchmove", (e) => {
      e.preventDefault();
      if (this.painting) this.paintTouch(e);
    });
    this.canvas.addEventListener("touchend", () => {
      this.painting = false;
    });
  }
  paint(e) {
    // Map viewport coordinates to canvas pixel coordinates; the canvas
    // may be displayed at a different size than its backing store.
    const rect = this.canvas.getBoundingClientRect();
    const scaleX = this.canvas.width / rect.width;
    const scaleY = this.canvas.height / rect.height;
    const x = (e.clientX - rect.left) * scaleX;
    const y = (e.clientY - rect.top) * scaleY;
    // Visible feedback: semi-transparent red on the display canvas.
    this.ctx.fillStyle = "rgba(255, 0, 0, 0.4)";
    this.ctx.beginPath();
    this.ctx.arc(x, y, this.brushSize, 0, Math.PI * 2);
    this.ctx.fill();
    // Actual mask data: white ("edit here") on the offscreen canvas.
    this.maskCtx.fillStyle = "white";
    this.maskCtx.beginPath();
    this.maskCtx.arc(x, y, this.brushSize, 0, Math.PI * 2);
    this.maskCtx.fill();
  }
  paintTouch(e) {
    // Delegate to paint() with a minimal mouse-event-like object.
    const touch = e.touches[0];
    this.paint({
      clientX: touch.clientX,
      clientY: touch.clientY,
    });
  }
  getMaskBlob() {
    // The mask canvas already holds the black background plus the white
    // strokes — just export it. Do NOT resize the canvas here: assigning
    // width/height would erase everything the user painted.
    return new Promise((resolve) => {
      this.maskCanvas.toBlob(resolve, "image/png");
    });
  }
  reset() {
    // Restore the untouched image and an all-black ("keep") mask.
    // BUGFIX: the previous clearRect left the mask transparent instead of
    // black, which is not a valid "keep everything" state.
    this.ctx.drawImage(this.image, 0, 0);
    this.clearMaskToBlack();
  }
}
Image Upscaling
Upscaling uses AI to increase image resolution while adding realistic detail. This is essential when users want to print or use images at larger sizes.
async def upscale_stability(
    self,
    image_path: str,
    scale: int = 2,
) -> dict:
    """Upscale an image using AI-powered super resolution.

    Args:
        image_path: Path to the image to upscale.
        scale: Upscale factor (2 or 4). Used for labeling the result.

    Returns:
        The dict produced by self._save_image for the upscaled image.

    Raises:
        ValueError: If scale is not 2 or 4.
        httpx.HTTPStatusError: If the Stability API returns an error status.
    """
    # Enforce the documented contract instead of silently accepting junk.
    if scale not in (2, 4):
        raise ValueError(f"scale must be 2 or 4, got {scale}")
    # NOTE(review): `scale` is only used in the saved label below — the
    # "fast" upscale endpoint is not sent an upscale-factor parameter, so
    # the actual factor is whatever the API applies. Confirm against the
    # Stability docs if a user-selectable factor is required.
    url = "https://api.stability.ai/v2beta/stable-image/upscale/fast"
    headers = {
        "Authorization": f"Bearer {self.stability_key}",
        "Accept": "application/json",
    }
    with open(image_path, "rb") as f:
        image_b64 = base64.b64encode(f.read()).decode("utf-8")
    payload = {
        "image": image_b64,
        "output_format": "png",
    }
    async with httpx.AsyncClient(timeout=120) as client:
        response = await client.post(url, headers=headers, json=payload)
        response.raise_for_status()
        data = response.json()
    image_data = base64.b64decode(data["image"])
    return self._save_image(image_data, f"Upscaled {scale}x", "stability-upscale", 0)
Alternative: Use Replicate with a Real-ESRGAN model for low-cost, pay-per-use upscaling that does not require a Stability AI key:
async def upscale_replicate(
    self,
    image_path: str,
    scale: int = 4,
) -> dict:
    """Upscale using Real-ESRGAN via Replicate.

    Args:
        image_path: Path to the image to upscale.
        scale: Upscale factor passed to the Real-ESRGAN model.

    Returns:
        The dict produced by self._save_image for the upscaled image.
    """
    import asyncio
    import replicate

    def _run_prediction() -> object:
        # Keep the file open for the duration of the call — the client
        # streams it as part of the request.
        with open(image_path, "rb") as f:
            return replicate.run(
                "nightmareai/real-esrgan:f121d640bd286e1fdc67f9799164c1d5be36ff74576ee11c803ae5b665dd46aa",
                input={
                    "image": f,
                    "scale": scale,
                    "face_enhance": True,
                },
            )

    # BUGFIX: replicate.run is a blocking synchronous call; invoking it
    # directly inside this coroutine stalled the whole event loop for the
    # duration of the (potentially long) prediction. Run it in a worker
    # thread instead.
    output = await asyncio.to_thread(_run_prediction)
    # Download the upscaled image.
    async with httpx.AsyncClient(timeout=60) as client:
        response = await client.get(str(output))
        # Fail loudly instead of saving an error page as an "image".
        response.raise_for_status()
        image_data = response.content
    return self._save_image(image_data, f"Upscaled {scale}x", "replicate-upscale", 0)
Batch Generation
Batch generation creates multiple variations of the same prompt in parallel. This is valuable for exploring different possibilities from a single idea.
import asyncio


@router.post("/batch-generate")
async def batch_generate(
    prompt: str = Form(...),
    negative_prompt: str = Form(default=""),
    count: int = Form(default=4, ge=1, le=8),
    width: int = Form(default=1024),
    height: int = Form(default=1024),
    steps: int = Form(default=30),
    cfg_scale: float = Form(default=7.0),
):
    """Generate multiple variations of the same prompt concurrently.

    Returns a summary dict with the successful images, the requested and
    generated counts, and any per-variation error messages.
    """
    # One coroutine per variation; seed=0 asks the backend for a random
    # seed, so each result differs even though the prompt is identical.
    tasks = [
        image_service.generate_stability(
            prompt=prompt,
            negative_prompt=negative_prompt,
            width=width,
            height=height,
            steps=steps,
            cfg_scale=cfg_scale,
            seed=0,
        )
        for _ in range(count)
    ]
    # return_exceptions=True: one failed generation must not cancel the rest.
    results = await asyncio.gather(*tasks, return_exceptions=True)
    # Partition results into successes (also recorded in the gallery) and errors.
    successful = []
    errors = []
    for result in results:
        if isinstance(result, Exception):
            errors.append(str(result))
        else:
            generated_images.append(result)
            successful.append(result)
    return {
        "images": successful,
        "total_requested": count,
        "total_generated": len(successful),
        "errors": errors,
    }
Frontend: Advanced Features Panel
Add tabs to the UI so users can switch between text-to-image, img2img, inpainting, and upscaling:
<!-- Add to templates/index.html, inside the prompt section -->
<!-- Generation-mode selector; the tab script toggles the panels below
     based on each button's data-mode value. -->
<div class="mode-tabs">
  <button class="mode-tab active" data-mode="txt2img">Text to Image</button>
  <button class="mode-tab" data-mode="img2img">Image to Image</button>
  <button class="mode-tab" data-mode="inpaint">Inpainting</button>
  <button class="mode-tab" data-mode="upscale">Upscale</button>
  <button class="mode-tab" data-mode="batch">Batch</button>
</div>
<!-- Image upload area (shown for img2img, inpaint, upscale) -->
<div id="uploadArea" class="upload-area hidden">
  <input type="file" id="imageUpload" accept="image/*">
  <label for="imageUpload" class="upload-label">
    Drop an image here or click to upload
  </label>
  <!-- Preview of the chosen file; unhidden once an image is selected. -->
  <img id="uploadPreview" class="upload-preview hidden">
</div>
<!-- Canvas for inpainting mask -->
<!-- The MaskEditor class attaches to #maskCanvas and reads #brushSize. -->
<div id="inpaintArea" class="inpaint-area hidden">
  <canvas id="maskCanvas"></canvas>
  <div class="brush-controls">
    <label>Brush Size:
      <input type="range" id="brushSize" min="5" max="100" value="30">
    </label>
    <button id="clearMask" class="btn-secondary">Clear Mask</button>
  </div>
</div>
// Mode tab switching logic.
// Highlights the clicked tab and shows/hides the upload, inpaint and
// prompt sections appropriate to the selected generation mode.
const modeTabs = document.querySelectorAll(".mode-tab");
const uploadArea = document.getElementById("uploadArea");
const inpaintArea = document.getElementById("inpaintArea");
// Current mode, one of: txt2img, img2img, inpaint, upscale, batch.
let currentMode = "txt2img";
modeTabs.forEach((tab) => {
  tab.addEventListener("click", () => {
    // Move the "active" highlight to the clicked tab.
    modeTabs.forEach((t) => t.classList.remove("active"));
    tab.classList.add("active");
    currentMode = tab.dataset.mode;
    // Show/hide relevant sections
    const needsUpload = ["img2img", "inpaint", "upscale"].includes(currentMode);
    uploadArea.classList.toggle("hidden", !needsUpload);
    inpaintArea.classList.toggle("hidden", currentMode !== "inpaint");
    // Hide prompt for upscale mode (not needed)
    // NOTE(review): assumes `promptInput` is defined elsewhere in this
    // script and sits inside a .prompt-input-group wrapper — verify.
    promptInput.closest(".prompt-input-group")
      .classList.toggle("hidden", currentMode === "upscale");
  });
});
Lilly Tech Systems