by xinsir
Open source · 11k downloads · 275 likes
ControlNet Tile SDXL 1.0 is an AI model specialized in enhancing and transforming images derived from SDXL. It excels in three key areas: denoising and restoring details in blurry images, generating stylistic variations similar to MidJourney, and increasing resolution (super-resolution) with upscaling up to three times while preserving the original proportions. This model stands out for its versatility, capable of processing images in any format or aspect ratio and optimizing them for diverse applications, such as restoring old photos, creating artwork, or enhancing visuals for professional projects. Its "tile-based" approach ensures consistent quality across the entire image, even when significantly enlarged.











Supports any aspect ratio and any upscaling factor; the following examples are upscaled 3 * 3 times.




code reference: https://huggingface.co/TTPlanet/TTPLanet_SDXL_Controlnet_Tile_Realistic/blob/main/TTP_tile_preprocessor_v5.py
https://github.com/lllyasviel/ControlNet-v1-1-nightly/blob/main/gradio_tile.py
import random

import cv2
import numpy as np
import torch
from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL
from diffusers import DDIMScheduler, EulerAncestralDiscreteScheduler
from PIL import Image

from guided_filter import FastGuidedFilter # I have upload this file in this repo
def resize_image_control(control_image, resolution):
    """Cut a randomly positioned window out of an image array.

    Args:
        control_image: 3-dimensional array indexed as (row, column, channel);
            the first two axes are read as height and width.
        resolution: ``(width, height)`` of the window to cut out.

    Returns:
        A tuple ``(crop_image, crop_w, crop_h)``: the cropped window, then the
        column offset and row offset of its top-left corner inside
        ``control_image``.

    Raises:
        ValueError: If ``control_image`` is smaller than ``resolution`` along
            either axis, so that no window of the requested size fits.
    """
    HH, WW, _ = control_image.shape
    crop_width, crop_height = resolution[0], resolution[1]

    # Without this check random.randint() is handed an empty range and fails
    # with a message that says nothing about image sizes.
    if crop_height > HH or crop_width > WW:
        raise ValueError(
            f"image of size {WW}x{HH} is smaller than the requested crop "
            f"{crop_width}x{crop_height}"
        )

    # Row offset is drawn before the column offset, as in the original code,
    # so a seeded run keeps producing the same crops.
    crop_h = random.randint(0, HH - crop_height)
    crop_w = random.randint(0, WW - crop_width)
    crop_image = control_image[
        crop_h:crop_h + crop_height,
        crop_w:crop_w + crop_width,
        :,
    ]
    return crop_image, crop_w, crop_h
def apply_gaussian_blur(image_np, ksize=5, sigmaX=1.0):
    """Blur an image array with ``cv2.GaussianBlur``.

    Args:
        image_np: Image array handed straight to OpenCV.
        ksize: Side length of the square blur kernel. An even value is bumped
            up by one, because the kernel size must be odd.
        sigmaX: Forwarded to ``cv2.GaussianBlur`` as ``sigmaX``.

    Returns:
        The blurred image returned by ``cv2.GaussianBlur``.
    """
    kernel = ksize + 1 if ksize % 2 == 0 else ksize  # ksize must be odd
    return cv2.GaussianBlur(image_np, (kernel, kernel), sigmaX=sigmaX)
def apply_guided_filter(image_np, radius, eps, scale):
    """Run ``FastGuidedFilter`` over an image array.

    The same array is passed both to the ``FastGuidedFilter`` constructor and
    to its ``filter`` method.

    Args:
        image_np: Image array to filter.
        radius: Forwarded to ``FastGuidedFilter`` unchanged.
        eps: Forwarded to ``FastGuidedFilter`` unchanged.
        scale: Forwarded to ``FastGuidedFilter`` unchanged.

    Returns:
        Whatever ``FastGuidedFilter.filter`` returns for ``image_np``.
    """
    guided = FastGuidedFilter(image_np, radius, eps, scale)
    return guided.filter(image_np)
# Example script: randomly degrade an input image (blur / guided filter /
# down-and-up resize) and let the tile ControlNet regenerate it.
controlnet_conditioning_scale = 1.0
prompt = "your prompt, the longer the better, you can describe it as detail as possible"
negative_prompt = 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'

eulera_scheduler = EulerAncestralDiscreteScheduler.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler"
)

controlnet = ControlNetModel.from_pretrained(
    "xinsir/controlnet-tile-sdxl-1.0",
    torch_dtype=torch.float16,
)

# when test with other base model, you need to change the vae also.
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)

# NOTE(review): the pipeline is never moved to a GPU in this snippet; fp16
# inference normally wants `pipe.to("cuda")` - confirm the target device.
pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
    safety_checker=None,
    torch_dtype=torch.float16,
    scheduler=eulera_scheduler,
)

image_path = "your original image path"
controlnet_img = cv2.imread(image_path)
# cv2.imread signals an unreadable file by returning None instead of raising.
if controlnet_img is None:
    raise FileNotFoundError(f"could not read image: {image_path}")

# Rescale so the area is about 1024 * 1024 while keeping the aspect ratio.
height, width, _ = controlnet_img.shape
ratio = np.sqrt(1024. * 1024. / (width * height))
W, H = int(width * ratio), int(height * ratio)
controlnet_img = cv2.resize(controlnet_img, (W, H))

# Draw the degradation parameters at random.
blur_strength = random.choice([i / 10. for i in range(10, 201, 2)])
radius = random.choice(list(range(1, 40, 2)))
eps = random.choice([i / 1000. for i in range(1, 101, 2)])
scale_factor = random.choice([i / 10. for i in range(10, 181, 5)])

if random.random() > 0.5:
    controlnet_img = apply_gaussian_blur(controlnet_img, ksize=int(blur_strength), sigmaX=blur_strength / 2)

if random.random() > 0.5:
    # Apply Guided Filter
    controlnet_img = apply_guided_filter(controlnet_img, radius, eps, scale_factor)

# Resize image: shrink by scale_factor, then blow back up to (W, H).
# NOTE(review): the source lost its indentation; these two resizes are assumed
# to run unconditionally rather than inside the guided-filter branch - confirm.
controlnet_img = cv2.resize(controlnet_img, (int(W / scale_factor), int(H / scale_factor)), interpolation=cv2.INTER_AREA)
controlnet_img = cv2.resize(controlnet_img, (W, H), interpolation=cv2.INTER_CUBIC)

# OpenCV loads BGR; PIL expects RGB.
controlnet_img = cv2.cvtColor(controlnet_img, cv2.COLOR_BGR2RGB)
controlnet_img = Image.fromarray(controlnet_img)

# need to resize the image resolution to 1024 * 1024 or same bucket resolution to get the best performance
# Generate at the size of the resized control image; these two names were
# previously used without ever being assigned.
new_width, new_height = W, H
images = pipe(
    prompt,
    negative_prompt=negative_prompt,
    image=controlnet_img,
    controlnet_conditioning_scale=controlnet_conditioning_scale,
    width=new_width,
    height=new_height,
    num_inference_steps=30,
).images

images[0].save(f"your image save path, png format is usually better than jpg or webp in terms of image quality but got much bigger")
Using a more detailed prompt to regenerate can help!
from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL
from diffusers import DDIMScheduler, EulerAncestralDiscreteScheduler
from PIL import Image
import torch
import numpy as np
import cv2
# Example script: feed the resized input image to the tile ControlNet as-is
# and regenerate it from the prompt.
controlnet_conditioning_scale = 1.0
prompt = "your prompt, the longer the better, you can describe it as detail as possible"
negative_prompt = 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'

eulera_scheduler = EulerAncestralDiscreteScheduler.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler"
)

controlnet = ControlNetModel.from_pretrained(
    "xinsir/controlnet-tile-sdxl-1.0",
    torch_dtype=torch.float16,
)

# when test with other base model, you need to change the vae also.
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)

# NOTE(review): the pipeline is never moved to a GPU in this snippet; fp16
# inference normally wants `pipe.to("cuda")` - confirm the target device.
pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
    safety_checker=None,
    torch_dtype=torch.float16,
    scheduler=eulera_scheduler,
)

image_path = "your original image path"
controlnet_img = cv2.imread(image_path)
# cv2.imread signals an unreadable file by returning None instead of raising.
if controlnet_img is None:
    raise FileNotFoundError(f"could not read image: {image_path}")

# Rescale so the area is about 1024 * 1024 while keeping the aspect ratio.
height, width, _ = controlnet_img.shape
ratio = np.sqrt(1024. * 1024. / (width * height))
W, H = int(width * ratio), int(height * ratio)
controlnet_img = cv2.resize(controlnet_img, (W, H))

# OpenCV loads BGR; PIL expects RGB.
controlnet_img = cv2.cvtColor(controlnet_img, cv2.COLOR_BGR2RGB)
controlnet_img = Image.fromarray(controlnet_img)

# need to resize the image resolution to 1024 * 1024 or same bucket resolution to get the best performance
# Generate at the size of the resized control image; these two names were
# previously used without ever being assigned.
new_width, new_height = W, H
images = pipe(
    prompt,
    negative_prompt=negative_prompt,
    image=controlnet_img,
    controlnet_conditioning_scale=controlnet_conditioning_scale,
    width=new_width,
    height=new_height,
    num_inference_steps=30,
).images

images[0].save(f"your image save path, png format is usually better than jpg or webp in terms of image quality but got much bigger")
Performance may be unstable; the next version is being optimized!
from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL
from diffusers import DDIMScheduler, EulerAncestralDiscreteScheduler
from PIL import Image
import torch
import numpy as np
import cv2
# Example script: 3 x 3 tiled upscale. The input is cut into a grid of tiles,
# every tile is enlarged to the full working size and regenerated, and the
# results are stitched into one image three times as wide and as tall.
import random  # needed for the seed below; this snippet's import header lacks it

controlnet_conditioning_scale = 1.0
prompt = "your prompt, the longer the better, you can describe it as detail as possible"
negative_prompt = 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'

eulera_scheduler = EulerAncestralDiscreteScheduler.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler"
)

controlnet = ControlNetModel.from_pretrained(
    "xinsir/controlnet-tile-sdxl-1.0",
    torch_dtype=torch.float16,
)

# when test with other base model, you need to change the vae also.
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)

pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
    safety_checker=None,
    torch_dtype=torch.float16,
    scheduler=eulera_scheduler,
)
# The generator created further down lives on 'cuda', so the pipeline has to
# run on the same device.
pipe.to("cuda")

image_path = "your original image path"
controlnet_img = cv2.imread(image_path)
# cv2.imread signals an unreadable file by returning None instead of raising.
if controlnet_img is None:
    raise FileNotFoundError(f"could not read image: {image_path}")

# Rescale so the area is about 1024 * 1024, then round each side down to a
# multiple of 48 so that it divides evenly into three tiles.
height, width, _ = controlnet_img.shape
ratio = np.sqrt(1024. * 1024. / (width * height))
W, H = int(width * ratio) // 48 * 48, int(height * ratio) // 48 * 48
controlnet_img = cv2.resize(controlnet_img, (W, H))

# OpenCV loads BGR; PIL expects RGB.
controlnet_img = cv2.cvtColor(controlnet_img, cv2.COLOR_BGR2RGB)
controlnet_img = Image.fromarray(controlnet_img)

# need to resize the image resolution to 1024 * 1024 or same bucket resolution to get the best performance
grid = 3  # tiles per side
target_width = W // grid
target_height = H // grid

# Collect the tiles row by row; the list must exist before the first append.
images = []
for i in range(grid):  # rows
    for j in range(grid):  # columns
        left = j * target_width
        top = i * target_height
        right = left + target_width
        bottom = top + target_height
        # Crop the image at the computed bounds, then enlarge the tile to the
        # full working size.
        cropped_image = controlnet_img.crop((left, top, right, bottom))
        cropped_image = cropped_image.resize((W, H))
        images.append(cropped_image)

seed = random.randint(0, 2147483647)
generator = torch.Generator('cuda').manual_seed(seed)

new_width, new_height = W, H
result_images = []
for sub_img in images:
    # NOTE(review): `control_image` is not a documented argument of
    # StableDiffusionXLControlNetPipeline (the conditioning image is `image`);
    # it is kept as in the original - confirm whether it is needed.
    out = pipe(
        prompt=[prompt],
        image=sub_img,
        control_image=sub_img,
        negative_prompt=[negative_prompt],
        generator=generator,
        controlnet_conditioning_scale=controlnet_conditioning_scale,
        width=new_width,
        height=new_height,
        num_inference_steps=30,
        crops_coords_top_left=(W, H),
        target_size=(W, H),
        original_size=(W * 2, H * 2),
    )
    result_images.append(out.images[0])

new_im = Image.new('RGB', (new_width * grid, new_height * grid))
# Paste the regenerated tiles onto the new image in the same row-major order
# in which they were cropped.
for index, tile in enumerate(result_images):
    row, col = divmod(index, grid)
    new_im.paste(tile, (col * new_width, row * new_height))

new_im.save(f"your image save path, png format is usually better than jpg or webp in terms of image quality but got much bigger")