by optimum-intel-internal-testing
Open source · 115k downloads · 0 likes
The *Stable Diffusion 3 Tiny Random* model is a lightweight and simplified version of Stable Diffusion 3, primarily designed for debugging and testing purposes. Unlike its parent model, it employs random parameters and a reduced architecture, making it less powerful but more efficient and faster to run. Its capabilities are limited to generating images from text descriptions, though the quality and consistency of the results are lower than those of more refined versions. It is particularly suitable for developers who wish to experiment or validate features without consuming significant resources. This model stands out for its lightness and simplicity, though at the expense of precision and the refinement of the generated images.
This pipeline is intended for debugging. It is adapted from stabilityai/stable-diffusion-3-medium-diffusers with smaller size and randomly initialized parameters.
# Usage example: generate an image with the tiny random SD3 checkpoint.
import torch
from diffusers import StableDiffusion3Pipeline

# Load the checkpoint in half precision and place it on the GPU.
pipe = StableDiffusion3Pipeline.from_pretrained(
    "yujiepan/stable-diffusion-3-tiny-random", torch_dtype=torch.float16
).to("cuda")

# A very short denoising schedule — output quality is irrelevant for a
# randomly-initialized debug model; this only checks the plumbing.
result = pipe(
    "A cat holding a sign that says hello world",
    negative_prompt="",
    num_inference_steps=2,
    guidance_scale=7.0,
)
image = result.images[0]
image
import importlib
import torch
import transformers
import diffusers
import rich
def get_original_model_configs(pipeline_cls: type[diffusers.DiffusionPipeline], pipeline_id: str):
    """Collect the config of every model component of a hub pipeline.

    Reads the pipeline's ``model_index.json`` via ``load_config`` and, for
    each component subfolder, downloads just the config (never the weights).

    Args:
        pipeline_cls: The diffusers pipeline class (e.g. StableDiffusion3Pipeline).
        pipeline_id: Hub repo id of the reference pipeline.

    Returns:
        dict mapping subfolder name to either a ``transformers.PretrainedConfig``
        instance (for transformers models) or a plain config dict (for
        diffusers models). Tokenizers and schedulers are skipped here.
    """
    pipeline_config: dict[str, list[str]] = pipeline_cls.load_config(pipeline_id)
    model_configs = {}
    for subfolder, import_strings in pipeline_config.items():
        # Keys like "_class_name" are pipeline metadata, not components.
        if subfolder.startswith("_"):
            continue
        # import_strings is e.g. ["transformers", "CLIPTextModelWithProjection"].
        module_path, cls_name = ".".join(import_strings[:-1]), import_strings[-1]
        cls = getattr(importlib.import_module(module_path), cls_name)
        if issubclass(cls, transformers.PreTrainedModel):
            # transformers component: typed PretrainedConfig object.
            model_configs[subfolder] = cls.config_class.from_pretrained(
                pipeline_id, subfolder=subfolder
            )
        elif issubclass(cls, diffusers.ModelMixin) and issubclass(cls, diffusers.ConfigMixin):
            # diffusers component: load_config yields a plain dict.
            model_configs[subfolder] = cls.load_config(pipeline_id, subfolder=subfolder)
    return model_configs
def load_pipeline(pipeline_cls: type[diffusers.DiffusionPipeline], pipeline_id: str, model_configs: dict[str, dict]):
    """Build a pipeline whose models are freshly (randomly) initialized.

    Models are instantiated from the (possibly shrunken) configs in
    ``model_configs``; tokenizers and schedulers carry no trainable weights,
    so they are loaded as-is from the reference repo.

    Args:
        pipeline_cls: The diffusers pipeline class to assemble.
        pipeline_id: Hub repo id used for the pipeline layout, tokenizers
            and schedulers.
        model_configs: Per-subfolder configs as returned (and possibly
            edited) from ``get_original_model_configs``.

    Returns:
        A ``pipeline_cls`` instance with randomly initialized model weights.

    Raises:
        ValueError: If a component class is none of the recognized kinds.
    """
    pipeline_config: dict[str, list[str]] = pipeline_cls.load_config(pipeline_id)
    components = {}
    for subfolder, import_strings in pipeline_config.items():
        # Skip pipeline metadata entries such as "_class_name".
        if subfolder.startswith("_"):
            continue
        module = importlib.import_module(".".join(import_strings[:-1]))
        cls = getattr(module, import_strings[-1])
        # Fixed: was `print(f"Loading:", ...)` — an f-string with no
        # placeholders passed as a separate positional argument.
        print("Loading:", ".".join(import_strings))
        if issubclass(cls, transformers.PreTrainedModel):
            # Constructing from config gives random initialization.
            config = model_configs[subfolder]
            component = cls(config)
        elif issubclass(cls, transformers.PreTrainedTokenizerBase):
            component = cls.from_pretrained(pipeline_id, subfolder=subfolder)
        elif issubclass(cls, diffusers.ModelMixin) and issubclass(cls, diffusers.ConfigMixin):
            config = model_configs[subfolder]
            component = cls.from_config(config)
        elif issubclass(cls, diffusers.SchedulerMixin) and issubclass(cls, diffusers.ConfigMixin):
            component = cls.from_pretrained(pipeline_id, subfolder=subfolder)
        else:
            # Fixed: the original did `raise (f"...")`, which raises a bare
            # string and therefore triggers "TypeError: exceptions must
            # derive from BaseException", losing the message entirely.
            raise ValueError(f"unknown {subfolder}: {import_strings}")
        components[subfolder] = component
    pipeline = pipeline_cls(**components)
    return pipeline
def get_pipeline():
    """Create the tiny, randomly-initialized SD3 pipeline.

    Starts from the config layout of
    ``stabilityai/stable-diffusion-3-medium-diffusers`` and shrinks every
    model component (hidden sizes, head counts, layer counts) so the
    resulting pipeline is fast enough for debugging/testing.

    Returns:
        A StableDiffusion3Pipeline with tiny, random weights.
    """
    torch.manual_seed(42)
    pipeline_id = "stabilityai/stable-diffusion-3-medium-diffusers"
    pipeline_cls = diffusers.StableDiffusion3Pipeline
    model_configs = get_original_model_configs(pipeline_cls, pipeline_id)
    rich.print(model_configs)

    hidden = 8  # common tiny width shared across all shrunken components

    # Both CLIP text encoders receive identical tiny settings
    # (typed PretrainedConfig objects -> attribute assignment).
    for name in ("text_encoder", "text_encoder_2"):
        clip_cfg = model_configs[name]
        clip_cfg.hidden_size = hidden
        clip_cfg.intermediate_size = hidden * 2
        clip_cfg.num_attention_heads = 2
        clip_cfg.num_hidden_layers = 2
        clip_cfg.projection_dim = hidden

    # T5 text encoder uses its own parameter naming.
    t5_cfg = model_configs["text_encoder_3"]
    t5_cfg.d_model = hidden
    t5_cfg.d_ff = hidden * 2
    t5_cfg.d_kv = hidden // 2
    t5_cfg.num_heads = 2
    t5_cfg.num_layers = 2

    # MMDiT transformer config is a plain dict.
    model_configs["transformer"].update(
        num_layers=2,
        num_attention_heads=2,
        attention_head_dim=hidden // 2,
        pooled_projection_dim=hidden * 2,
        joint_attention_dim=hidden,
        caption_projection_dim=hidden,
    )

    # VAE config is a plain dict; latent_channels must stay 16 to match SD3.
    model_configs["vae"].update(
        layers_per_block=1,
        block_out_channels=[hidden] * 4,
        norm_num_groups=2,
        latent_channels=16,
    )

    return load_pipeline(pipeline_cls, pipeline_id, model_configs)
# Build the tiny pipeline, smoke-test it, then publish.
pipeline = get_pipeline()

# A two-step generation verifies the shrunken components wire together
# end to end before anything is uploaded.
output = pipeline(
    "hello world",
    negative_prompt="runtime error",
    num_inference_steps=2,
    guidance_scale=7.0,
)
image = output.images[0]

# Cast to fp16 before saving/uploading to keep the artifact small.
pipeline = pipeline.to(torch.float16)
pipeline.save_pretrained("/tmp/stable-diffusion-3-tiny-random")
pipeline.push_to_hub("yujiepan/stable-diffusion-3-tiny-random")