par optimum-intel-internal-testing
Open source · 111k downloads · 0 likes
Le modèle *Stable Diffusion 3 Tiny Random* est une version allégée et simplifiée de Stable Diffusion 3, conçue principalement à des fins de débogage et de tests. Contrairement à son modèle parent, il utilise des paramètres aléatoires et une architecture réduite, ce qui le rend moins performant mais plus léger et rapide à exécuter. Ses capacités se limitent à générer des images à partir de descriptions textuelles, bien que la qualité et la cohérence des résultats soient inférieures à celles des versions plus abouties. Il convient particulièrement aux développeurs qui souhaitent expérimenter ou valider des fonctionnalités sans mobiliser des ressources importantes. Ce modèle se distingue par sa légèreté et sa simplicité, au détriment de la précision et de la finesse des images produites.
This pipeline is intended for debugging only. It keeps the structure of stabilityai/stable-diffusion-3-medium-diffusers but uses much smaller model dimensions and randomly initialized parameters, so the images it produces are not meaningful.
import torch
from diffusers import StableDiffusion3Pipeline
# Usage example: load the tiny random-weight SD3 pipeline in half precision.
pipe = StableDiffusion3Pipeline.from_pretrained("yujiepan/stable-diffusion-3-tiny-random", torch_dtype=torch.float16)
# Move the pipeline to the GPU; this example needs a CUDA device.
pipe = pipe.to("cuda")
# Generate a single image from the prompt with only two inference steps.
image = pipe(
"A cat holding a sign that says hello world",
negative_prompt="",
num_inference_steps=2,
guidance_scale=7.0,
).images[0]
# Bare expression: presumably shows the image when run as a notebook cell.
image
import importlib
import torch
import transformers
import diffusers
import rich
def get_original_model_configs(pipeline_cls: type[diffusers.DiffusionPipeline], pipeline_id: str):
    """Collect the configuration of every model component of a pipeline.

    The pipeline index loaded via ``pipeline_cls.load_config`` maps each
    component name (which is also its subfolder) to the import path of its
    class. Entries whose name starts with ``_`` are skipped.

    Args:
        pipeline_cls: Pipeline class used to load the pipeline index.
        pipeline_id: Identifier of the pipeline whose configs are read.

    Returns:
        A dict keyed by component name. Values are config objects for
        ``transformers.PreTrainedModel`` subclasses and whatever
        ``load_config`` returns for diffusers models. Components of any
        other kind are left out.
    """
    collected = {}
    index: dict[str, list[str]] = pipeline_cls.load_config(pipeline_id)
    for name, class_path in index.items():
        if name.startswith("_"):
            continue

        # The last element is the class name; everything before it is the
        # dotted module path.
        owner = importlib.import_module(".".join(class_path[:-1]))
        component_cls = getattr(owner, class_path[-1])

        if issubclass(component_cls, transformers.PreTrainedModel):
            collected[name] = component_cls.config_class.from_pretrained(
                pipeline_id, subfolder=name
            )
        elif issubclass(component_cls, diffusers.ModelMixin) and issubclass(
            component_cls, diffusers.ConfigMixin
        ):
            collected[name] = component_cls.load_config(pipeline_id, subfolder=name)
    return collected
def load_pipeline(pipeline_cls: type[diffusers.DiffusionPipeline], pipeline_id: str, model_configs: dict[str, dict]):
    """Assemble a pipeline whose models are built from the given configs.

    Every component listed in the pipeline index (entries starting with
    ``_`` are skipped) is created according to its class:

    * ``transformers.PreTrainedModel`` and diffusers models are instantiated
      from ``model_configs[subfolder]`` rather than loaded from
      ``pipeline_id``.
    * Tokenizers and schedulers are loaded from ``pipeline_id`` via
      ``from_pretrained``.

    Args:
        pipeline_cls: Pipeline class to instantiate.
        pipeline_id: Identifier of the pipeline providing the index,
            tokenizers and schedulers.
        model_configs: Per-component configs, keyed by component name.

    Returns:
        An instance of ``pipeline_cls`` built from the created components.

    Raises:
        TypeError: If a component class is none of the supported kinds.
        KeyError: If a model component has no entry in ``model_configs``.
    """
    pipeline_config: dict[str, list[str]] = pipeline_cls.load_config(pipeline_id)
    components = {}
    for subfolder, import_strings in pipeline_config.items():
        if subfolder.startswith("_"):
            continue
        # The last element is the class name; the rest is the module path.
        module = importlib.import_module(".".join(import_strings[:-1]))
        cls = getattr(module, import_strings[-1])
        print("Loading:", ".".join(import_strings))
        if issubclass(cls, transformers.PreTrainedModel):
            component = cls(model_configs[subfolder])
        elif issubclass(cls, transformers.PreTrainedTokenizerBase):
            component = cls.from_pretrained(pipeline_id, subfolder=subfolder)
        elif issubclass(cls, diffusers.ModelMixin) and issubclass(cls, diffusers.ConfigMixin):
            component = cls.from_config(model_configs[subfolder])
        elif issubclass(cls, diffusers.SchedulerMixin) and issubclass(cls, diffusers.ConfigMixin):
            component = cls.from_pretrained(pipeline_id, subfolder=subfolder)
        else:
            # Raising a bare string is itself a TypeError in Python 3 and
            # hides the message; raise a real exception of that same type.
            raise TypeError(f"unknown {subfolder}: {import_strings}")
        components[subfolder] = component
    return pipeline_cls(**components)
def get_pipeline():
    """Build a tiny Stable Diffusion 3 pipeline from shrunken configs.

    Seeds torch with 42, reads the component configs of
    ``stabilityai/stable-diffusion-3-medium-diffusers``, prints them,
    overrides their size-related fields with very small values, and builds
    the pipeline from the modified configs.

    Returns:
        The pipeline returned by ``load_pipeline``.
    """
    torch.manual_seed(42)
    pipeline_id = "stabilityai/stable-diffusion-3-medium-diffusers"
    pipeline_cls = diffusers.StableDiffusion3Pipeline
    model_configs = get_original_model_configs(pipeline_cls, pipeline_id)
    rich.print(model_configs)

    tiny = 8

    # Attribute overrides for config objects, keyed by component name.
    # text_encoder and text_encoder_2 get the same values.
    attribute_overrides = {
        "text_encoder": {
            "hidden_size": tiny,
            "intermediate_size": tiny * 2,
            "num_attention_heads": 2,
            "num_hidden_layers": 2,
            "projection_dim": tiny,
        },
        "text_encoder_2": {
            "hidden_size": tiny,
            "intermediate_size": tiny * 2,
            "num_attention_heads": 2,
            "num_hidden_layers": 2,
            "projection_dim": tiny,
        },
        "text_encoder_3": {
            "d_model": tiny,
            "d_ff": tiny * 2,
            "d_kv": tiny // 2,
            "num_heads": 2,
            "num_layers": 2,
        },
    }
    for component, fields in attribute_overrides.items():
        target = model_configs[component]
        for field, value in fields.items():
            setattr(target, field, value)

    # Item overrides for the configs that are indexed by key.
    item_overrides = {
        "transformer": {
            "num_layers": 2,
            "num_attention_heads": 2,
            "attention_head_dim": tiny // 2,
            "pooled_projection_dim": tiny * 2,
            "joint_attention_dim": tiny,
            "caption_projection_dim": tiny,
        },
        "vae": {
            "layers_per_block": 1,
            "block_out_channels": [tiny] * 4,
            "norm_num_groups": 2,
            "latent_channels": 16,
        },
    }
    for component, entries in item_overrides.items():
        target = model_configs[component]
        for key, value in entries.items():
            target[key] = value

    return load_pipeline(pipeline_cls, pipeline_id, model_configs)
# Build the tiny pipeline.
pipeline = get_pipeline()
# Smoke test: run the freshly built pipeline once with two inference steps.
# The resulting image is not used afterwards.
image = pipeline(
"hello world",
negative_prompt="runtime error",
num_inference_steps=2,
guidance_scale=7.0,
).images[0]
# Cast to float16 before writing the pipeline out.
pipeline = pipeline.to(torch.float16)
# Save a local copy first, then upload the pipeline to the Hub repository.
# NOTE(review): push_to_hub presumably needs Hub credentials; confirm.
pipeline.save_pretrained("/tmp/stable-diffusion-3-tiny-random")
pipeline.push_to_hub("yujiepan/stable-diffusion-3-tiny-random")