
# -*- coding: utf-8 -*-
"""stable_diffusion_jabberwocky_32steps_new.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1ikpFitFx0kgHWxBs7Jh92AM03p3ZhmiE

Image generation with PyTorch and Stable Diffusion

Source : https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/stable_diffusion.ipynb
"""

# Stable Diffusion
# Source : https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/stable_diffusion.ipynb

print("Install packages")

!pip install diffusers==0.11.1
!pip install transformers scipy ftfy accelerate


print("Imports")

import torch
torch_device = "cuda" if torch.cuda.is_available() else "cpu"

# Leftover from a pixel-space DDPM example; only referenced by the
# commented-out UNet2DModel / DDPMScheduler alternatives below.
repo_id = "google/ddpm-church-256"

from transformers import CLIPTextModel, CLIPTokenizer
from diffusers import AutoencoderKL, UNet2DModel, UNet2DConditionModel  # UNet2DModel is only needed for the commented-out DDPM alternative below

# 1. Load the autoencoder model which will be used to decode the latents into image space.
vae = AutoencoderKL.from_pretrained("CompVis/stable-diffusion-v1-4", subfolder="vae")

# 2. Load the tokenizer and text encoder to tokenize and encode the text.
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
text_encoder = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14")

# 3. The UNet model for generating the latents.
unet = UNet2DConditionModel.from_pretrained("CompVis/stable-diffusion-v1-4", subfolder="unet")
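
# Optional (an assumption, not part of the original notebook): the CompVis repo
# also publishes half-precision weights, which roughly halve GPU memory use:
# unet = UNet2DConditionModel.from_pretrained(
#     "CompVis/stable-diffusion-v1-4", subfolder="unet",
#     revision="fp16", torch_dtype=torch.float16)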

# model = UNet2DModel.from_pretrained(repo_id)

from diffusers import LMSDiscreteScheduler

scheduler = LMSDiscreteScheduler.from_pretrained("CompVis/stable-diffusion-v1-4", subfolder="scheduler")
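
# Other schedulers can be swapped in with the same call (a sketch, assuming the
# diffusers 0.11.x scheduler API; results and useful step counts will differ):
# from diffusers import EulerDiscreteScheduler
# scheduler = EulerDiscreteScheduler.from_pretrained("CompVis/stable-diffusion-v1-4", subfolder="scheduler")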


from diffusers import DDPMScheduler
# Pixel-space DDPM alternative (unused here):
# scheduler = DDPMScheduler.from_config(repo_id)


vae = vae.to(torch_device)
text_encoder = text_encoder.to(torch_device)
unet = unet.to(torch_device)

# Parameters
# Prompt (description of the image to generate)
# prompt = ["a photograph of an astronaut riding a horse"]
# prompt = ["hyper realistic photo of a cute and sweet young Asian woman with straight wet auburn hair and bangs, wearing white open windbreaker and red nylon shorts, outdoors under the rain, water drop, show breasts, show tits, show pussy, covered nipples, open mouth. The woman has long straight, wet hair with golden locks, small breasts, small chest and a slim waist. Plenty of cum drips from the woman's mouth and falls onto her chest. Her red nylon shorts are glossy and slightly see-through. Use stable diffusion techniques to capture the subtle details of her cute wet face covered with cum, and her wet shiny body, and the raindrops on her clothes and skin, to create a cute and sexy image that feels vivid and alive. 8k, highres."]
# prompt = ["medium shot of sweet and cute asian girl firstnude woman, straight auburn hair with bangs, wearing crop open white glossy windbreaker and glossy red nylon shorts, ultra realistic"]
# prompt = ["photo of a cute and sweet young Asian woman with straight wet auburn hair and bangs, wearing red nylon shorts and bra, outdoors under the rain"]
prompt = ["Twas brillig, and the slithy toves did gyre and gimble in the wabe, all mimsy were the borogoves and the mome raths outgrabe"]
# prompt = ["a photograph of the Jabberwock"]
# prompt = ["As in uffish thought he stood, the Jabberwock with eyes of flame came whiffling through the tulgey wood and burbled as it came."]
# prompt = ["Hwæt! wē Gār-Dena in geār-dagum þēod-cyninga þrym gefrūnon, hū þā æðelingas ellen fremedon."]
# prompt = ["Un jardin tropical"]
# prompt = ["Le soir venu quand tu t'endors Ils viennent sans bruit Dans leurs manteaux de pourpre et d'or Autour de ton lit. Ils viennent changer le décor Le temps d'une nuit Ils resteront jusqu'à l'aurore Seigneurs de la nuit. On voit briller sur leurs épaules Des oiseaux de nuit Au bout de leurs mains qui nous frolent Un vent de folie. Sur fond de brume vaporeuse Ils aiment donner D'étranges fêtes silencieuses Tendres et parfumées. Laisse entrer les seigneurs de la nuit Laisse entrer les seigneurs de la nuit. Des femmes fleurs aux yeux très doux Qui les accompagnent Illuminent la nuit du bout De leurs mains diaphanes. Elles ont trouvé les mots sucrés Tous les mots magiques Et la jeunesse et la beauté Jamais ne les quittent. Laisse entrer les seigneurs de la nuit. Laisse venir les magiciennes Autour de ton lit Déposer l'or et le satin Qui manquent à ta vie. Laisse entrer les seigneurs de la nuit."]
# prompt = ["In the evening when you fall asleep They come noiselessly In their purple and gold coats Around your bed. They come to change the scenery For one night They will stay until dawn Lords of the night. We see shining on their shoulders Night birds At the end of their hands which brush against us A wind of madness. On a background of vaporous mist They like to give Strange silent celebrations Tender and fragrant. Let the night lords in. Let the night lords in. Flower women with very gentle eyes Who accompany them Illuminate the night with the tips of Their diaphanous hands. They found the sweet words All the magic words And youth and beauty Never leave them. Let the lords of the night in. Let the sorceresses come Around your bed Lay down the gold and satin That your life lacks. Let the lords of the night in."]
# prompt = ["Mon enfant, ma sœur, Songe à la douceur D’aller là-bas vivre ensemble ! Aimer à loisir, Aimer et mourir Au pays qui te ressemble ! Les soleils mouillés De ces ciels brouillés Pour mon esprit ont les charmes Si mystérieux De tes traîtres yeux, Brillant à travers leurs larmes. Là, tout n’est qu’ordre et beauté, Luxe, calme et volupté. Des meubles luisants, Polis par les ans, Décoreraient notre chambre ; Les plus rares fleurs Mêlant leurs odeurs Aux vagues senteurs de l’ambre, Les riches plafonds, Les miroirs profonds, La splendeur orientale, Tout y parlerait À l’âme en secret Sa douce langue natale. Là, tout n’est qu’ordre et beauté, Luxe, calme et volupté. Vois sur ces canaux Dormir ces vaisseaux Dont l’humeur est vagabonde ; C’est pour assouvir Ton moindre désir Qu’ils viennent du bout du monde. – Les soleils couchants Revêtent les champs, Les canaux, la ville entière, D’hyacinthe et d’or ; Le monde s’endort Dans une chaude lumière. Là, tout n’est qu’ordre et beauté, Luxe, calme et volupté."]
# prompt = ["My child, my sister, Think of the sweetness Of going there to live together! Loving at leisure, Loving and dying In the country that resembles you! The wet suns Of these blurred skies For my mind have the charms So mysterious Of your treacherous eyes, Shining through their tears. There, all is order and beauty, Luxury, calm and voluptuousness. Gleaming furniture, Polished by the years, Would decorate our room; The rarest flowers Mixing their odors With the vague scents of amber, The rich ceilings, The deep mirrors, The oriental splendor, Everything would speak there To the soul in secret Its sweet native language. There, all is order and beauty, Luxury, calm and voluptuousness. See on these canals Sleeping these vessels Whose mood is wandering; It is to satisfy Your slightest desire That they come from the end of the world. – The setting suns Dress the fields, The canals, the whole city, With hyacinth and gold; The world falls asleep In a warm light. There, all is order and beauty, Luxury, calm and voluptuousness."]
# prompt = ["A hen brooding her eggs"]
# prompt = ["a chicken egg hatches"]
# prompt = ["a flower"]
# prompt = ["a garden"]


height = 512                       # default height of Stable Diffusion = 512
width = 512                        # default width of Stable Diffusion = 512

num_inference_steps = 32            # Number of denoising steps

guidance_scale = 7.5                # Scale for classifier-free guidance

batch_size = 1
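
# Note (added comment): batch_size is assumed to equal len(prompt); the
# unconditional embeddings and the initial latents below are both sized from it.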


text_input = tokenizer(prompt, padding="max_length", max_length=tokenizer.model_max_length, truncation=True, return_tensors="pt")
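
# Note (added comment): CLIP's context window is tokenizer.model_max_length
# (77 tokens), so truncation=True silently drops anything past that limit.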

with torch.no_grad():
  text_embeddings = text_encoder(text_input.input_ids.to(torch_device))[0]


max_length = text_input.input_ids.shape[-1]
uncond_input = tokenizer(
    [""] * batch_size, padding="max_length", max_length=max_length, return_tensors="pt"
)
with torch.no_grad():
  uncond_embeddings = text_encoder(uncond_input.input_ids.to(torch_device))[0]


text_embeddings = torch.cat([uncond_embeddings, text_embeddings])
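
# Illustrative sanity check (an addition, not in the source notebook): CLIP
# ViT-L/14 has hidden size 768, so after concatenating the unconditional and
# conditional embeddings we expect (2 * batch_size, 77, 768).
assert text_embeddings.shape == (2 * batch_size, tokenizer.model_max_length, 768)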




from tqdm.auto import tqdm

import PIL.Image
import numpy as np
from IPython.display import display  # implicit in Colab; imported here so the script also runs standalone

def display_sample_1(sample, i):
    # Display a pixel-space sample (as produced by e.g. a DDPM UNet2DModel):
    # map values from [-1, 1] to uint8 RGB without going through the VAE.
    # Unused in the latent-diffusion path below.
    image_processed = sample.cpu().permute(0, 2, 3, 1)
    image_processed = (image_processed + 1.0) * 127.5
    image_processed = image_processed.numpy().astype(np.uint8)

    image_pil = PIL.Image.fromarray(image_processed[0])
    display(f"Image at step {i}")
    display(image_pil)

from PIL import Image

def display_sample(sample, i):

    # undo the SD v1 latent scaling factor (0.18215), then decode with the VAE
    sample = 1 / 0.18215 * sample

    with torch.no_grad():
        image = vae.decode(sample).sample

    image = (image / 2 + 0.5).clamp(0, 1)

    image = image.detach().cpu().permute(0, 2, 3, 1).numpy()
    images = (image * 255).round().astype("uint8")
    pil_images = [Image.fromarray(image) for image in images]
    # pil_images[0].save("astronaut_riding_horse.png")
    display(f"Image at step {i}")
    display(pil_images[0])

for seed in range(12):

  generator = torch.manual_seed(seed)   # seed the generator that creates the initial latent noise

  latents = torch.randn(
    (batch_size, unet.in_channels, height // 8, width // 8),
    generator=generator,
  )
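
  # The UNet denoises in the VAE's latent space, which is 8x downsampled with
  # 4 channels: for a 512x512 image the latents are (1, 4, 64, 64), as the
  # print below shows.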
  latents = latents.to(torch_device)

  print(latents.shape)

  scheduler.set_timesteps(num_inference_steps)

  latents = latents * scheduler.init_noise_sigma
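
  # Added note: LMSDiscreteScheduler starts the reverse process from noise at
  # its initial sigma scale, so the unit-variance Gaussian latents must be
  # multiplied by init_noise_sigma (for schedulers such as DDIM this is 1.0).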


  print("Denoising loop")

  step = 0
  for t in tqdm(scheduler.timesteps):
    step += 1
    # expand the latents if we are doing classifier-free guidance to avoid doing two forward passes.
    latent_model_input = torch.cat([latents] * 2)

    latent_model_input = scheduler.scale_model_input(latent_model_input, t)

    # predict the noise residual
    with torch.no_grad():
      noise_pred = unet(latent_model_input, t, encoder_hidden_states=text_embeddings).sample

    # perform guidance
    noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
    noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
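
    # Classifier-free guidance pushes the prediction away from the unconditional
    # output toward the text-conditioned one; guidance_scale = 1 disables
    # guidance, larger values follow the prompt more closely at some cost in diversity.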

    # compute the previous noisy sample x_t -> x_t-1
    latents = scheduler.step(noise_pred, t, latents).prev_sample

    # display_sample(latents, step)

  print("Scale and decode")

  # undo the SD v1 latent scaling factor (0.18215), then decode with the VAE
  latents = 1 / 0.18215 * latents

  with torch.no_grad():
    image = vae.decode(latents).sample


  # from PIL import Image

  image = (image / 2 + 0.5).clamp(0, 1)
  image = image.detach().cpu().permute(0, 2, 3, 1).numpy()
  images = (image * 255).round().astype("uint8")
  pil_images = [Image.fromarray(image) for image in images]
  # pil_images[0].save("astronaut_riding_horse.png")
  display(f"Result with seed {seed} :")
  display(pil_images[0])
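
  # Optional (a sketch, not in the original run; the filename pattern is illustrative):
  # pil_images[0].save(f"jabberwocky_seed{seed}_{num_inference_steps}steps.png")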