Stable Diffusion | Blog for Cold9's Documentation

Stable Diffusion Documentation

Edit me

Note: This page is under construction.

Latent Diffusion

Installation & Running

$ git clone https://github.com/CompVis/latent-diffusion.git
$ cd latent-diffusion
$ conda env create -f environment.yaml
$ conda activate ldm

Retrieval Augmented Diffusion Models

$ pip3 install transformers==4.19.2 scann kornia==0.6.4 torchmetrics==0.6.0
$ pip3 install git+https://github.com/arogozhnikov/einops.git
  
$ mkdir -p models/rdm/rdm768x768 && wget -O models/rdm/rdm768x768/model.ckpt https://ommer-lab.com/files/rdm/model.ckpt
  
# RDM with text-prompt only
$ python3 scripts/knn2img.py  --prompt "a happy bear reading a newspaper, oil on canvas"
  
# RDM with text-to-image retrieval
$ mkdir -p data/rdm/retrieval_databases \
  && wget -O data/rdm/retrieval_databases/artbench.zip https://ommer-lab.com/files/rdm/artbench_databases.zip \
  && wget -O data/rdm/retrieval_databases/openimages.zip https://ommer-lab.com/files/rdm/openimages_database.zip \
  && unzip data/rdm/retrieval_databases/artbench.zip -d data/rdm/retrieval_databases \
  && unzip data/rdm/retrieval_databases/openimages.zip -d data/rdm/retrieval_databases
$ mkdir -p data/rdm/searchers \
  && wget -O data/rdm/searchers/artbench.zip https://ommer-lab.com/files/rdm/artbench_searchers.zip \
  && unzip data/rdm/searchers/artbench.zip -d data/rdm/searchers
# maximum supported number of neighbors = 20
# 1) database: artbench-art_nouveau, artbench-baroque, artbench-expressionism, artbench-impressionism, artbench-post_impressionism, artbench-realism, artbench-renaissance, artbench-romanticism, artbench-surrealism, artbench-ukiyo_e
$ python scripts/knn2img.py --database <database> --prompt "a happy pineapple" --use_neighbors --knn <number_of_neighbors> 
# 2) database: openimages
$ python3 scripts/train_searcher.py
$ python3 scripts/knn2img.py --database openimages --prompt "a happy pineapple" --use_neighbors --knn <number_of_neighbors> 

Text to Image (txt2img)

$ mkdir -p models/ldm/text2img-large && wget -O models/ldm/text2img-large/model.ckpt https://ommer-lab.com/files/latent-diffusion/nitro/txt2img-f8-large/model.ckpt
$ python3 scripts/txt2img.py --prompt "a virus monster is playing guitar, oil on canvas" --ddim_eta 0.0 --n_samples 4 --n_iter 4 --scale 5.0 --ddim_steps 50
$ python3 scripts/txt2img.py --prompt "a sunset behind a mountain range, vector image" --ddim_eta 1.0 --n_samples 1 --n_iter 1 --H 384 --W 1024 --scale 5.0

Image Inpainting

$ wget -O models/ldm/inpainting_big/last.ckpt https://heibox.uni-heidelberg.de/f/4d9ac7ea40c64582b7c9/?dl=1
$ python3 scripts/inpaint.py --indir data/inpainting_examples --outdir outputs/inpainting_results
# 'indir' should contain images '*.png' and masks '<image_fname>_mask.png' like the examples provided in 'data/inpainting_examples'

Class-Conditional ImageNet

Unconditional Models

# sampling from unconditional LDMs (e.g. LSUN, FFHQ, ...)
$ CUDA_VISIBLE_DEVICES=<GPU_ID> python3 scripts/sample_diffusion.py -r models/ldm/<model_spec>/model.ckpt -l <logdir> -n <num_samples> --batch_size <batch_size> -c <ddim_steps> -e <eta>

Training

Stable Diffusion 2

Model Card

https://github.com/Stability-AI/stablediffusion/blob/main/modelcard.md
https://huggingface.co/stabilityai/stable-diffusion-2

Using Google Colab

Installation

!git clone https://github.com/Stability-AI/stablediffusion.git
!cd stablediffusion && pip install -r requirements.txt && pip install -e .

# xformers, memory efficient attention (for more efficiency and speed on GPUs, CUDA)
#!export CUDA_HOME=/usr/local/cuda-11.4
#!conda install -c nvidia/label/cuda-11.4.0 cuda-nvcc
#!conda install -c conda-forge gcc
#!conda install -c conda-forge gxx_linux-64==9.5.0
#!cd ..
#!git clone https://github.com/facebookresearch/xformers.git
#!cd xformers
#!git submodule update --init --recursive
#!pip install -r requirements.txt
#!pip install -e .
#!cd ../stablediffusion

# prerequisite: git-lfs from https://git-lfs.com or https://github.com/git-lfs/git-lfs#installing
!git lfs install

Text to Image (txt2img)

!git clone https://huggingface.co/stabilityai/stable-diffusion-2-1
# !git clone https://huggingface.co/stabilityai/stable-diffusion-2-1-base

Image Modification (depth2img)

!git clone https://huggingface.co/stabilityai/stable-diffusion-2-depth
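# the MiDaS weights are expected in a "midas_models" folder in the directory the depth2img script is launched from
!mkdir -p midas_models && wget -O midas_models/dpt_hybrid-midas-501f0c75.pt https://github.com/intel-isl/DPT/releases/download/1_0/dpt_hybrid-midas-501f0c75.pt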

Image Upscaling

!git clone https://huggingface.co/stabilityai/stable-diffusion-x4-upscaler

Image Inpainting

!git clone https://huggingface.co/stabilityai/stable-diffusion-2-inpainting

Running

Text to Image (txt2img)

!python3 stablediffusion/scripts/txt2img.py --prompt "a professional photograph of an astronaut riding a horse" --ckpt stable-diffusion-2-1/v2-1_768-ema-pruned.ckpt --config stablediffusion/configs/stable-diffusion/v2-inference-v.yaml --H 768 --W 768
  
!python3 stablediffusion/scripts/txt2img.py --prompt "a professional photograph of an astronaut riding a horse" --ckpt stable-diffusion-2-1/v2-1_768-nonema-pruned.ckpt --config stablediffusion/configs/stable-diffusion/v2-inference-v.yaml --H 768 --W 768

Image Modification (depth2img, img2img)

# depth conditional
!python3 stablediffusion/scripts/gradio/depth2img.py stablediffusion/configs/stable-diffusion/v2-midas-inference.yaml stable-diffusion-2-depth/512-depth-ema.ckpt
!streamlit run stablediffusion/scripts/streamlit/depth2img.py stablediffusion/configs/stable-diffusion/v2-midas-inference.yaml stable-diffusion-2-depth/512-depth-ema.ckpt

# classic img2img
!python3 stablediffusion/scripts/img2img.py --prompt "A fantasy landscape, trending on artstation" --init-img <path-to-img.jpg> --strength 0.8 --ckpt <path-to-ckpt>

Image Upscaling

!python3 stablediffusion/scripts/gradio/superresolution.py stablediffusion/configs/stable-diffusion/x4-upscaling.yaml stable-diffusion-x4-upscaler/x4-upscaler-ema.ckpt
!streamlit run stablediffusion/scripts/streamlit/superresolution.py -- stablediffusion/configs/stable-diffusion/x4-upscaling.yaml stable-diffusion-x4-upscaler/x4-upscaler-ema.ckpt

Image Inpainting

!python3 stablediffusion/scripts/gradio/inpainting.py stablediffusion/configs/stable-diffusion/v2-inpainting-inference.yaml stable-diffusion-2-inpainting/512-inpainting-ema.ckpt
!streamlit run stablediffusion/scripts/streamlit/inpainting.py -- stablediffusion/configs/stable-diffusion/v2-inpainting-inference.yaml stable-diffusion-2-inpainting/512-inpainting-ema.ckpt

Stable Diffusion 3 Medium

Model Card

Stability AI Developer Platform (API)

Using ComfyUI

Installation

Jupyter Notebook
ComfyUI Windows Portable Nvidia

Linux Manual Install

Git clone repo.

 $ git clone https://github.com/comfyanonymous/ComfyUI.git

Put your SD checkpoints (the huge ckpt/safetensors files) in: models/checkpoints
Put your VAE in: models/vae

GPUs

AMD

 # stable build (ROCm 6.1)
 $ pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.1
 # alternatively, the nightly build (ROCm 6.2)
 $ pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/rocm6.2

NVIDIA

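 # remove any existing torch installation first
 $ pip uninstall torch
 # stable build (CUDA 12.4)
 $ pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu124
 # alternatively, the nightly build (CUDA 12.4)
 $ pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu124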

Intel
i. Intel Extension for PyTorch
ii. Intel oneAPI Base Toolkit
Apple Silicon (macOS)
i. Accelerated PyTorch training on Mac

DirectML

 $ pip3 install torch-directml
 # python3 main.py --directml

Running

$ python main.py

# for 6700, 6600 and maybe other RDNA2 or older
$ HSA_OVERRIDE_GFX_VERSION=10.3.0 python main.py

# for AMD 7600 and maybe other RDNA3 cards
$ HSA_OVERRIDE_GFX_VERSION=11.0.0 python main.py

Shortcuts

Web UI

Wiki

Windows

# prerequisites: python 3.10.6, git
> git clone https://github.com/AUTOMATIC1111/stable-diffusion-webui.git
> cd stable-diffusion-webui
> webui-user.bat

macOS (Apple Silicon)

Linux

# prerequisite: 3.10 <= python <= 3.11
  
# install dependencies (run only the command for your distribution)
# Debian-based:
$ apt install wget git python3 python3-venv libgl1 libglib2.0-0
# Red Hat-based:
$ dnf install wget git python3 gperftools-libs libglvnd-glx
# openSUSE-based:
$ zypper install wget git python3 libtcmalloc4 libglvnd
# Arch-based:
$ pacman -S wget git python3
  
# git clone https://github.com/AUTOMATIC1111/stable-diffusion-webui
# cd stable-diffusion-webui
$ wget -q https://raw.githubusercontent.com/AUTOMATIC1111/stable-diffusion-webui/master/webui.sh
$ chmod +x webui.sh
$ ./webui.sh

Diffusers

Installation

$ python3 -m venv .env
$ source .env/bin/activate

$ pip3 install jupyter
$ jupyter notebook

!pip3 install -U diffusers

#!pip3 install --upgrade diffusers[torch] transformers
#!pip3 install --upgrade diffusers[flax] transformers

#!conda install -c conda-forge diffusers

#!pip3 install accelerate
#!pip3 install git+https://github.com/huggingface/diffusers

#!git clone https://github.com/huggingface/diffusers.git
#!cd diffusers && pip3 install -e ".[torch]" && pip3 install -e ".[flax]"

Models

Generative Tasks

References

Tags:

Stable Diffusion October 18, 2024

Latent Diffusion

Installation & Running

Training

Stable Diffusion 2

Model Card

Using Google Colab

Installation

Running

Stable Diffusion 3 Medium

Model Card

Stability AI Developer Platform (API)

Using ComfyUI

Installation

Running

Web UI

Diffusers

Installation

Models

Generative Tasks

References