Stable Diffusion Documentation
Edit me

Latent Diffusion

Installation & Running

$ git clone https://github.com/CompVis/latent-diffusion.git
$ conda env create -f environment.yaml
$ conda activate ldm
  • Retrieval Augmented Diffusion Models
    $ pip3 install transformers==4.19.2 scann kornia==0.6.4 torchmetrics==0.6.0
    $ pip3 install git+https://github.com/arogozhnikov/einops.git
      
    $ mkdir -p models/rdm/rdm768x768 && wget -O models/rdm/rdm768x768/model.ckpt https://ommer-lab.com/files/rdm/model.ckpt
      
    # RDM with text-prompt only
    $ python3 scripts/knn2img.py  --prompt "a happy bear reading a newspaper, oil on canvas"
      
    # RDM with text-to-image retrieval
    $ mkdir -p data/rdm/retrieval_databases \
      && wget -O data/rdm/retrieval_databases/artbench.zip https://ommer-lab.com/files/rdm/artbench_databases.zip \
      && wget -O data/rdm/retrieval_databases/openimages.zip https://ommer-lab.com/files/rdm/openimages_database.zip \
      && unzip data/rdm/retrieval_databases/artbench.zip -d data/rdm/retrieval_databases \
      && unzip data/rdm/retrieval_databases/openimages.zip -d data/rdm/retrieval_databases
    $ mkdir -p data/rdm/searchers \
      && wget -O data/rdm/searchers/artbench.zip https://ommer-lab.com/files/rdm/artbench_searchers.zip \
      && unzip data/rdm/searchers/artbench.zip -d data/rdm/searchers
    # maximum supported number of neighbors = 20
    # 1) database: artbench-art_nouveau, artbench-baroque, artbench-expressionism, artbench-impressionism, artbench-post_impressionism, artbench-realism, artbench-renaissance, artbench-romanticism, artbench-surrealism, artbench-ukiyo_e
    $ python3 scripts/knn2img.py --database <database> --prompt "a happy pineapple" --use_neighbors --knn <number_of_neighbors>
    # 2) database: openimages
    $ python3 scripts/train_searcher.py
    $ python3 scripts/knn2img.py --database openimages --prompt "a happy pineapple" --use_neighbors --knn <number_of_neighbors> 
    
  • Text to Image (txt2img)
    $ mkdir -p models/ldm/text2img-large && wget -O models/ldm/text2img-large/model.ckpt https://ommer-lab.com/files/latent-diffusion/nitro/txt2img-f8-large/model.ckpt
    $ python3 scripts/txt2img.py --prompt "a virus monster is playing guitar, oil on canvas" --ddim_eta 0.0 --n_samples 4 --n_iter 4 --scale 5.0 --ddim_steps 50
    $ python3 scripts/txt2img.py --prompt "a sunset behind a mountain range, vector image" --ddim_eta 1.0 --n_samples 1 --n_iter 1 --H 384 --W 1024 --scale 5.0
    
  • Image Inpainting
    $ wget -O models/ldm/inpainting_big/last.ckpt https://heibox.uni-heidelberg.de/f/4d9ac7ea40c64582b7c9/?dl=1
    $ python3 scripts/inpaint.py --indir data/inpainting_examples --outdir outputs/inpainting_results
    # 'indir' should contain images '*.png' and masks '<image_fname>_mask.png' like the examples provided in 'data/inpainting_examples'
    
  • Class-Conditional ImageNet
  • Unconditional Models
    # sampling from unconditional LDMs (e.g. LSUN, FFHQ, ...)
    $ CUDA_VISIBLE_DEVICES=<GPU_ID> python3 scripts/sample_diffusion.py -r models/ldm/<model_spec>/model.ckpt -l <logdir> -n <#samples> --batch_size <batch_size> -c <#ddim steps> -e <#eta>
    

Training

Stable Diffusion 2

Model Card

  • https://github.com/Stability-AI/stablediffusion/blob/main/modelcard.md
  • https://huggingface.co/stabilityai/stable-diffusion-2

Using Google Colab

Installation

!git clone https://github.com/Stability-AI/stablediffusion.git
!cd stablediffusion && pip install -r requirements.txt && pip install -e .

# xformers, memory efficient attention (for more efficiency and speed on GPUs, CUDA)
#!export CUDA_HOME=/usr/local/cuda-11.4
#!conda install -c nvidia/label/cuda-11.4.0 cuda-nvcc
#!conda install -c conda-forge gcc
#!conda install -c conda-forge gxx_linux-64==9.5.0
#!cd ..
#!git clone https://github.com/facebookresearch/xformers.git
#!cd xformers
#!git submodule update --init --recursive
#!pip install -r requirements.txt
#!pip install -e .
#!cd ../stablediffusion

# prerequisite: git-lfs from https://git-lfs.com or https://github.com/git-lfs/git-lfs#installing
!git lfs install
  • Text to Image (txt2img)
    !git clone https://huggingface.co/stabilityai/stable-diffusion-2-1
    # !git clone https://huggingface.co/stabilityai/stable-diffusion-2-1-base
    
  • Image Modification (depth2img)
    !git clone https://huggingface.co/stabilityai/stable-diffusion-2-depth
    !wget https://github.com/intel-isl/DPT/releases/download/1_0/dpt_hybrid-midas-501f0c75.pt && mv dpt_hybrid-midas-501f0c75.pt "$(find . -type d -name "*midas_models*" -print -quit)"
    
  • Image Upscaling
    !git clone https://huggingface.co/stabilityai/stable-diffusion-x4-upscaler
    
  • Image Inpainting
    !git clone https://huggingface.co/stabilityai/stable-diffusion-2-inpainting
    

Running

  • Text to Image (txt2img)
    !python3 stablediffusion/scripts/txt2img.py --prompt "a professional photograph of an astronaut riding a horse" --ckpt stable-diffusion-2-1/v2-1_768-ema-pruned.ckpt --config stablediffusion/configs/stable-diffusion/v2-inference-v.yaml --H 768 --W 768
      
    !python3 stablediffusion/scripts/txt2img.py --prompt "a professional photograph of an astronaut riding a horse" --ckpt stable-diffusion-2-1/v2-1_768-nonema-pruned.ckpt --config stablediffusion/configs/stable-diffusion/v2-inference-v.yaml --H 768 --W 768
    
  • Image Modification (depth2img, img2img)
    # depth conditional
    !python3 stablediffusion/scripts/gradio/depth2img.py stablediffusion/configs/stable-diffusion/v2-midas-inference.yaml --ckpt stable-diffusion-2-depth/512-depth-ema.ckpt
    !streamlit run stablediffusion/scripts/streamlit/depth2img.py -- stablediffusion/configs/stable-diffusion/v2-midas-inference.yaml --ckpt stable-diffusion-2-depth/512-depth-ema.ckpt
    
    # classic img2img
    !python3 stablediffusion/scripts/img2img.py --prompt "A fantasy landscape, trending on artstation" --init-img <path-to-img.jpg> --strength 0.8 --ckpt <path-to-ckpt>
    
  • Image Upscaling
    !python3 stablediffusion/scripts/gradio/superresolution.py stablediffusion/configs/stable-diffusion/x4-upscaling.yaml --ckpt stable-diffusion-x4-upscaler/x4-upscaler-ema.ckpt
    !streamlit run stablediffusion/scripts/streamlit/superresolution.py -- stablediffusion/configs/stable-diffusion/x4-upscaling.yaml --ckpt stable-diffusion-x4-upscaler/x4-upscaler-ema.ckpt
    
  • Image Inpainting
    !python3 stablediffusion/scripts/gradio/inpainting.py stablediffusion/configs/stable-diffusion/v2-inpainting-inference.yaml --ckpt stable-diffusion-2-inpainting/512-inpainting-ema.ckpt
    !streamlit run stablediffusion/scripts/streamlit/inpainting.py -- stablediffusion/configs/stable-diffusion/v2-inpainting-inference.yaml --ckpt stable-diffusion-2-inpainting/512-inpainting-ema.ckpt
    

Stable Diffusion 3 Medium

Model Card

Stability AI Developer Platform (API)

Using ComfyUI

Installation

  • Jupyter Notebook
  • ComfyUI Windows Portable Nvidia
  • Linux Manual Install
    1. Git clone repo.
       $ git clone https://github.com/comfyanonymous/ComfyUI.git
      
    2. Put your SD checkpoints (the huge ckpt/safetensors files) in: models/checkpoints
    3. Put your VAE in: models/vae

    4. GPUs
      • AMD
         $ pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.1
         $ pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/rocm6.2
        
      • NVIDIA
         $ pip uninstall torch
         $ pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu124
         $ pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu124
        
      • Intel
        i. Intel Extension for PyTorch
        ii. Intel oneAPI Base Toolkit
      • Mac Silicon
        i. Accelerated PyTorch training on Mac
      • DirectML
         $ pip3 install torch-directml
         # python3 main.py --directml
        

Running

$ python main.py

# for 6700, 6600 and maybe other RDNA2 or older
$ HSA_OVERRIDE_GFX_VERSION=10.3.0 python main.py

# for AMD 7600 and maybe other RDNA3 cards
$ HSA_OVERRIDE_GFX_VERSION=11.0.0 python main.py

Web UI

  • Wiki
  • Windows
    # prerequisites: python 3.10.6, git
    > git clone https://github.com/AUTOMATIC1111/stable-diffusion-webui.git
    > webui-user.bat
    
  • macOS (Apple Silicon)
  • Linux
    # prerequisite: 3.10 <= python <= 3.11
      
    # install dependencies (run only the line that matches your distribution)
    # Debian/Ubuntu
    $ apt install wget git python3 python3-venv libgl1 libglib2.0-0
    # Fedora/RHEL
    $ dnf install wget git python3 gperftools-libs libglvnd-glx
    # openSUSE
    $ zypper install wget git python3 libtcmalloc4 libglvnd
    # Arch Linux
    $ pacman -S wget git python3
      
    # git clone https://github.com/AUTOMATIC1111/stable-diffusion-webui
    # cd stable-diffusion-webui
    $ wget -q https://raw.githubusercontent.com/AUTOMATIC1111/stable-diffusion-webui/master/webui.sh
    $ chmod +x webui.sh
    $ ./webui.sh
    

Diffusers

Installation

$ python3 -m venv .env
$ source .env/bin/activate

$ pip3 install jupyter
$ jupyter notebook
!pip3 install -U diffusers

#!pip3 install --upgrade diffusers[torch] transformers
#!pip3 install --upgrade diffusers[flax] transformers

#!conda install -c conda-forge diffusers

#!pip3 install accelerate
#!pip3 install git+https://github.com/huggingface/diffusers

#!git clone https://github.com/huggingface/diffusers.git
#!cd diffusers && pip3 install -e ".[torch]" && pip3 install -e ".[flax]"

Models

Generative Tasks

References