处理后端接口

2023-02-10 02:02:25 +08:00
parent fabccbaf7d
commit d0e7db4bce
4 changed files with 503 additions and 25 deletions
--- a/server.py
+++ b/server.py
@@ -0,0 +1,388 @@
+import argparse, os
+import cv2
+import torch
+import numpy as np
+from omegaconf import OmegaConf
+from PIL import Image
+from tqdm import tqdm, trange
+from itertools import islice
+from einops import rearrange
+from torchvision.utils import make_grid
+from pytorch_lightning import seed_everything
+from torch import autocast
+from contextlib import nullcontext
+from imwatermark import WatermarkEncoder
+
+from ldm.util import instantiate_from_config
+from ldm.models.diffusion.ddim import DDIMSampler
+from ldm.models.diffusion.plms import PLMSSampler
+from ldm.models.diffusion.dpm_solver import DPMSolverSampler
+
+torch.set_grad_enabled(False)
+
+def chunk(it, size):
+    it = iter(it)
+    return iter(lambda: tuple(islice(it, size)), ())
+
+
+def load_model_from_config(config, ckpt, device=torch.device("cuda"), verbose=False):
+    print(f"Loading model from {ckpt}")
+    pl_sd = torch.load(ckpt, map_location="cpu")
+    if "global_step" in pl_sd:
+        print(f"Global Step: {pl_sd['global_step']}")
+    sd = pl_sd["state_dict"]
+    model = instantiate_from_config(config.model)
+    m, u = model.load_state_dict(sd, strict=False)
+    if len(m) > 0 and verbose:
+        print("missing keys:")
+        print(m)
+    if len(u) > 0 and verbose:
+        print("unexpected keys:")
+        print(u)
+
+    if device == torch.device("cuda"):
+        model.cuda()
+    elif device == torch.device("cpu"):
+        model.cpu()
+        model.cond_stage_model.device = "cpu"
+    else:
+        raise ValueError(f"Incorrect device name. Received: {device}")
+    model.eval()
+    return model
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--prompt",
+        type=str,
+        nargs="?",
+        default="a professional photograph of an astronaut riding a triceratops",
+        help="the prompt to render"
+    )
+    parser.add_argument(
+        "--outdir",
+        type=str,
+        nargs="?",
+        help="dir to write results to",
+        default="outputs/txt2img-samples"
+    )
+    parser.add_argument(
+        "--steps",
+        type=int,
+        default=50,
+        help="number of ddim sampling steps",
+    )
+    parser.add_argument(
+        "--plms",
+        action='store_true',
+        help="use plms sampling",
+    )
+    parser.add_argument(
+        "--dpm",
+        action='store_true',
+        help="use DPM (2) sampler",
+    )
+    parser.add_argument(
+        "--fixed_code",
+        action='store_true',
+        help="if enabled, uses the same starting code across all samples ",
+    )
+    parser.add_argument(
+        "--ddim_eta",
+        type=float,
+        default=0.0,
+        help="ddim eta (eta=0.0 corresponds to deterministic sampling",
+    )
+    parser.add_argument(
+        "--n_iter",
+        type=int,
+        default=3,
+        help="sample this often",
+    )
+    parser.add_argument(
+        "--H",
+        type=int,
+        default=512,
+        help="image height, in pixel space",
+    )
+    parser.add_argument(
+        "--W",
+        type=int,
+        default=512,
+        help="image width, in pixel space",
+    )
+    parser.add_argument(
+        "--C",
+        type=int,
+        default=4,
+        help="latent channels",
+    )
+    parser.add_argument(
+        "--f",
+        type=int,
+        default=8,
+        help="downsampling factor, most often 8 or 16",
+    )
+    parser.add_argument(
+        "--n_samples",
+        type=int,
+        default=3,
+        help="how many samples to produce for each given prompt. A.k.a batch size",
+    )
+    parser.add_argument(
+        "--n_rows",
+        type=int,
+        default=0,
+        help="rows in the grid (default: n_samples)",
+    )
+    parser.add_argument(
+        "--scale",
+        type=float,
+        default=9.0,
+        help="unconditional guidance scale: eps = eps(x, empty) + scale * (eps(x, cond) - eps(x, empty))",
+    )
+    parser.add_argument(
+        "--from-file",
+        type=str,
+        help="if specified, load prompts from this file, separated by newlines",
+    )
+    parser.add_argument(
+        "--config",
+        type=str,
+        default="configs/stable-diffusion/v2-inference.yaml",
+        help="path to config which constructs model",
+    )
+    parser.add_argument(
+        "--ckpt",
+        type=str,
+        help="path to checkpoint of model",
+    )
+    parser.add_argument(
+        "--seed",
+        type=int,
+        default=42,
+        help="the seed (for reproducible sampling)",
+    )
+    parser.add_argument(
+        "--precision",
+        type=str,
+        help="evaluate at this precision",
+        choices=["full", "autocast"],
+        default="autocast"
+    )
+    parser.add_argument(
+        "--repeat",
+        type=int,
+        default=1,
+        help="repeat each prompt in file this often",
+    )
+    parser.add_argument(
+        "--device",
+        type=str,
+        help="Device on which Stable Diffusion will be run",
+        choices=["cpu", "cuda"],
+        default="cpu"
+    )
+    parser.add_argument(
+        "--torchscript",
+        action='store_true',
+        help="Use TorchScript",
+    )
+    parser.add_argument(
+        "--ipex",
+        action='store_true',
+        help="Use Intel® Extension for PyTorch*",
+    )
+    parser.add_argument(
+        "--bf16",
+        action='store_true',
+        help="Use bfloat16",
+    )
+    opt = parser.parse_args()
+    return opt
+
+
+def put_watermark(img, wm_encoder=None):
+    if wm_encoder is not None:
+        img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
+        img = wm_encoder.encode(img, 'dwtDct')
+        img = Image.fromarray(img[:, :, ::-1])
+    return img
+
+
+def main(opt):
+    seed_everything(opt.seed)
+
+    config = OmegaConf.load(f"{opt.config}")
+    device = torch.device("cuda") if opt.device == "cuda" else torch.device("cpu")
+    model = load_model_from_config(config, f"{opt.ckpt}", device)
+
+    if opt.plms:
+        sampler = PLMSSampler(model, device=device)
+    elif opt.dpm:
+        sampler = DPMSolverSampler(model, device=device)
+    else:
+        sampler = DDIMSampler(model, device=device)
+
+    os.makedirs(opt.outdir, exist_ok=True)
+    outpath = opt.outdir
+
+    print("Creating invisible watermark encoder (see https://github.com/ShieldMnt/invisible-watermark)...")
+    wm = "SDV2"
+    wm_encoder = WatermarkEncoder()
+    wm_encoder.set_watermark('bytes', wm.encode('utf-8'))
+
+    batch_size = opt.n_samples
+    n_rows = opt.n_rows if opt.n_rows > 0 else batch_size
+    if not opt.from_file:
+        prompt = opt.prompt
+        assert prompt is not None
+        data = [batch_size * [prompt]]
+
+    else:
+        print(f"reading prompts from {opt.from_file}")
+        with open(opt.from_file, "r") as f:
+            data = f.read().splitlines()
+            data = [p for p in data for i in range(opt.repeat)]
+            data = list(chunk(data, batch_size))
+
+    sample_path = os.path.join(outpath, "samples")
+    os.makedirs(sample_path, exist_ok=True)
+    sample_count = 0
+    base_count = len(os.listdir(sample_path))
+    grid_count = len(os.listdir(outpath)) - 1
+
+    start_code = None
+    if opt.fixed_code:
+        start_code = torch.randn([opt.n_samples, opt.C, opt.H // opt.f, opt.W // opt.f], device=device)
+
+    if opt.torchscript or opt.ipex:
+        transformer = model.cond_stage_model.model
+        unet = model.model.diffusion_model
+        decoder = model.first_stage_model.decoder
+        additional_context = torch.cpu.amp.autocast() if opt.bf16 else nullcontext()
+        shape = [opt.C, opt.H // opt.f, opt.W // opt.f]
+
+        if opt.bf16 and not opt.torchscript and not opt.ipex:
+            raise ValueError('Bfloat16 is supported only for torchscript+ipex')
+        if opt.bf16 and unet.dtype != torch.bfloat16:
+            raise ValueError("Use configs/stable-diffusion/intel/ configs with bf16 enabled if " +
+                             "you'd like to use bfloat16 with CPU.")
+        if unet.dtype == torch.float16 and device == torch.device("cpu"):
+            raise ValueError("Use configs/stable-diffusion/intel/ configs for your model if you'd like to run it on CPU.")
+
+        if opt.ipex:
+            import intel_extension_for_pytorch as ipex
+            bf16_dtype = torch.bfloat16 if opt.bf16 else None
+            transformer = transformer.to(memory_format=torch.channels_last)
+            transformer = ipex.optimize(transformer, level="O1", inplace=True)
+
+            unet = unet.to(memory_format=torch.channels_last)
+            unet = ipex.optimize(unet, level="O1", auto_kernel_selection=True, inplace=True, dtype=bf16_dtype)
+
+            decoder = decoder.to(memory_format=torch.channels_last)
+            decoder = ipex.optimize(decoder, level="O1", auto_kernel_selection=True, inplace=True, dtype=bf16_dtype)
+
+        if opt.torchscript:
+            with torch.no_grad(), additional_context:
+                # get UNET scripted
+                if unet.use_checkpoint:
+                    raise ValueError("Gradient checkpoint won't work with tracing. " +
+                    "Use configs/stable-diffusion/intel/ configs for your model or disable checkpoint in your config.")
+
+                img_in = torch.ones(2, 4, 96, 96, dtype=torch.float32)
+                t_in = torch.ones(2, dtype=torch.int64)
+                context = torch.ones(2, 77, 1024, dtype=torch.float32)
+                scripted_unet = torch.jit.trace(unet, (img_in, t_in, context))
+                scripted_unet = torch.jit.optimize_for_inference(scripted_unet)
+                print(type(scripted_unet))
+                model.model.scripted_diffusion_model = scripted_unet
+
+                # get Decoder for first stage model scripted
+                samples_ddim = torch.ones(1, 4, 96, 96, dtype=torch.float32)
+                scripted_decoder = torch.jit.trace(decoder, (samples_ddim))
+                scripted_decoder = torch.jit.optimize_for_inference(scripted_decoder)
+                print(type(scripted_decoder))
+                model.first_stage_model.decoder = scripted_decoder
+
+        prompts = data[0]
+        print("Running a forward pass to initialize optimizations")
+        uc = None
+        if opt.scale != 1.0:
+            uc = model.get_learned_conditioning(batch_size * [""])
+        if isinstance(prompts, tuple):
+            prompts = list(prompts)
+
+        with torch.no_grad(), additional_context:
+            for _ in range(3):
+                c = model.get_learned_conditioning(prompts)
+            samples_ddim, _ = sampler.sample(S=5,
+                                             conditioning=c,
+                                             batch_size=batch_size,
+                                             shape=shape,
+                                             verbose=False,
+                                             unconditional_guidance_scale=opt.scale,
+                                             unconditional_conditioning=uc,
+                                             eta=opt.ddim_eta,
+                                             x_T=start_code)
+            print("Running a forward pass for decoder")
+            for _ in range(3):
+                x_samples_ddim = model.decode_first_stage(samples_ddim)
+
+    precision_scope = autocast if opt.precision=="autocast" or opt.bf16 else nullcontext
+    with torch.no_grad(), \
+        precision_scope(opt.device), \
+        model.ema_scope():
+            all_samples = list()
+            for n in trange(opt.n_iter, desc="Sampling"):
+                for prompts in tqdm(data, desc="data"):
+                    uc = None
+                    if opt.scale != 1.0:
+                        uc = model.get_learned_conditioning(batch_size * [""])
+                    if isinstance(prompts, tuple):
+                        prompts = list(prompts)
+                    c = model.get_learned_conditioning(prompts)
+                    shape = [opt.C, opt.H // opt.f, opt.W // opt.f]
+                    samples, _ = sampler.sample(S=opt.steps,
+                                                     conditioning=c,
+                                                     batch_size=opt.n_samples,
+                                                     shape=shape,
+                                                     verbose=False,
+                                                     unconditional_guidance_scale=opt.scale,
+                                                     unconditional_conditioning=uc,
+                                                     eta=opt.ddim_eta,
+                                                     x_T=start_code)
+
+                    x_samples = model.decode_first_stage(samples)
+                    x_samples = torch.clamp((x_samples + 1.0) / 2.0, min=0.0, max=1.0)
+
+                    for x_sample in x_samples:
+                        x_sample = 255. * rearrange(x_sample.cpu().numpy(), 'c h w -> h w c')
+                        img = Image.fromarray(x_sample.astype(np.uint8))
+                        img = put_watermark(img, wm_encoder)
+                        img.save(os.path.join(sample_path, f"{base_count:05}.png"))
+                        base_count += 1
+                        sample_count += 1
+
+                    all_samples.append(x_samples)
+
+            # additionally, save as grid
+            grid = torch.stack(all_samples, 0)
+            grid = rearrange(grid, 'n b c h w -> (n b) c h w')
+            grid = make_grid(grid, nrow=n_rows)
+
+            # to image
+            grid = 255. * rearrange(grid, 'c h w -> h w c').cpu().numpy()
+            grid = Image.fromarray(grid.astype(np.uint8))
+            grid = put_watermark(grid, wm_encoder)
+            grid.save(os.path.join(outpath, f'grid-{grid_count:04}.png'))
+            grid_count += 1
+
+    print(f"Your samples are ready and waiting for you here: \n{outpath} \n"
+          f" \nEnjoy.")
+
+
+if __name__ == "__main__":
+    opt = parse_args()
+    main(opt)