forked from Mikubill/sd-webui-controlnet
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix: remove mmcv packages, prepare for new annotator/models
- Loading branch information
Showing
286 changed files
with
726 additions
and
40,217 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
from distutils import extension | ||
import numpy as np | ||
import cv2 | ||
import torch | ||
from einops import rearrange | ||
|
||
import os | ||
from modules import devices | ||
from modules.paths import models_path | ||
|
||
class Network(torch.nn.Module): | ||
def __init__(self, model_path): | ||
super().__init__() | ||
|
||
self.netVggOne = torch.nn.Sequential( | ||
torch.nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1), | ||
torch.nn.ReLU(inplace=False), | ||
torch.nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1), | ||
torch.nn.ReLU(inplace=False) | ||
) | ||
|
||
self.netVggTwo = torch.nn.Sequential( | ||
torch.nn.MaxPool2d(kernel_size=2, stride=2), | ||
torch.nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1), | ||
torch.nn.ReLU(inplace=False), | ||
torch.nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1), | ||
torch.nn.ReLU(inplace=False) | ||
) | ||
|
||
self.netVggThr = torch.nn.Sequential( | ||
torch.nn.MaxPool2d(kernel_size=2, stride=2), | ||
torch.nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1), | ||
torch.nn.ReLU(inplace=False), | ||
torch.nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1), | ||
torch.nn.ReLU(inplace=False), | ||
torch.nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1), | ||
torch.nn.ReLU(inplace=False) | ||
) | ||
|
||
self.netVggFou = torch.nn.Sequential( | ||
torch.nn.MaxPool2d(kernel_size=2, stride=2), | ||
torch.nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1), | ||
torch.nn.ReLU(inplace=False), | ||
torch.nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1), | ||
torch.nn.ReLU(inplace=False), | ||
torch.nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1), | ||
torch.nn.ReLU(inplace=False) | ||
) | ||
|
||
self.netVggFiv = torch.nn.Sequential( | ||
torch.nn.MaxPool2d(kernel_size=2, stride=2), | ||
torch.nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1), | ||
torch.nn.ReLU(inplace=False), | ||
torch.nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1), | ||
torch.nn.ReLU(inplace=False), | ||
torch.nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1), | ||
torch.nn.ReLU(inplace=False) | ||
) | ||
|
||
self.netScoreOne = torch.nn.Conv2d(in_channels=64, out_channels=1, kernel_size=1, stride=1, padding=0) | ||
self.netScoreTwo = torch.nn.Conv2d(in_channels=128, out_channels=1, kernel_size=1, stride=1, padding=0) | ||
self.netScoreThr = torch.nn.Conv2d(in_channels=256, out_channels=1, kernel_size=1, stride=1, padding=0) | ||
self.netScoreFou = torch.nn.Conv2d(in_channels=512, out_channels=1, kernel_size=1, stride=1, padding=0) | ||
self.netScoreFiv = torch.nn.Conv2d(in_channels=512, out_channels=1, kernel_size=1, stride=1, padding=0) | ||
|
||
self.netCombine = torch.nn.Sequential( | ||
torch.nn.Conv2d(in_channels=5, out_channels=1, kernel_size=1, stride=1, padding=0), | ||
torch.nn.Sigmoid() | ||
) | ||
|
||
self.load_state_dict({strKey.replace('module', 'net'): tenWeight for strKey, tenWeight in torch.load(model_path).items()}) | ||
# end | ||
|
||
def forward(self, tenInput): | ||
tenInput = tenInput * 255.0 | ||
tenInput = tenInput - torch.tensor(data=[104.00698793, 116.66876762, 122.67891434], dtype=tenInput.dtype, device=tenInput.device).view(1, 3, 1, 1) | ||
|
||
tenVggOne = self.netVggOne(tenInput) | ||
tenVggTwo = self.netVggTwo(tenVggOne) | ||
tenVggThr = self.netVggThr(tenVggTwo) | ||
tenVggFou = self.netVggFou(tenVggThr) | ||
tenVggFiv = self.netVggFiv(tenVggFou) | ||
|
||
tenScoreOne = self.netScoreOne(tenVggOne) | ||
tenScoreTwo = self.netScoreTwo(tenVggTwo) | ||
tenScoreThr = self.netScoreThr(tenVggThr) | ||
tenScoreFou = self.netScoreFou(tenVggFou) | ||
tenScoreFiv = self.netScoreFiv(tenVggFiv) | ||
|
||
tenScoreOne = torch.nn.functional.interpolate(input=tenScoreOne, size=(tenInput.shape[2], tenInput.shape[3]), mode='bilinear', align_corners=False) | ||
tenScoreTwo = torch.nn.functional.interpolate(input=tenScoreTwo, size=(tenInput.shape[2], tenInput.shape[3]), mode='bilinear', align_corners=False) | ||
tenScoreThr = torch.nn.functional.interpolate(input=tenScoreThr, size=(tenInput.shape[2], tenInput.shape[3]), mode='bilinear', align_corners=False) | ||
tenScoreFou = torch.nn.functional.interpolate(input=tenScoreFou, size=(tenInput.shape[2], tenInput.shape[3]), mode='bilinear', align_corners=False) | ||
tenScoreFiv = torch.nn.functional.interpolate(input=tenScoreFiv, size=(tenInput.shape[2], tenInput.shape[3]), mode='bilinear', align_corners=False) | ||
|
||
return self.netCombine(torch.cat([ tenScoreOne, tenScoreTwo, tenScoreThr, tenScoreFou, tenScoreFiv ], 1)) | ||
# end | ||
# end | ||
|
||
netNetwork = None | ||
remote_model_path = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/network-bsds500.pth" | ||
modeldir = os.path.join(models_path, "hed") | ||
old_modeldir = os.path.dirname(os.path.realpath(__file__)) | ||
|
||
def apply_hed(input_image): | ||
global netNetwork | ||
if netNetwork is None: | ||
modelpath = os.path.join(modeldir, "network-bsds500.pth") | ||
old_modelpath = os.path.join(old_modeldir, "network-bsds500.pth") | ||
if os.path.exists(old_modelpath): | ||
modelpath = old_modelpath | ||
elif not os.path.exists(modelpath): | ||
from basicsr.utils.download_util import load_file_from_url | ||
load_file_from_url(remote_model_path, model_dir=modeldir) | ||
netNetwork = Network(modelpath) | ||
netNetwork.to(devices.get_device_for("controlnet")).eval() | ||
|
||
assert input_image.ndim == 3 | ||
input_image = input_image[:, :, ::-1].copy() | ||
with torch.no_grad(): | ||
image_hed = torch.from_numpy(input_image).float().to(devices.get_device_for("controlnet")) | ||
image_hed = image_hed / 255.0 | ||
image_hed = rearrange(image_hed, 'h w c -> 1 c h w') | ||
edge = netNetwork(image_hed)[0] | ||
edge = (edge.cpu().numpy() * 255.0).clip(0, 255).astype(np.uint8) | ||
return edge[0] | ||
|
||
def unload_hed_model(): | ||
global netNetwork | ||
if netNetwork is not None: | ||
netNetwork.cpu() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,212 @@ | ||
import numpy as np | ||
import cv2 | ||
import torch | ||
|
||
import os | ||
from modules import devices | ||
from modules.paths import models_path | ||
|
||
import mmcv | ||
from mmdet.apis import inference_detector, init_detector | ||
from mmpose.apis import inference_top_down_pose_model | ||
from mmpose.apis import init_pose_model, process_mmdet_results, vis_pose_result | ||
|
||
|
||
def preprocessing(image, device): | ||
# Resize | ||
scale = 640 / max(image.shape[:2]) | ||
image = cv2.resize(image, dsize=None, fx=scale, fy=scale) | ||
raw_image = image.astype(np.uint8) | ||
|
||
# Subtract mean values | ||
image = image.astype(np.float32) | ||
image -= np.array( | ||
[ | ||
float(104.008), | ||
float(116.669), | ||
float(122.675), | ||
] | ||
) | ||
|
||
# Convert to torch.Tensor and add "batch" axis | ||
image = torch.from_numpy(image.transpose(2, 0, 1)).float().unsqueeze(0) | ||
image = image.to(device) | ||
|
||
return image, raw_image | ||
|
||
|
||
def imshow_keypoints(img, | ||
pose_result, | ||
skeleton=None, | ||
kpt_score_thr=0.1, | ||
pose_kpt_color=None, | ||
pose_link_color=None, | ||
radius=4, | ||
thickness=1): | ||
"""Draw keypoints and links on an image. | ||
Args: | ||
img (ndarry): The image to draw poses on. | ||
pose_result (list[kpts]): The poses to draw. Each element kpts is | ||
a set of K keypoints as an Kx3 numpy.ndarray, where each | ||
keypoint is represented as x, y, score. | ||
kpt_score_thr (float, optional): Minimum score of keypoints | ||
to be shown. Default: 0.3. | ||
pose_kpt_color (np.array[Nx3]`): Color of N keypoints. If None, | ||
the keypoint will not be drawn. | ||
pose_link_color (np.array[Mx3]): Color of M links. If None, the | ||
links will not be drawn. | ||
thickness (int): Thickness of lines. | ||
""" | ||
|
||
img_h, img_w, _ = img.shape | ||
img = np.zeros(img.shape) | ||
|
||
for idx, kpts in enumerate(pose_result): | ||
if idx > 1: | ||
continue | ||
kpts = kpts['keypoints'] | ||
# print(kpts) | ||
kpts = np.array(kpts, copy=False) | ||
|
||
# draw each point on image | ||
if pose_kpt_color is not None: | ||
assert len(pose_kpt_color) == len(kpts) | ||
|
||
for kid, kpt in enumerate(kpts): | ||
x_coord, y_coord, kpt_score = int(kpt[0]), int(kpt[1]), kpt[2] | ||
|
||
if kpt_score < kpt_score_thr or pose_kpt_color[kid] is None: | ||
# skip the point that should not be drawn | ||
continue | ||
|
||
color = tuple(int(c) for c in pose_kpt_color[kid]) | ||
cv2.circle(img, (int(x_coord), int(y_coord)), | ||
radius, color, -1) | ||
|
||
# draw links | ||
if skeleton is not None and pose_link_color is not None: | ||
assert len(pose_link_color) == len(skeleton) | ||
|
||
for sk_id, sk in enumerate(skeleton): | ||
pos1 = (int(kpts[sk[0], 0]), int(kpts[sk[0], 1])) | ||
pos2 = (int(kpts[sk[1], 0]), int(kpts[sk[1], 1])) | ||
|
||
if (pos1[0] <= 0 or pos1[0] >= img_w or pos1[1] <= 0 or pos1[1] >= img_h or pos2[0] <= 0 | ||
or pos2[0] >= img_w or pos2[1] <= 0 or pos2[1] >= img_h or kpts[sk[0], 2] < kpt_score_thr | ||
or kpts[sk[1], 2] < kpt_score_thr or pose_link_color[sk_id] is None): | ||
# skip the link that should not be drawn | ||
continue | ||
color = tuple(int(c) for c in pose_link_color[sk_id]) | ||
cv2.line(img, pos1, pos2, color, thickness=thickness) | ||
|
||
return img | ||
|
||
|
||
human_det, pose_model = None, None | ||
det_model_path = "https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth" | ||
pose_model_path = "https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_256x192-b9e0b3ab_20200708.pth" | ||
|
||
modeldir = os.path.join(models_path, "keypose") | ||
old_modeldir = os.path.dirname(os.path.realpath(__file__)) | ||
|
||
det_config = 'faster_rcnn_r50_fpn_coco.py' | ||
pose_config = 'hrnet_w48_coco_256x192.py' | ||
|
||
det_checkpoint = 'faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth' | ||
pose_checkpoint = 'hrnet_w48_coco_256x192-b9e0b3ab_20200708.pth' | ||
det_cat_id = 1 | ||
bbox_thr = 0.2 | ||
|
||
skeleton = [ | ||
[15, 13], [13, 11], [16, 14], [14, 12], [11, 12], [5, 11], [6, 12], [5, 6], [5, 7], [6, 8], | ||
[7, 9], [8, 10], | ||
[1, 2], [0, 1], [0, 2], [1, 3], [2, 4], [3, 5], [4, 6] | ||
] | ||
|
||
pose_kpt_color = [ | ||
[51, 153, 255], [51, 153, 255], [51, 153, 255], [51, 153, 255], [51, 153, 255], | ||
[0, 255, 0], | ||
[255, 128, 0], [0, 255, 0], [255, 128, 0], [0, 255, 0], [255, 128, 0], [0, 255, 0], | ||
[255, 128, 0], | ||
[0, 255, 0], [255, 128, 0], [0, 255, 0], [255, 128, 0] | ||
] | ||
|
||
pose_link_color = [ | ||
[0, 255, 0], [0, 255, 0], [255, 128, 0], [255, 128, 0], | ||
[51, 153, 255], [51, 153, 255], [51, 153, 255], [51, 153, 255], [0, 255, 0], | ||
[255, 128, 0], | ||
[0, 255, 0], [255, 128, 0], [51, 153, 255], [51, 153, 255], [51, 153, 255], | ||
[51, 153, 255], | ||
[51, 153, 255], [51, 153, 255], [51, 153, 255] | ||
] | ||
|
||
def find_download_model(checkpoint, remote_path): | ||
modelpath = os.path.join(modeldir, checkpoint) | ||
old_modelpath = os.path.join(old_modeldir, checkpoint) | ||
|
||
if os.path.exists(old_modelpath): | ||
modelpath = old_modelpath | ||
elif not os.path.exists(modelpath): | ||
from basicsr.utils.download_util import load_file_from_url | ||
load_file_from_url(remote_path, model_dir=modeldir) | ||
|
||
return modelpath | ||
|
||
def apply_keypose(input_image): | ||
global human_det, pose_model | ||
if netNetwork is None: | ||
det_model_local = find_download_model(det_checkpoint, det_model_path) | ||
hrnet_model_local = find_download_model(pose_checkpoint, pose_model_path) | ||
det_config_mmcv = mmcv.Config.fromfile(det_config) | ||
pose_config_mmcv = mmcv.Config.fromfile(pose_config) | ||
human_det = init_detector(det_config_mmcv, det_model_local, device=devices.get_device_for("controlnet")) | ||
pose_model = init_pose_model(pose_config_mmcv, hrnet_model_local, device=devices.get_device_for("controlnet")) | ||
|
||
assert input_image.ndim == 3 | ||
input_image = input_image.copy() | ||
with torch.no_grad(): | ||
image = torch.from_numpy(input_image).float().to(devices.get_device_for("controlnet")) | ||
image = image / 255.0 | ||
mmdet_results = inference_detector(human_det, image) | ||
|
||
# keep the person class bounding boxes. | ||
person_results = process_mmdet_results(mmdet_results, det_cat_id) | ||
|
||
return_heatmap = False | ||
dataset = pose_model.cfg.data['test']['type'] | ||
|
||
# e.g. use ('backbone', ) to return backbone feature | ||
output_layer_names = None | ||
pose_results, _ = inference_top_down_pose_model( | ||
pose_model, | ||
image, | ||
person_results, | ||
bbox_thr=bbox_thr, | ||
format='xyxy', | ||
dataset=dataset, | ||
dataset_info=None, | ||
return_heatmap=return_heatmap, | ||
outputs=output_layer_names | ||
) | ||
|
||
im_keypose_out = imshow_keypoints( | ||
image, | ||
pose_results, | ||
skeleton=skeleton, | ||
pose_kpt_color=pose_kpt_color, | ||
pose_link_color=pose_link_color, | ||
radius=2, | ||
thickness=2 | ||
) | ||
im_keypose_out = im_keypose_out.astype(np.uint8) | ||
|
||
# image_hed = rearrange(image_hed, 'h w c -> 1 c h w') | ||
# edge = netNetwork(image_hed)[0] | ||
# edge = (edge.cpu().numpy() * 255.0).clip(0, 255).astype(np.uint8) | ||
return im_keypose_out | ||
|
||
|
||
def unload_hed_model(): | ||
global netNetwork | ||
if netNetwork is not None: | ||
netNetwork.cpu() |
Oops, something went wrong.