diff --git a/rope/Coordinator.py b/rope/Coordinator.py index c6bb2af8..274f5e2c 100644 --- a/rope/Coordinator.py +++ b/rope/Coordinator.py @@ -97,8 +97,8 @@ def coordinator(): # action.pop(0) elif action [0][0] == "parameters": if action[0][1]['UpscaleState']: - if not vm.resnet_model: - vm.resnet_model = load_resnet_model() + # if not vm.resnet_model: + # vm.resnet_model = load_resnet_model() index = action[0][1]['UpscaleMode'] if action[0][1]['UpscaleModes'][index] == 'GFPGAN': if not vm.GFPGAN_model: @@ -106,6 +106,12 @@ def coordinator(): elif action[0][1]['UpscaleModes'][index] == 'CF': if not vm.codeformer_model: vm.codeformer_model = load_codeformer_model() + elif action[0][1]['UpscaleModes'][index] == 'GPEN256': + if not vm.GPEN_256_model: + vm.GPEN_256_model = load_GPEN_256_model() + elif action[0][1]['UpscaleModes'][index] == 'GPEN512': + if not vm.GPEN_512_model: + vm.GPEN_512_model = load_GPEN_512_model() if action[0][1]["CLIPState"]: if not vm.clip_session: vm.clip_session = load_clip_model() @@ -177,7 +183,7 @@ def load_swapper_model(): emap = onnx.numpy_helper.to_array(graph.initializer[-1]) sess_options = onnxruntime.SessionOptions() - sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_DISABLE_ALL + # sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_DISABLE_ALL @@ -193,10 +199,17 @@ def load_clip_model(): clip_session.to(device) return clip_session +def load_GPEN_512_model(): + session = onnxruntime.InferenceSession( "./models/GPEN-BFR-512.onnx", providers=["CUDAExecutionProvider", 'CPUExecutionProvider']) + return session + +def load_GPEN_256_model(): + session = onnxruntime.InferenceSession( "./models/GPEN-BFR-256.onnx", providers=["CUDAExecutionProvider", 'CPUExecutionProvider']) + return session def load_GFPGAN_model(): - GFPGAN_session = onnxruntime.InferenceSession( "./models/GFPGANv1.4.onnx", providers=["CUDAExecutionProvider", 'CPUExecutionProvider']) - return GFPGAN_session + session = onnxruntime.InferenceSession( "./models/GPEN-BFR-512.onnx", providers=["CUDAExecutionProvider", 'CPUExecutionProvider']) + return session def load_codeformer_model(): codeformer_session = onnxruntime.InferenceSession( "./models/codeformer_fp16.onnx", providers=["CUDAExecutionProvider", 'CPUExecutionProvider']) diff --git a/rope/Dicts.py b/rope/Dicts.py index 29b443bf..9740982c 100644 --- a/rope/Dicts.py +++ b/rope/Dicts.py @@ -1,8 +1,8 @@ PARAM_BUTTONS_PARAMS = { 'UpscaleState': False, 'UpscaleMode': 0, - 'UpscaleModes': ['GFPGAN', 'CF'], - 'UpscaleAmount': [100, 100], + 'UpscaleModes': ['GFPGAN', 'CF', 'GPEN256', 'GPEN512'], + 'UpscaleAmount': [100, 100, 100, 100], 'UpscaleMin': 0, 'UpscaleMax': 100, 'UpscaleInc': 5, @@ -16,7 +16,7 @@ 'DiffAmount': [4], 'DiffMin': 0, 'DiffMax': 100, - 'DiffInc': 1, + 'DiffInc': 2, 'DiffUnit': '%', 'DiffIcon': './rope/media/diff.png', 'DiffMessage': 'DIFFERENCER - Blends between Target Face and Swapped Face based on pixel difference. [LB: on/off, MW: difference threshold]', @@ -24,7 +24,7 @@ 'BorderState': False, 'BorderMode': 0, 'BorderModes': ['Brdr Top', 'Brdr Sides', 'Brdr Bttm', 'Brdr Blur'], - 'BorderAmount': [5, 5, 5, 10], + 'BorderAmount': [10, 10, 10, 10], 'BorderMin': 0, 'BorderMax': 64, 'BorderInc': 1, @@ -153,7 +153,16 @@ 'TransformIcon': './rope/media/construction.png', 'TransformMessage': 'SCALE - Adjust the scale of the face. Use with Background parser to blend into the image. 
[LB: on/off, MW: amount]', - + 'ColorState': False, + 'ColorMode': 0, + 'ColorModes': ['Red', 'Green', 'Blue'], + 'ColorAmount': [0, 0, 0], + 'ColorMin': -100, + 'ColorMax': 100, + 'ColorInc': 1, + 'ColorUnit': 'i', + 'ColorIcon': './rope/media/construction.png', + 'ColorMessage': 'SCALE - Adjust the scale of the face. Use with Background parser to blend into the image. [LB: on/off, MW: amount]', "CLIPText": '', diff --git a/rope/GUI.py b/rope/GUI.py index e25c55d9..21989edd 100644 --- a/rope/GUI.py +++ b/rope/GUI.py @@ -51,6 +51,7 @@ def __init__( self): self.stop_image = [] self.marker_icon = [] self.stop_marker_icon = [] + self.video_length = [] # self.window_y = [] # self.window_width = [] @@ -274,7 +275,7 @@ def __init__( self): self.options_frame_canvas1.grid( row = 1, column = 0, sticky='NEWS', pady = 0 ) # Label Frame 1 - self.label_frame1 = tk.LabelFrame( self.options_frame_canvas1, self.frame_style, height = 71, width = 1200 ) + self.label_frame1 = tk.LabelFrame( self.options_frame_canvas1, self.frame_style, height = 71, width = 1400 ) self.label_frame1.place(x=0, y=0) column=8 @@ -308,7 +309,10 @@ def __init__( self): column=column+125+x_space self.create_ui_button('RefDel', self.label_frame1, column, 8) - self.create_ui_button('Transform', self.label_frame1, column, 37) + self.create_ui_button('Transform', self.label_frame1, column, 37) + + column=column+125+x_space + self.create_ui_button('Color', self.label_frame1, column, 8) ######## Target Faces # Frame @@ -445,28 +449,26 @@ def key_event(self, event): # print(event.char, event.keysym, event.keycode) if self.focus_get() != self.CLIP_name and self.focus_get() != self.me_name and self.actions['ImgVidMode'] == 0: + frame = self.video_slider.get() if event.char == ' ': self.toggle_play_video() elif event.char == 'w': - frame = self.video_slider.get()+1 - self.video_slider.set(frame) - self.add_action("get_requested_video_frame", frame) - self.parameter_update_from_marker(frame) + frame += 1 elif event.char == 's': - frame = self.video_slider.get()-1 - self.video_slider.set(frame) - self.add_action("get_requested_video_frame", frame) - self.parameter_update_from_marker(frame) + frame -= 1 elif event.char == 'd': - frame = self.video_slider.get()+30 - self.video_slider.set(frame) - self.add_action("get_requested_video_frame", frame) - self.parameter_update_from_marker(frame) + frame += 30 elif event.char == 'a': - frame = self.video_slider.get()-30 - self.video_slider.set(frame) - self.add_action("get_requested_video_frame", frame) - self.parameter_update_from_marker(frame) + frame -= 30 + + if frame > self.video_length: + frame = self.video_length + elif frame < 0: + frame = 0 + + self.video_slider.set(frame) + self.add_action("get_requested_video_frame", frame) + self.parameter_update_from_marker(frame) def initialize_gui( self ): @@ -613,6 +615,8 @@ def __init__(self): self.update_ui_button('Orientation') self.update_ui_button('RefDel') self.update_ui_button('Transform') + self.update_ui_button('Color') + # self.change_video_quality(event) self.change_threads_amount(event) @@ -717,61 +721,61 @@ def load_source_faces(self): pass directory = self.json_dict["source faces"] - - if directory == None: - print("No Sourrce Face directory assigned") - - else: - filenames = os.listdir(directory) - - faces = [] - + filenames = [os.path.join(dirpath,f) for (dirpath, dirnames, filenames) in os.walk(directory) for f in filenames] + faces = [] + for file in filenames: # Does not include full path # Find all faces and ad to faces[] - for name 
in filenames: #should check if is an image - try: - temp_file = os.path.join(directory, name) - temp_file = cv2.imread(temp_file) - img = torch.from_numpy(temp_file).to('cuda') - img = img.permute(2,0,1) - kpss = self.detect(img, input_size = (640, 640), max_num=1, metric='default') - ret = [] - for i in range(kpss.shape[0]): - if kpss is not None: - face_kps = kpss[i] - - face_emb, img_out = self.recognize(img, face_kps) - ret.append([face_kps, face_emb, img_out]) - - if ret: - crop = cv2.cvtColor(ret[0][2].cpu().numpy(), cv2.COLOR_BGR2RGB) - crop = cv2.resize( crop, (82, 82)) - faces.append([crop, ret[0][1]]) - - except: - print('Bad file', name) + # Guess File type based on extension + try: + file_type = mimetypes.guess_type(file)[0][:5] + except: + print('Unrecognized file type:', file) + else: + # Its an image + if file_type == 'image': + try: + img = cv2.imread(file) + except: + print('Bad file', file) + else: + img = torch.from_numpy(img).to('cuda') + img = img.permute(2,0,1) + kpss = self.detect(img, input_size = (640, 640), max_num=1, metric='default') + ret = [] + for i in range(kpss.shape[0]): + if kpss is not None: + face_kps = kpss[i] + + face_emb, img_out = self.recognize(img, face_kps) + ret.append([face_kps, face_emb, img_out]) + + if ret: + crop = cv2.cvtColor(ret[0][2].cpu().numpy(), cv2.COLOR_BGR2RGB) + crop = cv2.resize( crop, (82, 82)) + faces.append([crop, ret[0][1]]) + + shift_i_len = len(self.source_faces) + + # Add faces[] images to buttons + for i in range(len(faces)): + new_source_face = self.source_face.copy() + self.source_faces.append(new_source_face) - shift_i_len = len(self.source_faces) + shift_i = i+ shift_i_len + + self.source_faces[shift_i]["Image"] = ImageTk.PhotoImage(image=Image.fromarray(faces[i][0])) + self.source_faces[shift_i]["Embedding"] = faces[i][1] + self.source_faces[shift_i]["TKButton"] = tk.Button(self.source_faces_canvas, self.inactive_button_style, image= self.source_faces[shift_i]["Image"], height = 86, width = 86) + self.source_faces[shift_i]["ButtonState"] = False - # Add faces[] images to buttons - for i in range(len(faces)): - new_source_face = self.source_face.copy() - self.source_faces.append(new_source_face) - - shift_i = i+ shift_i_len + self.source_faces[shift_i]["TKButton"].bind("", lambda event, arg=shift_i: self.toggle_source_faces_buttons_state(event, arg)) + self.source_faces[shift_i]["TKButton"].bind("", lambda event, arg=shift_i: self.toggle_source_faces_buttons_state_shift(event, arg)) + self.source_faces[shift_i]["TKButton"].bind("", self.source_faces_mouse_wheel) - self.source_faces[shift_i]["Image"] = ImageTk.PhotoImage(image=Image.fromarray(faces[i][0])) - self.source_faces[shift_i]["Embedding"] = faces[i][1] - self.source_faces[shift_i]["TKButton"] = tk.Button(self.source_faces_canvas, self.inactive_button_style, image= self.source_faces[shift_i]["Image"], height = 86, width = 86) - self.source_faces[shift_i]["ButtonState"] = False - - self.source_faces[shift_i]["TKButton"].bind("", lambda event, arg=shift_i: self.toggle_source_faces_buttons_state(event, arg)) - self.source_faces[shift_i]["TKButton"].bind("", lambda event, arg=shift_i: self.toggle_source_faces_buttons_state_shift(event, arg)) - self.source_faces[shift_i]["TKButton"].bind("", self.source_faces_mouse_wheel) - - self.source_faces_canvas.create_window(((shift_i_len//4)+i+1)*92,8, window = self.source_faces[shift_i]["TKButton"],anchor='nw') + self.source_faces_canvas.create_window(((shift_i_len//4)+i+1)*92,8, window = 
self.source_faces[shift_i]["TKButton"],anchor='nw') - self.source_faces_canvas.configure(scrollregion = self.source_faces_canvas.bbox("all")) - self.source_faces_canvas.xview_moveto(0) + self.source_faces_canvas.configure(scrollregion = self.source_faces_canvas.bbox("all")) + self.source_faces_canvas.xview_moveto(0) def find_faces(self, scope): try: @@ -935,55 +939,32 @@ def toggle_source_faces_buttons_state_shift(self, event, button): self.add_action("target_faces", self.target_faces, True, False) def populate_target_videos(self): + # Recursively read all media files from directory directory = self.json_dict["source videos"] - filenames = os.listdir(directory) + filenames = [os.path.join(dirpath,f) for (dirpath, dirnames, filenames) in os.walk(directory) for f in filenames] videos = [] - images = [] + images = [] self.target_media = [] self.target_media_buttons = [] self.target_media_canvas.delete("all") - for name in filenames: # Does not include full path + for file in filenames: # Does not include full path # Guess File type based on extension - file_type = mimetypes.guess_type(name)[0][:5] - - # Load as a object - file_path = os.path.join(directory, name) - - # Its an image - if file_type == 'image': - image = cv2.imread(file_path) - image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) - ratio = float(image.shape[0]) / image.shape[1] - - if ratio>1: - new_height = 82 - new_width = int(new_height / ratio) - else: - new_width = 82 - new_height = int(new_width * ratio) - - det_scale = float(new_height) / image.shape[0] - image = cv2.resize(image, (new_width, new_height)) - - det_img = np.zeros( (82, 82, 3), dtype=np.uint8 ) - image[:new_height, :new_width, :] = image - images.append([image, file_path]) - - # Its a video - elif file_type == 'video': - video = cv2.VideoCapture(file_path) - - if video.isOpened(): - # Grab a frame from the middle for a thumbnail - video.set(cv2.CAP_PROP_POS_FRAMES, int(video.get(cv2.CAP_PROP_FRAME_COUNT)/2)) - success, video_frame = video.read() - - if success: - video_frame = cv2.cvtColor(video_frame, cv2.COLOR_BGR2RGB) - - ratio = float(video_frame.shape[0]) / video_frame.shape[1] + try: + file_type = mimetypes.guess_type(file)[0][:5] + except: + print('Unrecognized file type:', file) + else: + # Its an image + if file_type == 'image': + try: + image = cv2.imread(file) + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + except: + print('Trouble reading file:', file) + else: + ratio = float(image.shape[0]) / image.shape[1] if ratio>1: new_height = 82 @@ -992,24 +973,53 @@ def populate_target_videos(self): new_width = 82 new_height = int(new_width * ratio) - det_scale = float(new_height) / video_frame.shape[0] - video_frame = cv2.resize(video_frame, (new_width, new_height)) + det_scale = float(new_height) / image.shape[0] + image = cv2.resize(image, (new_width, new_height)) det_img = np.zeros( (82, 82, 3), dtype=np.uint8 ) - video_frame[:new_height, :new_width, :] = video_frame - - videos.append([video_frame, file_path]) - video.release() - + image[:new_height, :new_width, :] = image + images.append([image, file]) + + # Its a video + elif file_type == 'video': + try: + video = cv2.VideoCapture(file) + except: + print('Trouble reading file:', file) else: - print("Couldn't read:", file_path) + if video.isOpened(): + + # Grab a frame from the middle for a thumbnail + video.set(cv2.CAP_PROP_POS_FRAMES, int(video.get(cv2.CAP_PROP_FRAME_COUNT)/2)) + success, video_frame = video.read() + + if success: + video_frame = cv2.cvtColor(video_frame, cv2.COLOR_BGR2RGB) + + ratio 
= float(video_frame.shape[0]) / video_frame.shape[1] + + if ratio>1: + new_height = 82 + new_width = int(new_height / ratio) + else: + new_width = 82 + new_height = int(new_width * ratio) + + det_scale = float(new_height) / video_frame.shape[0] + video_frame = cv2.resize(video_frame, (new_width, new_height)) + + det_img = np.zeros( (82, 82, 3), dtype=np.uint8 ) + video_frame[:new_height, :new_width, :] = video_frame + + videos.append([video_frame, file]) + video.release() + + else: + print('Trouble reading file:', file) + else: + print('Trouble opening file:', file) - else: - print("Couldn't open:", file_path) - - # Don't know what it is - else: - print('Unrecognized file type:', file_path) + if self.actions['ImgVidMode'] == 1: for i in range(len(images)): @@ -1040,6 +1050,7 @@ def load_target(self, button, media_file, media_type): if media_type == 'Videos': self.video_slider.set(0) + self.video_length = [] self.add_action("load_target_video", media_file, False) @@ -1261,6 +1272,7 @@ def get_action_length(self): def set_video_slider_length(self, video_length): + self.video_length = video_length self.video_slider.configure(to=video_length) def set_slider_position(self, position): diff --git a/rope/VideoManager.py b/rope/VideoManager.py index 6bd5206a..911f6cef 100644 --- a/rope/VideoManager.py +++ b/rope/VideoManager.py @@ -49,6 +49,8 @@ def __init__( self ): self.face_parsing_model = [] self.face_parsing_tensor = [] self.codeformer_model = [] + self.GPEN_256_model = [] + self.GPEN_256_model = [] self.FFHQ_kps = np.array([[ 192.98138, 239.94708 ], [ 318.90277, 240.1936 ], [ 256.63416, 314.01935 ], [ 201.26117, 371.41043 ], [ 313.08905, 371.15118 ] ]) @@ -517,7 +519,23 @@ def swap_video(self, target_image, frame_number, change_parameters): # Load frame into VRAM img = torch.from_numpy(target_image).to('cuda') #HxWxc - img = img.permute(2,0,1)#cxHxW + img = img.permute(2,0,1)#cxHxW + + #Scale up frame if it is smaller than 512 + img_x = img.size()[2] + img_y = img.size()[1] + + if img_x<512 and img_y<512: + if img_x <= img_y: + tscale = v2.Resize((512, 512)) + + elif img_x<512: + tscale = v2.Resize((int(512*img_y/img_x), 512)) + img = tscale(img) + + elif img_y<512: + tscale = v2.Resize((512, int(512*img_x/img_y))) + img = tscale(img) # Rotate the frame if parameters['OrientationState']: @@ -565,7 +583,15 @@ def swap_video(self, target_image, frame_number, change_parameters): img = img.permute(1,2,0) if self.perf_test: - print('------------------------') + print('------------------------') + + # Unscale small videos + if img_x <512 or img_y < 512: + tscale = v2.Resize((img_y, img_x)) + img = img.permute(2,0,1) + img = tscale(img) + img = img.permute(1,2,0) + img = img.cpu().numpy() return img.astype(np.uint8) @@ -612,10 +638,11 @@ def swap_core(self, img, kps, s_e, parameters, frame): # img = RGB # Grab 512 face from image and create 256 and 128 copys original_face_512 = v2.functional.affine(img, tform.rotation*57.2958, (tform.translation[0], tform.translation[1]) , tform.scale, 0, center = (0,0), interpolation=v2.InterpolationMode.BILINEAR ) + original_face_512 = v2.functional.crop(original_face_512, 0,0, 512, 512)# 3, 512, 512 original_face_256 = t256(original_face_512) original_face_128 = t128(original_face_256) - + # Optional Scaling # change the thransform matrix if parameters['TransformState']: original_face_128 = v2.functional.affine(original_face_128, 0, (0,0) , 1+parameters['TransformAmount'][0]/100, 0, center = (63,63), interpolation=v2.InterpolationMode.BILINEAR) @@ -650,7 +677,7 
@@ def swap_core(self, img, kps, s_e, parameters, frame): # img = RGB io_binding.bind_output(name=self.output_names[0], device_type='cuda', device_id=0, element_type=np.float32, shape=tuple(swap.shape), buffer_ptr=swap.data_ptr()) # Sync and run model - cpu_syncvec = self.syncvec.cpu() + syncvec = self.syncvec.cpu() self.swapper_model.run_with_iobinding(io_binding) if parameters['StrengthState']: @@ -697,8 +724,18 @@ def swap_core(self, img, kps, s_e, parameters, frame): # img = RGB # GFPGAN if parameters["UpscaleState"] and parameters['UpscaleMode']==0: - swap = self.func_w_test('GFPGAN_onnx', self.apply_GFPGAN, swap, parameters) + swap = self.func_w_test('GFPGAN', self.apply_GFPGAN, swap, parameters) + + # GPEN_256 + if parameters["UpscaleState"] and parameters['UpscaleMode']==2: + GPEN_resize = t256(swap) + swap = self.func_w_test('GPEN_256', self.apply_GPEN_256, swap, parameters) + swap = t512(swap) + # GPEN_512 + if parameters["UpscaleState"] and parameters['UpscaleMode']==3: + swap = self.func_w_test('GPEN_512', self.apply_GPEN_512, swap, parameters) + # Occluder if parameters["OccluderState"]: mask = self.func_w_test('occluder', self.apply_occlusion , original_face_256, parameters["OccluderAmount"][0]) @@ -733,6 +770,14 @@ def swap_core(self, img, kps, s_e, parameters, frame): # img = RGB gauss = transforms.GaussianBlur(parameters['BlurAmount'][0]*2+1, (parameters['BlurAmount'][0]+1)*0.2) swap_mask = gauss(swap_mask) + # Apply color corerctions + if parameters['ColorState']: + swap = swap.permute(1, 2, 0).type(torch.float32) + del_color = torch.tensor([parameters['ColorAmount'][0], parameters['ColorAmount'][1], parameters['ColorAmount'][2]], device=device) + swap += del_color + swap = torch.clamp(swap, min=0., max=255.) + swap = swap.permute(2, 0, 1).type(torch.uint8) + # Combine border and swap mask, scale, and apply to swap swap_mask = torch.mul(swap_mask, border_mask) swap_mask = t512(swap_mask) @@ -816,8 +861,7 @@ def apply_occlusion(self, img, amount): io_binding.bind_output(name='output', device_type='cuda', device_id=0, element_type=np.float32, shape=(1,1,256,256), buffer_ptr=outpred.data_ptr()) # Sync and run model - syncvec = torch.empty((1,1), dtype=torch.float32, device=device) - syncvec = syncvec.cpu() + syncvec = self.syncvec.cpu() self.occluder_model.run_with_iobinding(io_binding) outpred = torch.squeeze(outpred) @@ -982,6 +1026,110 @@ def apply_bg_face_parser(self, img, FaceParserAmount): return outpred # @profile + def apply_GPEN_256(self, swapped_face_upscaled, parameters): + # Set up Transformation + dst = self.arcface_dst * 4.0 + dst[:,0] += 32.0 + tform = trans.SimilarityTransform() + + t512 = v2.Resize((512, 512), antialias=True) + t256 = v2.Resize((256, 256), antialias=True) + + # # Select detection approach + # if parameters['TestState']: + # try: + # dst = self.ret50_landmarks(swapped_face_upscaled) + # except: + # return swapped_face_upscaled + + tform.estimate(dst, self.FFHQ_kps) + + # Transform, scale, and normalize + temp = v2.functional.affine(swapped_face_upscaled, tform.rotation*57.2958, (tform.translation[0], tform.translation[1]) , tform.scale, 0, center = (0,0) ) + temp = v2.functional.crop(temp, 0,0, 512, 512) + temp = torch.div(temp, 255) + temp = v2.functional.normalize(temp, (0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=False) + temp = t256(temp) + temp = torch.unsqueeze(temp, 0) + + # Bindings + outpred = torch.empty((1,3,256,256), dtype=torch.float32, device=device).contiguous() + io_binding = self.GPEN_256_model.io_binding() + 
io_binding.bind_input(name='input', device_type='cuda', device_id=0, element_type=np.float32, shape=(1,3,256,256), buffer_ptr=temp.data_ptr()) + io_binding.bind_output(name='output', device_type='cuda', device_id=0, element_type=np.float32, shape=(1,3,256,256), buffer_ptr=outpred.data_ptr()) + + # Sync and run model + syncvec = self.syncvec.cpu() + self.GPEN_256_model.run_with_iobinding(io_binding) + + # Format back to cxHxW @ 255 + outpred = torch.squeeze(outpred) + outpred = torch.clamp(outpred, -1, 1) + outpred = torch.add(outpred, 1) + outpred = torch.div(outpred, 2) + outpred = torch.mul(outpred, 255) + outpred = t512(outpred) + + # Invert Transform + outpred = v2.functional.affine(outpred, tform.inverse.rotation*57.2958, (tform.inverse.translation[0], tform.inverse.translation[1]) , tform. + inverse.scale, 0, interpolation=v2.InterpolationMode.BILINEAR, center = (0,0) ) + + # Blend + alpha = float(parameters["UpscaleAmount"][2])/100.0 + outpred = torch.add(torch.mul(outpred, alpha), torch.mul(swapped_face_upscaled, 1-alpha)) + + return outpred + + + def apply_GPEN_512(self, swapped_face_upscaled, parameters): + # Set up Transformation + dst = self.arcface_dst * 4.0 + dst[:,0] += 32.0 + tform = trans.SimilarityTransform() + + # # Select detection approach + # if parameters['TestState']: + # try: + # dst = self.ret50_landmarks(swapped_face_upscaled) + # except: + # return swapped_face_upscaled + + tform.estimate(dst, self.FFHQ_kps) + + # Transform, scale, and normalize + temp = v2.functional.affine(swapped_face_upscaled, tform.rotation*57.2958, (tform.translation[0], tform.translation[1]) , tform.scale, 0, center = (0,0) ) + temp = v2.functional.crop(temp, 0,0, 512, 512) + temp = torch.div(temp, 255) + temp = v2.functional.normalize(temp, (0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=False) + temp = torch.unsqueeze(temp, 0) + + # Bindings + outpred = torch.empty((1,3,512,512), dtype=torch.float32, device=device).contiguous() + io_binding = self.GPEN_512_model.io_binding() + io_binding.bind_input(name='input', device_type='cuda', device_id=0, element_type=np.float32, shape=(1,3,512,512), buffer_ptr=temp.data_ptr()) + io_binding.bind_output(name='output', device_type='cuda', device_id=0, element_type=np.float32, shape=(1,3,512,512), buffer_ptr=outpred.data_ptr()) + + # Sync and run model + syncvec = self.syncvec.cpu() + self.GPEN_512_model.run_with_iobinding(io_binding) + + # Format back to cxHxW @ 255 + outpred = torch.squeeze(outpred) + outpred = torch.clamp(outpred, -1, 1) + outpred = torch.add(outpred, 1) + outpred = torch.div(outpred, 2) + outpred = torch.mul(outpred, 255) + + # Invert Transform + outpred = v2.functional.affine(outpred, tform.inverse.rotation*57.2958, (tform.inverse.translation[0], tform.inverse.translation[1]) , tform. 
+        inverse.scale, 0, interpolation=v2.InterpolationMode.BILINEAR, center = (0,0) )
+
+        # Blend
+        alpha = float(parameters["UpscaleAmount"][3])/100.0
+        outpred = torch.add(torch.mul(outpred, alpha), torch.mul(swapped_face_upscaled, 1-alpha))
+
+        return outpred
+
     def apply_GFPGAN(self, swapped_face_upscaled, parameters):
         # Set up Transformation
         dst = self.arcface_dst * 4.0
@@ -1002,7 +1150,7 @@ def apply_GFPGAN(self, swapped_face_upscaled, parameters):
         temp = v2.functional.crop(temp, 0,0, 512, 512)
         temp = torch.div(temp, 255)
         temp = v2.functional.normalize(temp, (0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=False)
-        temp = torch.reshape(temp, (1, 3, 512, 512))#############change to unsqueeze
+        temp = torch.unsqueeze(temp, 0)

         # Bindings
         outpred = torch.empty((1,3,512,512), dtype=torch.float32, device=device).contiguous()
@@ -1011,8 +1159,7 @@
         io_binding.bind_output(name='output', device_type='cuda', device_id=0, element_type=np.float32, shape=(1,3,512,512), buffer_ptr=outpred.data_ptr())

         # Sync and run model
-        syncvec = torch.empty((1,1), dtype=torch.float32, device=device)
-        syncvec = syncvec.cpu()
+        syncvec = self.syncvec.cpu()
         self.GFPGAN_model.run_with_iobinding(io_binding)

         # Format back to cxHxW @ 255
@@ -1039,10 +1186,14 @@ def apply_fake_diff(self, swapped_face, original_face, DiffAmount):
         diff = swapped_face-original_face
         diff = torch.abs(diff)

-        fthresh = DiffAmount/2.0
+        # Find the difference between the swap and original, per channel
+        fthresh = DiffAmount*2.55
+
+        # Bimodal
         diff[diff<fthresh] = 0
         diff[diff>=fthresh] = 1

+        # If any of the channels exceeded the threshold, then add them to the mask
         diff = torch.sum(diff, dim=2)
         diff = torch.unsqueeze(diff, 2)
         diff[diff>0] = 1
@@ -1082,8 +1233,7 @@ def apply_codeformer(self, swapped_face_upscaled, parameters):
         io_binding.bind_output(name='y', device_type='cuda', device_id=0, element_type=np.float32, shape=(1,3,512,512), buffer_ptr=outpred.data_ptr())

         # Sync and run model
-        syncvec = torch.empty((1,1), dtype=torch.float32, device=device)
-        syncvec = syncvec.cpu()
+        syncvec = self.syncvec.cpu()
         self.codeformer_model.run_with_iobinding(io_binding)

         # Format back to cxHxW @ 255
@@ -1212,7 +1362,7 @@ def detect(self, img, input_size, max_num=0, metric='default'):
             io_binding.bind_output(output_names[i], 'cuda')

         # Sync and run model
-        cpu_syncvec = self.syncvec.cpu()
+        syncvec = self.syncvec.cpu()
         self.detection_model.run_with_iobinding(io_binding)
         net_outs = io_binding.copy_outputs_to_cpu()
@@ -1368,7 +1518,7 @@ def recognize(self, img, face_kps):
             io_binding.bind_output(output_names[i], 'cuda')

         # Sync and run model
-        cpu_syncvec = self.syncvec.cpu()
+        syncvec = self.syncvec.cpu()
         self.recognition_model.run_with_iobinding(io_binding)

         # Return embedding
@@ -1376,8 +1526,6 @@ def recognize(self, img, face_kps):

-    # # test out
-    # swap = swap.permute(1, 2, 0)
-    # swapped_face = swap.cpu().numpy()
-    # cv2.imwrite('2.jpg', swapped_face)
-    # # test out
\ No newline at end of file
+    # test = swap.permute(1, 2, 0)
+    # test = test.cpu().numpy()
+    # cv2.imwrite('2.jpg', test)
\ No newline at end of file
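
Notes on the changes above.

The new load_GPEN_256_model()/load_GPEN_512_model() loaders and apply_GPEN_* paths wire the GPEN-BFR ONNX models in the same way as the existing GFPGAN/CodeFormer restorers: NCHW float32 input named 'input', output named 'output', values normalized to [-1, 1]. A minimal smoke-test sketch of that contract, assuming the model file is present at ./models/GPEN-BFR-512.onnx and using the plain run() API instead of the io_binding path the diff uses:

    # Sketch only: exercises the GPEN-BFR-512 graph with the tensor names and
    # value range used in the diff ('input'/'output', NCHW float32 in [-1, 1]).
    import numpy as np
    import onnxruntime

    session = onnxruntime.InferenceSession(
        "./models/GPEN-BFR-512.onnx",
        providers=["CUDAExecutionProvider", "CPUExecutionProvider"])

    # Random face-sized input just to confirm the graph runs end to end.
    face = (np.random.rand(1, 3, 512, 512) * 2.0 - 1.0).astype(np.float32)
    restored = session.run(None, {"input": face})[0]

    # Output follows the same convention as the input: NCHW float32 in [-1, 1].
    print(restored.shape, float(restored.min()), float(restored.max()))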
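
The new ColorState block in swap_core() applies the Dicts.py ColorAmount values as per-channel offsets on the RGB swap before masking. A standalone sketch of that math, assuming swap is a CHW uint8 tensor and color_amount holds the R/G/B offsets in the -100..100 range defined by ColorMin/ColorMax:

    # Sketch of the per-channel color shift added in swap_core.
    import torch

    def apply_color_shift(swap: torch.Tensor, color_amount) -> torch.Tensor:
        swap = swap.permute(1, 2, 0).type(torch.float32)        # CHW -> HWC
        delta = torch.tensor(color_amount, dtype=torch.float32,
                             device=swap.device)                # (3,) R, G, B offsets
        swap = torch.clamp(swap + delta, min=0., max=255.)      # shift and clamp
        return swap.permute(2, 0, 1).type(torch.uint8)          # back to CHW uint8

    face = torch.randint(0, 256, (3, 512, 512), dtype=torch.uint8)
    shifted = apply_color_shift(face, [10, 0, -10])              # slightly warmer tint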
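
load_source_faces() and populate_target_videos() now walk the configured directory recursively and classify files by MIME type instead of listing a single folder. A sketch of that scan, separated from the GUI code, where unknown types are skipped with a message as in the diff:

    # Sketch of the recursive media scan used by the reworked GUI loaders.
    import mimetypes
    import os

    def scan_media(directory):
        files = [os.path.join(dirpath, f)
                 for dirpath, _, names in os.walk(directory)
                 for f in names]
        images, videos = [], []
        for file in files:
            mime = mimetypes.guess_type(file)[0]
            if mime is None:
                print('Unrecognized file type:', file)
            elif mime.startswith('image'):
                images.append(file)
            elif mime.startswith('video'):
                videos.append(file)
        return images, videos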
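
The key_event() rewrite collapses the four duplicated w/s/d/a branches into one frame computation that is clamped to [0, video_length] (now cached via set_video_slider_length) before the slider, the frame request, and the marker update are driven once. A small sketch of that clamp logic under those assumptions:

    # Sketch of the consolidated frame stepping from the key_event() change.
    def step_frame(current: int, key: str, video_length: int) -> int:
        step = {'w': 1, 's': -1, 'd': 30, 'a': -30}.get(key, 0)
        return max(0, min(current + step, video_length))

    assert step_frame(0, 's', 500) == 0      # cannot step below the first frame
    assert step_frame(495, 'd', 500) == 500  # cannot step past the last frame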