# app.py: Gradio demo app for audio-visual lip synthesis.
import argparse

import gradio as gr

from src.main.inference import Inference

# Supported generator architectures.
MODEL_TYPE = ['lstm', 'attn_lstm']

# Maps a display name in the UI dropdown to (checkpoint path, model type).
MODEL_NAME = {
    'lstm': ('./checkpoints/generator/benchmark/pure_lstmgen_l1.pth', 'lstm'),
    'lstm_syncnet': ('./checkpoints/generator/benchmark/pretrain_lstmgen_l1.pth', 'lstm'),
    'attn_lstm_syncnet': ('./checkpoints/generator/benchmark/attn_generator_020_l1_1e_2.pth', 'attn_lstm'),
}

print('Available models:', list(MODEL_NAME.keys()))
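
# To expose an additional checkpoint in the dropdown, it should be enough to
# add an entry here (hypothetical example; the path and name are placeholders):
#   MODEL_NAME['my_model'] = ('./checkpoints/generator/my_model.pth', 'attn_lstm')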
def func(video, audio, check, drop_down):
    """Run lip-sync inference with the selected model and return the result path."""
    path, model_type = MODEL_NAME[drop_down]
    print(f'Using checkpoint {path} (model type: {model_type})')
    parser = argparse.ArgumentParser(description="File for running Inference")
    parser.add_argument('--model_type', help='Type of generator model', default=model_type, type=str)
    parser.add_argument('--generator_checkpoint', type=str, default=path)
    parser.add_argument('--image2image_checkpoint', type=str, default='./checkpoints/image2image/image2image.pth', required=False)
    parser.add_argument('--input_face', type=str, default=video, required=False)
    parser.add_argument('--input_audio', type=str, default=audio, required=False)
    # parser.add_argument('--output_path', type=str, help="Path for saving the result", default='result.mp4', required=False)
    parser.add_argument('--fps', type=float, default=25, required=False)
    parser.add_argument('--fl_detector_batchsize', type=int, default=2)
    parser.add_argument('--generator_batchsize', type=int, default=2)
    parser.add_argument('--output_name', type=str, default="results.mp4")
    parser.add_argument('--only_fl', type=bool, default=False)
    parser.add_argument('--vis_fl', type=bool, default=check)
    parser.add_argument('--test_img2img', type=bool, help="Testing image2image module with no lip generation", default=False)
    # Parse an empty argument list so only the defaults above are used; a bare
    # parse_args() would re-read sys.argv from inside the Gradio callback.
    args = parser.parse_args([])
    Inference(args=args).start()
    return './results.mp4'
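
# Note: no real command-line parsing happens in func(); the parser is only a
# convenient way to build the namespace that Inference expects. An equivalent
# (hypothetical, untested) alternative would be constructing it directly:
#
#   args = argparse.Namespace(
#       model_type=model_type, generator_checkpoint=path,
#       image2image_checkpoint='./checkpoints/image2image/image2image.pth',
#       input_face=video, input_audio=audio, fps=25,
#       fl_detector_batchsize=2, generator_batchsize=2,
#       output_name='results.mp4', only_fl=False, vis_fl=check,
#       test_img2img=False,
#   )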
def gui():
    # Tab 1: drive lip sync from an input video.
    with gr.Blocks() as video_tab:
        with gr.Row():
            with gr.Column():
                video = gr.Video()
                audio = gr.Audio(source="upload", type="filepath")
            with gr.Column():
                outputs = gr.PlayableVideo()
        with gr.Row():
            with gr.Column():
                check_box = gr.Checkbox(value=False, label="Do you want to visualize the reconstructed facial landmarks?")
                drop_down = gr.Dropdown(list(MODEL_NAME.keys()), label="Select Model")
        with gr.Row():
            with gr.Column():
                inputs = [video, audio, check_box, drop_down]
                gr.Button("Sync").click(
                    fn=func,
                    inputs=inputs,
                    outputs=outputs,
                )

    # Tab 2: drive lip sync from a single input image.
    with gr.Blocks() as image_tab:
        with gr.Row():
            with gr.Column():
                video = gr.Image(type="filepath")
                audio = gr.Audio(source="upload", type="filepath")
            with gr.Column():
                outputs = gr.PlayableVideo()
        with gr.Row():
            with gr.Column():
                check_box = gr.Checkbox(value=False, label="Do you want to visualize the reconstructed facial landmarks?")
                drop_down = gr.Dropdown(list(MODEL_NAME.keys()), label="Select Model")
        with gr.Row():
            with gr.Column():
                inputs = [video, audio, check_box, drop_down]
                gr.Button("Sync").click(
                    fn=func,
                    inputs=inputs,
                    outputs=outputs,
                )
    # Top-level page: header markdown plus the two tabs defined above.
    with gr.Blocks() as main:
        gr.Markdown(
            """
            # Audio-Visual Lip Synthesis!
            ### Creator : Wish Suharitdamrong
            Upload a video or image together with an audio file below to see the output.
            """
        )
        tabs = gr.TabbedInterface([video_tab, image_tab], ['Using Video as input', 'Using Image as input'])
    main.launch()
if __name__ == "__main__":
    gui()
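
# Example usage (a sketch; assumes the checkpoints listed in MODEL_NAME exist
# under ./checkpoints and that dependencies such as gradio are installed):
#
#   $ python app.py
#
# Gradio then serves the interface locally (by default at http://127.0.0.1:7860).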