Add ViG models [NeurIPS 2022] #1578

Open · wants to merge 15 commits into base: main
fix test errors
iamhankai committed Dec 6, 2022
commit 38a908bf542f7b5cffa884e0c27587f10eb27ea9
2 changes: 1 addition & 1 deletion tests/test_models.py
@@ -270,7 +270,7 @@ def test_model_features_pretrained(model_name, batch_size):

EXCLUDE_JIT_FILTERS = [
Contributor Author

It seems these lines cannot be removed

Collaborator

I managed to fix the other JIT exceptions, so I'd rather not add more; I feel it's likely this can be supported with the appropriate type declarations, etc.

    '*iabn*', 'tresnet*',  # models using inplace abn unlikely to ever be scriptable
-     'dla*', 'hrnet*', 'ghostnet*',  # hopefully fix at some point
+     'dla*', 'hrnet*', 'ghostnet*', 'pvig*',  # hopefully fix at some point
    'vit_large_*', 'vit_huge_*', 'vit_gi*',
]
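For reference, the kind of type declaration the comment alludes to is a class-level `Optional` annotation, so TorchScript has a type for an attribute that is sometimes a tensor and sometimes `None`. A minimal sketch (hypothetical module, not the PR's code):

```python
from typing import Optional

import torch
import torch.nn as nn


class RelPosBlock(nn.Module):
    # Class-level annotation gives torch.jit.script a type for the sometimes-None attribute
    relative_pos: Optional[torch.Tensor]

    def __init__(self, use_relative_pos: bool = True, n: int = 196):
        super().__init__()
        self.relative_pos = torch.zeros(1, n, n) if use_relative_pos else None

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        rel = self.relative_pos
        if rel is not None:  # the None check narrows Optional[Tensor] to Tensor for the compiler
            x = x + rel
        return x


scripted = torch.jit.script(RelPosBlock())  # with the annotation, both configurations should script
```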

6 changes: 4 additions & 2 deletions timm/models/layers/gnn_layers.py
@@ -247,13 +247,15 @@ def __init__(self, in_channels, kernel_size=9, dilation=1, conv='mr', act_layer=
            nn.BatchNorm2d(in_channels),
        )
        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
+         self.relative_pos = None
        if relative_pos:
            relative_pos_tensor = get_2d_relative_pos_embed(in_channels,
                int(n**0.5)).unsqueeze(0).unsqueeze(1)
            relative_pos_tensor = F.interpolate(
                relative_pos_tensor, size=(n, n//(r*r)), mode='bicubic', align_corners=False)
-             self.relative_pos = nn.Parameter(-relative_pos_tensor.squeeze(1), requires_grad=False)
+             # self.relative_pos = nn.Parameter(-relative_pos_tensor.squeeze(1))
+             self.register_buffer('relative_pos', -relative_pos_tensor.squeeze(1))
        else:
            self.relative_pos = None

    def _get_relative_pos(self, relative_pos, H, W):
        if relative_pos is None or H * W == self.n:
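The switch from a frozen `nn.Parameter` to `register_buffer` matters because buffers are excluded from `parameters()`, so optimizers and weight decay never touch the fixed table, while the tensor still moves with `.to(device)` and is saved in `state_dict`. One caveat: `register_buffer` raises a `KeyError` if a plain attribute of the same name has already been assigned, so the order of any `self.relative_pos = None` declaration relative to the registration matters. A minimal sketch of the difference (hypothetical module names, not the PR's code):

```python
import torch
import torch.nn as nn


class WithFrozenParam(nn.Module):
    def __init__(self):
        super().__init__()
        # Still a Parameter: appears in parameters(), so optimizers iterate over it
        self.table = nn.Parameter(torch.zeros(4, 4), requires_grad=False)


class WithBuffer(nn.Module):
    def __init__(self):
        super().__init__()
        # A buffer: saved in state_dict and moved by .to()/.cuda(), invisible to optimizers
        self.register_buffer('table', torch.zeros(4, 4))


print(len(list(WithFrozenParam().parameters())))  # 1
print(len(list(WithBuffer().parameters())))       # 0
print('table' in WithBuffer().state_dict())       # True
```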
47 changes: 34 additions & 13 deletions timm/models/vision_gnn.py
@@ -21,7 +21,7 @@ def _cfg(url='', **kwargs):
        'num_classes': 1000, 'input_size': (3, 224, 224), 'pool_size': None,
        'crop_pct': .9, 'interpolation': 'bicubic',
        'mean': IMAGENET_DEFAULT_MEAN, 'std': IMAGENET_DEFAULT_STD,
-         'first_conv': 'patch_embed.proj', 'classifier': 'head',
+         'first_conv': 'stem.convs.0', 'classifier': 'prediction.4',
        'min_input_size': (3, 224, 224),
        **kwargs
    }
@@ -123,10 +123,11 @@ def __init__(self, opt, num_classes=1000, in_chans=3):
        stochastic = opt.use_stochastic
        conv = opt.conv
        drop_path = opt.drop_path
+         channels = opt.channels
+         self.num_features = channels[-1]  # num_features for consistency with other models

        blocks = opt.blocks
        self.n_blocks = sum(blocks)
-         channels = opt.channels
        reduce_ratios = [4, 2, 1, 1]
        dpr = [x.item() for x in torch.linspace(0, drop_path, self.n_blocks)]  # stochastic depth decay
        num_knn = [int(x.item()) for x in torch.linspace(k, k, self.n_blocks)]  # number of knn's k
@@ -152,11 +153,14 @@ def __init__(self, opt, num_classes=1000, in_chans=3):
                idx += 1
        self.backbone = Seq(*self.backbone)

-         self.prediction = Seq(nn.Conv2d(channels[-1], 1024, 1, bias=True),
+         self.act_layer = act_layer  # stored so reset_classifier can rebuild the head later
+         self.drop_rate = opt.dropout
+         if num_classes > 0:
+             self.prediction = Seq(nn.Conv2d(self.num_features, 1024, 1, bias=True),
                                    nn.BatchNorm2d(1024),
                                    act_layer(),
                                    nn.Dropout(opt.dropout),
                                    nn.Conv2d(1024, num_classes, 1, bias=True))
+         else:
+             self.prediction = nn.Identity()
        self.model_init()

    def model_init(self):
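Exposing `self.num_features` and allowing `num_classes=0` gives the model timm's standard feature-extractor contract: with an `nn.Identity` head, the forward pass returns pooled features of width `num_features`. A usage sketch, assuming the PR as merged:

```python
import timm
import torch

model = timm.create_model('pvig_s_224_gelu', num_classes=0)  # head becomes nn.Identity
feats = model(torch.randn(1, 3, 224, 224))
print(feats.shape)                           # (1, 640) for the small variant
assert feats.shape[1] == model.num_features  # generic code can size a new head from this
```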
@@ -174,13 +178,30 @@ def _get_pos_embed(self, pos_embed, H, W):
        else:
            return F.interpolate(pos_embed, size=(H, W), mode="bicubic")

-     def forward(self, inputs):
-         x = self.stem(inputs)
+     def reset_classifier(self, num_classes: int, global_pool=None):
+         self.num_classes = num_classes
+         if global_pool is not None:
+             assert global_pool in ('', 'avg')
+             self.global_pool = global_pool
+         if num_classes > 0:
+             self.prediction = Seq(nn.Conv2d(self.num_features, 1024, 1, bias=True),
+                                   nn.BatchNorm2d(1024),
+                                   self.act_layer(),  # act_layer and opt are __init__ locals, so use the stored copies
+                                   nn.Dropout(self.drop_rate),
+                                   nn.Conv2d(1024, num_classes, 1, bias=True))
+         else:
+             self.prediction = nn.Identity()
+
+     def forward_features(self, x):
+         x = self.stem(x)
        B, C, H, W = x.shape
        x = x + self._get_pos_embed(self.pos_embed, H, W)
        for i in range(len(self.backbone)):
            x = self.backbone[i](x)
+         return x
+
+     def forward(self, x):
+         x = self.forward_features(x)
        x = F.adaptive_avg_pool2d(x, 1)
        return self.prediction(x).squeeze(-1).squeeze(-1)
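Splitting `forward` into `forward_features` plus a pooled head follows the timm convention: downstream code can grab the unpooled feature map, and `reset_classifier` re-heads the model in place. A hedged sketch of the resulting interface (assumes the PR as merged, including the stored `act_layer`/`drop_rate`):

```python
import timm
import torch

model = timm.create_model('pvig_ti_224_gelu')
x = torch.randn(2, 3, 224, 224)

fmap = model.forward_features(x)   # unpooled map, (2, 384, 7, 7) for the tiny variant
logits = model(x)                  # (2, 1000) after global pooling + the 1x1-conv head

model.reset_classifier(10)         # rebuild the head for a 10-class task
print(model(x).shape)              # torch.Size([2, 10])
```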

@@ -200,7 +221,7 @@ def _create_pvig(variant, opt, pretrained=False, **kwargs):


@register_model
- def pvig_ti_224_gelu(pretrained=False, **kwargs):
+ def pvig_ti_224_gelu(pretrained=False, num_classes=1000, **kwargs):
    class OptInit:
        def __init__(self, drop_path_rate=0.0, **kwargs):
            self.k = 9  # neighbor num (default: 9)

@@ -216,13 +237,13 @@ def __init__(self, drop_path_rate=0.0, **kwargs):
            self.channels = [48, 96, 240, 384]  # number of channels of deep features

    opt = OptInit(**kwargs)
-     model = _create_pvig('pvig_ti_224_gelu', opt, pretrained)
+     model = _create_pvig('pvig_ti_224_gelu', opt, pretrained, num_classes=num_classes)
    model.default_cfg = default_cfgs['pvig_ti_224_gelu']
    return model
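With `@register_model`, this entrypoint (and the three variants below) becomes discoverable through the normal timm factory, so the models can be listed and instantiated by name. A quick sketch, assuming the PR is installed:

```python
import timm

print(timm.list_models('pvig*'))  # e.g. ['pvig_b_224_gelu', 'pvig_m_224_gelu', 'pvig_s_224_gelu', 'pvig_ti_224_gelu']
model = timm.create_model('pvig_ti_224_gelu', num_classes=10)  # num_classes now flows through to the head
```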


@register_model
- def pvig_s_224_gelu(pretrained=False, **kwargs):
+ def pvig_s_224_gelu(pretrained=False, num_classes=1000, **kwargs):
    class OptInit:
        def __init__(self, drop_path_rate=0.0, **kwargs):
            self.k = 9  # neighbor num (default: 9)

@@ -238,13 +259,13 @@ def __init__(self, drop_path_rate=0.0, **kwargs):
            self.channels = [80, 160, 400, 640]  # number of channels of deep features

    opt = OptInit(**kwargs)
-     model = _create_pvig('pvig_s_224_gelu', opt, pretrained)
+     model = _create_pvig('pvig_s_224_gelu', opt, pretrained, num_classes=num_classes)
    model.default_cfg = default_cfgs['pvig_s_224_gelu']
    return model


@register_model
- def pvig_m_224_gelu(pretrained=False, **kwargs):
+ def pvig_m_224_gelu(pretrained=False, num_classes=1000, **kwargs):
    class OptInit:
        def __init__(self, drop_path_rate=0.0, **kwargs):
            self.k = 9  # neighbor num (default: 9)

@@ -260,13 +281,13 @@ def __init__(self, drop_path_rate=0.0, **kwargs):
            self.channels = [96, 192, 384, 768]  # number of channels of deep features

    opt = OptInit(**kwargs)
-     model = _create_pvig('pvig_m_224_gelu', opt, pretrained)
+     model = _create_pvig('pvig_m_224_gelu', opt, pretrained, num_classes=num_classes)
    model.default_cfg = default_cfgs['pvig_m_224_gelu']
    return model


@register_model
- def pvig_b_224_gelu(pretrained=False, **kwargs):
+ def pvig_b_224_gelu(pretrained=False, num_classes=1000, **kwargs):
    class OptInit:
        def __init__(self, drop_path_rate=0.0, **kwargs):
            self.k = 9  # neighbor num (default: 9)

@@ -282,6 +303,6 @@ def __init__(self, drop_path_rate=0.0, **kwargs):
            self.channels = [128, 256, 512, 1024]  # number of channels of deep features

    opt = OptInit(**kwargs)
-     model = _create_pvig('pvig_b_224_gelu', opt, pretrained)
+     model = _create_pvig('pvig_b_224_gelu', opt, pretrained, num_classes=num_classes)
    model.default_cfg = default_cfgs['pvig_b_224_gelu']
    return model