@@ -6024,19 +6024,12 @@ def fn0(i0, i1):
         def fn1(i0, i1):
             return torch.lerp(i1, i0, 70000)
 
-        def compare(fn, inputs):
-            compiled = torch._dynamo.optimize("inductor")(fn)
-            expected = fn(*inputs)
-            actual = compiled(*inputs)
-            self.assertEqual(expected, actual)
-            self.assertEqual(expected.stride(), actual.stride())
-
-        compare(fn0, [torch.rand(10, 3, 10), torch.rand(3, 10, 10)])
-        compare(fn1, [torch.rand(3, 10, 10), torch.rand(3, 10, 10)])
+        self.common(fn0, [torch.rand(10, 3, 10), torch.rand(3, 10, 10)])
+        self.common(fn1, [torch.rand(3, 10, 10), torch.rand(3, 10, 10)])
 
     def test_unspec_inputs(self):
         if self.device == "cpu":
-            raise unittest.SkipTest("segfault with CPU backend")
+            raise unittest.SkipTest("Testing mixed devices")
 
         def fn(x, y):
             return x + y, x * y, x / y
@@ -6138,9 +6131,7 @@ def fn(x):
             return attn.softmax(dim=-1)
 
         x = torch.rand(128, 32, 63)
-        res_ref = fn(x)
-        res = torch._dynamo.optimize("inductor")(fn)(x)
-        self.assertEqual(res, res_ref)
+        self.common(fn, (x,))
 
     def test_kwargs(self):
         if self.device == "cuda":
@@ -6242,9 +6233,6 @@ def fn(a, b):
         )
 
     def test_index_dynamic_shapes(self):
-        if self.device == "cuda":
-            raise unittest.SkipTest("index dynamic shapes only supports cpu")
-
         # Repro from vision_maskrcnn
         def fn(arg0_1):
             unsqueeze = arg0_1.unsqueeze(0)
@@ -6255,7 +6243,7 @@ def fn(arg0_1):
                 start=0,
                 step=1,
                 dtype=torch.int64,
-                device="cpu",
+                device=arg0_1.device,
                 requires_grad=False,
             )
             convert_element_type_1 = iota.to(torch.float32)
@@ -6267,7 +6255,7 @@ def fn(arg0_1):
                 start=0,
                 step=1,
                 dtype=torch.int64,
-                device="cpu",
+                device=arg0_1.device,
                 requires_grad=False,
             )
             convert_element_type_3 = iota_1.to(torch.float32)
@@ -6507,9 +6495,9 @@ def fn(a):
             return a[out_features.index(in_feature)]
 
         x = [
-            torch.rand([1, 256, 100, 152]),
-            torch.rand([1, 256, 50, 76]),
-            torch.rand([1, 256, 25, 38]),
+            torch.rand([1, 256, 100, 152], device=self.device),
+            torch.rand([1, 256, 50, 76], device=self.device),
+            torch.rand([1, 256, 25, 38], device=self.device),
         ]
         opt_fn = torch._dynamo.optimize("inductor")(fn)
         same(fn(x), opt_fn(x))
@@ -6521,8 +6509,7 @@ def fn(a):
             return y
 
         x = torch.rand(48, 3, 512, 512)
-        opt_fn = torch._dynamo.optimize("inductor")(fn)
-        same(fn(x), opt_fn(x))
+        self.common(fn, (x,))
 
     @unittest.skipIf(not HAS_CPU, "requires C++ compiler")
     def test_data_type_propogation(self):
@@ -6636,6 +6623,10 @@ def func(arg0_1):
                 elif node.target == "output":
                     self.assertEqual(get_data_type(node), torch.bfloat16)
 
+    # Calling div with only torch.SymInt arguments is not yet supported.
+    # To support this behavior, we need to allow const-propping tensors that store symint data.
+    # For now, dynamo will explicitly graph break when it encounters user code with this behavior.
+    @expectedFailureCodegenDynamic
     def test_AllenaiLongformerBase_repro(self):
         def fn(query, scores, window_overlap):
             batch_size, seq_len, num_heads, _ = query.size()
@@ -6661,12 +6652,12 @@ def fn(query, scores, window_overlap):
             return input_tensor
 
         args = [
-            ((4, 1024, 12, 64), (768, 3072, 64, 1), torch.float32, "cpu"),
-            ((48, 3, 512, 513), (787968, 262656, 513, 1), torch.float32, "cpu"),
+            ((4, 1024, 12, 64), (768, 3072, 64, 1)),
+            ((48, 3, 512, 513), (787968, 262656, 513, 1)),
         ]
-        args = [rand_strided(sh, st, dt, dev) for (sh, st, dt, dev) in args]
-        opt_fn = torch._dynamo.optimize("inductor")(fn)
-        same(fn(*args, 256), opt_fn(*args, 256))
+        args = [rand_strided(sh, st) for (sh, st) in args]
+        args.append(256)
+        self.common(fn, args)
 
     def test_cumsum_pattern_matcher_issue(self):
         def fn(input_ids) -> torch.Tensor:
@@ -6675,25 +6666,23 @@ def fn(input_ids) -> torch.Tensor:
             batch_size, seq_length = input_shape
             past_key_values_length = 0
             mask_seq_length = past_key_values_length + seq_length
-            attention_mask = torch.ones(batch_size, mask_seq_length)
+            attention_mask = torch.ones(
+                batch_size, mask_seq_length, device=input_ids.device
+            )
             attention_mask = attention_mask.long()
             return torch.cumsum(attention_mask, dim=1)
 
-        torch._dynamo.reset()
         x = torch.randn(2, 2)
-        opt = torch._dynamo.optimize("inductor")(fn)
-        res = opt(x)
-        ref = fn(x)
-        self.assertEqual(res, ref, atol=0, rtol=0)
+        self.common(fn, (x,), atol=0, rtol=0)
 
+    # It's a view so it doesn't generate a kernel
+    @expectedFailureCodegenDynamic
     def test_slice(self):
         def fn(a, b):
             return torch.ops.aten.slice.Tensor(a, 0, 0, -b)
 
-        torch._dynamo.reset()
         x = torch.rand(48, 3, 512, 512)
-        opt_fn = torch._dynamo.optimize("inductor")(fn)
-        same(fn(x, 2), opt_fn(x, 2))
+        self.common(fn, (x, 2))
 
     def test_inplace_resize_as(self):
         def fn(x, y):