
Commit f8c6861

malfet authored and pytorchmergebot committed
[MPS][BE] Introduce LookUpOrCreateCachedGraph (pytorch#99422)
A template that replaces the following common pattern:

```cpp
MPSGraphCache* cache_ = MPSGraphCache::getInstance();
CachedGraph* cachedGraph = cache_->LookUpAs<CachedGraph>(key);
if (!cachedGraph) {
  cachedGraph = cache_->CreateCachedGraphAs<CachedGraph>(key, ^MPSCachedGraph*() {
    CachedGraph* newCachedGraph = nil;
    @autoreleasepool {
      MPSGraph* mpsGraph = make_mps_graph();
      newCachedGraph = new PoolingCachedGraph(mpsGraph);
      ...
    }
    return newCachedGraph;
  });
}
```

with

```cpp
auto cachedGraph = LookUpOrCreateCachedGraph<CachedGraph>(key, [&](auto mpsGraph, auto newCachedGraph) {
  ...
});
```

Fixes a memory leak in addmv_out_mps_impl, where new entries were added to the cache without doing the lookup first.

Pull Request resolved: pytorch#99422
Approved by: https://github.com/albanD, https://github.com/kulinseth
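For orientation, here is a condensed sketch of how a call site drives the new helper end to end. `ExampleCachedGraph`, the `"example_op"` key, and `self` (standing in for an input `Tensor`) are illustrative placeholders rather than code from this commit; the helper itself and utilities such as `getTensorsStringKey`, `mpsGraphRankedPlaceHolder`, and `identityWithTensor:name:` are taken from the diffs below:

```cpp
// Hypothetical call site, modeled on the BinaryOps.mm / Blas.mm / ConstantOps.mm changes in this commit.
struct ExampleCachedGraph : public MPSCachedGraph {
  ExampleCachedGraph(MPSGraph* graph) : MPSCachedGraph(graph) {}
  MPSGraphTensor* inputTensor_ = nil;
  MPSGraphTensor* outputTensor_ = nil;
};

@autoreleasepool {
  // The key identifies the shape/dtype combination whose graph should be reused.
  string key = "example_op" + getTensorsStringKey({self});
  // LookUpAs<ExampleCachedGraph>(key) runs first; the lambda executes only on a cache miss.
  auto cachedGraph = LookUpOrCreateCachedGraph<ExampleCachedGraph>(key, [&](auto mpsGraph, auto newCachedGraph) {
    newCachedGraph->inputTensor_ = mpsGraphRankedPlaceHolder(mpsGraph, self);
    newCachedGraph->outputTensor_ = [mpsGraph identityWithTensor:newCachedGraph->inputTensor_ name:nil];
  });
  // cachedGraph is valid here whether it was found in the cache or freshly created.
}
```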
1 parent d29cf18 commit f8c6861

34 files changed (+4018 −5544 lines)

aten/src/ATen/native/mps/OperationUtils.h

+19
```diff
@@ -281,6 +281,25 @@ struct MPSGraphCache
 
 };
 
+// Common template for creating graph with a specified cache if missing
+template<typename T>
+inline T* LookUpOrCreateCachedGraph(const std::string& key, std::function<void(MPSGraph*, T*)> instantiate) {
+  auto cache_ = MPSGraphCache::getInstance();
+  if (auto rc = cache_->LookUpAs<T>(key)) {
+    return rc;
+  }
+  return cache_->CreateCachedGraphAs<T>(key, ^mps::MPSCachedGraph*() {
+    T* newCachedGraph = nil;
+    @autoreleasepool {
+      // Initialize graph
+      auto mpsGraph = mps::make_mps_graph();
+      newCachedGraph = new T(mpsGraph);
+      instantiate(mpsGraph, newCachedGraph);
+    }
+    return newCachedGraph;
+  });
+}
+
 // Common math operations
 MPSGraphTensor* log1p(MPSGraph* mpsGraph, MPSGraphTensor* inputTensor);
```

aten/src/ATen/native/mps/operations/Activation.mm

+718 −1,147
Large diffs are not rendered by default.

aten/src/ATen/native/mps/operations/BinaryOps.mm

+33 −46
```diff
@@ -106,55 +106,42 @@ void binaryOpTensor(const Tensor& self,
     }
   }
 
-  MPSGraphCache* cache_ = MPSGraphCache::getInstance();
   @autoreleasepool {
     string key = op_name + getTensorsStringKey({self, other, output_});
-    BinaryOpCachedGraph* cachedGraph = static_cast<BinaryOpCachedGraph*>(cache_->LookUp(key));
-
-    if (!cachedGraph) {
-      MPSCachedGraph* tmpCachedGraph = cache_->CreateCachedGraph(key, ^MPSCachedGraph*() {
-        BinaryOpCachedGraph* newCachedGraph = nil;
-        @autoreleasepool {
-          MPSGraph* mpsGraph = make_mps_graph();
-          newCachedGraph = new BinaryOpCachedGraph(mpsGraph);
-          newCachedGraph->primaryTensor =
-              mpsGraphRankedPlaceHolder(mpsGraph, getMPSScalarType(inputDataType), getMPSShape(self));
-          newCachedGraph->secondaryTensor =
-              mpsGraphRankedPlaceHolder(mpsGraph, getMPSScalarType(otherDataType), getMPSShape(other));
-
-          MPSGraphTensor* primaryCastTensor = newCachedGraph->primaryTensor;
-          MPSGraphTensor* secondaryCastTensor = newCachedGraph->secondaryTensor;
-
-          // this type inference is only required at the time of graph creation
-          ScalarType common_dtype = c10::promoteTypes(inputDataType, otherDataType);
-          if (isIntegralType(common_dtype, true)) {
-            // integer inputs must be cast to float, if output is float
-            if (isFloatingType(outputDataType)) {
-              common_dtype = outputDataType;
-              // in boolean comparison ops with signed vs. unsigned integers, we always cast to the unsigned type
-            } else if (outputDataType == ScalarType::Bool &&
-                       (inputDataType == ScalarType::Byte || otherDataType == ScalarType::Byte)) {
-              common_dtype = ScalarType::Byte;
-            }
-          }
-          if (inputDataType != common_dtype) {
-            primaryCastTensor = castMPSTensor(mpsGraph, newCachedGraph->primaryTensor, common_dtype);
-          }
-          if (otherDataType != common_dtype) {
-            secondaryCastTensor = castMPSTensor(mpsGraph, newCachedGraph->secondaryTensor, common_dtype);
-          }
-          newCachedGraph->outputTensor = binaryBlock(newCachedGraph, primaryCastTensor, secondaryCastTensor);
-          // Cast output tensor to an expected type if needed, which addresses discrepancy when int64 scalar is added to
-          // int32 tensor Output tensor should have been promoted but it remains an int32 tensor
-          if (outputDataType != common_dtype ||
-              [newCachedGraph->outputTensor dataType] != getMPSDataType(outputDataType)) {
-            newCachedGraph->outputTensor = castMPSTensor(mpsGraph, newCachedGraph->outputTensor, outputDataType);
-          }
+    auto cachedGraph = LookUpOrCreateCachedGraph<BinaryOpCachedGraph>(key, [&](auto mpsGraph, auto newCachedGraph) {
+      newCachedGraph->primaryTensor =
+          mpsGraphRankedPlaceHolder(mpsGraph, getMPSScalarType(inputDataType), getMPSShape(self));
+      newCachedGraph->secondaryTensor =
+          mpsGraphRankedPlaceHolder(mpsGraph, getMPSScalarType(otherDataType), getMPSShape(other));
+
+      MPSGraphTensor* primaryCastTensor = newCachedGraph->primaryTensor;
+      MPSGraphTensor* secondaryCastTensor = newCachedGraph->secondaryTensor;
+
+      // this type inference is only required at the time of graph creation
+      ScalarType common_dtype = c10::promoteTypes(inputDataType, otherDataType);
+      if (isIntegralType(common_dtype, true)) {
+        // integer inputs must be cast to float, if output is float
+        if (isFloatingType(outputDataType)) {
+          common_dtype = outputDataType;
+          // in boolean comparison ops with signed vs. unsigned integers, we always cast to the unsigned type
+        } else if (outputDataType == ScalarType::Bool &&
+                   (inputDataType == ScalarType::Byte || otherDataType == ScalarType::Byte)) {
+          common_dtype = ScalarType::Byte;
         }
-        return newCachedGraph;
-      });
-      cachedGraph = static_cast<BinaryOpCachedGraph*>(tmpCachedGraph);
-    }
+      }
+      if (inputDataType != common_dtype) {
+        primaryCastTensor = castMPSTensor(mpsGraph, newCachedGraph->primaryTensor, common_dtype);
+      }
+      if (otherDataType != common_dtype) {
+        secondaryCastTensor = castMPSTensor(mpsGraph, newCachedGraph->secondaryTensor, common_dtype);
+      }
+      newCachedGraph->outputTensor = binaryBlock(newCachedGraph, primaryCastTensor, secondaryCastTensor);
+      // Cast output tensor to an expected type if needed, which addresses discrepancy when int64 scalar is added to
+      // int32 tensor Output tensor should have been promoted but it remains an int32 tensor
+      if (outputDataType != common_dtype || [newCachedGraph->outputTensor dataType] != getMPSDataType(outputDataType)) {
+        newCachedGraph->outputTensor = castMPSTensor(mpsGraph, newCachedGraph->outputTensor, outputDataType);
+      }
+    });
 
     NSMutableDictionary* feeds = [[NSMutableDictionary new] autorelease];
     Placeholder selfPlaceholder;
```

aten/src/ATen/native/mps/operations/Blas.mm

+66 −93
```diff
@@ -24,57 +24,43 @@ Tensor dot_mps(const Tensor& self, const Tensor& other) {
   using CachedGraph = MPSBinaryCachedGraph;
   auto output = at::empty({}, self.scalar_type(), c10::nullopt, kMPS, c10::nullopt, c10::nullopt);
 
-  MPSGraphCache* cache_ = MPSGraphCache::getInstance();
-
   MPSStream* stream = at::mps::getCurrentMPSStream();
 
   @autoreleasepool {
     string key = "dot_mps" + getTensorsStringKey({self, other});
 
-    CachedGraph* cachedGraph = static_cast<CachedGraph*>(cache_->LookUp(key));
-    if (!cachedGraph) {
-      mps::MPSCachedGraph* tmpCachedGraph = cache_->CreateCachedGraph(key, ^mps::MPSCachedGraph*() {
-        CachedGraph* newCachedGraph = nil;
-
-        @autoreleasepool {
-          MPSGraph* mpsGraph = mps::make_mps_graph();
-          newCachedGraph = new CachedGraph(mpsGraph);
-
-          MPSGraphTensor* selfTensor = mps::mpsGraphRankedPlaceHolder(mpsGraph, self);
-          MPSGraphTensor* otherTensor = mps::mpsGraphRankedPlaceHolder(mpsGraph, other);
-
-          MPSGraphTensor* castSelf = nil;
-          MPSGraphTensor* castOther = nil;
-
-          if (self.scalar_type() == ScalarType::Short || self.scalar_type() == ScalarType::Byte ||
-              self.scalar_type() == ScalarType::Char) {
-            castSelf = [mpsGraph castTensor:selfTensor toType:MPSDataTypeInt32 name:@"castSelfTensor"];
-            castOther = [mpsGraph castTensor:otherTensor toType:MPSDataTypeInt32 name:@"castOtherTensor"];
-          } else {
-            castSelf = selfTensor;
-            castOther = otherTensor;
-          }
-
-          MPSGraphTensor* dot = [mpsGraph multiplicationWithPrimaryTensor:castSelf
-                                                          secondaryTensor:castOther
-                                                                     name:@"multiplication"];
-
-          MPSGraphTensor* dotProductTensor = [mpsGraph reductionSumWithTensor:dot axes:nil name:@"dotProduct"];
-
-          if (self.scalar_type() == ScalarType::Short || self.scalar_type() == ScalarType::Byte ||
-              self.scalar_type() == ScalarType::Char)
-            dotProductTensor = [mpsGraph castTensor:dotProductTensor
-                                             toType:getMPSDataType(self)
-                                               name:@"castDotProductTensor"];
-
-          newCachedGraph->inputTensor_ = selfTensor;
-          newCachedGraph->otherTensor_ = otherTensor;
-          newCachedGraph->outputTensor_ = dotProductTensor;
-        }
-        return newCachedGraph;
-      });
-      cachedGraph = static_cast<CachedGraph*>(tmpCachedGraph);
-    }
+    auto cachedGraph = LookUpOrCreateCachedGraph<CachedGraph>(key, [&](auto mpsGraph, auto newCachedGraph) {
+      MPSGraphTensor* selfTensor = mpsGraphRankedPlaceHolder(mpsGraph, self);
+      MPSGraphTensor* otherTensor = mpsGraphRankedPlaceHolder(mpsGraph, other);
+
+      MPSGraphTensor* castSelf = nil;
+      MPSGraphTensor* castOther = nil;
+
+      if (self.scalar_type() == ScalarType::Short || self.scalar_type() == ScalarType::Byte ||
+          self.scalar_type() == ScalarType::Char) {
+        castSelf = [mpsGraph castTensor:selfTensor toType:MPSDataTypeInt32 name:@"castSelfTensor"];
+        castOther = [mpsGraph castTensor:otherTensor toType:MPSDataTypeInt32 name:@"castOtherTensor"];
+      } else {
+        castSelf = selfTensor;
+        castOther = otherTensor;
+      }
+
+      MPSGraphTensor* dot = [mpsGraph multiplicationWithPrimaryTensor:castSelf
+                                                      secondaryTensor:castOther
+                                                                 name:@"multiplication"];
+
+      MPSGraphTensor* dotProductTensor = [mpsGraph reductionSumWithTensor:dot axes:nil name:@"dotProduct"];
+
+      if (self.scalar_type() == ScalarType::Short || self.scalar_type() == ScalarType::Byte ||
+          self.scalar_type() == ScalarType::Char)
+        dotProductTensor = [mpsGraph castTensor:dotProductTensor
+                                         toType:getMPSDataType(self)
+                                           name:@"castDotProductTensor"];
+
+      newCachedGraph->inputTensor_ = selfTensor;
+      newCachedGraph->otherTensor_ = otherTensor;
+      newCachedGraph->outputTensor_ = dotProductTensor;
+    });
 
     Placeholder selfPlaceholder = Placeholder(cachedGraph->inputTensor_, self);
     Placeholder otherPlaceholder = Placeholder(cachedGraph->otherTensor_, other);
@@ -110,64 +96,51 @@ Tensor dot_mps(const Tensor& self, const Tensor& other) {
   c10::MaybeOwned<Tensor> self_ = expand_size(self, {mat.size(0)});
   auto betaval = beta_.toComplexDouble();
 
-  struct CachedGraph : public mps::MPSCachedGraph {
+  struct CachedGraph : public MPSCachedGraph {
     CachedGraph(MPSGraph* graph) : MPSCachedGraph(graph) {}
     MPSGraphTensor* selfTensor_ = nil;
     MPSGraphTensor* matMulVecTensor_ = nil;
     MPSGraphTensor* outputTensor_ = nil;
   };
-  mps::MPSGraphCache* cache_ = mps::MPSGraphCache::getInstance();
 
   MPSStream* stream = at::mps::getCurrentMPSStream();
   Tensor matMulVec = at::mm(mat, vec.unsqueeze(1)).squeeze(1);
 
   @autoreleasepool {
     string key = "addmv_out_mps_impl" + getTensorsStringKey({self, matMulVec}) + ":" + to_string(beta_.toDouble()) +
         ":" + to_string(alpha_.toDouble());
-    CachedGraph* cachedGraph = nil;
-    if (!cachedGraph) {
-      mps::MPSCachedGraph* tmpCachedGraph = cache_->CreateCachedGraph(key, ^mps::MPSCachedGraph*() {
-        CachedGraph* newCachedGraph = nil;
-
-        @autoreleasepool {
-          MPSGraph* mpsGraph = mps::make_mps_graph();
-          newCachedGraph = new CachedGraph(mpsGraph);
-
-          MPSGraphTensor* matMulVecTensor = mps::mpsGraphRankedPlaceHolder(mpsGraph, matMulVec);
-          MPSGraphTensor* selfTensor = mps::mpsGraphRankedPlaceHolder(mpsGraph, self);
-
-          // Intermediates for beta and alpha
-          MPSGraphTensor* alphaTensor = [mpsGraph constantWithScalar:alpha_.toDouble()
-                                                             dataType:getMPSScalarType(mat.scalar_type())];
-
-          // Intermediates for multiplying by beta and alpha
-          MPSGraphTensor* productTimesAlphaTensor = [mpsGraph multiplicationWithPrimaryTensor:matMulVecTensor
-                                                                               secondaryTensor:alphaTensor
-                                                                                          name:@"MM/alpha*(mat@vec)"];
-          newCachedGraph->outputTensor_ = productTimesAlphaTensor;
-
-          if (betaval != 0.0) {
-            MPSGraphTensor* betaTensor = [mpsGraph constantWithScalar:beta_.toDouble()
-                                                             dataType:getMPSScalarType(self.scalar_type())];
-
-            MPSGraphTensor* selfTimesBetaTensor = [mpsGraph multiplicationWithPrimaryTensor:selfTensor
-                                                                            secondaryTensor:betaTensor
-                                                                                       name:@"MM/beta*input"];
-
-            MPSGraphTensor* outputTensor = [mpsGraph additionWithPrimaryTensor:productTimesAlphaTensor
-                                                               secondaryTensor:selfTimesBetaTensor
-                                                                          name:@"MM/beta*input + alpha*(mat@vec)"];
-
-            newCachedGraph->outputTensor_ = outputTensor;
-          }
-
-          newCachedGraph->selfTensor_ = selfTensor;
-          newCachedGraph->matMulVecTensor_ = matMulVecTensor;
-        }
-        return newCachedGraph;
-      });
-      cachedGraph = static_cast<CachedGraph*>(tmpCachedGraph);
-    }
+    auto cachedGraph = LookUpOrCreateCachedGraph<CachedGraph>(key, [&](auto mpsGraph, auto newCachedGraph) {
+      MPSGraphTensor* matMulVecTensor = mpsGraphRankedPlaceHolder(mpsGraph, matMulVec);
+      MPSGraphTensor* selfTensor = mpsGraphRankedPlaceHolder(mpsGraph, self);
+
+      // Intermediates for beta and alpha
+      MPSGraphTensor* alphaTensor = [mpsGraph constantWithScalar:alpha_.toDouble()
+                                                         dataType:getMPSScalarType(mat.scalar_type())];
+
+      // Intermediates for multiplying by beta and alpha
+      MPSGraphTensor* productTimesAlphaTensor = [mpsGraph multiplicationWithPrimaryTensor:matMulVecTensor
+                                                                           secondaryTensor:alphaTensor
+                                                                                      name:@"MM/alpha*(mat@vec)"];
+      newCachedGraph->outputTensor_ = productTimesAlphaTensor;
+
+      if (betaval != 0.0) {
+        MPSGraphTensor* betaTensor = [mpsGraph constantWithScalar:beta_.toDouble()
+                                                         dataType:getMPSScalarType(self.scalar_type())];
+
+        MPSGraphTensor* selfTimesBetaTensor = [mpsGraph multiplicationWithPrimaryTensor:selfTensor
+                                                                        secondaryTensor:betaTensor
+                                                                                   name:@"MM/beta*input"];
+
+        MPSGraphTensor* outputTensor = [mpsGraph additionWithPrimaryTensor:productTimesAlphaTensor
+                                                           secondaryTensor:selfTimesBetaTensor
+                                                                      name:@"MM/beta*input + alpha*(mat@vec)"];
+
+        newCachedGraph->outputTensor_ = outputTensor;
+      }
+
+      newCachedGraph->selfTensor_ = selfTensor;
+      newCachedGraph->matMulVecTensor_ = matMulVecTensor;
+    });
 
     Placeholder matMulVecPlaceholder = Placeholder(cachedGraph->matMulVecTensor_, matMulVec);
     Placeholder outputPlaceholder = Placeholder(cachedGraph->outputTensor_, result);
@@ -182,7 +155,7 @@ Tensor dot_mps(const Tensor& self, const Tensor& other) {
     NSDictionary<MPSGraphTensor*, MPSGraphTensorData*>* results =
         @{outputPlaceholder.getMPSGraphTensor() : outputPlaceholder.getMPSGraphTensorData()};
 
-    mps::runMPSGraph(stream, cachedGraph->graph(), feeds, results);
+    runMPSGraph(stream, cachedGraph->graph(), feeds, results);
   }
 
   return result;
```
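To make the leak fix in `addmv_out_mps_impl` easier to spot in the hunk above, here is the change condensed, with the graph-building bodies elided in the same way the commit message elides them:

```cpp
// Removed pattern (leaked): cachedGraph was initialized to nil and never looked up,
// so every call created another cache entry under the same key.
CachedGraph* cachedGraph = nil;
if (!cachedGraph) {
  mps::MPSCachedGraph* tmpCachedGraph = cache_->CreateCachedGraph(key, ^mps::MPSCachedGraph*() { ... });
  cachedGraph = static_cast<CachedGraph*>(tmpCachedGraph);
}

// Replacement: the helper performs LookUpAs<CachedGraph>(key) before creating anything,
// so the graph-building lambda runs only on a cache miss and the entry is reused afterwards.
auto cachedGraph = LookUpOrCreateCachedGraph<CachedGraph>(key, [&](auto mpsGraph, auto newCachedGraph) { ... });
```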

aten/src/ATen/native/mps/operations/ConstantOps.mm

+23 −36
```diff
@@ -22,45 +22,32 @@
     MPSGraphTensor* outputTensor_ = nil;
   };
 
-  MPSGraphCache* cache_ = MPSGraphCache::getInstance();
-
   @autoreleasepool {
     string key = "fill_scalar_mps_impl" + getTensorsStringKey(self) + ":" + to_string(value.toDouble());
 
-    CachedGraph* cachedGraph = cache_->LookUpAs<CachedGraph>(key);
-    if (!cachedGraph) {
-      cachedGraph = cache_->CreateCachedGraphAs<CachedGraph>(key, ^MPSCachedGraph*() {
-        CachedGraph* newCachedGraph = nil;
-
-        @autoreleasepool {
-          MPSGraph* mpsGraph = make_mps_graph();
-          newCachedGraph = new CachedGraph(mpsGraph);
-          auto isBool = self.scalar_type() == c10::ScalarType::Bool;
-          auto isUInt8 = self.scalar_type() == c10::ScalarType::Byte;
-          auto dataType =
-              !isUInt8 ? !isBool ? getMPSScalarType(self.scalar_type()) : MPSDataTypeInt8 : MPSDataTypeUInt32;
-          // constantWithScalar does not work for boolTypes on MacOS-12.[34]
-          // workaround by filing it as int8 tensor and than casting to bool
-          // See https://github.com/pytorch/pytorch/issues/82427
-          // constantWithScalar does not work for UInt8 Types on MacOS-12.[34]/Ventura preview
-          // workaround by filing it as uint32 tensor and than casting to uint8
-          // See https://github.com/pytorch/pytorch/issues/83692
-          MPSGraphTensor* inputTensor = [mpsGraph constantWithScalar:value.toDouble()
-                                                                shape:getMPSShape(self)
-                                                             dataType:dataType];
-          MPSGraphTensor* outputTensor = [mpsGraph identityWithTensor:inputTensor name:nil];
-          if (isBool) {
-            outputTensor = [mpsGraph castTensor:outputTensor toType:MPSDataTypeBool name:@"constWithBool-workaround"];
-          }
-          if (isUInt8) {
-            outputTensor = [mpsGraph castTensor:outputTensor toType:MPSDataTypeUInt8 name:@"constWithUInt8-workaround"];
-          }
-
-          newCachedGraph->outputTensor_ = outputTensor;
-        }
-        return newCachedGraph;
-      });
-    }
+    auto cachedGraph = LookUpOrCreateCachedGraph<CachedGraph>(key, [&](auto mpsGraph, auto newCachedGraph) {
+      auto isBool = self.scalar_type() == c10::ScalarType::Bool;
+      auto isUInt8 = self.scalar_type() == c10::ScalarType::Byte;
+      auto dataType = !isUInt8 ? !isBool ? getMPSScalarType(self.scalar_type()) : MPSDataTypeInt8 : MPSDataTypeUInt32;
+      // constantWithScalar does not work for boolTypes on MacOS-12.[34]
+      // workaround by filing it as int8 tensor and than casting to bool
+      // See https://github.com/pytorch/pytorch/issues/82427
+      // constantWithScalar does not work for UInt8 Types on MacOS-12.[34]/Ventura preview
+      // workaround by filing it as uint32 tensor and than casting to uint8
+      // See https://github.com/pytorch/pytorch/issues/83692
+      MPSGraphTensor* inputTensor = [mpsGraph constantWithScalar:value.toDouble()
+                                                            shape:getMPSShape(self)
+                                                         dataType:dataType];
+      MPSGraphTensor* outputTensor = [mpsGraph identityWithTensor:inputTensor name:nil];
+      if (isBool) {
+        outputTensor = [mpsGraph castTensor:outputTensor toType:MPSDataTypeBool name:@"constWithBool-workaround"];
+      }
+      if (isUInt8) {
+        outputTensor = [mpsGraph castTensor:outputTensor toType:MPSDataTypeUInt8 name:@"constWithUInt8-workaround"];
+      }
+
+      newCachedGraph->outputTensor_ = outputTensor;
+    });
 
     Placeholder outputPlaceholder =
         Placeholder(cachedGraph->outputTensor_, needsCopyToOutput ? output : self, nullptr, !needsCopyToOutput);
```
