diff --git a/caffe2/operators/reverse_packed_segs_op.cu b/caffe2/operators/reverse_packed_segs_op.cu
new file mode 100644
index 00000000000..766a1748d21
--- /dev/null
+++ b/caffe2/operators/reverse_packed_segs_op.cu
@@ -0,0 +1,12 @@
+#include "caffe2/core/context_gpu.h"
+#include "caffe2/operators/reverse_packed_segs_op.h"
+
+namespace caffe2 {
+namespace {
+// CUDA registration of ReversePackedSegs. The op reads LENGTHS through
+// Tensor<CPUContext>, so callers must keep that input on the CPU device
+// even when the operator itself runs under CUDAContext.
+REGISTER_CUDA_OPERATOR(ReversePackedSegs, ReversePackedSegsOp<CUDAContext>);
+
+} // namespace
+} // namespace caffe2
diff --git a/caffe2/operators/reverse_packed_segs_op.h b/caffe2/operators/reverse_packed_segs_op.h
index e285ec20ebc..93dfca02c33 100644
--- a/caffe2/operators/reverse_packed_segs_op.h
+++ b/caffe2/operators/reverse_packed_segs_op.h
@@ -20,7 +20,8 @@ class ReversePackedSegsOp final : public Operator<Context> {
 
   template <typename T>
   bool DoRunWithType() {
-    if (Input(LENGTHS).template IsType<int>()) {
+    if (OperatorBase::Input<Tensor<CPUContext>>(LENGTHS)
+            .template IsType<int>()) {
       DoRunWithLengthType<T, int>();
     } else {
       DoRunWithLengthType<T, long>();
@@ -34,7 +35,7 @@ class ReversePackedSegsOp final : public Operator<Context> {
   template <typename T, typename LengthType>
   void DoRunWithLengthType() {
     const auto& data = Input(DATA);
-    const auto& lengths = Input(LENGTHS);
+    const auto& lengths = OperatorBase::Input<Tensor<CPUContext>>(LENGTHS);
 
     CAFFE_ENFORCE(
         data.ndim() == 3,
@@ -56,6 +57,7 @@ class ReversePackedSegsOp final : public Operator<Context> {
 
     const T* data_ptr = data.template data<T>();
     const LengthType* lengths_ptr = lengths.template data<LengthType>();
+
     T* rev_data_ptr = output->template mutable_data<T>();
     for (TIndex i = 0; i < batch_size; i++) {
       const auto& seg_length = lengths_ptr[i];
diff --git a/caffe2/python/hypothesis_test_util.py b/caffe2/python/hypothesis_test_util.py
index 0b03d5ab188..2defe01a4fe 100644
--- a/caffe2/python/hypothesis_test_util.py
+++ b/caffe2/python/hypothesis_test_util.py
@@ -244,9 +244,11 @@ def runOpBenchmark(
     device_option,
     op,
     inputs,
-    input_device_options={},
+    input_device_options=None,
     iterations=10,
 ):
+    if input_device_options is None:
+        input_device_options = {}
     op = copy.deepcopy(op)
     op.device_option.CopyFrom(device_option)
     net = caffe2_pb2.NetDef()
@@ -445,7 +447,7 @@ def assertReferenceChecks(
         op,
         inputs,
         reference,
-        input_device_options={},
+        input_device_options=None,
         threshold=1e-4,
         output_to_grad=None,
         grad_reference=None,
@@ -473,6 +475,9 @@ def softsign(X):
 
                 self.assertReferenceChecks(gc, op, [X], softsign)
         """
+        if input_device_options is None:
+            input_device_options = {}
+
         op = copy.deepcopy(op)
         op.device_option.CopyFrom(device_option)
 
@@ -538,9 +543,11 @@ def assertValidationChecks(
         op,
         inputs,
         validator,
-        input_device_options={},
+        input_device_options=None,
         as_kwargs=True
     ):
+        if input_device_options is None:
+            input_device_options = {}
         if as_kwargs:
             assert len(set(list(op.input) + list(op.output))) == \
                 len(op.input) + len(op.output), \
diff --git a/caffe2/python/operator_test/sequence_ops_test.py b/caffe2/python/operator_test/sequence_ops_test.py
index 59e46517fba..e78b16cc2f5 100644
--- a/caffe2/python/operator_test/sequence_ops_test.py
+++ b/caffe2/python/operator_test/sequence_ops_test.py
@@ -5,6 +5,7 @@
 from caffe2.python import core
 from hypothesis import given
 import caffe2.python.hypothesis_test_util as hu
+from caffe2.proto import caffe2_pb2
 import hypothesis.strategies as st
 import numpy as np
 from functools import partial
@@ -84,11 +85,11 @@ def _gather_padding_ref(start_pad_width, end_pad_width, data, lengths):
     pad_width = start_pad_width + end_pad_width
     ptr = 0
     for length in lengths:
-        for i in range(start_pad_width):
+        for _ in range(start_pad_width):
             start_padding += data[ptr]
             ptr += 1
         ptr += length - pad_width
-        for i in range(end_pad_width):
+        for _ in range(end_pad_width):
             end_padding += data[ptr]
             ptr += 1
     return (start_padding, end_padding)
@@ -190,7 +191,7 @@ def test_gather_padding(self, start_pad_width, end_pad_width, args):
                           elements=st.floats(min_value=-np.inf,
                                              max_value=np.inf),
                           min_value=1, max_value=10),
-                          **hu.gcs_cpu_only)
+                          **hu.gcs)
     def test_reverse_packed_segs(self, data, gc, dc):
         max_length = data.shape[0]
         batch_size = data.shape[1]
@@ -217,6 +218,7 @@ def op_grad_ref(grad_out, outputs, inputs):
             op=op,
             inputs=[data, lengths],
             reference=op_ref,
+            input_device_options={"lengths": core.DeviceOption(caffe2_pb2.CPU)},
             output_to_grad='reversed_data',
             grad_reference=op_grad_ref)