Skip to content

Commit a9cbfc4

Browse files
zero323 authored and rxin committed
[SPARK-18690][PYTHON][SQL] Backward compatibility of unbounded frames
## What changes were proposed in this pull request?

Makes `Window.unboundedPreceding` and `Window.unboundedFollowing` backward compatible.

## How was this patch tested?

PySpark SQL unit tests.

Please review http://spark.apache.org/contributing.html before opening a pull request.

Author: zero323 <[email protected]>

Closes apache#16123 from zero323/SPARK-17845-follow-up.
1 parent 2dc0d7e commit a9cbfc4

File tree

2 files changed

+51
-14
lines changed

2 files changed

+51
-14
lines changed

python/pyspark/sql/tests.py

+35
Original file line numberDiff line numberDiff line change
@@ -1980,6 +1980,41 @@ def assert_runs_only_one_job_stage_and_task(job_group_name, f):
19801980
# Regression test for SPARK-17514: limit(n).collect() should perform the same as take(n)
19811981
assert_runs_only_one_job_stage_and_task("collect_limit", lambda: df.limit(1).collect())
19821982

1983+
@unittest.skipIf(sys.version_info < (3, 3), "Unittest < 3.3 doesn't support mocking")
1984+
def test_unbounded_frames(self):
1985+
from unittest.mock import patch
1986+
from pyspark.sql import functions as F
1987+
from pyspark.sql import window
1988+
import importlib
1989+
1990+
df = self.spark.range(0, 3)
1991+
1992+
def rows_frame_match():
1993+
return "ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING" in df.select(
1994+
F.count("*").over(window.Window.rowsBetween(-sys.maxsize, sys.maxsize))
1995+
).columns[0]
1996+
1997+
def range_frame_match():
1998+
return "RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING" in df.select(
1999+
F.count("*").over(window.Window.rangeBetween(-sys.maxsize, sys.maxsize))
2000+
).columns[0]
2001+
2002+
with patch("sys.maxsize", 2 ** 31 - 1):
2003+
importlib.reload(window)
2004+
self.assertTrue(rows_frame_match())
2005+
self.assertTrue(range_frame_match())
2006+
2007+
with patch("sys.maxsize", 2 ** 63 - 1):
2008+
importlib.reload(window)
2009+
self.assertTrue(rows_frame_match())
2010+
self.assertTrue(range_frame_match())
2011+
2012+
with patch("sys.maxsize", 2 ** 127 - 1):
2013+
importlib.reload(window)
2014+
self.assertTrue(rows_frame_match())
2015+
self.assertTrue(range_frame_match())
2016+
2017+
importlib.reload(window)
19832018

19842019
if __name__ == "__main__":
19852020
from pyspark.sql.tests import *

python/pyspark/sql/window.py

+16-14
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ class Window(object):
4949

5050
_JAVA_MIN_LONG = -(1 << 63) # -9223372036854775808
5151
_JAVA_MAX_LONG = (1 << 63) - 1 # 9223372036854775807
52+
_PRECEDING_THRESHOLD = max(-sys.maxsize, _JAVA_MIN_LONG)
53+
_FOLLOWING_THRESHOLD = min(sys.maxsize, _JAVA_MAX_LONG)
5254

5355
unboundedPreceding = _JAVA_MIN_LONG
5456

@@ -98,9 +100,9 @@ def rowsBetween(start, end):
98100
The frame is unbounded if this is ``Window.unboundedFollowing``, or
99101
any value greater than or equal to min(sys.maxsize, 9223372036854775807).
100102
"""
101-
if start <= Window._JAVA_MIN_LONG:
103+
if start <= Window._PRECEDING_THRESHOLD:
102104
start = Window.unboundedPreceding
103-
if end >= Window._JAVA_MAX_LONG:
105+
if end >= Window._FOLLOWING_THRESHOLD:
104106
end = Window.unboundedFollowing
105107
sc = SparkContext._active_spark_context
106108
jspec = sc._jvm.org.apache.spark.sql.expressions.Window.rowsBetween(start, end)
@@ -123,14 +125,14 @@ def rangeBetween(start, end):
123125
124126
:param start: boundary start, inclusive.
125127
The frame is unbounded if this is ``Window.unboundedPreceding``, or
126-
any value less than or equal to -9223372036854775808.
128+
any value less than or equal to max(-sys.maxsize, -9223372036854775808).
127129
:param end: boundary end, inclusive.
128130
The frame is unbounded if this is ``Window.unboundedFollowing``, or
129-
any value greater than or equal to 9223372036854775807.
131+
any value greater than or equal to min(sys.maxsize, 9223372036854775807).
130132
"""
131-
if start <= Window._JAVA_MIN_LONG:
133+
if start <= Window._PRECEDING_THRESHOLD:
132134
start = Window.unboundedPreceding
133-
if end >= Window._JAVA_MAX_LONG:
135+
if end >= Window._FOLLOWING_THRESHOLD:
134136
end = Window.unboundedFollowing
135137
sc = SparkContext._active_spark_context
136138
jspec = sc._jvm.org.apache.spark.sql.expressions.Window.rangeBetween(start, end)
@@ -185,14 +187,14 @@ def rowsBetween(self, start, end):
185187
186188
:param start: boundary start, inclusive.
187189
The frame is unbounded if this is ``Window.unboundedPreceding``, or
188-
any value less than or equal to -9223372036854775808.
190+
any value less than or equal to max(-sys.maxsize, -9223372036854775808).
189191
:param end: boundary end, inclusive.
190192
The frame is unbounded if this is ``Window.unboundedFollowing``, or
191-
any value greater than or equal to 9223372036854775807.
193+
any value greater than or equal to min(sys.maxsize, 9223372036854775807).
192194
"""
193-
if start <= Window._JAVA_MIN_LONG:
195+
if start <= Window._PRECEDING_THRESHOLD:
194196
start = Window.unboundedPreceding
195-
if end >= Window._JAVA_MAX_LONG:
197+
if end >= Window._FOLLOWING_THRESHOLD:
196198
end = Window.unboundedFollowing
197199
return WindowSpec(self._jspec.rowsBetween(start, end))
198200

@@ -211,14 +213,14 @@ def rangeBetween(self, start, end):
211213
212214
:param start: boundary start, inclusive.
213215
The frame is unbounded if this is ``Window.unboundedPreceding``, or
214-
any value less than or equal to -9223372036854775808.
216+
any value less than or equal to max(-sys.maxsize, -9223372036854775808).
215217
:param end: boundary end, inclusive.
216218
The frame is unbounded if this is ``Window.unboundedFollowing``, or
217-
any value greater than or equal to 9223372036854775807.
219+
any value greater than or equal to min(sys.maxsize, 9223372036854775807).
218220
"""
219-
if start <= Window._JAVA_MIN_LONG:
221+
if start <= Window._PRECEDING_THRESHOLD:
220222
start = Window.unboundedPreceding
221-
if end >= Window._JAVA_MAX_LONG:
223+
if end >= Window._FOLLOWING_THRESHOLD:
222224
end = Window.unboundedFollowing
223225
return WindowSpec(self._jspec.rangeBetween(start, end))
224226

0 commit comments

Comments
 (0)