Skip to content

Commit

Permalink
AVRO-831 Refactor lang/py setup and test structure (apache#733)
Browse files Browse the repository at this point in the history
* AVRO-831: Pythonic Build

* AVRO-831: Rework lang/py Setup

Decouple python setup from ant and enable tests to run without it.

* AVRO-831: Run Tests Normally

* AVRO-831: Skip Java Tether if No JDK

Make it easier to test on systems without Java

* AVRO-831: Modern Exception Syntax

* AVRO-831: Exterminate Ants

* AVRO-831: Found One More Ant

* AVRO-831: Remove pdb

* AVRO-831: iSort Order

* AVRO-831: Rename Build Subcommand

* AVRO-831: Correct PYTHONPATH in Interop Tests

* AVRO-831: Skip Test if JAR Not Found

* AVRO-831: Set Package Version Correctly

Fixes an error where the package metadata version is not used, causing setuptools to ignore it.

Co-Authored-By: RyanSkraba <[email protected]>

* AVRO-831: Ignore More Files Created by Setup

Adds additional files that are created during setup to .gitignore because they are generated and should not be in version control.

* AVRO-831: Clean Directly from build.sh

Python packaging is [moving away from embedding commands in
setup.py][1]. It is difficult to maintain external commands in Python
this way. Managing the dependencies needed to run commands from within
setup.py is gnarly, because dependencies cannot be resolved so early.
Furthermore, it's difficult to test code that happens at the point at
which tests themselves are triggered, so that code needs to be very
simple. A shell script is simple and more appropriate for this use case.

At another time we should look to extract the lint command as well.

[1]: pypa/setuptools#931
  • Loading branch information
kojiromike authored Dec 11, 2019
1 parent 2b8d730 commit 9ab19ee
Show file tree
Hide file tree
Showing 48 changed files with 294 additions and 420 deletions.
14 changes: 4 additions & 10 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ do
(cd lang/php; ./build.sh test)
(cd lang/perl; ./build.sh test)

(cd lang/py; ant interop-data-generate)
(cd lang/py; ./build.sh interop-data-generate)
(cd lang/py3; python3 setup.py generate_interop_data \
--schema-file=../../share/test/schemas/interop.avsc --output-path=../../build/interop/data)
(cd lang/c; ./build.sh interop-data-generate)
Expand All @@ -72,7 +72,7 @@ do

# run interop data tests
(cd lang/java/ipc; mvn -B test -P interop-data-test)
(cd lang/py; ant interop-data-test)
(cd lang/py; ./build.sh interop-data-test)
(cd lang/py3; python3 setup.py test --test-suite avro.tests.test_datafile_interop.TestDataFileInterop)
(cd lang/c; ./build.sh interop-data-test)
#(cd lang/c++; make interop-data-test)
Expand Down Expand Up @@ -122,17 +122,11 @@ do

(cd lang/py; ./build.sh dist)
(cd lang/py3; ./build.sh dist)

(cd lang/c; ./build.sh dist)

(cd lang/c++; ./build.sh dist)

(cd lang/csharp; ./build.sh dist)

(cd lang/js; ./build.sh dist)

(cd lang/ruby; ./build.sh dist)

(cd lang/php; ./build.sh dist)

mkdir -p dist/perl
Expand Down Expand Up @@ -178,7 +172,7 @@ do
rm -rf lang/java/*/userlogs/
rm -rf lang/java/*/dependency-reduced-pom.xml

(cd lang/py; ant clean)
(cd lang/py; ./build.sh clean)
rm -rf lang/py/userlogs/

(cd lang/py3; python3 setup.py clean)
Expand Down Expand Up @@ -213,7 +207,7 @@ do
rm -rf lang/java/*/userlogs/
rm -rf lang/java/*/dependency-reduced-pom.xml

(cd lang/py; ant clean)
(cd lang/py; ./build.sh clean)
rm -rf lang/py/userlogs/

(cd lang/py3; python3 setup.py clean)
Expand Down
5 changes: 2 additions & 3 deletions doc/src/content/xdocs/gettingstartedpython.xml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
<source>
$ tar xvf avro-&AvroVersion;.tar.gz
$ cd avro-&AvroVersion;
$ sudo python setup.py install
$ python setup.py install
$ python
>>> import avro # should not raise ImportError
</source>
Expand All @@ -58,8 +58,7 @@ $ python
</p>
<source>
$ cd lang/py/
$ ant
$ sudo python setup.py install
$ python setup.py install
$ python
>>> import avro # should not raise ImportError
</source>
Expand Down
6 changes: 6 additions & 0 deletions lang/py/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,9 @@
build/
lib/
userlogs/
avro/HandshakeRequest.avsc
avro/HandshakeResponse.avsc
avro/VERSION.txt
avro/interop.avsc
avro/tether/InputProtocol.avpr
avro/tether/OutputProtocol.avpr
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
20 changes: 10 additions & 10 deletions lang/py/src/avro/ipc.py → lang/py/avro/ipc.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,22 +23,22 @@

import httplib
import io
import os

import avro.io
from avro import protocol, schema

#
# Constants
#

# Handshake schema is pulled in during build
HANDSHAKE_REQUEST_SCHEMA = schema.parse("""
@HANDSHAKE_REQUEST_SCHEMA@
""")
def _load(name):
dir_path = os.path.dirname(__file__)
rsrc_path = os.path.join(dir_path, name)
with open(rsrc_path, 'rb') as f:
return f.read()

HANDSHAKE_RESPONSE_SCHEMA = schema.parse("""
@HANDSHAKE_RESPONSE_SCHEMA@
""")
HANDSHAKE_REQUEST_SCHEMA_JSON = _load('HandshakeRequest.avsc')
HANDSHAKE_RESPONSE_SCHEMA_JSON = _load('HandshakeResponse.avsc')
HANDSHAKE_REQUEST_SCHEMA = schema.parse(HANDSHAKE_REQUEST_SCHEMA_JSON)
HANDSHAKE_RESPONSE_SCHEMA = schema.parse(HANDSHAKE_RESPONSE_SCHEMA_JSON)

HANDSHAKE_REQUESTOR_WRITER = avro.io.DatumWriter(HANDSHAKE_REQUEST_SCHEMA)
HANDSHAKE_REQUESTOR_READER = avro.io.DatumReader(HANDSHAKE_RESPONSE_SCHEMA)
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@
import os
import sys

from avro import datafile, io, schema
import avro.datafile
import avro.io
import avro.schema

CODECS_TO_VALIDATE = ('null', 'deflate')

Expand All @@ -41,7 +43,7 @@
DATUM = {
'intField': 12,
'longField': 15234324,
'stringField': unicode('hey'),
'stringField': 'hey',
'boolField': True,
'floatField': 1234.0,
'doubleField': -1234.0,
Expand All @@ -55,16 +57,18 @@
'recordField': {'label': 'blah', 'children': [{'label': 'inner', 'children': []}]},
}

if __name__ == "__main__":
def generate(schema_path, output_path):
for codec in CODECS_TO_VALIDATE:
interop_schema = schema.parse(open(sys.argv[1], 'r').read())
filename = sys.argv[2]
with open(schema_path, 'rb') as schema_file:
interop_schema = avro.schema.parse(schema_file.read())
filename = output_path
if codec != 'null':
base, ext = os.path.splitext(filename)
base, ext = os.path.splitext(output_path)
filename = base + "_" + codec + ext
writer = open(filename, 'wb')
datum_writer = io.DatumWriter()
# NB: not using compression
dfw = datafile.DataFileWriter(writer, datum_writer, interop_schema, codec=codec)
dfw.append(DATUM)
dfw.close()
with avro.datafile.DataFileWriter(open(filename, 'wb'), avro.io.DatumWriter(),
interop_schema, codec=codec) as dfw:
# NB: not using compression
dfw.append(DATUM)

if __name__ == "__main__":
generate(sys.argv[1], sys.argv[2])
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@

import avro.tether.tether_task
import avro.tether.util
import set_avro_test_path
from avro import ipc, protocol

SERVER_ADDRESS = ('localhost', avro.tether.util.find_port())
Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import os
import unittest

import set_avro_test_path
from avro import datafile, io, schema

SCHEMAS_TO_VALIDATE = (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,24 +22,30 @@
import os
import unittest

import set_avro_test_path
import avro
from avro import datafile, io

_INTEROP_DATA_DIR = os.path.join(os.path.dirname(avro.__file__), 'test', 'interop', 'data')

@unittest.skipUnless(os.path.exists(_INTEROP_DATA_DIR),
"{} does not exist".format(_INTEROP_DATA_DIR))
class TestDataFileInterop(unittest.TestCase):
def test_interop(self):
ran = False
print()
print('TEST INTEROP')
print('============')
print()
for f in os.listdir('@INTEROP_DATA_DIR@'):
for f in os.listdir(_INTEROP_DATA_DIR):
ran = True

base_ext = os.path.splitext(os.path.basename(f))[0].split('_', 1)
if len(base_ext) < 2 or base_ext[1] in datafile.VALID_CODECS:
print('READING %s' % f)
print('')

# read data in binary from file
reader = open(os.path.join('@INTEROP_DATA_DIR@', f), 'rb')
reader = open(os.path.join(_INTEROP_DATA_DIR, f), 'rb')
datum_reader = io.DatumReader()
dfr = datafile.DataFileReader(reader, datum_reader)
i = 0
Expand All @@ -49,6 +55,7 @@ def test_interop(self):
else:
print('SKIPPING %s due to an unsupported codec' % f)
print('')
self.assertTrue(ran, "Didn't find any interop data files to test")

if __name__ == '__main__':
unittest.main()
1 change: 0 additions & 1 deletion lang/py/test/test_io.py → lang/py/avro/test/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
from decimal import Decimal

import avro.io
import set_avro_test_path
from avro import schema, timezones

SCHEMAS_TO_VALIDATE = (
Expand Down
1 change: 0 additions & 1 deletion lang/py/test/test_ipc.py → lang/py/avro/test/test_ipc.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@

import unittest

import set_avro_test_path
# This test does import this code, to make sure it at least passes
# compilation.
from avro import ipc
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
import unittest
import warnings

import set_avro_test_path
from avro import schema


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def looney_records():
for f, l, t in LOONIES:
yield {"first": f, "last" : l, "type" : t}

SCRIPT = join(dirname(__file__), "..", "scripts", "avro")
SCRIPT = join(dirname(dirname(dirname(__file__))), "scripts", "avro")

_JSON_PRETTY = '''{
"type": "duck",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,11 @@
import unittest

import avro.io
import avro.test.mock_tether_parent
import avro.test.word_count_task
import avro.tether.tether_task
import avro.tether.util
import mock_tether_parent
import set_avro_test_path
from avro import schema, tether
from word_count_task import WordCountTask


class TestTetherTask(unittest.TestCase):
Expand All @@ -44,7 +43,7 @@ def test1(self):
Test that the tether_task is working. We run the mock_tether_parent in a separate
subprocess
"""
task=WordCountTask()
task=avro.test.word_count_task.WordCountTask()

proc=None
try:
Expand All @@ -54,7 +53,7 @@ def test1(self):
env["PYTHONPATH"]=':'.join(sys.path)
server_port = avro.tether.util.find_port()

pyfile=mock_tether_parent.__file__
pyfile=avro.test.mock_tether_parent.__file__
proc=subprocess.Popen(["python", pyfile,"start_server","{0}".format(server_port)])
input_port = avro.tether.util.find_port()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,8 @@
import avro.tether.tether_task
import avro.tether.tether_task_runner
import avro.tether.util
import mock_tether_parent
import set_avro_test_path
from word_count_task import WordCountTask
import avro.test.mock_tether_parent
import avro.test.word_count_task


class TestTetherTaskRunner(unittest.TestCase):
Expand All @@ -50,7 +49,7 @@ def test1(self):
env["PYTHONPATH"]=':'.join(sys.path)
parent_port = avro.tether.util.find_port()

pyfile=mock_tether_parent.__file__
pyfile=avro.test.mock_tether_parent.__file__
proc=subprocess.Popen(["python", pyfile,"start_server","{0}".format(parent_port)])
input_port = avro.tether.util.find_port()

Expand All @@ -59,7 +58,7 @@ def test1(self):
# so we give the subprocess time to start up
time.sleep(1)

runner = avro.tether.tether_task_runner.TaskRunner(WordCountTask())
runner = avro.tether.tether_task_runner.TaskRunner(avro.test.word_count_task.WordCountTask())

runner.start(outputport=parent_port,join=False)

Expand Down Expand Up @@ -154,7 +153,7 @@ def test2(self):
env["PYTHONPATH"]=':'.join(sys.path)
parent_port = avro.tether.util.find_port()

pyfile=mock_tether_parent.__file__
pyfile=avro.test.mock_tether_parent.__file__
proc=subprocess.Popen(["python", pyfile,"start_server","{0}".format(parent_port)])

#Possible race condition? when we start tether_task_runner it will call
Expand All @@ -167,7 +166,7 @@ def test2(self):
env={"AVRO_TETHER_OUTPUT_PORT":"{0}".format(parent_port)}
env["PYTHONPATH"]=':'.join(sys.path)

runnerproc = subprocess.Popen(["python", avro.tether.tether_task_runner.__file__, "word_count_task.WordCountTask"],env=env)
runnerproc = subprocess.Popen(["python", avro.tether.tether_task_runner.__file__, "avro.test.word_count_task.WordCountTask"], env=env)

#possible race condition wait for the process to start
time.sleep(1)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@
from __future__ import absolute_import, division, print_function

import collections
import distutils.spawn
import os
import platform
import shutil
import subprocess
import sys
Expand All @@ -32,11 +34,17 @@
import avro.io
import avro.schema
import avro.tether.tether_task_runner
import set_avro_test_path

_TOP_DIR = """@TOPDIR@"""
_AVRO_VERSION = """@AVRO_VERSION@"""
_JAR_PATH = os.path.abspath(os.path.join(_TOP_DIR, "..", "java", "tools", "target", "avro-tools-{}.jar".format(_AVRO_VERSION)))
_AVRO_DIR = os.path.abspath(os.path.dirname(avro.__file__))

def _version():
    """Read VERSION.txt from the avro package directory and return it in Java form.

    The file contents are stripped of surrounding whitespace and every '+'
    is replaced with '-'.
    """
    version_path = os.path.join(_AVRO_DIR, 'VERSION.txt')
    with open(version_path) as version_file:
        python_version = version_file.read().strip()
    # Convert it back to the java version
    return python_version.replace('+', '-')

_AVRO_VERSION = _version()
_JAR_PATH = os.path.join(os.path.dirname(os.path.dirname(_AVRO_DIR)),
"java", "tools", "target", "avro-tools-{}.jar".format(_AVRO_VERSION))

_LINES = ("the quick brown fox jumps over the lazy dog",
"the cow jumps over the moon",
Expand All @@ -56,6 +64,24 @@
os.path.dirname(__file__)])


def _has_java():
"""Detect if this system has a usable java installed.
On most systems, this is just checking if `java` is in the PATH.
But macos always has a /usr/bin/java, which does not mean java is installed. If you invoke java on macos and java is not installed, macos will spawn a popup telling you how to install java. This code does additional work around that to be completely automatic.
"""
if platform.system() == "Darwin":
try:
output = subprocess.check_output("/usr/libexec/java_home", stderr=subprocess.STDOUT)
except subprocess.CalledProcessError as e:
output = e.output
return ("No Java runtime present" not in output)
return bool(distutils.spawn.find_executable("java"))


@unittest.skipUnless(_has_java(), "No Java runtime present")
@unittest.skipUnless(os.path.exists(_JAR_PATH), "{} not found".format(_JAR_PATH))
class TestTetherWordCount(unittest.TestCase):
"""unittest for a python tethered map-reduce job."""

Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Loading

0 comments on commit 9ab19ee

Please sign in to comment.