Skip to content

Commit

Permalink
chore: ci Initial hive support (apache#10593)
Browse files Browse the repository at this point in the history
* Initial hive support

* Clone hive setup

* Make hive tests work locally

* Debugging presto failure

* sleep in dataset test

* Address comments

* Address comments

* Pin ipython, exclude new pylint rules

Co-authored-by: bogdan kyryliuk <[email protected]>
  • Loading branch information
bkyryliuk and bogdan-dbx authored Aug 27, 2020
1 parent 81525c3 commit 19a9bcc
Show file tree
Hide file tree
Showing 31 changed files with 531 additions and 186 deletions.
57 changes: 57 additions & 0 deletions .github/workflows/superset-python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,63 @@ jobs:
run: |
bash <(curl -s https://codecov.io/bash) -cF python
test-postgres-hive:
runs-on: ubuntu-18.04
strategy:
matrix:
# run unit tests in multiple version just for fun
python-version: [3.7, 3.8]
env:
PYTHONPATH: ${{ github.workspace }}
SUPERSET_CONFIG: tests.superset_test_config
REDIS_PORT: 16379
SUPERSET__SQLALCHEMY_DATABASE_URI:
postgresql+psycopg2://superset:[email protected]:15432/superset
SUPERSET__SQLALCHEMY_EXAMPLES_URI: hive://localhost:10000/default
UPLOAD_FOLDER: /tmp/.superset/uploads/
services:
postgres:
image: postgres:10-alpine
env:
POSTGRES_USER: superset
POSTGRES_PASSWORD: superset
ports:
# Use custom ports for services to avoid accidentally connecting to
# GitHub action runner's default installations
- 15432:5432
redis:
image: redis:5-alpine
ports:
- 16379:6379
steps:
- uses: actions/checkout@v2
- name: Create csv upload directory
run: sudo mkdir -p /tmp/.superset/uploads
- name: Give write access to the csv upload directory
run: sudo chown -R $USER:$USER /tmp/.superset
- name: Start hadoop and hive
run: docker-compose -f scripts/databases/hive/docker-compose.yml up -d
- name: Setup Python
uses: actions/[email protected]
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
uses: apache-superset/cached-dependencies@b90713b
with:
run: |
apt-get-install
pip-upgrade
pip install -r requirements/testing.txt
setup-postgres
- name: Run celery
run: celery worker --app=superset.tasks.celery_app:app -Ofair -c 2 &
- name: Python unit tests (PostgreSQL)
run: |
./scripts/python_tests.sh
- name: Upload code coverage
run: |
bash <(curl -s https://codecov.io/bash) -cF python
test-postgres:
runs-on: ubuntu-18.04
strategy:
Expand Down
2 changes: 1 addition & 1 deletion .pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ confidence=
# --enable=similarities". If you want to run only the classes checker, but have
# no Warning level messages displayed, use"--disable=all --enable=classes
# --disable=W"
disable=standarderror-builtin,long-builtin,dict-view-method,intern-builtin,suppressed-message,no-absolute-import,unpacking-in-except,apply-builtin,delslice-method,indexing-exception,old-raise-syntax,print-statement,cmp-builtin,reduce-builtin,useless-suppression,coerce-method,input-builtin,cmp-method,raw_input-builtin,nonzero-method,backtick,basestring-builtin,setslice-method,reload-builtin,oct-method,map-builtin-not-iterating,execfile-builtin,old-octal-literal,zip-builtin-not-iterating,buffer-builtin,getslice-method,metaclass-assignment,xrange-builtin,long-suffix,round-builtin,range-builtin-not-iterating,next-method-called,dict-iter-method,parameter-unpacking,unicode-builtin,unichr-builtin,import-star-module-level,raising-string,filter-builtin-not-iterating,old-ne-operator,using-cmp-argument,coerce-builtin,file-builtin,old-division,hex-method,invalid-unary-operand-type,missing-docstring,too-many-lines,duplicate-code,bad-continuation,ungrouped-imports,import-outside-toplevel
disable=standarderror-builtin,long-builtin,dict-view-method,intern-builtin,suppressed-message,no-absolute-import,unpacking-in-except,apply-builtin,delslice-method,indexing-exception,old-raise-syntax,print-statement,cmp-builtin,reduce-builtin,useless-suppression,coerce-method,input-builtin,cmp-method,raw_input-builtin,nonzero-method,backtick,basestring-builtin,setslice-method,reload-builtin,oct-method,map-builtin-not-iterating,execfile-builtin,old-octal-literal,zip-builtin-not-iterating,buffer-builtin,getslice-method,metaclass-assignment,xrange-builtin,long-suffix,round-builtin,range-builtin-not-iterating,next-method-called,dict-iter-method,parameter-unpacking,unicode-builtin,unichr-builtin,import-star-module-level,raising-string,filter-builtin-not-iterating,old-ne-operator,using-cmp-argument,coerce-builtin,file-builtin,old-division,hex-method,invalid-unary-operand-type,missing-docstring,too-many-lines,duplicate-code,bad-continuation,ungrouped-imports,import-outside-toplevel,raise-missing-from,super-with-arguments,bad-option-value


[REPORTS]
Expand Down
18 changes: 9 additions & 9 deletions requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,19 +11,19 @@ alembic==1.4.2 # via flask-migrate
amqp==2.6.1 # via kombu
apispec[yaml]==3.3.1 # via flask-appbuilder
async-timeout==3.0.1 # via aiohttp
attrs==19.3.0 # via aiohttp, jsonschema
attrs==20.1.0 # via aiohttp, jsonschema
babel==2.8.0 # via flask-babel
backoff==1.10.0 # via apache-superset
billiard==3.6.3.0 # via celery
bleach==3.1.5 # via apache-superset
boto3==1.14.36 # via tabulator
botocore==1.17.36 # via boto3, s3transfer
boto3==1.14.48 # via tabulator
botocore==1.17.48 # via boto3, s3transfer
brotli==1.0.7 # via flask-compress
cached-property==1.5.1 # via tableschema
cachelib==0.1.1 # via apache-superset
celery==4.4.7 # via apache-superset
certifi==2020.6.20 # via requests
cffi==1.14.1 # via cryptography
cffi==1.14.2 # via cryptography
chardet==3.0.4 # via aiohttp, requests, tabulator
click==7.1.2 # via apache-superset, flask, flask-appbuilder, tableschema, tabulator
colorama==0.4.3 # via apache-superset, flask-appbuilder
Expand Down Expand Up @@ -54,7 +54,7 @@ future==0.18.2 # via pyhive
geographiclib==1.50 # via geopy
geopy==2.0.0 # via apache-superset
gunicorn==20.0.4 # via apache-superset
humanize==2.5.0 # via apache-superset
humanize==2.6.0 # via apache-superset
idna==2.10 # via email-validator, requests, yarl
ijson==3.1.1 # via tabulator
importlib-metadata==1.7.0 # via jsonschema, kombu, markdown
Expand All @@ -78,7 +78,7 @@ multidict==4.7.6 # via aiohttp, yarl
mysqlclient==1.4.2.post1 # via apache-superset
natsort==7.0.1 # via croniter
numpy==1.19.1 # via pandas, pyarrow
openpyxl==3.0.4 # via tabulator
openpyxl==3.0.5 # via tabulator
packaging==20.4 # via bleach
pandas==1.0.5 # via apache-superset
parsedatetime==2.6 # via apache-superset
Expand Down Expand Up @@ -112,13 +112,13 @@ simplejson==3.17.2 # via apache-superset
six==1.15.0 # via bleach, cryptography, flask-cors, flask-jwt-extended, flask-talisman, isodate, jsonlines, jsonschema, linear-tsv, packaging, pathlib2, polyline, prison, pyrsistent, python-dateutil, sasl, sqlalchemy-utils, tableschema, tabulator, thrift, thrift-sasl, wtforms-json
slackclient==2.5.0 # via apache-superset
sqlalchemy-utils==0.36.8 # via apache-superset, flask-appbuilder
sqlalchemy==1.3.18 # via alembic, apache-superset, flask-sqlalchemy, marshmallow-sqlalchemy, sqlalchemy-utils, tabulator
sqlalchemy==1.3.19 # via alembic, apache-superset, flask-sqlalchemy, marshmallow-sqlalchemy, sqlalchemy-utils, tabulator
sqlparse==0.3.0 # via apache-superset
tableschema==1.19.2 # via apache-superset
tableschema==1.19.3 # via apache-superset
tabulator==1.52.3 # via tableschema
thrift-sasl==0.4.2 # via pyhive
thrift==0.13.0 # via apache-superset, pyhive, thrift-sasl
typing-extensions==3.7.4.2 # via yarl
typing-extensions==3.7.4.3 # via yarl
unicodecsv==0.14.1 # via tableschema, tabulator
urllib3==1.25.10 # via botocore, requests, selenium
vine==1.3.0 # via amqp, celery
Expand Down
6 changes: 3 additions & 3 deletions requirements/docker.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
# pip-compile-multi
#
-r base.txt
-e file:. # via -r base.in
gevent==20.6.2 # via -r docker.in
-e file:. # via -r requirements/base.in
gevent==20.6.2 # via -r requirements/docker.in
greenlet==0.4.16 # via gevent
redis==3.5.3 # via -r docker.in
redis==3.5.3 # via -r requirements/docker.in
zope.event==4.4 # via gevent
zope.interface==5.1.0 # via gevent

Expand Down
2 changes: 1 addition & 1 deletion requirements/documentation.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ imagesize==1.2.0 # via sphinx
pygments==2.6.1 # via sphinx
snowballstemmer==2.0.0 # via sphinx
sphinx-rtd-theme==0.5.0 # via -r requirements/documentation.in
sphinx==3.1.2 # via -r requirements/documentation.in, sphinx-rtd-theme
sphinx==3.2.1 # via -r requirements/documentation.in, sphinx-rtd-theme
sphinxcontrib-applehelp==1.0.2 # via sphinx
sphinxcontrib-devhelp==1.0.2 # via sphinx
sphinxcontrib-htmlhelp==1.0.3 # via sphinx
Expand Down
12 changes: 6 additions & 6 deletions requirements/integration.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,22 @@ cfgv==3.2.0 # via pre-commit
click==7.1.2 # via pip-compile-multi, pip-tools
distlib==0.3.1 # via virtualenv
filelock==3.0.12 # via tox, virtualenv
identify==1.4.25 # via pre-commit
identify==1.4.29 # via pre-commit
importlib-metadata==1.7.0 # via pluggy, pre-commit, tox, virtualenv
nodeenv==1.4.0 # via pre-commit
nodeenv==1.5.0 # via pre-commit
packaging==20.4 # via tox
pip-compile-multi==1.5.8 # via -r requirements/integration.in
pip-compile-multi==2.1.0 # via -r requirements/integration.in
pip-tools==5.3.1 # via pip-compile-multi
pluggy==0.13.1 # via tox
pre-commit==2.6.0 # via -r requirements/integration.in
pre-commit==2.7.1 # via -r requirements/integration.in
py==1.9.0 # via tox
pyparsing==2.4.7 # via packaging
pyyaml==5.3.1 # via pre-commit
six==1.15.0 # via packaging, pip-tools, tox, virtualenv
toml==0.10.1 # via pre-commit, tox
toposort==1.5 # via pip-compile-multi
tox==3.18.1 # via -r requirements/integration.in
virtualenv==20.0.30 # via pre-commit, tox
tox==3.19.0 # via -r requirements/integration.in
virtualenv==20.0.31 # via pre-commit, tox
zipp==3.1.0 # via importlib-metadata

# The following packages are considered to be unsafe in a requirements file:
Expand Down
5 changes: 5 additions & 0 deletions requirements/testing.in
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@
-r base.in
-r integration.in
flask-testing
docker
ipdb
# pinning ipython as pip-compile-multi was bringing higher version
# of the ipython that was not found in CI
ipython==7.16.1
openapi-spec-validator
openpyxl
parameterized
Expand Down
24 changes: 20 additions & 4 deletions requirements/testing.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SHA1:e7b15a12c98ccce1cc4b8ee977205f141201b761
# SHA1:f9f1fc59b48794bbb4512a857fd5b3c24c33aa1e
#
# This file is autogenerated by pip-compile-multi
# To update, run:
Expand All @@ -8,23 +8,39 @@
-r base.txt
-r integration.txt
-e file:. # via -r requirements/base.in
appnope==0.1.0 # via ipython
astroid==2.4.2 # via pylint
backcall==0.2.0 # via ipython
coverage==5.2.1 # via pytest-cov
docker==4.3.1 # via -r requirements/testing.in
flask-testing==0.8.0 # via -r requirements/testing.in
iniconfig==1.0.1 # via pytest
isort==4.3.21 # via pylint
ipdb==0.13.3 # via -r requirements/testing.in
ipython-genutils==0.2.0 # via traitlets
ipython==7.16.1 # via -r requirements/testing.in, ipdb
isort==5.4.2 # via pylint
jedi==0.17.2 # via ipython
lazy-object-proxy==1.4.3 # via astroid
mccabe==0.6.1 # via pylint
more-itertools==8.4.0 # via pytest
openapi-spec-validator==0.2.9 # via -r requirements/testing.in
parameterized==0.7.4 # via -r requirements/testing.in
parso==0.7.1 # via jedi
pexpect==4.8.0 # via ipython
pickleshare==0.7.5 # via ipython
prompt-toolkit==3.0.6 # via ipython
ptyprocess==0.6.0 # via pexpect
pygments==2.6.1 # via ipython
pyhive[hive,presto]==0.6.3 # via -r requirements/testing.in, apache-superset
pylint==2.5.3 # via -r requirements/testing.in
pytest-cov==2.10.0 # via -r requirements/testing.in
pylint==2.6.0 # via -r requirements/testing.in
pytest-cov==2.10.1 # via -r requirements/testing.in
pytest==6.0.1 # via -r requirements/testing.in, pytest-cov
redis==3.5.3 # via -r requirements/testing.in
statsd==3.3.0 # via -r requirements/testing.in
traitlets==4.3.3 # via ipython
typed-ast==1.4.1 # via astroid
wcwidth==0.2.5 # via prompt-toolkit
websocket-client==0.57.0 # via docker
wrapt==1.12.1 # via astroid

# The following packages are considered to be unsafe in a requirements file:
Expand Down
19 changes: 19 additions & 0 deletions scripts/databases/hive/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
current_branch := $(shell git rev-parse --abbrev-ref HEAD)
build:
docker build -t bde2020/hive:$(current_branch) ./
79 changes: 79 additions & 0 deletions scripts/databases/hive/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

version: "3.2"

services:
namenode:
container_name: namenode
image: bde2020/hadoop-namenode:2.0.0-hadoop2.7.4-java8
volumes:
- namenode:/hadoop/dfs/name
- type: bind
source: "$UPLOAD_FOLDER"
target: /tmp/superset_uploads
environment:
- CLUSTER_NAME=test
env_file:
- ./hadoop-hive.env
ports:
- "50070:50070"
datanode:
image: bde2020/hadoop-datanode:2.0.0-hadoop2.7.4-java8
volumes:
- datanode:/hadoop/dfs/data
- type: bind
source: "$UPLOAD_FOLDER"
target: /tmp/superset_uploads
env_file:
- ./hadoop-hive.env
environment:
SERVICE_PRECONDITION: "namenode:50070"
ports:
- "50075:50075"
hive-server:
image: bde2020/hive:2.3.2-postgresql-metastore
env_file:
- ./hadoop-hive.env
environment:
HIVE_CORE_CONF_javax_jdo_option_ConnectionURL: "jdbc:postgresql://hive-metastore/metastore"
SERVICE_PRECONDITION: "hive-metastore:9083"
ports:
- "10000:10000"
volumes:
- type: bind
source: "$UPLOAD_FOLDER"
target: /tmp/superset_uploads
hive-metastore:
image: bde2020/hive:2.3.2-postgresql-metastore
env_file:
- ./hadoop-hive.env
command: /opt/hive/bin/hive --service metastore
environment:
SERVICE_PRECONDITION: "namenode:50070 datanode:50075 hive-metastore-postgresql:5432"
ports:
- "9083:9083"
volumes:
- type: bind
source: "$UPLOAD_FOLDER"
target: /tmp/superset_uploads
hive-metastore-postgresql:
image: bde2020/hive-metastore-postgresql:2.3.0

volumes:
namenode:
datanode:
46 changes: 46 additions & 0 deletions scripts/databases/hive/hadoop-hive.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
HIVE_SITE_CONF_javax_jdo_option_ConnectionURL=jdbc:postgresql://hive-metastore-postgresql/metastore
HIVE_SITE_CONF_javax_jdo_option_ConnectionDriverName=org.postgresql.Driver
HIVE_SITE_CONF_javax_jdo_option_ConnectionUserName=hive
HIVE_SITE_CONF_javax_jdo_option_ConnectionPassword=hive
HIVE_SITE_CONF_datanucleus_autoCreateSchema=false
HIVE_SITE_CONF_hive_metastore_uris=thrift://hive-metastore:9083
HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false

CORE_CONF_fs_defaultFS=hdfs://namenode:8020
CORE_CONF_hadoop_http_staticuser_user=root
CORE_CONF_hadoop_proxyuser_hue_hosts=*
CORE_CONF_hadoop_proxyuser_hue_groups=*

HDFS_CONF_dfs_webhdfs_enabled=true
HDFS_CONF_dfs_permissions_enabled=false

YARN_CONF_yarn_log___aggregation___enable=true
YARN_CONF_yarn_resourcemanager_recovery_enabled=true
YARN_CONF_yarn_resourcemanager_store_class=org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore
YARN_CONF_yarn_resourcemanager_fs_state___store_uri=/rmstate
YARN_CONF_yarn_nodemanager_remote___app___log___dir=/app-logs
YARN_CONF_yarn_log_server_url=http://historyserver:8188/applicationhistory/logs/
YARN_CONF_yarn_timeline___service_enabled=true
YARN_CONF_yarn_timeline___service_generic___application___history_enabled=true
YARN_CONF_yarn_resourcemanager_system___metrics___publisher_enabled=true
YARN_CONF_yarn_resourcemanager_hostname=resourcemanager
YARN_CONF_yarn_timeline___service_hostname=historyserver
YARN_CONF_yarn_resourcemanager_address=resourcemanager:8032
YARN_CONF_yarn_resourcemanager_scheduler_address=resourcemanager:8030
YARN_CONF_yarn_resourcemanager_resource__tracker_address=resourcemanager:8031
Loading

0 comments on commit 19a9bcc

Please sign in to comment.