Skip to content

Commit

Permalink
ci: add black to ci (georgia-tech-db#320)
Browse files (browse the repository at this point in the history)
* ci: add black to ci

* style: ran formatter

* style: linter fix

* fix

* style fix
  • Loading branch information
gaurav274 authored Aug 13, 2022
1 parent 3dd4d8b commit df84740
Show file tree
Hide file tree
Showing 54 changed files with 678 additions and 517 deletions.
13 changes: 13 additions & 0 deletions .flake8
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
[flake8]
exclude =
eva/filters
eva/parser/evaql
ignore =
E203
E266
E501
W503
max-line-length = 88
avoid-escape = no
inline-quotes = "
select = B,C,E,F,W,T4,B9
2 changes: 1 addition & 1 deletion eva/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .version import VERSION as __version__ # noqa: F401
from .version import VERSION as __version__ # noqa: F401
5 changes: 3 additions & 2 deletions eva/binder/binder_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,9 @@ def bind_table_info(table_info: TableInfo) -> DataFrameMetadata:
if obj:
table_info.table_obj = obj
else:
error = '{} does not exist. Create the table using' \
' CREATE TABLE.'.format(table_info.table_name)
error = "{} does not exist. Create the table using" " CREATE TABLE.".format(
table_info.table_name
)
logger.error(error)
raise BinderError(error)

Expand Down
14 changes: 9 additions & 5 deletions eva/binder/statement_binder.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,11 @@
from eva.parser.create_mat_view_statement import CreateMaterializedViewStatement
from eva.parser.drop_statement import DropTableStatement
from eva.parser.load_statement import LoadDataStatement
from eva.parser.upload_statement import UploadStatement
from eva.parser.select_statement import SelectStatement
from eva.parser.statement import AbstractStatement
from eva.parser.table_ref import TableRef
from eva.parser.types import FileFormatType
from eva.parser.upload_statement import UploadStatement
from eva.utils.generic_utils import path_to_class
from eva.utils.logging_manager import logger

Expand Down Expand Up @@ -149,16 +149,18 @@ def _bind_load_data_statement(self, node: LoadDataStatement):
@bind.register(UploadStatement)
def _bind_upload_statement(self, node: UploadStatement):
table_ref = node.table_ref
if node.file_options['file_format'] == FileFormatType.VIDEO:
if node.file_options["file_format"] == FileFormatType.VIDEO:
# Create a new metadata object
create_video_metadata(table_ref.table.table_name)

self.bind(table_ref)

table_ref_obj = table_ref.table.table_obj
if table_ref_obj is None:
error = '{} does not exists. Create the table using \
CREATE TABLE.'.format(table_ref.table.table_name)
error = "{} does not exists. Create the table using \
CREATE TABLE.".format(
table_ref.table.table_name
)
logger.error(error)
raise RuntimeError(error)

Expand All @@ -174,7 +176,9 @@ def _bind_upload_statement(self, node: UploadStatement):
TupleValueExpression(
col_name=column.name,
table_alias=table_ref_obj.name.lower(),
col_object=column))
col_object=column,
)
)

# bind the columns
for expr in column_list:
Expand Down
9 changes: 3 additions & 6 deletions eva/configuration/bootstrap_environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@
import importlib.resources as importlib_resources
import os
import shutil
import tempfile
from pathlib import Path

import yaml
import tempfile

from eva.configuration.config_utils import read_value_config, update_value_config
from eva.configuration.dictionary import (
Expand Down Expand Up @@ -65,9 +65,7 @@ def bootstrap_environment():
database_uri = read_value_config(cfg, "core", "catalog_database_uri")
upload_location = None

if not dataset_location or \
not database_uri or \
not upload_location:
if not dataset_location or not database_uri or not upload_location:
if not dataset_location:
dataset_location = str(eva_home_directory / EVA_DATASET_DIR)
update_value_config(cfg, "core", "datasets_dir", dataset_location)
Expand All @@ -77,8 +75,7 @@ def bootstrap_environment():

# Ref: https://stackoverflow.com/a/847866
upload_location = str(eva_home_directory / tempfile.gettempdir())
update_value_config(cfg, "storage", "upload_dir",
upload_location)
update_value_config(cfg, "storage", "upload_dir", upload_location)

# Create upload directory in eva home directory if it does not exist
upload_location = Path(upload_location)
Expand Down
2 changes: 1 addition & 1 deletion eva/executor/load_csv_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class LoadCSVExecutor(AbstractExecutor):
def __init__(self, node: LoadDataPlan):
super().__init__(node)
config = ConfigurationManager()
self.upload_dir = config.get_value('storage', 'upload_dir')
self.upload_dir = config.get_value("storage", "upload_dir")

def validate(self):
pass
Expand Down
2 changes: 1 addition & 1 deletion eva/executor/load_video_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class LoadVideoExecutor(AbstractExecutor):
def __init__(self, node: LoadDataPlan):
super().__init__(node)
config = ConfigurationManager()
self.upload_dir = Path(config.get_value('storage', 'upload_dir'))
self.upload_dir = Path(config.get_value("storage", "upload_dir"))

def validate(self):
pass
Expand Down
5 changes: 3 additions & 2 deletions eva/executor/orderby_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@ def exec(self) -> Iterator[Batch]:
# sorts the batch
try:
aggregated_batch.sort_orderby(
by=self.extract_column_names(), sort_type=self.extract_sort_types()
by=self.extract_column_names(),
sort_type=self.extract_sort_types(),
)
except KeyError:
# pass for now
Expand All @@ -78,7 +79,7 @@ def exec(self) -> Iterator[Batch]:
# on self.batch_sizes, which holds the input batch sizes
index = 0
for i in self.batch_sizes:
batch = aggregated_batch[index: index + i]
batch = aggregated_batch[index : index + i]
batch.reset_index()
index += i
yield batch
2 changes: 1 addition & 1 deletion eva/executor/sample_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,5 +40,5 @@ def exec(self) -> Iterator[Batch]:

current = 0
for batch in child_executor.exec():
yield batch[current:: self._sample_freq]
yield batch[current :: self._sample_freq]
current = (current - len(batch)) % self._sample_freq
10 changes: 5 additions & 5 deletions eva/executor/upload_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,17 @@

from eva.configuration.configuration_manager import ConfigurationManager
from eva.executor.abstract_executor import AbstractExecutor
from eva.planner.upload_plan import UploadPlan
from eva.executor.load_csv_executor import LoadCSVExecutor
from eva.executor.load_video_executor import LoadVideoExecutor
from eva.parser.types import FileFormatType
from eva.planner.upload_plan import UploadPlan


class UploadExecutor(AbstractExecutor):
def __init__(self, node: UploadPlan):
super().__init__(node)
config = ConfigurationManager()
self.upload_dir = config.get_value('storage', 'upload_dir')
self.upload_dir = config.get_value("storage", "upload_dir")

def validate(self):
pass
Expand All @@ -43,13 +43,13 @@ def exec(self):
video_blob = self.node.video_blob
path = self.node.file_path
video_bytes = base64.b64decode(video_blob[1:])
with open(os.path.join(self.upload_dir, path), 'wb') as f:
with open(os.path.join(self.upload_dir, path), "wb") as f:
f.write(video_bytes)

# invoke the appropriate executor
if self.node.file_options['file_format'] == FileFormatType.VIDEO:
if self.node.file_options["file_format"] == FileFormatType.VIDEO:
executor = LoadVideoExecutor(self.node)
elif self.node.file_options['file_format'] == FileFormatType.CSV:
elif self.node.file_options["file_format"] == FileFormatType.CSV:
executor = LoadCSVExecutor(self.node)

# for each batch, exec the executor
Expand Down
59 changes: 35 additions & 24 deletions eva/optimizer/operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -746,12 +746,15 @@ class LogicalUpload(Operator):
video_blob(str): base64 encoded video string
"""

def __init__(self, path: Path,
video_blob: str,
table_metainfo: DataFrameMetadata,
column_list: List[AbstractExpression] = None,
file_options: dict = dict(),
children: List = None):
def __init__(
self,
path: Path,
video_blob: str,
table_metainfo: DataFrameMetadata,
column_list: List[AbstractExpression] = None,
file_options: dict = dict(),
children: List = None,
):
super().__init__(OperatorType.LOGICALUPLOAD, children=children)
self._path = path
self._video_blob = video_blob
Expand Down Expand Up @@ -780,34 +783,42 @@ def file_options(self):
return self._file_options

def __str__(self):
return 'LogicalUpload(path: {}, \
return "LogicalUpload(path: {}, \
blob: {}, \
table: {}, \
column_list: {}, \
file_options: {})'.format(self.path,
"string of video blob",
self.table_metainfo,
self.column_list,
self.file_options)
file_options: {})".format(
self.path,
"string of video blob",
self.table_metainfo,
self.column_list,
self.file_options,
)

def __eq__(self, other):
is_subtree_equal = super().__eq__(other)
if not isinstance(other, LogicalUpload):
return False
return (is_subtree_equal
and self.path == other.path
and self.video_blob == other.video_blob
and self.table_metainfo == other.table_metainfo
and self.column_list == other.column_list
and self.file_options == other.file_options)
return (
is_subtree_equal
and self.path == other.path
and self.video_blob == other.video_blob
and self.table_metainfo == other.table_metainfo
and self.column_list == other.column_list
and self.file_options == other.file_options
)

def __hash__(self) -> int:
return hash((super().__hash__(),
self.path,
self.video_blob,
self.table_metainfo,
tuple(self.column_list),
frozenset(self.file_options.items())))
return hash(
(
super().__hash__(),
self.path,
self.video_blob,
self.table_metainfo,
tuple(self.column_list),
frozenset(self.file_options.items()),
)
)


class LogicalFunctionScan(Operator):
Expand Down
17 changes: 10 additions & 7 deletions eva/optimizer/rules/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -609,15 +609,18 @@ def apply(self, before: LogicalUpload, context: OptimizerContext):

batch_mem_size = 30000000 # 30mb
config_batch_mem_size = ConfigurationManager().get_value(
"executor", "batch_mem_size")
"executor", "batch_mem_size"
)
if config_batch_mem_size:
batch_mem_size = config_batch_mem_size
after = UploadPlan(before.path,
before.video_blob,
before.table_metainfo,
batch_mem_size,
before.column_list,
before.file_options)
after = UploadPlan(
before.path,
before.video_blob,
before.table_metainfo,
batch_mem_size,
before.column_list,
before.file_options,
)

return after

Expand Down
12 changes: 7 additions & 5 deletions eva/optimizer/statement_to_opr_convertor.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,11 +268,13 @@ def visit_upload(self, statement: UploadStatement):
statement(UploadStatement): [Upload statement]
"""
table_metainfo = statement.table_ref.table.table_obj
upload_opr = LogicalUpload(statement.path,
statement.video_blob,
table_metainfo,
statement.column_list,
statement.file_options)
upload_opr = LogicalUpload(
statement.path,
statement.video_blob,
table_metainfo,
statement.column_list,
statement.file_options,
)
self._plan = upload_opr

def visit_materialized_view(self, statement: CreateMaterializedViewStatement):
Expand Down
4 changes: 2 additions & 2 deletions eva/parser/load_statement.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ def __init__(

def __str__(self) -> str:
print_str = "LOAD FILE {} INTO {}({}) WITH {}".format(
self._path.name, self._table_ref,
self._column_list, self._file_options)
self._path.name, self._table_ref, self._column_list, self._file_options
)
return print_str

@property
Expand Down
8 changes: 3 additions & 5 deletions eva/parser/parser_visitor/_upload_statement.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,9 @@
# limitations under the License.
from eva.parser.evaql.evaql_parser import evaql_parser
from eva.parser.evaql.evaql_parserVisitor import evaql_parserVisitor
from eva.parser.upload_statement import UploadStatement

from eva.parser.table_ref import TableRef
from eva.parser.types import FileFormatType
from eva.parser.upload_statement import UploadStatement


class Upload(evaql_parserVisitor):
Expand All @@ -29,7 +28,7 @@ def visitUploadStatement(self, ctx: evaql_parser.UploadStatementContext):
# Set default for file_format as Video
file_format = FileFormatType.VIDEO
file_options = {}
file_options['file_format'] = file_format
file_options["file_format"] = file_format

if ctx.fileOptions():
file_options = self.visit(ctx.fileOptions())
Expand All @@ -39,6 +38,5 @@ def visitUploadStatement(self, ctx: evaql_parser.UploadStatementContext):
if ctx.uidList():
column_list = self.visit(ctx.uidList())

stmt = UploadStatement(srv_path, video_blob, table,
column_list, file_options)
stmt = UploadStatement(srv_path, video_blob, table, column_list, file_options)
return stmt
Loading

0 comments on commit df84740

Please sign in to comment.