Skip to content

Commit

Permalink
feat(dev): moved output buffers into writer methods
Browse files Browse the repository at this point in the history
- Moved output buffers into the datastore writer methods.
- Made write record methods return bytes.
- Added end file method for returning final bytes of the files.
  • Loading branch information
JVickery-TBS committed Oct 13, 2023
1 parent 06b654c commit 0f3ea9a
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 43 deletions.
20 changes: 6 additions & 14 deletions ckanext/datastore/blueprint.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

from typing import Any, Optional, cast, Union
from itertools import zip_longest
from io import BytesIO

from flask import Blueprint, Response
from flask.views import MethodView
Expand Down Expand Up @@ -207,8 +206,6 @@ def dump_to(
limit: Optional[int], options: dict[str, Any], sort: str,
search_params: dict[str, Any], user: str
):
output_buffer = BytesIO()

if fmt == 'csv':
writer_factory = csv_writer
records_format = 'csv'
Expand All @@ -226,9 +223,8 @@ def dump_to(

bom = options.get('bom', False)

def start_stream_writer(output_buffer: BytesIO,
fields: list[dict[str, Any]]):
return writer_factory(output_buffer, fields, bom=bom)
def start_stream_writer(fields: list[dict[str, Any]]):
return writer_factory(fields, bom=bom)

def stream_result_page(offs: int, lim: Union[None, int]):
return get_action('datastore_search')(
Expand All @@ -246,18 +242,14 @@ def stream_result_page(offs: int, lim: Union[None, int]):

def stream_dump(offset: int, limit: Union[None, int],
paginate_by: int, result: dict[str, Any]):
with start_stream_writer(output_buffer, result['fields']) as output:
with start_stream_writer(result['fields']) as writer:
while True:
if limit is not None and limit <= 0:
break

records = result['records']

output.write_records(records)
output_buffer.seek(0)
yield output_buffer.read()
output_buffer.truncate(0)
output_buffer.seek(0)
yield writer.write_records(records)

if records_format == 'objects' or records_format == 'lists':
if len(records) < paginate_by:
Expand All @@ -272,8 +264,8 @@ def stream_dump(offset: int, limit: Union[None, int],
break

result = stream_result_page(offset, limit)
output_buffer.seek(0)
yield output_buffer.read()

yield writer.end_file()

result = stream_result_page(offset, limit)

Expand Down
2 changes: 1 addition & 1 deletion ckanext/datastore/tests/test_dump.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,7 +426,7 @@ def test_dump_xml(self, app):

res = app.get(f"/datastore/dump/{resource['id']}?limit=1&format=xml")
expected_content = (
u"<data>\n"
u'<data xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">\n'
r'<row _id="1">'
u"<b\xfck>annakarenina</b\xfck>"
u"<author>tolstoy</author>"
Expand Down
84 changes: 56 additions & 28 deletions ckanext/datastore/writer.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# encoding: utf-8
from __future__ import annotations

from io import BytesIO
from io import StringIO, BytesIO

from contextlib import contextmanager
from typing import Any, Optional
Expand All @@ -14,113 +14,132 @@
from codecs import BOM_UTF8


BOM = "\N{bom}"


@contextmanager
def csv_writer(fields: list[dict[str, Any]], bom: bool = False):
    '''Context manager yielding a TextWriter that renders UTF-8 CSV.

    The writer owns an internal StringIO buffer; callers collect encoded
    bytes from TextWriter.write_records() and TextWriter.end_file().

    :param fields: list of datastore fields
    :param bom: True to include a UTF-8 BOM at the start of the file
    '''
    output = StringIO()

    if bom:
        output.write(BOM)

    # Header row: one column per datastore field id.
    csv.writer(output).writerow(
        f['id'] for f in fields)
    yield TextWriter(output)


@contextmanager
def tsv_writer(fields: list[dict[str, Any]], bom: bool = False):
    '''Context manager yielding a TextWriter that renders UTF-8 TSV.

    The writer owns an internal StringIO buffer; callers collect encoded
    bytes from TextWriter.write_records() and TextWriter.end_file().

    :param fields: list of datastore fields
    :param bom: True to include a UTF-8 BOM at the start of the file
    '''
    output = StringIO()

    if bom:
        output.write(BOM)

    # Header row, tab-separated (Excel TSV dialect).
    csv.writer(
        output,
        dialect='excel-tab').writerow(
            f['id'] for f in fields)
    yield TextWriter(output)


class TextWriter(object):
    'text in, text out'

    def __init__(self, output: StringIO):
        # Internal text buffer; drained and reused on every write so
        # memory does not grow with the size of the dump.
        self.output = output

    def write_records(self, records: list[Any]) -> bytes:
        '''Append pre-formatted record text and return it UTF-8 encoded.

        ``records`` is already-rendered CSV/TSV text (a str) despite the
        list annotation — see the callers' ``records_format``.
        '''
        self.output.write(records)  # type: ignore
        # Drain the whole buffer (including any BOM/header written by the
        # factory) and reset it for the next page of records.
        output = self.output.getvalue().encode('utf-8')
        self.output.seek(0)
        self.output.truncate(0)
        return output

    def end_file(self) -> bytes:
        '''CSV/TSV files need no trailer; nothing left to flush.'''
        return b''


@contextmanager
def json_writer(fields: list[dict[str, Any]], bom: bool = False):
    '''Context manager yielding a JSONWriter that renders UTF-8 JSON.

    The writer owns an internal StringIO buffer; callers collect encoded
    bytes from JSONWriter.write_records() and JSONWriter.end_file().

    :param fields: list of datastore fields
    :param bom: True to include a UTF-8 BOM at the start of the file
    '''
    output = StringIO()

    if bom:
        output.write(BOM)

    # Opening of the top-level object and the "records" array;
    # JSONWriter.end_file() emits the matching close.
    output.write(
        '{\n "fields": %s,\n "records": [' % dumps(
            fields, ensure_ascii=False, separators=(',', ':')))
    yield JSONWriter(output)


class JSONWriter(object):
    '''Streams datastore records as elements of a JSON "records" array.'''

    def __init__(self, output: StringIO):
        # Internal text buffer; drained and reused on every write so
        # memory does not grow with the size of the dump.
        self.output = output
        # True until the first record is written; controls whether a
        # separating comma precedes the next element.
        self.first = True

    def write_records(self, records: list[Any]) -> bytes:
        '''Serialize records as JSON array elements; return UTF-8 bytes.'''
        for r in records:
            if self.first:
                self.first = False
                self.output.write('\n ')
            else:
                self.output.write(',\n ')

            self.output.write(dumps(
                r, ensure_ascii=False, separators=(',', ':')))

        # Drain the whole buffer and reset it for the next page.
        output = self.output.getvalue().encode('utf-8')
        self.output.seek(0)
        self.output.truncate(0)
        return output

    def end_file(self) -> bytes:
        '''Close the "records" array and the top-level object.'''
        return b'\n]}\n'


@contextmanager
def xml_writer(fields: list[dict[str, Any]], bom: bool = False):
    '''Context manager yielding an XMLWriter that renders UTF-8 XML.

    :param fields: list of datastore fields
    :param bom: True to include a UTF-8 BOM at the start of the file
    '''
    # Bytes buffer (not StringIO): ElementTree.write() with an explicit
    # encoding emits encoded bytes directly.
    output = BytesIO()

    if bom:
        output.write(BOM_UTF8)

    # Root element open tag; XMLWriter.end_file() emits the close tag.
    output.write(
        b'<data xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">\n')
    yield XMLWriter(output, [f['id'] for f in fields])


class XMLWriter(object):
_key_attr = 'key'
_value_tag = 'value'

def __init__(self, output: BytesIO, columns: list[str]):
def __init__(self, output: StringIO, columns: list[str]):
self.output = output
self.id_col = columns[0] == '_id'
if self.id_col:
Expand All @@ -145,7 +164,7 @@ def _insert_node(self, root: Any, k: str, v: Any,
if key_attr is not None:
element.attrib[self._key_attr] = str(key_attr)

def write_records(self, records: list[Any]):
def write_records(self, records: list[Any]) -> bytes:
for r in records:
root = Element('row')
if self.id_col:
Expand All @@ -154,3 +173,12 @@ def write_records(self, records: list[Any]):
self._insert_node(root, c, r[c])
ElementTree(root).write(self.output, encoding='utf-8')
self.output.write(b'\n')
self.output.seek(0)
output = self.output.read()
self.output.truncate(0)
self.output.seek(0)
return output

def end_file(self) -> bytes:
return b'</data>\n'

0 comments on commit 0f3ea9a

Please sign in to comment.