Version 0.5.4
abcache: Speed up AbCacheFilesystem `ls()`
mos9527 committed Nov 22, 2024
1 parent 7959355 commit 946ab9c
Showing 5 changed files with 96 additions and 33 deletions.
2 changes: 1 addition & 1 deletion setup.py
@@ -11,7 +11,7 @@
     version=sssekai.__version__,
     author="greats3an",
     author_email="[email protected]",
-    description="Project SEKAI Asset Utility / PJSK 资源下载 + Live2D, Spine, USM 提取",
+    description="Project SEKAI Asset Utility / PJSK 资源工具",
     long_description=long_description,
     long_description_content_type="text/markdown",
     url="https://github.com/mos9527/sssekai",
2 changes: 1 addition & 1 deletion sssekai/__init__.py
@@ -1,5 +1,5 @@
 __VERSION_MAJOR__ = 0
 __VERSION_MINOR__ = 5
-__VERSION_PATCH__ = 3
+__VERSION_PATCH__ = 4
 
 __version__ = "%s.%s.%s" % (__VERSION_MAJOR__, __VERSION_MINOR__, __VERSION_PATCH__)
2 changes: 1 addition & 1 deletion sssekai/__main__.py
Expand Up @@ -28,7 +28,7 @@ def write(__s):
return sys.stdout.write(__s)

parser = argparse.ArgumentParser(
description="""SSSekai Proejct SEKAI feat. Hatsune Miku (Android) Asset Utility""",
description="""Project SEKAI Asset Utility / PJSK 资源工具""",
formatter_class=argparse.RawTextHelpFormatter,
)
parser.add_argument(
Expand Down
97 changes: 77 additions & 20 deletions sssekai/abcache/fs.py
@@ -1,6 +1,6 @@
 import math, fsspec
 from typing import Callable
-from functools import cached_property
+from functools import cached_property, cache
 from collections import defaultdict
 from fsspec.spec import AbstractBufferedFile
 from fsspec.caching import BaseCache, register_cache
@@ -68,7 +68,7 @@ class AbCacheFile(AbstractBufferedFile):
     - Seeks are simulated by read-aheads (via UnidirectionalBlockCache), meaning seek
       operations incur additional downloads (the bytes in between are cached as well).
     """
-
+    DEFAULT_BLOCK_SIZE = 65536  # 64KB
     entry: AbCacheEntry
 
     @property
@@ -81,12 +81,13 @@ def entry(self) -> AbCacheEntry:
         assert entry is not None, "entry not found"
         return entry
 
-    def __init__(self, fs, bundle: str):
+    def __init__(self, fs, bundle: str, block_size=None):
         self.fs, self.path = fs, bundle
         self.fetch_loc = 0
         super().__init__(
             fs,
             bundle,
+            block_size=block_size or self.DEFAULT_BLOCK_SIZE,
             mode="rb",
             cache_type="unidirectional_blockcache",
             size=self.entry.fileSize,
@@ -104,7 +105,7 @@ def __innner():
             for block in decrypt_iter(
                 lambda nbytes: next(self.__resp.iter_content(nbytes)), self.blocksize
             ):
-                yield block
+                yield bytes(block)
 
         return __innner()
 
@@ -117,7 +118,7 @@ def _fetch_range(self, start, end):
 # Reference: https://github.com/fsspec/filesystem_spec/blob/master/fsspec/implementations/libarchive.py
 class AbCacheFilesystem(AbstractArchiveFileSystem):
     """Filesystem for reading from an AbCache on demand."""
-
+    root_marker = "/"
     protocol = "abcache"
     cache: AbCache
 
@@ -142,25 +143,81 @@ def __init__(self, fo: str = "", cache_obj: AbCache = None, *args, **kwargs):
 
     @cached_property
     def dir_cache(self):
-        cache = defaultdict(dict)
-        for path, bundle in self.cache.abcache_index.bundles.items():
-            path = "/" + path
-            cache.update(
-                {
-                    dirname: {"name": dirname, "size": 0, "type": "directory"}
-                    for dirname in self._all_dirnames([path])
-                }
-            )
-            cache[path] = {
-                "name": path,
-                "size": bundle.fileSize,
-                "type": "file",
-            }
-        return cache
+        # The reference implementation did an O(n) scan on *every* ls() call.
+        # Preprocess the tree once in O(n log n) instead, making each ls() O(1).
+        bundles = self.cache.abcache_index.bundles
+        # Only the leaf nodes (bundles) are given; the directories are derived.
+        keys = set(self.root_marker + key for key in bundles.keys())
+        keys |= self._all_dirnames(bundles.keys())
+        # Lexicographic sorting puts the keys in DFS (pre-order) order.
+        keys = [self.root_marker] + sorted(keys)
+        _trim = lambda key: key[len(self.root_marker):]
+        nodes = [{
+            "name": key,
+            "type": "file" if _trim(key) in bundles else "directory",
+            "size": bundles[_trim(key)].fileSize if _trim(key) in bundles else 0,
+            "item_count": 0,
+            "file_count": 0,
+            "total_size": 0,
+        } for key in keys]
+        # Keys are already in DFS order: walk them once with an explicit stack,
+        # recording each directory's children and folding subtree totals upward.
+        stack = [0]
+        graph = defaultdict(list)
+        table = {node["name"]: index for index, node in enumerate(nodes)}
+        def is_file(name):
+            return _trim(name) in bundles
+        def is_parent_path(a, b):
+            # True iff a is an ancestor directory of b.
+            if a == self.root_marker: return True
+            return b.startswith(a + self.root_marker)
+        def maintain():
+            # The walk starts from root, so the stack holds >= 2 entries here.
+            u, v = stack[-2], stack[-1]
+            nodes[u]["item_count"] += nodes[v]["item_count"]
+            nodes[u]["file_count"] += nodes[v]["file_count"]
+            nodes[u]["total_size"] += nodes[v]["total_size"]
+            stack.pop()
+        for index, name in enumerate(keys):
+            # Skip the root itself.
+            if index == 0:
+                continue
+            while not is_parent_path(keys[stack[-1]], name):
+                maintain()
+            pa = stack[-1]
+            nodes[pa]["item_count"] += 1
+            graph[pa].append(index)
+            if not is_file(name):
+                stack.append(index)
+            else:
+                nodes[pa]["file_count"] += 1
+                nodes[pa]["total_size"] += nodes[index]["size"]
+                nodes[index]["total_size"] = nodes[index]["size"]
+        while len(stack) >= 2:
+            maintain()
+        assert nodes[0]["file_count"] == len(bundles), "file count mismatch"
+        return nodes, graph, table

     def _get_dirs(self):
         return self.dir_cache
 
+    def info(self, path, **kwargs):
+        nodes, graph, table = self._get_dirs()
+        path = path or self.root_marker
+        if path in table:
+            return nodes[table[path]]
+        else:
+            raise FileNotFoundError(path)
+
+    @cache
+    def ls(self, path, detail=True, **kwargs):
+        nodes, graph, table = self._get_dirs()
+        path = path or self.root_marker
+        if path in table:
+            u = table[path]
+            return [nodes[v] if detail else nodes[v]["name"] for v in graph[u]]
+        return []
+
     def open(self, path, mode="rb"):
         assert mode == "rb", "only binary read-only mode is supported"
         return AbCacheFile(self, path)
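
The new `dir_cache` above hinges on one observation: lexicographically sorted paths arrive in DFS pre-order, so a single pass with an explicit stack can attach children to their parent and fold per-subtree totals upward whenever a subtree closes. Below is a minimal, self-contained sketch of that technique; names are simplified, sizes are plain ints, and only `file_count`/`total_size` are tracked, so treat it as an illustration rather than sssekai's API.

# Sketch of the sort-then-single-DFS-pass aggregation used by dir_cache.
from collections import defaultdict

def build_tree(bundles):
    # bundles maps "a/b/c.bundle" (no leading slash) -> size in bytes.
    keys = set("/" + k for k in bundles)
    for k in list(keys):  # derive every ancestor directory of each leaf
        parts = k[1:].split("/")
        for i in range(1, len(parts)):
            keys.add("/" + "/".join(parts[:i]))
    keys = ["/"] + sorted(keys)  # lexicographic sort == DFS pre-order
    nodes = [{"name": k,
              "type": "file" if k[1:] in bundles else "directory",
              "size": bundles.get(k[1:], 0),
              "file_count": 0, "total_size": 0} for k in keys]
    children = defaultdict(list)
    index = {n["name"]: i for i, n in enumerate(nodes)}
    stack = [0]  # directories whose subtree is still open; root stays put

    def is_parent(a, b):
        return a == "/" or b.startswith(a + "/")

    def close():  # fold the finished subtree on top of the stack into its parent
        u, v = stack[-2], stack[-1]
        nodes[u]["file_count"] += nodes[v]["file_count"]
        nodes[u]["total_size"] += nodes[v]["total_size"]
        stack.pop()

    for i, name in enumerate(keys[1:], start=1):
        while not is_parent(keys[stack[-1]], name):
            close()  # we have left that directory; push its totals upward
        children[stack[-1]].append(i)
        if nodes[i]["type"] == "directory":
            stack.append(i)
        else:
            nodes[stack[-1]]["file_count"] += 1
            nodes[stack[-1]]["total_size"] += nodes[i]["size"]
            nodes[i]["total_size"] = nodes[i]["size"]
    while len(stack) >= 2:  # close everything back down to the root
        close()
    return nodes, children, index

# After the one-time build, every listing is a plain lookup:
nodes, children, index = build_tree({"a/b.bundle": 5, "a/c/d.bundle": 7})
assert nodes[index["/"]]["file_count"] == 2
assert nodes[index["/"]]["total_size"] == 12
print([nodes[v]["name"] for v in children[index["/a"]]])  # ['/a/b.bundle', '/a/c']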
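
For orientation, here is a hedged usage sketch of the resulting filesystem. The constructor argument and the bundle path are placeholders; how the underlying AbCache is built or located is outside this diff.

# Hypothetical usage sketch; treat the paths below as assumptions.
from sssekai.abcache.fs import AbCacheFilesystem

fs = AbCacheFilesystem(fo="~/.sssekai/abcache")  # 'fo' per the __init__ signature above

print(fs.info("/"))                    # O(1) lookup: name/type/size/file_count/total_size
for entry in fs.ls("/", detail=True):  # memoized via functools.cache
    print(entry["type"], entry["name"], entry["size"])

with fs.open("/some/bundle") as f:  # AbCacheFile: sequential reads; seeks are
    head = f.read(128)              # simulated by read-ahead, as documented above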
26 changes: 16 additions & 10 deletions sssekai/entrypoint/abserve.py
@@ -1,4 +1,4 @@
-import os, logging
+import os, logging, datetime, time, sys
 from shutil import copyfileobj
 from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
 from sssekai import __version__
@@ -13,7 +13,9 @@ class AbServeHTTPRequestHandler(BaseHTTPRequestHandler):
     ENCODING = "utf-8"
 
     def format_listing(self, path):
+        t0 = time.time()
         r = []
+        path = path or "/"
         title = f"Directory listing for {path}"
         r = (
             f"<!DOCTYPE HTML>"
@@ -23,13 +25,15 @@ def format_listing(self, path):
             f"<style>"
             f"body {{ font-family: monospace; }}"
             f"body {{ background-color: black; color: white; }}"
-            f"a {{ color: lightblue; }}"
+            f"a,i {{ color: lightblue; }}"
             f"</style>"
             f"<title>{title}</title>"
             f"</head>"
             f"<body><h1>{title}</h1>"
-            f"<hr><ul>"
-            f'<li><a href="..">..</a></li>'
+            f"<i>children: {len(fs.ls(path))},</i>"
+            f"<i>total number of files: {fs.info(path)['file_count']},</i>"
+            f"<i>total size: {filesize.decimal(fs.info(path)['total_size'])}</i><br>"
+            f'<hr><ul><li><a href="..">..</a></li>'
         )
         for entry in sorted(
             fs.listdir(path), key=lambda x: (x["type"], x["name"], x["size"])
@@ -40,14 +44,16 @@ def format_listing(self, path):
             nodename = name.split("/")[-1]
             linkname = name
             displayname = nodename
+            extra_tags = " ".join([f'{k}="{v}"' for k, v in entry.items()])
             if fs.isdir(name):
-                linkname += "/"
+                linkname += "/"
             else:
-                fsize = filesize.decimal(entry["size"])
-                displayname += f" ({fsize})"
-            r += '<li><a href="%s">%s</a></li>' % (linkname, displayname)
-        r += "</ul><hr>"
-        r += "<i>sssekai v%s, %s</i>" % (__version__, fs.cache)
+                displayname += f" ({filesize.decimal(entry['size'])})"
+            r += f'<li><a {extra_tags} href="{linkname}">{displayname}</a></li>'
+        r += "</ul><hr>"
+        r += f"<i>sssekai v{__version__} running on Python {sys.version}</i><br>"
+        r += f"<i>{fs.cache}</i><br>"
+        r += "<i>page rendered in %.3fms, server time: %s</i>" % ((time.time() - t0) * 1000, datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
         r += "</body></html>"
         encoded = r.encode(self.ENCODING, "surrogateescape")
         return encoded
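
The handler above is served by the stdlib `ThreadingHTTPServer` imported at the top of this file. The entrypoint's actual wiring is not part of this diff, but it presumably amounts to something like the sketch below; the host and port defaults are assumptions.

# Sketch only: the real entrypoint argument handling is not shown in this diff.
# ThreadingHTTPServer serves each request on its own thread, so a slow bundle
# download does not block directory listings.
def serve(host="0.0.0.0", port=8000):  # assumed defaults
    with ThreadingHTTPServer((host, port), AbServeHTTPRequestHandler) as httpd:
        httpd.serve_forever()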
