Skip to content

Commit

Permalink
Fix paths for notebooks and jupyterlab
Browse files Browse the repository at this point in the history
1) In the jupyter notebook documentation it is noted that directory
   paths have their leading and trailing slashes stripped. This library
   violated that expecation and caused a major user-facing bug in
   Jupyter Notebooks where directories would have to be clicked
   multiple times to create a correctly formatted request.
2) Jupyter Lab does not pass the type of a model to all get requests
   this was handled by assuming untypted requests are blobs/notebooks
   in the prior version of jgscm which would return as a 404. This
   change adds more advanced type resolution for cases of ambiguous
   type.
  • Loading branch information
Evan Parker committed Mar 8, 2018
1 parent c9fb372 commit d1f1f4b
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 38 deletions.
89 changes: 58 additions & 31 deletions jgscm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,10 @@ class GoogleStorageContentManager(ContentsManager):
"""
)

def __init__(self, *args, **kwargs):
self._client = None
super(GoogleStorageContentManager, self).__init__(*args, **kwargs)

def debug_args(fn):
def wrapped_fn(self, *args, **kwargs):
self.log.debug("call %s(%s%s%s)", fn.__name__,
Expand Down Expand Up @@ -245,9 +249,8 @@ def file_exists(self, path=""):
return False
if path.startswith("/"):
path = path[1:]
try:
bucket_name, bucket_path = self._parse_path(path)
except ValueError:
bucket_name, bucket_path = self._parse_path(path)
if not bucket_path:
return False
bucket = self._get_bucket(bucket_name)
if bucket is None or bucket_path == "":
Expand All @@ -262,7 +265,18 @@ def dir_exists(self, path):
return True
if not path.endswith("/"):
path += "/"
return self._fetch(path, content=False)[0]
bucket_name, blob_prefix_name = self._parse_path(path)
# Get the bucket, fail if the bucket cannot be found.
bucket = self._get_bucket(bucket_name)
if not bucket:
return False
# Only check that bucket exists.
if not blob_prefix_name:
return True
# Check that some blobs exist with the prefix as a path.
if list(bucket.list_blobs(prefix=blob_prefix_name, max_results=1)):
return True
return False

@debug_args
def get(self, path, content=True, type=None, format=None):
Expand All @@ -273,11 +287,10 @@ def get(self, path, content=True, type=None, format=None):
path = path[1:]
if not path:
path = self.default_path
if "/" not in path or path.endswith("/") or type == "directory":
if type not in (None, "directory"):
raise web.HTTPError(
400, u"%s is not a directory" % path, reason="bad type")
if "/" in path and not path.endswith("/"):

type = self._resolve_storagetype(path, type)
if type == "directory":
if path and not path.endswith("/"):
path += "/"
exists, members = self._fetch(path, content=content)
if not exists:
Expand Down Expand Up @@ -428,29 +441,50 @@ def client(self):
"""
:return: used instance of :class:`google.cloud.storage.Client`.
"""
try:
return self._client
except AttributeError:
if not self.project:
self._client = GSClient()
else:
self._client = GSClient.from_service_account_json(
self.keyfile, project=self.project)
if self._client is not None:
return self._client
if not self.project:
self._client = GSClient()
else:
self._client = GSClient.from_service_account_json(
self.keyfile, project=self.project)
return self._client

def run_post_save_hook(self, model, os_path):
"""Run the post-save hook if defined, and log errors"""
if self.post_save_hook:
try:
self.log.debug("Running post-save hook on %s", os_path)
self.post_save_hook(os_path=path, model=model, contents_manager=self)
self.post_save_hook(os_path=os_path,
model=model,
contents_manager=self)
except Exception:
self.log.error("Post-save hook failed on %s", os_path, exc_info=True)

@default("checkpoints_class")
def _checkpoints_class_default(self):
return GoogleStorageCheckpoints

def _resolve_storagetype(self, path, type):
"""Based on the arguments and status of GCS, return a valid type."""
if "/" not in path or path.endswith("/") or path == "":
if type not in (None, "directory"):
raise web.HTTPError(
400, u"%s is not a directory" % path, reason="bad type")
return "directory"
if type is None and path.endswith(".ipynb"):
return "notebook"
if type is not None:
return type
# If type cannot be inferred from the argument set, hit
# the storage API to see if a blob or a prefix exists.
if self.file_exists(path):
return "file"
if self.dir_exists(path):
return "directory"
raise web.HTTPError(
404, u"%s does not exist" % path, reason="bad type")

def _get_bucket(self, name, throw=False):
"""
Get the bucket by it's name. Uses cache by default.
Expand Down Expand Up @@ -494,12 +528,8 @@ def _parse_path(path):
:param path: string to split.
:return: tuple(bucket name, bucket path).
"""
try:
bucket, name = path.split("/", 1)
except ValueError:
bucket = path
name = ""
return bucket, name
bucket, _, blobname = path.partition("/")
return bucket, blobname

@staticmethod
def _get_blob_path(blob):
Expand All @@ -508,10 +538,7 @@ def _get_blob_path(blob):
:param blob: instance of :class:`google.cloud.storage.Blob`.
:return: path string.
"""
path = url_unescape(blob.path)
path = path[3:] # /b/
path = path.replace("/o/", "/", 1)
return path
return blob.bucket.name + "/" + blob.name

@staticmethod
def _get_blob_name(blob):
Expand All @@ -521,11 +548,11 @@ def _get_blob_name(blob):
:return: name string.
"""
if isinstance(blob, Blob):
return url_unescape(blob.path).rsplit("/", 1)[-1]
return os.path.basename(blob.name)
assert isinstance(blob, (unicode, str))
if blob.endswith("/"):
blob = blob[:-1]
return blob.rsplit("/", 1)[-1]
return os.path.basename(blob)

@staticmethod
def _get_dir_name(path):
Expand Down Expand Up @@ -713,7 +740,7 @@ def _dir_model(self, path, members, content=True):
model = {
"type": "directory",
"name": self._get_dir_name(path),
"path": path,
"path": path.rstrip('/'),
"last_modified": "",
"created": "",
"content": None,
Expand Down
14 changes: 7 additions & 7 deletions jgscm/tests/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def test_file_exists(self):
self.assertFalse(self.contents_manager.file_exists(""))
self.assertFalse(self.contents_manager.file_exists("/"))
self.assertFalse(self.contents_manager.file_exists(self.BUCKET))
self.assertFalse(self.contents_manager.file_exists(self.BUCKET + "/"))
self.assertFalse(self.contents_manager.file_exists(self.BUCKET))
bucket = self.bucket
blob = bucket.blob("test")
blob.upload_from_string(b"test")
Expand Down Expand Up @@ -235,7 +235,7 @@ def test_get(self):
self.assertEqual(m["type"], "directory")
self.assertEqual(m["mimetype"], "application/x-directory")
if m["name"] == self.BUCKET:
self.assertEqual(m["path"], self.BUCKET + "/")
self.assertEqual(m["path"], self.BUCKET)
self.assertEqual(m["last_modified"], "")
self.assertEqual(m["created"], "")
self.assertIsNone(m["format"])
Expand All @@ -246,7 +246,7 @@ def test_get(self):
self.assertEqual(model["type"], "directory")
self.assertEqual(model["mimetype"], "application/x-directory")
self.assertEqual(model["name"], self.BUCKET)
self.assertEqual(model["path"], self.BUCKET + "/")
self.assertEqual(model["path"], self.BUCKET)
self.assertEqual(model["last_modified"], "")
self.assertEqual(model["created"], "")
self.assertEqual(model["format"], "json")
Expand Down Expand Up @@ -295,7 +295,7 @@ def test_get(self):
self.assertEqual(model["type"], "directory")
self.assertEqual(model["mimetype"], "application/x-directory")
self.assertEqual(model["name"], self.BUCKET)
self.assertEqual(model["path"], self.BUCKET + "/")
self.assertEqual(model["path"], self.BUCKET)
self.assertEqual(model["last_modified"], "")
self.assertEqual(model["created"], "")
self.assertEqual(model["format"], "json")
Expand All @@ -306,7 +306,7 @@ def test_get(self):
self.assertEqual(model["type"], "directory")
self.assertEqual(model["mimetype"], "application/x-directory")
self.assertEqual(model["name"], "test")
self.assertEqual(model["path"], self.path("test/")[1:])
self.assertEqual(model["path"], self.path("test")[1:])
self.assertEqual(model["last_modified"], "")
self.assertEqual(model["created"], "")
self.assertIsNone(model["content"])
Expand All @@ -321,7 +321,7 @@ def test_get(self):
self.assertEqual(model["type"], "directory")
self.assertEqual(model["mimetype"], "application/x-directory")
self.assertEqual(model["name"], "test")
self.assertEqual(model["path"], self.path("test/")[1:])
self.assertEqual(model["path"], self.path("test")[1:])
self.assertEqual(model["last_modified"], "")
self.assertEqual(model["created"], "")
self.assertEqual(model["format"], "json")
Expand All @@ -348,7 +348,7 @@ def test_get(self):
self.assertEqual(dc["type"], "directory")
self.assertEqual(dc["mimetype"], "application/x-directory")
self.assertEqual(dc["name"], "fold")
self.assertEqual(dc["path"], self.path("test/fold/")[1:])
self.assertEqual(dc["path"], self.path("test/fold")[1:])
self.assertIsNone(dc["content"])
self.assertIsNone(dc["format"])
self.assertEqual(dc["last_modified"], "")
Expand Down

0 comments on commit d1f1f4b

Please sign in to comment.