Skip to content

Commit

Permalink
Fix hang with large files by removing re.search title for non-html responses
Browse files Browse the repository at this point in the history
  • Loading branch information
wkentaro committed Mar 25, 2023
1 parent d6b7aca commit c5fc8db
Showing 1 changed file with 29 additions and 18 deletions.
47 changes: 29 additions & 18 deletions gdown/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,26 +174,37 @@ def download(
id=gdrive_file_id
)
continue
m = re.search("<title>(.+)</title>", res.text)
if m and m.groups()[0].endswith(" - Google Docs"):
url = (
"https://docs.google.com/document/d/{id}/export"
"?format={format}".format(
id=gdrive_file_id,
format="docx" if format is None else format,

if res.headers["Content-Type"].startswith("text/html"):
m = re.search("<title>(.+)</title>", res.text)
if m and m.groups()[0].endswith(" - Google Docs"):
url = (
"https://docs.google.com/document/d/{id}/export"
"?format={format}".format(
id=gdrive_file_id,
format="docx" if format is None else format,
)
)
)
continue
elif m and m.groups()[0].endswith(" - Google Sheets"):
url = (
"https://docs.google.com/spreadsheets/d/{id}/export"
"?format={format}".format(
id=gdrive_file_id,
format="xlsx" if format is None else format,
continue
elif m and m.groups()[0].endswith(" - Google Sheets"):
url = (
"https://docs.google.com/spreadsheets/d/{id}/export"
"?format={format}".format(
id=gdrive_file_id,
format="xlsx" if format is None else format,
)
)
)
continue
elif (m and m.groups()[0].endswith(" - Google Slides")) or (
continue
elif m and m.groups()[0].endswith(" - Google Slides"):
url = (
"https://docs.google.com/presentation/d/{id}/export"
"?format={format}".format(
id=gdrive_file_id,
format="pptx" if format is None else format,
)
)
continue
elif (
"Content-Disposition" in res.headers
and res.headers["Content-Disposition"].endswith("pptx")
and format not in {None, "pptx"}
Expand Down

0 comments on commit c5fc8db

Please sign in to comment.