更新项目代码

vickwv · Dec 2, 2023 · cc72e54 · cc72e54
1 parent 88cc09c
commit cc72e54
Show file tree

Hide file tree

Showing 10 changed files with 90 additions and 106 deletions.
diff --git a/README.md b/README.md
@@ -25,46 +25,44 @@
 <h1>🥣 使用方法</h1>
 <p>如果仅需下载作品文件，选择 <b>直接运行</b> 或者 <b>源码运行</b> 均可；如果需要获取作品信息，则需要通过二次开发调用本项目代码。</p>
 <h2>🖱 直接运行</h2>
-<p>前往 Releases 下载程序压缩包，解压后打开程序文件夹，双击运行 <code>main.exe</code> 即可使用。</p>
+<p>前往 <a href="https://github.com/JoeanAmier/XHS-Downloader/releases/latest">Releases</a> 下载程序压缩包，解压后打开程序文件夹，双击运行 <code>main.exe</code> 即可使用。</p>
 <h2>⌨️ 源码运行</h2>
 <ol>
 <li>安装版本号不低于 <code>3.12</code> 的 Python 解释器</li>
 <li>运行 <code>pip install -r requirements.txt</code> 命令安装程序所需模块</li>
-<li>下载本项目最新的源码或 <code>Releases</code> 发布的源码至本地</li>
+<li>下载本项目最新的源码或 <a href="https://github.com/JoeanAmier/XHS-Downloader/releases/latest">Releases</a> 发布的源码至本地</li>
 <li>运行 <code>main.py</code> 即可使用</li>
 </ol>
 <h2>💻 二次开发</h2>
 <p>如果想要获取小红书图文/视频作品信息，可以根据 <code>main.py</code> 的注释提示进行代码调用。</p>
 <pre>
 # 测试链接
-error_demo = "https://github.com/JoeanAmier/XHS-Downloader"
+error_demo = "https://github.com/JoeanAmier/XHS_Downloader"
 image_demo = "https://www.xiaohongshu.com/explore/63b275a30000000019020185"
 video_demo = "https://www.xiaohongshu.com/explore/64edb460000000001f03cadc"
+multiple_demo = f"{image_demo} {video_demo}"
 # 实例对象
-path = "" # 作品下载储存根路径，默认值：当前路径
+path = "D:\\" # 作品下载储存根路径，默认值：当前路径
 folder = "Download" # 作品下载文件夹名称（自动创建），默认值：Download
-cookie = "" # 小红书网页版 Cookie
 proxies = None # 网络代理
 timeout = 5 # 网络请求超时限制，单位：秒，默认值：10
 chunk = 1024 * 1024 # 下载文件时，每次从服务器获取的数据块大小，单位：字节
 xhs = XHS(
  path=path,
  folder=folder,
- cookie=cookie,
  proxies=proxies,
  timeout=timeout,
  chunk=chunk, ) # 使用自定义参数
 # xhs = XHS() # 使用默认参数
-download = True # 是否下载作品文件
+download = True # 是否下载作品文件，默认值：False
 # 返回作品详细信息，包括下载地址
 print(xhs.extract(error_demo)) # 获取数据失败时返回空字典
 print(xhs.extract(image_demo, download=download))
 print(xhs.extract(video_demo, download=download))
+print(xhs.extract(multiple_demo, download=download))
 </pre>
-<h1>⛓ 批量下载</h1>
-<p>在程序所在文件夹创建一个 <code>xhs.txt</code> 文本文件，然后将待处理的作品链接输入文件，每行输入一个作品链接，编辑完成后保存文件，然后运行程序，点击 <code>读取 xhs.txt 文件并批量下载作品</code> 按钮，程序会批量下载每个链接对应的作品文件。</p>
 <h1>⚙️ 配置文件</h1>
-<p>根目录下的 <code>settings.json</code> 文件，可以自定义部分运行参数。</p>
+<p>通过项目根目录下的 <code>settings.json</code> 文件，可以自定义部分运行参数。</p>
 <table>
 <thead>
 <tr>
@@ -88,12 +86,6 @@ print(xhs.extract(video_demo, download=download))
 <td align="center">Download</td>
 </tr>
 <tr>
-<td align="center">cookie</td>
-<td align="center">str</td>
-<td align="center">小红书网页版 Cookie，无需登录；建议自行设置</td>
-<td align="center">内置 Cookie</td>
-</tr>
-<tr>
 <td align="center">proxies</td>
 <td align="center">str</td>
 <td align="center">设置代理</td>
@@ -109,20 +101,10 @@ print(xhs.extract(video_demo, download=download))
 <td align="center">chunk</td>
 <td align="center">int</td>
 <td align="center">下载文件时，每次从服务器获取的数据块大小，单位：字节</td>
-<td align="center">262144(256KB)</td>
+<td align="center">1048576(1 MB)</td>
 </tr>
 </tbody>
 </table>
-<h1>🌐 Cookie</h1>
-<ol>
-<li>打开浏览器（可选无痕模式启动），访问小红书任意网页</li>
-<li>按 <code>F12</code> 打开开发人员工具</li>
-<li>选择 <code>控制台</code> 选项卡</li>
-<li>输入 <code>document.cookie</code> 后回车确认</li>
-<li>输出内容即为所需 Cookie</li>
-</ol>
-<br>
-<img src="static/获取Cookie示意图.png" alt="">
 <h1>♥️ 支持项目</h1>
 <p>如果 <b>XHS-Downloader</b> 对您有帮助，请考虑为它点个 <b>Star</b> ⭐，感谢您的支持！</p>
 <table>
@@ -141,7 +123,7 @@ print(xhs.extract(video_demo, download=download))
 <p>如果您愿意，可以考虑提供资助为 <b>XHS-Downloader</b> 提供额外的支持！</p>
 <h1>✉️ 联系作者</h1>
 <ul>
-<li>QQ: 2437596031</li>
+<li>QQ: 2437596031（联系请说明来意）</li>
 <li>QQ Group: <a href="https://github.com/JoeanAmier/XHS-Downloader/blob/master/static/QQ%E7%BE%A4%E8%81%8A%E4%BA%8C%E7%BB%B4%E7%A0%81.png">点击扫码加入群聊</a></li>
 <li>Email: [email protected]</li>
 </ul>

diff --git a/source/Download.py → source/Downloader.py b/source/Download.py → source/Downloader.py
@@ -3,31 +3,31 @@
 from requests import exceptions
 from requests import get
 
-from .Manager import Manager
-
 __all__ = ['Download']
 
 
 class Download:
- manager = Manager()
 
  def __init__(
  self,
+ manager,
  root: Path,
  path: str,
  folder: str,
- headers: dict,
  proxies=None,
- chunk=1024 * 1024, ):
- self.temp = root.joinpath("./temp")
+ chunk=1024 * 1024,
+ timeout=10, ):
+ self.manager = manager
+ self.temp = manager.temp
+ self.headers = manager.headers
  self.root = self.__init_root(root, path, folder)
- self.headers = self.__delete_cookie(headers)
  self.proxies = {
  "http": proxies,
  "https": proxies,
  "ftp": proxies,
  }
  self.chunk = chunk
+ self.timeout = timeout
 
  def __init_root(self, root: Path, path: str, folder: str) -> Path:
  if path and (r := Path(path)).is_dir():
@@ -38,39 +38,29 @@ def __init_root(self, root: Path, path: str, folder: str) -> Path:
  self.temp.mkdir(exist_ok=True)
  return root
 
- def run(self, urls: list, name: str, type_: int, log):
+ def run(self, urls: list, name: str, type_: int):
  if type_ == 0:
- self.__download(urls[0], f"{name}.mp4", log)
+ self.__download(urls[0], f"{name}.mp4")
  elif type_ == 1:
  for index, url in enumerate(urls):
- self.__download(url, f"{name}_{index + 1}.jpeg", log)
+ self.__download(url, f"{name}_{index + 1}.png")
 
- def __download(self, url: str, name: str, log):
+ def __download(self, url: str, name: str):
  temp = self.temp.joinpath(name)
  file = self.root.joinpath(name)
  if self.manager.is_exists(file):
- self.output_prompt(f"文件 {name} 已存在，跳过下载！", log)
  return
  try:
- with get(url, headers=self.headers, proxies=self.proxies, stream=True) as response:
+ with get(url, headers=self.headers, proxies=self.proxies, stream=True, timeout=self.timeout) as response:
  with temp.open("wb") as f:
  for chunk in response.iter_content(chunk_size=self.chunk):
  f.write(chunk)
  self.manager.move(temp, file)
- self.output_prompt(f"文件 {name} 下载成功！", log)
- except exceptions.ChunkedEncodingError:
+ except (
+ exceptions.ProxyError,
+ exceptions.SSLError,
+ exceptions.ChunkedEncodingError,
+ exceptions.ConnectionError,
+ exceptions.ReadTimeout,
+ ):
  self.manager.delete(temp)
- self.output_prompt(f"网络异常，文件 {name} 下载失败！", log)
-
- @staticmethod
- def __delete_cookie(headers: dict) -> dict:
- download_headers = headers.copy()
- del download_headers["Cookie"]
- return download_headers
-
- @staticmethod
- def output_prompt(tip: str, log):
- if log:
- log.write_line(tip)
- else:
- print(tip)
diff --git a/source/Explore.py b/source/Explore.py
@@ -15,8 +15,8 @@ def run(self, html: str) -> dict:
  return self.__extract_data(data)
 
  def __get_json_data(self, html: str) -> dict:
- data = self.explore_data.findall(html)
- return {} if len(data) != 1 else loads(data[0])
+ data = self.explore_data.search(html)
+ return loads(data.group(1)) if data else {}
 
  def __extract_data(self, data: dict) -> dict:
  result = {}

diff --git a/source/Html.py b/source/Html.py
@@ -19,11 +19,12 @@ def __init__(
  }
  self.timeout = timeout
 
- def get_html(
+ def request_url(
  self,
  url: str,
  params=None,
- headers=None, ) -> str:
+ headers=None,
+ text=True, ) -> str:
  try:
  response = get(
  url,
@@ -38,9 +39,9 @@ def get_html(
  exceptions.ConnectionError,
  exceptions.ReadTimeout,
  ):
- print("获取网页源码失败，请尝试设置 Cookie 后重试！")
+ print("网络异常，获取网页源码失败！")
  return ""
- return response.text
+ return response.text if text else response.url
 
  @staticmethod
  def format_url(url: str) -> str:

diff --git a/source/Image.py b/source/Image.py
@@ -1,5 +1,6 @@
 from json import loads
 from re import compile
+
 __all__ = ['Image']
 
 
@@ -25,8 +26,7 @@ def __generate_image_link(token: str) -> str:
  return f"https://ci.xiaohongshu.com/{token}?imageView2/2/w/format/png"
 
  def __extract_image_token(self, url: str) -> str:
- return self.__generate_image_link(token[0]) if len(
- token := self.IMAGE_TOKEN.findall(url)) == 1 else ""
+ return self.__generate_image_link(token.group(1)) if (token := self.IMAGE_TOKEN.search(url)) else ""
 
  def __extract_image_urls(self, data: list[dict]) -> list[str]:
  urls = []

diff --git a/source/Manager.py b/source/Manager.py
@@ -5,6 +5,14 @@
 
 
 class Manager:
+ headers = {
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
+ "Chrome/119.0.0.0 Safari/537.36",
+ }
+
+ def __init__(self, root: Path):
+ self.temp = root.joinpath("./temp")
+
  @staticmethod
  def is_exists(path: Path) -> bool:
  return path.exists()

diff --git a/source/Settings.py b/source/Settings.py
@@ -6,16 +6,17 @@
 
 
 class Settings:
- file = Path(__file__).resolve().parent.parent.joinpath("./settings.json")
  default = {
  "path": "",
  "folder": "Download",
- "cookie": "",
  "proxies": None,
  "timeout": 10,
  "chunk": 1024 * 1024,
  }
 
+ def __init__(self, root: Path):
+ self.file = root.joinpath("./settings.json")
+
  def run(self):
  return self.read() if self.file.is_file() else self.create()
 

diff --git a/source/Video.py b/source/Video.py
@@ -1,12 +1,11 @@
 from re import compile
 
-from .Html import Html
-
 __all__ = ['Video']
 
 
 class Video:
- VIDEO_ID = compile(r'"masterUrl":"(.*?)"')
+ VIDEO_TOKEN = compile(r'"originVideoKey":"pre_post\\u002F(\S+?)"')
 
- def get_video_link(self, html: str):
- return [Html.format_url(u) for u in self.VIDEO_ID.findall(html)]
+ def get_video_link(self, html: str) -> list:
+ return [f"https://sns-video-hw.xhscdn.com/pre_post/{
+ t.group(1)}"] if (t := self.VIDEO_TOKEN.search(html)) else []