Skip to content

Commit

Permalink
更新项目代码
Browse files Browse the repository at this point in the history
  • Loading branch information
JoeanAmier committed Dec 2, 2023
1 parent 88cc09c commit cc72e54
Show file tree
Hide file tree
Showing 10 changed files with 90 additions and 106 deletions.
38 changes: 10 additions & 28 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,46 +25,44 @@
<h1>🥣 使用方法</h1>
<p>如果仅需下载作品文件,选择 <b>直接运行</b> 或者 <b>源码运行</b> 均可;如果需要获取作品信息,则需要通过二次开发进行调用。</p>
<h2>🖱 直接运行</h2>
<p>前往 Releases 下载程序压缩包,解压后打开程序文件夹,双击运行 <code>main.exe</code> 即可使用。</p>
<p>前往 <a href="https://github.com/JoeanAmier/XHS-Downloader/releases/latest">Releases</a> 下载程序压缩包,解压后打开程序文件夹,双击运行 <code>main.exe</code> 即可使用。</p>
<h2>⌨️ 源码运行</h2>
<ol>
<li>安装版本号不低于 <code>3.12</code> 的 Python 解释器</li>
<li>运行 <code>pip install -r requirements.txt</code> 命令安装程序所需模块</li>
<li>下载本项目最新的源码或 <code>Releases</code> 发布的源码至本地</li>
<li>下载本项目最新的源码或 <a href="https://github.com/JoeanAmier/XHS-Downloader/releases/latest">Releases</a> 发布的源码至本地</li>
<li>运行 <code>main.py</code> 即可使用</li>
</ol>
<h2>💻 二次开发</h2>
<p>如果想要获取小红书图文/视频作品信息,可以根据 <code>main.py</code> 的注释提示进行代码调用。</p>
<pre>
# 测试链接
error_demo = "https://github.com/JoeanAmier/XHS-Downloader"
error_demo = "https://github.com/JoeanAmier/XHS_Downloader"
image_demo = "https://www.xiaohongshu.com/explore/63b275a30000000019020185"
video_demo = "https://www.xiaohongshu.com/explore/64edb460000000001f03cadc"
multiple_demo = f"{image_demo} {video_demo}"
# 实例对象
path = "" # 作品下载储存根路径,默认值:当前路径
path = "D:\\" # 作品下载储存根路径,默认值:当前路径
folder = "Download" # 作品下载文件夹名称(自动创建),默认值:Download
cookie = "" # 小红书网页版 Cookie
proxies = None # 网络代理
timeout = 5 # 网络请求超时限制,默认值:10
chunk = 1024 * 1024 # 下载文件时,每次从服务器获取的数据块大小,单位字节
xhs = XHS(
path=path,
folder=folder,
cookie=cookie,
proxies=proxies,
timeout=timeout,
chunk=chunk, ) # 使用自定义参数
# xhs = XHS() # 使用默认参数
download = True # 是否下载作品文件
download = True # 是否下载作品文件,默认值:False
# 返回作品详细信息,包括下载地址
print(xhs.extract(error_demo)) # 获取数据失败时返回空字典
print(xhs.extract(image_demo, download=download))
print(xhs.extract(video_demo, download=download))
print(xhs.extract(multiple_demo, download=download))
</pre>
<h1>⛓ 批量下载</h1>
<p>在程序所在文件夹创建一个 <code>xhs.txt</code> 文本文件,将待处理的作品链接写入该文件(每行一个作品链接)并保存;随后运行程序,点击 <code>读取 xhs.txt 文件并批量下载作品</code> 按钮,程序会批量下载每个链接对应的作品文件。</p>
<h1>⚙️ 配置文件</h1>
<p>根目录下的 <code>settings.json</code> 文件,可以自定义部分运行参数。</p>
<p>项目根目录下的 <code>settings.json</code> 文件,可以自定义部分运行参数。</p>
<table>
<thead>
<tr>
Expand All @@ -88,12 +86,6 @@ print(xhs.extract(video_demo, download=download))
<td align="center">Download</td>
</tr>
<tr>
<td align="center">cookie</td>
<td align="center">str</td>
<td align="center">小红书网页版 Cookie,无需登录;建议自行设置</td>
<td align="center">内置 Cookie</td>
</tr>
<tr>
<td align="center">proxies</td>
<td align="center">str</td>
<td align="center">设置代理</td>
Expand All @@ -109,20 +101,10 @@ print(xhs.extract(video_demo, download=download))
<td align="center">chunk</td>
<td align="center">int</td>
<td align="center">下载文件时,每次从服务器获取的数据块大小,单位:字节</td>
<td align="center">262144(256KB)</td>
<td align="center">1048576(1 MB)</td>
</tr>
</tbody>
</table>
<h1>🌐 Cookie</h1>
<ol>
<li>打开浏览器(可选无痕模式启动),访问小红书任意网页</li>
<li>按 <code>F12</code> 打开开发人员工具</li>
<li>选择 <code>控制台</code> 选项卡</li>
<li>输入 <code>document.cookie</code> 后回车确认</li>
<li>输出内容即为所需 Cookie</li>
</ol>
<br>
<img src="static/获取Cookie示意图.png" alt="">
<h1>♥️ 支持项目</h1>
<p>如果 <b>XHS-Downloader</b> 对您有帮助,请考虑为它点个 <b>Star</b> ⭐,感谢您的支持!</p>
<table>
Expand All @@ -141,7 +123,7 @@ print(xhs.extract(video_demo, download=download))
<p>如果您愿意,可以考虑通过资助为 <b>XHS-Downloader</b> 提供额外的支持!</p>
<h1>✉️ 联系作者</h1>
<ul>
<li>QQ: 2437596031</li>
<li>QQ: 2437596031(联系请说明来意)</li>
<li>QQ Group: <a href="https://github.com/JoeanAmier/XHS-Downloader/blob/master/static/QQ%E7%BE%A4%E8%81%8A%E4%BA%8C%E7%BB%B4%E7%A0%81.png">点击扫码加入群聊</a></li>
<li>Email: [email protected]</li>
</ul>
Expand Down
48 changes: 19 additions & 29 deletions source/Download.py → source/Downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,31 +3,31 @@
from requests import exceptions
from requests import get

from .Manager import Manager

__all__ = ['Download']


class Download:
manager = Manager()

def __init__(
self,
manager,
root: Path,
path: str,
folder: str,
headers: dict,
proxies=None,
chunk=1024 * 1024, ):
self.temp = root.joinpath("./temp")
chunk=1024 * 1024,
timeout=10, ):
self.manager = manager
self.temp = manager.temp
self.headers = manager.headers
self.root = self.__init_root(root, path, folder)
self.headers = self.__delete_cookie(headers)
self.proxies = {
"http": proxies,
"https": proxies,
"ftp": proxies,
}
self.chunk = chunk
self.timeout = timeout

def __init_root(self, root: Path, path: str, folder: str) -> Path:
if path and (r := Path(path)).is_dir():
Expand All @@ -38,39 +38,29 @@ def __init_root(self, root: Path, path: str, folder: str) -> Path:
self.temp.mkdir(exist_ok=True)
return root

def run(self, urls: list, name: str, type_: int, log):
def run(self, urls: list, name: str, type_: int):
if type_ == 0:
self.__download(urls[0], f"{name}.mp4", log)
self.__download(urls[0], f"{name}.mp4")
elif type_ == 1:
for index, url in enumerate(urls):
self.__download(url, f"{name}_{index + 1}.jpeg", log)
self.__download(url, f"{name}_{index + 1}.png")

def __download(self, url: str, name: str, log):
def __download(self, url: str, name: str):
temp = self.temp.joinpath(name)
file = self.root.joinpath(name)
if self.manager.is_exists(file):
self.output_prompt(f"文件 {name} 已存在,跳过下载!", log)
return
try:
with get(url, headers=self.headers, proxies=self.proxies, stream=True) as response:
with get(url, headers=self.headers, proxies=self.proxies, stream=True, timeout=self.timeout) as response:
with temp.open("wb") as f:
for chunk in response.iter_content(chunk_size=self.chunk):
f.write(chunk)
self.manager.move(temp, file)
self.output_prompt(f"文件 {name} 下载成功!", log)
except exceptions.ChunkedEncodingError:
except (
exceptions.ProxyError,
exceptions.SSLError,
exceptions.ChunkedEncodingError,
exceptions.ConnectionError,
exceptions.ReadTimeout,
):
self.manager.delete(temp)
self.output_prompt(f"网络异常,文件 {name} 下载失败!", log)

@staticmethod
def __delete_cookie(headers: dict) -> dict:
download_headers = headers.copy()
del download_headers["Cookie"]
return download_headers

@staticmethod
def output_prompt(tip: str, log):
if log:
log.write_line(tip)
else:
print(tip)
4 changes: 2 additions & 2 deletions source/Explore.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ def run(self, html: str) -> dict:
return self.__extract_data(data)

def __get_json_data(self, html: str) -> dict:
data = self.explore_data.findall(html)
return {} if len(data) != 1 else loads(data[0])
data = self.explore_data.search(html)
return loads(data.group(1)) if data else {}

def __extract_data(self, data: dict) -> dict:
result = {}
Expand Down
9 changes: 5 additions & 4 deletions source/Html.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,12 @@ def __init__(
}
self.timeout = timeout

def get_html(
def request_url(
self,
url: str,
params=None,
headers=None, ) -> str:
headers=None,
text=True, ) -> str:
try:
response = get(
url,
Expand All @@ -38,9 +39,9 @@ def get_html(
exceptions.ConnectionError,
exceptions.ReadTimeout,
):
print("获取网页源码失败,请尝试设置 Cookie 后重试!")
print("网络异常,获取网页源码失败!")
return ""
return response.text
return response.text if text else response.url

@staticmethod
def format_url(url: str) -> str:
Expand Down
4 changes: 2 additions & 2 deletions source/Image.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from json import loads
from re import compile

__all__ = ['Image']


Expand All @@ -25,8 +26,7 @@ def __generate_image_link(token: str) -> str:
return f"https://ci.xiaohongshu.com/{token}?imageView2/2/w/format/png"

def __extract_image_token(self, url: str) -> str:
return self.__generate_image_link(token[0]) if len(
token := self.IMAGE_TOKEN.findall(url)) == 1 else ""
return self.__generate_image_link(token.group(1)) if (token := self.IMAGE_TOKEN.search(url)) else ""

def __extract_image_urls(self, data: list[dict]) -> list[str]:
urls = []
Expand Down
8 changes: 8 additions & 0 deletions source/Manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,14 @@


class Manager:
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/119.0.0.0 Safari/537.36",
}

def __init__(self, root: Path):
self.temp = root.joinpath("./temp")

@staticmethod
def is_exists(path: Path) -> bool:
return path.exists()
Expand Down
5 changes: 3 additions & 2 deletions source/Settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,17 @@


class Settings:
file = Path(__file__).resolve().parent.parent.joinpath("./settings.json")
default = {
"path": "",
"folder": "Download",
"cookie": "",
"proxies": None,
"timeout": 10,
"chunk": 1024 * 1024,
}

def __init__(self, root: Path):
self.file = root.joinpath("./settings.json")

def run(self):
return self.read() if self.file.is_file() else self.create()

Expand Down
9 changes: 4 additions & 5 deletions source/Video.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
from re import compile

from .Html import Html

__all__ = ['Video']


class Video:
VIDEO_ID = compile(r'"masterUrl":"(.*?)"')
VIDEO_TOKEN = compile(r'"originVideoKey":"pre_post\\u002F(\S+?)"')

def get_video_link(self, html: str):
return [Html.format_url(u) for u in self.VIDEO_ID.findall(html)]
def get_video_link(self, html: str) -> list:
return [f"https://sns-video-hw.xhscdn.com/pre_post/{
t.group(1)}"] if (t := self.VIDEO_TOKEN.search(html)) else []
Loading

0 comments on commit cc72e54

Please sign in to comment.