Skip to content

Commit

Permalink
优化项目代码
Browse files Browse the repository at this point in the history
  • Loading branch information
JoeanAmier committed Jan 10, 2024
1 parent 039f9f9 commit 0d22a8f
Show file tree
Hide file tree
Showing 9 changed files with 65 additions and 29 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ async with XHS(work_path=work_path,
<tr>
<td align="center">cookie</td>
<td align="center">str</td>
<td align="center">小红书网页版 Cookie,<b>无需登录</b></td>
<td align="center">小红书网页版 Cookie,<b>无需登录,建议修改</b></td>
<td align="center">默认 Cookie</td>
</tr>
<tr>
Expand Down
2 changes: 1 addition & 1 deletion source/TUI/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,5 @@ async def on_mount(self) -> None:
async def action_settings(self):
await self.push_screen("setting")

async def action_back(self):
async def action_index(self):
await self.push_screen("index")
2 changes: 1 addition & 1 deletion source/TUI/setting.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ class Setting(Screen):
"static/css/setting.tcss")
BINDINGS = [
Binding(key="q", action="quit", description="退出程序"),
Binding(key="b", action="back", description="返回首页"),
Binding(key="b", action="index", description="返回首页"),
]

def compose(self) -> ComposeResult:
Expand Down
5 changes: 2 additions & 3 deletions source/application/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,11 +114,10 @@ async def __extract_links(self, url: str, log) -> list:
async def __deal_extract(self, url: str, download: bool, log, bar):
logging(log, self.prompt.start_processing(url))
html = await self.html.request_url(url, log=log)
# logging(log, html) # 调试代码
if not html:
namespace = self.__generate_data_object(html)
if not namespace:
logging(log, self.prompt.get_data_failure(url), ERROR)
return {}
namespace = self.__generate_data_object(html)
data = self.explore.run(namespace)
# logging(log, data) # 调试代码
if not data:
Expand Down
10 changes: 8 additions & 2 deletions source/expansion/converter.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Union

from lxml.etree import HTML
from yaml import safe_load

Expand All @@ -19,6 +21,8 @@ def run(self, content: str) -> dict:
self.__extract_object(content)))

def __extract_object(self, html: str) -> str:
    """Return the first result of evaluating ``self.INITIAL_STATE`` on *html*.

    :param html: HTML source text; may be empty or ``None``.
    :return: The first XPath result, or ``""`` when *html* is empty, when
        the parser produces no root element, or when nothing matches.
    """
    if not html:
        return ""
    html_tree = HTML(html)
    # lxml's HTML() can return None when the markup contains no root
    # element (e.g. a comment-only document); calling .xpath on it would
    # raise AttributeError, so treat that case like "no match".
    if html_tree is None:
        return ""
    matches = html_tree.xpath(self.INITIAL_STATE)
    return matches[0] if matches else ""

Expand All @@ -32,18 +36,20 @@ def __filter_object(cls, data: dict) -> dict:

@classmethod
def deep_get(cls, data: dict, keys: list | tuple, default=None):
if not data:
return default
try:
for key in keys:
if key.startswith("[") and key.endswith("]"):
data = cls.safe_get(data, int(key[1:-1]))
else:
data = data[key]
return data
except (KeyError, IndexError, ValueError):
except (KeyError, IndexError, ValueError, TypeError):
return default

@staticmethod
def safe_get(data: dict | list | tuple | set, index: int):
def safe_get(data: Union[dict, list, tuple, set], index: int):
if isinstance(data, dict):
return list(data.values())[index]
elif isinstance(data, list | tuple | set):
Expand Down
29 changes: 21 additions & 8 deletions source/expansion/namespace.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
from copy import deepcopy
from types import SimpleNamespace
from typing import Union

__all__ = ["Namespace"]


class Namespace:
def __init__(self, data: dict):
self.data = self.generate_data_object(data)
def __init__(self, data: dict) -> None:
self.data: SimpleNamespace = self.generate_data_object(data)

@staticmethod
def generate_data_object(data: dict) -> SimpleNamespace:
Expand All @@ -24,21 +25,21 @@ def depth_conversion(element):
def safe_extract(
self,
attribute_chain: str,
default: str | int | list | dict | SimpleNamespace = ""):
default: Union[str, int, list, dict, SimpleNamespace] = ""):
return self.__safe_extract(self.data, attribute_chain, default)

@staticmethod
def __safe_extract(
data_object,
data_object: SimpleNamespace,
attribute_chain: str,
default: str | int | list | dict | SimpleNamespace = "", ):
default: Union[str, int, list, dict, SimpleNamespace] = "", ):
data = deepcopy(data_object)
attributes = attribute_chain.split(".")
for attribute in attributes:
if "[" in attribute:
parts = attribute.split("[", 1)
attribute = parts[0]
index = parts[1].split("]", 1)[0]
index = parts[1][:-1]
try:
index = int(index)
data = getattr(data, attribute, None)[index]
Expand All @@ -55,12 +56,24 @@ def object_extract(
cls,
data_object: SimpleNamespace,
attribute_chain: str,
default: str | int | list | dict | SimpleNamespace = "",
default: Union[str, int, list, dict, SimpleNamespace] = "",
):
return cls.__safe_extract(
data_object,
attribute_chain,
default, )

@property
def __dict__(self):
return vars(self.data)
return self.convert_to_dict(self.data)

@classmethod
def convert_to_dict(cls, data) -> dict:
    """Recursively convert a ``SimpleNamespace`` into a plain ``dict``.

    Nested ``SimpleNamespace`` values are converted as well, including
    those stored inside lists and tuples; every other value is returned
    unchanged.

    :param data: Object whose attributes (as reported by ``vars``) are
        converted.
    :return: A dict mapping attribute names to converted values.
    """

    def convert(value):
        # Namespaces become dicts; sequences keep their type but have
        # their items converted so no namespace is left behind in them.
        if isinstance(value, SimpleNamespace):
            return cls.convert_to_dict(value)
        if isinstance(value, list):
            return [convert(item) for item in value]
        if isinstance(value, tuple):
            return tuple(convert(item) for item in value)
        return value

    return {key: convert(value) for key, value in vars(data).items()}

def __bool__(self):
return bool(vars(self.data))
2 changes: 2 additions & 0 deletions source/module/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
USERSCRIPT,
USERAGENT,
COOKIE,
HEADERS,
)
from .tools import (
retry,
Expand Down Expand Up @@ -49,6 +50,7 @@
"USERSCRIPT",
"USERAGENT",
"COOKIE",
"HEADERS",
"retry",
"logging",
"wait",
Expand Down
11 changes: 6 additions & 5 deletions source/module/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from source.translator import Chinese
from source.translator import English
from .static import COOKIE
from .static import HEADERS
from .static import USERAGENT

__all__ = ["Manager"]
Expand Down Expand Up @@ -40,9 +41,9 @@ def __init__(
self.temp = root.joinpath("./temp")
self.path = self.__check_path(path)
self.folder = self.__check_folder(folder)
self.headers = {
"User-Agent": user_agent or USERAGENT,
"Cookie": cookie or COOKIE}
self.blank_headers = HEADERS | {
"User-Agent": user_agent or USERAGENT, }
self.headers = self.blank_headers | {"Cookie": cookie or COOKIE}
self.retry = retry
self.chunk = chunk
self.record_data = record_data
Expand All @@ -51,11 +52,11 @@ def __init__(
self.proxy = proxy
self.request_session = ClientSession(
headers=self.headers | {
"Referer": "https://www.xiaohongshu.com/", },
"Referer": "https://www.xiaohongshu.com/explore", },
timeout=ClientTimeout(connect=timeout),
)
self.download_session = ClientSession(
headers={"User-Agent": self.headers["User-Agent"]},
headers=self.blank_headers,
timeout=ClientTimeout(connect=timeout))
self.prompt = language

Expand Down
31 changes: 23 additions & 8 deletions source/module/static.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
"USERSCRIPT",
"USERAGENT",
"COOKIE",
"HEADERS",
]

VERSION_MAJOR = 1
Expand All @@ -31,17 +32,31 @@

USERSCRIPT = "https://raw.githubusercontent.com/JoeanAmier/XHS-Downloader/master/static/XHS-Downloader.js"

# Default request headers shared by the HTTP sessions. The Sec-Ch-Ua value
# identifies the client as Chromium / Microsoft Edge 120 on Windows.
HEADERS = {
    "Accept": ",".join((
        "text/html",
        "application/xhtml+xml",
        "application/xml;q=0.9",
        "image/webp",
        "image/apng",
        "*/*;q=0.8",
        "application/signed-exchange;v=b3;q=0.7",
    )),
    # NOTE(review): "br" is advertised here — confirm the HTTP client in use
    # can decode Brotli responses (aiohttp needs Brotli/brotlicffi installed).
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Cache-Control": "max-age=0",
    "Dnt": "1",
    "Sec-Ch-Ua": ", ".join((
        '"Not_A Brand";v="8"',
        '"Chromium";v="120"',
        '"Microsoft Edge";v="120"',
    )),
    "Sec-Ch-Ua-Mobile": "?0",
    "Sec-Ch-Ua-Platform": '"Windows"',
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "none",
    "Sec-Fetch-User": "?1",
    "Upgrade-Insecure-Requests": "1",
}
USERAGENT = (
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 "
"Safari/537.36")
"Safari/537.36 Edg/120.0.0.0")
COOKIE = (
"abRequestId=54c534bb-a2c6-558f-8e03-5b4c5c45635c; xsecappid=xhs-pc-web; a1=18c286a400"
"4jy56qvzejvp631col0hd3032h4zjez50000106381; webId=779c977da3a15b5623015be94bdcc9e9; g"
"id=yYSJYK0qDW8KyYSJYK048quV84Vv2KAhudVhJduUKqySlx2818xfq4888y8KqYy8y2y2f8Jy; web_sess"
"ion=030037a259ce5f15c8d560dc12224a9fdc2ed1; webBuild=3.19.4; websectiga=984412fef754c"
"018e472127b8effd174be8a5d51061c991aadd200c69a2801d6; sec_poison_id=3dd48845-d604-4535"
"-bcc2-a859e97518bf; unread={%22ub%22:%22655eb3d60000000032033955%22%2C%22ue%22:%22656"
"e9ef2000000003801ff3d%22%2C%22uc%22:29}; cache_feeds=[]")
"abRequestId=a1c55c3d-edcd-5753-938b-15d22a78cb8a; webBuild=3.23.2; "
"a1=18ceecc41c5d2gkprctahn1jayh458m5eoos9grxb50000267832; webId=79879aaf1b46fa2120dfba20d6155928; "
"websectiga=3fff3a6f9f07284b62c0f2ebf91a3b10193175c06e4f71492b60e056edcdebb2; "
"sec_poison_id=52bff38d-96eb-40b6-a46b-5e7cc86014e4; web_session=030037a2ae3713ec49882425e5224a3cbb4eef; "
"gid=yYSddSS2DKdyyYSddSS4ylkFS2fJkTUFS90xlCDIyV0vxM2842Y62j888JKWYqJ8iDD4KY2d; xsecappid=xhs-pc-web")

MASTER = "b #fff200"
PROMPT = "b turquoise2"
Expand Down

0 comments on commit 0d22a8f

Please sign in to comment.