Skip to content

Commit

Permalink
dict
Browse files Browse the repository at this point in the history
  • Loading branch information
test123456654321 committed Oct 26, 2024
1 parent 74b0f24 commit d847576
Show file tree
Hide file tree
Showing 6 changed files with 136 additions and 82 deletions.
37 changes: 37 additions & 0 deletions src/LunaTranslator/cishu/japandict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import requests
from urllib.parse import quote
from cishu.cishubase import cishubase
from myutils.utils import get_element_by
import threading, base64, re


class japandict(cishubase):
    """Dictionary backend that looks words up on japandict.com."""

    def makelinkbase64(self, link, saver):
        """Download ``link`` and record it in ``saver`` as a base64 data URI.

        The raw response bytes are base64-encoded and stored under the key
        ``link`` so the caller can later substitute the remote URL with the
        inlined content. ``self.proxy`` is passed to ``requests`` unchanged
        (it is not defined in this class; presumably supplied by the base).
        """
        payload = requests.get(link, proxies=self.proxy).content
        encoded = base64.b64encode(payload).decode("utf-8")
        saver[link] = f"data:application/octet-stream;base64,{encoded}"

    def search(self, word):
        """Return the result-list HTML for ``word`` with its assets inlined.

        Fetches the search page, extracts the element whose ``class``
        attribute is exactly ``list-group list-group-flush`` and appends the
        site's stylesheet/font ``<link>`` tags, whose ``href`` targets are
        replaced by base64 data URIs. Returns ``None`` when the element
        cannot be extracted.
        """
        query_url = "https://www.japandict.com/?s={}&lang=eng&list=1".format(
            quote(word)
        )
        page = requests.get(query_url, proxies=self.proxy).text

        res = get_element_by("class", "list-group list-group-flush", page)
        if res is None:
            return None

        styles = '<link rel="stylesheet" href="https://www.japandict.com/static/css/japandict.ac087f3ecbc8.css" type="text/css"><link rel="preload" href="https://www.japandict.com/static/JapaneseRadicals-Regular.woff2" as="font"><link rel="preload" href="https://www.japandict.com/static/radicals_font.woff" as="font">'

        # One download thread per referenced asset; every thread writes its
        # result into the shared ``saver`` mapping under its own link key.
        saver = {}
        workers = [
            threading.Thread(target=self.makelinkbase64, args=(asset, saver))
            for asset in re.findall('href="(.*?)"', styles)
        ]
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()

        # Links whose download did not produce an entry stay as remote URLs.
        for asset, data_uri in saver.items():
            styles = styles.replace(asset, data_uri)
        return res + styles
80 changes: 6 additions & 74 deletions src/LunaTranslator/cishu/jisho.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,90 +2,21 @@
from urllib.parse import quote
import re
from cishu.cishubase import cishubase

from html.parser import HTMLParser


class IDParser(HTMLParser):
    """HTMLParser that isolates the inner HTML of the tag with a given id.

    Feed a document with :meth:`loads`, then call :meth:`get_result` to get
    the text between the matching opening tag and its closing tag.

    ``self.result`` is built up as ``[tag, start_pos, end_pos]`` where the
    positions are the ``(line, column)`` pairs reported by ``getpos()``.

    Notes:
        * A matching element nested inside a capture that is already in
          progress is treated as ordinary content; it no longer restarts
          the capture (which used to yield a span running from the inner
          opening tag to the outer closing tag).
        * An element that closes before any other token is seen (an empty
          element) never gets a start position, so ``get_result`` returns
          ``None`` for it.
    """

    def __init__(self, id):
        self.id = id
        self.result = None
        self.started = False
        self.depth = {}
        self.html = None
        self.watch_startpos = False
        super().__init__()

    def loads(self, html):
        """Parse ``html`` completely, keeping it for later slicing."""
        self.html = html
        self.feed(html)
        self.close()

    def handle_starttag(self, tag, attrs):
        attrs = dict(attrs)
        if self.started:
            # First token after the matched opening tag marks content start.
            self.find_startpos(None)
        # Only begin a capture when none is running, so a nested element
        # with the same id cannot clobber the capture in progress.
        if not self.started and "id" in attrs and attrs["id"] == self.id:
            self.result = [tag]
            self.started = True
            self.watch_startpos = True
            # Drop counters left over from an earlier capture.
            self.depth = {}
        if self.started:
            self.depth[tag] = self.depth.get(tag, 0) + 1

    def handle_endtag(self, tag):
        if not self.started:
            return
        if tag in self.depth:
            self.depth[tag] -= 1
        # The capture ends when every opened tag of the matched kind is closed.
        if self.depth[self.result[0]] == 0:
            self.started = False
            self.result.append(self.getpos())

    def find_startpos(self, x):
        """Needed to put the start position of the result (self.result[1])
        after the opening tag with the requested id"""
        if self.watch_startpos:
            self.watch_startpos = False
            self.result.append(self.getpos())

    # Any non-tag token directly after the opening tag also marks the start.
    handle_entityref = handle_charref = handle_data = handle_comment = handle_decl = (
        handle_pi
    ) = unknown_decl = find_startpos

    def get_result(self):
        """Return the captured inner HTML (stripped), or ``None`` if the
        element was not found or its start/end positions are incomplete."""
        if self.result is None or len(self.result) != 3:
            return None
        (start_line, start_col), (end_line, end_col) = self.result[1], self.result[2]
        lines = self.html.split("\n")[start_line - 1 : end_line]
        lines[0] = lines[0][start_col:]
        if len(lines) == 1:
            # Start and end share a line: the end column must be shifted by
            # the prefix that was just cut off.
            lines[-1] = lines[-1][: end_col - start_col]
        lines[-1] = lines[-1][:end_col]
        return "\n".join(lines).strip()


def get_element_by_id(id, html):
    """Return the inner HTML of the tag whose ``id`` attribute equals ``id``.

    Runs an ``IDParser`` over ``html`` and hands back whatever its
    ``get_result()`` reports, which is ``None`` when nothing was isolated.
    """
    finder = IDParser(id)
    finder.loads(html)
    content = finder.get_result()
    return content
from myutils.utils import get_element_by


class jisho(cishubase):

def search(self, word):
url = "https://jisho.org/word/{}".format(quote(word))
url = "https://jisho.org/search/{}".format(quote(word))
html = requests.get(
url,
proxies=self.proxy,
).text

res = get_element_by_id("page_container", html)
if get_element_by("id", "no-matches", html):
return
res = get_element_by("id", "page_container", html)
if res is None:
return
res = (
Expand All @@ -94,6 +25,7 @@ def search(self, word):
.replace(
'<a href="#" class="signin">Log in</a> to talk about this word.', ""
)
.replace(get_element_by("id", "other_dictionaries", html), "")
)

ss = re.search('href="https://assets.jisho.org/assets/application(.*)"', html)
Expand Down
16 changes: 11 additions & 5 deletions src/LunaTranslator/cishu/youdao.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,22 @@
import re, os
from cishu.cishubase import cishubase
from myutils.utils import simplehtmlparser
from myutils.utils import get_element_by


class youdao(cishubase):

def search(self, word):
url = "https://dict.youdao.com/result?word={}&lang={}".format(
quote(word), getlangsrc()
)
def search(self, word: str):
lang = getlangsrc()
if lang == "auto":
if word.isascii():
lang = "en"
else:
lang = "ja"
url = "https://dict.youdao.com/result?word={}&lang={}".format(quote(word), lang)
text = requests.get(url, proxies=self.proxy).text

if not get_element_by("class", "word-head", text):
return
text = re.sub("<header([\\s\\S]*?)></header>", "", text)
text = re.sub("<aside([\\s\\S]*?)></aside>", "", text)

Expand Down
79 changes: 77 additions & 2 deletions src/LunaTranslator/myutils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import threading, winreg
import re, heapq, winsharedutils
from myutils.wrapper import tryprint, threader
from html.parser import HTMLParser


def qimage2binary(qimage: QImage, fmt="BMP"):
Expand Down Expand Up @@ -95,8 +96,12 @@ def getlanguagespace(lang=None):

def findenclose(text, tag):
i = 0
tags = f"<{tag}"
tage = f"</{tag}>"
if tag == "link":
tags = "<link"
tage = ">"
else:
tags = f"<{tag}"
tage = f"</{tag}>"
collect = ""
__ = 0
while True:
Expand Down Expand Up @@ -947,3 +952,73 @@ def createenglishlangmap():
)
mp.update({"auto": ""})
return mp


class IDParser(HTMLParser):
    """HTMLParser that isolates the inner HTML of a tag selected by attribute.

    The tag is chosen by an exact string match of attribute ``attr`` against
    ``attrv`` (for ``class`` this means the whole attribute value, not a
    single class token). Feed a document with :meth:`loads`, then call
    :meth:`get_result` to get the text between the matching opening tag and
    its closing tag.

    ``self.result`` is built up as ``[tag, start_pos, end_pos]`` where the
    positions are the ``(line, column)`` pairs reported by ``getpos()``.

    Notes:
        * A matching element nested inside a capture that is already in
          progress is treated as ordinary content; it no longer restarts
          the capture (which used to yield a span running from the inner
          opening tag to the outer closing tag).
        * An element that closes before any other token is seen (an empty
          element) never gets a start position, so ``get_result`` returns
          ``None`` for it.
    """

    def __init__(self, attr, attrv):
        # (attribute name, expected value); the attribute keeps its
        # historical name ``id`` even though any attribute can be matched.
        self.id = attr, attrv
        self.result = None
        self.started = False
        self.depth = {}
        self.html = None
        self.watch_startpos = False
        super().__init__()

    def loads(self, html):
        """Parse ``html`` completely, keeping it for later slicing."""
        self.html = html
        self.feed(html)
        self.close()

    def handle_starttag(self, tag, attrs):
        attrs = dict(attrs)
        if self.started:
            # First token after the matched opening tag marks content start.
            self.find_startpos(None)
        name, value = self.id
        # Only begin a capture when none is running, so a nested element
        # with the same attribute value cannot clobber the capture in progress.
        if not self.started and name in attrs and attrs[name] == value:
            self.result = [tag]
            self.started = True
            self.watch_startpos = True
            # Drop counters left over from an earlier capture.
            self.depth = {}
        if self.started:
            self.depth[tag] = self.depth.get(tag, 0) + 1

    def handle_endtag(self, tag):
        if not self.started:
            return
        if tag in self.depth:
            self.depth[tag] -= 1
        # The capture ends when every opened tag of the matched kind is closed.
        if self.depth[self.result[0]] == 0:
            self.started = False
            self.result.append(self.getpos())

    def find_startpos(self, x):
        """Needed to put the start position of the result (self.result[1])
        after the opening tag with the requested attribute value"""
        if self.watch_startpos:
            self.watch_startpos = False
            self.result.append(self.getpos())

    # Any non-tag token directly after the opening tag also marks the start.
    handle_entityref = handle_charref = handle_data = handle_comment = handle_decl = (
        handle_pi
    ) = unknown_decl = find_startpos

    def get_result(self):
        """Return the captured inner HTML (stripped), or ``None`` if the
        element was not found or its start/end positions are incomplete."""
        if self.result is None or len(self.result) != 3:
            return None
        (start_line, start_col), (end_line, end_col) = self.result[1], self.result[2]
        lines = self.html.split("\n")[start_line - 1 : end_line]
        lines[0] = lines[0][start_col:]
        if len(lines) == 1:
            # Start and end share a line: the end column must be shifted by
            # the prefix that was just cut off.
            lines[-1] = lines[-1][: end_col - start_col]
        lines[-1] = lines[-1][:end_col]
        return "\n".join(lines).strip()


def get_element_by(attr, attrv, html):
    """Return the inner HTML of the tag whose attribute ``attr`` equals ``attrv``.

    Runs an ``IDParser`` over ``html`` and hands back whatever its
    ``get_result()`` reports, which is ``None`` when nothing was isolated.
    """
    finder = IDParser(attr, attrv)
    finder.loads(html)
    content = finder.get_result()
    return content
4 changes: 4 additions & 0 deletions src/files/defaultconfig/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -1378,6 +1378,10 @@
"use": false,
"name": "jisho"
},
"japandict": {
"use": false,
"name": "JapanDict"
},
"weblio": {
"use": false,
"name": "weblio"
Expand Down
2 changes: 1 addition & 1 deletion src/plugins/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ include(generate_product_version)

set(VERSION_MAJOR 5)
set(VERSION_MINOR 50)
set(VERSION_PATCH 1)
set(VERSION_PATCH 2)

add_library(pch pch.cpp)
target_precompile_headers(pch PUBLIC pch.h)
Expand Down

0 comments on commit d847576

Please sign in to comment.