download.py
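
"""Multi-threaded image downloader for yande.re.

Fetches post metadata page by page from the yande.re JSON API and downloads
each image on a thread pool, reporting progress through PyQt signals.
"""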
import os
import time
from concurrent.futures import ThreadPoolExecutor, as_completed, wait

import requests
from PyQt5.QtCore import QObject, pyqtSignal, QMutex

class Downloader(QObject):
    """Downloads images for a search tag across a range of result pages."""

    log_signal = pyqtSignal(str)    # emits human-readable progress/error messages
    finished_signal = pyqtSignal()  # emitted once a full run has completed

    def __init__(self, max_workers=8, max_retries=3):
        super().__init__()
        self.directory = ""
        self.search_tag = ""
        self.start_page = 0
        self.end_page = 0
        self.download_original = True
        self.max_workers = max_workers
        self.max_retries = max_retries
        self.mutex = QMutex()  # guards against overlapping download runs

    def setup(self, directory, search_tag, start_page, end_page, download_original=True):
        """Configure the target directory, search tag, and page range before a run."""
        self.directory = directory
        self.search_tag = search_tag
        self.start_page = start_page
        self.end_page = end_page
        self.download_original = download_original

    def download_images(self, limits):
        self.mutex.lock()
        try:
            with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
                tasks = []
                for page in range(self.start_page, self.end_page + 1):
                    tasks.extend(self.download_page(page, executor, limits))
                tasks = [task for task in tasks if task is not None]  # remove empty tasks
                if tasks:
                    for future in as_completed(tasks):
                        try:
                            result = future.result()
                            if not result:
                                self.log_signal.emit("Error occurred while downloading images.")
                        except Exception as e:
                            self.log_signal.emit(f"Error occurred while downloading images: {str(e)}")
                else:
                    self.log_signal.emit("No tasks to execute. Please check if the tag you entered exists.")
        finally:
            self.mutex.unlock()
            self.finished_signal.emit()

    def download_page(self, page, executor, limits):
        json_data = self.get_json(page, True, limits)
        if json_data is None:
            self.log_signal.emit(f"Error downloading images on page {page}. Skipping...")
            return []
        tasks = []
        for current_post in json_data:
            # 'file_url' is the original file; 'jpeg_url' is the JPEG version.
            if self.download_original:
                image_url = current_post['file_url']
            else:
                image_url = current_post['jpeg_url']
            image_id = current_post['id']
            file_extension = os.path.splitext(image_url)[1]
            filename = os.path.join(self.directory, f"{image_id}{file_extension}")
            try:
                task = executor.submit(self.download_image, image_url, filename)
                tasks.append(task)
            except Exception as e:
                self.log_signal.emit(f"Error submitting download task: {str(e)}")
        self.log_signal.emit(f"Downloading images on page {page}...")
        wait(tasks)  # block until this page's downloads finish before moving on
        return tasks

    def download_image(self, url, filename, retry_count=0):
        if os.path.exists(filename):
            self.log_signal.emit(f"Skipping download, file already exists:\n{filename}")
            return True
        while retry_count <= self.max_retries:
            try:
                response = requests.get(url, stream=True, timeout=30)
                if response.status_code == 200:
                    with open(filename, 'wb') as f:
                        for chunk in response.iter_content(chunk_size=1024):
                            if chunk:
                                f.write(chunk)
                    self.log_signal.emit(f"Downloaded {filename}")
                    return True
                # A non-200 response must raise here; otherwise the loop would
                # retry forever without ever incrementing retry_count.
                response.raise_for_status()
            except Exception as e:
                self.log_signal.emit(f"Error while downloading {filename}: {e}")
                retry_count += 1
                self.log_signal.emit(f"Retrying download of {filename} (Attempt {retry_count}/{self.max_retries})")
                time.sleep(2)  # wait before retrying
        self.log_signal.emit(f"Failed to download {filename} after {self.max_retries} attempts.")
        return False

    def get_json(self, page, retry=True, limits=40):
        try:
            url = f"https://yande.re/post.json?tags={self.search_tag}&limit={limits}&page={page}"
            self.log_signal.emit(f"Requesting URL: {url}")
            response = requests.get(url, timeout=30)
            if response.status_code == 200:
                return response.json()
            else:
                response.raise_for_status()
        except Exception as e:
            self.log_signal.emit(f"Error while getting JSON data: {e}")
            if retry:
                self.log_signal.emit("Retrying...")
                time.sleep(5)
                return self.get_json(page, retry=False, limits=limits)
        return None
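

# A minimal usage sketch, assuming the module is run directly. The directory
# name, tag, and page range below are placeholder values, and print() stands
# in for a real Qt slot; with direct (non-queued) connections the signals
# fire synchronously, so no Qt event loop is required here.
if __name__ == "__main__":
    downloader = Downloader(max_workers=4, max_retries=2)
    downloader.log_signal.connect(print)
    downloader.finished_signal.connect(lambda: print("All downloads finished."))

    os.makedirs("downloads", exist_ok=True)
    downloader.setup("downloads", "landscape", start_page=1, end_page=1)
    downloader.download_images(limits=40)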