Skip to content

Commit

Permalink
Features and Fixes
Browse files Browse the repository at this point in the history
Probably Fixed UltimaHoarder#650
Fixed UltimaHoarder#657
Probably Fixed UltimaHoarder#659

Script no longer uses JSON to store metadata.
We'll be using SQLite from here on out. JSON can be easily corrupted. This also allows us to easily ensure that no duplicate content is stored in the database and folders.
  • Loading branch information
UltimaHoarder committed Jan 6, 2021
1 parent 9bae3df commit ac185ed
Show file tree
Hide file tree
Showing 12 changed files with 353 additions and 328 deletions.
22 changes: 0 additions & 22 deletions apis/api_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import requests
import ujson
import socket
import logging
import os
from multiprocessing import cpu_count
from requests.adapters import HTTPAdapter
Expand All @@ -14,28 +13,9 @@
from os.path import dirname as up


def setup_logger(name, log_file, level=logging.INFO):
"""To setup as many loggers as you want"""
log_filename = ".logs/"+log_file
log_path = os.path.dirname(log_filename)
os.makedirs(log_path, exist_ok=True)
formatter = logging.Formatter(
'%(asctime)s %(levelname)s %(name)s %(message)s')

handler = logging.FileHandler(log_filename, 'w+', encoding='utf-8')
handler.setFormatter(formatter)

logger = logging.getLogger(name)
logger.setLevel(level)
logger.addHandler(handler)

return logger


path = up(up(os.path.realpath(__file__)))
os.chdir(path)

log_error = setup_logger('errors', 'errors.log')

global_settings = None
session_rules = None
Expand Down Expand Up @@ -94,7 +74,6 @@ def json_request(link, session, method="GET", stream=False, json_format=True, da
text = r.text
if not text:
message = "ERROR: 100 Posts skipped. Please post the username you're trying to scrape on the issue "'100 Posts Skipped'""
log_error.exception(message)
return result
return ujson.loads(text)
else:
Expand All @@ -107,7 +86,6 @@ def json_request(link, session, method="GET", stream=False, json_format=True, da
sleep_number += 0.5
continue
except Exception as e:
log_error.exception(e)
continue
return result

Expand Down
4 changes: 3 additions & 1 deletion apis/onlyfans/onlyfans.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,9 @@ def __init__(self, option={}, init=False) -> None:
self.archived_stories = {}
self.mass_messages = []
self.paid_content = {}
self.sessions = option.get("sessions")
self.sessions = option.get("sessions", [])
for session in self.sessions:
session.links = []
self.auth_details = auth_details()
self.profile_directory = option.get("profile_directory", "")
self.active = False
Expand Down
10 changes: 5 additions & 5 deletions classes/prepare_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,7 @@ def __iter__(self):
post = self.post_item(old_post)
new_medias = []
for media in post.medias:
if "Texts" == key:
continue
media["media_type"] = key
media2 = self.media_item(media)
new_medias.append(media2)
post.medias = new_medias
Expand All @@ -167,8 +166,7 @@ def __iter__(self):
old_post = media_list2[0]
post = self.post_item(old_post)
for item in media_list2:
if "Texts" == key:
continue
item["media_type"] = key
media = self.media_item(item)
post.medias.append(media)
new_posts.append(post)
Expand Down Expand Up @@ -212,6 +210,7 @@ def __init__(self, option={}):
self.directory = option.get("directory", "")
self.filename = option.get("filename", "")
self.size = option.get("size", None)
self.media_type = option.get("media_type", None)
self.session = option.get("session", None)
self.downloaded = option.get("downloaded", False)

Expand Down Expand Up @@ -366,7 +365,8 @@ def __init__(self, option, keep_vars=False):
self.date = option.get('postedAt', format_variables2.date)
self.price = option.get('price', 0)
self.date_format = option.get('date_format')
self.maximum_length = option.get('maximum_length')
self.maximum_length = 255
self.text_length = option.get('text_length', self.maximum_length)
self.directory = option.get(
'directory')
if not keep_vars:
Expand Down
119 changes: 57 additions & 62 deletions extras/OFRenamer/start.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#!/usr/bin/env python3
from apis.onlyfans.onlyfans import media_types
from apis.api_helper import multiprocessing
from classes.prepare_metadata import format_types, prepare_reformat
import urllib.parse as urlparse
Expand All @@ -8,14 +9,17 @@
from itertools import product


def fix_directories(posts, base_directory, site_name, api_type, media_type, username, all_files, json_settings):
def fix_directories(posts, all_files, Session, folder, site_name, api_type, username, base_directory, json_settings):
new_directories = []

def fix_directory(post):
new_post_dict = post.convert(keep_empty_items=True)
for media in post.medias:
if media.links:
path = urlparse.urlparse(media.links[0]).path
def fix_directories(post):
database_session = Session()
post_id = post.id
result = database_session.query(folder.media_table)
media_db = result.filter_by(post_id=post_id).all()
for media in media_db:
if media.link:
path = urlparse.urlparse(media.link).path
else:
path = media.filename
new_filename = os.path.basename(path)
Expand All @@ -28,72 +32,72 @@ def fix_directory(post):
download_path = base_directory
today = datetime.today()
today = today.strftime("%d-%m-%Y %H:%M:%S")
new_media_dict = media.convert(keep_empty_items=True)
option = {}
option = option | new_post_dict | new_media_dict
option["site_name"] = site_name
option["filename"] = filename
option["post_id"] = post_id
option["media_id"] = media.id
option["username"] = username
option["api_type"] = api_type
option["media_type"] = media_type
option["media_type"] = media.media_type
option["filename"] = filename
option["ext"] = ext
option["username"] = username
option["text"] = post.text
option["postedAt"] = media.created_at
option["price"] = post.price
option["date_format"] = date_format
option["maximum_length"] = text_length
option["text_length"] = text_length
option["directory"] = download_path
prepared_format = prepare_reformat(option)
file_directory = main_helper.reformat(
prepared_format, file_directory_format)
prepared_format.directory = file_directory
old_filepath = ""
x = [x for x in all_files if media.filename == os.path.basename(x)]
if x:
# media.downloaded = True
old_filepath = x[0]
old_filepath = os.path.abspath(old_filepath)
old_filepaths = [
x for x in all_files if media.filename in os.path.basename(x)]
if not old_filepaths:
old_filepaths = [
x for x in all_files if str(media.id) in os.path.basename(x)]
print
if old_filepaths:
old_filepath = old_filepaths[0]
print
new_filepath = main_helper.reformat(
prepared_format, filename_format)
if old_filepath and old_filepath != new_filepath:
if os.path.exists(new_filepath):
os.remove(new_filepath)
if os.path.exists(old_filepath):
if media.size:
media.downloaded = True
moved = None
while not moved:
try:
moved = shutil.move(old_filepath, new_filepath)
except OSError as e:
print(e)
print
print
else:
print
if prepared_format.text:
pass
setattr(media, "old_filepath", old_filepath)
setattr(media, "new_filepath", new_filepath)
media.directory = file_directory
media.filename = os.path.basename(new_filepath)
database_session.commit()
new_directories.append(os.path.dirname(new_filepath))
database_session.close()
pool = multiprocessing()
pool.starmap(fix_directory, product(
pool.starmap(fix_directories, product(
posts))
new_directories = list(set(new_directories))
return posts, new_directories


def fix_metadata(posts):
for post in posts:
for media in post.medias:
def update(old_filepath, new_filepath):
# if os.path.exists(old_filepath):
# if not media.session:
# media.downloaded = True
if old_filepath != new_filepath:
if os.path.exists(new_filepath):
os.remove(new_filepath)
if os.path.exists(old_filepath):
if not media.session:
media.downloaded = True
moved = None
while not moved:
try:
moved = shutil.move(old_filepath, new_filepath)
except OSError as e:
print(e)
return old_filepath, new_filepath
old_filepath = media.old_filepath
new_filepath = media.new_filepath
old_filepath, new_filepath = update(old_filepath, new_filepath)
media.directory = os.path.dirname(new_filepath)
media.filename = os.path.basename(new_filepath)
return posts


def start(subscription, api_type, api_path, site_name, json_settings):
def start(Session, api_type, api_path, site_name, subscription, folder, json_settings):
api_table = folder.api_table
media_table = folder.media_table
database_session = Session()
result = database_session.query(api_table).all()
metadata = getattr(subscription.scraped, api_type)
download_info = subscription.download_info
root_directory = download_info["directory"]
Expand Down Expand Up @@ -126,19 +130,10 @@ def start(subscription, api_type, api_path, site_name, json_settings):
for root, subdirs, files in os.walk(base_directory):
x = [os.path.join(root, x) for x in files]
all_files.extend(x)
for media_type, value in metadata.content:
if media_type == "Texts":
continue
for status, value2 in value:
fixed, new_directories = fix_directories(
value2, root_directory, site_name, api_path, media_type, username, all_files, json_settings)
for new_directory in new_directories:
directory = os.path.abspath(new_directory)
os.makedirs(directory, exist_ok=True)
fixed2 = fix_metadata(
fixed)
setattr(value, status, fixed2)
setattr(metadata.content, media_type, value,)

fixed, new_directories = fix_directories(
result, all_files, Session, folder, site_name, api_type, username, root_directory, json_settings)
database_session.close()
return metadata


Expand Down
45 changes: 45 additions & 0 deletions helpers/db_helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@

import sqlalchemy
from sqlalchemy.ext.declarative import declarative_base  # public path; `.api` is a private internal module
from sqlalchemy.orm import scoped_session
from sqlalchemy.orm.session import sessionmaker


def create_database_session(metadata_path):
    """Open (or create) the SQLite metadata database at *metadata_path*.

    Returns a ``(Session, engine)`` pair: ``Session`` is a thread-local
    ``scoped_session`` factory producing sessions bound to ``engine``.
    """
    db_engine = sqlalchemy.create_engine(f"sqlite:///{metadata_path}")
    session_factory = sessionmaker(bind=db_engine)
    return scoped_session(session_factory), db_engine


class type_0(object):
    """Placeholder table container used before concrete tables exist.

    NOTE(review): both attributes deliberately point at the SAME freshly
    created declarative base object (not real mapped classes) — presumably
    a sentinel default; confirm callers never rely on them being distinct.
    """

    def __init__(self):
        shared_base = declarative_base()
        self.api_table = shared_base
        self.media_table = shared_base


def create_api_table(Base, api_type, engine=None):
    """Declare the ORM model for one API content category.

    Base: declarative base class the model extends.
    api_type: category name; lower-cased to become the table name.
    engine: accepted but unused here — kept for caller compatibility.
    Returns the new mapped class.
    """
    sa = sqlalchemy

    class api_table(Base):
        __tablename__ = api_type.lower()

        id = sa.Column(sa.Integer, primary_key=True)
        text = sa.Column(sa.String)
        price = sa.Column(sa.Integer)
        paid = sa.Column(sa.Integer)
        created_at = sa.Column(sa.TIMESTAMP)

    return api_table


def create_media_table(Base, engine=None):
    """Declare the ORM model for the shared ``medias`` table.

    Base: declarative base class the model extends.
    engine: accepted but unused here — kept for caller compatibility.
    Returns the new mapped class.
    """
    class media_table(Base):
        __tablename__ = "medias"

        id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True)
        # Foreign reference to the owning post's id in the per-category table.
        post_id = sqlalchemy.Column(sqlalchemy.Integer)
        link = sqlalchemy.Column(sqlalchemy.String)
        directory = sqlalchemy.Column(sqlalchemy.String)
        # FIX: was declared Integer, but callers store a path basename
        # (string) here; SQLite's loose typing masked the mismatch.
        filename = sqlalchemy.Column(sqlalchemy.String)
        size = sqlalchemy.Column(sqlalchemy.Integer, default=None)
        media_type = sqlalchemy.Column(sqlalchemy.String)
        # Boolean stored as 0/1 integer.
        downloaded = sqlalchemy.Column(sqlalchemy.Integer, default=0)
        created_at = sqlalchemy.Column(sqlalchemy.TIMESTAMP)

    return media_table
Loading

0 comments on commit ac185ed

Please sign in to comment.