Skip to content

Commit

Permalink
Updated config parsing to read config from environment
Browse files Browse the repository at this point in the history
  • Loading branch information
codders committed Aug 28, 2022
1 parent 7d7c411 commit 9734dc4
Show file tree
Hide file tree
Showing 7 changed files with 144 additions and 50 deletions.
30 changes: 15 additions & 15 deletions flathunt.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,11 @@
import os
import logging
import time
from pprint import pformat

from flathunter.logging import logger, wdm_logger
from flathunter.logging import logger, wdm_logger, configure_logging
from flathunter.idmaintainer import IdMaintainer
from flathunter.hunter import Hunter
from flathunter.config import Config
from flathunter.config import Config, Env
from flathunter.heartbeat import Heartbeat

__author__ = "Jan Harrie"
Expand All @@ -31,10 +30,10 @@ def launch_flat_hunt(config, heartbeat=None):
hunter.hunt_flats()
counter = 0

while config.get('loop', {}).get('active', False):
while config.loop_is_active():
counter += 1
counter = heartbeat.send_heartbeat(counter)
time.sleep(config.get('loop', {}).get('sleeping_time', 60 * 10))
time.sleep(config.loop_period_seconds())
hunter.hunt_flats()


Expand All @@ -45,7 +44,10 @@ def main():
" and sends results to Telegram User"),
epilog="Designed by Nody"
)
default_config_path = f"{os.path.dirname(os.path.abspath(__file__))}/config.yaml"
if Env.FLATHUNTER_TARGET_URLS is not None:
default_config_path = None
else:
default_config_path = f"{os.path.dirname(os.path.abspath(__file__))}/config.yaml"
parser.add_argument('--config', '-c',
type=argparse.FileType('r', encoding='UTF-8'),
default=default_config_path,
Expand All @@ -62,15 +64,13 @@ def main():

# load config
config_handle = args.config
config = Config(config_handle.name)
if config_handle is not None:
config = Config(config_handle.name)
else:
config = Config()

# adjust log level, if required
if config.get('verbose'):
logger.setLevel(logging.DEBUG)
# Allow logging of "webdriver-manager" module on verbose mode
wdm_logger.setLevel(logging.INFO)

logger.debug("Settings from config: %s", pformat(config))
# setup logging
configure_logging(config)

# initialize search plugins for config
config.init_searchers()
Expand All @@ -89,7 +89,7 @@ def main():
return
if not config.get('telegram', {}).get('receiver_ids'):
logger.warning("No Telegram receivers configured - nobody will get notifications.")
if not config.get('urls'):
if len(config.target_urls()) == 0:
logger.error("No URLs configured. Starting like this would be pointless...")
return

Expand Down
111 changes: 99 additions & 12 deletions flathunter/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
import os
import yaml

from dotenv import load_dotenv

from flathunter.logging import logger
from flathunter.captcha.imagetyperz_solver import ImageTyperzSolver
from flathunter.captcha.twocaptcha_solver import TwoCaptchaSolver
Expand All @@ -15,18 +17,48 @@
from flathunter.crawl_idealista import CrawlIdealista
from flathunter.filter import Filter

load_dotenv()

class Env:
    """Snapshot of the FLATHUNTER_* environment variables.

    The class body runs once, when this module is imported. Each attribute
    holds the raw string value of the environment variable of the same name,
    or None when that variable is not set.
    """

    def readenv(key):
        """Return the value of environment variable `key`, or None if unset."""
        # Invoked as a plain function while the class body executes, hence no `self`.
        return os.environ.get(key)

    # Captcha setup
    FLATHUNTER_2CAPTCHA_KEY = readenv("FLATHUNTER_2CAPTCHA_KEY")
    FLATHUNTER_IMAGETYPERZ_TOKEN = readenv("FLATHUNTER_IMAGETYPERZ_TOKEN")

    # Generic Config
    FLATHUNTER_TARGET_URLS = readenv("FLATHUNTER_TARGET_URLS")
    FLATHUNTER_DATABASE_LOCATION = readenv("FLATHUNTER_DATABASE_LOCATION")
    FLATHUNTER_VERBOSE_LOG = readenv("FLATHUNTER_VERBOSE_LOG")
    FLATHUNTER_LOOP_PERIOD_SECONDS = readenv("FLATHUNTER_LOOP_PERIOD_SECONDS")

    # Website setup
    FLATHUNTER_WEBSITE_SESSION_KEY = readenv("FLATHUNTER_WEBSITE_SESSION_KEY")
    FLATHUNTER_WEBSITE_DOMAIN = readenv("FLATHUNTER_WEBSITE_DOMAIN")
    # Read by Config.website_bot_name(); without this declaration that lookup
    # raises AttributeError.
    FLATHUNTER_WEBSITE_BOT_NAME = readenv("FLATHUNTER_WEBSITE_BOT_NAME")

class Config:
"""Class to represent flathunter configuration"""

def __init__(self, filename=None, string=None):
    """Load configuration from a YAML string, a YAML file, or the environment.

    Args:
        filename: path of a YAML config file. May be None when
            FLATHUNTER_TARGET_URLS is set in the environment.
        string: YAML document to parse directly. When given, `filename`
            is ignored and environment overrides are switched off.

    Raises:
        Exception: if neither a config source nor FLATHUNTER_TARGET_URLS is
            available, or if `filename` does not exist.
    """
    self.useEnvironment = True
    if string is not None:
        # An empty YAML document parses to None; keep `config` a dict.
        self.config = yaml.safe_load(string) or {}
        self.useEnvironment = False
    elif filename is not None:
        logger.info("Using config path %s", filename)
        if not os.path.exists(filename):
            # Include the offending path; the message used to carry a bare "%s".
            raise Exception(f"No config file found at location {filename}")
        with open(filename, encoding="utf-8") as file:
            self.config = yaml.safe_load(file) or {}
    elif Env.FLATHUNTER_TARGET_URLS is not None:
        # Environment-only setup: start from an empty file configuration.
        self.config = {}
    else:
        raise Exception(
            "Config file location must be specified, or FLATHUNTER_TARGET_URLS must be set")
    self.__searchers__ = []
    self.check_deprecated()

Expand Down Expand Up @@ -71,10 +103,21 @@ def get(self, key, value=None):
"""Emulate dictionary"""
return self.config.get(key, value)

def _read_yaml_path(self, path, default_value=None):
    """Look up a dot-separated key path in the parsed YAML config.

    Args:
        path: keys joined by '.', e.g. 'loop.active'.
        default_value: returned when any key along the path is missing.

    Returns:
        The value stored at `path` (which may itself be None), or
        `default_value` when the path cannot be followed.
    """
    node = self.config
    for key in path.split('.'):
        # A section may hold a non-mapping (an empty `loop:` parses to None);
        # treat that like a missing key instead of raising AttributeError.
        if not isinstance(node, dict) or key not in node:
            return default_value
        node = node[key]
    return node

def database_location(self):
    """Return the location of the database folder.

    Precedence: a non-null `database_location` config entry, then
    FLATHUNTER_DATABASE_LOCATION (when environment overrides are enabled),
    then the parent directory of the directory containing this module.
    """
    # No separate membership check: an entry that is present but null must
    # fall through to the environment/default instead of being returned as None.
    config_database_location = self._read_yaml_path('database_location')
    if config_database_location is not None:
        return config_database_location
    if self.useEnvironment and Env.FLATHUNTER_DATABASE_LOCATION is not None:
        return Env.FLATHUNTER_DATABASE_LOCATION
    return os.path.abspath(os.path.dirname(os.path.abspath(__file__)) + "/..")

def set_searchers(self, searchers):
Expand All @@ -91,19 +134,63 @@ def get_filter(self):
builder.read_config(self.config)
return builder.build()

def target_urls(self):
    """Return the list of search URLs to crawl.

    When environment overrides are enabled and FLATHUNTER_TARGET_URLS is set,
    its ';'-separated entries are used; otherwise the `urls` config entry is.

    Returns:
        A list of URLs; empty when none are configured.
    """
    if self.useEnvironment and Env.FLATHUNTER_TARGET_URLS is not None:
        # Drop blank entries so an empty variable or a trailing ';' does not
        # yield '' as a URL and slip past callers' "no URLs configured" checks.
        candidates = (url.strip() for url in Env.FLATHUNTER_TARGET_URLS.split(';'))
        return [url for url in candidates if url]
    # A bare `urls:` key parses to None; callers take len() of / iterate the result.
    return self._read_yaml_path('urls', []) or []

def verbose_logging(self):
    """Return True when verbose logging is requested.

    With environment overrides enabled, any value of FLATHUNTER_VERBOSE_LOG
    switches it on. Otherwise the truthiness of the `verbose` config entry
    decides.
    """
    if self.useEnvironment and Env.FLATHUNTER_VERBOSE_LOG is not None:
        return True
    # Truthiness rather than `is not None`: `verbose: false` must stay quiet.
    return bool(self._read_yaml_path('verbose'))

def loop_is_active(self):
    """Tell whether hunting should repeat in a loop.

    A set FLATHUNTER_LOOP_PERIOD_SECONDS (with environment overrides enabled)
    means the loop is active; otherwise the `loop.active` config entry is
    returned, defaulting to False.
    """
    period_set_in_env = self.useEnvironment and Env.FLATHUNTER_LOOP_PERIOD_SECONDS is not None
    return True if period_set_in_env else self._read_yaml_path('loop.active', False)

def loop_period_seconds(self):
    """Return the pause between two hunting rounds, in seconds.

    FLATHUNTER_LOOP_PERIOD_SECONDS (converted with int()) is used when
    environment overrides are enabled and it is set; otherwise the
    `loop.sleeping_time` config entry, defaulting to ten minutes.
    """
    period_from_env = Env.FLATHUNTER_LOOP_PERIOD_SECONDS if self.useEnvironment else None
    if period_from_env is None:
        return self._read_yaml_path('loop.sleeping_time', 60 * 10)
    return int(period_from_env)

def has_website_config(self):
    """Report whether website settings are available.

    True when FLATHUNTER_WEBSITE_SESSION_KEY is set (environment overrides
    enabled) or the config contains a `website` section.
    """
    session_key_in_env = self.useEnvironment and Env.FLATHUNTER_WEBSITE_SESSION_KEY is not None
    return bool(session_key_in_env or 'website' in self.config)

def website_session_key(self):
    """Return the website session key.

    FLATHUNTER_WEBSITE_SESSION_KEY wins when environment overrides are
    enabled and it is set; otherwise the `website.session_key` config entry
    (None if absent).
    """
    key_from_env = Env.FLATHUNTER_WEBSITE_SESSION_KEY if self.useEnvironment else None
    if key_from_env is None:
        return self._read_yaml_path('website.session_key', None)
    return key_from_env

def website_domain(self):
    """Return the website domain.

    Falls back to the `website.domain` config entry (None if absent) unless
    environment overrides are enabled and FLATHUNTER_WEBSITE_DOMAIN is set.
    """
    if not self.useEnvironment or Env.FLATHUNTER_WEBSITE_DOMAIN is None:
        return self._read_yaml_path('website.domain', None)
    return Env.FLATHUNTER_WEBSITE_DOMAIN

def website_bot_name(self):
    """Return the bot name used by the website.

    FLATHUNTER_WEBSITE_BOT_NAME wins when environment overrides are enabled
    and it is set; otherwise the `website.bot_name` config entry (None if
    absent).
    """
    if self.useEnvironment:
        # Env may not declare this attribute; a direct `Env.…` access would
        # raise AttributeError, so fall back to reading the process environment.
        bot_name = getattr(Env, "FLATHUNTER_WEBSITE_BOT_NAME",
                           os.environ.get("FLATHUNTER_WEBSITE_BOT_NAME"))
        if bot_name is not None:
            return bot_name
    return self._read_yaml_path('website.bot_name', None)

def captcha_enabled(self):
    """Report whether the loaded configuration has a `captcha` section."""
    configured_sections = self.config.keys()
    return "captcha" in configured_sections

def get_captcha_solver(self) -> CaptchaSolver:
"""Get configured captcha solver"""
captcha_config = self.config.get("captcha", {})

imagetyperz_token = captcha_config.get("imagetyperz", {}).get("token", "")
twocaptcha_api_key = captcha_config.get("2captcha", {}).get("api_key", "")

if self.useEnvironment and Env.FLATHUNTER_IMAGETYPERZ_TOKEN is not None:
imagetyperz_token = Env.FLATHUNTER_IMAGETYPERZ_TOKEN
else:
imagetyperz_token = self._read_yaml_path("captcha.imagetyperz.token", "")
if imagetyperz_token:
return ImageTyperzSolver(imagetyperz_token)

if self.useEnvironment and Env.FLATHUNTER_2CAPTCHA_KEY is not None:
twocaptcha_api_key = Env.FLATHUNTER_2CAPTCHA_KEY
else:
twocaptcha_api_key = self._read_yaml_path("captcha.2captcha.api_key", "")
if twocaptcha_api_key:
return TwoCaptchaSolver(twocaptcha_api_key)

Expand Down
4 changes: 2 additions & 2 deletions flathunter/hunter.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ def try_crawl(searcher, url, max_pages):
logger.info("Error while scraping url %s:\n%s", url, traceback.format_exc())
return []

return chain(*[try_crawl(searcher,url, max_pages)
return chain(*[try_crawl(searcher, url, max_pages)
for searcher in self.config.searchers()
for url in self.config.get('urls', [])])
for url in self.config.target_urls()])

def hunt_flats(self, max_pages=None):
"""Crawl, process and filter exposes"""
Expand Down
8 changes: 8 additions & 0 deletions flathunter/logging.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Provides logger"""
import logging
import os
from pprint import pformat

class LoggerHandler(logging.StreamHandler):
"""Formats logs and alters WebDriverManager's logs properties"""
Expand Down Expand Up @@ -50,3 +51,10 @@ def setup_wdm_logger(wdm_new_logger_handler):

# Setup "requests" module's logger
logging.getLogger("requests").setLevel(logging.WARNING)

def configure_logging(config):
"""Set logger levels from the config and log the config at debug level.

When config.verbose_logging() is true, the flathunter logger is set to DEBUG
and the webdriver-manager logger to INFO. A debug line with the
pretty-printed `config` object is also emitted.
NOTE(review): indentation is lost in this view, so it is unclear whether that
debug line sits inside the verbose branch - confirm against the real file.
"""
if config.verbose_logging():
logger.setLevel(logging.DEBUG)
# Allow logging of "webdriver-manager" module on verbose mode
wdm_logger.setLevel(logging.INFO)
logger.debug("Settings from config: %s", pformat(config))
16 changes: 6 additions & 10 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from flathunter.googlecloud_idmaintainer import GoogleCloudIdMaintainer
from flathunter.web_hunter import WebHunter
from flathunter.config import Config
from flathunter.logging import logger, wdm_logger
from flathunter.logging import logger, wdm_logger, configure_logging

from flathunter.web import app

Expand All @@ -21,22 +21,18 @@
# Use Google Cloud DB if we run on the cloud
id_watch = GoogleCloudIdMaintainer()

# adjust log level, if required
if config.get('verbose'):
logger.setLevel(logging.DEBUG)
# Allow logging of "webdriver-manager" module on verbose mode
wdm_logger.setLevel(logging.INFO)
configure_logging(config)

# initialize search plugins for config
config.init_searchers()

hunter = WebHunter(config, id_watch)

app.config["HUNTER"] = hunter
if 'website' in config:
app.secret_key = config['website']['session_key']
app.config["DOMAIN"] = config['website']['domain']
app.config["BOT_NAME"] = config['website']['bot_name']
if config.has_website_config():
app.secret_key = config.website_session_key()
app.config["DOMAIN"] = config.website_domain()
app.config["BOT_NAME"] = config.website_bot_name()
else:
app.secret_key = b'Not a secret'
notifiers = config.get("notifiers", [])
Expand Down
2 changes: 1 addition & 1 deletion test/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def test_loads_config(self):
config_file.flush()
config_file.close()
created = True
config = Config()
config = Config("config.yaml")
self.assertTrue(len(config.get('urls')) > 0, "Expected URLs in config file")
if created:
os.remove("config.yaml")
Expand Down
23 changes: 13 additions & 10 deletions test/test_web_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
from dummy_crawler import DummyCrawler

DUMMY_CONFIG = """
notifiers:
- telegram
telegram:
bot_token: 1234xxx.12345
Expand Down Expand Up @@ -107,7 +110,7 @@ def test_hunt_via_post_with_filters(hunt_client, **kwargs):
def test_render_index_after_login(hunt_client):
rv = hunt_client.get('/login_with_telegram?id=1234&first_name=Jason&last_name=Bourne&username=mattdamon&photo_url=https%3A%2F%2Fi.example.com%2Fprofile.jpg&auth_date=123455678&hash=c691a55de4e28b341ccd0b793d4ca17f09f6c87b28f8a893621df81475c25952')
assert rv.status_code == 302
assert rv.headers['location'] == 'http://localhost/'
assert rv.headers['location'] == '/'
assert 'user' in session
rv = hunt_client.get('/')
assert rv.status_code == 200
Expand All @@ -118,7 +121,7 @@ def test_do_not_send_messages_if_notifications_disabled(hunt_client, **kwargs):
app.config['HUNTER'].set_filters_for_user(1234, {})
rv = hunt_client.get('/login_with_telegram?id=1234&first_name=Jason&last_name=Bourne&username=mattdamon&photo_url=https%3A%2F%2Fi.example.com%2Fprofile.jpg&auth_date=123455678&hash=c691a55de4e28b341ccd0b793d4ca17f09f6c87b28f8a893621df81475c25952')
assert rv.status_code == 302
assert rv.headers['location'] == 'http://localhost/'
assert rv.headers['location'] == '/'
assert 'user' in session
rv = hunt_client.post('/toggle_notifications')
assert rv.status_code == 201
Expand All @@ -133,7 +136,7 @@ def test_toggle_notification_status(hunt_client):
app.config['HUNTER'].set_filters_for_user(1234, {})
rv = hunt_client.get('/login_with_telegram?id=1234&first_name=Jason&last_name=Bourne&username=mattdamon&photo_url=https%3A%2F%2Fi.example.com%2Fprofile.jpg&auth_date=123455678&hash=c691a55de4e28b341ccd0b793d4ca17f09f6c87b28f8a893621df81475c25952')
assert rv.status_code == 302
assert rv.headers['location'] == 'http://localhost/'
assert rv.headers['location'] == '/'
assert 'user' in session
rv = hunt_client.post('/toggle_notifications')
assert rv.status_code == 201
Expand All @@ -145,7 +148,7 @@ def test_toggle_notification_status(hunt_client):
def test_update_filters(hunt_client):
rv = hunt_client.get('/login_with_telegram?id=1234&first_name=Jason&last_name=Bourne&username=mattdamon&photo_url=https%3A%2F%2Fi.example.com%2Fprofile.jpg&auth_date=123455678&hash=c691a55de4e28b341ccd0b793d4ca17f09f6c87b28f8a893621df81475c25952')
assert rv.status_code == 302
assert rv.headers['location'] == 'http://localhost/'
assert rv.headers['location'] == '/'
assert 'user' in session
rv = hunt_client.post('/filter', data = { 'b': '3' })
assert app.config['HUNTER'].get_filters_for_user(1234) == { 'b': 3.0 }
Expand All @@ -158,7 +161,7 @@ def test_update_filters_not_logged_in(hunt_client):
def test_index_logged_in_with_filters(hunt_client):
rv = hunt_client.get('/login_with_telegram?id=1234&first_name=Jason&last_name=Bourne&username=mattdamon&photo_url=https%3A%2F%2Fi.example.com%2Fprofile.jpg&auth_date=123455678&hash=c691a55de4e28b341ccd0b793d4ca17f09f6c87b28f8a893621df81475c25952')
assert rv.status_code == 302
assert rv.headers['location'] == 'http://localhost/'
assert rv.headers['location'] == '/'
assert 'user' in session
hunt_client.post('/filter', data = { 'max_size': '35' })
rv = hunt_client.get('/')
Expand All @@ -167,33 +170,33 @@ def test_index_logged_in_with_filters(hunt_client):
def test_login_with_telegram(hunt_client):
rv = hunt_client.get('/login_with_telegram?id=1234&first_name=Jason&last_name=Bourne&username=mattdamon&photo_url=https%3A%2F%2Fi.example.com%2Fprofile.jpg&auth_date=123455678&hash=c691a55de4e28b341ccd0b793d4ca17f09f6c87b28f8a893621df81475c25952')
assert rv.status_code == 302
assert rv.headers['location'] == 'http://localhost/'
assert rv.headers['location'] == '/'
assert 'user' in session
assert session['user']['first_name'] == 'Jason'
assert json.dumps(session['user']) == '{"id": "1234", "first_name": "Jason", "last_name": "Bourne", "username": "mattdamon", "photo_url": "https://i.example.com/profile.jpg", "auth_date": "123455678"}'

def test_login_with_invalid_url(hunt_client):
rv = hunt_client.get('/login_with_telegram?username=mattdamon&id=1234&first_name=Jason&last_name=Bourne&photo_url=https%3A%2F%2Fi.example.com%2Fprofile.jpg&auth_date=123455678')
assert rv.status_code == 302
assert rv.headers['location'] == 'http://localhost/'
assert rv.headers['location'] == '/'
assert 'user' not in session

def test_login_with_missing_params(hunt_client):
rv = hunt_client.get('/login_with_telegram?ad=1234&hash=51d737e1a3ba0821359955a36d3671f2957b5a8f1f32f9a133ce95836c44a9a9')
assert rv.status_code == 302
assert rv.headers['location'] == 'http://localhost/'
assert rv.headers['location'] == '/'
assert 'user' not in session

def test_login_with_invalid_hash(hunt_client):
rv = hunt_client.get('/login_with_telegram?id=1234&first_name=Jason&last_name=Bourne&username=mattdamon&photo_url=https%3A%2F%2Fi.example.com%2Fprofile.jpg&auth_date=123455678&hash=0091a55de4e28b341ccd0b793d4ca17f09f6c87b28f8a893621df81475c25900')
assert rv.status_code == 302
assert rv.headers['location'] == 'http://localhost/'
assert rv.headers['location'] == '/'
assert 'user' not in session

def test_logout(hunt_client):
rv = hunt_client.get('/login_with_telegram?id=1234&first_name=Jason&last_name=Bourne&username=mattdamon&photo_url=https%3A%2F%2Fi.example.com%2Fprofile.jpg&auth_date=123455678&hash=c691a55de4e28b341ccd0b793d4ca17f09f6c87b28f8a893621df81475c25952')
assert rv.status_code == 302
assert rv.headers['location'] == 'http://localhost/'
assert rv.headers['location'] == '/'
assert 'user' in session
rv = hunt_client.get('/logout')
assert 'user' not in session

0 comments on commit 9734dc4

Please sign in to comment.