
Merge pull request #206 from BelKed/advanced-search
Implementation of advanced search
tomasbedrich authored Mar 28, 2023
2 parents eb000ff + cc0c92f commit bef4ca0
Showing 10 changed files with 461 additions and 520 deletions.
241 changes: 107 additions & 134 deletions pycaching/geocaching.py
@@ -7,14 +7,14 @@
import re
import subprocess
from os import path
from typing import Optional, Union
from typing import Generator, Optional, Union
from urllib.parse import parse_qs, urljoin, urlparse

import bs4
import requests
from bs4.element import Script # Direct import as `bs4.Script` requires version >= 4.9.1.

from pycaching.cache import Cache, Size, Status
from pycaching.cache import Cache
from pycaching.errors import Error, LoginFailedException, NotLoggedInException, PMOnlyException, TooManyRequestsError
from pycaching.geo import Point, Rectangle
from pycaching.log import Log
@@ -52,7 +52,7 @@ class Geocaching(object):
"search": "play/search",
"search_more": "play/search/more-results",
"my_logs": "my/logs.aspx",
"api_search": "api/proxy/web/search",
"api_search": "api/proxy/web/search/v2",
}
_credentials_file = ".gc_credentials"

@@ -243,116 +243,43 @@ def get_logged_user(self, login_page=None):
m = re.search(r'"username":\s*"(.*)"', js_content)
return m.group(1) if m else None

def search(self, point, limit=float("inf")):
"""Return a generator of caches around some point.
Search for caches around some point by loading search pages and parsing the data from these
pages. Yield :class:`.Cache` objects filled with data from search page. You can provide limit
as a convenient way to stop generator after certain number of caches.
:param .geo.Point point: Search center point.
:param int limit: Maximum number of caches to generate.
"""
logging.info("Searching at {}".format(point))

start_index = 0
while True:
# get one page
geocaches_table, whole_page = self._search_get_page(point, start_index)
rows = geocaches_table.find_all("tr")

# leave loop if there are no (more) results
if not rows:
return

# prepare language-dependent mappings
if start_index == 0:
cache_sizes_filter_wrapper = whole_page.find("div", class_="cache-sizes-wrapper")
localized_size_mapping = {
# key = "Small" (localized), value = Size.small
label.find("span").text.strip(): Size.from_number(label.find("input").get("value"))
for label in cache_sizes_filter_wrapper.find_all("label")
}

# parse caches in result
for start_index, row in enumerate(rows, start_index):

limit -= 1 # handle limit
if limit < 0:
return

# parse raw data
cache_details = row.find("span", "cache-details").text.split("|")
wp = cache_details[1].strip()

# create and fill cache object
# values are sanitized and converted in Cache setters
c = Cache(self, wp)
c.type = cache_details[0]
c.name = row.find("span", "cache-name").text
badge = row.find("svg", class_="badge")
c.found = "found" in str(badge) if badge is not None else False
c.favorites = row.find(attrs={"data-column": "FavoritePoint"}).text
if not (row.get("class") and "disabled" in row.get("class")):
c.status = Status.enabled
c.pm_only = row.find("td", "pm-upsell") is not None

if c.pm_only:
# PM-only caches don't have the other attributes filled in
yield c
continue

c.size = localized_size_mapping[row.find(attrs={"data-column": "ContainerSize"}).text.strip()]
c.difficulty = row.find(attrs={"data-column": "Difficulty"}).text
c.terrain = row.find(attrs={"data-column": "Terrain"}).text
c.hidden = row.find(attrs={"data-column": "PlaceDate"}).text
c.author = row.find("span", "owner").text[3:] # delete "by "

logging.debug("Cache parsed: {}".format(c))
yield c

start_index += 1

def _search_get_page(self, point, start_index):
"""Return one page for standard search as class:`bs4.BeautifulSoup` object.
:param .geo.Point point: Search center point.
:param int start_index: Determines the page. If start_index is greater than zero, this
method will use AJAX andpoint which is much faster.
def search(
self,
point: Point,
limit: int = float("inf"),
*,
sort_by: Union[str, SortOrder] = SortOrder.date_last_visited,
reverse: bool = False,
per_query: int = 200,
wait_sleep: bool = True,
) -> Generator[Optional[Cache], None, None]:
"""Search for caches around a specified location using a search API.
:param point: The :class:`.geo.Point` object representing the center point of the search.
:param limit: The maximum number of caches to load.
Defaults to infinity.
:param sort_by: The criterion to sort the caches by.
Defaults to :code:`SortOrder.date_last_visited`.
:param reverse: If :code:`True`, the order of the results is reversed.
Defaults to :code:`False`.
:param per_query: The number of caches to request in each query.
Defaults to :code:`200`.
:param wait_sleep: If rate limits are exceeded, wait an appropriate time when set
to :code:`True`, otherwise just yield :code:`None`.
Defaults to :code:`True`.
:return: A generator that yields :class:`.Cache` objects.
"""
assert hasattr(point, "format") and callable(point.format)
logging.debug("Loading page from start_index {}".format(start_index))

if start_index == 0:
# first request has to load normal search page
logging.debug("Using normal search endpoint")

# make request
res = self._request(
self._urls["search"],
params={
"origin": point.format_decimal(),
},
)
return res.find(id="geocaches"), res

else:
# other requests can use AJAX endpoint
logging.debug("Using AJAX search endpoint")

# make request
res = self._request(
self._urls["search_more"],
params={
"origin": point.format_decimal(),
"startIndex": start_index,
"ssvu": 2,
"selectAll": "false",
},
expect="json",
)

return bs4.BeautifulSoup(res["HtmlString"].strip(), "html.parser"), None
return self.advanced_search(
{
"origin": "{},{}".format(point.latitude, point.longitude),
"asc": str(not reverse).lower(),
"sort": sort_by.value,
},
per_query=per_query,
limit=limit,
wait_sleep=wait_sleep,
)
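
For orientation, here is a minimal usage sketch of the new point-based search. It is only a sketch: it assumes a successful :code:`pycaching.login()` call, and the credentials and coordinates are made up for the example.

import pycaching
from pycaching.geo import Point

# Sketch only: hypothetical credentials and coordinates.
geocaching = pycaching.login("user", "password")
for cache in geocaching.search(Point(50.08, 14.42), limit=5):
    print(cache.wp, cache.name)  # waypoint code and cache name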

@deprecated
def search_quick(self, area):
@@ -366,55 +293,99 @@ def search_quick(self, area):

return self.search_rect(area)

# add some shortcuts ------------------------------------------------------

def search_rect(
self,
rect: Rectangle,
limit: int = float("inf"),
*,
per_query: int = 200,
sort_by: Union[str, SortOrder] = SortOrder.date_last_visited,
reverse: bool = False,
limit: int = float("inf"),
per_query: int = 200,
origin: Optional[Point] = None,
wait_sleep: bool = True
):
"""
Return a generator of caches in given Rectange area.
:param rect: Search area.
:param int per_query: Number of caches requested in single query.
:param sort_by: Order cached by given criterion.
:param reverse: Reverse sort order.
:param limit: Maximum number of caches to return.
:param origin: Origin point for search by distance.
:param wait_sleep: In case of rate limits exceeding, wait appropriate time if set True,
otherwise just yield None.
wait_sleep: bool = True,
) -> Generator[Optional[Cache], None, None]:
"""Search for caches in a specified :class:`.Rectangle` area using a search API.
:param rect: The :class:`.Rectangle` object representing the search area.
:param limit: The maximum number of caches to load.
Defaults to infinity.
:param sort_by: The criterion to sort the caches by.
Defaults to :code:`SortOrder.date_last_visited`.
:param reverse: If :code:`True`, the order of the results is reversed.
Defaults to :code:`False`.
:param per_query: The number of caches to request in each query.
Defaults to :code:`200`.
:param origin: The origin point for search by distance, required when sorting by distance.
:param wait_sleep: If rate limits are exceeded, wait an appropriate time when set
to :code:`True`, otherwise just yield :code:`None`.
Defaults to :code:`True`.
:return: A generator that yields :class:`.Cache` objects.
"""

if not isinstance(sort_by, SortOrder):
sort_by = SortOrder(sort_by)

if limit <= 0:
return

take_amount = min(limit, per_query)
params = {
"box": "{},{},{},{}".format(
rect.corners[0].latitude,
rect.corners[0].longitude,
rect.corners[1].latitude,
rect.corners[1].longitude,
),
"take": take_amount,
"asc": str(not reverse).lower(),
"skip": 0,
"sort": sort_by.value,
}

if sort_by is SortOrder.distance:
assert isinstance(origin, Point)
params["origin"] = "{},{}".format(origin.latitude, origin.longitude)

return self.advanced_search(
params,
per_query=per_query,
limit=limit,
wait_sleep=wait_sleep,
)
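
A similar sketch for the rectangle search, sorting by distance, which as shown above requires an :code:`origin` point. The corner coordinates are invented, and the import path for :code:`SortOrder` is an assumption about where this PR defines the enum.

from pycaching.geo import Point, Rectangle
from pycaching.geocaching import SortOrder  # assumed import path

# Sketch only: assumes `geocaching` is an authenticated Geocaching instance.
rect = Rectangle(Point(50.00, 14.00), Point(50.10, 14.20))
origin = Point(50.05, 14.10)  # required because we sort by distance
for cache in geocaching.search_rect(rect, limit=10, sort_by=SortOrder.distance, origin=origin):
    print(cache.wp)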

def advanced_search(
self,
options: dict,
limit: int = float("inf"),
per_query: int = 200,
wait_sleep: bool = True,
) -> Generator[Optional[Cache], None, None]:
"""Perform an advanced search for geocaches with specific search criteria.
The search is performed using the options provided in the :code:`options` parameter.
Example of the :code:`options` parameter::
# https://www.geocaching.com/play/search?owner[0]=Geocaching%20HQ&a=0
options = {"owner[0]": "Geocaching HQ", "a": "0"}
:param options: A dictionary of search options.
:param limit: The maximum number of caches to load.
Defaults to infinity.
:param per_query: The number of caches to request in each query.
Defaults to :code:`200`.
:param wait_sleep: If rate limits are exceeded, wait an appropriate time when set
to :code:`True`, otherwise just yield :code:`None`.
Defaults to :code:`True`.
:return: A generator that yields :class:`.Cache` objects.
"""

if limit <= 0:
return

take_amount = min(limit, per_query)

params = options.copy()
params.update(
{
"take": take_amount,
"skip": 0,
}
)

total, offset = None, 0
while (offset < limit) and ((total is None) or (offset < total)):
params["skip"] = offset
@@ -434,6 +405,8 @@ def search_rect
total = resp["total"]
offset += take_amount
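
Finally, a sketch of :code:`advanced_search` that reuses the options example from the docstring above and shows the :code:`wait_sleep=False` behaviour, where :code:`None` is yielded whenever a rate limit is hit; the ten-second back-off is an arbitrary choice for the example.

import time

# Sketch only: assumes `geocaching` is an authenticated Geocaching instance.
options = {"owner[0]": "Geocaching HQ", "a": "0"}
for cache in geocaching.advanced_search(options, limit=25, wait_sleep=False):
    if cache is None:
        time.sleep(10)  # rate limit hit; back off before continuing
        continue
    print(cache.wp, cache.name)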

# add some shortcuts ------------------------------------------------------

def geocode(self, location):
"""Return a :class:`.Point` object from geocoded location.
