Skip to content

Commit

Permalink
Chore/improve logging (cowboy-bebug#7)
Browse files Browse the repository at this point in the history
* Add more logging in constructor

* Reset log timer whenever review is called

* Change default log interval 10 -> 5

* Prefix review logs with app_id

* Wrap logging in __log_status()

* Implement basic graceful exit

* Add a step in ci to check for patch version bump

* Bump version 0.2.2 -> 0.2.3
  • Loading branch information
cowboy-bebug authored Jun 23, 2020
1 parent 6394da0 commit fbf4be9
Show file tree
Hide file tree
Showing 4 changed files with 102 additions and 33 deletions.
77 changes: 64 additions & 13 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,25 +45,76 @@ jobs:
uses: actions/checkout@v2
with:
fetch-depth: 0
- name: Check for version bump
if: ${{ startsWith(github.head_ref, 'release') }}
- name: Check version bump and set versions as envvar
run: |
SEMVER_PATTERN="([0-9]+)\.([0-9]+)\.([0-9]+)"
VERSION=$(echo -n $(git diff origin/${{ github.base_ref }} -G '__version__' app_store_scraper/__version__.py))
VERSION=$(echo "$VERSION" | sed -E "s/.*\+__version__.*([0-9]+\.[0-9]+\.[0-9]+).+/\1/")
if [ "$VERSION" = "" ]; then
echo "Bump __version__ for release"
echo "Version must be bumped for every PR"
exit 1
fi
VERSION_OLD=$(echo "$VERSION" | sed -E "s/.*\-__version__ = \"($SEMVER_PATTERN).+/\1/")
VERSION_OLD_MAJOR=$(echo "$VERSION_OLD" | sed -E "s/$SEMVER_PATTERN/\1/")
VERSION_OLD_MINOR=$(echo "$VERSION_OLD" | sed -E "s/$SEMVER_PATTERN/\2/")
VERSION_OLD_PATCH=$(echo "$VERSION_OLD" | sed -E "s/$SEMVER_PATTERN/\3/")
VERSION_NEW=$(echo "$VERSION" | sed -E "s/.*\+__version__ = \"($SEMVER_PATTERN).+/\1/")
VERSION_NEW_MAJOR=$(echo "$VERSION_NEW" | sed -E "s/$SEMVER_PATTERN/\1/")
VERSION_NEW_MINOR=$(echo "$VERSION_NEW" | sed -E "s/$SEMVER_PATTERN/\2/")
VERSION_NEW_PATCH=$(echo "$VERSION_NEW" | sed -E "s/$SEMVER_PATTERN/\3/")
echo "::set-env name=VERSION_OLD::$VERSION_OLD"
echo "::set-env name=VERSION_OLD_MAJOR::$VERSION_OLD_MAJOR"
echo "::set-env name=VERSION_OLD_MINOR::$VERSION_OLD_MINOR"
echo "::set-env name=VERSION_OLD_PATCH::$VERSION_OLD_PATCH"
echo "::set-env name=VERSION_NEW::$VERSION_NEW"
echo "::set-env name=VERSION_NEW_MAJOR::$VERSION_NEW_MAJOR"
echo "::set-env name=VERSION_NEW_MINOR::$VERSION_NEW_MINOR"
echo "::set-env name=VERSION_NEW_PATCH::$VERSION_NEW_PATCH"
echo "Old version: $VERSION_OLD"
echo "New version: $VERSION_NEW"
- name: Check for patch version bump
if: ${{ !startsWith(github.head_ref, 'release') }}
run: |
if [ "$VERSION_OLD_MAJOR" = "$VERSION_NEW_MAJOR" ] &&
[ "$VERSION_OLD_MINOR" = "$VERSION_NEW_MINOR" ]; then
if (($VERSION_OLD_PATCH < $VERSION_NEW_PATCH)); then
echo "Bumped patch version $VERSION_OLD -> $VERSION_NEW"
exit 0
else
echo "Bump patch version in __version__"
exit 1
fi
else
echo "Version: $VERSION"
exit 0
echo "Major / minor version must be bumped in a release branch"
exit 1
fi
- name: Create tag
- name: Check for major / minor version bump
  # Runs only for PRs whose head branch name starts with "release".
  if: ${{ startsWith(github.head_ref, 'release') }}
  run: |
    # VERSION_OLD_* / VERSION_NEW_* are read from the environment; they are
    # exported with ::set-env by the "Check version bump and set versions as
    # envvar" step.
    #
    # Inside (( )) a single "=" is an assignment (and fails on a literal
    # number), so equality must be spelled "==".
    #
    # NOTE(review): the major-bump branch requires the minor number to stay
    # unchanged; a major bump that resets minor to 0 would be rejected here.
    # Confirm this is the intended release policy.
    if (($VERSION_OLD_MAJOR < $VERSION_NEW_MAJOR)) &&
       (($VERSION_OLD_MINOR == $VERSION_NEW_MINOR)); then
      echo "Bumped major version $VERSION_OLD -> $VERSION_NEW"
      exit 0
    elif (($VERSION_OLD_MAJOR == $VERSION_NEW_MAJOR)) &&
         (($VERSION_OLD_MINOR < $VERSION_NEW_MINOR)); then
      echo "Bumped minor version $VERSION_OLD -> $VERSION_NEW"
      exit 0
    else
      echo "Major / minor version must be bumped for release"
      exit 1
    fi
- name: Create and push tag
if: github.ref == 'refs/heads/master'
run: |
VERSION=$(echo -n $(git diff HEAD^1 -G '__version__' app_store_scraper/__version__.py))
VERSION=$(echo "$VERSION" | sed -E "s/.*\+__version__.*([0-9]+\.[0-9]+\.[0-9]+).+/\1/")
if [ "$VERSION" != "" ]; then
echo "Create and push v$VERSION tag"
git tag v"$VERSION"
git push origin v"$VERSION"
version=$(echo -n $(git diff HEAD^1 -G '__version__' app_store_scraper/__version__.py))
version=$(echo "$version" | sed -E "s/.*\+__version__.*([0-9]+\.[0-9]+\.[0-9]+).+/\1/")
if [ "$version" != "" ]; then
echo "Create and push v$version tag"
git tag v"$version"
git push origin v"$version"
fi
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,8 @@ Other optional parameters are:
- passed directly to `logging.basicConfig(level=log_level)`
- default is `"INFO"`
- `log_interval`
- log is produced every 10 seconds (by default) as a "heartbeat" (useful for a long scraping session)
- default is `10`
- log is produced every 5 seconds (by default) as a "heartbeat" (useful for a long scraping session)
- default is `5`


## Fetching Review
Expand Down
2 changes: 1 addition & 1 deletion app_store_scraper/__version__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
__title__ = "app-store-scraper"
__version__ = "0.2.2"
__version__ = "0.2.3"
__description__ = "Single API ☝ App Store Review Scraper 🧹"
__author__ = "Eric Lim"
__url__ = "https://github.com/cowboy-bebug/app-store-scraper"
Expand Down
52 changes: 35 additions & 17 deletions app_store_scraper/app_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,18 +37,19 @@ def __init__(
app_id=None,
log_format="%(asctime)s [%(levelname)s] %(name)s - %(message)s",
log_level="INFO",
log_interval=10,
log_interval=5,
):
logging.basicConfig(format=log_format, level=log_level.upper())
self.country = str(country).lower()
self.app_name = re.sub(r"[\W_]+", "-", str(app_name).lower())
if app_id is None:
logger.info("Searching for app id")
app_id = self.search_id()
self.app_id = int(app_id)
self.url = self.__landing_url()
self.reviews = list()
self.reviews_count = int()

logging.basicConfig(format=log_format, level=log_level.upper())
self.__log_interval = float(log_interval)
self.__log_timer = float()
self.__fetched_count = int()
Expand All @@ -71,6 +72,11 @@ def __init__(
"additionalPlatforms": "appletv,ipad,iphone,mac",
}
self.__response = requests.Response()
logger.info(
f"Initialised: {self.__class__.__name__}"
f"('{self.country}', '{self.app_name}', {self.app_id})"
)
logger.info(f"Ready to fetch reviews from: {self.url}")

def __repr__(self):
return "{}(country='{}', app_name='{}', app_id={})".format(
Expand Down Expand Up @@ -144,12 +150,18 @@ def __parse_next(self):
self.__request_offset = int(offset)
self.__request_params.update({"offset": self.__request_offset})

def __log_status(self):
    """Emit one INFO-level progress line for this scraper.

    The line is prefixed with ``self.app_id`` and reports the value of
    ``self.__fetched_count`` together with ``self.reviews_count``.
    """
    # Build the message first so the logging call itself stays a one-liner.
    status = (
        f"[id:{self.app_id}] Fetched {self.__fetched_count} reviews "
        f"({self.reviews_count} fetched in total)"
    )
    logger.info(status)

def __heartbeat(self):
interval = self.__log_interval
if self.__log_timer == 0:
self.__log_timer = time.time()
if time.time() - self.__log_timer > interval:
logger.info(f"[{interval}s HEARTBEAT] Fetched {self.reviews_count} reviews")
self.__log_status()
self.__log_timer = 0

def search_id(self):
Expand All @@ -160,17 +172,23 @@ def search_id(self):
return app_id

def review(self, how_many=sys.maxsize):
logger.info(f"Fetching reviews for {self.url}")
while True:
self.__heartbeat()
self.__get(
self.__request_url,
headers=self.__request_headers,
params=self.__request_params,
)
self.__parse_data()
self.__parse_next()
if self.__request_offset is None or self.__fetched_count >= how_many:
logger.info(f"Fetched {self.__fetched_count} reviews")
self.__fetched_count = 0
break
self.__log_timer = 0
try:
while True:
self.__heartbeat()
self.__get(
self.__request_url,
headers=self.__request_headers,
params=self.__request_params,
)
self.__parse_data()
self.__parse_next()
if self.__request_offset is None or self.__fetched_count >= how_many:
break
except KeyboardInterrupt:
logger.error("Keyboard interrupted")
except Exception as e:
logger.error(f"Something went wrong: {e}")
finally:
self.__log_status()
self.__fetched_count = 0

0 comments on commit fbf4be9

Please sign in to comment.