From 4ec71a303b4794d5c702c50e1c64b9175ed72b71 Mon Sep 17 00:00:00 2001 From: lighting9999 Date: Thu, 18 Sep 2025 21:55:59 +0800 Subject: [PATCH] fix covid_stats_via_xpath.py (#12975) * fix covid_stats_via_xpath.py Improve error handling. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix covid_stats_via_xpath.py typo * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix ruff * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * upgrade covid_stats_via_xpath.py * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update and fix covid_stats_via_xpath.py --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- web_programming/covid_stats_via_xpath.py | 44 +++++++++++++++++------- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/web_programming/covid_stats_via_xpath.py b/web_programming/covid_stats_via_xpath.py index 9c016ba414ea..88a248610441 100644 --- a/web_programming/covid_stats_via_xpath.py +++ b/web_programming/covid_stats_via_xpath.py @@ -1,7 +1,8 @@ """ -This is to show simple COVID19 info fetching from worldometers archive site using lxml -* The main motivation to use lxml in place of bs4 is that it is faster and therefore -more convenient to use in Python web projects (e.g. Django or Flask-based) +This script demonstrates fetching simple COVID-19 statistics from the +Worldometers archive site using lxml. lxml is chosen over BeautifulSoup +for its speed and convenience in Python web projects (such as Django or +Flask). """ # /// script @@ -25,15 +26,34 @@ class CovidData(NamedTuple): def covid_stats( - url: str = "https://web.archive.org/web/20250825095350/https://www.worldometers.info/coronavirus/", + url: str = ( + "https://web.archive.org/web/20250825095350/" + "https://www.worldometers.info/coronavirus/" + ), ) -> CovidData: xpath_str = '//div[@class = "maincounter-number"]/span/text()' - return CovidData( - *html.fromstring(httpx.get(url, timeout=10).content).xpath(xpath_str) + try: + response = httpx.get(url, timeout=10).raise_for_status() + except httpx.TimeoutException: + print( + "Request timed out. Please check your network connection " + "or try again later." + ) + return CovidData("N/A", "N/A", "N/A") + except httpx.HTTPStatusError as e: + print(f"HTTP error occurred: {e}") + return CovidData("N/A", "N/A", "N/A") + data = html.fromstring(response.content).xpath(xpath_str) + if len(data) != 3: + print("Unexpected data format. The page structure may have changed.") + data = "N/A", "N/A", "N/A" + return CovidData(*data) + + +if __name__ == "__main__": + fmt = ( + "Total COVID-19 cases in the world: {}\n" + "Total deaths due to COVID-19 in the world: {}\n" + "Total COVID-19 patients recovered in the world: {}" ) - - -fmt = """Total COVID-19 cases in the world: {} -Total deaths due to COVID-19 in the world: {} -Total COVID-19 patients recovered in the world: {}""" -print(fmt.format(*covid_stats())) + print(fmt.format(*covid_stats()))