fix location conflicts

Liu233w · May 25, 2021 · 1686ffc
1 parent a1a062a
commit 1686ffc
Show file tree

Hide file tree

Showing 3 changed files with 189 additions and 138 deletions.
diff --git a/twitter-harvester/api.py b/twitter-harvester/api.py
@@ -6,6 +6,7 @@
 from datetime import datetime, timedelta
 from time import sleep
 from random import random
+import json
 
 TwitterUrl = namedtuple(
     typename='TwitterUrl',
@@ -15,15 +16,16 @@
 
 def auth(couchdb: CouchDB, redis: Redis, endpoint: str):
     # Select valid token
-    now = datetime.utcnow().timestamp()
-    min_window = (datetime.utcnow() - timedelta(minutes=15)).timestamp()
-
     complete = False
     while not complete:
         token = None
-        while not token:
-            result = couchdb["tokens"].get_query_result(
-                selector={
+        try:
+            couchdb.connect()
+            while not token:
+                now = datetime.utcnow().timestamp()
+                min_window = (datetime.utcnow() - timedelta(minutes=15)).timestamp()
+
+                selector = {
                     # ensure less than x calls per window
                     endpoint: {
                         "$or": [
@@ -36,49 +38,52 @@ def auth(couchdb: CouchDB, redis: Redis, endpoint: str):
                     "last_used": {
                         "$or": [
                             {"$exists": False},
-                            {"$lt": now - 2}
+                            {"$lt": now - 1}
                         ]
                     }
-                },
-                sort=[{"last_used": "asc"}],
-                limit=1).all()
-            if len(result) == 0:
-                print("No valid token, waiting...")
-                sleep(1)
-                continue
-            else:
-                token = result[0]
-
-        doc = couchdb["tokens"][token["_id"]]
-        doc.fetch()
-
-        # Update last_used
-        needs_new_window = endpoint not in doc or doc[endpoint]["since"] < min_window
-        if needs_new_window:
-            doc.update({
-                "last_used": now,
-                endpoint: {
-                    "since": now,
-                    "total": 1
-                }
-            })
-        else:
-            doc.update({
-                "last_used": now,
-                endpoint: {
-                    **doc[endpoint],
-                    "total": doc[endpoint]["total"] + 1
                 }
-            })
 
-        try:
+                result = couchdb["tokens"].get_query_result(
+                    selector=selector,
+                    sort=[{"last_used": "asc"}],
+                    limit=1).all()
+                if len(result) == 0:
+                    print("No valid token, waiting...")
+                    couchdb.disconnect()
+                    sleep(random() * 0.5 + 0.5)
+                    couchdb.connect()
+                else:
+                    token = result[0]
+
+            doc = couchdb["tokens"][token["_id"]]
+
+            # Update last_used
+            needs_new_window = endpoint not in doc or doc[endpoint]["since"] < min_window
+            if needs_new_window:
+                doc.update({
+                    "last_used": now,
+                    endpoint: {
+                        "since": now,
+                        "total": 1
+                    }
+                })
+            else:
+                doc.update({
+                    "last_used": now,
+                    endpoint: {
+                        **doc[endpoint],
+                        "total": doc[endpoint]["total"] + 1
+                    }
+                })
+
             doc.save()
             print("Using token: %s" % token["_id"])
             complete = True
         except Exception as e:
-            print("CouchDB Token Error", e)
+            print("Auth Error:", e)
             sleep(random() * 0.3 + 0.1)
-            continue
+        finally:
+            couchdb.disconnect()
 
     return token["token"]
 

diff --git a/twitter-harvester/features.py b/twitter-harvester/features.py
@@ -5,61 +5,65 @@
 
 
 def load_features(filepath, couchdb: CouchDB):
-    if "features" not in couchdb.all_dbs():
-        couchdb.create_database("features", partitioned=False)
-        couchdb["features"].create_query_index(fields=["newest"])
-        couchdb["features"].create_query_index(fields=[{"oldest": "desc"}])
+    try:
+        couchdb.connect()
+        if "features" not in couchdb.all_dbs():
+            couchdb.create_database("features", partitioned=False)
+            couchdb["features"].create_query_index(fields=["newest"])
+            couchdb["features"].create_query_index(fields=[{"oldest": "desc"}])
 
-    poly_features = []
-    with open(filepath) as file:
-        content = file.read()
-        polygons = json.loads(content)
-        poly_features = polygons["features"]
+        poly_features = []
+        with open(filepath) as file:
+            content = file.read()
+            polygons = json.loads(content)
+            poly_features = polygons["features"]
 
-    features = []
-    for feature in poly_features:
-        if feature["geometry"] is None:
-            continue
-        coords = feature["geometry"]["coordinates"]
-        box = [None, None, None, None]
+        features = []
+        for feature in poly_features:
+            if feature["geometry"] is None:
+                continue
+            coords = feature["geometry"]["coordinates"]
+            box = [None, None, None, None]
 
-        for x in coords:
-            for y in x:
-                if box[0] is None or y[0] < box[0]:
-                    box[0] = y[0]
-                if box[1] is None or y[1] < box[1]:
-                    box[1] = y[1]
-                if box[2] is None or y[0] > box[2]:
-                    box[2] = y[0]
-                if box[3] is None or y[1] > box[3]:
-                    box[3] = y[1]
+            for x in coords:
+                for y in x:
+                    if box[0] is None or y[0] < box[0]:
+                        box[0] = y[0]
+                    if box[1] is None or y[1] < box[1]:
+                        box[1] = y[1]
+                    if box[2] is None or y[0] > box[2]:
+                        box[2] = y[0]
+                    if box[3] is None or y[1] > box[3]:
+                        box[3] = y[1]
 
-        id = sha1(json.dumps(box).encode('utf8')).digest().hex()
-        features.append({
-            "id": id,
-            "name": feature["properties"]["name"],
-            "loc_pid": feature["properties"]["loc_pid"],
-            "box": box
-        })
+            id = sha1(json.dumps(box).encode('utf8')).digest().hex()
+            features.append({
+                "id": id,
+                "name": feature["properties"]["name"],
+                "loc_pid": feature["properties"]["loc_pid"],
+                "box": box
+            })
 
-    docs = couchdb["features"].get_query_result(
-        selector={
-            "_id": {
-                "$gt": None
-            }
-        }).all()
+        docs = couchdb["features"].get_query_result(
+            selector={
+                "_id": {
+                    "$gt": None
+                }
+            }).all()
 
-    known_ids = list(map(lambda doc: doc["_id"], docs))
-    new_features = list(filter(lambda feature: feature["id"] not in known_ids, features))
+        known_ids = list(map(lambda doc: doc["_id"], docs))
+        new_features = list(filter(lambda feature: feature["id"] not in known_ids, features))
 
-    new_docs = list(map(lambda feature: {
-        "_id": feature["id"],
-        "box": feature["box"],
-        "name": feature["name"],
-        "loc_pid": feature["loc_pid"],
-        "newest": None,
-        "oldest": None
-    }, new_features))
-    couchdb["features"].bulk_docs(new_docs)
+        new_docs = list(map(lambda feature: {
+            "_id": feature["id"],
+            "box": feature["box"],
+            "name": feature["name"],
+            "loc_pid": feature["loc_pid"],
+            "newest": None,
+            "oldest": None
+        }, new_features))
+        couchdb["features"].bulk_docs(new_docs)
+    finally:
+        couchdb.disconnect()
 
     return features