Skip to content

Commit

Permalink
Fix: potential failure in node status detection
Browse files Browse the repository at this point in the history
  • Loading branch information
luyaxi committed Nov 26, 2023
1 parent cea1180 commit a72d3af
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 6 deletions.
14 changes: 10 additions & 4 deletions ToolServer/ToolServerManager/main.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import os
import psutil
import uvicorn
import httpx
import sqlite3
import asyncio
import traceback
import datetime
Expand Down Expand Up @@ -30,13 +30,19 @@ async def startup():
# create subprocess to update node status
if CONFIG['builtin_monitor']:
from node_checker import check_nodes_status_loop
checker = await NodeChecker.find_one()

checker = None
async for checker in NodeChecker.find_all():
if not psutil.pid_exists(checker.pid):
checker.delete()

if checker is None:
checker = NodeChecker(
manager_id=MANAGER_ID,
interval=float(CONFIG['node'].get('health_check_interval',1))
interval=float(CONFIG['node'].get('health_check_interval',1)),
pid=os.getpid()
)
await checker.insert()
await checker.save()

loop = asyncio.get_running_loop()
loop.create_task(check_nodes_status_loop())
Expand Down
3 changes: 2 additions & 1 deletion ToolServer/ToolServerManager/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,5 @@ class ToolServerNode(Document):

class NodeChecker(Document):
manager_id: str
interval: float
interval: float
pid: int
3 changes: 2 additions & 1 deletion ToolServer/ToolServerManager/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ httpx
PyYAML
motor
gunicorn
beanie
beanie
psutil

0 comments on commit a72d3af

Please sign in to comment.