Skip to content

Commit

Permalink
Start creating admin
Browse files Browse the repository at this point in the history
  • Loading branch information
wilsonzlin committed May 16, 2024
1 parent 549d9d9 commit f755fd9
Show file tree
Hide file tree
Showing 10 changed files with 291 additions and 0 deletions.
134 changes: 134 additions & 0 deletions admin/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
from common.data import DatasetEmbModel
from common.data import load_embs
from common.data import load_table
from common.util import env
from db_rpc_client_py import DbRpcClient
from fastapi import FastAPI
from fastapi import Form
from fastapi import Request
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from pandas import DataFrame
from typing import Annotated
import msgpack
import os
import uvicorn

# Directory that contains this module; template and static paths below are built from it.
DIR = os.path.dirname(os.path.realpath(__file__))

# Handle on the "hndr" database, obtained from the db-rpc client.
db = DbRpcClient(
    api_key=env("DB_RPC_API_KEY"),
    endpoint="https://db-rpc.posh.wilsonl.in",
).database("hndr")


def get_kv(k: str):
    """Return the `v` column of the `kv` row whose key is `k`, or None when no row matches."""
    matches = db.query("select v from kv where k = ?", [k])
    if not matches:
        return None
    return matches[0]["v"]


# Everything below runs once, at import time. The request handlers read `model`, `data`,
# `mat_ids` and `mat_embs` directly as module-level globals.
print("Loading model")
# Used by `get_community` to embed the community text.
model = DatasetEmbModel("post")
print("Loading posts")
# Rename the posts table's `url` column to `url_id` so it can be joined against the URLs table below.
data = load_table("posts").rename(columns={"url": "url_id"})
print("Loading post titles")
# Inner join on `id`: posts without a title row are dropped.
data = data.merge(
load_table("post_titles").rename(columns={"text": "title"}),
on="id",
how="inner",
)
print("Loading post URLs")
# Inner join on `url_id`: posts with no matching URL row are dropped.
data = data.merge(
load_table("urls").rename(columns={"id": "url_id"}),
on="url_id",
how="inner",
)
print("Loading embeddings")
# NOTE(review): presumably row i of `mat_embs` is the embedding of post `mat_ids[i]` — confirm in common.data.
mat_ids, mat_embs = load_embs("post")
print("All data loaded")

# Templates and static assets live next to this file.
templates = Jinja2Templates(directory=f"{DIR}/templates")

app = FastAPI()

app.mount("/static", StaticFiles(directory=f"{DIR}/static"), name="static")


@app.get("/")
def get_home(request: Request):
    """Render the admin landing page (`home.html`)."""
    return templates.TemplateResponse(name="home.html", request=request)


@app.get("/c")
def get_community(request: Request, community: str, sim_threshold: float):
    """Render posts whose similarity to `community` is at least `sim_threshold`.

    The community text is encoded with the post embedding model and scored against
    every loaded post embedding. Matching posts are ordered from lowest to highest
    similarity and only the first 100 are shown, i.e. the matches nearest the threshold.

    Args:
        request: Incoming request, handed through to the template.
        community: Community text to embed (query parameter).
        sim_threshold: Minimum similarity a post needs in order to be listed (query parameter).

    Returns:
        The rendered `community.html` template.
    """
    query_emb = model.encode(community)
    # One score per stored embedding, paired with the ID at the same position.
    scores = DataFrame({"id": mat_ids, "sim": mat_embs @ query_emb})
    # Work on a shallow copy so the shared module-level frame is left untouched.
    posts = data.copy(deep=False).merge(scores, on="id", how="inner")
    posts = posts[posts["sim"] >= sim_threshold]
    # Ascending order plus the first 100 rows keeps the matches closest to the threshold.
    posts = posts.sort_values("sim", ascending=True).head(100)
    context = {
        "community": community,
        "sim_threshold": sim_threshold,
        "posts": posts.to_dict("records"),
    }
    return templates.TemplateResponse(
        request=request,
        name="community.html",
        context=context,
    )


@app.get("/post/{post_id}")
def get_post(
    request: Request,
    post_id: int,
):
    """Render the embedding input of one post with its page placeholders filled in.

    The stored embedding input contains placeholders for the linked page's title,
    description and text; they are substituted with the values stored for the post's URL.

    Args:
        request: Incoming request, handed through to the template.
        post_id: ID of the post to display (path parameter).

    Returns:
        The rendered `post.html` template.

    Raises:
        HTTPException: 404 when the post row or its stored embedding input does not exist.
    """
    # Imported here so the handler is self-contained; fastapi is already a dependency of this module.
    from fastapi import HTTPException

    emb_input_blob = get_kv(f"post/{post_id}/emb_input")
    if emb_input_blob is None:
        # Previously this surfaced as an AttributeError on `None.decode` (HTTP 500).
        raise HTTPException(
            status_code=404,
            detail=f"No embedding input stored for post {post_id}",
        )
    emb_input_raw = emb_input_blob.decode("utf-8")

    post_rows = db.query("select url from post where id = ?", [post_id])
    if not post_rows:
        # Previously this surfaced as an IndexError on the empty result (HTTP 500).
        raise HTTPException(status_code=404, detail=f"Post {post_id} not found")
    url_id = post_rows[0]["url"]

    # Page text and metadata may be absent for a URL; fall back to empty values so the
    # embedding input can still be inspected instead of failing the whole page.
    text_blob = get_kv(f"url/{url_id}/text")
    text = text_blob.decode("utf-8") if text_blob is not None else ""
    meta_blob = get_kv(f"url/{url_id}/meta")
    meta = msgpack.loads(meta_blob) if meta_blob is not None else {}

    # Use `.get(key) or ""` instead of `.get(key, "")` as the key may exist but value is None.
    emb_input = (
        emb_input_raw.replace("<<<REPLACE_WITH_PAGE_TITLE>>>", meta.get("title") or "")
        .replace("<<<REPLACE_WITH_PAGE_DESCRIPTION>>>", meta.get("description") or "")
        .replace("<<<REPLACE_WITH_PAGE_TEXT>>>", text)
    )
    return templates.TemplateResponse(
        request=request,
        name="post.html",
        context={
            "emb_input": emb_input,
        },
    )


@app.post("/c/examples")
def set_community_example(
    request: Request,
    community: Annotated[str, Form()],
    item: Annotated[int, Form()],
    sim: Annotated[float, Form()],
    positive: Annotated[bool, Form()] = False,
):
    """Store one labelled example for a community, overwriting any earlier label.

    Args:
        request: Incoming request, handed through to the template.
        community: Community the example belongs to (form field).
        item: ID of the labelled item (form field).
        sim: Similarity value submitted with the example (form field).
        positive: Whether the example is positive; False when the form omits the field.

    Returns:
        The rendered `autoclose.html` template.
    """
    # Upsert: a repeated (community, item) pair replaces the stored label and similarity.
    upsert_sql = (
        "\n"
        "insert into community_example (community, item, positive, sim)\n"
        "values (?, ?, ?, ?)\n"
        "on duplicate key update\n"
        "positive = values(positive),\n"
        "sim = values(sim)\n"
    )
    upsert_params = [community, item, positive, sim]
    db.exec(upsert_sql, upsert_params)
    return templates.TemplateResponse(name="autoclose.html", request=request)


if __name__ == "__main__":
    # Listen on every interface; the port is read via `env("ADMIN_PORT")`.
    listen_port = int(env("ADMIN_PORT"))
    uvicorn.run(app, host="0.0.0.0", port=listen_port)
29 changes: 29 additions & 0 deletions admin/static/base.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/* Use border-box sizing for every element. */
* {
box-sizing: border-box;
}

/* Document-wide typography: try system-ui first, then fall back through common UI fonts to generic sans-serif. */
:root {
font-family:
system-ui,
-apple-system,
BlinkMacSystemFont,
"Segoe UI",
Roboto,
Oxygen,
Ubuntu,
Cantarell,
"Open Sans",
"Helvetica Neue",
sans-serif;
font-size: 16px;
line-height: 1.5;
}

/* Links are underlined only while hovered. The hover rule uses native CSS nesting (`&`). */
a {
color: #007bff;
text-decoration: none;

&:hover {
text-decoration: underline;
}
}
14 changes: 14 additions & 0 deletions admin/templates/autoclose.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Please wait</title>
</head>
<body>
<script>
// Returned by POST /c/examples. The example forms submit with target="_blank",
// so this page loads in a separate tab and closes it straight away.
// https://stackoverflow.com/a/54505340/6249022.
// NOTE(review): the arguments look like window.open()'s; window.close() presumably ignores them — confirm.
window.close("", "_parent", "");
</script>
</body>
</html>
15 changes: 15 additions & 0 deletions admin/templates/base.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<!doctype html>
<html lang="en">
  <head>
    {# Shared page skeleton: child templates fill in the `title` and `content` blocks. #}
    <meta charset="UTF-8" />
    {#
      Same viewport as autoclose.html. The previous value combined `user-scalable=1.0` with
      `maximum-scale=1.0`, which is contradictory and blocks pinch-zoom on browsers that honour it.
    #}
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>{% block title %}{% endblock %}</title>
    <link rel="stylesheet" href="/static/base.css" />
  </head>
  <body>
    {% block content %}{% endblock %}
  </body>
</html>
52 changes: 52 additions & 0 deletions admin/templates/community.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
{% extends 'base.html' %}
{#
  Community review page.
  Context: `community`, `sim_threshold`, and `posts` (each post is read for id, title, proto, url, sim).
  - The form at the top reloads /c with the edited community text and threshold.
  - Every post carries two forms that POST to /c/examples in a new tab:
    "Positive" sends positive=true; "Negative" leaves the field out, so the handler's default (False) applies.
#}
{% block title %}{{ community }} Community{% endblock %}
{% block content %}
<form action="/c">
<input name="community" value="{{ community }}" />
<input name="sim_threshold" value="{{ sim_threshold }}" />
<button type="submit">Go</button>
</form>

<div>
{% for post in posts %}
<article>
<h1>
<a href="/post/{{ post.id }}" target="_blank">{{ post.title }}</a>
</h1>
<a
href="https://news.ycombinator.com/item?id={{ post.id }}"
target="_blank"
rel="noopener noreferrer"
>HN</a
>
<a
href="{{ post.proto }}//{{ post.url }}"
target="_blank"
rel="noopener noreferrer"
>URL</a
>
<a
href="https://web.archive.org/web/{{ post.proto }}//{{ post.url }}"
target="_blank"
rel="noopener noreferrer"
>Archive</a
>
<p>URL: {{ post.url }}</p>
<p>Similarity: {{ post.sim }}</p>
<form action="/c/examples" method="post" target="_blank">
<input type="hidden" name="community" value="{{ community }}" />
<input type="hidden" name="item" value="{{ post.id }}" />
<input type="hidden" name="sim" value="{{ post.sim }}" />
<input type="hidden" name="positive" value="true" />
<button type="submit">Positive</button>
</form>
<form action="/c/examples" method="post" target="_blank">
<input type="hidden" name="community" value="{{ community }}" />
<input type="hidden" name="item" value="{{ post.id }}" />
<input type="hidden" name="sim" value="{{ post.sim }}" />
<button type="submit">Negative</button>
</form>
</article>
{% endfor %}
</div>
{% endblock %}
5 changes: 5 additions & 0 deletions admin/templates/home.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{% extends 'base.html' %}
{# Landing page served at "/"; currently shows only a heading. #}
{% block title %}Hackerverse Admin{% endblock %}
{% block content %}
<h1>Hackerverse</h1>
{% endblock %}
10 changes: 10 additions & 0 deletions admin/templates/post.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{% extends 'base.html' %}
{#
  Shows the `emb_input` context value as plain text.
  `white-space: pre-wrap` keeps its line breaks and spacing visible while still wrapping long lines.
#}
{% block title %}Post{% endblock %}
{% block content %}
<p id="emb-input">{{ emb_input }}</p>
<style>
#emb-input {
white-space: pre-wrap;
}
</style>
{% endblock %}
15 changes: 15 additions & 0 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,21 @@
# We use `init: true` as many processes won't respond to signals or process.exit() when PID 1.

services:
# Admin web UI service.
admin:
network_mode: host
image: wilsonzlin/hndr-admin
init: true
build:
context: .
dockerfile: Dockerfile.python-base
args:
# NOTE(review): presumably selects which app the shared Python image runs — confirm in Dockerfile.python-base.
MAIN: admin
environment:
# Port the admin server listens on; admin/main.py reads it via env("ADMIN_PORT").
ADMIN_PORT: 9345
# Passed through from the host environment; admin/main.py uses it as the db-rpc API key.
DB_RPC_API_KEY: ${DB_RPC_API_KEY}
volumes:
- "${DOCKER_VOLUME_DIR}:/hndr-data"

api:
network_mode: host
image: wilsonzlin/hndr-api
Expand Down
3 changes: 3 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
dataclasses-json
db-rpc-client-py
fastapi
FlagEmbedding
implicit
jinja2
msgpipe
numpy
opencv-contrib-python-headless
pandas
Pillow
pyarrow
python-multipart
requests
scikit-learn
scipy
Expand Down
14 changes: 14 additions & 0 deletions schema.sql
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,20 @@ create table kv (
primary key (k)
);

-- One row per community, keyed by its name, with a similarity threshold.
-- NOTE(review): presumably the threshold explored on the admin `/c` page; nothing visible here writes to this table — confirm.
create table community (
name varchar(200) not null,
sim_threshold float not null,
primary key (name)
);

-- Labelled examples for a community; written by the admin app's `POST /c/examples` upsert.
create table community_example (
community varchar(200) not null,
-- NOTE(review): presumably a post ID (the admin form submits `post.id`) — confirm.
item int not null,
-- True for a positive example, false for a negative one.
positive boolean not null,
-- Similarity value submitted together with the label.
sim float not null,
-- One label per (community, item); the admin upsert relies on this key to overwrite earlier labels.
primary key (community, item)
);

create table usr (
id bigint not null auto_increment,
username varchar(100) not null,
Expand Down

0 comments on commit f755fd9

Please sign in to comment.