Skip to content

Commit

Permalink
feat: add dataset viewer, remove treesitter languages that haven't been…
Browse files Browse the repository at this point in the history
… verified (TabbyML#509)

* refactor: remove unverified tree-sitter queries

* feat(experimental): add dataset viewer

update
  • Loading branch information
wsxiaoys authored Oct 5, 2023
1 parent 55f68d4 commit 1babc38
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 137 deletions.
55 changes: 0 additions & 55 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 0 additions & 5 deletions crates/tabby-scheduler/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ job_scheduler = "1.2.1"
tabby-common = { path = "../tabby-common" }
tantivy = { workspace = true }
tracing = { workspace = true }
tree-sitter-javascript = "0.20.0"
tree-sitter-tags = "0.20.2"
walkdir = "2.3.3"
lazy_static = { workspace = true }
Expand All @@ -21,10 +20,6 @@ serde-jsonlines = { workspace = true }
file-rotate = "0.7.5"
tree-sitter-python = "0.20.2"
tree-sitter-rust = "0.20.3"
tree-sitter-go = "0.20.0"
tree-sitter-java = "0.20.0"
tree-sitter-typescript = "0.20.2"
tree-sitter-lua = "0.0.19"

[dev-dependencies]
temp_testdir = "0.2"
Expand Down
77 changes: 0 additions & 77 deletions crates/tabby-scheduler/src/dataset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -248,83 +248,6 @@ lazy_static! {
.unwrap(),
),
),
(
"javascript",
TagsConfigurationSync(
TagsConfiguration::new(
tree_sitter_javascript::language(),
tree_sitter_javascript::TAGGING_QUERY,
"",
)
.unwrap(),
),
),
(
"jsx",
TagsConfigurationSync(
TagsConfiguration::new(
tree_sitter_javascript::language(),
tree_sitter_javascript::TAGGING_QUERY,
"",
)
.unwrap(),
),
),
(
"typescript",
TagsConfigurationSync(
TagsConfiguration::new(
tree_sitter_typescript::language_typescript(),
tree_sitter_typescript::TAGGING_QUERY,
"",
)
.unwrap(),
),
),
(
"tsx",
TagsConfigurationSync(
TagsConfiguration::new(
tree_sitter_typescript::language_tsx(),
tree_sitter_typescript::TAGGING_QUERY,
"",
)
.unwrap(),
),
),
(
"java",
TagsConfigurationSync(
TagsConfiguration::new(
tree_sitter_java::language(),
tree_sitter_java::TAGGING_QUERY,
"",
)
.unwrap(),
),
),
(
"go",
TagsConfigurationSync(
TagsConfiguration::new(
tree_sitter_go::language(),
tree_sitter_go::TAGGING_QUERY,
"",
)
.unwrap(),
),
),
(
"lua",
TagsConfigurationSync(
TagsConfiguration::new(
tree_sitter_lua::language(),
tree_sitter_lua::TAGS_QUERY,
"",
)
.unwrap(),
),
),
])
};
}
47 changes: 47 additions & 0 deletions experimental/dataset-viewer/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import pandas as pd
import streamlit as st

# Use the full browser width so the source and tag columns fit side by side.
st.set_page_config(layout="wide")

st.write("Files")

# Load the dataset: one JSON object per line.
df = pd.read_json("~/.tabby/dataset/data.jsonl", lines=True)

# Drop the column the viewer never displays; tolerate datasets that lack it
# instead of failing with KeyError.
df = df.drop(columns=["git_url"], errors="ignore")

# Keep only files whose longest line is under 200 and that have at least one
# tag. The tag filter works on the column directly: a row-wise `apply` on an
# empty frame returns a DataFrame rather than a boolean mask.
df = df[df["max_line_length"] < 200]
df = df[df["tags"].map(len) > 0]

# With no rows left, the selectbox has nothing to offer and the `.iloc[0]`
# lookup below would raise IndexError, so report it and end this run.
if df.empty:
    st.warning("No files with tags found in the dataset.")
    st.stop()

selected = st.selectbox(
    "Filename",
    df.filepath,
)

# First row whose filepath matches the selection.
selected_row = df[df.filepath == selected].iloc[0]

def get_range(lst, x):
    """Return the part of ``lst`` covered by the range ``x``.

    Args:
        lst: The sequence to slice; the viewer passes the file content string.
        x: A mapping with integer ``'start'`` and ``'end'`` keys (end exclusive).

    Returns:
        The selected slice. For ``str`` input the offsets are applied to the
        UTF-8 encoding and the result is decoded back to ``str``; any other
        sequence is sliced directly.
    """
    start, end = x['start'], x['end']
    if isinstance(lst, str):
        # NOTE(review): the tag ranges are produced by tree-sitter-tags, which
        # reports byte offsets, not character offsets - confirm against the
        # dataset writer. Slicing the str directly would drift on any file
        # containing non-ASCII characters; for pure-ASCII text both agree.
        # errors="replace" keeps a range that splits a multi-byte character
        # from raising.
        return lst.encode("utf-8")[start:end].decode("utf-8", errors="replace")
    return lst[start:end]

# Render the selected file on the left and its tags on the right.
if selected_row is not None:
    # Collect the distinct tag kinds and sort them so the multiselect options
    # appear in the same order on every run (set iteration order is not stable).
    kinds = sorted({tag['syntax_type_name'] for tag in selected_row.tags})
    # Keyed by filepath so each file keeps its own widget state.
    enabled_kinds = st.multiselect("Displayed Kinds", kinds, default=kinds, key=selected_row.filepath)
    col1, col2 = st.columns(2)

    content = selected_row.content
    with col1:
        st.write(f"File: {selected_row.filepath}")
        st.code(content, line_numbers=True)

    with col2:
        for tag in selected_row.tags:
            kind = tag['syntax_type_name']
            # Skip filtered-out kinds before doing any slicing work.
            if kind not in enabled_kinds:
                continue
            name = get_range(content, tag['name_range'])
            is_definition = '✅' if tag['is_definition'] else '❌'
            st.markdown(f"### `{name}`\nkind: {kind}, is_definition: {is_definition}")
            st.code(get_range(content, tag['range']))

0 comments on commit 1babc38

Please sign in to comment.