Skip to content

Commit

Permalink
feat(context): add support for Pull Request in GitHub Context (TabbyM…
Browse files Browse the repository at this point in the history
…L#3429)

* WIP: add pr answer mock

* WIP: rename diff to patch

* WIP: index pulls and answer from them

* [autofix.ci] apply automated fixes

* [autofix.ci] apply automated fixes (attempt 2/3)

* chore: use diff instead of patch

* chore: drop PR placeholder in answer engine

* [autofix.ci] apply automated fixes

* chore: use consistent pull and pullDoc

* chore(graphQL): update schema for pr context

* chore: minor update for comments

* chore: skip diff larger than 10MB

---------

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
  • Loading branch information
zwpaper and autofix-ci[bot] authored Nov 19, 2024
1 parent 565304f commit 97210ec
Show file tree
Hide file tree
Showing 15 changed files with 312 additions and 11 deletions.
50 changes: 50 additions & 0 deletions crates/tabby-common/src/api/structured_doc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ pub struct DocSearchHit {
pub enum DocSearchDocument {
Web(DocSearchWebDocument),
Issue(DocSearchIssueDocument),
Pull(DocSearchPullDocument),
}

#[derive(Error, Debug)]
Expand Down Expand Up @@ -65,6 +66,15 @@ pub struct DocSearchIssueDocument {
pub closed: bool,
}

/// A pull request document produced by doc search.
///
/// This is the payload of `DocSearchDocument::Pull`. Its fields mirror the
/// `title`, `link`, `body`, `diff` and `merged` JSON attributes that
/// `from_tantivy_document` reads back from the index.
#[derive(Clone)]
pub struct DocSearchPullDocument {
/// Text stored under the `title` attribute.
pub title: String,
/// Text stored under the `link` attribute.
pub link: String,
/// Text stored under the `body` attribute.
pub body: String,
/// Text stored under the `diff` attribute.
pub diff: String,
/// Boolean stored under the `merged` attribute
/// (presumably true once the pull request has been merged — confirm with the indexer).
pub merged: bool,
}

pub trait FromTantivyDocument {
fn from_tantivy_document(doc: &TantivyDocument, chunk: &TantivyDocument) -> Option<Self>
where
Expand All @@ -82,6 +92,8 @@ impl FromTantivyDocument for DocSearchDocument {
}
"issue" => DocSearchIssueDocument::from_tantivy_document(doc, chunk)
.map(DocSearchDocument::Issue),
"pull" => DocSearchPullDocument::from_tantivy_document(doc, chunk)
.map(DocSearchDocument::Pull),
_ => None,
}
}
Expand Down Expand Up @@ -146,6 +158,44 @@ impl FromTantivyDocument for DocSearchIssueDocument {
}
}

impl FromTantivyDocument for DocSearchPullDocument {
    /// Rebuilds a pull request document from the JSON attributes of an indexed document.
    ///
    /// Only `doc` is consulted; the chunk document is ignored. The attributes are read
    /// in the order title, link, body, diff, merged, using the keys declared in
    /// `structured_doc::fields::pull`. This function itself never produces `None`.
    fn from_tantivy_document(doc: &TantivyDocument, _: &TantivyDocument) -> Option<Self> {
        // Every pull request attribute lives inside the shared JSON attributes field.
        let attributes = IndexSchema::instance().field_attributes;

        Some(Self {
            title: get_json_text_field(
                doc,
                attributes,
                structured_doc::fields::pull::TITLE,
            )
            .into(),
            link: get_json_text_field(
                doc,
                attributes,
                structured_doc::fields::pull::LINK,
            )
            .into(),
            body: get_json_text_field(
                doc,
                attributes,
                structured_doc::fields::pull::BODY,
            )
            .into(),
            diff: get_json_text_field(
                doc,
                attributes,
                structured_doc::fields::pull::DIFF,
            )
            .into(),
            merged: get_json_bool_field(
                doc,
                attributes,
                structured_doc::fields::pull::MERGED,
            ),
        })
    }
}

fn get_json_field<'a>(
doc: &'a TantivyDocument,
field: schema::Field,
Expand Down
8 changes: 8 additions & 0 deletions crates/tabby-common/src/index/structured_doc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,12 @@ pub mod fields {
pub const BODY: &str = "body";
pub const CLOSED: &str = "closed";
}

/// JSON attribute keys used when storing and reading pull request documents.
pub mod pull {
/// Key of the pull request title.
pub const TITLE: &str = "title";
/// Key of the pull request link.
pub const LINK: &str = "link";
/// Key of the pull request body text.
pub const BODY: &str = "body";
/// Key of the pull request diff text.
pub const DIFF: &str = "diff";
/// Key of the boolean `merged` flag.
pub const MERGED: &str = "merged";
}
}
2 changes: 1 addition & 1 deletion crates/tabby-index/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ pub mod public {
code::CodeIndexer,
structured_doc::public::{
StructuredDoc, StructuredDocFields, StructuredDocIndexer, StructuredDocIssueFields,
StructuredDocWebFields,
StructuredDocPullDocumentFields, StructuredDocWebFields,
},
};

Expand Down
5 changes: 3 additions & 2 deletions crates/tabby-index/src/structured_doc/public.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@ use tabby_common::index::corpus;
use tabby_inference::Embedding;

pub use super::types::{
issue::IssueDocument as StructuredDocIssueFields, web::WebDocument as StructuredDocWebFields,
StructuredDoc, StructuredDocFields,
issue::IssueDocument as StructuredDocIssueFields,
pull::PullDocument as StructuredDocPullDocumentFields,
web::WebDocument as StructuredDocWebFields, StructuredDoc, StructuredDocFields,
};
use super::{create_structured_doc_builder, types::BuildStructuredDoc};
use crate::{indexer::TantivyDocBuilder, Indexer};
Expand Down
7 changes: 7 additions & 0 deletions crates/tabby-index/src/structured_doc/types.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
pub mod issue;
pub mod pull;
pub mod web;

use std::sync::Arc;
Expand All @@ -21,13 +22,15 @@ impl StructuredDoc {
match &self.fields {
StructuredDocFields::Web(web) => &web.link,
StructuredDocFields::Issue(issue) => &issue.link,
StructuredDocFields::Pull(pull) => &pull.link,
}
}

/// Returns the static kind tag of this document: `"web"`, `"issue"` or `"pull"`,
/// chosen by the variant held in `fields`.
pub fn kind(&self) -> &'static str {
    // The payload is irrelevant here; only the variant decides the tag.
    match self.fields {
        StructuredDocFields::Web(..) => "web",
        StructuredDocFields::Issue(..) => "issue",
        StructuredDocFields::Pull(..) => "pull",
    }
}
}
Expand Down Expand Up @@ -55,6 +58,7 @@ pub trait BuildStructuredDoc {
pub enum StructuredDocFields {
Web(web::WebDocument),
Issue(issue::IssueDocument),
Pull(pull::PullDocument),
}

#[async_trait]
Expand All @@ -63,13 +67,15 @@ impl BuildStructuredDoc for StructuredDoc {
match &self.fields {
StructuredDocFields::Web(doc) => doc.should_skip(),
StructuredDocFields::Issue(doc) => doc.should_skip(),
StructuredDocFields::Pull(doc) => doc.should_skip(),
}
}

async fn build_attributes(&self) -> serde_json::Value {
match &self.fields {
StructuredDocFields::Web(doc) => doc.build_attributes().await,
StructuredDocFields::Issue(doc) => doc.build_attributes().await,
StructuredDocFields::Pull(doc) => doc.build_attributes().await,
}
}

Expand All @@ -80,6 +86,7 @@ impl BuildStructuredDoc for StructuredDoc {
match &self.fields {
StructuredDocFields::Web(doc) => doc.build_chunk_attributes(embedding).await,
StructuredDocFields::Issue(doc) => doc.build_chunk_attributes(embedding).await,
StructuredDocFields::Pull(doc) => doc.build_chunk_attributes(embedding).await,
}
}
}
Expand Down
58 changes: 58 additions & 0 deletions crates/tabby-index/src/structured_doc/types/pull.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
use std::sync::Arc;

use async_stream::stream;
use async_trait::async_trait;
use futures::stream::BoxStream;
use serde_json::json;
use tabby_common::index::structured_doc::fields;
use tabby_inference::Embedding;
use tokio::task::JoinHandle;

use super::{build_tokens, BuildStructuredDoc};

/// Fields of a pull request that is indexed as a structured document.
///
/// All five fields are written to the document attributes by `build_attributes`;
/// only `title` and `body` contribute to the chunk text in `build_chunk_attributes`.
pub struct PullDocument {
/// Link of the pull request.
pub link: String,
/// Title of the pull request.
pub title: String,
/// Body text of the pull request.
pub body: String,

/// The diff represents the code changes in this PR,
/// including metadata, affected line ranges, and added (+) or removed (-) lines.
/// For more details on the diff format, refer to:
/// https://git-scm.com/docs/diff-format#_combined_diff_format
// NOTE(review): the link above points at the *combined* diff section; confirm that
// this is the format actually stored here rather than a plain unified diff.
pub diff: String,
/// Stored under the `merged` attribute
/// (presumably true once the pull request has been merged — confirm with the caller).
pub merged: bool,
}

#[async_trait]
impl BuildStructuredDoc for PullDocument {
    /// Pull request documents are never skipped by this implementation.
    fn should_skip(&self) -> bool {
        false
    }

    /// Serializes every field of the pull request into a JSON object keyed by the
    /// constants in `fields::pull`.
    async fn build_attributes(&self) -> serde_json::Value {
        json!({
            fields::pull::LINK: self.link,
            fields::pull::TITLE: self.title,
            fields::pull::BODY: self.body,
            fields::pull::DIFF: self.diff,
            fields::pull::MERGED: self.merged,
        })
    }

    /// Produces the chunk stream for this pull request.
    ///
    /// The stream yields exactly one spawned task. That task tokenizes the title and
    /// body (separated by a blank line) with `build_tokens` and pairs the tokens with
    /// an empty JSON object as chunk attributes.
    async fn build_chunk_attributes(
        &self,
        embedding: Arc<dyn Embedding>,
    ) -> BoxStream<JoinHandle<(Vec<String>, serde_json::Value)>> {
        // The diff is deliberately left out of the chunk text for now.
        let content = format!("{}\n\n{}", self.title, self.body);

        Box::pin(stream! {
            yield tokio::spawn(async move {
                let tokens = build_tokens(embedding, &content).await;
                (tokens, json!({}))
            })
        })
    }
}
4 changes: 2 additions & 2 deletions ee/tabby-db/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ pub use server_setting::ServerSettingDAO;
use sqlx::{query, query_scalar, sqlite::SqliteQueryResult, Pool, Sqlite, SqlitePool};
pub use threads::{
ThreadDAO, ThreadMessageAttachmentClientCode, ThreadMessageAttachmentCode,
ThreadMessageAttachmentDoc, ThreadMessageAttachmentIssueDoc, ThreadMessageAttachmentWebDoc,
ThreadMessageDAO,
ThreadMessageAttachmentDoc, ThreadMessageAttachmentIssueDoc, ThreadMessageAttachmentPullDoc,
ThreadMessageAttachmentWebDoc, ThreadMessageDAO,
};
use tokio::sync::Mutex;
use user_completions::UserCompletionDailyStatsDAO;
Expand Down
10 changes: 10 additions & 0 deletions ee/tabby-db/src/threads.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ pub struct ThreadMessageDAO {
pub enum ThreadMessageAttachmentDoc {
Web(ThreadMessageAttachmentWebDoc),
Issue(ThreadMessageAttachmentIssueDoc),
Pull(ThreadMessageAttachmentPullDoc),
}

#[derive(Serialize, Deserialize)]
Expand All @@ -53,6 +54,15 @@ pub struct ThreadMessageAttachmentIssueDoc {
pub closed: bool,
}

/// Pull request attachment of a thread message, as persisted by the DB layer.
///
/// This is the payload of `ThreadMessageAttachmentDoc::Pull`. The struct derives
/// `Serialize`/`Deserialize`, so its field names are part of the serialized form.
#[derive(Serialize, Deserialize)]
pub struct ThreadMessageAttachmentPullDoc {
/// Title of the pull request.
pub title: String,
/// Link of the pull request.
pub link: String,
/// Body text of the pull request.
pub body: String,
/// Diff text of the pull request; the schema layer exposes it as `patch`.
pub diff: String,
/// Merged flag of the pull request.
pub merged: bool,
}

#[derive(Serialize, Deserialize)]
pub struct ThreadMessageAttachmentCode {
pub git_url: String,
Expand Down
10 changes: 9 additions & 1 deletion ee/tabby-schema/graphql/schema.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -516,6 +516,14 @@ type MessageAttachmentIssueDoc {
closed: Boolean!
}

# Pull request attachment; a member of the MessageAttachmentDoc union.
type MessageAttachmentPullDoc {
title: String!
link: String!
body: String!
# Carries the pull request diff (named `diff` in the index and DB layers).
patch: String!
merged: Boolean!
}

type MessageAttachmentWebDoc {
title: String!
link: String!
Expand Down Expand Up @@ -900,7 +908,7 @@ type WebContextSource implements ContextSourceId & ContextSource {
sourceName: String!
}

union MessageAttachmentDoc = MessageAttachmentWebDoc | MessageAttachmentIssueDoc
union MessageAttachmentDoc = MessageAttachmentWebDoc | MessageAttachmentIssueDoc | MessageAttachmentPullDoc

"""
Schema of thread run stream.
Expand Down
22 changes: 20 additions & 2 deletions ee/tabby-schema/src/dao.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ use lazy_static::lazy_static;
use tabby_db::{
EmailSettingDAO, IntegrationDAO, InvitationDAO, JobRunDAO, OAuthCredentialDAO,
ServerSettingDAO, ThreadDAO, ThreadMessageAttachmentClientCode, ThreadMessageAttachmentCode,
ThreadMessageAttachmentDoc, ThreadMessageAttachmentIssueDoc, ThreadMessageAttachmentWebDoc,
ThreadMessageDAO, UserEventDAO,
ThreadMessageAttachmentDoc, ThreadMessageAttachmentIssueDoc, ThreadMessageAttachmentPullDoc,
ThreadMessageAttachmentWebDoc, ThreadMessageDAO, UserEventDAO,
};

use crate::{
Expand Down Expand Up @@ -246,6 +246,15 @@ impl From<ThreadMessageAttachmentDoc> for thread::MessageAttachmentDoc {
closed: val.closed,
})
}
ThreadMessageAttachmentDoc::Pull(val) => {
thread::MessageAttachmentDoc::Pull(thread::MessageAttachmentPullDoc {
title: val.title,
link: val.link,
body: val.body,
patch: val.diff,
merged: val.merged,
})
}
}
}
}
Expand All @@ -268,6 +277,15 @@ impl From<&thread::MessageAttachmentDoc> for ThreadMessageAttachmentDoc {
closed: val.closed,
})
}
thread::MessageAttachmentDoc::Pull(val) => {
ThreadMessageAttachmentDoc::Pull(ThreadMessageAttachmentPullDoc {
title: val.title.clone(),
link: val.link.clone(),
body: val.body.clone(),
diff: val.patch.clone(),
merged: val.merged,
})
}
}
}
}
Expand Down
17 changes: 17 additions & 0 deletions ee/tabby-schema/src/schema/thread/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ impl From<CodeSearchHit> for MessageCodeSearchHit {
pub enum MessageAttachmentDoc {
Web(MessageAttachmentWebDoc),
Issue(MessageAttachmentIssueDoc),
Pull(MessageAttachmentPullDoc),
}

#[derive(GraphQLObject, Clone)]
Expand All @@ -142,6 +143,15 @@ pub struct MessageAttachmentIssueDoc {
pub closed: bool,
}

// Pull request attachment exposed through the GraphQL schema; payload of
// `MessageAttachmentDoc::Pull`.
//
// NOTE(review): plain `//` comments are used on purpose. Doc comments on a
// `GraphQLObject` derive may surface as schema descriptions and change the
// generated schema — confirm before converting these to `///`.
#[derive(GraphQLObject, Clone)]
pub struct MessageAttachmentPullDoc {
// Title of the pull request.
pub title: String,
// Link of the pull request.
pub link: String,
// Body text of the pull request.
pub body: String,
// Diff text of the pull request; filled from the `diff` field of the search/DB types.
pub patch: String,
// Merged flag of the pull request.
pub merged: bool,
}

impl From<DocSearchDocument> for MessageAttachmentDoc {
fn from(doc: DocSearchDocument) -> Self {
match doc {
Expand All @@ -158,6 +168,13 @@ impl From<DocSearchDocument> for MessageAttachmentDoc {
closed: issue.closed,
})
}
DocSearchDocument::Pull(pull) => MessageAttachmentDoc::Pull(MessageAttachmentPullDoc {
title: pull.title,
link: pull.link,
body: pull.body,
patch: pull.diff,
merged: pull.merged,
}),
}
}
}
Expand Down
4 changes: 3 additions & 1 deletion ee/tabby-webserver/src/service/answer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ impl AnswerService {
.map(|x| x.doc.clone().into())
.collect::<Vec<_>>();

debug!("doc content: {:?}", doc_query.content);
debug!("doc content: {:?}: {:?}", doc_query.content, attachment.doc.len());

if !attachment.doc.is_empty() {
let hits = hits.into_iter().map(|x| x.into()).collect::<Vec<_>>();
Expand Down Expand Up @@ -603,6 +603,7 @@ fn get_content(doc: &MessageAttachmentDoc) -> &str {
match doc {
MessageAttachmentDoc::Web(web) => &web.content,
MessageAttachmentDoc::Issue(issue) => &issue.body,
MessageAttachmentDoc::Pull(pull) => &pull.body,
}
}

Expand Down Expand Up @@ -711,6 +712,7 @@ mod tests {
match doc {
DocSearchDocument::Web(web_doc) => &web_doc.title,
DocSearchDocument::Issue(issue_doc) => &issue_doc.title,
DocSearchDocument::Pull(pull_doc) => &pull_doc.title,
}
}

Expand Down
Loading

0 comments on commit 97210ec

Please sign in to comment.