Skip to content

Commit

Permalink
rework check and rename to proc (BloopAI#447)
Browse files Browse the repository at this point in the history
  • Loading branch information
oppiliappan authored and calyptobai committed May 19, 2023
1 parent 7df6e0f commit b9e4ab7
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 69 deletions.
89 changes: 48 additions & 41 deletions server/bleep/src/webserver/answer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use std::{
borrow::Cow,
collections::{HashMap, HashSet},
mem,
path::{Component, PathBuf},
str::FromStr,
};

Expand Down Expand Up @@ -282,30 +283,6 @@ impl Conversation {
.join("\n")
}

Action::File(file_ref) => {
// Retrieve the contents of a file.

let path = match &file_ref {
FileRef::Alias(idx) => self
.path_aliases
.get(*idx)
.with_context(|| format!("unknown path alias {idx}"))?,

FileRef::Path(p) => p,
};

update
.send(Update::Step(SearchStep::File(path.clone())))
.await?;

ctx.app
.indexes
.file
.by_path(&self.repo_ref, path)
.await?
.content
}

Action::Code(query) => {
// Semantic search.

Expand Down Expand Up @@ -347,8 +324,8 @@ impl Conversation {
serde_json::to_string(&chunks).unwrap()
}

Action::Check(question, path_aliases) => {
self.check(ctx, update, question, path_aliases).await?
Action::Proc(question, path_aliases) => {
self.proc(ctx, update, question, path_aliases).await?
}
};

Expand All @@ -365,13 +342,34 @@ impl Conversation {
Ok(Some(action_stream))
}

async fn check(
async fn proc(
&mut self,
ctx: &AppContext,
update: Sender<Update>,
question: String,
path_aliases: Vec<usize>,
) -> Result<String> {
/// Lexically normalizes `path` without consulting the filesystem.
///
/// Only ordinary path segments are retained; everything else is resolved or
/// dropped purely by inspecting the components:
///
/// - `a//b`     becomes `a/b` (repeated separators collapse)
/// - `a/./b`    becomes `a/b` (`.` is discarded)
/// - `a/b/../c` becomes `a/c` (`..` cancels the preceding segment, whether
///   or not that segment exists on disk)
/// - `../b/c`   yields `None` (`..` with nothing left to cancel)
///
/// Root and prefix components are discarded as well, so the result is
/// always a relative path.
fn normalize(path: PathBuf) -> Option<PathBuf> {
    let mut kept = Vec::new();
    for component in path.components() {
        if let Component::Normal(segment) = component {
            kept.push(segment);
            continue;
        }
        if component == Component::ParentDir {
            // `..` removes the most recent segment; if there is none the
            // path escapes its starting point and cannot be normalized.
            kept.pop()?;
        }
    }
    Some(kept.into_iter().collect())
}

let paths = path_aliases
.into_iter()
.map(|i| self.path_aliases.get(i).ok_or(i))
Expand All @@ -380,7 +378,7 @@ impl Conversation {

for u in paths
.iter()
.map(|&p| Update::Step(SearchStep::Check(p.clone())))
.map(|&p| Update::Step(SearchStep::Proc(p.clone())))
{
update.send(u).await?;
}
Expand Down Expand Up @@ -415,22 +413,38 @@ impl Conversation {
.try_collect::<String>()
.await?;

/// Shape of the JSON reply parsed for a single processed file via
/// `serde_json::from_str::<ProcResult>`.
#[derive(serde::Deserialize)]
struct ProcResult {
/// Paths worth inspecting next, expressed relative to the file currently
/// being processed. They are joined onto that file's path and normalized
/// afterwards; entries that fail to normalize are dropped.
dependencies: Vec<String>,
/// Line ranges of the current file that were flagged as relevant.
lines: Vec<Range>,
}

/// A single line range deserialized from the model's JSON reply.
#[derive(serde::Deserialize)]
struct Range {
/// First line of the range. Ranges with `start == 0` are filtered out, and
/// the value is used directly as the lower slice index into the file's lines.
start: usize,
/// End of the range. Ranges with `end == 0` are filtered out, and the value
/// is capped at `start + 10` before it is used as the upper slice index.
end: usize,
/// Text forwarded as the `answer` field of each explanation object.
/// NOTE(review): the few-shot examples in the prompt emit only `start` and
/// `end` — confirm whether this field is still expected in the reply.
answer: String,
}

let explanations = serde_json::from_str::<Vec<Range>>(&json)?
.into_iter()
let proc_result = serde_json::from_str::<ProcResult>(&json)?;

// turn relative paths into absolute paths
let normalized_deps = proc_result
.dependencies
.iter()
.filter_map(|d| normalize(PathBuf::from(path).join(d)))
.collect::<Vec<_>>();

let explanations = proc_result
.lines
.iter()
.filter(|r| r.start > 0 && r.end > 0)
.map(|r| {
let end = r.end.min(r.start + 10);

serde_json::json!({
"start": r.start,
"answer": r.answer,
"end": end,
"relevant_code": lines[r.start..end].join("\n"),
})
Expand All @@ -440,6 +454,7 @@ impl Conversation {
Ok::<_, anyhow::Error>(serde_json::json!({
"explanations": explanations,
"path": path,
"relevant_dependencies": normalized_deps,
}))
});

Expand Down Expand Up @@ -589,15 +604,7 @@ enum Action {
Path(String),
Answer(String),
Code(String),
File(FileRef),
Check(String, Vec<usize>),
}

#[derive(Debug, serde::Serialize, serde::Deserialize)]
#[serde(untagged)]
enum FileRef {
Path(String),
Alias(usize),
Proc(String, Vec<usize>),
}

impl Action {
Expand Down
50 changes: 24 additions & 26 deletions server/bleep/src/webserver/answer/prompts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,46 +33,44 @@ To list all files within a repo, leave the search terms blank.
To find all files from a particular programming language, write a single file extension.
To search for all files within a folder, write just the name of the folder.
3. Read a file's contents
["file",INT: §ALIAS]
OR
["file",STRING: PATH]
Retrieve the contents of a single file.
4. Check files for answer
["check",STRING: QUESTION,INT[]: §ALIAS FOR EACH FILE]
Check more than one file. Do not use this action if you are only checking one file.
3. Process files to find answer
["proc",STRING: PROCESS,INT[]: PATH ALIAS FOR EACH FILE]
Process the files with the given aliases to find the answer to the question.
Do not check the same file more than once.
This will return a list of paths, relevant line ranges and relevant dependencies. You may wish to check the relevant dependencies.
PROCESS should be a question, or detailed instruction of information to extract like:
- find references to API
- find react components
5. State that you are ready to answer the question after absolutely all information has been gathered
4. State that you are ready to answer the question after absolutely all information has been gathered
["answer",STRING: STANDALONE USER REQUEST]
Signal that you are ready to answer the user's request. Do not write your response.
Your STANDALONE USER REQUEST should be based on all of the previous conversation with the user.
It should be possible to understand from this string alone what the user is asking for."#;

pub fn file_explanation(question: &str, path: &str, code: &str) -> String {
format!(
r#"Here's the contents of the code file {path} in <code> tags:
r#"Here's the contents of the code file /{path}:
#####
<code>
{code}
</code>
The code is one file of many that can help answer a user query.
#####
The user's query is: {question}
Your job is to perform the following tasks:
1. Find out which other files and dependencies we should look at for information relevant to the query. You must answer with a json list of relevant paths, relative to the current file.
2. Find all the relevant line ranges of code.
Answer in the following JSON format, identifying any relevant line ranges:
[{{
"start":int,
"end":int,
"answer":[natural language description]
}}]
Q: find Kafka auth keys
A: {{"dependencies":["../../utils/kafkaHandler","../src/config/index.ts"],"lines":[{{"start":12,"end":15}}]}}
If the user's query cannot be answered by the file do not answer, instead reply with "0".
Q: find where we submit payment requests
A: {{"dependencies":["../paymentRequestProvider"],"lines":[{{"start":12,"end":15}}]}}
Do not repeat the question in your answer.
Do not make any assumptions, your answer should only refer to insights taken from the code."#
ANSWER ONLY IN JSON
Q: {question}
A: "#
)
}

Expand All @@ -88,8 +86,8 @@ Your answer should be in the following JSON format: a list of objects, where eac
1. citing a single file from the codebase (this object can appear multiple times, in the form of a JSON array)
START LINE and END LINE should focus on the code mentioned in the COMMENT.
[["cite",INT: §ALIAS, STRING: COMMENT, INT: START LINE, INT: END LINE],
["cite",INT: §ALIAS, STRING: COMMENT, INT: START LINE, INT: END LINE]]
[["cite",INT: §ALIAS,STRING: COMMENT,INT: START LINE,INT: END LINE],
["cite",INT: §ALIAS,STRING: COMMENT,INT: START LINE,INT: END LINE]]
2. write a new code file (this object can appear multiple times)
Do not use this to demonstrate updating an existing file.
Expand Down
3 changes: 1 addition & 2 deletions server/bleep/src/webserver/answer/response.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,7 @@ pub enum SearchStep {
Query(String),
Path(String),
Code(String),
Check(String),
File(String),
Proc(String),
Prompt(String),
}

Expand Down

0 comments on commit b9e4ab7

Please sign in to comment.