rework check and rename to proc (BloopAI#447)

Phoenix-Game · May 19, 2023 · b9e4ab7 · b9e4ab7
1 parent 7df6e0f
commit b9e4ab7
Show file tree

Hide file tree

Showing 3 changed files with 73 additions and 69 deletions.
diff --git a/server/bleep/src/webserver/answer.rs b/server/bleep/src/webserver/answer.rs
@@ -2,6 +2,7 @@ use std::{
     borrow::Cow,
     collections::{HashMap, HashSet},
     mem,
+    path::{Component, PathBuf},
     str::FromStr,
 };
 
@@ -282,30 +283,6 @@ impl Conversation {
                     .join("\n")
             }
 
-            Action::File(file_ref) => {
-                // Retrieve the contents of a file.
-
-                let path = match &file_ref {
-                    FileRef::Alias(idx) => self
-                        .path_aliases
-                        .get(*idx)
-                        .with_context(|| format!("unknown path alias {idx}"))?,
-
-                    FileRef::Path(p) => p,
-                };
-
-                update
-                    .send(Update::Step(SearchStep::File(path.clone())))
-                    .await?;
-
-                ctx.app
-                    .indexes
-                    .file
-                    .by_path(&self.repo_ref, path)
-                    .await?
-                    .content
-            }
-
             Action::Code(query) => {
                 // Semantic search.
 
@@ -347,8 +324,8 @@ impl Conversation {
                 serde_json::to_string(&chunks).unwrap()
             }
 
-            Action::Check(question, path_aliases) => {
-                self.check(ctx, update, question, path_aliases).await?
+            Action::Proc(question, path_aliases) => {
+                self.proc(ctx, update, question, path_aliases).await?
             }
         };
 
@@ -365,13 +342,34 @@ impl Conversation {
         Ok(Some(action_stream))
     }
 
-    async fn check(
+    async fn proc(
         &mut self,
         ctx: &AppContext,
         update: Sender<Update>,
         question: String,
         path_aliases: Vec<usize>,
     ) -> Result<String> {
+        // filesystem-agnostic, purely lexical path normalization (only the path's components are inspected)
+        //
+        // - a//b -> a/b
+        // - a/./b -> a/b
+        // - a/b/../c -> a/c (regardless of whether this exists)
+        // - ../b/c -> None (a `..` with no preceding component to cancel is rejected)
+        fn normalize(path: PathBuf) -> Option<PathBuf> {
+            let mut stack = vec![]; // normal components kept so far, in order
+            for c in path.components() {
+                match c {
+                    Component::Normal(s) => stack.push(s),
+                    Component::ParentDir if stack.is_empty() => return None, // nothing left to cancel: give up
+                    Component::ParentDir => {
+                        _ = stack.pop(); // `..` cancels the most recently kept component
+                    }
+                    _ => (), // every other component kind (`.`, root, prefix) contributes nothing
+                }
+            }
+            Some(stack.iter().collect::<PathBuf>()) // result is rebuilt from normal components only, so it carries no root
+        }
+
         let paths = path_aliases
             .into_iter()
             .map(|i| self.path_aliases.get(i).ok_or(i))
@@ -380,7 +378,7 @@ impl Conversation {
 
         for u in paths
             .iter()
-            .map(|&p| Update::Step(SearchStep::Check(p.clone())))
+            .map(|&p| Update::Step(SearchStep::Proc(p.clone())))
         {
             update.send(u).await?;
         }
@@ -415,22 +413,38 @@ impl Conversation {
                 .try_collect::<String>()
                 .await?;
 
+            #[derive(serde::Deserialize)]
+            struct ProcResult { // shape that the collected `json` text is deserialized into
+                // paths of other files to look at, written relative to the currently processed file
+                dependencies: Vec<String>,
+                // relevant line ranges, each a `Range` with `start` and `end`
+                lines: Vec<Range>,
+            }
+
             #[derive(serde::Deserialize)]
             struct Range {
                 start: usize,
                 end: usize,
-                answer: String,
             }
 
-            let explanations = serde_json::from_str::<Vec<Range>>(&json)?
-                .into_iter()
+            let proc_result = serde_json::from_str::<ProcResult>(&json)?;
+
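+            // resolve each dependency against the current file's path and normalize it;
+            // dependencies that would climb above the starting point are silently dropped.
+            // NOTE(review): `path` is joined as-is (file name included), so a leading `..`
+            // cancels the file name rather than its directory -- confirm this is intended.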
+            let normalized_deps = proc_result
+                .dependencies
+                .iter()
+                .filter_map(|d| normalize(PathBuf::from(path).join(d)))
+                .collect::<Vec<_>>();
+
+            let explanations = proc_result
+                .lines
+                .iter()
                 .filter(|r| r.start > 0 && r.end > 0)
                 .map(|r| {
                     let end = r.end.min(r.start + 10);
 
                     serde_json::json!({
                         "start": r.start,
-                        "answer": r.answer,
                         "end": end,
                         "relevant_code": lines[r.start..end].join("\n"),
                     })
@@ -440,6 +454,7 @@ impl Conversation {
             Ok::<_, anyhow::Error>(serde_json::json!({
                 "explanations": explanations,
                 "path": path,
+                "relevant_dependencies": normalized_deps,
             }))
         });
 
@@ -589,15 +604,7 @@ enum Action {
     Path(String),
     Answer(String),
     Code(String),
-    File(FileRef),
-    Check(String, Vec<usize>),
-}
-
-#[derive(Debug, serde::Serialize, serde::Deserialize)]
-#[serde(untagged)]
-enum FileRef {
-    Path(String),
-    Alias(usize),
+    Proc(String, Vec<usize>),
 }
 
 impl Action {

diff --git a/server/bleep/src/webserver/answer/prompts.rs b/server/bleep/src/webserver/answer/prompts.rs
@@ -33,46 +33,44 @@ To list all files within a repo, leave the search terms blank.
 To find all files from a particular programming language, write a single file extension.
 To search for all files within a folder, write just the name of the folder.
 
-3. Read a file's contents
-["file",INT: §ALIAS]
-OR
-["file",STRING: PATH]
-Retrieve the contents of a single file.
-
-4. Check files for answer
-["check",STRING: QUESTION,INT[]: §ALIAS FOR EACH FILE]
-Check more than one file. Do not use this action if you are only checking one file.
+3. Process files to find answer
+["proc",STRING: PROCESS,INT[]: PATH ALIAS FOR EACH FILE]
+Process the files with the given aliases to find the answer to the question.
 Do not check the same file more than once.
+This will return a list of paths, relevant line ranges and relevant dependencies. You may wish to check the relevant dependencies.
+PROCESS should be a question, or detailed instruction of information to extract like:
+- find references to API
+- find react components
 
-5. State that you are ready to answer the question after absolutely all information has been gathered
+4. State that you are ready to answer the question after absolutely all information has been gathered
 ["answer",STRING: STANDALONE USER REQUEST]
 Signal that you are ready to answer the user's request. Do not write your response.
 Your STANDALONE USER REQUEST should be based on all of the previous conversation with the user.
 It should be possible to understand from this string alone what the user is asking for."#;
 
 pub fn file_explanation(question: &str, path: &str, code: &str) -> String {
     format!(
-        r#"Here's the contents of the code file {path} in <code> tags:
+        r#"Here's the contents of the code file /{path}:
+
+#####
 
-<code>
 {code}
-</code>
 
-The code is one file of many that can help answer a user query.
+#####
 
-The user's query is: {question}
+Your job is to perform the following tasks:
+1. Find out which other files and dependencies we should look at for information relevant to the query. You must answer with a json list of relevant paths, relative to the current file.
+2. Find all the relevant line ranges of code.
 
-Answer in the following JSON format, identifying any relevant line ranges:
-[{{
-"start":int,
-"end":int,
-"answer":[natural language description]
-}}]
+Q: find Kafka auth keys
+A: {{"dependencies":["../../utils/kafkaHandler","../src/config/index.ts"],"lines":[{{"start":12,"end":15}}]}}
 
-If the user's query cannot be answered by the file do not answer, instead reply with "0".
+Q: find where we submit payment requests
+A: {{"dependencies":["../paymentRequestProvider"],"lines":[{{"start":12,"end":15}}]}}
 
-Do not repeat the question in your answer.
-Do not make any assumptions, your answer should only refer to insights taken from the code."#
+ANSWER ONLY IN JSON
+Q: {question}
+A: "#
     )
 }
 
@@ -88,8 +86,8 @@ Your answer should be in the following JSON format: a list of objects, where eac
 1. citing a single file from the codebase (this object can appear multiple times, in the form of a JSON array)
 START LINE and END LINE should focus on the code mentioned in the COMMENT.
 
-[["cite",INT: §ALIAS, STRING: COMMENT, INT: START LINE, INT: END LINE],
-["cite",INT: §ALIAS, STRING: COMMENT, INT: START LINE, INT: END LINE]]
+[["cite",INT: §ALIAS,STRING: COMMENT,INT: START LINE,INT: END LINE],
+["cite",INT: §ALIAS,STRING: COMMENT,INT: START LINE,INT: END LINE]]
 
 2. write a new code file (this object can appear multiple times)
 Do not use this to demonstrate updating an existing file.

diff --git a/server/bleep/src/webserver/answer/response.rs b/server/bleep/src/webserver/answer/response.rs
@@ -65,8 +65,7 @@ pub enum SearchStep {
     Query(String),
     Path(String),
     Code(String),
-    Check(String),
-    File(String),
+    Proc(String),
     Prompt(String),
 }