Skip to content

Commit

Permalink
Use 0-based indexes for line offsets in code chunks (BloopAI#836)
Browse files Browse the repository at this point in the history
* Use 0-based indexes for line offsets in code chunks

* use 0-based lines when communicating with the server

* Adjust line number indexing when encoding/decoding

* Adjust offsets for /explain query synthesis

* Fix more off-by-one errors

* Transcode index base for linked line ranges

* fix lines in user message for explaining lines

* fixes after rebase

* Store and read code chunks with exclusive end range

This fixes a bug where `proc` and `code` differed; `code` would store
code chunks with an open-close (`[start,end)`) range, while `proc` would
store code chunks with an open-open range (`[start,end]`). Now, we only
store `[start,end)` in order to be more consistent with the default rust
`Range` type.

* Fix link offset loop

Previously, the logic to check child elements was using `.any`, and thus
short-circuiting. Now we use a combination of `map` and `fold`, which
exhausts the `.children()` iterator properly.

* Fix offsets for single-line links

Now, links like `[foo](foo.rs#L1)` are correctly updated. Note the lack
of an end range, like `#L1-L2`.

---------

Co-authored-by: anastasiia <[email protected]>
  • Loading branch information
calyptobai and anastasiya1155 authored Aug 14, 2023
1 parent 0b357fd commit 6c18236
Show file tree
Hide file tree
Showing 10 changed files with 175 additions and 51 deletions.
2 changes: 1 addition & 1 deletion client/src/components/Chat/AllCoversations/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ const AllConversations = ({
title={c.title}
subtitle={format(
new Date(c.created_at * 1000),
'EEEE, MMMM d, h:m a',
'EEEE, MMMM d, HH:mm',
getDateFnsLocale(locale),
)}
onClick={() => onClick(c.thread_id)}
Expand Down
4 changes: 2 additions & 2 deletions client/src/components/Chat/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -312,8 +312,8 @@ const Chat = () => {
userQuery = t(
`Explain lines {{lineStart}} - {{lineEnd}} in {{filePath}}`,
{
lineStart: Number(lineStart),
lineEnd: Number(lineEnd),
lineStart: Number(lineStart) + 1,
lineEnd: Number(lineEnd) + 1,
filePath,
},
);
Expand Down
10 changes: 5 additions & 5 deletions client/src/components/CodeBlock/CodeFull/ExplainButton.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -121,14 +121,14 @@ const ExplainButton = ({
setConversation([]);
setThreadId('');
setSelectedLines([
currentSelection[0]![0] + 1,
currentSelection[1]![0] + 1,
currentSelection[0]![0],
currentSelection[1]![0],
]);
setRightPanelOpen(false);
setSubmittedQuery(
`#explain_${relativePath}:${
currentSelection[0]![0] + 1
}-${currentSelection[1]![0] + 1}-${Date.now()}`,
`#explain_${relativePath}:${currentSelection[0]![0]}-${
currentSelection[1]![0]
}-${Date.now()}`,
);
setChatOpen(true);
setPopupPosition(null);
Expand Down
6 changes: 3 additions & 3 deletions client/src/components/MarkdownWithCode/CodeRenderer.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -85,12 +85,12 @@ const CodeRenderer = ({
);

const linesToUse: [number, number] | undefined = useMemo(
() => [lines[0] - 1, (lines[1] ?? lines[0]) - 1],
() => [lines[0], lines[1] ?? lines[0]],
[lines],
);

const handleChipClick = useCallback(() => {
updateScrollToIndex(`${lines[0] - 1}_${(lines[1] ?? lines[0]) - 1}`);
updateScrollToIndex(`${lines[0]}_${lines[1] ?? lines[0]}`);
}, [updateScrollToIndex, lines]);

return (
Expand All @@ -116,7 +116,7 @@ const CodeRenderer = ({
language={matchLang?.[1] || ''}
filePath={matchPath?.[1] || ''}
onResultClick={openFileModal}
startLine={lines[0] ? lines[0] - 1 : null}
startLine={lines[0] ? lines[0] : null}
repoName={repoName}
/>
)
Expand Down
9 changes: 3 additions & 6 deletions client/src/components/MarkdownWithCode/LinkRenderer.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -65,16 +65,13 @@ const LinkRenderer = ({
}, [children]);

const linesToUse: [number, number] | undefined = useMemo(() => {
return hideCode && start ? [start - 1, (end ?? start) - 1] : undefined;
return hideCode && start ? [start, end ?? start] : undefined;
}, [hideCode, start, end]);

const handleClickFile = useCallback(() => {
hideCode
? updateScrollToIndex(`${start - 1}_${(end ?? start) - 1}`)
: openFileModal(
filePath,
start ? `${start - 1}_${(end ?? start) - 1}` : undefined,
);
? updateScrollToIndex(`${start}_${end ?? start}`)
: openFileModal(filePath, start ? `${start}_${end ?? start}` : undefined);
}, [hideCode, updateScrollToIndex, start, end, filePath]);

const handleClickFolder = useCallback(() => {
Expand Down
16 changes: 6 additions & 10 deletions server/bleep/src/agent/tools/answer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ impl Agent {
.snippet
.lines()
.enumerate()
.map(|(i, line)| format!("{} {line}\n", i + chunk.start_line))
.map(|(i, line)| format!("{} {line}\n", i + chunk.start_line + 1))
.collect::<String>();

let formatted_snippet = format!("### {} ###\n{snippet}\n\n", chunk.path);
Expand Down Expand Up @@ -234,6 +234,7 @@ impl Agent {
let context_size = tiktoken_rs::model::get_context_size(gpt_model);
let max_tokens = (context_size as f32 * CONTEXT_CODE_RATIO) as usize;

// Note: The end line number here is *not* inclusive.
let mut spans_by_path = HashMap::<_, Vec<_>>::new();
for c in self.code_chunks().filter(|c| aliases.contains(&c.alias)) {
spans_by_path
Expand Down Expand Up @@ -279,8 +280,7 @@ impl Agent {
.iter()
.flat_map(|(path, spans)| spans.iter().map(move |s| (path, s)))
.map(|(path, span)| {
let range = span.start.saturating_sub(1)..span.end.saturating_sub(1);
let snippet = lines_by_file.get(path).unwrap()[range].join("\n");
let snippet = lines_by_file.get(path).unwrap()[span.clone()].join("\n");
bpe.encode_ordinary(&snippet).len()
})
.sum::<usize>();
Expand All @@ -305,12 +305,9 @@ impl Agent {

let old_span = span.clone();

// Decrease the start line, but make sure that we don't end up with 0, as our lines
// are 1-based.
span.start = span.start.saturating_sub(range_step).max(1);
span.start = span.start.saturating_sub(range_step);

// Expand the end line forwards, capping at the total number of lines (NB: this is
// also 1-based).
// Expand the end line forwards, capping at the total number of lines.
span.end += range_step;
span.end = span.end.min(file_lines);

Expand Down Expand Up @@ -349,8 +346,7 @@ impl Agent {
.into_iter()
.flat_map(|(path, spans)| spans.into_iter().map(move |s| (path.clone(), s)))
.map(|(path, span)| {
let range = span.start.saturating_sub(1)..span.end.saturating_sub(1);
let snippet = lines_by_file.get(&path).unwrap()[range].join("\n");
let snippet = lines_by_file.get(&path).unwrap()[span.clone()].join("\n");

CodeChunk {
alias: self.get_path_alias(&path),
Expand Down
4 changes: 2 additions & 2 deletions server/bleep/src/agent/tools/code.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ impl Agent {
path: relative_path.clone(),
alias: self.get_path_alias(&relative_path),
snippet: chunk.text,
start_line: (chunk.start_line as usize).saturating_add(1),
end_line: (chunk.end_line as usize).saturating_add(1),
start_line: chunk.start_line as usize,
end_line: chunk.end_line as usize,
}
})
.collect::<Vec<_>>();
Expand Down
9 changes: 7 additions & 2 deletions server/bleep/src/agent/tools/proc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ impl Agent {
.unwrap()
.0
.parse::<usize>()
.unwrap();
.unwrap()
- 1;

// We store the lines separately, so that we can reference them later to trim
// this snippet by line number.
Expand Down Expand Up @@ -121,6 +122,10 @@ impl Agent {
r.end = r.end.min(r.start + MAX_CHUNK_LINE_LENGTH); // Cap relevant chunk size by line number
r
})
.map(|r| Range {
start: r.start - 1,
end: r.end,
})
.collect();

line_ranges.sort();
Expand All @@ -146,7 +151,7 @@ impl Agent {
code: lines
.get(
range.start.saturating_sub(start_line)
..range.end.saturating_sub(start_line),
..=range.end.saturating_sub(start_line),
)?
.iter()
.map(|line| line.split_once(' ').unwrap().1)
Expand Down
Loading

0 comments on commit 6c18236

Please sign in to comment.