Skip to content

Commit

Permalink
chore(scheduler): ignore katana request without status code (TabbyML#…
Browse files Browse the repository at this point in the history
  • Loading branch information
wsxiaoys authored Jun 23, 2024
1 parent cb90912 commit 141c209
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 6 deletions.
8 changes: 4 additions & 4 deletions crates/tabby-scheduler/src/crawl/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ async fn crawl_url(start_url: &str) -> anyhow::Result<impl Stream<Item = KatanaR
};

// Skip if the status code is missing or is not 200
if data.response.status_code != 200 {
if data.response.status_code != Some(200) {
continue;
}

Expand Down Expand Up @@ -76,7 +76,7 @@ fn to_document(data: KatanaRequestResponse) -> Option<CrawledDocument> {
let (html, metadata) = {
let (node, metadata) = Readability::new()
.base_url(Url::parse(&data.request.endpoint).ok()?)
.parse(&data.response.body);
.parse(&data.response.body?);

let mut html_bytes = vec![];
node.serialize(&mut html_bytes).ok()?;
Expand Down Expand Up @@ -134,9 +134,9 @@ mod tests {
raw: "GET / HTTP/1.1\nHost: example.com\n".to_owned(),
},
response: types::KatanaResponse {
status_code: 200,
status_code: Some(200),
headers,
body: "<p>Hello, World!</p>".to_owned(),
body: Some("<p>Hello, World!</p>".to_owned()),
technologies: Default::default(),
raw: "HTTP/1.1 200 OK\nContent-Type: text/html\n".to_owned(),
},
Expand Down
4 changes: 2 additions & 2 deletions crates/tabby-scheduler/src/crawl/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ pub struct KatanaRequest {

#[derive(Deserialize, Debug)]
pub struct KatanaResponse {
pub status_code: u16,
pub status_code: Option<u16>,
pub headers: HashMap<String, String>,
pub body: String,
pub body: Option<String>,
pub technologies: Vec<String>,
pub raw: String,
}
Expand Down

0 comments on commit 141c209

Please sign in to comment.