From 6be7f36a0d44f3007ec9ede828c44168eac1054e Mon Sep 17 00:00:00 2001 From: Baitinq Date: Sun, 30 Oct 2022 19:22:31 +0100 Subject: Crawler: Set 4 as the maximum "crawl depth" It's not really crawl depth, as we just count the path segments. --- crawler/src/main.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crawler/src/main.rs b/crawler/src/main.rs index 263f67d..d7a19a4 100644 --- a/crawler/src/main.rs +++ b/crawler/src/main.rs @@ -99,6 +99,7 @@ async fn crawl_url(http_client: &Client, url: &str) -> Result<(String, Vec false, u if u.fragment().is_some() => false, //no # urls u if u.query().is_some() => false, //no ? urls + u if u.path_segments().is_some() && u.path_segments().unwrap().count() > 4 => false, // max "crawling depth" is 4 u if *u == url => false, //no same url _ => true, }; -- cgit 1.4.1