diff options
Diffstat (limited to 'crawler')
-rw-r--r-- | crawler/Cargo.toml | 1 | ||||
-rw-r--r-- | crawler/src/main.rs | 3 |
2 files changed, 4 insertions, 0 deletions
diff --git a/crawler/Cargo.toml b/crawler/Cargo.toml index 3f03217..2779421 100644 --- a/crawler/Cargo.toml +++ b/crawler/Cargo.toml @@ -9,6 +9,7 @@ edition = "2021" blockingqueue = "0.1.1" reqwest = {version = "0.11", features = ["blocking"]} scraper = "0.12.0" +itertools = "0.10.5" [[bin]] name = "crawler" diff --git a/crawler/src/main.rs b/crawler/src/main.rs index 6067ac9..15abcaf 100644 --- a/crawler/src/main.rs +++ b/crawler/src/main.rs @@ -1,3 +1,5 @@ +use itertools::Itertools; + fn main() { println!("Hello, world! Im the crawler!"); @@ -59,6 +61,7 @@ fn crawl_url(url: &str) -> (String, Vec<String>) { let next_urls = document .select(&link_selector) .filter_map(|link| link.value().attr("href")) + .unique() .map(String::from) .collect(); |