about summary refs log tree commit diff
path: root/crawler
diff options
context:
space:
mode:
Diffstat (limited to 'crawler')
-rw-r--r-- crawler/Cargo.toml 1
-rw-r--r-- crawler/src/main.rs 3
2 files changed, 4 insertions, 0 deletions
diff --git a/crawler/Cargo.toml b/crawler/Cargo.toml
index 3f03217..2779421 100644
--- a/crawler/Cargo.toml
+++ b/crawler/Cargo.toml
@@ -9,6 +9,7 @@ edition = "2021"
 blockingqueue = "0.1.1"
 reqwest = {version = "0.11", features = ["blocking"]}
 scraper = "0.12.0"
+itertools = "0.10.5"
 
 [[bin]]
 name = "crawler"
diff --git a/crawler/src/main.rs b/crawler/src/main.rs
index 6067ac9..15abcaf 100644
--- a/crawler/src/main.rs
+++ b/crawler/src/main.rs
@@ -1,3 +1,5 @@
+use itertools::Itertools;
+
 fn main() {
     println!("Hello, world! Im the crawler!");
 
@@ -59,6 +61,7 @@ fn crawl_url(url: &str) -> (String, Vec<String>) {
     let next_urls = document
         .select(&link_selector)
         .filter_map(|link| link.value().attr("href"))
+        .unique()
         .map(String::from)
         .collect();