about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- Cargo.lock 16
-rw-r--r-- crawler/Cargo.toml 1
-rw-r--r-- crawler/src/main.rs 3
3 files changed, 20 insertions, 0 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 7dfc67a..91830a3 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -87,6 +87,7 @@ name = "crawler"
 version = "0.1.0"
 dependencies = [
  "blockingqueue",
+ "itertools",
  "reqwest",
  "scraper",
 ]
@@ -153,6 +154,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3a68a4904193147e0a8dec3314640e6db742afd5f6e634f428a6af230d9b3591"
 
 [[package]]
+name = "either"
+version = "1.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "90e5c1c8368803113bf0c9584fc495a58b86dc8a29edbf8fe877d21d9507e797"
+
+[[package]]
 name = "encoding_rs"
 version = "0.8.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -457,6 +464,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "879d54834c8c76457ef4293a689b2a8c59b076067ad77b15efafbb05f92a592b"
 
 [[package]]
+name = "itertools"
+version = "0.10.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
+dependencies = [
+ "either",
+]
+
+[[package]]
 name = "itoa"
 version = "0.4.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
diff --git a/crawler/Cargo.toml b/crawler/Cargo.toml
index 3f03217..2779421 100644
--- a/crawler/Cargo.toml
+++ b/crawler/Cargo.toml
@@ -9,6 +9,7 @@ edition = "2021"
 blockingqueue = "0.1.1"
 reqwest = {version = "0.11", features = ["blocking"]}
 scraper = "0.12.0"
+itertools = "0.10.5"
 
 [[bin]]
 name = "crawler"
diff --git a/crawler/src/main.rs b/crawler/src/main.rs
index 6067ac9..15abcaf 100644
--- a/crawler/src/main.rs
+++ b/crawler/src/main.rs
@@ -1,3 +1,5 @@
+use itertools::Itertools;
+
 fn main() {
     println!("Hello, world! Im the crawler!");
 
@@ -59,6 +61,7 @@ fn crawl_url(url: &str) -> (String, Vec<String>) {
     let next_urls = document
         .select(&link_selector)
         .filter_map(|link| link.value().attr("href"))
+        .unique()
         .map(String::from)
         .collect();