From 645ec232114a3149bf4d501550de54406e63b907 Mon Sep 17 00:00:00 2001 From: Baitinq Date: Thu, 20 Oct 2022 16:27:02 +0200 Subject: Crawler: Remove duplicate parsed urls --- crawler/src/main.rs | 3 +++ 1 file changed, 3 insertions(+) (limited to 'crawler/src/main.rs') diff --git a/crawler/src/main.rs b/crawler/src/main.rs index 6067ac9..15abcaf 100644 --- a/crawler/src/main.rs +++ b/crawler/src/main.rs @@ -1,3 +1,5 @@ +use itertools::Itertools; + fn main() { println!("Hello, world! Im the crawler!"); @@ -59,6 +61,7 @@ fn crawl_url(url: &str) -> (String, Vec<String>) { let next_urls = document .select(&link_selector) .filter_map(|link| link.value().attr("href")) + .unique() .map(String::from) .collect(); -- cgit 1.4.1