diff options
| author | Baitinq <[email protected]> | 2022-10-20 16:27:02 +0200 |
|---|---|---|
| committer | Baitinq <[email protected]> | 2022-10-20 16:27:02 +0200 |
| commit | 645ec232114a3149bf4d501550de54406e63b907 (patch) | |
| tree | 8482c4d6931c1d0f308ad4ed2ea23a4bcf4e4f01 /crawler/src/main.rs | |
| parent | Crawler: Add basic html parsing and link-following (diff) | |
| download | OSSE-645ec232114a3149bf4d501550de54406e63b907.tar.gz OSSE-645ec232114a3149bf4d501550de54406e63b907.tar.bz2 OSSE-645ec232114a3149bf4d501550de54406e63b907.zip | |
Crawler: Remove duplicate parsed urls
Diffstat (limited to '')
| -rw-r--r-- | crawler/src/main.rs | 3 |
1 file changed, 3 insertions(+), 0 deletions(-)
diff --git a/crawler/src/main.rs b/crawler/src/main.rs
index 6067ac9..15abcaf 100644
--- a/crawler/src/main.rs
+++ b/crawler/src/main.rs
@@ -1,3 +1,5 @@
+use itertools::Itertools;
+
 fn main() {
     println!("Hello, world! Im the crawler!");
@@ -59,6 +61,7 @@ fn crawl_url(url: &str) -> (String, Vec<String>) {
     let next_urls = document
         .select(&link_selector)
         .filter_map(|link| link.value().attr("href"))
+        .unique()
         .map(String::from)
         .collect();