From a87ccbb43b99432d7468ff7e294cdfb23a48861f Mon Sep 17 00:00:00 2001 From: Baitinq Date: Tue, 25 Oct 2022 01:41:32 +0200 Subject: Crawler: Shuffle crawled urls --- Cargo.lock | 1 + crawler/Cargo.toml | 3 ++- crawler/src/main.rs | 5 ++--- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9282a8a..c10b6ae 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -414,6 +414,7 @@ version = "0.1.0" dependencies = [ "async-channel", "itertools", + "rand 0.7.3", "reqwest", "scraper", "serde", diff --git a/crawler/Cargo.toml b/crawler/Cargo.toml index 2b93f53..16d7b41 100644 --- a/crawler/Cargo.toml +++ b/crawler/Cargo.toml @@ -12,7 +12,8 @@ itertools = "0.10.5" serde = { version = "1.0", features = ["derive"] } tokio = { version = "0.2.22", features = ["full"] } async-channel = "1.7.1" -url = "*" +url = "2.3.1" +rand = "0.7.3" [[bin]] name = "crawler" diff --git a/crawler/src/main.rs b/crawler/src/main.rs index 908a2c1..72c3e4d 100644 --- a/crawler/src/main.rs +++ b/crawler/src/main.rs @@ -1,4 +1,5 @@ use itertools::Itertools; +use rand::seq::IteratorRandom; use reqwest::blocking::{Client, Response}; use serde::Serialize; use url::Url; @@ -104,10 +105,8 @@ async fn crawl_url(http_client: &Client, url: &str) -> Result<(String, Vec