From c4d8cfda83197c1574da52e0aa39c9b1557e8e7c Mon Sep 17 00:00:00 2001
From: Baitinq
Date: Sun, 30 Oct 2022 18:33:57 +0100
Subject: Crawler: Accept max_queue_size as an argument for crawler()

We also now set the queue capacity to the maximum of the root url list
length and max_queue_size. This is useful because the crawler would
previously hang if the root url list was changed to have more entries
than max_queue_size.
---
 crawler/src/main.rs | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/crawler/src/main.rs b/crawler/src/main.rs
index ce9943f..263f67d 100644
--- a/crawler/src/main.rs
+++ b/crawler/src/main.rs
@@ -11,17 +11,19 @@ async fn main() {
     let root_urls = include_str!("../top-1000-websites.txt");
     let root_urls = root_urls.split('\n').collect();
 
+    let max_queue_size = 2222;
+
     let http_client = reqwest::Client::new();
 
-    crawler(http_client, root_urls).await;
+    crawler(http_client, root_urls, max_queue_size).await;
 }
 
 //TODO: crawling depth? - async http client
-async fn crawler(http_client: Client, root_urls: Vec<&str>) {
+async fn crawler(http_client: Client, root_urls: Vec<&str>, max_queue_size: usize) {
     dbg!("Starting to crawl!");
 
     //add root urls to queue - TODO: max q size
-    let (tx_crawling_queue, rx_crawling_queue) = async_channel::bounded::<String>(2222);
+    let (tx_crawling_queue, rx_crawling_queue) = async_channel::bounded::<String>(std::cmp::max(max_queue_size, root_urls.len()));
     for url in root_urls {
         tx_crawling_queue.send(url.to_string()).await.unwrap();
     }
-- 
cgit 1.4.1
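
Editor's note on the motivation, for readers skimming the diff: the seeding
loop sends every root url into the bounded channel before anything consumes
from it, so a capacity smaller than the root url list blocks the loop forever.
The sketch below is illustrative only, not code from this repository; it
assumes the async-channel and tokio crates and uses made-up url values.

    // Illustrates the hang described in the commit message: a bounded
    // channel seeded with more items than its capacity, with no consumer
    // running until after the seeding loop finishes.
    #[tokio::main]
    async fn main() {
        let root_urls = vec!["https://a.example", "https://b.example", "https://c.example"];
        let max_queue_size = 2; // smaller than root_urls.len()

        // Pre-patch behaviour: capacity fixed at max_queue_size.
        // let (tx, rx) = async_channel::bounded::<String>(max_queue_size);

        // Post-patch behaviour: capacity can always hold every root url.
        let (tx, rx) =
            async_channel::bounded::<String>(std::cmp::max(max_queue_size, root_urls.len()));

        for url in root_urls {
            // With the pre-patch capacity the third send would await forever,
            // because the drain loop below is only reached once seeding ends.
            tx.send(url.to_string()).await.unwrap();
        }

        // Drain whatever was queued, standing in for the real crawl loop.
        while let Ok(url) = rx.try_recv() {
            println!("queued: {url}");
        }
    }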