author    Baitinq <manuelpalenzuelamerino@gmail.com>  2022-10-30 18:33:57 +0100
committer Baitinq <manuelpalenzuelamerino@gmail.com>  2022-10-30 18:34:01 +0100
commit    c4d8cfda83197c1574da52e0aa39c9b1557e8e7c (patch)
tree      1cfb2fecfc17950d1639577037d05519565e1d5d
parent    Frontend: Change navbar links (diff)
Crawler: Accept max_queue_size as an argument for crawler()
We also now set the queue size to the maximum of the root URL list
length and max_queue_size. This is useful because, previously, if
someone changed the root URL list to have more entries than
max_queue_size, the crawler would hang.
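
For context, a minimal sketch of the hang this commit fixes (assuming
tokio as the runtime; the URLs and capacity are illustrative): with
async_channel::bounded, a send on a full channel awaits until a receiver
takes an item, so queueing the root URLs before any consumer runs
deadlocks once the list outgrows the capacity.

use async_channel;

#[tokio::main]
async fn main() {
    // Illustrative root URL list, larger than the channel capacity below.
    let root_urls = vec!["https://a.example", "https://b.example", "https://c.example"];

    // Bounded channel with capacity < root_urls.len(); _rx is kept alive
    // but nothing receives yet, mirroring the crawler's startup phase.
    let (tx, _rx) = async_channel::bounded::<String>(2);

    for url in root_urls {
        // The third send never completes: the channel is full and no
        // receiver is consuming, so this await blocks forever.
        tx.send(url.to_string()).await.unwrap();
    }
}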
-rw-r--r--  crawler/src/main.rs | 8
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/crawler/src/main.rs b/crawler/src/main.rs
index ce9943f..263f67d 100644
--- a/crawler/src/main.rs
+++ b/crawler/src/main.rs
@@ -11,17 +11,19 @@ async fn main() {
     let root_urls = include_str!("../top-1000-websites.txt");
     let root_urls = root_urls.split('\n').collect();
 
+    let max_queue_size = 2222;
+
     let http_client = reqwest::Client::new();
 
-    crawler(http_client, root_urls).await;
+    crawler(http_client, root_urls, max_queue_size).await;
 }
 
 //TODO: crawling depth? - async http client
-async fn crawler(http_client: Client, root_urls: Vec<&str>) {
+async fn crawler(http_client: Client, root_urls: Vec<&str>, max_queue_size: usize) {
     dbg!("Starting to crawl!");
 
     //add root urls to queue - TODO: max q size
-    let (tx_crawling_queue, rx_crawling_queue) = async_channel::bounded::<String>(2222);
+    let (tx_crawling_queue, rx_crawling_queue) = async_channel::bounded::<String>(std::cmp::max(max_queue_size, root_urls.len()));
     for url in root_urls {
         tx_crawling_queue.send(url.to_string()).await.unwrap();
     }
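
And a sketch of the fixed sizing (same illustrative values): taking the
maximum of max_queue_size and root_urls.len() guarantees that every
initial send completes, regardless of how the root URL list grows.

use async_channel;

#[tokio::main]
async fn main() {
    let root_urls = vec!["https://a.example", "https://b.example", "https://c.example"];
    let max_queue_size = 2; // deliberately smaller than root_urls.len()

    // Capacity is at least the number of root URLs, so the startup
    // sends can never fill the channel before a consumer runs.
    let capacity = std::cmp::max(max_queue_size, root_urls.len());
    let (tx, rx) = async_channel::bounded::<String>(capacity);

    for url in &root_urls {
        tx.send(url.to_string()).await.unwrap(); // completes immediately
    }
    assert_eq!(rx.len(), root_urls.len());
}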