about summary refs log tree commit diff
path: root/crawler
diff options
context:
space:
mode:
Diffstat (limited to 'crawler')
-rw-r--r-- crawler/src/main.rs 6
1 files changed, 6 insertions, 0 deletions
diff --git a/crawler/src/main.rs b/crawler/src/main.rs
index f8dc226..d1333fe 100644
--- a/crawler/src/main.rs
+++ b/crawler/src/main.rs
@@ -86,9 +86,11 @@ async fn crawl_url(http_client: &Client, url: &str) -> Result<(String, Vec<Strin
         .select(&link_selector)
         .filter_map(|link| link.value().attr("href"))
         .unique()
+        .take(2)
         .map(String::from)
         .collect();
 
+    // TODO: we must not add the http prefix if the URL already has it
     let fixup_urls = |us: Vec<String>| {
         us.into_iter()
             .map(|u| {
@@ -105,6 +107,10 @@ async fn crawl_url(http_client: &Client, url: &str) -> Result<(String, Vec<Strin
     };
 
     let next_urls = fixup_urls(next_urls);
+    // TODO: limit to 2 or so links to bound RAM use? Or limit by depth instead?
+    // TODO: normalise words somewhere
+    // TODO: fuzzy?
+    // TODO: there are probably lots of places where we could borrow, or otherwise avoid wasteful work
 
     Ok((response_text, next_urls))
 }