diff options
author | Baitinq <manuelpalenzuelamerino@gmail.com> | 2022-10-23 18:53:49 +0200 |
---|---|---|
committer | Baitinq <manuelpalenzuelamerino@gmail.com> | 2022-10-23 18:53:51 +0200 |
commit | 096ef9e2a2f21281f1b516b2de420f04df1db56e (patch) | |
tree | fea725991fe073b935a307a1a01d121b7ed30841 | |
parent | Crawler: Replace println! with dbg! (diff) | |
download | OSSE-096ef9e2a2f21281f1b516b2de420f04df1db56e.tar.gz OSSE-096ef9e2a2f21281f1b516b2de420f04df1db56e.tar.bz2 OSSE-096ef9e2a2f21281f1b516b2de420f04df1db56e.zip |
Crawler+Indexer: Rust cleanup
Getting more familiar with the language, so I fixed some non-optimal into_iter() usage, removed unnecessary .clone()s, and dropped an unnecessary hack: we can simply take a &mut reference when inserting into the indexer's URL database.
-rw-r--r-- | crawler/src/main.rs | 5 | ||||
-rw-r--r-- | indexer/src/main.rs | 15 |
2 files changed, 6 insertions, 14 deletions
diff --git a/crawler/src/main.rs b/crawler/src/main.rs index 6161578..e8efe77 100644 --- a/crawler/src/main.rs +++ b/crawler/src/main.rs @@ -90,7 +90,7 @@ async fn crawl_url(http_client: &Client, url: &str) -> Result<(String, Vec<Strin //we need to not append http if already has it let fixup_urls = |us: Vec<String>| { - us.into_iter() + us.iter() .map(|u| { //https://stackoverflow.com/questions/9646407/two-forward-slashes-in-a-url-src-href-attribute if u.starts_with("//") { @@ -98,14 +98,13 @@ async fn crawl_url(http_client: &Client, url: &str) -> Result<(String, Vec<Strin } else if u.starts_with('/') { format!("{}{}", &url, &u) } else { - u + u.to_string() } }) .collect() }; let next_urls = fixup_urls(next_urls); - //limit to 2 or smth for ram? or depth //normalise words somewhere //fuzzy? //probs lots of places where we can borrow or not do stupid stuff diff --git a/indexer/src/main.rs b/indexer/src/main.rs index 17c71ec..43a5f7f 100644 --- a/indexer/src/main.rs +++ b/indexer/src/main.rs @@ -55,16 +55,10 @@ async fn add_resource(data: web::Data<AppState>, resource: web::Json<Resource>) let mut database = data.database.lock().unwrap(); for word in fixed_words { //should probs do some priority - let maybe_urls = database.get(&word); + let maybe_urls = database.get_mut(&word); match maybe_urls { - Some(urls) => { - let mut updated_urls = urls.clone(); - updated_urls.insert(resource.url.clone()); - database.insert(word, updated_urls); - } - None => { - database.insert(word.clone(), HashSet::from([resource.url.clone()])); - } + Some(urls) => _ = urls.insert(resource.url.clone()), + None => _ = database.insert(word, HashSet::from([resource.url.clone()])), } } @@ -93,8 +87,7 @@ async fn greet(data: web::Data<AppState>, term: web::Path<String>) -> impl Respo .intersection(&results) .map(|s| s.to_owned()) .collect(); - let set: HashSet<String> = HashSet::from_iter(intersection); - valid_results = Some(set); + valid_results = Some(HashSet::from_iter(intersection)); } } } |