diff options
author | Baitinq <manuelpalenzuelamerino@gmail.com> | 2022-10-28 20:41:37 +0200 |
---|---|---|
committer | Baitinq <manuelpalenzuelamerino@gmail.com> | 2022-10-29 00:17:14 +0200 |
commit | d1df551df660484b7de2a05e764b76a0b2bdd03b (patch) | |
tree | 750684cdd9377bd9a8a817f6c57295fd150e3af8 | |
parent | Frontend: Show results in reverse order with priority (diff) | |
download | OSSE-d1df551df660484b7de2a05e764b76a0b2bdd03b.tar.gz OSSE-d1df551df660484b7de2a05e764b76a0b2bdd03b.tar.bz2 OSSE-d1df551df660484b7de2a05e764b76a0b2bdd03b.zip |
Indexer: Implement basic priority calculation of words in a site
We currently calculate the priority as the number of occurrences of the word in the site. This is very basic and should be changed :))
-rw-r--r-- | indexer/src/main.rs | 13 |
1 files changed, 6 insertions, 7 deletions
diff --git a/indexer/src/main.rs b/indexer/src/main.rs index 36eabff..37a7256 100644 --- a/indexer/src/main.rs +++ b/indexer/src/main.rs @@ -1,6 +1,5 @@ use actix_cors::Cors; use actix_web::{get, post, web, App, HttpServer, Responder}; -use rand::Rng; use serde::{Deserialize, Serialize}; use std::collections::{HashMap, HashSet}; use std::hash::{Hash, Hasher}; @@ -101,16 +100,16 @@ async fn add_resource(data: web::Data<AppState>, resource: web::Json<Resource>) //and for each changed content word we add it to the db (word -> list.append(url)) let mut database = data.database.lock().unwrap(); - for word in fixed_words { + for word in &fixed_words { let resource_to_add = CrawledResource { url: resource.url.clone(), - priority: calculate_word_priority(&word, resource.content.as_str()), + priority: calculate_word_priority(word, resource.content.as_str(), &fixed_words), word: Arc::new(word.clone()), title: page_title.clone(), description: page_description.clone(), }; - match database.get_mut(&word) { + match database.get_mut(word) { Some(resources) => _ = resources.insert(resource_to_add), None => _ = database.insert(word.clone(), HashSet::from([resource_to_add])), } @@ -163,7 +162,7 @@ fn search_word_in_db( db.get(&word) } -//TODO! -fn calculate_word_priority(_word: &str, _html_site: &str) -> u32 { - rand::thread_rng().gen::<u32>() +fn calculate_word_priority(word: &str, _html_site: &str, words: &[String]) -> u32 { + //atm priority is just the number of occurences in the site. + words.iter().filter(|w| *w == word).count() as u32 } |