From d1df551df660484b7de2a05e764b76a0b2bdd03b Mon Sep 17 00:00:00 2001 From: Baitinq Date: Fri, 28 Oct 2022 20:41:37 +0200 Subject: Indexer: Implement basic priority calculation of words in a site We just calculate priority to be the number of occurrences of the word in the site. This is very basic and should be changed:)) --- indexer/src/main.rs | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/indexer/src/main.rs b/indexer/src/main.rs index 36eabff..37a7256 100644 --- a/indexer/src/main.rs +++ b/indexer/src/main.rs @@ -1,6 +1,5 @@ use actix_cors::Cors; use actix_web::{get, post, web, App, HttpServer, Responder}; -use rand::Rng; use serde::{Deserialize, Serialize}; use std::collections::{HashMap, HashSet}; use std::hash::{Hash, Hasher}; @@ -101,16 +100,16 @@ async fn add_resource(data: web::Data, resource: web::Json) //and for each changed content word we add it to the db (word -> list.append(url)) let mut database = data.database.lock().unwrap(); - for word in fixed_words { + for word in &fixed_words { let resource_to_add = CrawledResource { url: resource.url.clone(), - priority: calculate_word_priority(&word, resource.content.as_str()), + priority: calculate_word_priority(word, resource.content.as_str(), &fixed_words), word: Arc::new(word.clone()), title: page_title.clone(), description: page_description.clone(), }; - match database.get_mut(&word) { + match database.get_mut(word) { Some(resources) => _ = resources.insert(resource_to_add), None => _ = database.insert(word.clone(), HashSet::from([resource_to_add])), } @@ -163,7 +162,7 @@ fn search_word_in_db( db.get(&word) } -//TODO! -fn calculate_word_priority(_word: &str, _html_site: &str) -> u32 { - rand::thread_rng().gen::() +fn calculate_word_priority(word: &str, _html_site: &str, words: &[String]) -> u32 { + //atm priority is just the number of occurences in the site. + words.iter().filter(|w| *w == word).count() as u32 } -- cgit 1.4.1