Indexer: Implement basic priority calculation of words in a site

We just calculate priority as the number of occurrences of the word in the site. This is very basic and should be changed :))
author: Baitinq <manuelpalenzuelamerino@gmail.com> 2022-10-28 20:41:37 +0200
committer: Baitinq <manuelpalenzuelamerino@gmail.com> 2022-10-29 00:17:14 +0200
commit: d1df551df660484b7de2a05e764b76a0b2bdd03b (patch)
tree: 750684cdd9377bd9a8a817f6c57295fd150e3af8
parent: Frontend: Show results in reverse order with priority (diff)
download: OSSE-d1df551df660484b7de2a05e764b76a0b2bdd03b.tar.gz
OSSE-d1df551df660484b7de2a05e764b76a0b2bdd03b.tar.bz2
OSSE-d1df551df660484b7de2a05e764b76a0b2bdd03b.zip
1 files changed, 6 insertions, 7 deletions
diff --git a/indexer/src/main.rs b/indexer/src/main.rs
index 36eabff..37a7256 100644
--- a/indexer/src/main.rs
+++ b/indexer/src/main.rs
@@ -1,6 +1,5 @@
 use actix_cors::Cors;
 use actix_web::{get, post, web, App, HttpServer, Responder};
-use rand::Rng;
 use serde::{Deserialize, Serialize};
 use std::collections::{HashMap, HashSet};
 use std::hash::{Hash, Hasher};
@@ -101,16 +100,16 @@ async fn add_resource(data: web::Data<AppState>, resource: web::Json<Resource>)
 
     //and for each changed content word we add it to the db (word -> list.append(url))
     let mut database = data.database.lock().unwrap();
-    for word in fixed_words {
+    for word in &fixed_words {
         let resource_to_add = CrawledResource {
             url: resource.url.clone(),
-            priority: calculate_word_priority(&word, resource.content.as_str()),
+            priority: calculate_word_priority(word, resource.content.as_str(), &fixed_words),
             word: Arc::new(word.clone()),
             title: page_title.clone(),
             description: page_description.clone(),
         };
 
-        match database.get_mut(&word) {
+        match database.get_mut(word) {
             Some(resources) => _ = resources.insert(resource_to_add),
             None => _ = database.insert(word.clone(), HashSet::from([resource_to_add])),
         }
@@ -163,7 +162,7 @@ fn search_word_in_db(
     db.get(&word)
 }
 
-//TODO!
-fn calculate_word_priority(_word: &str, _html_site: &str) -> u32 {
-    rand::thread_rng().gen::<u32>()
+fn calculate_word_priority(word: &str, _html_site: &str, words: &[String]) -> u32 {
+    //atm priority is just the number of occurences in the site.
+    words.iter().filter(|w| *w == word).count() as u32
 }
author	Baitinq <manuelpalenzuelamerino@gmail.com>	2022-10-28 20:41:37 +0200
committer	Baitinq <manuelpalenzuelamerino@gmail.com>	2022-10-29 00:17:14 +0200
commit	d1df551df660484b7de2a05e764b76a0b2bdd03b (patch)
tree	750684cdd9377bd9a8a817f6c57295fd150e3af8
parent	Frontend: Show results in reverse order with priority (diff)
download	OSSE-d1df551df660484b7de2a05e764b76a0b2bdd03b.tar.gz OSSE-d1df551df660484b7de2a05e764b76a0b2bdd03b.tar.bz2 OSSE-d1df551df660484b7de2a05e764b76a0b2bdd03b.zip