about summary refs log tree commit diff
path: root/indexer/src
diff options
context:
space:
mode:
Diffstat (limited to 'indexer/src')
-rw-r--r-- indexer/src/indexer_implementation.rs 10
-rw-r--r-- indexer/src/main.rs 28
2 files changed, 28 insertions, 10 deletions
diff --git a/indexer/src/indexer_implementation.rs b/indexer/src/indexer_implementation.rs
index e3f0495..f24c2bd 100644
--- a/indexer/src/indexer_implementation.rs
+++ b/indexer/src/indexer_implementation.rs
@@ -30,8 +30,9 @@ impl crate::Indexer for IndexerImplementation {
         &mut self,
         words: &[String],
         url: &str,
-        title: Option<String>,
-        description: Option<String>,
+        title: &Option<String>,
+        description: &Option<String>,
+        language: &Option<String>,
         content: &str,
     ) -> Result<(), String> {
         for word in words {
@@ -39,8 +40,9 @@ impl crate::Indexer for IndexerImplementation {
                 url: url.to_string(),
                 priority: Self::calculate_word_priority(word, content, words),
                 word: Arc::new(word.to_string()),
-                title: title.as_ref().map(String::from),
-                description: description.as_ref().map(String::from),
+                title: title.clone(),
+                description: description.clone(),
+                language: language.clone(),
             };
 
             match self.database.get_mut(word) {
diff --git a/indexer/src/main.rs b/indexer/src/main.rs
index dcb4b9a..6e41cfb 100644
--- a/indexer/src/main.rs
+++ b/indexer/src/main.rs
@@ -14,8 +14,9 @@ pub trait Indexer {
         &mut self,
         words: &[String],
         url: &str,
-        title: Option<String>,
-        description: Option<String>,
+        title: &Option<String>,
+        description: &Option<String>,
+        language: &Option<String>,
         content: &str,
     ) -> Result<(), String>;
     fn search(&self, term: &str) -> Result<HashSet<IndexedResource>, String>;
@@ -86,7 +87,8 @@ async fn add_resource(
     println!("xd: {:?}", fixed_words);
 
     let title_selector = scraper::Selector::parse("title").unwrap();
-    let description_selector = scraper::Selector::parse("meta").unwrap();
+    let meta_selector = scraper::Selector::parse("meta").unwrap();
+    let html_selector = scraper::Selector::parse("html").unwrap();
 
     let page_title: Option<String> = match document
         .select(&title_selector)
@@ -99,7 +101,7 @@ async fn add_resource(
     };
 
     let page_description: Option<String> = match document
-        .select(&description_selector)
+        .select(&meta_selector)
         .filter(|e| e.value().attr("name") == Some("description"))
         .filter_map(|e| e.value().attr("content"))
         .take(1)
@@ -109,17 +111,31 @@ async fn add_resource(
         string => Some(string),
     };
 
+    //TODO: rewrite with if let else
+    let page_language: Option<String> = match document
+        .select(&html_selector)
+        .filter_map(|e| e.value().attr("lang"))
+        .take(1)
+        .collect::<String>()
+    {
+        s if s.is_empty() => None,
+        string => Some(string),
+    };
+
     //and for each changed content word we add it to the db (word -> list.append(url))
     let mut indexer = data.indexer.lock().unwrap();
     let _ = indexer.insert(
         &fixed_words,
         &resource.url,
-        page_title.clone(),
-        page_description.clone(),
+        &page_title,
+        &page_description,
+        &page_language,
         &resource.content,
     );
 
     //TODO: ADD LANG? EN in meta tag (frontend)
+    //Now what to do, global lang?, per index lang?, website lang?
+    //TODO: max number of results in query
 
     println!("Added resource: {:?}", indexer.num_of_words());