diff options
Diffstat (limited to 'indexer/src/main.rs')
-rw-r--r-- | indexer/src/main.rs | 28 |
1 files changed, 22 insertions, 6 deletions
diff --git a/indexer/src/main.rs b/indexer/src/main.rs
index dcb4b9a..6e41cfb 100644
--- a/indexer/src/main.rs
+++ b/indexer/src/main.rs
@@ -14,8 +14,9 @@ pub trait Indexer {
         &mut self,
         words: &[String],
         url: &str,
-        title: Option<String>,
-        description: Option<String>,
+        title: &Option<String>,
+        description: &Option<String>,
+        language: &Option<String>,
         content: &str,
     ) -> Result<(), String>;
     fn search(&self, term: &str) -> Result<HashSet<IndexedResource>, String>;
@@ -86,7 +87,8 @@ async fn add_resource(
     println!("xd: {:?}", fixed_words);
 
     let title_selector = scraper::Selector::parse("title").unwrap();
-    let description_selector = scraper::Selector::parse("meta").unwrap();
+    let meta_selector = scraper::Selector::parse("meta").unwrap();
+    let html_selector = scraper::Selector::parse("html").unwrap();
 
     let page_title: Option<String> = match document
         .select(&title_selector)
@@ -99,7 +101,7 @@ async fn add_resource(
     };
 
     let page_description: Option<String> = match document
-        .select(&description_selector)
+        .select(&meta_selector)
         .filter(|e| e.value().attr("name") == Some("description"))
         .filter_map(|e| e.value().attr("content"))
         .take(1)
@@ -109,17 +111,31 @@ async fn add_resource(
         string => Some(string),
     };
 
+    //TODO: rewrite with if let else
+    let page_language: Option<String> = match document
+        .select(&html_selector)
+        .filter_map(|e| e.value().attr("lang"))
+        .take(1)
+        .collect::<String>()
+    {
+        s if s.is_empty() => None,
+        string => Some(string),
+    };
+
     //and for each changed content word we add it to the db (word -> list.append(url))
     let mut indexer = data.indexer.lock().unwrap();
     let _ = indexer.insert(
         &fixed_words,
         &resource.url,
-        page_title.clone(),
-        page_description.clone(),
+        &page_title,
+        &page_description,
+        &page_language,
         &resource.content,
     );
 
     //TODO: ADD LANG? EN in meta tag (frontend)
+    //Now what to do, global lang?, per index lang?, website lang?
+    //TODO: max number of results in query
 
     println!("Added resource: {:?}", indexer.num_of_words());
 