From ab126ea4df01af6ca7da2118740fca8ffd22ac37 Mon Sep 17 00:00:00 2001 From: Baitinq Date: Wed, 2 Nov 2022 21:39:32 +0100 Subject: Lib+Indexer: Make IndexedResource title and description Optional --- indexer/src/indexer_implementation.rs | 8 ++++---- indexer/src/main.rs | 24 ++++++++++++++++-------- 2 files changed, 20 insertions(+), 12 deletions(-) (limited to 'indexer/src') diff --git a/indexer/src/indexer_implementation.rs b/indexer/src/indexer_implementation.rs index 4bb3857..d5cfead 100644 --- a/indexer/src/indexer_implementation.rs +++ b/indexer/src/indexer_implementation.rs @@ -30,8 +30,8 @@ impl crate::Indexer for IndexerImplementation { &mut self, word: &str, url: &str, - title: &str, - description: &str, + title: Option<String>, + description: Option<String>, content: &str, fixed_words: &[String], ) -> Result<(), String> { @@ -39,8 +39,8 @@ impl crate::Indexer for IndexerImplementation { url: url.to_string(), priority: Self::calculate_word_priority(word, content, fixed_words), word: Arc::new(word.to_string()), - title: title.to_string(), - description: description.to_string(), + title: title.map(String::from), + description: description.map(String::from), }; match self.database.get_mut(word) { diff --git a/indexer/src/main.rs b/indexer/src/main.rs index 289789c..9467cff 100644 --- a/indexer/src/main.rs +++ b/indexer/src/main.rs @@ -14,8 +14,8 @@ pub trait Indexer { &mut self, word: &str, url: &str, - title: &str, - description: &str, + title: Option<String>, + description: Option<String>, content: &str, fixed_words: &[String], ) -> Result<(), String>; @@ -89,18 +89,26 @@ async fn add_resource( let title_selector = scraper::Selector::parse("title").unwrap(); let description_selector = scraper::Selector::parse("meta").unwrap(); - let page_title: String = document + let page_title: Option<String> = match document .select(&title_selector) .map(|e| e.inner_html()) .take(1) - .collect(); + .collect::<String>() + { + s if s.is_empty() => None, + string => Some(string), + }; - let page_description: String = 
document + let page_description: Option<String> = match document .select(&description_selector) .filter(|e| e.value().attr("name") == Some("description")) .filter_map(|e| e.value().attr("content")) .take(1) - .collect(); + .collect::<String>() + { + s if s.is_empty() => None, + string => Some(string), + }; //and for each changed content word we add it to the db (word -> list.append(url)) let mut indexer = data.indexer.lock().unwrap(); @@ -108,8 +116,8 @@ async fn add_resource( let _ = indexer.insert( word, &resource.url, - &page_title, - &page_description, + page_title.clone(), + page_description.clone(), &resource.content, &fixed_words, ); -- cgit 1.4.1