From 4e4b9e48fb779ea2a94bed7207a0e179de4e4484 Mon Sep 17 00:00:00 2001 From: Baitinq Date: Sun, 6 Nov 2022 23:23:19 +0100 Subject: Indexer: Decode html entities for website title and description Maybe we should do it for all the website's content too? :)) --- indexer/Cargo.toml | 2 +- indexer/src/main.rs | 7 ++----- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'indexer') diff --git a/indexer/Cargo.toml b/indexer/Cargo.toml index b94f004..b437edf 100644 --- a/indexer/Cargo.toml +++ b/indexer/Cargo.toml @@ -10,7 +10,7 @@ actix-web = "4.2.1" actix-web-lab = "0.18.5" actix-cors = "0.6.3" scraper = "0.12.0" -html2text = "0.4.3" +html-escape = "0.2.12" serde = { version = "1.0", features = ["derive", "rc"] } serde_json = "1.0.87" kuchiki = "0.8.1" diff --git a/indexer/src/main.rs b/indexer/src/main.rs index 515062d..70a7649 100644 --- a/indexer/src/main.rs +++ b/indexer/src/main.rs @@ -55,9 +55,6 @@ async fn serve_http_endpoint(address: &str, port: u16) -> std::io::Result<()> { .await } -//TODO: Max description size -//TODO: Current result below search bar updates with it -//TODO: Remove html symbols italic and stuff in frontend (or apply them?) //TODO: Better readme //TODO: sufficiently simmilar word in search (algorithm) @@ -105,7 +102,7 @@ async fn add_resource( .collect::<String>() { s if s.is_empty() => None, - string => Some(string), + string => Some(html_escape::decode_html_entities(&string).to_string()), }; let page_description: Option<String> = match document @@ -116,7 +113,7 @@ async fn add_resource( .collect::<String>() { s if s.is_empty() => None, - string => Some(string), + string => Some(html_escape::decode_html_entities(&string).to_string()), }; //TODO: rewrite with if let else -- cgit 1.4.1