about summary refs log tree commit diff
path: root/indexer/Cargo.toml
diff options
context:
space:
mode:
author: Baitinq <manuelpalenzuelamerino@gmail.com> 2022-10-30 23:08:16 +0100
committer: Baitinq <manuelpalenzuelamerino@gmail.com> 2022-10-30 23:14:37 +0100
commit: 1a26c40191bda843a0500f12bbb7d67b3e8c238e (patch)
tree: 55cc8cb2878c82e29bef0e7e65f7b03be81ab39d /indexer/Cargo.toml
parent: Crawler: Set 4 as the maximum "crawl depth" (diff)
download: OSSE-1a26c40191bda843a0500f12bbb7d67b3e8c238e.tar.gz
OSSE-1a26c40191bda843a0500f12bbb7d67b3e8c238e.tar.bz2
OSSE-1a26c40191bda843a0500f12bbb7d67b3e8c238e.zip
Indexer: Use kuchiki to split html content into words
This is better than html2text when handling non-ASCII characters.
Diffstat (limited to 'indexer/Cargo.toml')
-rw-r--r-- indexer/Cargo.toml 1
1 file changed, 1 insertion, 0 deletions
diff --git a/indexer/Cargo.toml b/indexer/Cargo.toml
index 28e6f17..2c8f905 100644
--- a/indexer/Cargo.toml
+++ b/indexer/Cargo.toml
@@ -11,6 +11,7 @@ actix-cors = "0.6.3"
 scraper = "0.12.0"
 html2text = "0.4.3"
 serde_json = "1.0.87"
+kuchiki = "0.8.1"
 lib = { path = "../lib" }
 
 [[bin]]