about summary refs log tree commit diff
path: root/indexer
diff options
context:
space:
mode:
Diffstat (limited to 'indexer')
-rw-r--r-- indexer/Cargo.toml 1
-rw-r--r-- indexer/src/indexer_implementation.rs 5
2 files changed, 6 insertions, 0 deletions
diff --git a/indexer/Cargo.toml b/indexer/Cargo.toml
index 7b64bb3..1dfb33f 100644
--- a/indexer/Cargo.toml
+++ b/indexer/Cargo.toml
@@ -14,6 +14,7 @@ html2text = "0.4.3"
 serde = { version = "1.0", features = ["derive", "rc"] }
 serde_json = "1.0.87"
 kuchiki = "0.8.1"
+ngrams = "1.0.1"
 lib = { path = "../lib" }
 
 [[bin]]
diff --git a/indexer/src/indexer_implementation.rs b/indexer/src/indexer_implementation.rs
index f24c2bd..6f12644 100644
--- a/indexer/src/indexer_implementation.rs
+++ b/indexer/src/indexer_implementation.rs
@@ -1,4 +1,5 @@
 use lib::lib::*;
+use ngrams::Ngram;
 use std::collections::{HashMap, HashSet};
 use std::sync::Arc;
 
@@ -36,6 +37,10 @@ impl crate::Indexer for IndexerImplementation {
         content: &str,
     ) -> Result<(), String> {
         for word in words {
+            let ngrams: Vec<_> = word.chars().ngrams(2).pad().collect();
+
+            println!("Ngrams for {}: {:?}", word, ngrams);
+
             let resource_to_add = IndexedResource {
                 url: url.to_string(),
                 priority: Self::calculate_word_priority(word, content, words),