diff options
| author | Baitinq <[email protected]> | 2022-11-05 18:41:48 +0100 |
|---|---|---|
| committer | Baitinq <[email protected]> | 2022-11-10 17:56:59 +0100 |
| commit | 93009fd53e58286c6a5e2da600d70a8ec85d9a0b (patch) | |
| tree | 74cc0c9cd1ff89dab3ec82cab7c6b56d015f609f /indexer/src | |
| parent | Indexer: Switch back to not serving frontend with actix (diff) | |
| download | OSSE-93009fd53e58286c6a5e2da600d70a8ec85d9a0b.tar.gz OSSE-93009fd53e58286c6a5e2da600d70a8ec85d9a0b.tar.bz2 OSSE-93009fd53e58286c6a5e2da600d70a8ec85d9a0b.zip | |
Indexer: Ngrams ngrams_indexer
Diffstat (limited to 'indexer/src')
| -rw-r--r-- | indexer/src/indexer_implementation.rs | 5 |
1 file changed, 5 insertions, 0 deletions
```diff
diff --git a/indexer/src/indexer_implementation.rs b/indexer/src/indexer_implementation.rs
index f24c2bd..6f12644 100644
--- a/indexer/src/indexer_implementation.rs
+++ b/indexer/src/indexer_implementation.rs
@@ -1,4 +1,5 @@
 use lib::lib::*;
+use ngrams::Ngram;
 use std::collections::{HashMap, HashSet};
 use std::sync::Arc;
 
@@ -36,6 +37,10 @@ impl crate::Indexer for IndexerImplementation {
         content: &str,
     ) -> Result<(), String> {
         for word in words {
+            let ngrams: Vec<_> = word.chars().ngrams(2).pad().collect();
+
+            println!("Ngrams for {}: {:?}", word, ngrams);
+
             let resource_to_add = IndexedResource {
                 url: url.to_string(),
                 priority: Self::calculate_word_priority(word, content, words),
```