about summary refs log tree commit diff
path: root/indexer/src/main.rs
blob: 43a5f7f934950fdf6f665d69e636d3c77521d51c (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
use actix_web::{get, post, web, App, HttpServer, Responder};
use serde::Deserialize;
use std::collections::{HashMap, HashSet};
use std::sync::Mutex;

/// State shared between all HTTP handlers.
struct AppState {
    /// In-memory inverted index: maps a lowercase word to the set of resource
    /// URLs whose content contained that word. Guarded by a mutex because the
    /// handlers both read and mutate it.
    database: Mutex<HashMap<String, HashSet<String>>>,
}

/// Program entry point: prints a startup banner and serves the indexer's HTTP
/// API on all interfaces, port 4444, returning whatever I/O result the server
/// finishes with.
#[actix_web::main]
async fn main() -> std::io::Result<()> {
    println!("Hello, world! Im the indexer!");

    let (listen_address, listen_port) = ("0.0.0.0", 4444);
    serve_http_endpoint(listen_address, listen_port).await
}

/// Creates an empty index and serves the search (`greet`) and ingestion
/// (`add_resource`) handlers over HTTP on `address:port`.
///
/// # Errors
///
/// Returns the I/O error produced when binding to the address fails, or the
/// one the running server terminates with.
async fn serve_http_endpoint(address: &str, port: u16) -> std::io::Result<()> {
    // A single index instance; handlers receive it through `web::Data`.
    let index_state = web::Data::new(AppState {
        database: Mutex::new(HashMap::new()),
    });

    // The factory owns the handle and gives a clone to every `App` it builds.
    let app_factory = move || {
        App::new()
            .app_data(index_state.clone())
            .service(greet)
            .service(add_resource)
    };

    let server = HttpServer::new(app_factory).bind((address, port))?;
    server.run().await
}

/// JSON body accepted by `POST /resource`: one document to add to the index.
#[derive(Deserialize, Debug)]
struct Resource {
    /// Identifier stored in the index for every word found in `content`.
    url: String,
    /// Raw document body; it is converted to plain text with `html2text`
    /// before being split into words.
    content: String,
}

/// `POST /resource` — indexes a document.
///
/// The submitted content is rendered to plain text, split into
/// whitespace-separated tokens, and every token made up solely of ASCII
/// letters is lowercased and recorded in the shared index as
/// `word -> {urls containing it}`. Tokens containing any other character
/// (digits, punctuation, non-ASCII letters) are skipped entirely.
///
/// Responds with the `Debug` rendering of the received resource.
///
/// # Panics
///
/// Panics if the index mutex is poisoned.
#[post("/resource")]
async fn add_resource(data: web::Data<AppState>, resource: web::Json<Resource>) -> impl Responder {
    // Render the markup to plain text, using the content length as the wrap width.
    let text = html2text::from_read(resource.content.as_bytes(), resource.content.len());

    // Split on *any* whitespace: the rendered text separates blocks and wrapped
    // lines with newlines, and splitting on ' ' alone would glue the words
    // around each newline into one token that the alphabetic filter then drops.
    let fixed_words: Vec<String> = text
        .split_whitespace()
        .filter(|w| w.chars().all(|c| c.is_ascii_alphabetic()))
        .map(|w| w.to_ascii_lowercase())
        .collect();

    println!("xd: {:?}", fixed_words);

    // Keep the lock only for the duration of the update, not while logging.
    let indexed_word_count = {
        let mut database = data.database.lock().unwrap();
        for word in fixed_words {
            //should probs do some priority
            database
                .entry(word)
                .or_default()
                .insert(resource.url.clone());
        }
        database.len()
    };

    println!("Added resource! {:?}", indexed_word_count);
    format!("{:?}", resource)
}

/// `GET /search/{term}` — looks up resources containing every word of `term`.
///
/// `term` is split on whitespace; each word is lowercased (the index only
/// stores lowercase words) and looked up. The response lists the URLs present
/// in the result set of *all* words. If any word is not in the index, or the
/// term contains no words at all, a "No results found" message is returned
/// instead.
///
/// # Panics
///
/// Panics if the index mutex is poisoned.
#[get("/search/{term}")]
async fn greet(data: web::Data<AppState>, term: web::Path<String>) -> impl Responder {
    let database = data.database.lock().unwrap();

    let mut valid_results: Option<HashSet<String>> = None;
    // `split_whitespace` ignores leading/trailing/repeated separators, so no
    // empty "words" are looked up.
    for w in term.split_whitespace() {
        // Normalise the same way the indexer does, but report the word as typed.
        let curr_word_results = match database.get(&w.to_ascii_lowercase()) {
            Some(urls) => urls,
            None => return format!("No results found for {:?}!", w),
        };

        valid_results = Some(match valid_results {
            // First word: start from a copy of its URL set.
            None => curr_word_results.clone(),
            // Later words: narrow the running set in place.
            Some(mut results) => {
                results.retain(|url| curr_word_results.contains(url));
                results
            }
        });
    }

    match valid_results {
        Some(results) => format!("Searching for: {term}\nResults: {:?}", results),
        // Only reachable when the term held no words (e.g. only spaces).
        None => format!("No results found for {:?}!", ""),
    }
}