| author | Baitinq <manuelpalenzuelamerino@gmail.com> | 2022-10-30 23:16:05 +0100 |
|---|---|---|
| committer | Baitinq <manuelpalenzuelamerino@gmail.com> | 2022-10-30 23:59:47 +0100 |
| commit | 0ec4263cc70f7b4d236dc9c03816fcba88a3a476 (patch) | |
| tree | 2d62c8802f69a356d34debd2965322ad8764cc7c | |
| parent | Indexer: Use kuchiki to split html content into words (diff) | |
| download | OSSE-0ec4263cc70f7b4d236dc9c03816fcba88a3a476.tar.gz OSSE-0ec4263cc70f7b4d236dc9c03816fcba88a3a476.tar.bz2 OSSE-0ec4263cc70f7b4d236dc9c03816fcba88a3a476.zip | |
Misc: Cargo fmt
| -rw-r--r-- | crawler/src/main.rs | 12 |
| -rw-r--r-- | frontend/src/app.rs | 27 |
| -rw-r--r-- | frontend/src/main.rs | 3 |
| -rw-r--r-- | indexer/src/main.rs | 4 |
| -rw-r--r-- | lib/src/lib.rs | 9 |
5 files changed, 30 insertions, 25 deletions
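Everything below is mechanical `rustfmt` output: imports are reordered, over-long bindings are re-wrapped at the tool's default 100-column `max_width`, and match arms with block bodies lose their trailing commas. As a minimal, self-contained sketch of the two re-wrapping rules (the identifiers are stand-ins, not the project's own):

```rust
fn main() {
    let max_queue_size = 16;
    let root_urls = vec!["https://example.com"];

    // Rule 1: a binding too long for rustfmt's default 100-column
    // max_width is broken after the `=` and the value indented.
    let capacity_with_room_for_every_root_url =
        std::cmp::max(max_queue_size, root_urls.len());

    // Rule 2: match arms whose body is a block get no trailing comma
    // (match_block_trailing_comma defaults to false); expression arms keep it.
    let label = match capacity_with_room_for_every_root_url {
        0 => "empty",
        n if n < 16 => {
            println!("queue of {}", n);
            "small"
        } // rustfmt removes the `,` that used to follow this brace
        _ => "large",
    };

    println!("{}", label);
}
```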
```diff
diff --git a/crawler/src/main.rs b/crawler/src/main.rs
index d7a19a4..7e7f397 100644
--- a/crawler/src/main.rs
+++ b/crawler/src/main.rs
@@ -1,8 +1,8 @@
 use itertools::Itertools;
+use lib::lib::*;
 use rand::seq::IteratorRandom;
 use reqwest::{Client, Response, StatusCode};
 use url::Url;
-use lib::lib::*;
 
 #[tokio::main]
 async fn main() {
@@ -18,12 +18,12 @@ async fn main() {
     crawler(http_client, root_urls, max_queue_size).await;
 }
 
-//TODO: crawling depth? - async http client
 async fn crawler(http_client: Client, root_urls: Vec<&str>, max_queue_size: usize) {
     dbg!("Starting to crawl!");
 
-    //add root urls to queue - TODO: max q size
-    let (tx_crawling_queue, rx_crawling_queue) = async_channel::bounded::<String>(std::cmp::max(max_queue_size, root_urls.len()));
+    //add root urls to queue
+    let (tx_crawling_queue, rx_crawling_queue) =
+        async_channel::bounded::<String>(std::cmp::max(max_queue_size, root_urls.len()));
     for url in root_urls {
         tx_crawling_queue.send(url.to_string()).await.unwrap();
     }
@@ -47,7 +47,7 @@ async fn crawler(http_client: Client, root_urls: Vec<&str>, max_queue_size: usiz
 
         //DONT FORGET ENUMS
         //CAN WE DO UNWRAP OR RETURN or lambda
-        //HOW TF DOES CRAWLER WORK. DOESNT QUEUE FILL. LOTS OF WAITING THINGS??
+        //HOW DOES CRAWLER WORK. DOESNT QUEUE FILL. LOTS OF WAITING THINGS??
         //dbg!("Content: {:?}", &content);
 
         dbg!("Next urls: {:?}", &crawled_urls);
@@ -100,7 +100,7 @@ async fn crawl_url(http_client: &Client, url: &str) -> Result<(String, Vec<Strin
             u if u.fragment().is_some() => false, //no # urls
             u if u.query().is_some() => false,    //no ? urls
             u if u.path_segments().is_some() && u.path_segments().unwrap().count() > 4 => false, // max "crawling depth" is 4
-            u if *u == url => false, //no same url
+            u if *u == url => false,              //no same url
             _ => true,
         };
diff --git a/frontend/src/app.rs b/frontend/src/app.rs
index efa0312..024ea6c 100644
--- a/frontend/src/app.rs
+++ b/frontend/src/app.rs
@@ -1,11 +1,11 @@
+use crate::Route;
 use gloo_net::http::Request;
 use itertools::Itertools;
+use lib::lib::*;
 use wasm_bindgen::*;
 use web_sys::{EventTarget, HtmlInputElement};
 use yew::prelude::*;
-use lib::lib::*;
 use yew_router::scope_ext::RouterScopeExt;
-use crate::Route;
 
 #[derive(Properties, Clone, PartialEq, Eq)]
 pub struct ResultComponentProps {
@@ -34,6 +34,7 @@
     pub initial_search_query: Option<String>,
 }
 
+//TODO: Error
 pub enum OSSEMessage {
     SearchSubmitted,
     SearchChanged(String),
@@ -44,7 +45,7 @@
 impl Component for OSSE {
     type Message = OSSEMessage;
     type Properties = OSSEProps;
 
-    //TODO: No code duplication for fetching in create() and update() - NEED TO URL ENCODE AND DECODE SEARCH QUERY
+    //TODO: No code duplication for fetching in create() and update()
     fn create(ctx: &Context<Self>) -> Self {
         let mut search_query = String::from("");
@@ -68,7 +69,10 @@ impl Component for OSSE {
         }
 
         OSSE {
-            search_query: urlencoding::decode(search_query.as_str()).to_owned().unwrap().to_string(),
+            search_query: urlencoding::decode(search_query.as_str())
+                .to_owned()
+                .unwrap()
+                .to_string(),
             results: None,
         }
     }
@@ -80,14 +84,17 @@ impl Component for OSSE {
                 let api_endpoint = ctx.props().api_endpoint.clone();
                 let search_query = self.search_query.clone();
                 let navigator = ctx.link().navigator().unwrap();
 
-                navigator.push(&Route::OSSESearch { query: urlencoding::encode(search_query.as_str()).to_string() });
+                navigator.push(&Route::OSSESearch {
+                    query: urlencoding::encode(search_query.as_str()).to_string(),
+                });
 
                 ctx.link().send_future(async move {
                     let endpoint = format!("{}/search/{}", api_endpoint, search_query);
                     let fetched_response = Request::get(endpoint.as_str()).send().await.unwrap();
 
-                    let fetched_results: Vec<IndexedResource> = match fetched_response.json().await {
+                    let fetched_results: Vec<IndexedResource> = match fetched_response.json().await
+                    {
                         Err(e) => panic!("Im panic: {}", e),
                         Ok(json) => json,
                     };
@@ -96,17 +103,17 @@ impl Component for OSSE {
                 });
 
                 false
-            },
+            }
             OSSEMessage::SearchChanged(search_query) => {
                 self.search_query = search_query;
 
                 true
-            },
+            }
             OSSEMessage::SearchFinished(search_results) => {
                 self.results = Some(search_results);
 
                 true
-            },
+            }
         }
     }
 
@@ -204,4 +211,4 @@ impl Component for OSSE {
 }
 
 //Your favorite search engine in navbar
-//Search in middle
\ No newline at end of file
+//Search in middle
diff --git a/frontend/src/main.rs b/frontend/src/main.rs
index 7f73cf7..6732466 100644
--- a/frontend/src/main.rs
+++ b/frontend/src/main.rs
@@ -1,9 +1,8 @@
 mod app;
 
+use app::OSSE;
 use yew::prelude::*;
 use yew_router::prelude::*;
 
-use app::OSSE;
-
 #[derive(Clone, Routable, PartialEq)]
 enum Route {
diff --git a/indexer/src/main.rs b/indexer/src/main.rs
index 1df3cf5..8b2e54d 100644
--- a/indexer/src/main.rs
+++ b/indexer/src/main.rs
@@ -1,9 +1,9 @@
 use actix_cors::Cors;
 use actix_web::{get, post, web, App, HttpServer, Responder};
+use kuchiki::traits::TendrilSink;
+use lib::lib::*;
 use std::collections::{HashMap, HashSet};
 use std::sync::{Arc, Mutex};
-use lib::lib::*;
-use kuchiki::traits::TendrilSink;
 
 struct AppState {
     database: Mutex<HashMap<String, HashSet<IndexedResource>>>,
diff --git a/lib/src/lib.rs b/lib/src/lib.rs
index 2f0b750..8a006f6 100644
--- a/lib/src/lib.rs
+++ b/lib/src/lib.rs
@@ -1,9 +1,9 @@
 pub mod lib {
-    use serde::{Serialize,Deserialize};
-    use std::sync::Arc;
-    use std::hash::{Hash, Hasher};
+    use serde::{Deserialize, Serialize};
     use std::cmp::Ordering;
+    use std::hash::{Hash, Hasher};
+    use std::sync::Arc;
 
     #[derive(Serialize, Deserialize, Debug)]
     pub struct CrawledResource {
@@ -47,5 +47,4 @@ pub mod lib {
             self.word.hash(state);
         }
     }
-
-}
\ No newline at end of file
+}
```
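The import churn in all five files is the same default behaviour: `rustfmt` sorts `use` items alphabetically and sorts the names inside braces, which is why `use lib::lib::*;` moves to its alphabetical slot in each file and `{Serialize,Deserialize}` becomes `{Deserialize, Serialize}`; it also terminates the two files that lacked a final newline (the `\ No newline at end of file` markers above). A small compilable sketch of the sorted style, with a filler body so it runs:

```rust
// After rustfmt: `use` lines sorted alphabetically, brace contents too.
use std::cmp::Ordering;
use std::hash::{Hash, Hasher};
use std::sync::Arc;

fn main() {
    let mut hasher = std::collections::hash_map::DefaultHasher::new();
    "OSSE".hash(&mut hasher); // needs the `Hash` trait in scope
    let shared = Arc::new(Ordering::Equal);
    println!("hash = {}, ordering = {:?}", hasher.finish(), shared);
}
```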