about summary refs log tree commit diff
diff options
context:
space:
mode:
author Baitinq <manuelpalenzuelamerino@gmail.com> 2022-10-25 14:12:20 +0200
committer Baitinq <manuelpalenzuelamerino@gmail.com> 2022-10-25 14:12:20 +0200
commit cfb52a8a13fdd5c75518503acd7f2d723e7763a1 (patch)
tree d53f7d07c9b9171fea615018f22add1fe7ad4f46
parent Indexer: Use CrawledResource structure as values in the reverse index db (diff)
download OSSE-cfb52a8a13fdd5c75518503acd7f2d723e7763a1.tar.gz
OSSE-cfb52a8a13fdd5c75518503acd7f2d723e7763a1.tar.bz2
OSSE-cfb52a8a13fdd5c75518503acd7f2d723e7763a1.zip
Crawler: Use async Client
-rw-r--r-- Cargo.lock 179
-rw-r--r-- crawler/Cargo.toml 2
-rw-r--r-- crawler/src/main.rs 17
-rw-r--r-- frontend/Cargo.toml 2
4 files changed, 152 insertions, 48 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 8d50a21..e579f8b 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -16,7 +16,7 @@ dependencies = [
  "memchr",
  "pin-project-lite 0.2.9",
  "tokio 1.21.2",
- "tokio-util",
+ "tokio-util 0.7.4",
 ]
 
 [[package]]
@@ -39,10 +39,10 @@ dependencies = [
  "encoding_rs",
  "flate2",
  "futures-core",
- "h2",
+ "h2 0.3.14",
  "http",
  "httparse",
- "httpdate",
+ "httpdate 1.0.2",
  "itoa 1.0.4",
  "language-tags",
  "local-channel",
@@ -102,7 +102,7 @@ dependencies = [
  "futures-util",
  "mio 0.8.4",
  "num_cpus",
- "socket2",
+ "socket2 0.4.7",
  "tokio 1.21.2",
  "tracing",
 ]
@@ -164,7 +164,7 @@ dependencies = [
  "serde_json",
  "serde_urlencoded",
  "smallvec",
- "socket2",
+ "socket2 0.4.7",
  "time",
  "url",
 ]
@@ -824,6 +824,26 @@ dependencies = [
 
 [[package]]
 name = "h2"
+version = "0.2.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5e4728fd124914ad25e99e3d15a9361a879f6620f63cb56bbb08f95abb97a535"
+dependencies = [
+ "bytes 0.5.6",
+ "fnv",
+ "futures-core",
+ "futures-sink",
+ "futures-util",
+ "http",
+ "indexmap",
+ "slab",
+ "tokio 0.2.25",
+ "tokio-util 0.3.1",
+ "tracing",
+ "tracing-futures",
+]
+
+[[package]]
+name = "h2"
 version = "0.3.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5ca32592cf21ac7ccab1825cd87f6c9b3d9022c44d086172ed0966bec8af30be"
@@ -837,7 +857,7 @@ dependencies = [
  "indexmap",
  "slab",
  "tokio 1.21.2",
- "tokio-util",
+ "tokio-util 0.7.4",
  "tracing",
 ]
 
@@ -910,13 +930,12 @@ dependencies = [
 
 [[package]]
 name = "http-body"
-version = "0.4.5"
+version = "0.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1"
+checksum = "13d5ff830006f7646652e057693569bfe0d51760c0085a071769d142a205111b"
 dependencies = [
- "bytes 1.2.1",
+ "bytes 0.5.6",
  "http",
- "pin-project-lite 0.2.9",
 ]
 
 [[package]]
@@ -927,29 +946,35 @@ checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904"
 
 [[package]]
 name = "httpdate"
+version = "0.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "494b4d60369511e7dea41cf646832512a94e542f68bb9c49e54518e0f468eb47"
+
+[[package]]
+name = "httpdate"
 version = "1.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421"
 
 [[package]]
 name = "hyper"
-version = "0.14.20"
+version = "0.13.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "02c929dc5c39e335a03c405292728118860721b10190d98c2a0f0efd5baafbac"
+checksum = "8a6f157065790a3ed2f88679250419b5cdd96e714a0d65f7797fd337186e96bb"
 dependencies = [
- "bytes 1.2.1",
+ "bytes 0.5.6",
  "futures-channel",
  "futures-core",
  "futures-util",
- "h2",
+ "h2 0.2.7",
  "http",
  "http-body",
  "httparse",
- "httpdate",
- "itoa 1.0.4",
- "pin-project-lite 0.2.9",
- "socket2",
- "tokio 1.21.2",
+ "httpdate 0.3.2",
+ "itoa 0.4.8",
+ "pin-project",
+ "socket2 0.3.19",
+ "tokio 0.2.25",
  "tower-service",
  "tracing",
  "want",
@@ -957,15 +982,15 @@ dependencies = [
 
 [[package]]
 name = "hyper-tls"
-version = "0.5.0"
+version = "0.4.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905"
+checksum = "d979acc56dcb5b8dddba3917601745e877576475aa046df3226eabdecef78eed"
 dependencies = [
- "bytes 1.2.1",
+ "bytes 0.5.6",
  "hyper",
  "native-tls",
- "tokio 1.21.2",
- "tokio-native-tls",
+ "tokio 0.2.25",
+ "tokio-tls",
 ]
 
 [[package]]
@@ -1180,6 +1205,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d"
 
 [[package]]
+name = "mime_guess"
+version = "2.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4192263c238a5f0d0c6bfd21f336a313a4ce1c450542449ca191bb657b4642ef"
+dependencies = [
+ "mime",
+ "unicase",
+]
+
+[[package]]
 name = "miniz_oxide"
 version = "0.5.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -1502,6 +1537,26 @@ dependencies = [
 ]
 
 [[package]]
+name = "pin-project"
+version = "1.0.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ad29a609b6bcd67fee905812e544992d216af9d755757c05ed2d0e15a74c6ecc"
+dependencies = [
+ "pin-project-internal",
+]
+
+[[package]]
+name = "pin-project-internal"
+version = "1.0.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "069bdb1e05adc7a8990dce9cc75370895fbe4e3d58b9b73bf1aee56359344a55"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
 name = "pin-project-lite"
 version = "0.1.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -1703,34 +1758,33 @@ dependencies = [
 
 [[package]]
 name = "reqwest"
-version = "0.11.12"
+version = "0.10.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "431949c384f4e2ae07605ccaa56d1d9d2ecdb5cadd4f9577ccfab29f2e5149fc"
+checksum = "0718f81a8e14c4dbb3b34cf23dc6aaf9ab8a0dfec160c534b3dbca1aaa21f47c"
 dependencies = [
  "base64",
- "bytes 1.2.1",
+ "bytes 0.5.6",
  "encoding_rs",
  "futures-core",
  "futures-util",
- "h2",
  "http",
  "http-body",
  "hyper",
  "hyper-tls",
  "ipnet",
  "js-sys",
+ "lazy_static",
  "log",
  "mime",
+ "mime_guess",
  "native-tls",
- "once_cell",
  "percent-encoding",
  "pin-project-lite 0.2.9",
  "serde",
  "serde_json",
  "serde_urlencoded",
- "tokio 1.21.2",
- "tokio-native-tls",
- "tower-service",
+ "tokio 0.2.25",
+ "tokio-tls",
  "url",
  "wasm-bindgen",
  "wasm-bindgen-futures",
@@ -1936,6 +1990,17 @@ checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0"
 
 [[package]]
 name = "socket2"
+version = "0.3.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "122e570113d28d773067fab24266b66753f6ea915758651696b6e35e49f88d6e"
+dependencies = [
+ "cfg-if 1.0.0",
+ "libc",
+ "winapi 0.3.9",
+]
+
+[[package]]
+name = "socket2"
 version = "0.4.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "02e2d2db9033d13a1567121ddd7a095ee144db4e1ca1b1bda3419bc0da294ebd"
@@ -2106,11 +2171,10 @@ dependencies = [
  "libc",
  "memchr",
  "mio 0.8.4",
- "num_cpus",
  "parking_lot",
  "pin-project-lite 0.2.9",
  "signal-hook-registry",
- "socket2",
+ "socket2 0.4.7",
  "winapi 0.3.9",
 ]
 
@@ -2126,13 +2190,27 @@ dependencies = [
 ]
 
 [[package]]
-name = "tokio-native-tls"
-version = "0.3.0"
+name = "tokio-tls"
+version = "0.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f7d995660bd2b7f8c1568414c1126076c13fbb725c40112dc0120b78eb9b717b"
+checksum = "9a70f4fcd7b3b24fb194f837560168208f669ca8cb70d0c4b862944452396343"
 dependencies = [
  "native-tls",
- "tokio 1.21.2",
+ "tokio 0.2.25",
+]
+
+[[package]]
+name = "tokio-util"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "be8242891f2b6cbef26a2d7e8605133c2c554cd35b3e4948ea892d6d68436499"
+dependencies = [
+ "bytes 0.5.6",
+ "futures-core",
+ "futures-sink",
+ "log",
+ "pin-project-lite 0.1.12",
+ "tokio 0.2.25",
 ]
 
 [[package]]
@@ -2177,6 +2255,16 @@ dependencies = [
 ]
 
 [[package]]
+name = "tracing-futures"
+version = "0.2.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "97d095ae15e245a057c8e8451bab9b3ee1e1f68e9ba2b4fbc18d0ac5237835f2"
+dependencies = [
+ "pin-project",
+ "tracing",
+]
+
+[[package]]
 name = "try-lock"
 version = "0.2.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -2189,6 +2277,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987"
 
 [[package]]
+name = "unicase"
+version = "2.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "50f37be617794602aabbeee0be4f259dc1778fabe05e2d67ee8f79326d5cb4f6"
+dependencies = [
+ "version_check",
+]
+
+[[package]]
 name = "unicode-bidi"
 version = "0.3.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -2273,6 +2370,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "eaf9f5aceeec8be17c128b2e93e031fb8a4d469bb9c4ae2d7dc1888b26887268"
 dependencies = [
  "cfg-if 1.0.0",
+ "serde",
+ "serde_json",
  "wasm-bindgen-macro",
 ]
 
@@ -2478,9 +2577,9 @@ checksum = "f40009d85759725a34da6d89a94e63d7bdc50a862acf0dbc7c8e488f1edcb6f5"
 
 [[package]]
 name = "winreg"
-version = "0.10.1"
+version = "0.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "80d0f4e272c85def139476380b12f9ac60926689dd2e01d4923222f40580869d"
+checksum = "0120db82e8a1e0b9fb3345a539c478767c0048d842860994d96113d5b667bd69"
 dependencies = [
  "winapi 0.3.9",
 ]
diff --git a/crawler/Cargo.toml b/crawler/Cargo.toml
index 16d7b41..c58b458 100644
--- a/crawler/Cargo.toml
+++ b/crawler/Cargo.toml
@@ -6,7 +6,7 @@ edition = "2021"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
-reqwest = {version = "0.11", features = ["blocking", "json"]}
+reqwest = {version = "0.10.9", features = ["blocking", "json"]}
 scraper = "0.12.0"
 itertools = "0.10.5"
 serde = { version = "1.0", features = ["derive"] }
diff --git a/crawler/src/main.rs b/crawler/src/main.rs
index 72c3e4d..d7f60c9 100644
--- a/crawler/src/main.rs
+++ b/crawler/src/main.rs
@@ -1,6 +1,6 @@
 use itertools::Itertools;
 use rand::seq::IteratorRandom;
-use reqwest::blocking::{Client, Response};
+use reqwest::{Client, Response};
 use serde::Serialize;
 use url::Url;
 
@@ -11,7 +11,7 @@ async fn main() {
     let root_urls = include_str!("../top-1000-websites.txt");
     let root_urls = root_urls.split('\n').collect();
 
-    let http_client = reqwest::blocking::Client::new();
+    let http_client = reqwest::Client::new();
 
     crawler(http_client, root_urls).await;
 }
@@ -59,7 +59,7 @@ async fn crawler(http_client: Client, root_urls: Vec<&str>) {
                     println!("{e}");
                     return;
                 }
-                Ok(res) => res.text(),
+                Ok(res) => res.text().await,
             };
 
             dbg!("Pushed to indexer {:?}", &indexer_response);
@@ -76,9 +76,9 @@ async fn crawl_url(http_client: &Client, url: &str) -> Result<(String, Vec<Strin
 
     let url = Url::parse(url).unwrap();
 
-    let response_text = match http_client.get(url.as_str()).send() {
+    let response_text = match http_client.get(url.as_str()).send().await {
         Err(_) => Err("Error fetching ".to_owned() + url.as_str()),
-        Ok(text_res) => match text_res.text() {
+        Ok(text_res) => match text_res.text().await {
             Err(_) => {
                 Err("Error unwrapping the fetched HTML's text (".to_owned() + url.as_str() + ")")
             }
@@ -139,7 +139,12 @@ async fn push_crawl_entry_to_indexer(
 
     let request_body = Resource { url, content };
 
-    match http_client.post(&indexer_url).json(&request_body).send() {
+    match http_client
+        .post(&indexer_url)
+        .json(&request_body)
+        .send()
+        .await
+    {
         Err(_) => Err(format!(
             "Error pushing the crawler to indexer! {:?}",
             &indexer_url
diff --git a/frontend/Cargo.toml b/frontend/Cargo.toml
index 2f2715c..4963867 100644
--- a/frontend/Cargo.toml
+++ b/frontend/Cargo.toml
@@ -6,4 +6,4 @@ edition = "2021"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
-yew = "0.19"
+yew = "0.19"
\ No newline at end of file