// crawler/src/main.rs
use blockingqueue::BlockingQueue;

fn main() {
    println!("Hello, world! Im the crawler!");

    // Read the seed list at compile time; one domain per line.
    let root_urls = include_str!("../top-1000-websites.txt");
    let root_urls = root_urls.lines().filter(|line| !line.is_empty()).collect();

    crawler(root_urls);
}

// Takes the list of root URLs and crawls outward from them.
// Single-threaded for now; a multi-threaded sketch follows this function.
fn crawler(root_urls: Vec<&str>) {
    println!("Starting to crawl!");

    // Seed the queue with the root URLs.
    let crawling_queue: BlockingQueue<&str> = BlockingQueue::new();
    for url in root_urls {
        crawling_queue.push(url);
    }

    // ...and start crawling.
    // FIXME: parallelize; see the crawler_threaded sketch below.
    loop {
        // pop() blocks until a URL is available.
        let url = crawling_queue.pop();

        let (_content, crawled_urls) = crawl_url(url);

        // TODO: push _content to the index once one exists.

        for url in crawled_urls {
            crawling_queue.push(url);
        }
    }
}
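
// One possible answer to the FIXME above, as a hedged sketch only: the
// blockingqueue crate's BlockingQueue can be cloned, and clones share the
// same underlying queue, so worker threads can pop and push concurrently.
// crawler_threaded and n_workers are names invented here, and the 'static
// bound assumes the URLs come from include_str! as they do in main.
fn crawler_threaded(root_urls: Vec<&'static str>, n_workers: usize) {
    let crawling_queue: BlockingQueue<&'static str> = BlockingQueue::new();
    for url in root_urls {
        crawling_queue.push(url);
    }

    let mut workers = Vec::new();
    for _ in 0..n_workers {
        let queue = crawling_queue.clone();
        workers.push(std::thread::spawn(move || loop {
            // Blocks until a URL is available; idle workers just wait here.
            let url = queue.pop();
            let (_content, crawled_urls) = crawl_url(url);
            for url in crawled_urls {
                queue.push(url);
            }
            // (A real crawler would also track visited URLs to avoid loops.)
        }));
    }

    // The workers loop forever, so join() only returns if a thread panics.
    for worker in workers {
        let _ = worker.join();
    }
}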

// Takes a URL; returns the page content and the URLs found on it.
// Stub for now: logs the URL and returns nothing.
fn crawl_url(url: &str) -> (&str, Vec<&str>) {
    println!("Crawling {:?}", "https://".to_owned() + url);

    ("", vec![])
}
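
// What crawl_url might grow into, as a hedged sketch rather than this
// project's actual implementation: it assumes the reqwest crate (with its
// "blocking" feature) is added to Cargo.toml, fetches the page, and naively
// scrapes href="..." attributes. Returned values are owned Strings so they
// can outlive the fetched page; crawl_url_fetching is a name invented here.
fn crawl_url_fetching(url: &str) -> (String, Vec<String>) {
    let full_url = "https://".to_owned() + url;
    println!("Crawling {:?}", full_url);

    let content = match reqwest::blocking::get(full_url.as_str()) {
        Ok(response) => response.text().unwrap_or_default(),
        Err(_) => return (String::new(), vec![]),
    };

    // Very naive link extraction: take whatever sits between href=" and the
    // next quote. A real crawler would use an HTML parser instead.
    let mut links = Vec::new();
    for piece in content.split("href=\"").skip(1) {
        if let Some(end) = piece.find('"') {
            links.push(piece[..end].to_owned());
        }
    }

    (content, links)
}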