Initial commit

This commit is contained in:
Peter 2024-02-09 11:06:50 +01:00
commit 6a206031fb
Signed by: pludi
GPG Key ID: FB1A00FEE77E2C36
4 changed files with 1727 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
/target

1678
Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

10
Cargo.toml Normal file
View File

@@ -0,0 +1,10 @@
[package]
name = "markify-rs"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
reqwest = { version = "0.11", features = ["blocking", "json"] }
scraper = "0.12"

38
src/main.rs Normal file
View File

@@ -0,0 +1,38 @@
use reqwest::header::{HeaderMap, USER_AGENT};
use scraper::{Html, Selector};
fn main() -> Result<(), Box<dyn std::error::Error>> {
let args: Vec<String> = std::env::args().collect();
if args.len() < 2 {
eprintln!("Usage: {} <URL> [g]", args[0]);
return Ok(());
}
let url = &args[1];
let use_google_bot = args.get(2).map(|s| s == "g").unwrap_or(false);
let mut headers = HeaderMap::new();
if use_google_bot {
headers.insert(USER_AGENT, "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)".parse()?);
}
let client = reqwest::blocking::Client::builder().default_headers(headers).build()?;
let res = client.get(url).send()?;
let body = res.text()?;
let document = Html::parse_document(&body);
let title_selector = Selector::parse("title").unwrap();
let title = document.select(&title_selector).next().map(|title| title.text().collect::<Vec<_>>().join(""));
match title {
Some(title) => {
println!("[{}]({})", title.trim(), url);
}
None => {
eprintln!("Title tag not found in the response from {}", url);
}
}
Ok(())
}