From 452c550b5ca97b0126bbb916d4f9d8d83085f90e Mon Sep 17 00:00:00 2001 From: Trask Stalnaker Date: Wed, 27 Nov 2024 17:00:28 -0800 Subject: [PATCH 01/26] windows --- lychee-lib/src/types/base.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lychee-lib/src/types/base.rs b/lychee-lib/src/types/base.rs index b7b76c7e5b..a9ca9212bb 100644 --- a/lychee-lib/src/types/base.rs +++ b/lychee-lib/src/types/base.rs @@ -61,6 +61,12 @@ impl TryFrom<&str> for Base { type Error = ErrorKind; fn try_from(value: &str) -> Result { + let path = PathBuf::from(value); + if path.exists() { + // need to check path first since Url::parse accepts windows paths + // e.g. C:\src\lychee and C:/src/lychee are both parsed as URLs + return Ok(Self::Local(PathBuf::from(value))) + } if let Ok(url) = Url::parse(value) { if url.cannot_be_a_base() { return Err(ErrorKind::InvalidBase( From 866ba38e3674929f8cc2c95885186130e0345149 Mon Sep 17 00:00:00 2001 From: Trask Stalnaker Date: Thu, 28 Nov 2024 10:28:23 -0800 Subject: [PATCH 02/26] Introduce --root-path --- README.md | 3 +++ lychee-bin/src/client.rs | 10 ++++++++-- lychee-bin/src/main.rs | 9 ++++++++- lychee-bin/src/options.rs | 6 ++++++ lychee-lib/src/types/base.rs | 20 +++++++++++++------- lychee-lib/src/utils/path.rs | 4 ++-- lychee-lib/src/utils/request.rs | 8 ++++---- 7 files changed, 44 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 779e9f7d53..d57805af1f 100644 --- a/README.md +++ b/README.md @@ -480,6 +480,9 @@ Options: -b, --base Base URL or website root directory to check relative URLs e.g. or `/path/to/public` + --root-path + Root path to use when checking absolute local links, --base is ignored when this is set + --basic-auth Basic authentication support. E.g. `http://example.com username:password` diff --git a/lychee-bin/src/client.rs b/lychee-bin/src/client.rs index d1b982dc26..b89e49578f 100644 --- a/lychee-bin/src/client.rs +++ b/lychee-bin/src/client.rs @@ -2,7 +2,7 @@ use crate::options::Config; use crate::parse::{parse_duration_secs, parse_headers, parse_remaps}; use anyhow::{Context, Result}; use http::StatusCode; -use lychee_lib::{Client, ClientBuilder}; +use lychee_lib::{Base, Client, ClientBuilder}; use regex::RegexSet; use reqwest_cookie_store::CookieStoreMutex; use std::sync::Arc; @@ -53,9 +53,15 @@ pub(crate) fn create(cfg: &Config, cookie_jar: Option<&Arc>) - cfg.include_mail }; + let base = if let Some(root_path) = &cfg.root_path { + Base::create_root_path(&root_path).ok() + } else { + cfg.base.clone() + }; + ClientBuilder::builder() .remaps(remaps) - .base(cfg.base.clone()) + .base(base) .includes(includes) .excludes(excludes) .exclude_all_private(cfg.exclude_all_private) diff --git a/lychee-bin/src/main.rs b/lychee-bin/src/main.rs index bb6ac4aac9..bf4148ed2c 100644 --- a/lychee-bin/src/main.rs +++ b/lychee-bin/src/main.rs @@ -75,6 +75,7 @@ use openssl_sys as _; // required for vendored-openssl feature use options::LYCHEE_CONFIG_FILE; use ring as _; // required for apple silicon +use lychee_lib::Base; use lychee_lib::BasicAuthExtractor; use lychee_lib::Collector; use lychee_lib::CookieJar; @@ -288,7 +289,13 @@ fn underlying_io_error_kind(error: &Error) -> Option { async fn run(opts: &LycheeOptions) -> Result { let inputs = opts.inputs()?; - let mut collector = Collector::new(opts.config.base.clone()) + let base = if let Some(root_path) = &opts.config.root_path { + Base::create_root_path(&root_path).ok() + } else { + opts.config.base.clone() + }; + + let mut collector = Collector::new(base) .skip_missing_inputs(opts.config.skip_missing) .skip_hidden(!opts.config.hidden) .skip_ignored(!opts.config.no_ignore) diff --git a/lychee-bin/src/options.rs b/lychee-bin/src/options.rs index 60c259d2ea..61de03a9c3 100644 --- a/lychee-bin/src/options.rs +++ b/lychee-bin/src/options.rs @@ -445,6 +445,12 @@ separated list of accepted status codes. This example will accept 200, 201, #[serde(default)] pub(crate) base: Option, + /// Root path to use when checking absolute local links, + /// "base" option is ignored when this is set + #[arg(long)] + #[serde(default)] + pub(crate) root_path: Option, + /// Basic authentication support. E.g. `http://example.com username:password` #[arg(long)] #[serde(default)] diff --git a/lychee-lib/src/types/base.rs b/lychee-lib/src/types/base.rs index a9ca9212bb..0851687500 100644 --- a/lychee-lib/src/types/base.rs +++ b/lychee-lib/src/types/base.rs @@ -15,6 +15,8 @@ pub enum Base { Local(PathBuf), /// Remote URL pointing to a website homepage Remote(Url), + /// Root path for checking absolute local links + RootPath(PathBuf), } impl Base { @@ -27,18 +29,28 @@ impl Base { let full_path = path.join(link); Url::from_file_path(full_path).ok() } + Self::RootPath(_path) => { + // this is unused currently because joining on RootPath is handled by create_uri_from_file_path + unreachable!() + } } } - /// Return the directory if the base is local + /// Return the directory if the base is not remote #[must_use] pub(crate) fn dir(&self) -> Option { match self { Self::Remote(_) => None, Self::Local(d) => Some(d.clone()), + Self::RootPath(d) => Some(d.clone()), } } + /// Create a root path base + pub fn create_root_path(value: &str) -> Result { + return Ok(Self::RootPath(PathBuf::from(value))); + } + pub(crate) fn from_source(source: &InputSource) -> Option { match &source { InputSource::RemoteUrl(url) => { @@ -61,12 +73,6 @@ impl TryFrom<&str> for Base { type Error = ErrorKind; fn try_from(value: &str) -> Result { - let path = PathBuf::from(value); - if path.exists() { - // need to check path first since Url::parse accepts windows paths - // e.g. C:\src\lychee and C:/src/lychee are both parsed as URLs - return Ok(Self::Local(PathBuf::from(value))) - } if let Ok(url) = Url::parse(value) { if url.cannot_be_a_base() { return Err(ErrorKind::InvalidBase( diff --git a/lychee-lib/src/utils/path.rs b/lychee-lib/src/utils/path.rs index bb4847ed9a..89abde1cc8 100644 --- a/lychee-lib/src/utils/path.rs +++ b/lychee-lib/src/utils/path.rs @@ -42,14 +42,14 @@ fn dirname(src: &'_ Path) -> Option<&'_ Path> { /// Returns Ok(None) in case of an absolute local link without a `base_url` pub(crate) fn resolve(src: &Path, dst: &Path, base: &Option) -> Result> { let resolved = match dst { - relative if dst.is_relative() => { + relative if !dst.starts_with("/") => { // Find `dst` in the parent directory of `src` let Some(parent) = src.parent() else { return Err(ErrorKind::InvalidFile(relative.to_path_buf())); }; parent.join(relative) } - absolute if dst.is_absolute() => { + absolute if dst.starts_with("/") => { // Absolute local links (leading slash) require the `base_url` to // define the document root. Silently ignore the link in case the // `base_url` is not defined. diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs index 7867e50f09..d02de1760e 100644 --- a/lychee-lib/src/utils/request.rs +++ b/lychee-lib/src/utils/request.rs @@ -53,14 +53,14 @@ fn try_parse_into_uri(raw_uri: &RawUri, source: &InputSource, base: &Option uri, Err(_) => match base { + Some(Base::RootPath(_)) | None => match source { + InputSource::FsPath(root) => create_uri_from_file_path(root, &text, base)?, + _ => return Err(ErrorKind::UnsupportedUriType(text)), + }, Some(base_url) => match base_url.join(&text) { Some(url) => Uri { url }, None => return Err(ErrorKind::InvalidBaseJoin(text.clone())), }, - None => match source { - InputSource::FsPath(root) => create_uri_from_file_path(root, &text, base)?, - _ => return Err(ErrorKind::UnsupportedUriType(text)), - }, }, }; Ok(uri) From 250f572cf5da314b50b5dba701c6579da0aec999 Mon Sep 17 00:00:00 2001 From: Trask Stalnaker Date: Thu, 28 Nov 2024 14:01:00 -0800 Subject: [PATCH 03/26] lint --- lychee-bin/src/client.rs | 2 +- lychee-bin/src/main.rs | 2 +- lychee-lib/src/types/base.rs | 7 +++---- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/lychee-bin/src/client.rs b/lychee-bin/src/client.rs index b89e49578f..ce9d841d55 100644 --- a/lychee-bin/src/client.rs +++ b/lychee-bin/src/client.rs @@ -54,7 +54,7 @@ pub(crate) fn create(cfg: &Config, cookie_jar: Option<&Arc>) - }; let base = if let Some(root_path) = &cfg.root_path { - Base::create_root_path(&root_path).ok() + Some(Base::create_root_path(&root_path)) } else { cfg.base.clone() }; diff --git a/lychee-bin/src/main.rs b/lychee-bin/src/main.rs index bf4148ed2c..1912f4950d 100644 --- a/lychee-bin/src/main.rs +++ b/lychee-bin/src/main.rs @@ -290,7 +290,7 @@ async fn run(opts: &LycheeOptions) -> Result { let inputs = opts.inputs()?; let base = if let Some(root_path) = &opts.config.root_path { - Base::create_root_path(&root_path).ok() + Some(Base::create_root_path(&root_path)) } else { opts.config.base.clone() }; diff --git a/lychee-lib/src/types/base.rs b/lychee-lib/src/types/base.rs index 0851687500..f875d6446a 100644 --- a/lychee-lib/src/types/base.rs +++ b/lychee-lib/src/types/base.rs @@ -41,14 +41,13 @@ impl Base { pub(crate) fn dir(&self) -> Option { match self { Self::Remote(_) => None, - Self::Local(d) => Some(d.clone()), - Self::RootPath(d) => Some(d.clone()), + Self::Local(d) | Self::RootPath(d) => Some(d.clone()), } } /// Create a root path base - pub fn create_root_path(value: &str) -> Result { - return Ok(Self::RootPath(PathBuf::from(value))); + pub fn create_root_path(value: &str) -> Base { + Self::RootPath(PathBuf::from(value)) } pub(crate) fn from_source(source: &InputSource) -> Option { From cd074f99b7915b4416ea0b35a3632451009da8df Mon Sep 17 00:00:00 2001 From: Trask Stalnaker Date: Thu, 28 Nov 2024 14:07:59 -0800 Subject: [PATCH 04/26] lint --- lychee-lib/src/types/base.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/lychee-lib/src/types/base.rs b/lychee-lib/src/types/base.rs index f875d6446a..aa716b3f26 100644 --- a/lychee-lib/src/types/base.rs +++ b/lychee-lib/src/types/base.rs @@ -46,6 +46,7 @@ impl Base { } /// Create a root path base + #[must_use] pub fn create_root_path(value: &str) -> Base { Self::RootPath(PathBuf::from(value)) } From 2e9c8fd5c370756c2c9d26c0535f76bd02bbf2c6 Mon Sep 17 00:00:00 2001 From: Trask Stalnaker Date: Thu, 28 Nov 2024 18:19:09 -0800 Subject: [PATCH 05/26] Simplification --- lychee-lib/src/collector.rs | 3 +- lychee-lib/src/utils/request.rs | 78 ++++++++++----------------------- 2 files changed, 25 insertions(+), 56 deletions(-) diff --git a/lychee-lib/src/collector.rs b/lychee-lib/src/collector.rs index 955bdd24e7..8b91851871 100644 --- a/lychee-lib/src/collector.rs +++ b/lychee-lib/src/collector.rs @@ -124,7 +124,8 @@ impl Collector { let content = content?; let extractor = Extractor::new(self.use_html5ever, self.include_verbatim); let uris: Vec = extractor.extract(&content); - let requests = request::create(uris, &content, &base, &basic_auth_extractor); + let requests = + request::create(uris, &content.source, &base, &basic_auth_extractor); Result::Ok(stream::iter(requests.into_iter().map(Ok))) } }) diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs index d02de1760e..cdd6e946d6 100644 --- a/lychee-lib/src/utils/request.rs +++ b/lychee-lib/src/utils/request.rs @@ -8,7 +8,7 @@ use std::{ use crate::{ basic_auth::BasicAuthExtractor, - types::{uri::raw::RawUri, InputContent, InputSource}, + types::{uri::raw::RawUri, InputSource}, utils::{path, url}, Base, BasicAuthCredentials, ErrorKind, Request, Result, Uri, }; @@ -125,24 +125,22 @@ fn truncate_source(source: &InputSource) -> InputSource { /// it will not be added to the `HashSet`. pub(crate) fn create( uris: Vec, - input_content: &InputContent, + source: &InputSource, base: &Option, extractor: &Option, ) -> HashSet { - let base = base - .clone() - .or_else(|| Base::from_source(&input_content.source)); + let base = base.clone().or_else(|| Base::from_source(&source)); uris.into_iter() - .filter_map(|raw_uri| { - match create_request(&raw_uri, &input_content.source, &base, extractor) { + .filter_map( + |raw_uri| match create_request(&raw_uri, &source, &base, extractor) { Ok(request) => Some(request), Err(e) => { warn!("Error creating request: {:?}", e); None } - } - }) + }, + ) .collect() } @@ -184,7 +182,6 @@ fn resolve_and_create_url( #[cfg(test)] mod tests { use super::*; - use crate::types::FileType; #[test] fn test_is_anchor() { @@ -199,24 +196,13 @@ mod tests { assert_eq!(result.as_str(), "file:///test+encoding"); } - fn create_input(content: &str, file_type: FileType) -> InputContent { - InputContent { - content: content.to_string(), - file_type, - source: InputSource::String(content.to_string()), - } - } - #[test] fn test_relative_url_resolution() { let base = Some(Base::try_from("https://example.com/path/page.html").unwrap()); - let input = create_input( - r#"Relative Link"#, - FileType::Html, - ); + let source = InputSource::String(String::new()); let uris = vec![RawUri::from("relative.html")]; - let requests = create(uris, &input, &base, &None); + let requests = create(uris, &source, &base, &None); assert_eq!(requests.len(), 1); assert!(requests @@ -227,13 +213,10 @@ mod tests { #[test] fn test_absolute_url_resolution() { let base = Some(Base::try_from("https://example.com/path/page.html").unwrap()); - let input = create_input( - r#"Absolute Link"#, - FileType::Html, - ); + let source = InputSource::String(String::new()); let uris = vec![RawUri::from("https://another.com/page")]; - let requests = create(uris, &input, &base, &None); + let requests = create(uris, &source, &base, &None); assert_eq!(requests.len(), 1); assert!(requests @@ -244,13 +227,10 @@ mod tests { #[test] fn test_root_relative_url_resolution() { let base = Some(Base::try_from("https://example.com/path/page.html").unwrap()); - let input = create_input( - r#"Root Relative Link"#, - FileType::Html, - ); + let source = InputSource::String(String::new()); let uris = vec![RawUri::from("/root-relative")]; - let requests = create(uris, &input, &base, &None); + let requests = create(uris, &source, &base, &None); assert_eq!(requests.len(), 1); assert!(requests @@ -261,13 +241,10 @@ mod tests { #[test] fn test_parent_directory_url_resolution() { let base = Some(Base::try_from("https://example.com/path/page.html").unwrap()); - let input = create_input( - r#"Parent Directory Link"#, - FileType::Html, - ); + let source = InputSource::String(String::new()); let uris = vec![RawUri::from("../parent")]; - let requests = create(uris, &input, &base, &None); + let requests = create(uris, &source, &base, &None); assert_eq!(requests.len(), 1); assert!(requests @@ -278,10 +255,10 @@ mod tests { #[test] fn test_fragment_url_resolution() { let base = Some(Base::try_from("https://example.com/path/page.html").unwrap()); - let input = create_input(r##"Fragment Link"##, FileType::Html); + let source = InputSource::String(String::new()); let uris = vec![RawUri::from("#fragment")]; - let requests = create(uris, &input, &base, &None); + let requests = create(uris, &source, &base, &None); assert_eq!(requests.len(), 1); assert!(requests @@ -292,13 +269,10 @@ mod tests { #[test] fn test_no_base_url_resolution() { let base = None; - let input = create_input( - r#"Absolute Link"#, - FileType::Html, - ); + let source = InputSource::String(String::new()); let uris = vec![RawUri::from("https://example.com/page")]; - let requests = create(uris, &input, &base, &None); + let requests = create(uris, &source, &base, &None); assert_eq!(requests.len(), 1); assert!(requests @@ -359,13 +333,10 @@ mod tests { #[test] fn test_parse_relative_path_into_uri() { let base = Some(Base::Local(PathBuf::from("/tmp/lychee"))); - let input = create_input( - r#"Relative Link"#, - FileType::Html, - ); + let source = InputSource::String(String::new()); let raw_uri = RawUri::from("relative.html"); - let uri = try_parse_into_uri(&raw_uri, &input.source, &base).unwrap(); + let uri = try_parse_into_uri(&raw_uri, &source, &base).unwrap(); assert_eq!(uri.url.as_str(), "file:///tmp/lychee/relative.html"); } @@ -373,13 +344,10 @@ mod tests { #[test] fn test_parse_absolute_path_into_uri() { let base = Some(Base::Local(PathBuf::from("/tmp/lychee"))); - let input = create_input( - r#"Absolute Link"#, - FileType::Html, - ); + let source = InputSource::String(String::new()); let raw_uri = RawUri::from("absolute.html"); - let uri = try_parse_into_uri(&raw_uri, &input.source, &base).unwrap(); + let uri = try_parse_into_uri(&raw_uri, &source, &base).unwrap(); assert_eq!(uri.url.as_str(), "file:///tmp/lychee/absolute.html"); } From 4de28cd23ed8124a5bde6f2d8f7db559ae41ad92 Mon Sep 17 00:00:00 2001 From: Trask Stalnaker Date: Thu, 28 Nov 2024 18:19:32 -0800 Subject: [PATCH 06/26] Add unit tests --- lychee-lib/src/utils/request.rs | 70 +++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs index cdd6e946d6..72a59b701f 100644 --- a/lychee-lib/src/utils/request.rs +++ b/lychee-lib/src/utils/request.rs @@ -266,6 +266,76 @@ mod tests { .any(|r| r.uri.url.as_str() == "https://example.com/path/page.html#fragment")); } + #[test] + fn test_relative_url_resolution_from_root_path() { + let base = Some(Base::create_root_path("/tmp/lychee")); + let source = InputSource::FsPath(PathBuf::from("/some/page.html")); + + let uris = vec![RawUri::from("relative.html")]; + let requests = create(uris, &source, &base, &None); + + assert_eq!(requests.len(), 1); + assert!(requests + .iter() + .any(|r| r.uri.url.as_str() == "file:///some/relative.html")); + } + + #[test] + fn test_absolute_url_resolution_from_root_path() { + let base = Some(Base::create_root_path("/tmp/lychee")); + let source = InputSource::FsPath(PathBuf::from("/some/page.html")); + + let uris = vec![RawUri::from("https://another.com/page")]; + let requests = create(uris, &source, &base, &None); + + assert_eq!(requests.len(), 1); + assert!(requests + .iter() + .any(|r| r.uri.url.as_str() == "https://another.com/page")); + } + + #[test] + fn test_root_relative_url_resolution_from_root_path() { + let base = Some(Base::create_root_path("/tmp/lychee")); + let source = InputSource::FsPath(PathBuf::from("/some/page.html")); + + let uris = vec![RawUri::from("/root-relative")]; + let requests = create(uris, &source, &base, &None); + + assert_eq!(requests.len(), 1); + assert!(requests + .iter() + .any(|r| r.uri.url.as_str() == "file:///tmp/lychee/root-relative")); + } + + #[test] + fn test_parent_directory_url_resolution_from_root_path() { + let base = Some(Base::create_root_path("/tmp/lychee")); + let source = InputSource::FsPath(PathBuf::from("/some/page.html")); + + let uris = vec![RawUri::from("../parent")]; + let requests = create(uris, &source, &base, &None); + + assert_eq!(requests.len(), 1); + assert!(requests + .iter() + .any(|r| r.uri.url.as_str() == "file:///parent")); + } + + #[test] + fn test_fragment_url_resolution_from_root_path() { + let base = Some(Base::create_root_path("/tmp/lychee")); + let source = InputSource::FsPath(PathBuf::from("/some/page.html")); + + let uris = vec![RawUri::from("#fragment")]; + let requests = create(uris, &source, &base, &None); + + assert_eq!(requests.len(), 1); + assert!(requests + .iter() + .any(|r| r.uri.url.as_str() == "file:///some/page.html#fragment")); + } + #[test] fn test_no_base_url_resolution() { let base = None; From 2489d56cae6eaf8fec36dc85da675c720412ef2f Mon Sep 17 00:00:00 2001 From: Trask Stalnaker Date: Thu, 28 Nov 2024 19:04:58 -0800 Subject: [PATCH 07/26] Add integration test --- .../nested/about/index.html | 8 +++++ .../nested/another page/index.html | 0 .../nested/index.html | 34 +++++++++++++++++++ lychee-bin/tests/cli.rs | 18 ++++++++++ lychee-lib/src/types/input.rs | 6 +--- 5 files changed, 61 insertions(+), 5 deletions(-) create mode 100644 fixtures/resolve_paths_from_root_path/nested/about/index.html create mode 100644 fixtures/resolve_paths_from_root_path/nested/another page/index.html create mode 100644 fixtures/resolve_paths_from_root_path/nested/index.html diff --git a/fixtures/resolve_paths_from_root_path/nested/about/index.html b/fixtures/resolve_paths_from_root_path/nested/about/index.html new file mode 100644 index 0000000000..3141b661a0 --- /dev/null +++ b/fixtures/resolve_paths_from_root_path/nested/about/index.html @@ -0,0 +1,8 @@ + + + About + + +

About

+ + diff --git a/fixtures/resolve_paths_from_root_path/nested/another page/index.html b/fixtures/resolve_paths_from_root_path/nested/another page/index.html new file mode 100644 index 0000000000..e69de29bb2 diff --git a/fixtures/resolve_paths_from_root_path/nested/index.html b/fixtures/resolve_paths_from_root_path/nested/index.html new file mode 100644 index 0000000000..8f4528f54a --- /dev/null +++ b/fixtures/resolve_paths_from_root_path/nested/index.html @@ -0,0 +1,34 @@ + + + Index + + +

Index Title

+ +

+

+

+ + \ No newline at end of file diff --git a/lychee-bin/tests/cli.rs b/lychee-bin/tests/cli.rs index 7dcd716f1d..c8219d58a5 100644 --- a/lychee-bin/tests/cli.rs +++ b/lychee-bin/tests/cli.rs @@ -393,6 +393,24 @@ mod cli { .stdout(contains("3 OK")); } + #[test] + fn test_resolve_paths_from_root_path() { + let mut cmd = main_command(); + let dir = fixtures_path().join("resolve_paths_from_root_path"); + + cmd.arg("--offline") + .arg("--include-fragments") + .arg("--root-path") + .arg(&dir) + .arg(dir.join("nested").join("index.html")) + .env_clear() + .assert() + .failure() + .stdout(contains("7 Total")) + .stdout(contains("5 OK")) + .stdout(contains("2 Errors")); + } + #[test] fn test_youtube_quirk() { let url = "https://www.youtube.com/watch?v=NlKuICiT470&list=PLbWDhxwM_45mPVToqaIZNbZeIzFchsKKQ&index=7"; diff --git a/lychee-lib/src/types/input.rs b/lychee-lib/src/types/input.rs index c32be7feb8..a0fba1ca3e 100644 --- a/lychee-lib/src/types/input.rs +++ b/lychee-lib/src/types/input.rs @@ -140,11 +140,7 @@ impl Input { Ok(url) if url.scheme() == "http" || url.scheme() == "https" => { InputSource::RemoteUrl(Box::new(url)) } - Ok(_) => { - // URL parsed successfully, but it's not http or https - return Err(ErrorKind::InvalidFile(PathBuf::from(value))); - } - _ => { + Ok(_) | _ => { // this seems to be the only way to determine if this is a glob pattern let is_glob = glob::Pattern::escape(value) != value; From be3a0623be5b390bcb4db5c06048967fa9d44ab6 Mon Sep 17 00:00:00 2001 From: Trask Stalnaker Date: Thu, 28 Nov 2024 20:57:59 -0800 Subject: [PATCH 08/26] Sync docs --- README.md | 2 +- lychee-bin/src/options.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d57805af1f..256be0c698 100644 --- a/README.md +++ b/README.md @@ -481,7 +481,7 @@ Options: Base URL or website root directory to check relative URLs e.g. or `/path/to/public` --root-path - Root path to use when checking absolute local links, --base is ignored when this is set + Root path to use when checking absolute local links, base option is ignored when this is set --basic-auth Basic authentication support. E.g. `http://example.com username:password` diff --git a/lychee-bin/src/options.rs b/lychee-bin/src/options.rs index 61de03a9c3..e9a291f599 100644 --- a/lychee-bin/src/options.rs +++ b/lychee-bin/src/options.rs @@ -446,7 +446,7 @@ separated list of accepted status codes. This example will accept 200, 201, pub(crate) base: Option, /// Root path to use when checking absolute local links, - /// "base" option is ignored when this is set + /// base option is ignored when this is set #[arg(long)] #[serde(default)] pub(crate) root_path: Option, From bdb5ec6a2b3665e78573d0a5e50e0486bac1eeb8 Mon Sep 17 00:00:00 2001 From: Trask Stalnaker Date: Thu, 28 Nov 2024 21:11:09 -0800 Subject: [PATCH 09/26] Add missing comment to make CI happy --- lychee-lib/src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/lychee-lib/src/lib.rs b/lychee-lib/src/lib.rs index 93df6d0db8..023278c034 100644 --- a/lychee-lib/src/lib.rs +++ b/lychee-lib/src/lib.rs @@ -71,6 +71,7 @@ pub mod remap; /// local IPs or e-mail addresses pub mod filter; +/// Test utilities #[cfg(test)] #[macro_use] pub mod test_utils; From 144e1e53cafaa7b97820053b4821807ff31f8215 Mon Sep 17 00:00:00 2001 From: Trask Stalnaker Date: Thu, 28 Nov 2024 21:07:04 -0800 Subject: [PATCH 10/26] Revert one of the Windows-specific changes because causing a test failure --- lychee-lib/src/types/input.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lychee-lib/src/types/input.rs b/lychee-lib/src/types/input.rs index a0fba1ca3e..c32be7feb8 100644 --- a/lychee-lib/src/types/input.rs +++ b/lychee-lib/src/types/input.rs @@ -140,7 +140,11 @@ impl Input { Ok(url) if url.scheme() == "http" || url.scheme() == "https" => { InputSource::RemoteUrl(Box::new(url)) } - Ok(_) | _ => { + Ok(_) => { + // URL parsed successfully, but it's not http or https + return Err(ErrorKind::InvalidFile(PathBuf::from(value))); + } + _ => { // this seems to be the only way to determine if this is a glob pattern let is_glob = glob::Pattern::escape(value) != value; From d2283d462cfab827fa931125b3837266b9b62525 Mon Sep 17 00:00:00 2001 From: Trask Stalnaker Date: Sat, 30 Nov 2024 10:17:40 -0800 Subject: [PATCH 11/26] Support both options at the same time --- README.md | 2 +- examples/collect_links/collect_links.rs | 2 +- lychee-bin/src/client.rs | 10 +-- lychee-bin/src/main.rs | 9 +-- lychee-bin/src/options.rs | 5 +- lychee-lib/src/collector.rs | 51 +++++++++----- lychee-lib/src/types/base.rs | 14 +--- lychee-lib/src/utils/path.rs | 53 +++++++++----- lychee-lib/src/utils/request.rs | 92 ++++++++++++++++--------- 9 files changed, 135 insertions(+), 103 deletions(-) diff --git a/README.md b/README.md index 256be0c698..5f0da195d8 100644 --- a/README.md +++ b/README.md @@ -481,7 +481,7 @@ Options: Base URL or website root directory to check relative URLs e.g. or `/path/to/public` --root-path - Root path to use when checking absolute local links, base option is ignored when this is set + Root path to use when checking absolute local links --basic-auth Basic authentication support. E.g. `http://example.com username:password` diff --git a/examples/collect_links/collect_links.rs b/examples/collect_links/collect_links.rs index 4a86924c56..3a2ab520e7 100644 --- a/examples/collect_links/collect_links.rs +++ b/examples/collect_links/collect_links.rs @@ -21,7 +21,7 @@ async fn main() -> Result<()> { }, ]; - let links = Collector::new(None) // base + let links = Collector::new(None, None) // root_path and base .skip_missing_inputs(false) // don't skip missing inputs? (default=false) .skip_hidden(false) // skip hidden files? (default=true) .skip_ignored(false) // skip files that are ignored by git? (default=true) diff --git a/lychee-bin/src/client.rs b/lychee-bin/src/client.rs index ce9d841d55..d1b982dc26 100644 --- a/lychee-bin/src/client.rs +++ b/lychee-bin/src/client.rs @@ -2,7 +2,7 @@ use crate::options::Config; use crate::parse::{parse_duration_secs, parse_headers, parse_remaps}; use anyhow::{Context, Result}; use http::StatusCode; -use lychee_lib::{Base, Client, ClientBuilder}; +use lychee_lib::{Client, ClientBuilder}; use regex::RegexSet; use reqwest_cookie_store::CookieStoreMutex; use std::sync::Arc; @@ -53,15 +53,9 @@ pub(crate) fn create(cfg: &Config, cookie_jar: Option<&Arc>) - cfg.include_mail }; - let base = if let Some(root_path) = &cfg.root_path { - Some(Base::create_root_path(&root_path)) - } else { - cfg.base.clone() - }; - ClientBuilder::builder() .remaps(remaps) - .base(base) + .base(cfg.base.clone()) .includes(includes) .excludes(excludes) .exclude_all_private(cfg.exclude_all_private) diff --git a/lychee-bin/src/main.rs b/lychee-bin/src/main.rs index 1912f4950d..3250bdf41b 100644 --- a/lychee-bin/src/main.rs +++ b/lychee-bin/src/main.rs @@ -75,7 +75,6 @@ use openssl_sys as _; // required for vendored-openssl feature use options::LYCHEE_CONFIG_FILE; use ring as _; // required for apple silicon -use lychee_lib::Base; use lychee_lib::BasicAuthExtractor; use lychee_lib::Collector; use lychee_lib::CookieJar; @@ -289,13 +288,7 @@ fn underlying_io_error_kind(error: &Error) -> Option { async fn run(opts: &LycheeOptions) -> Result { let inputs = opts.inputs()?; - let base = if let Some(root_path) = &opts.config.root_path { - Some(Base::create_root_path(&root_path)) - } else { - opts.config.base.clone() - }; - - let mut collector = Collector::new(base) + let mut collector = Collector::new(opts.config.root_path.clone(), opts.config.base.clone()) .skip_missing_inputs(opts.config.skip_missing) .skip_hidden(!opts.config.hidden) .skip_ignored(!opts.config.no_ignore) diff --git a/lychee-bin/src/options.rs b/lychee-bin/src/options.rs index e9a291f599..4706fa1433 100644 --- a/lychee-bin/src/options.rs +++ b/lychee-bin/src/options.rs @@ -445,11 +445,10 @@ separated list of accepted status codes. This example will accept 200, 201, #[serde(default)] pub(crate) base: Option, - /// Root path to use when checking absolute local links, - /// base option is ignored when this is set + /// Root path to use when checking absolute local links #[arg(long)] #[serde(default)] - pub(crate) root_path: Option, + pub(crate) root_path: Option, /// Basic authentication support. E.g. `http://example.com username:password` #[arg(long)] diff --git a/lychee-lib/src/collector.rs b/lychee-lib/src/collector.rs index 8b91851871..c21dd67999 100644 --- a/lychee-lib/src/collector.rs +++ b/lychee-lib/src/collector.rs @@ -9,6 +9,7 @@ use futures::{ StreamExt, }; use par_stream::ParStreamExt; +use std::path::PathBuf; /// Collector keeps the state of link collection /// It drives the link extraction from inputs @@ -21,13 +22,14 @@ pub struct Collector { skip_hidden: bool, include_verbatim: bool, use_html5ever: bool, + root_path: Option, base: Option, } impl Collector { /// Create a new collector with an empty cache #[must_use] - pub const fn new(base: Option) -> Self { + pub const fn new(root_path: Option, base: Option) -> Self { Collector { basic_auth_extractor: None, skip_missing_inputs: false, @@ -35,6 +37,7 @@ impl Collector { use_html5ever: false, skip_hidden: true, skip_ignored: true, + root_path, base, } } @@ -119,13 +122,19 @@ impl Collector { }) .flatten() .par_then_unordered(None, move |(content, base)| { + let root_path = self.root_path.clone(); let basic_auth_extractor = self.basic_auth_extractor.clone(); async move { let content = content?; let extractor = Extractor::new(self.use_html5ever, self.include_verbatim); let uris: Vec = extractor.extract(&content); - let requests = - request::create(uris, &content.source, &base, &basic_auth_extractor); + let requests = request::create( + uris, + &content.source, + &root_path, + &base, + &basic_auth_extractor, + ); Result::Ok(stream::iter(requests.into_iter().map(Ok))) } }) @@ -149,14 +158,22 @@ mod tests { }; // Helper function to run the collector on the given inputs - async fn collect(inputs: Vec, base: Option) -> HashSet { - let responses = Collector::new(base).collect_links(inputs); + async fn collect( + inputs: Vec, + root_path: Option, + base: Option, + ) -> HashSet { + let responses = Collector::new(root_path, base).collect_links(inputs); responses.map(|r| r.unwrap().uri).collect().await } // Helper function for collecting verbatim links - async fn collect_verbatim(inputs: Vec, base: Option) -> HashSet { - let responses = Collector::new(base) + async fn collect_verbatim( + inputs: Vec, + root_path: Option, + base: Option, + ) -> HashSet { + let responses = Collector::new(root_path, base) .include_verbatim(true) .collect_links(inputs); responses.map(|r| r.unwrap().uri).collect().await @@ -247,7 +264,7 @@ mod tests { }, ]; - let links = collect_verbatim(inputs, None).await; + let links = collect_verbatim(inputs, None, None).await; let expected_links = HashSet::from_iter([ website(TEST_STRING), @@ -270,7 +287,7 @@ mod tests { file_type_hint: Some(FileType::Markdown), excluded_paths: None, }; - let links = collect(vec![input], Some(base)).await; + let links = collect(vec![input], None, Some(base)).await; let expected_links = HashSet::from_iter([ website("https://endler.dev"), @@ -296,7 +313,7 @@ mod tests { file_type_hint: Some(FileType::Html), excluded_paths: None, }; - let links = collect(vec![input], Some(base)).await; + let links = collect(vec![input], None, Some(base)).await; let expected_links = HashSet::from_iter([ website("https://github.com/lycheeverse/lychee/"), @@ -325,7 +342,7 @@ mod tests { file_type_hint: Some(FileType::Html), excluded_paths: None, }; - let links = collect(vec![input], Some(base)).await; + let links = collect(vec![input], None, Some(base)).await; let expected_links = HashSet::from_iter([ website("https://example.com/static/image.png"), @@ -352,7 +369,7 @@ mod tests { excluded_paths: None, }; - let links = collect(vec![input], Some(base)).await; + let links = collect(vec![input], None, Some(base)).await; let expected = HashSet::from_iter([ website("https://localhost.com/@/internal.md"), @@ -374,7 +391,7 @@ mod tests { file_type_hint: Some(FileType::Html), excluded_paths: None, }; - let links = collect(vec![input], Some(base)).await; + let links = collect(vec![input], None, Some(base)).await; let expected_links = HashSet::from_iter([ // the body links wouldn't be present if the file was parsed strictly as XML @@ -407,7 +424,7 @@ mod tests { excluded_paths: None, }; - let links = collect(vec![input], None).await; + let links = collect(vec![input], None, None).await; let expected_urls = HashSet::from_iter([ website("https://github.com/lycheeverse/lychee/"), @@ -426,7 +443,7 @@ mod tests { file_type_hint: None, excluded_paths: None, }; - let links = collect(vec![input], None).await; + let links = collect(vec![input], None, None).await; let expected_links = HashSet::from_iter([mail("user@example.com")]); @@ -469,7 +486,7 @@ mod tests { }, ]; - let links = collect(inputs, None).await; + let links = collect(inputs, None, None).await; let expected_links = HashSet::from_iter([ website(&format!( @@ -503,7 +520,7 @@ mod tests { excluded_paths: None, }; - let links = collect(vec![input], Some(base)).await; + let links = collect(vec![input], None, Some(base)).await; let expected_links = HashSet::from_iter([ path("/path/to/root/index.html"), diff --git a/lychee-lib/src/types/base.rs b/lychee-lib/src/types/base.rs index aa716b3f26..fe21429326 100644 --- a/lychee-lib/src/types/base.rs +++ b/lychee-lib/src/types/base.rs @@ -15,8 +15,6 @@ pub enum Base { Local(PathBuf), /// Remote URL pointing to a website homepage Remote(Url), - /// Root path for checking absolute local links - RootPath(PathBuf), } impl Base { @@ -29,10 +27,6 @@ impl Base { let full_path = path.join(link); Url::from_file_path(full_path).ok() } - Self::RootPath(_path) => { - // this is unused currently because joining on RootPath is handled by create_uri_from_file_path - unreachable!() - } } } @@ -41,16 +35,10 @@ impl Base { pub(crate) fn dir(&self) -> Option { match self { Self::Remote(_) => None, - Self::Local(d) | Self::RootPath(d) => Some(d.clone()), + Self::Local(d) => Some(d.clone()), } } - /// Create a root path base - #[must_use] - pub fn create_root_path(value: &str) -> Base { - Self::RootPath(PathBuf::from(value)) - } - pub(crate) fn from_source(source: &InputSource) -> Option { match &source { InputSource::RemoteUrl(url) => { diff --git a/lychee-lib/src/utils/path.rs b/lychee-lib/src/utils/path.rs index 89abde1cc8..ca295ce6a3 100644 --- a/lychee-lib/src/utils/path.rs +++ b/lychee-lib/src/utils/path.rs @@ -40,7 +40,12 @@ fn dirname(src: &'_ Path) -> Option<&'_ Path> { /// Resolve `dst` that was linked to from within `src` /// /// Returns Ok(None) in case of an absolute local link without a `base_url` -pub(crate) fn resolve(src: &Path, dst: &Path, base: &Option) -> Result> { +pub(crate) fn resolve( + src: &Path, + dst: &PathBuf, + root_path: &Option, + base: &Option, +) -> Result> { let resolved = match dst { relative if !dst.starts_with("/") => { // Find `dst` in the parent directory of `src` @@ -50,19 +55,31 @@ pub(crate) fn resolve(src: &Path, dst: &Path, base: &Option) -> Result { - // Absolute local links (leading slash) require the `base_url` to - // define the document root. Silently ignore the link in case the - // `base_url` is not defined. - let Some(base) = get_base_dir(base) else { - return Ok(None); + let with_root_path = match root_path { + Some(root) => &join(root.to_path_buf(), absolute), + None => absolute, }; - let Some(dir) = dirname(&base) else { - return Err(ErrorKind::InvalidBase( - base.display().to_string(), - "The given directory cannot be a base".to_string(), - )); - }; - join(dir.to_path_buf(), absolute) + match get_base_dir(base) { + Some(base) => { + let Some(dir) = dirname(&base) else { + return Err(ErrorKind::InvalidBase( + base.display().to_string(), + "The given directory cannot be a base".to_string(), + )); + }; + join(dir.to_path_buf(), with_root_path) + } + None => { + if root_path.is_some() { + with_root_path.to_path_buf() + } else { + // Absolute local links (leading slash) require the `base_url` to + // define the document root. Silently ignore the link in case the + // `base_url` is not defined. + return Ok(None); + } + } + } } _ => return Err(ErrorKind::InvalidFile(dst.to_path_buf())), }; @@ -110,7 +127,7 @@ mod test_path { let dummy = PathBuf::from("index.html"); let abs_path = PathBuf::from("./foo.html"); assert_eq!( - resolve(&dummy, &abs_path, &None)?, + resolve(&dummy, &abs_path, &None, &None)?, Some(env::current_dir().unwrap().join("foo.html")) ); Ok(()) @@ -123,7 +140,7 @@ mod test_path { let dummy = PathBuf::from("./index.html"); let abs_path = PathBuf::from("./foo.html"); assert_eq!( - resolve(&dummy, &abs_path, &None)?, + resolve(&dummy, &abs_path, &None, &None)?, Some(env::current_dir().unwrap().join("foo.html")) ); Ok(()) @@ -136,7 +153,7 @@ mod test_path { let abs_index = PathBuf::from("/path/to/index.html"); let abs_path = PathBuf::from("./foo.html"); assert_eq!( - resolve(&abs_index, &abs_path, &None)?, + resolve(&abs_index, &abs_path, &None, &None)?, Some(PathBuf::from("/path/to/foo.html")) ); Ok(()) @@ -151,7 +168,7 @@ mod test_path { let abs_path = PathBuf::from("/foo.html"); let base = Some(Base::Local(PathBuf::from("/some/absolute/base/dir"))); assert_eq!( - resolve(&dummy, &abs_path, &base)?, + resolve(&dummy, &abs_path, &None, &base)?, Some(PathBuf::from("/some/absolute/base/dir/foo.html")) ); Ok(()) @@ -165,7 +182,7 @@ mod test_path { let abs_path = PathBuf::from("/other/path/to/foo.html"); let base = Some(Base::Local(PathBuf::from("/some/absolute/base/dir"))); assert_eq!( - resolve(&abs_index, &abs_path, &base)?, + resolve(&abs_index, &abs_path, &None, &base)?, Some(PathBuf::from( "/some/absolute/base/dir/other/path/to/foo.html" )) diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs index 72a59b701f..97db0ac8b8 100644 --- a/lychee-lib/src/utils/request.rs +++ b/lychee-lib/src/utils/request.rs @@ -25,10 +25,11 @@ fn extract_credentials( fn create_request( raw_uri: &RawUri, source: &InputSource, + root_path: &Option, base: &Option, extractor: &Option, ) -> Result { - let uri = try_parse_into_uri(raw_uri, source, base)?; + let uri = try_parse_into_uri(raw_uri, source, root_path, base)?; let source = truncate_source(source); let element = raw_uri.element.clone(); let attribute = raw_uri.attribute.clone(); @@ -48,19 +49,26 @@ fn create_request( /// to create a valid URI. /// - If a URI cannot be created from the file path. /// - If the source is not a file path (i.e. the URI type is not supported). -fn try_parse_into_uri(raw_uri: &RawUri, source: &InputSource, base: &Option) -> Result { +fn try_parse_into_uri( + raw_uri: &RawUri, + source: &InputSource, + root_path: &Option, + base: &Option, +) -> Result { let text = raw_uri.text.clone(); let uri = match Uri::try_from(raw_uri.clone()) { Ok(uri) => uri, Err(_) => match base { - Some(Base::RootPath(_)) | None => match source { - InputSource::FsPath(root) => create_uri_from_file_path(root, &text, base)?, - _ => return Err(ErrorKind::UnsupportedUriType(text)), - }, Some(base_url) => match base_url.join(&text) { Some(url) => Uri { url }, None => return Err(ErrorKind::InvalidBaseJoin(text.clone())), }, + None => match source { + InputSource::FsPath(root) => { + create_uri_from_file_path(root, &text, root_path, base)? + } + _ => return Err(ErrorKind::UnsupportedUriType(text)), + }, }, }; Ok(uri) @@ -81,6 +89,7 @@ pub(crate) fn is_anchor(text: &str) -> bool { fn create_uri_from_file_path( file_path: &Path, link_text: &str, + root_path: &Option, base: &Option, ) -> Result { let target_path = if is_anchor(link_text) { @@ -94,7 +103,8 @@ fn create_uri_from_file_path( } else { link_text.to_string() }; - let Ok(constructed_url) = resolve_and_create_url(file_path, &target_path, base) else { + let Ok(constructed_url) = resolve_and_create_url(file_path, &target_path, root_path, base) + else { return Err(ErrorKind::InvalidPathToUri(target_path)); }; Ok(Uri { @@ -126,21 +136,22 @@ fn truncate_source(source: &InputSource) -> InputSource { pub(crate) fn create( uris: Vec, source: &InputSource, + root_path: &Option, base: &Option, extractor: &Option, ) -> HashSet { let base = base.clone().or_else(|| Base::from_source(&source)); uris.into_iter() - .filter_map( - |raw_uri| match create_request(&raw_uri, &source, &base, extractor) { + .filter_map(|raw_uri| { + match create_request(&raw_uri, &source, &root_path, &base, extractor) { Ok(request) => Some(request), Err(e) => { warn!("Error creating request: {:?}", e); None } - }, - ) + } + }) .collect() } @@ -158,6 +169,7 @@ pub(crate) fn create( fn resolve_and_create_url( src_path: &Path, dest_path: &str, + root_path: &Option, base_uri: &Option, ) -> Result { let (dest_path, fragment) = url::remove_get_params_and_separate_fragment(dest_path); @@ -166,8 +178,12 @@ fn resolve_and_create_url( // This addresses the issue mentioned in the original comment about double-encoding let decoded_dest = percent_decode_str(dest_path).decode_utf8()?; - let Ok(Some(resolved_path)) = path::resolve(src_path, &PathBuf::from(&*decoded_dest), base_uri) - else { + let Ok(Some(resolved_path)) = path::resolve( + src_path, + &PathBuf::from(&*decoded_dest), + root_path, + base_uri, + ) else { return Err(ErrorKind::InvalidPathToUri(decoded_dest.to_string())); }; @@ -192,7 +208,8 @@ mod tests { #[test] fn test_create_uri_from_path() { let result = - resolve_and_create_url(&PathBuf::from("/README.md"), "test+encoding", &None).unwrap(); + resolve_and_create_url(&PathBuf::from("/README.md"), "test+encoding", &None, &None) + .unwrap(); assert_eq!(result.as_str(), "file:///test+encoding"); } @@ -202,7 +219,7 @@ mod tests { let source = InputSource::String(String::new()); let uris = vec![RawUri::from("relative.html")]; - let requests = create(uris, &source, &base, &None); + let requests = create(uris, &source, &None, &base, &None); assert_eq!(requests.len(), 1); assert!(requests @@ -216,7 +233,7 @@ mod tests { let source = InputSource::String(String::new()); let uris = vec![RawUri::from("https://another.com/page")]; - let requests = create(uris, &source, &base, &None); + let requests = create(uris, &source, &None, &base, &None); assert_eq!(requests.len(), 1); assert!(requests @@ -230,7 +247,7 @@ mod tests { let source = InputSource::String(String::new()); let uris = vec![RawUri::from("/root-relative")]; - let requests = create(uris, &source, &base, &None); + let requests = create(uris, &source, &None, &base, &None); assert_eq!(requests.len(), 1); assert!(requests @@ -244,7 +261,7 @@ mod tests { let source = InputSource::String(String::new()); let uris = vec![RawUri::from("../parent")]; - let requests = create(uris, &source, &base, &None); + let requests = create(uris, &source, &None, &base, &None); assert_eq!(requests.len(), 1); assert!(requests @@ -258,7 +275,7 @@ mod tests { let source = InputSource::String(String::new()); let uris = vec![RawUri::from("#fragment")]; - let requests = create(uris, &source, &base, &None); + let requests = create(uris, &source, &None, &base, &None); assert_eq!(requests.len(), 1); assert!(requests @@ -268,11 +285,11 @@ mod tests { #[test] fn test_relative_url_resolution_from_root_path() { - let base = Some(Base::create_root_path("/tmp/lychee")); + let root_path = Some(PathBuf::from("/tmp/lychee")); let source = InputSource::FsPath(PathBuf::from("/some/page.html")); let uris = vec![RawUri::from("relative.html")]; - let requests = create(uris, &source, &base, &None); + let requests = create(uris, &source, &root_path, &None, &None); assert_eq!(requests.len(), 1); assert!(requests @@ -282,11 +299,11 @@ mod tests { #[test] fn test_absolute_url_resolution_from_root_path() { - let base = Some(Base::create_root_path("/tmp/lychee")); + let root_path = Some(PathBuf::from("/tmp/lychee")); let source = InputSource::FsPath(PathBuf::from("/some/page.html")); let uris = vec![RawUri::from("https://another.com/page")]; - let requests = create(uris, &source, &base, &None); + let requests = create(uris, &source, &root_path, &None, &None); assert_eq!(requests.len(), 1); assert!(requests @@ -296,11 +313,11 @@ mod tests { #[test] fn test_root_relative_url_resolution_from_root_path() { - let base = Some(Base::create_root_path("/tmp/lychee")); + let root_path = Some(PathBuf::from("/tmp/lychee")); let source = InputSource::FsPath(PathBuf::from("/some/page.html")); let uris = vec![RawUri::from("/root-relative")]; - let requests = create(uris, &source, &base, &None); + let requests = create(uris, &source, &root_path, &None, &None); assert_eq!(requests.len(), 1); assert!(requests @@ -310,11 +327,11 @@ mod tests { #[test] fn test_parent_directory_url_resolution_from_root_path() { - let base = Some(Base::create_root_path("/tmp/lychee")); + let root_path = Some(PathBuf::from("/tmp/lychee")); let source = InputSource::FsPath(PathBuf::from("/some/page.html")); let uris = vec![RawUri::from("../parent")]; - let requests = create(uris, &source, &base, &None); + let requests = create(uris, &source, &root_path, &None, &None); assert_eq!(requests.len(), 1); assert!(requests @@ -324,11 +341,11 @@ mod tests { #[test] fn test_fragment_url_resolution_from_root_path() { - let base = Some(Base::create_root_path("/tmp/lychee")); + let root_path = Some(PathBuf::from("/tmp/lychee")); let source = InputSource::FsPath(PathBuf::from("/some/page.html")); let uris = vec![RawUri::from("#fragment")]; - let requests = create(uris, &source, &base, &None); + let requests = create(uris, &source, &root_path, &None, &None); assert_eq!(requests.len(), 1); assert!(requests @@ -342,7 +359,7 @@ mod tests { let source = InputSource::String(String::new()); let uris = vec![RawUri::from("https://example.com/page")]; - let requests = create(uris, &source, &base, &None); + let requests = create(uris, &source, &None, &base, &None); assert_eq!(requests.len(), 1); assert!(requests @@ -355,8 +372,14 @@ mod tests { let base = Some(Base::Local(PathBuf::from("/tmp/lychee"))); let input_source = InputSource::FsPath(PathBuf::from("page.html")); - let actual = - create_request(&RawUri::from("file.html"), &input_source, &base, &None).unwrap(); + let actual = create_request( + &RawUri::from("file.html"), + &input_source, + &None, + &base, + &None, + ) + .unwrap(); assert_eq!( actual, @@ -381,6 +404,7 @@ mod tests { let actual = create_request( &RawUri::from("/usr/local/share/doc/example.html"), &input_source, + &None, &base, &None, ) @@ -406,7 +430,7 @@ mod tests { let source = InputSource::String(String::new()); let raw_uri = RawUri::from("relative.html"); - let uri = try_parse_into_uri(&raw_uri, &source, &base).unwrap(); + let uri = try_parse_into_uri(&raw_uri, &source, &None, &base).unwrap(); assert_eq!(uri.url.as_str(), "file:///tmp/lychee/relative.html"); } @@ -417,7 +441,7 @@ mod tests { let source = InputSource::String(String::new()); let raw_uri = RawUri::from("absolute.html"); - let uri = try_parse_into_uri(&raw_uri, &source, &base).unwrap(); + let uri = try_parse_into_uri(&raw_uri, &source, &None, &base).unwrap(); assert_eq!(uri.url.as_str(), "file:///tmp/lychee/absolute.html"); } From 24c2e12ff97cc2ddd1a9a6c8de7fbf6283330a45 Mon Sep 17 00:00:00 2001 From: Trask Stalnaker Date: Sat, 30 Nov 2024 21:34:02 -0800 Subject: [PATCH 12/26] Revert a comment change that is no longer applicable --- lychee-lib/src/types/base.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lychee-lib/src/types/base.rs b/lychee-lib/src/types/base.rs index fe21429326..b7b76c7e5b 100644 --- a/lychee-lib/src/types/base.rs +++ b/lychee-lib/src/types/base.rs @@ -30,7 +30,7 @@ impl Base { } } - /// Return the directory if the base is not remote + /// Return the directory if the base is local #[must_use] pub(crate) fn dir(&self) -> Option { match self { From 4d3ef2b4e0ee2e4e8c761356691b2dc4b5d7ff41 Mon Sep 17 00:00:00 2001 From: Trask Stalnaker Date: Sat, 30 Nov 2024 21:44:32 -0800 Subject: [PATCH 13/26] Remove unused code --- lychee-lib/src/types/base.rs | 9 ---- lychee-lib/src/utils/path.rs | 80 +++++++-------------------------- lychee-lib/src/utils/request.rs | 21 +++------ 3 files changed, 21 insertions(+), 89 deletions(-) diff --git a/lychee-lib/src/types/base.rs b/lychee-lib/src/types/base.rs index b7b76c7e5b..4c68900c18 100644 --- a/lychee-lib/src/types/base.rs +++ b/lychee-lib/src/types/base.rs @@ -30,15 +30,6 @@ impl Base { } } - /// Return the directory if the base is local - #[must_use] - pub(crate) fn dir(&self) -> Option { - match self { - Self::Remote(_) => None, - Self::Local(d) => Some(d.clone()), - } - } - pub(crate) fn from_source(source: &InputSource) -> Option { match &source { InputSource::RemoteUrl(url) => { diff --git a/lychee-lib/src/utils/path.rs b/lychee-lib/src/utils/path.rs index ca295ce6a3..83823293ff 100644 --- a/lychee-lib/src/utils/path.rs +++ b/lychee-lib/src/utils/path.rs @@ -1,4 +1,4 @@ -use crate::{Base, ErrorKind, Result}; +use crate::{ErrorKind, Result}; use cached::proc_macro::cached; use once_cell::sync::Lazy; use path_clean::PathClean; @@ -9,11 +9,6 @@ use std::path::{Path, PathBuf}; static CURRENT_DIR: Lazy = Lazy::new(|| env::current_dir().expect("cannot get current dir from environment")); -/// Returns the base if it is a valid `PathBuf` -fn get_base_dir(base: &Option) -> Option { - base.as_ref().and_then(Base::dir) -} - /// Create an absolute path out of a `PathBuf`. /// /// The `clean` method is relatively expensive @@ -44,7 +39,6 @@ pub(crate) fn resolve( src: &Path, dst: &PathBuf, root_path: &Option, - base: &Option, ) -> Result> { let resolved = match dst { relative if !dst.starts_with("/") => { @@ -55,31 +49,18 @@ pub(crate) fn resolve( parent.join(relative) } absolute if dst.starts_with("/") => { - let with_root_path = match root_path { - Some(root) => &join(root.to_path_buf(), absolute), - None => absolute, + // Absolute local links (leading slash) are ignored unless + // root_path is provided + let Some(base) = root_path else { + return Ok(None); + }; + let Some(dir) = dirname(&base) else { + return Err(ErrorKind::InvalidBase( + base.display().to_string(), + "The given directory cannot be a base".to_string(), + )); }; - match get_base_dir(base) { - Some(base) => { - let Some(dir) = dirname(&base) else { - return Err(ErrorKind::InvalidBase( - base.display().to_string(), - "The given directory cannot be a base".to_string(), - )); - }; - join(dir.to_path_buf(), with_root_path) - } - None => { - if root_path.is_some() { - with_root_path.to_path_buf() - } else { - // Absolute local links (leading slash) require the `base_url` to - // define the document root. Silently ignore the link in case the - // `base_url` is not defined. - return Ok(None); - } - } - } + join(dir.to_path_buf(), absolute) } _ => return Err(ErrorKind::InvalidFile(dst.to_path_buf())), }; @@ -127,7 +108,7 @@ mod test_path { let dummy = PathBuf::from("index.html"); let abs_path = PathBuf::from("./foo.html"); assert_eq!( - resolve(&dummy, &abs_path, &None, &None)?, + resolve(&dummy, &abs_path, &None)?, Some(env::current_dir().unwrap().join("foo.html")) ); Ok(()) @@ -140,7 +121,7 @@ mod test_path { let dummy = PathBuf::from("./index.html"); let abs_path = PathBuf::from("./foo.html"); assert_eq!( - resolve(&dummy, &abs_path, &None, &None)?, + resolve(&dummy, &abs_path, &None)?, Some(env::current_dir().unwrap().join("foo.html")) ); Ok(()) @@ -153,43 +134,12 @@ mod test_path { let abs_index = PathBuf::from("/path/to/index.html"); let abs_path = PathBuf::from("./foo.html"); assert_eq!( - resolve(&abs_index, &abs_path, &None, &None)?, + resolve(&abs_index, &abs_path, &None)?, Some(PathBuf::from("/path/to/foo.html")) ); Ok(()) } - // dummy - // foo.html - // valid base dir - #[test] - fn test_resolve_absolute_from_base_dir() -> Result<()> { - let dummy = PathBuf::new(); - let abs_path = PathBuf::from("/foo.html"); - let base = Some(Base::Local(PathBuf::from("/some/absolute/base/dir"))); - assert_eq!( - resolve(&dummy, &abs_path, &None, &base)?, - Some(PathBuf::from("/some/absolute/base/dir/foo.html")) - ); - Ok(()) - } - - // /path/to/index.html - // /other/path/to/foo.html - #[test] - fn test_resolve_absolute_from_absolute() -> Result<()> { - let abs_index = PathBuf::from("/path/to/index.html"); - let abs_path = PathBuf::from("/other/path/to/foo.html"); - let base = Some(Base::Local(PathBuf::from("/some/absolute/base/dir"))); - assert_eq!( - resolve(&abs_index, &abs_path, &None, &base)?, - Some(PathBuf::from( - "/some/absolute/base/dir/other/path/to/foo.html" - )) - ); - Ok(()) - } - #[test] fn test_contains() { let parent_dir = tempfile::tempdir().unwrap(); diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs index 97db0ac8b8..ef8d0b4d6b 100644 --- a/lychee-lib/src/utils/request.rs +++ b/lychee-lib/src/utils/request.rs @@ -64,9 +64,7 @@ fn try_parse_into_uri( None => return Err(ErrorKind::InvalidBaseJoin(text.clone())), }, None => match source { - InputSource::FsPath(root) => { - create_uri_from_file_path(root, &text, root_path, base)? - } + InputSource::FsPath(root) => create_uri_from_file_path(root, &text, root_path)?, _ => return Err(ErrorKind::UnsupportedUriType(text)), }, }, @@ -90,7 +88,6 @@ fn create_uri_from_file_path( file_path: &Path, link_text: &str, root_path: &Option, - base: &Option, ) -> Result { let target_path = if is_anchor(link_text) { // For anchors, we need to append the anchor to the file name. @@ -103,8 +100,7 @@ fn create_uri_from_file_path( } else { link_text.to_string() }; - let Ok(constructed_url) = resolve_and_create_url(file_path, &target_path, root_path, base) - else { + let Ok(constructed_url) = resolve_and_create_url(file_path, &target_path, root_path) else { return Err(ErrorKind::InvalidPathToUri(target_path)); }; Ok(Uri { @@ -170,7 +166,6 @@ fn resolve_and_create_url( src_path: &Path, dest_path: &str, root_path: &Option, - base_uri: &Option, ) -> Result { let (dest_path, fragment) = url::remove_get_params_and_separate_fragment(dest_path); @@ -178,12 +173,9 @@ fn resolve_and_create_url( // This addresses the issue mentioned in the original comment about double-encoding let decoded_dest = percent_decode_str(dest_path).decode_utf8()?; - let Ok(Some(resolved_path)) = path::resolve( - src_path, - &PathBuf::from(&*decoded_dest), - root_path, - base_uri, - ) else { + let Ok(Some(resolved_path)) = + path::resolve(src_path, &PathBuf::from(&*decoded_dest), root_path) + else { return Err(ErrorKind::InvalidPathToUri(decoded_dest.to_string())); }; @@ -208,8 +200,7 @@ mod tests { #[test] fn test_create_uri_from_path() { let result = - resolve_and_create_url(&PathBuf::from("/README.md"), "test+encoding", &None, &None) - .unwrap(); + resolve_and_create_url(&PathBuf::from("/README.md"), "test+encoding", &None).unwrap(); assert_eq!(result.as_str(), "file:///test+encoding"); } From 196a44102210684b901a2d049770af6336cbf3e3 Mon Sep 17 00:00:00 2001 From: Trask Stalnaker Date: Sun, 1 Dec 2024 10:59:34 -0800 Subject: [PATCH 14/26] Fix and simplification --- lychee-lib/src/utils/path.rs | 37 +++++++-------------------------- lychee-lib/src/utils/request.rs | 31 +++++++++++++++++++-------- 2 files changed, 29 insertions(+), 39 deletions(-) diff --git a/lychee-lib/src/utils/path.rs b/lychee-lib/src/utils/path.rs index 83823293ff..58518f31e3 100644 --- a/lychee-lib/src/utils/path.rs +++ b/lychee-lib/src/utils/path.rs @@ -24,21 +24,13 @@ pub(crate) fn absolute_path(path: PathBuf) -> PathBuf { .clean() } -/// Get the directory name of a given `Path`. -fn dirname(src: &'_ Path) -> Option<&'_ Path> { - if src.is_file() { - return src.parent(); - } - Some(src) -} - /// Resolve `dst` that was linked to from within `src` /// /// Returns Ok(None) in case of an absolute local link without a `base_url` pub(crate) fn resolve( src: &Path, dst: &PathBuf, - root_path: &Option, + ignore_absolute_local_links: bool, ) -> Result> { let resolved = match dst { relative if !dst.starts_with("/") => { @@ -51,31 +43,16 @@ pub(crate) fn resolve( absolute if dst.starts_with("/") => { // Absolute local links (leading slash) are ignored unless // root_path is provided - let Some(base) = root_path else { + if ignore_absolute_local_links { return Ok(None); - }; - let Some(dir) = dirname(&base) else { - return Err(ErrorKind::InvalidBase( - base.display().to_string(), - "The given directory cannot be a base".to_string(), - )); - }; - join(dir.to_path_buf(), absolute) + } + PathBuf::from(absolute) } _ => return Err(ErrorKind::InvalidFile(dst.to_path_buf())), }; Ok(Some(absolute_path(resolved))) } -/// A cumbersome way to concatenate paths without checking their -/// existence on disk. See -fn join(base: PathBuf, dst: &Path) -> PathBuf { - let mut abs = base.into_os_string(); - let target_str = dst.as_os_str(); - abs.push(target_str); - PathBuf::from(abs) -} - /// Check if `child` is a subdirectory/file inside `parent` /// /// Note that `contains(parent, parent)` will return `true` @@ -108,7 +85,7 @@ mod test_path { let dummy = PathBuf::from("index.html"); let abs_path = PathBuf::from("./foo.html"); assert_eq!( - resolve(&dummy, &abs_path, &None)?, + resolve(&dummy, &abs_path, true)?, Some(env::current_dir().unwrap().join("foo.html")) ); Ok(()) @@ -121,7 +98,7 @@ mod test_path { let dummy = PathBuf::from("./index.html"); let abs_path = PathBuf::from("./foo.html"); assert_eq!( - resolve(&dummy, &abs_path, &None)?, + resolve(&dummy, &abs_path, true)?, Some(env::current_dir().unwrap().join("foo.html")) ); Ok(()) @@ -134,7 +111,7 @@ mod test_path { let abs_index = PathBuf::from("/path/to/index.html"); let abs_path = PathBuf::from("./foo.html"); assert_eq!( - resolve(&abs_index, &abs_path, &None)?, + resolve(&abs_index, &abs_path, true)?, Some(PathBuf::from("/path/to/foo.html")) ); Ok(()) diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs index ef8d0b4d6b..55178b54d5 100644 --- a/lychee-lib/src/utils/request.rs +++ b/lychee-lib/src/utils/request.rs @@ -55,7 +55,14 @@ fn try_parse_into_uri( root_path: &Option, base: &Option, ) -> Result { - let text = raw_uri.text.clone(); + let mut text = raw_uri.text.clone(); + if text.starts_with('/') { + if let Some(path) = root_path { + if let Some(path_str) = path.to_str() { + text = format!("{path_str}{text}"); + } + } + } let uri = match Uri::try_from(raw_uri.clone()) { Ok(uri) => uri, Err(_) => match base { @@ -64,7 +71,9 @@ fn try_parse_into_uri( None => return Err(ErrorKind::InvalidBaseJoin(text.clone())), }, None => match source { - InputSource::FsPath(root) => create_uri_from_file_path(root, &text, root_path)?, + InputSource::FsPath(root) => { + create_uri_from_file_path(root, &text, root_path.is_none())? + } _ => return Err(ErrorKind::UnsupportedUriType(text)), }, }, @@ -87,7 +96,7 @@ pub(crate) fn is_anchor(text: &str) -> bool { fn create_uri_from_file_path( file_path: &Path, link_text: &str, - root_path: &Option, + ignore_absolute_local_links: bool, ) -> Result { let target_path = if is_anchor(link_text) { // For anchors, we need to append the anchor to the file name. @@ -100,7 +109,9 @@ fn create_uri_from_file_path( } else { link_text.to_string() }; - let Ok(constructed_url) = resolve_and_create_url(file_path, &target_path, root_path) else { + let Ok(constructed_url) = + resolve_and_create_url(file_path, &target_path, ignore_absolute_local_links) + else { return Err(ErrorKind::InvalidPathToUri(target_path)); }; Ok(Uri { @@ -165,7 +176,7 @@ pub(crate) fn create( fn resolve_and_create_url( src_path: &Path, dest_path: &str, - root_path: &Option, + ignore_absolute_local_links: bool, ) -> Result { let (dest_path, fragment) = url::remove_get_params_and_separate_fragment(dest_path); @@ -173,9 +184,11 @@ fn resolve_and_create_url( // This addresses the issue mentioned in the original comment about double-encoding let decoded_dest = percent_decode_str(dest_path).decode_utf8()?; - let Ok(Some(resolved_path)) = - path::resolve(src_path, &PathBuf::from(&*decoded_dest), root_path) - else { + let Ok(Some(resolved_path)) = path::resolve( + src_path, + &PathBuf::from(&*decoded_dest), + ignore_absolute_local_links, + ) else { return Err(ErrorKind::InvalidPathToUri(decoded_dest.to_string())); }; @@ -200,7 +213,7 @@ mod tests { #[test] fn test_create_uri_from_path() { let result = - resolve_and_create_url(&PathBuf::from("/README.md"), "test+encoding", &None).unwrap(); + resolve_and_create_url(&PathBuf::from("/README.md"), "test+encoding", true).unwrap(); assert_eq!(result.as_str(), "file:///test+encoding"); } From 1ce48cf546e328ccf9f6fb662a504a72fe9e9bed Mon Sep 17 00:00:00 2001 From: Trask Stalnaker Date: Sun, 1 Dec 2024 10:15:56 -0800 Subject: [PATCH 15/26] Integration test both at the same time --- lychee-bin/tests/cli.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/lychee-bin/tests/cli.rs b/lychee-bin/tests/cli.rs index c8219d58a5..85a8bee3e0 100644 --- a/lychee-bin/tests/cli.rs +++ b/lychee-bin/tests/cli.rs @@ -411,6 +411,24 @@ mod cli { .stdout(contains("2 Errors")); } + #[test] + fn test_resolve_paths_from_root_path_and_base_url() { + let mut cmd = main_command(); + let dir = fixtures_path(); + + cmd.arg("--offline") + .arg("--root-path") + .arg("/resolve_paths") + .arg("--base") + .arg(&dir) + .arg(dir.join("resolve_paths").join("index.html")) + .env_clear() + .assert() + .success() + .stdout(contains("3 Total")) + .stdout(contains("3 OK")); + } + #[test] fn test_youtube_quirk() { let url = "https://www.youtube.com/watch?v=NlKuICiT470&list=PLbWDhxwM_45mPVToqaIZNbZeIzFchsKKQ&index=7"; From 393919ed96788bb43df2359e6cb2bbe1aebdc6e0 Mon Sep 17 00:00:00 2001 From: Trask Stalnaker Date: Sun, 1 Dec 2024 10:23:29 -0800 Subject: [PATCH 16/26] Unit tests both at the same time --- lychee-lib/src/utils/request.rs | 75 +++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs index 55178b54d5..b7023b9bff 100644 --- a/lychee-lib/src/utils/request.rs +++ b/lychee-lib/src/utils/request.rs @@ -357,6 +357,81 @@ mod tests { .any(|r| r.uri.url.as_str() == "file:///some/page.html#fragment")); } + #[test] + fn test_relative_url_resolution_from_root_path_and_base_url() { + let root_path = Some(PathBuf::from("/tmp/lychee")); + let base = Some(Base::try_from("https://example.com/path/page.html").unwrap()); + let source = InputSource::FsPath(PathBuf::from("/some/page.html")); + + let uris = vec![RawUri::from("relative.html")]; + let requests = create(uris, &source, &root_path, &base, &None); + + assert_eq!(requests.len(), 1); + assert!(requests + .iter() + .any(|r| r.uri.url.as_str() == "https://example.com/path/relative.html")); + } + + #[test] + fn test_absolute_url_resolution_from_root_path_and_base_url() { + let root_path = Some(PathBuf::from("/tmp/lychee")); + let base = Some(Base::try_from("https://example.com/path/page.html").unwrap()); + let source = InputSource::FsPath(PathBuf::from("/some/page.html")); + + let uris = vec![RawUri::from("https://another.com/page")]; + let requests = create(uris, &source, &root_path, &base, &None); + + assert_eq!(requests.len(), 1); + assert!(requests + .iter() + .any(|r| r.uri.url.as_str() == "https://another.com/page")); + } + + #[test] + fn test_root_relative_url_resolution_from_root_path_and_base_url() { + let root_path = Some(PathBuf::from("/tmp/lychee")); + let base = Some(Base::try_from("https://example.com/path/page.html").unwrap()); + let source = InputSource::FsPath(PathBuf::from("/some/page.html")); + + let uris = vec![RawUri::from("/root-relative")]; + let requests = create(uris, &source, &root_path, &base, &None); + + assert_eq!(requests.len(), 1); + assert!(requests + .iter() + .any(|r| r.uri.url.as_str() == "https://example.com/tmp/lychee/root-relative")); + } + + #[test] + fn test_parent_directory_url_resolution_from_root_path_and_base_url() { + let root_path = Some(PathBuf::from("/tmp/lychee")); + let base = Some(Base::try_from("https://example.com/path/page.html").unwrap()); + let source = InputSource::FsPath(PathBuf::from("/some/page.html")); + + let uris = vec![RawUri::from("../parent")]; + let requests = create(uris, &source, &root_path, &base, &None); + + assert_eq!(requests.len(), 1); + assert!(requests + .iter() + .any(|r| r.uri.url.as_str() == "https://example.com/parent")); + } + + #[test] + fn test_fragment_url_resolution_from_root_path_and_base_url() { + let root_path = Some(PathBuf::from("/tmp/lychee")); + let base = Some(Base::try_from("https://example.com/path/page.html").unwrap()); + let source = InputSource::FsPath(PathBuf::from("/some/page.html")); + + let uris = vec![RawUri::from("#fragment")]; + let requests = create(uris, &source, &root_path, &base, &None); + + assert_eq!(requests.len(), 1); + assert!(requests + .iter() + .any(|r| r.uri.url.as_str() == "https://example.com/path/page.html#fragment")); + } + #[test] fn test_no_base_url_resolution() { let base = None; From 55a8198e337e185ae0bf8cb01f193bd6b34fb246 Mon Sep 17 00:00:00 2001 From: Trask Stalnaker Date: Sun, 1 Dec 2024 11:24:01 -0800 Subject: [PATCH 17/26] Remove now redundant comment --- lychee-lib/src/utils/path.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/lychee-lib/src/utils/path.rs b/lychee-lib/src/utils/path.rs index 58518f31e3..979d2b26e6 100644 --- a/lychee-lib/src/utils/path.rs +++ b/lychee-lib/src/utils/path.rs @@ -41,8 +41,6 @@ pub(crate) fn resolve( parent.join(relative) } absolute if dst.starts_with("/") => { - // Absolute local links (leading slash) are ignored unless - // root_path is provided if ignore_absolute_local_links { return Ok(None); } From 70fa35bab333edb7d8ee4904319da3659461d840 Mon Sep 17 00:00:00 2001 From: Trask Stalnaker Date: Mon, 2 Dec 2024 13:01:49 -0800 Subject: [PATCH 18/26] Revert windows-specific change, seems not needed after recent changes --- lychee-lib/src/utils/path.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lychee-lib/src/utils/path.rs b/lychee-lib/src/utils/path.rs index 979d2b26e6..3d86259157 100644 --- a/lychee-lib/src/utils/path.rs +++ b/lychee-lib/src/utils/path.rs @@ -33,14 +33,14 @@ pub(crate) fn resolve( ignore_absolute_local_links: bool, ) -> Result> { let resolved = match dst { - relative if !dst.starts_with("/") => { + relative if dst.is_relative() => { // Find `dst` in the parent directory of `src` let Some(parent) = src.parent() else { return Err(ErrorKind::InvalidFile(relative.to_path_buf())); }; parent.join(relative) } - absolute if dst.starts_with("/") => { + absolute if dst.is_absolute() => { if ignore_absolute_local_links { return Ok(None); } From ae0ed421b7956f2b9933d440054c25fc4946a5a1 Mon Sep 17 00:00:00 2001 From: Trask Stalnaker Date: Mon, 2 Dec 2024 18:21:32 -0800 Subject: [PATCH 19/26] Use Collector::default() --- examples/collect_links/collect_links.rs | 2 +- lychee-lib/src/collector.rs | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/examples/collect_links/collect_links.rs b/examples/collect_links/collect_links.rs index 3a2ab520e7..375dad3e30 100644 --- a/examples/collect_links/collect_links.rs +++ b/examples/collect_links/collect_links.rs @@ -21,7 +21,7 @@ async fn main() -> Result<()> { }, ]; - let links = Collector::new(None, None) // root_path and base + let links = Collector::default() // root_path and base .skip_missing_inputs(false) // don't skip missing inputs? (default=false) .skip_hidden(false) // skip hidden files? (default=true) .skip_ignored(false) // skip files that are ignored by git? (default=true) diff --git a/lychee-lib/src/collector.rs b/lychee-lib/src/collector.rs index c21dd67999..0cde843d8d 100644 --- a/lychee-lib/src/collector.rs +++ b/lychee-lib/src/collector.rs @@ -26,6 +26,21 @@ pub struct Collector { base: Option, } +impl Default for Collector { + fn default() -> Self { + Collector { + basic_auth_extractor: None, + skip_missing_inputs: false, + include_verbatim: false, + use_html5ever: false, + skip_hidden: true, + skip_ignored: true, + root_path: None, + base: None, + } + } +} + impl Collector { /// Create a new collector with an empty cache #[must_use] From 3331cdacc7a4bbc219f3ddd5d7fd086aa015d0f1 Mon Sep 17 00:00:00 2001 From: Trask Stalnaker Date: Mon, 2 Dec 2024 18:45:56 -0800 Subject: [PATCH 20/26] extract method and unit tests --- lychee-lib/src/utils/request.rs | 52 ++++++++++++++++++++++++++++----- 1 file changed, 44 insertions(+), 8 deletions(-) diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs index b7023b9bff..d5e002f162 100644 --- a/lychee-lib/src/utils/request.rs +++ b/lychee-lib/src/utils/request.rs @@ -55,14 +55,7 @@ fn try_parse_into_uri( root_path: &Option, base: &Option, ) -> Result { - let mut text = raw_uri.text.clone(); - if text.starts_with('/') { - if let Some(path) = root_path { - if let Some(path_str) = path.to_str() { - text = format!("{path_str}{text}"); - } - } - } + let text = prepend_root_path_if_absolute_local_link(&raw_uri.text, root_path); let uri = match Uri::try_from(raw_uri.clone()) { Ok(uri) => uri, Err(_) => match base { @@ -200,6 +193,17 @@ fn resolve_and_create_url( Ok(url) } +fn prepend_root_path_if_absolute_local_link(text: &str, root_path: &Option) -> String { + if text.starts_with('/') { + if let Some(path) = root_path { + if let Some(path_str) = path.to_str() { + return format!("{}{}", path_str, text); + } + } + } + text.to_string() +} + #[cfg(test)] mod tests { use super::*; @@ -524,4 +528,36 @@ mod tests { assert_eq!(uri.url.as_str(), "file:///tmp/lychee/absolute.html"); } + + #[test] + fn test_prepend_with_absolute_local_link_and_root_path() { + let text = "/absolute/path"; + let root_path = Some(PathBuf::from("/root")); + let result = prepend_root_path_if_absolute_local_link(text, &root_path); + assert_eq!(result, "/root/absolute/path"); + } + + #[test] + fn test_prepend_with_absolute_local_link_and_no_root_path() { + let text = "/absolute/path"; + let root_path: Option = None; + let result = prepend_root_path_if_absolute_local_link(text, &root_path); + assert_eq!(result, "/absolute/path"); + } + + #[test] + fn test_prepend_with_relative_link_and_root_path() { + let text = "relative/path"; + let root_path = Some(PathBuf::from("/root")); + let result = prepend_root_path_if_absolute_local_link(text, &root_path); + assert_eq!(result, "relative/path"); + } + + #[test] + fn test_prepend_with_relative_link_and_no_root_path() { + let text = "relative/path"; + let root_path = None; + let result = prepend_root_path_if_absolute_local_link(text, &root_path); + assert_eq!(result, "relative/path"); + } } From fb5aff538dbde260e9bb51f0fdca89847c4b95e6 Mon Sep 17 00:00:00 2001 From: Trask Stalnaker Date: Mon, 2 Dec 2024 18:51:12 -0800 Subject: [PATCH 21/26] clippy --- lychee-lib/src/types/file.rs | 1 - lychee-lib/src/utils/path.rs | 4 ++-- lychee-lib/src/utils/request.rs | 12 ++++++------ 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/lychee-lib/src/types/file.rs b/lychee-lib/src/types/file.rs index 7cdeff6c80..d2f8631c6c 100644 --- a/lychee-lib/src/types/file.rs +++ b/lychee-lib/src/types/file.rs @@ -54,7 +54,6 @@ impl> From

for FileType { } /// Helper function to check if a path is likely a URL. - fn is_url(path: &Path) -> bool { path.to_str() .and_then(|s| Url::parse(s).ok()) diff --git a/lychee-lib/src/utils/path.rs b/lychee-lib/src/utils/path.rs index 3d86259157..daa4f7fde6 100644 --- a/lychee-lib/src/utils/path.rs +++ b/lychee-lib/src/utils/path.rs @@ -36,7 +36,7 @@ pub(crate) fn resolve( relative if dst.is_relative() => { // Find `dst` in the parent directory of `src` let Some(parent) = src.parent() else { - return Err(ErrorKind::InvalidFile(relative.to_path_buf())); + return Err(ErrorKind::InvalidFile(relative.clone())); }; parent.join(relative) } @@ -46,7 +46,7 @@ pub(crate) fn resolve( } PathBuf::from(absolute) } - _ => return Err(ErrorKind::InvalidFile(dst.to_path_buf())), + _ => return Err(ErrorKind::InvalidFile(dst.clone())), }; Ok(Some(absolute_path(resolved))) } diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs index d5e002f162..7f45ed8e7a 100644 --- a/lychee-lib/src/utils/request.rs +++ b/lychee-lib/src/utils/request.rs @@ -140,18 +140,18 @@ pub(crate) fn create( base: &Option, extractor: &Option, ) -> HashSet { - let base = base.clone().or_else(|| Base::from_source(&source)); + let base = base.clone().or_else(|| Base::from_source(source)); uris.into_iter() - .filter_map(|raw_uri| { - match create_request(&raw_uri, &source, &root_path, &base, extractor) { + .filter_map( + |raw_uri| match create_request(&raw_uri, source, root_path, &base, extractor) { Ok(request) => Some(request), Err(e) => { warn!("Error creating request: {:?}", e); None } - } - }) + }, + ) .collect() } @@ -197,7 +197,7 @@ fn prepend_root_path_if_absolute_local_link(text: &str, root_path: &Option Date: Mon, 2 Dec 2024 19:26:21 -0800 Subject: [PATCH 22/26] clippy: &Option -> Option<&A> --- lychee-bin/src/commands/check.rs | 8 +- lychee-lib/src/collector.rs | 6 +- lychee-lib/src/utils/request.rs | 141 +++++++++++++++---------------- 3 files changed, 76 insertions(+), 79 deletions(-) diff --git a/lychee-bin/src/commands/check.rs b/lychee-bin/src/commands/check.rs index 3551c4c9c8..5c0614b179 100644 --- a/lychee-bin/src/commands/check.rs +++ b/lychee-bin/src/commands/check.rs @@ -192,7 +192,7 @@ async fn progress_bar_task( while let Some(response) = recv_resp.recv().await { show_progress( &mut io::stderr(), - &pb, + pb.as_ref(), &response, formatter.as_ref(), &verbose, @@ -331,7 +331,7 @@ fn ignore_cache(uri: &Uri, status: &Status, cache_exclude_status: &HashSet) fn show_progress( output: &mut dyn Write, - progress_bar: &Option, + progress_bar: Option<&ProgressBar>, response: &Response, formatter: &dyn ResponseFormatter, verbose: &Verbosity, @@ -401,7 +401,7 @@ mod tests { let formatter = get_response_formatter(&options::OutputMode::Plain); show_progress( &mut buf, - &None, + None, &response, formatter.as_ref(), &Verbosity::default(), @@ -423,7 +423,7 @@ mod tests { let formatter = get_response_formatter(&options::OutputMode::Plain); show_progress( &mut buf, - &None, + None, &response, formatter.as_ref(), &Verbosity::debug(), diff --git a/lychee-lib/src/collector.rs b/lychee-lib/src/collector.rs index 0cde843d8d..dc6826fccb 100644 --- a/lychee-lib/src/collector.rs +++ b/lychee-lib/src/collector.rs @@ -146,9 +146,9 @@ impl Collector { let requests = request::create( uris, &content.source, - &root_path, - &base, - &basic_auth_extractor, + root_path.as_ref(), + base.as_ref(), + basic_auth_extractor.as_ref(), ); Result::Ok(stream::iter(requests.into_iter().map(Ok))) } diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs index 7f45ed8e7a..5df498fdd5 100644 --- a/lychee-lib/src/utils/request.rs +++ b/lychee-lib/src/utils/request.rs @@ -15,7 +15,7 @@ use crate::{ /// Extract basic auth credentials for a given URL. fn extract_credentials( - extractor: &Option, + extractor: Option<&BasicAuthExtractor>, uri: &Uri, ) -> Option { extractor.as_ref().and_then(|ext| ext.matches(uri)) @@ -25,9 +25,9 @@ fn extract_credentials( fn create_request( raw_uri: &RawUri, source: &InputSource, - root_path: &Option, - base: &Option, - extractor: &Option, + root_path: Option<&PathBuf>, + base: Option<&Base>, + extractor: Option<&BasicAuthExtractor>, ) -> Result { let uri = try_parse_into_uri(raw_uri, source, root_path, base)?; let source = truncate_source(source); @@ -52,8 +52,8 @@ fn create_request( fn try_parse_into_uri( raw_uri: &RawUri, source: &InputSource, - root_path: &Option, - base: &Option, + root_path: Option<&PathBuf>, + base: Option<&Base>, ) -> Result { let text = prepend_root_path_if_absolute_local_link(&raw_uri.text, root_path); let uri = match Uri::try_from(raw_uri.clone()) { @@ -136,22 +136,22 @@ fn truncate_source(source: &InputSource) -> InputSource { pub(crate) fn create( uris: Vec, source: &InputSource, - root_path: &Option, - base: &Option, - extractor: &Option, + root_path: Option<&PathBuf>, + base: Option<&Base>, + extractor: Option<&BasicAuthExtractor>, ) -> HashSet { - let base = base.clone().or_else(|| Base::from_source(source)); + let base = base.cloned().or_else(|| Base::from_source(source)); uris.into_iter() - .filter_map( - |raw_uri| match create_request(&raw_uri, source, root_path, &base, extractor) { + .filter_map(|raw_uri| { + match create_request(&raw_uri, source, root_path, base.as_ref(), extractor) { Ok(request) => Some(request), Err(e) => { warn!("Error creating request: {:?}", e); None } - }, - ) + } + }) .collect() } @@ -193,7 +193,7 @@ fn resolve_and_create_url( Ok(url) } -fn prepend_root_path_if_absolute_local_link(text: &str, root_path: &Option) -> String { +fn prepend_root_path_if_absolute_local_link(text: &str, root_path: Option<&PathBuf>) -> String { if text.starts_with('/') { if let Some(path) = root_path { if let Some(path_str) = path.to_str() { @@ -223,11 +223,11 @@ mod tests { #[test] fn test_relative_url_resolution() { - let base = Some(Base::try_from("https://example.com/path/page.html").unwrap()); + let base = Base::try_from("https://example.com/path/page.html").unwrap(); let source = InputSource::String(String::new()); let uris = vec![RawUri::from("relative.html")]; - let requests = create(uris, &source, &None, &base, &None); + let requests = create(uris, &source, None, Some(&base), None); assert_eq!(requests.len(), 1); assert!(requests @@ -237,11 +237,11 @@ mod tests { #[test] fn test_absolute_url_resolution() { - let base = Some(Base::try_from("https://example.com/path/page.html").unwrap()); + let base = Base::try_from("https://example.com/path/page.html").unwrap(); let source = InputSource::String(String::new()); let uris = vec![RawUri::from("https://another.com/page")]; - let requests = create(uris, &source, &None, &base, &None); + let requests = create(uris, &source, None, Some(&base), None); assert_eq!(requests.len(), 1); assert!(requests @@ -251,11 +251,11 @@ mod tests { #[test] fn test_root_relative_url_resolution() { - let base = Some(Base::try_from("https://example.com/path/page.html").unwrap()); + let base = Base::try_from("https://example.com/path/page.html").unwrap(); let source = InputSource::String(String::new()); let uris = vec![RawUri::from("/root-relative")]; - let requests = create(uris, &source, &None, &base, &None); + let requests = create(uris, &source, None, Some(&base), None); assert_eq!(requests.len(), 1); assert!(requests @@ -265,11 +265,11 @@ mod tests { #[test] fn test_parent_directory_url_resolution() { - let base = Some(Base::try_from("https://example.com/path/page.html").unwrap()); + let base = Base::try_from("https://example.com/path/page.html").unwrap(); let source = InputSource::String(String::new()); let uris = vec![RawUri::from("../parent")]; - let requests = create(uris, &source, &None, &base, &None); + let requests = create(uris, &source, None, Some(&base), None); assert_eq!(requests.len(), 1); assert!(requests @@ -279,11 +279,11 @@ mod tests { #[test] fn test_fragment_url_resolution() { - let base = Some(Base::try_from("https://example.com/path/page.html").unwrap()); + let base = Base::try_from("https://example.com/path/page.html").unwrap(); let source = InputSource::String(String::new()); let uris = vec![RawUri::from("#fragment")]; - let requests = create(uris, &source, &None, &base, &None); + let requests = create(uris, &source, None, Some(&base), None); assert_eq!(requests.len(), 1); assert!(requests @@ -293,11 +293,11 @@ mod tests { #[test] fn test_relative_url_resolution_from_root_path() { - let root_path = Some(PathBuf::from("/tmp/lychee")); + let root_path = PathBuf::from("/tmp/lychee"); let source = InputSource::FsPath(PathBuf::from("/some/page.html")); let uris = vec![RawUri::from("relative.html")]; - let requests = create(uris, &source, &root_path, &None, &None); + let requests = create(uris, &source, Some(&root_path), None, None); assert_eq!(requests.len(), 1); assert!(requests @@ -307,11 +307,11 @@ mod tests { #[test] fn test_absolute_url_resolution_from_root_path() { - let root_path = Some(PathBuf::from("/tmp/lychee")); + let root_path = PathBuf::from("/tmp/lychee"); let source = InputSource::FsPath(PathBuf::from("/some/page.html")); let uris = vec![RawUri::from("https://another.com/page")]; - let requests = create(uris, &source, &root_path, &None, &None); + let requests = create(uris, &source, Some(&root_path), None, None); assert_eq!(requests.len(), 1); assert!(requests @@ -321,11 +321,11 @@ mod tests { #[test] fn test_root_relative_url_resolution_from_root_path() { - let root_path = Some(PathBuf::from("/tmp/lychee")); + let root_path = PathBuf::from("/tmp/lychee"); let source = InputSource::FsPath(PathBuf::from("/some/page.html")); let uris = vec![RawUri::from("/root-relative")]; - let requests = create(uris, &source, &root_path, &None, &None); + let requests = create(uris, &source, Some(&root_path), None, None); assert_eq!(requests.len(), 1); assert!(requests @@ -335,11 +335,11 @@ mod tests { #[test] fn test_parent_directory_url_resolution_from_root_path() { - let root_path = Some(PathBuf::from("/tmp/lychee")); + let root_path = PathBuf::from("/tmp/lychee"); let source = InputSource::FsPath(PathBuf::from("/some/page.html")); let uris = vec![RawUri::from("../parent")]; - let requests = create(uris, &source, &root_path, &None, &None); + let requests = create(uris, &source, Some(&root_path), None, None); assert_eq!(requests.len(), 1); assert!(requests @@ -349,11 +349,11 @@ mod tests { #[test] fn test_fragment_url_resolution_from_root_path() { - let root_path = Some(PathBuf::from("/tmp/lychee")); + let root_path = PathBuf::from("/tmp/lychee"); let source = InputSource::FsPath(PathBuf::from("/some/page.html")); let uris = vec![RawUri::from("#fragment")]; - let requests = create(uris, &source, &root_path, &None, &None); + let requests = create(uris, &source, Some(&root_path), None, None); assert_eq!(requests.len(), 1); assert!(requests @@ -363,12 +363,12 @@ mod tests { #[test] fn test_relative_url_resolution_from_root_path_and_base_url() { - let root_path = Some(PathBuf::from("/tmp/lychee")); - let base = Some(Base::try_from("https://example.com/path/page.html").unwrap()); + let root_path = PathBuf::from("/tmp/lychee"); + let base = Base::try_from("https://example.com/path/page.html").unwrap(); let source = InputSource::FsPath(PathBuf::from("/some/page.html")); let uris = vec![RawUri::from("relative.html")]; - let requests = create(uris, &source, &root_path, &base, &None); + let requests = create(uris, &source, Some(&root_path), Some(&base), None); assert_eq!(requests.len(), 1); assert!(requests @@ -378,12 +378,12 @@ mod tests { #[test] fn test_absolute_url_resolution_from_root_path_and_base_url() { - let root_path = Some(PathBuf::from("/tmp/lychee")); - let base = Some(Base::try_from("https://example.com/path/page.html").unwrap()); + let root_path = PathBuf::from("/tmp/lychee"); + let base = Base::try_from("https://example.com/path/page.html").unwrap(); let source = InputSource::FsPath(PathBuf::from("/some/page.html")); let uris = vec![RawUri::from("https://another.com/page")]; - let requests = create(uris, &source, &root_path, &base, &None); + let requests = create(uris, &source, Some(&root_path), Some(&base), None); assert_eq!(requests.len(), 1); assert!(requests @@ -393,12 +393,12 @@ mod tests { #[test] fn test_root_relative_url_resolution_from_root_path_and_base_url() { - let root_path = Some(PathBuf::from("/tmp/lychee")); - let base = Some(Base::try_from("https://example.com/path/page.html").unwrap()); + let root_path = PathBuf::from("/tmp/lychee"); + let base = Base::try_from("https://example.com/path/page.html").unwrap(); let source = InputSource::FsPath(PathBuf::from("/some/page.html")); let uris = vec![RawUri::from("/root-relative")]; - let requests = create(uris, &source, &root_path, &base, &None); + let requests = create(uris, &source, Some(&root_path), Some(&base), None); assert_eq!(requests.len(), 1); assert!(requests @@ -408,12 +408,12 @@ mod tests { #[test] fn test_parent_directory_url_resolution_from_root_path_and_base_url() { - let root_path = Some(PathBuf::from("/tmp/lychee")); - let base = Some(Base::try_from("https://example.com/path/page.html").unwrap()); + let root_path = PathBuf::from("/tmp/lychee"); + let base = Base::try_from("https://example.com/path/page.html").unwrap(); let source = InputSource::FsPath(PathBuf::from("/some/page.html")); let uris = vec![RawUri::from("../parent")]; - let requests = create(uris, &source, &root_path, &base, &None); + let requests = create(uris, &source, Some(&root_path), Some(&base), None); assert_eq!(requests.len(), 1); assert!(requests @@ -423,12 +423,12 @@ mod tests { #[test] fn test_fragment_url_resolution_from_root_path_and_base_url() { - let root_path = Some(PathBuf::from("/tmp/lychee")); - let base = Some(Base::try_from("https://example.com/path/page.html").unwrap()); + let root_path = PathBuf::from("/tmp/lychee"); + let base = Base::try_from("https://example.com/path/page.html").unwrap(); let source = InputSource::FsPath(PathBuf::from("/some/page.html")); let uris = vec![RawUri::from("#fragment")]; - let requests = create(uris, &source, &root_path, &base, &None); + let requests = create(uris, &source, Some(&root_path), Some(&base), None); assert_eq!(requests.len(), 1); assert!(requests @@ -438,11 +438,10 @@ mod tests { #[test] fn test_no_base_url_resolution() { - let base = None; let source = InputSource::String(String::new()); let uris = vec![RawUri::from("https://example.com/page")]; - let requests = create(uris, &source, &None, &base, &None); + let requests = create(uris, &source, None, None, None); assert_eq!(requests.len(), 1); assert!(requests @@ -452,15 +451,15 @@ mod tests { #[test] fn test_create_request_from_relative_file_path() { - let base = Some(Base::Local(PathBuf::from("/tmp/lychee"))); + let base = Base::Local(PathBuf::from("/tmp/lychee")); let input_source = InputSource::FsPath(PathBuf::from("page.html")); let actual = create_request( &RawUri::from("file.html"), &input_source, - &None, - &base, - &None, + None, + Some(&base), + None, ) .unwrap(); @@ -480,16 +479,16 @@ mod tests { #[test] fn test_create_request_from_absolute_file_path() { - let base = Some(Base::Local(PathBuf::from("/tmp/lychee"))); + let base = Base::Local(PathBuf::from("/tmp/lychee")); let input_source = InputSource::FsPath(PathBuf::from("/tmp/lychee/page.html")); // Use an absolute path that's outside the base directory let actual = create_request( &RawUri::from("/usr/local/share/doc/example.html"), &input_source, - &None, - &base, - &None, + None, + Some(&base), + None, ) .unwrap(); @@ -509,22 +508,22 @@ mod tests { #[test] fn test_parse_relative_path_into_uri() { - let base = Some(Base::Local(PathBuf::from("/tmp/lychee"))); + let base = Base::Local(PathBuf::from("/tmp/lychee")); let source = InputSource::String(String::new()); let raw_uri = RawUri::from("relative.html"); - let uri = try_parse_into_uri(&raw_uri, &source, &None, &base).unwrap(); + let uri = try_parse_into_uri(&raw_uri, &source, None, Some(&base)).unwrap(); assert_eq!(uri.url.as_str(), "file:///tmp/lychee/relative.html"); } #[test] fn test_parse_absolute_path_into_uri() { - let base = Some(Base::Local(PathBuf::from("/tmp/lychee"))); + let base = Base::Local(PathBuf::from("/tmp/lychee")); let source = InputSource::String(String::new()); let raw_uri = RawUri::from("absolute.html"); - let uri = try_parse_into_uri(&raw_uri, &source, &None, &base).unwrap(); + let uri = try_parse_into_uri(&raw_uri, &source, None, Some(&base)).unwrap(); assert_eq!(uri.url.as_str(), "file:///tmp/lychee/absolute.html"); } @@ -532,32 +531,30 @@ mod tests { #[test] fn test_prepend_with_absolute_local_link_and_root_path() { let text = "/absolute/path"; - let root_path = Some(PathBuf::from("/root")); - let result = prepend_root_path_if_absolute_local_link(text, &root_path); + let root_path = PathBuf::from("/root"); + let result = prepend_root_path_if_absolute_local_link(text, Some(&root_path)); assert_eq!(result, "/root/absolute/path"); } #[test] fn test_prepend_with_absolute_local_link_and_no_root_path() { let text = "/absolute/path"; - let root_path: Option = None; - let result = prepend_root_path_if_absolute_local_link(text, &root_path); + let result = prepend_root_path_if_absolute_local_link(text, None); assert_eq!(result, "/absolute/path"); } #[test] fn test_prepend_with_relative_link_and_root_path() { let text = "relative/path"; - let root_path = Some(PathBuf::from("/root")); - let result = prepend_root_path_if_absolute_local_link(text, &root_path); + let root_path = PathBuf::from("/root"); + let result = prepend_root_path_if_absolute_local_link(text, Some(&root_path)); assert_eq!(result, "relative/path"); } #[test] fn test_prepend_with_relative_link_and_no_root_path() { let text = "relative/path"; - let root_path = None; - let result = prepend_root_path_if_absolute_local_link(text, &root_path); + let result = prepend_root_path_if_absolute_local_link(text, None); assert_eq!(result, "relative/path"); } } From 0bb19b972c6c263a50ccd22f1b695f55e962f279 Mon Sep 17 00:00:00 2001 From: Trask Stalnaker Date: Mon, 2 Dec 2024 20:28:29 -0800 Subject: [PATCH 23/26] Remove outdated comment --- examples/collect_links/collect_links.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/collect_links/collect_links.rs b/examples/collect_links/collect_links.rs index 375dad3e30..57edd5fcdb 100644 --- a/examples/collect_links/collect_links.rs +++ b/examples/collect_links/collect_links.rs @@ -21,7 +21,7 @@ async fn main() -> Result<()> { }, ]; - let links = Collector::default() // root_path and base + let links = Collector::default() .skip_missing_inputs(false) // don't skip missing inputs? (default=false) .skip_hidden(false) // skip hidden files? (default=true) .skip_ignored(false) // skip files that are ignored by git? (default=true) From b767b36b010ed6568031f5cd4a74d7080bff13b1 Mon Sep 17 00:00:00 2001 From: Trask Stalnaker Date: Fri, 6 Dec 2024 13:14:46 -0800 Subject: [PATCH 24/26] Rename --root-path to --root-dir --- README.md | 2 +- .../nested/about/index.html | 0 .../nested/another page/index.html | 0 .../nested/index.html | 0 lychee-bin/src/main.rs | 2 +- lychee-bin/src/options.rs | 2 +- lychee-bin/tests/cli.rs | 10 +- lychee-lib/src/collector.rs | 20 ++-- lychee-lib/src/utils/request.rs | 98 +++++++++---------- 9 files changed, 67 insertions(+), 67 deletions(-) rename fixtures/{resolve_paths_from_root_path => resolve_paths_from_root_dir}/nested/about/index.html (100%) rename fixtures/{resolve_paths_from_root_path => resolve_paths_from_root_dir}/nested/another page/index.html (100%) rename fixtures/{resolve_paths_from_root_path => resolve_paths_from_root_dir}/nested/index.html (100%) diff --git a/README.md b/README.md index 5f0da195d8..0e211992ba 100644 --- a/README.md +++ b/README.md @@ -480,7 +480,7 @@ Options: -b, --base Base URL or website root directory to check relative URLs e.g. or `/path/to/public` - --root-path + --root-dir Root path to use when checking absolute local links --basic-auth diff --git a/fixtures/resolve_paths_from_root_path/nested/about/index.html b/fixtures/resolve_paths_from_root_dir/nested/about/index.html similarity index 100% rename from fixtures/resolve_paths_from_root_path/nested/about/index.html rename to fixtures/resolve_paths_from_root_dir/nested/about/index.html diff --git a/fixtures/resolve_paths_from_root_path/nested/another page/index.html b/fixtures/resolve_paths_from_root_dir/nested/another page/index.html similarity index 100% rename from fixtures/resolve_paths_from_root_path/nested/another page/index.html rename to fixtures/resolve_paths_from_root_dir/nested/another page/index.html diff --git a/fixtures/resolve_paths_from_root_path/nested/index.html b/fixtures/resolve_paths_from_root_dir/nested/index.html similarity index 100% rename from fixtures/resolve_paths_from_root_path/nested/index.html rename to fixtures/resolve_paths_from_root_dir/nested/index.html diff --git a/lychee-bin/src/main.rs b/lychee-bin/src/main.rs index 3250bdf41b..b1e6988176 100644 --- a/lychee-bin/src/main.rs +++ b/lychee-bin/src/main.rs @@ -288,7 +288,7 @@ fn underlying_io_error_kind(error: &Error) -> Option { async fn run(opts: &LycheeOptions) -> Result { let inputs = opts.inputs()?; - let mut collector = Collector::new(opts.config.root_path.clone(), opts.config.base.clone()) + let mut collector = Collector::new(opts.config.root_dir.clone(), opts.config.base.clone()) .skip_missing_inputs(opts.config.skip_missing) .skip_hidden(!opts.config.hidden) .skip_ignored(!opts.config.no_ignore) diff --git a/lychee-bin/src/options.rs b/lychee-bin/src/options.rs index 4706fa1433..e1a706483f 100644 --- a/lychee-bin/src/options.rs +++ b/lychee-bin/src/options.rs @@ -448,7 +448,7 @@ separated list of accepted status codes. This example will accept 200, 201, /// Root path to use when checking absolute local links #[arg(long)] #[serde(default)] - pub(crate) root_path: Option, + pub(crate) root_dir: Option, /// Basic authentication support. E.g. `http://example.com username:password` #[arg(long)] diff --git a/lychee-bin/tests/cli.rs b/lychee-bin/tests/cli.rs index 85a8bee3e0..184a5f6706 100644 --- a/lychee-bin/tests/cli.rs +++ b/lychee-bin/tests/cli.rs @@ -394,13 +394,13 @@ mod cli { } #[test] - fn test_resolve_paths_from_root_path() { + fn test_resolve_paths_from_root_dir() { let mut cmd = main_command(); - let dir = fixtures_path().join("resolve_paths_from_root_path"); + let dir = fixtures_path().join("resolve_paths_from_root_dir"); cmd.arg("--offline") .arg("--include-fragments") - .arg("--root-path") + .arg("--root-dir") .arg(&dir) .arg(dir.join("nested").join("index.html")) .env_clear() @@ -412,12 +412,12 @@ mod cli { } #[test] - fn test_resolve_paths_from_root_path_and_base_url() { + fn test_resolve_paths_from_root_dir_and_base_url() { let mut cmd = main_command(); let dir = fixtures_path(); cmd.arg("--offline") - .arg("--root-path") + .arg("--root-dir") .arg("/resolve_paths") .arg("--base") .arg(&dir) diff --git a/lychee-lib/src/collector.rs b/lychee-lib/src/collector.rs index dc6826fccb..cd07af9da1 100644 --- a/lychee-lib/src/collector.rs +++ b/lychee-lib/src/collector.rs @@ -22,7 +22,7 @@ pub struct Collector { skip_hidden: bool, include_verbatim: bool, use_html5ever: bool, - root_path: Option, + root_dir: Option, base: Option, } @@ -35,7 +35,7 @@ impl Default for Collector { use_html5ever: false, skip_hidden: true, skip_ignored: true, - root_path: None, + root_dir: None, base: None, } } @@ -44,7 +44,7 @@ impl Default for Collector { impl Collector { /// Create a new collector with an empty cache #[must_use] - pub const fn new(root_path: Option, base: Option) -> Self { + pub const fn new(root_dir: Option, base: Option) -> Self { Collector { basic_auth_extractor: None, skip_missing_inputs: false, @@ -52,7 +52,7 @@ impl Collector { use_html5ever: false, skip_hidden: true, skip_ignored: true, - root_path, + root_dir, base, } } @@ -137,7 +137,7 @@ impl Collector { }) .flatten() .par_then_unordered(None, move |(content, base)| { - let root_path = self.root_path.clone(); + let root_dir = self.root_dir.clone(); let basic_auth_extractor = self.basic_auth_extractor.clone(); async move { let content = content?; @@ -146,7 +146,7 @@ impl Collector { let requests = request::create( uris, &content.source, - root_path.as_ref(), + root_dir.as_ref(), base.as_ref(), basic_auth_extractor.as_ref(), ); @@ -175,20 +175,20 @@ mod tests { // Helper function to run the collector on the given inputs async fn collect( inputs: Vec, - root_path: Option, + root_dir: Option, base: Option, ) -> HashSet { - let responses = Collector::new(root_path, base).collect_links(inputs); + let responses = Collector::new(root_dir, base).collect_links(inputs); responses.map(|r| r.unwrap().uri).collect().await } // Helper function for collecting verbatim links async fn collect_verbatim( inputs: Vec, - root_path: Option, + root_dir: Option, base: Option, ) -> HashSet { - let responses = Collector::new(root_path, base) + let responses = Collector::new(root_dir, base) .include_verbatim(true) .collect_links(inputs); responses.map(|r| r.unwrap().uri).collect().await diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs index 5df498fdd5..ca66f7d857 100644 --- a/lychee-lib/src/utils/request.rs +++ b/lychee-lib/src/utils/request.rs @@ -25,11 +25,11 @@ fn extract_credentials( fn create_request( raw_uri: &RawUri, source: &InputSource, - root_path: Option<&PathBuf>, + root_dir: Option<&PathBuf>, base: Option<&Base>, extractor: Option<&BasicAuthExtractor>, ) -> Result { - let uri = try_parse_into_uri(raw_uri, source, root_path, base)?; + let uri = try_parse_into_uri(raw_uri, source, root_dir, base)?; let source = truncate_source(source); let element = raw_uri.element.clone(); let attribute = raw_uri.attribute.clone(); @@ -52,10 +52,10 @@ fn create_request( fn try_parse_into_uri( raw_uri: &RawUri, source: &InputSource, - root_path: Option<&PathBuf>, + root_dir: Option<&PathBuf>, base: Option<&Base>, ) -> Result { - let text = prepend_root_path_if_absolute_local_link(&raw_uri.text, root_path); + let text = prepend_root_dir_if_absolute_local_link(&raw_uri.text, root_dir); let uri = match Uri::try_from(raw_uri.clone()) { Ok(uri) => uri, Err(_) => match base { @@ -65,7 +65,7 @@ fn try_parse_into_uri( }, None => match source { InputSource::FsPath(root) => { - create_uri_from_file_path(root, &text, root_path.is_none())? + create_uri_from_file_path(root, &text, root_dir.is_none())? } _ => return Err(ErrorKind::UnsupportedUriType(text)), }, @@ -136,7 +136,7 @@ fn truncate_source(source: &InputSource) -> InputSource { pub(crate) fn create( uris: Vec, source: &InputSource, - root_path: Option<&PathBuf>, + root_dir: Option<&PathBuf>, base: Option<&Base>, extractor: Option<&BasicAuthExtractor>, ) -> HashSet { @@ -144,7 +144,7 @@ pub(crate) fn create( uris.into_iter() .filter_map(|raw_uri| { - match create_request(&raw_uri, source, root_path, base.as_ref(), extractor) { + match create_request(&raw_uri, source, root_dir, base.as_ref(), extractor) { Ok(request) => Some(request), Err(e) => { warn!("Error creating request: {:?}", e); @@ -193,9 +193,9 @@ fn resolve_and_create_url( Ok(url) } -fn prepend_root_path_if_absolute_local_link(text: &str, root_path: Option<&PathBuf>) -> String { +fn prepend_root_dir_if_absolute_local_link(text: &str, root_dir: Option<&PathBuf>) -> String { if text.starts_with('/') { - if let Some(path) = root_path { + if let Some(path) = root_dir { if let Some(path_str) = path.to_str() { return format!("{path_str}{text}"); } @@ -292,12 +292,12 @@ mod tests { } #[test] - fn test_relative_url_resolution_from_root_path() { - let root_path = PathBuf::from("/tmp/lychee"); + fn test_relative_url_resolution_from_root_dir() { + let root_dir = PathBuf::from("/tmp/lychee"); let source = InputSource::FsPath(PathBuf::from("/some/page.html")); let uris = vec![RawUri::from("relative.html")]; - let requests = create(uris, &source, Some(&root_path), None, None); + let requests = create(uris, &source, Some(&root_dir), None, None); assert_eq!(requests.len(), 1); assert!(requests @@ -306,12 +306,12 @@ mod tests { } #[test] - fn test_absolute_url_resolution_from_root_path() { - let root_path = PathBuf::from("/tmp/lychee"); + fn test_absolute_url_resolution_from_root_dir() { + let root_dir = PathBuf::from("/tmp/lychee"); let source = InputSource::FsPath(PathBuf::from("/some/page.html")); let uris = vec![RawUri::from("https://another.com/page")]; - let requests = create(uris, &source, Some(&root_path), None, None); + let requests = create(uris, &source, Some(&root_dir), None, None); assert_eq!(requests.len(), 1); assert!(requests @@ -320,12 +320,12 @@ mod tests { } #[test] - fn test_root_relative_url_resolution_from_root_path() { - let root_path = PathBuf::from("/tmp/lychee"); + fn test_root_relative_url_resolution_from_root_dir() { + let root_dir = PathBuf::from("/tmp/lychee"); let source = InputSource::FsPath(PathBuf::from("/some/page.html")); let uris = vec![RawUri::from("/root-relative")]; - let requests = create(uris, &source, Some(&root_path), None, None); + let requests = create(uris, &source, Some(&root_dir), None, None); assert_eq!(requests.len(), 1); assert!(requests @@ -334,12 +334,12 @@ mod tests { } #[test] - fn test_parent_directory_url_resolution_from_root_path() { - let root_path = PathBuf::from("/tmp/lychee"); + fn test_parent_directory_url_resolution_from_root_dir() { + let root_dir = PathBuf::from("/tmp/lychee"); let source = InputSource::FsPath(PathBuf::from("/some/page.html")); let uris = vec![RawUri::from("../parent")]; - let requests = create(uris, &source, Some(&root_path), None, None); + let requests = create(uris, &source, Some(&root_dir), None, None); assert_eq!(requests.len(), 1); assert!(requests @@ -348,12 +348,12 @@ mod tests { } #[test] - fn test_fragment_url_resolution_from_root_path() { - let root_path = PathBuf::from("/tmp/lychee"); + fn test_fragment_url_resolution_from_root_dir() { + let root_dir = PathBuf::from("/tmp/lychee"); let source = InputSource::FsPath(PathBuf::from("/some/page.html")); let uris = vec![RawUri::from("#fragment")]; - let requests = create(uris, &source, Some(&root_path), None, None); + let requests = create(uris, &source, Some(&root_dir), None, None); assert_eq!(requests.len(), 1); assert!(requests @@ -362,13 +362,13 @@ mod tests { } #[test] - fn test_relative_url_resolution_from_root_path_and_base_url() { - let root_path = PathBuf::from("/tmp/lychee"); + fn test_relative_url_resolution_from_root_dir_and_base_url() { + let root_dir = PathBuf::from("/tmp/lychee"); let base = Base::try_from("https://example.com/path/page.html").unwrap(); let source = InputSource::FsPath(PathBuf::from("/some/page.html")); let uris = vec![RawUri::from("relative.html")]; - let requests = create(uris, &source, Some(&root_path), Some(&base), None); + let requests = create(uris, &source, Some(&root_dir), Some(&base), None); assert_eq!(requests.len(), 1); assert!(requests @@ -377,13 +377,13 @@ mod tests { } #[test] - fn test_absolute_url_resolution_from_root_path_and_base_url() { - let root_path = PathBuf::from("/tmp/lychee"); + fn test_absolute_url_resolution_from_root_dir_and_base_url() { + let root_dir = PathBuf::from("/tmp/lychee"); let base = Base::try_from("https://example.com/path/page.html").unwrap(); let source = InputSource::FsPath(PathBuf::from("/some/page.html")); let uris = vec![RawUri::from("https://another.com/page")]; - let requests = create(uris, &source, Some(&root_path), Some(&base), None); + let requests = create(uris, &source, Some(&root_dir), Some(&base), None); assert_eq!(requests.len(), 1); assert!(requests @@ -392,13 +392,13 @@ mod tests { } #[test] - fn test_root_relative_url_resolution_from_root_path_and_base_url() { - let root_path = PathBuf::from("/tmp/lychee"); + fn test_root_relative_url_resolution_from_root_dir_and_base_url() { + let root_dir = PathBuf::from("/tmp/lychee"); let base = Base::try_from("https://example.com/path/page.html").unwrap(); let source = InputSource::FsPath(PathBuf::from("/some/page.html")); let uris = vec![RawUri::from("/root-relative")]; - let requests = create(uris, &source, Some(&root_path), Some(&base), None); + let requests = create(uris, &source, Some(&root_dir), Some(&base), None); assert_eq!(requests.len(), 1); assert!(requests @@ -407,13 +407,13 @@ mod tests { } #[test] - fn test_parent_directory_url_resolution_from_root_path_and_base_url() { - let root_path = PathBuf::from("/tmp/lychee"); + fn test_parent_directory_url_resolution_from_root_dir_and_base_url() { + let root_dir = PathBuf::from("/tmp/lychee"); let base = Base::try_from("https://example.com/path/page.html").unwrap(); let source = InputSource::FsPath(PathBuf::from("/some/page.html")); let uris = vec![RawUri::from("../parent")]; - let requests = create(uris, &source, Some(&root_path), Some(&base), None); + let requests = create(uris, &source, Some(&root_dir), Some(&base), None); assert_eq!(requests.len(), 1); assert!(requests @@ -422,13 +422,13 @@ mod tests { } #[test] - fn test_fragment_url_resolution_from_root_path_and_base_url() { - let root_path = PathBuf::from("/tmp/lychee"); + fn test_fragment_url_resolution_from_root_dir_and_base_url() { + let root_dir = PathBuf::from("/tmp/lychee"); let base = Base::try_from("https://example.com/path/page.html").unwrap(); let source = InputSource::FsPath(PathBuf::from("/some/page.html")); let uris = vec![RawUri::from("#fragment")]; - let requests = create(uris, &source, Some(&root_path), Some(&base), None); + let requests = create(uris, &source, Some(&root_dir), Some(&base), None); assert_eq!(requests.len(), 1); assert!(requests @@ -529,32 +529,32 @@ mod tests { } #[test] - fn test_prepend_with_absolute_local_link_and_root_path() { + fn test_prepend_with_absolute_local_link_and_root_dir() { let text = "/absolute/path"; - let root_path = PathBuf::from("/root"); - let result = prepend_root_path_if_absolute_local_link(text, Some(&root_path)); + let root_dir = PathBuf::from("/root"); + let result = prepend_root_dir_if_absolute_local_link(text, Some(&root_dir)); assert_eq!(result, "/root/absolute/path"); } #[test] - fn test_prepend_with_absolute_local_link_and_no_root_path() { + fn test_prepend_with_absolute_local_link_and_no_root_dir() { let text = "/absolute/path"; - let result = prepend_root_path_if_absolute_local_link(text, None); + let result = prepend_root_dir_if_absolute_local_link(text, None); assert_eq!(result, "/absolute/path"); } #[test] - fn test_prepend_with_relative_link_and_root_path() { + fn test_prepend_with_relative_link_and_root_dir() { let text = "relative/path"; - let root_path = PathBuf::from("/root"); - let result = prepend_root_path_if_absolute_local_link(text, Some(&root_path)); + let root_dir = PathBuf::from("/root"); + let result = prepend_root_dir_if_absolute_local_link(text, Some(&root_dir)); assert_eq!(result, "relative/path"); } #[test] - fn test_prepend_with_relative_link_and_no_root_path() { + fn test_prepend_with_relative_link_and_no_root_dir() { let text = "relative/path"; - let result = prepend_root_path_if_absolute_local_link(text, None); + let result = prepend_root_dir_if_absolute_local_link(text, None); assert_eq!(result, "relative/path"); } } From 83b28c491198248be56775d7cd4a9465d3f52a41 Mon Sep 17 00:00:00 2001 From: Trask Stalnaker Date: Tue, 10 Dec 2024 13:58:58 -0800 Subject: [PATCH 25/26] Restrict --root-dir to absolute paths for now --- README.md | 2 +- lychee-bin/src/main.rs | 6 ++++++ lychee-bin/src/options.rs | 3 ++- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 0e211992ba..d2b6a2d896 100644 --- a/README.md +++ b/README.md @@ -481,7 +481,7 @@ Options: Base URL or website root directory to check relative URLs e.g. or `/path/to/public` --root-dir - Root path to use when checking absolute local links + Root path to use when checking absolute local links, must be an absolute path --basic-auth Basic authentication support. E.g. `http://example.com username:password` diff --git a/lychee-bin/src/main.rs b/lychee-bin/src/main.rs index b1e6988176..8c9635b2b2 100644 --- a/lychee-bin/src/main.rs +++ b/lychee-bin/src/main.rs @@ -288,6 +288,12 @@ fn underlying_io_error_kind(error: &Error) -> Option { async fn run(opts: &LycheeOptions) -> Result { let inputs = opts.inputs()?; + if let Some(root_dir) = &opts.config.root_dir { + if root_dir.is_relative() { + bail!("`--root_dir` must be an absolute path"); + } + } + let mut collector = Collector::new(opts.config.root_dir.clone(), opts.config.base.clone()) .skip_missing_inputs(opts.config.skip_missing) .skip_hidden(!opts.config.hidden) diff --git a/lychee-bin/src/options.rs b/lychee-bin/src/options.rs index e1a706483f..bcb70da09c 100644 --- a/lychee-bin/src/options.rs +++ b/lychee-bin/src/options.rs @@ -445,7 +445,8 @@ separated list of accepted status codes. This example will accept 200, 201, #[serde(default)] pub(crate) base: Option, - /// Root path to use when checking absolute local links + /// Root path to use when checking absolute local links, + /// must be an absolute path #[arg(long)] #[serde(default)] pub(crate) root_dir: Option, From 909e8c3f303da3d1361064195f6c627af2cf582c Mon Sep 17 00:00:00 2001 From: Trask Stalnaker Date: Thu, 12 Dec 2024 13:30:09 -0800 Subject: [PATCH 26/26] Move root dir check --- lychee-bin/src/main.rs | 8 +----- lychee-lib/src/collector.rs | 49 +++++++++++++++++++++-------------- lychee-lib/src/types/error.rs | 5 ++++ 3 files changed, 35 insertions(+), 27 deletions(-) diff --git a/lychee-bin/src/main.rs b/lychee-bin/src/main.rs index 8c9635b2b2..521a9b8eef 100644 --- a/lychee-bin/src/main.rs +++ b/lychee-bin/src/main.rs @@ -288,13 +288,7 @@ fn underlying_io_error_kind(error: &Error) -> Option { async fn run(opts: &LycheeOptions) -> Result { let inputs = opts.inputs()?; - if let Some(root_dir) = &opts.config.root_dir { - if root_dir.is_relative() { - bail!("`--root_dir` must be an absolute path"); - } - } - - let mut collector = Collector::new(opts.config.root_dir.clone(), opts.config.base.clone()) + let mut collector = Collector::new(opts.config.root_dir.clone(), opts.config.base.clone())? .skip_missing_inputs(opts.config.skip_missing) .skip_hidden(!opts.config.hidden) .skip_ignored(!opts.config.no_ignore) diff --git a/lychee-lib/src/collector.rs b/lychee-lib/src/collector.rs index cd07af9da1..7eb4c8c82b 100644 --- a/lychee-lib/src/collector.rs +++ b/lychee-lib/src/collector.rs @@ -1,3 +1,4 @@ +use crate::ErrorKind; use crate::InputSource; use crate::{ basic_auth::BasicAuthExtractor, extract::Extractor, types::uri::raw::RawUri, utils::request, @@ -43,9 +44,17 @@ impl Default for Collector { impl Collector { /// Create a new collector with an empty cache - #[must_use] - pub const fn new(root_dir: Option, base: Option) -> Self { - Collector { + /// + /// # Errors + /// + /// Returns an `Err` if the `root_dir` is not an absolute path + pub fn new(root_dir: Option, base: Option) -> Result { + if let Some(root_dir) = &root_dir { + if root_dir.is_relative() { + return Err(ErrorKind::RootDirMustBeAbsolute(root_dir.clone())); + } + } + Ok(Collector { basic_auth_extractor: None, skip_missing_inputs: false, include_verbatim: false, @@ -54,7 +63,7 @@ impl Collector { skip_ignored: true, root_dir, base, - } + }) } /// Skip missing input files (default is to error if they don't exist) @@ -177,9 +186,9 @@ mod tests { inputs: Vec, root_dir: Option, base: Option, - ) -> HashSet { - let responses = Collector::new(root_dir, base).collect_links(inputs); - responses.map(|r| r.unwrap().uri).collect().await + ) -> Result> { + let responses = Collector::new(root_dir, base)?.collect_links(inputs); + Ok(responses.map(|r| r.unwrap().uri).collect().await) } // Helper function for collecting verbatim links @@ -187,11 +196,11 @@ mod tests { inputs: Vec, root_dir: Option, base: Option, - ) -> HashSet { - let responses = Collector::new(root_dir, base) + ) -> Result> { + let responses = Collector::new(root_dir, base)? .include_verbatim(true) .collect_links(inputs); - responses.map(|r| r.unwrap().uri).collect().await + Ok(responses.map(|r| r.unwrap().uri).collect().await) } const TEST_STRING: &str = "http://test-string.com"; @@ -279,7 +288,7 @@ mod tests { }, ]; - let links = collect_verbatim(inputs, None, None).await; + let links = collect_verbatim(inputs, None, None).await.ok().unwrap(); let expected_links = HashSet::from_iter([ website(TEST_STRING), @@ -302,7 +311,7 @@ mod tests { file_type_hint: Some(FileType::Markdown), excluded_paths: None, }; - let links = collect(vec![input], None, Some(base)).await; + let links = collect(vec![input], None, Some(base)).await.ok().unwrap(); let expected_links = HashSet::from_iter([ website("https://endler.dev"), @@ -328,7 +337,7 @@ mod tests { file_type_hint: Some(FileType::Html), excluded_paths: None, }; - let links = collect(vec![input], None, Some(base)).await; + let links = collect(vec![input], None, Some(base)).await.ok().unwrap(); let expected_links = HashSet::from_iter([ website("https://github.com/lycheeverse/lychee/"), @@ -357,7 +366,7 @@ mod tests { file_type_hint: Some(FileType::Html), excluded_paths: None, }; - let links = collect(vec![input], None, Some(base)).await; + let links = collect(vec![input], None, Some(base)).await.ok().unwrap(); let expected_links = HashSet::from_iter([ website("https://example.com/static/image.png"), @@ -384,7 +393,7 @@ mod tests { excluded_paths: None, }; - let links = collect(vec![input], None, Some(base)).await; + let links = collect(vec![input], None, Some(base)).await.ok().unwrap(); let expected = HashSet::from_iter([ website("https://localhost.com/@/internal.md"), @@ -406,7 +415,7 @@ mod tests { file_type_hint: Some(FileType::Html), excluded_paths: None, }; - let links = collect(vec![input], None, Some(base)).await; + let links = collect(vec![input], None, Some(base)).await.ok().unwrap(); let expected_links = HashSet::from_iter([ // the body links wouldn't be present if the file was parsed strictly as XML @@ -439,7 +448,7 @@ mod tests { excluded_paths: None, }; - let links = collect(vec![input], None, None).await; + let links = collect(vec![input], None, None).await.ok().unwrap(); let expected_urls = HashSet::from_iter([ website("https://github.com/lycheeverse/lychee/"), @@ -458,7 +467,7 @@ mod tests { file_type_hint: None, excluded_paths: None, }; - let links = collect(vec![input], None, None).await; + let links = collect(vec![input], None, None).await.ok().unwrap(); let expected_links = HashSet::from_iter([mail("user@example.com")]); @@ -501,7 +510,7 @@ mod tests { }, ]; - let links = collect(inputs, None, None).await; + let links = collect(inputs, None, None).await.ok().unwrap(); let expected_links = HashSet::from_iter([ website(&format!( @@ -535,7 +544,7 @@ mod tests { excluded_paths: None, }; - let links = collect(vec![input], None, Some(base)).await; + let links = collect(vec![input], None, Some(base)).await.ok().unwrap(); let expected_links = HashSet::from_iter([ path("/path/to/root/index.html"), diff --git a/lychee-lib/src/types/error.rs b/lychee-lib/src/types/error.rs index 7246fe7e84..cbcfefe5e9 100644 --- a/lychee-lib/src/types/error.rs +++ b/lychee-lib/src/types/error.rs @@ -94,6 +94,10 @@ pub enum ErrorKind { #[error("Cannot convert path '{0}' to a URI")] InvalidPathToUri(String), + /// Root dir must be an absolute path + #[error("Root dir must be an absolute path: '{0}'")] + RootDirMustBeAbsolute(PathBuf), + /// The given URI type is not supported #[error("Unsupported URI type: '{0}'")] UnsupportedUriType(String), @@ -310,6 +314,7 @@ impl Hash for ErrorKind { Self::InvalidBase(base, e) => (base, e).hash(state), Self::InvalidBaseJoin(s) => s.hash(state), Self::InvalidPathToUri(s) => s.hash(state), + Self::RootDirMustBeAbsolute(s) => s.hash(state), Self::UnsupportedUriType(s) => s.hash(state), Self::InvalidUrlRemap(remap) => (remap).hash(state), Self::InvalidHeader(e) => e.to_string().hash(state),