From 452c550b5ca97b0126bbb916d4f9d8d83085f90e Mon Sep 17 00:00:00 2001
From: Trask Stalnaker
Date: Wed, 27 Nov 2024 17:00:28 -0800
Subject: [PATCH 01/26] Check for an existing local path before URL parsing (Windows paths)
---
lychee-lib/src/types/base.rs | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/lychee-lib/src/types/base.rs b/lychee-lib/src/types/base.rs
index b7b76c7e5b..a9ca9212bb 100644
--- a/lychee-lib/src/types/base.rs
+++ b/lychee-lib/src/types/base.rs
@@ -61,6 +61,12 @@ impl TryFrom<&str> for Base {
type Error = ErrorKind;
fn try_from(value: &str) -> Result {
+ let path = PathBuf::from(value);
+ if path.exists() {
+ // need to check path first since Url::parse accepts windows paths
+ // e.g. C:\src\lychee and C:/src/lychee are both parsed as URLs
+ return Ok(Self::Local(PathBuf::from(value)))
+ }
if let Ok(url) = Url::parse(value) {
if url.cannot_be_a_base() {
return Err(ErrorKind::InvalidBase(
From 866ba38e3674929f8cc2c95885186130e0345149 Mon Sep 17 00:00:00 2001
From: Trask Stalnaker
Date: Thu, 28 Nov 2024 10:28:23 -0800
Subject: [PATCH 02/26] Introduce --root-path
---
README.md | 3 +++
lychee-bin/src/client.rs | 10 ++++++++--
lychee-bin/src/main.rs | 9 ++++++++-
lychee-bin/src/options.rs | 6 ++++++
lychee-lib/src/types/base.rs | 20 +++++++++++++-------
lychee-lib/src/utils/path.rs | 4 ++--
lychee-lib/src/utils/request.rs | 8 ++++----
7 files changed, 44 insertions(+), 16 deletions(-)
diff --git a/README.md b/README.md
index 779e9f7d53..d57805af1f 100644
--- a/README.md
+++ b/README.md
@@ -480,6 +480,9 @@ Options:
-b, --base
Base URL or website root directory to check relative URLs e.g. <https://example.com> or `/path/to/public`
+ --root-path
+ Root path to use when checking absolute local links, --base is ignored when this is set
+
--basic-auth
Basic authentication support. E.g. `http://example.com username:password`
diff --git a/lychee-bin/src/client.rs b/lychee-bin/src/client.rs
index d1b982dc26..b89e49578f 100644
--- a/lychee-bin/src/client.rs
+++ b/lychee-bin/src/client.rs
@@ -2,7 +2,7 @@ use crate::options::Config;
use crate::parse::{parse_duration_secs, parse_headers, parse_remaps};
use anyhow::{Context, Result};
use http::StatusCode;
-use lychee_lib::{Client, ClientBuilder};
+use lychee_lib::{Base, Client, ClientBuilder};
use regex::RegexSet;
use reqwest_cookie_store::CookieStoreMutex;
use std::sync::Arc;
@@ -53,9 +53,15 @@ pub(crate) fn create(cfg: &Config, cookie_jar: Option<&Arc>) -
cfg.include_mail
};
+ let base = if let Some(root_path) = &cfg.root_path {
+ Base::create_root_path(&root_path).ok()
+ } else {
+ cfg.base.clone()
+ };
+
ClientBuilder::builder()
.remaps(remaps)
- .base(cfg.base.clone())
+ .base(base)
.includes(includes)
.excludes(excludes)
.exclude_all_private(cfg.exclude_all_private)
diff --git a/lychee-bin/src/main.rs b/lychee-bin/src/main.rs
index bb6ac4aac9..bf4148ed2c 100644
--- a/lychee-bin/src/main.rs
+++ b/lychee-bin/src/main.rs
@@ -75,6 +75,7 @@ use openssl_sys as _; // required for vendored-openssl feature
use options::LYCHEE_CONFIG_FILE;
use ring as _; // required for apple silicon
+use lychee_lib::Base;
use lychee_lib::BasicAuthExtractor;
use lychee_lib::Collector;
use lychee_lib::CookieJar;
@@ -288,7 +289,13 @@ fn underlying_io_error_kind(error: &Error) -> Option {
async fn run(opts: &LycheeOptions) -> Result {
let inputs = opts.inputs()?;
- let mut collector = Collector::new(opts.config.base.clone())
+ let base = if let Some(root_path) = &opts.config.root_path {
+ Base::create_root_path(&root_path).ok()
+ } else {
+ opts.config.base.clone()
+ };
+
+ let mut collector = Collector::new(base)
.skip_missing_inputs(opts.config.skip_missing)
.skip_hidden(!opts.config.hidden)
.skip_ignored(!opts.config.no_ignore)
diff --git a/lychee-bin/src/options.rs b/lychee-bin/src/options.rs
index 60c259d2ea..61de03a9c3 100644
--- a/lychee-bin/src/options.rs
+++ b/lychee-bin/src/options.rs
@@ -445,6 +445,12 @@ separated list of accepted status codes. This example will accept 200, 201,
#[serde(default)]
pub(crate) base: Option ,
+ /// Root path to use when checking absolute local links,
+ /// "base" option is ignored when this is set
+ #[arg(long)]
+ #[serde(default)]
+ pub(crate) root_path: Option,
+
/// Basic authentication support. E.g. `http://example.com username:password`
#[arg(long)]
#[serde(default)]
diff --git a/lychee-lib/src/types/base.rs b/lychee-lib/src/types/base.rs
index a9ca9212bb..0851687500 100644
--- a/lychee-lib/src/types/base.rs
+++ b/lychee-lib/src/types/base.rs
@@ -15,6 +15,8 @@ pub enum Base {
Local(PathBuf),
/// Remote URL pointing to a website homepage
Remote(Url),
+ /// Root path for checking absolute local links
+ RootPath(PathBuf),
}
impl Base {
@@ -27,18 +29,28 @@ impl Base {
let full_path = path.join(link);
Url::from_file_path(full_path).ok()
}
+ Self::RootPath(_path) => {
+ // this is unused currently because joining on RootPath is handled by create_uri_from_file_path
+ unreachable!()
+ }
}
}
- /// Return the directory if the base is local
+ /// Return the directory if the base is not remote
#[must_use]
pub(crate) fn dir(&self) -> Option {
match self {
Self::Remote(_) => None,
Self::Local(d) => Some(d.clone()),
+ Self::RootPath(d) => Some(d.clone()),
}
}
+ /// Create a root path base
+ pub fn create_root_path(value: &str) -> Result {
+ return Ok(Self::RootPath(PathBuf::from(value)));
+ }
+
pub(crate) fn from_source(source: &InputSource) -> Option {
match &source {
InputSource::RemoteUrl(url) => {
@@ -61,12 +73,6 @@ impl TryFrom<&str> for Base {
type Error = ErrorKind;
fn try_from(value: &str) -> Result {
- let path = PathBuf::from(value);
- if path.exists() {
- // need to check path first since Url::parse accepts windows paths
- // e.g. C:\src\lychee and C:/src/lychee are both parsed as URLs
- return Ok(Self::Local(PathBuf::from(value)))
- }
if let Ok(url) = Url::parse(value) {
if url.cannot_be_a_base() {
return Err(ErrorKind::InvalidBase(
diff --git a/lychee-lib/src/utils/path.rs b/lychee-lib/src/utils/path.rs
index bb4847ed9a..89abde1cc8 100644
--- a/lychee-lib/src/utils/path.rs
+++ b/lychee-lib/src/utils/path.rs
@@ -42,14 +42,14 @@ fn dirname(src: &'_ Path) -> Option<&'_ Path> {
/// Returns Ok(None) in case of an absolute local link without a `base_url`
pub(crate) fn resolve(src: &Path, dst: &Path, base: &Option ) -> Result> {
let resolved = match dst {
- relative if dst.is_relative() => {
+ relative if !dst.starts_with("/") => {
// Find `dst` in the parent directory of `src`
let Some(parent) = src.parent() else {
return Err(ErrorKind::InvalidFile(relative.to_path_buf()));
};
parent.join(relative)
}
- absolute if dst.is_absolute() => {
+ absolute if dst.starts_with("/") => {
// Absolute local links (leading slash) require the `base_url` to
// define the document root. Silently ignore the link in case the
// `base_url` is not defined.
diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs
index 7867e50f09..d02de1760e 100644
--- a/lychee-lib/src/utils/request.rs
+++ b/lychee-lib/src/utils/request.rs
@@ -53,14 +53,14 @@ fn try_parse_into_uri(raw_uri: &RawUri, source: &InputSource, base: &Option uri,
Err(_) => match base {
+ Some(Base::RootPath(_)) | None => match source {
+ InputSource::FsPath(root) => create_uri_from_file_path(root, &text, base)?,
+ _ => return Err(ErrorKind::UnsupportedUriType(text)),
+ },
Some(base_url) => match base_url.join(&text) {
Some(url) => Uri { url },
None => return Err(ErrorKind::InvalidBaseJoin(text.clone())),
},
- None => match source {
- InputSource::FsPath(root) => create_uri_from_file_path(root, &text, base)?,
- _ => return Err(ErrorKind::UnsupportedUriType(text)),
- },
},
};
Ok(uri)
From 250f572cf5da314b50b5dba701c6579da0aec999 Mon Sep 17 00:00:00 2001
From: Trask Stalnaker
Date: Thu, 28 Nov 2024 14:01:00 -0800
Subject: [PATCH 03/26] lint
---
lychee-bin/src/client.rs | 2 +-
lychee-bin/src/main.rs | 2 +-
lychee-lib/src/types/base.rs | 7 +++----
3 files changed, 5 insertions(+), 6 deletions(-)
diff --git a/lychee-bin/src/client.rs b/lychee-bin/src/client.rs
index b89e49578f..ce9d841d55 100644
--- a/lychee-bin/src/client.rs
+++ b/lychee-bin/src/client.rs
@@ -54,7 +54,7 @@ pub(crate) fn create(cfg: &Config, cookie_jar: Option<&Arc>) -
};
let base = if let Some(root_path) = &cfg.root_path {
- Base::create_root_path(&root_path).ok()
+ Some(Base::create_root_path(&root_path))
} else {
cfg.base.clone()
};
diff --git a/lychee-bin/src/main.rs b/lychee-bin/src/main.rs
index bf4148ed2c..1912f4950d 100644
--- a/lychee-bin/src/main.rs
+++ b/lychee-bin/src/main.rs
@@ -290,7 +290,7 @@ async fn run(opts: &LycheeOptions) -> Result {
let inputs = opts.inputs()?;
let base = if let Some(root_path) = &opts.config.root_path {
- Base::create_root_path(&root_path).ok()
+ Some(Base::create_root_path(&root_path))
} else {
opts.config.base.clone()
};
diff --git a/lychee-lib/src/types/base.rs b/lychee-lib/src/types/base.rs
index 0851687500..f875d6446a 100644
--- a/lychee-lib/src/types/base.rs
+++ b/lychee-lib/src/types/base.rs
@@ -41,14 +41,13 @@ impl Base {
pub(crate) fn dir(&self) -> Option {
match self {
Self::Remote(_) => None,
- Self::Local(d) => Some(d.clone()),
- Self::RootPath(d) => Some(d.clone()),
+ Self::Local(d) | Self::RootPath(d) => Some(d.clone()),
}
}
/// Create a root path base
- pub fn create_root_path(value: &str) -> Result {
- return Ok(Self::RootPath(PathBuf::from(value)));
+ pub fn create_root_path(value: &str) -> Base {
+ Self::RootPath(PathBuf::from(value))
}
pub(crate) fn from_source(source: &InputSource) -> Option {
From cd074f99b7915b4416ea0b35a3632451009da8df Mon Sep 17 00:00:00 2001
From: Trask Stalnaker
Date: Thu, 28 Nov 2024 14:07:59 -0800
Subject: [PATCH 04/26] lint
---
lychee-lib/src/types/base.rs | 1 +
1 file changed, 1 insertion(+)
diff --git a/lychee-lib/src/types/base.rs b/lychee-lib/src/types/base.rs
index f875d6446a..aa716b3f26 100644
--- a/lychee-lib/src/types/base.rs
+++ b/lychee-lib/src/types/base.rs
@@ -46,6 +46,7 @@ impl Base {
}
/// Create a root path base
+ #[must_use]
pub fn create_root_path(value: &str) -> Base {
Self::RootPath(PathBuf::from(value))
}
From 2e9c8fd5c370756c2c9d26c0535f76bd02bbf2c6 Mon Sep 17 00:00:00 2001
From: Trask Stalnaker
Date: Thu, 28 Nov 2024 18:19:09 -0800
Subject: [PATCH 05/26] Simplification
---
lychee-lib/src/collector.rs | 3 +-
lychee-lib/src/utils/request.rs | 78 ++++++++++-----------------------
2 files changed, 25 insertions(+), 56 deletions(-)
diff --git a/lychee-lib/src/collector.rs b/lychee-lib/src/collector.rs
index 955bdd24e7..8b91851871 100644
--- a/lychee-lib/src/collector.rs
+++ b/lychee-lib/src/collector.rs
@@ -124,7 +124,8 @@ impl Collector {
let content = content?;
let extractor = Extractor::new(self.use_html5ever, self.include_verbatim);
let uris: Vec = extractor.extract(&content);
- let requests = request::create(uris, &content, &base, &basic_auth_extractor);
+ let requests =
+ request::create(uris, &content.source, &base, &basic_auth_extractor);
Result::Ok(stream::iter(requests.into_iter().map(Ok)))
}
})
diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs
index d02de1760e..cdd6e946d6 100644
--- a/lychee-lib/src/utils/request.rs
+++ b/lychee-lib/src/utils/request.rs
@@ -8,7 +8,7 @@ use std::{
use crate::{
basic_auth::BasicAuthExtractor,
- types::{uri::raw::RawUri, InputContent, InputSource},
+ types::{uri::raw::RawUri, InputSource},
utils::{path, url},
Base, BasicAuthCredentials, ErrorKind, Request, Result, Uri,
};
@@ -125,24 +125,22 @@ fn truncate_source(source: &InputSource) -> InputSource {
/// it will not be added to the `HashSet`.
pub(crate) fn create(
uris: Vec,
- input_content: &InputContent,
+ source: &InputSource,
base: &Option ,
extractor: &Option,
) -> HashSet {
- let base = base
- .clone()
- .or_else(|| Base::from_source(&input_content.source));
+ let base = base.clone().or_else(|| Base::from_source(&source));
uris.into_iter()
- .filter_map(|raw_uri| {
- match create_request(&raw_uri, &input_content.source, &base, extractor) {
+ .filter_map(
+ |raw_uri| match create_request(&raw_uri, &source, &base, extractor) {
Ok(request) => Some(request),
Err(e) => {
warn!("Error creating request: {:?}", e);
None
}
- }
- })
+ },
+ )
.collect()
}
@@ -184,7 +182,6 @@ fn resolve_and_create_url(
#[cfg(test)]
mod tests {
use super::*;
- use crate::types::FileType;
#[test]
fn test_is_anchor() {
@@ -199,24 +196,13 @@ mod tests {
assert_eq!(result.as_str(), "file:///test+encoding");
}
- fn create_input(content: &str, file_type: FileType) -> InputContent {
- InputContent {
- content: content.to_string(),
- file_type,
- source: InputSource::String(content.to_string()),
- }
- }
-
#[test]
fn test_relative_url_resolution() {
let base = Some(Base::try_from("https://example.com/path/page.html").unwrap());
- let input = create_input(
- r#"Relative Link "#,
- FileType::Html,
- );
+ let source = InputSource::String(String::new());
let uris = vec![RawUri::from("relative.html")];
- let requests = create(uris, &input, &base, &None);
+ let requests = create(uris, &source, &base, &None);
assert_eq!(requests.len(), 1);
assert!(requests
@@ -227,13 +213,10 @@ mod tests {
#[test]
fn test_absolute_url_resolution() {
let base = Some(Base::try_from("https://example.com/path/page.html").unwrap());
- let input = create_input(
- r#"Absolute Link "#,
- FileType::Html,
- );
+ let source = InputSource::String(String::new());
let uris = vec![RawUri::from("https://another.com/page")];
- let requests = create(uris, &input, &base, &None);
+ let requests = create(uris, &source, &base, &None);
assert_eq!(requests.len(), 1);
assert!(requests
@@ -244,13 +227,10 @@ mod tests {
#[test]
fn test_root_relative_url_resolution() {
let base = Some(Base::try_from("https://example.com/path/page.html").unwrap());
- let input = create_input(
- r#"Root Relative Link "#,
- FileType::Html,
- );
+ let source = InputSource::String(String::new());
let uris = vec![RawUri::from("/root-relative")];
- let requests = create(uris, &input, &base, &None);
+ let requests = create(uris, &source, &base, &None);
assert_eq!(requests.len(), 1);
assert!(requests
@@ -261,13 +241,10 @@ mod tests {
#[test]
fn test_parent_directory_url_resolution() {
let base = Some(Base::try_from("https://example.com/path/page.html").unwrap());
- let input = create_input(
- r#"Parent Directory Link "#,
- FileType::Html,
- );
+ let source = InputSource::String(String::new());
let uris = vec![RawUri::from("../parent")];
- let requests = create(uris, &input, &base, &None);
+ let requests = create(uris, &source, &base, &None);
assert_eq!(requests.len(), 1);
assert!(requests
@@ -278,10 +255,10 @@ mod tests {
#[test]
fn test_fragment_url_resolution() {
let base = Some(Base::try_from("https://example.com/path/page.html").unwrap());
- let input = create_input(r##"Fragment Link "##, FileType::Html);
+ let source = InputSource::String(String::new());
let uris = vec![RawUri::from("#fragment")];
- let requests = create(uris, &input, &base, &None);
+ let requests = create(uris, &source, &base, &None);
assert_eq!(requests.len(), 1);
assert!(requests
@@ -292,13 +269,10 @@ mod tests {
#[test]
fn test_no_base_url_resolution() {
let base = None;
- let input = create_input(
- r#"Absolute Link "#,
- FileType::Html,
- );
+ let source = InputSource::String(String::new());
let uris = vec![RawUri::from("https://example.com/page")];
- let requests = create(uris, &input, &base, &None);
+ let requests = create(uris, &source, &base, &None);
assert_eq!(requests.len(), 1);
assert!(requests
@@ -359,13 +333,10 @@ mod tests {
#[test]
fn test_parse_relative_path_into_uri() {
let base = Some(Base::Local(PathBuf::from("/tmp/lychee")));
- let input = create_input(
- r#"Relative Link "#,
- FileType::Html,
- );
+ let source = InputSource::String(String::new());
let raw_uri = RawUri::from("relative.html");
- let uri = try_parse_into_uri(&raw_uri, &input.source, &base).unwrap();
+ let uri = try_parse_into_uri(&raw_uri, &source, &base).unwrap();
assert_eq!(uri.url.as_str(), "file:///tmp/lychee/relative.html");
}
@@ -373,13 +344,10 @@ mod tests {
#[test]
fn test_parse_absolute_path_into_uri() {
let base = Some(Base::Local(PathBuf::from("/tmp/lychee")));
- let input = create_input(
- r#"Absolute Link "#,
- FileType::Html,
- );
+ let source = InputSource::String(String::new());
let raw_uri = RawUri::from("absolute.html");
- let uri = try_parse_into_uri(&raw_uri, &input.source, &base).unwrap();
+ let uri = try_parse_into_uri(&raw_uri, &source, &base).unwrap();
assert_eq!(uri.url.as_str(), "file:///tmp/lychee/absolute.html");
}
From 4de28cd23ed8124a5bde6f2d8f7db559ae41ad92 Mon Sep 17 00:00:00 2001
From: Trask Stalnaker
Date: Thu, 28 Nov 2024 18:19:32 -0800
Subject: [PATCH 06/26] Add unit tests
---
lychee-lib/src/utils/request.rs | 70 +++++++++++++++++++++++++++++++++
1 file changed, 70 insertions(+)
diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs
index cdd6e946d6..72a59b701f 100644
--- a/lychee-lib/src/utils/request.rs
+++ b/lychee-lib/src/utils/request.rs
@@ -266,6 +266,76 @@ mod tests {
.any(|r| r.uri.url.as_str() == "https://example.com/path/page.html#fragment"));
}
+ #[test]
+ fn test_relative_url_resolution_from_root_path() {
+ let base = Some(Base::create_root_path("/tmp/lychee"));
+ let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
+
+ let uris = vec![RawUri::from("relative.html")];
+ let requests = create(uris, &source, &base, &None);
+
+ assert_eq!(requests.len(), 1);
+ assert!(requests
+ .iter()
+ .any(|r| r.uri.url.as_str() == "file:///some/relative.html"));
+ }
+
+ #[test]
+ fn test_absolute_url_resolution_from_root_path() {
+ let base = Some(Base::create_root_path("/tmp/lychee"));
+ let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
+
+ let uris = vec![RawUri::from("https://another.com/page")];
+ let requests = create(uris, &source, &base, &None);
+
+ assert_eq!(requests.len(), 1);
+ assert!(requests
+ .iter()
+ .any(|r| r.uri.url.as_str() == "https://another.com/page"));
+ }
+
+ #[test]
+ fn test_root_relative_url_resolution_from_root_path() {
+ let base = Some(Base::create_root_path("/tmp/lychee"));
+ let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
+
+ let uris = vec![RawUri::from("/root-relative")];
+ let requests = create(uris, &source, &base, &None);
+
+ assert_eq!(requests.len(), 1);
+ assert!(requests
+ .iter()
+ .any(|r| r.uri.url.as_str() == "file:///tmp/lychee/root-relative"));
+ }
+
+ #[test]
+ fn test_parent_directory_url_resolution_from_root_path() {
+ let base = Some(Base::create_root_path("/tmp/lychee"));
+ let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
+
+ let uris = vec![RawUri::from("../parent")];
+ let requests = create(uris, &source, &base, &None);
+
+ assert_eq!(requests.len(), 1);
+ assert!(requests
+ .iter()
+ .any(|r| r.uri.url.as_str() == "file:///parent"));
+ }
+
+ #[test]
+ fn test_fragment_url_resolution_from_root_path() {
+ let base = Some(Base::create_root_path("/tmp/lychee"));
+ let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
+
+ let uris = vec![RawUri::from("#fragment")];
+ let requests = create(uris, &source, &base, &None);
+
+ assert_eq!(requests.len(), 1);
+ assert!(requests
+ .iter()
+ .any(|r| r.uri.url.as_str() == "file:///some/page.html#fragment"));
+ }
+
#[test]
fn test_no_base_url_resolution() {
let base = None;
From 2489d56cae6eaf8fec36dc85da675c720412ef2f Mon Sep 17 00:00:00 2001
From: Trask Stalnaker
Date: Thu, 28 Nov 2024 19:04:58 -0800
Subject: [PATCH 07/26] Add integration test
---
.../nested/about/index.html | 8 +++++
.../nested/another page/index.html | 0
.../nested/index.html | 34 +++++++++++++++++++
lychee-bin/tests/cli.rs | 18 ++++++++++
lychee-lib/src/types/input.rs | 6 +---
5 files changed, 61 insertions(+), 5 deletions(-)
create mode 100644 fixtures/resolve_paths_from_root_path/nested/about/index.html
create mode 100644 fixtures/resolve_paths_from_root_path/nested/another page/index.html
create mode 100644 fixtures/resolve_paths_from_root_path/nested/index.html
diff --git a/fixtures/resolve_paths_from_root_path/nested/about/index.html b/fixtures/resolve_paths_from_root_path/nested/about/index.html
new file mode 100644
index 0000000000..3141b661a0
--- /dev/null
+++ b/fixtures/resolve_paths_from_root_path/nested/about/index.html
@@ -0,0 +1,8 @@
+
+
+ About
+
+
+ About
+
+
diff --git a/fixtures/resolve_paths_from_root_path/nested/another page/index.html b/fixtures/resolve_paths_from_root_path/nested/another page/index.html
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/fixtures/resolve_paths_from_root_path/nested/index.html b/fixtures/resolve_paths_from_root_path/nested/index.html
new file mode 100644
index 0000000000..8f4528f54a
--- /dev/null
+++ b/fixtures/resolve_paths_from_root_path/nested/index.html
@@ -0,0 +1,34 @@
+
+
+ Index
+
+
+ Index Title
+
+
+
+
+
+
\ No newline at end of file
diff --git a/lychee-bin/tests/cli.rs b/lychee-bin/tests/cli.rs
index 7dcd716f1d..c8219d58a5 100644
--- a/lychee-bin/tests/cli.rs
+++ b/lychee-bin/tests/cli.rs
@@ -393,6 +393,24 @@ mod cli {
.stdout(contains("3 OK"));
}
+ #[test]
+ fn test_resolve_paths_from_root_path() {
+ let mut cmd = main_command();
+ let dir = fixtures_path().join("resolve_paths_from_root_path");
+
+ cmd.arg("--offline")
+ .arg("--include-fragments")
+ .arg("--root-path")
+ .arg(&dir)
+ .arg(dir.join("nested").join("index.html"))
+ .env_clear()
+ .assert()
+ .failure()
+ .stdout(contains("7 Total"))
+ .stdout(contains("5 OK"))
+ .stdout(contains("2 Errors"));
+ }
+
#[test]
fn test_youtube_quirk() {
let url = "https://www.youtube.com/watch?v=NlKuICiT470&list=PLbWDhxwM_45mPVToqaIZNbZeIzFchsKKQ&index=7";
diff --git a/lychee-lib/src/types/input.rs b/lychee-lib/src/types/input.rs
index c32be7feb8..a0fba1ca3e 100644
--- a/lychee-lib/src/types/input.rs
+++ b/lychee-lib/src/types/input.rs
@@ -140,11 +140,7 @@ impl Input {
Ok(url) if url.scheme() == "http" || url.scheme() == "https" => {
InputSource::RemoteUrl(Box::new(url))
}
- Ok(_) => {
- // URL parsed successfully, but it's not http or https
- return Err(ErrorKind::InvalidFile(PathBuf::from(value)));
- }
- _ => {
+ Ok(_) | _ => {
// this seems to be the only way to determine if this is a glob pattern
let is_glob = glob::Pattern::escape(value) != value;
From be3a0623be5b390bcb4db5c06048967fa9d44ab6 Mon Sep 17 00:00:00 2001
From: Trask Stalnaker
Date: Thu, 28 Nov 2024 20:57:59 -0800
Subject: [PATCH 08/26] Sync docs
---
README.md | 2 +-
lychee-bin/src/options.rs | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index d57805af1f..256be0c698 100644
--- a/README.md
+++ b/README.md
@@ -481,7 +481,7 @@ Options:
Base URL or website root directory to check relative URLs e.g. <https://example.com> or `/path/to/public`
--root-path
- Root path to use when checking absolute local links, --base is ignored when this is set
+ Root path to use when checking absolute local links, base option is ignored when this is set
--basic-auth
Basic authentication support. E.g. `http://example.com username:password`
diff --git a/lychee-bin/src/options.rs b/lychee-bin/src/options.rs
index 61de03a9c3..e9a291f599 100644
--- a/lychee-bin/src/options.rs
+++ b/lychee-bin/src/options.rs
@@ -446,7 +446,7 @@ separated list of accepted status codes. This example will accept 200, 201,
pub(crate) base: Option ,
/// Root path to use when checking absolute local links,
- /// "base" option is ignored when this is set
+ /// base option is ignored when this is set
#[arg(long)]
#[serde(default)]
pub(crate) root_path: Option,
From bdb5ec6a2b3665e78573d0a5e50e0486bac1eeb8 Mon Sep 17 00:00:00 2001
From: Trask Stalnaker
Date: Thu, 28 Nov 2024 21:11:09 -0800
Subject: [PATCH 09/26] Add missing comment to make CI happy
---
lychee-lib/src/lib.rs | 1 +
1 file changed, 1 insertion(+)
diff --git a/lychee-lib/src/lib.rs b/lychee-lib/src/lib.rs
index 93df6d0db8..023278c034 100644
--- a/lychee-lib/src/lib.rs
+++ b/lychee-lib/src/lib.rs
@@ -71,6 +71,7 @@ pub mod remap;
/// local IPs or e-mail addresses
pub mod filter;
+/// Test utilities
#[cfg(test)]
#[macro_use]
pub mod test_utils;
From 144e1e53cafaa7b97820053b4821807ff31f8215 Mon Sep 17 00:00:00 2001
From: Trask Stalnaker
Date: Thu, 28 Nov 2024 21:07:04 -0800
Subject: [PATCH 10/26] Revert one of the Windows-specific changes because it
caused a test failure
---
lychee-lib/src/types/input.rs | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/lychee-lib/src/types/input.rs b/lychee-lib/src/types/input.rs
index a0fba1ca3e..c32be7feb8 100644
--- a/lychee-lib/src/types/input.rs
+++ b/lychee-lib/src/types/input.rs
@@ -140,7 +140,11 @@ impl Input {
Ok(url) if url.scheme() == "http" || url.scheme() == "https" => {
InputSource::RemoteUrl(Box::new(url))
}
- Ok(_) | _ => {
+ Ok(_) => {
+ // URL parsed successfully, but it's not http or https
+ return Err(ErrorKind::InvalidFile(PathBuf::from(value)));
+ }
+ _ => {
// this seems to be the only way to determine if this is a glob pattern
let is_glob = glob::Pattern::escape(value) != value;
From d2283d462cfab827fa931125b3837266b9b62525 Mon Sep 17 00:00:00 2001
From: Trask Stalnaker
Date: Sat, 30 Nov 2024 10:17:40 -0800
Subject: [PATCH 11/26] Support both options at the same time
---
README.md | 2 +-
examples/collect_links/collect_links.rs | 2 +-
lychee-bin/src/client.rs | 10 +--
lychee-bin/src/main.rs | 9 +--
lychee-bin/src/options.rs | 5 +-
lychee-lib/src/collector.rs | 51 +++++++++-----
lychee-lib/src/types/base.rs | 14 +---
lychee-lib/src/utils/path.rs | 53 +++++++++-----
lychee-lib/src/utils/request.rs | 92 ++++++++++++++++---------
9 files changed, 135 insertions(+), 103 deletions(-)
diff --git a/README.md b/README.md
index 256be0c698..5f0da195d8 100644
--- a/README.md
+++ b/README.md
@@ -481,7 +481,7 @@ Options:
Base URL or website root directory to check relative URLs e.g. <https://example.com> or `/path/to/public`
--root-path
- Root path to use when checking absolute local links, base option is ignored when this is set
+ Root path to use when checking absolute local links
--basic-auth
Basic authentication support. E.g. `http://example.com username:password`
diff --git a/examples/collect_links/collect_links.rs b/examples/collect_links/collect_links.rs
index 4a86924c56..3a2ab520e7 100644
--- a/examples/collect_links/collect_links.rs
+++ b/examples/collect_links/collect_links.rs
@@ -21,7 +21,7 @@ async fn main() -> Result<()> {
},
];
- let links = Collector::new(None) // base
+ let links = Collector::new(None, None) // root_path and base
.skip_missing_inputs(false) // don't skip missing inputs? (default=false)
.skip_hidden(false) // skip hidden files? (default=true)
.skip_ignored(false) // skip files that are ignored by git? (default=true)
diff --git a/lychee-bin/src/client.rs b/lychee-bin/src/client.rs
index ce9d841d55..d1b982dc26 100644
--- a/lychee-bin/src/client.rs
+++ b/lychee-bin/src/client.rs
@@ -2,7 +2,7 @@ use crate::options::Config;
use crate::parse::{parse_duration_secs, parse_headers, parse_remaps};
use anyhow::{Context, Result};
use http::StatusCode;
-use lychee_lib::{Base, Client, ClientBuilder};
+use lychee_lib::{Client, ClientBuilder};
use regex::RegexSet;
use reqwest_cookie_store::CookieStoreMutex;
use std::sync::Arc;
@@ -53,15 +53,9 @@ pub(crate) fn create(cfg: &Config, cookie_jar: Option<&Arc>) -
cfg.include_mail
};
- let base = if let Some(root_path) = &cfg.root_path {
- Some(Base::create_root_path(&root_path))
- } else {
- cfg.base.clone()
- };
-
ClientBuilder::builder()
.remaps(remaps)
- .base(base)
+ .base(cfg.base.clone())
.includes(includes)
.excludes(excludes)
.exclude_all_private(cfg.exclude_all_private)
diff --git a/lychee-bin/src/main.rs b/lychee-bin/src/main.rs
index 1912f4950d..3250bdf41b 100644
--- a/lychee-bin/src/main.rs
+++ b/lychee-bin/src/main.rs
@@ -75,7 +75,6 @@ use openssl_sys as _; // required for vendored-openssl feature
use options::LYCHEE_CONFIG_FILE;
use ring as _; // required for apple silicon
-use lychee_lib::Base;
use lychee_lib::BasicAuthExtractor;
use lychee_lib::Collector;
use lychee_lib::CookieJar;
@@ -289,13 +288,7 @@ fn underlying_io_error_kind(error: &Error) -> Option {
async fn run(opts: &LycheeOptions) -> Result {
let inputs = opts.inputs()?;
- let base = if let Some(root_path) = &opts.config.root_path {
- Some(Base::create_root_path(&root_path))
- } else {
- opts.config.base.clone()
- };
-
- let mut collector = Collector::new(base)
+ let mut collector = Collector::new(opts.config.root_path.clone(), opts.config.base.clone())
.skip_missing_inputs(opts.config.skip_missing)
.skip_hidden(!opts.config.hidden)
.skip_ignored(!opts.config.no_ignore)
diff --git a/lychee-bin/src/options.rs b/lychee-bin/src/options.rs
index e9a291f599..4706fa1433 100644
--- a/lychee-bin/src/options.rs
+++ b/lychee-bin/src/options.rs
@@ -445,11 +445,10 @@ separated list of accepted status codes. This example will accept 200, 201,
#[serde(default)]
pub(crate) base: Option ,
- /// Root path to use when checking absolute local links,
- /// base option is ignored when this is set
+ /// Root path to use when checking absolute local links
#[arg(long)]
#[serde(default)]
- pub(crate) root_path: Option,
+ pub(crate) root_path: Option,
/// Basic authentication support. E.g. `http://example.com username:password`
#[arg(long)]
diff --git a/lychee-lib/src/collector.rs b/lychee-lib/src/collector.rs
index 8b91851871..c21dd67999 100644
--- a/lychee-lib/src/collector.rs
+++ b/lychee-lib/src/collector.rs
@@ -9,6 +9,7 @@ use futures::{
StreamExt,
};
use par_stream::ParStreamExt;
+use std::path::PathBuf;
/// Collector keeps the state of link collection
/// It drives the link extraction from inputs
@@ -21,13 +22,14 @@ pub struct Collector {
skip_hidden: bool,
include_verbatim: bool,
use_html5ever: bool,
+ root_path: Option,
base: Option ,
}
impl Collector {
/// Create a new collector with an empty cache
#[must_use]
- pub const fn new(base: Option ) -> Self {
+ pub const fn new(root_path: Option, base: Option ) -> Self {
Collector {
basic_auth_extractor: None,
skip_missing_inputs: false,
@@ -35,6 +37,7 @@ impl Collector {
use_html5ever: false,
skip_hidden: true,
skip_ignored: true,
+ root_path,
base,
}
}
@@ -119,13 +122,19 @@ impl Collector {
})
.flatten()
.par_then_unordered(None, move |(content, base)| {
+ let root_path = self.root_path.clone();
let basic_auth_extractor = self.basic_auth_extractor.clone();
async move {
let content = content?;
let extractor = Extractor::new(self.use_html5ever, self.include_verbatim);
let uris: Vec = extractor.extract(&content);
- let requests =
- request::create(uris, &content.source, &base, &basic_auth_extractor);
+ let requests = request::create(
+ uris,
+ &content.source,
+ &root_path,
+ &base,
+ &basic_auth_extractor,
+ );
Result::Ok(stream::iter(requests.into_iter().map(Ok)))
}
})
@@ -149,14 +158,22 @@ mod tests {
};
// Helper function to run the collector on the given inputs
- async fn collect(inputs: Vec , base: Option ) -> HashSet {
- let responses = Collector::new(base).collect_links(inputs);
+ async fn collect(
+ inputs: Vec ,
+ root_path: Option,
+ base: Option ,
+ ) -> HashSet {
+ let responses = Collector::new(root_path, base).collect_links(inputs);
responses.map(|r| r.unwrap().uri).collect().await
}
// Helper function for collecting verbatim links
- async fn collect_verbatim(inputs: Vec , base: Option ) -> HashSet {
- let responses = Collector::new(base)
+ async fn collect_verbatim(
+ inputs: Vec ,
+ root_path: Option,
+ base: Option ,
+ ) -> HashSet {
+ let responses = Collector::new(root_path, base)
.include_verbatim(true)
.collect_links(inputs);
responses.map(|r| r.unwrap().uri).collect().await
@@ -247,7 +264,7 @@ mod tests {
},
];
- let links = collect_verbatim(inputs, None).await;
+ let links = collect_verbatim(inputs, None, None).await;
let expected_links = HashSet::from_iter([
website(TEST_STRING),
@@ -270,7 +287,7 @@ mod tests {
file_type_hint: Some(FileType::Markdown),
excluded_paths: None,
};
- let links = collect(vec![input], Some(base)).await;
+ let links = collect(vec![input], None, Some(base)).await;
let expected_links = HashSet::from_iter([
website("https://endler.dev"),
@@ -296,7 +313,7 @@ mod tests {
file_type_hint: Some(FileType::Html),
excluded_paths: None,
};
- let links = collect(vec![input], Some(base)).await;
+ let links = collect(vec![input], None, Some(base)).await;
let expected_links = HashSet::from_iter([
website("https://github.com/lycheeverse/lychee/"),
@@ -325,7 +342,7 @@ mod tests {
file_type_hint: Some(FileType::Html),
excluded_paths: None,
};
- let links = collect(vec![input], Some(base)).await;
+ let links = collect(vec![input], None, Some(base)).await;
let expected_links = HashSet::from_iter([
website("https://example.com/static/image.png"),
@@ -352,7 +369,7 @@ mod tests {
excluded_paths: None,
};
- let links = collect(vec![input], Some(base)).await;
+ let links = collect(vec![input], None, Some(base)).await;
let expected = HashSet::from_iter([
website("https://localhost.com/@/internal.md"),
@@ -374,7 +391,7 @@ mod tests {
file_type_hint: Some(FileType::Html),
excluded_paths: None,
};
- let links = collect(vec![input], Some(base)).await;
+ let links = collect(vec![input], None, Some(base)).await;
let expected_links = HashSet::from_iter([
// the body links wouldn't be present if the file was parsed strictly as XML
@@ -407,7 +424,7 @@ mod tests {
excluded_paths: None,
};
- let links = collect(vec![input], None).await;
+ let links = collect(vec![input], None, None).await;
let expected_urls = HashSet::from_iter([
website("https://github.com/lycheeverse/lychee/"),
@@ -426,7 +443,7 @@ mod tests {
file_type_hint: None,
excluded_paths: None,
};
- let links = collect(vec![input], None).await;
+ let links = collect(vec![input], None, None).await;
let expected_links = HashSet::from_iter([mail("user@example.com")]);
@@ -469,7 +486,7 @@ mod tests {
},
];
- let links = collect(inputs, None).await;
+ let links = collect(inputs, None, None).await;
let expected_links = HashSet::from_iter([
website(&format!(
@@ -503,7 +520,7 @@ mod tests {
excluded_paths: None,
};
- let links = collect(vec![input], Some(base)).await;
+ let links = collect(vec![input], None, Some(base)).await;
let expected_links = HashSet::from_iter([
path("/path/to/root/index.html"),
diff --git a/lychee-lib/src/types/base.rs b/lychee-lib/src/types/base.rs
index aa716b3f26..fe21429326 100644
--- a/lychee-lib/src/types/base.rs
+++ b/lychee-lib/src/types/base.rs
@@ -15,8 +15,6 @@ pub enum Base {
Local(PathBuf),
/// Remote URL pointing to a website homepage
Remote(Url),
- /// Root path for checking absolute local links
- RootPath(PathBuf),
}
impl Base {
@@ -29,10 +27,6 @@ impl Base {
let full_path = path.join(link);
Url::from_file_path(full_path).ok()
}
- Self::RootPath(_path) => {
- // this is unused currently because joining on RootPath is handled by create_uri_from_file_path
- unreachable!()
- }
}
}
@@ -41,16 +35,10 @@ impl Base {
pub(crate) fn dir(&self) -> Option<PathBuf> {
match self {
Self::Remote(_) => None,
- Self::Local(d) | Self::RootPath(d) => Some(d.clone()),
+ Self::Local(d) => Some(d.clone()),
}
}
- /// Create a root path base
- #[must_use]
- pub fn create_root_path(value: &str) -> Base {
- Self::RootPath(PathBuf::from(value))
- }
-
pub(crate) fn from_source(source: &InputSource) -> Option<Base> {
match &source {
InputSource::RemoteUrl(url) => {
diff --git a/lychee-lib/src/utils/path.rs b/lychee-lib/src/utils/path.rs
index 89abde1cc8..ca295ce6a3 100644
--- a/lychee-lib/src/utils/path.rs
+++ b/lychee-lib/src/utils/path.rs
@@ -40,7 +40,12 @@ fn dirname(src: &'_ Path) -> Option<&'_ Path> {
/// Resolve `dst` that was linked to from within `src`
///
/// Returns Ok(None) in case of an absolute local link without a `base_url`
-pub(crate) fn resolve(src: &Path, dst: &Path, base: &Option<Base>) -> Result<Option<PathBuf>> {
+pub(crate) fn resolve(
+ src: &Path,
+ dst: &PathBuf,
+ root_path: &Option<PathBuf>,
+ base: &Option<Base>,
+) -> Result<Option<PathBuf>> {
let resolved = match dst {
relative if !dst.starts_with("/") => {
// Find `dst` in the parent directory of `src`
@@ -50,19 +55,31 @@ pub(crate) fn resolve(src: &Path, dst: &Path, base: &Option<Base>) -> Result<Opt
parent.join(relative)
}
absolute if dst.starts_with("/") => {
- // Absolute local links (leading slash) require the `base_url` to
- // define the document root. Silently ignore the link in case the
- // `base_url` is not defined.
- let Some(base) = get_base_dir(base) else {
- return Ok(None);
+ let with_root_path = match root_path {
+ Some(root) => &join(root.to_path_buf(), absolute),
+ None => absolute,
};
- let Some(dir) = dirname(&base) else {
- return Err(ErrorKind::InvalidBase(
- base.display().to_string(),
- "The given directory cannot be a base".to_string(),
- ));
- };
- join(dir.to_path_buf(), absolute)
+ match get_base_dir(base) {
+ Some(base) => {
+ let Some(dir) = dirname(&base) else {
+ return Err(ErrorKind::InvalidBase(
+ base.display().to_string(),
+ "The given directory cannot be a base".to_string(),
+ ));
+ };
+ join(dir.to_path_buf(), with_root_path)
+ }
+ None => {
+ if root_path.is_some() {
+ with_root_path.to_path_buf()
+ } else {
+ // Absolute local links (leading slash) require the `base_url` to
+ // define the document root. Silently ignore the link in case the
+ // `base_url` is not defined.
+ return Ok(None);
+ }
+ }
+ }
}
_ => return Err(ErrorKind::InvalidFile(dst.to_path_buf())),
};
@@ -110,7 +127,7 @@ mod test_path {
let dummy = PathBuf::from("index.html");
let abs_path = PathBuf::from("./foo.html");
assert_eq!(
- resolve(&dummy, &abs_path, &None)?,
+ resolve(&dummy, &abs_path, &None, &None)?,
Some(env::current_dir().unwrap().join("foo.html"))
);
Ok(())
@@ -123,7 +140,7 @@ mod test_path {
let dummy = PathBuf::from("./index.html");
let abs_path = PathBuf::from("./foo.html");
assert_eq!(
- resolve(&dummy, &abs_path, &None)?,
+ resolve(&dummy, &abs_path, &None, &None)?,
Some(env::current_dir().unwrap().join("foo.html"))
);
Ok(())
@@ -136,7 +153,7 @@ mod test_path {
let abs_index = PathBuf::from("/path/to/index.html");
let abs_path = PathBuf::from("./foo.html");
assert_eq!(
- resolve(&abs_index, &abs_path, &None)?,
+ resolve(&abs_index, &abs_path, &None, &None)?,
Some(PathBuf::from("/path/to/foo.html"))
);
Ok(())
@@ -151,7 +168,7 @@ mod test_path {
let abs_path = PathBuf::from("/foo.html");
let base = Some(Base::Local(PathBuf::from("/some/absolute/base/dir")));
assert_eq!(
- resolve(&dummy, &abs_path, &base)?,
+ resolve(&dummy, &abs_path, &None, &base)?,
Some(PathBuf::from("/some/absolute/base/dir/foo.html"))
);
Ok(())
@@ -165,7 +182,7 @@ mod test_path {
let abs_path = PathBuf::from("/other/path/to/foo.html");
let base = Some(Base::Local(PathBuf::from("/some/absolute/base/dir")));
assert_eq!(
- resolve(&abs_index, &abs_path, &base)?,
+ resolve(&abs_index, &abs_path, &None, &base)?,
Some(PathBuf::from(
"/some/absolute/base/dir/other/path/to/foo.html"
))
diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs
index 72a59b701f..97db0ac8b8 100644
--- a/lychee-lib/src/utils/request.rs
+++ b/lychee-lib/src/utils/request.rs
@@ -25,10 +25,11 @@ fn extract_credentials(
fn create_request(
raw_uri: &RawUri,
source: &InputSource,
+ root_path: &Option<PathBuf>,
base: &Option<Base>,
extractor: &Option<BasicAuthExtractor>,
) -> Result<Request> {
- let uri = try_parse_into_uri(raw_uri, source, base)?;
+ let uri = try_parse_into_uri(raw_uri, source, root_path, base)?;
let source = truncate_source(source);
let element = raw_uri.element.clone();
let attribute = raw_uri.attribute.clone();
@@ -48,19 +49,26 @@ fn create_request(
/// to create a valid URI.
/// - If a URI cannot be created from the file path.
/// - If the source is not a file path (i.e. the URI type is not supported).
-fn try_parse_into_uri(raw_uri: &RawUri, source: &InputSource, base: &Option<Base>) -> Result<Uri> {
+fn try_parse_into_uri(
+ raw_uri: &RawUri,
+ source: &InputSource,
+ root_path: &Option<PathBuf>,
+ base: &Option<Base>,
+) -> Result<Uri> {
let text = raw_uri.text.clone();
let uri = match Uri::try_from(raw_uri.clone()) {
Ok(uri) => uri,
Err(_) => match base {
- Some(Base::RootPath(_)) | None => match source {
- InputSource::FsPath(root) => create_uri_from_file_path(root, &text, base)?,
- _ => return Err(ErrorKind::UnsupportedUriType(text)),
- },
Some(base_url) => match base_url.join(&text) {
Some(url) => Uri { url },
None => return Err(ErrorKind::InvalidBaseJoin(text.clone())),
},
+ None => match source {
+ InputSource::FsPath(root) => {
+ create_uri_from_file_path(root, &text, root_path, base)?
+ }
+ _ => return Err(ErrorKind::UnsupportedUriType(text)),
+ },
},
};
Ok(uri)
@@ -81,6 +89,7 @@ pub(crate) fn is_anchor(text: &str) -> bool {
fn create_uri_from_file_path(
file_path: &Path,
link_text: &str,
+ root_path: &Option<PathBuf>,
base: &Option<Base>,
) -> Result<Uri> {
let target_path = if is_anchor(link_text) {
@@ -94,7 +103,8 @@ fn create_uri_from_file_path(
} else {
link_text.to_string()
};
- let Ok(constructed_url) = resolve_and_create_url(file_path, &target_path, base) else {
+ let Ok(constructed_url) = resolve_and_create_url(file_path, &target_path, root_path, base)
+ else {
return Err(ErrorKind::InvalidPathToUri(target_path));
};
Ok(Uri {
@@ -126,21 +136,22 @@ fn truncate_source(source: &InputSource) -> InputSource {
pub(crate) fn create(
uris: Vec<RawUri>,
source: &InputSource,
+ root_path: &Option<PathBuf>,
base: &Option<Base>,
extractor: &Option<BasicAuthExtractor>,
) -> HashSet<Request> {
let base = base.clone().or_else(|| Base::from_source(&source));
uris.into_iter()
- .filter_map(
- |raw_uri| match create_request(&raw_uri, &source, &base, extractor) {
+ .filter_map(|raw_uri| {
+ match create_request(&raw_uri, &source, &root_path, &base, extractor) {
Ok(request) => Some(request),
Err(e) => {
warn!("Error creating request: {:?}", e);
None
}
- },
- )
+ }
+ })
.collect()
}
@@ -158,6 +169,7 @@ pub(crate) fn create(
fn resolve_and_create_url(
src_path: &Path,
dest_path: &str,
+ root_path: &Option<PathBuf>,
base_uri: &Option<Base>,
) -> Result<Url> {
let (dest_path, fragment) = url::remove_get_params_and_separate_fragment(dest_path);
@@ -166,8 +178,12 @@ fn resolve_and_create_url(
// This addresses the issue mentioned in the original comment about double-encoding
let decoded_dest = percent_decode_str(dest_path).decode_utf8()?;
- let Ok(Some(resolved_path)) = path::resolve(src_path, &PathBuf::from(&*decoded_dest), base_uri)
- else {
+ let Ok(Some(resolved_path)) = path::resolve(
+ src_path,
+ &PathBuf::from(&*decoded_dest),
+ root_path,
+ base_uri,
+ ) else {
return Err(ErrorKind::InvalidPathToUri(decoded_dest.to_string()));
};
@@ -192,7 +208,8 @@ mod tests {
#[test]
fn test_create_uri_from_path() {
let result =
- resolve_and_create_url(&PathBuf::from("/README.md"), "test+encoding", &None).unwrap();
+ resolve_and_create_url(&PathBuf::from("/README.md"), "test+encoding", &None, &None)
+ .unwrap();
assert_eq!(result.as_str(), "file:///test+encoding");
}
@@ -202,7 +219,7 @@ mod tests {
let source = InputSource::String(String::new());
let uris = vec![RawUri::from("relative.html")];
- let requests = create(uris, &source, &base, &None);
+ let requests = create(uris, &source, &None, &base, &None);
assert_eq!(requests.len(), 1);
assert!(requests
@@ -216,7 +233,7 @@ mod tests {
let source = InputSource::String(String::new());
let uris = vec![RawUri::from("https://another.com/page")];
- let requests = create(uris, &source, &base, &None);
+ let requests = create(uris, &source, &None, &base, &None);
assert_eq!(requests.len(), 1);
assert!(requests
@@ -230,7 +247,7 @@ mod tests {
let source = InputSource::String(String::new());
let uris = vec![RawUri::from("/root-relative")];
- let requests = create(uris, &source, &base, &None);
+ let requests = create(uris, &source, &None, &base, &None);
assert_eq!(requests.len(), 1);
assert!(requests
@@ -244,7 +261,7 @@ mod tests {
let source = InputSource::String(String::new());
let uris = vec![RawUri::from("../parent")];
- let requests = create(uris, &source, &base, &None);
+ let requests = create(uris, &source, &None, &base, &None);
assert_eq!(requests.len(), 1);
assert!(requests
@@ -258,7 +275,7 @@ mod tests {
let source = InputSource::String(String::new());
let uris = vec![RawUri::from("#fragment")];
- let requests = create(uris, &source, &base, &None);
+ let requests = create(uris, &source, &None, &base, &None);
assert_eq!(requests.len(), 1);
assert!(requests
@@ -268,11 +285,11 @@ mod tests {
#[test]
fn test_relative_url_resolution_from_root_path() {
- let base = Some(Base::create_root_path("/tmp/lychee"));
+ let root_path = Some(PathBuf::from("/tmp/lychee"));
let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
let uris = vec![RawUri::from("relative.html")];
- let requests = create(uris, &source, &base, &None);
+ let requests = create(uris, &source, &root_path, &None, &None);
assert_eq!(requests.len(), 1);
assert!(requests
@@ -282,11 +299,11 @@ mod tests {
#[test]
fn test_absolute_url_resolution_from_root_path() {
- let base = Some(Base::create_root_path("/tmp/lychee"));
+ let root_path = Some(PathBuf::from("/tmp/lychee"));
let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
let uris = vec![RawUri::from("https://another.com/page")];
- let requests = create(uris, &source, &base, &None);
+ let requests = create(uris, &source, &root_path, &None, &None);
assert_eq!(requests.len(), 1);
assert!(requests
@@ -296,11 +313,11 @@ mod tests {
#[test]
fn test_root_relative_url_resolution_from_root_path() {
- let base = Some(Base::create_root_path("/tmp/lychee"));
+ let root_path = Some(PathBuf::from("/tmp/lychee"));
let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
let uris = vec![RawUri::from("/root-relative")];
- let requests = create(uris, &source, &base, &None);
+ let requests = create(uris, &source, &root_path, &None, &None);
assert_eq!(requests.len(), 1);
assert!(requests
@@ -310,11 +327,11 @@ mod tests {
#[test]
fn test_parent_directory_url_resolution_from_root_path() {
- let base = Some(Base::create_root_path("/tmp/lychee"));
+ let root_path = Some(PathBuf::from("/tmp/lychee"));
let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
let uris = vec![RawUri::from("../parent")];
- let requests = create(uris, &source, &base, &None);
+ let requests = create(uris, &source, &root_path, &None, &None);
assert_eq!(requests.len(), 1);
assert!(requests
@@ -324,11 +341,11 @@ mod tests {
#[test]
fn test_fragment_url_resolution_from_root_path() {
- let base = Some(Base::create_root_path("/tmp/lychee"));
+ let root_path = Some(PathBuf::from("/tmp/lychee"));
let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
let uris = vec![RawUri::from("#fragment")];
- let requests = create(uris, &source, &base, &None);
+ let requests = create(uris, &source, &root_path, &None, &None);
assert_eq!(requests.len(), 1);
assert!(requests
@@ -342,7 +359,7 @@ mod tests {
let source = InputSource::String(String::new());
let uris = vec![RawUri::from("https://example.com/page")];
- let requests = create(uris, &source, &base, &None);
+ let requests = create(uris, &source, &None, &base, &None);
assert_eq!(requests.len(), 1);
assert!(requests
@@ -355,8 +372,14 @@ mod tests {
let base = Some(Base::Local(PathBuf::from("/tmp/lychee")));
let input_source = InputSource::FsPath(PathBuf::from("page.html"));
- let actual =
- create_request(&RawUri::from("file.html"), &input_source, &base, &None).unwrap();
+ let actual = create_request(
+ &RawUri::from("file.html"),
+ &input_source,
+ &None,
+ &base,
+ &None,
+ )
+ .unwrap();
assert_eq!(
actual,
@@ -381,6 +404,7 @@ mod tests {
let actual = create_request(
&RawUri::from("/usr/local/share/doc/example.html"),
&input_source,
+ &None,
&base,
&None,
)
@@ -406,7 +430,7 @@ mod tests {
let source = InputSource::String(String::new());
let raw_uri = RawUri::from("relative.html");
- let uri = try_parse_into_uri(&raw_uri, &source, &base).unwrap();
+ let uri = try_parse_into_uri(&raw_uri, &source, &None, &base).unwrap();
assert_eq!(uri.url.as_str(), "file:///tmp/lychee/relative.html");
}
@@ -417,7 +441,7 @@ mod tests {
let source = InputSource::String(String::new());
let raw_uri = RawUri::from("absolute.html");
- let uri = try_parse_into_uri(&raw_uri, &source, &base).unwrap();
+ let uri = try_parse_into_uri(&raw_uri, &source, &None, &base).unwrap();
assert_eq!(uri.url.as_str(), "file:///tmp/lychee/absolute.html");
}
From 24c2e12ff97cc2ddd1a9a6c8de7fbf6283330a45 Mon Sep 17 00:00:00 2001
From: Trask Stalnaker
Date: Sat, 30 Nov 2024 21:34:02 -0800
Subject: [PATCH 12/26] Revert a comment change that is no longer applicable
---
lychee-lib/src/types/base.rs | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lychee-lib/src/types/base.rs b/lychee-lib/src/types/base.rs
index fe21429326..b7b76c7e5b 100644
--- a/lychee-lib/src/types/base.rs
+++ b/lychee-lib/src/types/base.rs
@@ -30,7 +30,7 @@ impl Base {
}
}
- /// Return the directory if the base is not remote
+ /// Return the directory if the base is local
#[must_use]
pub(crate) fn dir(&self) -> Option<PathBuf> {
match self {
From 4d3ef2b4e0ee2e4e8c761356691b2dc4b5d7ff41 Mon Sep 17 00:00:00 2001
From: Trask Stalnaker
Date: Sat, 30 Nov 2024 21:44:32 -0800
Subject: [PATCH 13/26] Remove unused code
---
lychee-lib/src/types/base.rs | 9 ----
lychee-lib/src/utils/path.rs | 80 +++++++--------------------------
lychee-lib/src/utils/request.rs | 21 +++------
3 files changed, 21 insertions(+), 89 deletions(-)
diff --git a/lychee-lib/src/types/base.rs b/lychee-lib/src/types/base.rs
index b7b76c7e5b..4c68900c18 100644
--- a/lychee-lib/src/types/base.rs
+++ b/lychee-lib/src/types/base.rs
@@ -30,15 +30,6 @@ impl Base {
}
}
- /// Return the directory if the base is local
- #[must_use]
- pub(crate) fn dir(&self) -> Option<PathBuf> {
- match self {
- Self::Remote(_) => None,
- Self::Local(d) => Some(d.clone()),
- }
- }
-
pub(crate) fn from_source(source: &InputSource) -> Option<Base> {
match &source {
InputSource::RemoteUrl(url) => {
diff --git a/lychee-lib/src/utils/path.rs b/lychee-lib/src/utils/path.rs
index ca295ce6a3..83823293ff 100644
--- a/lychee-lib/src/utils/path.rs
+++ b/lychee-lib/src/utils/path.rs
@@ -1,4 +1,4 @@
-use crate::{Base, ErrorKind, Result};
+use crate::{ErrorKind, Result};
use cached::proc_macro::cached;
use once_cell::sync::Lazy;
use path_clean::PathClean;
@@ -9,11 +9,6 @@ use std::path::{Path, PathBuf};
static CURRENT_DIR: Lazy<PathBuf> =
Lazy::new(|| env::current_dir().expect("cannot get current dir from environment"));
-/// Returns the base if it is a valid `PathBuf`
-fn get_base_dir(base: &Option<Base>) -> Option<PathBuf> {
- base.as_ref().and_then(Base::dir)
-}
-
/// Create an absolute path out of a `PathBuf`.
///
/// The `clean` method is relatively expensive
@@ -44,7 +39,6 @@ pub(crate) fn resolve(
src: &Path,
dst: &PathBuf,
root_path: &Option<PathBuf>,
- base: &Option<Base>,
) -> Result<Option<PathBuf>> {
let resolved = match dst {
relative if !dst.starts_with("/") => {
@@ -55,31 +49,18 @@ pub(crate) fn resolve(
parent.join(relative)
}
absolute if dst.starts_with("/") => {
- let with_root_path = match root_path {
- Some(root) => &join(root.to_path_buf(), absolute),
- None => absolute,
+ // Absolute local links (leading slash) are ignored unless
+ // root_path is provided
+ let Some(base) = root_path else {
+ return Ok(None);
+ };
+ let Some(dir) = dirname(&base) else {
+ return Err(ErrorKind::InvalidBase(
+ base.display().to_string(),
+ "The given directory cannot be a base".to_string(),
+ ));
};
- match get_base_dir(base) {
- Some(base) => {
- let Some(dir) = dirname(&base) else {
- return Err(ErrorKind::InvalidBase(
- base.display().to_string(),
- "The given directory cannot be a base".to_string(),
- ));
- };
- join(dir.to_path_buf(), with_root_path)
- }
- None => {
- if root_path.is_some() {
- with_root_path.to_path_buf()
- } else {
- // Absolute local links (leading slash) require the `base_url` to
- // define the document root. Silently ignore the link in case the
- // `base_url` is not defined.
- return Ok(None);
- }
- }
- }
+ join(dir.to_path_buf(), absolute)
}
_ => return Err(ErrorKind::InvalidFile(dst.to_path_buf())),
};
@@ -127,7 +108,7 @@ mod test_path {
let dummy = PathBuf::from("index.html");
let abs_path = PathBuf::from("./foo.html");
assert_eq!(
- resolve(&dummy, &abs_path, &None, &None)?,
+ resolve(&dummy, &abs_path, &None)?,
Some(env::current_dir().unwrap().join("foo.html"))
);
Ok(())
@@ -140,7 +121,7 @@ mod test_path {
let dummy = PathBuf::from("./index.html");
let abs_path = PathBuf::from("./foo.html");
assert_eq!(
- resolve(&dummy, &abs_path, &None, &None)?,
+ resolve(&dummy, &abs_path, &None)?,
Some(env::current_dir().unwrap().join("foo.html"))
);
Ok(())
@@ -153,43 +134,12 @@ mod test_path {
let abs_index = PathBuf::from("/path/to/index.html");
let abs_path = PathBuf::from("./foo.html");
assert_eq!(
- resolve(&abs_index, &abs_path, &None, &None)?,
+ resolve(&abs_index, &abs_path, &None)?,
Some(PathBuf::from("/path/to/foo.html"))
);
Ok(())
}
- // dummy
- // foo.html
- // valid base dir
- #[test]
- fn test_resolve_absolute_from_base_dir() -> Result<()> {
- let dummy = PathBuf::new();
- let abs_path = PathBuf::from("/foo.html");
- let base = Some(Base::Local(PathBuf::from("/some/absolute/base/dir")));
- assert_eq!(
- resolve(&dummy, &abs_path, &None, &base)?,
- Some(PathBuf::from("/some/absolute/base/dir/foo.html"))
- );
- Ok(())
- }
-
- // /path/to/index.html
- // /other/path/to/foo.html
- #[test]
- fn test_resolve_absolute_from_absolute() -> Result<()> {
- let abs_index = PathBuf::from("/path/to/index.html");
- let abs_path = PathBuf::from("/other/path/to/foo.html");
- let base = Some(Base::Local(PathBuf::from("/some/absolute/base/dir")));
- assert_eq!(
- resolve(&abs_index, &abs_path, &None, &base)?,
- Some(PathBuf::from(
- "/some/absolute/base/dir/other/path/to/foo.html"
- ))
- );
- Ok(())
- }
-
#[test]
fn test_contains() {
let parent_dir = tempfile::tempdir().unwrap();
diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs
index 97db0ac8b8..ef8d0b4d6b 100644
--- a/lychee-lib/src/utils/request.rs
+++ b/lychee-lib/src/utils/request.rs
@@ -64,9 +64,7 @@ fn try_parse_into_uri(
None => return Err(ErrorKind::InvalidBaseJoin(text.clone())),
},
None => match source {
- InputSource::FsPath(root) => {
- create_uri_from_file_path(root, &text, root_path, base)?
- }
+ InputSource::FsPath(root) => create_uri_from_file_path(root, &text, root_path)?,
_ => return Err(ErrorKind::UnsupportedUriType(text)),
},
},
@@ -90,7 +88,6 @@ fn create_uri_from_file_path(
file_path: &Path,
link_text: &str,
root_path: &Option<PathBuf>,
- base: &Option<Base>,
) -> Result<Uri> {
let target_path = if is_anchor(link_text) {
// For anchors, we need to append the anchor to the file name.
@@ -103,8 +100,7 @@ fn create_uri_from_file_path(
} else {
link_text.to_string()
};
- let Ok(constructed_url) = resolve_and_create_url(file_path, &target_path, root_path, base)
- else {
+ let Ok(constructed_url) = resolve_and_create_url(file_path, &target_path, root_path) else {
return Err(ErrorKind::InvalidPathToUri(target_path));
};
Ok(Uri {
@@ -170,7 +166,6 @@ fn resolve_and_create_url(
src_path: &Path,
dest_path: &str,
root_path: &Option<PathBuf>,
- base_uri: &Option<Base>,
) -> Result<Url> {
let (dest_path, fragment) = url::remove_get_params_and_separate_fragment(dest_path);
@@ -178,12 +173,9 @@ fn resolve_and_create_url(
// This addresses the issue mentioned in the original comment about double-encoding
let decoded_dest = percent_decode_str(dest_path).decode_utf8()?;
- let Ok(Some(resolved_path)) = path::resolve(
- src_path,
- &PathBuf::from(&*decoded_dest),
- root_path,
- base_uri,
- ) else {
+ let Ok(Some(resolved_path)) =
+ path::resolve(src_path, &PathBuf::from(&*decoded_dest), root_path)
+ else {
return Err(ErrorKind::InvalidPathToUri(decoded_dest.to_string()));
};
@@ -208,8 +200,7 @@ mod tests {
#[test]
fn test_create_uri_from_path() {
let result =
- resolve_and_create_url(&PathBuf::from("/README.md"), "test+encoding", &None, &None)
- .unwrap();
+ resolve_and_create_url(&PathBuf::from("/README.md"), "test+encoding", &None).unwrap();
assert_eq!(result.as_str(), "file:///test+encoding");
}
From 196a44102210684b901a2d049770af6336cbf3e3 Mon Sep 17 00:00:00 2001
From: Trask Stalnaker
Date: Sun, 1 Dec 2024 10:59:34 -0800
Subject: [PATCH 14/26] Fix and simplification
---
lychee-lib/src/utils/path.rs | 37 +++++++--------------------------
lychee-lib/src/utils/request.rs | 31 +++++++++++++++++++--------
2 files changed, 29 insertions(+), 39 deletions(-)
diff --git a/lychee-lib/src/utils/path.rs b/lychee-lib/src/utils/path.rs
index 83823293ff..58518f31e3 100644
--- a/lychee-lib/src/utils/path.rs
+++ b/lychee-lib/src/utils/path.rs
@@ -24,21 +24,13 @@ pub(crate) fn absolute_path(path: PathBuf) -> PathBuf {
.clean()
}
-/// Get the directory name of a given `Path`.
-fn dirname(src: &'_ Path) -> Option<&'_ Path> {
- if src.is_file() {
- return src.parent();
- }
- Some(src)
-}
-
/// Resolve `dst` that was linked to from within `src`
///
/// Returns Ok(None) in case of an absolute local link without a `base_url`
pub(crate) fn resolve(
src: &Path,
dst: &PathBuf,
- root_path: &Option<PathBuf>,
+ ignore_absolute_local_links: bool,
) -> Result<Option<PathBuf>> {
let resolved = match dst {
relative if !dst.starts_with("/") => {
@@ -51,31 +43,16 @@ pub(crate) fn resolve(
absolute if dst.starts_with("/") => {
// Absolute local links (leading slash) are ignored unless
// root_path is provided
- let Some(base) = root_path else {
+ if ignore_absolute_local_links {
return Ok(None);
- };
- let Some(dir) = dirname(&base) else {
- return Err(ErrorKind::InvalidBase(
- base.display().to_string(),
- "The given directory cannot be a base".to_string(),
- ));
- };
- join(dir.to_path_buf(), absolute)
+ }
+ PathBuf::from(absolute)
}
_ => return Err(ErrorKind::InvalidFile(dst.to_path_buf())),
};
Ok(Some(absolute_path(resolved)))
}
-/// A cumbersome way to concatenate paths without checking their
-/// existence on disk. See
-fn join(base: PathBuf, dst: &Path) -> PathBuf {
- let mut abs = base.into_os_string();
- let target_str = dst.as_os_str();
- abs.push(target_str);
- PathBuf::from(abs)
-}
-
/// Check if `child` is a subdirectory/file inside `parent`
///
/// Note that `contains(parent, parent)` will return `true`
@@ -108,7 +85,7 @@ mod test_path {
let dummy = PathBuf::from("index.html");
let abs_path = PathBuf::from("./foo.html");
assert_eq!(
- resolve(&dummy, &abs_path, &None)?,
+ resolve(&dummy, &abs_path, true)?,
Some(env::current_dir().unwrap().join("foo.html"))
);
Ok(())
@@ -121,7 +98,7 @@ mod test_path {
let dummy = PathBuf::from("./index.html");
let abs_path = PathBuf::from("./foo.html");
assert_eq!(
- resolve(&dummy, &abs_path, &None)?,
+ resolve(&dummy, &abs_path, true)?,
Some(env::current_dir().unwrap().join("foo.html"))
);
Ok(())
@@ -134,7 +111,7 @@ mod test_path {
let abs_index = PathBuf::from("/path/to/index.html");
let abs_path = PathBuf::from("./foo.html");
assert_eq!(
- resolve(&abs_index, &abs_path, &None)?,
+ resolve(&abs_index, &abs_path, true)?,
Some(PathBuf::from("/path/to/foo.html"))
);
Ok(())
diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs
index ef8d0b4d6b..55178b54d5 100644
--- a/lychee-lib/src/utils/request.rs
+++ b/lychee-lib/src/utils/request.rs
@@ -55,7 +55,14 @@ fn try_parse_into_uri(
root_path: &Option<PathBuf>,
base: &Option<Base>,
) -> Result<Uri> {
- let text = raw_uri.text.clone();
+ let mut text = raw_uri.text.clone();
+ if text.starts_with('/') {
+ if let Some(path) = root_path {
+ if let Some(path_str) = path.to_str() {
+ text = format!("{path_str}{text}");
+ }
+ }
+ }
let uri = match Uri::try_from(raw_uri.clone()) {
Ok(uri) => uri,
Err(_) => match base {
@@ -64,7 +71,9 @@ fn try_parse_into_uri(
None => return Err(ErrorKind::InvalidBaseJoin(text.clone())),
},
None => match source {
- InputSource::FsPath(root) => create_uri_from_file_path(root, &text, root_path)?,
+ InputSource::FsPath(root) => {
+ create_uri_from_file_path(root, &text, root_path.is_none())?
+ }
_ => return Err(ErrorKind::UnsupportedUriType(text)),
},
},
@@ -87,7 +96,7 @@ pub(crate) fn is_anchor(text: &str) -> bool {
fn create_uri_from_file_path(
file_path: &Path,
link_text: &str,
- root_path: &Option<PathBuf>,
+ ignore_absolute_local_links: bool,
) -> Result<Uri> {
let target_path = if is_anchor(link_text) {
// For anchors, we need to append the anchor to the file name.
@@ -100,7 +109,9 @@ fn create_uri_from_file_path(
} else {
link_text.to_string()
};
- let Ok(constructed_url) = resolve_and_create_url(file_path, &target_path, root_path) else {
+ let Ok(constructed_url) =
+ resolve_and_create_url(file_path, &target_path, ignore_absolute_local_links)
+ else {
return Err(ErrorKind::InvalidPathToUri(target_path));
};
Ok(Uri {
@@ -165,7 +176,7 @@ pub(crate) fn create(
fn resolve_and_create_url(
src_path: &Path,
dest_path: &str,
- root_path: &Option<PathBuf>,
+ ignore_absolute_local_links: bool,
) -> Result<Url> {
let (dest_path, fragment) = url::remove_get_params_and_separate_fragment(dest_path);
@@ -173,9 +184,11 @@ fn resolve_and_create_url(
// This addresses the issue mentioned in the original comment about double-encoding
let decoded_dest = percent_decode_str(dest_path).decode_utf8()?;
- let Ok(Some(resolved_path)) =
- path::resolve(src_path, &PathBuf::from(&*decoded_dest), root_path)
- else {
+ let Ok(Some(resolved_path)) = path::resolve(
+ src_path,
+ &PathBuf::from(&*decoded_dest),
+ ignore_absolute_local_links,
+ ) else {
return Err(ErrorKind::InvalidPathToUri(decoded_dest.to_string()));
};
@@ -200,7 +213,7 @@ mod tests {
#[test]
fn test_create_uri_from_path() {
let result =
- resolve_and_create_url(&PathBuf::from("/README.md"), "test+encoding", &None).unwrap();
+ resolve_and_create_url(&PathBuf::from("/README.md"), "test+encoding", true).unwrap();
assert_eq!(result.as_str(), "file:///test+encoding");
}
From 1ce48cf546e328ccf9f6fb662a504a72fe9e9bed Mon Sep 17 00:00:00 2001
From: Trask Stalnaker
Date: Sun, 1 Dec 2024 10:15:56 -0800
Subject: [PATCH 15/26] Integration test both at the same time
---
lychee-bin/tests/cli.rs | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
diff --git a/lychee-bin/tests/cli.rs b/lychee-bin/tests/cli.rs
index c8219d58a5..85a8bee3e0 100644
--- a/lychee-bin/tests/cli.rs
+++ b/lychee-bin/tests/cli.rs
@@ -411,6 +411,24 @@ mod cli {
.stdout(contains("2 Errors"));
}
+ #[test]
+ fn test_resolve_paths_from_root_path_and_base_url() {
+ let mut cmd = main_command();
+ let dir = fixtures_path();
+
+ cmd.arg("--offline")
+ .arg("--root-path")
+ .arg("/resolve_paths")
+ .arg("--base")
+ .arg(&dir)
+ .arg(dir.join("resolve_paths").join("index.html"))
+ .env_clear()
+ .assert()
+ .success()
+ .stdout(contains("3 Total"))
+ .stdout(contains("3 OK"));
+ }
+
#[test]
fn test_youtube_quirk() {
let url = "https://www.youtube.com/watch?v=NlKuICiT470&list=PLbWDhxwM_45mPVToqaIZNbZeIzFchsKKQ&index=7";
From 393919ed96788bb43df2359e6cb2bbe1aebdc6e0 Mon Sep 17 00:00:00 2001
From: Trask Stalnaker
Date: Sun, 1 Dec 2024 10:23:29 -0800
Subject: [PATCH 16/26] Unit tests both at the same time
---
lychee-lib/src/utils/request.rs | 75 +++++++++++++++++++++++++++++++++
1 file changed, 75 insertions(+)
diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs
index 55178b54d5..b7023b9bff 100644
--- a/lychee-lib/src/utils/request.rs
+++ b/lychee-lib/src/utils/request.rs
@@ -357,6 +357,81 @@ mod tests {
.any(|r| r.uri.url.as_str() == "file:///some/page.html#fragment"));
}
+ #[test]
+ fn test_relative_url_resolution_from_root_path_and_base_url() {
+ let root_path = Some(PathBuf::from("/tmp/lychee"));
+ let base = Some(Base::try_from("https://example.com/path/page.html").unwrap());
+ let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
+
+ let uris = vec![RawUri::from("relative.html")];
+ let requests = create(uris, &source, &root_path, &base, &None);
+
+ assert_eq!(requests.len(), 1);
+ assert!(requests
+ .iter()
+ .any(|r| r.uri.url.as_str() == "https://example.com/path/relative.html"));
+ }
+
+ #[test]
+ fn test_absolute_url_resolution_from_root_path_and_base_url() {
+ let root_path = Some(PathBuf::from("/tmp/lychee"));
+ let base = Some(Base::try_from("https://example.com/path/page.html").unwrap());
+ let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
+
+ let uris = vec![RawUri::from("https://another.com/page")];
+ let requests = create(uris, &source, &root_path, &base, &None);
+
+ assert_eq!(requests.len(), 1);
+ assert!(requests
+ .iter()
+ .any(|r| r.uri.url.as_str() == "https://another.com/page"));
+ }
+
+ #[test]
+ fn test_root_relative_url_resolution_from_root_path_and_base_url() {
+ let root_path = Some(PathBuf::from("/tmp/lychee"));
+ let base = Some(Base::try_from("https://example.com/path/page.html").unwrap());
+ let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
+
+ let uris = vec![RawUri::from("/root-relative")];
+ let requests = create(uris, &source, &root_path, &base, &None);
+
+ assert_eq!(requests.len(), 1);
+ assert!(requests
+ .iter()
+ .any(|r| r.uri.url.as_str() == "https://example.com/tmp/lychee/root-relative"));
+ }
+
+ #[test]
+ fn test_parent_directory_url_resolution_from_root_path_and_base_url() {
+ let root_path = Some(PathBuf::from("/tmp/lychee"));
+ let base = Some(Base::try_from("https://example.com/path/page.html").unwrap());
+ let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
+
+ let uris = vec![RawUri::from("../parent")];
+ let requests = create(uris, &source, &root_path, &base, &None);
+
+ assert_eq!(requests.len(), 1);
+ assert!(requests
+ .iter()
+ .any(|r| r.uri.url.as_str() == "https://example.com/parent"));
+ }
+
+ #[test]
+ fn test_fragment_url_resolution_from_root_path_and_base_url() {
+ let root_path = Some(PathBuf::from("/tmp/lychee"));
+ let base = Some(Base::try_from("https://example.com/path/page.html").unwrap());
+ let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
+
+ let uris = vec![RawUri::from("#fragment")];
+ let requests = create(uris, &source, &root_path, &base, &None);
+
+ assert_eq!(requests.len(), 1);
+ assert!(requests
+ .iter()
+ .any(|r| r.uri.url.as_str() == "https://example.com/path/page.html#fragment"));
+ }
+
#[test]
fn test_no_base_url_resolution() {
let base = None;
From 55a8198e337e185ae0bf8cb01f193bd6b34fb246 Mon Sep 17 00:00:00 2001
From: Trask Stalnaker
Date: Sun, 1 Dec 2024 11:24:01 -0800
Subject: [PATCH 17/26] Remove now redundant comment
---
lychee-lib/src/utils/path.rs | 2 --
1 file changed, 2 deletions(-)
diff --git a/lychee-lib/src/utils/path.rs b/lychee-lib/src/utils/path.rs
index 58518f31e3..979d2b26e6 100644
--- a/lychee-lib/src/utils/path.rs
+++ b/lychee-lib/src/utils/path.rs
@@ -41,8 +41,6 @@ pub(crate) fn resolve(
parent.join(relative)
}
absolute if dst.starts_with("/") => {
- // Absolute local links (leading slash) are ignored unless
- // root_path is provided
if ignore_absolute_local_links {
return Ok(None);
}
From 70fa35bab333edb7d8ee4904319da3659461d840 Mon Sep 17 00:00:00 2001
From: Trask Stalnaker
Date: Mon, 2 Dec 2024 13:01:49 -0800
Subject: [PATCH 18/26] Revert windows-specific change, seems not needed after
recent changes
---
lychee-lib/src/utils/path.rs | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/lychee-lib/src/utils/path.rs b/lychee-lib/src/utils/path.rs
index 979d2b26e6..3d86259157 100644
--- a/lychee-lib/src/utils/path.rs
+++ b/lychee-lib/src/utils/path.rs
@@ -33,14 +33,14 @@ pub(crate) fn resolve(
ignore_absolute_local_links: bool,
) -> Result<Option<PathBuf>> {
let resolved = match dst {
- relative if !dst.starts_with("/") => {
+ relative if dst.is_relative() => {
// Find `dst` in the parent directory of `src`
let Some(parent) = src.parent() else {
return Err(ErrorKind::InvalidFile(relative.to_path_buf()));
};
parent.join(relative)
}
- absolute if dst.starts_with("/") => {
+ absolute if dst.is_absolute() => {
if ignore_absolute_local_links {
return Ok(None);
}
From ae0ed421b7956f2b9933d440054c25fc4946a5a1 Mon Sep 17 00:00:00 2001
From: Trask Stalnaker
Date: Mon, 2 Dec 2024 18:21:32 -0800
Subject: [PATCH 19/26] Use Collector::default()
---
examples/collect_links/collect_links.rs | 2 +-
lychee-lib/src/collector.rs | 15 +++++++++++++++
2 files changed, 16 insertions(+), 1 deletion(-)
diff --git a/examples/collect_links/collect_links.rs b/examples/collect_links/collect_links.rs
index 3a2ab520e7..375dad3e30 100644
--- a/examples/collect_links/collect_links.rs
+++ b/examples/collect_links/collect_links.rs
@@ -21,7 +21,7 @@ async fn main() -> Result<()> {
},
];
- let links = Collector::new(None, None) // root_path and base
+ let links = Collector::default() // root_path and base
.skip_missing_inputs(false) // don't skip missing inputs? (default=false)
.skip_hidden(false) // skip hidden files? (default=true)
.skip_ignored(false) // skip files that are ignored by git? (default=true)
diff --git a/lychee-lib/src/collector.rs b/lychee-lib/src/collector.rs
index c21dd67999..0cde843d8d 100644
--- a/lychee-lib/src/collector.rs
+++ b/lychee-lib/src/collector.rs
@@ -26,6 +26,21 @@ pub struct Collector {
base: Option<Base>,
}
+impl Default for Collector {
+ fn default() -> Self {
+ Collector {
+ basic_auth_extractor: None,
+ skip_missing_inputs: false,
+ include_verbatim: false,
+ use_html5ever: false,
+ skip_hidden: true,
+ skip_ignored: true,
+ root_path: None,
+ base: None,
+ }
+ }
+}
+
impl Collector {
/// Create a new collector with an empty cache
#[must_use]
From 3331cdacc7a4bbc219f3ddd5d7fd086aa015d0f1 Mon Sep 17 00:00:00 2001
From: Trask Stalnaker
Date: Mon, 2 Dec 2024 18:45:56 -0800
Subject: [PATCH 20/26] extract method and unit tests
---
lychee-lib/src/utils/request.rs | 52 ++++++++++++++++++++++++++++-----
1 file changed, 44 insertions(+), 8 deletions(-)
diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs
index b7023b9bff..d5e002f162 100644
--- a/lychee-lib/src/utils/request.rs
+++ b/lychee-lib/src/utils/request.rs
@@ -55,14 +55,7 @@ fn try_parse_into_uri(
root_path: &Option<PathBuf>,
base: &Option<Base>,
) -> Result<Uri> {
- let mut text = raw_uri.text.clone();
- if text.starts_with('/') {
- if let Some(path) = root_path {
- if let Some(path_str) = path.to_str() {
- text = format!("{path_str}{text}");
- }
- }
- }
+ let text = prepend_root_path_if_absolute_local_link(&raw_uri.text, root_path);
let uri = match Uri::try_from(raw_uri.clone()) {
Ok(uri) => uri,
Err(_) => match base {
@@ -200,6 +193,17 @@ fn resolve_and_create_url(
Ok(url)
}
+fn prepend_root_path_if_absolute_local_link(text: &str, root_path: &Option<PathBuf>) -> String {
+ if text.starts_with('/') {
+ if let Some(path) = root_path {
+ if let Some(path_str) = path.to_str() {
+ return format!("{}{}", path_str, text);
+ }
+ }
+ }
+ text.to_string()
+}
+
#[cfg(test)]
mod tests {
use super::*;
@@ -524,4 +528,36 @@ mod tests {
assert_eq!(uri.url.as_str(), "file:///tmp/lychee/absolute.html");
}
+
+ #[test]
+ fn test_prepend_with_absolute_local_link_and_root_path() {
+ let text = "/absolute/path";
+ let root_path = Some(PathBuf::from("/root"));
+ let result = prepend_root_path_if_absolute_local_link(text, &root_path);
+ assert_eq!(result, "/root/absolute/path");
+ }
+
+ #[test]
+ fn test_prepend_with_absolute_local_link_and_no_root_path() {
+ let text = "/absolute/path";
+ let root_path: Option<PathBuf> = None;
+ let result = prepend_root_path_if_absolute_local_link(text, &root_path);
+ assert_eq!(result, "/absolute/path");
+ }
+
+ #[test]
+ fn test_prepend_with_relative_link_and_root_path() {
+ let text = "relative/path";
+ let root_path = Some(PathBuf::from("/root"));
+ let result = prepend_root_path_if_absolute_local_link(text, &root_path);
+ assert_eq!(result, "relative/path");
+ }
+
+ #[test]
+ fn test_prepend_with_relative_link_and_no_root_path() {
+ let text = "relative/path";
+ let root_path = None;
+ let result = prepend_root_path_if_absolute_local_link(text, &root_path);
+ assert_eq!(result, "relative/path");
+ }
}
From fb5aff538dbde260e9bb51f0fdca89847c4b95e6 Mon Sep 17 00:00:00 2001
From: Trask Stalnaker
Date: Mon, 2 Dec 2024 18:51:12 -0800
Subject: [PATCH 21/26] clippy
---
lychee-lib/src/types/file.rs | 1 -
lychee-lib/src/utils/path.rs | 4 ++--
lychee-lib/src/utils/request.rs | 12 ++++++------
3 files changed, 8 insertions(+), 9 deletions(-)
diff --git a/lychee-lib/src/types/file.rs b/lychee-lib/src/types/file.rs
index 7cdeff6c80..d2f8631c6c 100644
--- a/lychee-lib/src/types/file.rs
+++ b/lychee-lib/src/types/file.rs
@@ -54,7 +54,6 @@ impl> From for FileType {
}
/// Helper function to check if a path is likely a URL.
-
fn is_url(path: &Path) -> bool {
path.to_str()
.and_then(|s| Url::parse(s).ok())
diff --git a/lychee-lib/src/utils/path.rs b/lychee-lib/src/utils/path.rs
index 3d86259157..daa4f7fde6 100644
--- a/lychee-lib/src/utils/path.rs
+++ b/lychee-lib/src/utils/path.rs
@@ -36,7 +36,7 @@ pub(crate) fn resolve(
relative if dst.is_relative() => {
// Find `dst` in the parent directory of `src`
let Some(parent) = src.parent() else {
- return Err(ErrorKind::InvalidFile(relative.to_path_buf()));
+ return Err(ErrorKind::InvalidFile(relative.clone()));
};
parent.join(relative)
}
@@ -46,7 +46,7 @@ pub(crate) fn resolve(
}
PathBuf::from(absolute)
}
- _ => return Err(ErrorKind::InvalidFile(dst.to_path_buf())),
+ _ => return Err(ErrorKind::InvalidFile(dst.clone())),
};
Ok(Some(absolute_path(resolved)))
}
diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs
index d5e002f162..7f45ed8e7a 100644
--- a/lychee-lib/src/utils/request.rs
+++ b/lychee-lib/src/utils/request.rs
@@ -140,18 +140,18 @@ pub(crate) fn create(
base: &Option<Base>,
extractor: &Option<BasicAuthExtractor>,
) -> HashSet {
- let base = base.clone().or_else(|| Base::from_source(&source));
+ let base = base.clone().or_else(|| Base::from_source(source));
uris.into_iter()
- .filter_map(|raw_uri| {
- match create_request(&raw_uri, &source, &root_path, &base, extractor) {
+ .filter_map(
+ |raw_uri| match create_request(&raw_uri, source, root_path, &base, extractor) {
Ok(request) => Some(request),
Err(e) => {
warn!("Error creating request: {:?}", e);
None
}
- }
- })
+ },
+ )
.collect()
}
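@@ -197,7 +197,7 @@ fn prepend_root_path_if_absolute_local_link(text: &str, root_path: &Option<PathB
if text.starts_with('/') {
if let Some(path) = root_path {
if let Some(path_str) = path.to_str() {
- return format!("{}{}", path_str, text);
+ return format!("{path_str}{text}");
}
}
}
From: Trask Stalnaker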
Date: Mon, 2 Dec 2024 19:26:21 -0800
Subject: [PATCH 22/26] clippy: &Option -> Option<&A>
---
lychee-bin/src/commands/check.rs | 8 +-
lychee-lib/src/collector.rs | 6 +-
lychee-lib/src/utils/request.rs | 141 +++++++++++++++----------------
3 files changed, 76 insertions(+), 79 deletions(-)
diff --git a/lychee-bin/src/commands/check.rs b/lychee-bin/src/commands/check.rs
index 3551c4c9c8..5c0614b179 100644
--- a/lychee-bin/src/commands/check.rs
+++ b/lychee-bin/src/commands/check.rs
@@ -192,7 +192,7 @@ async fn progress_bar_task(
while let Some(response) = recv_resp.recv().await {
show_progress(
&mut io::stderr(),
- &pb,
+ pb.as_ref(),
&response,
formatter.as_ref(),
&verbose,
@@ -331,7 +331,7 @@ fn ignore_cache(uri: &Uri, status: &Status, cache_exclude_status: &HashSet)
fn show_progress(
output: &mut dyn Write,
- progress_bar: &Option<ProgressBar>,
+ progress_bar: Option<&ProgressBar>,
response: &Response,
formatter: &dyn ResponseFormatter,
verbose: &Verbosity,
@@ -401,7 +401,7 @@ mod tests {
let formatter = get_response_formatter(&options::OutputMode::Plain);
show_progress(
&mut buf,
- &None,
+ None,
&response,
formatter.as_ref(),
&Verbosity::default(),
@@ -423,7 +423,7 @@ mod tests {
let formatter = get_response_formatter(&options::OutputMode::Plain);
show_progress(
&mut buf,
- &None,
+ None,
&response,
formatter.as_ref(),
&Verbosity::debug(),
diff --git a/lychee-lib/src/collector.rs b/lychee-lib/src/collector.rs
index 0cde843d8d..dc6826fccb 100644
--- a/lychee-lib/src/collector.rs
+++ b/lychee-lib/src/collector.rs
@@ -146,9 +146,9 @@ impl Collector {
let requests = request::create(
uris,
&content.source,
- &root_path,
- &base,
- &basic_auth_extractor,
+ root_path.as_ref(),
+ base.as_ref(),
+ basic_auth_extractor.as_ref(),
);
Result::Ok(stream::iter(requests.into_iter().map(Ok)))
}
diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs
index 7f45ed8e7a..5df498fdd5 100644
--- a/lychee-lib/src/utils/request.rs
+++ b/lychee-lib/src/utils/request.rs
@@ -15,7 +15,7 @@ use crate::{
/// Extract basic auth credentials for a given URL.
fn extract_credentials(
- extractor: &Option<BasicAuthExtractor>,
+ extractor: Option<&BasicAuthExtractor>,
uri: &Uri,
) -> Option {
extractor.as_ref().and_then(|ext| ext.matches(uri))
@@ -25,9 +25,9 @@ fn extract_credentials(
fn create_request(
raw_uri: &RawUri,
source: &InputSource,
- root_path: &Option<PathBuf>,
- base: &Option<Base>,
- extractor: &Option<BasicAuthExtractor>,
+ root_path: Option<&PathBuf>,
+ base: Option<&Base>,
+ extractor: Option<&BasicAuthExtractor>,
) -> Result {
let uri = try_parse_into_uri(raw_uri, source, root_path, base)?;
let source = truncate_source(source);
@@ -52,8 +52,8 @@ fn create_request(
fn try_parse_into_uri(
raw_uri: &RawUri,
source: &InputSource,
- root_path: &Option<PathBuf>,
- base: &Option<Base>,
+ root_path: Option<&PathBuf>,
+ base: Option<&Base>,
) -> Result {
let text = prepend_root_path_if_absolute_local_link(&raw_uri.text, root_path);
let uri = match Uri::try_from(raw_uri.clone()) {
@@ -136,22 +136,22 @@ fn truncate_source(source: &InputSource) -> InputSource {
pub(crate) fn create(
uris: Vec<RawUri>,
source: &InputSource,
- root_path: &Option<PathBuf>,
- base: &Option<Base>,
- extractor: &Option<BasicAuthExtractor>,
+ root_path: Option<&PathBuf>,
+ base: Option<&Base>,
+ extractor: Option<&BasicAuthExtractor>,
) -> HashSet {
- let base = base.clone().or_else(|| Base::from_source(source));
+ let base = base.cloned().or_else(|| Base::from_source(source));
uris.into_iter()
- .filter_map(
- |raw_uri| match create_request(&raw_uri, source, root_path, &base, extractor) {
+ .filter_map(|raw_uri| {
+ match create_request(&raw_uri, source, root_path, base.as_ref(), extractor) {
Ok(request) => Some(request),
Err(e) => {
warn!("Error creating request: {:?}", e);
None
}
- },
- )
+ }
+ })
.collect()
}
@@ -193,7 +193,7 @@ fn resolve_and_create_url(
Ok(url)
}
-fn prepend_root_path_if_absolute_local_link(text: &str, root_path: &Option<PathBuf>) -> String {
+fn prepend_root_path_if_absolute_local_link(text: &str, root_path: Option<&PathBuf>) -> String {
if text.starts_with('/') {
if let Some(path) = root_path {
if let Some(path_str) = path.to_str() {
@@ -223,11 +223,11 @@ mod tests {
#[test]
fn test_relative_url_resolution() {
- let base = Some(Base::try_from("https://example.com/path/page.html").unwrap());
+ let base = Base::try_from("https://example.com/path/page.html").unwrap();
let source = InputSource::String(String::new());
let uris = vec![RawUri::from("relative.html")];
- let requests = create(uris, &source, &None, &base, &None);
+ let requests = create(uris, &source, None, Some(&base), None);
assert_eq!(requests.len(), 1);
assert!(requests
@@ -237,11 +237,11 @@ mod tests {
#[test]
fn test_absolute_url_resolution() {
- let base = Some(Base::try_from("https://example.com/path/page.html").unwrap());
+ let base = Base::try_from("https://example.com/path/page.html").unwrap();
let source = InputSource::String(String::new());
let uris = vec![RawUri::from("https://another.com/page")];
- let requests = create(uris, &source, &None, &base, &None);
+ let requests = create(uris, &source, None, Some(&base), None);
assert_eq!(requests.len(), 1);
assert!(requests
@@ -251,11 +251,11 @@ mod tests {
#[test]
fn test_root_relative_url_resolution() {
- let base = Some(Base::try_from("https://example.com/path/page.html").unwrap());
+ let base = Base::try_from("https://example.com/path/page.html").unwrap();
let source = InputSource::String(String::new());
let uris = vec![RawUri::from("/root-relative")];
- let requests = create(uris, &source, &None, &base, &None);
+ let requests = create(uris, &source, None, Some(&base), None);
assert_eq!(requests.len(), 1);
assert!(requests
@@ -265,11 +265,11 @@ mod tests {
#[test]
fn test_parent_directory_url_resolution() {
- let base = Some(Base::try_from("https://example.com/path/page.html").unwrap());
+ let base = Base::try_from("https://example.com/path/page.html").unwrap();
let source = InputSource::String(String::new());
let uris = vec![RawUri::from("../parent")];
- let requests = create(uris, &source, &None, &base, &None);
+ let requests = create(uris, &source, None, Some(&base), None);
assert_eq!(requests.len(), 1);
assert!(requests
@@ -279,11 +279,11 @@ mod tests {
#[test]
fn test_fragment_url_resolution() {
- let base = Some(Base::try_from("https://example.com/path/page.html").unwrap());
+ let base = Base::try_from("https://example.com/path/page.html").unwrap();
let source = InputSource::String(String::new());
let uris = vec![RawUri::from("#fragment")];
- let requests = create(uris, &source, &None, &base, &None);
+ let requests = create(uris, &source, None, Some(&base), None);
assert_eq!(requests.len(), 1);
assert!(requests
@@ -293,11 +293,11 @@ mod tests {
#[test]
fn test_relative_url_resolution_from_root_path() {
- let root_path = Some(PathBuf::from("/tmp/lychee"));
+ let root_path = PathBuf::from("/tmp/lychee");
let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
let uris = vec![RawUri::from("relative.html")];
- let requests = create(uris, &source, &root_path, &None, &None);
+ let requests = create(uris, &source, Some(&root_path), None, None);
assert_eq!(requests.len(), 1);
assert!(requests
@@ -307,11 +307,11 @@ mod tests {
#[test]
fn test_absolute_url_resolution_from_root_path() {
- let root_path = Some(PathBuf::from("/tmp/lychee"));
+ let root_path = PathBuf::from("/tmp/lychee");
let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
let uris = vec![RawUri::from("https://another.com/page")];
- let requests = create(uris, &source, &root_path, &None, &None);
+ let requests = create(uris, &source, Some(&root_path), None, None);
assert_eq!(requests.len(), 1);
assert!(requests
@@ -321,11 +321,11 @@ mod tests {
#[test]
fn test_root_relative_url_resolution_from_root_path() {
- let root_path = Some(PathBuf::from("/tmp/lychee"));
+ let root_path = PathBuf::from("/tmp/lychee");
let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
let uris = vec![RawUri::from("/root-relative")];
- let requests = create(uris, &source, &root_path, &None, &None);
+ let requests = create(uris, &source, Some(&root_path), None, None);
assert_eq!(requests.len(), 1);
assert!(requests
@@ -335,11 +335,11 @@ mod tests {
#[test]
fn test_parent_directory_url_resolution_from_root_path() {
- let root_path = Some(PathBuf::from("/tmp/lychee"));
+ let root_path = PathBuf::from("/tmp/lychee");
let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
let uris = vec![RawUri::from("../parent")];
- let requests = create(uris, &source, &root_path, &None, &None);
+ let requests = create(uris, &source, Some(&root_path), None, None);
assert_eq!(requests.len(), 1);
assert!(requests
@@ -349,11 +349,11 @@ mod tests {
#[test]
fn test_fragment_url_resolution_from_root_path() {
- let root_path = Some(PathBuf::from("/tmp/lychee"));
+ let root_path = PathBuf::from("/tmp/lychee");
let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
let uris = vec![RawUri::from("#fragment")];
- let requests = create(uris, &source, &root_path, &None, &None);
+ let requests = create(uris, &source, Some(&root_path), None, None);
assert_eq!(requests.len(), 1);
assert!(requests
@@ -363,12 +363,12 @@ mod tests {
#[test]
fn test_relative_url_resolution_from_root_path_and_base_url() {
- let root_path = Some(PathBuf::from("/tmp/lychee"));
- let base = Some(Base::try_from("https://example.com/path/page.html").unwrap());
+ let root_path = PathBuf::from("/tmp/lychee");
+ let base = Base::try_from("https://example.com/path/page.html").unwrap();
let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
let uris = vec![RawUri::from("relative.html")];
- let requests = create(uris, &source, &root_path, &base, &None);
+ let requests = create(uris, &source, Some(&root_path), Some(&base), None);
assert_eq!(requests.len(), 1);
assert!(requests
@@ -378,12 +378,12 @@ mod tests {
#[test]
fn test_absolute_url_resolution_from_root_path_and_base_url() {
- let root_path = Some(PathBuf::from("/tmp/lychee"));
- let base = Some(Base::try_from("https://example.com/path/page.html").unwrap());
+ let root_path = PathBuf::from("/tmp/lychee");
+ let base = Base::try_from("https://example.com/path/page.html").unwrap();
let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
let uris = vec![RawUri::from("https://another.com/page")];
- let requests = create(uris, &source, &root_path, &base, &None);
+ let requests = create(uris, &source, Some(&root_path), Some(&base), None);
assert_eq!(requests.len(), 1);
assert!(requests
@@ -393,12 +393,12 @@ mod tests {
#[test]
fn test_root_relative_url_resolution_from_root_path_and_base_url() {
- let root_path = Some(PathBuf::from("/tmp/lychee"));
- let base = Some(Base::try_from("https://example.com/path/page.html").unwrap());
+ let root_path = PathBuf::from("/tmp/lychee");
+ let base = Base::try_from("https://example.com/path/page.html").unwrap();
let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
let uris = vec![RawUri::from("/root-relative")];
- let requests = create(uris, &source, &root_path, &base, &None);
+ let requests = create(uris, &source, Some(&root_path), Some(&base), None);
assert_eq!(requests.len(), 1);
assert!(requests
@@ -408,12 +408,12 @@ mod tests {
#[test]
fn test_parent_directory_url_resolution_from_root_path_and_base_url() {
- let root_path = Some(PathBuf::from("/tmp/lychee"));
- let base = Some(Base::try_from("https://example.com/path/page.html").unwrap());
+ let root_path = PathBuf::from("/tmp/lychee");
+ let base = Base::try_from("https://example.com/path/page.html").unwrap();
let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
let uris = vec![RawUri::from("../parent")];
- let requests = create(uris, &source, &root_path, &base, &None);
+ let requests = create(uris, &source, Some(&root_path), Some(&base), None);
assert_eq!(requests.len(), 1);
assert!(requests
@@ -423,12 +423,12 @@ mod tests {
#[test]
fn test_fragment_url_resolution_from_root_path_and_base_url() {
- let root_path = Some(PathBuf::from("/tmp/lychee"));
- let base = Some(Base::try_from("https://example.com/path/page.html").unwrap());
+ let root_path = PathBuf::from("/tmp/lychee");
+ let base = Base::try_from("https://example.com/path/page.html").unwrap();
let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
let uris = vec![RawUri::from("#fragment")];
- let requests = create(uris, &source, &root_path, &base, &None);
+ let requests = create(uris, &source, Some(&root_path), Some(&base), None);
assert_eq!(requests.len(), 1);
assert!(requests
@@ -438,11 +438,10 @@ mod tests {
#[test]
fn test_no_base_url_resolution() {
- let base = None;
let source = InputSource::String(String::new());
let uris = vec![RawUri::from("https://example.com/page")];
- let requests = create(uris, &source, &None, &base, &None);
+ let requests = create(uris, &source, None, None, None);
assert_eq!(requests.len(), 1);
assert!(requests
@@ -452,15 +451,15 @@ mod tests {
#[test]
fn test_create_request_from_relative_file_path() {
- let base = Some(Base::Local(PathBuf::from("/tmp/lychee")));
+ let base = Base::Local(PathBuf::from("/tmp/lychee"));
let input_source = InputSource::FsPath(PathBuf::from("page.html"));
let actual = create_request(
&RawUri::from("file.html"),
&input_source,
- &None,
- &base,
- &None,
+ None,
+ Some(&base),
+ None,
)
.unwrap();
@@ -480,16 +479,16 @@ mod tests {
#[test]
fn test_create_request_from_absolute_file_path() {
- let base = Some(Base::Local(PathBuf::from("/tmp/lychee")));
+ let base = Base::Local(PathBuf::from("/tmp/lychee"));
let input_source = InputSource::FsPath(PathBuf::from("/tmp/lychee/page.html"));
// Use an absolute path that's outside the base directory
let actual = create_request(
&RawUri::from("/usr/local/share/doc/example.html"),
&input_source,
- &None,
- &base,
- &None,
+ None,
+ Some(&base),
+ None,
)
.unwrap();
@@ -509,22 +508,22 @@ mod tests {
#[test]
fn test_parse_relative_path_into_uri() {
- let base = Some(Base::Local(PathBuf::from("/tmp/lychee")));
+ let base = Base::Local(PathBuf::from("/tmp/lychee"));
let source = InputSource::String(String::new());
let raw_uri = RawUri::from("relative.html");
- let uri = try_parse_into_uri(&raw_uri, &source, &None, &base).unwrap();
+ let uri = try_parse_into_uri(&raw_uri, &source, None, Some(&base)).unwrap();
assert_eq!(uri.url.as_str(), "file:///tmp/lychee/relative.html");
}
#[test]
fn test_parse_absolute_path_into_uri() {
- let base = Some(Base::Local(PathBuf::from("/tmp/lychee")));
+ let base = Base::Local(PathBuf::from("/tmp/lychee"));
let source = InputSource::String(String::new());
let raw_uri = RawUri::from("absolute.html");
- let uri = try_parse_into_uri(&raw_uri, &source, &None, &base).unwrap();
+ let uri = try_parse_into_uri(&raw_uri, &source, None, Some(&base)).unwrap();
assert_eq!(uri.url.as_str(), "file:///tmp/lychee/absolute.html");
}
@@ -532,32 +531,30 @@ mod tests {
#[test]
fn test_prepend_with_absolute_local_link_and_root_path() {
let text = "/absolute/path";
- let root_path = Some(PathBuf::from("/root"));
- let result = prepend_root_path_if_absolute_local_link(text, &root_path);
+ let root_path = PathBuf::from("/root");
+ let result = prepend_root_path_if_absolute_local_link(text, Some(&root_path));
assert_eq!(result, "/root/absolute/path");
}
#[test]
fn test_prepend_with_absolute_local_link_and_no_root_path() {
let text = "/absolute/path";
- let root_path: Option<PathBuf> = None;
- let result = prepend_root_path_if_absolute_local_link(text, &root_path);
+ let result = prepend_root_path_if_absolute_local_link(text, None);
assert_eq!(result, "/absolute/path");
}
#[test]
fn test_prepend_with_relative_link_and_root_path() {
let text = "relative/path";
- let root_path = Some(PathBuf::from("/root"));
- let result = prepend_root_path_if_absolute_local_link(text, &root_path);
+ let root_path = PathBuf::from("/root");
+ let result = prepend_root_path_if_absolute_local_link(text, Some(&root_path));
assert_eq!(result, "relative/path");
}
#[test]
fn test_prepend_with_relative_link_and_no_root_path() {
let text = "relative/path";
- let root_path = None;
- let result = prepend_root_path_if_absolute_local_link(text, &root_path);
+ let result = prepend_root_path_if_absolute_local_link(text, None);
assert_eq!(result, "relative/path");
}
}
From 0bb19b972c6c263a50ccd22f1b695f55e962f279 Mon Sep 17 00:00:00 2001
From: Trask Stalnaker
Date: Mon, 2 Dec 2024 20:28:29 -0800
Subject: [PATCH 23/26] Remove outdated comment
---
examples/collect_links/collect_links.rs | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/examples/collect_links/collect_links.rs b/examples/collect_links/collect_links.rs
index 375dad3e30..57edd5fcdb 100644
--- a/examples/collect_links/collect_links.rs
+++ b/examples/collect_links/collect_links.rs
@@ -21,7 +21,7 @@ async fn main() -> Result<()> {
},
];
- let links = Collector::default() // root_path and base
+ let links = Collector::default()
.skip_missing_inputs(false) // don't skip missing inputs? (default=false)
.skip_hidden(false) // skip hidden files? (default=true)
.skip_ignored(false) // skip files that are ignored by git? (default=true)
From b767b36b010ed6568031f5cd4a74d7080bff13b1 Mon Sep 17 00:00:00 2001
From: Trask Stalnaker
Date: Fri, 6 Dec 2024 13:14:46 -0800
Subject: [PATCH 24/26] Rename --root-path to --root-dir
---
README.md | 2 +-
.../nested/about/index.html | 0
.../nested/another page/index.html | 0
.../nested/index.html | 0
lychee-bin/src/main.rs | 2 +-
lychee-bin/src/options.rs | 2 +-
lychee-bin/tests/cli.rs | 10 +-
lychee-lib/src/collector.rs | 20 ++--
lychee-lib/src/utils/request.rs | 98 +++++++++----------
9 files changed, 67 insertions(+), 67 deletions(-)
rename fixtures/{resolve_paths_from_root_path => resolve_paths_from_root_dir}/nested/about/index.html (100%)
rename fixtures/{resolve_paths_from_root_path => resolve_paths_from_root_dir}/nested/another page/index.html (100%)
rename fixtures/{resolve_paths_from_root_path => resolve_paths_from_root_dir}/nested/index.html (100%)
diff --git a/README.md b/README.md
index 5f0da195d8..0e211992ba 100644
--- a/README.md
+++ b/README.md
@@ -480,7 +480,7 @@ Options:
-b, --base <BASE>
Base URL or website root directory to check relative URLs e.g. <https://example.com> or `/path/to/public`
- --root-path <ROOT_PATH>
+ --root-dir <ROOT_DIR>
Root path to use when checking absolute local links
--basic-auth <BASIC_AUTH>
diff --git a/fixtures/resolve_paths_from_root_path/nested/about/index.html b/fixtures/resolve_paths_from_root_dir/nested/about/index.html
similarity index 100%
rename from fixtures/resolve_paths_from_root_path/nested/about/index.html
rename to fixtures/resolve_paths_from_root_dir/nested/about/index.html
diff --git a/fixtures/resolve_paths_from_root_path/nested/another page/index.html b/fixtures/resolve_paths_from_root_dir/nested/another page/index.html
similarity index 100%
rename from fixtures/resolve_paths_from_root_path/nested/another page/index.html
rename to fixtures/resolve_paths_from_root_dir/nested/another page/index.html
diff --git a/fixtures/resolve_paths_from_root_path/nested/index.html b/fixtures/resolve_paths_from_root_dir/nested/index.html
similarity index 100%
rename from fixtures/resolve_paths_from_root_path/nested/index.html
rename to fixtures/resolve_paths_from_root_dir/nested/index.html
diff --git a/lychee-bin/src/main.rs b/lychee-bin/src/main.rs
index 3250bdf41b..b1e6988176 100644
--- a/lychee-bin/src/main.rs
+++ b/lychee-bin/src/main.rs
@@ -288,7 +288,7 @@ fn underlying_io_error_kind(error: &Error) -> Option {
async fn run(opts: &LycheeOptions) -> Result {
let inputs = opts.inputs()?;
- let mut collector = Collector::new(opts.config.root_path.clone(), opts.config.base.clone())
+ let mut collector = Collector::new(opts.config.root_dir.clone(), opts.config.base.clone())
.skip_missing_inputs(opts.config.skip_missing)
.skip_hidden(!opts.config.hidden)
.skip_ignored(!opts.config.no_ignore)
diff --git a/lychee-bin/src/options.rs b/lychee-bin/src/options.rs
index 4706fa1433..e1a706483f 100644
--- a/lychee-bin/src/options.rs
+++ b/lychee-bin/src/options.rs
@@ -448,7 +448,7 @@ separated list of accepted status codes. This example will accept 200, 201,
/// Root path to use when checking absolute local links
#[arg(long)]
#[serde(default)]
- pub(crate) root_path: Option<PathBuf>,
+ pub(crate) root_dir: Option<PathBuf>,
/// Basic authentication support. E.g. `http://example.com username:password`
#[arg(long)]
diff --git a/lychee-bin/tests/cli.rs b/lychee-bin/tests/cli.rs
index 85a8bee3e0..184a5f6706 100644
--- a/lychee-bin/tests/cli.rs
+++ b/lychee-bin/tests/cli.rs
@@ -394,13 +394,13 @@ mod cli {
}
#[test]
- fn test_resolve_paths_from_root_path() {
+ fn test_resolve_paths_from_root_dir() {
let mut cmd = main_command();
- let dir = fixtures_path().join("resolve_paths_from_root_path");
+ let dir = fixtures_path().join("resolve_paths_from_root_dir");
cmd.arg("--offline")
.arg("--include-fragments")
- .arg("--root-path")
+ .arg("--root-dir")
.arg(&dir)
.arg(dir.join("nested").join("index.html"))
.env_clear()
@@ -412,12 +412,12 @@ mod cli {
}
#[test]
- fn test_resolve_paths_from_root_path_and_base_url() {
+ fn test_resolve_paths_from_root_dir_and_base_url() {
let mut cmd = main_command();
let dir = fixtures_path();
cmd.arg("--offline")
- .arg("--root-path")
+ .arg("--root-dir")
.arg("/resolve_paths")
.arg("--base")
.arg(&dir)
diff --git a/lychee-lib/src/collector.rs b/lychee-lib/src/collector.rs
index dc6826fccb..cd07af9da1 100644
--- a/lychee-lib/src/collector.rs
+++ b/lychee-lib/src/collector.rs
@@ -22,7 +22,7 @@ pub struct Collector {
skip_hidden: bool,
include_verbatim: bool,
use_html5ever: bool,
- root_path: Option<PathBuf>,
+ root_dir: Option<PathBuf>,
base: Option<Base>,
}
@@ -35,7 +35,7 @@ impl Default for Collector {
use_html5ever: false,
skip_hidden: true,
skip_ignored: true,
- root_path: None,
+ root_dir: None,
base: None,
}
}
@@ -44,7 +44,7 @@ impl Default for Collector {
impl Collector {
/// Create a new collector with an empty cache
#[must_use]
- pub const fn new(root_path: Option<PathBuf>, base: Option<Base>) -> Self {
+ pub const fn new(root_dir: Option<PathBuf>, base: Option<Base>) -> Self {
Collector {
basic_auth_extractor: None,
skip_missing_inputs: false,
@@ -52,7 +52,7 @@ impl Collector {
use_html5ever: false,
skip_hidden: true,
skip_ignored: true,
- root_path,
+ root_dir,
base,
}
}
@@ -137,7 +137,7 @@ impl Collector {
})
.flatten()
.par_then_unordered(None, move |(content, base)| {
- let root_path = self.root_path.clone();
+ let root_dir = self.root_dir.clone();
let basic_auth_extractor = self.basic_auth_extractor.clone();
async move {
let content = content?;
@@ -146,7 +146,7 @@ impl Collector {
let requests = request::create(
uris,
&content.source,
- root_path.as_ref(),
+ root_dir.as_ref(),
base.as_ref(),
basic_auth_extractor.as_ref(),
);
@@ -175,20 +175,20 @@ mod tests {
// Helper function to run the collector on the given inputs
async fn collect(
inputs: Vec<Input>,
- root_path: Option<PathBuf>,
+ root_dir: Option<PathBuf>,
base: Option<Base>,
) -> HashSet<Uri> {
- let responses = Collector::new(root_path, base).collect_links(inputs);
+ let responses = Collector::new(root_dir, base).collect_links(inputs);
responses.map(|r| r.unwrap().uri).collect().await
}
// Helper function for collecting verbatim links
async fn collect_verbatim(
inputs: Vec<Input>,
- root_path: Option<PathBuf>,
+ root_dir: Option<PathBuf>,
base: Option<Base>,
) -> HashSet<Uri> {
- let responses = Collector::new(root_path, base)
+ let responses = Collector::new(root_dir, base)
.include_verbatim(true)
.collect_links(inputs);
responses.map(|r| r.unwrap().uri).collect().await
diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs
index 5df498fdd5..ca66f7d857 100644
--- a/lychee-lib/src/utils/request.rs
+++ b/lychee-lib/src/utils/request.rs
@@ -25,11 +25,11 @@ fn extract_credentials(
fn create_request(
raw_uri: &RawUri,
source: &InputSource,
- root_path: Option<&PathBuf>,
+ root_dir: Option<&PathBuf>,
base: Option<&Base>,
extractor: Option<&BasicAuthExtractor>,
) -> Result<Request> {
- let uri = try_parse_into_uri(raw_uri, source, root_path, base)?;
+ let uri = try_parse_into_uri(raw_uri, source, root_dir, base)?;
let source = truncate_source(source);
let element = raw_uri.element.clone();
let attribute = raw_uri.attribute.clone();
@@ -52,10 +52,10 @@ fn create_request(
fn try_parse_into_uri(
raw_uri: &RawUri,
source: &InputSource,
- root_path: Option<&PathBuf>,
+ root_dir: Option<&PathBuf>,
base: Option<&Base>,
) -> Result<Uri> {
- let text = prepend_root_path_if_absolute_local_link(&raw_uri.text, root_path);
+ let text = prepend_root_dir_if_absolute_local_link(&raw_uri.text, root_dir);
let uri = match Uri::try_from(raw_uri.clone()) {
Ok(uri) => uri,
Err(_) => match base {
@@ -65,7 +65,7 @@ fn try_parse_into_uri(
},
None => match source {
InputSource::FsPath(root) => {
- create_uri_from_file_path(root, &text, root_path.is_none())?
+ create_uri_from_file_path(root, &text, root_dir.is_none())?
}
_ => return Err(ErrorKind::UnsupportedUriType(text)),
},
@@ -136,7 +136,7 @@ fn truncate_source(source: &InputSource) -> InputSource {
pub(crate) fn create(
uris: Vec<RawUri>,
source: &InputSource,
- root_path: Option<&PathBuf>,
+ root_dir: Option<&PathBuf>,
base: Option<&Base>,
extractor: Option<&BasicAuthExtractor>,
) -> HashSet<Request> {
@@ -144,7 +144,7 @@ pub(crate) fn create(
uris.into_iter()
.filter_map(|raw_uri| {
- match create_request(&raw_uri, source, root_path, base.as_ref(), extractor) {
+ match create_request(&raw_uri, source, root_dir, base.as_ref(), extractor) {
Ok(request) => Some(request),
Err(e) => {
warn!("Error creating request: {:?}", e);
@@ -193,9 +193,9 @@ fn resolve_and_create_url(
Ok(url)
}
-fn prepend_root_path_if_absolute_local_link(text: &str, root_path: Option<&PathBuf>) -> String {
+fn prepend_root_dir_if_absolute_local_link(text: &str, root_dir: Option<&PathBuf>) -> String {
if text.starts_with('/') {
- if let Some(path) = root_path {
+ if let Some(path) = root_dir {
if let Some(path_str) = path.to_str() {
return format!("{path_str}{text}");
}
@@ -292,12 +292,12 @@ mod tests {
}
#[test]
- fn test_relative_url_resolution_from_root_path() {
- let root_path = PathBuf::from("/tmp/lychee");
+ fn test_relative_url_resolution_from_root_dir() {
+ let root_dir = PathBuf::from("/tmp/lychee");
let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
let uris = vec![RawUri::from("relative.html")];
- let requests = create(uris, &source, Some(&root_path), None, None);
+ let requests = create(uris, &source, Some(&root_dir), None, None);
assert_eq!(requests.len(), 1);
assert!(requests
@@ -306,12 +306,12 @@ mod tests {
}
#[test]
- fn test_absolute_url_resolution_from_root_path() {
- let root_path = PathBuf::from("/tmp/lychee");
+ fn test_absolute_url_resolution_from_root_dir() {
+ let root_dir = PathBuf::from("/tmp/lychee");
let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
let uris = vec![RawUri::from("https://another.com/page")];
- let requests = create(uris, &source, Some(&root_path), None, None);
+ let requests = create(uris, &source, Some(&root_dir), None, None);
assert_eq!(requests.len(), 1);
assert!(requests
@@ -320,12 +320,12 @@ mod tests {
}
#[test]
- fn test_root_relative_url_resolution_from_root_path() {
- let root_path = PathBuf::from("/tmp/lychee");
+ fn test_root_relative_url_resolution_from_root_dir() {
+ let root_dir = PathBuf::from("/tmp/lychee");
let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
let uris = vec![RawUri::from("/root-relative")];
- let requests = create(uris, &source, Some(&root_path), None, None);
+ let requests = create(uris, &source, Some(&root_dir), None, None);
assert_eq!(requests.len(), 1);
assert!(requests
@@ -334,12 +334,12 @@ mod tests {
}
#[test]
- fn test_parent_directory_url_resolution_from_root_path() {
- let root_path = PathBuf::from("/tmp/lychee");
+ fn test_parent_directory_url_resolution_from_root_dir() {
+ let root_dir = PathBuf::from("/tmp/lychee");
let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
let uris = vec![RawUri::from("../parent")];
- let requests = create(uris, &source, Some(&root_path), None, None);
+ let requests = create(uris, &source, Some(&root_dir), None, None);
assert_eq!(requests.len(), 1);
assert!(requests
@@ -348,12 +348,12 @@ mod tests {
}
#[test]
- fn test_fragment_url_resolution_from_root_path() {
- let root_path = PathBuf::from("/tmp/lychee");
+ fn test_fragment_url_resolution_from_root_dir() {
+ let root_dir = PathBuf::from("/tmp/lychee");
let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
let uris = vec![RawUri::from("#fragment")];
- let requests = create(uris, &source, Some(&root_path), None, None);
+ let requests = create(uris, &source, Some(&root_dir), None, None);
assert_eq!(requests.len(), 1);
assert!(requests
@@ -362,13 +362,13 @@ mod tests {
}
#[test]
- fn test_relative_url_resolution_from_root_path_and_base_url() {
- let root_path = PathBuf::from("/tmp/lychee");
+ fn test_relative_url_resolution_from_root_dir_and_base_url() {
+ let root_dir = PathBuf::from("/tmp/lychee");
let base = Base::try_from("https://example.com/path/page.html").unwrap();
let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
let uris = vec![RawUri::from("relative.html")];
- let requests = create(uris, &source, Some(&root_path), Some(&base), None);
+ let requests = create(uris, &source, Some(&root_dir), Some(&base), None);
assert_eq!(requests.len(), 1);
assert!(requests
@@ -377,13 +377,13 @@ mod tests {
}
#[test]
- fn test_absolute_url_resolution_from_root_path_and_base_url() {
- let root_path = PathBuf::from("/tmp/lychee");
+ fn test_absolute_url_resolution_from_root_dir_and_base_url() {
+ let root_dir = PathBuf::from("/tmp/lychee");
let base = Base::try_from("https://example.com/path/page.html").unwrap();
let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
let uris = vec![RawUri::from("https://another.com/page")];
- let requests = create(uris, &source, Some(&root_path), Some(&base), None);
+ let requests = create(uris, &source, Some(&root_dir), Some(&base), None);
assert_eq!(requests.len(), 1);
assert!(requests
@@ -392,13 +392,13 @@ mod tests {
}
#[test]
- fn test_root_relative_url_resolution_from_root_path_and_base_url() {
- let root_path = PathBuf::from("/tmp/lychee");
+ fn test_root_relative_url_resolution_from_root_dir_and_base_url() {
+ let root_dir = PathBuf::from("/tmp/lychee");
let base = Base::try_from("https://example.com/path/page.html").unwrap();
let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
let uris = vec![RawUri::from("/root-relative")];
- let requests = create(uris, &source, Some(&root_path), Some(&base), None);
+ let requests = create(uris, &source, Some(&root_dir), Some(&base), None);
assert_eq!(requests.len(), 1);
assert!(requests
@@ -407,13 +407,13 @@ mod tests {
}
#[test]
- fn test_parent_directory_url_resolution_from_root_path_and_base_url() {
- let root_path = PathBuf::from("/tmp/lychee");
+ fn test_parent_directory_url_resolution_from_root_dir_and_base_url() {
+ let root_dir = PathBuf::from("/tmp/lychee");
let base = Base::try_from("https://example.com/path/page.html").unwrap();
let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
let uris = vec![RawUri::from("../parent")];
- let requests = create(uris, &source, Some(&root_path), Some(&base), None);
+ let requests = create(uris, &source, Some(&root_dir), Some(&base), None);
assert_eq!(requests.len(), 1);
assert!(requests
@@ -422,13 +422,13 @@ mod tests {
}
#[test]
- fn test_fragment_url_resolution_from_root_path_and_base_url() {
- let root_path = PathBuf::from("/tmp/lychee");
+ fn test_fragment_url_resolution_from_root_dir_and_base_url() {
+ let root_dir = PathBuf::from("/tmp/lychee");
let base = Base::try_from("https://example.com/path/page.html").unwrap();
let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
let uris = vec![RawUri::from("#fragment")];
- let requests = create(uris, &source, Some(&root_path), Some(&base), None);
+ let requests = create(uris, &source, Some(&root_dir), Some(&base), None);
assert_eq!(requests.len(), 1);
assert!(requests
@@ -529,32 +529,32 @@ mod tests {
}
#[test]
- fn test_prepend_with_absolute_local_link_and_root_path() {
+ fn test_prepend_with_absolute_local_link_and_root_dir() {
let text = "/absolute/path";
- let root_path = PathBuf::from("/root");
- let result = prepend_root_path_if_absolute_local_link(text, Some(&root_path));
+ let root_dir = PathBuf::from("/root");
+ let result = prepend_root_dir_if_absolute_local_link(text, Some(&root_dir));
assert_eq!(result, "/root/absolute/path");
}
#[test]
- fn test_prepend_with_absolute_local_link_and_no_root_path() {
+ fn test_prepend_with_absolute_local_link_and_no_root_dir() {
let text = "/absolute/path";
- let result = prepend_root_path_if_absolute_local_link(text, None);
+ let result = prepend_root_dir_if_absolute_local_link(text, None);
assert_eq!(result, "/absolute/path");
}
#[test]
- fn test_prepend_with_relative_link_and_root_path() {
+ fn test_prepend_with_relative_link_and_root_dir() {
let text = "relative/path";
- let root_path = PathBuf::from("/root");
- let result = prepend_root_path_if_absolute_local_link(text, Some(&root_path));
+ let root_dir = PathBuf::from("/root");
+ let result = prepend_root_dir_if_absolute_local_link(text, Some(&root_dir));
assert_eq!(result, "relative/path");
}
#[test]
- fn test_prepend_with_relative_link_and_no_root_path() {
+ fn test_prepend_with_relative_link_and_no_root_dir() {
let text = "relative/path";
- let result = prepend_root_path_if_absolute_local_link(text, None);
+ let result = prepend_root_dir_if_absolute_local_link(text, None);
assert_eq!(result, "relative/path");
}
}
From 83b28c491198248be56775d7cd4a9465d3f52a41 Mon Sep 17 00:00:00 2001
From: Trask Stalnaker
Date: Tue, 10 Dec 2024 13:58:58 -0800
Subject: [PATCH 25/26] Restrict --root-dir to absolute paths for now
---
README.md | 2 +-
lychee-bin/src/main.rs | 6 ++++++
lychee-bin/src/options.rs | 3 ++-
3 files changed, 9 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index 0e211992ba..d2b6a2d896 100644
--- a/README.md
+++ b/README.md
@@ -481,7 +481,7 @@ Options:
Base URL or website root directory to check relative URLs e.g. <https://example.com> or `/path/to/public`
--root-dir
- Root path to use when checking absolute local links
+ Root path to use when checking absolute local links, must be an absolute path
--basic-auth
Basic authentication support. E.g. `http://example.com username:password`
diff --git a/lychee-bin/src/main.rs b/lychee-bin/src/main.rs
index b1e6988176..8c9635b2b2 100644
--- a/lychee-bin/src/main.rs
+++ b/lychee-bin/src/main.rs
@@ -288,6 +288,12 @@ fn underlying_io_error_kind(error: &Error) -> Option {
async fn run(opts: &LycheeOptions) -> Result {
let inputs = opts.inputs()?;
+ if let Some(root_dir) = &opts.config.root_dir {
+ if root_dir.is_relative() {
+ bail!("`--root_dir` must be an absolute path");
+ }
+ }
+
let mut collector = Collector::new(opts.config.root_dir.clone(), opts.config.base.clone())
.skip_missing_inputs(opts.config.skip_missing)
.skip_hidden(!opts.config.hidden)
diff --git a/lychee-bin/src/options.rs b/lychee-bin/src/options.rs
index e1a706483f..bcb70da09c 100644
--- a/lychee-bin/src/options.rs
+++ b/lychee-bin/src/options.rs
@@ -445,7 +445,8 @@ separated list of accepted status codes. This example will accept 200, 201,
#[serde(default)]
pub(crate) base: Option<Base>,
- /// Root path to use when checking absolute local links
+ /// Root path to use when checking absolute local links,
+ /// must be an absolute path
#[arg(long)]
#[serde(default)]
pub(crate) root_dir: Option<PathBuf>,
From 909e8c3f303da3d1361064195f6c627af2cf582c Mon Sep 17 00:00:00 2001
From: Trask Stalnaker
Date: Thu, 12 Dec 2024 13:30:09 -0800
Subject: [PATCH 26/26] Move root dir check
---
lychee-bin/src/main.rs | 8 +-----
lychee-lib/src/collector.rs | 49 +++++++++++++++++++++--------------
lychee-lib/src/types/error.rs | 5 ++++
3 files changed, 35 insertions(+), 27 deletions(-)
diff --git a/lychee-bin/src/main.rs b/lychee-bin/src/main.rs
index 8c9635b2b2..521a9b8eef 100644
--- a/lychee-bin/src/main.rs
+++ b/lychee-bin/src/main.rs
@@ -288,13 +288,7 @@ fn underlying_io_error_kind(error: &Error) -> Option {
async fn run(opts: &LycheeOptions) -> Result {
let inputs = opts.inputs()?;
- if let Some(root_dir) = &opts.config.root_dir {
- if root_dir.is_relative() {
- bail!("`--root_dir` must be an absolute path");
- }
- }
-
- let mut collector = Collector::new(opts.config.root_dir.clone(), opts.config.base.clone())
+ let mut collector = Collector::new(opts.config.root_dir.clone(), opts.config.base.clone())?
.skip_missing_inputs(opts.config.skip_missing)
.skip_hidden(!opts.config.hidden)
.skip_ignored(!opts.config.no_ignore)
diff --git a/lychee-lib/src/collector.rs b/lychee-lib/src/collector.rs
index cd07af9da1..7eb4c8c82b 100644
--- a/lychee-lib/src/collector.rs
+++ b/lychee-lib/src/collector.rs
@@ -1,3 +1,4 @@
+use crate::ErrorKind;
use crate::InputSource;
use crate::{
basic_auth::BasicAuthExtractor, extract::Extractor, types::uri::raw::RawUri, utils::request,
@@ -43,9 +44,17 @@ impl Default for Collector {
impl Collector {
/// Create a new collector with an empty cache
- #[must_use]
- pub const fn new(root_dir: Option<PathBuf>, base: Option<Base>) -> Self {
- Collector {
+ ///
+ /// # Errors
+ ///
+ /// Returns an `Err` if the `root_dir` is not an absolute path
+ pub fn new(root_dir: Option<PathBuf>, base: Option<Base>) -> Result<Self> {
+ if let Some(root_dir) = &root_dir {
+ if root_dir.is_relative() {
+ return Err(ErrorKind::RootDirMustBeAbsolute(root_dir.clone()));
+ }
+ }
+ Ok(Collector {
basic_auth_extractor: None,
skip_missing_inputs: false,
include_verbatim: false,
@@ -54,7 +63,7 @@ impl Collector {
skip_ignored: true,
root_dir,
base,
- }
+ })
}
/// Skip missing input files (default is to error if they don't exist)
@@ -177,9 +186,9 @@ mod tests {
inputs: Vec<Input>,
root_dir: Option<PathBuf>,
base: Option<Base>,
- ) -> HashSet<Uri> {
- let responses = Collector::new(root_dir, base).collect_links(inputs);
- responses.map(|r| r.unwrap().uri).collect().await
+ ) -> Result<HashSet<Uri>> {
+ let responses = Collector::new(root_dir, base)?.collect_links(inputs);
+ Ok(responses.map(|r| r.unwrap().uri).collect().await)
}
// Helper function for collecting verbatim links
@@ -187,11 +196,11 @@ mod tests {
inputs: Vec<Input>,
root_dir: Option<PathBuf>,
base: Option<Base>,
- ) -> HashSet<Uri> {
- let responses = Collector::new(root_dir, base)
+ ) -> Result<HashSet<Uri>> {
+ let responses = Collector::new(root_dir, base)?
.include_verbatim(true)
.collect_links(inputs);
- responses.map(|r| r.unwrap().uri).collect().await
+ Ok(responses.map(|r| r.unwrap().uri).collect().await)
}
const TEST_STRING: &str = "http://test-string.com";
@@ -279,7 +288,7 @@ mod tests {
},
];
- let links = collect_verbatim(inputs, None, None).await;
+ let links = collect_verbatim(inputs, None, None).await.ok().unwrap();
let expected_links = HashSet::from_iter([
website(TEST_STRING),
@@ -302,7 +311,7 @@ mod tests {
file_type_hint: Some(FileType::Markdown),
excluded_paths: None,
};
- let links = collect(vec![input], None, Some(base)).await;
+ let links = collect(vec![input], None, Some(base)).await.ok().unwrap();
let expected_links = HashSet::from_iter([
website("https://endler.dev"),
@@ -328,7 +337,7 @@ mod tests {
file_type_hint: Some(FileType::Html),
excluded_paths: None,
};
- let links = collect(vec![input], None, Some(base)).await;
+ let links = collect(vec![input], None, Some(base)).await.ok().unwrap();
let expected_links = HashSet::from_iter([
website("https://github.com/lycheeverse/lychee/"),
@@ -357,7 +366,7 @@ mod tests {
file_type_hint: Some(FileType::Html),
excluded_paths: None,
};
- let links = collect(vec![input], None, Some(base)).await;
+ let links = collect(vec![input], None, Some(base)).await.ok().unwrap();
let expected_links = HashSet::from_iter([
website("https://example.com/static/image.png"),
@@ -384,7 +393,7 @@ mod tests {
excluded_paths: None,
};
- let links = collect(vec![input], None, Some(base)).await;
+ let links = collect(vec![input], None, Some(base)).await.ok().unwrap();
let expected = HashSet::from_iter([
website("https://localhost.com/@/internal.md"),
@@ -406,7 +415,7 @@ mod tests {
file_type_hint: Some(FileType::Html),
excluded_paths: None,
};
- let links = collect(vec![input], None, Some(base)).await;
+ let links = collect(vec![input], None, Some(base)).await.ok().unwrap();
let expected_links = HashSet::from_iter([
// the body links wouldn't be present if the file was parsed strictly as XML
@@ -439,7 +448,7 @@ mod tests {
excluded_paths: None,
};
- let links = collect(vec![input], None, None).await;
+ let links = collect(vec![input], None, None).await.ok().unwrap();
let expected_urls = HashSet::from_iter([
website("https://github.com/lycheeverse/lychee/"),
@@ -458,7 +467,7 @@ mod tests {
file_type_hint: None,
excluded_paths: None,
};
- let links = collect(vec![input], None, None).await;
+ let links = collect(vec![input], None, None).await.ok().unwrap();
let expected_links = HashSet::from_iter([mail("user@example.com")]);
@@ -501,7 +510,7 @@ mod tests {
},
];
- let links = collect(inputs, None, None).await;
+ let links = collect(inputs, None, None).await.ok().unwrap();
let expected_links = HashSet::from_iter([
website(&format!(
@@ -535,7 +544,7 @@ mod tests {
excluded_paths: None,
};
- let links = collect(vec![input], None, Some(base)).await;
+ let links = collect(vec![input], None, Some(base)).await.ok().unwrap();
let expected_links = HashSet::from_iter([
path("/path/to/root/index.html"),
diff --git a/lychee-lib/src/types/error.rs b/lychee-lib/src/types/error.rs
index 7246fe7e84..cbcfefe5e9 100644
--- a/lychee-lib/src/types/error.rs
+++ b/lychee-lib/src/types/error.rs
@@ -94,6 +94,10 @@ pub enum ErrorKind {
#[error("Cannot convert path '{0}' to a URI")]
InvalidPathToUri(String),
+ /// Root dir must be an absolute path
+ #[error("Root dir must be an absolute path: '{0}'")]
+ RootDirMustBeAbsolute(PathBuf),
+
/// The given URI type is not supported
#[error("Unsupported URI type: '{0}'")]
UnsupportedUriType(String),
@@ -310,6 +314,7 @@ impl Hash for ErrorKind {
Self::InvalidBase(base, e) => (base, e).hash(state),
Self::InvalidBaseJoin(s) => s.hash(state),
Self::InvalidPathToUri(s) => s.hash(state),
+ Self::RootDirMustBeAbsolute(s) => s.hash(state),
Self::UnsupportedUriType(s) => s.hash(state),
Self::InvalidUrlRemap(remap) => (remap).hash(state),
Self::InvalidHeader(e) => e.to_string().hash(state),