Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce --root-dir #1576

Merged
merged 26 commits into from
Dec 13, 2024
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,9 @@ Options:
-b, --base <BASE>
Base URL or website root directory to check relative URLs e.g. <https://example.com> or `/path/to/public`

--root-path <ROOT_PATH>
Root path to use when checking absolute local links, base option is ignored when this is set

--basic-auth <BASIC_AUTH>
Basic authentication support. E.g. `http://example.com username:password`

Expand Down
8 changes: 8 additions & 0 deletions fixtures/resolve_paths_from_root_path/nested/about/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<html>
<head>
<title>About</title>
</head>
<body>
<h1 id="fragment">About</h1>
</body>
</html>
Empty file.
34 changes: 34 additions & 0 deletions fixtures/resolve_paths_from_root_path/nested/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
<html>
<head>
<title>Index</title>
</head>
<body>
<h1>Index Title</h1>
<a id="good"></a>
<p>
<ul>
<li>
<a href="/nested">home</a>
</li>
<li>
<a href="/nested/about">About</a>
</li>
<li>
<a href="/nested/another page">About</a>
</li>
<li>
<a href="/nested/about/index.html#fragment">Fragment</a>
</li>
<li>
<a href="/nested/about/index.html#missing">Missing</a>
</li>
<li>
<a href="#good">Good</a>
</li>
<li>
<a href="#bad">Bad</a>
</li>
</ul>
</p>
</body>
</html>
10 changes: 8 additions & 2 deletions lychee-bin/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use crate::options::Config;
use crate::parse::{parse_duration_secs, parse_headers, parse_remaps};
use anyhow::{Context, Result};
use http::StatusCode;
use lychee_lib::{Client, ClientBuilder};
use lychee_lib::{Base, Client, ClientBuilder};
use regex::RegexSet;
use reqwest_cookie_store::CookieStoreMutex;
use std::sync::Arc;
Expand Down Expand Up @@ -53,9 +53,15 @@ pub(crate) fn create(cfg: &Config, cookie_jar: Option<&Arc<CookieStoreMutex>>) -
cfg.include_mail
};

// `--root-path` takes precedence: when it is set, the configured `base` is
// ignored and a root-path base is built from it instead. Otherwise fall
// back to a clone of the configured `base` (which may itself be `None`).
let base = cfg
    .root_path
    .as_deref()
    .map(Base::create_root_path)
    .or_else(|| cfg.base.clone());

ClientBuilder::builder()
.remaps(remaps)
.base(cfg.base.clone())
.base(base)
.includes(includes)
.excludes(excludes)
.exclude_all_private(cfg.exclude_all_private)
Expand Down
9 changes: 8 additions & 1 deletion lychee-bin/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ use openssl_sys as _; // required for vendored-openssl feature
use options::LYCHEE_CONFIG_FILE;
use ring as _; // required for apple silicon

use lychee_lib::Base;
use lychee_lib::BasicAuthExtractor;
use lychee_lib::Collector;
use lychee_lib::CookieJar;
Expand Down Expand Up @@ -288,7 +289,13 @@ fn underlying_io_error_kind(error: &Error) -> Option<io::ErrorKind> {
async fn run(opts: &LycheeOptions) -> Result<i32> {
let inputs = opts.inputs()?;

let mut collector = Collector::new(opts.config.base.clone())
// `--root-path` takes precedence: when it is set, the configured `base` is
// ignored and a root-path base is built from it instead. Otherwise fall
// back to a clone of the configured `base` (which may itself be `None`).
let base = opts
    .config
    .root_path
    .as_deref()
    .map(Base::create_root_path)
    .or_else(|| opts.config.base.clone());

let mut collector = Collector::new(base)
.skip_missing_inputs(opts.config.skip_missing)
.skip_hidden(!opts.config.hidden)
.skip_ignored(!opts.config.no_ignore)
Expand Down
6 changes: 6 additions & 0 deletions lychee-bin/src/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,12 @@ separated list of accepted status codes. This example will accept 200, 201,
#[serde(default)]
pub(crate) base: Option<Base>,

/// Root path to use when checking absolute local links,
/// base option is ignored when this is set
#[arg(long)]
#[serde(default)]
pub(crate) root_path: Option<String>,

/// Basic authentication support. E.g. `http://example.com username:password`
#[arg(long)]
#[serde(default)]
Expand Down
18 changes: 18 additions & 0 deletions lychee-bin/tests/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,24 @@ mod cli {
.stdout(contains("3 OK"));
}

/// Runs the CLI offline with `--include-fragments` and `--root-path` pointing
/// at the `resolve_paths_from_root_path` fixture directory, checking
/// `nested/index.html` inside it.
#[test]
fn test_resolve_paths_from_root_path() {
    let mut cmd = main_command();
    let root = fixtures_path().join("resolve_paths_from_root_path");
    let input = root.join("nested").join("index.html");

    // The fixture page contains seven links. The two expected failures are
    // presumably the `#missing` and `#bad` fragments, which have no matching
    // `id` in the fixture pages.
    cmd.args(["--offline", "--include-fragments", "--root-path"])
        .arg(&root)
        .arg(input)
        .env_clear()
        .assert()
        .failure()
        .stdout(contains("7 Total"))
        .stdout(contains("5 OK"))
        .stdout(contains("2 Errors"));
}

#[test]
fn test_youtube_quirk() {
let url = "https://www.youtube.com/watch?v=NlKuICiT470&list=PLbWDhxwM_45mPVToqaIZNbZeIzFchsKKQ&index=7";
Expand Down
3 changes: 2 additions & 1 deletion lychee-lib/src/collector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,8 @@ impl Collector {
let content = content?;
let extractor = Extractor::new(self.use_html5ever, self.include_verbatim);
let uris: Vec<RawUri> = extractor.extract(&content);
let requests = request::create(uris, &content, &base, &basic_auth_extractor);
let requests =
request::create(uris, &content.source, &base, &basic_auth_extractor);
Result::Ok(stream::iter(requests.into_iter().map(Ok)))
}
})
Expand Down
1 change: 1 addition & 0 deletions lychee-lib/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ pub mod remap;
/// local IPs or e-mail addresses
pub mod filter;

/// Test utilities
trask marked this conversation as resolved.
Show resolved Hide resolved
#[cfg(test)]
#[macro_use]
pub mod test_utils;
Expand Down
16 changes: 14 additions & 2 deletions lychee-lib/src/types/base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ pub enum Base {
Local(PathBuf),
/// Remote URL pointing to a website homepage
Remote(Url),
/// Root path for checking absolute local links
RootPath(PathBuf),
}

impl Base {
Expand All @@ -27,18 +29,28 @@ impl Base {
let full_path = path.join(link);
Url::from_file_path(full_path).ok()
}
Self::RootPath(_path) => {
// this is unused currently because joining on RootPath is handled by create_uri_from_file_path
unreachable!()
}
trask marked this conversation as resolved.
Show resolved Hide resolved
}
}

/// Return the directory if the base is not remote.
///
/// `Local` and `RootPath` both carry a filesystem path, which is returned
/// as an owned clone. A `Remote` base has no directory and yields `None`.
#[must_use]
pub(crate) fn dir(&self) -> Option<PathBuf> {
    match self {
        Self::Remote(_) => None,
        // A single arm covers both path-backed variants; a separate
        // `Self::Local` arm ahead of it would make the `Local` alternative
        // here an unreachable pattern.
        Self::Local(d) | Self::RootPath(d) => Some(d.clone()),
    }
}

/// Create a root path base
#[must_use]
pub fn create_root_path(value: &str) -> Base {
Self::RootPath(PathBuf::from(value))
}

pub(crate) fn from_source(source: &InputSource) -> Option<Base> {
match &source {
InputSource::RemoteUrl(url) => {
Expand Down
4 changes: 2 additions & 2 deletions lychee-lib/src/utils/path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,14 @@ fn dirname(src: &'_ Path) -> Option<&'_ Path> {
/// Returns Ok(None) in case of an absolute local link without a `base_url`
pub(crate) fn resolve(src: &Path, dst: &Path, base: &Option<Base>) -> Result<Option<PathBuf>> {
let resolved = match dst {
relative if dst.is_relative() => {
relative if !dst.starts_with("/") => {
trask marked this conversation as resolved.
Show resolved Hide resolved
// Find `dst` in the parent directory of `src`
let Some(parent) = src.parent() else {
return Err(ErrorKind::InvalidFile(relative.to_path_buf()));
};
parent.join(relative)
}
absolute if dst.is_absolute() => {
absolute if dst.starts_with("/") => {
// Absolute local links (leading slash) require the `base_url` to
// define the document root. Silently ignore the link in case the
// `base_url` is not defined.
Expand Down
Loading
Loading