Skip to content

Commit

Permalink
Support both options at the same time
Browse files: browse the repository at this point in the history
  • Loading branch information
trask committed Nov 30, 2024
1 parent 144e1e5 commit d8288f2
Show file tree
Hide file tree
Showing 6 changed files with 89 additions and 78 deletions.
2 changes: 1 addition & 1 deletion examples/collect_links/collect_links.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ async fn main() -> Result<()> {
},
];

let links = Collector::new(None) // base
let links = Collector::new(None, None) // root_path and base
.skip_missing_inputs(false) // don't skip missing inputs? (default=false)
.skip_hidden(false) // skip hidden files? (default=true)
.skip_ignored(false) // skip files that are ignored by git? (default=true)
Expand Down
10 changes: 2 additions & 8 deletions lychee-bin/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use crate::options::Config;
use crate::parse::{parse_duration_secs, parse_headers, parse_remaps};
use anyhow::{Context, Result};
use http::StatusCode;
use lychee_lib::{Base, Client, ClientBuilder};
use lychee_lib::{Client, ClientBuilder};
use regex::RegexSet;
use reqwest_cookie_store::CookieStoreMutex;
use std::sync::Arc;
Expand Down Expand Up @@ -53,15 +53,9 @@ pub(crate) fn create(cfg: &Config, cookie_jar: Option<&Arc<CookieStoreMutex>>) -
cfg.include_mail
};

let base = if let Some(root_path) = &cfg.root_path {
Some(Base::create_root_path(&root_path))
} else {
cfg.base.clone()
};

ClientBuilder::builder()
.remaps(remaps)
.base(base)
.base(cfg.base.clone())
.includes(includes)
.excludes(excludes)
.exclude_all_private(cfg.exclude_all_private)
Expand Down
9 changes: 1 addition & 8 deletions lychee-bin/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,6 @@ use openssl_sys as _; // required for vendored-openssl feature
use options::LYCHEE_CONFIG_FILE;
use ring as _; // required for apple silicon

use lychee_lib::Base;
use lychee_lib::BasicAuthExtractor;
use lychee_lib::Collector;
use lychee_lib::CookieJar;
Expand Down Expand Up @@ -289,13 +288,7 @@ fn underlying_io_error_kind(error: &Error) -> Option<io::ErrorKind> {
async fn run(opts: &LycheeOptions) -> Result<i32> {
let inputs = opts.inputs()?;

let base = if let Some(root_path) = &opts.config.root_path {
Some(Base::create_root_path(&root_path))
} else {
opts.config.base.clone()
};

let mut collector = Collector::new(base)
let mut collector = Collector::new(opts.config.root_path.clone(), opts.config.base.clone())
.skip_missing_inputs(opts.config.skip_missing)
.skip_hidden(!opts.config.hidden)
.skip_ignored(!opts.config.no_ignore)
Expand Down
50 changes: 33 additions & 17 deletions lychee-lib/src/collector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,20 +21,22 @@ pub struct Collector {
skip_hidden: bool,
include_verbatim: bool,
use_html5ever: bool,
root_path: Option<String>,
base: Option<Base>,
}

impl Collector {
/// Create a new collector with an empty cache
#[must_use]
pub const fn new(base: Option<Base>) -> Self {
pub const fn new(root_path: Option<String>, base: Option<Base>) -> Self {
Collector {
basic_auth_extractor: None,
skip_missing_inputs: false,
include_verbatim: false,
use_html5ever: false,
skip_hidden: true,
skip_ignored: true,
root_path,
base,
}
}
Expand Down Expand Up @@ -119,13 +121,19 @@ impl Collector {
})
.flatten()
.par_then_unordered(None, move |(content, base)| {
let root_path = self.root_path.clone();
let basic_auth_extractor = self.basic_auth_extractor.clone();
async move {
let content = content?;
let extractor = Extractor::new(self.use_html5ever, self.include_verbatim);
let uris: Vec<RawUri> = extractor.extract(&content);
let requests =
request::create(uris, &content.source, &base, &basic_auth_extractor);
let requests = request::create(
uris,
&content.source,
&root_path,
&base,
&basic_auth_extractor,
);
Result::Ok(stream::iter(requests.into_iter().map(Ok)))
}
})
Expand All @@ -149,14 +157,22 @@ mod tests {
};

// Helper function to run the collector on the given inputs
async fn collect(inputs: Vec<Input>, base: Option<Base>) -> HashSet<Uri> {
let responses = Collector::new(base).collect_links(inputs);
async fn collect(
inputs: Vec<Input>,
root_path: Option<String>,
base: Option<Base>,
) -> HashSet<Uri> {
let responses = Collector::new(root_path, base).collect_links(inputs);
responses.map(|r| r.unwrap().uri).collect().await
}

// Helper function for collecting verbatim links
async fn collect_verbatim(inputs: Vec<Input>, base: Option<Base>) -> HashSet<Uri> {
let responses = Collector::new(base)
async fn collect_verbatim(
inputs: Vec<Input>,
root_path: Option<String>,
base: Option<Base>,
) -> HashSet<Uri> {
let responses = Collector::new(root_path, base)
.include_verbatim(true)
.collect_links(inputs);
responses.map(|r| r.unwrap().uri).collect().await
Expand Down Expand Up @@ -247,7 +263,7 @@ mod tests {
},
];

let links = collect_verbatim(inputs, None).await;
let links = collect_verbatim(inputs, None, None).await;

let expected_links = HashSet::from_iter([
website(TEST_STRING),
Expand All @@ -270,7 +286,7 @@ mod tests {
file_type_hint: Some(FileType::Markdown),
excluded_paths: None,
};
let links = collect(vec![input], Some(base)).await;
let links = collect(vec![input], None, Some(base)).await;

let expected_links = HashSet::from_iter([
website("https://endler.dev"),
Expand All @@ -296,7 +312,7 @@ mod tests {
file_type_hint: Some(FileType::Html),
excluded_paths: None,
};
let links = collect(vec![input], Some(base)).await;
let links = collect(vec![input], None, Some(base)).await;

let expected_links = HashSet::from_iter([
website("https://github.com/lycheeverse/lychee/"),
Expand Down Expand Up @@ -325,7 +341,7 @@ mod tests {
file_type_hint: Some(FileType::Html),
excluded_paths: None,
};
let links = collect(vec![input], Some(base)).await;
let links = collect(vec![input], None, Some(base)).await;

let expected_links = HashSet::from_iter([
website("https://example.com/static/image.png"),
Expand All @@ -352,7 +368,7 @@ mod tests {
excluded_paths: None,
};

let links = collect(vec![input], Some(base)).await;
let links = collect(vec![input], None, Some(base)).await;

let expected = HashSet::from_iter([
website("https://localhost.com/@/internal.md"),
Expand All @@ -374,7 +390,7 @@ mod tests {
file_type_hint: Some(FileType::Html),
excluded_paths: None,
};
let links = collect(vec![input], Some(base)).await;
let links = collect(vec![input], None, Some(base)).await;

let expected_links = HashSet::from_iter([
// the body links wouldn't be present if the file was parsed strictly as XML
Expand Down Expand Up @@ -407,7 +423,7 @@ mod tests {
excluded_paths: None,
};

let links = collect(vec![input], None).await;
let links = collect(vec![input], None, None).await;

let expected_urls = HashSet::from_iter([
website("https://github.com/lycheeverse/lychee/"),
Expand All @@ -426,7 +442,7 @@ mod tests {
file_type_hint: None,
excluded_paths: None,
};
let links = collect(vec![input], None).await;
let links = collect(vec![input], None, None).await;

let expected_links = HashSet::from_iter([mail("user@example.com")]);

Expand Down Expand Up @@ -469,7 +485,7 @@ mod tests {
},
];

let links = collect(inputs, None).await;
let links = collect(inputs, None, None).await;

let expected_links = HashSet::from_iter([
website(&format!(
Expand Down Expand Up @@ -503,7 +519,7 @@ mod tests {
excluded_paths: None,
};

let links = collect(vec![input], Some(base)).await;
let links = collect(vec![input], None, Some(base)).await;

let expected_links = HashSet::from_iter([
path("/path/to/root/index.html"),
Expand Down
14 changes: 1 addition & 13 deletions lychee-lib/src/types/base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@ pub enum Base {
Local(PathBuf),
/// Remote URL pointing to a website homepage
Remote(Url),
/// Root path for checking absolute local links
RootPath(PathBuf),
}

impl Base {
Expand All @@ -29,10 +27,6 @@ impl Base {
let full_path = path.join(link);
Url::from_file_path(full_path).ok()
}
Self::RootPath(_path) => {
// this is unused currently because joining on RootPath is handled by create_uri_from_file_path
unreachable!()
}
}
}

Expand All @@ -41,16 +35,10 @@ impl Base {
pub(crate) fn dir(&self) -> Option<PathBuf> {
match self {
Self::Remote(_) => None,
Self::Local(d) | Self::RootPath(d) => Some(d.clone()),
Self::Local(d) => Some(d.clone()),
}
}

/// Create a root path base
#[must_use]
pub fn create_root_path(value: &str) -> Base {
Self::RootPath(PathBuf::from(value))
}

pub(crate) fn from_source(source: &InputSource) -> Option<Base> {
match &source {
InputSource::RemoteUrl(url) => {
Expand Down
Loading

0 comments on commit d8288f2

Please sign in to comment.