From 196a44102210684b901a2d049770af6336cbf3e3 Mon Sep 17 00:00:00 2001 From: Trask Stalnaker Date: Sun, 1 Dec 2024 10:59:34 -0800 Subject: [PATCH] Fix and simplification --- lychee-lib/src/utils/path.rs | 37 +++++++-------------------------- lychee-lib/src/utils/request.rs | 31 +++++++++++++++++++-------- 2 files changed, 29 insertions(+), 39 deletions(-) diff --git a/lychee-lib/src/utils/path.rs b/lychee-lib/src/utils/path.rs index 83823293ff..58518f31e3 100644 --- a/lychee-lib/src/utils/path.rs +++ b/lychee-lib/src/utils/path.rs @@ -24,21 +24,13 @@ pub(crate) fn absolute_path(path: PathBuf) -> PathBuf { .clean() } -/// Get the directory name of a given `Path`. -fn dirname(src: &'_ Path) -> Option<&'_ Path> { - if src.is_file() { - return src.parent(); - } - Some(src) -} - /// Resolve `dst` that was linked to from within `src` /// /// Returns Ok(None) in case of an absolute local link without a `base_url` pub(crate) fn resolve( src: &Path, dst: &PathBuf, - root_path: &Option, + ignore_absolute_local_links: bool, ) -> Result> { let resolved = match dst { relative if !dst.starts_with("/") => { @@ -51,31 +43,16 @@ pub(crate) fn resolve( absolute if dst.starts_with("/") => { // Absolute local links (leading slash) are ignored unless // root_path is provided - let Some(base) = root_path else { + if ignore_absolute_local_links { return Ok(None); - }; - let Some(dir) = dirname(&base) else { - return Err(ErrorKind::InvalidBase( - base.display().to_string(), - "The given directory cannot be a base".to_string(), - )); - }; - join(dir.to_path_buf(), absolute) + } + PathBuf::from(absolute) } _ => return Err(ErrorKind::InvalidFile(dst.to_path_buf())), }; Ok(Some(absolute_path(resolved))) } -/// A cumbersome way to concatenate paths without checking their -/// existence on disk. See -fn join(base: PathBuf, dst: &Path) -> PathBuf { - let mut abs = base.into_os_string(); - let target_str = dst.as_os_str(); - abs.push(target_str); - PathBuf::from(abs) -} - /// Check if `child` is a subdirectory/file inside `parent` /// /// Note that `contains(parent, parent)` will return `true` @@ -108,7 +85,7 @@ mod test_path { let dummy = PathBuf::from("index.html"); let abs_path = PathBuf::from("./foo.html"); assert_eq!( - resolve(&dummy, &abs_path, &None)?, + resolve(&dummy, &abs_path, true)?, Some(env::current_dir().unwrap().join("foo.html")) ); Ok(()) @@ -121,7 +98,7 @@ mod test_path { let dummy = PathBuf::from("./index.html"); let abs_path = PathBuf::from("./foo.html"); assert_eq!( - resolve(&dummy, &abs_path, &None)?, + resolve(&dummy, &abs_path, true)?, Some(env::current_dir().unwrap().join("foo.html")) ); Ok(()) @@ -134,7 +111,7 @@ mod test_path { let abs_index = PathBuf::from("/path/to/index.html"); let abs_path = PathBuf::from("./foo.html"); assert_eq!( - resolve(&abs_index, &abs_path, &None)?, + resolve(&abs_index, &abs_path, true)?, Some(PathBuf::from("/path/to/foo.html")) ); Ok(()) diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs index ef8d0b4d6b..55178b54d5 100644 --- a/lychee-lib/src/utils/request.rs +++ b/lychee-lib/src/utils/request.rs @@ -55,7 +55,14 @@ fn try_parse_into_uri( root_path: &Option, base: &Option, ) -> Result { - let text = raw_uri.text.clone(); + let mut text = raw_uri.text.clone(); + if text.starts_with('/') { + if let Some(path) = root_path { + if let Some(path_str) = path.to_str() { + text = format!("{path_str}{text}"); + } + } + } let uri = match Uri::try_from(raw_uri.clone()) { Ok(uri) => uri, Err(_) => match base { @@ -64,7 +71,9 @@ fn try_parse_into_uri( None => return Err(ErrorKind::InvalidBaseJoin(text.clone())), }, None => match source { - InputSource::FsPath(root) => create_uri_from_file_path(root, &text, root_path)?, + InputSource::FsPath(root) => { + create_uri_from_file_path(root, &text, root_path.is_none())? + } _ => return Err(ErrorKind::UnsupportedUriType(text)), }, }, @@ -87,7 +96,7 @@ pub(crate) fn is_anchor(text: &str) -> bool { fn create_uri_from_file_path( file_path: &Path, link_text: &str, - root_path: &Option, + ignore_absolute_local_links: bool, ) -> Result { let target_path = if is_anchor(link_text) { // For anchors, we need to append the anchor to the file name. @@ -100,7 +109,9 @@ fn create_uri_from_file_path( } else { link_text.to_string() }; - let Ok(constructed_url) = resolve_and_create_url(file_path, &target_path, root_path) else { + let Ok(constructed_url) = + resolve_and_create_url(file_path, &target_path, ignore_absolute_local_links) + else { return Err(ErrorKind::InvalidPathToUri(target_path)); }; Ok(Uri { @@ -165,7 +176,7 @@ pub(crate) fn create( fn resolve_and_create_url( src_path: &Path, dest_path: &str, - root_path: &Option, + ignore_absolute_local_links: bool, ) -> Result { let (dest_path, fragment) = url::remove_get_params_and_separate_fragment(dest_path); @@ -173,9 +184,11 @@ fn resolve_and_create_url( // This addresses the issue mentioned in the original comment about double-encoding let decoded_dest = percent_decode_str(dest_path).decode_utf8()?; - let Ok(Some(resolved_path)) = - path::resolve(src_path, &PathBuf::from(&*decoded_dest), root_path) - else { + let Ok(Some(resolved_path)) = path::resolve( + src_path, + &PathBuf::from(&*decoded_dest), + ignore_absolute_local_links, + ) else { return Err(ErrorKind::InvalidPathToUri(decoded_dest.to_string())); }; @@ -200,7 +213,7 @@ mod tests { #[test] fn test_create_uri_from_path() { let result = - resolve_and_create_url(&PathBuf::from("/README.md"), "test+encoding", &None).unwrap(); + resolve_and_create_url(&PathBuf::from("/README.md"), "test+encoding", true).unwrap(); assert_eq!(result.as_str(), "file:///test+encoding"); }