diff --git a/fixtures/TEST_STYLESHEET_LINK.md b/fixtures/TEST_STYLESHEET_LINK.md
new file mode 100644
index 0000000000..90ad264547
--- /dev/null
+++ b/fixtures/TEST_STYLESHEET_LINK.md
@@ -0,0 +1 @@
+<link href="/@global/global.css" rel="stylesheet">
diff --git a/lychee-bin/tests/cli.rs b/lychee-bin/tests/cli.rs
index fcb7530cb7..637a59d8f0 100644
--- a/lychee-bin/tests/cli.rs
+++ b/lychee-bin/tests/cli.rs
@@ -231,6 +231,17 @@ mod cli {
Ok(())
}
+ #[test]
+ fn test_stylesheet_misinterpreted_as_email() -> Result<()> {
+ test_json_output!(
+ "TEST_STYLESHEET_LINK.md",
+ MockResponseStats {
+ total: 0, // expect zero links: per the test name, the fixture's stylesheet href must not be picked up as an email
+ ..MockResponseStats::default()
+ }
+ )
+ }
+
/// Test that a GitHub link can be checked without specifying the token.
#[test]
fn test_check_github_no_token() -> Result<()> {
diff --git a/lychee-lib/src/extract/html/html5ever.rs b/lychee-lib/src/extract/html/html5ever.rs
index 0a1414855b..1d30888a21 100644
--- a/lychee-lib/src/extract/html/html5ever.rs
+++ b/lychee-lib/src/extract/html/html5ever.rs
@@ -92,7 +92,7 @@ impl TokenSink for LinkExtractor {
return TokenSinkResult::Continue;
}
- for attr in attrs {
+ for attr in &attrs {
let urls = LinkExtractor::extract_urls_from_elem_attr(
&attr.name.local,
&name,
@@ -104,8 +104,11 @@ impl TokenSink for LinkExtractor {
Some(urls) => urls
.into_iter()
.filter(|url| {
- // Only accept email addresses, which occur in `href` attributes
- // and start with `mailto:`. Technically, email addresses could
+ // Only accept email addresses which
+ // - occur in `href` attributes
+ // - start with `mailto:`
+ //
+ // Technically, email addresses could
// also occur in plain text, but we don't want to extract those
// because of the high false positive rate.
//
@@ -115,6 +118,18 @@ impl TokenSink for LinkExtractor {
let is_phone = url.starts_with("tel:");
let is_href = attr.name.local.as_ref() == "href";
+ if attrs.iter().any(|attr| {
+ &attr.name.local == "rel" && attr.value.contains("stylesheet")
+ }) {
+ // Skip virtual/framework-specific stylesheet paths that start with /@ or @
+ // These are typically resolved by dev servers or build tools rather than being real URLs
+ // Examples: /@global/style.css, @tailwind/base.css as in
+ // `<link href="/@global/style.css" rel="stylesheet">`
+ if url.starts_with("/@") || url.starts_with('@') {
+ return false;
+ }
+ }
+
!is_email || (is_mailto && is_href) || (is_phone && is_href)
})
.map(|url| RawUri {
@@ -466,4 +481,14 @@ mod tests {
let uris = extract_html(input, false);
assert!(uris.is_empty());
}
+
+ #[test]
+ fn test_skip_emails_in_stylesheets() {
+ let input = r#"
+ <link href="/@global/global.css" rel="stylesheet">
+ "#;
+
+ let uris = extract_html(input, false); // a `/@…` href on a rel="stylesheet" link is a virtual path, not a URL or email
+ assert!(uris.is_empty());
+ }
}
diff --git a/lychee-lib/src/extract/html/html5gum.rs b/lychee-lib/src/extract/html/html5gum.rs
index 5fb41be69f..d28007af00 100644
--- a/lychee-lib/src/extract/html/html5gum.rs
+++ b/lychee-lib/src/extract/html/html5gum.rs
@@ -183,6 +183,22 @@ impl LinkExtractor {
return;
}
+ // Skip virtual/framework-specific stylesheet paths that start with /@ or @
+ // These are typically resolved by dev servers or build tools rather than being real URLs
+ // Examples: /@global/style.css, @tailwind/base.css
+ if self
+ .current_attributes
+ .get("rel")
+ .map_or(false, |rel| rel.contains("stylesheet"))
+ {
+ if let Some(href) = self.current_attributes.get("href") {
+ if href.starts_with("/@") || href.starts_with('@') {
+ self.current_attributes.clear();
+ return;
+ }
+ }
+ }
+
let new_urls = self
.extract_urls_from_elem_attr()
.into_iter()
@@ -662,4 +678,14 @@ mod tests {
let uris = extract_html(input, false);
assert!(uris.is_empty());
}
+
+ #[test]
+ fn test_skip_emails_in_stylesheets() {
+ let input = r#"
+ <link href="/@global/global.css" rel="stylesheet">
+ "#;
+
+ let uris = extract_html(input, false); // a `/@…` href on a rel="stylesheet" link is a virtual path, not a URL or email
+ assert!(uris.is_empty());
+ }
}