Skip to content

Commit

Permalink
v0.1.10, support empty tableCells, and tableCells containing just har…
Browse files Browse the repository at this point in the history
…d breaks'
  • Loading branch information
wouterken committed Feb 18, 2024
1 parent 1aa2a34 commit cd747ac
Show file tree
Hide file tree
Showing 10 changed files with 285 additions and 14 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "htmltoadf"
version = "0.1.9"
version = "0.1.10"
edition = "2021"
license = "MIT"
description = "An HTML to Atlassian Document Format (ADF) converter"
Expand Down
16 changes: 8 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ $ html2adf -h
```

```
htmltoadf 0.1.9
htmltoadf 0.1.10
An HTML to Atlassian Document Format (ADF) converter
USAGE:
Expand All @@ -56,20 +56,20 @@ OPTIONS:
### Install Binary from Crates.io with `cargo install`
```
$ cargo install htmltoadf
installing htmltoadf v0.1.9 (/usr/src/html2adf)
installing htmltoadf v0.1.10 (/usr/src/html2adf)
Updating crates.io index
Downloading crates ...
Downloaded lock_api v0.4.6
--snip--
Compiling htmltoadf v0.1.9
Compiling htmltoadf v0.1.10
Finished release [optimized] target(s) in 1m 42s
Installing ~/.cargo/bin/htmltoadf
Installed package `htmltoadf v0.1.9` (executable `html2adf`)
Installed package `htmltoadf v0.1.10` (executable `html2adf`)
```

### Download Binary file from GitHub
Pre-built binaries can be downloaded from here:
https://github.com/wouterken/htmltoadf/releases/tag/0.1.9
https://github.com/wouterken/htmltoadf/releases/tag/0.1.10

### Docker Image
**Docker Repo:**
Expand All @@ -79,10 +79,10 @@ https://hub.docker.com/r/wouterken/html2adf
**Usage**

```bash
$ echo "<h1>Hello world<p>Test</p></h1>" | docker run --rm -i wouterken/html2adf:0.1.9
$ echo "<h1>Hello world<p>Test</p></h1>" | docker run --rm -i wouterken/html2adf:0.1.10
{"version":1,"type":"doc","content":[{"type":"heading","attrs":{"level":1},"content":[{"type":"text","text":"Hello world"},{"type":"text","text":"Test"}]}]}

$ echo "<h1>Hello world<p>Test</p></h1>" | docker run --rm -i wouterken/html2adf:0.1.9 | jq
$ echo "<h1>Hello world<p>Test</p></h1>" | docker run --rm -i wouterken/html2adf:0.1.10 | jq
{
"version": 1,
"type": "doc",
Expand Down Expand Up @@ -115,7 +115,7 @@ $ echo "<h1>Hello world<p>Test</p></h1>" | docker run --rm -i wouterken/html2adf

```toml
[dependencies]
htmltoadf = "0.1.9"
htmltoadf = "0.1.10"
```

**Code**
Expand Down
2 changes: 1 addition & 1 deletion docs/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@

</style>
<script defer type="module">
import init, {convert} from "https://unpkg.com/htmltoadf@0.1.9/htmltoadf.js";
import init, {convert} from "https://unpkg.com/htmltoadf@0.1.10/htmltoadf.js";

let editor;

Expand Down
2 changes: 1 addition & 1 deletion src/adf_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use scraper::ElementRef;
use scraper::Html;
use serde_json::{Map, Value};

static VALID_EMPTY_TYPES: [&str; 4] = ["hr", "iframe", "img", "br"];
static VALID_EMPTY_TYPES: [&str; 5] = ["hr", "iframe", "img", "br", "td"];

/**
* The main procedure for our ADF Builder.
Expand Down
2 changes: 1 addition & 1 deletion src/adf_structure.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ lazy_static! {
),
(
String::from("tableCell"),
AdfPermittedChildren::any(&["codeBlock", "blockCard", "paragraph", "bulletList", "mediaSingle", "orderedList", "heading", "panel", "blockquote", "rule", "mediaGroup", "decisionList", "taskList", "extension", "embedCard", "nestedExpand"])
AdfPermittedChildren::any(&["codeBlock", "blockCard", "paragraph", "bulletList", "mediaSingle", "orderedList", "heading", "panel", "blockquote", "rule", "mediaGroup", "decisionList", "taskList", "extension", "embedCard", "nestedExpand", "hardBreak"])
),
(
String::from("doc"),
Expand Down
22 changes: 22 additions & 0 deletions src/extractor.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use ego_tree::iter::Edge;
use ego_tree::NodeRef;
use regex::Regex;
use scraper::Node;
use scraper::{ElementRef, Html};
Expand Down Expand Up @@ -32,6 +33,18 @@ pub fn squish_surrounding_whitespace(input: &str) -> String {
re.replace_all(input, " ").to_string()
}

pub fn has_text_node(node: NodeRef<Node>) -> bool {
node.children().any(|node| {
if let Some(element) = node.value().as_element() {
element.name() == "br" || has_text_node(node)
}
else if let Some(text_node) = node.value().as_text() {
!text_node.text.trim().is_empty()
} else {
false
}
})
}
/**
* We parse a raw scraper::HTML and return a
* list of leaf doc nodes (each with a linked list pointer to the root)
Expand Down Expand Up @@ -63,6 +76,15 @@ pub fn extract_leaves(fragment: &Html) -> Vec<DocNode> {
text: "".trim().to_owned(),
node,
})
} else if element.value().name() == "td" {
let has_text_node = has_text_node(node);
if !has_text_node {
leaf_nodes.push(DocNode {
name: "td",
text: "".trim().to_owned(),
node,
})
}
}
} else if let Node::Text(text_node) = node.value() {
if let Some(parent) = node.parent().and_then(ElementRef::wrap) {
Expand Down
1 change: 1 addition & 0 deletions src/tests/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ mod image;
mod lists;
mod marks;
mod paragraphs;
mod tables;
use crate::convert_html_str_to_adf_str;

#[allow(dead_code)]
Expand Down
247 changes: 247 additions & 0 deletions src/tests/tables.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,247 @@
#[allow(unused_imports)]
use super::assert_output_json_eq;

#[allow(unused_imports)]
use serde_json::json;

#[cfg(test)]
#[test]
/**
 * A completely empty `<td>` is expected to be emitted as a `tableCell`
 * node that carries no `content` key at all.
 */
fn test_empty_cell() {
    // Expected ADF for a cell wrapping one paragraph with a single text node.
    let text_cell = |text: &str| {
        json!({
            "type": "tableCell",
            "content": [
                {
                    "type": "paragraph",
                    "content": [
                        {
                            "type": "text",
                            "text": text
                        }
                    ]
                }
            ]
        })
    };

    let html = r"<div><table ><tbody>
<tr><td >A</td><td >B</td><td >C</td></tr>
<tr><td >value 1</td><td ></td><td >value 2</td></tr>
</tbody></table>
</div>";

    let header_row = json!({
        "type": "tableRow",
        "content": [text_cell("A"), text_cell("B"), text_cell("C")]
    });

    // The middle cell of the second row is the empty one under test.
    let value_row = json!({
        "type": "tableRow",
        "content": [
            text_cell("value 1"),
            {
                "type": "tableCell"
            },
            text_cell("value 2")
        ]
    });

    // NOTE(review): presumably produced by the whitespace that follows the
    // table inside the wrapping <div> -- confirm against the converter.
    let trailing_paragraph = json!({
        "type": "paragraph",
        "content": [
            {
                "type": "text",
                "text": " "
            }
        ]
    });

    let expected = json!({
        "version": 1,
        "type": "doc",
        "content": [
            {
                "type": "table",
                "content": [header_row, value_row]
            },
            trailing_paragraph
        ]
    });

    assert_output_json_eq(html, expected);
}

#[test]
/**
 * A `<td>` whose only child is a `<br/>` is expected to be emitted as a
 * `tableCell` wrapping a paragraph that holds a single `hardBreak` node.
 */
fn test_hard_break_in_cell() {
    // Wraps the given inline nodes in the tableCell -> paragraph nesting that
    // every non-empty cell in the expected output shares.
    let cell_with = |inline: serde_json_value_list| {
        json!({
            "type": "tableCell",
            "content": [
                {
                    "type": "paragraph",
                    "content": inline
                }
            ]
        })
    };
    // Expected ADF for a cell holding one plain text node.
    let text_cell = |text: &str| {
        cell_with(json!([
            {
                "type": "text",
                "text": text
            }
        ]))
    };

    let html = r"<div><table ><tbody>
<tr><td >A</td><td >B</td><td >C</td></tr>
<tr><td >value 1</td><td ><br/></td><td >value 2</td></tr>
</tbody></table>
</div>";

    let header_row = json!({
        "type": "tableRow",
        "content": [text_cell("A"), text_cell("B"), text_cell("C")]
    });

    // The middle cell of the second row is the `<br/>`-only one under test.
    let value_row = json!({
        "type": "tableRow",
        "content": [
            text_cell("value 1"),
            cell_with(json!([
                {
                    "type": "hardBreak"
                }
            ])),
            text_cell("value 2")
        ]
    });

    // NOTE(review): presumably produced by the whitespace that follows the
    // table inside the wrapping <div> -- confirm against the converter.
    let trailing_paragraph = json!({
        "type": "paragraph",
        "content": [
            {
                "type": "text",
                "text": " "
            }
        ]
    });

    let expected = json!({
        "version": 1,
        "type": "doc",
        "content": [
            {
                "type": "table",
                "content": [header_row, value_row]
            },
            trailing_paragraph
        ]
    });

    assert_output_json_eq(html, expected);
}
Loading

0 comments on commit cd747ac

Please sign in to comment.