Skip to content

Commit

Permalink
Implement ordering of the include-groups
Browse files Browse the repository at this point in the history
Signed-off-by: Bernát Gábor <bgabor8@bloomberg.net>
  • Loading branch information
gaborbernat committed Oct 30, 2024
1 parent 8399b12 commit a49d0d6
Show file tree
Hide file tree
Showing 12 changed files with 258 additions and 148 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ edition = "2021"
taplo = { version = "0.13.2" } # formatter
pep508_rs = { version = "0.6.1" }
pep440_rs = { version = "0.6.5" } # align up with pep508_rs for now https://github.com/konstin/pep508_rs/issues/19
lexical-sort = { version = "0.3.1" }

[dev-dependencies]
rstest = { version = "0.23.0" } # parametrized tests
Expand Down
225 changes: 117 additions & 108 deletions common/src/array.rs
Original file line number Diff line number Diff line change
@@ -1,143 +1,152 @@
use std::cell::RefCell;
use std::cmp::Ordering;
use std::collections::HashMap;

use lexical_sort::{natural_lexical_cmp, StringSort};
use std::hash::Hash;
use taplo::syntax::SyntaxKind::{ARRAY, COMMA, NEWLINE, STRING, VALUE, WHITESPACE};
use taplo::syntax::{SyntaxElement, SyntaxKind, SyntaxNode};

use crate::create::{make_comma, make_newline};
use crate::string::{load_text, update_content};
use crate::util::{find_first, iter};

/// Rewrites the content of array values found under `node`.
///
/// For each `VALUE` that sits inside an `ARRAY` child of `node`, `update_content` is called
/// with that value node and the `transform` callback (which maps `&str` to `String`).
///
/// NOTE(review): this span looks like a rendered diff in which the removed nested loops and
/// the added `iter(..)` call appear together; the real file is expected to contain only one
/// of them - confirm against the repository.
pub fn transform<F>(node: &SyntaxNode, transform: &F)
where
F: Fn(&str) -> String,
{
for array in node.children_with_tokens() {
if array.kind() == ARRAY {
for array_entry in array.as_node().unwrap().children_with_tokens() {
if array_entry.kind() == VALUE {
update_content(array_entry.as_node().unwrap(), transform);
}
}
}
}
iter(node, [ARRAY, VALUE].as_ref(), &|array_entry| {
update_content(array_entry, transform);
});
}

/// Reorders the values of every `ARRAY` child of `node`.
///
/// `to_key` derives an optional sort key from each `VALUE` node and `cmp` orders those keys.
/// While walking an array's children, every element other than the brackets and the original
/// commas is pushed into the current group together with a synthesized comma after each value;
/// a group is closed (and registered under its key) when the next value, a newline or the
/// closing bracket is reached. The keys are then sorted with `cmp` and the array children are
/// spliced back as: opening bracket (plus a newline for multi-line arrays), the groups in key
/// order, then the trailing content. If the array had no trailing comma to begin with, the
/// last comma is removed again after the splice.
///
/// Groups are looked up through a `HashMap` from key to group index, so when two values
/// produce the same key only the group registered last is emitted.
///
/// NOTE(review): this span looks like a rendered diff in which removed and added lines are
/// interleaved (two signatures, `has_value`/`entry_value` next to `current_set_value`, ...);
/// the description above follows the `to_key`/`cmp` variant - confirm against the repository.
#[allow(clippy::range_plus_one, clippy::too_many_lines)]
pub fn sort<F>(node: &SyntaxNode, transform: F)
pub fn sort<T, K, C>(node: &SyntaxNode, to_key: K, cmp: &C)
where
F: Fn(&str) -> String,
K: Fn(&SyntaxNode) -> Option<T>,
C: Fn(&T, &T) -> Ordering,
T: Clone + Eq + Hash,
{
for array in node.children_with_tokens() {
if array.kind() == ARRAY {
let array_node = array.as_node().unwrap();
let has_trailing_comma = array_node
.children_with_tokens()
.map(|x| x.kind())
.filter(|x| *x == COMMA || *x == VALUE)
.last()
== Some(COMMA);
let multiline = array_node.children_with_tokens().any(|e| e.kind() == NEWLINE);
let mut value_set = Vec::<Vec<SyntaxElement>>::new();
let entry_set = RefCell::new(Vec::<SyntaxElement>::new());
let mut key_to_pos = HashMap::<String, usize>::new();
iter(node, [ARRAY].as_ref(), &|array| {
let has_trailing_comma = array
.children_with_tokens()
.map(|x| x.kind())
.filter(|x| *x == COMMA || *x == VALUE)
.last()
== Some(COMMA);
let multiline = array.children_with_tokens().any(|e| e.kind() == NEWLINE);

let mut add_to_value_set = |entry: String| {
let mut entry_set_borrow = entry_set.borrow_mut();
if !entry_set_borrow.is_empty() {
key_to_pos.insert(entry, value_set.len());
value_set.push(entry_set_borrow.clone());
entry_set_borrow.clear();
}
};
let mut entries = Vec::<SyntaxElement>::new();
let mut has_value = false;
let mut previous_is_bracket_open = false;
let mut entry_value = String::new();
let mut count = 0;
let mut entries = Vec::<SyntaxElement>::new();
let mut order_sets = Vec::<Vec<SyntaxElement>>::new();
let mut key_to_order_set = HashMap::<T, usize>::new();
let current_set = RefCell::new(Vec::<SyntaxElement>::new());
let mut current_set_value: Option<T> = None;
let mut previous_is_bracket_open = false;

let mut add_to_order_sets = |entry: T| {
let mut entry_set_borrow = current_set.borrow_mut();
if !entry_set_borrow.is_empty() {
key_to_order_set.insert(entry, order_sets.len());
order_sets.push(entry_set_borrow.clone());
entry_set_borrow.clear();
}
};

let mut count = 0;

for entry in array_node.children_with_tokens() {
count += 1;
if previous_is_bracket_open {
// make sure ends with trailing comma
if entry.kind() == NEWLINE || entry.kind() == WHITESPACE {
continue;
// collect elements to order into to_order_sets, the rest goes into entries
for entry in array.children_with_tokens() {
count += 1;
if previous_is_bracket_open {
// make sure ends with trailing comma
if entry.kind() == NEWLINE || entry.kind() == WHITESPACE {
continue;
}
previous_is_bracket_open = false;
}
match &entry.kind() {
SyntaxKind::BRACKET_START => {
entries.push(entry);
if multiline {
entries.push(make_newline());
}
previous_is_bracket_open = false;
previous_is_bracket_open = true;
}
match &entry.kind() {
SyntaxKind::BRACKET_START => {
entries.push(entry);
if multiline {
entries.push(make_newline());
SyntaxKind::BRACKET_END => {
match current_set_value.take() {
None => {
entries.extend(current_set.borrow_mut().clone());
}
previous_is_bracket_open = true;
}
SyntaxKind::BRACKET_END => {
if has_value {
add_to_value_set(entry_value.clone());
} else {
entries.extend(entry_set.borrow_mut().clone());
Some(val) => {
add_to_order_sets(val);
}
entries.push(entry);
}
VALUE => {
if has_value {
entries.push(entry);
}
VALUE => {
match current_set_value.take() {
None => {}
Some(val) => {
if multiline {
entry_set.borrow_mut().push(make_newline());
}
add_to_value_set(entry_value.clone());
}
has_value = true;
let value_node = entry.as_node().unwrap();
let mut found_string = false;
for child in value_node.children_with_tokens() {
let kind = child.kind();
if kind == STRING {
entry_value = transform(load_text(child.as_token().unwrap().text(), STRING).as_str());
found_string = true;
break;
current_set.borrow_mut().push(make_newline());
}
add_to_order_sets(val);
}
if !found_string {
// abort if not correct types
return;
}
entry_set.borrow_mut().push(entry);
entry_set.borrow_mut().push(make_comma());
}
NEWLINE => {
entry_set.borrow_mut().push(entry);
if has_value {
add_to_value_set(entry_value.clone());
has_value = false;
}
}
COMMA => {}
_ => {
entry_set.borrow_mut().push(entry);
let value_node = entry.as_node().unwrap();
current_set_value = to_key(value_node);

current_set.borrow_mut().push(entry);
current_set.borrow_mut().push(make_comma());
}
NEWLINE => {
current_set.borrow_mut().push(entry);
if current_set_value.is_some() {
add_to_order_sets(current_set_value.unwrap());
current_set_value = None;
}
}
COMMA => {}
_ => {
current_set.borrow_mut().push(entry);
}
}
}

let mut order: Vec<String> = key_to_pos.clone().into_keys().collect();
order.string_sort_unstable(natural_lexical_cmp);
let end = entries.split_off(if multiline { 2 } else { 1 });
for key in order {
entries.extend(value_set[key_to_pos[&key]].clone());
}
entries.extend(end);
array_node.splice_children(0..count, entries);
if !has_trailing_comma {
if let Some((i, _)) = array_node
.children_with_tokens()
.enumerate()
.filter(|(_, x)| x.kind() == COMMA)
.last()
{
array_node.splice_children(i..i + 1, vec![]);
}
let trailing_content = entries.split_off(if multiline { 2 } else { 1 });
let mut order: Vec<T> = key_to_order_set.keys().cloned().collect();
order.sort_by(&cmp);
for key in order {
entries.extend(order_sets[key_to_order_set[&key]].clone());
}
entries.extend(trailing_content);
array.splice_children(0..count, entries);

if !has_trailing_comma {
if let Some((i, _)) = array
.children_with_tokens()
.enumerate()
.filter(|(_, x)| x.kind() == COMMA)
.last()
{
array.splice_children(i..i + 1, vec![]);
}
}
}
});
}

/// Sorts the string entries of the arrays under `node`.
///
/// This is a thin wrapper around [`sort`]: the key of an array value is taken from its first
/// `STRING` child, whose token text is decoded with `load_text` and then mapped through
/// `to_key`. Values that have no `STRING` child produce no key. The resulting keys are
/// ordered with `cmp`.
///
/// The type parameter `T` does not appear in any argument and therefore cannot be inferred;
/// callers have to name it explicitly (for example `sort_strings::<String, _, _>(..)`). It is
/// retained, together with its bounds, only so that existing call sites keep compiling.
///
/// # Panics
///
/// Panics if an element of kind `STRING` is not a token.
pub fn sort_strings<T, K, C>(node: &SyntaxNode, to_key: K, cmp: &C)
where
    K: Fn(String) -> String,
    C: Fn(&String, &String) -> Ordering,
    T: Clone + Eq + Hash,
{
    sort(
        node,
        |value| -> Option<String> {
            find_first(value, &[STRING], &|element| -> String {
                to_key(load_text(element.as_token().unwrap().text(), STRING))
            })
        },
        cmp,
    );
}
16 changes: 16 additions & 0 deletions common/src/table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,22 @@ where
}
}

/// Looks up the value node stored under `key` in `table`.
///
/// Every `ENTRY` child of `table` is scanned in order. The trimmed text of the most recently
/// seen `KEY` node is remembered, and the first `VALUE` node encountered while that remembered
/// text equals `key` is returned as an owned clone. Returns `None` when nothing matches.
///
/// # Panics
///
/// Panics if an element of kind `ENTRY`, `KEY` or a matching `VALUE` is not a node.
pub fn find_key(table: &SyntaxNode, key: &str) -> Option<SyntaxNode> {
    // Deliberately kept outside the loops: the remembered key carries over between entries.
    let mut last_key = String::new();
    let entries = table.children_with_tokens().filter(|child| child.kind() == ENTRY);
    for table_entry in entries {
        for part in table_entry.as_node().unwrap().children_with_tokens() {
            let kind = part.kind();
            if kind == KEY {
                let raw = part.as_node().unwrap().text().to_string();
                last_key = raw.trim().to_string();
                continue;
            }
            if kind == VALUE && last_key == key {
                return Some(part.as_node().unwrap().clone());
            }
        }
    }
    None
}

pub fn collapse_sub_tables(tables: &mut Tables, name: &str) {
let h2p = tables.header_to_pos.clone();
let sub_name_prefix = format!("{name}.");
Expand Down
10 changes: 7 additions & 3 deletions common/src/tests/array_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use taplo::formatter::{format_syntax, Options};
use taplo::parser::parse;
use taplo::syntax::SyntaxKind::{ENTRY, VALUE};

use crate::array::{sort, transform};
use crate::array::{sort_strings, transform};
use crate::pep508::format_requirement;

#[rstest]
Expand Down Expand Up @@ -148,7 +148,9 @@ fn test_order_array(#[case] start: &str, #[case] expected: &str) {
if children.kind() == ENTRY {
for entry in children.as_node().unwrap().children_with_tokens() {
if entry.kind() == VALUE {
sort(entry.as_node().unwrap(), str::to_lowercase);
sort_strings::<String, _, _>(entry.as_node().unwrap(), |s| s.to_lowercase(), &|lhs, rhs| {
lhs.cmp(rhs)
});
}
}
}
Expand All @@ -172,7 +174,9 @@ fn test_reorder_no_trailing_comma(#[case] start: &str, #[case] expected: &str) {
if children.kind() == ENTRY {
for entry in children.as_node().unwrap().children_with_tokens() {
if entry.kind() == VALUE {
sort(entry.as_node().unwrap(), str::to_lowercase);
sort_strings::<String, _, _>(entry.as_node().unwrap(), |s| s.to_lowercase(), &|lhs, rhs| {
lhs.cmp(rhs)
});
}
}
}
Expand Down
24 changes: 20 additions & 4 deletions common/src/util.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,33 @@
use taplo::syntax::{SyntaxKind, SyntaxNode};
use taplo::syntax::{SyntaxElement, SyntaxKind, SyntaxNode};

/// Walks `node` along a path of syntax kinds and calls the handler on every node at its end.
///
/// At each level, the children of the current node whose kind equals `paths[0]` are selected.
/// When `paths` has a single element the callback is invoked with the selected node; otherwise
/// the walk recurses into that node with the remaining `paths[1..]`.
///
/// # Panics
///
/// Panics if `paths` is empty while `node` has at least one child (indexing `paths[0]`), or if
/// a child of a matching kind is a token rather than a node (`as_node().unwrap()`).
///
/// NOTE(review): this span looks like a rendered diff in which the old (`transform`) and new
/// (`handle`) spellings of the callback parameter appear together - confirm against the
/// repository which one the file actually contains.
pub fn iter<F>(node: &SyntaxNode, paths: &[SyntaxKind], transform: &F)
pub fn iter<F>(node: &SyntaxNode, paths: &[SyntaxKind], handle: &F)
where
F: Fn(&SyntaxNode),
{
for entry in node.children_with_tokens() {
if entry.kind() == paths[0] {
let found = entry.as_node().unwrap();
if paths.len() == 1 {
transform(found);
handle(found);
} else {
iter(found, &paths[1..], transform);
iter(found, &paths[1..], handle);
}
}
}
}

/// Finds the first element reachable from `node` along `paths` and maps it with `extract`.
///
/// At each level, the children of the current node (tokens included) whose kind equals the
/// first element of `paths` are candidates. When that is the last element of the path, the
/// first candidate is handed to `extract` and the result is returned. Otherwise the search
/// descends into each candidate in turn with the rest of the path and returns the first hit,
/// moving on to the next sibling when a candidate's subtree contains no match.
///
/// Returns `None` when `paths` is empty or nothing matches. A candidate that is a token
/// cannot be descended into and is skipped when more path elements remain.
pub fn find_first<F, T>(node: &SyntaxNode, paths: &[SyntaxKind], extract: &F) -> Option<T>
where
    F: Fn(SyntaxElement) -> T,
{
    let (head, rest) = paths.split_first()?;
    for entry in node.children_with_tokens() {
        if entry.kind() != *head {
            continue;
        }
        if rest.is_empty() {
            return Some(extract(entry));
        }
        // Propagate a nested hit; previously the recursive result was computed and dropped,
        // so multi-element paths could never yield a value.
        if let Some(child) = entry.as_node() {
            if let Some(found) = find_first(child, rest, extract) {
                return Some(found);
            }
        }
    }
    None
}
1 change: 1 addition & 0 deletions pyproject-fmt/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ crate-type = ["cdylib"]
common = {path = "../common" }
regex = { version = "1.11.0" }
pyo3 = { version = "0.22.5", features = ["abi3-py38"] } # integration with Python
lexical-sort = { version = "0.3.1" }

[features]
extension-module = ["pyo3/extension-module"]
Expand Down
Loading

0 comments on commit a49d0d6

Please sign in to comment.