-
Notifications
You must be signed in to change notification settings - Fork 201
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add regex stuff #529
Add regex stuff #529
Changes from all commits
4a413d0
f32bb95
80adac8
786dee6
2b0b086
6857c6e
99fd457
8ed0493
c55bca1
99d2218
1454962
3ac21b0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
/* | ||
Copyright (c) 2023 Uber Technologies, Inc. | ||
|
||
<p>Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file | ||
except in compliance with the License. You may obtain a copy of the License at | ||
<p>http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
<p>Unless required by applicable law or agreed to in writing, software distributed under the | ||
License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either | ||
express or implied. See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
use crate::models::matches::Match; | ||
use itertools::Itertools; | ||
use regex::Regex; | ||
use std::collections::HashMap; | ||
use tree_sitter::Node; | ||
|
||
/// Applies the query upon the given `node`, and gets all the matches | ||
/// # Arguments | ||
/// * `node` - the root node to apply the query upon | ||
/// * `source_code` - the corresponding source code string for the node. | ||
/// * `recursive` - if `true` it matches the query to `self` and `self`'s sub-ASTs, else it matches the `query` only to `self`. | ||
/// * `replace_node` - node to replace | ||
/// | ||
/// # Returns | ||
/// List containing all the matches against `node` | ||
pub(crate) fn get_all_matches_for_regex( | ||
node: &Node, source_code: String, regex: &Regex, recursive: bool, replace_node: Option<String>, | ||
) -> Vec<Match> { | ||
let all_captures = regex.captures_iter(&source_code).collect_vec(); | ||
let names = regex.capture_names().collect_vec(); | ||
let mut all_matches = vec![]; | ||
for captures in all_captures { | ||
// Check if the range of the self (node), and the range of outermost node captured by the query are equal. | ||
let range_matches_node = node.start_byte() == captures.get(0).unwrap().start() | ||
&& node.end_byte() == captures.get(0).unwrap().end(); | ||
let range_matches_inside_node = node.start_byte() <= captures.get(0).unwrap().start() | ||
&& node.end_byte() >= captures.get(0).unwrap().end(); | ||
if (recursive && range_matches_inside_node) || range_matches_node { | ||
let replace_node_match = if let Some(ref rn) = replace_node { | ||
captures | ||
.name(rn) | ||
.unwrap_or_else(|| panic!("The tag {rn} provided in the replace node is not present")) | ||
} else { | ||
captures.get(0).unwrap() | ||
}; | ||
let matches = extract_captures(&captures, &names); | ||
all_matches.push(Match::from_regex( | ||
&replace_node_match, | ||
matches, | ||
&source_code, | ||
)); | ||
} | ||
} | ||
all_matches | ||
} | ||
|
||
// Creates a hashmap from the capture group(name) to the corresponding code snippet. | ||
fn extract_captures( | ||
captures: ®ex::Captures<'_>, names: &Vec<Option<&str>>, | ||
) -> HashMap<String, String> { | ||
names | ||
.iter() | ||
.flatten() | ||
.flat_map(|x| { | ||
captures | ||
.name(x) | ||
.map(|v| (x.to_string(), v.as_str().to_string())) | ||
}) | ||
.collect() | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# Copyright (c) 2023 Uber Technologies, Inc. | ||
# | ||
# <p>Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file | ||
# except in compliance with the License. You may obtain a copy of the License at | ||
# <p>http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# <p>Unless required by applicable law or agreed to in writing, software distributed under the | ||
# License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either | ||
# express or implied. See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
[[edges]] | ||
scope = "File" | ||
from = "update_import" | ||
to = ["update_list_int"] | ||
|
||
[[edges]] | ||
scope = "Method" | ||
from = "update_list_int" | ||
to = ["update_add"] | ||
|
||
|
||
[[edges]] | ||
scope = "File" | ||
from = "delete_import_our_map_of" | ||
to = ["change_type_from_OurHashMap", "add_import_For_hashmap"] | ||
|
||
[[edges]] | ||
scope = "Method" | ||
from = "change_type_from_OurHashMap" | ||
to = ["update_push"] |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
# Copyright (c) 2023 Uber Technologies, Inc. | ||
# | ||
# <p>Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file | ||
# except in compliance with the License. You may obtain a copy of the License at | ||
# <p>http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# <p>Unless required by applicable law or agreed to in writing, software distributed under the | ||
# License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either | ||
# express or implied. See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
# Replace foo().bar().baz() with `true` inside methods not annotated as @DoNotCleanup | ||
[[rules]] | ||
name = "replace_call" | ||
query = """rgx (?P<n1>foo\\(\\)\\.bar\\(\\)\\.baz\\(\\))""" | ||
replace_node = "n1" | ||
replace = "true" | ||
groups = ["replace_expression_with_boolean_literal"] | ||
[[rules.filters]] | ||
enclosing_node = """(method_declaration) @md""" | ||
not_contains = ["""rgx @DoNotCleanup"""] | ||
|
||
# Before: | ||
# abc().def().ghi() | ||
# abc().fed().ghi() | ||
[[rules]] | ||
name = "replace_call_def_fed" | ||
query = """rgx (?P<n>abc\\(\\)\\.(?P<m_def>def)\\(\\)\\.ghi\\(\\))""" | ||
replace_node = "m_def" | ||
replace = "fed" | ||
|
||
|
||
# The below three rules do a dummy type migration from OurListOfInteger to List<Integer> | ||
|
||
# Updates the import statement from `our.list.OurListOfInteger` to `java.util.List` | ||
[[rules]] | ||
name = "update_import" | ||
query = """rgx (?P<n>our\\.list\\.OurListOfInteger)""" | ||
replace_node = "n" | ||
replace = "java.util.List" | ||
|
||
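# Updates the type of local variables from `OurListOfInteger` to `List<Integer>` | ||
# NOTE(review): this rule declares `[[rules.filter]]` while other rules in this file use `[[rules.filters]]` - confirm the singular key is intended | ||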
[[rules]] | ||
name = "update_list_int" | ||
query = """rgx (?P<var_decl>(?P<type>OurListOf(?P<param>Integer))\\s*(?P<name>\\w+)\\s*=.*;)""" | ||
replace_node = "type" | ||
replace = "List<@param>" | ||
is_seed_rule = false | ||
[[rules.filter]] | ||
enclosing_node = "(method_declaration) @cmd" | ||
|
||
# Updates the relevant callsite from `addToOurList` to `add` | ||
[[rules]] | ||
name = "update_add" | ||
query = """rgx (?P<call>@name\\.(?P<m_name>addToOurList)\\(\\w+\\))""" | ||
replace_node = "m_name" | ||
replace = "add" | ||
holes = ["name"] | ||
is_seed_rule = false | ||
|
||
|
||
# The below three rules do a dummy type migration from OurMapOf{T1}{T2} to HashMap<T1, T2>. For example, from OurMapOfStringInteger to HashMap<String, Integer>. | ||
# This is to exercise non-constant regex matches for replace_node. | ||
|
||
# Deletes the import statement `our.map.OurMapOf...` | ||
[[rules]] | ||
name = "delete_import_our_map_of" | ||
query = """rgx (?P<n>import our\\.map\\.OurMapOf\\w+;)""" | ||
replace_node = "n" | ||
replace = "" | ||
|
||
# Adds an import of java.util.HashMap if absent | ||
[[rules]] | ||
name = "add_import_For_hashmap" | ||
query = """(package_declaration) @pkg""" | ||
replace_node = "pkg" | ||
replace = "@pkg\nimport java.util.HashMap;" | ||
is_seed_rule = false | ||
[[rules.filters]] | ||
enclosing_node = "(program) @prgrm" | ||
not_contains = [ | ||
"((import_declaration) @im (#match? @im \"java.util.HashMap\"))", | ||
] | ||
|
||
# Before: | ||
# OurMapOfStringInteger | ||
# After: | ||
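# HashMap<String, Integer> | ||
# NOTE(review): this rule declares `[[rules.filter]]` while other rules in this file use `[[rules.filters]]` - confirm the singular key is intended | ||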
[[rules]] | ||
name = "change_type_from_OurHashMap" | ||
query = """rgx (?P<var_decl>(?P<type>\\bOurMapOf(?P<param1>[A-Z]\\w+)(?P<param2>[A-Z]\\w+))\\s*(?P<name>\\w+)\\s*=.*;)""" | ||
replace_node = "type" | ||
replace = "HashMap<@param1, @param2>" | ||
is_seed_rule = false | ||
[[rules.filter]] | ||
enclosing_node = "(method_declaration) @cmd" | ||
|
||
# Updates the relevant callsite from `pushIntoOurMap` to `push` | ||
[[rules]] | ||
name = "update_push" | ||
query = """rgx (?P<call>@name\\.(?P<m_name>pushIntoOurMap)\\(.*\\))""" | ||
replace_node = "m_name" | ||
replace = "push" | ||
holes = ["name"] | ||
is_seed_rule = false | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Every Or is something like that not meant to be supported? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thats a brilliant idea ! We can absolutely do that! Done ✅ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we populate associated comments later and does that work for regex matches?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes.
But for testing purposes, I added a scenario where we expect the associated comment to be cleaned up.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not blocking, but still curious about this.