Skip to content

Commit

Permalink
Add support for regex based CGPatterns
Browse files Browse the repository at this point in the history
ghstack-source-id: aaad598e2655a3ba76e46fc0dc969c0701fdaa5c
Pull Request resolved: #529
  • Loading branch information
ketkarameya committed Jul 6, 2023
1 parent e1c44bf commit e4c75d0
Show file tree
Hide file tree
Showing 11 changed files with 152 additions and 6 deletions.
15 changes: 13 additions & 2 deletions src/models/capture_group_patterns.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ Copyright (c) 2023 Uber Technologies, Inc.
use crate::{
models::Validator,
utilities::{
regex_utilities::get_all_matches_for_regex,
tree_sitter_utilities::{get_all_matches_for_query, get_ts_query_parser, number_of_errors},
Instantiate,
},
Expand All @@ -38,12 +39,20 @@ impl CGPattern {
/// Returns the text of this pattern as an owned `String`, produced by calling `to_string()`
/// on the wrapped value (`self.0`).
pub(crate) fn pattern(&self) -> String {
self.0.to_string()
}

/// Returns the regular-expression source of this pattern, i.e. the pattern text with the
/// leading `"rgx "` marker removed.
///
/// If the pattern does not start with `"rgx "`, it is returned unchanged. The marker is removed
/// with `strip_prefix` rather than by slicing at a fixed byte offset, because such a slice
/// panics on patterns shorter than the marker or when the offset is not a character boundary.
pub(crate) fn extract_regex(&self) -> String {
  let pattern = self.pattern();
  pattern
    .strip_prefix("rgx ")
    .unwrap_or(pattern.as_str())
    .to_string()
}
}

impl Validator for CGPattern {
fn validate(&self) -> Result<(), String> {
if self.pattern().starts_with("rgx ") {
panic!("Regex not supported")
let mut _val = &self.pattern()[4..];
return Regex::new(_val)
.map(|_| Ok(()))
.unwrap_or(Err(format!("Cannot parse the regex - {_val}")));
}
let mut parser = get_ts_query_parser();
parser
Expand Down Expand Up @@ -109,7 +118,9 @@ impl CompiledCGPattern {
replace_node,
replace_node_idx,
),
CompiledCGPattern::R(_) => panic!("Regex is not yet supported!!!"),
CompiledCGPattern::R(regex) => {
get_all_matches_for_regex(node, source_code, regex, recursive, replace_node)
}
}
}
}
39 changes: 38 additions & 1 deletion src/models/matches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,18 @@ pub(crate) struct Match {
gen_py_str_methods!(Match);

impl Match {
pub(crate) fn from_regex(
mtch: &regex::Match, matches: HashMap<String, String>, source_code: &str,
) -> Self {
Match {
matched_string: mtch.as_str().to_string(),
range: Range::from_regex_match(mtch, source_code),
matches,
associated_comma: None,
associated_comments: Vec::new(),
}
}

pub(crate) fn new(
matched_string: String, range: tree_sitter::Range, matches: HashMap<String, String>,
) -> Self {
Expand Down Expand Up @@ -231,7 +243,7 @@ impl Match {
serde_derive::Serialize, Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Deserialize,
)]
#[pyclass]
struct Range {
pub(crate) struct Range {
#[pyo3(get)]
start_byte: usize,
#[pyo3(get)]
Expand Down Expand Up @@ -260,6 +272,31 @@ impl From<tree_sitter::Range> for Range {
}
gen_py_str_methods!(Range);

impl Range {
  /// Creates a `Range` that covers the span of `mtch`.
  ///
  /// The byte offsets are copied from the match. The start and end points are computed by
  /// `position_for_offset` over the bytes of `source_code`, so the offsets of `mtch` must
  /// refer to `source_code`.
  pub(crate) fn from_regex_match(mtch: &regex::Match, source_code: &str) -> Self {
    let bytes = source_code.as_bytes();
    let (start_byte, end_byte) = (mtch.start(), mtch.end());
    Self {
      start_byte,
      end_byte,
      start_point: position_for_offset(bytes, start_byte),
      end_point: position_for_offset(bytes, end_byte),
    }
  }
}

/// Computes the position (row and column) of the byte at `offset` within `input`.
///
/// Every `\n` byte before `offset` starts a new row and resets the column to zero; every other
/// byte advances the column by one, so the column is counted in bytes.
///
/// # Panics
/// Panics if `offset` is greater than `input.len()`.
fn position_for_offset(input: &[u8], offset: usize) -> Point {
  input[..offset]
    .iter()
    .fold(Point { row: 0, column: 0 }, |mut position, &byte| {
      if byte == b'\n' {
        position.row += 1;
        position.column = 0;
      } else {
        position.column += 1;
      }
      position
    })
}

/// A range of positions in a multi-line text document, both in terms of bytes and of
/// rows and columns.
#[derive(
Expand Down
5 changes: 4 additions & 1 deletion src/models/rule_store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,10 @@ impl RuleStore {
pub(crate) fn query(&mut self, cg_pattern: &CGPattern) -> &CompiledCGPattern {
let pattern = cg_pattern.pattern();
if pattern.starts_with("rgx ") {
panic!("Regex not supported.")
return &*self
.rule_query_cache
.entry(pattern)
.or_insert_with(|| CompiledCGPattern::R(Regex::new(&cg_pattern.extract_regex()).unwrap()));
}

&*self
Expand Down
3 changes: 2 additions & 1 deletion src/models/unit_tests/rule_graph_validation_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,11 +120,12 @@ fn test_filter_bad_arg_contains_n_sibling() {
}

// Building a rule graph whose rule uses a regex (`rgx `) query must complete without panicking.
// The test name is historical: regex queries used to be rejected with the panic
// "Regex not supported", so this test previously carried a `should_panic` expectation.
#[test]
fn test_unsupported_regex() {
  RuleGraphBuilder::default()
    .rules(vec![
      piranha_rule! {name = "Test rule", query = "rgx (\\w+) (\\w)+"},
    ])
    .build();
}
1 change: 1 addition & 0 deletions src/tests/test_piranha_java.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ create_rewrite_tests! {
test_new_line_character_used_in_string_literal: "new_line_character_used_in_string_literal", 1;
test_java_delete_method_invocation_argument: "delete_method_invocation_argument", 1;
test_java_delete_method_invocation_argument_no_op: "delete_method_invocation_argument_no_op", 0;
test_regex_based_matcher: "regex_based_matcher", 1;
}

create_match_tests! {
Expand Down
1 change: 1 addition & 0 deletions src/utilities/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Copyright (c) 2023 Uber Technologies, Inc.
limitations under the License.
*/

pub(crate) mod regex_utilities;
pub(crate) mod tree_sitter_utilities;
use std::collections::HashMap;
use std::error::Error;
Expand Down
67 changes: 67 additions & 0 deletions src/utilities/regex_utilities.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/*
Copyright (c) 2023 Uber Technologies, Inc.
<p>Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
except in compliance with the License. You may obtain a copy of the License at
<p>http://www.apache.org/licenses/LICENSE-2.0
<p>Unless required by applicable law or agreed to in writing, software distributed under the
License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
express or implied. See the License for the specific language governing permissions and
limitations under the License.
*/

use crate::models::matches::Match;
use itertools::Itertools;
use regex::Regex;
use std::collections::HashMap;
use tree_sitter::Node;

/// Applies the `regex` to the source code and collects every match that belongs to `node`.
///
/// The regex is run over the whole `source_code` rather than over the text of `node` only, so
/// the byte offsets of each match are absolute: they can be compared directly with the byte
/// range of `node`, and the ranges of the returned matches refer to the source code instead of
/// being relative to the start of the node. As a consequence, anchors in the regex refer to
/// the whole source text.
///
/// # Arguments
/// * `node` - the node whose byte range restricts the accepted matches
/// * `source_code` - the corresponding source code string for the node
/// * `regex` - the regular expression to be applied
/// * `recursive` - if `true`, every match lying inside the byte range of `node` is accepted, else only a match spanning exactly the byte range of `node` is accepted
/// * `replace_node` - optional name of the capture group whose span is reported as the range of the match; when `None`, the span of the entire match is used
///
/// # Returns
/// One `Match` per accepted regex match, carrying the reported range and the mapping from
/// capture group names to the captured code snippets.
///
/// # Panics
/// Panics if `replace_node` names a capture group that is not present in an accepted match.
pub(crate) fn get_all_matches_for_regex(
  node: &Node, source_code: String, regex: &Regex, recursive: bool, replace_node: Option<String>,
) -> Vec<Match> {
  let names = regex.capture_names().collect_vec();
  let (node_start, node_end) = (node.start_byte(), node.end_byte());
  let mut all_matches = vec![];
  for captures in regex.captures_iter(&source_code) {
    let whole_match = captures
      .get(0)
      .expect("capture group 0 always corresponds to the entire match");
    // Check if the range of the self (node) and the range of the entire regex match are equal.
    let range_matches_node = node_start == whole_match.start() && node_end == whole_match.end();
    // Check if the entire regex match lies within the range of the self (node).
    let range_inside_node = node_start <= whole_match.start() && whole_match.end() <= node_end;
    if (recursive && range_inside_node) || range_matches_node {
      let replace_node_match = match replace_node {
        Some(ref rn) => captures
          .name(rn)
          .unwrap_or_else(|| panic!("the tag {rn} provided in the replace node is not present")),
        None => whole_match,
      };
      let matches = extract_captures(&captures, &names);
      all_matches.push(Match::from_regex(&replace_node_match, matches, &source_code));
    }
  }
  all_matches
}

/// Collects the text captured by every named group of a single regex match.
///
/// # Arguments
/// * `captures` - the capture groups of one regex match
/// * `names` - the capture group names of the regex; `None` entries (unnamed groups) are skipped
///
/// # Returns
/// A map from group name to the captured text. Named groups that did not participate in the
/// match are omitted.
fn extract_captures(
  captures: &regex::Captures<'_>, names: &[Option<&str>],
) -> HashMap<String, String> {
  names
    .iter()
    .flatten()
    .filter_map(|name| {
      captures
        .name(name)
        .map(|group| (name.to_string(), group.as_str().to_string()))
    })
    .collect()
}
2 changes: 1 addition & 1 deletion src/utilities/tree_sitter_utilities.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ pub(crate) fn get_all_matches_for_query(
// If `recursive` it allows matches to the subtree of self (Node)
// Else it ensure that the query perfectly matches the node (`self`).
if recursive || range_matches_self {
let mut replace_node_range = captured_node_range;
let mut replace_node_range: Range = captured_node_range;
if let Some(replace_node_name) = &replace_node {
if let Some(r) =
get_range_for_replace_node(query, &query_matches, replace_node_name, replace_node_idx)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[[rules]]
name = "replace_call"
query = """rgx (?P<n>foo\\(\\))"""
replace_node = "n"
replace = "Foo"
10 changes: 10 additions & 0 deletions test-resources/java/regex_based_matcher/expected/Sample.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package com.uber.piranha;

class A {

void foobar() {
int total = Foo;
System.out.println(total);
}

}
10 changes: 10 additions & 0 deletions test-resources/java/regex_based_matcher/input/Sample.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package com.uber.piranha;

class A {

void foobar() {
int total = foo();
System.out.println(total);
}

}

0 comments on commit e4c75d0

Please sign in to comment.