From 825eb9536e8323e5862f3dfc5a376a61e64169f4 Mon Sep 17 00:00:00 2001 From: molsonkiko <46202915+molsonkiko@users.noreply.github.com> Date: Wed, 13 Dec 2023 23:25:14 -0800 Subject: [PATCH] add s_lines func; improve RPath func docs * new s_lines function splits a string into lines. --- CHANGELOG.md | 6 +++ .../JSONTools/RemesPathFunctions.cs | 44 +++++++++++++++++-- JsonToolsNppPlugin/Properties/AssemblyInfo.cs | 4 +- JsonToolsNppPlugin/Tests/RemesPathTests.cs | 1 + JsonToolsNppPlugin/Utils/ArrayExtensions.cs | 8 ++++ docs/RemesPath.md | 17 ++++--- 6 files changed, 70 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8b479e2..dd0fcc6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,6 +40,12 @@ and this project adheres to [Semantic Versioning](http://semver.org/). - `loop()` function used in `s_sub` callbacks is not thread-safe. *This doesn't matter right now* because RemesPath is single-threaded, but it could matter in the future. - __GrepperForm loses its JSON permanently when the buffer associated with its treeview is deleted.__ +## [6.1.0] - (UNRELEASED) YYYY-MM-DD + +### Added + +1. [`s_lines` RemesPath vectorized function](/docs/RemesPath.md#vectorized-functions). + ## [6.0.0] - 2023-12-13 ### Added diff --git a/JsonToolsNppPlugin/JSONTools/RemesPathFunctions.cs b/JsonToolsNppPlugin/JSONTools/RemesPathFunctions.cs index 54e2cb3..3c8d9be 100644 --- a/JsonToolsNppPlugin/JSONTools/RemesPathFunctions.cs +++ b/JsonToolsNppPlugin/JSONTools/RemesPathFunctions.cs @@ -2963,6 +2963,12 @@ public static JNode StrFindAll(List args) return StrFindAllHelper(text, rex, args, 3, "s_fa", -1, headerHandling); } + /// + /// s_split(x: string, sep: string | regex = g`\s+`) -> array[string]

+ /// split x by each match to sep (which is always treated as a regex)

+ /// if sep is not provided, split on whitespace.

+ /// See https://learn.microsoft.com/en-us/dotnet/api/system.text.regularexpressions.regex.split?view=netframework-4.8#system-text-regularexpressions-regex-split(system-string) for implementation details. + ///
public static JNode StrSplit(List args) { JNode node = args[0]; @@ -2981,21 +2987,52 @@ public static JNode StrSplit(List args) return new JArray(0, out_nodes); } + /// + /// s_lines(x: string) -> array[string]

+ /// splits x into an array of lines, treating '\r', '\n', and '\r\n' all as line terminators.

+ /// Use s_split(x, `\r\n`) or s_split(x, `\r`) or s_split(x, `\n`) instead if you only want to consider one type of line terminator. + ///
+ public static JNode StrSplitLines(List args) + { + string s = (string)args[0].value; + string[] lines = Regex.Split(s, @"\r\n?|\n"); + var lineArr = new List(lines.Length); + foreach (string line in lines) + lineArr.Add(new JNode(line)); + return new JArray(0, lineArr); + } + + /// + /// s_lower(x: string) -> string

+ /// returns x converted to lowercase + ///
public static JNode StrLower(List args) { return new JNode(((string)args[0].value).ToLower()); } + /// + /// s_upper(x: string) -> string

+ /// returns x converted to uppercase + ///
public static JNode StrUpper(List args) { return new JNode(((string)args[0].value).ToUpper()); } + /// + /// s_strip(x: string) -> string

+ /// returns x with no leading or trailing whitespace + ///
public static JNode StrStrip(List args) { return new JNode(((string)args[0].value).Trim()); } + + /// + /// See string.Slice extension method in ArrayExtensions.cs + /// public static JNode StrSlice(List args) { string s = (string)args[0].value; @@ -3071,7 +3108,7 @@ string replacementFunction(Match m) } /// - /// returns true is x is string + /// returns true iff x is a string /// public static JNode IsStr(List args) { @@ -3079,7 +3116,7 @@ public static JNode IsStr(List args) } /// - /// returns true is x is long, double, or bool + /// returns true iff x is long, double, or bool /// public static JNode IsNum(List args) { @@ -3087,7 +3124,7 @@ public static JNode IsNum(List args) } /// - /// returns true if x is JObject or JArray + /// returns true iff x is JObject or JArray /// public static JNode IsExpr(List args) { @@ -3422,6 +3459,7 @@ public static JNode ObjectsToJNode(object obj) ["s_fa"] = new ArgFunction(StrFindAll, "s_fa", Dtype.ARR, 2, int.MaxValue, true, new Dtype[] { Dtype.STR | Dtype.ITERABLE, Dtype.STR_OR_REGEX, Dtype.BOOL | Dtype.NULL, Dtype.INT}, true, new ArgsTransform((1, Dtype.STR_OR_REGEX, TransformRegex), (2, Dtype.NULL, x => new JNode(false)))), ["s_find"] = new ArgFunction(StrFind, "s_find", Dtype.ARR, 2, 2, true, new Dtype[] {Dtype.STR | Dtype.ITERABLE, Dtype.REGEX}), ["s_len"] = new ArgFunction(StrLen, "s_len", Dtype.INT, 1, 1, true, new Dtype[] {Dtype.STR | Dtype.ITERABLE}), + ["s_lines"] = new ArgFunction(StrSplitLines, "s_lines", Dtype.ARR, 1, 1, true, new Dtype[] {Dtype.STR | Dtype.ITERABLE}), ["s_lower"] = new ArgFunction(StrLower, "s_lower", Dtype.STR, 1, 1, true, new Dtype[] {Dtype.STR | Dtype.ITERABLE}), ["s_mul"] = new ArgFunction(StrMul, "s_mul", Dtype.STR, 2, 2, true, new Dtype[] {Dtype.STR | Dtype.ITERABLE, Dtype.INT }), ["s_slice"] = new ArgFunction(StrSlice, "s_slice", Dtype.STR, 2, 2, true, new Dtype[] {Dtype.STR | Dtype.ITERABLE, Dtype.INT_OR_SLICE}), diff --git a/JsonToolsNppPlugin/Properties/AssemblyInfo.cs 
b/JsonToolsNppPlugin/Properties/AssemblyInfo.cs index 1b71161..0b94f1a 100644 --- a/JsonToolsNppPlugin/Properties/AssemblyInfo.cs +++ b/JsonToolsNppPlugin/Properties/AssemblyInfo.cs @@ -29,5 +29,5 @@ // Build Number // Revision // -[assembly: AssemblyVersion("6.0.0.0")] -[assembly: AssemblyFileVersion("6.0.0.0")] +[assembly: AssemblyVersion("6.0.0.1")] +[assembly: AssemblyFileVersion("6.0.0.1")] diff --git a/JsonToolsNppPlugin/Tests/RemesPathTests.cs b/JsonToolsNppPlugin/Tests/RemesPathTests.cs index 31de802..1d6b242 100644 --- a/JsonToolsNppPlugin/Tests/RemesPathTests.cs +++ b/JsonToolsNppPlugin/Tests/RemesPathTests.cs @@ -349,6 +349,7 @@ public static bool Test() new Query_DesiredResult("log2(j`[1, 4, 8]`)", $"[0, 2, 3]"), new Query_DesiredResult("abs(j`[-1, 0, 1]`)", "[1, 0, 1]"), new Query_DesiredResult("is_str(@.bar.b)", "[true, true]"), + new Query_DesiredResult("s_lines(j`[\"a\\r\\nb\\rc\\td\\ne\\n\", \"foo bar\"]`)", "[[\"a\", \"b\", \"c\\td\", \"e\", \"\"], [\"foo bar\"]]"), new Query_DesiredResult("s_split(@.bar.b[0], g`[^a-z]+`)", "[\"a\", \"g\"]"), new Query_DesiredResult("s_split(@.bar.b, `a`)", "[[\"\", \"`g\"], [\"b\", \"h\"]]"), new Query_DesiredResult("s_split(`foo\\r\\nb\\t c \\r bar-baz\\n`, )", "[\"foo\", \"b\", \"c\", \"bar-baz\", \"\"]"), // omit optional 2nd arg; split on whitespace diff --git a/JsonToolsNppPlugin/Utils/ArrayExtensions.cs b/JsonToolsNppPlugin/Utils/ArrayExtensions.cs index ee67202..6b03dbf 100644 --- a/JsonToolsNppPlugin/Utils/ArrayExtensions.cs +++ b/JsonToolsNppPlugin/Utils/ArrayExtensions.cs @@ -273,6 +273,14 @@ public static string Slice(this string source, int start, int stop, int stride) return new string(source.ToCharArray().LazySlice(start, stop, stride).ToArray()); } + /// + /// s_slice(x: string, sli: integer | slicer) -> string

+ /// uses Python slicing syntax.

+ /// EXAMPLES:

+ /// * s_slice(abcde, 1:-2) returns "bc"

+ /// * s_slice(abcde, :2) returns "ab"

+ /// * s_slice(abcde, -2) returns "d"

+ ///
public static string Slice(this string source, int?[] slicer) { return new string(source.ToCharArray().LazySlice(slicer).ToArray()); diff --git a/docs/RemesPath.md b/docs/RemesPath.md index e3d115c..f58ccb6 100644 --- a/docs/RemesPath.md +++ b/docs/RemesPath.md @@ -895,6 +895,15 @@ The length of string x. The lower-case form of x. +--- +`s_lines(x: string) -> array[string]` + +*Added in [v6.1](/CHANGELOG.md#610---unreleased-yyyy-mm-dd)* + +Returns an array of all the lines (including an empty string at the end if there's a trailing newline) in `x`. + +This function treats `\r`, `\n`, and `\r\n` all as valid newlines. Use `s_split` below if you want to only accept one or two of those. + ---- `s_mul(x: string, reps: int) -> string` @@ -917,17 +926,15 @@ Prior to [v5.5.0](/CHANGELOG.md#550---2023-08-13), Python-style negative indices If `sep` is not specified (the function is called with one argument): * Returns `x` split by whitespace. * E.g., ``s_split(`a b c\n d `)`` returns `["a", "b", "c", "d", ""]` (the last empty string is because `x` ends with whitespace) + * The 1-argument option was added in [v6.0](/CHANGELOG.md#600---2023-12-13). -If `sep` is a string: -* Returns an array containing all the substrings of `x` that don't contain `sep`, split by the places where `sep` occurs. - * E.g., ``s_split(`abac`, `a`)`` returns `["", "b", "c"]` - -If `sep` is a regex: +If `sep` is a string (which is treated as a regex) or regex: * Returns an array containing substrings of `x` where the parts that match `sep` are missing. * E.g., ``s_split(`a big bad man`, g`\\s+`)`` returns `["a", "big", "bad", "man"]`. * However, if `sep` contains any capture groups, the capture groups are included in the array. * ``s_split(`a big bad man`, g`(\\s+)`)`` returns `["a", " " "big", " ", "bad", " ", "man"]`. 
* ``s_split(`bob num: 111-222-3333, carol num: 123-456-7890`, g`(\\d{3})-(\\d{3}-\\d{4})`)`` returns `["bob num: ", "111", "222-3333", ", carol num: ", "123", "456-7890", ""]` +* See [the docs for C# Regex.Split](https://learn.microsoft.com/en-us/dotnet/api/system.text.regularexpressions.regex.split?view=netframework-4.8#system-text-regularexpressions-regex-split(system-string)) for more info. ---- `s_strip(x: string) -> string`