From 2b251b15b1abc452fcf041b79e8d4c1b322d14b9 Mon Sep 17 00:00:00 2001 From: elliVM <126466762+elliVM@users.noreply.github.com> Date: Fri, 15 Nov 2024 13:31:25 +0200 Subject: [PATCH] Lowercase search term tokens (#117) * lowercase search term tokens * apply spotless --- .../planner/bloomfilter/SearchTermBloomFilter.java | 2 +- .../planner/bloomfilter/SearchTermBloomFilterTest.java | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java index a30dc70f..4bd37c0f 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java @@ -86,7 +86,7 @@ public byte[] bytes() { } final BloomFilter filter = BloomFilter.create(expected, fpp); for (final String token : stringTokens) { - filter.put(token); + filter.put(token.toLowerCase()); } try (final ByteArrayOutputStream filterBAOS = new ByteArrayOutputStream()) { filter.writeTo(filterBAOS); diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java index ba9f6fc3..a46d6ede 100644 --- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java +++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java @@ -100,7 +100,7 @@ public void testRegexExtractedTokens() { byte[] bytes = Assertions.assertDoesNotThrow(filter::bytes); BloomFilter resultFilter = Assertions .assertDoesNotThrow(() -> BloomFilter.readFrom(new ByteArrayInputStream(bytes))); - Assertions.assertTrue(resultFilter.mightContain("Pattern")); + Assertions.assertTrue(resultFilter.mightContain("pattern")); } @Test @@ -110,9 +110,10 @@ public void testTokenizerTokens() { byte[] bytes = Assertions.assertDoesNotThrow(filter::bytes); BloomFilter resultFilter = Assertions .assertDoesNotThrow(() -> BloomFilter.readFrom(new ByteArrayInputStream(bytes))); - Assertions.assertFalse(resultFilter.mightContain("Pattern")); - Assertions.assertTrue(resultFilter.mightContain("Without")); - Assertions.assertTrue(resultFilter.mightContain("SearchValuePatternInThisString")); + // test that tokens present and in lower case + Assertions.assertFalse(resultFilter.mightContain("pattern")); + Assertions.assertTrue(resultFilter.mightContain("without")); + Assertions.assertTrue(resultFilter.mightContain("searchvaluepatterninthisstring")); } @Test