Skip to content

Commit

Permalink
Pattern acceleration (#87)
Browse files Browse the repository at this point in the history
* merge squash pattern-acceleration-refactor

* run jooq generate with new settings and spotless

* rename BloomFilterTempTable tokenSet to searchTermTokenSet

* make ConditionWalker and ValidElement classes final

* rename method isIndexStatement to isBloomSearchCondition

* add bad-connection index statement tests to ElementConditionTest and IndexStatementConditionTest, and add tests for the isBloomSearchCondition method

* apply spotless

* add javadoc to tests that use Condition.toString for equality testing

* refactoring iteration: multiple new classes and interfaces to simplify and ensure single responsibility for objects

* apply spotless

* renaming of names and methods

* refactor ElementCondition to directly use IndexStatementCondition class to get pattern match tables

* Renaming and some comments

* Remove decorators and use class methods, fix testing, clean equality test assertions

* apply spotless

* create testing patterns as Strings first, add comments for clarity

* disable search term tokenization

* clean up code and separate condition interfaces, use decorators for category table building and remove interfaces from tests

* enable tokenization of search term, filter category table filter tokens using pattern from each filter_type_id

* TokenizedValue finds minor tokens, add tests verifying that the correct tokens are inserted into the category table, some code cleanup
  • Loading branch information
elliVM authored Sep 30, 2024
1 parent 2109fb0 commit c1fbd98
Show file tree
Hide file tree
Showing 48 changed files with 3,920 additions and 1,601 deletions.
45 changes: 12 additions & 33 deletions database/bloomdb.sql
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
* This program handles user requests that require archive access.
* Copyright (C) 2022 Suomen Kanuuna Oy
* Copyright (C) 2024 Suomen Kanuuna Oy
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
Expand Down Expand Up @@ -44,36 +44,15 @@
* a licensee so wish it.
*/

DROP TABLE IF EXISTS `filter_expected_100000_fpp_001`;
DROP TABLE IF EXISTS `filter_expected_1000000_fpp_003`;
DROP TABLE IF EXISTS `filter_expected_2500000_fpp_005`;
DROP TABLE IF EXISTS `filtertype`;

CREATE TABLE `filter_expected_100000_fpp_001` (
`id` INT NOT NULL AUTO_INCREMENT,
`partition_id` BIGINT(20) unsigned NOT NULL UNIQUE,
`filter` LONGBLOB,
CONSTRAINT `fk_smallfilter_partition`
FOREIGN KEY (`partition_id`) REFERENCES `journaldb`.`logfile`(`id`) ON DELETE CASCADE,
CONSTRAINT `pk_small`
PRIMARY KEY (`id`)
)ENGINE=InnoDB ROW_FORMAT=COMPRESSED;

CREATE TABLE `filter_expected_1000000_fpp_003` (
`id` INT NOT NULL AUTO_INCREMENT,
`partition_id` BIGINT(20) unsigned NOT NULL UNIQUE ,
`filter` LONGBLOB,
CONSTRAINT `fk_mediumfilter_partition`
FOREIGN KEY (`partition_id`) REFERENCES `journaldb`.`logfile`(`id`) ON DELETE CASCADE,
CONSTRAINT `pk_medium`
PRIMARY KEY (`id`)
)ENGINE=InnoDB ROW_FORMAT=COMPRESSED;

CREATE TABLE `filter_expected_2500000_fpp_005` (
`id` INT NOT NULL AUTO_INCREMENT,
`partition_id` BIGINT(20) unsigned NOT NULL UNIQUE ,
`filter` LONGBLOB,
CONSTRAINT `fk_largefilter_partition`
FOREIGN KEY (`partition_id`) REFERENCES `journaldb`.`logfile`(`id`) ON DELETE CASCADE,
CONSTRAINT `pk_large`
PRIMARY KEY (`id`)
)ENGINE=InnoDB ROW_FORMAT=COMPRESSED;
-- Catalogue of bloom filter types. Each row is one distinct combination of
-- expected element count, target false-positive probability and pattern.
CREATE TABLE `filtertype`
(
-- Surrogate key of the filter type.
`id` bigint(20) UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
-- Presumably the number of elements the bloom filter is sized for (TODO confirm).
`expectedElements` bigint(20) UNSIGNED NOT NULL,
-- DOUBLE(2, 2): at most two digits in total, both after the decimal point,
-- so together with UNSIGNED the storable range is 0.00 - 0.99.
`targetFpp` DOUBLE(2, 2) UNSIGNED NOT NULL,
-- Presumably a regex used to select the tokens for this filter type (TODO confirm).
`pattern` VARCHAR(2048) NOT NULL,
-- Unnamed unique key; forbids duplicate (expectedElements, targetFpp, pattern) rows.
UNIQUE KEY (`expectedElements`, `targetFpp`, `pattern`)
) ENGINE = InnoDB
DEFAULT CHARSET = utf8mb4
COLLATE = utf8mb4_unicode_ci;
8 changes: 7 additions & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,12 @@
<version>${jclouds.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.h2database</groupId>
<artifactId>h2</artifactId>
<version>2.2.224</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>
Expand Down Expand Up @@ -304,7 +310,7 @@
</jdbc>
<generator>
<database>
<includes>streamdb.log_group|streamdb.host|streamdb.stream|journaldb.host|journaldb.bucket|journaldb.logfile|bloomdb.filter_expected_100000_fpp_001|bloomdb.filter_expected_1000000_fpp_003|bloomdb.filter_expected_2500000_fpp_005</includes>
<includes>streamdb.log_group|streamdb.host|streamdb.stream|journaldb.host|journaldb.bucket|journaldb.logfile|bloomdb.filtertype</includes>
</database>
<target>
<packageName>com.teragrep.pth_06.jooq.generated</packageName>
Expand Down
3 changes: 3 additions & 0 deletions src/main/java/com/teragrep/pth_06/config/ArchiveConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ public final class ArchiveConfig {

// bloom
public final boolean bloomEnabled;
public final boolean withoutFilters;
public final String bloomDbName;

public final boolean isStub;
Expand All @@ -79,6 +80,7 @@ public ArchiveConfig(Map<String, String> opts) {
dbPassword = getOrThrow(opts, "DBpassword");
dbUrl = getOrThrow(opts, "DBurl");
bloomEnabled = opts.getOrDefault("bloom.enabled", "false").equalsIgnoreCase("true");
// Read a dedicated option so withoutFilters can be set independently of bloom.enabled.
// NOTE(review): key name "bloom.withoutFilters" should be confirmed against the option documentation.
withoutFilters = opts.getOrDefault("bloom.withoutFilters", "false").equalsIgnoreCase("true");
bloomDbName = opts.getOrDefault("DBbloomdbname", "bloomdb");

dbJournalDbName = opts.getOrDefault("DBjournaldbname", "journaldb");
Expand Down Expand Up @@ -108,6 +110,7 @@ public ArchiveConfig() {
dbStreamDbName = "";

bloomEnabled = false;
withoutFilters = false;
bloomDbName = "";

hideDatabaseExceptions = false;
Expand Down
37 changes: 29 additions & 8 deletions src/main/java/com/teragrep/pth_06/config/ConditionConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -53,19 +53,36 @@ public final class ConditionConfig {
private final boolean streamQuery;
private final boolean bloomEnabled;
private final boolean withoutFilters;
private final long bloomTermId;

public ConditionConfig(DSLContext ctx, boolean streamQuery) {
this.ctx = ctx;
this.streamQuery = streamQuery;
this.bloomEnabled = false;
this.withoutFilters = false;
this(ctx, streamQuery, false, false, 0L);
}

public ConditionConfig(DSLContext ctx, boolean streamQuery, boolean bloomEnabled) {
this(ctx, streamQuery, bloomEnabled, false, 0L);
}

public ConditionConfig(DSLContext ctx, boolean streamQuery, boolean bloomEnabled, boolean withoutFilters) {
this(ctx, streamQuery, bloomEnabled, withoutFilters, 0L);
}

public ConditionConfig(DSLContext ctx, boolean streamQuery, boolean bloomEnabled, long bloomTermId) {
this(ctx, streamQuery, bloomEnabled, false, bloomTermId);
}

/**
 * Full constructor that stores every configuration value as given; no validation is performed here.
 *
 * @param ctx            jOOQ context, exposed through {@code context()}
 * @param streamQuery    value exposed through {@code streamQuery()}
 * @param bloomEnabled   value exposed through {@code bloomEnabled()}
 * @param withoutFilters value exposed through {@code withoutFilters()}
 * @param bloomTermId    value exposed through {@code bloomTermId()}; presumably identifies the
 *                       search term for bloom processing (TODO confirm against callers)
 */
public ConditionConfig(
DSLContext ctx,
boolean streamQuery,
boolean bloomEnabled,
boolean withoutFilters,
long bloomTermId
) {
this.ctx = ctx;
this.streamQuery = streamQuery;
this.bloomEnabled = bloomEnabled;
this.withoutFilters = withoutFilters;
this.bloomTermId = bloomTermId;
}

public DSLContext context() {
Expand All @@ -76,12 +93,16 @@ public boolean bloomEnabled() {
return bloomEnabled;
}

public boolean streamQuery() {
return streamQuery;
public boolean withoutFilters() {
return withoutFilters;
}

public boolean withoutFilter() {
return withoutFilters;
public long bloomTermId() {
return bloomTermId;
}

public boolean streamQuery() {
return streamQuery;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,7 @@


import com.teragrep.pth_06.jooq.generated.DefaultCatalog;
import com.teragrep.pth_06.jooq.generated.bloomdb.tables.FilterExpected_1000000Fpp_003;
import com.teragrep.pth_06.jooq.generated.bloomdb.tables.FilterExpected_100000Fpp_001;
import com.teragrep.pth_06.jooq.generated.bloomdb.tables.FilterExpected_2500000Fpp_005;
import com.teragrep.pth_06.jooq.generated.bloomdb.tables.Filtertype;

import java.util.ArrayList;
import java.util.Arrays;
Expand All @@ -78,27 +76,17 @@
@SuppressWarnings({ "all", "unchecked", "rawtypes" })
public class Bloomdb extends SchemaImpl {

private static final long serialVersionUID = 1310856944;
private static final long serialVersionUID = -1839179080;

/**
* The reference instance of <code>bloomdb</code>
*/
public static final Bloomdb BLOOMDB = new Bloomdb();

/**
* The table <code>bloomdb.filter_expected_1000000_fpp_003</code>.
* The table <code>bloomdb.filtertype</code>.
*/
public final FilterExpected_1000000Fpp_003 FILTER_EXPECTED_1000000_FPP_003 = com.teragrep.pth_06.jooq.generated.bloomdb.tables.FilterExpected_1000000Fpp_003.FILTER_EXPECTED_1000000_FPP_003;

/**
* The table <code>bloomdb.filter_expected_100000_fpp_001</code>.
*/
public final FilterExpected_100000Fpp_001 FILTER_EXPECTED_100000_FPP_001 = com.teragrep.pth_06.jooq.generated.bloomdb.tables.FilterExpected_100000Fpp_001.FILTER_EXPECTED_100000_FPP_001;

/**
* The table <code>bloomdb.filter_expected_2500000_fpp_005</code>.
*/
public final FilterExpected_2500000Fpp_005 FILTER_EXPECTED_2500000_FPP_005 = com.teragrep.pth_06.jooq.generated.bloomdb.tables.FilterExpected_2500000Fpp_005.FILTER_EXPECTED_2500000_FPP_005;
public final Filtertype FILTERTYPE = com.teragrep.pth_06.jooq.generated.bloomdb.tables.Filtertype.FILTERTYPE;

/**
* No further instances allowed
Expand All @@ -122,8 +110,6 @@ public final List<Table<?>> getTables() {

private final List<Table<?>> getTables0() {
return Arrays.<Table<?>>asList(
FilterExpected_1000000Fpp_003.FILTER_EXPECTED_1000000_FPP_003,
FilterExpected_100000Fpp_001.FILTER_EXPECTED_100000_FPP_001,
FilterExpected_2500000Fpp_005.FILTER_EXPECTED_2500000_FPP_005);
Filtertype.FILTERTYPE);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,7 @@
package com.teragrep.pth_06.jooq.generated.bloomdb;


import com.teragrep.pth_06.jooq.generated.bloomdb.tables.FilterExpected_1000000Fpp_003;
import com.teragrep.pth_06.jooq.generated.bloomdb.tables.FilterExpected_100000Fpp_001;
import com.teragrep.pth_06.jooq.generated.bloomdb.tables.FilterExpected_2500000Fpp_005;
import com.teragrep.pth_06.jooq.generated.bloomdb.tables.Filtertype;

import javax.annotation.Generated;

Expand All @@ -77,23 +75,15 @@ public class Indexes {
// INDEX definitions
// -------------------------------------------------------------------------

public static final Index FILTER_EXPECTED_1000000_FPP_003_PARTITION_ID = Indexes0.FILTER_EXPECTED_1000000_FPP_003_PARTITION_ID;
public static final Index FILTER_EXPECTED_1000000_FPP_003_PRIMARY = Indexes0.FILTER_EXPECTED_1000000_FPP_003_PRIMARY;
public static final Index FILTER_EXPECTED_100000_FPP_001_PARTITION_ID = Indexes0.FILTER_EXPECTED_100000_FPP_001_PARTITION_ID;
public static final Index FILTER_EXPECTED_100000_FPP_001_PRIMARY = Indexes0.FILTER_EXPECTED_100000_FPP_001_PRIMARY;
public static final Index FILTER_EXPECTED_2500000_FPP_005_PARTITION_ID = Indexes0.FILTER_EXPECTED_2500000_FPP_005_PARTITION_ID;
public static final Index FILTER_EXPECTED_2500000_FPP_005_PRIMARY = Indexes0.FILTER_EXPECTED_2500000_FPP_005_PRIMARY;
public static final Index FILTERTYPE_EXPECTEDELEMENTS = Indexes0.FILTERTYPE_EXPECTEDELEMENTS;
public static final Index FILTERTYPE_PRIMARY = Indexes0.FILTERTYPE_PRIMARY;

// -------------------------------------------------------------------------
// [#1459] distribute members to avoid static initialisers > 64kb
// -------------------------------------------------------------------------

private static class Indexes0 {
public static Index FILTER_EXPECTED_1000000_FPP_003_PARTITION_ID = Internal.createIndex("partition_id", FilterExpected_1000000Fpp_003.FILTER_EXPECTED_1000000_FPP_003, new OrderField[] { FilterExpected_1000000Fpp_003.FILTER_EXPECTED_1000000_FPP_003.PARTITION_ID }, true);
public static Index FILTER_EXPECTED_1000000_FPP_003_PRIMARY = Internal.createIndex("PRIMARY", FilterExpected_1000000Fpp_003.FILTER_EXPECTED_1000000_FPP_003, new OrderField[] { FilterExpected_1000000Fpp_003.FILTER_EXPECTED_1000000_FPP_003.ID }, true);
public static Index FILTER_EXPECTED_100000_FPP_001_PARTITION_ID = Internal.createIndex("partition_id", FilterExpected_100000Fpp_001.FILTER_EXPECTED_100000_FPP_001, new OrderField[] { FilterExpected_100000Fpp_001.FILTER_EXPECTED_100000_FPP_001.PARTITION_ID }, true);
public static Index FILTER_EXPECTED_100000_FPP_001_PRIMARY = Internal.createIndex("PRIMARY", FilterExpected_100000Fpp_001.FILTER_EXPECTED_100000_FPP_001, new OrderField[] { FilterExpected_100000Fpp_001.FILTER_EXPECTED_100000_FPP_001.ID }, true);
public static Index FILTER_EXPECTED_2500000_FPP_005_PARTITION_ID = Internal.createIndex("partition_id", FilterExpected_2500000Fpp_005.FILTER_EXPECTED_2500000_FPP_005, new OrderField[] { FilterExpected_2500000Fpp_005.FILTER_EXPECTED_2500000_FPP_005.PARTITION_ID }, true);
public static Index FILTER_EXPECTED_2500000_FPP_005_PRIMARY = Internal.createIndex("PRIMARY", FilterExpected_2500000Fpp_005.FILTER_EXPECTED_2500000_FPP_005, new OrderField[] { FilterExpected_2500000Fpp_005.FILTER_EXPECTED_2500000_FPP_005.ID }, true);
public static Index FILTERTYPE_EXPECTEDELEMENTS = Internal.createIndex("expectedElements", Filtertype.FILTERTYPE, new OrderField[] { Filtertype.FILTERTYPE.EXPECTEDELEMENTS, Filtertype.FILTERTYPE.TARGETFPP, Filtertype.FILTERTYPE.PATTERN }, true);
public static Index FILTERTYPE_PRIMARY = Internal.createIndex("PRIMARY", Filtertype.FILTERTYPE, new OrderField[] { Filtertype.FILTERTYPE.ID }, true);
}
}
Loading

0 comments on commit c1fbd98

Please sign in to comment.