From bdd00a39a70824648027013cd369df3b989b57eb Mon Sep 17 00:00:00 2001 From: Christopher Grote Date: Thu, 30 Nov 2023 21:07:52 +0000 Subject: [PATCH] Makes duplicate detector more configurable and adds integration test Signed-off-by: Christopher Grote --- .../src/main/kotlin/DuplicateDetector.kt | 18 ++-- .../src/main/kotlin/DuplicateDetectorCfg.kt | 1 + .../src/main/kotlin/PackageConfig.kt | 18 +++- .../src/test/kotlin/DuplicateDetectorTest.kt | 93 +++++++++++++++++++ 4 files changed, 120 insertions(+), 10 deletions(-) create mode 100644 samples/packages/duplicate-detector/src/test/kotlin/DuplicateDetectorTest.kt diff --git a/samples/packages/duplicate-detector/src/main/kotlin/DuplicateDetector.kt b/samples/packages/duplicate-detector/src/main/kotlin/DuplicateDetector.kt index 3a85ab6e8d..105da5f70c 100644 --- a/samples/packages/duplicate-detector/src/main/kotlin/DuplicateDetector.kt +++ b/samples/packages/duplicate-detector/src/main/kotlin/DuplicateDetector.kt @@ -10,6 +10,7 @@ import com.atlan.model.assets.IColumn import com.atlan.model.assets.MaterializedView import com.atlan.model.assets.Table import com.atlan.model.assets.View +import com.atlan.model.enums.AtlanIcon import com.atlan.model.enums.CertificateStatus import com.atlan.model.search.CompoundQuery import com.atlan.pkg.Utils @@ -22,8 +23,6 @@ import java.util.concurrent.atomic.AtomicLong object DuplicateDetector { private val logger = KotlinLogging.logger {} - private const val GLOSSARY_NAME = "Duplicate assets" - data class AssetKey(val typeName: String, val qualifiedName: String, val guid: String) private val hashToAssetKeys = ConcurrentHashMap>() @@ -34,6 +33,7 @@ object DuplicateDetector { fun main(args: Array) { val config = Utils.setPackageOps() + val glossaryName = Utils.getOrDefault(config.glossaryName, "Duplicate assets") val qnPrefix = Utils.getOrDefault(config.qnPrefix, "default") val types = Utils.getOrDefault(config.assetTypes, listOf(Table.TYPE_NAME, View.TYPE_NAME, MaterializedView.TYPE_NAME)) @@ -44,7 +44,7 @@ object DuplicateDetector { } findAssets(qnPrefix, types, batchSize) - val glossaryQN = glossaryForDuplicates() + val glossaryQN = glossaryForDuplicates(glossaryName) termsForDuplicates(glossaryQN, batchSize) } @@ -94,14 +94,16 @@ object DuplicateDetector { /** * Idempotently create (or fetch) a glossary to capture the duplicate assets. * + * @param glossaryName name of the glossary * @return the qualifiedName of the glossary */ - fun glossaryForDuplicates(): String { + fun glossaryForDuplicates(glossaryName: String): String { return try { - Glossary.findByName(GLOSSARY_NAME).qualifiedName + Glossary.findByName(glossaryName).qualifiedName } catch (e: NotFoundException) { - val glossary = Glossary.creator(GLOSSARY_NAME) - .description("Glossary whose terms represent potential duplicate assets.") + val glossary = Glossary.creator(glossaryName) + .assetIcon(AtlanIcon.COPY) + .userDescription("Each term represents a set of potential duplicate assets, based on assets that have the same set of columns (case-insensitive, in any order). The assets that are potential duplicates of each other are all linked to the same term.") .build() logger.info { "Creating glossary to hold duplicates." } glossary.save().getResult(glossary).qualifiedName @@ -140,7 +142,7 @@ object DuplicateDetector { } catch (e: NotFoundException) { val toCreate = GlossaryTerm.creator(termName, glossaryQN) .description( - "Assets with the same set of ${columns?.size} columns:\n" + columns?.joinToString( + "Assets with the same set of ${columns?.size} columns:\n" + columns?.joinToString( separator = "\n", ) { "- $it" }, ) diff --git a/samples/packages/duplicate-detector/src/main/kotlin/DuplicateDetectorCfg.kt b/samples/packages/duplicate-detector/src/main/kotlin/DuplicateDetectorCfg.kt index e88f85e6ac..2144da119e 100644 --- a/samples/packages/duplicate-detector/src/main/kotlin/DuplicateDetectorCfg.kt +++ b/samples/packages/duplicate-detector/src/main/kotlin/DuplicateDetectorCfg.kt @@ -14,6 +14,7 @@ import javax.annotation.processing.Generated @Generated("com.atlan.pkg.CustomPackage") @JsonAutoDetect(fieldVisibility = JsonAutoDetect.Visibility.ANY) data class DuplicateDetectorCfg( + @JsonProperty("glossary_name") val glossaryName: String?, @JsonProperty("qn_prefix") val qnPrefix: String?, @JsonProperty("control_config_strategy") val controlConfigStrategy: String?, @JsonDeserialize(using = WidgetSerde.MultiSelectDeserializer::class) diff --git a/samples/packages/duplicate-detector/src/main/kotlin/PackageConfig.kt b/samples/packages/duplicate-detector/src/main/kotlin/PackageConfig.kt index 88b5663307..f8b0832bf9 100644 --- a/samples/packages/duplicate-detector/src/main/kotlin/PackageConfig.kt +++ b/samples/packages/duplicate-detector/src/main/kotlin/PackageConfig.kt @@ -3,6 +3,7 @@ import com.atlan.Atlan import com.atlan.pkg.CustomPackage import com.atlan.pkg.config.model.ui.UIConfig +import com.atlan.pkg.config.model.ui.UIRule import com.atlan.pkg.config.model.ui.UIStep import com.atlan.pkg.config.model.workflow.WorkflowOutputs import com.atlan.pkg.config.widgets.DropDown @@ -24,12 +25,19 @@ object PackageConfig : CustomPackage( title = "Configuration", description = "Duplicate detection configuration", inputs = mapOf( + "glossary_name" to TextInput( + label = "Glossary name", + required = true, + help = "Name for the glossary where the duplicate sets of assets will be recorded and tracked.", + placeholder = "Duplicate assets", + grid = 4, + ), "qn_prefix" to TextInput( label = "Qualified name prefix", required = false, help = "Starting value for a qualifiedName that will determine which assets to check for duplicates.", placeholder = "default", - grid = 6, + grid = 4, ), "control_config_strategy" to Radio( label = "Options", @@ -43,7 +51,7 @@ object PackageConfig : CustomPackage( ), "asset_types" to DropDown( label = "Asset types", - required = true, + required = false, possibleValues = mapOf( "Table" to "Table", "View" to "View", @@ -56,6 +64,12 @@ object PackageConfig : CustomPackage( ), ), ), + rules = listOf( + UIRule( + whenInputs = mapOf("control_config_strategy" to "advanced"), + required = listOf("asset_types"), + ), + ), ), containerImage = "ghcr.io/atlanhq/csa-duplicate-detector:${Atlan.VERSION}", containerImagePullPolicy = "Always", diff --git a/samples/packages/duplicate-detector/src/test/kotlin/DuplicateDetectorTest.kt b/samples/packages/duplicate-detector/src/test/kotlin/DuplicateDetectorTest.kt new file mode 100644 index 0000000000..eec479f643 --- /dev/null +++ b/samples/packages/duplicate-detector/src/test/kotlin/DuplicateDetectorTest.kt @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: Apache-2.0 + Copyright 2023 Atlan Pte. Ltd. */ +import com.atlan.Atlan +import com.atlan.model.assets.Glossary +import com.atlan.model.assets.GlossaryTerm +import com.atlan.model.assets.MaterializedView +import com.atlan.model.assets.Table +import com.atlan.model.assets.View +import com.atlan.model.enums.AtlanDeleteType +import com.atlan.model.enums.CertificateStatus +import com.atlan.pkg.PackageTest +import org.testng.Assert.assertTrue +import org.testng.ITestContext +import org.testng.annotations.AfterClass +import org.testng.annotations.BeforeClass +import kotlin.test.Test +import kotlin.test.assertEquals +import kotlin.test.assertNotNull + +/** + * Test detection of duplicate assets. + */ +class DuplicateDetectorTest : PackageTest() { + + private val testId = makeUnique("dupdetect") + private val files = listOf( + "debug.log", + ) + private val glossaryName = testId + + @BeforeClass + fun beforeClass() { + setup( + DuplicateDetectorCfg( + glossaryName = glossaryName, + qnPrefix = "default/snowflake", + controlConfigStrategy = "default", + assetTypes = listOf(Table.TYPE_NAME, View.TYPE_NAME, MaterializedView.TYPE_NAME), + ), + ) + DuplicateDetector.main(arrayOf()) + } + + @Test + fun glossaryCreated() { + val glossary = Glossary.findByName(glossaryName) + assertNotNull(glossary) + assertEquals(testId, glossary.name) + } + + @Test + fun termsCreated() { + val glossaryQN = Glossary.findByName(glossaryName).qualifiedName!! + val terms = GlossaryTerm.select() + .where(GlossaryTerm.ANCHOR.eq(glossaryQN)) + .includeOnResults(GlossaryTerm.DESCRIPTION) + .includeOnResults(GlossaryTerm.ASSIGNED_ENTITIES) + .includeOnResults(GlossaryTerm.CERTIFICATE_STATUS) + .stream() + .toList() + assertTrue(terms.size > 0) + terms.forEach { term -> + term as GlossaryTerm + assertTrue(term.name.startsWith("Dup. (")) + assertTrue(term.assignedEntities.size > 0) + assertTrue(term.description.startsWith("Assets with the same set of")) + assertEquals(CertificateStatus.DRAFT, term.certificateStatus) + } + } + + @Test + fun filesCreated() { + validateFilesExist(files) + } + + @Test + fun errorFreeLog() { + validateErrorFreeLog() + } + + @AfterClass(alwaysRun = true) + fun afterClass(context: ITestContext) { + val glossary = Glossary.findByName(glossaryName)!! + val terms = GlossaryTerm.select() + .where(GlossaryTerm.ANCHOR.eq(glossary.qualifiedName)) + .stream() + .map { it.guid } + .toList() + Atlan.getDefaultClient().assets.delete(terms, AtlanDeleteType.PURGE) + Glossary.purge(Glossary.findByName(glossaryName).guid) + teardown(context.failedTests.size() > 0) + } +}