From ef9b9fa5f7fd1cdfd8d6381bb26f72aeebc71b85 Mon Sep 17 00:00:00 2001 From: Daniel Tomes <10840635+GeekSheikh@users.noreply.github.com> Date: Sun, 15 Aug 2021 18:55:44 -0400 Subject: [PATCH] Enable streaming (#24) * enable streaming * scaffolding for simpleExpr validation * completed refactor -- tests outstanding * refactor and enablement complete * updated readme * added implicit boolean * added filter for summary report * Update ValidatorTestSuite (#19) * Update Validator tests with API changes. * Add tests for implicit and explicit expression rules. * imported outstanding spark sql functions * Add test suite for Rules class. * Add tests for RuleSet class. * Add test for complex expressions on aggregates. * Fix isGrouped bug when groupBys array is empty by default or explicitly set. * Fix overloaded add function that merges 2 RuleSets. * Add ignoreCase and invertMatch to ValidateStrings and ValidateNumerics rule types. * Update documentation with latest features in categorical Rules. Co-authored-by: Daniel Tomes [GeekSheikh] <10840635+geeksheikh@users.noreply.github.com> * Update sbt (#23) * simple update to build sbt * Add scoverage. Co-authored-by: Will Girten * removed unused imports * Accept expanded sequence of Rules to RuleSet Class. * cleaning up (#30) * cleaning up * removed dependencies from assembly * Fix whitespaces and special characters in Rule Names (#25) * Parse white spaces and special characters in failure report. * Update variable name with more meaningful name. * Add method to remove whitespace and special characters from Rule names. * Simplify ruleName public accessor. * Change special character replacement to underscores. * Update warning messages and assign private ruleName only once. * Update demo notebook (#33) * Update demo notebook with examples of latest features added. 
* added scala demo example Co-authored-by: Daniel Tomes [GeekSheikh] <10840635+geeksheikh@users.noreply.github.com> * implemented new inclusive boundaries option (#32) * implemented new inclusive boundaries option * enhanced logic for upper and lower inclusivity * readme updated * Update validation logic for Bounds class. Add test case for inclusive boundary rules. (#35) Co-authored-by: Will Girten <47335283+goodwillpunning@users.noreply.github.com> Co-authored-by: Will Girten <47335283+goodwillpunning@users.noreply.github.com> Co-authored-by: Will Girten --- README.md | 314 +++++++-- build.sbt | 29 +- codecov.yml | 7 + demo/Example.scala | 300 ++++++-- demo/Rules_Engine_Examples.dbc | Bin 5198 -> 55766 bytes demo/Rules_Engine_Examples.html | 49 +- project/plugins.sbt | 1 + .../com/databricks/labs/validation/Rule.scala | 255 ++++--- .../databricks/labs/validation/RuleSet.scala | 204 +++--- .../databricks/labs/validation/RuleType.scala | 1 + .../labs/validation/Validator.scala | 208 ++---- .../labs/validation/utils/Helpers.scala | 18 - .../labs/validation/utils/MinMaxFunc.scala | 18 - .../utils/SparkSessionWrapper.scala | 1 + .../labs/validation/utils/Structures.scala | 28 +- .../labs/validation/RuleSetTestSuite.scala | 187 +++++ .../labs/validation/RuleTestSuite.scala | 111 +++ .../labs/validation/ValidatorTestSuite.scala | 659 ++++++++++++++---- 18 files changed, 1702 insertions(+), 688 deletions(-) create mode 100644 codecov.yml delete mode 100644 src/main/scala/com/databricks/labs/validation/utils/Helpers.scala delete mode 100644 src/main/scala/com/databricks/labs/validation/utils/MinMaxFunc.scala create mode 100644 src/test/scala/com/databricks/labs/validation/RuleSetTestSuite.scala create mode 100644 src/test/scala/com/databricks/labs/validation/RuleTestSuite.scala diff --git a/README.md b/README.md index 1648c26..9a04923 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ [![Scala 
CI](https://github.com/databrickslabs/dataframe-rules-engine/actions/workflows/scala.yml/badge.svg?branch=master)](https://github.com/databrickslabs/dataframe-rules-engine/actions/workflows/scala.yml) [![codecov](https://codecov.io/gh/databrickslabs/dataframe-rules-engine/branch/master/graph/badge.svg?token=6DEXO6I0BG)](https://codecov.io/gh/databrickslabs/dataframe-rules-engine) # dataframe-rules-engine -Simplified Validation for Production Workloads +Simplified Validation at scale for Production Spark Workloads on streaming / standard DataFrames and DataSets ## Project Description As pipelines move from bronze to gold, it's very common that some level of governance be performed in @@ -23,13 +23,13 @@ move the data to production and/or in-line (coming soon). ## Getting Started Add [the dependency](https://mvnrepository.com/artifact/com.databricks.labs/dataframe-rules-engine_2.12) to your build.sbt or pom.xml -`libraryDependencies += "com.databricks.labs" %% "dataframe-rules-engine" % "0.1.2"` +`libraryDependencies += "com.databricks.labs" %% "dataframe-rules-engine" % "0.2.0"` ``` com.databricks.labs dataframe-rules-engine_2.12 - 0.1.2 + 0.2.0 ``` @@ -44,50 +44,160 @@ import com.databricks.labs.validation.utils.Structures._ import com.databricks.labs.validation._ ``` -As of version 0.1 There are three primary rule types -* Boundary Rules -* Categorical Rules (Strings and Numerical) -* Date Rules (in progress) +## Streaming Update +As of version 0.2 streaming dataframes are fully supported -Rules can be composed of: -* simple column references `col("my_column_name")` -* complex columns `col("Revenue") - col("Cost")` -* aggregate columns `min("ColumnName")` +## Quickstart +The basic steps to validating data with the rules engine are: +* create rules +* create ruleset +* validate -Rules can be applied to simple DataFrames or grouped Dataframes. To use a grouped dataframe simply pass -your dataframe into the RuleSet and pass one or more columns in as `by` columns. 
This will apply the rule -at the group level which can be helpful at times. +Below are some examples to demonstrate the basic process. + +```scala +val myRules = ??? // Definition of my base rules +val myAggRules = ??? // Definition of my agg rules +val validationResults = RuleSet(df) + .add(myRules) + .validate() + +// or for validation executed at a grouped level +val validationResults = RuleSet(df, by = "myGroup") + .add(myAggRules) + .validate() + +// grouping across multiple columns +val validationResults = RuleSet(df, by = Array("myGroupA", "myGroupB")) + .add(myAggRules) + .validate() +``` + +## Rules + +There are four primary rule types +* [Simple Rules](#simple-rule) +* [Boundary Rules](#boundary-rules) +* [Implicit Boolean rules](#implicit-boolean-rules) +* [Categorical Rule](#categorical-rules) + +These rule types can be applied to: +* Streaming Datasets +* Stateful Streaming Datasets +* Grouped Datasets + * Important distinction as the rules apply only within the range of the grouped keys when a grouped dataset + is passed for testing ### Simple Rule -`val validateRetailPrice = Rule("Retail_Price_Validation", col("retail_price"), Bounds(0.0, 6.99))` +A rule with a name, a check column, and an allowed value +```scala +Rule("Require_specific_version", col("version"), lit(0.2)) +Rule("Require_version>=0.2", col("version") >= 0.2, lit(true)) +``` -### List of Rules -NOTE: While validations can be performed on aggregate cols (whether the DF is grouped or not) aggregate columns -only return a single value - as such the failed count will be set to 1 for failures so for aggregate columns -the `Invalid_Count` is rendered somewhat useless. Better granularity can be seen in the report when not using -aggregates. +### Implicit Boolean Rules +These rules are the same as columnar expression based rules except they don't require the comparison against `lit(true)`. 
+A type validation is done on the column before validation begins to ensure that the resolved expression resolves to +a boolean type. ```scala -val specializedRules = Array( - // Example of aggregate column - Rule("Reasonable_sku_counts", count(col("sku")), Bounds(lower = 20.0, upper = 200.0)), - // Example of calculated column from optimized UDF - Rule("Max_allowed_discount", - max(getDiscountPercentage(col("retail_price"), col("scan_price"))), - Bounds(upper = 90.0)), - // Example distinct values rule - Rule("Unique_Skus", countDistinct("sku"), Bounds(upper = 1.0)) +// Passes where result is true +Rule("Require_version>=0.2", col("version") >= 0.2) +Rule("Require_version>=0.2", col("myDFBooleanCol")) +``` + +Note that the following is true, conceptually, since the implicit boolean compares against an implicit true. This +just means that when you're using simple rules that resolve to true or false, you don't have to state it explicitly. +```scala +Rule("Require_version>=0.2", col("version") >= 0.2, lit(true)) == Rule("Require_version>=0.2", col("version") >= 0.2) +``` + +### Boundary Rules +* Boundary Rules + + **Example:** `Rule("Longitude Range", col("longitude"), Bounds(-180, 180, lowerInclusive = true, upperInclusive = true))` + * Rules that fail if the input column is outside of the specified boundaries + * `Bounds(lower = 0.0)` **FAILS** when value <= 0.0 + * Boundary rules are created when the validation is of type `Bounds()`. + * The default Bounds() are `Bounds(lower: Double = Double.NegativeInfinity, upper: Double = Double.PositiveInfinity, + lowerInclusive: Boolean = false, upperInclusive: Boolean = false)`; + therefore, if a lower / upper is not specified, any numeric value will pass. + * **Inclusive Vs Exclusive:** When `Bounds` are defined, the user can decide whether to make a Boundary + inclusive or exclusive. The **default** is exclusive. 
+ * **Inclusive Example:** `Bounds(0.0, lowerInclusive = true)` **FAILS** when value < 0.0 + * **Exclusive Example:** `Bounds(0.0, 10.0)` **FAILS** when value <= 0.0 or value >= 10.0 + * **Inclusive Example:** `Bounds(0.0, 10.0, lowerInclusive = true, upperInclusive = true)` **FAILS** when value < 0.0 or value > 10.0 + * **Mixed Inclusion Example:** `Bounds(0.0, 10.0, lowerInclusive = true)` **FAILS** when value < 0.0 or value >= 10.0 + * Grouped Boundaries often come in pairs, to minimize the amount of rules that must be manually created, helper logic + was created to define [MinMax Rules](#minmax-rules) +* Categorical Rules (Strings and Numerical) + * Rules that fail if the result of the input column is not in a list of values +* Expression Rules + * Rules that fail if the input column != defined expression + +#### Additional Boundary Rule Examples +**Non-grouped RuleSet** - Passes when the retail_price in a record is exclusive between the Bounds +```scala +// Passes when retail_price > 0.0 AND retail_price < 6.99 +Rule("Retail_Price_Validation", col("retail_price"), Bounds(0.0, 6.99)) +// Passes when retail_price >= 0.0 AND retail_price <= 6.99 +Rule("Retail_Price_Validation", col("retail_price"), Bounds(0.0, 6.99, lowerInclusive = true, upperInclusive = true)) +// Passes when retail_price > 0.0 +Rule("Retail_Price_GT0", col("retail_price"), Bounds(lower = 0.0)) +// Passes when retail_price >= 0.0 +Rule("Retail_Price_GT0", col("retail_price"), Bounds(lower = 0.0, lowerInclusive = true)) +``` +**Grouped RuleSet** - Passes when the minimum value in the group is within (exclusive) the boundary +```scala +// max(retail_price) > 0.0 +Rule("Retail_Price_Validation", col("retail_price"), Bounds(lower = 0.0)) +// min(retail_price) > 0.0 && min(retail_price) < 1000.0 within the group +Rule("Retail_Price_Validation", col("retail_price"), Bounds(0.0, 1000.0)) +``` + +### Categorical Rules +There are two types of categorical rules which are used to validate against a pre-defined list of valid 
+values. As of 0.2 accepted categorical types are String, Double, Int, Long but any types outside of this can +be input as an array() column of any type so long as it can be evaluated against the input column. + +```scala +val catNumerics = Array( +Rule("Valid_Stores", col("store_id"), Lookups.validStoreIDs), +Rule("Valid_Skus", col("sku"), Lookups.validSkus), +Rule("Valid_zips", array_contains(col("zips"), expr("x -> f(x)")), lit(true)) +) + +val catStrings = Array( +Rule("Valid_Regions", col("region"), Lookups.validRegions) ) ``` +An optional `ignoreCase` parameter can be specified when evaluating against a list of String values to ignore or apply +case-sensitivity. By default, input columns will be evaluated against a list of Strings with case-sensitivity applied. +```scala +Rule("Valid_Regions", col("region"), Lookups.validRegions, ignoreCase=true) +``` + +Furthermore, the evaluation of categorical rules can be inverted by specifying `invertMatch=true` as a parameter. +This can be handy when defining a Rule that an input column cannot match list of invalid values. For example: +```scala +Rule("Invalid_Skus", col("sku"), Lookups.invalidSkus, invertMatch=true) +``` + ### MinMax Rules -It's very common to build rules to validate min and max allowable values so there's a helper function -to speed up this process. It really only makes sense to use minmax when specifying both an upper and a lower bound -in the Bounds object. Using this method in the example below will only require three lines of code instead of the 6 -if each rule was built manually +This is not considered a rule type as it isn't actually a rule type but rather a helper that builds in-between +rules for you when validating grouped datasets with agg functions. + +It's very common to build rules on a grouped dataset to validate some upper and lower boundary within a group +so there's a helper function to speed up this process. 
+It really only makes sense to use minmax when specifying both an upper and a lower bound on a grouped dataset as +otherwise it's magically handled for you and it doesn't make sense. + +Using this method in the example below will only require three lines of code instead of the 6 if each rule were built manually. +The same inclusive / exclusive overrides are available here as defined above. ```scala val minMaxPriceDefs = Array( MinMaxRuleDef("MinMax_Sku_Price", col("retail_price"), Bounds(0.0, 29.99)), - MinMaxRuleDef("MinMax_Scan_Price", col("scan_price"), Bounds(0.0, 29.99)), + MinMaxRuleDef("MinMax_Scan_Price", col("scan_price"), Bounds(0.0, 29.99, upperInclusive = true)), MinMaxRuleDef("MinMax_Cost", col("cost"), Bounds(0.0, 12.0)) ) @@ -97,52 +207,130 @@ val minMaxPriceRules = RuleSet.generateMinMaxRules(minMaxPriceDefs: _*) OR -- simply add the list of minmax rules or simple individual rule definitions to an existing RuleSet (if not using builder pattern) ```scala -val someRuleSet = RuleSet(df) +val someRuleSet = RuleSet(df, by = "region_id") someRuleSet.addMinMaxRules(minMaxPriceDefs: _*) someRuleSet.addMinMaxRules("Retail_Price_Validation", col("retail_price"), Bounds(0.0, 6.99)) ``` -### Categorical Rules -There are two types of categorical rules which are used to validate against a pre-defined list of valid -values. Currently (as of 0.1) accepted categorical types are String, Double, Int, Long +Without minMax ```scala -val catNumerics = Array( -Rule("Valid_Stores", col("store_id"), Lookups.validStoreIDs), -Rule("Valid_Skus", col("sku"), Lookups.validSkus) +import com.databricks.labs.validation.RuleSet +val validationReport = RuleSet(df, by = "region_id") + .add(Rule("Min_Sku_Price", min(col("retail_price")), Bounds(0.0))) + .add(Rule("Max_Sku_Price", max(col("retail_price")), Bounds(upper = 29.99, upperInclusive = true))) +// PLUS 4 more rules. 
+//.add(Rule(...)) +//.add(Rule(...)) +//.add(Rule(...)) +//.add(Rule(...)) +``` + +## Lists of Rules +A list of rules can be created as an Array and added to a RuleSet to simplify Rule management. It's very common +for more complex sets of rules to be rolled up and packaged by business group / region / etc. These are also +commonly packaged into logical structures (like case classes) and unrolled later and then unpacked into the right +rule sets. This is made easy through the ability to add lists of rules in various ways. +```scala +val specializedRules = Array( + // Example of aggregate column + Rule("Reasonable_sku_counts", count(col("sku")), Bounds(lower = 20.0, upper = 200.0)), + // Example of calculated column from catalyst UDF def getDiscountPercentage(retailPrice: Column, scanPrice: Column): Column = ??? + Rule("Max_allowed_discount", + max(getDiscountPercentage(col("retail_price"), col("scan_price"))), + Bounds(upper = 90.0)), + // Example distinct values rule + Rule("Unique_Skus", countDistinct("sku"), Bounds(upper = 1.0)) ) +RuleSet(df, by = "store").add(specializedRules) +``` +Common Real World Example +```scala +case class GlobalRules(regionID: Int, bu: String, subOrg: String, rules: Array[Rule]*) +// a structure like this will be fed from all over the world with their own specific rules that can all be tested +// on the global source of truth +``` -val catStrings = Array( -Rule("Valid_Regions", col("region"), Lookups.validRegions) +## Constructing the Check Column +So far, we've only discussed simple column references as the input column, but remember, a column is just an +expression and thus, the check column can actually be a check expression +* simple column references `col("my_column_name")` +* complex columns `col("Revenue") - col("Cost")` +* aggregates `min("ColumnName")` + * It can be confusing to mix aggregate and non-aggregate input columns. 
It's generally better to create two Rule Sets + * If any of the rules' input columns are aggregates and no groupBy columns are provided + into the RuleSet the dataframe will be grouped by all df columns. + +## Grouped Datasets +Rules can be applied to simple DataFrames or grouped Dataframes. To use a grouped dataframe simply pass +your dataframe into the RuleSet and pass one or more columns in as `by` columns. This will apply the rule +at the group level which can be helpful at times. Any input column expressions passed into a RuleSet must be able +to be evaluated inside of the `.agg()` of a `groupedDataframe` +```scala +RuleSet(df, by = "region_id") +// +RuleSet(df, by = Seq("region_id", "store_id")) +``` + +Below is a more real-world example of validating a dataset and another way to instantiate a RuleSet. +```scala +def momValue(c: Column): Column = coalesce(lag(c, 1).over(regionalTimeW), c) / c + +val regionalTimeW = Window.partitionBy(col("region_id")).orderBy(col("year"), col("month")) +val regionalRules = Array( + // No region has more than 42 stores, thus 100 is a safe fat-finger check number + Rule("storeCount", countDistinct(col("store_id")), Bounds(0, 100, lowerInclusive = true)), + // month over month sales should be pretty stable within the region, if it's not, flag for review + Rule("momSalesIncrease", momValue(col("total_sales")), Bounds(0.25, 4.0, lowerInclusive = true)) ) +RuleSet(df, regionalRules, by = "region_id") ``` + ### Validation Now that you have some rules built up... it's time to build the ruleset and validate it. As mentioned above, -the dataframe can be simple or groupBy column[s] can be passed in (as string) to perform validation at the -grouped level. +the dataframe can be a simple df or a grouped df by passing column[s] to perform validation at the +defined grouped level. 
+ +The below is meant as a theoretical example; it will not execute because rules containing aggregate input columns +AND non-aggregate input columns are defined throughout the rules added to the RuleSet. In practice if rules need to +be validated at different levels, it's best to complete a validation at each level with a RuleSet at that level. ```scala -val (rulesReport, passed) = RuleSet(df) +val validationResults = RuleSet(df) .add(specializedRules) .add(minMaxPriceRules) .add(catNumerics) .add(catStrings) .validate() -val (rulesReport, passed) = RuleSet(df, Array("store_id")) +val validationResults = RuleSet(df, Array("store_id")) .add(specializedRules) .add(minMaxPriceRules) .add(catNumerics) .add(catStrings) .validate() ``` -The validation returns two items, a boolean (true/false) as to whether all rules passed or not. If a single rule -fails the `passed` value above will return false. The `rulesReport` is a summary of which rules failed and, -if the input column was not an aggregate column, the number of failed records. An image of the report is below. -![Alt Text](images/rulesReport.png) + +The `validate()` method returns a case class of ValidationResults which is defined as: +```scala +ValidationResults(completeReport: DataFrame, summaryReport: DataFrame) +``` +As you can see, there are two reports included, a `completeReport` and a `summaryReport`. +#### The completeReport +`validationResults.completeReport.show()` + +The complete report is verbose and will add all rule validations to the right side of the original df +passed into RuleSet. Note that if the RuleSet is grouped, the result will include the groupBy columns and all rule +evaluation specs and results + +#### The summaryReport +`validationResults.summaryReport.show()` + +The summary report is meant to be just that, a summary of the failed rules. 
This will return only the records that +failed and only the rules that failed for that record; thus, if the `summaryReport.isEmpty` then all rules passed. ## Next Steps Clearly, this is just a start. This is a small package and, as such, a GREAT place to start if you've never -contributed to a project before. Please feel free to fork the repo and/or submit PRs. I'd love to see what +contributed to a project before. Please feel free to fork the repo and/or submit PRs. We'd love to see what you come up with. If you're not much of a developer or don't have the time you can still contribute! Please post your ideas in the issues and label them appropriately (i.e. bug/enhancement) and someone will review it and add it as soon as possible. @@ -150,23 +338,7 @@ and add it as soon as possible. Some ideas of great adds are: * Add a Python wrapper * Enable an external table to host the rules and have rules compiled from externally managed source (GREAT idea from Sri Tikkireddy) -* Refactor Rule and/or Validator to implement an Abstract class or trait - * There's a clear opportunity to abstract away some of the redundancy between rule types. -* Implement a fast runner - * Optimize performance by failing fast for big data. Smart sampling could be implemented to review subsets - of columns/records and look for failures to enable a faster failure. -* Implement tests - * Yeah, I know...I should have done this on day 0...but...time is always an issue. I plan to come back and add - tests but if you'd like to add tests, that's a great way to learn code base (especially one this small) -* Implement the date time rule (or somet other custom rule) - * The date time rule has already been scaffolded, it just needs to be built out - * What kind of complex rules does your business require that isn't possible here -* Add a quarantine pattern - * Enable a configuration to a Ruleset to identify records that didn't pass the validations and add - them to a predefined quarantine zone. 
-* Add logic to attempt to auto-handle certain types of failures based on common business patterns -* When Delta Pipelines feature is release, simplify this package by wrapping the logic with pipelines. - +* Implement smart sampling for large datasets and faster validation ## Legal Information This software is provided as-is and is not officially supported by Databricks through customer technical support channels. @@ -175,9 +347,7 @@ Please see the [legal agreement](LICENSE.txt) and understand that issues with th not be answered or investigated by Databricks Support. ## Core Contribution team -* Lead Developer: [Daniel Tomes](https://www.linkedin.com/in/tomes/), Practice Leader, Databricks -* Developer: your name here Contribute to the project - +* Lead Developer: [Daniel Tomes](https://www.linkedin.com/in/tomes/), Principal Architect, Databricks ## Project Support Please note that all projects in the /databrickslabs github account are provided for your exploration only, diff --git a/build.sbt b/build.sbt index b7ceded..cac2193 100644 --- a/build.sbt +++ b/build.sbt @@ -2,7 +2,7 @@ name := "dataframe-rules-engine" organization := "com.databricks.labs" -version := "0.1.2" +version := "0.2.0" scalaVersion := "2.12.12" scalacOptions ++= Seq("-Xmax-classfile-name", "78") @@ -23,21 +23,23 @@ publishTo := Some( libraryDependencies += "org.apache.spark" %% "spark-core" % "3.0.1" % Provided libraryDependencies += "org.apache.spark" %% "spark-sql" % "3.0.1" % Provided -libraryDependencies += "org.scalactic" %% "scalactic" % "3.2.6" libraryDependencies += "org.scalatest" %% "scalatest" % "3.2.6" % Test +run in Compile := Defaults.runTask(fullClasspath in Compile, mainClass in(Compile, run), runner in(Compile, run)).evaluated +runMain in Compile := Defaults.runMainTask(fullClasspath in Compile, runner in(Compile, run)).evaluated + lazy val excludes = jacocoExcludes in Test := Seq() -lazy val jacoco = jacocoReportSettings in test :=JacocoReportSettings( +lazy val jacoco = 
jacocoReportSettings in test := JacocoReportSettings( "Jacoco Scala Example Coverage Report", None, - JacocoThresholds (branch = 100), + JacocoThresholds(branch = 100), Seq(JacocoReportFormats.ScalaHTML, JacocoReportFormats.CSV), "utf-8") val jacocoSettings = Seq(jacoco) -lazy val jse = (project in file (".")).settings(jacocoSettings: _*) +lazy val jse = (project in file(".")).settings(jacocoSettings: _*) fork in Test := true javaOptions ++= Seq("-Xms512M", "-Xmx2048M", "-XX:+CMSClassUnloadingEnabled") @@ -45,7 +47,22 @@ testOptions in Test += Tests.Argument(TestFrameworks.ScalaTest, "-oD") lazy val commonSettings = Seq( - version := "0.1.2", + version := "0.2.0", organization := "com.databricks.labs", scalaVersion := "2.12.12" ) + +assemblyMergeStrategy in assembly := { + case PathList("META-INF", xs@_*) => MergeStrategy.discard + case x => MergeStrategy.first +} +assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false) + +// exclude scala-library dependency +assemblyExcludedJars in assembly := { + val cp = (fullClasspath in assembly).value + cp filter { f => + f.data.getName.contains("spark-core") || + f.data.getName.contains("spark-sql") + } +} \ No newline at end of file diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 0000000..ac41890 --- /dev/null +++ b/codecov.yml @@ -0,0 +1,7 @@ +ignore: + - "src/test/**/*" + - "target/**/*" + - "images/**/*" + - "project/**/*" + - ".github/**/*" + - "src/main/scala/com/databricks/labs/validation/utils/SparkSessionWrapper.scala" \ No newline at end of file diff --git a/demo/Example.scala b/demo/Example.scala index ec278ed..ff4ffbb 100644 --- a/demo/Example.scala +++ b/demo/Example.scala @@ -1,65 +1,24 @@ -package com.databricks.labs.validation - -import com.databricks.labs.validation.utils.{Lookups, SparkSessionWrapper} +// Databricks notebook source import com.databricks.labs.validation.utils.Structures._ -import org.apache.spark.sql.Column +import 
com.databricks.labs.validation._ import org.apache.spark.sql.functions._ +import org.apache.spark.sql.{Column, DataFrame} + +// COMMAND ---------- + +// MAGIC %md +// MAGIC # Sample Dataset + +// COMMAND ---------- -object Example extends App with SparkSessionWrapper { - import spark.implicits._ - - /** - * Validation example - * Passing pre-built array of rules into a RuleSet and validating a non-grouped dataframe - */ - - /** - * Example of a proper UDF to simplify rules logic. Simplification UDFs should take in zero or many - * columns and return one column - * @param retailPrice column 1 - * @param scanPrice column 2 - * @return result column of applied logic - */ - def getDiscountPercentage(retailPrice: Column, scanPrice: Column): Column = { - (retailPrice - scanPrice) / retailPrice - } - - // Example of creating array of custom rules - val specializedRules = Array( - Rule("Reasonable_sku_counts", count(col("sku")), Bounds(lower = 20.0, upper = 200.0)), - Rule("Max_allowed_discount", - max(getDiscountPercentage(col("retail_price"), col("scan_price"))), - Bounds(upper = 90.0)), - Rule("Retail_Price_Validation", col("retail_price"), Bounds(0.0, 6.99)), - Rule("Unique_Skus", countDistinct("sku"), Bounds(upper = 1.0)) - ) - - // It's common to generate many min/max boundaries. 
These can be generated easily - // The generator function can easily be extended or overridden to satisfy more complex requirements - val minMaxPriceDefs = Array( - MinMaxRuleDef("MinMax_Sku_Price", col("retail_price"), Bounds(0.0, 29.99)), - MinMaxRuleDef("MinMax_Scan_Price", col("scan_price"), Bounds(0.0, 29.99)), - MinMaxRuleDef("MinMax_Cost", col("cost"), Bounds(0.0, 12.0)) - ) - - val minMaxPriceRules = RuleSet.generateMinMaxRules(minMaxPriceDefs: _*) - val someRuleSet = RuleSet(df) - someRuleSet.addMinMaxRules(minMaxPriceDefs: _*) - someRuleSet.addMinMaxRules("Retail_Price_Validation", col("retail_price"), Bounds(0.0, 6.99)) - - - val catNumerics = Array( - Rule("Valid_Stores", col("store_id"), Lookups.validStoreIDs), - Rule("Valid_Skus", col("sku"), Lookups.validSkus) - ) - - val catStrings = Array( - Rule("Valid_Regions", col("region"), Lookups.validRegions) - ) - - //TODO - validate datetime - // Test, example data frame - val df = sc.parallelize(Seq( +object Lookups { + final val validStoreIDs = Array(1001, 1002) + final val validRegions = Array("Northeast", "Southeast", "Midwest", "Northwest", "Southcentral", "Southwest") + final val validSkus = Array(123456, 122987, 123256, 173544, 163212, 365423, 168212) + final val invalidSkus = Array(9123456, 9122987, 9123256, 9173544, 9163212, 9365423, 9168212) +} + +val df = sc.parallelize(Seq( ("Northwest", 1001, 123456, 9.32, 8.99, 4.23, "2020-02-01 00:00:00.000"), ("Northwest", 1001, 123256, 19.99, 16.49, 12.99, "2020-02-01"), ("Northwest", 1001, 123456, 0.99, 0.99, 0.10, "2020-02-01"), @@ -75,19 +34,218 @@ object Example extends App with SparkSessionWrapper { .withColumn("create_ts", 'create_ts.cast("timestamp")) .withColumn("create_dt", 'create_ts.cast("date")) - // Doing the validation - // The validate method will return the rules report dataframe which breaks down which rules passed and which - // rules failed and how/why. 
The second return value returns a boolean to determine whether or not all tests passed -// val (rulesReport, passed) = RuleSet(df, Array("store_id")) - val (rulesReport, passed) = RuleSet(df) - .add(specializedRules) - .add(minMaxPriceRules) - .add(catNumerics) - .add(catStrings) - .validate(2) +// COMMAND ---------- + +display(df) + +// COMMAND ---------- + +// MAGIC %md +// MAGIC # Rule Types +// MAGIC There are several Rule types available: +// MAGIC +// MAGIC 1. Categorical (numerical and string) - used to validate if row values fall in a pre-defined list of values, e.g. lookups +// MAGIC 2. Boundaries - used to validate if row values fall within a range of numerical values +// MAGIC 3. Expressions - used to validate if row values pass expressed conditions. These can be simple expressions like a Boolean column `col('valid')`, or complex, like `col('a') - col('b') > 0.0` + +// COMMAND ---------- + +// MAGIC %md +// MAGIC ### Example 1: Writing your first Rule +// MAGIC Let's look at a very simple example... + +// COMMAND ---------- + +// First, begin by defining your RuleSet by passing in your input DataFrame +val myRuleSet = RuleSet(df) + +// Next, define a Rule that validates that the `store_id` values fall within a list of pre-defined Store Ids +val validStoreIdsRule = Rule("Valid_Store_Ids_Rule", col("store_id"), Array(1001, 1002)) - rulesReport.show(200, false) -// rulesReport.printSchema() +// Finally, add the Rule to the RuleSet and validate! +val validationResults = myRuleSet.add(validStoreIdsRule).validate() +// COMMAND ---------- +// MAGIC %md +// MAGIC ## Viewing the Validation Results +// MAGIC +// MAGIC The result from calling `validate()` on your RuleSet will be 2 DataFrames - a complete report and a summary report. +// MAGIC +// MAGIC #### The completeReport +// MAGIC The complete report is verbose and will add all rule validations to the right side of the original df +// MAGIC passed into RuleSet. 
Note that if the RuleSet is grouped, the result will include the groupBy columns and all rule +// MAGIC evaluation specs and results +// MAGIC +// MAGIC #### The summaryReport +// MAGIC The summary report is meant to be just that, a summary of the failed rules. This will return only the records that +// MAGIC failed and only the rules that failed for that record; thus, if the `summaryReport.isEmpty` then all rules passed. + +// COMMAND ---------- + +// Let's look at the completeReport from the example above +display(validationResults.completeReport) + +// COMMAND ---------- + +// MAGIC %md +// MAGIC ## Example 2: Boundaries +// MAGIC Boundary Rules can be used to validate if row values fall within a range of numerical values. +// MAGIC +// MAGIC It's quite common to generate many min/max boundaries, and these can be passed as an Array of Rules. + +// COMMAND ---------- + +// Let's define several Boundary Rules to apply +val minMaxPriceDefs = Array( + MinMaxRuleDef("MinMax_Sku_Price", col("retail_price"), Bounds(0.0, 29.99)), + MinMaxRuleDef("MinMax_Scan_Price", col("scan_price"), Bounds(0.0, 29.99)), + MinMaxRuleDef("MinMax_Cost", col("cost"), Bounds(0.0, 12.0)) +) + +// Add all the Rules at once using the array of Rules +val minMaxPriceRules = RuleSet(df).addMinMaxRules(minMaxPriceDefs: _*) + +// Validate rows against all the Boundary Rules +val validationResults = minMaxPriceRules.validate() + +// Let's look at the failed rows this time +display(validationResults.summaryReport) + +// COMMAND ---------- + +// MAGIC %md +// MAGIC ## Example 3: Expressions +// MAGIC Expressions can be used to validate if row values pass expressed conditions. 
+// MAGIC +// MAGIC These can be simple expressions like a Boolean column `col('valid')`, or complex, like `col('a') - col('b') > 0.0` + +// COMMAND ---------- + +// Ensure that each product has a distinct Product SKU +val distinctProductsRule = Rule("Unique_Skus", countDistinct("sku"), Bounds(upper = 1.0)) + +// Rules can even be used in conjunction with user defined functions +def getDiscountPercentage(retailPrice: Column, scanPrice: Column): Column = { + (retailPrice - scanPrice) / retailPrice } + +val maxDiscountRule = Rule("Max_allowed_discount", + max(getDiscountPercentage(col("retail_price"), col("scan_price"))), + Bounds(upper = 90.0)) + +// Notice the builder pattern. The idea is to build up your rules and then add them to your RuleSet[s]. +// RuleSets can be combined using the RuleSet.add(ruleSet: RuleSet) method +var productRuleSet = RuleSet(df).add(distinctProductsRule) + .add(maxDiscountRule) + +// ...or add Rules together as an Array +val specializedProductRules = Array(distinctProductsRule, maxDiscountRule) +productRuleSet = RuleSet(df).add(specializedProductRules: _*) + +val validationResults = productRuleSet.validate() + +display(validationResults.summaryReport) + +// COMMAND ---------- + +// MAGIC %md +// MAGIC ### Inverting matches +// MAGIC We can even invert the match to validate row values do not fall in a list of values + +// COMMAND ---------- + +// Invert match to ensure values are **not** in a LOV +val invalidStoreIdsRule = Rule("Invalid_Store_Ids_Rule", col("store_id"), Array(9001, 9002, 9003), invertMatch = true) + +// COMMAND ---------- + +// MAGIC %md +// MAGIC ### Case-sensitivity +// MAGIC Case-sensitivity is enabled by default. However, an optional `ignoreCase` parameter can be used to apply/not apply case sensitivity to a list of String values + +// COMMAND ---------- + +// Numerical categorical rules. Build a list of values to be validated against. 
+val catNumerics = Array( + // Only allow store_ids in my validStoreIDs lookup + Rule("Valid_Stores", col("store_id"), Lookups.validStoreIDs), + // Validate against a pre-built list of skus that have been verified to be accurate + // Currently this is manually created for demo but can easily be created from a dataframe, etc. + Rule("Valid_Skus", col("sku"), Lookups.validSkus), + // Ensure that the skus do not match any of the invalid skus defined earlier + Rule("Invalid_Skus", col("sku"), Lookups.invalidSkus, invertMatch=true) +) + +// Validate strings as well as numericals. They don't need to be in a separate array, it's just done here for demonstration +val catStrings = Array( + // Case-sensitivity is enabled by default. However, `ignoreCase` parameter can be used + // to apply/not apply case sensitivity to a list of String values + Rule("Valid_Regions", col("region"), Lookups.validRegions, ignoreCase=true) +) + +// COMMAND ---------- + +// MAGIC %md +// MAGIC # Aggregates +// MAGIC Dataframes can be simple or a Seq of columns can be passed in as "bys" for the DataFrame to be grouped by.
+// MAGIC If the dataframe is grouped validations will be per group + +// COMMAND ---------- + +// Grouped Dataframe +// Let's assume we want to perform validation by some grouping of one or many columns +val validationResults = RuleSet(df, Array("store_id")) + .add(specializedProductRules) + .add(minMaxPriceRules) + .add(catNumerics) + .add(catStrings) + .validate() + +display(validationResults.summaryReport) + +// COMMAND ---------- + +// MAGIC %md +// MAGIC ## Streaming DataFrames +// MAGIC Rules can be applied to streaming DataFrames, as well. + +// COMMAND ---------- + +val yellowTaxi = spark.readStream + .format("delta") + .option("maxBytesPerTrigger", (1024 * 1024 * 4).toString) + .load("/databricks-datasets/nyctaxi/tables/nyctaxi_yellow") + +// COMMAND ---------- + +val validPaymentTypes = Array("Cash", "Credit") +val rangeRules = Array( + MinMaxRuleDef("Pickup Longitude On Earth", 'pickup_longitude, Bounds(-180, 180)), + MinMaxRuleDef("Dropoff Longitude On Earth", 'dropoff_longitude, Bounds(-180, 180)), + MinMaxRuleDef("Pickup Latitude On Earth", 'pickup_latitude, Bounds(-90, 90)), + MinMaxRuleDef("Dropoff Latitude On Earth", 'dropoff_latitude, Bounds(-90, 90)), + MinMaxRuleDef("Realistic Passenger Count", 'passenger_count, Bounds(1, 10)) +) + +val taxiBaseRules = Array( + Rule("dropoff after pickup", (unix_timestamp('dropoff_datetime) * 1.05).cast("long") >= unix_timestamp('pickup_datetime)), + Rule("total is sum of parts", 'fare_amount + 'extra + 'mta_tax + 'tip_amount + 'tolls_amount, 'total_amount), + Rule("total greater than 0", 'total_amount > 0), + Rule("valid payment types", lower('payment_type), validPaymentTypes) +) + +val yellowTaxiReport = RuleSet(yellowTaxi) + .add(taxiBaseRules: _*) + .addMinMaxRules(rangeRules: _*) + .validate() + +// COMMAND ---------- + +display( + yellowTaxiReport.summaryReport +) + +// COMMAND ---------- + + diff --git a/demo/Rules_Engine_Examples.dbc b/demo/Rules_Engine_Examples.dbc index 
f4770a77b4ac2700be2b59dca5406415b3c175e9..f1331031198698d3389f8dfd5cc0517444417bfa 100644 GIT binary patch literal 55766 zcmV)6K*+yPO9KQH00;;O0A`U5Qvd(}000000000002=@V08({qWpiIeZf9w3WnV>j zVQp}1WpgfbV_|Gz{>lIV?7a(-CXhQ(mJ{d7Hi{0w4>znKFyYQ~nn}_S0 z-RirC`!|;lZ|=9d)$?b!@Al5=g?INaR_oX6%a_~L?)CcqFRR^u-mI?P++M=#c0c#R zzkPXk^XAp<+2X74S^x7sd>MbAp7l?(xLk)dd00QdlV4rmUhiIRVM)Rrzqxq`U&P;@ z;Bviucy$v5aO2^pf7o3Ab#ZyK-tB(-?8*8zT)We4{LQQF!#XY3-~Jxn zw7tK-yZ`P@SpNH;o}Aylxw*mLJ-uIFZqw!~eoH_6aCiTgtDCzY@S@?nx_@|j4JV5m z;hc5MdCz3r*{`lUkzxJ}zt~#jrQink!_Ce5^)7tr=U3~S-S+Is#r5sx`t}7r>h$9d|;yXV~@O z#ZO<}Z`Yf&koc?JpRRY;@CbB;ut&nLe}BEbU*BK8{0YC@>#M7;Z+F}K{B4I{e6_vV zK5X$9FK+HG);E7z-{U9Vgd??k__y#DScJtJ_)Ga)&|&FuNc&trz+F569rg0=hcBz& z;=R4R-fU6u<=tlZ`#;8mnN`*g^o6IL{q6nsuh-ij{tyn!)4MNUZNA=ovwodkahp`Izi``2er1;%y)jFrHk;*o zBiBpeuKeZ2l@OZACx1VCN7e4LYS%%fZ!gzkDYxQksf6WAFSmYqbtNu^@SDxm#nn93 zE^5_&ckv(F%ZJ6k1e^Be^=|REXSa(*I6Lc`#b49^uQ%U4+}&@#{%W`Q?c$&A@7F(l z&Kcupi|}vp#o*o8mr&RN90)4J+m>{8x(jvz1z@@Lv-Yf;d}9qm_{GvkN~r zm%YC2FYM{co-Xa_+MX`%>FS;?@ALJ8=6wd2U2LwxHFuY*a01siH`|-*|8M*GciaE` z9ByH;_`F#UTzC5Hl?+#REAP)1Y6Tzg?1^B4Etyy{zF_P;{`|^1R!F z3x9LH+r8d~uVu4HdwTCZi)oLg74&fd(=x_O^ZL?Xr%eU6c>5bLQo+W(di~bx)1F<4 zeCxdX*5A6af_8Y=@$o8r=2h}}dDOi~=dB68*n+Ec1>xshNfzh$=boxu`rF{|1dnU6 z1MT)a{Nly^-J91JKP|vq!KZ!k#p>bitN$53@;-h0QUB1(@u#re;$QBANx8mx{yJQ7 zi=xZ*ZTa)%9VxZTU~L|@&mVTE!WN6w57!SblPmCfB|lqymVa7Z29+7a4|%&h`QmNw z-#m=pA8gwe-a7D%A%r=cXS~^lYlOp7o0w;}o2&D&i<&*tpcMa{7X3VYKli~kJImkG zdCtd~T(dmBz zE-Bn-dF}L!c>VO!@T^b3y9h37bkV|_x2<2D-%o2YT_LQ^_C@r&rc0pnv+o*zdGnaJ zmm7b&&gSk-@{!;9F7j!4#6{qUJ?8CnQl@LSm!`>-{4=H-U;NkHX4!4=2iAOBK_cL8kavk?Oh|kbPdJ~mD&+cd5So(tR_@1!?_zc~zpZUCj*Y|GktX;i#d{6J+RngQtymwQ7 zXpE%#w}r1Ve7E=DYh@d`e(~a~q{Y9FKYT~ub^7sN2e1Dw#HZWczg!1j?2lWBI5ws9 zcgeMbj}JegSpt((dMwEhU+?~(-Q6vHVsbt=7gxJv;lEvjyOmzqXY?KNc31#*>ru*{!${2BY$29m3pZwDQntrLjG;41+Hu$CbVyP~J!oTnvwe%rw z@>Lp)WccnH)Y>b}gm#W$?{Xbr9p56Yp z1r(?k@Rr@P+ovzL_uEC-6N}w8-1hn=N#TQbi}hcFa)HCT6`H+?L~Nw%ka9- 
zF^3lZ8m^9+nlBbh*bAG*!`%YQnA^qm)#Co{2l)9-xM=u#Ag{hyFJ9komy{;D3EO>f zcZDxHTWnV^R*M@-8j027pYPt>Zq}d^-|-_LD;7WIetr953)h}*IKA%Ktz0dB_hYz? z-7Xd_e)htz*Spi7@zXy0;@{5}clS^c4)6W(EWIPWVEq{^Z2ap*`0GC|g7@+7qvE#t znH*lBe-EjtS*@L$Q~~>x9{8k&52uE=a&u|igNzzg!C?m*J<2%PYpY7X01S zu$TZf)?euhZI(hyzEs<*_0oB@Sqkg6(p~6FYwSEVY*xBd!@v2>;&->ZH$mqT?M!n_83 zcKaU)Mv0m?Zp8&@_l#3{b{l@R2&X*URlK`zxA#!tUBB3Vo??k_As{$k5MBj>gz8sc z&|kuh$9i@7zMv%Y8@^clW>JV@-SzE{2oDKs{A&Fp-Cn)C->iRp9!$&K58DkC9pfv) zI;ZP~cYQv7Fwom=k?4!FMf!DFDf;3IDs?sAkZ14m72%ikhr0(@iielm#l@TJn@zaF z>-9tU@%}cNsYN)(>&5jB?A*nho6X|Q>%~uZZ|)cO@!JJs9$p+Q=O+H;6}&FWVHLjH zKKyz2{VMN<@biwWUfAIm@#_w6c(V)2J>CPpY85W``F;BPIsNjBMTj9@-fdvr@97Mn z43|Cp4f1I5?W4{A4yzL1+kC0-x|plgD%fOLinRA1?!tE*mU02-^fn;^(yrOP-d zp@_fvc6yKrHZZ#Rvv#kA+xpp6*z-Z>i|7CSot`SgC8D`YnjCJk{$N6(Z9o44oIgrw zc5htmk{|KMEuhba`A)|UnDKmca`G%(x6qSM&d^Z(c^N|6aCZ+sLH(G0AKnGoocE9< zoP7re%+wv?9v+*5eDpmWAYFHWbvQ^a^3nHkfO5d$?%~57jYk&+4skFZoA5uxrhIJH z{g>Xur?mHf?X>swwyUr-!9kARCo*eKvN1;_bGh;3uUoFwII5^yHI zC_!hQ!5(=QKP0{YuK$Pl!&&{dxK+aM2yyX@V$o;$o}VFXqqrQTcD9@PVize{(_Ss+ z(BFQs?ft9khllohvu|#F`S2!UV1M>v!+ZPw?`Hr(3(?5+?d}iZ#q_Cv|5U#C70MSz zDZQ_JVKv`VzOa(Lhjs<7rTvsR`>&ZelUJ8(;kIh2Y_Rc)ZTQkN&6Z-jR(5N&Q|jW~ z6K66f&el#|Y?QxPdd}BNC9POqNaL1k#l`l5UDzu=J8{OE#M!@WA3ob*UMZI>ur}~h zj?owE*RO9dE`N1>`_1~tm;?H1dsTEMgi!gLD1&ze5hQrx&*1Pp|CYeJd$Ko}p$YEO z*&@39n7i3`NwE6OHLKLmFTUcJ2q{k2N&w^LDj_zGX`>jyhsFD+&1M0m5lYMK7VC$_ z-RZr_m%OrhD9d-YhugCE^YIiOMSXL18Y|FTfr>3>r^~aXgC^p9Putt;1o;O4E*5T3-`%=Ar z{o~tT|9(T??vxB3AYMJ)k#8QMd|&q<-TOWTE1!auPr=HkVC7S=G6+^ee6Y6(z(Tcj z7_5X4=RH*UxmBOQR{uqVm5pU;d&&J$t)Uukuguc%wOele+Fx*Kv{3HdgB4ZuR%>f7 zq+JSax8dTREid>L#4eX^Bbip>g3k+9Y!|HjAp?#s3*c0=HLJxxLv41E%+_Mv0H9)R zcAH^H8}gM`$tw<*!)w>^O!&-yy}kKq5sTEhX>JDv`d2?S{oDy$1@FS@c6834NBOP@ zQgX{<2rjUTZ&h6>MJc6spB?)M@q!}sLw>#;v{Fah!^`zwx8WhTw~O!m~g~y7Hd-x};aIi4p7isl2i>o`p8oYvf_CtavtasNz4n=8t$^G3c zKvsnPcLmLQXN&E_<*I-D44t?s02B3dhnM7ME`b~nXT-Q%tT)teaZ(}$G zLhL5T7a=xcyS~2(3Zl3ZY*PK1dnsDe&)~{IpL_!!_**bLMbMYOm1Ki=P`UnL8$zz| 
z-;9V$0JfjPo!@@;u(;jk?;;}Tc3Wssf56}{d>TYo{}Hq!ynefg5JLJ2!uNh3HEiCg z-{I#r8}-Ye2bR0-EgYY}UO)T-i#DgJ-v2gPlj1&iUfAtU@XQe$FJE5T56YTx41-7xdR>xAC49(?k9l z!snoMAr=k6D!l+V+*tEONr5$690j3nUTXFR?5Se~Gp6@Rw8zjK6X5H}7GkDxGy)YvuD|`Mg>_ zFPG2j<@191ykh>KOBMu^K0MeNCH2&;I)2bQEjp^S(Yll9moNQbiBcK>mZ(|IKi?8h z%-N18V$NPAc9^qU$~z?)a>Aq^;Md>3WyMdG<;n*~mFDS`oO7JyXGym?9n9h-yy1eA>N`^}+f3;NRiZ9*e`f_Om z=XSl8hUu9EhHlvQLVFJUnXf1e*flL10QOvbxBXA>3R6ZVzak~wqDQ=2JbQBSQ}76* z+Y7G!#9os8=Ex@#J=xXbA1?0yF({}M1LOb%{9St0r0;`oejPL=d|dEtqkP~wD*36+ zVVnq9)5Me?D(trmD?Ly z6u|pH4U~`h>uj;Z_d|xtR@~nTgf;1iASHx%mXS`2@Lne~DhVf1H71`ye;Y zbM*;w^C{zU$c)R@vb8qLi#5Mksw-oceyuK+8|gMoUTrS?_T3Y`Y9iKHU#z9{7Z*$J zH)5&m#nsXo@0LuPt+bpuxt`e{5Zp=KXL|n|RF%RaK)xjBWw8vFv$+5ZK*-5%@ai*y zNvuYN`%f%8pOnEzt_<9j@^%aPR$+oNu+lCsl-5gQjJhM*zK3%PV1d_{goYmzT4ZfvvOwrI zq|4vlfRnVBS7UV-cV>8Hh!Fqzr?AZ5Zo~P$ei1{2v&H9OmDJ+DFYrGKrV6CA=>ALI z+^sj^g1?zS0ZaVWcE7p(>GB~w%WvXI%|AX*PZ{oWpse(}@(sTe$Y@&6ccJnB>-KhY zcOQBCiOK(Uc%V10pF@}cVaE9)2OE*rpZ_|%{?lVTd2jsw`QfJ^=a*9aH;Xl0Xa+U? z=Kk*W-PP4$E*7Jq+px)>N9vmCTB8{PZ@p4jk=Jp8%lh_b=>OkkrEczSUtB-D*&N_X z!Aw2wLV%bI%d`3U84d3+<{_Nu=T|?3|KF@%%->J62M(}I`BINwr`PMB;9NhCZ=d~W zS8G_MSM_fErMI&E@!@_w|Lw0H*3TaxQ#khm58*64_8A_66SzCTRUXziGp{n7W--H_ zJ^59qUpO!J!mXSb!>u6QY2O3M@F&Onzpmr0{G}AiY?c>S%rBL3ez|5BV!6HA_-zQm z3-2xj&B!*(?wzOo&VW;9bU7w&hL29KW^915^;UG z_?8^J#h1u(8E&+8`tmj7ILOR10AC|0@qZ56z5gb$u>$XJvA%*9j$QFX!PJWBFKD0{d>Js@?H_-;Xy4m-&-l!E2hqR*25G>kfaN>~#F8u()UN<77GqANe9yO#n5xzFn~RiRC+h+_`#JY8gT&DG_tS zn}CJ%K0IULJbiK2AG-2enS!2#t|{O_lW0QlR9|9dmu%S{rV~58WWxH4l4ndEG9V2` z3|gNcd-uc3>!5B!_?G(T!rly@c?ktd0ByoOezseHe{&x0^0Uzv{A{sY{+}-%{;%l( zMd@>t!a~meyy1qMJ}~Wp>)rDpetviLJc$qRiS>SKKM2uS|p68c6|JU2+)63r19jt$Rr#lEQd(<85H(2!|;OpMYD!lC;%4K-l zJv??D_Sx6*R=`)ihn>)W{%9lgpMS7@@b(4BU%}y5;4ODB+ysYTgT1~>y$1PZ&-2Tk zPcM64cTlduTkZhYptIuCE0$8n-)D~s{OGeq@}Yz2Ep72THJ814xW3t~zKc1`H{q#P zl|wjvPVFb|)q7{1=Uw>6(0>U3{^IO^-LGH2-fq&RgICtLq5BoeS?__*`G9?=^{n;8 zC!ZNw0e{iGch~Mb@?U=Oz5DB|^~}jJkG-*jD~G+(cz2KPMI1RyRZAiaLwT20#MCeN 
zvw^_b;-~Ou`OP=W&1UiY-=DvFb-vr3E&gzK8$|KKIaGjB?#1)?(r;0XR~r0@>c z{$kiFIllMzdF!+8{QY)v{*uO6$R4(>#6wSb-c|~KhT+^7MLhRK9ngKzMs$0=>F}q% za=vhJ-TJc4RgdRe7iCt%vr?n|I*4o@)r&of?1!tBUf8_u zJ$GqGk0Uu8Z)>`4Qw`^fU+RhcY0%8n{qFM_6=un*3iH{VBDJk9ak@?Bv3}w9Rz09u zcU<+bPJgO;e5!gJ5PRsky$~>wTRE{*-uk86>h+RaEfo_N)?8e@d)33{s>juZx!SDN z<&tgfW~tbAy}Vd!zcftP4Yx4mYd+k_%ew4AU5$_gZ8c!s*f=HxdRIE2j#2i@Z=S|> z4$hZMLAM7N=aQdmRd;(-VtXz`Ar@zBWzUvYuDmnOoT>1m;LP$fXx@5~B;M$t6QW3K z5d>d;_xmSjD)ue1@PCc}WoLS&prvGB8>eQGGJ1tpk;~mnuGt)gSh90vs>OuoPFuix z&EsNn$Ay92K`Y6$m1*N}Wr0QV%Rl}Oc84x@ht;?}?2L81-yXp-!lsMsB*eK=;T-jQ zGK*bU$!Zz-isKpc#*4V?f@ZcolSY9Dq>{^{v= zv{+m$g?h?mP*z=c+9X!z;T5c2OR;h`Z9Hx`Zb=R(gp>Q>bcp_R=R`{u7u~Ia*73 zdP`aEOT85CDYNG)7#G%=2h^SJnvC50QFCDrk z^}dAPp6FtNrM93J!7GyIvi0&xaq#k7<>kS7L@&>ho2NDN*^q;X3)GhiZ4a0pi6(q< z^31#qIWPjjG4)_E<=6<2SlRZN$rYDmGL=h8OG+KoS7QYXkO0Tcp@gSn#-}IJrOG;4 z9}f#UWM4#O!M;;=THaw{<} zoH~jfsnH(}F`67|{v_5WyrNo06&^bs?hu-#<13sGd$ZB}5+-bc)GEcZw;UpLKG6K| z+Y{T9h2~GX5H=lH(oXYdF=yvmHy#PY#pF6Ee$6CvReUc5OUtZ+y41KKL#JA=+>Eeoa|ElH zRA7iB1cO!7gMHX|$TDW*OK!mV zWo{U=kytcVTL&xa=y+SBlSN@B*zGQ8;}SE5-V_N1!O=;MuT(2(Qqs_@`23tg?UckM zUDymf?1VlfEyY^2j|a*M^%Jw{39MvF+R7;jx(kn7u(cMCT$$n(xJ~Swyc3+bK~@rO znSlc^&Pt-#)q!ISvMDEFT>45RXTj0uWx65+23dqj$rHO`D;N*E#)P9%d866$u7p zbeOZaIqSgW$1$Dd9JjdJeK6eJgIT7shwe4ShB|H$T|bzavqy#x792IfpE9-Q!LnF& zdC#zS$#RB!$J~+UVgGmEm`Ldo1HElF2fZAhtO|O?>AE2~s~O_3;Q6Xyl_-hffcR!L zF}cKq#_B~_Ow%SkgJXiJr?8TR%Gb1j@#UIS3H;@xlgL1i3;F_*2!oJ~#5ycDZ`PAG zC*+Ca?$h3yHe}OYyJ%*NS3=|@Linsfg(Lp<#5QMn)LBfqYe>0|jyj1sA6RToF`LC2 zrZ8IdN8Nf)8G9z}q{9YjWIa2L+~r5e*BI!qQrzn>u~JkBW5IMM<*c+LPlWC@1)!zF z|0ol4L_31Xr`$%5XSgFk98B8Vd^ib;DEV+k_EyaZ+Ax+oK|4Z?lHqE0F1*tlX2~^H z>*YWXXR)u*2VkxZ-ngVpkG5JjYQrgD7LB%ALb?~#h8g7^&FtJ`>vD&^%wtkT>5y_p~ZY^366Y?JYU_Djj0LRvy~u~`k8VagK${QX*Y(WFG&F(SXF&+Ylg5PkEPpjMk0xO|Aq%g5I2b=#6dXWqSB6z8$9#U zIG`1bK7lK+8&j2JMw(@~>s|NLgyK~K39g+*}#w3`RW|U@}Vk8UDuPEi3 zXVqv&A=0d5+;N2ONkxdDZ-X=J&-qY;Brzxi^QfeCC7i)}DN{-_M4cuD 
zM~MF7jRu*A{sKtD2R~ye%^?hWP$R2GVlCSzj|0R?6OB~!c$G^Lj4fQJCyCZ-hgfxVW5Y7@>#+9AC2t~gg@WrUj)a&koQ$$~qt@wmD9uH%P4 zaVa)uPniL~PUz;um10+y^$O(`&QdaP(wP|0r=Nc;lJxC}xWt@!|;cOmt((3*Jp75andK~o+ zBr#-tRI@QC^QSrY=onk0%AH|?Gc3^CSH|PcFe}~d3_B-;IU0{x!&e_xNemMPli0WF zimeS~ z@t|licz~M4q_3RQhLB}aXpWh=5RFYIO|JlVu?kKTV1-T%gwo+AAbm9i;Z9=HD=k4v zOJf-#L~5+BQUpDgHVC(N%1Mh+PFkEUKD#X6>L$@$i8W*c6>Aa*F7HZ7=_T3u223iI zY;0IdowI=mc5iCC{&d$exg0`ci=P%Fk0gn;sNF0cb3|BV(uivm0_3&#*KrM*gwk1hf&V4@8X+DNHqhug@X2Dg)7c zwAH+P#R!lh!1#-Zkr+4Pp?J{h=u>5luA+a^`4#ScaDe%sFhrXCBB)5qq6gmzV0zS9Mne*hE+M9VG_L^?woI-;4YYz z)Ol_Y6-do8qc+L;(2x(lnCKg(UO3+sW%?t}WAPS4!@EkVB2#AuzJ@Or%C+3v^53Mz(JXla24fb1+XTrUc@2}Ka zL!r2c@%yk6pT)h~G4=m5Noak+phLI=P7u6C0e+NlC2@rKXxFG=O@JqbfF}{$jt=qCNOSwh0HN$pFZwG{(Q3(KPs+$hXR(4D ze4~PO;tWY3SYLo(OB)@IAV>dAYn>BTWu_(Q5l670l#w2Cs^&*}G?^(p8+;Ti{)o7y zCzL5Gt*8St39;uG(MDp;`VfuhmUV$niE;oBtiei(c+`wG@~mw9*NvYwqb+9`b;?*Q z3%hb0btZAxl^uq;*p=2H!-`VObn&#or~|yPjCbV2KB`BA(;t3+MCJ$ZN`2=>HDaN9 zIEtoHiStCRWPgmVA$c-%b^ODGgP2jOm>I;GraU1O~b1a)lVClf)^f zlQIrf$6#UrpU5ayelFVF98s7tj{A;#sM~hsAIl8Tz(w8fpg2i?o*)EK4YD!W!>j4q zCkL;aFc$7P%6_?%Hn_77|K(t2)JJ2&hnRg5D^+Vjh+rs>WU(M1yy`0M)+vOTYnHM| z)Ui6EJv>3KzEW*ysHumY+#-crv1_ZGY~^xux<#uY7(^^BE2q=QizTGWyoc|jJ+bub zEtI$+b>2I{rFftYS0Z=fN{vpiR=)P@sY5FSW}a~7dH{d2bJ0&nanN||PUT=zMm?S+ z4#80a))z$A0a;p1jcL(G5{@Y>f$I=(OnhE!SfZE($~&H;#$z$qg3;K6x=P|MTfm`x z5!3bt6Ij6mR(&flb5y3_Is0u#YRjIs))G-IiG5_PByPirSnbJO)s~R8n)iiOA1ng~ zZAO}^O`xbH5MFc^`(fE%T4G^i&HtPN3!{Z<`6z^Re`T_7ztj`3HyN z2a;HZrERJ8a=hV)6;oJ0Ra~R@ED~#o4c~&Gycr(>-}x-$`%&yr9MfPRxNUV+CH6z+ zQBN2VW=K~7Wq;+T2;{`FFwrHiqa0v}V7T{*tz+Fmzzxw84$%}ztRRgrK!?Mglaf&f zfpWe*+R94g+3?KqqJuyT0|PNDF!<0d1U2Ui(@}T!p}{0JT_>J^dK=}D0l$;FUTs5( z3+fSF8OA9s!$py@Q?xM;%0K5KH-oHah&1A$vt%3AQ>{0|I@s;4w6DmtgEhsnXfP6y z3ls?Ga#}p~CL<$-KUoSoq@GPYeYQSKm6*qNxN>JGD4JwS1$6wCjqK!;>SuyrvD~L~ zhSlXhnalY>YUXIJWMp<36F?`i8scqKMq>$yY8W^;>u^zvtcHndv7~(=0MR@|Tf5aU za*xDas^ROH#PTUibRIR9KCCnmr3KG8+2=IyrB0IwjUKY&!n549DCCIyHvaZhot>l^ z1hn`A+Pm#cD8&uVS4=+GF-73xhQD%r+ 
zCb71)o>WsCzEdj4PfAtf(%Lgb004ahv7P%tS1_N|I6ka;dZgVMFgHzN*?IbMDvdD4 z5icj4b{#8E@aS=VhfaB{X}FA^>Yk4@T6>saZ47AHIUk|{v$*BBfsPiPvvg>;l+>a& z#aejOPByi^Mb-p9;9{G{snHFBK#Z`D<9@~oIuf^qRZzHsKuVH4GA(qSp9|X)zbvED z7Mz+?+Q#&mJP-p;2JjyEXokQf79IQ%n#5IRE>OhfR)>~R8&io(T6k5C+6Ubtvp}0e zS}5oSNgd36C8QK^P2BGr^0I`vs>x`cd$1Y~fR%(;~c+!b}9#sZPFhEW-4 zRJNVDj$^Z_BmMD!h_w*6yg-n3qrLzZTsK60DAeh2W#E*ZJW$E;S{=5OiA9;sC%CJjbT=e&gYC1xT~&h z+0#n~*S?R81CsQ(ww1-cKWWZq_gj*iz<6)jv_W2O#kdkvX8rK=kv$LufCj^bm z9#2&8${TzG`~nkJip9D?#-3B%K(173pB1sF`s-lN;iJl}DQ9BO2j(E`JvijP9-C2# zV{u+Ivs5P@4~^MwE1a-x{lGe&C{YX~PIA|b99CWB36@>^$+Q}L^HGlpi3RKXgLB_A z_BC^*9B(^YrP3G#@l@4%A5_90{FoA~Q#=tpa?X0L9vGsU zmO3n@t=yCz%|DB*?P&I821si?DiWet#gcaGJ@agU9FCZm&|X_?%-HQm81-O&OqnE{ zvS|zmvy(l7)Zq#8j5RG#Zcn&cid(-BJR44R6U7lf2y}uCVSb2EF~!G?p}BB%N$nB+ zGo#8$8VZQX?_POL;${wB!SC*!<-|wfZJY7(;gOP^P(qrly@qv@`DK&NlQr;FFdrYQ zAs=4c@R{N~$3?c>K;~XA|WmgsF z(5r`TKN&X>xs*DpLP<2}9;h+iW1_ci8-oFkdIUr%#cg&Q%Kdpv6+=!B0r0vGb|yut zuY?W4#S{>~cfKPANfGGb8jcCf;#NmNL!2SjdZyBeHr5*7Pi!zU52Q%U^%KjxoTB{* zjEF9C92t0WioY3T8Bz9<*G2wT+>QF>Q+b(8sj$G>*7$e;wZn9PvjqUN8g;~JMa@Ys zXq*ESXH0W&KGcBTGK*c!jYso0A^=6;gbm1`RLtn!a89X8uBpA<4r(m+Q5C-OfqdKo zJWhKaJdnp70@>G^8*A$73jP2pN-u<|DOogTRRpw7zYku84{mO-5+4_F02;5vT{$2e zGYX}Yan#QuTYh1h$ZMD~6MuKVEoM#KJ&ILC)_4_JVDxL|08*G4SFWd? z@d}6BF=+Uv;C|E*J!_!$*kqds;<5WWy_48=2t7H`a9OKzZPx3)cgJ&rX-Vfr?TU0r z@q;8?Q&h`ziqIf~z_O*bh-w!?4(3$}n$=~ADJW$S2HYw3bhED5Sz_b3sBxEV z67vp(#3K)a%&UnR1mgx3e1-G78I1qR)!8A&SeXYX}bW`j)dk;YhdHKS6jMLisk$5EGvGKRU#2`ngCJE9i6yJT&+3@0-~ z6Gmi3VkTM9k!0**lTO5`0v-8qizKnHeN2hc`W1%-C_El`cmK7B;S! 
z3oxtZ-J|-jf6~X1di%!LCvoeQ@KhDVOcUkK1ZS=vU(Y=D-b&9^u7}x?UowBP$T8@W zg!c)Wd7LQEV%a&$V#>>S8fzdWq#ZjK55^>=nwT##<%dJ2AIGsnK(mT-xI|rwZNyU5 zv8pTzIEKbOO0KFtiLlHXLpKBLLJ2lw2k_vTTXJt1-^b_K%jW>pp>4C8T2ThbOHX6R zn&8?-WKcqSczPZ#N7FF$N%wq6Q9Hz+rVmWmju*xy+pU=yCLQ zrD`3Ei)g#C&7~Mp=`FEJ&NVTt%qs(krZQtEasv#{1Hnd9OlA!+ITk;JIXJga5FI;p zNz8}rdoI)RTH0<}_u;mQGnZm+gXscT=!O-0e|i=BhMw0&2Y)X;ZxpwXc_=rUslJRs zpk}_sy9!0Q(MWGaoq5x#7I|0B90)U_+=-oedmtTIJQx{gGk=jrPc_`N>(h2hD2fV- zB*GBVal(j>X_~N%Dr?Y~M`EDU*fW5`X+@;KGN|{eY9a|5BQ=wRhAxK~hWpd#sL1DFw~B%!u`VvQcc9{2%q4>8>WTR_$$ zyc@Al{VXIhYM#jQrIOB6o`E0)uM9_0zK>YuD2|y0Dq|7J`PE<%Ni6NT;LRfDae~w@ zs$mvm#l5XP^-)NU)$>{jPwmkse8_OZf!UX$AaM&Fp+$_TV@J6xA^afSqj2ysvxLGy z(xM`On!1Tk8G9C+U6_k49OL#L66*n}tfc%Imo%o9sSajW#B+Xh!mttoqU+zp*fVf2S4>Q;=3W?gx;_I>UO>v?(dmC?X}a92RA> zxMvg(>p@u(t?-yL%RBlmAhe|%ik*<;ZZPHE9}XU-q0x7Le|-|m0YQeI=442Y^UQ=D zVN=3Ns3E&*U!(x=58f1-+@F44{J{`VXC1 zz7I=?@WJCwb7Tcd$n8G(N4989X_&2SWkM51v34B#FI z0Ovxi2R+*|ceY_u61Uve)C)m3VymvwhEbo58?{@CS*53Am`FPD%~-G_VwgHk;&zoe zwksvJ2aPpRuex?^>jffk40n8*@i(mX^7+Ny55Abl_hF+A>1PgSv4};{W=?K~3VM#Y zVNh08t}|>z9Q>+{$uX}jiWvfzG#Kzx^Trzr90{2?@O{R*DvDuna~aYo#KR|uDox3- zGI4=r3LK6Dlq?|YNaM%yaR-Uva}M^9#42L6q3vo+VnyOnfZ;NiAOS@+_ce)?lM;t| zTb3yq4>~gLC~TOF2!08gYP^U^tVC_d#IkmB2Av_A^M|8xav`JKsPcb&LV0JUBVE8+l}JX*=;sF<7x0 zU@+C|JjZS%<+QJ|azW`%#O)fv?gyMcJx-9tp?cDUNP3tci(!DYgKo`Oybxu2Ax-}Q6^_C=w1x?g%wqTC60Nu*mafK zd(rbf*o?8`&>6_q9q}dFoa+HBJc(ngMBAfJq=nWQRKG&0uCVH*_1M40ipET6edt1n zS^W@4QdW)LEp!w4E)s9V{dkOU(x}Om8&p`TX)1xDHKseD*Oka4STVsnr~A}g!qcNz z#(1_NZM2snIf~naKTjH)&~Q{25xP-W&}lM_$IxdV>sT`$fh7*wa|XDwEEXO4hFr0n zF;$ox;CW(UksPo%gjY*FjU>fztjoZiEazFOB88(1p3}J9gT!t6nS;TgN-puj2sUWr zRgPg2&PgClC5|0F0u>8!x~O8L-%qKOar<2scXVsO8%VS;!jYb|rVa+ZH(0Qf3GbL@ z(@mHtsxh7RY&r|+bJ&x6nNg#7H2slLM+Bo`OfsDHdQ+ytC~+vqminSwxY(5(j92oL z)%<}iA6#W(MMC15b!-lWSc$rABBpw>w?pHtI8cR_GZP&F;Y|dVPE$!+k8&1m-4WTH z5KsW#QFU37N(^%Vo70^KGC53ra1-9sGJWt2(-N}Sl(Gm#>4eWpNq^`aSr-V?Y|=I{ z)`;6`yZ~m(oRwC@RAgD-jcURY6DgS~2G8?hj-NlSp{oWVir8f>hc%!qZsSt$PkbK9 z#E|w?{f{=d4_G+%$Oblqd3l+ 
z6WsC%8!Lt!BePa>UwK^c*s{ELj$#yx0Zvs<6UjUQq*>o&3p`!~{Sl(Y_GAlZDbvay zIg+gV;?g>b(lr0sJ?V^6+*g-zm_RII z7^L2pahz7WKP`e&xG6caeU~{jr`Jc*%uS8F$1R?r6i59$SjnP&fhZ+Jkr|5WnNd$5 zpQ=_kFGX|9VLX-pLR$jIL{p4*H< zc3z38!Jy+7GCW~P91NtM5^h7@JaNJ!8!${{PeVMuXEl>pk2n!5crln&vls+$*Uuvt zUL+s`jH#M9!sW1iPM%=x5Lm0>fdsOwC8ROFcZx^n` z5w{@tU|6dV2czkWfF@{1)%$n{qZnv<_H@vBtyFIMWfhvxb1e}X6i%Hg(1euQp#0ZK zPsB`w7=RY!`{n~G?E0;W{9p~KOVuf&DiW$_LySkz5bv&aiU=kS274Y2Gm8X`z!uz2 zqi@6`ZW=?|LaJ=j`dYhCmT-nb7A>lSfPYr;<9Gu8G{TuhTo@lYTZ~!{nk=)(vm*)H z>|G6fj+;XgH=D8m|M(xo`xn~-j{Um5TU@4Ve)uJYw7kaol#o#y%pNB(L<^=_ zQsU0Anh|$~N>f>K^~2z#2CJ0U#5wK;(^|tVo+^4Fb@g%$f`PkTlsjCh9ZZwV|JQ+5~7Ft$16mQ)i9Y8MK%IM z!Hmk1@Xf$=VtO?EG{ZMI3R8-0uqgu2UBC7uhQvyJ^m&QP_c+&&s|>QuE{S56CU^?~ z;}(o>RgYeU4$X$uU9FhaA653ZW>FF*7I;)oI8lh9QrJ2tA|EGDV`Nyv zq7)7qk`S`gKGHS}2F)c%bNM+j)iJaTPRtw{+AZ47^hs=DF}YqxOB21h_Sgu(xir6&oP{B ztj&N5ImG+u7kk4CUg?9)lNn*GcpP>p_Qf$IZl_^Hs+t_f5a3!E!#lcoql~2yRhE@y z^$I{%bg!JC%imNpBKuj;A?-xoQVcqzO#u=64=`6y^DnQd1Wc~C3l*n_q(01^_QaeV zIa)o2h3S|m8^an&+;%m_CILqstIB6|++f0n2Xd+?_G#d0ao)O2+R1gMl{+#bB}MeI z)W9_$v2bTw{aQ1_Z8I{Q>9|QwFe!p#I5~Vldq#dG zad4kS|NRlzNK+@3k}$5I2|_K+)zdtLp%jFEEI{nqOXe~rkcQZpEN(IHyck^!MBU^E z3poG{D@IUQHBAgTV9N6t))JGNI?_arBR3NFh}RH)S;~&kS}9<;5JmaR8{yGzD=o8L zAy83NEcAn6bBVe?_(CGnvWr7A$I#&<7K4UUktY>8mYGBt3^M4ts3eBSra}(;RXtZV zo1W8-aAa)GiepCG{AVeKf7+~F9t$KfYo|F3cdVF;wIfDLO##lw!TwAr!sO||xE~)! 
z2o9zJkyx=h5hW(#4W~3!G$dy=uNb%un6r^uP+jPN3FI;3fQW>i!|BCY3~Yrhl?a*9 z@Jb49GiJBQLFPJ`6Go63b%R{f2_j7El@uI@{-{1Hp2f=6-A+7mf}xYlX|j^!q8)a} zXo?MVcH6v?9~r$3_H3tF7Hn8I%wkvPbBpOK&Kb%ga2RS(aq?mD3&z8mMo*Lw5obIFW>u|!Hz&Ryj zYBS6wG%0xnZU`aqIm4O^hi1fT6@^YkSOAb~B9%0QjLa;Y-4J}kVdewmkj6acByM}= zV>uL?+`@BEBFR9c)Pzr*8KO^MMmOl2DWgCScEYK6Y@&{;NxLDtSrW%d?@g7f9{j-I6oEcl83>tB3{>nj?O4;lQbon1Al*O8`Q#^wZ zayL&i&GM98SNQ&Y1hn-i@=~mMyFElMH%kf)gR9VuzA#n@JaLiHjdKtziaF1x=UCW|LV-9$lDtYi|?SmU^%(SpLtY@5PC2`B84cTp)O%C`9 z0(1-Cx5zR}<1ni7S|p)*>yGC0;{bBSSZe0)N{u!z*!1Lh-5 zJyOgP^AQI++zJyf6gBo}gU#r5_&e>#S;?&<*p9htC-_n@KXAabM`C5_u4!u6i-Zz@ z;7*7%VLG?fYNCe}fbp0ZUPqI|*+~~JVbf`#I%#o`jkxX8&_!>qb9v~g9SxA>7H45A z8DjMJI7qN_%Aq4RX6Rg3YbeFgOj{2Im?l*yM^mD-(;dx=Wkkoyw9HQVftg7LEFs8H zHFhUS+|%;PG#M*alMSj}reLOCOZ%lHv4SbUn$&~#_^KVWr%t`HXP}V=n77ANRavZq zb{<1ntPlU7w_sg`6ayAnXFl~OZXLB(3?f>}EG81`c9IESuO24(v$)0D+=exR&V32K5>S?$=+M((7uL)mNQ#e*K4+=JXv5XHvJ{8yx>C) zlgLRNob{%~UXeG>xxjVg3~;Pn5?Nv5lRNJ}>Ljv?ZGUm_6C0${NUUUyFCvg|jBK37 z6Rus?Ug?mmKq<#-@I&LQ&PxBRN6H_Qn>_Xp4KPzA4nBB=!H3-EMAwfyKns{$Qi6oT z)=BPymJvLjhyf4O6f!hsJT{KmnA03gzay~{^=wj6%(!+M`=LS%?<>%=Wx`<3kn=82 zm9hiYF76Ih2TC--;Mzj}-zWx5VlX_sQH506(m7QIWBWS8VwuDl0E(uLQMu4m<-(*M zp+o8f=gBt{K`q&j7Uoupo5Xn$dOE`qJq_u{YTJ8daJd1(N*LC%`EcTqm^mbIG!HZ~ zPYnhZPGa8Cc4PVuZ<`}ErRI#+$t&#?g8Gb-vAz{|{^%_8!yM*Rpb*j3maHbEYsIFf zvnv5-;lx^k?rGi)ZLk@-=WU(>X3AYZsi>1HU(`xOq(ss<;Z9;DYiJ+1=`@tt5YAC} zV>&8w?-ZVgp!2<&x(PmxgnP|JlQ{7AQjjNcJ0n6Q4T>6B6Gj$r#_d{aqN)j}t{ni- za?#A12R2H{9@IUm##xHw^?Y?=-m`TdmcsK6_+>ob2g zn>CFyRW0XJV_q83Yte=~FBF3M)gU=Bvv_?R3JYCCHVUREabhuQgXuieLm&mX2+mFC zL0V^0qY1<}MLyb>sfnD9=5t)*5A8iR>;ybUMMEqAip5|^PzVaL`)C=JTz${7=3p)< zik&Q0R8&NtdgT*>PwJI9aRi@)Bz_3npdozFf=gm4+B9V5-0>{?-9{Q)njT9BU2`cW zZem$@l!EH{t7a?Q+ z-7uw_#iNnmGFu@=e7(MDEz{6kmYgEbrJiahgOu*$knxSHC*wHVUiy(?3Ts@qsHi>N;RIv;O&@LAzA@$!A z8F@CCG__?zaS&1J9a5={bHxnL>-q$&Em42dC>Y?F5sP{}-en*--UnP1hC7$RfO z*T;s=>evt<=pF`Dli2q(C)Ai~0KtaDJEorrl$+8)%S?)R02Y~rr#KFyhDmS-a0xJK 
z;~cpIR%WK#To89TNv@Q1joU=?-7^v4iPT$tLd(4IEiDHsO$74WemuqxR19MeErz$o{$K@J8KIhJFV;Pt@02dvejTpEaz>mVCfoOTBc*uyR_>wQDGmMYi= zs6;tcu=%JZX;!>_OpFAP{8#CI07M~f{f2_K&MT>j;nLdzwG@LLO?2i;Dzd0IA$P{ngeL#LvyG;ZEVA`i5Ws=$BKOh)aBLz29Ug zsEjBa*t(+Eq)Uj*9NRPwEkqpJqIu0+(uxO1I@56&il)P{tk~U%%r0oo%n5wBO3ksH zXHEGV-_ib+V(n|>1)E6n6;`#1C zq!Q*oAY2da3lE}RRWamCH(Z&L^q3F247oh@HbX;ghQp~blXx)vmJ?N-fuXjkiNu*M zLen%8Q)_N>gr-O7+gYq8$C03eBdlWMR-A@8LgbRTohTC-+jBRu;dxpst9#82qgmoc zNaW+jLq64L9&^Z#0v>?Gt1{B1@OlbhE7rA`6`q#joB(D*)#OitAB zU)gfA*^9^qc(>GzDZ9?MM%;2MO3;*32^K9*aO!y7%o2h^Ow2$n(M_4vzA1={xh#~U zw2OMm43rDnYEvbKf>Yy;@tp&u9-buDz2W6}*qhbV*jh5Tb@o92o z?6>H1EkfZb*6P>c-G)zM#UFlZwA(e&2kbo2^+7h`zVj%Yl1$&02W5qg*MoaRMt^GK zi(#pKo8PQ1zaxnXBkWw**bHepipEkL-191fk){ct+R|p?xh%DoR1*7=ys{$C_)&%4 z%7=SaI_VLscsO4N8{v8EJKD!jl*Fwj<&egvW%N94Ipdrg48L?5TX+DnJ4J<6t9-`q z+0$<)=V@$XPh?w)BR6+bOC}F|YSs#d)s)!^&nT-F{;Ny|m3|C;&>bNXEYUtvNDvF? zgYK%Ioaj;I;J3ls5z?MJMePi7Y7v^O49owYPjEJx zu^{5m`*Czj;?^Jlo28cd^bq-pkxl!v3br(=sG!75-hP4&Vi>_66~pwS>Uh@z--=Cp zrV6q`E+}H>J;KX6>YtZJo1zYLFx5M1Qge~4JJ#_<-(j7_lDAliiCWTXWawccPo|!B zpfEJ!(y5AunwMEd0k%FW!bm7^a`w4lhl{pD?wews#640}t&}8Hg}6x$iN)Rnm(;Uf zE@;KaNL5yD9^vkEAXtGM+HLsIh+8UUJ=Niz7TI>#dhDe)RL>KRa#khrd(nY0llXm9 zu}Davy%mdu6lxB&f_uqyU(}+>S>IX`A}I<0g45N^pKON$9E8Iu_Z30I^_W=I>@CC~ zKVh`-mI?$xKYO4VE#BeLF`v!{>v_bA*qV zams7>Aabyz%Ht?4jbxDn=#DyF=uV81VW^~ty}1KtfW#Wc(=`bRqJYz$(;T-ppX5Dx z2})0`>XXHs3jK48%8?{>0BjdA9s{D}QY;3XY$Ef6J4~QTmZ!j!^(!1)WrO8W8DH*4 z$wq&A_~C||C%V)FzT{HeR|0wVPQ#9*)U5%dRVhcJ992>=jfKD(dqVpD)H<~q_QN>- zOR_FYT#0Qhj&y8f?in#V8l!riOoezo!DJ3xK9iHu1E#409vZ3#4R1*dT9UT}0C_Y8X95wm%!e?nkEfZZ4lY+B zP@s>83^yfs_$;P~4;{)hZ7VhcYllOMvKVYn%Z5vAfsPoq#6;dRjX9PKy-d#av^6<& zz4S#cHoLLugD)mVN0DS5*eM2$D_Pudos|=9rOs0vn61gGtidVB5@v8$$caPHfF6r0 z#3S5Q?i*oF;+EV5Sj=3u-juLVg3GSHWfL*}4gaF{$T$W#6{_8hW+w7k2;-wrSy(RC*tR%gLnko=Z*>!p$^46kA%6+iT(lHc z#(86~DK-Xm}yMXWIQ6_=(zD*7lp1i zM>A`v+bDO(W8*_mPA%76BNnPpu_`J|8gHo1LlSpcrV4;H2XjXP3Y&oHr9ZO?*iNtl#s1Sd97^?h4#|br`VFxXW 
z$0M+*%xayaxy*4GKx?zLu|)BQ-0oAjG3CWI0rpN%J=Pv_;%nstx`9F*$;tL46__!! zzoo6({6fHnwNUL6*Y5;dY7g_N+3XJK&P10DY{VmaC+)JUh)FCC8skir)mLcVjn!8s zyu|#L7~z)0%r>|qSx18n&||`vEEcXm-I_X}Ig$y4<7-XbFdz-tX`OsXSVuR4hVYZ? z7idasHj>DUfi@(TT`h{QIG?gE7R|8GJvEBtd3s%%UWi>ReKYwiZ|8X88XY@y{k|C( zB<6g8L7+Z0O`~}|0DQ`3a?X{d)ECb^EM=b>ZAmwGN47KmK&4YguI&OX#lbzONXfx_ zBfc*oIpsuTKWru(K^hL`94`v61LB4#Wbo)7ziI@X~f3qdzhH*q}omS~}k z$Z`D08|H^`hmouxPQC^D#X%Y{i4&crA3Hf#S()A8y1hd_w3v)}S3%s3HZ7A*xrY&3|LI0-;pWVh32ji}ZUq7S(#LxvM zad>dPXVkB?I9%H}CF&%xmyVbKn3Ys7Z-NOSD4LK5>Xu29eBrLgjH5~1!)ziV%H&%Y zI5=J_?fO7TQ=r6b87Nf@X@mygvr2|%H4=J)gh+F=dN$_iS^%2ZN@R?0`vynOJT0;IpO=z5f<;UaeQ02cog{pqyOLOd zaZ|2YM8ak%s@)0(Oymvbz%fkcDyFRYSnGBe?nUx|ag#?9Lrd*|q(NuA0f7a@sJN>Q zhPYB{DGRsmDWcjS(sOJupbUA`$ zL=t2ZOQ8KSmg2VYD6$&lp(2XWFn88dl6xjNLCA$HN_Lr)yy+C1=f98IZv_HkHL6aYPHJcgRaR#E? zH_ai5+t3TR6>Rp3IQ8g+t=iAnR@88!S>lYqB{enoot|F_-Pgz{WBU;wJzOd7YZ*az z=Y|uiWk{%uS2QWXXQ6YMtGjt1X=Mv$;UbU16d+TSk3?}pcJeIlXL^S{g7niN>c(sd zbgA_jcx>uX-qbs!AQ2=D=1&n7S(AUTG`45#rX;cHX6bTJ=!`nNBr@Fy2(g+4b(Rwk zQgHfAF+xj<^2|A0QN>O@5;2hIhn8eFB6!VWDcUX(meXDi!wuttZ&C&##%W3&NzQd{ zY5y47l}d-tLtibEBf7X!+$u{6tFpmI&#plvhPqYfsUqY-n#pY-1jJL0K$DRc4tZM@O+C3}flJ z5|NKdDDN{rfEr9_nZ@OBG=U>@N$5SD?vq$_Rb~aW#~FrWiBj7%MK@z8K=)ctH3(gJ zU2??@0Y24C!tK%2GX}|nF%BV%`<_^yr=EGCiD|*SP1r&doWRS0j(>BHmaMe zqb*NV;wCr|6^lJTS1ZKq6Rf+-e-S+hm_oZVT3jjjf zc86MJkjqPJZ;Q5`f!&!!eRdp<2?DA$GMEn$!$s8 z2d82C5Q3Z$LQyi^<+p<8Pb>+>DVxt(9u3y;5TER932Zu#MjDzo2SYYV45QZC0up)4 zo@t6Z7zG|h`wLRcC@EMoWAji9Lmzx0k$E`I0S7>6KTJJ~Lx9KIAsvKP5SGytO~q>D zq$UCsBVjmTHH?x@H$TMhNVvtcIv1T=WC?E`p@+!pb1dilgHh|7DQBS(Pnrhhg`I3!{GG|u_J zGuJ-&MuGF>qSrml%$MR(|5t93^)$3XSYsP5Vri(w&KODKA6zwe2hBD+S>h8MtfOI} z)F2g9h#I2fCI-w$=}sEyZdu1dxUm z1hyv8^tl26>WC7$2LrfzU4ObKRb)>Vn!kS?S*{XTH zo*UXZRx^$zbYH;J^5)?4EcI~G6yu0&>c%)1V%3n3fcKzsXhiK$Ldm96iepku+RTG6 zf`QILfetsjk7y-utM^dmB`^~t{Y|ska{)>b@1Sztw7#jPIkwEI0y9!gUkwZC@VWd6 zBi|CBf!WB>KkkMlaSOu{FnM;G^NmBa0NU07T%{w_rUT>{YDC{4TouGGLapSCC}!9? 
zN<3hLDT<}I9UT_ok%W0u#tQ1+*Y#s5>+!LiR7K8za!@5!h9b1%Gz}&ik2JJ%?4gpl zO%?)o5llar{!C0p)P=5ekGC+MQd-*ciW<;kUvA~E3USI~NU*jR``TcA2h(S>*tFx` zvO05|6ES576G`OqnLHLFBGrSsrj& z$rEQ%W#BHt)FwI1T9ddPB@f9~YHBq+v&5^8_{|vZv6uoW6lx+A#a_%V<*X5vN41m& zO1v62-f$@mY|1!OwZSOlQGre(Lz#d!*9M~ia4RG>N|T0&|H@~8UtY@tjSfkT%}mC! zrQ9<#JBc-K$B(NORf)pRFw}Su*+`~`QkL2{TxvRi&gb0DjjihGnanId<#>9ZdCVs9 zrpvMsgUe@|Fvm^G-%wb<`gUDLA+QnJm)gG#-ezHe_HGUxiCuix6ByhEFQp~B_%0@;M8lPnk1GDwi_4HPt$D= z%5M0mb(_RGx%yMnU?cW)g_Pnx-zt)C zmq^*L?NtKvaaYWfShCR}9L_~^J6g*VtmQS+ZX*a~|Fl~kCqTQl*ek;6oyd%^Rs;kc zeYX;EQfTH2f;)kLx0=OiR>iju3{#m4T6s-Gj}AIENUU{V@}(yW_il)DNK|l0P;6RK z{ zu5b?90H&D5!JSm~K5;oGE^M-6#!J(|W)-x}rUa8sQ-p~qM3nVXxzfp37iOhSxXC`x z2?X*a(d95Iv8#KjXv7T8kjZEZn%P!(VSp1L5Q)_j+JcSH7AME9)F4+(r2iH}plKGX zHb0)h30~+t8QIu=EjcHwvf`Z6TFO!ROP+460off)gpxKx&Rel*kt;a^>TGx^uBG=C z`vXwaaau=7(3Qg{3p=JlI7VZ7+9dYf6qpDxrJSy2L<%K=lc9Bmv{aZ{P{0t{bfY(w zsJSWs55GGS&d0`1SkskHDzR%D=<<9-7fc%s#!9Rf1?=B20yZ&Eu(yLv;XMjhA+6GW zZje$u7~Cy0122`CKp!2W(q5}VpV5a>uoADDT> zYI7y(VIjegC6SSdNIOjy?x&H@BCi}W6xfKFmZQl(rMR8^Q*nZ5Mm8nl&^snCsIn!* zeHyn_wKq1==qXc4rs3S<3`r^OQT{S9hs7K@rvbIr%3cL4OrR*^%E@KboUP0iJ4<~W zi}RRrO58`IrMkGS6azhHU2_mw4V#4GHX$zK_3+BzEGG0yFlL%KAMC2hF&OL4o% zQSoPx4@0vb+$Yl?ZJCFqb7-ulAEu9`(HdNd<8OeCg&OhN5eSK{a~>K5HYTTv6lUPv!H+FNUh2rB3; zGi4x$-pIU(4M!5hL~k^xcF20NI;~lWZ5&MA3^-t`3HacV7)e5B;j|~6;Z7QArV+ zHwV6b42y`BEf;|{w|A-Yib{6hl2Saa?17 z0WB@}5h9aVccHFK^AFL8Qq@k0u6Cjg!{m!m%H}XC(slQ>6LFZ=IEqRGZ-OqEnN4tsj&YoX2qqP9?cBr9?e2g9fwjzZfqC1m!Qfp%Hbw)&ycTk zDN0L(F>?ijGFuae22QIK)vR$)${v$N8OIaOy>1f6 z9U$Rkc}y8iQ-|aImSO9|;~fk^&Zi;no@?LRIBP7UP=J?ACjyc8bqZ)EO#L`eM_AN~ z!{_#Q)FpA(naEd$I&zi6=t;PK5{8vXWmi~LJu<5E=|DcbvSipBhZH2D_hCt_WIF+a z=KvKrF)%>orkcBCLo`Kk0=CUf^G<6zs97P^k*uKxM?b`;&@(q^Fd(tk{Tk-75EprN zmEcv;kSXjw;f5Kaj5h;wW9R9}<2ccPyc96WU~e2CqKa%=kpV}@$=rx+ZF~!y8bET^d0}ODRoe z&!Zmh1Hmi?$w?lGzlG^tyh1E(J-IZ_-=pW!bgZ;*dU~X^)Vjg7!|bH5B5$oI;p$Fp z*k#?|AAI4|&O2`K2XRkpAc2gRQfn567H}`x+0cw3ZBnqJnfaNQe1HN9t zVFQp3ir7!Ak@~Dn7|!cr&PS~$#xXz{s`nD(nAlFyP7u#yJGExuL}J=l2;)LnC~aMr 
z4KI>hGYd0d)61lgoHI5}%MybYoDhPaT6bN8m0=ORHD=~dVn3KdIZga?j`$X!)>i9h zt1y)zl*q`ElCUgK`s1@^Iv)eiXo_NZoIDqsW=BB|ihcN}g|m>m;Hg83MRK09N0Guv z&y~ymc~9Ro)Bib+HHUSvOzgTX=IbJK;oTC0M+EoqMUMaMCVOWF%9V(=3BE^ zslM>WDRpJN%F#bb=-7IeT4T7}_@eK912}fS3=|_?R z4;J>fX*U7n`H07W&m=M!B}p{x@&Q^si+KdI*F82iL;Qrsa3gr$qN3ppp`0b&X>Cw1 z3sueQ;R(VeBhlk&Y33R#$z3#ILpFgd9t}v;M0Xp$`8>;mH5Sf5ZX{vh(s)GheQYu3 zh2~?;;uH9wo==J5auE^`X}Vhu)k-W{2h!%&8cp>=VS|U$H=^X=*g8HjS?krH=S$5Z zQ$3Dc!ZfT2oT{)dhoJH#4(@`i)E3W0ZtH2}G!MqC?OkP_p_ge?G;r^ucXiZiX`jXs zX?ut+k~sEuHz|_5Nr<>Za^8`>X+e{7xPx3&J>Wh+8DBX`hFc$q%{^0NLzp=x9m?Xt zq&=4t*Ulofu7sjd70`!RG(8>T))Y*_2VvJb7LyG{N%t0D!vD|Sn=nUq9BG1o1ygNf zL)jFd@VKLPdTb?8ZOyUPVNH(4Y*jKU0VGI_s(>0CB5P&*?;9TB9xtwBldrOfOe{5r zvEVo|U&i6bcW}7pTVN>~N=s6B7O8Y2r!9bEr#g!~l{&pM)AVjlO?p6-3Kuh-M7UM6 z>b``r&U1YgN9$4w!pumYmSx#<7F~|4Sy&0cXvW>e6M0NZ zEEVO}V7GNSN$_2i&#aszt8k2mbdn^rgG{CO>?GlETu7#umMSL$A?)tYn| zk#$T}ho6l&);Vk(NXZeC;g|^Em}ssiU%~?zLX@UjV0mD)fbNWe0s|#f0N{#4H|~L) zC#+A`Eo$}WB|EB9b%aH);7EyHcd4?d9K*4ajBsz-O&Mx}5M*@UHna)!JO ztkX)2Nu(mLM9kP1iEcNWZ2RMzY_~&zEU-L*V>aQTMIslVQQ^7CfDSM9fn|Mh1TsOe zlbLP!f;?ke)r>rV$4I<-n~;KYm~aUX$G$2UI53iiY>>D`U6v)KX0T?+-b!grh8fXq z6&2<{)+BHX#1F&qE<6GXaqE_BTo-}g&b$Oc3*=>tJ#q#in2BCTOzMt}e%Wd@fktE| z@+&9uR~**5H#?QgO))W}qgIo|`lf9)^-*dyg;`Cr%3JS1)}%=Fgu?Nzm=8-!c)}M4 zS^7V0RfwqnF0l&*jVXcEyZ6T#+|G$$8as_<-i*m%J!LNGz%yQ^fhaKwg+QJ}Su4FR z$-(r%f+IY!S}pCIkfKW7gmTYew8QP?_qnGVJfT{Z(xSoQ$D*2?auotq$i<+(L!sm% zE9dAlutUP(CKuc(*de2MeJ@*F`XS!1^QN>u!*mNGOb6n0+n@}0JHtfi2gF*XpC?}y zxu1yxrX&qJvE(UXG{?0~2A=5!?TE?5E3NB^dv8fiNofgYY4z`Q*gKqsd2SYVorN6k zySNBS#5!)O+@$*F+Uox_6=yXY8=9 zH45}- zhubst{kaJV+LeOdjWjx8dEhN&%@q%I(H1XbC>_=#7vhA}N>XW-GMOWC7$$d)5AFX7 zX27z!NIY3aGQKh8D1st2Kb_I8AJNYCrAn$6V-8z~(QH0+H&LliUA~EI(4kdmL-95` zKz;nS1&=&pK-W!*4&^FBtV8ma(cM_N38(2Tw4y+;h`qBBJDyg(r#%=NNyn327k9Wa z-soCEN2{w{6t)uLc((u-UVpPua8b1hOjH6iR%))lU^(V>u?($olgpKUY$Wy0`cYAR zp(g_z4(kCh0E;#dn^K`OZ=W0m#Uf|%5-t+ndv?Yc-jzzUTR>K>n!q49918{01J;+s z*)Zuwn(j@->VWb=jr7vzDvq2oC4A-;`FW$gU_wIH@GjncovE(yf7W1`%9$pLIp*Di_{e;yl95 
zc$;0gr&=pYXzA19Rd2$mI2xlFKkTKeJR6jliFc?~?4kL|m?RZrexddBWaO~vbaWCO zk(;`PTM9a`P0F$ijb$ILJZbnCweTTq=uDDEZr&V^$>F$2pr3!SWqFXAb+?KpNp*7; zfqv_)?00l`bsinvptV(nKmyM&NsBogtA5p1{kFInNM#5;mE<%!?uKNgtdO*w77)q8 zr4~rE%1u!z&h!zwO0_cwnFOye$(bz961FXcyLKX#f``sV`<@Nsr3&nLfJ!efKE1ww z_xP~gqi+2r{l+QSPA17aguzU~b~)T#N|lU!5Y`uThiu$x1F+_j5k@f|aVAf-(n;q4 zR!GLms?2?b!S$jJGI#{YoQT6V)TOq!U-Sv^tus}e=T z4TisUINkxO7y0FOj{pKx>!gKH3)crjfIsDZ!hP1n{R=66Jpk+CE$bf zZZyBM#iv=pqUg0ZKi z67N{i%T9zB+}^xTZA_q0uQ-1AJH}rrjF)M9&^z8nqCfM+_P)gz3nVa*hL886UjMIv*XM*&HwMhR7HXTyy zp|^AF!u`|;5eSN^c(8^oebFa#7z=5tc4LvRLe1cuEfIaxO9YY_rIb5VtxLFX9+=9E z8tIO7^Mi$s?kG7wjpwyDA0sQ66NV54Jl8Pd@F>)+&Qy`%4@A3?(%FezB;;JuIxln$ zVx37siQ~RvWU6NkW;AM2zvQr;kS0oZIzCHw|4{~-D+b*VNykVi$M)K0l6$XIgF1kR zSja%Q=^PK!&^(0FwBZ0Dk!`lc*lI9Y%M{NoJ!v4UR9K%jl@t&? z8zwH997g^cbZdm0O5nMv6pU#lu1rvx-m5gQj9hCwL@D_QQ7ck}gRn-)1~RC5>}O-V zI^3ITCn_ap;vhA%YU167ZpWIWj3BmO=h5V zVTU^%6t@oKj0C9G71#&x90{hHgwExZN?*c#tr}O~;(v>cIeJ(qJvAtuGs(-vM7F*{ zn}$UZ&ReAhQcIlFdo62{CcE)|tXzk&8?L%qn{gt6hc6P4S=;m<8H*E(8o^Q$`y0$0 z#3G?KZIX)UeYyMaL~}|O<#rZSP=KqTAqvIfkS40*n=lL-SZS)k@6IW~y1I50-nWWsLoebXw)Cvk)H_fguR;NJCInFv zb~G+Tf(egao$)YZ8eNgGz%Ew5R+=Eo7pcAnl#EZc$6NY%pC-aAll$|IfeMEWj9br_ z8o$iz6)w}BCloS~)dpw;*qWWEmx^G2I}g~6Z6X{{hezK#II|1*tjO!SQ4!XA&Kj^o zs$O>Tx_dBnI*6MtHG9wl*C&+Lv=@}ay~%hz<#4vRh~q?~hTj{_twIpeE#2f=I@RzB zIbx?_TyrjUD5hik5*|B2@(j2W6j*{*nIKxSkn;dMD_RUS_GZ*}NXJ$ZbDliLB|KK< z5GzceI{>61D6Vj3v<+0j3X%m6W^uETQ@sLtgn9+_5 zl(9Ji_0T7wIn5J*kys2!y|SFkxrtNtIpB{Q^`?G!+H+{bt-EU zW*i$>PqKOPN0~vFG9;q?+?t)ky| zmkIo}*qev$R4nQalF^?=u;k%Fp3}i|TiM8p#5q}%t5k;OEKW$e$}Z6qMou7w zlS7e$E)QX{qn8YM2HP`JgHQ}e6Q;bWJ=)D3&Q{Z@X9l|x3{!z6 zOkVtyu1Zl)1LD5R|B@>S;Rk}aBk|WHQlZ^IuCK$WSJ^XPxTPl9K2|C@2vf!HCc?Ng zu0Bon1-d(weP{9Hl{=fgB|H%Wk-Y5`V`dciv6JoCVhtNk6@yf^r4%O&>)UAuGWSCv zMS*j_34L@4hnBIWs%$x42!Yy8x|1=&j6z$*%3~f}+rb-rulIG+rGqM&Rk^7>3Ws|d zRT7OUoNYE3hzn+@0&L)!@j`u{jxvvquie?hC8e-l`?U-A7+AaufFT%;)D&gR7=|{G zW;;t~rK~#_dz`I2^?vL0pVPvd`P7!M8S(~9EJeYzaN3}Q2yMA-*w#|TT+EQHY27X7 
z5!@;kXmhHZPY|l8mA10?SL&mk(%gb_Ek^V12+!npjX*N^ z+Nlvv;qH!04HXs-(`+aY8f9UjIgx{+c8zerPNBNNh4Oj4D(#wA`93lAZL4?IgwfBl z_Z67v5 z2h^7b9}4lHmcK1y{8cc`L(yqkYJ4{s4A)_;T11qpJcyj2?xvJB^lVwrmXH!*l~<5bei zRh*@nth&Xf753@oTDKe?i8gUvu-2`0ilwZq86*+F(q<4wc=@tHE}A~wKiaMqrJt58 zsdi+iDcoN@$;g547G*rI4+ov;A{#Oq>+MG|Cz%}b(xlOjc==*VgXi- zybh&WqGu+O@b=C}n!W4@sV0dN1C@n_Hcl{MX@K*O@cSGh_vdJ zyQ;9k!fA4NWobda*-%2FzN~csHm>(dm&16>cvkA#9?0@YLM?I^0JDTm2Y=9*#1cB# z8C^^vv>VaE&Wln{S7;G6(=OHfK?Gzhn^4FwpMYSNFbu6E1Uf8Q9l*kq!FHTS1@x}8 z7eO}AjXkW7T}E?~($5`6b2&U|?iYs~%LZslYZ%q7jAS{DTAxPKgtF@s9PfRd3L>mu zCFaF5Kq-ggO*H7gGIc^Ja%45&1T^&$0Cjb(w4Oa3_oh8^5D&4S-DMIa)pmBdV1V(>b3x4tdR$_S)ofTk9V`OH-5ejj*`3 zVNw}#*tmGHcQ;KjverD~N!ZJprKc1%m0}jASxC`Md`{fD&3qv1BP0b`nBj2iaF2Vy z^4hW9px331boDtf~503iGC>4w#T}#Hob?yBCHY|iE!g^*A0EhAPOfuRIsJS zMW9UX#Jb7uVF2p#4kiOwF+9&EtXtXpT3Jy`qH<{q&}!IuNj)@$Q&75d%%q;T>&p29 zxN|UnQ{*;2Bg<9czTJ;-V9$4sbSxLGO=!t-@xG?^LcEPEljZg(a(fWFI~UVv{+HMn z!&t2f_cjLjAh|cbsQPG?Ws{11tD=quLayb@%ARxb6pzidvYRmYgeL3MrJAroWP|DT zt3|;-F%A~d>D8SbGb6}0JRlH|n74Fk;Wke(tQA~7h^J^tSrGaAlarCdcoCUN%YsDq zSqjbBP(Mbj)I4qJlKVg{4xg0d=rM*>V7)_lj>+~O)v#@not+$xIqY!k&>)%w8kuoD zFD;XWq4mMk><45lmdJ}E^8~NGB8B{bEiG0*L@^mCr>S7PhbP^43MV?Y&|}pri{*o3 zJ?WSOb=JlF7@hIJ3dp40)op6CRtjzMo}`#bcXr{KMmAS!&av2}?B0Vs+XXdbt5*~EB1nPotMr`?jaRMtoLdBLATqzUX zIo4@r@+fmS89VAe7q+|}1Oqz zfi(l*B=bg`QdLZcJgy2KgUSraDU@rTgdWui&q+rguu6YgGN|3c2+9selOi6 z(;#71ZL}#kTgPlBfdbu-=`7)-Orv_30kc9i(}tCf>!a^Ys4_p7pq?8>s$x?mzC$`5 zx|UcM9vyf}$ILS?7_gl?AW@vB&Z9PNVPL!208K!$zczEnx6N8Tr0iCNM)SQAaTS)X zoo=UoIFll^A!3>^pqlY@2&@VSB>nek3-_(?W@{v04?+$pqoO^oxLy;z4^Ofz8(8v4 z2l9kM&!R>RNoClS^+6Vhh@PlouhPvuh^3gDvnr9Yq#tbS>ad1>S2r#$M1m70^m?9P z3~4KZGoqsJJ`Y`mPM%971o1cpUBWPk`KD1JPsz2VT-69&TP9^f27}XerUB*SactcB zHI3VYc#Jf9@noaBLkz~x8Dg0Wk%3T}oe;t}m@!N-RNmH*cQY1G>uv96ZPe6qLA#`W zkX(ghY6UwAnP4QPRto#3jX|ViEh1x0vKVI`a(`O+qjQMt41rAoVvA`u$6<7cY~>pk zi{+TGWkDTOxs$!fhxOE{wo1s7&cU=Agd;8obM1`Sg%P&Xgkk7kVXw#V{mCFpv zTrs+Yha#6c2I-vO4?~(TA?ymvemjUwUk28Vi~^KPX8pW+R>4c^m+h$F(>mJOJmQDS-RXrb 
zdKay%0P=;I)0GjGST4CnJ(gGCvDpZ92D%EWAIlZN zr&vk(O8BTdpe*&tj@Yk@o);wBrCrQSLPSm=ZMw*f8G?uk7e>iKW7#1Ufd~zxZA3i6 z@uec%TcW{jk~V_5mnesED%ILm+cWm=6^1or9bS*k+{p++8K7;LXETJl@u-l?olY%4 zKehd^cjM6%lJeD@@=?OBKa0iU+~qW_0*JNYC?PJ%BS*z@(ZB4_;2nGSgUC69h#I;> z`d4P+#m!*@JzHHAi_LgH^5S-G7;lAmZ4&b|esl#pr)DywYW4ei@^geVXn|}ecSzLc z@SvWY)S0+3K=XEqveOOGNqeFT=y;lN&p=Hiz4ft6dfOQhvJ3Z>4vyk&8Mu<`D{9c0 zVN8fK>nt5&y;8mTcVEn`A}Y)`n@uD4tm;0sEv4{8UVF)Xs8y^&t3ZU-J*!f8(dkB1g%vYAN(23GB`!du2a=EUB5U>J`OjeBmY(G*H>m^3hZ*IKCcJa}1c&>kE8dzG%G0E- zi%MLX)>z!L>A1H^IwUt%YuzJ5b;X)w?s7VX5i+s*b`g;=tcH=6soY6XsWyL^D0?xzKM0=uyeeVb-igcNkfy-JP#&oc}Qb|Y~7F4gEU{THSvVk*#5rG3f!SW{R zvm7?6mo8f_=fF%=Y5gz~x4HKp%!#Z5QrYBsh3C1y3d@pD`e9XZNTz-OoXRW5vqywmP`}%8u0lk`rVe z)0d=#>fc3wnS4F^ge;uSg=_s!`o(S`iJB7ob;fJ6kr3>_f zu3+?(XopwW%gN`jR;I8jW+EZ5s*!Ur`6whTKB7!^s0`(BKXFAh=5j;H37@=#U?+dvH9}QQ>xuktpGTL=DA+#{-e5Z9L(}$!$yt za^-8S6Sml_>zOpv`dV;+w74Wunwq9^qDIf*gq+-g_tx?5Cq$L*B=cgHBP|1!Q=uYrw0?`lyOku)MesjO4V);PJ0=ty*2Bb1oOw?BjqpsGvVFp;9jl~C5mc41jvUWp zEv$1l9VaW_<#Eg`jpF~<^g$HpgVb!CJ(iq*LNG{}fllX9pXM2-^n z{reKRVUTTo_-s-D9%?m#6xFA6JUG;4eLu$};)^T{Tim<4Uh-T+OC}_l>JQX>NJ%hfQ z<<^nHrgJV?K{k&OJU>PibAPrH->Y%rL2PJX+Vvc8UBVMlS;tdp`xNG8%_`=UmBC@=%u$P9r@ft ze4MV6plm_jZmNV#rE_2V$ny6E9^Qm^3 zBk?#qI?kI#>`W?BqERT5(I3PRlkdF8j1rZQ8#Lqgm`8f9sO1rI={rFhk@!S&SCbMR zo~c+cHJE=>|4)lv#TG~tvc}r7jc41)U)gRsh{tGY_zXLh^iy~g8Li;yGhpQ~cKk#4Aeq9iSpX*{ldW@> zRRj)+Wq0vk-7^K|$)93Q+p^syzz$Md4&w;4e~N5AA1h}9$;&X%EsY{61Qj##g*QES zJBH)V>yil)wr9;0zrbPirWf7aU93a7HU#Zi{%l#4I^-kL2Br1$MKFle)R8zt-JyP_ zlQFOu`f8~WUZ{Io)1wq!! 
zunWKkVO(%3v;z`DR#RFo>~K9%=rwC%n3uZi*?XbD@Q_r%l9%N6b>gvOXr*47VR_2Knnr=JZZU zBROnvbhv}&rUfBR3<%X=Xj}-aSEL+?+wYO)b}$pIjIf?lj7wM{+;dzYI+CS^^8jpq%K-h)*CV3KwDM%2u?v~2x!5}~=bO@!qQFNY9-8Ne_BAhep zWvag7;V|KiRN)!vOfYcRJMJ6ESe4j^g7G+JtkR ziM)3S`&UwRi=J};$t+mVR@H*3!2Fs7uo zFP4fsu2+sZ8Zn*H4QX41qHQdxRN5nI=OB_&(%dac+C-|0!#HMW$GM@jYy(*gTdxK5 zFf!{U*Y2XKlB6X2fo4IbuO8WoY!56D$SIyM9h&e2*C=_aE1V~$gVCxwuu^R==(+=K zEa&)YvL@BhA;~(?elx>=cHy1?Ulh->9H?i3uwicJJp=L-0Jx8tHFm0J$@ZvMxaq<@ zOA?zqhToQ9S9B@lL;=T!0OGdhn8-r1BwfxrElmWvhjRL5Vdtf^UxvpVyB6$#G=r%v-`Uz3l{|NV`o@ zeki05C9;HvwJFY2LF_F|0UZWzhv$7Rq%FijIlBo~d0`*M zqa^i7oy=_KV$~e(XS7M2h_O;Okd0h0zHTrjBBRu7AiCh0T_bYzAu7wIn(FWdKsH&b zmm8|W3S#lw9bJ5K8SR+m;AA7)1k#$^hY{8%p*+;?@Qrqd40LAsl}6Z4eqxa^SBBYD^xD1`jq3_l#$U{l%2nv$)Arb2}S&|%%yBm5a7&lp!X8k&?lyACC2H0cH zI?s+Oq|C!Ke;_KRS%-W-1g$6%UQ0dBiN8-B?qy;T4W<@X|-~A)Er78DI_%tUzdWM}r}$Oh41`AOjP+H5B{#=SGdsD?laK$qw`R9EL#$9I~V_6@ZrY z=*47(7(vV-<7*C0s0WsnEzS|bwmZ=VKoN2s)5M7l1cwdu_Ri(D_DJ5%mzM_9txOwY z>O=g=APd{X%tip__(lMoJeB$)<&aa_4u^5v=DK{G7iMrgBT|TVZy%+WioD2UAt}n> z5?B$X&l1+kr5(h>vZ>DDOOVE;k@WcP(o{(YKnNAwn}Ez zp{dTS&Vyh;&Hba>=^ZyUVP@qnQN$eX4_1R21;Z1(;5mR4rUmp_qcZw9d7bivN-of* zMdl{a5%<$@RcRO`UL|J9n00YDwd5zP!*qNBTVV^;v}L^D?WhykGS|Q@g4JjKjzxIj zaaF+Wga|zOTg|+HO4#&9eWknQ9YdD@h4h9?)de2ytU=+Jzt(Sxt&LJ? 
zLmdz_ZclQmOZPMGJYZt4WW2qZfJjz4qfLb}rA0Q{juA75XCr381VG`qlF;DC!Z+4% zbeBsor8mAucIY9j_TRl>R)>4Ybt-NcQynC$#0u|6S7IkZ!6xf66n$t7ST0IZJ2L0wdndU(`7161 zW7q^v^`o0E*c6F-aDX|Sv(hC>70f6GOIS|27h+M=cB}ydYP5g}QV{APUZ%>w{5Ym1 zPyQ^4kD)M?l5_kBhes%m%pAZ}BQiz=y%pO*gV&aa+d=w5P^vR6&R-FuaJ+asq{X?^ zRNcv%OW|JV4aXa*OcE`1u4A$@`h29qk!XUtawZbf`PWKayqRWgs$3+X{g<9-)^b=5 zjaRwY#TrHRVcbwdcy)l^&uZP=*&6_W;@4vi%eEa3?ReXzg z56fHoB>wMebGQ9+^AaK5V|l|9rT9N8gbCb^GScJ^vd& zWqBq;^S`uUj`|Ll1iz5|UN7N17O>st$TLOsO3X{pfoksXqAP0x7}m+#BbomoZgJ9$ z^~uv$wgi+utXp(ieg=e8aloWryP&F)m5U~Q4s>!kBugZ*`WcG3gqKU1kQi2C6Ao=D zjNB4#q6NpJ9B(J{2U(2fQ+`g9oJpr-J->MFJ%i$1em3Ezuz!`v;2ko@^MuoV>4yGu^*0fEEykPRlImP*x^O=+^eGoNI=t9{`2W{(_a=umDG zcb{*sv@Q&0w9pkSAmG{mAe)I-7XuJxQwuFDjacd?DU{3Z*unkI;==eLmIU285mQ7_-R&H z)mgZ7$K!el&f$#(9(&An55vx}2ePG^hHh8@$9XeW`gYOBQM%H{nNY#(@d5tobJ1fj z9%U@zN|@LP_|!H@qvs|p$`9+k=|P|@01KunfKZB?_Uf$KKu5bga&Jij4$b4!GUHw=Vclsx z>q-nl2JVVOk)m1<`f;N%XFSM)`LmPR!z!x7&J!dlZ1rIHC7je5dJl8?GI0dxXbg(5 z&}ebi9Uw+~N*0_m@@4NZXm~zlip8MXlf_{1gpmnXqJ+r^PX z-}(W&C&{ba-%s7DCexhGntbRYJur6WJeytzp(Dc%Bh zFlDe3m?oH3JG+_&Aw zSV)KjHSR&XOIRa>(zq_mDpe>Ood6R2`|cx?&YX;imapoJ8B$)XzH=r6HkHyTtwu_2 z9gc@rue4GGb!m*p9OyPsqkh14Y^Q%`&K^i3V~06=4o@V@^m1l7PDs$e zaS)GWxf*n7C_7Ef*i!2F*iwU#1~wM)^G z!!)r5^Ko^WSp86{7jqYbTT>-pl$1RCj)jk-LuK>x{rHfG_?K*hh_GuQ5r<&bMs-{A z079)Zp=qXIolzMF99=d=rf zBrbHxJ#5>eLly98ZURdXnTf=;U#oC^5YJGtA)6V|IVFs3&|de{mT6+?hZ~56PyGz7 zCr4GZJaclHZH#RbdAzd8WU-hq0e$nV3->Y=P(e$aA%0f;k!z)aj(J& zPyQ4MLz~s5=lMekPik;eyt6Hku0;Q?t`1-0d`+aLA&D4m2he z#)m=XJ?=wFCxtG`;d{1xobk9JS!{J+(mw3lAz9si!kn3#czVdK6l>;c3Hs$TW5m~@ zxD0EUTf$LT4)QWMEHVr@Rmv8?X$#O--e$V7w2H4Ym}RK2Dn*^H4Nsj55QCp&6At~^ zp^92U!Z~E5JUHmq1{w|#;35831utqelL2Ql)Vpv;j6@^oi%##-StvT zW+WlIRXjqP+|m0WY2xO*ch&XM=RDzq#G*P=C6DKWF@xIf5;h1P>D*g&DJYy~rGIHE zAMJ#)rDWKNU>01!j4TVQ*s=#xmnErGL=UM}46qBE4qXAszK}6Cg$f0_u_HGM*pou2 z;_BY$1Zvz_si;V>&*QqK#}R-_TB3hCGMjEA)P*jR~EuTln4Nij=k&rP1bC1nFK&_R2e)7jyc(;f#vn}to3oB^Z zZ?c{@1W#lX>truE^+I-$h)}h#Q(+3IM 
zBLOeS!LKNrc8Q}}2SJ-RY%NN>Dceq3yRdfcg6Aqai!C#4Ncgs4n?M$pK^7jYUJ}PT z*?}{kF)NT{EcSz%668pq*rS7JK@*N?oa?R_PI)90Fk5_kjXa13wnVZd)YO6=;6rYg zgFuMWUQJf?J*c<~%ZU_cX|rgadRn2!`7#Sgq8y1DDK7P+w5uRZ2U1lCcKS>Wo_HNk zk*qn4!gf8XiAx85no8TDPIHdUO^wXkjgp?pRc(*>s!shd1cjq$UEAR6!ZCk@-XM-4 zkw#OKPix_1+y7(X%6Kfp%%p*r|NWFuZREK*j&m?}KcpkzE3CfNXo><8k0zI2h1G*dWha#vd zS2@V z-1O8ZaX^vye|;BMvw)If2(Sd*ao4=HG(LAOl^Cg5Gm{Yz*fmZ59X>s1j^MCXH$70R z3hN<1D13L}iW!eVw~5*>Svr=QK)o&U>1hNdBuj#rb<6z#mImcfko4oHZdf^-VJJh= z>KTMD>AZ#D!boygmWI~gq457~eH1TK>!ahi*SjngNr=>%@3I^odCh4mSzOuz?IZ)S zjOy&xSuH>+NfLUAk3HFDa@{QLK=M_MonhSCVG2g!un~Q}DoZh{B&k(o$)AQ7VpxdK z6ph=?fs4{r|I^^w!kPPUrJqAd?;z!GU$|qIK7@p%25c)~D|G^H3iDcel1O6#hWzpb=UbpU1*_c}aN_GP67 z{G^z>uWd`g0ts|}fM>XpoyaV#BC;Ju&qok`IG)2|WV_U1L`n6@Na^{|YDXSJbAy7Q zyxE{uB|XCW3SFwE6{+!ITwsK`m zM!NVh1k@0n3Ey)#UXaia$C=IUv;rP~gYCyMARHT@5A?V_0ob1W#K@&y{O~i;M2*+U z64vd6sPS!L6HEnq5Rem+Gy}AT+S{=c=yHZ0rd*%7&B+#d_A+sAD&e?DhAz@_RR*RA z2-k~c1)*t}phYErythu$Ar-lhm;gHv?i}u0Nv1j!y*F&{rDS)01Wn2d!UQgH_a?6} z8|&h_R>>J-Ua)W&C%g6ep{u;m2V0i?h!qWr+)3iPui$`oR zH4Ikop%VCJ`5st&uIl0Ha?&52!Aa+YtY{txlZC;u+kMNO74}lMmHfA5x;p{?4kd@e z5a8Qt71#;{_Giugq+O~mmr9m8eX`VrNAYq|+I?OIHW^y#riB(6~}CSCXz#MNCrjI z%WMfxiZLi4Sjo1KDbHxOo7Oyi$8?`*o;8TUP~x$#0hUF;7$R8Q6lKm~Wkv#UF&hM6 zJc0I$DrdUrm~^3(bPMKcbaGqUB0YffqgWfDY=_xVwdAl8{axnI7Bqq9-dz}ALfW}} zMtgL9F8IlW8W<|Tby$MKNDQ(!5#(ul+(i?JBZn1?@aU2-I?|ArGVZcI!_tOCnhl~- z=~;$ZF&&PtnC>JnbaK?1R}~!Yt)y+-mY#Dg!c2m+Q& zp{M;Ua1SJZwrbGPI?{80j z^Y!NLt1X_H6NI)}k;LBIo?PVLc^GRnTQ&$JnC!?IQh3C1in2yMn)uQMBxaiBz@1kY zWVu@REKoPIrb#jay&7B>#(8g_R&tImSY#k>J>5Gx_`t|0V;8A7n5C8Jt4F-^5{U#` z)n;m{&f$2JDcju(yz}ttq(|x8pw}S4lZ=my7l&?k-rlBOhqUvsssUm^^wgX_JQ1w3 zta5->A{j7hlMZ|h?dM&mg%dm#Rg>9}K01_fy*3p=XvAE5p2O1hI$Wv@5S?e(tr&-F zMjOvfnk+1yH@ceXZltds??%sbLYZV==XR|*94~{x_K?dgt1WGGgGhr;rlO4MK{xoG zT9r$p$_qcuRq6C$0O<$_kmlF%5|Wckh{I@((5Y02Vvr`7cOpXkp+2P+N@l}{m~e}w zQ>6P0P@ly+TKZL*1oI2lrUObRu=}(Lmcz!kYRpy96(lR*DPjFUB37J^X(yJqW@kMx zdweahItkM$89~E`X%be#6MJ8hWwh2QHhoimxL}*`(%qOzsjo?D59m>*(fM3TeF>)x 
z**(g5HL`)8Cp-W?%8{4^iu80X>P(fIOU^2U^wlGNnkErmVV0xR;lA;PUNyD=s{@Vf zrMEEh=3>bpGvhw~&Wc|QP3ukb16_sg`yQSb(4cQN2P|RLYn&8g(?y6bp2N1)s0)hq zJ#A3tSvB8kM6kfD0ErXR<}8IrQ>^4s#_&ELD9IAS4NJr*NlGyuHJGcxV^eMM5|Qg3 z=N!a^O&Wm*23C?2q#uWsZ0$NBX>65=83PwA>bNx5oVSKmA)T;`WUidnJ?q}v;hZPQ z<~Hs&H79;?xaWmQv;hF_d9gOG!W^<&D^{w^81%@rTRLJ>>o?e=_nS)#K@3#vF5Khd z4Nb9f5p0Ux;doHS#Sb)mQ6ZFf_A#vAIyv31EKJobyR9n@>M&5)+ZIaxfbhlyKZ_ zO*aF@FPN+yk0;JujkQDW>J|6jJo+#qVf)k1zUYib;~F=NF4!N?HY@vJaymK|*Wno^ zy6}Dr7sP^T+lStdH(32-3~)swSP>b*{9?wXX8be|Y75Y`5X@ z2O!Igj@QA<669JL$4Nsg`_9Pu0^xA)eVZ`SrAc8~{z+Lo65YL*Y+tW^H3;J?j&>Yd zfgEu$LtDzFTtH3TBsn}ILUPqBJr{tkcp%nN(nR1>!51uspD_5s@j;+kr=P6%H@WHR z@CYc27d({Ok0wDzK_{u@f+X~w3BgnX$O}8X^x;=IG9N_GI=j!vWYz)`>_rvsfz(J~ zFl-;d3Y0pSnnsm|vV!sGcomS?ey!tG0R~p&uhTLkl}DH*>}su2c%rFQgae>}bVfnG zcw{j~C;16KSNB7~ZD@h3qbF`RVbf2xi>7Wo;0frU0Fo9+HP!~|DWJrKCY86E9R|z} zvqTR2_-at`FTn?^K^+RPik)_5ZrUYmhQ(r~i?tv{yu?87adlm|H*7Z8 zJ7J+8LzcSMi{KkIfw=Jl735ZV&s1b7s@_Ez=E1rS{N%*asc1phg8>D%gwYoL6|@cb zMU{!&x-BSKpI1gvD<&>?uQ&ylCG|EhC3TLIgx7I4V=~GHT@R8^qokq_aN22Lu)A8qfT;@fnhD&S){J^HLWgES zdJ8lNwNeu|Rbweoxd@2P3uxP*JbGHebO&1xT51~uT!xbOQILvH9AfrOmW zJr`npPJAH4+l9+e}U6 zav1%?M7K(q?jN%!9tnn4uF)$^Q&-TTSg^qakq`1{A^B7vC6^3D1I){(Y@!kRWF#!& zXp%aD@gjAhI{m#dhKp1!lDbC9ukPb(j>XfQBm8xzc#qqvgY4+@KkFo-otsblBgZRap90s^?*Gn?L5G zl&vNNMJ)lWD!>9EdCGX+4XM5#if zN32-{$x6u(Iti6?k~m}1RlE<5b}?pUK9eN9_26s1gi#WrpWev=qf4YC7T0K9`jRy=6+#v}xPd_$VQApSEKV^MSnIxl z+omE%=ce1XMTAV#muW7tga@>bJk#6@7mjXD5igY?8Hts?;J;1iJx4jb3@D6s-vvQ@TBLZ_i_nE(|8)*CMt*#b4Ktkv;#`a8i8nV=_g>P`-4 zZ#~)VIf8M31(yC1n3r_@GwhXLt&Y-Ux6v(g>|g z;|XF6lPbLm#aO-er|0oGtO6V#L{gH+2r3z5;B`4^o^e=1+kui~2A+;5$HwVsSKR9r zAS#{d;<|Jq_uSJRdV-jhrfxJ$JZyT1n)JaVdx3Q0^2h-MJ}{kJKbxm z>cjavOP+3!98F-3O4npP)L6Kvq-8@lx2B#TpeatzSW`@B!?j;1I}oM<6JdXL*^ctM z9bW1jRueE014f)<1$t?GuO-YV2P3^JosqFrTcy-}FrATPErrykIrg}O2XH?=8Co8J zou^{85k_#Xu?#d4lpy~r3xp5qNoy6thxdhQA3Ka^oA5xRhvL~0FHB-~BWG>6JOyU_ zxvi#+>}U%ECM}XILTK~DM8AT;q&p;&t^BNosx-DH9FXf8W#@54hwx7K7<>qPWrIBqztmDD_j1YYZDi&LO2a><1f( 
znkWsb_`h1+`Zl?wGM7i9TDJ~jeR;pKo%`fZv9K?YlsOd~DdAoy2bYp9w6ehV%ILK= zNM-_Q7>juC@IkpZV&j}dFH({i5UbJSVd}8y;90V&ot*U4{fYnUlYZ|7%}Zk%CUsTT zyYhCbs(vpLP&r2w*(kxilKs5A3di(V^)@!5Bngh!e5eP#!N-$0Fa*;ve@#z1gR2-x zQy5)HxNnoVnDdUYP2Oy>=QLKhB@lFdfDEl3jIqVRPL#g8nUI23xiuXGww?&B2_C&e z%qoXRNulIO%=i#1ibshaGDoF#DS(Zr2v%5mw!=b?$>XZD_^Gd!PJ=@r9RZuiW6yxX z*g5XS07_FZu1j{^1Q+__!v&tcS}@)X=%sF4cy#Gw>&PlI`eZJwz+q)u!9KZAZi6yZ zuf?l9%}cST@aDs~@sz(jJm-;K$NkP{E>n2`Xrxnin5 zJt0V>6@KxHG*u4Aht`^1b_jnwwlb_{>*sCAJsx---E$nb3U&1p+ zEYI3vU9GsBbDhGv(yUKxtYqI<)j@#rlRP%?XJ;9gMv`*0Xs*L}8MGNQ=+eE=fs6&? z?JjCIkBb=*f%-sNR{BwOnIF?6m623>WhFPA+ho+zBx|n2qY!0W8@~8M6dGW_8Qw^k zrdmKjD4iydGv5nPA7*^fa@%n>V~4?A7asMd!qSM;ba$Ir(KL-nAKy*>xSU4Ysay}p zdevLZ0dUF39M7(~wFlqA40%-K=FywP)nVBgCg<)lZC5-Tg*3&EQE4ZFQk80NmRolS z>64a+=u|g_Fo~r5CrbEaH(R zYYPK)1~xrL8IQnKx!qwrNJxw)nb^>Mro4m!pL3&xAS+I+^O6O!m2Ov~^mMs`&lrKF zx4`WZkd)WipGXM-|I!DSazmdc1EeIvjd$ub=5VjqnEEuVN}!oR48enJ8#U;zM$b79 zHt=EYQF7`~0909VIHdmZ#PEf~sDaa>`pjY%svUzMk)ziaZ`vBox1U`cU#5cGaO8sA zE=lvsU5dmU)qch3`sk}?ULZ( za1RDbK3RrkIwX~7kbFpc#3`_uf*?cBUoAKIAOe2Opc%|lLz3y6_n>lE4hSTaiWlrg ziMA*CX{D31C!JYRJv1@h{fSt{WTRwA8ntH`*I^}leyl8AQmx7m+w_iANd78bYa@8Q zvEg3DL_d@ozi7wJwSJbc8PGrmSrB|3I4DCK7&M-d9h+B)_n{s(7BlWz7EP`qS4<_I zvkVy|EYq$^E{A&#AyTtHIfftFLyhjkB|5yjTo1wu_)r5pa9x1I=+ zF@FIrK5BO!evYfCq(f<+8@OnNqlQ6!7uKrx(e#DZ8SgCi2tjIMhyB++H#aO=rWF{F zoMLL_5rS$vT4J(5K<}wZjn83&RIvFF2cgx4d*~KgVgjI`nO;VS21^=!wZvO>gRUA( z9mqrUsMV4ITzc{lIh^3O-H}yDJydp@c+t3qXKW0~yK5Kkp#oVP0bFiPZgnt;YSQ0E z6SQ5!QVh!hNpEo0UoJF;=YT=^xIR%0p2A)PT@OneqX~6psOto0YF{pCnF$ioRA00b z&P_OR0ifs66{AN%>JJ*#o*S4S7@-M!1cLSDLff~{yw{}uX}Uo8r&O(n>5okivu&9` zYSXDvkf#Z|zTgmrb0%3CSSc9gJA$>0PBqS!iwEaF!@aZ0ZuoG@QkBRWQ$3j`JR$t> zP%$PYjK#wh->0vNqwyD(v5}g!OV&TiNgX9kf~W5i#n0i9_6aHc38P9Bnd=>-t~uO6~^+d*F5KLuM@Pq-u*}inAx4K`Gnn3X;h6|O zUjT|2pRt&8en|HM2{=H^h@LquGW%@oQu4^*UIQz+AC=ynWR#)u6O^I!Y0yy<4%bg7A^2q>eZ9E3`ox5V#@d%~5P@wtdk z*U%40UWhpDn?gTwc(m%o70KJp2S+PX16n{fD^de@xpgqy%BBOEd)fJ^!aBdiL#PG_ 
zYihp`S&L=D&n#g-@=F*S=M|ZENC>)V`Rc&DVQ7TqsZDh5YvL$;aI||#YBs;5K*X&J8G(* z;rG#n=+sbLA*C4UaoP{Dyq*y{1Q&Hgyh6+wS+r~Mp-%5+E_*qfdVsx+Oami6JmGU!dBnguw!^XB(yI{ zQUWIWb|oA}k8qao2+ZyTrA6bHoYYSRqfBH_if`yC4(t2^{eI?!V`Z@L+>)J6=``D`PHl9Su;Fds);MHOo?S07p?Xi}?) z2Y{kO%U5=fS7yuV!~3w0|lncd|hF{Xx$|FHWr!GbQK6kdQM4x*GNiPe9x3?!ZTJg;Pz(VgJrD{ z9ooL1U>{IpV#-+hej^vl9UT{|E+uR-E6}rXNgeLB1n_jSTyQE;5W23jvGLgB&!vP{ zU>1w;mABp-MiW`nn8CIn=dO)cdDapa!iAA(H%up)p_-1p6wN2eAbeh;Tdrje9J&?(< zDKQY_j02!1NG8XkmJ+m*%NtYU8(U@Xdl-dU)<4LuO-fG1`%2i3?SrC(#j1k>%iFq{ zic(m_tr4Us#&Wd1Ho_`7u>;s>#5QJbr6r$#-s?{AL^5AUdRsJT%|trYu8~eXjDxXA<{=`!HRI{R zu`K9l8n7IUf!cj2SQK71sCZUqFkWl;7~}q@UyHnDK@N&}UASM11|^&nOK|{!j-@c7 zu~|oG3iYOKT$IJ&E}V^Ss&ZBSF=K8b9XbsdjhXh5a#$gVR@SXgwSYQfRjn9}7<7+D z^v*i&d@5@e7pJO=wCu4}bCH?ttxB_ekMdo*Z?}|iz>(l^`WB;6YaNZJZD?(8J?Z>A z>>KEpi`^V7cNnRt#M6AGV9e7W9ND!|rG|l;*`8B4sgj8P;NV`ZciysSkZ80;Att0m zSj!OmtnaWKzJOI-c{CJk)Yn@|mLx?|rcF~><}GCqrni(LgoG?ZLPaw%X)Lpplw~Sp z86ql`$}(B9o2l$3rpP)N>x?nRm|+&PetrLa-@Sj_bI)_`ea?B#xzD}#k9&Vd&h zxY%WUBOUy#DBD4f=DC1$HN9C%r?j*e^WUf6jGSes99nwYem1#=fG@ATiFp17u>~EI zXUg56tsDz~{qb#>K83n=x;8J3{%#d&rmJ3-m-saPojq*HGlrnaMHP==qYoWSu_nSF+rB*wUsA2t{# z)2saG*~--%Rlie!2izs&y{+>zjWwA-Hk7*1#m+u?Ga9Q<_E4w#%+7!K;S&91#>va`avE9*W*oBnm% zY*fhy`BBSKJC=>g?B~wU__5@hGLOvFgk4Vpa-+xVK6M0LMr>~0VvkV5ANm?zX^P(I z1aPpJoZlR4e2Sc_=Jca`*J*I{KbO@J_TR<(-L9(QT}1@ zj#rAg5!gw~ym7ZiD}V&VYBO>6*{Va94}nLy{Syr9J78(Uh0Zy-M-tVH*|TC!x;Pls+at;V1r2nyT7v)8A{*4L?`JU+lqt zw==YI(;7#6svjINcE=Bc4R;xL|GD;JOVeukMf;}pADE|}#gpHCUOXzUJE#2Y;06E( zK;pUf&cEvVqd`C5yM^*3&A%qgfCmENPiwsP;%{mBb zZ8u_1VIM#H>?SjTH5~uLO@GMi$r1{+&`5g zS?v5CsoHlFV_&r&`KtV9;5Vn7eBhr;W%XNx%fQ8H-NHRH^T&>+p}}37dN%3B<}E+K z8b^5E+Q0PnE%~6saZ`cSSOzCUGvfCp!3O5G(dSPH?dhhjm!9(N7Z!K_SooFzK1iY0 z(=-w#T9>CEdBZZSLiqa1WoquFr@St;c(eR%fr`G^6t!7s%KGeN#=1m}#j7x0jn~$q zPfkv0=f*&Ok>!_Kd?@QNNnzb%XP4c5_kM`|8*o21X0*M5vT&DPhQ8JPSaAYzTIq^j zC2Z1Ozjxr)bKl+Zp@R`GcE?xz{^Q+|d`>-m17s;v^7nn zlA2Hz^^yfGC|!p0RESLFE4;|KHBvc&Gk*q5}x?D{5GnE>qvl(LB{-OD8Bfdbd<_&p+~N4#nRg##5nFanfHiO4w4H* 
zOXQtYTi*`C707c15j)(37RpFdyd}Xndte|@nu46G@S6-O!M5;wTFf^R6^t3lQ}{m2 zhP*r+odcb)Eg+4^Ujvuv0(uE6v4cDSA;oy{XOP^Awpw&O+qMa?+Rg#Mv^L#Ctj!I8 zxr*iy6h8=pm9JhU$#`m4iHg`f`LHhZ&1=kJp;Ou?faL53g17=wbGibv>T>l<89-5` zvfoEo<(bC4-ktYBaO7(8C6=QvJh;;)VWIjc9Z$7S#*BAZGx&Yx{#A(<+-ACnglsD3mHU z9!JA^+hgO9$T(fa1a6f8;{B7Xh!)ZXQ<@xuayWe2FH8F!OQMLt#u!Fj^%ZH*BzG*LA)oO=>2 z7YW$(B^yQe>Q~T`xh_?@tAxOV5bK*I`otKbFTfnB37tGAP%X9dEnfZ}rgb2d!4150 z)^i9vB!n{3E`TIG^l>!5vzGujoe6aUNyYT>nX`x?CC$y9u=nxmR4WS|==4>f}QSP%6Mw5rFRr zX`HQn1Ufi@gZe_6-ekon_-rug7SVygX0ga7${20V)X30^n@KuLf^BRTQ3brJJQ}7k zM$#xwELVu+Q!Hv8Eh{S*>O97z_E0GRb7q-rrY$DT%ImA8M|Y8^p7|AEG!Rrlu`#c9=W74M)?Yti`o zeVnBg86H0aC@-IO)girYQsFjEUsdqwQ`2Em2We>R2o_aUUw;w&tt4QXPxHg>KoI{v zMssv4Z6~Sxm5%5wair<{*XU;?En^JAc!x;$_8GQ_ua=Av~wh23?T~dj3NgPiTun!HI zlBi7*4SyX)tSrr8uUKv_YGg?yRxhFVt?8WQOp&|4QXy19qC!cjsoM&35CI7u$D7Hu z6$cyQ076Nyw&%v{QV>^vx-vP;q+O!v)vieyO^}}U$4W^~qloITQujYEg^x|C9;U#Ov93KOV?{l7r0#RI-G07Gkb7C{8$P|cCBB!tDe=XW#E%|R zh?zWNe+;qbp9lG%drBI~?(f^MU-}-W0{FRh9hTbzlNc>#-poTpqdO${`yzD@3tX7P zHG<7T0S8@%yI;l{Nj9a2jVA=0B@I4<4K-|Te+g(943@SwUBv$gFEjds7~@p~-0bRc0w%eBA_jFA+X#8TpDxFPIUoabn!n<>;mTYglt?2cx3xnZCSDF*nb2Kx z>8tty)}cj>SyCHGH2;oSbCapMx-vILM31=S?&p^Gw$B==1^y{3j>z&_UHw~q+e{(D za-vsKIO+Y$npln~y+0lAN8E&rHQm3mkwcSm`WUO0?N4f#8E0cPu`%57RGS=_vop^% zg10c62+&~`FB!mXX8_WDM&;4~TeW_QlXJ_e$v|=rkt6ACU0u(Je)@}?E=3y9Fzp?| zst-C9MWRLHHb7zi2c`tcJ}>Q&Vcr2gX%2|)Fk@zh@Q*;4lY7u0%j(^g$KH|g)ESE> zcAKLZcb`5sv?%Ccf<9>H65m_lxqsW8hn)F6s~r*2=MWI5$Og}y%F3vhJ&&}VDVfoJ zNes5w#Pu)+1}aKI7%?l>4G47164P?Y;%|8w^PLnVZC@xQ$Om~3?iY$hA=wCsVhqXL zTqu297-5T)B>KEMQ>`J0Q18!vgU5J+J}n%J3f)CaUa$cd`@{xAR#E!cwsO*cK!3#m zcM(T>SPZZk;n8cr_(a9v?B`m6Y!fvVGJ16nEQaAM^g^L;aTQbCb-*illyNggOHqlt z)qpfTk!mb&CHazaWgA4IyNt#|{+;YkRFSlr+{RFA#l!Sl&4h{wuvg2 z#Y!{;i=;YP50y+=w(Bgb&+A~3qZ8vNK02!4i_e6cxF9wORLw zrIB!1ZJ|Q8`MPa{J|#4KdLMA>*8bJ$5@{S5?*KDpuQ9Wt^6D0Qr&h;cGft| z9YB5pMboOYgd-1@UiJb=zc0Bk80S~aZk!QaFsya8er3VD>cwHQutaIdQjyfj&69zl7PScm^=+dS6ZaKcN^{W)86f_y%+ 
z8MTG}V`qhp4Xst<86vTNGU%c)MfP}yCs%;SjQbC={pE8Y7x|ocNj^2#wI(QGf5FgKO{qs%T_gx`1VcoBQ|gs*SzM~y;X9et3dP(O94n6mpU7`QQy^T5w5rh8 zUpCF}VdaTxF(jF}{L`v#4KQz21E>jw8#r&vR);*0QW6QZKy1&)JQkUBef8Lr=?)Qn zHjkA^m@yj8->b7YD_EEv9~YcZw39O^TzhBR#wH5rkZf@EtLXh|2M;Hn<>MfR%hyuS zf05Ov@j*5Bx49&`_>(3fGI1Nozd)*bY-eF>xW-n`L$e^!xRlIyLY?D43tE3zHP1>h zFfw)~D2YvC1QX}E$;WJ7lP^7t)O zFo>s`su08RLs&?Cv^HcIi;-Q|cFCc3c-2dVyzBmL46Y#jh_H7eyq81C9>Uh3h9Qgk z3Qk=n;rhM;Pe+LX_7~vPS}01yB;AD)t#UDGYx;LJo4Q>j!c=(er<nUBk7(Em5{1BGptO(x?+ZIMMcP9S z_qE&nU-CC5X&HvTdp8G@3_!<~6+HkP>jzOjLiVBaBKg-C!fwfE~ zHdzZ9lP-j(sZkH=tx~hv2L_mP0|RYY&5?}rIe>zU&KDUQ46Tp*IzPI1_cO?x>P~7- zD>mQ{&7E&0l$U~5bMv-)~lOZ?YgG@zhu?P#E!F;FCwt@2SfD z&oam*8AD*b_~gWS6H-}K290KW**pq8@NO@62Pz6F5y9ftjw=Mx);^VbiEVQ zg|^BP0`)Ya3u%z%?jf+x&mTPCT?H&Iy#+}Qbv016`6AA(rfh}&qOuQ7C~@aHG^ep0 zGy9vD@dGm-gs=8hG7ZNH+n@mKiRPuoJ53F+=}-yXrnwkLZFF=1Kxw~HryPBW%2<2( zMpfwe#2l7UC?`!r3^-8Uj3jL3d~Z1iD-n#hNJ!TgKUm`>8B*{39@T&~gtBzy&Po$J z&&2m);JDxlJpMauaPC&!s&4UM*WQzd-`k0%YMlmp8Z3|%)!D^(U50K&<#WZ3$?kls z#UQ^=PQMt`ueHyRXbmZxny84x^5_Bq9C!s?1B@g3EwiX_1T_p{fS?S4tB=|0y;!ai z=>AMG|AYzXX_uH_Z(!vsI9dly#0N)s>^oRkH9f1#uC9Yo}`eM@ZGSwGWe5nSE7tB(2on@a`$ ze)CS2;p@95-5;#^IM>0~i=BbidUKD?Mu2`XvpScnL5FR1N^JY=J(9|*Jd6AnGiWa* zCn4E@PQO%MDil7zc69;tUmv;G{+m)}VBuVWTzdimXG_$#jP8?8qgN*Xpb4|(lROxx| zM#W38JXTasmnlMf)I9Zfg!x#ETg72M6MDhL`hjE2_uF;#f5D6}c0ITdhB~oQyx)C+NXk%wX~m%Dr85G8pN6Vy{V9JD`DrVs*D?sa)TU*)@Xt~BHegU_`*TJfws#G< z)?j7i&dw-*Pm$&LR7&Zp`Z4~mN=G|@Q}#P**+;oeZ%odrocNW8PRU+4+VkpZn=G)> zreiw9RL1dj@V}jXd!3WpmMgDpqH_-<1egt#ADe>Loe06aD1W6j^6=`jb%yy5UneN* zoVnob00?e|8#`Zez16x6>(&e<^w!+ZeDd?CN8t=M*6CfJvSW)gV%^o&!?|k1@8N~YhnW9n;WXvZ!z!-& zNl&Wg58#imbCu2ejfdZ!w*B7Ye$D>8urE>2hkLtOzSsEzXs=H9vAP3gW`}pcbq3Nd zPRy#^sd(J!)q)=gjV!g{6qlY&slN8YZ}Ih?=;fO+SE5+^B9>=9Lf+E$?Y92>+Kaqp zKg_mdINk(!$JZ<;w`o&U5duQUCNIh*o^mLg1Pq>j>*tBNjEeBD|e zvP+$D^5eX=UC+rI4mvMe^K0U+h4fV0%mqe`UDwB%TfX_4aRMGoGdqYS-}!p9%`nWX zoA8afE!WNS>$o>0u3zO~fwdORxhP5Hynf-83|!PXN7 z+L5hxHO_0>_uA(h9ATXNg*AK>cvP>b-Xjt&i|qIOrG0%-UO$fVW8j{rBJHE1lhBIg 
zFB}nJqKUO@c*}d)@|H2l(PypIo6T68 zh)|Y$4ApY9k;GZ{{Kbj%H3^CD;B!&YRDIwD{Qfzfi3zvRJ~t+_zA-7+=|ArIe{dIP z!n0dHp7CBds1|iI!Ok^C;(GpBI%@l?-l@YuL3>Vj{k*&SLfxw@i{d@$K8DP!C#J+2 z*+VBSTQ(-{9OL`!`^`soM(6o7DEhzj8BM;2UI8_7K6(w=Z+GBy`y~baisV?C6UklgfU|)j7R;EixGLtj#%8$O>qkZC~4hg$4x4j)9TbiBT7mk z;mS&v9oPM{U1__Lii(O-NVt#=l}o! delta 5154 zcmV+-6y59Avjfg34p2)21QY-W2nYa!ZHG{i4ljT0U3+ufHj@7;sM)%TbT=Fmd{Y&t zc4NhRzKRo<kFY4S*cZXhx#PQj)tHmo1TaG+vGFU!xoJ zr^$O+)md3g&L%&VjZDh&-LG^qos?C!cp)Zdf$K(g5KJcpTS_SOy3J*M^+U193VHRz zN49@l!8?@CS+Qu@0*lm~bB)RVKfp9al5)5$wp38#}qo5AZOO;VZI%(p0W zZOe11Z^hCJEbh8#!g=BZUJTXiHfe5F*rsV~-Rc{i&f2Dg#8p{mjcT6G?7j@-XUkPt zH3Sl7f;B9uGXAcfPkat>`huV5`cugCgpH@DP>IcZpfBTus;tFI+a z3G}$l8(4==6Ej`c%`%4!>^QuAE3)^5=d7-u&L^w@T5U6*eD}Y2Zsx_ePhe2r!5Dv8 zwP=^JP{aQWuR=)ik4!9Nmr}`P`F@c0_EJwwEt?lbTB;N%CabEfE?SuI>Sl6Qw0UmQ zx|a`S^)AiJ4~7J+L)E;=aMMsf3Y;K_W8m>faqTkO&IGy)TA5(FzUna`kSvi|FUb)gTFVNsM>dW@a8$r|k11clOd`Qh0{ zs_F1`CZx%DR*IeJuhmZNbF%u!lE0JUMS+s`<*-ur>ZZ9aisyY$<#z`bOO+!ogBCtG92#X1qSx%J3JV_nL zx82F-=_pYidA%r)SwIsR1(t88spSQUw338zi@G+qeGxjY6C5VWolQ|*CjXMWAwPl! 
zYgaY-G%pB&{ls$eUjNI)MN?Mt#d%GhlD}0IyE&n@O{WBYI;T5|zm|Usw6yj4qS&iQ{o1`=eYPD!8=d|K{zjvq)35Q&1bM_Sf0x~`)_%2A%8o;h6y>@ z>aLj<(`Y<%fp#>D<0*gfW=MZNacsx7Y{#-GvF$VUcV^o*3}t!`N!{^SQKP}k!(Wbi z8&VxXNcGKDHM^hGKA1|Re%kw}kS9+_Q5IJqQ=1I*5~<&{Ta*v$9uj_;)%8lkvIwGC zZf~ERo&|`W!B|b8o<>EQZjydcC}u*{D4(ES0kyPT-ATXWshNM%cb(o^^*d+Q={OKL zQc%;(7XUv%i;xWgM~eb77-tL_4-_Co?yA%O;Lo=pufFGPB_ z8!`4k5rpDfJ?Uq#J{6#;X}?ZqBr6&+o6Sf-mXHJjTIQm@d|+((sk5v^3KVhJfTdBL z-Pv)}n2@z(gSn~|2A?2`#EuSW>oFSox+siwg1R42r9yvIP#Ivwvd0sjKBKNT^}9J) zQ-HaV3&2c65(b*=qw%i&n(9NwU(*p~t6}ZvUi7==D~RA0`383t zC%B{TA%M1wvISlU;M@#&#aJ+(AH>Q*d&P0Wy;e>X?XhwqkA_Dp=V;|TvXv7?)Juey zSUh32pS#A_17O-+`0X@%3DR(`9LO$mQ z9$2}XR*qr!6Or!HtIkNZ+HDg(sc+w|!Omo=5Uc^P3i>PLuYmsmg`#(+A-6a|5^TX>SM08&`7^+KsGp8_#W6Pb2Dz z%|?Hf<#f9mEG4XMh4`yik!-Oz*HZjay7g6qm z0MW?voV`Fa4t4`kN+X{hf#?y4KK5}t4_JR3iV%RP4~UV2h)-hSS+V1|vG2N^i*PIu zjqM(Y1|km9$PTO^magT6eriRLBdjpsHj|DJ!sdqoQEwB7{@YT_i@%WX+bkDEd4+f2 z(R=)ngKd4-R=4+U?*R1e#`d<4Tl%=AA9+hlSMVT8LW|jMV0keGBhc|&3*Hi*f@Xj5 z9XfhT(|$|)HWwm|QY#iloFc6lLK zUKWznnM$lc$Y^ht2_lfM2jrhBA#{H-pXkIZm3sASA8vqW)F1&)wvW(Hr{sGWt*B1` zcB3mLj`6g6yF1DchY#3`8Xs_9NZddKr@huat5=d|4Bcx&$;7qq5T;|3A~V}v5)Fm0 zL_Y)uxC+J16(Hv;wHmb^06d-OVJMft>AL$MAY}#ZIcgm3YGvNw^;EkxHrs#aEaxq5 zNMSla(yCmN5`@VT`%iv3e?I8bOZM@K<=BWoFPQ2HbV>UKmh9unZqC^=aULL*}_fId9l4K&h(~u zJA%q(zk|j!86<&}E;{5!bL%uTGpW4SwRxn7Z(I{dzRlQd;ul8jIttKz@w_7OJPm<5jb3+d%Gfd`cq{fWmFex z*bsGkqrKD6Gti440at(h)@C4fkgGK&fxdzAR^t6!rkjxDrOHG323e@()jR4<4WLG% zCUr<^l=ME5sOz_AZs`9nrufVRI%rxt>|u)vb&N(gfbQ?zqBSt{tb=qKsVVe>=`dfo zl>Nfon#B&;;qMFC%(}H*_p?6Pn)EYr_3t|aA)<$b6}K84rDlI`MeuIS_gVpOF2gpb zOot3I9V~)g$>~2T1}IBtQV3gjlk2%;x@L}psUe3 zcKva%BjBDL@FW-sX#(j8nvN_@xlG+4;v;D*^EMsXZ;#U;@}>8ug-cPi|MUn07SIgtjL+jV+J46fjd7SP^BBC1VyR6kv_xMWf-8-@_%>6=^IOv;3F~ z3yc|QC6ophx7|qi%#RZp9tM}fTX5+o9rg&ClfwioO8u-(PiE%SSo^*RBRQ}oDq>^H z1GE9$(^;p2um#JCx|wN34CD;$Tc;~O7XX>mAw?g>SCfCNAj_Mr`0reW2oBB*)H%_W z%C86bq>sS_4P(LzXB(9Ia|v_MB)iVSs8$tBFATKl$5&%acXQ1ea?RdL7$o30SV0WJ 
z5@yr@Ut7cyhU~Mp!kq=fYOXUFOO0@|g(2RU*%KrMTqrP_M`-Za&>-=Jh-2{>ESB~K-DLzW%}4WgZX<6d+0A?@?xywgv0cWPohKOp`Yxsbmpw}ocvuEYvt zD=2>yR3$Z;Pm&uD^-5o$CBKgI*9u47ynxcl&5V4TRNu{u7pW@PYev+tU8%Ly0;QIL zQp4tCrSo^_2=AUt4Ug#U#)y@pxBDoFeosMUdpr~%Gb}stDGDNx7@h=);YHlD-PCtn zd$b^OdO<`*oHFJ+mMv2TrVo&fMCw|JU@3p2A$LOQ9VUq4TQ&-wzxINtKYmqvy^>8^ z6{Pu48mnXqBB+w;7W(;flqY(ENXoJtPt6EWCI8go=$|Nz7)i=9mrNsNBWU^pPoENu zm&3#YO%~%lX@z6W|?&LJkFD3pQB0A~$7QndWf}x<^7iEU}!*H)ox1>&uyH$h`M* z80Ixmm$0BhTZ5QUedta|f!s-@pU=9)Q?iu6l0sv7m0iD96FfE3aJoKY#dvqb_KMQB zJ8_+}eTKKr89?rhL<4noT4!>e>@t4=!Ur`02E!e5NexGHg`07GPfaBuuXjo6u9~}b zS54348TnrGBG6I-;ju2U4Td#&wA*a`WK8b5(02D7tbX>sg2p*Jaw$&{=$QI;)5@NPWKd>GMQ&YI<7Ls{NySglm~QCv=$4h2>-T?c(ww@e z%{sQ>enG#y;mgnC*}z=%y?Tb>9Cs|{-Bg|{IliuLl*hdk9{oNa7`EPi7p)WXoa>)m>c%bJv z)DkRlSj;Hr-hqKkloF32w5Fke5)p3{dj-d1!7#P_sc1r zA_&7jEb06fzr3>3<8Pw)Sg%%0AY$60HAaii48{-5ZQ#X z15$=w|B)#u+kasSp3Uc_;AwEPbmPNP-K)yFyxj*giY>!I_u#F*pRCpeFBPi?yswQ9 z>P9d=s4J}Spl{x)l*oTsFXW!FYrpFLmTeoE6mQ?|CA2I2e8hspgGIHcN-7MPOfzX# zjAYW=1cqFCCz%Z{-9e{|qWThMzmv?D_vYa)7w@po$K5a9VU&-nxZe_=e44AL`Q&VF ztZ}@xVC?ZZc6+X;O=+G%vRCsY{`_b5| z4b+a-v3kCrhKYZqK#{`_8X;L%T2UIOQ4+hMln37Wr0y+8*x!xIyZ68=AJ2~xW3gCW zgAOAf;4j0=jwL$5)3RC)9ONXqAvG+%_Kl-2qD+;Gs;u<=2;;RM>+6~JUtAr8;Qi4K zAm>42^}o;owDDC!f1Hh@_54>i;~FLQJ&x zAS`tEgyMhT2Tk20k~$)($Nr+BAB4<}r9}k>ga?3ok|?E?fIkj%yd;iBhvLxn{{_e6 zRL1yG4`1NQTT}}Cm`p;6xTBFqS;*Gwqh?h~2m86*M z05$g<4-Izw17H6LJ^y(9`Sbq(P)i30 zbqB)1o)Z87*>V5?P)h*<6aW+e2nYxOgKdXUbqB)1o)Z87*>V5?8vq0V0000000000 Q005Jn*)s;~6951J0C8#U*#H0l diff --git a/demo/Rules_Engine_Examples.html b/demo/Rules_Engine_Examples.html index 15b5d5a..a37d33e 100644 --- a/demo/Rules_Engine_Examples.html +++ b/demo/Rules_Engine_Examples.html @@ -10,33 +10,34 @@ - - - - - - - + + + + + + diff --git a/project/plugins.sbt b/project/plugins.sbt index 286fe0e..2db64a5 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -1,3 +1,4 @@ +addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.9") addSbtPlugin("com.github.sbt" % "sbt-jacoco" % "3.0.3") 
addSbtPlugin("com.github.sbt" % "sbt-pgp" % "2.1.2") addSbtPlugin("org.xerial.sbt" % "sbt-sonatype" % "2.3") diff --git a/src/main/scala/com/databricks/labs/validation/Rule.scala b/src/main/scala/com/databricks/labs/validation/Rule.scala index 46bf413..20161fa 100644 --- a/src/main/scala/com/databricks/labs/validation/Rule.scala +++ b/src/main/scala/com/databricks/labs/validation/Rule.scala @@ -1,52 +1,43 @@ package com.databricks.labs.validation -import java.util.UUID - import com.databricks.labs.validation.utils.Structures.Bounds +import org.apache.log4j.Logger import org.apache.spark.sql.Column +import org.apache.spark.sql.functions.{array, lit} /** * Definition of a rule */ -class Rule { - - private var _ruleName: String = _ - private var _canonicalCol: Column = _ - private var _canonicalColName: String = _ - private var _inputCol: Column = _ - private var _inputColName: String = _ - private var _calculatedColumn: Column = _ - private var _boundaries: Bounds = _ - private var _validNumerics: Array[Double] = _ - private var _validStrings: Array[String] = _ - private var _dateTimeLogic: Column = _ - private var _ruleType: RuleType.Value = _ - private var _isAgg: Boolean = _ - - private def setRuleName(value: String): this.type = { - _ruleName = value - this - } - - /** - * Allows for use of canonical naming and rule identification. 
Not necessary as of version 0.1 but - * can be used for future use cases - * - * @param value input column from user - * @return Rule - */ - private[validation] def setColumn(value: Column): this.type = { - _inputCol = value - _inputColName = _inputCol.expr.toString().replace("'", "") - val cleanUUID = UUID.randomUUID().toString.replaceAll("-", "") - _canonicalColName = s"${_inputColName}_$cleanUUID" - _canonicalCol = _inputCol.alias(_canonicalColName) - _calculatedColumn = _inputCol - this - } - - private[validation] def setCalculatedColumn(value: Column): Unit = { - _calculatedColumn = value +class Rule( + private val _ruleName: String, + val inputColumn: Column, + val ruleType: RuleType.Value + ) { + + private val logger: Logger = Logger.getLogger(this.getClass) + + private var _boundaries: Bounds = Bounds() + private var _validExpr: Column = lit(null) + private var _validNumerics: Column = array(lit(null).cast("double")) + private var _validStrings: Column = array(lit(null).cast("string")) + private var _implicitBoolean: Boolean = false + private var _ignoreCase: Boolean = false + private var _invertMatch: Boolean = false + private val inputRuleName: String = setRuleName(_ruleName) + val inputColumnName: String = inputColumn.expr.toString().replace("'", "") + + override def toString: String = { + s""" + |Rule Name: $ruleName + |Rule Type: $ruleType + |Rule Is Agg: $isAgg + |Input Column: ${inputColumn.expr.toString()} + |Input Column Name: $inputColumnName + |Boundaries: ${boundaries.lower} - ${boundaries.upper} + |Valid Numerics: ${validNumerics.expr.toString()} + |Valid Strings: ${validStrings.expr.toString()} + |Implicit Bool: ${_implicitBoolean} + |""".stripMargin } private def setBoundaries(value: Bounds): this.type = { @@ -55,53 +46,73 @@ class Rule { } private def setValidNumerics(value: Array[Double]): this.type = { - _validNumerics = value + _validNumerics = lit(value) this } - private def setValidStrings(value: Array[String]): this.type = { - 
_validStrings = value + private def setValidStrings(value: Array[String], ignoreCase: Boolean): this.type = { + _validStrings = if(ignoreCase) lit(value.map(_.toLowerCase)) else lit(value) + inputColumn.expr.children.map(_.prettyName) this } - private def setDateTimeLogic(value: Column): this.type = { - _dateTimeLogic = value + private def setValidExpr(value: Column): this.type = { + _validExpr = lit(value) this } - private def setRuleType(value: RuleType.Value): this.type = { - _ruleType = value + private def setImplicitBool(value: Boolean): this.type = { + _implicitBoolean = value this } - private[validation] def setIsAgg: this.type = { - _isAgg = inputColumn.expr.prettyName == "aggregateexpression" + private def setIgnoreCase(value: Boolean): this.type = { + _ignoreCase = value this } - def ruleName: String = _ruleName + private def setInvertMatch(value: Boolean): this.type = { + _invertMatch = value + this + } - def inputColumn: Column = _inputCol + private def setRuleName(ruleName: String): String = { + val removedWhitespaceRuleName = ruleName.trim.replaceAll(" ", "_") + val whitespaceRemovalWarning = s"Converting whitespaces to underscores in Rule's name:\n '$ruleName' --> '$removedWhitespaceRuleName'\n" + if (_ruleName.contains(" ")) { + logger.warn(whitespaceRemovalWarning) + println(whitespaceRemovalWarning) + } + val specialCharsPattern = "[^a-zA-z0-9_-]+".r + val removedSpecialCharsRuleName = removedWhitespaceRuleName.replaceAll("[^a-zA-Z0-9_-]", "_") + val specialCharacterRemovalWarning = s"Converting special characters to underscores in Rule's name:\n '$removedWhitespaceRuleName' --> '$removedSpecialCharsRuleName'\n" + if (specialCharsPattern.findAllIn(removedWhitespaceRuleName).toSeq.nonEmpty) { + logger.warn(specialCharacterRemovalWarning) + println(specialCharacterRemovalWarning) + } + removedSpecialCharsRuleName + } - def inputColumnName: String = _inputColName + def boundaries: Bounds = _boundaries - def canonicalCol: Column = _canonicalCol + def 
validNumerics: Column = _validNumerics - def canonicalColName: String = _canonicalColName + def validStrings: Column = _validStrings - private[validation] def calculatedColumn: Column = _calculatedColumn + def validExpr: Column = _validExpr - def boundaries: Bounds = _boundaries + def isImplicitBool: Boolean = _implicitBoolean - def validNumerics: Array[Double] = _validNumerics + def ignoreCase: Boolean = _ignoreCase - def validStrings: Array[String] = _validStrings + def invertMatch: Boolean = _invertMatch - def dateTimeLogic: Column = _dateTimeLogic + def ruleName: String = inputRuleName - def ruleType: RuleType.Value = _ruleType - - private[validation] def isAgg: Boolean = _isAgg + def isAgg: Boolean = { + inputColumn.expr.prettyName == "aggregateexpression" || + inputColumn.expr.children.map(_.prettyName).contains("aggregateexpression") + } } @@ -117,12 +128,38 @@ object Rule { boundaries: Bounds ): Rule = { - new Rule() - .setRuleName(ruleName) - .setColumn(column) + new Rule(ruleName, column, RuleType.ValidateBounds) .setBoundaries(boundaries) - .setRuleType(RuleType.ValidateBounds) - .setIsAgg + } + + def apply( + ruleName: String, + column: Column + ): Rule = { + apply(ruleName, column, lit(true)) + .setImplicitBool(true) + } + + def apply( + ruleName: String, + column: Column, + validExpr: Column + ): Rule = { + + new Rule(ruleName, column, RuleType.ValidateExpr) + .setValidExpr(validExpr) + } + + def apply( + ruleName: String, + column: Column, + validNumerics: Array[Double], + invertMatch: Boolean + ): Rule = { + + new Rule(ruleName, column, RuleType.ValidateNumerics) + .setValidNumerics(validNumerics) + .setInvertMatch(invertMatch) } def apply( @@ -131,12 +168,21 @@ object Rule { validNumerics: Array[Double] ): Rule = { - new Rule() - .setRuleName(ruleName) - .setColumn(column) + new Rule(ruleName, column, RuleType.ValidateNumerics) .setValidNumerics(validNumerics) - .setRuleType(RuleType.ValidateNumerics) - .setIsAgg + .setInvertMatch(false) + } + + 
def apply( + ruleName: String, + column: Column, + validNumerics: Array[Long], + invertMatch: Boolean + ): Rule = { + + new Rule(ruleName, column, RuleType.ValidateNumerics) + .setValidNumerics(validNumerics.map(_.toString.toDouble)) + .setInvertMatch(invertMatch) } def apply( @@ -145,65 +191,46 @@ object Rule { validNumerics: Array[Long] ): Rule = { - new Rule() - .setRuleName(ruleName) - .setColumn(column) + new Rule(ruleName, column, RuleType.ValidateNumerics) .setValidNumerics(validNumerics.map(_.toString.toDouble)) - .setRuleType(RuleType.ValidateNumerics) - .setIsAgg + .setInvertMatch(false) } def apply( ruleName: String, column: Column, - validNumerics: Array[Int] + validNumerics: Array[Int], + invertMatch: Boolean ): Rule = { - new Rule() - .setRuleName(ruleName) - .setColumn(column) + new Rule(ruleName, column, RuleType.ValidateNumerics) .setValidNumerics(validNumerics.map(_.toString.toDouble)) - .setRuleType(RuleType.ValidateNumerics) - .setIsAgg + .setInvertMatch(invertMatch) } def apply( ruleName: String, column: Column, - validStrings: Array[String] + validNumerics: Array[Int] ): Rule = { - new Rule() - .setRuleName(ruleName) - .setColumn(column) - .setValidStrings(validStrings) - .setRuleType(RuleType.ValidateStrings) - .setIsAgg + new Rule(ruleName, column, RuleType.ValidateNumerics) + .setValidNumerics(validNumerics.map(_.toString.toDouble)) + .setInvertMatch(false) } - /** - * TODO -- Implement Date/Time Logic for: - * Column Type (i.e. 
current_timestamp and current_date) - * java.util.Date - * Validated strings compatible with Spark - * - * Additional logic can be added to extend functionality - */ + def apply( + ruleName: String, + column: Column, + validStrings: Array[String], + ignoreCase: Boolean = false, + invertMatch: Boolean = false + ): Rule = { - // def apply( - // ruleName: String, - // column: Column, - // dateTimeLogic: ???, - // ): Rule = { - // - // new Rule() - // .setRuleName(ruleName) - // .setColumn(column) - // .setAggFunc(aggFunc) - // .setAlias(alias) - // .setDateTimeLogic(dateTimeLogic) - // .setRuleType("dateTime") - // .setByCols(by) - // } + new Rule(ruleName, column, RuleType.ValidateStrings) + .setValidStrings(validStrings, ignoreCase) + .setIgnoreCase(ignoreCase) + .setInvertMatch(invertMatch) + } } diff --git a/src/main/scala/com/databricks/labs/validation/RuleSet.scala b/src/main/scala/com/databricks/labs/validation/RuleSet.scala index 6a81194..8646196 100644 --- a/src/main/scala/com/databricks/labs/validation/RuleSet.scala +++ b/src/main/scala/com/databricks/labs/validation/RuleSet.scala @@ -1,10 +1,11 @@ package com.databricks.labs.validation import com.databricks.labs.validation.utils.SparkSessionWrapper -import com.databricks.labs.validation.utils.Structures.{Bounds, MinMaxRuleDef} -import org.apache.log4j.{Level, Logger} -import org.apache.spark.sql.{Column, DataFrame} +import com.databricks.labs.validation.utils.Structures.{Bounds, MinMaxRuleDef, ValidationException, ValidationResults} +import org.apache.log4j.Logger import org.apache.spark.sql.functions.{max, min} +import org.apache.spark.sql.types.BooleanType +import org.apache.spark.sql.{Column, DataFrame} import scala.collection.mutable.ArrayBuffer @@ -17,8 +18,6 @@ class RuleSet extends SparkSessionWrapper { private val logger: Logger = Logger.getLogger(this.getClass) - import spark.implicits._ - private var _df: DataFrame = _ private var _isGrouped: Boolean = false private var _groupBys: Seq[String] = 
Seq.empty[String] @@ -36,7 +35,7 @@ class RuleSet extends SparkSessionWrapper { private def setGroupByCols(value: Seq[String]): this.type = { _groupBys = value - _isGrouped = true + _isGrouped = value.nonEmpty this } @@ -51,6 +50,7 @@ class RuleSet extends SparkSessionWrapper { /** * Generates two rules for each minmax definition one for the lower and one for the upper * Only valid for Bounds rule types + * * @param minMaxRuleDefs One or many minmax definitions as defined in Structures * Defined as case class to ensure proper usage * @return Array[Rule] that can be added to the RuleSet @@ -68,10 +68,11 @@ class RuleSet extends SparkSessionWrapper { /** * Builder pattern used to add individual MinMax rule sets after the RuleSet has been instantiated - * @param ruleName name of rule + * + * @param ruleName name of rule * @param inputColumn input column (base or calculated) - * @param boundaries lower/upper boundaries as defined by Bounds - * @param by groupBy cols + * @param boundaries lower/upper boundaries as defined by Bounds + * @param by groupBy cols * @return RuleSet */ def addMinMaxRules(ruleName: String, @@ -88,16 +89,29 @@ class RuleSet extends SparkSessionWrapper { /** * add array of rules + * * @param rules Array for Rules * @return RuleSet */ - def add(rules: Seq[Rule]): this.type = { + def add(rules: => Seq[Rule]): this.type = { + rules.foreach(rule => _rules.append(rule)) + this + } + + /** + * add an expanded sequence of rules + * + * @param rules expanded Rules sequence + * @return RuleSet + */ + def add(rules: Rule*): this.type = { rules.foreach(rule => _rules.append(rule)) this } /** * Add a single rule + * * @param rule single defined rule * @return RuleSet */ @@ -107,87 +121,121 @@ class RuleSet extends SparkSessionWrapper { } /** - * Merge two rule sets by adding one rule set to another - * @param ruleSet RuleSet to be added - * @return RuleSet - */ + * Merge two rule sets by adding one rule set to another + * + * @param ruleSet RuleSet to be 
added + * @return RuleSet + */ def add(ruleSet: RuleSet): RuleSet = { - new RuleSet().setDF(ruleSet.getDf) - .setIsGrouped(ruleSet.isGrouped) - .add(ruleSet.getRules) + val addtnlGroupBys = ruleSet.getGroupBys diff this.getGroupBys + val mergedGroupBys = this.getGroupBys ++ addtnlGroupBys + this.add(ruleSet.getRules) + .setGroupByCols(mergedGroupBys) } /** * Logic to test compliance with provided rules added through the builder * TODO What else? + * * @return this but is marked private */ private def validateRules(): Unit = { + val aggAndNonAggs = getRules.map(_.isAgg).distinct.length != 1 + // if a mixture of aggs and non-aggs -- group by * + if (aggAndNonAggs && getGroupBys.isEmpty) setGroupByCols(getDf.columns) + // if all are aggs but no group by -- group by * + if (getRules.forall(_.isAgg) && getGroupBys.isEmpty) setGroupByCols(getDf.columns) + val isGlobalGroupBy = getDf.columns.map(_.toLowerCase).sorted.sameElements(getGroupBys.map(_.toLowerCase).sorted) + require(getRules.map(_.ruleName).distinct.length == getRules.map(_.ruleName).length, s"Duplicate Rule Names: ${getRules.map(_.ruleName).diff(getRules.map(_.ruleName).distinct).mkString(", ")}") - } - - /** - * Call the action once all rules have been applied - * @param detailLevel -- For Future -- Perhaps faster way to just return true/false without - * processing everything and returning a report. For big data sets, perhaps run samples - * looking for invalids? Not sure how much faster and/or what the break-even would be - * @return Tuple of Dataframe report and final boolean of whether all rules were passed - */ - def validate(detailLevel: Int = 1): (DataFrame, Boolean) = { - validateRules() - Validator(this, detailLevel).validate - } - -} - -object RuleSet { - - /** - * Accepts DataFrame - Rules can be calculated for grouped DFs or non-grouped but not at the same time. 
- * Either append rule[s] at call or via builder pattern - */ - - def apply(df: DataFrame): RuleSet = { - new RuleSet().setDF(df) - } - - def apply(df: DataFrame, by: Array[String]): RuleSet = { - new RuleSet().setDF(df) - .setGroupByCols(by) - } + require(!aggAndNonAggs || (aggAndNonAggs && isGlobalGroupBy), + "\nRule set must contain:\nonly aggregates as input column\nOR\nonly non-aggregate functions\nOR\nmust be " + + "grouped by all column (i.e. '*').\nIf some rules must apply to both grouped and ungrouped DFs, create " + + "two rules sets and validators, one for grouped, one for not grouped." + ) - def apply(df: DataFrame, by: String): RuleSet = { - new RuleSet().setDF(df) - .setGroupByCols(Array(by)) - } + // If ruleset contains implicit boolean type -- validate + val implicitRules = getRules.find(r => r.ruleType == RuleType.ValidateExpr && r.isImplicitBool) + val dfWImplicitRules = implicitRules.foldLeft(getDf)((df, r) => { + df.withColumn(r.ruleName, r.inputColumn) + }) - def apply(df: DataFrame, rules: Seq[Rule], by: Seq[String] = Seq.empty[String]): RuleSet = { - new RuleSet().setDF(df) - .setGroupByCols(by) - .add(rules) - } + val nonBoolImplicits = dfWImplicitRules.schema.fields + .filter(f => implicitRules.map(_.ruleName).contains(f.name) && f.dataType != BooleanType) - def apply(df: DataFrame, rules: Rule*): RuleSet = { - new RuleSet().setDF(df) - .add(rules) - } + if (nonBoolImplicits.nonEmpty) throw new ValidationException( + "Implicit, expression based rules must evaluate to true/false. 
The following rules could not be confirmed " + + s"to be boolean: \n ${nonBoolImplicits.map(_.name).mkString(", ")}.\nTypes Found: " + + s"${nonBoolImplicits.map(_.dataType.typeName).mkString(", ")}") - /** - * Generates two rules for each minmax definition one for the lower and one for the upper - * Only valid for Bounds rule types - * @param minMaxRuleDefs One or many minmax definitions as defined in Structures - * Defined as case class to ensure proper usage - * @return Array[Rule] that can be added to the RuleSet - */ - def generateMinMaxRules(minMaxRuleDefs: MinMaxRuleDef*): Array[Rule] = { - - minMaxRuleDefs.flatMap(ruleDef => { - Seq( - Rule(s"${ruleDef.ruleName}_min", min(ruleDef.column), ruleDef.bounds), - Rule(s"${ruleDef.ruleName}_max", max(ruleDef.column), ruleDef.bounds) - ) - }).toArray - } +} -} \ No newline at end of file +/** + * Call the action once all rules have been applied + * + * @param detailLevel -- For Future -- Perhaps faster way to just return true/false without + * processing everything and returning a report. For big data sets, perhaps run samples + * looking for invalids? Not sure how much faster and/or what the break-even would be + * @return Tuple of Dataframe report and final boolean of whether all rules were passed + */ +def validate (detailLevel: Int = 1): ValidationResults = { + validateRules () + Validator (this, detailLevel).validate + } + + } + + object RuleSet { + + /** + * Accepts DataFrame - Rules can be calculated for grouped DFs or non-grouped but not at the same time. 
+ * Either append rule[s] at call or via builder pattern + */ + + def apply(df: DataFrame): RuleSet = { + new RuleSet().setDF(df) + } + + def apply(df: DataFrame, by: Array[String]): RuleSet = { + new RuleSet().setDF(df) + .setGroupByCols(by) + } + + def apply(df: DataFrame, by: String): RuleSet = { + if (by == "*") apply(df, df.columns) else { + new RuleSet().setDF(df) + .setGroupByCols(Array(by)) + } + } + + def apply(df: DataFrame, rules: Seq[Rule], by: Seq[String] = Seq.empty[String]): RuleSet = { + new RuleSet().setDF(df) + .setGroupByCols(by) + .add(rules) + } + + def apply(df: DataFrame, rules: Rule*): RuleSet = { + new RuleSet().setDF(df) + .add(rules) + } + + /** + * Generates two rules for each minmax definition one for the lower and one for the upper + * Only valid for Bounds rule types + * + * @param minMaxRuleDefs One or many minmax definitions as defined in Structures + * Defined as case class to ensure proper usage + * @return Array[Rule] that can be added to the RuleSet + */ + def generateMinMaxRules(minMaxRuleDefs: MinMaxRuleDef*): Array[Rule] = { + + minMaxRuleDefs.flatMap(ruleDef => { + Seq( + Rule(s"${ruleDef.ruleName}_min", min(ruleDef.column), ruleDef.bounds), + Rule(s"${ruleDef.ruleName}_max", max(ruleDef.column), ruleDef.bounds) + ) + }).toArray + } + + } \ No newline at end of file diff --git a/src/main/scala/com/databricks/labs/validation/RuleType.scala b/src/main/scala/com/databricks/labs/validation/RuleType.scala index a4c5521..cee2bfc 100644 --- a/src/main/scala/com/databricks/labs/validation/RuleType.scala +++ b/src/main/scala/com/databricks/labs/validation/RuleType.scala @@ -4,6 +4,7 @@ package com.databricks.labs.validation * Definition of the Rule Types as an Enumeration for better type matching */ object RuleType extends Enumeration { + val ValidateExpr = Value("expr") val ValidateBounds = Value("bounds") val ValidateNumerics = Value("validNumerics") val ValidateStrings = Value("validStrings") diff --git 
a/src/main/scala/com/databricks/labs/validation/Validator.scala b/src/main/scala/com/databricks/labs/validation/Validator.scala index 1f06009..09c2259 100644 --- a/src/main/scala/com/databricks/labs/validation/Validator.scala +++ b/src/main/scala/com/databricks/labs/validation/Validator.scala @@ -1,191 +1,75 @@ package com.databricks.labs.validation import com.databricks.labs.validation.utils.SparkSessionWrapper +import com.databricks.labs.validation.utils.Structures.ValidationResults +import org.apache.spark.sql.functions._ import org.apache.spark.sql.{Column, DataFrame} -import org.apache.spark.sql.functions.{ - array, col, collect_set, - explode, expr, lit, struct, sum, when -} -import org.apache.spark.sql.types._ -import scala.collection.mutable class Validator(ruleSet: RuleSet, detailLvl: Int) extends SparkSessionWrapper { - import spark.implicits._ - - private val boundaryRules = ruleSet.getRules.filter(_.ruleType == RuleType.ValidateBounds) - private val categoricalRules = ruleSet.getRules.filter(rule => rule.ruleType == RuleType.ValidateNumerics || - rule.ruleType == RuleType.ValidateStrings) - private val dateTimeRules = ruleSet.getRules.filter(_.ruleType == RuleType.ValidateDateTime) - private val complexRules = ruleSet.getRules.filter(_.ruleType == RuleType.ValidateComplex) private val byCols = ruleSet.getGroupBys map col - /** - * Method for simplifying complex select statements that end up with an output - * One or more select statements can be used for more complex, lengthy select sequences - * @param output - * @param select - */ - case class Selects(output: Column, select: Column*) - - /** - * Builds struct for validations by type - * Initialize all structs to null and populate for the rule - * @param rule Specific rule - * @return struct of validation values containing the input validation values for the rule - */ - private def buildValidationsByType(rule: Rule): Column = { - val nulls = mutable.Map[String, Column]( - 
RuleType.ValidateBounds.toString -> lit(null).cast(ArrayType(DoubleType)).alias(RuleType.ValidateBounds.toString), - RuleType.ValidateNumerics.toString -> lit(null).cast(ArrayType(DoubleType)).alias(RuleType.ValidateNumerics.toString), - RuleType.ValidateStrings.toString -> lit(null).cast(ArrayType(StringType)).alias(RuleType.ValidateStrings.toString), - RuleType.ValidateDateTime.toString -> lit(null).cast(LongType).alias(RuleType.ValidateDateTime.toString) - ) - rule.ruleType match { - case RuleType.ValidateBounds => nulls(RuleType.ValidateBounds.toString) = array(lit(rule.boundaries.lower), lit(rule.boundaries.upper)).alias(RuleType.ValidateBounds.toString) - case RuleType.ValidateNumerics => nulls(RuleType.ValidateNumerics.toString) = lit(rule.validNumerics).alias(RuleType.ValidateNumerics.toString) - case RuleType.ValidateStrings => nulls(RuleType.ValidateStrings.toString) = lit(rule.validStrings).alias(RuleType.ValidateStrings.toString) - } - val validationsByType = nulls.toMap.values.toSeq - struct( - validationsByType: _* - ).alias("Validation_Values") - } - - /** - * Build the struct for the rule and the results - * @param rule specific rule for struct to compile - * @param results result of pass/fail and/or counts of failures - * @return - */ - private def buildOutputStruct(rule: Rule, results: Seq[Column]): Column = { - struct( - lit(rule.ruleName).alias("Rule_Name"), - lit(rule.ruleType.toString).alias("Rule_Type"), - buildValidationsByType(rule), - struct(results: _*).alias("Results") - ).alias("Validation") - } - - /** - * Create output from struct for simplified results report - * @param df array of structs with results by Rule - * @return - */ - private def simplifyReport(df: DataFrame): DataFrame = { - val summaryCols = Seq( - col("Validations.Rule_Name"), - col("Validations.Rule_Type"), - col("Validations.Validation_Values"), - col("Validations.Results.Invalid_Count"), - col("Validations.Results.Failed") - ) - if (ruleSet.getGroupBys.isEmpty) { - 
df.select(summaryCols: _*) - .orderBy('Failed.desc, 'Rule_Name) - } else { - df.select(byCols ++ summaryCols: _*) - .orderBy('Failed.desc, 'Rule_Name) - } + private def buildFailureReport(df: DataFrame): DataFrame = { + val rulesResultCols = ruleSet.getRules.map(r => s"`${r.ruleName}`").mkString(", ") + val onlyFailedRecords = expr(s"""filter(array($rulesResultCols), results -> !results.passed)""") + df.withColumn("failed_rules", onlyFailedRecords) + .drop(ruleSet.getRules.map(_.ruleName): _*) + .filter(size(col("failed_rules")) > 0) } - /** - * Primary logic to calculate the threshold logic and build the select structs for output summary - * @param rules input for rules within scope - * @return - */ - private def buildBaseSelects(rules: Array[Rule]): Array[Selects] = { - - // Build base selects + private def evaluatedRules(rules: Array[Rule]): Array[Column] = { rules.map(rule => { - - // Results must have Invalid_Count & Failed rule.ruleType match { case RuleType.ValidateBounds => - // Rule evaluation for NON-AGG RULES ONLY - val invalid = rule.inputColumn < rule.boundaries.lower || rule.inputColumn > rule.boundaries.upper - // This is the first select it must come before subsequent selects as it aliases the original column name - // to that of the rule name. ADDITIONALLY, this evaluates the boundary rule WHEN the input col is not an Agg. - // This can be confusing because for Non-agg columns it renames the column to the rule_name AND returns a 0 - // or 1 (not the original value) - // IF the rule is NOT an AGG then the column is simply aliased to the rule name and no evaluation takes place - // here. - val first = if (!rule.isAgg) { // Not Agg - sum(when(invalid, 1).otherwise(0)).alias(rule.ruleName) - } else { // Is Agg - rule.inputColumn.alias(rule.ruleName) - } - // WHEN RULE IS AGG -- this is where the evaluation happens. The input column was renamed to the name of the - // rule in the required previous select. 
- // IMPORTANT: REMEMBER - that agg expressions evaluate to a single output value thus the invalid_count in - // cases where agg is used cannot be > 1 since the sum of a single value cannot exceed 1. - - // WHEN RULE NOT AGG - determine if the result of "first" select (0 or 1) is > 0, if it is, the rule has - // failed since the sum(1 or more 1s) means that 1 or more rows have failed thus the rule has failed - val failed = if (rule.isAgg) { - when( - col(rule.ruleName) < rule.boundaries.lower || col(rule.ruleName) > rule.boundaries.upper, true) - .otherwise(false).alias("Failed") - } else{ - when(col(rule.ruleName) > 0,true).otherwise(false).alias("Failed") - } - val results = if (rule.isAgg) { - Seq(when(failed, 1).otherwise(0).cast(LongType).alias("Invalid_Count"), failed) - } else { - Seq(col(rule.ruleName).cast(LongType).alias("Invalid_Count"), failed) - } - Selects(buildOutputStruct(rule, results), first) - case x if x == RuleType.ValidateNumerics || x == RuleType.ValidateStrings => - val invalid = if (x == RuleType.ValidateNumerics) { - expr(s"size(array_except(${rule.ruleName}," + - s"array(${rule.validNumerics.mkString("D,")}D)))") - } else { - expr(s"size(array_except(${rule.ruleName}," + - s"array('${rule.validStrings.mkString("','")}')))") - } - val failed = when(invalid > 0, true).otherwise(false).alias("Failed") - // TODO -- Cardinality check and WARNING - val first = collect_set(rule.inputColumn).alias(rule.ruleName) - val results = Seq(invalid.cast(LongType).alias("Invalid_Count"), failed) - Selects(buildOutputStruct(rule, results), first) - case RuleType.ValidateDateTime => ??? // TODO - case RuleType.ValidateComplex => ??? 
// TODO + struct( + lit(rule.ruleName).alias("ruleName"), + rule.boundaries.validationLogic(rule.inputColumn).alias("passed"), + array(lit(rule.boundaries.lower), lit(rule.boundaries.upper)).cast("string").alias("permitted"), + rule.inputColumn.cast("string").alias("actual") + ).alias(rule.ruleName) + case RuleType.ValidateNumerics => + val ruleExpr = if(rule.invertMatch) not(array_contains(rule.validNumerics, rule.inputColumn)) else array_contains(rule.validNumerics, rule.inputColumn) + struct( + lit(rule.ruleName).alias("ruleName"), + ruleExpr.alias("passed"), + rule.validNumerics.cast("string").alias("permitted"), + rule.inputColumn.cast("string").alias("actual") + ).alias(rule.ruleName) + case RuleType.ValidateStrings => + val ruleValue = if(rule.ignoreCase) lower(rule.inputColumn) else rule.inputColumn + val ruleExpr = if(rule.invertMatch) not(array_contains(rule.validStrings, ruleValue)) else array_contains(rule.validStrings, ruleValue) + struct( + lit(rule.ruleName).alias("ruleName"), + ruleExpr.alias("passed"), + rule.validStrings.cast("string").alias("permitted"), + rule.inputColumn.cast("string").alias("actual") + ).alias(rule.ruleName) + case RuleType.ValidateExpr => + struct( + lit(rule.ruleName).alias("ruleName"), + (rule.inputColumn === rule.validExpr).alias("passed"), + lit(rule.inputColumnName).alias("permitted"), + rule.inputColumn.cast("string").alias("actual") + ).alias(rule.ruleName) } }) } - /** - * TODO - Implement rule handlers for dates accepting - * Column Type (i.e. current_timestamp and current_date) - * java.util.Date - * Validated strings (regex?) to pass into spark and convert to date/ts - */ - private def validatedateTimeRules: Unit = ??? - - /** - * Are there common complex rule patterns that should be added? - */ - private def validateComplexRules: Unit = ??? 
+ private[validation] def validate: ValidationResults = { - private[validation] def validate: (DataFrame, Boolean) = { + val selects = evaluatedRules(ruleSet.getRules) - // val selects = buildBaseSelects(boundaryRules) - val selects = buildBaseSelects(boundaryRules) ++ buildBaseSelects(categoricalRules) - val fullOutput = explode(array(selects.map(_.output): _*)).alias("Validations") - val summaryDF = if (ruleSet.getGroupBys.isEmpty) { + val evaluatedDF = if (ruleSet.getGroupBys.isEmpty) { ruleSet.getDf - .select(selects.map(_.select.head): _*) - .select(fullOutput) + .select((ruleSet.getDf.columns map col) ++ selects: _*) } else { ruleSet.getDf .groupBy(byCols: _*) - .agg(selects.map(_.select.head).head, selects.map(_.select.head).tail: _*) - .select(byCols :+ fullOutput: _*) + .agg(evaluatedRules(ruleSet.getRules).head, evaluatedRules(ruleSet.getRules).tail: _*) + .select(byCols ++ (ruleSet.getRules.map(_.ruleName) map col): _*) } - val validationSummaryDF = simplifyReport(summaryDF) - val passed = validationSummaryDF.filter('Failed === true).count == 0 - (validationSummaryDF, passed) + ValidationResults(evaluatedDF, buildFailureReport(evaluatedDF)) } } diff --git a/src/main/scala/com/databricks/labs/validation/utils/Helpers.scala b/src/main/scala/com/databricks/labs/validation/utils/Helpers.scala deleted file mode 100644 index e098b3f..0000000 --- a/src/main/scala/com/databricks/labs/validation/utils/Helpers.scala +++ /dev/null @@ -1,18 +0,0 @@ -package com.databricks.labs.validation.utils - -import org.apache.spark.sql.{Column, DataFrame} -import org.apache.spark.sql.catalyst.expressions.NamedExpression -import org.apache.spark.sql.functions.{col, explode, array, struct, lit} - -object Helpers extends SparkSessionWrapper { - - import spark.implicits._ - private[validation] def getColumnName(c: Column): String = { - try { - c.expr.asInstanceOf[NamedExpression].name - } catch { - case e: ClassCastException => c.expr.references.map(_.name).toArray.head - } - } - 
-} diff --git a/src/main/scala/com/databricks/labs/validation/utils/MinMaxFunc.scala b/src/main/scala/com/databricks/labs/validation/utils/MinMaxFunc.scala deleted file mode 100644 index 86d123b..0000000 --- a/src/main/scala/com/databricks/labs/validation/utils/MinMaxFunc.scala +++ /dev/null @@ -1,18 +0,0 @@ -package com.databricks.labs.validation.utils - -import org.apache.spark.sql.Column -import org.apache.spark.sql.catalyst.expressions.Expression -import org.apache.spark.sql.functions.{min, max, array} - -private[validation] class MinMaxFunc(e: Expression) extends Column(e) - -object MinMaxFunc { - - private def apply(e: Expression): MinMaxFunc = new MinMaxFunc(e) - - def minMax(column: Column): MinMaxFunc = { - val alias = s"${column}_labs_validation_MinMax" - MinMaxFunc(array(min(column), max(column)).alias(alias).expr) - } - -} diff --git a/src/main/scala/com/databricks/labs/validation/utils/SparkSessionWrapper.scala b/src/main/scala/com/databricks/labs/validation/utils/SparkSessionWrapper.scala index 6d3d1cb..b6a274d 100644 --- a/src/main/scala/com/databricks/labs/validation/utils/SparkSessionWrapper.scala +++ b/src/main/scala/com/databricks/labs/validation/utils/SparkSessionWrapper.scala @@ -2,6 +2,7 @@ package com.databricks.labs.validation.utils import org.apache.spark.SparkContext import org.apache.spark.sql.SparkSession + import scala.collection.JavaConverters._ trait SparkSessionWrapper extends Serializable { diff --git a/src/main/scala/com/databricks/labs/validation/utils/Structures.scala b/src/main/scala/com/databricks/labs/validation/utils/Structures.scala index a4ba247..113c55d 100644 --- a/src/main/scala/com/databricks/labs/validation/utils/Structures.scala +++ b/src/main/scala/com/databricks/labs/validation/utils/Structures.scala @@ -1,6 +1,7 @@ package com.databricks.labs.validation.utils -import org.apache.spark.sql.Column +import com.databricks.labs.validation.Rule +import org.apache.spark.sql.{Column, DataFrame} /** * Lookups is a handy way 
to identify categorical values @@ -12,14 +13,35 @@ object Lookups { final val validRegions = Array("Northeast", "Southeast", "Midwest", "Northwest", "Southcentral", "Southwest") - final val validSkus = Array(123456, 122987,123256, 173544, 163212, 365423, 168212) + final val validSkus = Array(123456, 122987, 123256, 173544, 163212, 365423, 168212) + + final val invalidSkus = Array(9123456, 9122987, 9123256, 9173544, 9163212, 9365423, 9168212) } object Structures { - case class Bounds(lower: Double = Double.NegativeInfinity, upper: Double = Double.PositiveInfinity) + case class Bounds( + lower: Double = Double.NegativeInfinity, + upper: Double = Double.PositiveInfinity, + lowerInclusive: Boolean = false, + upperInclusive: Boolean = false) { + def validationLogic(c: Column): Column = { + val lowerLogic = if (lowerInclusive) c >= lower else c > lower + val upperLogic = if (upperInclusive) c <= upper else c < upper + lowerLogic && upperLogic + } + } case class MinMaxRuleDef(ruleName: String, column: Column, bounds: Bounds, by: Column*) + case class ValidationResults(completeReport: DataFrame, summaryReport: DataFrame) + + private[validation] class ValidationException(s: String) extends Exception(s) {} + + private[validation] class InvalidRuleException(r: Rule, s: String) extends Exception(s) { + val msg: String = s"RULE VALIDATION FAILED: ${r.toString}" + throw new ValidationException(msg) + } + } diff --git a/src/test/scala/com/databricks/labs/validation/RuleSetTestSuite.scala b/src/test/scala/com/databricks/labs/validation/RuleSetTestSuite.scala new file mode 100644 index 0000000..2cea4b8 --- /dev/null +++ b/src/test/scala/com/databricks/labs/validation/RuleSetTestSuite.scala @@ -0,0 +1,187 @@ +package com.databricks.labs.validation + +import com.databricks.labs.validation.utils.Structures.Bounds +import org.apache.spark.sql.functions._ +import org.scalatest.funsuite.AnyFunSuite + + +class RuleSetTestSuite extends AnyFunSuite with SparkSessionFixture { + + import 
spark.implicits._ + + spark.sparkContext.setLogLevel("ERROR") + + test("A rule set should be created from a DataFrame.") { + val testDF = Seq( + (1, 2, 3), + (4, 5, 6), + (7, 8, 9) + ).toDF("retail_price", "scan_price", "cost") + val testRuleSet = RuleSet(testDF) + + // Ensure that the RuleSet DataFrame is set properly + assert(testRuleSet.getDf.exceptAll(testDF).count() == 0, "RuleSet DataFrame is not equal to the input DataFrame.") + + // Ensure that the RuleSet properties are set properly + assert(!testRuleSet.isGrouped) + assert(testRuleSet.getGroupBys.isEmpty) + assert(testRuleSet.getRules.isEmpty) + + } + + test("A rule set should be created from a DataFrame grouped by multiple columns.") { + val testDF = Seq( + ("food_a", 2.51, 3, 111111111111111L), + ("food_b", 5.11, 6, 211111111111111L), + ("food_b", 5.32, 7, 311111111111111L), + ("food_d", 8.22, 99, 411111111111111L) + ).toDF("product_name", "scan_price", "cost", "id") + val testRuleSet = RuleSet(testDF, Array("product_name", "id")) + + // Ensure that the RuleSet DataFrame is set properly + assert(testRuleSet.getDf.exceptAll(testDF).count() == 0, "RuleSet DataFrame is not equal to the input DataFrame.") + + // Ensure that the group-by columns are set properly + assert(testRuleSet.isGrouped) + assert(testRuleSet.getGroupBys.length == 2) + assert(testRuleSet.getGroupBys.contains("product_name")) + assert(testRuleSet.getGroupBys.contains("id")) + + // Ensure that the RuleSet properties are set properly + assert(testRuleSet.getRules.isEmpty) + + } + + test("A rule set should be created from a DataFrame grouped by a single column.") { + val testDF = Seq( + ("food_a", 2.51, 3, 111111111111111L), + ("food_b", 5.11, 6, 211111111111111L), + ("food_b", 5.32, 7, 311111111111111L), + ("food_d", 8.22, 99, 411111111111111L) + ).toDF("product_name", "scan_price", "cost", "id") + val testRuleSet = RuleSet(testDF, "product_name") + + // Ensure that the RuleSet DataFrame is set properly + 
assert(testRuleSet.getDf.exceptAll(testDF).count() == 0, "RuleSet DataFrame is not equal to the input DataFrame.") + + // Ensure that the group-by columns are set properly + assert(testRuleSet.isGrouped) + assert(testRuleSet.getGroupBys.length == 1) + assert(testRuleSet.getGroupBys.head == "product_name") + + // Ensure that the RuleSet properties are set properly + assert(testRuleSet.getRules.isEmpty) + + } + + test("A rule set should be created from a DataFrame and list of rules.") { + val testDF = Seq( + ("Toyota", "Camry", 30000.00, 111111111111111L), + ("Ford", "Escape", 18750.00, 211111111111111L), + ("Ford", "Mustang", 32000.00, 311111111111111L), + ("Nissan", "Maxima", 25000.00, 411111111111111L) + ).toDF("make", "model", "msrp", "id") + val makeLovRule = Rule("Valid_Auto_Maker_Rule", col("make"), Array("Ford", "Toyota", "Nissan", "BMW", "Chevrolet")) + val modelLovRule = Rule("Valid_Auto_Models_Rule", col("model"), Array("Camry", "Mustang", "Maxima", "Escape", "330i")) + val groupedRuleSet = RuleSet(testDF, Array(makeLovRule, modelLovRule), Array("make")) + + // Ensure that the RuleSet DataFrame is set properly + assert(groupedRuleSet.getDf.exceptAll(testDF).count() == 0, "RuleSet DataFrame is not equal to the input DataFrame.") + + // Ensure that the RuleSet properties are set properly + assert(groupedRuleSet.isGrouped) + assert(groupedRuleSet.getGroupBys.length == 1) + assert(groupedRuleSet.getGroupBys.head == "make") + assert(groupedRuleSet.getRules.length == 2) + assert((groupedRuleSet.getRules.map(_.ruleName) diff Seq("Valid_Auto_Maker_Rule", "Valid_Auto_Models_Rule")).isEmpty) + + // Ensure a RuleSet can be created with a non-grouped DataFrame + val nonGroupedRuleSet = RuleSet(testDF, Array(makeLovRule, modelLovRule)) + + // Ensure that the RuleSet DataFrame is set properly + assert(nonGroupedRuleSet.getDf.exceptAll(testDF).count() == 0, "RuleSet DataFrame is not equal to the input DataFrame.") + + // Ensure that the RuleSet properties are set 
properly + assert(nonGroupedRuleSet.getGroupBys.isEmpty) + assert(nonGroupedRuleSet.getRules.length == 2) + assert((nonGroupedRuleSet.getRules.map(_.ruleName) diff Seq("Valid_Auto_Maker_Rule", "Valid_Auto_Models_Rule")).isEmpty) + } + + test("A rule set should be created from a DataFrame and list of MinMax rules.") { + val testDF = Seq( + ("Toyota", "Camry", 30000.00, 111111111111111L), + ("Ford", "Escape", 18750.00, 211111111111111L), + ("Ford", "Mustang", 32000.00, 311111111111111L), + ("Nissan", "Maxima", 25000.00, 411111111111111L) + ).toDF("make", "model", "msrp", "id") + val msrpBoundsRuleSet = RuleSet(testDF).addMinMaxRules("Valid_Auto_MSRP_Rule", col("msrp"), Bounds(1.0, 100000.0)) + + // Ensure that the RuleSet DataFrame is set properly + assert(msrpBoundsRuleSet.getDf.exceptAll(testDF).count() == 0, "RuleSet DataFrame is not equal to the input DataFrame.") + + // Ensure that the RuleSet properties are set properly + assert(msrpBoundsRuleSet.getGroupBys.isEmpty) + assert(msrpBoundsRuleSet.getRules.length == 2) + assert(Seq("Valid_Auto_MSRP_Rule_min", "Valid_Auto_MSRP_Rule_max").contains(msrpBoundsRuleSet.getRules(0).ruleName)) + assert(Seq("Valid_Auto_MSRP_Rule_min", "Valid_Auto_MSRP_Rule_max").contains(msrpBoundsRuleSet.getRules(1).ruleName)) + + } + + test("Two rule sets can be merged together.") { + + val testDF = Seq( + ("Toyota", "Camry", 30000.00, 111111111111111L), + ("Ford", "Escape", 18750.00, 211111111111111L), + ("Ford", "Mustang", 32000.00, 311111111111111L), + ("Nissan", "Maxima", 25000.00, 411111111111111L) + ).toDF("make", "model", "msrp", "id") + + // Create a bounds RuleSet + val msrpBoundsRuleSet = RuleSet(testDF).addMinMaxRules("Valid_Auto_MSRP_Rule", col("msrp"), Bounds(1.0, 100000.0)) + + // Create a LOV RuleSet + val makeLovRule = Rule("Valid_Auto_Maker_Rule", col("make"), Array("Ford", "Toyota", "Nissan", "BMW", "Chevrolet")) + val modelLovRule = Rule("Valid_Auto_Models_Rule", col("model"), Array("Camry", "Mustang", "Maxima", 
"Escape", "330i")) + val groupedRuleSet = RuleSet(testDF, Array(makeLovRule, modelLovRule), Array("make")) + + // Merge both RuleSets + val mergedRuleSet = groupedRuleSet.add(msrpBoundsRuleSet) + + // Ensure that the RuleSet DataFrame is set properly + assert(mergedRuleSet.getGroupBys.length == 1) + assert(mergedRuleSet.getDf.exceptAll(testDF).count() == 0, "RuleSet DataFrame is not equal to the input DataFrame.") + + // Ensure that the RuleSet properties are set properly + assert(mergedRuleSet.getRules.length == 4) + val mergedRuleNames = Seq("Valid_Auto_MSRP_Rule_min", "Valid_Auto_MSRP_Rule_max", "Valid_Auto_Maker_Rule", "Valid_Auto_Models_Rule") + assert(mergedRuleSet.getRules.count(r => mergedRuleNames.contains(r.ruleName)) == 4) + + // Ensure groupBy columns are merged properly + val groupedLovRuleSet = RuleSet(testDF, Array(makeLovRule, modelLovRule), Array("make")) + val mergedTheOtherWay = msrpBoundsRuleSet.add(groupedLovRuleSet) + assert(mergedTheOtherWay.getGroupBys.length == 1) + assert(mergedTheOtherWay.getGroupBys.head == "make") + assert(mergedTheOtherWay.getDf.exceptAll(testDF).count() == 0) + assert(mergedTheOtherWay.getRules.count(r => mergedRuleNames.contains(r.ruleName)) == 4) + + } + + test("An expanded sequence of rules can be added to a rule set.") { + val testDF = Seq( + (1, 2, 3), + (4, 5, 6), + (7, 8, 9) + ).toDF("retail_price", "scan_price", "cost") + val validPriceRule = Rule("Valid_Scan_Price_Rule", col("scan_price"), Bounds(0.01, 1000.0)) + val validCostRule = Rule("Valid_Cost_Rule", col("cost"), Bounds(0.0, 1000.0)) + val ruleSeq = Seq(validCostRule, validPriceRule) + val testRuleSet = RuleSet(testDF).add(ruleSeq: _*) + + // Ensure that the all expanded Rules were added to the RuleSet + assert(testRuleSet.getRules.length == 2) + assert(testRuleSet.getRules.count(r => Seq("Valid_Scan_Price_Rule", "Valid_Cost_Rule").contains(r.ruleName)) == 2) + assert(testRuleSet.getRules.count(_.ruleType == RuleType.ValidateBounds) == 2) + + } + +} 
diff --git a/src/test/scala/com/databricks/labs/validation/RuleTestSuite.scala b/src/test/scala/com/databricks/labs/validation/RuleTestSuite.scala new file mode 100644 index 0000000..16cdd6c --- /dev/null +++ b/src/test/scala/com/databricks/labs/validation/RuleTestSuite.scala @@ -0,0 +1,111 @@ +package com.databricks.labs.validation + +import com.databricks.labs.validation.utils.Structures.Bounds +import org.apache.spark.sql.functions.col +import org.scalatest.funsuite.AnyFunSuite + + +class RuleTestSuite extends AnyFunSuite with SparkSessionFixture { + + spark.sparkContext.setLogLevel("ERROR") + + test("A MinMaxRule should be instantiated correctly.") { + + val minMaxRule = Rule("Temperature_MinMax_Rule", col("temperature"), Bounds(34.0, 85.0)) + + // Ensure that all attributes are set correctly + assert(minMaxRule.ruleName == "Temperature_MinMax_Rule", "Rule name is not set as expected.") + assert(minMaxRule.inputColumnName == "temperature", "Input column name is not set as expected.") + assert(minMaxRule.ruleType == RuleType.ValidateBounds, "The rule type is not set as expected.") + assert(!minMaxRule.isImplicitBool, "The rule should not be an implicit boolean expression.") + assert(!minMaxRule.isAgg, "The rule should not be an aggregation.") + + // Ensure that the boundaries are set correctly + assert(minMaxRule.boundaries.lower == 34.0, "Lower boundary is not set as expected.") + assert(minMaxRule.boundaries.upper == 85.0, "Upper boundary is not set as expected.") + + } + + test("An implicit boolean expression should be instantiated correctly.") { + + // Ensure a single column of type boolean can be instantiated correctly + val coolingBoolRule = Rule("Implicit_Cooling_Rule", col("cooling_bool")) + + // Ensure that all attributes are set correctly + assert(coolingBoolRule.ruleName == "Implicit_Cooling_Rule", "Rule name is not set as expected.") + assert(coolingBoolRule.inputColumnName == "cooling_bool", "Input column name is not set as expected.") + 
assert(coolingBoolRule.ruleType == RuleType.ValidateExpr, "The rule type is not set as expected.") + assert(coolingBoolRule.isImplicitBool, "The rule should be an implicit boolean expression.") + assert(!coolingBoolRule.isAgg, "The rule should not be an aggregation.") + + // Ensure that a boolean expression can be used to create an implicit boolean rule + val coolingExprRule = Rule("Implicit_Cooling_Expr", col("current_temp") > col("target_temp")) + + // Ensure that all attributes are set correctly + assert(coolingExprRule.ruleName == "Implicit_Cooling_Expr", "Rule name is not set as expected.") + assert(coolingExprRule.inputColumnName == "(current_temp > target_temp)", "Input column name is not set as expected.") + assert(coolingExprRule.ruleType == RuleType.ValidateExpr, "The rule type is not set as expected.") + assert(coolingExprRule.isImplicitBool, "The rule should be an implicit boolean expression.") + assert(!coolingExprRule.isAgg, "The rule should not be an aggregation.") + + } + + test("A column can be ruled equivalent to an expression.") { + + // Ensure that an equivalence comparison can be made between a column and an expression + val coolingBoolRule = Rule("Thermostat_Cooling_Rule", col("cooling_bool"), (col("current_temp") - col("target_temp")) >= 7.0) + + // Ensure that all attributes are set correctly + assert(coolingBoolRule.ruleName == "Thermostat_Cooling_Rule", "Rule name is not set as expected.") + assert(coolingBoolRule.inputColumnName == "cooling_bool", "Input column name is not set as expected.") + assert(coolingBoolRule.ruleType == RuleType.ValidateExpr, "The rule type is not set as expected.") + assert(!coolingBoolRule.isImplicitBool, "The rule should not be an implicit boolean expression.") + assert(!coolingBoolRule.isAgg, "The rule should not be an aggregation.") + + } + + test("A list of numerical values rule can be instantiated correctly.") { + + // Ensure that a rule with a numerical LOV can be created + val heatingRateIntRule = 
Rule("Heating_Rate_Int_Rule", col("heating_rate"), Array(0, 1, 5, 10, 15)) + + // Ensure that all attributes are set correctly for Integers + assert(heatingRateIntRule.ruleName == "Heating_Rate_Int_Rule", "Rule name is not set as expected.") + assert(heatingRateIntRule.inputColumnName == "heating_rate", "Input column name is not set as expected.") + assert(heatingRateIntRule.ruleType == RuleType.ValidateNumerics, "The rule type is not set as expected.") + assert(!heatingRateIntRule.isImplicitBool, "The rule should not be an implicit boolean expression.") + assert(!heatingRateIntRule.isAgg, "The rule should not be an aggregation.") + + // Ensure that all attributes are set correctly for Doubles + val heatingRateDoubleRule = Rule("Heating_Rate_Double_Rule", col("heating_rate"), Array(0.0, 0.1, 0.5, 0.10, 0.15)) + assert(heatingRateDoubleRule.ruleName == "Heating_Rate_Double_Rule", "Rule name is not set as expected.") + assert(heatingRateDoubleRule.inputColumnName == "heating_rate", "Input column name is not set as expected.") + assert(heatingRateDoubleRule.ruleType == RuleType.ValidateNumerics, "The rule type is not set as expected.") + assert(!heatingRateDoubleRule.isImplicitBool, "The rule should not be an implicit boolean expression.") + assert(!heatingRateDoubleRule.isAgg, "The rule should not be an aggregation.") + + // Ensure that all attributes are set correctly for Longs + val heatingRateLongRule = Rule("Heating_Rate_Long_Rule", col("heating_rate"), Array(111111111111111L, 211111111111111L, 311111111111111L)) + assert(heatingRateLongRule.ruleName == "Heating_Rate_Long_Rule", "Rule name is not set as expected.") + assert(heatingRateLongRule.inputColumnName == "heating_rate", "Input column name is not set as expected.") + assert(heatingRateLongRule.ruleType == RuleType.ValidateNumerics, "The rule type is not set as expected.") + assert(!heatingRateLongRule.isImplicitBool, "The rule should not be an implicit boolean expression.") + 
assert(!heatingRateLongRule.isAgg, "The rule should not be an aggregation.") + + } + + test("A list of string values rule can be instantiated correctly.") { + + // Ensure that a rule with a string LOV can be created + val buildingNameRule = Rule("Building_LOV_Rule", col("site_name"), Array("SiteA", "SiteB", "SiteC")) + + // Ensure that all attributes are set correctly for Strings + assert(buildingNameRule.ruleName == "Building_LOV_Rule", "Rule name is not set as expected.") + assert(buildingNameRule.inputColumnName == "site_name", "Input column name is not set as expected.") + assert(buildingNameRule.ruleType == RuleType.ValidateStrings, "The rule type is not set as expected.") + assert(!buildingNameRule.isImplicitBool, "The rule should not be an implicit boolean expression.") + assert(!buildingNameRule.isAgg, "The rule should not be an aggregation.") + + } + +} diff --git a/src/test/scala/com/databricks/labs/validation/ValidatorTestSuite.scala b/src/test/scala/com/databricks/labs/validation/ValidatorTestSuite.scala index 4109b35..67007c7 100644 --- a/src/test/scala/com/databricks/labs/validation/ValidatorTestSuite.scala +++ b/src/test/scala/com/databricks/labs/validation/ValidatorTestSuite.scala @@ -1,30 +1,19 @@ package com.databricks.labs.validation import com.databricks.labs.validation.utils.Structures.{Bounds, MinMaxRuleDef} -import org.apache.spark.sql.functions.{col, min} +import org.apache.spark.sql.expressions.Window +import org.apache.spark.sql.functions._ import org.scalatest.funsuite.AnyFunSuite -case class ValidationValue(validDateTime: java.lang.Long, validNumerics: Array[Double], bounds: Array[Double], validStrings: Array[String]) +case class ValidationValue(ruleName: String, passed: Boolean, permitted: String, actual: String) class ValidatorTestSuite extends AnyFunSuite with SparkSessionFixture { import spark.implicits._ + spark.sparkContext.setLogLevel("ERROR") test("The input dataframe should have no rule failures on MinMaxRule") { - val 
expectedDF = Seq( - ("MinMax_Cost_Generated_max","bounds",ValidationValue(null,null,Array(0.0, 12.0),null),0,false), - ("MinMax_Cost_Generated_min","bounds",ValidationValue(null,null,Array(0.0, 12.0),null),0,false), - ("MinMax_Cost_manual_max","bounds",ValidationValue(null,null,Array(0.0, 12.0),null),0,false), - ("MinMax_Cost_manual_min","bounds",ValidationValue(null,null,Array(0.0, 12.0),null),0,false), - ("MinMax_Cost_max","bounds",ValidationValue(null,null,Array(0.0, 12.0),null),0,false), - ("MinMax_Cost_min","bounds",ValidationValue(null,null,Array(0.0, 12.0),null),0,false), - ("MinMax_Scan_Price_max","bounds",ValidationValue(null,null,Array(0.0, 29.99),null),0,false), - ("MinMax_Scan_Price_min","bounds",ValidationValue(null,null,Array(0.0, 29.99),null),0,false), - ("MinMax_Sku_Price_max","bounds",ValidationValue(null,null,Array(0.0, 29.99),null),0,false), - ("MinMax_Sku_Price_min","bounds",ValidationValue(null,null,Array(0.0, 29.99),null),0,false) - ).toDF("Rule_Name","Rule_Type","Validation_Values","Invalid_Count","Failed") - val data = Seq() // 2 per rule so 2 MinMax_Sku_Price + 2 MinMax_Scan_Price + 2 MinMax_Cost + 2 MinMax_Cost_Generated // + 2 MinMax_Cost_manual = 10 rules val testDF = Seq( @@ -32,6 +21,50 @@ class ValidatorTestSuite extends AnyFunSuite with SparkSessionFixture { (4, 5, 6), (7, 8, 9) ).toDF("retail_price", "scan_price", "cost") + + val expectedColumns = testDF.columns ++ Seq("MinMax_Sku_Price_min", "MinMax_Sku_Price_max", "MinMax_Scan_Price_min", + "MinMax_Scan_Price_max", "MinMax_Cost_min", "MinMax_Cost_max", "MinMax_Cost_manual_min", "MinMax_Cost_manual_max", + "MinMax_Cost_Generated_min", "MinMax_Cost_Generated_max") + val expectedDF = Seq( + (1, 2, 3, + ValidationValue("MinMax_Sku_Price_min", passed = true, "[0.0, 29.99]", "1"), + ValidationValue("MinMax_Sku_Price_max", passed = true, "[0.0, 29.99]", "1"), + ValidationValue("MinMax_Scan_Price_min", passed = true, "[0.0, 29.99]", "2"), + ValidationValue("MinMax_Scan_Price_max", passed 
= true, "[0.0, 29.99]", "2"), + ValidationValue("MinMax_Cost_min", passed = true, "[0.0, 12.0]", "3"), + ValidationValue("MinMax_Cost_max", passed = true, "[0.0, 12.0]", "3"), + ValidationValue("MinMax_Cost_manual_min", passed = true, "[0.0, 12.0]", "3"), + ValidationValue("MinMax_Cost_manual_max", passed = true, "[0.0, 12.0]", "3"), + ValidationValue("MinMax_Cost_Generated_min", passed = true, "[0.0, 12.0]", "3"), + ValidationValue("MinMax_Cost_Generated_max", passed = true, "[0.0, 12.0]", "3") + ), + (4, 5, 6, + ValidationValue("MinMax_Sku_Price_min", passed = true, "[0.0, 29.99]", "4"), + ValidationValue("MinMax_Sku_Price_max", passed = true, "[0.0, 29.99]", "4"), + ValidationValue("MinMax_Scan_Price_min", passed = true, "[0.0, 29.99]", "5"), + ValidationValue("MinMax_Scan_Price_max", passed = true, "[0.0, 29.99]", "5"), + ValidationValue("MinMax_Cost_min", passed = true, "[0.0, 12.0]", "6"), + ValidationValue("MinMax_Cost_max", passed = true, "[0.0, 12.0]", "6"), + ValidationValue("MinMax_Cost_manual_min", passed = true, "[0.0, 12.0]", "6"), + ValidationValue("MinMax_Cost_manual_max", passed = true, "[0.0, 12.0]", "6"), + ValidationValue("MinMax_Cost_Generated_min", passed = true, "[0.0, 12.0]", "6"), + ValidationValue("MinMax_Cost_Generated_max", passed = true, "[0.0, 12.0]", "6") + ), + (7, 8, 9, + ValidationValue("MinMax_Sku_Price_min", passed = true, "[0.0, 29.99]", "7"), + ValidationValue("MinMax_Sku_Price_max", passed = true, "[0.0, 29.99]", "7"), + ValidationValue("MinMax_Scan_Price_min", passed = true, "[0.0, 29.99]", "8"), + ValidationValue("MinMax_Scan_Price_max", passed = true, "[0.0, 29.99]", "8"), + ValidationValue("MinMax_Cost_min", passed = true, "[0.0, 12.0]", "9"), + ValidationValue("MinMax_Cost_max", passed = true, "[0.0, 12.0]", "9"), + ValidationValue("MinMax_Cost_manual_min", passed = true, "[0.0, 12.0]", "9"), + ValidationValue("MinMax_Cost_manual_max", passed = true, "[0.0, 12.0]", "9"), + ValidationValue("MinMax_Cost_Generated_min", 
passed = true, "[0.0, 12.0]", "9"), + ValidationValue("MinMax_Cost_Generated_max", passed = true, "[0.0, 12.0]", "9") + ) + ).toDF(expectedColumns: _*) + + // Create an Array of MinMax Rules val minMaxPriceDefs = Array( MinMaxRuleDef("MinMax_Sku_Price", col("retail_price"), Bounds(0.0, 29.99)), MinMaxRuleDef("MinMax_Scan_Price", col("scan_price"), Bounds(0.0, 29.99)), @@ -40,98 +73,202 @@ class ValidatorTestSuite extends AnyFunSuite with SparkSessionFixture { // Generate the array of Rules from the minmax generator val rulesArray = RuleSet.generateMinMaxRules(MinMaxRuleDef("MinMax_Cost_Generated", col("cost"), Bounds(0.0, 12.0))) - val someRuleSet = RuleSet(testDF) someRuleSet.addMinMaxRules(minMaxPriceDefs: _*) - someRuleSet.addMinMaxRules("MinMax_Cost_manual", col("cost"), Bounds(0.0,12.0)) + + // Manually add a Rule + someRuleSet.addMinMaxRules("MinMax_Cost_manual", col("cost"), Bounds(0.0, 12.0)) someRuleSet.add(rulesArray) - val (rulesReport, passed) = someRuleSet.validate() - assert(rulesReport.except(expectedDF).count() == 0) - assert(passed) - assert(rulesReport.count() == 10) + val validationResults = someRuleSet.validate() + + // Ensure that validate report is expected + assert(validationResults.completeReport.exceptAll(expectedDF).count() == 0, "Expected df is not equal to the returned rules report.") + + // Ensure that there are 2 Rules per MinMax Rule added as separate columns + assert(validationResults.completeReport.count() == 3) + assert((validationResults.completeReport.columns diff testDF.columns).length == 10) + + // Ensure that all Rules passed;there should be no failed Rules + assert(validationResults.summaryReport.count() == 0) } - test("The input rule should have 1 invalid count for MinMax_Scan_Price_Minus_Retail_Price_min and max for failing complex type.") { - val expectedDF = Seq( - ("MinMax_Retail_Price_Minus_Scan_Price_max","bounds",ValidationValue(null,null,Array(0.0, 29.99),null),1,true), - 
("MinMax_Retail_Price_Minus_Scan_Price_min","bounds",ValidationValue(null,null,Array(0.0, 29.99),null),1,true), - ("MinMax_Scan_Price_Minus_Retail_Price_max","bounds",ValidationValue(null,null,Array(0.0, 29.99),null),0,false), - ("MinMax_Scan_Price_Minus_Retail_Price_min","bounds",ValidationValue(null,null,Array(0.0, 29.99),null),0,false) - ).toDF("Rule_Name","Rule_Type","Validation_Values","Invalid_Count","Failed") + test("There should be no rule failures for inclusive boundary rules.") { + val testDF = Seq( + (1001, 1.00, 2.55), + (1002, 4.25, 5.55), + (1003, 7.35, 8.99), + (1003, 5.00, 7.99) + ).toDF("sku", "retail_price", "scan_price") + + // Ensure upperInclusive boundaries can be validated + val scanPriceRule = MinMaxRuleDef("Scan_Price_Rule", col("scan_price"), Bounds(0.0, 8.99, upperInclusive = true)) + val scanPriceRuleSet = RuleSet(testDF).addMinMaxRules(scanPriceRule) + val scanPriceResults = scanPriceRuleSet.validate() + assert(!scanPriceRule.bounds.lowerInclusive) + assert(scanPriceRule.bounds.upperInclusive) + assert(scanPriceRule.bounds.lower == 0.0) + assert(scanPriceRule.bounds.upper == 8.99) + assert(scanPriceResults.summaryReport.count() == 0) + + // Ensure that both lowerInclusive and upperInclusive boundaries can be validated + val retailPriceRule = Rule("Retail_Price_Rule", col("retail_price"), Bounds(1.0, 7.35, lowerInclusive = true, upperInclusive = true)) + val retailPriceRuleSet = RuleSet(testDF).add(retailPriceRule) + val retailPriceResults = retailPriceRuleSet.validate() + assert(retailPriceRule.boundaries.lowerInclusive) + assert(retailPriceRule.boundaries.upperInclusive) + assert(retailPriceRule.boundaries.lower == 1.0) + assert(retailPriceRule.boundaries.upper == 7.35) + assert(retailPriceResults.summaryReport.count() == 0) + + // Ensure that inclusive boundaries can be applied to a grouped DataFrame + val groupedRuleSet = RuleSet(testDF, Array("sku", "retail_price")).add(retailPriceRule) + val groupedValidationResults = 
groupedRuleSet.validate() + assert(groupedValidationResults.summaryReport.count() == 0) + } + test("The input rule should have 3 invalid count for MinMax_Scan_Price_Minus_Retail_Price_min and max for failing complex type.") { val testDF = Seq( (1, 2, 3), (4, 5, 6), (7, 8, 9) ).toDF("retail_price", "scan_price", "cost") + val expectedColumns = testDF.columns ++ Seq("MinMax_Retail_Price_Minus_Scan_Price_min", "MinMax_Retail_Price_Minus_Scan_Price_max", + "MinMax_Scan_Price_Minus_Retail_Price_min", "MinMax_Scan_Price_Minus_Retail_Price_max") + val expectedDF = Seq( + (1, 2, 3, + ValidationValue("MinMax_Retail_Price_Minus_Scan_Price_min", passed = false, "[0.0, 29.99]", "-1"), + ValidationValue("MinMax_Retail_Price_Minus_Scan_Price_max", passed = false, "[0.0, 29.99]", "-1"), + ValidationValue("MinMax_Scan_Price_Minus_Retail_Price_min", passed = true, "[0.0, 29.99]", "1"), + ValidationValue("MinMax_Scan_Price_Minus_Retail_Price_max", passed = true, "[0.0, 29.99]", "1") + ), + (4, 5, 6, + ValidationValue("MinMax_Retail_Price_Minus_Scan_Price_min", passed = false, "[0.0, 29.99]", "-1"), + ValidationValue("MinMax_Retail_Price_Minus_Scan_Price_max", passed = false, "[0.0, 29.99]", "-1"), + ValidationValue("MinMax_Scan_Price_Minus_Retail_Price_min", passed = true, "[0.0, 29.99]", "1"), + ValidationValue("MinMax_Scan_Price_Minus_Retail_Price_max", passed = true, "[0.0, 29.99]", "1") + ), + (7, 8, 9, + ValidationValue("MinMax_Retail_Price_Minus_Scan_Price_min", passed = false, "[0.0, 29.99]", "-1"), + ValidationValue("MinMax_Retail_Price_Minus_Scan_Price_max", passed = false, "[0.0, 29.99]", "-1"), + ValidationValue("MinMax_Scan_Price_Minus_Retail_Price_min", passed = true, "[0.0, 29.99]", "1"), + ValidationValue("MinMax_Scan_Price_Minus_Retail_Price_max", passed = true, "[0.0, 29.99]", "1") + ) + ).toDF(expectedColumns: _*) + val minMaxPriceDefs = Array( - MinMaxRuleDef("MinMax_Retail_Price_Minus_Scan_Price", col("retail_price")-col("scan_price"), Bounds(0.0, 29.99)), - 
MinMaxRuleDef("MinMax_Scan_Price_Minus_Retail_Price", col("scan_price")-col("retail_price"), Bounds(0.0, 29.99)) + MinMaxRuleDef("MinMax_Retail_Price_Minus_Scan_Price", col("retail_price") - col("scan_price"), Bounds(0.0, 29.99)), + MinMaxRuleDef("MinMax_Scan_Price_Minus_Retail_Price", col("scan_price") - col("retail_price"), Bounds(0.0, 29.99)) ) // Generate the array of Rules from the minmax generator val someRuleSet = RuleSet(testDF) someRuleSet.addMinMaxRules(minMaxPriceDefs: _*) - val (rulesReport, passed) = someRuleSet.validate() - assert(rulesReport.except(expectedDF).count() == 0, "Expected df is not equal to the returned rules report.") - assert(!passed) - assert(rulesReport.count() == 4) + val validationResults = someRuleSet.validate() + + // Ensure that validate report is expected + assert(validationResults.completeReport.exceptAll(expectedDF).count() == 0, "Expected df is not equal to the returned rules report.") + + // Ensure that there are failed rows in summary report + assert(validationResults.summaryReport.count() > 0) + assert(validationResults.summaryReport.count() == 3) } - test("The input rule should have 3 invalid count for failing aggregate type.") { - val expectedDF = Seq( - ("MinMax_Min_Retail_Price","bounds",ValidationValue(null,null,Array(0.0, 29.99),null),0,false), - ("MinMax_Min_Scan_Price","bounds",ValidationValue(null,null,Array(3.0, 29.99),null),1,true) - ).toDF("Rule_Name","Rule_Type","Validation_Values","Invalid_Count","Failed") + test("The input rule should have 1 invalid count for failing aggregate type.") { val testDF = Seq( (1, 2, 3), (4, 5, 6), (7, 8, 9) ).toDF("retail_price", "scan_price", "cost") + val expectedColumns = testDF.columns ++ Seq("MinMax_Min_Retail_Price", "MinMax_Min_Scan_Price") + val expectedDF = Seq( + (1, 2, 3, + ValidationValue("MinMax_Min_Retail_Price", passed = true, "[0.0, 29.99]", "1"), + ValidationValue("MinMax_Min_Scan_Price", passed = false, "[3.0, 29.99]", "2") + ), + (4, 5, 6, + 
ValidationValue("MinMax_Min_Retail_Price", passed = true, "[0.0, 29.99]", "4"), + ValidationValue("MinMax_Min_Scan_Price", passed = true, "[3.0, 29.99]", "5") + ), + (7, 8, 9, + ValidationValue("MinMax_Min_Retail_Price", passed = true, "[0.0, 29.99]", "7"), + ValidationValue("MinMax_Min_Scan_Price", passed = true, "[3.0, 29.99]", "8") + ) + ).toDF(expectedColumns: _*) val minMaxPriceDefs = Seq( Rule("MinMax_Min_Retail_Price", min("retail_price"), Bounds(0.0, 29.99)), Rule("MinMax_Min_Scan_Price", min("scan_price"), Bounds(3.0, 29.99)) ) - // Generate the array of Rules from the minmax generator val someRuleSet = RuleSet(testDF) someRuleSet.add(minMaxPriceDefs) - val (rulesReport, passed) = someRuleSet.validate() - assert(rulesReport.except(expectedDF).count() == 0, "Expected df is not equal to the returned rules report.") - assert(!passed) - assert(rulesReport.count() == 2) + val validationResults = someRuleSet.validate() + + // Ensure that validate report is expected + assert(validationResults.completeReport.exceptAll(expectedDF).count() == 0, "Expected df is not equal to the returned rules report.") + + // Ensure that there is a failed row + assert(validationResults.summaryReport.count() > 0) + assert(validationResults.summaryReport.count() == 1) } test("The input dataframe should have exactly 1 rule failure on MinMaxRule") { - val expectedDF = Seq( - ("MinMax_Cost_max","bounds",ValidationValue(null,null,Array(0.0, 12.00),null),1,true), - ("MinMax_Cost_min","bounds",ValidationValue(null,null,Array(0.0, 12.00),null),0,false), - ("MinMax_Scan_Price_max","bounds",ValidationValue(null,null,Array(0.0, 29.99),null),0,false), - ("MinMax_Scan_Price_min","bounds",ValidationValue(null,null,Array(0.0, 29.99),null),0,false), - ("MinMax_Sku_Price_max","bounds",ValidationValue(null,null,Array(0.0, 29.99),null),0,false), - ("MinMax_Sku_Price_min","bounds",ValidationValue(null,null,Array(0.0, 29.99),null),0,false) - 
).toDF("Rule_Name","Rule_Type","Validation_Values","Invalid_Count","Failed") val testDF = Seq( (1, 2, 3), (4, 5, 6), (7, 8, 99) ).toDF("retail_price", "scan_price", "cost") + val expectedColumns = testDF.columns ++ Seq("MinMax_Sku_Price_min", "MinMax_Sku_Price_max", + "MinMax_Scan_Price_min", "MinMax_Scan_Price_max", "MinMax_Cost_min", "MinMax_Cost_max" + ) + val expectedDF = Seq( + (1, 2, 3, + ValidationValue("MinMax_Sku_Price_min", passed = true, "[0.0, 29.99]", "1"), + ValidationValue("MinMax_Sku_Price_max", passed = true, "[0.0, 29.99]", "1"), + ValidationValue("MinMax_Scan_Price_min", passed = true, "[0.0, 29.99]", "2"), + ValidationValue("MinMax_Scan_Price_max", passed = true, "[0.0, 29.99]", "2"), + ValidationValue("MinMax_Cost_min", passed = true, "[0.0, 12.0]", "3"), + ValidationValue("MinMax_Cost_max", passed = true, "[0.0, 12.0]", "3"), + ), + (4, 5, 6, + ValidationValue("MinMax_Sku_Price_min", passed = true, "[0.0, 29.99]", "4"), + ValidationValue("MinMax_Sku_Price_max", passed = true, "[0.0, 29.99]", "4"), + ValidationValue("MinMax_Scan_Price_min", passed = true, "[0.0, 29.99]", "5"), + ValidationValue("MinMax_Scan_Price_max", passed = true, "[0.0, 29.99]", "5"), + ValidationValue("MinMax_Cost_min", passed = true, "[0.0, 12.0]", "6"), + ValidationValue("MinMax_Cost_max", passed = true, "[0.0, 12.0]", "6"), + ), + (7, 8, 99, + ValidationValue("MinMax_Sku_Price_min", passed = true, "[0.0, 29.99]", "7"), + ValidationValue("MinMax_Sku_Price_max", passed = true, "[0.0, 29.99]", "7"), + ValidationValue("MinMax_Scan_Price_min", passed = true, "[0.0, 29.99]", "8"), + ValidationValue("MinMax_Scan_Price_max", passed = true, "[0.0, 29.99]", "8"), + ValidationValue("MinMax_Cost_min", passed = false, "[0.0, 12.0]", "99"), + ValidationValue("MinMax_Cost_max", passed = false, "[0.0, 12.0]", "99"), + ) + ).toDF(expectedColumns: _*) + val minMaxPriceDefs = Array( MinMaxRuleDef("MinMax_Sku_Price", col("retail_price"), Bounds(0.0, 29.99)), 
MinMaxRuleDef("MinMax_Scan_Price", col("scan_price"), Bounds(0.0, 29.99)), MinMaxRuleDef("MinMax_Cost", col("cost"), Bounds(0.0, 12.0)) ) - // Generate the array of Rules from the minmax generator + // Generate the array of Rules from the minmax generator val someRuleSet = RuleSet(testDF) someRuleSet.addMinMaxRules(minMaxPriceDefs: _*) - val (rulesReport, passed) = someRuleSet.validate() - val failedResults = rulesReport.filter(rulesReport("Invalid_Count") > 0).collect() - assert(failedResults.length == 1) - assert(rulesReport.except(expectedDF).count() == 0, "Expected df is not equal to the returned rules report.") - assert(failedResults(0)(0) == "MinMax_Cost_max") - assert(!passed) + val validationResults = someRuleSet.validate() + + // Ensure that validate report is expected + assert(validationResults.completeReport.exceptAll(expectedDF).count() == 0, "Expected df is not equal to the returned rules report.") + + // Ensure that there is a failed row + assert(validationResults.summaryReport.count() > 0) + assert(validationResults.summaryReport.count() == 1) + + // Ensure that the failed Rules are MinMax_Cost_min, MinMax_Cost_max + assert(validationResults.summaryReport.select("failed_rules.ruleName").as[Array[String]].collect()(0)(0) == "MinMax_Cost_min", "First failed rule should be MinMax_Cost_min.") + assert(validationResults.summaryReport.select("failed_rules.ruleName").as[Array[String]].collect()(0)(1) == "MinMax_Cost_max", "Second failed rule should be MinMax_Cost_max.") } test("The DF in the rulesset object is the same as the input test df") { @@ -146,7 +283,6 @@ class ValidatorTestSuite extends AnyFunSuite with SparkSessionFixture { MinMaxRuleDef("MinMax_Cost", col("cost"), Bounds(0.0, 12.0)) ) // Generate the array of Rules from the minmax generator - val someRuleSet = RuleSet(testDF) someRuleSet.addMinMaxRules(minMaxPriceDefs: _*) val rulesDf = someRuleSet.getDf @@ -154,22 +290,28 @@ class ValidatorTestSuite extends AnyFunSuite with SparkSessionFixture { } test("The group by columns are the correct group by 
clauses in the validation") { - val expectedDF = Seq( - (3,"MinMax_Scan_Price_max","bounds",ValidationValue(null,null,Array(0.0, 29.99),null),0,false), - (6,"MinMax_Scan_Price_max","bounds",ValidationValue(null,null,Array(0.0, 29.99),null),0,false), - (3,"MinMax_Scan_Price_min","bounds",ValidationValue(null,null,Array(0.0, 29.99),null),0,false), - (6,"MinMax_Scan_Price_min","bounds",ValidationValue(null,null,Array(0.0, 29.99),null),0,false), - (3,"MinMax_Sku_Price_max","bounds",ValidationValue(null,null,Array(0.0, 29.99),null),0,false), - (6,"MinMax_Sku_Price_max","bounds",ValidationValue(null,null,Array(0.0, 29.99),null),0,false), - (3,"MinMax_Sku_Price_min","bounds",ValidationValue(null,null,Array(0.0, 29.99),null),0,false), - (6,"MinMax_Sku_Price_min","bounds",ValidationValue(null,null,Array(0.0, 29.99),null),0,false) - ).toDF("cost","Rule_Name","Rule_Type","Validation_Values","Invalid_Count","Failed") // 2 groups so count of the rules should yield (2 minmax rules * 2 columns) * 2 groups in cost (8 rows) val testDF = Seq( (1, 2, 3), (4, 5, 6), (7, 8, 3) ).toDF("retail_price", "scan_price", "cost") + val expectedColumns = Seq("cost", "MinMax_Sku_Price_min", "MinMax_Sku_Price_max", "MinMax_Scan_Price_min", "MinMax_Scan_Price_max") + val expectedDF = Seq( + (3, + ValidationValue("MinMax_Sku_Price_min", passed = true, "[0.0, 29.99]", "1"), + ValidationValue("MinMax_Sku_Price_max", passed = true, "[0.0, 29.99]", "7"), + ValidationValue("MinMax_Scan_Price_min", passed = true, "[0.0, 29.99]", "2"), + ValidationValue("MinMax_Scan_Price_max", passed = true, "[0.0, 29.99]", "8") + ), + (6, + ValidationValue("MinMax_Sku_Price_min", passed = true, "[0.0, 29.99]", "4"), + ValidationValue("MinMax_Sku_Price_max", passed = true, "[0.0, 29.99]", "4"), + ValidationValue("MinMax_Scan_Price_min", passed = true, "[0.0, 29.99]", "5"), + ValidationValue("MinMax_Scan_Price_max", passed = true, "[0.0, 29.99]", "5") + ) + ).toDF(expectedColumns: _*) + val minMaxPriceDefs = Array( 
MinMaxRuleDef("MinMax_Sku_Price", col("retail_price"), Bounds(0.0, 29.99)), MinMaxRuleDef("MinMax_Scan_Price", col("scan_price"), Bounds(0.0, 29.99)) @@ -178,35 +320,43 @@ class ValidatorTestSuite extends AnyFunSuite with SparkSessionFixture { val someRuleSet = RuleSet(testDF, "cost") someRuleSet.addMinMaxRules(minMaxPriceDefs: _*) val groupBys = someRuleSet.getGroupBys - val (groupByValidated, passed) = someRuleSet.validate() + val validationResults = someRuleSet.validate() + // Ensure that input DF was grouped by "cost" column assert(groupBys.length == 1) assert(groupBys.head == "cost") assert(someRuleSet.isGrouped) - assert(passed) - assert(groupByValidated.count() == 8) - assert(groupByValidated.except(expectedDF).count() == 0, "Expected df is not equal to the returned rules report.") - assert(groupByValidated.filter(groupByValidated("Invalid_Count") > 0).count() == 0) - assert(groupByValidated.filter(groupByValidated("Failed") === true).count() == 0) + + // Ensure that all rows passed + assert(validationResults.summaryReport.count() == 0) + + // Ensure that the complete report matches the expected output + assert(validationResults.completeReport.count() == 2) + assert(validationResults.completeReport.exceptAll(expectedDF).count() == 0, "Expected df is not equal to the returned rules report.") } test("The group by columns are with rules failing the validation") { - val expectedDF = Seq( - (3,"MinMax_Sku_Price_max","bounds",ValidationValue(null,null,Array(0.0, 0.0),null),1,true), - (6,"MinMax_Sku_Price_max","bounds",ValidationValue(null,null,Array(0.0, 0.0),null),1,true), - (3,"MinMax_Sku_Price_min","bounds",ValidationValue(null,null,Array(0.0, 0.0),null),1,true), - (6,"MinMax_Sku_Price_min","bounds",ValidationValue(null,null,Array(0.0, 0.0),null),1,true), - (3,"MinMax_Scan_Price_max","bounds",ValidationValue(null,null,Array(0.0, 29.99),null),0,false), - (6,"MinMax_Scan_Price_max","bounds",ValidationValue(null,null,Array(0.0, 29.99),null),0,false), - 
(3,"MinMax_Scan_Price_min","bounds",ValidationValue(null,null,Array(0.0, 29.99),null),0,false), - (6,"MinMax_Scan_Price_min","bounds",ValidationValue(null,null,Array(0.0, 29.99),null),0,false) - ).toDF("cost","Rule_Name","Rule_Type","Validation_Values","Invalid_Count","Failed") // 2 groups so count of the rules should yield (2 minmax rules * 2 columns) * 2 groups in cost (8 rows) val testDF = Seq( (1, 2, 3), (4, 5, 6), (7, 8, 3) ).toDF("retail_price", "scan_price", "cost") + val expectedColumns = Seq("cost", "MinMax_Sku_Price_min", "MinMax_Sku_Price_max", "MinMax_Scan_Price_min", "MinMax_Scan_Price_max") + val expectedDF = Seq( + (3, + ValidationValue("MinMax_Sku_Price_min", passed = false, "[0.0, 0.0]", "1"), + ValidationValue("MinMax_Sku_Price_max", passed = false, "[0.0, 0.0]", "7"), + ValidationValue("MinMax_Scan_Price_min", passed = true, "[0.0, 29.99]", "2"), + ValidationValue("MinMax_Scan_Price_max", passed = true, "[0.0, 29.99]", "8") + ), + (6, + ValidationValue("MinMax_Sku_Price_min", passed = false, "[0.0, 0.0]", "4"), + ValidationValue("MinMax_Sku_Price_max", passed = false, "[0.0, 0.0]", "4"), + ValidationValue("MinMax_Scan_Price_min", passed = true, "[0.0, 29.99]", "5"), + ValidationValue("MinMax_Scan_Price_max", passed = true, "[0.0, 29.99]", "5") + ) + ).toDF(expectedColumns: _*) val minMaxPriceDefs = Array( MinMaxRuleDef("MinMax_Sku_Price", col("retail_price"), Bounds(0.0, 0.0)), MinMaxRuleDef("MinMax_Scan_Price", col("scan_price"), Bounds(0.0, 29.99)) @@ -215,65 +365,330 @@ class ValidatorTestSuite extends AnyFunSuite with SparkSessionFixture { val someRuleSet = RuleSet(testDF, "cost") someRuleSet.addMinMaxRules(minMaxPriceDefs: _*) val groupBys = someRuleSet.getGroupBys - val (groupByValidated, passed) = someRuleSet.validate() + val validationResults = someRuleSet.validate() assert(groupBys.length == 1, "Group by length is not 1") assert(groupBys.head == "cost", "Group by column is not cost") assert(someRuleSet.isGrouped) - assert(!passed, "Rule 
set did not fail.") - assert(groupByValidated.count() == 8, "Rule count should be 8") - assert(groupByValidated.except(expectedDF).count() == 0, "Expected df is not equal to the returned rules report.") - assert(groupByValidated.filter(groupByValidated("Invalid_Count") > 0).count() == 4, "Invalid count is not 4.") - assert(groupByValidated.filter(groupByValidated("Failed") === true).count() == 4, "Failed count is not 4.") - } - test("Validate list of values with numeric types, string types and long types.") { + // Ensure that there are failed rows + assert(validationResults.summaryReport.count() > 0, "Rule set did not fail.") + assert(validationResults.summaryReport.count() == 2, "Failed row count should be 2") + assert(validationResults.completeReport.count() == 2, "Row count should be 2") + // Ensure that the complete report matches expected output + assert(validationResults.completeReport.exceptAll(expectedDF).count() == 0, "Expected df is not equal to the returned rules report.") + } + + test("Validate list of values with integer, double, and long types.") { val testDF = Seq( ("food_a", 2.51, 3, 111111111111111L), ("food_b", 5.11, 6, 211111111111111L), ("food_c", 8.22, 99, 311111111111111L) ).toDF("product_name", "scan_price", "cost", "id") + val expectedColumns = testDF.columns ++ Seq("CheckIfCostIsInLOV", "CheckIfScanPriceIsInLOV", "CheckIfIdIsInLOV") val numericLovExpectedDF = Seq( - ("CheckIfCostIsInLOV","validNumerics",ValidationValue(null,Array(3,6,99),null,null),0,false), - ("CheckIfScanPriceIsInLOV","validNumerics",ValidationValue(null,Array(2.51,5.11,8.22),null,null),0,false), - ("CheckIfIdIsInLOV","validNumerics",ValidationValue(null,Array(111111111111111L,211111111111111L,311111111111111L),null,null),0,false) - ).toDF("Rule_Name","Rule_Type","Validation_Values","Invalid_Count","Failed") + ("food_a", 2.51, 3, 111111111111111L, + ValidationValue("CheckIfCostIsInLOV", passed = true, "[3.0, 6.0, 99.0]", "3"), + ValidationValue("CheckIfScanPriceIsInLOV", 
passed = true, "[2.51, 5.11, 8.22]", "2.51"), + ValidationValue("CheckIfIdIsInLOV", passed = true, "[1.11111111111111E14, 2.11111111111111E14, 3.11111111111111E14]", "111111111111111") + ), + ("food_b", 5.11, 6, 211111111111111L, + ValidationValue("CheckIfCostIsInLOV", passed = true, "[3.0, 6.0, 99.0]", "6"), + ValidationValue("CheckIfScanPriceIsInLOV", passed = true, "[2.51, 5.11, 8.22]", "5.11"), + ValidationValue("CheckIfIdIsInLOV", passed = true, "[1.11111111111111E14, 2.11111111111111E14, 3.11111111111111E14]", "211111111111111") + ), + ("food_c", 8.22, 99, 311111111111111L, + ValidationValue("CheckIfCostIsInLOV", passed = true, "[3.0, 6.0, 99.0]", "99"), + ValidationValue("CheckIfScanPriceIsInLOV", passed = true, "[2.51, 5.11, 8.22]", "8.22"), + ValidationValue("CheckIfIdIsInLOV", passed = true, "[1.11111111111111E14, 2.11111111111111E14, 3.11111111111111E14]", "311111111111111") + ) + ).toDF(expectedColumns: _*) + val numericRules = Array( - Rule("CheckIfCostIsInLOV", col("cost"), Array(3,6,99)), - Rule("CheckIfScanPriceIsInLOV", col("scan_price"), Array(2.51,5.11,8.22)), - Rule("CheckIfIdIsInLOV", col("id"), Array(111111111111111L,211111111111111L,311111111111111L)) + Rule("CheckIfCostIsInLOV", col("cost"), Array(3, 6, 99)), + Rule("CheckIfScanPriceIsInLOV", col("scan_price"), Array(2.51, 5.11, 8.22)), + Rule("CheckIfIdIsInLOV", col("id"), Array(111111111111111L, 211111111111111L, 311111111111111L)) ) - // Generate the array of Rules from the minmax generator + // Generate the array of Rules from the minmax generator val numericRuleSet = RuleSet(testDF) numericRuleSet.add(numericRules) - val (numericValidated, numericPassed) = numericRuleSet.validate() + val numericValidationResults = numericRuleSet.validate() + + // Ensure that all ruleTypes are ValidateNumerics assert(numericRules.map(_.ruleType == RuleType.ValidateNumerics).reduce(_ && _), "Not every value is validate numerics.") - assert(numericRules.map(_.boundaries == null).reduce(_ && _), "Boundaries 
are not null.") - assert(numericPassed) - assert(numericValidated.except(numericLovExpectedDF).count() == 0, "Expected df is not equal to the returned rules report.") - assert(numericValidated.filter(numericValidated("Invalid_Count") > 0).count() == 0) - assert(numericValidated.filter(numericValidated("Failed") === true).count() == 0) - val stringRule = Rule("CheckIfProductNameInLOV", col("product_name"), Array("food_a","food_b","food_c")) - // Generate the array of Rules from the minmax generator + // Ensure that there are infinite boundaries, by default + assert(numericRules.map(_.boundaries.lower == Double.NegativeInfinity).reduce(_ && _), "Lower boundaries are not negatively infinite.") + assert(numericRules.map(_.boundaries.upper == Double.PositiveInfinity).reduce(_ && _), "Upper boundaries are not positively infinite.") + + // Ensure that the complete report matches expected output + assert(numericValidationResults.completeReport.exceptAll(numericLovExpectedDF).count() == 0, "Expected numeric df is not equal to the returned rules report.") + + // Ensure that all rows passed the Rules + assert(numericValidationResults.summaryReport.isEmpty) + + // Ensure rows can be validated against a list of invalid numerics + val invalidNumColumns = testDF.columns ++ Seq("CheckIfCostIsInLOV", "CheckIfScanPriceIsInLOV", "CheckIfIdIsInLOV") + val invalidNumsExpectedDF = Seq( + ("food_a", 2.51, 3, 111111111111111L, + ValidationValue("Invalid_Price_Rule", passed = true, "[-1.0, -5.0, 0.0, 1000.0]", "2.51"), + ValidationValue("Invalid_Id_Rule", passed = true, "[7.11111111111111E14, 8.11111111111111E14, 9.11111111111111E14]", "111111111111111"), + ValidationValue("Invalid_Cost_Rule", passed = true, "[99.0, 10000.0, 100000.0, 1000000.0]", "3") + ), + ("food_b", 5.11, 6, 211111111111111L, + ValidationValue("Invalid_Price_Rule", passed = true, "[-1.0, -5.0, 0.0, 1000.0]", "5.11"), + ValidationValue("Invalid_Id_Rule", passed = true, "[7.11111111111111E14, 8.11111111111111E14, 
9.11111111111111E14]", "211111111111111"), + ValidationValue("Invalid_Cost_Rule", passed = true, "[99.0, 10000.0, 100000.0, 1000000.0]", "6") + ), + ("food_c", 8.22, 99, 311111111111111L, + ValidationValue("Invalid_Price_Rule", passed = true, "[-1.0, -5.0, 0.0, 1000.0]", "8.22"), + ValidationValue("Invalid_Id_Rule", passed = true, "[7.11111111111111E14, 8.11111111111111E14, 9.11111111111111E14]", "311111111111111"), + ValidationValue("Invalid_Cost_Rule", passed = false, "[99.0, 10000.0, 100000.0, 1000000.0]", "99") + ) + ).toDF(expectedColumns: _*) + + val invalidPrices = Array(-1.00, -5.00, 0.00, 1000.0) + val invalidIds = Array(711111111111111L, 811111111111111L, 911111111111111L) + val invalidCosts = Array(99, 10000, 100000, 1000000) + val invalidNumericalRules = Array( + Rule("Invalid_Price_Rule", col("scan_price"), invalidPrices, invertMatch = true), + Rule("Invalid_Id_Rule", col("id"), invalidIds, invertMatch = true), + Rule("Invalid_Cost_Rule", col("cost"), invalidCosts, invertMatch = true), + ) + val invalidNumericalResults = RuleSet(testDF).add(invalidNumericalRules).validate() + + // Ensure that there is 1 failed row + assert(invalidNumericalResults.summaryReport.count() == 1) + + // Ensure that the invertMatch attribute is set properly + assert(invalidNumericalRules.count(_.invertMatch) == 3) + + // Ensure that the validation report matches expected output + assert(invalidNumericalResults.completeReport.exceptAll(invalidNumsExpectedDF).count() == 0, "Expected invalid numerics df is not equal to the returned rules report.") + + } + + test("The input df should have no rule failures for valid string LOVs.") { + val testDF = Seq( + ("food_a", 2.51, 3, 111111111111111L), + ("food_b", 5.11, 6, 211111111111111L), + ("food_c", 8.22, 99, 311111111111111L) + ).toDF("product_name", "scan_price", "cost", "id") + // Create a String List of Values Rule + val validProductNamesRule = Rule("CheckIfProductNameInLOV", col("product_name"), Array("food_a", "food_b", 
"food_c")) + val stringIgnoreCaseRule = Rule("IgnoreCaseProductNameLOV", col("product_name"), Array("Food_B", "food_A", "FOOD_C"), ignoreCase = true) + val invalidFoodsRule = Rule("InvalidProductNameLOV", col("product_name"), Array("food_x", "food_y", "food_z"), invertMatch = true) + + val expectedStringLovColumns = testDF.columns ++ Seq("CheckIfProductNameInLOV", "IgnoreCaseProductNameLOV", "InvalidProductNameLOV") val stringLovExpectedDF = Seq( - ("CheckIfProductNameInLOV","validStrings",ValidationValue(null,null,null,Array("food_a", "food_b", "food_c")),0,false) - ).toDF("Rule_Name","Rule_Type","Validation_Values","Invalid_Count","Failed") - - val stringRuleSet = RuleSet(testDF) - stringRuleSet.add(stringRule) - val (stringValidated, stringPassed) = stringRuleSet.validate() - assert(stringRule.ruleType == RuleType.ValidateStrings) - assert(stringRule.boundaries == null) - assert(stringPassed) - assert(stringValidated.except(stringLovExpectedDF).count() == 0, "Expected df is not equal to the returned rules report.") - assert(stringValidated.filter(stringValidated("Invalid_Count") > 0).count() == 0) - assert(stringValidated.filter(stringValidated("Failed") === true).count() == 0) + ("food_a", 2.51, 3, 111111111111111L, + ValidationValue("CheckIfProductNameInLOV", passed = true, "[food_a, food_b, food_c]", "food_a"), + ValidationValue("IgnoreCaseProductNameLOV", passed = true, "[food_b, food_a, food_c]", "food_a"), + ValidationValue("InvalidProductNameLOV", passed = true, "[food_x, food_y, food_z]", "food_a") + ), + ("food_b", 5.11, 6, 211111111111111L, + ValidationValue("CheckIfProductNameInLOV", passed = true, "[food_a, food_b, food_c]", "food_b"), + ValidationValue("IgnoreCaseProductNameLOV", passed = true, "[food_b, food_a, food_c]", "food_b"), + ValidationValue("InvalidProductNameLOV", passed = true, "[food_x, food_y, food_z]", "food_b") + ), + ("food_c", 8.22, 99, 311111111111111L, + ValidationValue("CheckIfProductNameInLOV", passed = true, "[food_a, food_b, 
food_c]", "food_c"), + ValidationValue("IgnoreCaseProductNameLOV", passed = true, "[food_b, food_a, food_c]", "food_c"), + ValidationValue("InvalidProductNameLOV", passed = true, "[food_x, food_y, food_z]", "food_c") + ) + ).toDF(expectedStringLovColumns: _*) + + // Validate testDF against String LOV Rule + val productNameRules = Array(validProductNamesRule, stringIgnoreCaseRule, invalidFoodsRule) + val stringRuleSet = RuleSet(testDF).add(productNameRules) + + val stringValidationResults = stringRuleSet.validate() + + // Ensure that the ruleType is set properly + assert(validProductNamesRule.ruleType == RuleType.ValidateStrings) + + // Ensure that the complete report matches expected output + assert(stringValidationResults.completeReport.exceptAll(stringLovExpectedDF).count() == 0, "Expected String LOV df is not equal to the returned rules report.") + + // Ensure that there are infinite boundaries, by default + assert(validProductNamesRule.boundaries.lower == Double.NegativeInfinity, "Lower boundaries are not negatively infinite.") + assert(validProductNamesRule.boundaries.upper == Double.PositiveInfinity, "Upper boundaries are not positively infinite.") + + // Ensure that all rows passed; there are no failed rows + assert(stringValidationResults.summaryReport.isEmpty) + } + + test("The input df should have no rule failures for an implicit expression rule.") { + + val testDF = Seq( + (1, "iot_thermostat_1", 84.00, 74.00), + (2, "iot_thermostat_2", 67.05, 72.00), + (3, "iot_thermostat_3", 91.14, 76.00) + ).toDF("device_id", "device_name", "current_temp", "target_temp") + + val expectedColumns = testDF.columns ++ Seq("TemperatureDiffExpressionRule") + val expectedDF = Seq( + (1, "iot_thermostat_1", 84.00, 74.00, ValidationValue("TemperatureDiffExpressionRule", passed = true, "(abs((current_temp - target_temp)) < 50.0)", "true")), + (2, "iot_thermostat_2", 67.05, 72.00, ValidationValue("TemperatureDiffExpressionRule", passed = true, "(abs((current_temp - target_temp)) 
< 50.0)", "true")), + (3, "iot_thermostat_3", 91.14, 76.00, ValidationValue("TemperatureDiffExpressionRule", passed = true, "(abs((current_temp - target_temp)) < 50.0)", "true")) + ).toDF(expectedColumns: _*) + + val exprRuleSet = RuleSet(testDF) + exprRuleSet.add(Rule("TemperatureDiffExpressionRule", abs(col("current_temp") - col("target_temp")) < 50.00)) + + val validationResults = exprRuleSet.validate() + + // Ensure that there are no failed rows for rule expression + assert(validationResults.summaryReport.isEmpty) + + // Ensure that the ruleType is set correctly + assert(exprRuleSet.getRules.head.ruleType == RuleType.ValidateExpr) + assert(exprRuleSet.getRules.head.isImplicitBool) + + // Ensure that the complete report matches the expected output + assert(validationResults.completeReport.exceptAll(expectedDF).count() == 0, "Expected expression df is not equal to the returned rules report.") + + } + + test("The input df should have a single rule failure for an expression rule.") { + + val testDF = Seq( + (1, "iot_thermostat_1", 84.00, 74.00, -10.00, -10.00), + (2, "iot_thermostat_2", 76.00, 66.00, -10.00, -10.00), + (3, "iot_thermostat_3", 91.00, 69.00, -20.00, -10.00) + ).toDF("device_id", "device_name", "current_temp", "target_temp", "temp_diff", "cooling_rate") + + val expectedColumns = testDF.columns ++ Seq("ImplicitCoolingExpressionRule") + val expectedDF = Seq( + (1, "iot_thermostat_1", 84, 74, -10, -10, + ValidationValue("CoolingExpressionRule", passed = true, "abs(cooling_rate)", "10.0") + ), + (2, "iot_thermostat_2", 76, 66, -10, -10, + ValidationValue("CoolingExpressionRule", passed = true, "abs(cooling_rate)", "10.0") + ), + (3, "iot_thermostat_3", 91, 69, -20, -10, + ValidationValue("CoolingExpressionRule", passed = false, "abs(cooling_rate)", "10.0") + ) + ).toDF(expectedColumns: _*) + + val exprRuleSet = RuleSet(testDF) + // Create a rule that ensure the cooling rate can accommodate the temp difference + 
exprRuleSet.add(Rule("CoolingExpressionRule", abs(col("cooling_rate")), expr("abs(temp_diff)"))) + + val validationResults = exprRuleSet.validate() + + // Ensure that there is a single row failure + assert(validationResults.summaryReport.count() > 0) + assert(validationResults.summaryReport.count() == 1) + + // Ensure that the ruleType is set correctly + assert(exprRuleSet.getRules.head.ruleType == RuleType.ValidateExpr) + assert(!exprRuleSet.getRules.head.isImplicitBool) + + // Ensure that the complete report matches the expected output + assert(validationResults.completeReport.exceptAll(expectedDF).count() == 0, "Expected explicit expression df is not equal to the returned rules report.") + + } + + test("The input df should have 3 rule failures for complex expression rules.") { + + val testDF = Seq( + ("Northwest", 1001, 123256, 9.32, 8.99, 4.23, "2021-04-01", "2020-02-01 12:00:00.000"), // bad expiration date + ("Northwest", 1001, 123456, 19.99, 16.49, 12.99, "2021-07-26", "2020-02-02 12:08:00.000"), + ("Northwest", 1001, 123456, 0.99, 0.99, 0.10, "2021-07-26", "2020-02-02 12:10:00.000"), // price change too rapid -- same day + ("Northwest", 1001, 123456, 0.98, 0.90, 0.10, "2021-07-26", "2020-02-05 12:13:00.000"), + ("Northwest", 1001, 123456, 0.99, 0.99, 0.10, "2021-07-26", "2020-02-07 00:00:00.000"), + ("Northwest", 1001, 122987, -9.99, -9.49, -6.49, "2021-07-26", "2021-02-01 00:00:00.000"), + ).toDF("region", "store_id", "sku", "retail_price", "scan_price", "cost", "expiration_date", "create_ts") + .withColumn("create_ts", 'create_ts.cast("timestamp")) + .withColumn("create_dt", 'create_ts.cast("date")) + + // Limit price updates to at most one per day + val window = Window.partitionBy("region", "store_id", "sku").orderBy("create_ts") + val skuUpdateRule = Rule("One_Update_Per_Day_Rule", unix_timestamp(col("create_ts")) - unix_timestamp(lag("create_ts", 1).over(window)) > 60 * 60 * 24) + + // Limit expiration date to be within a range + val expirationDateRule 
= Rule("Expiration_Date_Rule", col("expiration_date").cast("date").between("2021-05-01", "2021-12-31")) + + // Group by region, store_id, sku, expiration_date, create_ts + val validDatesRuleset = RuleSet(testDF, Array(skuUpdateRule, expirationDateRule), Seq("region", "store_id", "sku", "expiration_date", "create_ts")) + val validDatesResults = validDatesRuleset.validate() + + // Ensure that there are 2 rule failures + assert(validDatesResults.summaryReport.count() == 2) + assert(validDatesResults.completeReport.filter(not(col("One_Update_Per_Day_Rule.passed"))).count() == 1) + assert(validDatesResults.completeReport.filter(not(col("Expiration_Date_Rule.passed"))).count() == 1) + assert(validDatesResults.completeReport.filter(not(col("One_Update_Per_Day_Rule.passed"))).select("sku").as[Int].collect.head == 123456) + assert(validDatesResults.completeReport.filter(not(col("Expiration_Date_Rule.passed"))).select("sku").as[Int].collect.head == 123256) + + // Ensure that the ruleTypes are set correctly + assert(validDatesRuleset.getRules.count(_.ruleType == RuleType.ValidateExpr) == 2) + assert(validDatesRuleset.getRules.count(_.isImplicitBool) == 2) + assert(validDatesRuleset.getGroupBys.length == 5) + + // Limit price columns to be non-negative amounts + val nonNegativeColumns = array(col("retail_price"), col("scan_price"), col("cost")) + val nonNegativeValueRule = Rule("Non_Negative_Values_Rule", size(filter(nonNegativeColumns, c => c <= 0.0)) === 0) + + // Group by region, store_id, sku, retail_price, scan_price, cost + val nonNegativeValuesRuleset = RuleSet(testDF, Array(nonNegativeValueRule), Seq("region", "store_id", "sku", "retail_price", "scan_price", "cost")) + val nonNegativeValuesResults = nonNegativeValuesRuleset.validate() + + // Ensure that there is 1 rule failure + assert(nonNegativeValuesResults.summaryReport.count() == 1) + assert(nonNegativeValuesResults.completeReport.filter(not(col("Non_Negative_Values_Rule.passed"))).count() == 1) + 
assert(nonNegativeValuesResults.completeReport.filter(not(col("Non_Negative_Values_Rule.passed"))).select("sku").as[Int].collect.head == 122987) + + // Ensure that the ruleType is set correctly + assert(nonNegativeValuesRuleset.getRules.head.ruleType == RuleType.ValidateExpr) + assert(nonNegativeValuesRuleset.getRules.head.isImplicitBool) + assert(nonNegativeValuesRuleset.getGroupBys.length == 6) + } + test("A rule name can have special characters and whitespaces in its name.") { + + val testDF = Seq( + (1, "iot_thermostat_1", 84.00, 74.00, -10.00, -10.00), + (2, "iot_thermostat_2", 76.00, 66.00, -10.00, -10.00), + (3, "iot_thermostat_3", 91.00, 69.00, -20.00, -10.00) + ).toDF("device id", "device_name", "current temp", "target_temp", "temp_diff", "cooling_rate") + val expectedColumns = testDF.columns ++ Seq("Valid_Temperature___Range__Rule", "__________--_____sCooling_Rates__________n_t_____") + val expectedDF = Seq( + (1, "iot_thermostat_1", 84.00, 74.00, -10.00, -10.00, + ValidationValue("Valid_Temperature___Range__Rule", passed=true, "[57.0, 85.0]", "84.0"), + ValidationValue("__________--_____sCooling_Rates__________n_t_____", passed=true, "[-20.0, -1.0]", "-10.0") + ), + (2, "iot_thermostat_2", 76.00, 66.00, -10.00, -10.00, + ValidationValue("Valid_Temperature___Range__Rule", passed=true, "[57.0, 85.0]", "76.0"), + ValidationValue("__________--_____sCooling_Rates__________n_t_____", passed=true, "[-20.0, -1.0]", "-10.0") + ), + (3, "iot_thermostat_3", 91.00, 69.00, -20.00, -10.00, + ValidationValue("Valid_Temperature___Range__Rule", passed=false, "[57.0, 85.0]", "91.0"), + ValidationValue("__________--_____sCooling_Rates__________n_t_____", passed=true, "[-20.0, -1.0]", "-10.0") + + ) + ).toDF(expectedColumns: _*) + + val whiteSpaceRule = Rule(" Valid Temperature Range Rule ", col("current temp"), Bounds(57.00, 85.00)) + val specialCharsRule = Rule("!@#$%^&*()--++==%sCooling_Rates~[ ,;{}()\\n\\t=\\\\]+", col("cooling_rate"), Bounds(-20.00, -1.00)) + val 
specialCharsRuleSet = RuleSet(testDF, Array("device id", "device_name", "current temp", "target_temp", "temp_diff", "cooling_rate")) + .add(whiteSpaceRule) + .add(specialCharsRule) + val validationResults = specialCharsRuleSet.validate() + + // Ensure that there is a single temperature rule failure + assert(validationResults.summaryReport.count() == 1) + assert(whiteSpaceRule.ruleName == "Valid_Temperature___Range__Rule") + assert(specialCharsRule.ruleName == "__________--_____sCooling_Rates__________n_t_____") + + // Ensure that the complete report matches the expected output + assert(validationResults.completeReport.exceptAll(expectedDF).count() == 0, "Expected special char df is not equal to the returned rules report.") + + } }