diff --git a/Project.toml b/Project.toml index 5e90565..67a2451 100644 --- a/Project.toml +++ b/Project.toml @@ -1,18 +1,22 @@ name = "MLJTuning" uuid = "03970b2e-30c4-11ea-3135-d1576263f10f" authors = ["Anthony D. Blaom "] -version = "0.3.0" +version = "0.3.1" [deps] ComputationalResources = "ed09eef8-17a6-5b46-8889-db040fac31e3" Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" +Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d" +MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" [compat] ComputationalResources = "^0.3" -MLJBase = "^0.12" +Distributions = "^0.22,^0.23" +MLJBase = "^0.12.2" +MLJModelInterface = "^0.2" RecipesBase = "^0.8" julia = "^1" diff --git a/README.md b/README.md index 8f5f7e7..e4e13c5 100644 --- a/README.md +++ b/README.md @@ -61,14 +61,14 @@ This repository contains: developers to conveniently implement common hyperparameter optimization strategies, such as: - - [x] search a list of explicitly specified models `list = [model1, - model2, ...]` + - [x] search models generated by an arbitrary iterator, eg `models = [model1, + model2, ...]` (built-in `Explicit` strategy) - - [x] grid search + - [x] grid search (built-in `Grid` strategy) - [ ] Latin hypercubes - - [ ] random search + - [x] random search (built-in `RandomSearch` strategy) - [ ] bandit @@ -232,6 +232,8 @@ In setting up a tuning task, the user constructs an instance of the ### Implementation requirements for new tuning strategies +As sample implementations, see [/src/strategies/](/src/strategies) + #### Summary of functions Several functions are part of the tuning strategy API: @@ -373,6 +375,11 @@ is `fit!` the first time, and not on subsequent calls (unless `force=true`). (Specifically, `MLJBase.fit(::TunedModel, ...)` calls `setup` but `MLJBase.update(::TunedModel, ...)` does not.) 
+The `setup` function is called once only, when a `TunedModel` machine +is `fit!` the first time, and not on subsequent calls (unless +`force=true`). (Specifically, `MLJBase.fit(::TunedModel, ...)` calls +`setup` but `MLJBase.update(::TunedModel, ...)` does not.) + The `verbosity` is an integer indicating the level of logging: `0` means logging should be restricted to warnings, `-1`, means completely silent. @@ -440,6 +447,14 @@ any number of models. If `models!` returns a number of models exceeding the number needed to complete the history, the list returned is simply truncated. +Some simple tuning strategies, such as `RandomSearch`, will want to +return as many models as possible in one hit. The argument +`n_remaining` is the difference between the current length of the +history and the target number of iterations `tuned_model.n` set by the +user when constructing his `TunedModel` instance, `tuned_model` (or +`default_n(tuning, range)` if left unspecified). + + #### The `best` method: To define what constitutes the "optimal model" ```julia @@ -487,8 +502,9 @@ where: model - `tuning_report(::MyTuningStrategy, ...)` is a method the implementer - may overload. It should return a named tuple. The fallback is to - return the raw history: + may overload. It should return a named tuple with `history` as one + of the keys (the format up to the implementation.) 
The fallback is + to return the raw history: ```julia MLJTuning.tuning_report(tuning, history, state) = (history=history,) diff --git a/src/MLJTuning.jl b/src/MLJTuning.jl index c7aad28..f4c4f3e 100644 --- a/src/MLJTuning.jl +++ b/src/MLJTuning.jl @@ -7,7 +7,7 @@ module MLJTuning export TunedModel # defined in strategies/: -export Explicit, Grid +export Explicit, Grid, RandomSearch # defined in learning_curves.jl: export learning_curve!, learning_curve @@ -17,8 +17,11 @@ export learning_curve!, learning_curve import MLJBase using MLJBase +import MLJBase: Bounded, Unbounded, DoublyUnbounded, + LeftUnbounded, RightUnbounded using RecipesBase using Distributed +import Distributions import ComputationalResources: CPU1, CPUProcesses, CPUThreads, AbstractResource using Random @@ -26,7 +29,7 @@ using Random ## CONSTANTS -const DEFAULT_N = 10 +const DEFAULT_N = 10 # for when `default_n` is not implemented ## INCLUDE FILES @@ -34,9 +37,10 @@ const DEFAULT_N = 10 include("utilities.jl") include("tuning_strategy_interface.jl") include("tuned_models.jl") -include("ranges.jl") +include("range_methods.jl") include("strategies/explicit.jl") include("strategies/grid.jl") +include("strategies/random_search.jl") include("plotrecipes.jl") include("learning_curves.jl") diff --git a/src/range_methods.jl b/src/range_methods.jl new file mode 100644 index 0000000..d38586e --- /dev/null +++ b/src/range_methods.jl @@ -0,0 +1,145 @@ +## BOUNDEDNESS TRAIT + +# For random search and perhaps elsewhere, we need a variation on the +# built-in boundedness notions: +abstract type PositiveUnbounded <: Unbounded end +abstract type Other <: Unbounded end + +boundedness(::NumericRange{<:Any,<:Bounded}) = Bounded +boundedness(::NumericRange{<:Any,<:LeftUnbounded}) = Other +boundedness(::NumericRange{<:Any,<:DoublyUnbounded}) = Other +function boundedness(r::NumericRange{<:Any,<:RightUnbounded}) + if r.lower >= 0 + return PositiveUnbounded + end + return Other +end + +""" + MLJTuning.grid([rng, ] 
prototype, ranges, resolutions) + +Given an iterable `ranges` of `ParamRange` objects, and an iterable +`resolutions` of the same length, return a vector of models generated +by cloning and mutating the hyperparameters (fields) of `prototype`, +according to the Cartesian grid defined by the specifed +one-dimensional `ranges` (`ParamRange` objects) and specified +`resolutions`. A resolution of `nothing` for a `NominalRange` +indicates that all values should be used. + +Specification of an `AbstractRNG` object `rng` implies shuffling of +the results. Otherwise models are ordered, with the first +hyperparameter referenced cycling fastest. + +""" +grid(rng::AbstractRNG, prototype::Model, ranges, resolutions) = + shuffle(rng, grid(prototype, ranges, resolutions)) + +function grid(prototype::Model, ranges, resolutions) + + iterators = broadcast(iterator, ranges, resolutions) + + A = MLJBase.unwind(iterators...) + + N = size(A, 1) + map(1:N) do i + clone = deepcopy(prototype) + for k in eachindex(ranges) + field = ranges[k].field + recursive_setproperty!(clone, field, A[i,k]) + end + clone + end +end + + +## PRE-PROCESSING OF USER-SPECIFIED CARTESIAN RANGE OBJECTS + +""" + process_grid_range(user_specified_range, resolution, verbosity) + +Utility to convert a user-specified range (see [`Grid`](@ref)) into a +pair of tuples `(ranges, resolutions)`. + +For example, if `r1`, `r2` are `NumericRange`s and `s` is a +NominalRange` with 5 values, then we have: + + julia> MLJTuning.process_grid_range([(r1, 3), r2, s], 42, 1) == + ((r1, r2, s), (3, 42, 5)) + true + +If `verbosity` > 0, then a warning is issued if a `Nominal` range is +paired with a resolution. + +""" +process_grid_range(user_specified_range, args...) = + throw(ArgumentError("Unsupported range. ")) + +process_grid_range(usr::Union{ParamRange,Tuple{ParamRange,Int}}, args...) = + process_grid_range([usr, ], args...) 
+ +function process_grid_range(user_specified_range::AbstractVector, + resolution, verbosity) + # r unpaired: + stand(r) = throw(ArgumentError("Unsupported range. ")) + stand(r::NumericRange) = (r, resolution) + stand(r::NominalRange) = (r, length(r.values)) + + # (r, res): + stand(t::Tuple{NumericRange,Integer}) = t + function stand(t::Tuple{NominalRange,Integer}) + verbosity < 0 || + @warn "Ignoring a resolution specified for a `NominalRange`. " + return (first(t), length(first(t).values)) + end + + ret = zip(stand.(user_specified_range)...) |> collect + return first(ret), last(ret) +end + +""" + process_random_range(user_specified_range, + bounded, + positive_unbounded, + other) + +Utility to convert a user-specified range (see [`RandomSearch`](@ref)) +into an n-tuple of `(field, sampler)` pairs. + +""" +process_random_range(user_specified_range, args...) = + throw(ArgumentError("Unsupported range #1. ")) + +const DIST = Distributions.Distribution + +process_random_range(user_specified_range::Union{ParamRange, Tuple{Any,Any}}, + args...) = + process_random_range([user_specified_range, ], args...) + +function process_random_range(user_specified_range::AbstractVector, + bounded, + positive_unbounded, + other) + # r not paired: + stand(r) = throw(ArgumentError("Unsupported range #2. ")) + stand(r::NumericRange) = stand(r, boundedness(r)) + stand(r::NumericRange, ::Type{<:Bounded}) = (r.field, sampler(r, bounded)) + stand(r::NumericRange, ::Type{<:Other}) = (r.field, sampler(r, other)) + stand(r::NumericRange, ::Type{<:PositiveUnbounded}) = + (r.field, sampler(r, positive_unbounded)) + stand(r::NominalRange) = (n = length(r.values); + (r.field, sampler(r, fill(1/n, n)))) + # (r, d): + stand(t::Tuple{ParamRange,Any}) = stand(t...) + stand(r, d) = throw(ArgumentError("Unsupported range #3. 
")) + stand(r::NominalRange, d::AbstractVector{Float64}) = _stand(r, d) + stand(r::NumericRange, d:: Union{DIST, Type{<:DIST}}) = _stand(r, d) + _stand(r, d) = (r.field, sampler(r, d)) + + # (field, s): + stand(t::Tuple{Union{Symbol,Expr},Any}) = t + + return Tuple(stand.(user_specified_range)) + + # ret = zip(stand.(user_specified_range)...) |> collect + # return first(ret), last(ret) +end diff --git a/src/ranges.jl b/src/ranges.jl deleted file mode 100644 index 37e375d..0000000 --- a/src/ranges.jl +++ /dev/null @@ -1,70 +0,0 @@ -""" - MLJTuning.grid([rng, ] prototype, ranges, resolutions) - -Given an iterable `ranges` of `ParamRange` objects, and an iterable -`resolutions` of the same length, return a vector of models generated -by cloning and mutating the hyperparameters (fields) of `prototype`, -according to the Cartesian grid defined by the specifed -one-dimensional `ranges` (`ParamRange` objects) and specified -`resolutions`. A resolution of `nothing` for a `NominalRange` -indicates that all values should be used. - -Specification of an `AbstractRNG` object `rng` implies shuffling of -the results. Otherwise models are ordered, with the first -hyperparameter referenced cycling fastest. - -""" -grid(rng::AbstractRNG, prototype::Model, ranges, resolutions) = - shuffle(rng, grid(prototype, ranges, resolutions)) - -function grid(prototype::Model, ranges, resolutions) - - iterators = broadcast(iterator, ranges, resolutions) - - A = MLJBase.unwind(iterators...) - - N = size(A, 1) - map(1:N) do i - clone = deepcopy(prototype) - for k in eachindex(ranges) - field = ranges[k].field - recursive_setproperty!(clone, field, A[i,k]) - end - clone - end -end - -""" - process_user_range(user_specified_range, resolution, verbosity) - -Utility to convert user-specified range (see [`Grid`](@ref)) into a -pair of tuples `(ranges, resolutions)`. 
- -For example, if `r1`, `r2` are `NumericRange`s and `s` is a -NominalRange` with 5 values, then we have: - - julia> MLJTuning.process_user_range([(r1, 3), r2, s], 42, 1) == - ((r1, r2, s), (3, 42, 5)) - true - -If `verbosity` > 0, then a warning is issued if a `Nominal` range is -paired with a resolution. - -""" -process_user_range(user_specified_range, resolution, verbosity) = - process_user_range([user_specified_range, ], resolution, verbosity) -function process_user_range(user_specified_range::AbstractVector, - resolution, verbosity) - stand(r) = throw(ArgumentError("Unsupported range. ")) - stand(r::NumericRange) = (r, resolution) - stand(r::NominalRange) = (r, length(r.values)) - stand(t::Tuple{NumericRange,Integer}) = t - function stand(t::Tuple{NominalRange,Integer}) - verbosity < 0 || - @warn "Ignoring a resolution specified for a `NominalRange`. " - return (first(t), length(first(t).values)) - end - - ret = zip(stand.(user_specified_range)...) |> collect - return first(ret), last(ret) -end diff --git a/src/strategies/grid.jl b/src/strategies/grid.jl index 5c31266..31f8e6f 100644 --- a/src/strategies/grid.jl +++ b/src/strategies/grid.jl @@ -9,7 +9,11 @@ default `resolution` in each numeric dimension. ### Supported ranges: -- A single one-dimensional range (`ParamRange` object) `r`, or pair of +A single one-dimensional range or vector of one-dimensioinal ranges +can be specified. Specifically, in `Grid` search, the `range` field +of a `TunedModel` instance can be: + +- A single one-dimensional range (ie, `ParamRange` object) `r`, or pair of the form `(r, res)` where `res` specifies a resolution to override the default `resolution`. 
@@ -83,7 +87,7 @@ end function setup(tuning::Grid, model, user_range, verbosity) ranges, resolutions = - process_user_range(user_range, tuning.resolution, verbosity) + process_grid_range(user_range, tuning.resolution, verbosity) resolutions = adjusted_resolutions(tuning.goal, ranges, resolutions) fields = map(r -> r.field, ranges) @@ -123,7 +127,7 @@ end function default_n(tuning::Grid, user_range) ranges, resolutions = - process_user_range(user_range, tuning.resolution, -1) + process_grid_range(user_range, tuning.resolution, -1) resolutions = adjusted_resolutions(tuning.goal, ranges, resolutions) len(t::Tuple{NumericRange,Integer}) = length(iterator(t[1], t[2])) diff --git a/src/strategies/random_search.jl b/src/strategies/random_search.jl new file mode 100644 index 0000000..8f98fe4 --- /dev/null +++ b/src/strategies/random_search.jl @@ -0,0 +1,146 @@ +const ParameterName=Union{Symbol,Expr} + +""" + RandomSearch(bounded=Distributions.Uniform, + positive_unbounded=Distributions.Gamma, + other=Distributions.Normal, + rng=Random.GLOBAL_RNG) + +Instantiate a random search tuning strategy, for searching over +Cartesian hyperparameter domains, with customizable priors in each +dimension. + +### Supported ranges + +A single one-dimensional range or vector of one-dimensioinal ranges +can be specified. If not paired with a prior, then one is fitted, +according to fallback distribution types specified by the tuning +strategy hyperparameters. 
Specifically, in `RandomSearch`, the `range` +field of a `TunedModel` instance can be: + +- a single one-dimensional range (`ParamRange` object) `r` + +- a pair of the form `(r, d)`, with `r` as above and where `d` is: + + - a probability vector of the same length as `r.values` (`r` a + `NominalRange`) + + - any `Distributions.UnivariateDistribution` *instance* (`r` a + `NumericRange`) + + - one of the *subtypes* of `Distributions.UnivariateDistribution` + listed in the table below, for automatic fitting using + `Distributions.fit(d, r)`, a distribution whose support always + lies between `r.lower` and `r.upper` (`r` a `NumericRange`) + +- any pair of the form `(field, s)`, where `field` is the (possibly + nested) name of a field of the model to be tuned, and `s` an + arbitrary sampler object for that field. This means only that + `rand(rng, s)` is defined and returns valid values for the field. + +- any vector of objects of the above form + +A range vector may contain multiple entries for the same model field, +as in `range = [(:lambda, s1), (:alpha, s), (:lambda, s2)]`. In that +case the entry used in each iteration is random. + +distribution types | for fitting to ranges of this type +--------------------|----------------------------------- +`Arcsine`, `Uniform`, `Biweight`, `Cosine`, `Epanechnikov`, `SymTriangularDist`, `Triweight` | bounded +`Gamma`, `InverseGaussian`, `Poisson` | positive (bounded or unbounded) +`Normal`, `Logistic`, `LogNormal`, `Cauchy`, `Gumbel`, `Laplace` | any + +`ParamRange` objects are constructed using the `range` method. 
+ +### Examples + + using Distributions + + range1 = range(model, :hyper1, lower=0, upper=1) + + range2 = [(range(model, :hyper1, lower=1, upper=10), Arcsine), + range(model, :hyper2, lower=2, upper=Inf, unit=1, origin=3), + (range(model, :hyper2, lower=2, upper=4), Normal(0, 3)), + (range(model, :hyper3, values=[:ball, :tree]), [0.3, 0.7])] + + # uniform sampling of :(atom.λ) from [0, 1] without defining a NumericRange: + struct MySampler end + Base.rand(rng::Random.AbstractRNG, ::MySampler) = rand(rng) + range3 = (:(atom.λ), MySampler(), range1) + +### Algorithm + +In each iteration, a model is generated for evaluation by mutating the +fields of a deep copy of `model`. The range vector is shuffled and the +fields sampled according to the new order (repeated fields being +mutated more than once). For a `range` entry of the form `(field, s)` +the algorithm calls `rand(rng, s)` and mutates the field `field` of +the model clone to have this value. For an entry of the form `(r, d)`, +`s` is substituted with `sampler(r, d)`. If no `d` is specified, then +sampling is uniform (with replacement) if `r` is a `NominalRange`, and +is otherwise given by the defaults specified by the tuning strategy +parameters `bounded`, `positive_unbounded`, and `other`, depending on +the field values of the `NumericRange` object `r`. + +See also [`TunedModel`](@ref), [`range`](@ref), [`sampler`](@ref). 
+ +""" +mutable struct RandomSearch <: TuningStrategy + bounded + positive_unbounded + other + rng::Random.AbstractRNG +end + +# Constructor with keywords +function RandomSearch(; bounded=Distributions.Uniform, + positive_unbounded=Distributions.Gamma, + other=Distributions.Normal, + rng=Random.GLOBAL_RNG) + (bounded isa Type{<:Distributions.UnivariateDistribution} && + positive_unbounded isa Type{<:Distributions.UnivariateDistribution} && + other isa Type{<:Distributions.UnivariateDistribution}) || + error("`bounded`, `positive_unbounded` and `other` "* + "must all be subtypes of "* + "`Distributions.UnivariateDistribution`. ") + + _rng = rng isa Integer ? Random.MersenneTwister(rng) : rng + return RandomSearch(bounded, positive_unbounded, other, _rng) +end + +# `state` consists of a tuple of (field, sampler) pairs (that gets +# shuffled each iteration): +setup(tuning::RandomSearch, model, user_range, verbosity) = + process_random_range(user_range, + tuning.bounded, + tuning.positive_unbounded, + tuning.other) |> collect + +function MLJTuning.models!(tuning::RandomSearch, + model, + history, + state, # tuple of (field, sampler) pairs + n_remaining, + verbosity) + return map(1:n_remaining) do _ + clone = deepcopy(model) + Random.shuffle!(tuning.rng, state) + for (fld, s) in state + recursive_setproperty!(clone, fld, rand(tuning.rng, s)) + end + clone + end +end + +function tuning_report(tuning::RandomSearch, history, field_sampler_pairs) + + fields = first.(field_sampler_pairs) + parameter_scales = map(field_sampler_pairs) do (fld, s) + scale(s) + end + + plotting = plotting_report(fields, parameter_scales, history) + + return (history=history, plotting=plotting) + +end diff --git a/src/tuned_models.jl b/src/tuned_models.jl index 8372031..06e4e84 100644 --- a/src/tuned_models.jl +++ b/src/tuned_models.jl @@ -115,19 +115,63 @@ key | value `best_model` | optimal model instance `best_fitted_params`| learned parameters of the optimal model -The named tuple 
`report(mach)` has these keys/values: +The named tuple `report(mach)` includes these keys/values: key | value --------------------|-------------------------------------------------- `best_model` | optimal model instance `best_result` | corresponding "result" entry in the history `best_report` | report generated by fitting the optimal model +`history` | tuning strategy-specific history of all evaluations plus others specific to the `tuning` strategy, such as `history=...`. +### Summary of keyword arguments + +- `model`: `Supervised` model prototype that is cloned and mutated to + generate models for evaluation + +- `tuning=Grid()`: tuning strategy to be applied (eg, `RandomSearch()`) + +- `resampling=Holdout()`: resampling strategy (eg, `Holdout()`, `CV()`, + `StratifiedCV()`) to be applied in performance evaluations + +- `measure`: measure or measures to be applied in performance + evaluations; only the first used in optimization (unless the + strategy is multi-objective) but all reported to the history + +- `weights`: sample weights to be passed to the measure(s) in performance + evaluations, if supported (see important note above for behaviour in + unspecified case) + +- `repeats=1`: for generating train/test sets multiple times in + resampling; see [`evaluate!`](@ref) for details + +- `operation=predict`: operation to be applied to each fitted model; + usually `predict` but `predict_mean`, `predict_median` or + `predict_mode` can be used for `Probabilistic` models, if + the specified measures are `Deterministic` + +- `range`: range object; tuning strategy documentation describes + supported types + +- `n`: number of iterations (ie, models to be evaluated); set by + tuning strategy if left unspecified + +- `train_best=true`: whether to train the optimal model + +- `acceleration=default_resource()`: mode of parallelization for + tuning strategies that support this + +- `acceleration_resampling=CPU1()`: mode of parallelization for + resampling + +- `check_measure`:
whether to check `measure` is compatible with the + specified `model` and `operation`) + """ -function TunedModel(;model=nothing, +function TunedModel(; model=nothing, tuning=Grid(), resampling=MLJBase.Holdout(), measures=nothing, @@ -217,16 +261,17 @@ function event(metamodel, model = _first(metamodel) metadata = _last(metamodel) resampling_machine.model.model = model - verb = (verbosity == 2 ? 0 : verbosity - 1) + verb = (verbosity >= 2 ? verbosity - 3 : verbosity - 1) fit!(resampling_machine, verbosity=verb) e = evaluate(resampling_machine) r = result(tuning, history, state, e, metadata) if verbosity > 2 - println(params(model)) + println("hyperparameters: $(params(model))") end + if verbosity > 1 - println("$r") + println("result: $r") end return model, r @@ -300,6 +345,7 @@ function build(history, history, state, acceleration) + history = _vcat(history, Δhistory) end return history diff --git a/src/tuning_strategy_interface.jl b/src/tuning_strategy_interface.jl index 4c58b18..d1827eb 100644 --- a/src/tuning_strategy_interface.jl +++ b/src/tuning_strategy_interface.jl @@ -1,6 +1,11 @@ +## TYPES TO BE SUBTYPED + abstract type TuningStrategy <: MLJBase.MLJType end MLJBase.show_as_constructed(::Type{<:TuningStrategy}) = true + +## METHODS TO BE IMPLEMENTED + # for initialization of state (compulsory) setup(tuning::TuningStrategy, model, range, verbosity) = range @@ -30,5 +35,3 @@ tuning_report(tuning::TuningStrategy, history, state) = (history=history,) # for declaring the default number of models to evaluate: default_n(tuning::TuningStrategy, range) = DEFAULT_N - - diff --git a/test/range_methods.jl b/test/range_methods.jl new file mode 100644 index 0000000..cd1c1be --- /dev/null +++ b/test/range_methods.jl @@ -0,0 +1,183 @@ +module TestRanges + +using Test +using MLJBase +using MLJTuning +using Random +import Distributions +const Dist = Distributions + +# `in` for MLJType is overloaded to be `===` based. 
For purposed of +# testing here, we need `==` based: +function _in(x, itr)::Union{Bool,Missing} + for y in itr + ismissing(y) && return missing + y == x && return true + end + return false +end +_issubset(itr1, itr2) = all(_in(x, itr2) for x in itr1) + +@testset "boundedness traits" begin + r1 = range(Float64, :K, lower=1, upper=10) + r2 = range(Float64, :K, lower=-1, upper=Inf, origin=1, unit=1) + r3 = range(Float64, :K, lower=0, upper=Inf, origin=1, unit=1) + r4 = range(Float64, :K, lower=-Inf, upper=1, origin=0, unit=1) + r5 = range(Float64, :K, lower=-Inf, upper=Inf, origin=1, unit=1) + @test MLJTuning.boundedness(r1) == MLJTuning.Bounded + @test MLJTuning.boundedness(r2) == MLJTuning.Other + @test MLJTuning.boundedness(r3) == MLJTuning.PositiveUnbounded + @test MLJTuning.boundedness(r4) == MLJTuning.Other + @test MLJTuning.boundedness(r5) == MLJTuning.Other +end + +mutable struct DummyModel <: Deterministic + lambda::Float64 + metric::Float64 + kernel::Char +end + +dummy_model = DummyModel(4, 9.5, 'k') + +mutable struct SuperModel <: Deterministic + K::Int64 + model1::DummyModel + model2::DummyModel +end + +dummy_model = DummyModel(1.2, 9.5, 'k') +super_model = SuperModel(4, dummy_model, deepcopy(dummy_model)) + +r1 = range(super_model, :(model1.kernel), values=['c', 'd']) +r2 = range(super_model, :K, lower=1, upper=10, scale=:log10) + +@testset "models from cartesian range and resolutions" begin + + # with method: + m1 = MLJTuning.grid(super_model, [r1, r2], [nothing, 7]) + m1r = MLJTuning.grid(MersenneTwister(123), super_model, [r1, r2], + [nothing, 7]) + + # generate all models by hand: + models1 = [SuperModel(1, DummyModel(1.2, 9.5, 'c'), dummy_model), + SuperModel(1, DummyModel(1.2, 9.5, 'd'), dummy_model), + SuperModel(2, DummyModel(1.2, 9.5, 'c'), dummy_model), + SuperModel(2, DummyModel(1.2, 9.5, 'd'), dummy_model), + SuperModel(3, DummyModel(1.2, 9.5, 'c'), dummy_model), + SuperModel(3, DummyModel(1.2, 9.5, 'd'), dummy_model), + SuperModel(5, 
DummyModel(1.2, 9.5, 'c'), dummy_model), + SuperModel(5, DummyModel(1.2, 9.5, 'd'), dummy_model), + SuperModel(7, DummyModel(1.2, 9.5, 'c'), dummy_model), + SuperModel(7, DummyModel(1.2, 9.5, 'd'), dummy_model), + SuperModel(10, DummyModel(1.2, 9.5, 'c'), dummy_model), + SuperModel(10, DummyModel(1.2, 9.5, 'd'), dummy_model)] + + @test _issubset(models1, m1) && _issubset(m1, models1) + @test m1r != models1 + @test _issubset(models1, m1r) && _issubset(m1, models1) + + # with method: + m2 = MLJTuning.grid(super_model, [r1, r2], [1, 7]) + + # generate all models by hand: + models2 = [SuperModel(1, DummyModel(1.2, 9.5, 'c'), dummy_model), + SuperModel(2, DummyModel(1.2, 9.5, 'c'), dummy_model), + SuperModel(3, DummyModel(1.2, 9.5, 'c'), dummy_model), + SuperModel(5, DummyModel(1.2, 9.5, 'c'), dummy_model), + SuperModel(7, DummyModel(1.2, 9.5, 'c'), dummy_model), + SuperModel(10, DummyModel(1.2, 9.5, 'c'), dummy_model)] + + @test _issubset(models2, m2) && _issubset(m2, models2) + +end + +@testset "processing user specification of range in Grid" begin + r1 = range(Int, :h1, lower=1, upper=10) + r2 = range(Int, :h2, lower=20, upper=30) + s = range(Char, :j1, values = ['x', 'y']) + @test_throws ArgumentError MLJTuning.process_grid_range("junk", 42, 1) + @test(@test_logs((:warn, r"Ignoring"), + MLJTuning.process_grid_range((s, 3), 42, 1)) == + ((s, ), (2, ))) + @test MLJTuning.process_grid_range(r1, 42, 1) == ((r1, ), (42, )) + @test MLJTuning.process_grid_range((r1, 3), 42, 1) == ((r1, ), (3, )) + @test MLJTuning.process_grid_range(s, 42, 1) == ((s, ), (2,)) + @test MLJTuning.process_grid_range([(r1, 3), r2, s], 42, 1) == + ((r1, r2, s), (3, 42, 2)) +end + +struct MySampler end +Base.rand(rng::AbstractRNG, ::MySampler) = rand(rng) + +@testset "processing user specification of range in RandomSearch" begin + r1 = range(Int, :h1, lower=1, upper=10, scale=exp) + r2 = range(Int, :h2, lower=5, upper=Inf, origin=10, unit=5) + r3 = range(Char, :j1, values = ['x', 'y']) + s = 
MySampler() + + @test_throws(ArgumentError, + MLJTuning.process_random_range("junk", + Dist.Uniform, + Dist.Gamma, + Dist.Cauchy)) + @test_throws(ArgumentError, + MLJTuning.process_random_range((r1, "junk"), + Dist.Uniform, + Dist.Gamma, + Dist.Cauchy)) + @test_throws(ArgumentError, + MLJTuning.process_random_range((r3, "junk"), + Dist.Uniform, + Dist.Gamma, + Dist.Cauchy)) + @test_throws(ArgumentError, + MLJTuning.process_random_range(("junk", s), + Dist.Uniform, + Dist.Gamma, + Dist.Cauchy)) + + # unpaired numeric range: + pp = MLJTuning.process_random_range(r1, + Dist.Uniform, # bounded + Dist.Gamma, # positive_unbounded + Dist.Cauchy) # other + @test pp isa Tuple{Tuple{Symbol,MLJBase.NumericSampler}} + p = first(pp) + @test first(p) == :h1 + s = last(p) + @test s.scale == r1.scale + @test s.distribution == Dist.Uniform(1.0, 10.0) + + # unpaired nominal range: + p = MLJTuning.process_random_range(r3, + Dist.Uniform, + Dist.Gamma, + Dist.Cauchy) |> first + @test first(p) == :j1 + s = last(p) + @test s.values == r3.values + @test s.distribution.p == [0.5, 0.5] + @test s.distribution.support == 1:2 + + # (numeric range, distribution instance): + p = MLJTuning.process_random_range((r2, Dist.Poisson(3)), + Dist.Uniform, + Dist.Gamma, + Dist.Cauchy) |> first + @test first(p) == :h2 + s = last(p) + @test s.scale == r2.scale + @test s.distribution == Dist.truncated(Dist.Poisson(3.0), 5.0, Inf) + + # (numeric range, distribution type): + p = MLJTuning.process_random_range((r2, Dist.Poisson), + Dist.Uniform, + Dist.Gamma, + Dist.Cauchy) |> first + s = last(p) + @test s.distribution == Dist.truncated(Dist.Poisson(r2.unit), 5.0, Inf) + +end + +end +true diff --git a/test/ranges.jl b/test/ranges.jl deleted file mode 100644 index 17b3af1..0000000 --- a/test/ranges.jl +++ /dev/null @@ -1,95 +0,0 @@ -module TestRanges - -using Test -using MLJBase -using MLJTuning -using Random - -# `in` for MLJType is overloaded to be `===` based. 
For purposed of -# testing here, we need `==` based: -function _in(x, itr)::Union{Bool,Missing} - for y in itr - ismissing(y) && return missing - y == x && return true - end - return false -end -_issubset(itr1, itr2) = all(_in(x, itr2) for x in itr1) - -mutable struct DummyModel <: Deterministic - lambda::Float64 - metric::Float64 - kernel::Char -end - -dummy_model = DummyModel(4, 9.5, 'k') - -mutable struct SuperModel <: Deterministic - K::Int64 - model1::DummyModel - model2::DummyModel -end - -dummy_model = DummyModel(1.2, 9.5, 'k') -super_model = SuperModel(4, dummy_model, deepcopy(dummy_model)) - -r1 = range(super_model, :(model1.kernel), values=['c', 'd']) -r2 = range(super_model, :K, lower=1, upper=10, scale=:log10) - -@testset "models from cartesian range and resolutions" begin - - # with method: - m1 = MLJTuning.grid(super_model, [r1, r2], [nothing, 7]) - m1r = MLJTuning.grid(MersenneTwister(123), super_model, [r1, r2], - [nothing, 7]) - - # generate all models by hand: - models1 = [SuperModel(1, DummyModel(1.2, 9.5, 'c'), dummy_model), - SuperModel(1, DummyModel(1.2, 9.5, 'd'), dummy_model), - SuperModel(2, DummyModel(1.2, 9.5, 'c'), dummy_model), - SuperModel(2, DummyModel(1.2, 9.5, 'd'), dummy_model), - SuperModel(3, DummyModel(1.2, 9.5, 'c'), dummy_model), - SuperModel(3, DummyModel(1.2, 9.5, 'd'), dummy_model), - SuperModel(5, DummyModel(1.2, 9.5, 'c'), dummy_model), - SuperModel(5, DummyModel(1.2, 9.5, 'd'), dummy_model), - SuperModel(7, DummyModel(1.2, 9.5, 'c'), dummy_model), - SuperModel(7, DummyModel(1.2, 9.5, 'd'), dummy_model), - SuperModel(10, DummyModel(1.2, 9.5, 'c'), dummy_model), - SuperModel(10, DummyModel(1.2, 9.5, 'd'), dummy_model)] - - @test _issubset(models1, m1) && _issubset(m1, models1) - @test m1r != models1 - @test _issubset(models1, m1r) && _issubset(m1, models1) - - # with method: - m2 = MLJTuning.grid(super_model, [r1, r2], [1, 7]) - - # generate all models by hand: - models2 = [SuperModel(1, DummyModel(1.2, 9.5, 'c'), 
dummy_model), - SuperModel(2, DummyModel(1.2, 9.5, 'c'), dummy_model), - SuperModel(3, DummyModel(1.2, 9.5, 'c'), dummy_model), - SuperModel(5, DummyModel(1.2, 9.5, 'c'), dummy_model), - SuperModel(7, DummyModel(1.2, 9.5, 'c'), dummy_model), - SuperModel(10, DummyModel(1.2, 9.5, 'c'), dummy_model)] - - @test _issubset(models2, m2) && _issubset(m2, models2) - -end - -@testset "processing user specification of range" begin - r1 = range(Int, :h1, lower=1, upper=10) - r2 = range(Int, :h2, lower=20, upper=30) - s = range(Char, :j1, values = ['x', 'y']) - @test_throws ArgumentError MLJTuning.process_user_range("junk", 42, 1) - @test(@test_logs((:warn, r"Ignoring"), - MLJTuning.process_user_range((s, 3), 42, 1)) == - ((s, ), (2, ))) - @test MLJTuning.process_user_range(r1, 42, 1) == ((r1, ), (42, )) - @test MLJTuning.process_user_range((r1, 3), 42, 1) == ((r1, ), (3, )) - @test MLJTuning.process_user_range(s, 42, 1) == ((s, ), (2,)) - @test MLJTuning.process_user_range([(r1, 3), r2, s], 42, 1) == - ((r1, r2, s), (3, 42, 2)) -end - -end -true diff --git a/test/runtests.jl b/test/runtests.jl index 4764491..f93150c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -20,14 +20,18 @@ end @test include("tuned_models.jl") end -@testset "ranges" begin - @test include("ranges.jl") +@testset "range_methods" begin + @test include("range_methods.jl") end @testset "grid" begin @test include("strategies/grid.jl") end +@testset "random search" begin + @test include("strategies/random_search.jl") +end + @testset "learning curves" begin @test include("learning_curves.jl") end diff --git a/test/strategies/random_search.jl b/test/strategies/random_search.jl new file mode 100644 index 0000000..55501ad --- /dev/null +++ b/test/strategies/random_search.jl @@ -0,0 +1,152 @@ +module TestRandomSearch + +using Test +using MLJBase +using MLJTuning +import Distributions +import Random +import Random.seed! 
+seed!(1234) + +const Dist = Distributions + +x1 = rand(100); +x2 = rand(100); +x3 = rand(100) +X = (x1=x1, x2=x2, x3=x3); +y = 2*x1 .+ 5*x2 .- 3*x3 .+ 0.2*rand(100); + +mutable struct DummyModel <: Deterministic + lambda::Int + alpha::Int + kernel::Char +end + +mutable struct SuperModel <: Deterministic + K::Int64 + model1::DummyModel + model2::DummyModel +end + +MLJBase.fit(::DummyModel, verbosity::Int, X, y) = mean(y), nothing, nothing +MLJBase.predict(::DummyModel, fitresult, Xnew) = + fill(fitresult, schema(Xnew).nrows) + +dummy_model = DummyModel(1, 9, 'k') +super_model = SuperModel(4, dummy_model, deepcopy(dummy_model)) + +r0 = range(super_model, :(model1.kernel), values=['c', 'd']) +r1 = range(super_model, :(model1.lambda), lower=1, upper=3) +r2 = range(super_model, :K, lower=0, upper=Inf, origin=2, unit=3) + +@testset "Constructor" begin + @test_throws Exception RandomSearch(bounded=Dist.Uniform(1,2)) + @test_throws Exception RandomSearch(positive_unbounded=Dist.Poisson(1)) + @test_throws Exception RandomSearch(bounded=Dist.Uniform(1,2)) +end + +@testset "setup" begin + user_range = [r0, (r1, Dist.SymTriangularDist), r2] + tuning = RandomSearch(positive_unbounded=Dist.Gamma, rng=123) + + @test MLJTuning.default_n(tuning, user_range) == MLJTuning.DEFAULT_N + + p0, p1, p2 = MLJTuning.setup(tuning, super_model, user_range, 3) + @test first.([p0, p1, p2]) == [:(model1.kernel), :(model1.lambda), :K] + + s0, s1, s2 = last.([p0, p1, p2]) + @test s0.distribution == Dist.Categorical(0.5, 0.5) + @test s1.distribution == Dist.SymTriangularDist(2,1) + γ = s2.distribution + @test mean(γ) == 2 + @test std(γ) == 3 +end + +@testset "models!" 
begin
+    N = 10000
+    model = DummyModel(1, 1, 'k')
+    r1 = range(model, :lambda, lower=0, upper=1)
+    r2 = range(model, :alpha, lower=-1, upper=1)
+    user_range = [r1, r2]
+    tuning = RandomSearch(rng=1)
+    tuned_model = TunedModel(model=model,
+                             tuning=tuning,
+                             n=N,
+                             range=user_range,
+                             measures=[rms,mae])
+    state = MLJTuning.setup(tuning, model, user_range, 3)
+    my_models = MLJTuning.models!(tuning,
+                                  model,
+                                  nothing, # history
+                                  state,
+                                  N, # n_remaining
+                                  0)
+
+    # check the samples of each hyperparam have expected distribution:
+    lambdas = map(m -> m.lambda, my_models)
+    alphas = map(m -> m.alpha, my_models)
+    a, b = values(Dist.countmap(lambdas))
+    @test abs(a/b - 1) < 0.06
+    dict = Dist.countmap(alphas)
+    a, b, c = dict[-1], dict[0], dict[1]
+    @test abs(b/a - 2) < 0.06
+    @test abs(b/c - 2) < 0.06
+end
+
+@testset "tuned model using random search and its report" begin
+    N = 4
+    model = DummyModel(1, 1, 'k')
+    r1 = range(model, :lambda, lower=0, upper=1)
+    r2 = range(model, :alpha, lower=-1, upper=1)
+    user_range = [r1, r2]
+    tuning = RandomSearch(rng=1)
+    tuned_model = TunedModel(model=model,
+                             tuning=tuning,
+                             n=N,
+                             resampling=Holdout(fraction_train=0.5),
+                             range=user_range,
+                             measures=[rms,mae])
+    mach = machine(tuned_model, X, y)
+    fit!(mach, verbosity=0)
+
+    # model predicts mean of training target, so:
+    train, test = partition(eachindex(y), 0.5)
+    μ = mean(y[train])
+    error = mean((y[test] .- μ).^2) |> sqrt
+
+    r = report(mach)
+    @test r.plotting.parameter_names ==
+        ["lambda", "alpha"]
+    @test r.plotting.parameter_scales == [:linear, :linear]
+    @test r.plotting.measurements ≈ fill(error, N)
+    @test size(r.plotting.parameter_values) == (N, 2)
+end
+
+struct ConstantSampler
+    c
+end
+Base.rand(rng::Random.AbstractRNG, s::ConstantSampler) = s.c
+
+@testset "multiple samplers for single field" begin
+    N = 1000
+    model = DummyModel(1, 1, 'k')
+    r = range(model, :alpha, lower=-1, upper=1)
+    user_range = [(:lambda, ConstantSampler(0)),
+                  r,
+                  (:lambda,
ConstantSampler(1))] + tuning = RandomSearch(rng=123) + tuned_model = TunedModel(model=model, + tuning=tuning, + n=N, + range=user_range, + measures=[rms,mae]) + mach = fit!(machine(tuned_model, X, y)) + my_models = first.(report(mach).history); + lambdas = map(m -> m.lambda, my_models); + a, b = values(Dist.countmap(lambdas)) + @test abs(a/b -1) < 0.04 + @test a + b == N +end + +end # module +true diff --git a/test/tuned_models.jl b/test/tuned_models.jl index 18e4340..ea586ff 100644 --- a/test/tuned_models.jl +++ b/test/tuned_models.jl @@ -23,7 +23,7 @@ y = 2*x1 .+ 5*x2 .- 3*x3 .+ 0.4*rand(N); m(K) = KNNRegressor(K=K) r = [m(K) for K in 2:13] -# TODO: replace the above with the line below and fix post an issue on +# TODO: replace the above with the line below and post an issue on # the failure (a bug in Distributed, I reckon): # r = (m(K) for K in 2:13) @@ -38,6 +38,19 @@ r = [m(K) for K in 2:13] TunedModel(model=first(r), tuning=Explicit(), range=r)) end +# @testset "duplicate models warning" begin +# s = [m(K) for K in 2:13] +# push!(s, m(13)) +# tm = TunedModel(model=first(s), tuning=Explicit(), +# range=s, resampling=CV(nfolds=2), +# measures=[rms, l1]) +# @test_logs((:info, r"Attempting"), +# (:warn, r"A model already"), +# fitresult, meta_state, report = fit(tm, 1, X, y)) +# history, _, state = meta_state; +# @test length(history) == length(2:13) + 1 +# end + results = [(evaluate(model, X, y, resampling=CV(nfolds=2), measure=rms, @@ -120,9 +133,9 @@ end) catch MethodError DEFAULT_N end - + end - + end @testset_accelerated("passing of model metadata", accel,