diff --git a/Project.toml b/Project.toml index f069172..47459d2 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "GPLikelihoods" uuid = "6031954c-0455-49d7-b3b9-3e1c99afaf40" authors = ["JuliaGaussianProcesses Team"] -version = "0.2.7" +version = "0.3.0" [deps] Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" diff --git a/src/GPLikelihoods.jl b/src/GPLikelihoods.jl index 1640577..34890ca 100644 --- a/src/GPLikelihoods.jl +++ b/src/GPLikelihoods.jl @@ -16,6 +16,7 @@ export BernoulliLikelihood, GammaLikelihood export Link, ChainLink, + BijectiveSimplexLink, ExpLink, LogLink, InvLink, diff --git a/src/likelihoods/categorical.jl b/src/likelihoods/categorical.jl index 7126ec1..10c5c36 100644 --- a/src/likelihoods/categorical.jl +++ b/src/likelihoods/categorical.jl @@ -1,22 +1,41 @@ """ - CategoricalLikelihood(l=softmax) + CategoricalLikelihood(l=BijectiveSimplexLink(softmax)) Categorical likelihood is to be used if we assume that the -uncertainity associated with the data follows a Categorical distribution. +uncertainty associated with the data follows a [Categorical distribution](https://en.wikipedia.org/wiki/Categorical_distribution). + +Assuming a distribution with `n` categories: + +## `n-1` inputs (bijective link) + +One can work with a bijective transformation by wrapping a link (like `softmax`) +into a [`BijectiveSimplexLink`](@ref) and only needs `n-1` inputs: ```math p(y|f_1, f_2, \\dots, f_{n-1}) = \\operatorname{Categorical}(y | l(f_1, f_2, \\dots, f_{n-1}, 0)) ``` -Given an `AbstractVector` ``[f_1, f_2, ..., f_{n-1}]``, returns a `Categorical` distribution, -with probabilities given by ``l(f_1, f_2, ..., f_{n-1}, 0)``. +The default constructor is a bijective link around `softmax`. 
+ +## `n` inputs (non-bijective link) + +One can also pass the inputs directly, without concatenating a `0`: +```math + p(y|f_1, f_2, \\dots, f_n) = \\operatorname{Categorical}(y | l(f_1, f_2, \\dots, f_n)) +``` +This variant is over-parametrized, as there are `n-1` independent parameters +embedded in an `n`-dimensional parameter space. +For more details, see the end of the section of this [Wikipedia link](https://en.wikipedia.org/wiki/Exponential_family#Table_of_distributions) +where it corresponds to Variants 1 and 2. """ struct CategoricalLikelihood{Tl<:AbstractLink} <: AbstractLikelihood invlink::Tl end -CategoricalLikelihood(l=softmax) = CategoricalLikelihood(link(l)) +CategoricalLikelihood(l=BijectiveSimplexLink(softmax)) = CategoricalLikelihood(link(l)) -(l::CategoricalLikelihood)(f::AbstractVector{<:Real}) = Categorical(l.invlink(vcat(f, 0))) +function (l::CategoricalLikelihood)(f::AbstractVector{<:Real}) + return Categorical(l.invlink(f)) +end function (l::CategoricalLikelihood)(fs::AbstractVector) - return Product(Categorical.(l.invlink.(vcat.(fs, 0)))) + return Product(Categorical.(l.invlink.(fs))) end diff --git a/src/links.jl b/src/links.jl index 1e86a99..e7e66c9 100644 --- a/src/links.jl +++ b/src/links.jl @@ -30,6 +30,23 @@ link(l::AbstractLink) = l Base.inv(l::Link) = Link(InverseFunctions.inverse(l.f)) +""" + BijectiveSimplexLink(link) + +Wrapper to preprocess the inputs by adding a `0` at the end before passing them to +the link `link`. +This is a necessary step to work with simplices. +For example with the [`SoftMaxLink`](@ref), to obtain an `n`-simplex leading to +`n+1` categories for the [`CategoricalLikelihood`](@ref), +one needs to pass `n+1` latent GPs. +However, by wrapping the link into a `BijectiveSimplexLink`, only `n` latent GPs are needed. 
+""" +struct BijectiveSimplexLink{L} <: AbstractLink + link::L +end + +(l::BijectiveSimplexLink)(f::AbstractVector{<:Real}) = l.link(vcat(f, 0)) + # alias const LogLink = Link{typeof(log)} const ExpLink = Link{typeof(exp)} diff --git a/test/likelihoods/categorical.jl b/test/likelihoods/categorical.jl index b129a24..8ed48be 100644 --- a/test/likelihoods/categorical.jl +++ b/test/likelihoods/categorical.jl @@ -1,9 +1,13 @@ @testset "CategoricalLikelihood" begin - for args in ((), (softmax,), (SoftMaxLink(),)) - @test CategoricalLikelihood(args...) isa CategoricalLikelihood{SoftMaxLink} - end + @test CategoricalLikelihood() isa + CategoricalLikelihood{<:GPLikelihoods.BijectiveSimplexLink} + + @test CategoricalLikelihood(softmax) isa CategoricalLikelihood{SoftMaxLink} + @test CategoricalLikelihood(SoftMaxLink()) isa CategoricalLikelihood{SoftMaxLink} - lik = CategoricalLikelihood() OUT_DIM = 4 - test_interface(lik, Categorical, OUT_DIM) + lik_bijective = CategoricalLikelihood() + test_interface(lik_bijective, Categorical, OUT_DIM) + lik_nonbijective = CategoricalLikelihood(softmax) + test_interface(lik_nonbijective, Categorical, OUT_DIM) end diff --git a/test/links.jl b/test/links.jl index b527a39..67c1af3 100644 --- a/test/links.jl +++ b/test/links.jl @@ -10,6 +10,11 @@ l = GPLikelihoods.link(ExpLink()) @test l == ExpLink() + ## BijectiveSimplexLink + l = SoftMaxLink() + sbl = BijectiveSimplexLink(l) + @test sbl(xs) == l(vcat(xs, 0)) + # Log l = LogLink() @test l(x) == log(x)