Merge pull request #119 from baggepinnen/sqkalman
Add square-root Kalman filter
baggepinnen authored Sep 29, 2023
2 parents 5a8ae59 + af98f89 commit 9b087f0
Showing 7 changed files with 148 additions and 3 deletions.
2 changes: 1 addition & 1 deletion Project.toml
@@ -1,7 +1,7 @@
name = "LowLevelParticleFilters"
uuid = "d9d29d28-c116-5dba-9239-57a5fe23875b"
authors = ["baggepinnen <baggepinnen@gmail.com>"]
version = "3.5.0"
version = "3.6.0"

[deps]
ControlSystemsBase = "aaaaaaaa-a6ca-5380-bf3e-84a91bcd477e"
1 change: 1 addition & 0 deletions README.md
@@ -12,6 +12,7 @@ We provide a number of filter types
- `AuxiliaryParticleFilter`: This filter is identical to `ParticleFilter`, but uses a slightly different proposal mechanism for new particles.
- `AdvancedParticleFilter`: This filter gives you more flexibility, at the expense of having to define a few more functions.
- `KalmanFilter`. A standard Kalman filter. Has the same features as the particle filters, but is restricted to linear dynamics (possibly time varying) and Gaussian noise.
- `SqKalmanFilter`. A standard Kalman filter in square-root form (slightly slower but more numerically stable with ill-conditioned covariance).
- `ExtendedKalmanFilter`: For nonlinear systems, the EKF runs a regular Kalman filter on linearized dynamics. Uses ForwardDiff.jl for linearization. The noise model must be Gaussian.
- `UnscentedKalmanFilter`: The Unscented Kalman filter often performs slightly better than the Extended Kalman filter but may be slightly more computationally expensive. The UKF handles nonlinear dynamics and measurement model, but still requires an additive Gaussian noise model.
- `DAEUnscentedKalmanFilter`: An Unscented Kalman filter for differential-algebraic systems (DAE).
1 change: 1 addition & 0 deletions docs/src/index.md
@@ -19,6 +19,7 @@ Depending on the nature of ``f`` and ``g``, the best method of estimating the stat
## Estimator types
We provide a number of filter types
- [`KalmanFilter`](@ref). A standard Kalman filter. Is restricted to linear dynamics (possibly time varying) and Gaussian noise.
- [`SqKalmanFilter`](@ref). A standard Kalman filter in square-root form (slightly slower but more numerically stable with ill-conditioned covariance).
- [`ExtendedKalmanFilter`](@ref): For nonlinear systems, the EKF runs a regular Kalman filter on linearized dynamics. Uses ForwardDiff.jl for linearization. The noise model must be Gaussian.
- [`UnscentedKalmanFilter`](@ref): The Unscented Kalman filter often performs slightly better than the Extended Kalman filter but may be slightly more computationally expensive. The UKF handles nonlinear dynamics and measurement model, but still requires an additive Gaussian noise model and assumes all posterior distributions are Gaussian, i.e., it cannot handle multi-modal posteriors.
- [`ParticleFilter`](@ref): The particle filter is a nonlinear estimator. This version of the particle filter is simple to use and assumes that both dynamics noise and measurement noise are additive. Particle filters handle multi-modal posteriors.
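The lists above present `SqKalmanFilter` as a drop-in alternative to `KalmanFilter`. As a purely illustrative sketch (not part of this changeset; all matrices are made-up placeholders, and the standard `KalmanFilter(A, B, C, D, R1, R2, d0)` constructor is assumed), both filters are built from the same arguments:

```julia
using LowLevelParticleFilters, Distributions

# Placeholder scalar model; every value below is illustrative only.
A  = fill(0.9, 1, 1)
B  = fill(1.0, 1, 1)
C  = fill(1.0, 1, 1)
D  = fill(0.0, 1, 1)
R1 = fill(0.01, 1, 1)                    # process-noise covariance
R2 = fill(0.1, 1, 1)                     # measurement-noise covariance
d0 = MvNormal(zeros(1), fill(1.0, 1, 1)) # initial state distribution

kf   = KalmanFilter(A, B, C, D, R1, R2, d0)   # standard form
sqkf = SqKalmanFilter(A, B, C, D, R1, R2, d0) # square-root form, same arguments
```

Both objects are driven through the same `predict!`/`correct!`/`forward_trajectory` interface; the square-root variant differs in storing upper-triangular Cholesky factors of the covariances internally, as the docstring in `src/sq_kalman.jl` below explains.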
3 changes: 2 additions & 1 deletion src/LowLevelParticleFilters.jl
@@ -1,6 +1,6 @@
module LowLevelParticleFilters

export KalmanFilter, UnscentedKalmanFilter, DAEUnscentedKalmanFilter, ExtendedKalmanFilter, ParticleFilter, AuxiliaryParticleFilter, AdvancedParticleFilter, PFstate, index, state, covariance, num_particles, effective_particles, weights, expweights, particles, particletype, smooth, sample_measurement, simulate, loglik, log_likelihood_fun, forward_trajectory, mean_trajectory, mode_trajectory, weighted_mean, weighted_cov, update!, predict!, correct!, reset!, metropolis, shouldresample, TupleProduct
export KalmanFilter, SqKalmanFilter, UnscentedKalmanFilter, DAEUnscentedKalmanFilter, ExtendedKalmanFilter, ParticleFilter, AuxiliaryParticleFilter, AdvancedParticleFilter, PFstate, index, state, covariance, num_particles, effective_particles, weights, expweights, particles, particletype, smooth, sample_measurement, simulate, loglik, log_likelihood_fun, forward_trajectory, mean_trajectory, mode_trajectory, weighted_mean, weighted_cov, update!, predict!, correct!, reset!, metropolis, shouldresample, TupleProduct
@deprecate weigthed_mean weighted_mean
@deprecate weigthed_cov weighted_cov

@@ -37,6 +37,7 @@ include("utils.jl")
include("smoothing.jl")
include("plotting.jl")
include("ekf.jl")
include("sq_kalman.jl")

index(f::AbstractFilter) = f.t[]

2 changes: 1 addition & 1 deletion src/smoothing.jl
@@ -72,7 +72,7 @@ end
Calculate the sum of squared errors ``\\sum dot(e, λ, e)``.
- `λ`: May be a weighting matrix. A commonly used metric is `λ = Diagonal(1 ./ (mag.^2))`, where `mag` is a vector of the "typical magnitude" of each output.
See also [`prediction_errors!`](@ref) which returns the prediction errors themselves rather than their sum of squares (for use with Gauss-Newton style optimization).
See also [`LowLevelParticleFilters.prediction_errors!`](@ref) which returns the prediction errors themselves rather than their sum of squares (for use with Gauss-Newton style optimization).
"""
function sse(f::AbstractFilter, u, y, p=parameters(f), λ=1)
reset!(f)
132 changes: 132 additions & 0 deletions src/sq_kalman.jl
@@ -0,0 +1,132 @@
@with_kw struct SqKalmanFilter{AT,BT,CT,DT,R1T,R2T,R2DT,D0T,XT,RT,P,αT} <: AbstractKalmanFilter
A::AT
B::BT
C::CT
D::DT
R1::R1T
R2::R2T
R2d::R2DT
d0::D0T
x::XT
R::RT
t::Base.RefValue{Int} = Ref(1)
p::P = SciMLBase.NullParameters()
α::αT = 1.0
end


"""
SqKalmanFilter(A,B,C,D,R1,R2,d0=MvNormal(R1); p = SciMLBase.NullParameters(), α=1)
A standard Kalman filter in square-root form. This filter may have better numerical performance when the covariance matrices are ill-conditioned.
The matrices `A,B,C,D` define the dynamics
```
x' = Ax + Bu + w
y = Cx + Du + e
```
where `w ~ N(0, R1)`, `e ~ N(0, R2)` and `x(0) ~ d0`.
The matrices can be time-varying such that, e.g., `A[:, :, t]` contains the ``A`` matrix at time index `t`.
They can also be given as functions of the form
```
Afun(x, u, p, t) -> A
```
For this filter, the internal fields that store covariance matrices hold the upper-triangular Cholesky factors.
`α` is an optional "forgetting factor". If it is set to a value > 1, such as 1.01-1.2, the filter will, in addition to the covariance inflation due to ``R_1``, exhibit "exponential forgetting" similar to a [Recursive Least-Squares (RLS) estimator](https://en.wikipedia.org/wiki/Recursive_least_squares_filter). It is thus possible to obtain an RLS-like algorithm by setting ``R_1=0, R_2 = 1/α`` and ``α > 1`` (``α`` is the inverse of the traditional RLS parameter ``α = 1/λ``). The form of the covariance update is
```math
R(t+1|t) = α AR(t)A^T + R_1
```
Ref: "A Square-Root Kalman Filter Using Only QR Decompositions", Kevin Tracy https://arxiv.org/abs/2208.06452
"""
function SqKalmanFilter(A,B,C,D,R1,R2,d0=MvNormal(Matrix(R1)); p = SciMLBase.NullParameters(), α = 1.0, check = true)
α ≥ 1 || @warn "α should be > 1 for exponential forgetting. An α < 1 will lead to exponential loss of adaptation over time."
if check
maximum(abs, eigvals(A isa SMatrix ? Matrix(A) : A)) ≥ 2 && @warn "The dynamics matrix A has eigenvalues with absolute value ≥ 2. This is either a highly unstable system, or you have forgotten to discretize a continuous-time model. If you are sure that the system is provided in discrete time, you can disable this warning by setting check=false." maxlog=1
end
R1 = cholesky(R1).U
R2 = cholesky(R2).U
SqKalmanFilter(A,B,C,D,R1,R2,MvNormal(Matrix(R2'R2)), d0, Vector(d0.μ), UpperTriangular(Matrix(cholesky(d0.Σ).U)), Ref(1), p, α)
end



function Base.getproperty(kf::SqKalmanFilter, s::Symbol)
s ∈ fieldnames(typeof(kf)) && return getfield(kf, s)
if s === :nu
return size(kf.B, 2)
elseif s === :ny
return size(kf.R2, 1)
elseif s === :nx
return size(kf.R1, 1)
else
throw(ArgumentError("$(typeof(kf)) has no property named $s"))
end
end

sample_state(kf::SqKalmanFilter, p=parameters(kf); noise=true) = noise ? rand(kf.d0) : mean(kf.d0)
sample_state(kf::SqKalmanFilter, x, u, p=parameters(kf), t=0; noise=true) = kf.A*x .+ kf.B*u .+ noise*get_mat(kf.R1, x, u, p, t)*rand(kf.nx)
sample_measurement(kf::SqKalmanFilter, x, u, p=parameters(kf), t=0; noise=true) = kf.C*x .+ kf.D*u .+ noise*get_mat(kf.R2, x, u, p, t)*rand(kf.ny)
covariance(kf::SqKalmanFilter) = kf.R'kf.R
covtype(kf::SqKalmanFilter) = typeof(kf.R.data)

"""
reset!(kf::SqKalmanFilter; x0)
Reset the initial distribution of the state. Optionally, a new mean vector `x0` can be provided.
"""
function reset!(kf::SqKalmanFilter; x0 = kf.d0.μ)
kf.x .= Vector(x0)
kf.R .= cholesky(kf.d0.Σ).U
kf.t[] = 1
end

"""
predict!(kf::SqKalmanFilter, u, p = parameters(kf), t::Real = index(kf); R1 = get_mat(kf.R1, kf.x, u, p, t))
For the square-root Kalman filter, a custom provided `R1` must be the upper triangular Cholesky factor of the covariance matrix of the process noise.
"""
function predict!(kf::SqKalmanFilter, u, p=parameters(kf), t::Real = index(kf); R1 = get_mat(kf.R1, kf.x, u, p, t))
@unpack A,B,x,R = kf
At = get_mat(A, x, u, p, t)
Bt = get_mat(B, x, u, p, t)
x .= At*x .+ Bt*u |> vec
if kf.α == 1
R .= UpperTriangular(qr!([R*At';R1]).R)
else
R .= UpperTriangular(qr!([sqrt(kf.α)*R*At';R1]).R) # symmetrize(kf.α*At*R*At') + R1
end
kf.t[] += 1
end


"""
correct!(kf::SqKalmanFilter, u, y, p = parameters(kf), t::Real = index(kf); R2 = get_mat(kf.R2, kf.x, u, p, t))
For the square-root Kalman filter, a custom provided `R2` must be the upper triangular Cholesky factor of the covariance matrix of the measurement noise.
"""
function correct!(kf::SqKalmanFilter, u, y, p=parameters(kf), t::Real = index(kf); R2 = get_mat(kf.R2, kf.x, u, p, t))
@unpack C,D,x,R = kf
Ct = get_mat(C, x, u, p, t)
Dt = get_mat(D, x, u, p, t)
e = y .- Ct*x
if !iszero(D)
e .-= Dt*u
end
S0 = qr([R*Ct';R2]).R
S = UpperTriangular(S0)
if det(S) < 0 # Cheap for triangular matrices
@. S0 = -S0 # To avoid log(negative) in logpdf
end
K = ((R'*(R*Ct'))/S)/(S')
x .+= K*e
R .= UpperTriangular(qr!([R*(I - K*Ct)';R2*K']).R)
SS = S'S
Sᵪ = Cholesky(S0, 'U', 0)
ll = logpdf(MvNormal(PDMat(SS, Sᵪ)), e)# - 1/2*logdet(S) # logdet is included in logpdf
(; ll, e, SS, Sᵪ, K)
end
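To complement the docstrings above, here is a minimal usage sketch of the new filter. It is not part of the commit: the model matrices are placeholders, and `simulate` (from the package) is assumed to return state, input and measurement trajectories `(x, u, y)` that can be fed back into `forward_trajectory`.

```julia
using LowLevelParticleFilters, LinearAlgebra, Distributions

# Hypothetical second-order model; all numbers are illustrative placeholders.
A  = [1.0 0.1; 0.0 1.0]
B  = reshape([0.005, 0.1], 2, 1)
C  = [1.0 0.0]
R1 = [1e-4 0.0; 0.0 1e-4]   # process-noise covariance (the constructor stores its Cholesky factor)
R2 = fill(0.01, 1, 1)       # measurement-noise covariance
d0 = MvNormal(zeros(2), Matrix(1.0I, 2, 2))

sqkf = SqKalmanFilter(A, B, C, 0, R1, R2, d0)  # D = 0, as in the tests below

du = MvNormal(zeros(1), Matrix(1.0I, 1, 1))    # distribution to draw input signals from
x, u, y = simulate(sqkf, 200, du)              # 200 steps of data simulated from the filter's own model

sol = forward_trajectory(sqkf, u, y)           # run the filter over the whole trajectory
sol.x             # one-step-ahead (prior) state estimates
sol.xt            # filtered (posterior) state estimates
covariance(sqkf)  # full covariance, reconstructed from the stored upper-triangular factor as R'R
```

A single step can equally be taken manually with `predict!(sqkf, u[1])` followed by `correct!(sqkf, u[1], y[1])`, mirroring the functions defined above.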
10 changes: 10 additions & 0 deletions test/runtests.jl
@@ -162,6 +162,16 @@ mvnormal(μ::AbstractVector{<:Real}, σ::Real) = MvNormal(μ, float(σ) ^ 2 * I)
# plot!(reduce(hcat,xT)')


sqkf = SqKalmanFilter(A_test, B_test, C_test, 0, 0.01eye(n), eye(p), d0)
sqksol = forward_trajectory(sqkf, u, y)
@test ksol.x ≈ sqksol.x
@test ksol.xt ≈ sqksol.xt
@test ksol.R ≈ sqksol.R
@test ksol.Rt ≈ sqksol.Rt

@test_nowarn simulate(sqkf, T, du)



svec = exp10.(LinRange(-1,2,22))
llspf = map(svec) do s
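Closing with a standalone sketch (plain `LinearAlgebra`, made-up matrices) of the identity that the QR-based updates in `predict!` and `correct!` above rely on: stacking the current upper-triangular covariance factor with the noise factor and taking the `R` factor of a QR factorization propagates the covariance without ever forming it explicitly.

```julia
using LinearAlgebra

A  = [1.0 0.1; 0.0 1.0]   # illustrative dynamics matrix
P  = [2.0 0.3; 0.3 1.0]   # illustrative state covariance
R1 = [0.5 0.0; 0.0 0.2]   # illustrative process-noise covariance

U  = cholesky(P).U        # upper-triangular factor: P  == U'U
U1 = cholesky(R1).U       # upper-triangular factor: R1 == U1'U1

Rnew = qr([U*A'; U1]).R   # the stacked QR step, as in `predict!`
@assert Rnew'Rnew ≈ A*P*A' + R1   # matches the standard covariance update A*P*A' + R1
```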
