diff --git a/Project.toml b/Project.toml index eade767..673f555 100644 --- a/Project.toml +++ b/Project.toml @@ -10,6 +10,7 @@ GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a" IterativeSolvers = "42fd0dbc-a981-5370-80f2-aaf504508153" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" +Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" QuadGK = "1fd47b50-473d-5c70-9696-f719f8f3bcdc" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" diff --git a/src/RobustModels.jl b/src/RobustModels.jl index 7d57ca6..9933bb4 100644 --- a/src/RobustModels.jl +++ b/src/RobustModels.jl @@ -1,5 +1,7 @@ module RobustModels +using Pkg: Pkg + include("compat.jl") # Use README as the docstring of the module and doctest README @@ -10,10 +12,10 @@ end RobustModels # Import with `using` to use the module names to prefix the methods # that are extended from these modules -using GLM -using StatsAPI -using StatsBase -using StatsModels +using GLM: GLM +using StatsAPI: StatsAPI +using StatsBase: StatsBase +using StatsModels: StatsModels ## Import to implement new methods import Base: show, broadcastable, convert, == @@ -73,18 +75,21 @@ using LinearAlgebra: inv, diag, diagm, + rank, ldiv! using Random: AbstractRNG, GLOBAL_RNG using Printf: @printf, @sprintf -using GLM: FPVector, lm, SparsePredChol, DensePredChol, DensePredQR +using GLM: FPVector, lm, SparsePredChol, DensePredChol using StatsBase: AbstractWeights, CoefTable, ConvergenceException, median, mad, mad_constant, sample using StatsModels: @delegate, @formula, + formula, RegressionModel, FormulaTerm, + InterceptTerm, ModelFrame, modelcols, apply_schema, @@ -238,6 +243,7 @@ abstract type AbstractRegularizedPred{T} end Base.broadcastable(m::T) where {T<:AbstractEstimator} = Ref(m) Base.broadcastable(m::T) where {T<:LossFunction} = Ref(m) + include("tools.jl") include("losses.jl") include("estimators.jl") diff --git a/src/compat.jl b/src/compat.jl index ab063f1..26619fa 100644 --- a/src/compat.jl +++ b/src/compat.jl @@ -1,4 +1,10 @@ -using LinearAlgebra: cholesky! +using LinearAlgebra: cholesky!, qr! + +function get_pkg_version(m::Module) + toml = Pkg.TOML.parsefile(joinpath(pkgdir(m), "Project.toml")) + return VersionNumber(toml["version"]) +end + ## Compatibility layers @@ -6,5 +12,13 @@ using LinearAlgebra: cholesky! @static if VERSION < v"1.8.0-DEV.1139" pivoted_cholesky!(A; kwargs...) = cholesky!(A, Val(true); kwargs...) else + using LinearAlgebra: RowMaximum pivoted_cholesky!(A; kwargs...) = cholesky!(A, RowMaximum(); kwargs...) end + +@static if VERSION < v"1.7.0" + pivoted_qr!(A; kwargs...) = qr!(A, Val(true); kwargs...) +else + using LinearAlgebra: ColumnNorm + pivoted_qr!(A; kwargs...) = qr!(A, ColumnNorm(); kwargs...) +end diff --git a/src/linpred.jl b/src/linpred.jl index 5e61e78..79c77bf 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -50,45 +50,143 @@ leverage_weights(p::LinPred, wt::AbstractVector) = sqrt.(1 .- leverage(p, wt)) # beta0 #end -""" - DensePredQR -A `LinPred` type with a dense, unpivoted QR decomposition of `X` +########################################## +###### DensePredQR +########################################## -# Members +@static if get_pkg_version(GLM) < v"1.9" + @warn( + "GLM.DensePredQR(X::AbstractMatrix, pivot::Bool=true) is not defined, " * + "fallback to unpivoted RobustModels.DensePredQR definition. " * + "To use pivoted QR, GLM version should be greater than or equal to v1.9." + ) -- `X`: Model matrix of size `n` × `p` with `n ≥ p`. Should be full column rank. -- `beta0`: base coefficient vector of length `p` -- `delbeta`: increment to coefficient vector, also of length `p` -- `scratchbeta`: scratch vector of length `p`, used in `linpred!` method -- `qr`: a `QRCompactWY` object created from `X`, with optional row weights. -""" -DensePredQR - -PRED_QR_WARNING_ISSUED = false - -function qrpred(X::AbstractMatrix, pivot::Bool=false) - try - return DensePredCG(Matrix(X), pivot) - catch e - if e isa MethodError - # GLM.DensePredCG(X::AbstractMatrix, pivot::Bool) is not defined - global PRED_QR_WARNING_ISSUED - if !PRED_QR_WARNING_ISSUED - @warn( - "GLM.DensePredCG(X::AbstractMatrix, pivot::Bool) is not defined, " * - "fallback to unpivoted QR. GLM version should be >= 1.9." - ) - PRED_QR_WARNING_ISSUED = true + using LinearAlgebra: QRCompactWY, QRPivoted, Diagonal, qr!, qr + + """ + DensePredQR + + A `LinPred` type with a dense QR decomposition of `X` + + # Members + + - `X`: Model matrix of size `n` × `p` with `n ≥ p`. Should be full column rank. + - `beta0`: base coefficient vector of length `p` + - `delbeta`: increment to coefficient vector, also of length `p` + - `scratchbeta`: scratch vector of length `p`, used in `linpred!` method + - `qr`: a `QRCompactWY` object created from `X`, with optional row weights. + - `scratchm1`: scratch Matrix{T} of the same size as `X` + - `scratchm2`: scratch Matrix{T} of the same size as `X` + - `scratchR`: scratch Matrix{T} of the same size as `qr.R`, a square matrix. + """ + mutable struct DensePredQR{T<:BlasReal,Q<:Union{QRCompactWY,QRPivoted}} <: DensePred + X::Matrix{T} # model matrix + beta0::Vector{T} # base coefficient vector + delbeta::Vector{T} # coefficient increment + scratchbeta::Vector{T} + qr::Q + scratchm1::Matrix{T} + scratchm2::Matrix{T} + scratchR::Matrix{T} + + function DensePredQR(X::AbstractMatrix, pivot::Bool=false) + n, p = size(X) + T = typeof(float(zero(eltype(X)))) + + if false + # if pivot + F = pivoted_qr!(copy(X)) + else + if n >= p + F = qr(X) + else + # adjoint of X so R is square + # cannot use in-place qr! + F = qr(X) + end end - return DensePredCG(Matrix(X)) + + return new{T,typeof(F)}( + Matrix{T}(X), + zeros(T, p), + zeros(T, p), + zeros(T, p), + F, + similar(X, T), + similar(X, T), + zeros(T, size(F.R)), + ) + end + end + + # GLM.DensePredQR(X::AbstractMatrix, pivot::Bool) is not defined + function qrpred(X::AbstractMatrix, pivot::Bool=false) + return DensePredQR(Matrix(X)) + end + + # GLM.delbeta!(p::DensePredQR{T}, r::Vector{T}) is ill-defined + function delbeta!(p::DensePredQR{T,<:QRCompactWY}, r::Vector{T}) where {T<:BlasReal} + n, m = size(p.X) + if n >= m + p.delbeta = p.qr \ r + else + p.delbeta = p.qr' \ r + end + return p + end + + # GLM.delbeta!(p::DensePredQR{T}, r::Vector{T}, wt::Vector{T}) is not defined + function delbeta!( + p::DensePredQR{T,<:QRCompactWY}, r::Vector{T}, wt::Vector{T} + ) where {T<:BlasReal} + rnk = rank(p.qr.R) + X = p.X + W = Diagonal(wt) + sqrtW = Diagonal(sqrt.(wt)) + scratchm1 = p.scratchm1 = similar(X, T) + mul!(scratchm1, sqrtW, X) + + n, m = size(X) + if n >= m + # W½ X = Q R , with Q'Q = I + # X'WX β = X'y => R'Q'QR β = X'y + # => β = R⁻¹ R⁻ᵀ X'y + qnr = p.qr = qr(scratchm1) + Rinv = p.scratchR = inv(qnr.R) + + scratchm2 = p.scratchm2 = similar(X, T) + mul!(scratchm2, W, X) + mul!(p.delbeta, transpose(scratchm2), r) + + p.delbeta = Rinv * Rinv' * p.delbeta else - rethrow() + # (W½ X)' = Q R , with Q'Q = I + # W½X β = W½y => R'Q' β = y + # => β = Q . [R⁻ᵀ y; 0] + qnrT = p.qr = qr(scratchm1') + RTinv = p.scratchR = inv(qnrT.R)' + @assert 1 <= n <= size(p.delbeta, 1) + mul!(view(p.delbeta, 1:n), RTinv, r) + p.delbeta = zeros(size(p.delbeta)) + p.delbeta[1:n] .= RTinv * r + lmul!(qnrT.Q, p.delbeta) end + return p end + + + ## Use DensePredQR from GLM +else + using GLM: DensePredQR + import GLM: qrpred end +########################################## +###### [Dense/Sparse]PredCG +########################################## + """ DensePredCG @@ -109,20 +207,8 @@ mutable struct DensePredCG{T<:BlasReal} <: DensePred scratchbeta::Vector{T} scratchm1::Matrix{T} scratchr1::Vector{T} - function DensePredCG{T}(X::Matrix{T}, beta0::Vector{T}) where {T} - n, p = size(X) - length(beta0) == p || throw(DimensionMismatch("length(β0) ≠ size(X,2)")) - return new{T}( - X, - beta0, - zeros(T, p), - zeros(T, (p, p)), - zeros(T, p), - zeros(T, (n, p)), - zeros(T, n), - ) - end - function DensePredCG{T}(X::Matrix{T}) where {T} + + function DensePredCG(X::Matrix{T}) where {T<:BlasReal} n, p = size(X) return new{T}( X, @@ -135,10 +221,8 @@ mutable struct DensePredCG{T<:BlasReal} <: DensePred ) end end -DensePredCG(X::Matrix, beta0::Vector) = DensePredCG{eltype(X)}(X, beta0) -DensePredCG(X::Matrix{T}) where {T} = DensePredCG{T}(X, zeros(T, size(X, 2))) function Base.convert(::Type{DensePredCG{T}}, X::Matrix{T}) where {T} - return DensePredCG{T}(X, zeros(T, size(X, 2))) + return DensePredCG(X) end # Compatibility with cholpred(X, pivot) diff --git a/src/regularizedpred.jl b/src/regularizedpred.jl index 9896b39..513aac8 100644 --- a/src/regularizedpred.jl +++ b/src/regularizedpred.jl @@ -154,6 +154,9 @@ function postupdate_λ!(r::RidgePred) # Update the extended model matrix with the new value GG = r.sqrtλ * r.G @views r.pred.X[(n + 1):(n + m), :] .= GG + + # Update other fields + # TODO: update DensePredQR if isa(r.pred, DensePredChol) # Recompute the cholesky decomposition X = r.pred.X diff --git a/src/robustlinearmodel.jl b/src/robustlinearmodel.jl index 38c4e79..86ee199 100644 --- a/src/robustlinearmodel.jl +++ b/src/robustlinearmodel.jl @@ -56,7 +56,18 @@ end ## TODO: specialize to make it faster StatsAPI.leverage(m::AbstractRobustModel) = diag(projectionmatrix(m)) -leverage_weights(m::AbstractRobustModel) = sqrt.(1 .- leverage(m)) +leverage_weights(m::AbstractRobustModel) = sqrt.(1 .- clamp.(leverage(m), 0, 1)) + +## Throw a MethodError for unspecified `args` to avoid StackOverflowError +function StatsAPI.fit( + ::Type{M}, + X::AbstractMatrix{<:AbstractFloat}, + y::AbstractVector{<:AbstractFloat}, + args...; + kwargs..., +) where {M<:AbstractRobustModel} + throw(MethodError(fit, (M, X, y, args...))) +end ## Convert to float, optionally drop rows with missing values (and convert to Non-Missing types) function StatsAPI.fit( @@ -86,7 +97,9 @@ function StatsAPI.fit( X, y, _ = missing_omit(X, y) end - return fit(M, float(X), float(y), args...; kwargs...) + # Make sure X and y have the same float eltype + T = promote_type(float(eltype(X)), float(eltype(y))) + return fit(M, convert.(T, X), convert.(T, y), args...; kwargs...) end ## Convert from formula-data to modelmatrix-response calling form @@ -156,6 +169,7 @@ function StatsModels.formula(m::RobustLinearModel) return m.formula end + """ deviance(m::RobustLinearModel) @@ -428,7 +442,7 @@ rlm(X, y, args...; kwargs...) = fit(RobustLinearModel, X, y, args...; kwargs...) ridgeG::Union{UniformScaling, AbstractArray} = I, βprior::AbstractVector = [], quantile::Union{Nothing, AbstractFloat} = nothing, - pivot::Bool = false, + dropcollinear::Bool = false, initial_scale::Union{Symbol, Real}=:mad, σ0::Union{Nothing, Symbol, Real}=initial_scale, initial_coef::AbstractVector=[], @@ -467,7 +481,8 @@ using a robust estimator. Default to `zeros(p)`; - `quantile::Union{Nothing, AbstractFloat} = nothing`: only for [`GeneralizedQuantileEstimator`](@ref), define the quantile to estimate; -- `pivot::Bool=false`: use pivoted factorization; +- `dropcollinear::Bool=false`: controls whether or not a model matrix less-than-full rank is + accepted; - `contrasts::AbstractDict{Symbol,Any} = Dict{Symbol,Any}()`: a `Dict` mapping term names (as `Symbol`s) to term types (e.g. `ContinuousTerm`) or contrasts (e.g., `HelmertCoding()`, `SeqDiffCoding(; levels=["a", "b", "c"])`, etc.). If contrasts are not provided for a variable, @@ -493,7 +508,7 @@ the RobustLinearModel object. """ function StatsAPI.fit( ::Type{M}, - X::Union{AbstractMatrix{T},SparseMatrixCSC{T}}, + X::AbstractMatrix{T}, y::AbstractVector{T}, est::AbstractMEstimator; method::Symbol=:chol, # :qr, :cg @@ -508,7 +523,7 @@ function StatsAPI.fit( ridgeG::Union{UniformScaling,AbstractArray{<:Real}}=I, βprior::AbstractVector{<:Real}=T[], quantile::Union{Nothing,AbstractFloat}=nothing, - pivot::Bool=false, + dropcollinear::Bool=false, contrasts::AbstractDict{Symbol,Any}=Dict{Symbol,Any}(), # placeholder __formula::Union{Nothing,FormulaTerm}=nothing, fitargs..., @@ -539,7 +554,7 @@ function StatsAPI.fit( rr = RobustLinResp(est, y, offset, wts) # Predictor object - methods = (:auto, :chol, :cg, :qr) + methods = (:auto, :chol, :cholesky, :cg, :qr) if method ∉ methods @warn("Incorrect method `:$(method)`, should be one of $(methods)") method = :auto @@ -559,22 +574,22 @@ function StatsAPI.fit( ridgeG end if method == :cg - cgpred(X, float(ridgeλ), G, βprior, pivot) + cgpred(X, float(ridgeλ), G, βprior, dropcollinear) elseif method == :qr - qrpred(X, float(ridgeλ), G, βprior, pivot) - elseif method in (:chol, :auto) - cholpred(X, float(ridgeλ), G, βprior, pivot) + qrpred(X, float(ridgeλ), G, βprior, dropcollinear) + elseif method in (:chol, :cholesky, :auto) + cholpred(X, float(ridgeλ), G, βprior, dropcollinear) else error("method :$method is not allowed, should be in: $methods") end else # No regularization if method == :cg - cgpred(X, pivot) + cgpred(X, dropcollinear) elseif method == :qr - qrpred(X, pivot) - elseif method in (:chol, :auto) - cholpred(X, pivot) + qrpred(X, dropcollinear) + elseif method in (:chol, :cholesky, :auto) + cholpred(X, dropcollinear) else error("method :$method is not allowed, should be in: $methods") end @@ -1350,19 +1365,20 @@ function resampling_best_estimate( ## TODO: implement something similar to DetS (not sure it could apply) ## Hubert2015 - The DetS and DetMM estimators for multivariate location and scatter ## (https://www.sciencedirect.com/science/article/abs/pii/S0167947314002175) + M = length(coef(m)) if isnothing(Nsamples) - Nsamples = resampling_minN(dof(m), 0.05, propoutliers) + Nsamples = resampling_minN(M, 0.05, propoutliers) end if isnothing(Npoints) - Npoints = dof(m) + Npoints = M end Nsubsamples = min(Nsubsamples, Nsamples) verbose && println("Start $(Nsamples) subsamples...") σis = zeros(Nsamples) - βis = zeros(dof(m), Nsamples) + βis = zeros(M, Nsamples) for i in 1:Nsamples # TODO: to parallelize, make a deepcopy of m inds = sample(rng, axes(response(m), 1), Npoints; replace=false, ordered=false) diff --git a/test/interface.jl b/test/interface.jl index fe52707..ac7be31 100644 --- a/test/interface.jl +++ b/test/interface.jl @@ -37,7 +37,7 @@ est2 = MEstimator(loss2) ) β = copy(coef(m)) VERBOSE && println("rlm($name): ", β) - @test_nowarn println(m) + @test_nowarn show(devnull, m) @test isapprox(coef(m1), β; rtol=1e-5) # make sure that it is not a TableRegressionModel @@ -45,7 +45,7 @@ est2 = MEstimator(loss2) # refit refit!(m, y; verbose=false, initial_scale=:extrema) - @test_nowarn println("$m") + @test_nowarn show(devnull, "$m") @test all(coef(m) .== β) # refit diff --git a/test/linearfit.jl b/test/linearfit.jl index 7659d6a..241abe7 100644 --- a/test/linearfit.jl +++ b/test/linearfit.jl @@ -22,9 +22,9 @@ est2 = MEstimator(loss2) @test all(isfinite.(coef(m2))) if name != "L1" - @test_nowarn println(m2) + @test_nowarn show(devnull, m2) # else - # @test_warn L1_warning println(m2) + # @test_warn L1_warning show(devnull, m2) end VERBOSE && println("\n\t\u25CF Estimator: $(name)") diff --git a/test/mquantile.jl b/test/mquantile.jl index 129b514..fe2d71f 100644 --- a/test/mquantile.jl +++ b/test/mquantile.jl @@ -70,7 +70,7 @@ loss2 = RobustModels.TukeyLoss() VERBOSE && println("rlm(qr): ", coef(m4)) if name == "Expectile" - @test_nowarn println(m2) + @test_nowarn show(devnull, m2) @test isapprox(coef(m2), coef(m3); rtol=1e-4) @test isapprox(coef(m2), coef(m4); rtol=1e-4) @@ -82,7 +82,7 @@ loss2 = RobustModels.TukeyLoss() refit!(m1; quantile=τ) @test isapprox(coef(m1), coef(m2); rtol=1e-4) else - # @test_warn L1_warning println(m2) + # @test_warn L1_warning show(devnull, m2) @test isapprox(coef(m2), coef(m3); rtol=1e-2) @test isapprox(coef(m2), coef(m4); rtol=1e-2) diff --git a/test/robustridge.jl b/test/robustridge.jl index 87b22a4..fa9181d 100644 --- a/test/robustridge.jl +++ b/test/robustridge.jl @@ -33,29 +33,35 @@ est2 = MEstimator(loss2) kwargs = (; method=method, initial_scale=:L1) # use the dispersion from GLM to ensure that the loglikelihood is correct m2 = fit(RobustLinearModel, A, b, est; kwargs...) - m3 = fit(RobustLinearModel, A, b, est; ridgeλ=1, kwargs...) + m3 = fit(RobustLinearModel, A, b, est; ridgeλ=10, kwargs...) m4 = fit( - RobustLinearModel, A, b, est; ridgeλ=1, ridgeG=float([0 0; 0 1]), kwargs... + RobustLinearModel, A, b, est; ridgeλ=10, ridgeG=float([0 0; 0 1]), kwargs... ) m5 = fit(RobustLinearModel, A, b, est; ridgeλ=0.1, ridgeG=[0, 1], kwargs...) - m6 = rlm(A, b, est; ridgeλ=1, ridgeG=[0, 1], βprior=[0.0, 2.0], kwargs...) + m6 = rlm(A, b, est; ridgeλ=0.1, ridgeG=[0, 1], dropcollinear=true, kwargs...) + m7 = rlm(A, b, est; ridgeλ=10, ridgeG=[0, 1], βprior=[0.0, 2.0], kwargs...) VERBOSE && println("\n\t\u25CF Estimator: $(name)") VERBOSE && println(" lm$(aspace) : ", coef(m1)) VERBOSE && println("rlm($(method)) : ", coef(m2)) - VERBOSE && println("ridge λ=1 rlm3($(method)): ", coef(m3)) - VERBOSE && println("ridge λ=1 rlm4($(method)): ", coef(m4)) + VERBOSE && println("ridge λ=10 rlm3($(method)): ", coef(m3)) + VERBOSE && println("ridge λ=10 rlm4($(method)): ", coef(m4)) VERBOSE && println("ridge λ=0.1 rlm5($(method)): ", coef(m5)) - VERBOSE && println("ridge λ=1 βprior=[0,2] rlm6($(method)): ", coef(m6)) + VERBOSE && println("ridge λ=0.1 dropcollinear=true rlm6($(method)): ", coef(m6)) + VERBOSE && println("ridge λ=10 βprior=[0,2] rlm7($(method)): ", coef(m7)) @test isapprox(coef(m3), coef(m4); rtol=1e-5) # Test printing the model @test_nowarn println(m3) # refit - refit!(m5; ridgeλ=1, initial_scale=:L1) - @test isapprox(m5.pred.λ, 1.0; rtol=1e-5) - @test isapprox(coef(m3), coef(m5); rtol=1e-5) + refit!(m5; ridgeλ=10, initial_scale=:L1) + @test isapprox(m5.pred.λ, 10.0; rtol=1e-5) + @test isapprox(coef(m3), coef(m5); rtol=1e-6) + + refit!(m6; ridgeλ=10, initial_scale=:L1) + @test isapprox(m6.pred.λ, 10.0; rtol=1e-5) + @test isapprox(coef(m3), coef(m6); rtol=1e-6) end end diff --git a/test/runtests.jl b/test/runtests.jl index 5fed115..5794a3b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -49,7 +49,7 @@ bounded_losses = ("Geman", "Welsch", "Tukey", "YohaiZamar", "HardThreshold", "Ha losses = (convex_losses..., other_losses..., bounded_losses...) ## Solving methods -nopen_methods = (:auto, :chol, :qr, :cg) +nopen_methods = (:auto, :chol, :cholesky, :qr, :cg) ## Interface methods interface_methods = (