いままで実装してきたRandomizedな方法での固有値計算(RED)では,アルゴリズム中にランダム行列と入力行列との積を計算する必要がある.これにはO(nmr)かかる訳だけど,SRFTとかいう方法でO(nm log r)に高速化できるらしい.SRFTというのは特別な条件を満たす行列っぽくて,FFTとか使って行列積の計算が高速化できるらしい.アルゴリズムを調べてもよくわからなかったんだけれども,LowRankApprox.jlのモジュールにSRFTという名前のついた関数がたくさんあったので,必要な部分をコピペして,いままで実装してきたものにくっつけて動かしてみた.
具体的にはこの部分. 結論から言うとうまくいかなかった.
やっぱりもっとちゃんと勉強して,どうやって高速化するのかを自分で考えてみようと思う. 以下,実験に使ったコード.
using LinearAlgebra using Plots using Statistics import FFTW: plan_r2r!, R2HC,r2rFFTWPlan """ We cited srft functions from [LowRankApprox library](https://github.com/JuliaMatrices/LowRankApprox.jl) """ function srft_apply!( y::AbstractVecOrMat{T}, X::AbstractMatrix{T}, idx::AbstractVector, r2rplan!::r2rFFTWPlan) where T<:Real l, m = size(X) n = l*m k = length(idx) mul!(X, r2rplan!, X) wn = exp(-2im*pi/n) wm = exp(-2im*pi/m) nnyq = div(n, 2) cnyq = div(l, 2) i = 1 @inbounds while i <= k idx_ = idx[i] - 1 row = fld(idx_, l) + 1 col = rem(idx_, l) + 1 w = wm^(row - 1)*wn^(col - 1) # find indices of real/imag parts col_ = col - 1 cswap = col_ > cnyq ia = cswap ? l - col_ : col_ ib = col_ > 0 ? l - ia : 0 # initialze next entry to fill y[i] = 0 s = one(T) # compute only one entry if purely real or no more space if in == 0 || in == nnyq || i == k for j = 1:m a = X[ia+1,j] b = ib == 0 || ib == ia ? zero(T) : X[ib+1,j] b = cswap ? -b : b y[i] += real(s*(a + b*im)) s *= w end # else compute one entry each for real/imag parts else y[i+1] = 0 for j = 1:m a = X[ia+1,j] b = ib == 0 || ib == ia ? zero(T) : X[ib+1,j] b = cswap ? -b : b z = s*(a + b*im) y[i ] += real(z) y[i+1] += imag(z) s *= w end i += 1 end i += 1 end end function srft_reshape!(X::AbstractMatrix, d::AbstractVector, x::AbstractVecOrMat) l, m = size(X) i = 0 @inbounds for j = 1:l, k=1:m i += 1 X[j,k] = d[i] * x[i] end end function srft_rand(::Type{T},n::Integer) where T<:Real x = rand(n) @simd for i = 1:n @inbounds x[i] = 2*(x[i] > 0.5) - 1 end x end function srft_init(::Type{T}, n::Integer, k::Integer) where T<:Real l = k while n % l > 0 l -= 1 end m = div(n,l) X = Array{T}(undef, l, m) d = srft_rand(T,n) idx = rand(1:n,k) r2rplan! = plan_r2r!(X, R2HC, 1) X, d, idx, r2rplan! end """ Get a product between m \times n matrix 'A' and 'n \times k` srft. """ function get_Y(A::AbstractMatrix{T}, k) where T m, n = size(A) Y = Matrix{Float64}(undef,m,k) X, d, idx, fftplan! = srft_init(T, n, k) for i=1:m srft_reshape!(X, d, view(A,i,:)) srft_apply!(view(Y,i,:),X,idx,fftplan!) end return Y end """ Randomized eigenvalue decomposition via Nystrom Method. See Fig 6 in the [original paper](https://arxiv.org/abs/1607.01649) or See Algorithm 5.5 in the [original paper](https://epubs.siam.org/doi/10.1137/090771806) This method is available for only a positive definite matrix. # Aruguments - 'A' : input positive definite matrix - 'k' : target rank, number of wanted eigenvalues - 'p' : over-sampling parameter, it should be 10 or 2k. - 'use_srft' : if ture, use subsampled Fourier transformation. - 'reconst' : if ture, the output is reconstracted matrix which approximates 'A' example: # make a positive definite matrix `A`. n = 5000; m = 25000; x = randn(m,n) demean_x = x .- mean(x, dims=1) A = demean_x'*demean_x ./(m-1) # run evdnys lowrank_mtx = evdnys(A,10, reconst=true) """ function evdnys(A::Symmetric, k::Int;p=10, use_srft=false, reconst=false) n, n = size(A) if use_srft Y = get_Y(A,k+p) else G = randn(n, k+p) Y = A * G end Q = qr(Y).Q*Matrix(I,n,k+p) B1 = A*Q B2 = Q'*B1 C = cholesky(Symmetric(B2)) F = B1 * inv(C.U) U, Σ, _ = svd(F) if reconst return U*diagm(Σ.^2)*U' else return U, Σ.^2 end end """ Randomized eigenvalue decomposition proposed by N.halko et al. See Algorithm 5.6. in the [original paper](https://epubs.siam.org/doi/10.1137/090771806). This method is effective matrices with decaying eigenvalues. # Aruguments - 'A' : input positive definite matrix - 'r' : target rank, number of wanted eigenvalues - 'p' : over-sampling parameter, it should be 10 or 2k. - 'use_srft' : if ture, use subsampled Fourier transformation. - 'reconst' : if ture, the output is reconstracted matrix which approximates 'A' example: # make a matrix with decaying eigenvalues n = 1500; A = rand(n,n); B = zeros(n,n) e, v = eigen(A) for i in 1:n B += e[i] * v[:,i] * v[:,i]' * (i>n-100 ? 1 : 1e-3) end # run reigen lowrank_mtx = reigen(A, 10, reconst=true) """ function reigen(A, r ;p=10, twopass=false, use_srft=false, reconst=false) n = size(A)[1] m = r + p if use_srft Y = get_Y(A,m) else Ω = randn(n, m) Y = A*Ω end Q = qr!(Y).Q * Matrix(I,n,m) if twopass T = Q' * A * Q else if use_srft T = (Q' * Y) * ( (get_Y(Q',m))\I) else T = (Q' * Y) * ( (Q' * Ω)\I) end end λ, S = eigen(T) U = Q*S' if reconst Λ = Diagonal(λ) return U*Λ*U' else return U, λ end end
SRFTを使うか,lapackを使うかで,45000×45000の正定値行列Aの固有値をk個取り出すのに要した時間をプロット.横軸がk.縦軸が計算時間(sec).下の図はreigenの場合について.うーん,全然はやくない.普通にlapack使った方がいい.
とりあえず,行列積だけでも計算が速くなっているか確認した.Y = get_Y(A,m)
と Y = A*randn(n,k)
に必要な時間の差を調べた.うーん,やっぱり普通に計算するのが良さそう??
とりあえず,StackExchangeに投げてみた.なぜ低評価がついているか分からないが,誰か答えてくれると嬉しいです.