まぃふぇいばりっと

機械学習やってます.Julia大好きです.勉強したことの殴り書きです.

Julia言語 Randomizedな方法での固有値計算の高速化のためにSRFTをやってみたけどうまくいかなかった

いままで実装してきたRandomizedな方法での固有値計算(RED)では,アルゴリズム中にランダム行列と入力行列との積を計算する必要がある.これにはO(nmr)かかる訳だけど,SRFTとかいう方法でO(nm log r)に高速化できるらしい.SRFTというのは特別な条件を満たす行列っぽくて,FFTとか使って行列積の計算が高速化できるらしい.アルゴリズムを調べてもよくわからなかったんだけれども,LowRankApprox.jlのモジュールにSRFTという名前のついた関数がたくさんあったので,必要な部分をコピペして,いままで実装してきたものにくっつけて動かしてみた.

github.com

具体的にはこの部分. 結論から言うとうまくいかなかった.

やっぱりもっとちゃんと勉強して,どうやって高速化するのかを自分で考えてみようと思う. 以下,実験に使ったコード.

using LinearAlgebra
using Plots
using Statistics
import FFTW: plan_r2r!, R2HC,r2rFFTWPlan

"""
We cited srft functions from [LowRankApprox library](https://github.com/JuliaMatrices/LowRankApprox.jl)
"""
function srft_apply!(
    y::AbstractVecOrMat{T}, X::AbstractMatrix{T}, idx::AbstractVector,
    r2rplan!::r2rFFTWPlan) where T<:Real
  l, m = size(X)
  n = l*m
  k = length(idx)
  mul!(X, r2rplan!, X)
  wn = exp(-2im*pi/n)
  wm = exp(-2im*pi/m)
  nnyq = div(n, 2)
  cnyq = div(l, 2)
  i = 1
  @inbounds while i <= k
    idx_ = idx[i] - 1
    row = fld(idx_, l) + 1
    col = rem(idx_, l) + 1
    w = wm^(row - 1)*wn^(col - 1)

    # find indices of real/imag parts
    col_ = col - 1
    cswap = col_ > cnyq
    ia = cswap ? l - col_ : col_
    ib = col_ > 0 ? l - ia : 0

    # initialze next entry to fill
    y[i] = 0
    s = one(T)

    # compute only one entry if purely real or no more space
    if in == 0 || in == nnyq || i == k
      for j = 1:m
        a = X[ia+1,j]
        b = ib == 0 || ib == ia ? zero(T) : X[ib+1,j]
        b = cswap ? -b : b
        y[i] += real(s*(a + b*im))
        s *= w
      end

    # else compute one entry each for real/imag parts
    else
      y[i+1] = 0
      for j = 1:m
        a = X[ia+1,j]
        b = ib == 0 || ib == ia ? zero(T) : X[ib+1,j]
        b = cswap ? -b : b
        z = s*(a + b*im)
        y[i  ] += real(z)
        y[i+1] += imag(z)
        s *= w
      end
      i += 1
    end
    i += 1
  end
end

function srft_reshape!(X::AbstractMatrix, d::AbstractVector, x::AbstractVecOrMat)
    l, m = size(X)
    i = 0
    @inbounds for j = 1:l, k=1:m
        i += 1
        X[j,k] = d[i] * x[i]
    end
end

function srft_rand(::Type{T},n::Integer) where T<:Real
    x = rand(n)
    @simd for i = 1:n
        @inbounds x[i] = 2*(x[i] > 0.5) - 1
    end
    x
end

function srft_init(::Type{T}, n::Integer, k::Integer) where T<:Real
    l = k
    while n % l > 0
        l -= 1
    end
    m = div(n,l)
    X = Array{T}(undef, l, m)
    d = srft_rand(T,n)
    idx = rand(1:n,k)
    r2rplan! = plan_r2r!(X, R2HC, 1)
    X, d, idx, r2rplan!
end

"""
Get a product between m \times n matrix 'A' and 'n \times k` srft.
"""
function get_Y(A::AbstractMatrix{T}, k) where T
    m, n = size(A)
    Y = Matrix{Float64}(undef,m,k)
    X, d, idx, fftplan! = srft_init(T, n, k)
    for i=1:m
        srft_reshape!(X, d, view(A,i,:))
        srft_apply!(view(Y,i,:),X,idx,fftplan!)
    end
    return Y
end

"""
Randomized eigenvalue decomposition via Nystrom Method.
See Fig 6 in the [original paper](https://arxiv.org/abs/1607.01649)
or See Algorithm 5.5 in the [original paper](https://epubs.siam.org/doi/10.1137/090771806)
This method is available for only a positive definite matrix.

# Aruguments
- 'A' : input positive definite matrix
- 'k' : target rank, number of wanted eigenvalues
- 'p' : over-sampling parameter, it should be 10 or 2k.
- 'use_srft' : if ture, use subsampled Fourier transformation.
- 'reconst' : if ture, the output is reconstracted matrix which approximates 'A'

example:
    # make a positive definite matrix `A`.
    n = 5000; m = 25000; x = randn(m,n)
    demean_x = x .- mean(x, dims=1)
    A = demean_x'*demean_x ./(m-1)

    # run evdnys
    lowrank_mtx = evdnys(A,10, reconst=true)
"""
function evdnys(A::Symmetric, k::Int;p=10, use_srft=false, reconst=false)
    n, n = size(A)
    if use_srft
        Y = get_Y(A,k+p)
    else
        G = randn(n, k+p)
        Y = A * G
    end
    Q = qr(Y).Q*Matrix(I,n,k+p)
    B1 = A*Q
    B2 = Q'*B1
    C = cholesky(Symmetric(B2))
    F = B1 * inv(C.U)
    U, Σ, _ = svd(F)

    if reconst
        return U*diagm(Σ.^2)*U'
    else
        return U, Σ.^2
    end
end

"""
Randomized eigenvalue decomposition proposed by N.halko et al.
See Algorithm 5.6. in the
[original paper](https://epubs.siam.org/doi/10.1137/090771806).
This method is effective matrices with decaying eigenvalues.

# Aruguments
- 'A' : input positive definite matrix
- 'r' : target rank, number of wanted eigenvalues
- 'p' : over-sampling parameter, it should be 10 or 2k.
- 'use_srft' : if ture, use subsampled Fourier transformation.
- 'reconst' : if ture, the output is reconstracted matrix which approximates 'A'

example:
    # make a matrix with decaying eigenvalues
    n = 1500; A = rand(n,n); B = zeros(n,n)
    e, v = eigen(A)
    for i in 1:n
        B += e[i] * v[:,i] * v[:,i]' * (i>n-100 ? 1 : 1e-3)
    end

    # run reigen
    lowrank_mtx = reigen(A, 10, reconst=true)
"""
function reigen(A, r ;p=10, twopass=false, use_srft=false, reconst=false)
    n = size(A)[1]
    m = r + p

    if use_srft
        Y = get_Y(A,m)
    else
        Ω = randn(n, m)
        Y = A*Ω
    end

    Q = qr!(Y).Q * Matrix(I,n,m)
    if twopass
        T = Q' * A * Q
    else
        if use_srft
            T = (Q' * Y) * ( (get_Y(Q',m))\I)
        else
            T = (Q' * Y) * ( (Q' * Ω)\I)
        end
    end
    λ, S = eigen(T)
    U = Q*S'
    if reconst
        Λ = Diagonal(λ)
        return U*Λ*U'
    else
        return U, λ
    end
end

SRFTを使うか,lapackを使うかで,45000×45000の正定値行列Aの固有値をk個取り出すのに要した時間をプロット.横軸がk.縦軸が計算時間(sec).下の図はreigenの場合について.うーん,全然はやくない.普通にlapack使った方がいい.

とりあえず,行列積だけでも計算が速くなっているか確認した.Y = get_Y(A,m)Y = A*randn(n,k)に必要な時間の差を調べた.うーん,やっぱり普通に計算するのが良さそう??

とりあえず,StackExchangeに投げてみた.なぜ低評価がついているか分からないが,誰か答えてくれると嬉しいです.

scicomp.stackexchange.com