Skip to content

Commit

Permalink
Merge pull request #1 from EcoJulia/mkb/dev
Browse files Browse the repository at this point in the history
More efficient implementation
  • Loading branch information
mkborregaard authored Sep 2, 2018
2 parents a15141c + d2c3f46 commit 1ffb757
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 26 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@ matrix:
- julia: nightly

script:
- julia -e 'import Pkg; Pkg.add("https://github.com/EcoJulia/RandomBooleanMatrices.jl"); Pkg.build("RandomBooleanMatrices"); Pkg.test("RandomBooleanMatrices")'
- julia -e 'import Pkg; Pkg.clone(pwd()); Pkg.build("RandomBooleanMatrices"); Pkg.test("RandomBooleanMatrices")'
2 changes: 2 additions & 0 deletions REQUIRE
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
julia 1.0
RandomNumbers
StatsBase
22 changes: 13 additions & 9 deletions src/RandomBooleanMatrices.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
module RandomBooleanMatrices

using Random
using RandomNumbers.Xorshifts
using SparseArrays
using StatsBase

include("curveball.jl")

@enum matrixrandomizations curveball
Expand All @@ -11,20 +14,21 @@ include("curveball.jl")
Randomize the sparse boolean Matrix `m` while maintaining row and column sums
"""
function randomize_matrix!(m; method::matrixrandomizations = curveball)
function randomize_matrix!(m, rng = Random.GLOBAL_RNG; method::matrixrandomizations = curveball)
if method == curveball
return _curveball!(m)
return _curveball!(m, rng)
end
error("undefined method")
end

struct MatrixGenerator
struct MatrixGenerator{R<:AbstractRNG}
m::SparseMatrixCSC{Bool, Int}
method::matrixrandomizations
rng::R
end

"""
random_matrices(m; method = curveball)
random_matrices(m [,rng]; method = curveball)
Create a matrix generator function that will return a random boolean matrix
every time it is called, maintaining row and column sums. Non-boolean input
Expand All @@ -39,12 +43,12 @@ random1 = rmg()
random2 = rmg()
``
"""
random_matrices(m::AbstractMatrix; method::matrixrandomizations = curveball) =
MatrixGenerator(m, method) #for this case there's an implicit `copy` already
random_matrices(m::SparseMatrixCSC{Bool, Int}; method::matrixrandomizations = curveball) =
MatrixGenerator(copy(m), method)
random_matrices(m::AbstractMatrix, rng = Xoroshiro128Plus(); method::matrixrandomizations = curveball) =
MatrixGenerator{typeof(rng)}(dropzeros!(sparse(m)), method, rng)
random_matrices(m::SparseMatrixCSC{Bool, Int}, rng = Xoroshiro128Plus(); method::matrixrandomizations = curveball) =
MatrixGenerator{typeof(rng)}(dropzeros(m), method, rng)

(r::MatrixGenerator)(; method::matrixrandomizations = curveball) = copy(randomize_matrix!(r.m, method = r.method))
(r::MatrixGenerator)(; method::matrixrandomizations = curveball) = copy(randomize_matrix!(r.m, r.rng, method = r.method))

export randomize_matrix!, random_matrices
export curveball
Expand Down
63 changes: 47 additions & 16 deletions src/curveball.jl
Original file line number Diff line number Diff line change
@@ -1,51 +1,82 @@

function _interdif(v1, v2)
inter, dif = Int[], Int[]
function sortmerge!(v1, v2, ret, iend, jend)
retind = 0
i,j = firstindex(v1), firstindex(v2)
@inbounds while i <= iend || j <= jend
if j > jend
ret[retind += 1] = v1[i]
i+=1
continue
elseif i > iend
ret[retind += 1] = v2[j]
j+=1
continue
end
if v1[i] == v2[j]
error("The two vectors are not supposed to have overlapping values")
elseif j > lastindex(v2) || v1[i] < v2[j]
ret[retind += 1] = v1[i]
i+=1
elseif i > lastindex(v1) || v1[i] > v2[j]
ret[retind += 1] = v2[j]
j+=1
end
end
end


function _interdif!(v1, v2, inter, dif)
nshared, ndiff = 0, 0
i,j = firstindex(v1), firstindex(v2)
@inbounds while i <= lastindex(v1) || j <= lastindex(v2)
if j > lastindex(v2)
push!(dif, v1[i])
dif[ndiff += 1] = v1[i]
i+=1
continue
elseif i > lastindex(v1)
push!(dif, v2[j])
dif[ndiff += 1] = v2[j]
j+=1
continue
end
if v1[i] == v2[j]
push!(inter, v1[i])
inter[nshared += 1] = v1[i]
i += 1
j += 1
elseif j > lastindex(v2) || v1[i] < v2[j]
push!(dif, v1[i])
dif[ndiff += 1] = v1[i]
i+=1
elseif i > lastindex(v1) || v1[i] > v2[j]
push!(dif, v2[j])
dif[ndiff += 1] = v2[j]
j+=1
end
end
inter, dif
ndiff, nshared
end

function _curveball!(m::SparseMatrixCSC{Bool, Int})
function _curveball!(m::SparseMatrixCSC{Bool, Int}, rng = Random.GLOBAL_RNG)
R, C = size(m)
mcs = min(2maximum(diff(m.colptr)), size(m, 1))
not_shared, shared = Vector{Int}(undef, mcs), Vector{Int}(undef, mcs)
newa, newb = Vector{Int}(undef, mcs), Vector{Int}(undef, mcs)

for rep 1:5C
A, B = rand(1:C,2)
A, B = rand(rng, 1:C,2)

# use views directly into the sparse matrix to avoid copying
a, b = view(m.rowval, m.colptr[A]:m.colptr[A+1]-1), view(m.rowval, m.colptr[B]:m.colptr[B+1]-1)
l_a, l_b = length(a), length(b)

# an efficient algorithm since both a and b are sorted
shared, not_shared = _interdif(a, b)
l_ab = length(shared)
l_dif, l_ab = _interdif!(a, b, shared, not_shared)

if !(l_ab (l_a, l_b))
shuffle!(not_shared)
L = l_a - l_ab
a .= sort!([shared; not_shared[1:L]])
b .= sort!([shared; not_shared[L+1:end]])
L = l_a - l_ab

sample!(rng, view(not_shared, Base.OneTo(l_dif)), view(newa,Base.OneTo(L)), replace = false, ordered = true)
L2,_ = _interdif!(view(newa, 1:L), view(not_shared, Base.OneTo(l_dif)), newa, newb)

sortmerge!(shared, newa, a, l_ab, L)
sortmerge!(shared, newb, b, l_ab, L2)
end
end

Expand Down

0 comments on commit 1ffb757

Please sign in to comment.