Reduce allocations in Dropout #1791

Open
wants to merge 4 commits into
base: master
Changes from 2 commits
10 changes: 5 additions & 5 deletions src/layers/normalise.jl
@@ -30,14 +30,14 @@ The [`Dropout`](@ref) layer is what you should use in most scenarios.
"""
function dropout(x, p; dims=:, active::Bool=true)
active || return x
y = dropout_mask(x, p, dims=dims)
return x .* y
y = rand!(similar(x, _dropout_shape(x, dims)))
@inbounds @. y = x * _dropout_kernel(y, p, 1-p)
end
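For orientation, the new lines lean on two small helpers defined elsewhere in src/layers/normalise.jl and not shown in this diff. Roughly, paraphrasing rather than quoting the file:

# Paraphrased helpers (illustrative, not part of this diff):
_dropout_shape(s, ::Colon) = size(s)                       # mask matches the full array
_dropout_shape(s, dims) = tuple((i in dims ? si : 1 for (i, si) in enumerate(size(s)))...)  # singleton elsewhere, so the mask broadcasts

# keep an entry when its uniform sample exceeds p, rescaling survivors by 1/q with q = 1 - p
_dropout_kernel(y::T, p, q) where {T} = y > p ? T(1/q) : T(0)

With dims=:, the noise buffer y has the same shape as x, which is what lets the second new line overwrite y in place instead of allocating a separate output.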

@adjoint function dropout(x, p; dims=:, active::Bool=true)
  active || return x, Δ -> (Δ, nothing)
-  y = dropout_mask(x, p, dims=dims)
-  return x .* y, Δ -> (Δ .* y, nothing)
+  y = rand!(similar(x, _dropout_shape(x, dims)))
Review thread on the line above:

Member: Do we need to replace dropout_mask here?

ToucheSir (Member), Nov 29, 2021: It would probably be easier to just remove the call to dropout_kernel in dropout_mask and keep the first line here. That also gives a reason for dropout_mask's continued existence (I can see it coming in handy in the future if we ever think of more efficient ways to generate or store the mask depending on input type).
Edit: a slimmed-down dropout_mask could also be used by `noise = rand!(similar(x))`.
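A sketch of that suggestion (the reviewer's idea, not code from this PR): dropout_mask keeps only the noise generation, and dropout fuses the kernel into the same broadcast that applies the mask:

using Random: rand!

dropout_mask(x, p; dims=:) = rand!(similar(x, _dropout_shape(x, dims)))   # just uniform noise, no kernel applied

function dropout(x, p; dims=:, active::Bool=true)
  active || return x
  y = dropout_mask(x, p, dims=dims)
  return x .* _dropout_kernel.(y, p, 1 - p)   # kernel and multiply in one fused broadcast
end

Because the final broadcast is out of place, this variant also works when dims narrows the mask to a smaller shape than x.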

+  return x .* _dropout_kernel.(y, p, 1-p), Δ -> (Δ .* _dropout_kernel.(y, p, 1-p), nothing)
Review thread on the line above:

Member: This makes me wonder if _dropout_kernel should subsume the pointwise mul as well.

Contributor (PR author): That, I believe, would be equivalent to the change here (but perhaps with neater packaging).

Member: I believe it would also save a multiplication per element (assuming `_dropout_kernel(x, y::T, p, q) where {T} = y > p ? T(x / q) : T(0)` or some such).

Member: I think it'd be equivalent in the end. Maybe check the generated code to verify.
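For concreteness, a sketch of the kernel being discussed, following the signature suggested above (illustrative only, not merged code):

# Fused kernel: decide keep/drop, rescale, and multiply by x in a single branch,
# so dropped entries skip the multiplication entirely.
_dropout_kernel(x, y::T, p, q) where {T} = y > p ? T(x / q) : T(0)

# It would slot into dropout as one broadcast over the noise buffer, e.g.
#   y = rand!(similar(x, _dropout_shape(x, dims)))
#   @. y = _dropout_kernel(x, y, p, 1 - p)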

end

function dropout_mask(x, p; dims=:)
Review thread on the line above:

mcabbott (Member), Nov 29, 2021: Note BTW that this re-use of y to save memory suffers from JuliaLang/julia#43153. Applying the (possible) fix from there saves 30% or so:

julia> x = randn(Float32, 100, 1000);

julia> @btime Flux.dropout_mask($x, 0.5; dims=:);
  min 70.791 μs, mean 129.631 μs (7 allocations, 390.80 KiB. GC mean 24.65%)

julia> @eval Base.Broadcast @inline function copyto!(dest::AbstractArray, bc::Broadcasted{Nothing})
           axes(dest) == axes(bc) || throwdm(axes(dest), axes(bc))
           # Performance optimization: broadcast!(identity, dest, A) is equivalent to copyto!(dest, A) if indices match
           if bc.f === identity && bc.args isa Tuple{AbstractArray} # only a single input argument to broadcast!
               A = bc.args[1]
               if axes(dest) == axes(A)
                   return copyto!(dest, A)
               end
           end
           bc′ = preprocess(dest, bc)
           # Performance may vary depending on whether `@inbounds` is placed outside the
           # for loop or not. (cf. https://github.com/JuliaLang/julia/issues/38086)
           @simd ivdep for I in eachindex(dest)
               @inbounds dest[I] = bc′[I]
           end
           return dest
       end
copyto! (generic function with 126 methods)

julia> @btime Flux.dropout_mask($x, 0.5; dims=:);
  min 55.750 μs, mean 102.479 μs (7 allocations, 390.80 KiB. GC mean 24.71%)

That's another reason to avoid this, in favour of the fusion proposed here.

Member: Also, I think deleting an internal function like this should be fine. If anyone was overloading it for some reason, better they find out sooner than later.

Member: Would this correctly not trigger for GPU arrays? The type of dest seems pretty broad.

Member: Not sure this issue exists for GPU, nor whether it calls the method which I pirate here.

Member: IDK, but the definition appears specific enough to not cause major problems: https://github.com/JuliaGPU/GPUArrays.jl/blob/master/src/host/broadcast.jl#L50
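To make the allocation argument concrete, a rough comparison one could run (assumes BenchmarkTools is installed; the kernel and function names are made up for this sketch, and timings will vary by machine):

using BenchmarkTools
using Random: rand!

kernel(y::T, p, q) where {T} = y > p ? T(1/q) : T(0)   # same idea as _dropout_kernel

function dropout_twopass(x, p)        # old shape of the code: materialise a mask, then multiply
  y = rand!(similar(x))               # first array: the noise / mask buffer
  y .= kernel.(y, p, 1 - p)           # turn it into the scaled 0-or-1/q mask in place
  return x .* y                       # second array: the result
end

function dropout_fused(x, p)          # shape of the code proposed in this PR
  y = rand!(similar(x))
  @. y = x * kernel(y, p, 1 - p)      # overwrite the noise buffer in place
  return y
end

x = randn(Float32, 100, 1000)
@btime dropout_twopass($x, 0.5f0);    # expect two 100×1000 Float32 arrays allocated
@btime dropout_fused($x, 0.5f0);      # expect one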

@@ -56,7 +56,7 @@ e.g. `Dropout(p; dims = 3)` will randomly zero out entire channels on WHCN input
(also called 2D dropout).

Does nothing to the input once [`Flux.testmode!`](@ref) is `true`.
-"""
+"""`
mutable struct Dropout{F,D}
  p::F
  dims::D