import Adapt: adapt, adapt_storage
using LinearAlgebra: Cholesky
using Zygote: IdSet
import Functors: Functors, @functor, functor, fmap, isleaf
using SparseArrays: AbstractSparseArray
"""
testmode!(m, mode = true)
Set a layer or model's test mode (see below).
Using `:auto` mode will treat any gradient computation as training.
_Note_: if you manually set a model into test mode, you need to manually place
it back into train mode during training phase.
Possible values include:
- `false` for training
- `true` for testing
- `:auto` or `nothing` for Flux to detect the mode automatically
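
# Example

A minimal sketch (the `Chain`/`Dropout` model below is illustrative, not part of this file):
```julia
m = Chain(Dense(2, 3), Dropout(0.5))  # hypothetical model containing a Dropout layer
testmode!(m)           # force inference behaviour, e.g. before evaluation
trainmode!(m)          # switch back to training behaviour
testmode!(m, :auto)    # let Flux detect the mode automatically
```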
"""
testmode!(m, mode = true) = (foreach(x -> testmode!(x, mode), trainable(m)); m)
"""
trainmode!(m, mode = true)
Set a layer of model's train mode (see below).
Symmetric to [`testmode!`](@ref) (i.e. `trainmode!(m, mode) == testmode!(m, !mode)`).
_Note_: if you manually set a model into train mode, you need to manually place
it into test mode during testing phase.
Possible values include:
- `true` for training
- `false` for testing
- `:auto` or `nothing` for Flux to detect the mode automatically
"""
trainmode!(m, mode = true) = mode isa Bool ? testmode!(m, !mode) : testmode!(m, mode)
function params!(p::Params, x, seen = IdSet())
  if x isa AbstractArray{<:Number} && Functors.isleaf(x)
    return push!(p, x)
  elseif x in seen
    nothing
  else
    push!(seen, x)
    for child in trainable(x)
      params!(p, child, seen)
    end
  end
end
"""
params(model)
params(layers...)
Given a model or specific layers from a model, create a `Params` object pointing to its trainable parameters.
This can be used with the `gradient` function, see [Taking Gradients](@ref), or as input to the [`Flux.train!`](@ref Flux.train!) function.
The behaviour of `params` on custom types can be customized using [`Functors.@functor`](@ref) or [`Flux.trainable`](@ref).
# Examples
```jldoctest
julia> using Flux: params
julia> params(Chain(Dense(ones(2,3)), softmax)) # unpacks Flux models
Params([[1.0 1.0 1.0; 1.0 1.0 1.0], [0.0, 0.0]])
julia> bn = BatchNorm(2, relu)
BatchNorm(2, relu) # 4 parameters, plus 4 non-trainable
julia> params(bn) # only the trainable parameters
Params([Float32[0.0, 0.0], Float32[1.0, 1.0]])
julia> params([1, 2, 3], [4]) # one or more arrays of numbers
Params([[1, 2, 3], [4]])
julia> params([[1, 2, 3], [4]]) # unpacks array of arrays
Params([[1, 2, 3], [4]])
julia> params(1, [2 2], (alpha=[3,3,3], beta=Ref(4), gamma=sin)) # ignores scalars, unpacks NamedTuples
Params([[2 2], [3, 3, 3]])
```
"""
function params(m...)
  ps = Params()
  params!(ps, m)
  return ps
end
# Allows caching of the parameters when params is called within gradient() to fix #2040.
@non_differentiable params(m...)
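# Illustrative sketch (not part of this file): `params` can appear inside the loss
# itself, e.g. for an L2 penalty, in which case the rule above makes Zygote treat
# it as a constant:
#
#     penalty(m) = sum(sum(abs2, p) for p in params(m))
#     loss(m, x, y) = Flux.Losses.mse(m(x), y) + 1f-4 * penalty(m)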
struct FluxCUDAAdaptor end
adapt_storage(to::FluxCUDAAdaptor, x) = CUDA.cu(x)
adapt_storage(to::FluxCUDAAdaptor, x::Zygote.FillArrays.AbstractFill) = CUDA.cu(collect(x))
if VERSION >= v"1.7"
  adapt_storage(to::FluxCUDAAdaptor, x::Random.TaskLocalRNG) = CUDA.default_rng()
else
  adapt_storage(to::FluxCUDAAdaptor, x::Random._GLOBAL_RNG) = CUDA.default_rng()
end
adapt_storage(to::FluxCUDAAdaptor, x::CUDA.RNG) = x
adapt_storage(to::FluxCUDAAdaptor, x::AbstractRNG) =
  error("Cannot map RNG of type $(typeof(x)) to GPU. GPU execution only supports Random.default_rng().")
# TODO: figure out the correct design for OneElement
adapt_storage(to::FluxCUDAAdaptor, x::Zygote.OneElement) = CUDA.cu(collect(x))
struct FluxCPUAdaptor end
# define rules for handling structured arrays
adapt_storage(to::FluxCPUAdaptor, x::AbstractArray) = adapt(Array, x)
adapt_storage(to::FluxCPUAdaptor, x::AbstractRange) = x
adapt_storage(to::FluxCPUAdaptor, x::Zygote.FillArrays.AbstractFill) = x
adapt_storage(to::FluxCPUAdaptor, x::T) where T <: CUDA.CUSPARSE.AbstractCuSparseMatrix = adapt(Array, x)
adapt_storage(to::FluxCPUAdaptor, x::Zygote.OneElement) = x
adapt_storage(to::FluxCPUAdaptor, x::AbstractSparseArray) = x
adapt_storage(to::FluxCPUAdaptor, x::CUDA.RNG) = Random.default_rng()
adapt_storage(to::FluxCPUAdaptor, x::AbstractRNG) = x
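# The rrules below make GPU-to-CPU array conversion differentiable: the pullback
# re-adapts the incoming cotangent with `CUDA.cu`/`FluxCUDAAdaptor`, so gradients
# land back on the same device as the original array.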
function ChainRulesCore.rrule(::Type{Array}, x::CUDA.CuArray)
  Array(x), dx -> (NoTangent(), CUDA.cu(unthunk(dx)),)
end
function ChainRulesCore.rrule(::typeof(Adapt.adapt_storage), to::FluxCPUAdaptor, x::CUDA.AbstractGPUArray)
  adapt_storage(to, x), dx -> (NoTangent(), NoTangent(), adapt_storage(FluxCUDAAdaptor(), unthunk(dx)),)
end
# CPU/GPU movement conveniences
"""
cpu(m)
Moves `m` onto the CPU, the opposite of [`gpu`](@ref).
Recurses into structs marked [`@functor`](@ref).
```julia-repl
julia> m = Dense(1,2)
Dense(1, 2)
julia> m_gpu = gpu(m)
Dense(1, 2)
julia> typeof(m_gpu.W)
CuArray{Float32, 2}
julia> m_cpu = cpu(m_gpu)
Dense(1, 2)
julia> typeof(m_cpu.W)
Matrix{Float32}
```
"""
cpu(x) = fmap(x -> adapt(FluxCPUAdaptor(), x), x)
_isbitsarray(::AbstractArray{<:Number}) = true
_isbitsarray(::AbstractArray{T}) where T = isbitstype(T)
_isbitsarray(x) = false
_isleaf(::AbstractRNG) = true
_isleaf(x) = _isbitsarray(x) || Functors.isleaf(x)
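# Illustrative examples (not part of this file) of the leaf test used by `gpu` below:
#
#     _isbitsarray(rand(Float32, 3))   # true  -- numeric arrays are moved as a whole
#     _isbitsarray(["a", "b"])         # false -- non-isbits element type
#     _isleaf(Random.default_rng())    # true  -- RNGs are handled by the adaptor, not recursed into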
"""
gpu(x)
Moves `m` to the current GPU device, if available. It is a no-op otherwise.
See the [CUDA.jl docs](https://juliagpu.github.io/CUDA.jl/stable/usage/multigpu/)
to help identify the current device.
This works for functions, and any struct marked with [`@functor`](@ref).
```julia-repl
julia> m = Dense(1,2)
Dense(1, 2)
julia> typeof(m.W)
Matrix{Float32}
julia> m_gpu = gpu(m)
Dense(1, 2)
julia> typeof(m_gpu.W) # notice the type of the array changed to a CuArray
CuArray{Float32, 2}
```
"""
function gpu(x)
  check_use_cuda()
  use_cuda[] ? fmap(x -> Adapt.adapt(FluxCUDAAdaptor(), x), x; exclude = _isleaf) : x
end
function check_use_cuda()
  if use_cuda[] === nothing
    use_cuda[] = CUDA.functional()
    if use_cuda[] && !CUDA.has_cudnn()
      @warn "CUDA.jl found cuda, but did not find libcudnn. Some functionality will not be available."
    end
    if !(use_cuda[])
      @info """The GPU function is being called but the GPU is not accessible.
               Defaulting back to the CPU. (No action is required if you want to run on the CPU).""" maxlog=1
    end
  end
end
ChainRulesCore.@non_differentiable check_use_cuda()
# Precision
adapt_storage(T::Type{<:Real}, xs::AbstractArray{<:Real}) = convert.(T, xs) # piracy
paramtype(T::Type{<:Real}, m) = fmap(x -> adapt(T, x), m)
"""
f32(m)
Converts the `eltype` of model's parameters to `Float32` (which is Flux's default).
Recurses into structs marked with [`@functor`](@ref).
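
# Example

A minimal sketch (the layer below is illustrative):
```julia
m64 = Dense(rand(Float64, 2, 3))  # hypothetical layer carrying Float64 parameters
m32 = f32(m64)                    # same structure, parameters converted to Float32
```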
"""
f32(m) = paramtype(Float32, m)
"""
f64(m)
Converts the `eltype` of model's parameters to `Float64`.
Recurses into structs marked with [`@functor`](@ref).
"""
f64(m) = paramtype(Float64, m)
# Functors for certain Julia data structures
@functor Cholesky
trainable(c::Cholesky) = ()
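# Illustrative sketch (hypothetical type, not part of this file): the same pattern
# works for user-defined types, marking the struct as a functor and restricting
# which fields `params` collects:
#
#     struct Affine
#       W
#       b
#       σ
#     end
#     @functor Affine
#     trainable(a::Affine) = (a.W, a.b)  # exclude the activation `σ`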