forked from JuliaLang/julia
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmissing.jl
430 lines (370 loc) · 13 KB
/
missing.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
# This file is a part of Julia. License is MIT: https://julialang.org/license
# Missing, missing and ismissing are defined in essentials.jl
show(io::IO, x::Missing) = print(io, "missing")
"""
MissingException(msg)
Exception thrown when a [`missing`](@ref) value is encountered in a situation
where it is not supported. The error message, in the `msg` field
may provide more specific details.
"""
struct MissingException <: Exception
msg::String
end
showerror(io::IO, ex::MissingException) =
print(io, "MissingException: ", ex.msg)
"""
nonmissingtype(T::Type)
If `T` is a union of types containing `Missing`, return a new type with
`Missing` removed.
# Examples
```jldoctest
julia> nonmissingtype(Union{Int64,Missing})
Int64
julia> nonmissingtype(Any)
Any
```
!!! compat "Julia 1.3"
This function is exported as of Julia 1.3.
"""
nonmissingtype(::Type{T}) where {T} = typesplit(T, Missing)
function nonmissingtype_checked(T::Type)
R = nonmissingtype(T)
R >: T && error("could not compute non-missing type")
return R
end
promote_rule(T::Type{Missing}, S::Type) = Union{S, Missing}
promote_rule(T::Type{Union{Nothing, Missing}}, S::Type) = Union{S, Nothing, Missing}
function promote_rule(T::Type{>:Union{Nothing, Missing}}, S::Type)
R = nonnothingtype(T)
R >: T && return Any
T = R
R = nonmissingtype(T)
R >: T && return Any
T = R
R = promote_type(T, S)
return Union{R, Nothing, Missing}
end
function promote_rule(T::Type{>:Missing}, S::Type)
R = nonmissingtype(T)
R >: T && return Any
T = R
R = promote_type(T, S)
return Union{R, Missing}
end
convert(::Type{T}, x::T) where {T>:Missing} = x
convert(::Type{T}, x::T) where {T>:Union{Missing, Nothing}} = x
convert(::Type{T}, x) where {T>:Missing} = convert(nonmissingtype_checked(T), x)
convert(::Type{T}, x) where {T>:Union{Missing, Nothing}} = convert(nonmissingtype_checked(nonnothingtype_checked(T)), x)
# Comparison operators
==(::Missing, ::Missing) = missing
==(::Missing, ::Any) = missing
==(::Any, ::Missing) = missing
# To fix ambiguity
==(::Missing, ::WeakRef) = missing
==(::WeakRef, ::Missing) = missing
isequal(::Missing, ::Missing) = true
isequal(::Missing, ::Any) = false
isequal(::Any, ::Missing) = false
<(::Missing, ::Missing) = missing
<(::Missing, ::Any) = missing
<(::Any, ::Missing) = missing
isless(::Missing, ::Missing) = false
isless(::Missing, ::Any) = false
isless(::Any, ::Missing) = true
isapprox(::Missing, ::Missing; kwargs...) = missing
isapprox(::Missing, ::Any; kwargs...) = missing
isapprox(::Any, ::Missing; kwargs...) = missing
# Unary operators/functions
for f in (:(!), :(~), :(+), :(-), :(*), :(&), :(|), :(xor),
:(zero), :(one), :(oneunit),
:(isfinite), :(isinf), :(isodd),
:(isinteger), :(isreal), :(isnan),
:(iszero), :(transpose), :(adjoint), :(float), :(complex), :(conj),
:(abs), :(abs2), :(iseven), :(ispow2),
:(real), :(imag), :(sign), :(inv))
@eval ($f)(::Missing) = missing
end
for f in (:(Base.zero), :(Base.one), :(Base.oneunit))
@eval ($f)(::Type{Missing}) = missing
@eval function $(f)(::Type{Union{T, Missing}}) where T
T === Any && throw(MethodError($f, (Any,))) # To prevent StackOverflowError
$f(T)
end
end
for f in (:(Base.float), :(Base.complex))
@eval $f(::Type{Missing}) = Missing
@eval function $f(::Type{Union{T, Missing}}) where T
T === Any && throw(MethodError($f, (Any,))) # To prevent StackOverflowError
Union{$f(T), Missing}
end
end
# Binary operators/functions
for f in (:(+), :(-), :(*), :(/), :(^), :(mod), :(rem))
@eval begin
# Scalar with missing
($f)(::Missing, ::Missing) = missing
($f)(::Missing, ::Number) = missing
($f)(::Number, ::Missing) = missing
end
end
div(::Missing, ::Missing, r::RoundingMode) = missing
div(::Missing, ::Number, r::RoundingMode) = missing
div(::Number, ::Missing, r::RoundingMode) = missing
min(::Missing, ::Missing) = missing
min(::Missing, ::Any) = missing
min(::Any, ::Missing) = missing
max(::Missing, ::Missing) = missing
max(::Missing, ::Any) = missing
max(::Any, ::Missing) = missing
# Rounding and related functions
round(::Missing, ::RoundingMode=RoundNearest; sigdigits::Integer=0, digits::Integer=0, base::Integer=0) = missing
round(::Type{>:Missing}, ::Missing, ::RoundingMode=RoundNearest) = missing
round(::Type{T}, ::Missing, ::RoundingMode=RoundNearest) where {T} =
throw(MissingException("cannot convert a missing value to type $T: use Union{$T, Missing} instead"))
round(::Type{T}, x::Any, r::RoundingMode=RoundNearest) where {T>:Missing} = round(nonmissingtype_checked(T), x, r)
# to fix ambiguities
round(::Type{T}, x::Rational{Tr}, r::RoundingMode=RoundNearest) where {T>:Missing,Tr} = round(nonmissingtype_checked(T), x, r)
round(::Type{T}, x::Rational{Bool}, r::RoundingMode=RoundNearest) where {T>:Missing} = round(nonmissingtype_checked(T), x, r)
# Handle ceil, floor, and trunc separately as they have no RoundingMode argument
for f in (:(ceil), :(floor), :(trunc))
@eval begin
($f)(::Missing; sigdigits::Integer=0, digits::Integer=0, base::Integer=0) = missing
($f)(::Type{>:Missing}, ::Missing) = missing
($f)(::Type{T}, ::Missing) where {T} =
throw(MissingException("cannot convert a missing value to type $T: use Union{$T, Missing} instead"))
($f)(::Type{T}, x::Any) where {T>:Missing} = $f(nonmissingtype_checked(T), x)
# to fix ambiguities
($f)(::Type{T}, x::Rational) where {T>:Missing} = $f(nonmissingtype_checked(T), x)
end
end
# to avoid ambiguity warnings
(^)(::Missing, ::Integer) = missing
# Bit operators
(&)(::Missing, ::Missing) = missing
(&)(a::Missing, b::Bool) = ifelse(b, missing, false)
(&)(b::Bool, a::Missing) = ifelse(b, missing, false)
(&)(::Missing, ::Integer) = missing
(&)(::Integer, ::Missing) = missing
(|)(::Missing, ::Missing) = missing
(|)(a::Missing, b::Bool) = ifelse(b, true, missing)
(|)(b::Bool, a::Missing) = ifelse(b, true, missing)
(|)(::Missing, ::Integer) = missing
(|)(::Integer, ::Missing) = missing
xor(::Missing, ::Missing) = missing
xor(a::Missing, b::Bool) = missing
xor(b::Bool, a::Missing) = missing
xor(::Missing, ::Integer) = missing
xor(::Integer, ::Missing) = missing
*(d::Missing, x::Union{AbstractString,AbstractChar}) = missing
*(d::Union{AbstractString,AbstractChar}, x::Missing) = missing
function float(A::AbstractArray{Union{T, Missing}}) where {T}
U = typeof(float(zero(T)))
convert(AbstractArray{Union{U, Missing}}, A)
end
float(A::AbstractArray{Missing}) = A
"""
skipmissing(itr)
Return an iterator over the elements in `itr` skipping [`missing`](@ref) values.
The returned object can be indexed using indices of `itr` if the latter is indexable.
Indices corresponding to missing values are not valid: they are skipped by [`keys`](@ref)
and [`eachindex`](@ref), and a `MissingException` is thrown when trying to use them.
Use [`collect`](@ref) to obtain an `Array` containing the non-`missing` values in
`itr`. Note that even if `itr` is a multidimensional array, the result will always
be a `Vector` since it is not possible to remove missings while preserving dimensions
of the input.
See also [`coalesce`](@ref), [`ismissing`](@ref), [`something`](@ref).
# Examples
```jldoctest
julia> x = skipmissing([1, missing, 2])
skipmissing(Union{Missing, Int64}[1, missing, 2])
julia> sum(x)
3
julia> x[1]
1
julia> x[2]
ERROR: MissingException: the value at index (2,) is missing
[...]
julia> argmax(x)
3
julia> collect(keys(x))
2-element Vector{Int64}:
1
3
julia> collect(skipmissing([1, missing, 2]))
2-element Vector{Int64}:
1
2
julia> collect(skipmissing([1 missing; 2 missing]))
2-element Vector{Int64}:
1
2
```
"""
skipmissing(itr) = SkipMissing(itr)
struct SkipMissing{T}
x::T
end
IteratorSize(::Type{<:SkipMissing}) = SizeUnknown()
IteratorEltype(::Type{SkipMissing{T}}) where {T} = IteratorEltype(T)
eltype(::Type{SkipMissing{T}}) where {T} = nonmissingtype(eltype(T))
function iterate(itr::SkipMissing, state...)
y = iterate(itr.x, state...)
y === nothing && return nothing
item, state = y
while item === missing
y = iterate(itr.x, state)
y === nothing && return nothing
item, state = y
end
item, state
end
IndexStyle(::Type{<:SkipMissing{T}}) where {T} = IndexStyle(T)
eachindex(itr::SkipMissing) =
Iterators.filter(i -> @inbounds(itr.x[i]) !== missing, eachindex(itr.x))
keys(itr::SkipMissing) =
Iterators.filter(i -> @inbounds(itr.x[i]) !== missing, keys(itr.x))
@propagate_inbounds function getindex(itr::SkipMissing, I...)
v = itr.x[I...]
v === missing && throw(MissingException("the value at index $I is missing"))
v
end
function show(io::IO, s::SkipMissing)
print(io, "skipmissing(")
show(io, s.x)
print(io, ')')
end
# Optimized mapreduce implementation
# The generic method is faster when !(eltype(A) >: Missing) since it does not need
# additional loops to identify the two first non-missing values of each block
mapreduce(f, op, itr::SkipMissing{<:AbstractArray}) =
_mapreduce(f, op, IndexStyle(itr.x), eltype(itr.x) >: Missing ? itr : itr.x)
function _mapreduce(f, op, ::IndexLinear, itr::SkipMissing{<:AbstractArray})
A = itr.x
ai = missing
inds = LinearIndices(A)
i = first(inds)
ilast = last(inds)
for outer i in i:ilast
@inbounds ai = A[i]
ai !== missing && break
end
ai === missing && return mapreduce_empty(f, op, eltype(itr))
a1::eltype(itr) = ai
i == typemax(typeof(i)) && return mapreduce_first(f, op, a1)
i += 1
ai = missing
for outer i in i:ilast
@inbounds ai = A[i]
ai !== missing && break
end
ai === missing && return mapreduce_first(f, op, a1)
# We know A contains at least two non-missing entries: the result cannot be nothing
something(mapreduce_impl(f, op, itr, first(inds), last(inds)))
end
_mapreduce(f, op, ::IndexCartesian, itr::SkipMissing) = mapfoldl(f, op, itr)
mapreduce_impl(f, op, A::SkipMissing, ifirst::Integer, ilast::Integer) =
mapreduce_impl(f, op, A, ifirst, ilast, pairwise_blocksize(f, op))
# Returns nothing when the input contains only missing values, and Some(x) otherwise
@noinline function mapreduce_impl(f, op, itr::SkipMissing{<:AbstractArray},
ifirst::Integer, ilast::Integer, blksize::Int)
A = itr.x
if ifirst > ilast
return nothing
elseif ifirst == ilast
@inbounds a1 = A[ifirst]
if a1 === missing
return nothing
else
return Some(mapreduce_first(f, op, a1))
end
elseif ilast - ifirst < blksize
# sequential portion
ai = missing
i = ifirst
for outer i in i:ilast
@inbounds ai = A[i]
ai !== missing && break
end
ai === missing && return nothing
a1 = ai::eltype(itr)
i == typemax(typeof(i)) && return Some(mapreduce_first(f, op, a1))
i += 1
ai = missing
for outer i in i:ilast
@inbounds ai = A[i]
ai !== missing && break
end
ai === missing && return Some(mapreduce_first(f, op, a1))
a2 = ai::eltype(itr)
i == typemax(typeof(i)) && return Some(op(f(a1), f(a2)))
i += 1
v = op(f(a1), f(a2))
@simd for i = i:ilast
@inbounds ai = A[i]
if ai !== missing
v = op(v, f(ai))
end
end
return Some(v)
else
# pairwise portion
imid = ifirst + (ilast - ifirst) >> 1
v1 = mapreduce_impl(f, op, itr, ifirst, imid, blksize)
v2 = mapreduce_impl(f, op, itr, imid+1, ilast, blksize)
if v1 === nothing && v2 === nothing
return nothing
elseif v1 === nothing
return v2
elseif v2 === nothing
return v1
else
return Some(op(something(v1), something(v2)))
end
end
end
"""
filter(f, itr::SkipMissing{<:AbstractArray})
Return a vector similar to the array wrapped by the given `SkipMissing` iterator
but with all missing elements and those for which `f` returns `false` removed.
!!! compat "Julia 1.2"
This method requires Julia 1.2 or later.
# Examples
```jldoctest
julia> x = [1 2; missing 4]
2×2 Matrix{Union{Missing, Int64}}:
1 2
missing 4
julia> filter(isodd, skipmissing(x))
1-element Vector{Int64}:
1
```
"""
function filter(f, itr::SkipMissing{<:AbstractArray})
y = similar(itr.x, eltype(itr), 0)
for xi in itr.x
if xi !== missing && f(xi)
push!(y, xi)
end
end
y
end
"""
coalesce(x, y...)
Return the first value in the arguments which is not equal to [`missing`](@ref),
if any. Otherwise return `missing`.
See also [`skipmissing`](@ref), [`something`](@ref).
# Examples
```jldoctest
julia> coalesce(missing, 1)
1
julia> coalesce(1, missing)
1
julia> coalesce(nothing, 1) # returns `nothing`
julia> coalesce(missing, missing)
missing
```
"""
function coalesce end
coalesce() = missing
coalesce(x::Missing, y...) = coalesce(y...)
coalesce(x::Any, y...) = x