forked from ppwwyyxx/cocoapi
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMaskApi.m
116 lines (106 loc) · 4.86 KB
/
MaskApi.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
classdef MaskApi
  % MaskApi  Static interface for manipulating masks stored in RLE format.
  %
  % Run-length encoding (RLE) is a simple yet efficient way to store a
  % binary mask. A vector (or vectorized image) is first split into a
  % series of piecewise constant regions, and for each region only its
  % length is stored. For example, M=[0 0 1 1 1 0 1] gives the RLE counts
  % [2 3 1 1], while M=[1 1 1 1 1 1 0] gives [0 6 1]; the counts in odd
  % positions (1st, 3rd, ...) are always the numbers of zeros. Rather than
  % storing the counts directly, additional compression is obtained with a
  % variable bitrate representation based on a common scheme called LEB128.
  %
  % Compression is greatest when M contains large piecewise constant
  % regions. Specifically, the size of the RLE is proportional to the
  % number of *boundaries* in M (for an image, the number of boundaries in
  % the y direction). Assuming fairly simple shapes, the RLE representation
  % is O(sqrt(n)), where n is the number of pixels in the object. Space
  % usage is therefore substantially lower, especially for large simple
  % objects (large n).
  %
  % Many common mask operations can be computed directly on the RLE,
  % without any decoding. These include area, union, intersection, etc.
  % All of them are linear in the size of the RLE, i.e. O(sqrt(n)) where n
  % is the area of the object, whereas the same operations on the original
  % mask are O(n). Working on the RLE can thus yield substantial
  % computational savings.
  %
  % The following API functions are defined:
  %  encode - Encode binary masks using RLE.
  %  decode - Decode binary masks encoded via RLE.
  %  merge  - Compute union or intersection of encoded masks.
  %  iou    - Compute intersection over union between masks.
  %  nms    - Compute non-maximum suppression between ordered masks.
  %  area   - Compute area of encoded masks.
  %  toBbox - Get bounding boxes surrounding encoded masks.
  %  frBbox - Convert bounding boxes to encoded masks.
  %  frPoly - Convert polygon to encoded mask.
  %
  % Usage:
  %  Rs    = MaskApi.encode( masks )
  %  masks = MaskApi.decode( Rs )
  %  R     = MaskApi.merge( Rs, [intersect=false] )
  %  o     = MaskApi.iou( dt, gt, [iscrowd=false] )
  %  keep  = MaskApi.nms( dt, thr )
  %  a     = MaskApi.area( Rs )
  %  bbs   = MaskApi.toBbox( Rs )
  %  Rs    = MaskApi.frBbox( bbs, h, w )
  %  R     = MaskApi.frPoly( poly, h, w )
  %
  % In the API the following formats are used:
  %  R,Rs  - [struct] Run-length encoding of binary mask(s)
  %  masks - [hxwxn] Binary mask(s) (must have type uint8)
  %  bbs   - [nx4] Bounding box(es) stored as [x y w h]
  %  poly  - Polygon stored as {[x1 y1 x2 y2...],[x1 y1 ...],...}
  %  dt,gt - May be either bounding boxes or encoded masks
  % Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel).
  %
  % Finally, a note about the intersection over union (iou) computation.
  % The standard iou of a ground truth (gt) and a detected (dt) object is
  %  iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt))
  % For "crowd" regions a modified criterion is used. If a gt object is
  % marked as "iscrowd", a dt is allowed to match any subregion of the gt.
  % Choosing gt' in the crowd gt that best matches the dt can be done using
  % gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing
  %  iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt)
  % For crowd gt regions this modified criterion is the one used for iou.
  %
  % To compile use the following (some precompiled binaries are included):
  %  mex('CFLAGS=\$CFLAGS -Wall -std=c99','-largeArrayDims',...
  %    'private/maskApiMex.c','../common/maskApi.c',...
  %    '-I../common/','-outdir','private');
  % Please do not contact us for help with compiling.
  %
  % Microsoft COCO Toolbox.      version 2.0
  % Data, paper, and tutorials available at:  http://mscoco.org/
  % Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
  % Licensed under the Simplified BSD License [see coco/license.txt]

  methods( Static )

    function Rs = encode( masks )
      % Encode binary masks using RLE.
      %  masks - [hxwxn] binary mask(s) (must have type uint8)
      %  Rs    - [struct] run-length encoding of the mask(s)
      Rs = maskApiMex('encode', masks);
    end

    function masks = decode( Rs )
      % Decode binary masks encoded via RLE.
      %  Rs    - [struct] run-length encoded mask(s)
      %  masks - decoded binary mask(s)
      masks = maskApiMex('decode', Rs);
    end

    function R = merge( Rs, varargin )
      % Compute union or intersection of encoded masks.
      %  Rs       - [struct] run-length encoded masks
      %  varargin - optional [intersect=false] flag, forwarded unchanged
      %  R        - [struct] run-length encoding of the merged mask
      extraArgs = varargin;
      R = maskApiMex('merge', Rs, extraArgs{:});
    end

    function o = iou( dt, gt, varargin )
      % Compute intersection over union between masks.
      %  dt,gt    - either bounding boxes or encoded masks
      %  varargin - optional [iscrowd=false] flag, forwarded unchanged
      %  o        - iou values as returned by the MEX routine
      % Both inputs are transposed before being handed to the MEX routine.
      % NOTE(review): presumably this matches the layout maskApiMex.c
      % expects (e.g. nx4 boxes become 4xn) - confirm against the C source.
      dtT = dt';
      gtT = gt';
      extraArgs = varargin;
      o = maskApiMex('iou', dtT, gtT, extraArgs{:});
    end

    function keep = nms( dt, thr )
      % Compute non-maximum suppression between ordered masks.
      %  dt   - either bounding boxes or encoded masks (ordered)
      %  thr  - threshold passed through to the MEX routine
      %  keep - result of the suppression as returned by the MEX routine
      % dt is transposed before the call, as in iou.
      dtT = dt';
      keep = maskApiMex('nms', dtT, thr);
    end

    function a = area( Rs )
      % Compute area of encoded masks.
      %  Rs - [struct] run-length encoded mask(s)
      %  a  - area(s) as returned by the MEX routine
      a = maskApiMex('area', Rs);
    end

    function bbs = toBbox( Rs )
      % Get bounding boxes surrounding encoded masks.
      %  Rs  - [struct] run-length encoded mask(s)
      %  bbs - [nx4] bounding box(es) stored as [x y w h]
      % The MEX output is transposed before being returned.
      bbs = maskApiMex('toBbox', Rs);
      bbs = bbs';
    end

    function Rs = frBbox( bbs, h, w )
      % Convert bounding boxes to encoded masks.
      %  bbs - [nx4] bounding box(es) stored as [x y w h], 0-indexed
      %  h,w - forwarded unchanged; presumably the mask height and width
      %  Rs  - [struct] run-length encoded mask(s)
      % bbs is transposed (nx4 -> 4xn) before the call.
      bbsT = bbs';
      Rs = maskApiMex('frBbox', bbsT, h, w);
    end

    function R = frPoly( poly, h, w )
      % Convert polygon to encoded mask.
      %  poly - polygon stored as {[x1 y1 x2 y2...],[x1 y1 ...],...}, 0-indexed
      %  h,w  - forwarded unchanged; presumably the mask height and width
      %  R    - [struct] run-length encoded mask
      R = maskApiMex('frPoly', poly, h, w);
    end

  end
end