// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef THIRD_PARTY_MLIR_EDGE_IREE_HAL_COMMAND_BUFFER_H_
#define THIRD_PARTY_MLIR_EDGE_IREE_HAL_COMMAND_BUFFER_H_

#include <array>
#include <cstdint>
#include <string>

#include "third_party/absl/types/span.h"
#include "third_party/mlir_edge/iree/base/bitfield.h"
#include "third_party/mlir_edge/iree/base/shape.h"
#include "third_party/mlir_edge/iree/base/status.h"
#include "third_party/mlir_edge/iree/hal/allocator.h"
#include "third_party/mlir_edge/iree/hal/buffer.h"
#include "third_party/mlir_edge/iree/hal/buffer_view.h"
#include "third_party/mlir_edge/iree/hal/event.h"
#include "third_party/mlir_edge/iree/hal/executable.h"
#include "third_party/mlir_edge/iree/hal/resource.h"

namespace iree {
namespace hal {

// A bitfield specifying the mode of operation for a command buffer.
enum class CommandBufferMode : uint32_t {
  // Command buffer will be submitted once and never used again.
  // This may enable in-place patching of command buffers, reducing overhead
  // when it's known that the command buffer will not be reused.
  kOneShot = 1 << 0,
};
IREE_BITFIELD(CommandBufferMode);
using CommandBufferModeBitfield = CommandBufferMode;

std::string CommandBufferModeString(CommandBufferModeBitfield mode);

// A bitfield specifying the category of commands in a command queue.
enum class CommandCategory : uint32_t {
  // Command is considered a transfer operation (memcpy, etc).
  kTransfer = 1 << 0,
  // Command is considered a dispatch operation (dispatch/execute).
  kDispatch = 1 << 1,
};
IREE_BITFIELD(CommandCategory);
using CommandCategoryBitfield = CommandCategory;

std::string CommandCategoryString(CommandCategoryBitfield categories);
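
// Example: values of these bitfields compose with the bitwise operators
// generated by IREE_BITFIELD. A minimal sketch (illustrative only;
// |categories| is a local variable, not part of this API):
//
//   CommandCategoryBitfield categories =
//       CommandCategory::kTransfer | CommandCategory::kDispatch;
//   bool can_dispatch = (categories & CommandCategory::kDispatch) ==
//                       CommandCategory::kDispatch;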

// Bitfield specifying which execution stage a barrier should start/end at.
//
// Maps to VkPipelineStageFlagBits.
enum class ExecutionStage : uint32_t {
  // Top of the pipeline when commands are initially issued by the device.
  kCommandIssue = 1 << 0,
  // Stage of the pipeline when dispatch parameter data is consumed.
  kCommandProcess = 1 << 1,
  // Stage where dispatch commands execute.
  kDispatch = 1 << 2,
  // Stage where transfer (copy/clear/fill/etc) commands execute.
  kTransfer = 1 << 3,
  // Final stage in the pipeline when commands are retired on the device.
  kCommandRetire = 1 << 4,
  // Pseudo-stage for read/writes by the host. Not executed on device.
  kHost = 1 << 5,
};
IREE_BITFIELD(ExecutionStage);
using ExecutionStageBitfield = ExecutionStage;

// Bitfield specifying which scopes will access memory and how.
//
// Maps to VkAccessFlagBits.
enum class AccessScope : uint32_t {
  // Read access to indirect command data as part of an indirect dispatch.
  kIndirectCommandRead = 1 << 0,
  // Constant uniform buffer reads by the device.
  kConstantRead = 1 << 1,
  // Storage buffer reads by dispatch commands.
  kDispatchRead = 1 << 2,
  // Storage buffer writes by dispatch commands.
  kDispatchWrite = 1 << 3,
  // Source of a transfer operation.
  kTransferRead = 1 << 4,
  // Target of a transfer operation.
  kTransferWrite = 1 << 5,
  // Read operation by the host through mapped memory.
  kHostRead = 1 << 6,
  // Write operation by the host through mapped memory.
  kHostWrite = 1 << 7,
  // External/non-specific read.
  kMemoryRead = 1 << 8,
  // External/non-specific write.
  kMemoryWrite = 1 << 9,
};
IREE_BITFIELD(AccessScope);
using AccessScopeBitfield = AccessScope;

// Defines a global memory barrier.
// These are cheaper to encode than buffer-specific barriers but may cause
// stalls and bubbles in device pipelines if applied too broadly. Prefer them
// over equivalently large sets of buffer-specific barriers (such as when
// completely changing execution contexts).
//
// Maps to VkMemoryBarrier.
struct MemoryBarrier {
  // All access scopes prior-to the barrier (inclusive).
  AccessScopeBitfield source_scope;
  // All access scopes following the barrier (inclusive).
  AccessScopeBitfield target_scope;
};

// Defines a memory barrier that applies to a range of a specific buffer.
// Use of these (vs. global memory barriers) provides fine-grained execution
// ordering to device command processors and allows for more aggressive
// reordering.
//
// Maps to VkBufferMemoryBarrier.
struct BufferBarrier {
  // All access scopes prior-to the barrier (inclusive).
  AccessScopeBitfield source_scope;
  // All access scopes following the barrier (inclusive).
  AccessScopeBitfield target_scope;
  // Buffer the barrier is restricted to.
  // The barrier will apply to the entire physical device allocation.
  Buffer* buffer = nullptr;
  // Relative offset/length within |buffer| (which may itself be mapped into
  // the device allocation at an offset).
  device_size_t offset = 0;
  device_size_t length = kWholeBuffer;
};
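
// Example: a minimal sketch of barrier descriptions ordering transfer writes
// before dispatch reads. |buffer| is assumed to have been allocated
// elsewhere; a global and an equivalent buffer-scoped form are shown:
//
//   MemoryBarrier memory_barrier;
//   memory_barrier.source_scope = AccessScope::kTransferWrite;
//   memory_barrier.target_scope = AccessScope::kDispatchRead;
//
//   BufferBarrier buffer_barrier;
//   buffer_barrier.source_scope = AccessScope::kTransferWrite;
//   buffer_barrier.target_scope = AccessScope::kDispatchRead;
//   buffer_barrier.buffer = buffer;
//   buffer_barrier.offset = 0;
//   buffer_barrier.length = kWholeBuffer;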

// Represents a binding to a buffer with a set of attributes.
// This may be used by drivers to validate alignment.
struct BufferBinding {
  // Access rights of the buffer contents by the executable.
  MemoryAccessBitfield access = MemoryAccess::kAll;

  // The buffer this binding references.
  // The buffer is not retained by the binding and must be kept alive
  // externally for the duration it is in use by the queue.
  Buffer* buffer = nullptr;

  // Shape of the buffer contents.
  Shape shape;

  // Size of each element within the buffer, in bytes.
  int8_t element_size = 0;

  BufferBinding() = default;
  BufferBinding(MemoryAccessBitfield access, Buffer* buffer)
      : access(access), buffer(buffer) {}
  BufferBinding(MemoryAccessBitfield access, Buffer* buffer, Shape shape,
                int8_t element_size)
      : access(access),
        buffer(buffer),
        shape(shape),
        element_size(element_size) {}
  BufferBinding(MemoryAccessBitfield access, const BufferView& buffer_view)
      : access(access),
        buffer(buffer_view.buffer.get()),
        shape(buffer_view.shape),
        element_size(buffer_view.element_size) {}
};
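
// Example: a sketch of binding construction. |input_view| is assumed to be a
// BufferView produced elsewhere and |output_buffer| a Buffer*; the Shape
// value shown is illustrative:
//
//   BufferBinding input_binding(MemoryAccess::kRead, input_view);
//   BufferBinding output_binding(MemoryAccess::kWrite, output_buffer,
//                                Shape({16, 4}), sizeof(float));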

// Wraps parameters for a Dispatch request.
struct DispatchRequest {
  // Executable prepared for use on the device.
  // The executable must remain alive until all in-flight dispatch requests
  // that use it have completed.
  Executable* executable = nullptr;

  // Executable entry point ordinal.
  int entry_point = 0;

  // TODO(benvanik): predication.

  // Static workload parameters defining the X, Y, and Z workgroup counts.
  std::array<int32_t, 3> workload;

  // An optional buffer containing the dynamic workload to dispatch.
  // The contents need not be available at the time of recording but must be
  // made visible prior to execution of the dispatch command.
  //
  // Buffer contents are expected to be 3 int32 values defining the X, Y, and Z
  // workgroup counts.
  //
  // The buffer must have been allocated with BufferUsage::kDispatch and be
  // of MemoryType::kDeviceVisible.
  Buffer* workload_buffer = nullptr;

  // A list of buffers that contain the execution inputs/outputs.
  // Order is dependent on executable arg layout.
  //
  // Buffers must have been allocated with BufferUsage::kDispatch and be
  // of MemoryType::kDeviceVisible.
  absl::Span<const BufferBinding> bindings;

  // TODO(benvanik): push-constant equivalent (uniforms, etc).
};
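
// Example: a sketch of a statically-shaped dispatch request. |executable| and
// |bindings| are assumed to have been prepared elsewhere:
//
//   DispatchRequest request;
//   request.executable = executable;
//   request.entry_point = 0;
//   request.workload = {16, 1, 1};
//   request.bindings = bindings;  // absl::Span<const BufferBinding>
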
// Asynchronous command buffer recording interface.
// Commands are recorded by the implementation for later submission to command
// queues.
//
// Buffers and synchronization objects referenced must remain valid and not be
// modified or read while there are commands in-flight. The usual flow is to
// populate input buffers, Dispatch using those buffers, wait on a Fence until
// the buffers are guaranteed to no longer be in use, and then reuse or release
// the buffers.
//
// Errors that can be recognized when operations are enqueued will be returned
// immediately, such as invalid argument errors. Errors that can only be
// determined at execution time will be returned on fences. Once a failure
// occurs the device queue will enter an error state that invalidates all
// operations on the device queue (as ordering is not strict and any may still
// be in-flight). In this case the user of the device queue should treat all
// in-flight operations as cancelled and fully reset themselves. Other device
// queues that may be waiting on events from the device queue will also enter
// error states. Only once a user has acknowledged and cleared the error state
// with a Reset will the queue become usable; until then all operations will
// return errors.
//
// Command buffers are thread-compatible. Use multiple command buffers if
// trying to record commands from multiple threads. Command buffers must not
// be mutated between when they are submitted for execution on a queue and
// when the fence fires indicating the completion of their execution.
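//
// Usage sketch (illustrative only: how a CommandBuffer is created is
// device-specific, and the CreateCommandBuffer call below is an assumption,
// not an API defined in this file):
//
//   ASSIGN_OR_RETURN(auto command_buffer,
//                    device->CreateCommandBuffer(
//                        CommandBufferMode::kOneShot,
//                        CommandCategory::kTransfer |
//                            CommandCategory::kDispatch));
//   RETURN_IF_ERROR(command_buffer->Begin());
//   RETURN_IF_ERROR(command_buffer->UpdateBuffer(host_data, 0, device_buffer,
//                                                0, data_length));
//   RETURN_IF_ERROR(command_buffer->Dispatch(dispatch_request));
//   RETURN_IF_ERROR(command_buffer->End());
//   // Submit to a device queue and wait on a fence before touching buffers.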
class CommandBuffer : public Resource {
 public:
  virtual CommandBuffer* impl() { return this; }

  // Device allocator that commands encoded into the buffer share compatibility
  // with.
  Allocator* allocator() const { return allocator_; }

  // Command buffer operation mode.
  CommandBufferModeBitfield mode() const { return mode_; }

  // Command categories that may be recorded into the buffer.
  CommandCategoryBitfield command_categories() const {
    return command_categories_;
  }

  // True if the command buffer is between a Begin/End recording block.
  virtual bool is_recording() const = 0;

  // Resets and begins recording into the command buffer, clearing all
  // previously recorded contents.
  // The command buffer must not be in-flight.
  virtual Status Begin() = 0;

  // Ends recording into the command buffer.
  // This must be called prior to submitting the command buffer for execution.
  virtual Status End() = 0;

  // TODO(benvanik): annotations for debugging and tracing:
  //   enter/exit
  //   stack frame manipulation
  //   explicit timers? or profiling buffer?

  // TODO(b/138719910): cross-queue and external acquire/release.
  // virtual Status AcquireBuffer() = 0;
  // virtual Status ReleaseBuffer() = 0;

  // Defines a memory dependency between commands recorded before and after
  // the barrier. One or more memory or buffer barriers can be specified to
  // indicate between which stages or buffers the dependencies exist.
  virtual Status ExecutionBarrier(
      ExecutionStageBitfield source_stage_mask,
      ExecutionStageBitfield target_stage_mask,
      absl::Span<const MemoryBarrier> memory_barriers,
      absl::Span<const BufferBarrier> buffer_barriers) = 0;
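
  // Example: a minimal sketch ordering transfer writes before dispatch reads;
  // absl::Span accepts the braced initializer lists shown (illustrative only):
  //
  //   MemoryBarrier barrier = {AccessScope::kTransferWrite,
  //                            AccessScope::kDispatchRead};
  //   RETURN_IF_ERROR(command_buffer->ExecutionBarrier(
  //       ExecutionStage::kTransfer, ExecutionStage::kDispatch,
  //       {barrier}, {}));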

  // Sets an event to the signaled state.
  // |source_stage_mask| specifies when the event is signaled.
  //
  // Events are only valid within a single command buffer. Events can only be
  // used on non-transfer queues.
  virtual Status SignalEvent(Event* event,
                             ExecutionStageBitfield source_stage_mask) = 0;

  // Resets an event to the non-signaled state.
  // |source_stage_mask| specifies when the event is unsignaled.
  //
  // Events are only valid within a single command buffer. Events can only be
  // used on non-transfer queues.
  virtual Status ResetEvent(Event* event,
                            ExecutionStageBitfield source_stage_mask) = 0;

  // Waits for one or more events to be signaled and defines a memory
  // dependency between the synchronization scope of the signal operations and
  // the commands following the wait.
  //
  // |source_stage_mask| must include ExecutionStage::kHost for Event::Signal
  // to be visible.
  //
  // Events are only valid within a single command buffer. Events remain
  // signaled even after waiting and must be reset to be reused. Events can
  // only be used on non-transfer queues.
  virtual Status WaitEvents(
      absl::Span<Event*> events, ExecutionStageBitfield source_stage_mask,
      ExecutionStageBitfield target_stage_mask,
      absl::Span<const MemoryBarrier> memory_barriers,
      absl::Span<const BufferBarrier> buffer_barriers) = 0;
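
  // Example: a sketch of splitting dependent work with an event. |event| is
  // assumed to have been obtained from the device elsewhere (illustrative
  // only):
  //
  //   RETURN_IF_ERROR(command_buffer->SignalEvent(
  //       event, ExecutionStage::kTransfer));
  //   // ... record independent commands here ...
  //   Event* events[] = {event};
  //   RETURN_IF_ERROR(command_buffer->WaitEvents(
  //       absl::MakeSpan(events), ExecutionStage::kTransfer,
  //       ExecutionStage::kDispatch, {}, {}));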

  // Fills the target buffer with the given repeating value.
  // Expects that |pattern_length| is one of 1, 2, or 4 and that the offset
  // and length are aligned to the natural alignment of the value.
  // The target buffer must be compatible with the devices owned by this
  // device queue and be allocated with BufferUsage::kTransfer.
  virtual Status FillBuffer(Buffer* target_buffer, device_size_t target_offset,
                            device_size_t length, const void* pattern,
                            size_t pattern_length) = 0;
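
  // Example: a sketch zero-filling an entire buffer with a 4-byte pattern
  // (byte_length() is assumed here to return the buffer's full extent):
  //
  //   uint32_t zero = 0;
  //   RETURN_IF_ERROR(command_buffer->FillBuffer(
  //       target_buffer, 0, target_buffer->byte_length(), &zero,
  //       sizeof(zero)));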

  // Hints to the device queue that the given buffer will not be used again.
  // After encoding a discard the buffer contents will be considered undefined.
  // This is because the discard may be used to elide write backs to host
  // memory or aggressively reuse the allocation for other purposes.
  //
  // For buffers allocated with MemoryType::kTransient this may allow
  // the device queue to reclaim the memory used by the buffer earlier than
  // otherwise possible.
  virtual Status DiscardBuffer(Buffer* buffer) = 0;

  // Updates a range of the given target buffer from the source host memory.
  // The source host memory is copied immediately into the command buffer and
  // occupies command buffer space. It is strongly recommended that large
  // buffer updates are performed via CopyBuffer where there is the
  // possibility of a zero-copy path.
  // The |source_buffer| may be released by the caller immediately after this
  // call returns.
  // The |target_buffer| must be compatible with the devices owned by this
  // device queue and be allocated with BufferUsage::kTransfer.
  virtual Status UpdateBuffer(const void* source_buffer,
                              device_size_t source_offset,
                              Buffer* target_buffer,
                              device_size_t target_offset,
                              device_size_t length) = 0;

  // Copies a range of one buffer to another.
  // Both buffers must be compatible with the devices owned by this device
  // queue and be allocated with BufferUsage::kTransfer. Though the source and
  // target buffer may be the same, the ranges must not overlap (as with
  // memcpy).
  //
  // This can be used to perform device->host, host->device, and
  // device->device copies.
  virtual Status CopyBuffer(Buffer* source_buffer, device_size_t source_offset,
                            Buffer* target_buffer, device_size_t target_offset,
                            device_size_t length) = 0;
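
  // Example: a sketch of a small inline upload into a staging buffer followed
  // by a device-side copy (|staging_buffer| and |device_buffer| are assumed
  // to have been allocated elsewhere):
  //
  //   RETURN_IF_ERROR(command_buffer->UpdateBuffer(
  //       host_data, 0, staging_buffer, 0, data_length));
  //   RETURN_IF_ERROR(command_buffer->CopyBuffer(
  //       staging_buffer, 0, device_buffer, 0, data_length));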

  // Dispatches an execution request.
  // The request may execute overlapped with any other transfer operation or
  // dispatch made within the same barrier-defined sequence.
  //
  // The executable specified must be registered for use with the device
  // driver owning this queue. It must not be unregistered until all requests
  // that use it have completed.
  //
  // Fails if the queue does not support dispatch operations (as indicated by
  // can_dispatch).
  virtual Status Dispatch(const DispatchRequest& dispatch_request) = 0;

 protected:
  CommandBuffer(Allocator* allocator, CommandBufferModeBitfield mode,
                CommandCategoryBitfield command_categories)
      : allocator_(allocator),
        mode_(mode),
        command_categories_(command_categories) {}

 private:
  Allocator* const allocator_;
  const CommandBufferModeBitfield mode_;
  const CommandCategoryBitfield command_categories_;
};

}  // namespace hal
}  // namespace iree

#endif  // THIRD_PARTY_MLIR_EDGE_IREE_HAL_COMMAND_BUFFER_H_