Skip to content

Commit

Permalink
Merge tag 'misc-habanalabs-next-2019-11-21' of git://people.freedeskt…
Browse files Browse the repository at this point in the history
…op.org/~gabbayo/linux into char-misc-next

Oded writes:

This tag contains the following changes for kernel 5.5:

- MMU code improvements that includes:
  - Distinguish between "normal" unmapping and unmapping that is done as
    part of the tear-down of a user process. This improves performance of
    unmapping during reset of the device.
  - Add future ASIC support in generic MMU code.

- Improve device reset code by adding more protection around accessing the
  device during the reset process.

- Add new H/W queue type for future ASIC support

- Add more information to be retrieved by users through INFO IOCTL:
  - clock rate
  - board name
  - reset counters

- Small bug fixes and minor improvements to code.

* tag 'misc-habanalabs-next-2019-11-21' of git://people.freedesktop.org/~gabbayo/linux: (31 commits)
  habanalabs: add more protection of device during reset
  habanalabs: flush EQ workers in hard reset
  habanalabs: make the reset code more consistent
  habanalabs: expose reset counters via existing INFO IOCTL
  habanalabs: make code more concise
  habanalabs: use defines for F/W files
  habanalabs: remove prints on successful device initialization
  habanalabs: remove unnecessary checks
  habanalabs: invalidate MMU cache only once
  habanalabs: skip VA block list update in reset flow
  habanalabs: optimize MMU unmap
  habanalabs: prevent read/write from/to the device during hard reset
  habanalabs: split MMU properties to PCI/DRAM
  habanalabs: re-factor MMU masks and documentation
  habanalabs: type specific MMU cache invalidation
  habanalabs: re-factor memory module code
  habanalabs: export uapi defines to user-space
  habanalabs: don't print error when queues are full
  habanalabs: increase max jobs number to 512
  habanalabs: set ETR as non-secured
  ...
  • Loading branch information
gregkh committed Nov 21, 2019
2 parents 599ea01 + 5feccdd commit b78cda7
Show file tree
Hide file tree
Showing 20 changed files with 1,159 additions and 542 deletions.
127 changes: 87 additions & 40 deletions drivers/misc/habanalabs/command_submission.c
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,18 @@ static void cs_put(struct hl_cs *cs)
kref_put(&cs->refcount, cs_do_release);
}

static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job)
{
/*
* Patched CB is created for external queues jobs, and for H/W queues
* jobs if the user CB was allocated by driver and MMU is disabled.
*/
return (job->queue_type == QUEUE_TYPE_EXT ||
(job->queue_type == QUEUE_TYPE_HW &&
job->is_kernel_allocated_cb &&
!hdev->mmu_enable));
}

/*
* cs_parser - parse the user command submission
*
Expand All @@ -91,11 +103,13 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
parser.patched_cb = NULL;
parser.user_cb = job->user_cb;
parser.user_cb_size = job->user_cb_size;
parser.ext_queue = job->ext_queue;
parser.queue_type = job->queue_type;
parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb;
job->patched_cb = NULL;

rc = hdev->asic_funcs->cs_parser(hdev, &parser);
if (job->ext_queue) {

if (is_cb_patched(hdev, job)) {
if (!rc) {
job->patched_cb = parser.patched_cb;
job->job_cb_size = parser.patched_cb_size;
Expand Down Expand Up @@ -124,7 +138,7 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job)
{
struct hl_cs *cs = job->cs;

if (job->ext_queue) {
if (is_cb_patched(hdev, job)) {
hl_userptr_delete_list(hdev, &job->userptr_list);

/*
Expand All @@ -140,6 +154,19 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job)
}
}

/* For H/W queue jobs, if a user CB was allocated by driver and MMU is
* enabled, the user CB isn't released in cs_parser() and thus should be
* released here.
*/
if (job->queue_type == QUEUE_TYPE_HW &&
job->is_kernel_allocated_cb && hdev->mmu_enable) {
spin_lock(&job->user_cb->lock);
job->user_cb->cs_cnt--;
spin_unlock(&job->user_cb->lock);

hl_cb_put(job->user_cb);
}

/*
* This is the only place where there can be multiple threads
* modifying the list at the same time
Expand All @@ -150,7 +177,8 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job)

hl_debugfs_remove_job(hdev, job);

if (job->ext_queue)
if (job->queue_type == QUEUE_TYPE_EXT ||
job->queue_type == QUEUE_TYPE_HW)
cs_put(cs);

kfree(job);
Expand Down Expand Up @@ -387,39 +415,43 @@ static void job_wq_completion(struct work_struct *work)
free_job(hdev, job);
}

static struct hl_cb *validate_queue_index(struct hl_device *hdev,
struct hl_cb_mgr *cb_mgr,
struct hl_cs_chunk *chunk,
bool *ext_queue)
static int validate_queue_index(struct hl_device *hdev,
struct hl_cs_chunk *chunk,
enum hl_queue_type *queue_type,
bool *is_kernel_allocated_cb)
{
struct asic_fixed_properties *asic = &hdev->asic_prop;
struct hw_queue_properties *hw_queue_prop;
u32 cb_handle;
struct hl_cb *cb;

/* Assume external queue */
*ext_queue = true;

hw_queue_prop = &asic->hw_queues_props[chunk->queue_index];

if ((chunk->queue_index >= HL_MAX_QUEUES) ||
(hw_queue_prop->type == QUEUE_TYPE_NA)) {
dev_err(hdev->dev, "Queue index %d is invalid\n",
chunk->queue_index);
return NULL;
return -EINVAL;
}

if (hw_queue_prop->driver_only) {
dev_err(hdev->dev,
"Queue index %d is restricted for the kernel driver\n",
chunk->queue_index);
return NULL;
} else if (hw_queue_prop->type == QUEUE_TYPE_INT) {
*ext_queue = false;
return (struct hl_cb *) (uintptr_t) chunk->cb_handle;
return -EINVAL;
}

/* Retrieve CB object */
*queue_type = hw_queue_prop->type;
*is_kernel_allocated_cb = !!hw_queue_prop->requires_kernel_cb;

return 0;
}

static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev,
struct hl_cb_mgr *cb_mgr,
struct hl_cs_chunk *chunk)
{
struct hl_cb *cb;
u32 cb_handle;

cb_handle = (u32) (chunk->cb_handle >> PAGE_SHIFT);

cb = hl_cb_get(hdev, cb_mgr, cb_handle);
Expand All @@ -444,20 +476,23 @@ static struct hl_cb *validate_queue_index(struct hl_device *hdev,
return NULL;
}

struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, bool ext_queue)
struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
enum hl_queue_type queue_type, bool is_kernel_allocated_cb)
{
struct hl_cs_job *job;

job = kzalloc(sizeof(*job), GFP_ATOMIC);
if (!job)
return NULL;

job->ext_queue = ext_queue;
job->queue_type = queue_type;
job->is_kernel_allocated_cb = is_kernel_allocated_cb;

if (job->ext_queue) {
if (is_cb_patched(hdev, job))
INIT_LIST_HEAD(&job->userptr_list);

if (job->queue_type == QUEUE_TYPE_EXT)
INIT_WORK(&job->finish_work, job_wq_completion);
}

return job;
}
Expand All @@ -470,7 +505,7 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
struct hl_cs_job *job;
struct hl_cs *cs;
struct hl_cb *cb;
bool ext_queue_present = false;
bool int_queues_only = true;
u32 size_to_copy;
int rc, i, parse_cnt;

Expand Down Expand Up @@ -514,23 +549,33 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
/* Validate ALL the CS chunks before submitting the CS */
for (i = 0, parse_cnt = 0 ; i < num_chunks ; i++, parse_cnt++) {
struct hl_cs_chunk *chunk = &cs_chunk_array[i];
bool ext_queue;
enum hl_queue_type queue_type;
bool is_kernel_allocated_cb;

rc = validate_queue_index(hdev, chunk, &queue_type,
&is_kernel_allocated_cb);
if (rc)
goto free_cs_object;

cb = validate_queue_index(hdev, &hpriv->cb_mgr, chunk,
&ext_queue);
if (ext_queue) {
ext_queue_present = true;
if (is_kernel_allocated_cb) {
cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
if (!cb) {
rc = -EINVAL;
goto free_cs_object;
}
} else {
cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle;
}

job = hl_cs_allocate_job(hdev, ext_queue);
if (queue_type == QUEUE_TYPE_EXT || queue_type == QUEUE_TYPE_HW)
int_queues_only = false;

job = hl_cs_allocate_job(hdev, queue_type,
is_kernel_allocated_cb);
if (!job) {
dev_err(hdev->dev, "Failed to allocate a new job\n");
rc = -ENOMEM;
if (ext_queue)
if (is_kernel_allocated_cb)
goto release_cb;
else
goto free_cs_object;
Expand All @@ -540,7 +585,7 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
job->cs = cs;
job->user_cb = cb;
job->user_cb_size = chunk->cb_size;
if (job->ext_queue)
if (is_kernel_allocated_cb)
job->job_cb_size = cb->size;
else
job->job_cb_size = chunk->cb_size;
Expand All @@ -553,10 +598,11 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
/*
* Increment CS reference. When CS reference is 0, CS is
* done and can be signaled to user and free all its resources
* Only increment for JOB on external queues, because only
* for those JOBs we get completion
* Only increment for JOB on external or H/W queues, because
* only for those JOBs we get completion
*/
if (job->ext_queue)
if (job->queue_type == QUEUE_TYPE_EXT ||
job->queue_type == QUEUE_TYPE_HW)
cs_get(cs);

hl_debugfs_add_job(hdev, job);
Expand All @@ -570,19 +616,20 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
}
}

if (!ext_queue_present) {
if (int_queues_only) {
dev_err(hdev->dev,
"Reject CS %d.%llu because no external queues jobs\n",
"Reject CS %d.%llu because only internal queues jobs are present\n",
cs->ctx->asid, cs->sequence);
rc = -EINVAL;
goto free_cs_object;
}

rc = hl_hw_queue_schedule_cs(cs);
if (rc) {
dev_err(hdev->dev,
"Failed to submit CS %d.%llu to H/W queues, error %d\n",
cs->ctx->asid, cs->sequence, rc);
if (rc != -EAGAIN)
dev_err(hdev->dev,
"Failed to submit CS %d.%llu to H/W queues, error %d\n",
cs->ctx->asid, cs->sequence, rc);
goto free_cs_object;
}

Expand Down
Loading

0 comments on commit b78cda7

Please sign in to comment.