mirror of
https://github.com/followmsi/android_kernel_google_msm.git
synced 2024-11-06 23:17:41 +00:00
msm: kgsl: Support user specified timeouts in adreno_waittimestamp
We have always allowed the user to specify a timeout in waittimestamp but we never obeyed it. Reorganize the function to be less convoluted and support a user specified timeout (or spin forever if 0 is passed). Change-Id: Ic0dedbadbc368336d05b14d66829205631046515 Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org> Signed-off-by: Rajeev Kulkarni <krajeev@codeaurora.org>
This commit is contained in:
parent
0449252ee1
commit
dd57e44819
1 changed files with 205 additions and 157 deletions
|
@ -2281,177 +2281,225 @@ unsigned int adreno_hang_detect(struct kgsl_device *device,
|
|||
return hang_detected;
|
||||
}
|
||||
|
||||
|
||||
/* MUST be called with the device mutex held */
|
||||
static int adreno_waittimestamp(struct kgsl_device *device,
|
||||
struct kgsl_context *context,
|
||||
unsigned int timestamp,
|
||||
unsigned int msecs)
|
||||
/**
|
||||
* adreno_handle_hang - Process a hang detected in adreno_waittimestamp
|
||||
* @device - pointer to a KGSL device structure
|
||||
* @context - pointer to the active KGSL context
|
||||
* @timestamp - the timestamp that the process was waiting for
|
||||
*
|
||||
* Process a possible GPU hang and try to recover from it cleanly
|
||||
*/
|
||||
static int adreno_handle_hang(struct kgsl_device *device,
|
||||
struct kgsl_context *context, unsigned int timestamp)
|
||||
{
|
||||
long status = 0;
|
||||
uint io = 1;
|
||||
static uint io_cnt;
|
||||
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
|
||||
struct kgsl_pwrctrl *pwr = &device->pwrctrl;
|
||||
struct adreno_context *adreno_ctx = context->devctxt;
|
||||
int retries = 0;
|
||||
unsigned int ts_issued;
|
||||
unsigned int context_id = _get_context_id(context);
|
||||
unsigned int time_elapsed = 0;
|
||||
unsigned int prev_reg_val[hang_detect_regs_count];
|
||||
unsigned int wait;
|
||||
unsigned int retry_ts_cmp = 0;
|
||||
unsigned int retry_ts_cmp_msecs = KGSL_SYNCOBJ_SERVER_TIMEOUT;
|
||||
unsigned int ts_issued;
|
||||
|
||||
memset(prev_reg_val, 0, sizeof(prev_reg_val));
|
||||
/* Do one last check to see if we somehow made it through */
|
||||
if (kgsl_check_timestamp(device, context, timestamp))
|
||||
return 0;
|
||||
|
||||
ts_issued = adreno_dev->ringbuffer.timestamp[context_id];
|
||||
|
||||
/* Don't wait forever, set a max value for now */
|
||||
if (msecs == KGSL_TIMEOUT_DEFAULT)
|
||||
msecs = adreno_dev->wait_timeout;
|
||||
|
||||
/*
|
||||
* With user generated ts, if this check fails perform this check
|
||||
* again after 'retry_ts_cmp_msecs' milliseconds.
|
||||
*/
|
||||
if (timestamp_cmp(timestamp, ts_issued) > 0) {
|
||||
if (!(adreno_ctx->flags & CTXT_FLAGS_USER_GENERATED_TS)) {
|
||||
if (context && !context->wait_on_invalid_ts) {
|
||||
KGSL_DRV_ERR(device,
|
||||
"Cannot wait for invalid ts <%d:0x%x>, "
|
||||
"last issued ts <%d:0x%x>\n",
|
||||
context_id, timestamp, context_id, ts_issued);
|
||||
/*
|
||||
* Prevent the above message from spamming the
|
||||
* kernel logs and causing a watchdog
|
||||
*/
|
||||
context->wait_on_invalid_ts = true;
|
||||
}
|
||||
status = -EINVAL;
|
||||
goto done;
|
||||
} else
|
||||
retry_ts_cmp = 1;
|
||||
} else if (context && context->wait_on_invalid_ts) {
|
||||
/* Once we wait for a valid ts reset the invalid wait flag */
|
||||
context->wait_on_invalid_ts = false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Make the first timeout interval 100 msecs and then try to kick the
|
||||
* wptr again. This helps to ensure the wptr is updated properly. If
|
||||
* the requested timeout is less than 100 msecs, then wait 20msecs which
|
||||
* is the minimum amount of time we can safely wait at 100HZ
|
||||
*/
|
||||
|
||||
if (msecs == 0 || msecs >= 100)
|
||||
wait = 100;
|
||||
else
|
||||
wait = 20;
|
||||
|
||||
do {
|
||||
/*
|
||||
* If the context ID is invalid, we are in a race with
|
||||
* the context being destroyed by userspace so bail.
|
||||
*/
|
||||
if (context_id == KGSL_CONTEXT_INVALID) {
|
||||
KGSL_DRV_WARN(device, "context was detached");
|
||||
status = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
if (kgsl_check_timestamp(device, context, timestamp)) {
|
||||
/* if the timestamp happens while we're not
|
||||
* waiting, there's a chance that an interrupt
|
||||
* will not be generated and thus the timestamp
|
||||
* work needs to be queued.
|
||||
*/
|
||||
queue_work(device->work_queue, &device->ts_expired_ws);
|
||||
status = 0;
|
||||
goto done;
|
||||
}
|
||||
|
||||
io_cnt = (io_cnt + 1) % 100;
|
||||
if (io_cnt <
|
||||
pwr->pwrlevels[pwr->active_pwrlevel].io_fraction)
|
||||
io = 0;
|
||||
|
||||
if ((retries > 0) &&
|
||||
(adreno_hang_detect(device, prev_reg_val)))
|
||||
goto hang_dump;
|
||||
|
||||
mutex_unlock(&device->mutex);
|
||||
/* We need to make sure that the process is
|
||||
* placed in wait-q before its condition is called
|
||||
*/
|
||||
status = kgsl_wait_event_interruptible_timeout(
|
||||
device->wait_queue,
|
||||
kgsl_check_interrupt_timestamp(device,
|
||||
context, timestamp),
|
||||
msecs_to_jiffies(wait), io);
|
||||
|
||||
mutex_lock(&device->mutex);
|
||||
|
||||
if (status > 0) {
|
||||
/*completed before the wait finished */
|
||||
status = 0;
|
||||
goto done;
|
||||
} else if (status < 0) {
|
||||
/*an error occurred*/
|
||||
goto done;
|
||||
}
|
||||
/*this wait timed out*/
|
||||
|
||||
time_elapsed += wait;
|
||||
wait = KGSL_TIMEOUT_PART;
|
||||
|
||||
if (!retry_ts_cmp)
|
||||
retries++;
|
||||
else if (time_elapsed >= retry_ts_cmp_msecs) {
|
||||
ts_issued =
|
||||
adreno_dev->ringbuffer.timestamp[context_id];
|
||||
if (timestamp_cmp(timestamp, ts_issued) > 0) {
|
||||
if (context && !context->wait_on_invalid_ts) {
|
||||
KGSL_DRV_ERR(device,
|
||||
"Cannot wait for user-generated ts <%d:0x%x>, "
|
||||
"not submitted within server timeout period. "
|
||||
"last issued ts <%d:0x%x>\n",
|
||||
context_id, timestamp, context_id,
|
||||
ts_issued);
|
||||
context->wait_on_invalid_ts = true;
|
||||
}
|
||||
status = -EINVAL;
|
||||
goto done;
|
||||
} else if (context && context->wait_on_invalid_ts) {
|
||||
context->wait_on_invalid_ts = false;
|
||||
}
|
||||
retry_ts_cmp = 0;
|
||||
}
|
||||
|
||||
} while (!msecs || time_elapsed < msecs);
|
||||
|
||||
hang_dump:
|
||||
/*
|
||||
* Check if timestamp has retired here because we may have hit
|
||||
* recovery which can take some time and cause waiting threads
|
||||
* to timeout
|
||||
*/
|
||||
if (kgsl_check_timestamp(device, context, timestamp))
|
||||
goto done;
|
||||
status = -ETIMEDOUT;
|
||||
KGSL_DRV_ERR(device,
|
||||
"Device hang detected while waiting for timestamp: "
|
||||
"<%d:0x%x>, last submitted timestamp: <%d:0x%x>, "
|
||||
"wptr: 0x%x\n",
|
||||
context_id, timestamp, context_id, ts_issued,
|
||||
adreno_dev->ringbuffer.wptr);
|
||||
if (!adreno_dump_and_recover(device)) {
|
||||
/* The timestamp that this process wanted
|
||||
* to wait on may be invalid or expired now
|
||||
* after successful recovery */
|
||||
status = 0;
|
||||
|
||||
/* Return 0 after a successful recovery */
|
||||
if (!adreno_dump_and_recover(device))
|
||||
return 0;
|
||||
|
||||
return -ETIMEDOUT;
|
||||
}
|
||||
|
||||
static int _check_pending_timestamp(struct kgsl_device *device,
|
||||
struct kgsl_context *context, unsigned int timestamp)
|
||||
{
|
||||
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
|
||||
unsigned int context_id = _get_context_id(context);
|
||||
unsigned int ts_issued;
|
||||
|
||||
if (context_id == KGSL_CONTEXT_INVALID)
|
||||
return -EINVAL;
|
||||
|
||||
ts_issued = adreno_dev->ringbuffer.timestamp[context_id];
|
||||
|
||||
if (timestamp_cmp(timestamp, ts_issued) <= 0)
|
||||
return 0;
|
||||
|
||||
if (context && !context->wait_on_invalid_ts) {
|
||||
KGSL_DRV_ERR(device, "Cannot wait for invalid ts <%d:0x%x>, last issued ts <%d:0x%x>\n",
|
||||
context_id, timestamp, context_id, ts_issued);
|
||||
|
||||
/* Only print this message once */
|
||||
context->wait_on_invalid_ts = true;
|
||||
}
|
||||
done:
|
||||
return (int)status;
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/**
|
||||
* adreno_waittimestamp - sleep while waiting for the specified timestamp
|
||||
* @device - pointer to a KGSL device structure
|
||||
* @context - pointer to the active kgsl context
|
||||
* @timestamp - GPU timestamp to wait for
|
||||
* @msecs - amount of time to wait (in milliseconds)
|
||||
*
|
||||
* Wait 'msecs' milliseconds for the specified timestamp to expire. Wake up
|
||||
* every KGSL_TIMEOUT_PART milliseconds to check for a device hang and process
|
||||
* one if it happened. Otherwise, spend most of our time in an interruptible
|
||||
* wait for the timestamp interrupt to be processed. This function must be
|
||||
* called with the mutex already held.
|
||||
*/
|
||||
static int adreno_waittimestamp(struct kgsl_device *device,
|
||||
struct kgsl_context *context,
|
||||
unsigned int timestamp,
|
||||
unsigned int msecs)
|
||||
{
|
||||
static unsigned int io_cnt;
|
||||
struct adreno_context *adreno_ctx = context ? context->devctxt : NULL;
|
||||
struct kgsl_pwrctrl *pwr = &device->pwrctrl;
|
||||
unsigned int context_id = _get_context_id(context);
|
||||
unsigned int prev_reg_val[hang_detect_regs_count];
|
||||
unsigned int time_elapsed = 0;
|
||||
unsigned int wait;
|
||||
int ts_compare = 1;
|
||||
int io, ret = -ETIMEDOUT;
|
||||
|
||||
/* Get out early if the context has already been destroyed */
|
||||
|
||||
if (context_id == KGSL_CONTEXT_INVALID) {
|
||||
KGSL_DRV_WARN(device, "context was detached");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check to see if the requested timestamp is "newer" than the last
|
||||
* timestamp issued. If it is complain once and return error. Only
|
||||
* print the message once per context so that badly behaving
|
||||
* applications don't spam the logs
|
||||
*/
|
||||
|
||||
if (adreno_ctx && !(adreno_ctx->flags & CTXT_FLAGS_USER_GENERATED_TS)) {
|
||||
if (_check_pending_timestamp(device, context, timestamp))
|
||||
return -EINVAL;
|
||||
|
||||
/* Reset the invalid timestamp flag on a valid wait */
|
||||
context->wait_on_invalid_ts = false;
|
||||
}
|
||||
|
||||
|
||||
/* Clear the registers used for hang detection */
|
||||
memset(prev_reg_val, 0, sizeof(prev_reg_val));
|
||||
|
||||
/*
|
||||
* On the first time through the loop only wait 100ms.
|
||||
* this gives enough time for the engine to start moving and oddly
|
||||
* provides better hang detection results than just going the full
|
||||
* KGSL_TIMEOUT_PART right off the bat. The exception to this rule
|
||||
* is if msecs happens to be < 100ms then just use the full timeout
|
||||
*/
|
||||
|
||||
wait = 100;
|
||||
|
||||
do {
|
||||
long status;
|
||||
|
||||
if (wait > (msecs - time_elapsed))
|
||||
wait = msecs - time_elapsed;
|
||||
|
||||
/*
|
||||
* if the timestamp happens while we're not
|
||||
* waiting, there's a chance that an interrupt
|
||||
* will not be generated and thus the timestamp
|
||||
* work needs to be queued.
|
||||
*/
|
||||
|
||||
if (kgsl_check_timestamp(device, context, timestamp)) {
|
||||
queue_work(device->work_queue, &device->ts_expired_ws);
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Check to see if the GPU is hung */
|
||||
if (adreno_hang_detect(device, prev_reg_val)) {
|
||||
ret = adreno_handle_hang(device, context, timestamp);
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* For proper power accounting sometimes we need to call
|
||||
* io_wait_interruptible_timeout and sometimes we need to call
|
||||
* plain old wait_interruptible_timeout. We call the regular
|
||||
* timeout N times out of 100, where N is a number specified by
|
||||
* the current power level
|
||||
*/
|
||||
|
||||
io_cnt = (io_cnt + 1) % 100;
|
||||
io = (io_cnt < pwr->pwrlevels[pwr->active_pwrlevel].io_fraction)
|
||||
? 0 : 1;
|
||||
|
||||
mutex_unlock(&device->mutex);
|
||||
|
||||
/* Wait for a timestamp event */
|
||||
status = kgsl_wait_event_interruptible_timeout(
|
||||
device->wait_queue,
|
||||
kgsl_check_interrupt_timestamp(device, context,
|
||||
timestamp), msecs_to_jiffies(wait), io);
|
||||
|
||||
mutex_lock(&device->mutex);
|
||||
|
||||
/*
|
||||
* If status is non zero then either the condition was satisfied
|
||||
* or there was an error. In either event, this is the end of
|
||||
* the line for us
|
||||
*/
|
||||
|
||||
if (status != 0) {
|
||||
ret = (status > 0) ? 0 : (int) status;
|
||||
break;
|
||||
}
|
||||
|
||||
time_elapsed += wait;
|
||||
|
||||
/* If user specified timestamps are being used, wait at least
|
||||
* KGSL_SYNCOBJ_SERVER_TIMEOUT msecs for the user driver to
|
||||
* issue an IB for a timestamp before checking to see if the
|
||||
* current timestamp we are waiting for is valid or not
|
||||
*/
|
||||
|
||||
if (ts_compare && (adreno_ctx &&
|
||||
(adreno_ctx->flags & CTXT_FLAGS_USER_GENERATED_TS))) {
|
||||
if (time_elapsed > KGSL_SYNCOBJ_SERVER_TIMEOUT) {
|
||||
ret = _check_pending_timestamp(device, context,
|
||||
timestamp);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
/* Don't do this check again */
|
||||
ts_compare = 0;
|
||||
|
||||
/*
|
||||
* Reset the invalid timestamp flag on a valid
|
||||
* wait
|
||||
*/
|
||||
context->wait_on_invalid_ts = false;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* all subsequent trips through the loop wait the full
|
||||
* KGSL_TIMEOUT_PART interval
|
||||
*/
|
||||
wait = KGSL_TIMEOUT_PART;
|
||||
|
||||
} while (!msecs || time_elapsed < msecs);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static unsigned int adreno_readtimestamp(struct kgsl_device *device,
|
||||
|
|
Loading…
Reference in a new issue