mirror of
https://github.com/followmsi/android_kernel_google_msm.git
synced 2024-11-06 23:17:41 +00:00
mmc: msm_sdcc: fix race conditions in runtime PM
commit 386ad800716b6660d40c677b57ee4bf8e4430329 msm-3.4 What is the race condition? (NOTE: RPM stands for Runtime Power Management) 1. SDCC is in RPM_SUSPENEDED state and SDCC platform suspend gets triggered and then system goes into suspend. 2. During platform resume, SDCC platform resume is triggered which blindly sets the pending_resume flag in SDCC host structure. 3. If new MMC transfer request comes then MMC block driver calls mmc_claim_host() which internally calls msmsdcc_enable(). 4. msmsdcc_enable() checks for following 3 conditions to be true: 1. host->sdcc_suspended == true 2. host->pending_resume == true 3. pm_runtime_suspended() == false But as SDCC state is RPM_SUSPENDED, 3rd condition is not be satisfied so msmsdcc_enable() don't clear the host->pending_resume flag and simply calls pm_runtime_get_sync() to resume the SDCC. Once SDCC is resumed, host->sdcc_suspended = false, runtime_status = RPM_ACTIVE but host->pending_resume is still set. 5. Now after RPM idle timeout, SDCC runtime suspend gets triggered which calls SDCC driver's runtime suspend callback (msmsdcc_runtime_suspend). Note that before executing the RPM suspend calllback, SDCC RPM status is set to RPM_SUSPENDING. 6. As SDCC driver's runtime suspend callback gets executed, it sets the host->sdcc_suspended to true. But now before the RPM framework sets the SDCC RPM status to RPM_SUSPENDED, on other active CPU core, new MMC transfer reques comes in which would first call msmsdcc_enable() and all of the 3 conditions below is true: 1. host->sdcc_suspended == true 2. host->pending_resume == true 3. pm_runtime_suspended() == false (RPM status is RPM_SUSPENDING) As these conditions are satisfied, msmsdcc_enable() does not call pm_runtime_get_sync(), instead just calls pm_runtime_get_noresume() and msmsdcc_runtime_resume(). This means even after execution of msmsdcc_enable(), SDCC RPM status is either RPM_SUSPENDING or RPM_SUSPENDED. 7. RPM suspend framework on 1st core sets the SDCC RPM status to RPM_SUSPENDED once SDCC runtime suspend callback returns. 8. Now once MMC transfer request is completed on other core, it will call msmsdcc_disable(). This function calls pm_runtime_put_sync() which returns -EAGAIN error as RPM status is already RPM_SUSPENED. This error gets returned to MMC layer so MMC layer thinks that SDCC is still enabled and skips call to msmsdcc_enable() until msmsdcc_disable() succeeds. 8. Note when msmsdcc_disable() returned error, RPM usage_counter was set to 0 so next call to msmsdcc_disable() decrements RPM usage_counter to -1 (remember msmsdcc_enable() was skipped). 9. Now new MMC request comes in and it will first call msmsdcc_enable() which calls pm_runtime_get_sync() and it sets the usage_counter to 0 and blindly resumes the SDCC as it was already suspended. After this RPM status will be RPM_ACTIVE. 10. Once MMC request is processed, it will call smsdcc_disable() which calls pm_runtime_put_sync() and it decrements RPM usage counter to -1 and skips scheduling runtime suspend callback as RPM usage counter is not 0. RPM status remains in RPM_ACTIVE. 11. Now onwards for every new MMC transfer requests, 9 and 10 repeats and SDCC always stays in RPM_ACTIVE state forever. How is this race condition fixed? Above race is created because host->pending_resume is remaining sticky. Following changes are done to ensure that pending_resume gets set and cleared at appropriate places: 1. In SDCC platform resume callback, set the host->pending_resume flag only if RPM status is RPM_ACTIVE. 2. Clear the pending_resume flag once msmsdcc_runtime_resume() is completed. 3. In msmsdcc_enable() function, if pending_resume flag is set skip calling pm_runtime_get_sync() instead directly call msmsdcc_runtime_resume() because RPM status is RPM_ACTIVE. In addition, this patch adds WARN() messages in case of failures in msmsdcc_enable() and msmsdcc_disable() functions to capture more details in error conditions. CRs-Fixed: 373338 Change-Id: I50d1bd63480c668dd2b83f01567f912661f0c606 Signed-off-by: Subhash Jadavani <subhashj@codeaurora.org>
This commit is contained in:
parent
21cfe24ba2
commit
4a31675ea0
1 changed files with 17 additions and 14 deletions
|
@ -3291,6 +3291,9 @@ static void msmsdcc_print_rpm_info(struct msmsdcc_host *host)
|
|||
{
|
||||
struct device *dev = mmc_dev(host->mmc);
|
||||
|
||||
pr_info("%s: PM: sdcc_suspended=%d, pending_resume=%d, sdcc_suspending=%d\n",
|
||||
mmc_hostname(host->mmc), host->sdcc_suspended,
|
||||
host->pending_resume, host->sdcc_suspending);
|
||||
pr_info("%s: RPM: runtime_status=%d, usage_count=%d,"
|
||||
" is_suspended=%d, disable_depth=%d, runtime_error=%d,"
|
||||
" request_pending=%d, request=%d\n",
|
||||
|
@ -3312,8 +3315,7 @@ static int msmsdcc_enable(struct mmc_host *mmc)
|
|||
if (mmc->card && mmc_card_sdio(mmc->card))
|
||||
goto out;
|
||||
|
||||
if (host->sdcc_suspended && host->pending_resume &&
|
||||
!pm_runtime_suspended(dev)) {
|
||||
if (host->sdcc_suspended && host->pending_resume) {
|
||||
host->pending_resume = false;
|
||||
pm_runtime_get_noresume(dev);
|
||||
rc = msmsdcc_runtime_resume(dev);
|
||||
|
@ -3334,8 +3336,8 @@ static int msmsdcc_enable(struct mmc_host *mmc)
|
|||
|
||||
skip_get_sync:
|
||||
if (rc < 0) {
|
||||
pr_info("%s: %s: failed with error %d", mmc_hostname(mmc),
|
||||
__func__, rc);
|
||||
WARN(1, "%s: %s: failed with error %d\n", mmc_hostname(mmc),
|
||||
__func__, rc);
|
||||
msmsdcc_print_rpm_info(host);
|
||||
return rc;
|
||||
}
|
||||
|
@ -3361,15 +3363,9 @@ static int msmsdcc_disable(struct mmc_host *mmc)
|
|||
|
||||
rc = pm_runtime_put_sync(mmc->parent);
|
||||
|
||||
/*
|
||||
* Ignore -EAGAIN as that is not fatal, it means that
|
||||
* either runtime usage count is non-zero or the runtime
|
||||
* pm itself is disabled or not in proper state to process
|
||||
* idle notification.
|
||||
*/
|
||||
if (rc < 0 && (rc != -EAGAIN)) {
|
||||
pr_info("%s: %s: failed with error %d", mmc_hostname(mmc),
|
||||
__func__, rc);
|
||||
if (rc < 0) {
|
||||
WARN(1, "%s: %s: failed with error %d\n", mmc_hostname(mmc),
|
||||
__func__, rc);
|
||||
msmsdcc_print_rpm_info(host);
|
||||
return rc;
|
||||
}
|
||||
|
@ -6261,6 +6257,7 @@ msmsdcc_runtime_resume(struct device *dev)
|
|||
|
||||
wake_unlock(&host->sdio_suspend_wlock);
|
||||
}
|
||||
host->pending_resume = false;
|
||||
pr_debug("%s: %s: end\n", mmc_hostname(mmc), __func__);
|
||||
return 0;
|
||||
}
|
||||
|
@ -6333,7 +6330,13 @@ static int msmsdcc_pm_resume(struct device *dev)
|
|||
|
||||
if (mmc->card && mmc_card_sdio(mmc->card))
|
||||
rc = msmsdcc_runtime_resume(dev);
|
||||
else
|
||||
/*
|
||||
* As runtime PM is enabled before calling the device's platform resume
|
||||
* callback, we use the pm_runtime_suspended API to know if SDCC is
|
||||
* really runtime suspended or not and set the pending_resume flag only
|
||||
* if its not runtime suspended.
|
||||
*/
|
||||
else if (!pm_runtime_suspended(dev))
|
||||
host->pending_resume = true;
|
||||
|
||||
if (host->plat->status_irq) {
|
||||
|
|
Loading…
Reference in a new issue