From 54380172740c843fcf79102d555065f7a8f3872c Mon Sep 17 00:00:00 2001 From: Tarun Karra Date: Tue, 5 Feb 2013 15:38:51 -0800 Subject: [PATCH] msm: kgsl: Allow fault tolerance userspace control Allow userspace to control the fault tolerance policy. This allows the fault tolerance policy to be controlled using a panel file. Change-Id: I991edf6f082384bc69454058fe5df3b5f535aa4c Signed-off-by: Tarun Karra --- drivers/gpu/msm/adreno.c | 94 +++++++++++++++++++++++--------- drivers/gpu/msm/adreno.h | 13 +---- drivers/gpu/msm/adreno_debugfs.c | 12 ++-- include/linux/msm_kgsl.h | 16 ++++++ 4 files changed, 94 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c index 4f460ed5e018..5f538238f1ad 100644 --- a/drivers/gpu/msm/adreno.c +++ b/drivers/gpu/msm/adreno.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -1599,8 +1600,8 @@ static int adreno_setup_ft_data(struct kgsl_device *device, ret = -ENOMEM; goto done; } - ft_data->fault = device->mmu.fault; - ft_data->step = adreno_dev->ft_policy; + + ft_data->status = 0; /* find the start of bad command sequence in rb */ context = idr_find(&device->context_idr, ft_data->context_id); @@ -1620,6 +1621,12 @@ static int adreno_setup_ft_data(struct kgsl_device *device, ft_data->start_of_replay_cmds = rb_rptr; + if (!adreno_dev->ft_policy) + adreno_dev->ft_policy = KGSL_FT_DEFAULT_POLICY; + + ft_data->ft_policy = adreno_dev->ft_policy; + + adreno_context = context->devctxt; if (adreno_context->flags & CTXT_FLAGS_PREAMBLE) { if (ft_data->ib1) { @@ -1789,14 +1796,17 @@ _adreno_ft(struct kgsl_device *device, KGSL_FT_INFO(device, "Context found\n"); } - /* Extract valid contents from rb which can still be executed after - * hang */ + /* + * Extract valid contents from rb which can still be executed after + * hang + */ adreno_ringbuffer_extract(rb, ft_data); /* Check if we detected a long running IB, * if true do not attempt replay of bad cmds */ if (adreno_dev->long_ib) { if 
(_adreno_check_long_ib(device)) { + ft_data->status = 1; _adreno_debug_ft_info(device, ft_data); goto play_good_cmds; } else { @@ -1806,24 +1816,30 @@ _adreno_ft(struct kgsl_device *device, } /* Do not try the bad commands if hang is due to a fault */ - if (ft_data->fault) { - KGSL_FT_ERR(device, "Page fault no FT for bad context\n"); + if (device->mmu.fault) { + KGSL_FT_ERR(device, "MMU fault skipping bad cmds\n"); + device->mmu.fault = 0; goto play_good_cmds; } - if (ft_data->step == FT_REPLAY_BAD_CTXT_CMDS) { + if (ft_data->ft_policy & KGSL_FT_DISABLE) { + KGSL_FT_ERR(device, "NO FT policy play only good cmds\n"); + goto play_good_cmds; + } + + if (ft_data->ft_policy & KGSL_FT_REPLAY) { ret = _adreno_ft_resubmit_rb(device, rb, context, ft_data, ft_data->bad_rb_buffer, ft_data->bad_rb_size); - if (ret) - KGSL_FT_INFO(device, "Replay unsuccessful\n"); - else + if (ret) { + KGSL_FT_ERR(device, "Replay unsuccessful\n"); + ft_data->status = 1; + } else goto play_good_cmds; - } - if (ft_data->step == FT_NOP_IB_BAD_CTXT_CMDS) { + if (ft_data->ft_policy & KGSL_FT_SKIPIB) { for (i = 0; i < ft_data->bad_rb_size; i++) { if ((ft_data->bad_rb_buffer[i] == @@ -1841,7 +1857,7 @@ _adreno_ft(struct kgsl_device *device, if ((i == (ft_data->bad_rb_size)) || (!ft_data->ib1)) { KGSL_FT_ERR(device, "Bad IB to NOP not found\n"); - ft_data->step = FT_FAIL_BAD_CTXT_CMDS; + ft_data->status = 1; goto play_good_cmds; } @@ -1849,13 +1865,15 @@ _adreno_ft(struct kgsl_device *device, ft_data->bad_rb_buffer, ft_data->bad_rb_size); if (ret) { - KGSL_FT_INFO(device, "NOP faulty IB unsuccessful\n"); - ft_data->step = FT_SKIP_EOF_BAD_CTXT_CMDS; - } else + KGSL_FT_ERR(device, "NOP faulty IB unsuccessful\n"); + ft_data->status = 1; + } else { + ft_data->status = 0; goto play_good_cmds; + } } - if (ft_data->step == FT_SKIP_EOF_BAD_CTXT_CMDS) { + if (ft_data->ft_policy & KGSL_FT_SKIPFRAME) { for (i = 0; i < ft_data->bad_rb_size; i++) { if (ft_data->bad_rb_buffer[i] == @@ -1869,6 +1887,8 @@ 
_adreno_ft(struct kgsl_device *device, next IB submission */ if (i == ft_data->bad_rb_size) { adreno_context->flags |= CTXT_FLAGS_SKIP_EOF; + KGSL_FT_INFO(device, + "EOF not found in RB, skip next issueib till EOF\n"); ft_data->bad_rb_buffer[0] = cp_nop_packet(i); } @@ -1876,15 +1896,17 @@ _adreno_ft(struct kgsl_device *device, ft_data->bad_rb_buffer, ft_data->bad_rb_size); if (ret) { - KGSL_FT_INFO(device, "Skip EOF unsuccessful\n"); - ft_data->step = FT_FAIL_BAD_CTXT_CMDS; - } else + KGSL_FT_ERR(device, "Skip EOF unsuccessful\n"); + ft_data->status = 1; + } else { + ft_data->status = 0; goto play_good_cmds; + } } play_good_cmds: - if (ft_data->step == FT_FAIL_BAD_CTXT_CMDS) + if (ft_data->status) KGSL_FT_ERR(device, "Bad context commands failed\n"); else { KGSL_FT_INFO(device, "Bad context commands success\n"); @@ -1948,8 +1970,6 @@ adreno_ft(struct kgsl_device *device, ret = _adreno_ft(device, ft_data); - KGSL_FT_CRIT(device, "POLICY: 0x%X\n", ft_data->step); - if (-EAGAIN == ret) { /* setup new fault tolerance parameters and retry, this * means more than 1 contexts are causing hang */ @@ -1989,8 +2009,8 @@ adreno_ft(struct kgsl_device *device, done: adreno_set_max_ts_for_bad_ctxs(device); adreno_mark_context_status(device, ret); - if (ret) - KGSL_FT_ERR(device, "Fault Tolerance failed\n"); + KGSL_FT_ERR(device, "policy 0x%X status 0x%x\n", + ft_data->ft_policy, ret); return ret; } @@ -2199,6 +2219,30 @@ static int adreno_setproperty(struct kgsl_device *device, status = 0; } break; + case KGSL_PROP_FAULT_TOLERANCE: { + struct kgsl_ft_config ftd; + + if (sizebytes != sizeof(ftd)) + break; + + if (copy_from_user(&ftd, (void __user *) value, + sizeof(ftd))) { + status = -EFAULT; + break; + } + + if (ftd.ft_policy) + adreno_dev->ft_policy = ftd.ft_policy; + else + adreno_dev->ft_policy = KGSL_FT_DEFAULT_POLICY; + + if (ftd.ft_pm_dump) + device->pm_dump_enable = 1; + else + device->pm_dump_enable = 0; + + } + break; default: break; } diff --git 
a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h index 417a041304f9..3dc70b0aa299 100644 --- a/drivers/gpu/msm/adreno.h +++ b/drivers/gpu/msm/adreno.h @@ -169,20 +169,13 @@ struct adreno_ft_data { unsigned int *good_rb_buffer; unsigned int good_rb_size; unsigned int last_valid_ctx_id; - unsigned int step; - int fault; + unsigned int status; + unsigned int ft_policy; + unsigned int err_code; unsigned int start_of_replay_cmds; unsigned int replay_for_snapshot; }; -enum ft_steps { - FT_REPLAY_BAD_CTXT_CMDS = 0, - FT_NOP_IB_BAD_CTXT_CMDS, - FT_SKIP_EOF_BAD_CTXT_CMDS, - FT_FAIL_BAD_CTXT_CMDS, - FT_PLAY_GOOD_CTXT_CMDS -}; - extern struct adreno_gpudev adreno_a2xx_gpudev; extern struct adreno_gpudev adreno_a3xx_gpudev; diff --git a/drivers/gpu/msm/adreno_debugfs.c b/drivers/gpu/msm/adreno_debugfs.c index 79eb0c3aef09..70379e5deaca 100644 --- a/drivers/gpu/msm/adreno_debugfs.c +++ b/drivers/gpu/msm/adreno_debugfs.c @@ -66,13 +66,13 @@ void adreno_debugfs_init(struct kgsl_device *device) &adreno_dev->fast_hang_detect); /* * FT policy can be set to any of the options below. 
- * FT_REPLAY_BAD_CTXT_CMDS -> try replay, NOP IB and skip to EOF - * of bad cmds - * FT_NOT_IB_BAD_CTXT_CMDS -> try replay and NOP IB of bad cmds - * FT_SKIP_EOF_BAD_CTXT_CMDS -> try skip to EOF of bad cmds - * by default set FT policy to FT_REPLAY_BAD_CTXT_CMDS + * KGSL_FT_DISABLE -> BIT(0) Set to disable FT + * KGSL_FT_REPLAY -> BIT(1) Set to enable replay + * KGSL_FT_SKIPIB -> BIT(2) Set to skip IB + * KGSL_FT_SKIPFRAME -> BIT(3) Set to skip frame + * by default set FT policy to KGSL_FT_DEFAULT_POLICY */ - adreno_dev->ft_policy = FT_REPLAY_BAD_CTXT_CMDS; + adreno_dev->ft_policy = KGSL_FT_DEFAULT_POLICY; debugfs_create_u32("fault_tolerance_policy", 0644, device->d_debugfs, &adreno_dev->ft_policy); /* By default enable long IB detection */ diff --git a/include/linux/msm_kgsl.h b/include/linux/msm_kgsl.h index 6912087f284d..2a1f801d7147 100644 --- a/include/linux/msm_kgsl.h +++ b/include/linux/msm_kgsl.h @@ -163,6 +163,22 @@ enum kgsl_property_type { KGSL_PROP_VERSION = 0x00000008, KGSL_PROP_GPU_RESET_STAT = 0x00000009, KGSL_PROP_PWRCTRL = 0x0000000E, + KGSL_PROP_FAULT_TOLERANCE = 0x00000011, +}; + +/* Fault Tolerance policy flags */ +#define KGSL_FT_DISABLE 0x00000001 +#define KGSL_FT_REPLAY 0x00000002 +#define KGSL_FT_SKIPIB 0x00000004 +#define KGSL_FT_SKIPFRAME 0x00000008 +#define KGSL_FT_DEFAULT_POLICY (KGSL_FT_REPLAY + KGSL_FT_SKIPIB) + +/* Fault tolerance config */ +struct kgsl_ft_config { + unsigned int ft_policy; /* GPU fault tolerance policy flags */ + unsigned int ft_pm_dump; /* KGSL enable postmortem dump */ + unsigned int ft_detect_ms; + unsigned int ft_dos_timeout_ms; }; struct kgsl_shadowprop {