drm/radeon/kms: add htile support to the cs checker v3

For 6xx+.  Required for mesa to use htile support for HiZ/HiS.
Userspace will check for radeon version 2.14, which is bumped either
by the tiling patch or the stream out patch. This patch only adds support
for htile relocation which should be enough for any userspace
to implement the hyperz (using htile buffer) feature.

v2: Jerome: Fix size checking for htile buffer.
v3: Jerome: Adapt on top of r600/evergreen cs checker changes,
            also check htile surface in case only stencil is
            present.

Signed-off-by: Pierre-Eric Pelloux-Prayer <pelloux@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
This commit is contained in:
Jerome Glisse 2012-03-21 19:18:21 -04:00 committed by Dave Airlie
parent 017d213f64
commit 88f50c8074
7 changed files with 385 additions and 123 deletions

View file

@ -80,6 +80,9 @@ struct evergreen_cs_track {
bool cb_dirty;
bool db_dirty;
bool streamout_dirty;
u32 htile_offset;
u32 htile_surface;
struct radeon_bo *htile_bo;
};
static u32 evergreen_cs_get_aray_mode(u32 tiling_flags)
@ -144,6 +147,9 @@ static void evergreen_cs_track_init(struct evergreen_cs_track *track)
track->db_s_read_bo = NULL;
track->db_s_write_bo = NULL;
track->db_dirty = true;
track->htile_bo = NULL;
track->htile_offset = 0xFFFFFFFF;
track->htile_surface = 0;
for (i = 0; i < 4; i++) {
track->vgt_strmout_size[i] = 0;
@ -444,6 +450,62 @@ static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned i
return 0;
}
/*
 * evergreen_cs_track_validate_htile() - check that the tracked htile buffer
 * is large enough for a depth/stencil surface.
 * @p:   command stream parser; p->track holds the tracked htile state
 * @nbx: surface width as computed by the caller (treated as pixels here)
 * @nby: surface height as computed by the caller (treated as pixels here)
 *
 * The dimensions are first rounded up to the htile alignment, which depends
 * on the LINEAR bit of DB_HTILE_SURFACE and on the number of pipes, then
 * converted to a count of 8x8 htiles at 4 bytes each.
 *
 * Returns 0 if the htile data fits in the buffer object, -EINVAL if no htile
 * buffer was set, the pipe count is not 1, 2, 4 or 8 (tiled layout only), or
 * the buffer object is too small.
 */
static int evergreen_cs_track_validate_htile(struct radeon_cs_parser *p,
					     unsigned nbx, unsigned nby)
{
	struct evergreen_cs_track *track = p->track;
	unsigned long long size;

	if (track->htile_bo == NULL) {
		dev_warn(p->dev, "%s:%d htile enabled without htile surface 0x%08x\n",
			 __func__, __LINE__, track->db_z_info);
		return -EINVAL;
	}

	if (G_028ABC_LINEAR(track->htile_surface)) {
		/* pitch must be 16 htiles aligned == 16 * 8 pixel aligned */
		nbx = round_up(nbx, 16 * 8);
		/* height is npipes htiles aligned == npipes * 8 pixel aligned */
		nby = round_up(nby, track->npipes * 8);
	} else {
		/* tiled layout: alignment (in htiles, times 8 pixels) depends on npipes */
		switch (track->npipes) {
		case 8:
			nbx = round_up(nbx, 64 * 8);
			nby = round_up(nby, 64 * 8);
			break;
		case 4:
			nbx = round_up(nbx, 64 * 8);
			nby = round_up(nby, 32 * 8);
			break;
		case 2:
			nbx = round_up(nbx, 32 * 8);
			nby = round_up(nby, 32 * 8);
			break;
		case 1:
			nbx = round_up(nbx, 32 * 8);
			nby = round_up(nby, 16 * 8);
			break;
		default:
			dev_warn(p->dev, "%s:%d invalid num pipes %d\n",
				 __func__, __LINE__, track->npipes);
			return -EINVAL;
		}
	}

	/* compute number of htile */
	nbx = nbx / 8;
	nby = nby / 8;

	/* 4 bytes per htile; widen before multiplying so the product cannot wrap */
	size = (unsigned long long)nbx * nby * 4;

	/*
	 * htile_offset holds the raw DB_HTILE_DATA_BASE value.  The register
	 * handler relocates that same word with (gpu_offset >> 8), so it is
	 * expressed in 256-byte units and must be scaled to bytes before it
	 * is compared against the buffer size.
	 */
	size += (unsigned long long)track->htile_offset << 8;

	if (size > radeon_bo_size(track->htile_bo)) {
		dev_warn(p->dev, "%s:%d htile surface too small %lu for %llu (%u %u)\n",
			 __func__, __LINE__, radeon_bo_size(track->htile_bo),
			 size, nbx, nby);
		return -EINVAL;
	}
	return 0;
}
static int evergreen_cs_track_validate_stencil(struct radeon_cs_parser *p)
{
struct evergreen_cs_track *track = p->track;
@ -530,6 +592,14 @@ static int evergreen_cs_track_validate_stencil(struct radeon_cs_parser *p)
return -EINVAL;
}
/* hyperz */
if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) {
r = evergreen_cs_track_validate_htile(p, surf.nbx, surf.nby);
if (r) {
return r;
}
}
return 0;
}
@ -617,6 +687,14 @@ static int evergreen_cs_track_validate_depth(struct radeon_cs_parser *p)
return -EINVAL;
}
/* hyperz */
if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) {
r = evergreen_cs_track_validate_htile(p, surf.nbx, surf.nby);
if (r) {
return r;
}
}
return 0;
}
@ -850,7 +928,7 @@ static int evergreen_cs_track_check(struct radeon_cs_parser *p)
return r;
}
/* Check depth buffer */
if (G_028800_Z_WRITE_ENABLE(track->db_depth_control)) {
if (G_028800_Z_ENABLE(track->db_depth_control)) {
r = evergreen_cs_track_validate_depth(p);
if (r)
return r;
@ -1616,6 +1694,23 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
track->cb_color_bo[tmp] = reloc->robj;
track->cb_dirty = true;
break;
case DB_HTILE_DATA_BASE:
r = evergreen_cs_packet_next_reloc(p, &reloc);
if (r) {
dev_warn(p->dev, "bad SET_CONTEXT_REG "
"0x%04X\n", reg);
return -EINVAL;
}
track->htile_offset = radeon_get_ib_value(p, idx);
ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
track->htile_bo = reloc->robj;
track->db_dirty = true;
break;
case DB_HTILE_SURFACE:
/* 8x8 only */
track->htile_surface = radeon_get_ib_value(p, idx);
track->db_dirty = true;
break;
case CB_IMMED0_BASE:
case CB_IMMED1_BASE:
case CB_IMMED2_BASE:
@ -1628,7 +1723,6 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
case CB_IMMED9_BASE:
case CB_IMMED10_BASE:
case CB_IMMED11_BASE:
case DB_HTILE_DATA_BASE:
case SQ_PGM_START_FS:
case SQ_PGM_START_ES:
case SQ_PGM_START_VS:

View file

@ -991,6 +991,14 @@
#define G_028008_SLICE_MAX(x) (((x) >> 13) & 0x7FF)
#define C_028008_SLICE_MAX 0xFF001FFF
#define DB_HTILE_DATA_BASE 0x28014
/*
 * DB_HTILE_SURFACE register and its field helpers.
 * Field layout: bit 0 = HTILE_WIDTH, bit 1 = HTILE_HEIGHT, bit 2 = LINEAR.
 * S_ places a field value at its register position, G_ extracts the field
 * from a register value, C_ is the AND-mask that clears the field.
 */
#define DB_HTILE_SURFACE 0x28ABC
/* HTILE_WIDTH: bit 0 (no shift needed) */
#define S_028ABC_HTILE_WIDTH(x) ((x) & 0x1)
#define G_028ABC_HTILE_WIDTH(x) ((x) & 0x1)
#define C_028ABC_HTILE_WIDTH 0xFFFFFFFE
/* HTILE_HEIGHT: bit 1 */
#define S_028ABC_HTILE_HEIGHT(x) (((x) & 0x1) << 1)
#define G_028ABC_HTILE_HEIGHT(x) (((x) >> 1) & 0x1)
#define C_028ABC_HTILE_HEIGHT 0xFFFFFFFD
/* LINEAR: bit 2, read-only helper */
#define G_028ABC_LINEAR(x) (((x) >> 2) & 0x1)
#define DB_Z_INFO 0x28040
# define Z_ARRAY_MODE(x) ((x) << 4)
# define DB_TILE_SPLIT(x) (((x) & 0x7) << 8)

View file

@ -78,6 +78,9 @@ struct r600_cs_track {
bool cb_dirty;
bool db_dirty;
bool streamout_dirty;
struct radeon_bo *htile_bo;
u64 htile_offset;
u32 htile_surface;
};
#define FMT_8_BIT(fmt, vc) [fmt] = { 1, 1, 1, vc, CHIP_R600 }
@ -321,6 +324,9 @@ static void r600_cs_track_init(struct r600_cs_track *track)
track->db_depth_size_idx = 0;
track->db_depth_control = 0xFFFFFFFF;
track->db_dirty = true;
track->htile_bo = NULL;
track->htile_offset = 0xFFFFFFFF;
track->htile_surface = 0;
for (i = 0; i < 4; i++) {
track->vgt_strmout_size[i] = 0;
@ -455,82 +461,23 @@ static int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i)
return 0;
}
static int r600_cs_track_check(struct radeon_cs_parser *p)
static int r600_cs_track_validate_db(struct radeon_cs_parser *p)
{
struct r600_cs_track *track = p->track;
u32 tmp;
int r, i;
volatile u32 *ib = p->ib->ptr;
/* on legacy kernel we don't perform advanced check */
if (p->rdev == NULL)
return 0;
/* check streamout */
if (track->streamout_dirty && track->vgt_strmout_en) {
for (i = 0; i < 4; i++) {
if (track->vgt_strmout_buffer_en & (1 << i)) {
if (track->vgt_strmout_bo[i]) {
u64 offset = (u64)track->vgt_strmout_bo_offset[i] +
(u64)track->vgt_strmout_size[i];
if (offset > radeon_bo_size(track->vgt_strmout_bo[i])) {
DRM_ERROR("streamout %d bo too small: 0x%llx, 0x%lx\n",
i, offset,
radeon_bo_size(track->vgt_strmout_bo[i]));
return -EINVAL;
}
} else {
dev_warn(p->dev, "No buffer for streamout %d\n", i);
return -EINVAL;
}
}
}
track->streamout_dirty = false;
}
if (track->sx_misc_kill_all_prims)
return 0;
/* check that we have a cb for each enabled target, we don't check
* shader_mask because it seems mesa isn't always setting it :(
*/
if (track->cb_dirty) {
tmp = track->cb_target_mask;
for (i = 0; i < 8; i++) {
if ((tmp >> (i * 4)) & 0xF) {
/* at least one component is enabled */
if (track->cb_color_bo[i] == NULL) {
dev_warn(p->dev, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n",
__func__, __LINE__, track->cb_target_mask, track->cb_shader_mask, i);
return -EINVAL;
}
/* perform rewrite of CB_COLOR[0-7]_SIZE */
r = r600_cs_track_validate_cb(p, i);
if (r)
return r;
}
}
track->cb_dirty = false;
}
if (track->db_dirty) {
/* Check depth buffer */
if (G_028800_STENCIL_ENABLE(track->db_depth_control) ||
G_028800_Z_ENABLE(track->db_depth_control)) {
u32 nviews, bpe, ntiles, size, slice_tile_max;
u32 height, height_align, pitch, pitch_align, depth_align;
u32 nviews, bpe, ntiles, size, slice_tile_max, tmp;
u32 height_align, pitch_align, depth_align;
u32 pitch = 8192;
u32 height = 8192;
u64 base_offset, base_align;
struct array_mode_checker array_check;
int array_mode;
volatile u32 *ib = p->ib->ptr;
if (track->db_bo == NULL) {
dev_warn(p->dev, "z/stencil with no depth buffer\n");
return -EINVAL;
}
if (G_028010_TILE_SURFACE_ENABLE(track->db_depth_info)) {
dev_warn(p->dev, "this kernel doesn't support z/stencil htile\n");
return -EINVAL;
}
switch (G_028010_FORMAT(track->db_depth_info)) {
case V_028010_DEPTH_16:
bpe = 2;
@ -612,7 +559,7 @@ static int r600_cs_track_check(struct radeon_cs_parser *p)
return -EINVAL;
}
if (!IS_ALIGNED(base_offset, base_align)) {
dev_warn(p->dev, "%s offset[%d] 0x%llx, 0x%llx, %d not aligned\n", __func__, i,
dev_warn(p->dev, "%s offset 0x%llx, 0x%llx, %d not aligned\n", __func__,
base_offset, base_align, array_mode);
return -EINVAL;
}
@ -628,9 +575,202 @@ static int r600_cs_track_check(struct radeon_cs_parser *p)
return -EINVAL;
}
}
/* hyperz */
if (G_028010_TILE_SURFACE_ENABLE(track->db_depth_info)) {
unsigned long size;
unsigned nbx, nby;
if (track->htile_bo == NULL) {
dev_warn(p->dev, "%s:%d htile enabled without htile surface 0x%08x\n",
__func__, __LINE__, track->db_depth_info);
return -EINVAL;
}
if ((track->db_depth_size & 0xFFFFFC00) == 0xFFFFFC00) {
dev_warn(p->dev, "%s:%d htile can't be enabled with bogus db_depth_size 0x%08x\n",
__func__, __LINE__, track->db_depth_size);
return -EINVAL;
}
nbx = pitch;
nby = height;
if (G_028D24_LINEAR(track->htile_surface)) {
/* nbx must be 16 htiles aligned == 16 * 8 pixel aligned */
nbx = round_up(nbx, 16 * 8);
/* nby is npipes htiles aligned == npipes * 8 pixel aligned */
nby = round_up(nby, track->npipes * 8);
} else {
/* htile width & height (8 or 4) make a 2-bit number */
tmp = track->htile_surface & 3;
/* align is htile align * 8, htile align vary according to
* number of pipe and tile width and nby
*/
switch (track->npipes) {
case 8:
switch (tmp) {
case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
nbx = round_up(nbx, 64 * 8);
nby = round_up(nby, 64 * 8);
break;
case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
nbx = round_up(nbx, 64 * 8);
nby = round_up(nby, 32 * 8);
break;
case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
nbx = round_up(nbx, 32 * 8);
nby = round_up(nby, 32 * 8);
break;
default:
return -EINVAL;
}
break;
case 4:
switch (tmp) {
case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
nbx = round_up(nbx, 64 * 8);
nby = round_up(nby, 32 * 8);
break;
case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
nbx = round_up(nbx, 32 * 8);
nby = round_up(nby, 32 * 8);
break;
case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
nbx = round_up(nbx, 32 * 8);
nby = round_up(nby, 16 * 8);
break;
default:
return -EINVAL;
}
break;
case 2:
switch (tmp) {
case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
nbx = round_up(nbx, 32 * 8);
nby = round_up(nby, 32 * 8);
break;
case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
nbx = round_up(nbx, 32 * 8);
nby = round_up(nby, 16 * 8);
break;
case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
nbx = round_up(nbx, 16 * 8);
nby = round_up(nby, 16 * 8);
break;
default:
return -EINVAL;
}
break;
case 1:
switch (tmp) {
case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
nbx = round_up(nbx, 32 * 8);
nby = round_up(nby, 16 * 8);
break;
case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
nbx = round_up(nbx, 16 * 8);
nby = round_up(nby, 16 * 8);
break;
case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
nbx = round_up(nbx, 16 * 8);
nby = round_up(nby, 8 * 8);
break;
default:
return -EINVAL;
}
break;
default:
dev_warn(p->dev, "%s:%d invalid num pipes %d\n",
__func__, __LINE__, track->npipes);
return -EINVAL;
}
}
/* compute number of htile */
nbx = G_028D24_HTILE_WIDTH(track->htile_surface) ? nbx / 8 : nbx / 4;
nby = G_028D24_HTILE_HEIGHT(track->htile_surface) ? nby / 8 : nby / 4;
size = nbx * nby * 4;
size += track->htile_offset;
if (size > radeon_bo_size(track->htile_bo)) {
dev_warn(p->dev, "%s:%d htile surface too small %ld for %ld (%d %d)\n",
__func__, __LINE__, radeon_bo_size(track->htile_bo),
size, nbx, nby);
return -EINVAL;
}
}
track->db_dirty = false;
return 0;
}
static int r600_cs_track_check(struct radeon_cs_parser *p)
{
struct r600_cs_track *track = p->track;
u32 tmp;
int r, i;
/* on legacy kernel we don't perform advanced check */
if (p->rdev == NULL)
return 0;
/* check streamout */
if (track->streamout_dirty && track->vgt_strmout_en) {
for (i = 0; i < 4; i++) {
if (track->vgt_strmout_buffer_en & (1 << i)) {
if (track->vgt_strmout_bo[i]) {
u64 offset = (u64)track->vgt_strmout_bo_offset[i] +
(u64)track->vgt_strmout_size[i];
if (offset > radeon_bo_size(track->vgt_strmout_bo[i])) {
DRM_ERROR("streamout %d bo too small: 0x%llx, 0x%lx\n",
i, offset,
radeon_bo_size(track->vgt_strmout_bo[i]));
return -EINVAL;
}
} else {
dev_warn(p->dev, "No buffer for streamout %d\n", i);
return -EINVAL;
}
}
}
track->streamout_dirty = false;
}
if (track->sx_misc_kill_all_prims)
return 0;
/* check that we have a cb for each enabled target, we don't check
* shader_mask because it seems mesa isn't always setting it :(
*/
if (track->cb_dirty) {
tmp = track->cb_target_mask;
for (i = 0; i < 8; i++) {
if ((tmp >> (i * 4)) & 0xF) {
/* at least one component is enabled */
if (track->cb_color_bo[i] == NULL) {
dev_warn(p->dev, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n",
__func__, __LINE__, track->cb_target_mask, track->cb_shader_mask, i);
return -EINVAL;
}
/* perform rewrite of CB_COLOR[0-7]_SIZE */
r = r600_cs_track_validate_cb(p, i);
if (r)
return r;
}
}
track->cb_dirty = false;
}
/* Check depth buffer */
if (track->db_dirty && (G_028800_STENCIL_ENABLE(track->db_depth_control) ||
G_028800_Z_ENABLE(track->db_depth_control))) {
r = r600_cs_track_validate_db(p);
if (r)
return r;
}
return 0;
}
@ -1244,6 +1384,21 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
track->db_dirty = true;
break;
case DB_HTILE_DATA_BASE:
r = r600_cs_packet_next_reloc(p, &reloc);
if (r) {
dev_warn(p->dev, "bad SET_CONTEXT_REG "
"0x%04X\n", reg);
return -EINVAL;
}
track->htile_offset = radeon_get_ib_value(p, idx) << 8;
ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
track->htile_bo = reloc->robj;
track->db_dirty = true;
break;
case DB_HTILE_SURFACE:
track->htile_surface = radeon_get_ib_value(p, idx);
track->db_dirty = true;
break;
case SQ_PGM_START_FS:
case SQ_PGM_START_ES:
case SQ_PGM_START_VS:

View file

@ -195,6 +195,14 @@
#define PREZ_MUST_WAIT_FOR_POSTZ_DONE (1 << 31)
#define DB_DEPTH_BASE 0x2800C
#define DB_HTILE_DATA_BASE 0x28014
/*
 * DB_HTILE_SURFACE register and its field helpers.
 * Field layout: bit 0 = HTILE_WIDTH, bit 1 = HTILE_HEIGHT, bit 2 = LINEAR.
 * S_ places a field value at its register position, G_ extracts the field
 * from a register value, C_ is the AND-mask that clears the field.
 */
#define DB_HTILE_SURFACE 0x28d24
/* HTILE_WIDTH: bit 0 (no shift needed) */
#define S_028D24_HTILE_WIDTH(x) ((x) & 0x1)
#define G_028D24_HTILE_WIDTH(x) ((x) & 0x1)
#define C_028D24_HTILE_WIDTH 0xFFFFFFFE
/* HTILE_HEIGHT: bit 1 */
#define S_028D24_HTILE_HEIGHT(x) (((x) & 0x1) << 1)
#define G_028D24_HTILE_HEIGHT(x) (((x) >> 1) & 0x1)
#define C_028D24_HTILE_HEIGHT 0xFFFFFFFD
/* LINEAR: bit 2, read-only helper */
#define G_028D24_LINEAR(x) (((x) >> 2) & 0x1)
#define DB_WATERMARKS 0x9838
#define DEPTH_FREE(x) ((x) << 0)
#define DEPTH_FLUSH(x) ((x) << 5)

View file

@ -509,7 +509,6 @@ cayman 0x9400
0x00028AA8 IA_MULTI_VGT_PARAM
0x00028AB4 VGT_REUSE_OFF
0x00028AB8 VGT_VTX_CNT_EN
0x00028ABC DB_HTILE_SURFACE
0x00028AC0 DB_SRESULTS_COMPARE_STATE0
0x00028AC4 DB_SRESULTS_COMPARE_STATE1
0x00028AC8 DB_PRELOAD_CONTROL

View file

@ -519,7 +519,6 @@ evergreen 0x9400
0x00028AA4 VGT_INSTANCE_STEP_RATE_1
0x00028AB4 VGT_REUSE_OFF
0x00028AB8 VGT_VTX_CNT_EN
0x00028ABC DB_HTILE_SURFACE
0x00028AC0 DB_SRESULTS_COMPARE_STATE0
0x00028AC4 DB_SRESULTS_COMPARE_STATE1
0x00028AC8 DB_PRELOAD_CONTROL

View file

@ -713,7 +713,6 @@ r600 0x9400
0x0000A710 TD_VS_SAMPLER17_BORDER_RED
0x00009508 TA_CNTL_AUX
0x0002802C DB_DEPTH_CLEAR
0x00028D24 DB_HTILE_SURFACE
0x00028D34 DB_PREFETCH_LIMIT
0x00028D30 DB_PRELOAD_CONTROL
0x00028D0C DB_RENDER_CONTROL