kms: evergreen/ni big endian accel support

Based on 6xx/7xx patches from Cédric Cano.

Signed-off-by: Alex Deucher <alexdeucher@gmail.com>
This commit is contained in:
Alex Deucher
2011-02-11 17:21:10 -05:00
parent e8dc728a54
commit f1dc419c98
6 changed files with 130 additions and 43 deletions

View File

@@ -1188,7 +1188,11 @@ evergreen_draw_auto(ScrnInfoPtr pScrn, draw_config_t *draw_conf)
BEGIN_BATCH(10);
EREG(VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
PACK3(IT_INDEX_TYPE, 1);
#if X_BYTE_ORDER == X_BIG_ENDIAN
E32(IT_INDEX_TYPE_SWAP_MODE(ENDIAN_8IN32) | draw_conf->index_type);
#else
E32(draw_conf->index_type);
#endif
PACK3(IT_NUM_INSTANCES, 1);
E32(draw_conf->num_instances);
PACK3(IT_DRAW_INDEX_AUTO, 2);
@@ -1227,6 +1231,9 @@ void evergreen_finish_op(ScrnInfoPtr pScrn, int vtx_size)
vtx_res.dst_sel_y = SQ_SEL_Y;
vtx_res.dst_sel_z = SQ_SEL_Z;
vtx_res.dst_sel_w = SQ_SEL_W;
#if X_BYTE_ORDER == X_BIG_ENDIAN
vtx_res.endian = SQ_ENDIAN_8IN32;
#endif
evergreen_set_vtx_resource(pScrn, &vtx_res, RADEON_GEM_DOMAIN_GTT);
/* Draw */

View File

@@ -127,9 +127,15 @@ EVERGREENPrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
} else if (accel_state->dst_obj.bpp == 16) {
cb_conf.format = COLOR_5_6_5;
cb_conf.comp_swap = 2; /* RGB */
#if X_BYTE_ORDER == X_BIG_ENDIAN
cb_conf.endian = ENDIAN_8IN16;
#endif
} else {
cb_conf.format = COLOR_8_8_8_8;
cb_conf.comp_swap = 1; /* ARGB */
#if X_BYTE_ORDER == X_BIG_ENDIAN
cb_conf.endian = ENDIAN_8IN32;
#endif
}
cb_conf.source_format = EXPORT_4C_16BPC;
cb_conf.blend_clamp = 1;
@@ -795,6 +801,19 @@ static Bool EVERGREENTextureSetup(PicturePtr pPict, PixmapPtr pPix,
tex_res.bo = accel_state->src_obj[unit].bo;
tex_res.mip_bo = accel_state->src_obj[unit].bo;
#if X_BYTE_ORDER == X_BIG_ENDIAN
switch (accel_state->src_obj[unit].bpp) {
case 16:
tex_res.endian = SQ_ENDIAN_8IN16;
break;
case 32:
tex_res.endian = SQ_ENDIAN_8IN32;
break;
default :
break;
}
#endif
/* component swizzles */
switch (pPict->format) {
case PICT_a1r5g5b5:
@@ -1224,6 +1243,18 @@ static Bool EVERGREENPrepareComposite(int op, PicturePtr pSrcPicture,
cb_conf.pmask = 0xf;
if (accel_state->dst_obj.tiling_flags == 0)
cb_conf.array_mode = 1;
#if X_BYTE_ORDER == X_BIG_ENDIAN
switch (dst_obj.bpp) {
case 16:
cb_conf.endian = ENDIAN_8IN16;
break;
case 32:
cb_conf.endian = ENDIAN_8IN32;
break;
default:
break;
}
#endif
evergreen_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain);
if (pMask)

View File

@@ -110,6 +110,9 @@ enum {
/* Shifts the given value right by two bits (i.e. drops its two lowest bits). */
#define IT_WAIT_ADDR(addr) ((addr) >> 2)
/*
 * IT_INDEX_TYPE
 *
 * Positions a swap-mode value at bit 2 so that it can be OR'ed with the
 * index type held in the lower bits of the same dword.
 */
#define IT_INDEX_TYPE_SWAP_MODE(mode) ((mode) << 2)
enum {
SQ_LDS_ALLOC_PS = 0x288ec,

View File

@@ -110,7 +110,11 @@ int evergreen_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader)
FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
shader[i++] = VTX_DWORD2(OFFSET(0),
ENDIAN_SWAP(ENDIAN_NONE),
#if X_BYTE_ORDER == X_BIG_ENDIAN
ENDIAN_SWAP(SQ_ENDIAN_8IN32),
#else
ENDIAN_SWAP(SQ_ENDIAN_NONE),
#endif
CONST_BUF_NO_STRIDE(0),
MEGA_FETCH(1),
ALT_CONST(0),
@@ -331,7 +335,11 @@ int evergreen_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader)
FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
shader[i++] = VTX_DWORD2(OFFSET(0),
ENDIAN_SWAP(ENDIAN_NONE),
#if X_BYTE_ORDER == X_BIG_ENDIAN
ENDIAN_SWAP(SQ_ENDIAN_8IN32),
#else
ENDIAN_SWAP(SQ_ENDIAN_NONE),
#endif
CONST_BUF_NO_STRIDE(0),
MEGA_FETCH(1),
ALT_CONST(0),
@@ -358,7 +366,11 @@ int evergreen_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader)
FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
shader[i++] = VTX_DWORD2(OFFSET(8),
ENDIAN_SWAP(ENDIAN_NONE),
#if X_BYTE_ORDER == X_BIG_ENDIAN
ENDIAN_SWAP(SQ_ENDIAN_8IN32),
#else
ENDIAN_SWAP(SQ_ENDIAN_NONE),
#endif
CONST_BUF_NO_STRIDE(0),
MEGA_FETCH(0),
ALT_CONST(0),
@@ -689,7 +701,11 @@ int evergreen_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
shader[i++] = VTX_DWORD2(OFFSET(0),
ENDIAN_SWAP(ENDIAN_NONE),
#if X_BYTE_ORDER == X_BIG_ENDIAN
ENDIAN_SWAP(SQ_ENDIAN_8IN32),
#else
ENDIAN_SWAP(SQ_ENDIAN_NONE),
#endif
CONST_BUF_NO_STRIDE(0),
MEGA_FETCH(1),
ALT_CONST(0),
@@ -716,7 +732,11 @@ int evergreen_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
shader[i++] = VTX_DWORD2(OFFSET(8),
ENDIAN_SWAP(ENDIAN_NONE),
#if X_BYTE_ORDER == X_BIG_ENDIAN
ENDIAN_SWAP(SQ_ENDIAN_8IN32),
#else
ENDIAN_SWAP(SQ_ENDIAN_NONE),
#endif
CONST_BUF_NO_STRIDE(0),
MEGA_FETCH(0),
ALT_CONST(0),
@@ -2344,7 +2364,11 @@ int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
shader[i++] = VTX_DWORD2(OFFSET(0),
ENDIAN_SWAP(ENDIAN_NONE),
#if X_BYTE_ORDER == X_BIG_ENDIAN
ENDIAN_SWAP(SQ_ENDIAN_8IN32),
#else
ENDIAN_SWAP(SQ_ENDIAN_NONE),
#endif
CONST_BUF_NO_STRIDE(0),
MEGA_FETCH(1),
ALT_CONST(0),
@@ -2371,7 +2395,11 @@ int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
shader[i++] = VTX_DWORD2(OFFSET(8),
ENDIAN_SWAP(ENDIAN_NONE),
#if X_BYTE_ORDER == X_BIG_ENDIAN
ENDIAN_SWAP(SQ_ENDIAN_8IN32),
#else
ENDIAN_SWAP(SQ_ENDIAN_NONE),
#endif
CONST_BUF_NO_STRIDE(0),
MEGA_FETCH(0),
ALT_CONST(0),
@@ -2398,7 +2426,11 @@ int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
shader[i++] = VTX_DWORD2(OFFSET(16),
ENDIAN_SWAP(ENDIAN_NONE),
#if X_BYTE_ORDER == X_BIG_ENDIAN
ENDIAN_SWAP(SQ_ENDIAN_8IN32),
#else
ENDIAN_SWAP(SQ_ENDIAN_NONE),
#endif
CONST_BUF_NO_STRIDE(0),
MEGA_FETCH(0),
ALT_CONST(0),
@@ -2426,7 +2458,11 @@ int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
shader[i++] = VTX_DWORD2(OFFSET(0),
ENDIAN_SWAP(ENDIAN_NONE),
#if X_BYTE_ORDER == X_BIG_ENDIAN
ENDIAN_SWAP(SQ_ENDIAN_8IN32),
#else
ENDIAN_SWAP(SQ_ENDIAN_NONE),
#endif
CONST_BUF_NO_STRIDE(0),
MEGA_FETCH(1),
ALT_CONST(0),
@@ -2453,7 +2489,11 @@ int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
shader[i++] = VTX_DWORD2(OFFSET(8),
ENDIAN_SWAP(ENDIAN_NONE),
#if X_BYTE_ORDER == X_BIG_ENDIAN
ENDIAN_SWAP(SQ_ENDIAN_8IN32),
#else
ENDIAN_SWAP(SQ_ENDIAN_NONE),
#endif
CONST_BUF_NO_STRIDE(0),
MEGA_FETCH(0),
ALT_CONST(0),

View File

@@ -104,23 +104,23 @@
/* Identity wrappers: each expands to its argument unchanged. */
#define SRC_SEL_Z(sel) (sel)
#define SRC_SEL_W(sel) (sel)
/*
 * CF_DWORD0 -- ORs addr (low bits) with jmptbl placed at bit 24.
 * NOTE(review): two definitions appear back to back; they look like the
 * removed and added sides of a diff whose +/- markers were lost, the second
 * one adding a cpu_to_le32() wrapper (defined outside this text) -- confirm
 * against the repository.
 */
#define CF_DWORD0(addr, jmptbl) ((addr) | ((jmptbl) << 24))
#define CF_DWORD0(addr, jmptbl) cpu_to_le32(((addr) | ((jmptbl) << 24)))
/*
 * CF_DWORD1 -- ORs its fields into one word: pc at bit 0, cf_const at 3,
 * cond at 8, count at 10, vpm at 20, eop at 21, cf_inst at 22, wqm at 30 and
 * b at 31.
 * NOTE(review): two bodies follow the #define line (a bare expression, then
 * the same expression inside cpu_to_le32()); this looks like the old and new
 * sides of a diff whose +/- markers were lost -- confirm against the
 * repository.
 */
#define CF_DWORD1(pc, cf_const, cond, count, vpm, eop, cf_inst, wqm, b) \
(((pc) << 0) | ((cf_const) << 3) | ((cond) << 8) | ((count) << 10) | \
((vpm) << 20) | ((eop) << 21) | ((cf_inst) << 22) | ((wqm) << 30) | ((b) << 31))
cpu_to_le32((((pc) << 0) | ((cf_const) << 3) | ((cond) << 8) | ((count) << 10) | \
((vpm) << 20) | ((eop) << 21) | ((cf_inst) << 22) | ((wqm) << 30) | ((b) << 31)))
/*
 * CF_ALU_DWORD0 -- ORs addr at bit 0 with kb0 at bit 22, kb1 at bit 26 and
 * km0 at bit 30.
 * NOTE(review): two definitions appear back to back; they look like the
 * removed and added sides of a diff whose +/- markers were lost, the second
 * one adding a cpu_to_le32() wrapper -- confirm against the repository.
 */
#define CF_ALU_DWORD0(addr, kb0, kb1, km0) (((addr) << 0) | ((kb0) << 22) | ((kb1) << 26) | ((km0) << 30))
#define CF_ALU_DWORD0(addr, kb0, kb1, km0) cpu_to_le32((((addr) << 0) | ((kb0) << 22) | ((kb1) << 26) | ((km0) << 30)))
/*
 * CF_ALU_DWORD1 -- ORs its fields into one word: km1 at bit 0, kcache_addr0
 * at 2, kcache_addr1 at 10, count at 18, alt_const at 25, cf_inst at 26,
 * wqm at 30 and b at 31.
 * NOTE(review): two bodies follow the #define line (bare, then wrapped in
 * cpu_to_le32()); this looks like the old and new sides of a diff whose +/-
 * markers were lost -- confirm against the repository.
 */
#define CF_ALU_DWORD1(km1, kcache_addr0, kcache_addr1, count, alt_const, cf_inst, wqm, b) \
(((km1) << 0) | ((kcache_addr0) << 2) | ((kcache_addr1) << 10) | \
((count) << 18) | ((alt_const) << 25) | ((cf_inst) << 26) | ((wqm) << 30) | ((b) << 31))
cpu_to_le32((((km1) << 0) | ((kcache_addr0) << 2) | ((kcache_addr1) << 10) | \
((count) << 18) | ((alt_const) << 25) | ((cf_inst) << 26) | ((wqm) << 30) | ((b) << 31)))
/*
 * CF_ALLOC_IMP_EXP_DWORD0 -- ORs its fields into one word: array_base at
 * bit 0, type at 13, rw_gpr at 15, rr at 22, index_gpr at 23 and es at 30.
 * NOTE(review): two bodies follow the #define line (bare, then wrapped in
 * cpu_to_le32()); this looks like the old and new sides of a diff whose +/-
 * markers were lost -- confirm against the repository.
 */
#define CF_ALLOC_IMP_EXP_DWORD0(array_base, type, rw_gpr, rr, index_gpr, es) \
(((array_base) << 0) | ((type) << 13) | ((rw_gpr) << 15) | ((rr) << 22) | \
((index_gpr) << 23) | ((es) << 30))
cpu_to_le32((((array_base) << 0) | ((type) << 13) | ((rw_gpr) << 15) | ((rr) << 22) | \
((index_gpr) << 23) | ((es) << 30)))
/*
 * CF_ALLOC_IMP_EXP_DWORD1_SWIZ -- ORs its fields into one word: sel_x at
 * bit 0, sel_y at 3, sel_z at 6, sel_w at 9, bc at 16, vpm at 20, eop at 21,
 * cf_inst at 22, m at 30 and b at 31.
 * NOTE(review): two bodies follow the #define line (bare, then wrapped in
 * cpu_to_le32()); this looks like the old and new sides of a diff whose +/-
 * markers were lost -- confirm against the repository.
 */
#define CF_ALLOC_IMP_EXP_DWORD1_SWIZ(sel_x, sel_y, sel_z, sel_w, bc, vpm, eop, cf_inst, m, b) \
(((sel_x) << 0) | ((sel_y) << 3) | ((sel_z) << 6) | ((sel_w) << 9) | \
((bc) << 16) | ((vpm) << 20) | ((eop) << 21) | ((cf_inst) << 22) | \
((m) << 30) | ((b) << 31))
cpu_to_le32((((sel_x) << 0) | ((sel_y) << 3) | ((sel_z) << 6) | ((sel_w) << 9) | \
((bc) << 16) | ((vpm) << 20) | ((eop) << 21) | ((cf_inst) << 22) | \
((m) << 30) | ((b) << 31)))
// ALU clause insts
/* Identity wrapper: expands to its argument unchanged. */
#define SRC0_SEL(sel) (sel)
@@ -185,19 +185,19 @@
/* Identity wrapper: expands to its argument unchanged (no clamping is done). */
#define CLAMP(value) (value)
/*
 * ALU_DWORD0 -- ORs its fields into one word: src0_sel at bit 0, s0r at 9,
 * s0e at 10, s0n at 12, src1_sel at 13, s1r at 22, s1e at 23, s1n at 25,
 * im at 26, ps at 29 and last at 31.
 * NOTE(review): two bodies follow the #define line (bare, then wrapped in
 * cpu_to_le32()); this looks like the old and new sides of a diff whose +/-
 * markers were lost -- confirm against the repository.
 */
#define ALU_DWORD0(src0_sel, s0r, s0e, s0n, src1_sel, s1r, s1e, s1n, im, ps, last) \
(((src0_sel) << 0) | ((s0r) << 9) | ((s0e) << 10) | ((s0n) << 12) | \
((src1_sel) << 13) | ((s1r) << 22) | ((s1e) << 23) | ((s1n) << 25) | \
((im) << 26) | ((ps) << 29) | ((last) << 31))
cpu_to_le32((((src0_sel) << 0) | ((s0r) << 9) | ((s0e) << 10) | ((s0n) << 12) | \
((src1_sel) << 13) | ((s1r) << 22) | ((s1e) << 23) | ((s1n) << 25) | \
((im) << 26) | ((ps) << 29) | ((last) << 31)))
/*
 * ALU_DWORD1_OP2 -- ORs its fields into one word: s0a at bit 0, s1a at 1,
 * uem at 2, up at 3, wm at 4, omod at 5, alu_inst at 7, bs at 18, dst_gpr
 * at 21, dr at 28, de at 29 and clamp at 31.
 * NOTE(review): two bodies follow the #define line (bare, then wrapped in
 * cpu_to_le32()); this looks like the old and new sides of a diff whose +/-
 * markers were lost -- confirm against the repository.
 */
#define ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) \
(((s0a) << 0) | ((s1a) << 1) | ((uem) << 2) | ((up) << 3) | ((wm) << 4) | \
((omod) << 5) | ((alu_inst) << 7) | ((bs) << 18) | ((dst_gpr) << 21) | \
((dr) << 28) | ((de) << 29) | ((clamp) << 31))
cpu_to_le32((((s0a) << 0) | ((s1a) << 1) | ((uem) << 2) | ((up) << 3) | ((wm) << 4) | \
((omod) << 5) | ((alu_inst) << 7) | ((bs) << 18) | ((dst_gpr) << 21) | \
((dr) << 28) | ((de) << 29) | ((clamp) << 31)))
/*
 * ALU_DWORD1_OP3 -- ORs its fields into one word: src2_sel at bit 0, s2r
 * at 9, s2e at 10, s2n at 12, alu_inst at 13, bs at 18, dst_gpr at 21, dr
 * at 28, de at 29 and clamp at 31.
 * NOTE(review): two bodies follow the #define line (bare, then wrapped in
 * cpu_to_le32()); this looks like the old and new sides of a diff whose +/-
 * markers were lost -- confirm against the repository.
 */
#define ALU_DWORD1_OP3(src2_sel, s2r, s2e, s2n, alu_inst, bs, dst_gpr, dr, de, clamp) \
(((src2_sel) << 0) | ((s2r) << 9) | ((s2e) << 10) | ((s2n) << 12) | \
((alu_inst) << 13) | ((bs) << 18) | ((dst_gpr) << 21) | ((dr) << 28) | \
((de) << 29) | ((clamp) << 31))
cpu_to_le32((((src2_sel) << 0) | ((s2r) << 9) | ((s2e) << 10) | ((s2n) << 12) | \
((alu_inst) << 13) | ((bs) << 18) | ((dst_gpr) << 21) | ((dr) << 28) | \
((de) << 29) | ((clamp) << 31)))
// VTX clause insts
// vxt insts
@@ -235,14 +235,14 @@
/* Identity wrapper: expands to its argument unchanged. */
#define BUFFER_INDEX_MODE(mode) (mode)
/*
 * VTX_DWORD0 -- ORs its fields into one word: vtx_inst at bit 0, ft at 5,
 * fwq at 7, buffer_id at 8, src_gpr at 16, sr at 23, ssx at 24 and mfc
 * at 26.
 * NOTE(review): two bodies follow the #define line (bare, then wrapped in
 * cpu_to_le32()); this looks like the old and new sides of a diff whose +/-
 * markers were lost -- confirm against the repository.
 */
#define VTX_DWORD0(vtx_inst, ft, fwq, buffer_id, src_gpr, sr, ssx, mfc) \
(((vtx_inst) << 0) | ((ft) << 5) | ((fwq) << 7) | ((buffer_id) << 8) | \
((src_gpr) << 16) | ((sr) << 23) | ((ssx) << 24) | ((mfc) << 26))
cpu_to_le32((((vtx_inst) << 0) | ((ft) << 5) | ((fwq) << 7) | ((buffer_id) << 8) | \
((src_gpr) << 16) | ((sr) << 23) | ((ssx) << 24) | ((mfc) << 26)))
/*
 * VTX_DWORD1_GPR -- ORs its fields into one word: dst_gpr at bit 0, dr at 7,
 * dsx at 9, dsy at 12, dsz at 15, dsw at 18, ucf at 21, data_format at 22,
 * nfa at 28, fca at 30 and sma at 31.
 * NOTE(review): two bodies follow the #define line (bare, then wrapped in
 * cpu_to_le32()); this looks like the old and new sides of a diff whose +/-
 * markers were lost -- confirm against the repository.
 */
#define VTX_DWORD1_GPR(dst_gpr, dr, dsx, dsy, dsz, dsw, ucf, data_format, nfa, fca, sma) \
(((dst_gpr) << 0) | ((dr) << 7) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \
((ucf) << 21) | ((data_format) << 22) | ((nfa) << 28) | ((fca) << 30) | ((sma) << 31))
cpu_to_le32((((dst_gpr) << 0) | ((dr) << 7) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \
((ucf) << 21) | ((data_format) << 22) | ((nfa) << 28) | ((fca) << 30) | ((sma) << 31)))
/*
 * VTX_DWORD2 -- ORs its fields into one word: offset at bit 0, es at 16,
 * cbns at 18, mf at 19, alt_const at 20 and bim at 21.
 * VTX_DWORD_PAD -- an all-zero padding word.
 * NOTE(review): the lines below interleave a bare form and a cpu_to_le32()
 * form of both macros; this looks like the old and new sides of a diff whose
 * +/- markers were lost -- confirm against the repository.
 */
#define VTX_DWORD2(offset, es, cbns, mf, alt_const, bim) \
(((offset) << 0) | ((es) << 16) | ((cbns) << 18) | ((mf) << 19) | ((alt_const) << 20) | ((bim) << 21))
#define VTX_DWORD_PAD 0x00000000
cpu_to_le32((((offset) << 0) | ((es) << 16) | ((cbns) << 18) | ((mf) << 19) | ((alt_const) << 20) | ((bim) << 21)))
#define VTX_DWORD_PAD cpu_to_le32(0x00000000)
// TEX clause insts
// tex insts
@@ -267,15 +267,15 @@
/* Identity wrapper: expands to its argument unchanged. */
#define SAMPLER_ID(id) (id)
/*
 * TEX_DWORD0 -- ORs its fields into one word: tex_inst at bit 0, im at 5,
 * fwq at 7, resource_id at 8, src_gpr at 16, sr at 23, ac at 24, rim at 25
 * and sim at 27.
 * NOTE(review): two bodies follow the #define line (bare, then wrapped in
 * cpu_to_le32()); this looks like the old and new sides of a diff whose +/-
 * markers were lost -- confirm against the repository.
 */
#define TEX_DWORD0(tex_inst, im, fwq, resource_id, src_gpr, sr, ac, rim, sim) \
(((tex_inst) << 0) | ((im) << 5) | ((fwq) << 7) | ((resource_id) << 8) | \
((src_gpr) << 16) | ((sr) << 23) | ((ac) << 24) | ((rim) << 25) | ((sim) << 27))
cpu_to_le32((((tex_inst) << 0) | ((im) << 5) | ((fwq) << 7) | ((resource_id) << 8) | \
((src_gpr) << 16) | ((sr) << 23) | ((ac) << 24) | ((rim) << 25) | ((sim) << 27)))
/*
 * TEX_DWORD1 -- ORs its fields into one word: dst_gpr at bit 0, dr at 7,
 * dsx at 9, dsy at 12, dsz at 15, dsw at 18, lod_bias at 21, ctx at 28,
 * cty at 29, ctz at 30 and ctw at 31.
 * NOTE(review): two bodies follow the #define line (bare, then wrapped in
 * cpu_to_le32()); this looks like the old and new sides of a diff whose +/-
 * markers were lost -- confirm against the repository.
 */
#define TEX_DWORD1(dst_gpr, dr, dsx, dsy, dsz, dsw, lod_bias, ctx, cty, ctz, ctw) \
(((dst_gpr) << 0) | ((dr) << 7) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \
((lod_bias) << 21) | ((ctx) << 28) | ((cty) << 29) | ((ctz) << 30) | ((ctw) << 31))
cpu_to_le32((((dst_gpr) << 0) | ((dr) << 7) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \
((lod_bias) << 21) | ((ctx) << 28) | ((cty) << 29) | ((ctz) << 30) | ((ctw) << 31)))
/*
 * TEX_DWORD2 -- ORs its fields into one word: offset_x at bit 0, offset_y
 * at 5, offset_z at 10, sampler_id at 15, ssx at 20, ssy at 23, ssz at 26
 * and ssw at 29.
 * TEX_DWORD_PAD -- an all-zero padding word.
 * NOTE(review): the lines below interleave a bare form and a cpu_to_le32()
 * form of both macros; this looks like the old and new sides of a diff whose
 * +/- markers were lost -- confirm against the repository.
 */
#define TEX_DWORD2(offset_x, offset_y, offset_z, sampler_id, ssx, ssy, ssz, ssw) \
(((offset_x) << 0) | ((offset_y) << 5) | ((offset_z) << 10) | ((sampler_id) << 15) | \
((ssx) << 20) | ((ssy) << 23) | ((ssz) << 26) | ((ssw) << 29))
#define TEX_DWORD_PAD 0x00000000
cpu_to_le32((((offset_x) << 0) | ((offset_y) << 5) | ((offset_z) << 10) | ((sampler_id) << 15) | \
((ssx) << 20) | ((ssy) << 23) | ((ssz) << 26) | ((ssw) << 29)))
#define TEX_DWORD_PAD cpu_to_le32(0x00000000)
/*
 * Shader builders: each takes the chip family and a caller-supplied buffer
 * of 32-bit words.
 * NOTE(review): presumably the buffer is filled with shader instruction
 * dwords and the return value is the number of dwords written -- neither the
 * full bodies nor the return statements are visible here; confirm against
 * the definitions.
 */
extern int evergreen_solid_vs(RADEONChipFamily ChipSet, uint32_t* vs);
extern int evergreen_solid_ps(RADEONChipFamily ChipSet, uint32_t* ps);

View File

@@ -422,10 +422,16 @@ EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
cb_conf.format = COLOR_5_6_5;
cb_conf.comp_swap = 2; /* RGB */
}
#if X_BYTE_ORDER == X_BIG_ENDIAN
cb_conf.endian = ENDIAN_8IN16;
#endif
break;
case 32:
cb_conf.format = COLOR_8_8_8_8;
cb_conf.comp_swap = 1; /* ARGB */
#if X_BYTE_ORDER == X_BIG_ENDIAN
cb_conf.endian = ENDIAN_8IN32;
#endif
break;
default:
return;