mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-18 04:56:46 +07:00
drm/amd/display: Optimize regamma calculations
There are several optimizations:
1) Use predefined SRGB instead of calculating it. This is the most common case.
2) Precompute HW X points at boot, since they're fixed in ColModule.
3) Precompute PQ - it never changes and is very CPU-intensive in fixed point.
4) Reduce the number of points in ColModule to 512 (32x16) from 1024. This also requires reducing some regions for legacy DCEs to 16 points at most.

Performance:
1) is super-fast: build_output_tf is 1-2us, down from 25000-30000. Programming is also fast since there is only one register write.
2)+3) give build_output_tf for PQ in the ~100us range, down from ~80000-110000.
2)+4) result in slightly over 50% improvement. This gives an idea of the savings when we can't use the SRGB or PQ table (e.g. SDR white level > 80).

There's also a bit of refactoring: renaming some things that were misleading, and removing a lot of magic numbers whose origin and meaning novices might not be able to understand.

Signed-off-by: Krunoslav Kovac <Krunoslav.Kovac@amd.com>
Reviewed-by: Tony Cheng <Tony.Cheng@amd.com>
Acked-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
c5fc7f59a7
commit
8f8372c7d1
@ -407,6 +407,10 @@ static bool convert_to_custom_float(struct pwl_result_data *rgb_resulted,
|
||||
return true;
|
||||
}
|
||||
|
||||
#define MAX_LOW_POINT 11
|
||||
#define NUMBER_REGIONS 16
|
||||
#define NUMBER_SW_SEGMENTS 16
|
||||
|
||||
static bool
|
||||
dce110_translate_regamma_to_hw_format(const struct dc_transfer_func *output_tf,
|
||||
struct pwl_params *regamma_params)
|
||||
@ -421,8 +425,8 @@ dce110_translate_regamma_to_hw_format(const struct dc_transfer_func *output_tf,
|
||||
struct fixed31_32 y1_min;
|
||||
struct fixed31_32 y3_max;
|
||||
|
||||
int32_t segment_start, segment_end;
|
||||
uint32_t i, j, k, seg_distr[16], increment, start_index, hw_points;
|
||||
int32_t region_start, region_end;
|
||||
uint32_t i, j, k, seg_distr[NUMBER_REGIONS], increment, start_index, hw_points;
|
||||
|
||||
if (output_tf == NULL || regamma_params == NULL || output_tf->type == TF_TYPE_BYPASS)
|
||||
return false;
|
||||
@ -437,34 +441,20 @@ dce110_translate_regamma_to_hw_format(const struct dc_transfer_func *output_tf,
|
||||
/* 16 segments
|
||||
* segments are from 2^-11 to 2^5
|
||||
*/
|
||||
segment_start = -11;
|
||||
segment_end = 5;
|
||||
region_start = -MAX_LOW_POINT;
|
||||
region_end = NUMBER_REGIONS - MAX_LOW_POINT;
|
||||
|
||||
seg_distr[0] = 2;
|
||||
seg_distr[1] = 2;
|
||||
seg_distr[2] = 2;
|
||||
seg_distr[3] = 2;
|
||||
seg_distr[4] = 2;
|
||||
seg_distr[5] = 2;
|
||||
seg_distr[6] = 3;
|
||||
seg_distr[7] = 4;
|
||||
seg_distr[8] = 4;
|
||||
seg_distr[9] = 4;
|
||||
seg_distr[10] = 4;
|
||||
seg_distr[11] = 5;
|
||||
seg_distr[12] = 5;
|
||||
seg_distr[13] = 5;
|
||||
seg_distr[14] = 5;
|
||||
seg_distr[15] = 5;
|
||||
for (i = 0; i < NUMBER_REGIONS; i++)
|
||||
seg_distr[i] = 4;
|
||||
|
||||
} else {
|
||||
/* 10 segments
|
||||
* segment is from 2^-10 to 2^0
|
||||
*/
|
||||
segment_start = -10;
|
||||
segment_end = 0;
|
||||
region_start = -10;
|
||||
region_end = 0;
|
||||
|
||||
seg_distr[0] = 3;
|
||||
seg_distr[0] = 4;
|
||||
seg_distr[1] = 4;
|
||||
seg_distr[2] = 4;
|
||||
seg_distr[3] = 4;
|
||||
@ -472,8 +462,8 @@ dce110_translate_regamma_to_hw_format(const struct dc_transfer_func *output_tf,
|
||||
seg_distr[5] = 4;
|
||||
seg_distr[6] = 4;
|
||||
seg_distr[7] = 4;
|
||||
seg_distr[8] = 5;
|
||||
seg_distr[9] = 5;
|
||||
seg_distr[8] = 4;
|
||||
seg_distr[9] = 4;
|
||||
seg_distr[10] = -1;
|
||||
seg_distr[11] = -1;
|
||||
seg_distr[12] = -1;
|
||||
@ -488,10 +478,12 @@ dce110_translate_regamma_to_hw_format(const struct dc_transfer_func *output_tf,
|
||||
}
|
||||
|
||||
j = 0;
|
||||
for (k = 0; k < (segment_end - segment_start); k++) {
|
||||
for (k = 0; k < (region_end - region_start); k++) {
|
||||
increment = 32 / (1 << seg_distr[k]);
|
||||
start_index = (segment_start + k + 25) * 32;
|
||||
for (i = start_index; i < start_index + 32; i += increment) {
|
||||
start_index = (region_start + k + MAX_LOW_POINT) *
|
||||
NUMBER_SW_SEGMENTS;
|
||||
for (i = start_index; i < start_index + NUMBER_SW_SEGMENTS;
|
||||
i += increment) {
|
||||
if (j == hw_points - 1)
|
||||
break;
|
||||
rgb_resulted[j].red = output_tf->tf_pts.red[i];
|
||||
@ -502,15 +494,15 @@ dce110_translate_regamma_to_hw_format(const struct dc_transfer_func *output_tf,
|
||||
}
|
||||
|
||||
/* last point */
|
||||
start_index = (segment_end + 25) * 32;
|
||||
start_index = (region_end + MAX_LOW_POINT) * NUMBER_SW_SEGMENTS;
|
||||
rgb_resulted[hw_points - 1].red = output_tf->tf_pts.red[start_index];
|
||||
rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index];
|
||||
rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index];
|
||||
|
||||
arr_points[0].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2),
|
||||
dal_fixed31_32_from_int(segment_start));
|
||||
dal_fixed31_32_from_int(region_start));
|
||||
arr_points[1].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2),
|
||||
dal_fixed31_32_from_int(segment_end));
|
||||
dal_fixed31_32_from_int(region_end));
|
||||
|
||||
y_r = rgb_resulted[0].red;
|
||||
y_g = rgb_resulted[0].green;
|
||||
|
@ -232,10 +232,11 @@ bool cm_helper_convert_to_custom_float(
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/* driver uses 32 regions or less, but DCN HW has 34, extra 2 are set to 0 */
|
||||
#define MAX_REGIONS_NUMBER 34
|
||||
#define MAX_LOW_POINT 25
|
||||
#define NUMBER_SEGMENTS 32
|
||||
#define NUMBER_REGIONS 32
|
||||
#define NUMBER_SW_SEGMENTS 16
|
||||
|
||||
bool cm_helper_translate_curve_to_hw_format(
|
||||
const struct dc_transfer_func *output_tf,
|
||||
@ -251,7 +252,7 @@ bool cm_helper_translate_curve_to_hw_format(
|
||||
struct fixed31_32 y1_min;
|
||||
struct fixed31_32 y3_max;
|
||||
|
||||
int32_t segment_start, segment_end;
|
||||
int32_t region_start, region_end;
|
||||
int32_t i;
|
||||
uint32_t j, k, seg_distr[MAX_REGIONS_NUMBER], increment, start_index, hw_points;
|
||||
|
||||
@ -271,11 +272,11 @@ bool cm_helper_translate_curve_to_hw_format(
|
||||
/* 32 segments
|
||||
* segments are from 2^-25 to 2^7
|
||||
*/
|
||||
for (i = 0; i < 32 ; i++)
|
||||
for (i = 0; i < NUMBER_REGIONS ; i++)
|
||||
seg_distr[i] = 3;
|
||||
|
||||
segment_start = -25;
|
||||
segment_end = 7;
|
||||
region_start = -MAX_LOW_POINT;
|
||||
region_end = NUMBER_REGIONS - MAX_LOW_POINT;
|
||||
} else {
|
||||
/* 10 segments
|
||||
* segment is from 2^-10 to 2^0
|
||||
@ -289,14 +290,14 @@ bool cm_helper_translate_curve_to_hw_format(
|
||||
seg_distr[5] = 4;
|
||||
seg_distr[6] = 4;
|
||||
seg_distr[7] = 4;
|
||||
seg_distr[8] = 5;
|
||||
seg_distr[9] = 5;
|
||||
seg_distr[8] = 4;
|
||||
seg_distr[9] = 4;
|
||||
|
||||
segment_start = -10;
|
||||
segment_end = 0;
|
||||
region_start = -10;
|
||||
region_end = 0;
|
||||
}
|
||||
|
||||
for (i = segment_end - segment_start; i < MAX_REGIONS_NUMBER ; i++)
|
||||
for (i = region_end - region_start; i < MAX_REGIONS_NUMBER ; i++)
|
||||
seg_distr[i] = -1;
|
||||
|
||||
for (k = 0; k < MAX_REGIONS_NUMBER; k++) {
|
||||
@ -305,10 +306,12 @@ bool cm_helper_translate_curve_to_hw_format(
|
||||
}
|
||||
|
||||
j = 0;
|
||||
for (k = 0; k < (segment_end - segment_start); k++) {
|
||||
increment = NUMBER_SEGMENTS / (1 << seg_distr[k]);
|
||||
start_index = (segment_start + k + MAX_LOW_POINT) * NUMBER_SEGMENTS;
|
||||
for (i = start_index; i < start_index + NUMBER_SEGMENTS; i += increment) {
|
||||
for (k = 0; k < (region_end - region_start); k++) {
|
||||
increment = NUMBER_SW_SEGMENTS / (1 << seg_distr[k]);
|
||||
start_index = (region_start + k + MAX_LOW_POINT) *
|
||||
NUMBER_SW_SEGMENTS;
|
||||
for (i = start_index; i < start_index + NUMBER_SW_SEGMENTS;
|
||||
i += increment) {
|
||||
if (j == hw_points - 1)
|
||||
break;
|
||||
rgb_resulted[j].red = output_tf->tf_pts.red[i];
|
||||
@ -319,15 +322,15 @@ bool cm_helper_translate_curve_to_hw_format(
|
||||
}
|
||||
|
||||
/* last point */
|
||||
start_index = (segment_end + MAX_LOW_POINT) * NUMBER_SEGMENTS;
|
||||
start_index = (region_end + MAX_LOW_POINT) * NUMBER_SW_SEGMENTS;
|
||||
rgb_resulted[hw_points - 1].red = output_tf->tf_pts.red[start_index];
|
||||
rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index];
|
||||
rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index];
|
||||
|
||||
arr_points[0].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2),
|
||||
dal_fixed31_32_from_int(segment_start));
|
||||
dal_fixed31_32_from_int(region_start));
|
||||
arr_points[1].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2),
|
||||
dal_fixed31_32_from_int(segment_end));
|
||||
dal_fixed31_32_from_int(region_end));
|
||||
|
||||
y_r = rgb_resulted[0].red;
|
||||
y_g = rgb_resulted[0].green;
|
||||
|
@ -196,7 +196,7 @@ static void dpp1_cm_set_regamma_pwl(
|
||||
case OPP_REGAMMA_SRGB:
|
||||
re_mode = 1;
|
||||
break;
|
||||
case OPP_REGAMMA_3_6:
|
||||
case OPP_REGAMMA_XVYCC:
|
||||
re_mode = 2;
|
||||
break;
|
||||
case OPP_REGAMMA_USER:
|
||||
|
@ -136,7 +136,7 @@ struct out_csc_color_matrix {
|
||||
enum opp_regamma {
|
||||
OPP_REGAMMA_BYPASS = 0,
|
||||
OPP_REGAMMA_SRGB,
|
||||
OPP_REGAMMA_3_6,
|
||||
OPP_REGAMMA_XVYCC,
|
||||
OPP_REGAMMA_USER
|
||||
};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user