diff --git a/gcc/config/aarch64/aarch64-opts.h b/gcc/config/aarch64/aarch64-opts.h
index 9d44a598967cad6db06c0097f1f9f2378981b3de..00362695d53f5791d3312bf4d83ddabb7ac10739 100644
--- a/gcc/config/aarch64/aarch64-opts.h
+++ b/gcc/config/aarch64/aarch64-opts.h
@@ -90,6 +90,7 @@ enum aarch64_function_type {
 /* SVE vector register sizes. */
 enum aarch64_sve_vector_bits_enum {
   SVE_SCALABLE,
+  SVE_NOT_IMPLEMENTED = SVE_SCALABLE,
   SVE_128 = 128,
   SVE_256 = 256,
   SVE_512 = 512,
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 1fe1a50d52aeb3719cf30c4a2af41abb8dd7233d..fa3c247f0773e1d4101b6209b6b7ba6cd50f82eb 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -252,6 +252,10 @@ struct tune_params
   const struct cpu_vector_cost *vec_costs;
   const struct cpu_branch_cost *branch_costs;
   const struct cpu_approx_modes *approx_modes;
+  /* Width of the SVE registers or SVE_NOT_IMPLEMENTED if not applicable.
+     Only used for tuning decisions, does not disable VLA
+     vectorization. */
+  enum aarch64_sve_vector_bits_enum sve_width;
   int memmov_cost;
   int issue_rate;
   unsigned int fusible_ops;
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 2c267b936b0495a8c5b6593d259619dbe88ae7a8..7ccc6b78d5872d6b43491badbfa9f2d70580015c 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -681,6 +681,7 @@ static const struct tune_params generic_tunings =
   &generic_vector_cost,
   &generic_branch_cost,
   &generic_approx_modes,
+  SVE_NOT_IMPLEMENTED, /* sve_width */
   4, /* memmov_cost */
   2, /* issue_rate */
   (AARCH64_FUSE_AES_AESMC), /* fusible_ops */
@@ -706,6 +707,7 @@ static const struct tune_params cortexa35_tunings =
   &generic_vector_cost,
   &generic_branch_cost,
   &generic_approx_modes,
+  SVE_NOT_IMPLEMENTED, /* sve_width */
   4, /* memmov_cost */
   1, /* issue_rate */
   (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
@@ -732,6 +734,7 @@ static const struct tune_params cortexa53_tunings =
   &generic_vector_cost,
   &generic_branch_cost,
   &generic_approx_modes,
+  SVE_NOT_IMPLEMENTED, /* sve_width */
   4, /* memmov_cost */
   2, /* issue_rate */
   (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
@@ -758,6 +761,7 @@ static const struct tune_params cortexa57_tunings =
   &cortexa57_vector_cost,
   &generic_branch_cost,
   &generic_approx_modes,
+  SVE_NOT_IMPLEMENTED, /* sve_width */
   4, /* memmov_cost */
   3, /* issue_rate */
   (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
@@ -784,6 +788,7 @@ static const struct tune_params cortexa72_tunings =
   &cortexa57_vector_cost,
   &generic_branch_cost,
   &generic_approx_modes,
+  SVE_NOT_IMPLEMENTED, /* sve_width */
   4, /* memmov_cost */
   3, /* issue_rate */
   (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
@@ -810,6 +815,7 @@ static const struct tune_params cortexa73_tunings =
   &cortexa57_vector_cost,
   &generic_branch_cost,
   &generic_approx_modes,
+  SVE_NOT_IMPLEMENTED, /* sve_width */
   4, /* memmov_cost. */
   2, /* issue_rate. */
   (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
@@ -838,6 +844,7 @@ static const struct tune_params exynosm1_tunings =
   &exynosm1_vector_cost,
   &generic_branch_cost,
   &exynosm1_approx_modes,
+  SVE_NOT_IMPLEMENTED, /* sve_width */
   4, /* memmov_cost */
   3, /* issue_rate */
   (AARCH64_FUSE_AES_AESMC), /* fusible_ops */
@@ -863,6 +870,7 @@ static const struct tune_params thunderxt88_tunings =
   &thunderx_vector_cost,
   &generic_branch_cost,
   &generic_approx_modes,
+  SVE_NOT_IMPLEMENTED, /* sve_width */
   6, /* memmov_cost */
   2, /* issue_rate */
   AARCH64_FUSE_CMP_BRANCH, /* fusible_ops */
@@ -888,6 +896,7 @@ static const struct tune_params thunderx_tunings =
   &thunderx_vector_cost,
   &generic_branch_cost,
   &generic_approx_modes,
+  SVE_NOT_IMPLEMENTED, /* sve_width */
   6, /* memmov_cost */
   2, /* issue_rate */
   AARCH64_FUSE_CMP_BRANCH, /* fusible_ops */
@@ -914,6 +923,7 @@ static const struct tune_params tsv110_tunings =
   &tsv110_vector_cost,
   &generic_branch_cost,
   &generic_approx_modes,
+  SVE_NOT_IMPLEMENTED, /* sve_width */
   4, /* memmov_cost */
   4, /* issue_rate */
   (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH
@@ -940,6 +950,7 @@ static const struct tune_params xgene1_tunings =
   &xgene1_vector_cost,
   &generic_branch_cost,
   &xgene1_approx_modes,
+  SVE_NOT_IMPLEMENTED, /* sve_width */
   6, /* memmov_cost */
   4, /* issue_rate */
   AARCH64_FUSE_NOTHING, /* fusible_ops */
@@ -965,6 +976,7 @@ static const struct tune_params emag_tunings =
   &xgene1_vector_cost,
   &generic_branch_cost,
   &xgene1_approx_modes,
+  SVE_NOT_IMPLEMENTED, /* sve_width */
   6, /* memmov_cost */
   4, /* issue_rate */
   AARCH64_FUSE_NOTHING, /* fusible_ops */
@@ -990,6 +1002,7 @@ static const struct tune_params qdf24xx_tunings =
   &qdf24xx_vector_cost,
   &generic_branch_cost,
   &generic_approx_modes,
+  SVE_NOT_IMPLEMENTED, /* sve_width */
   4, /* memmov_cost */
   4, /* issue_rate */
   (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
@@ -1018,6 +1031,7 @@ static const struct tune_params saphira_tunings =
   &generic_vector_cost,
   &generic_branch_cost,
   &generic_approx_modes,
+  SVE_NOT_IMPLEMENTED, /* sve_width */
   4, /* memmov_cost */
   4, /* issue_rate */
   (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
@@ -1044,6 +1058,7 @@ static const struct tune_params thunderx2t99_tunings =
   &thunderx2t99_vector_cost,
   &generic_branch_cost,
   &generic_approx_modes,
+  SVE_NOT_IMPLEMENTED, /* sve_width */
   4, /* memmov_cost. */
   4, /* issue_rate. */
   (AARCH64_FUSE_CMP_BRANCH | AARCH64_FUSE_AES_AESMC
@@ -17862,6 +17877,25 @@ aarch64_speculation_safe_value (machine_mode mode,
   return result;
 }
 
+/* Implement TARGET_ESTIMATED_POLY_VALUE.
+   Look into the tuning structure for an estimate.
+   VAL.coeffs[1] is multiplied by the number of VQ chunks over the initial
+   Advanced SIMD 128 bits. */
+
+static HOST_WIDE_INT
+aarch64_estimated_poly_value (poly_int64 val)
+{
+  enum aarch64_sve_vector_bits_enum width_source
+    = aarch64_tune_params.sve_width;
+
+  /* No SVE width in the tuning structure: use the default estimate. */
+  if (width_source == SVE_SCALABLE)
+    return default_estimated_poly_value (val);
+
+  HOST_WIDE_INT over_128 = width_source - 128;
+  return val.coeffs[0] + val.coeffs[1] * over_128 / 128;
+}
+
 /* Target-specific selftests. */
 
 #if CHECKING_P
@@ -18341,6 +18375,9 @@ aarch64_libgcc_floating_mode_supported_p
 #undef TARGET_SPECULATION_SAFE_VALUE
 #define TARGET_SPECULATION_SAFE_VALUE aarch64_speculation_safe_value
 
+#undef TARGET_ESTIMATED_POLY_VALUE
+#define TARGET_ESTIMATED_POLY_VALUE aarch64_estimated_poly_value
+
 #if CHECKING_P
 #undef TARGET_RUN_TARGET_SELFTESTS
 #define TARGET_RUN_TARGET_SELFTESTS selftest::aarch64_run_selftests