From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 48) id 5D5B6385483E; Tue, 29 Jun 2021 19:06:32 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 5D5B6385483E From: "johnnybit at gmail dot com" To: gcc-bugs@gcc.gnu.org Subject: [Bug middle-end/101262] GCC11 OpenMP optimization causes sigsegv on aligned constant array in darktable Date: Tue, 29 Jun 2021 19:06:32 +0000 X-Bugzilla-Reason: CC X-Bugzilla-Type: changed X-Bugzilla-Watch-Reason: None X-Bugzilla-Product: gcc X-Bugzilla-Component: middle-end X-Bugzilla-Version: 11.0 X-Bugzilla-Keywords: X-Bugzilla-Severity: normal X-Bugzilla-Who: johnnybit at gmail dot com X-Bugzilla-Status: WAITING X-Bugzilla-Resolution: X-Bugzilla-Priority: P3 X-Bugzilla-Assigned-To: unassigned at gcc dot gnu.org X-Bugzilla-Target-Milestone: --- X-Bugzilla-Flags: X-Bugzilla-Changed-Fields: Message-ID: In-Reply-To: References: Content-Type: text/plain; charset="UTF-8" Content-Transfer-Encoding: quoted-printable X-Bugzilla-URL: http://gcc.gnu.org/bugzilla/ Auto-Submitted: auto-generated MIME-Version: 1.0 X-BeenThere: gcc-bugs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-bugs mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 29 Jun 2021 19:06:32 -0000 https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101262 --- Comment #2 from Hubert Kowalski --- I've tried producing a minimal reproducer in the form of the code below; however, I run on gcc 10.3, and it depends on the optimization type. According to user reports, it's enough to compile darktable using GCC 11 with the RelWithDebInfo target (it applies -O2). 
/*
 * Builds with Release target (-O3) are apparently "fine" (the code below
 * theoretically reproduces the issue, but afaik it might not reliably
 * reproduce the problem).
 */

// The archived message lost the header names of its three #include lines.
// <math.h> and <stdlib.h> are what the listing itself requires; <string.h>
// is needed for the memset() in main().
#include <math.h>
#include <stdlib.h>
#include <string.h>

#if defined(__GNUC__)
#pragma GCC optimize ("unroll-loops", "tree-loop-if-convert", \
                      "tree-loop-distribution", "no-strict-aliasing", \
                      "loop-interchange", "loop-nest-optimize", "tree-loop-im", \
                      "unswitch-loops", "tree-loop-ivcanon", "ira-loop-pressure", \
                      "split-ivs-in-unroller", "variable-expansion-in-unroller", \
                      "split-loops", "ivopts", "predictive-commoning",\
                      "tree-loop-linear", "loop-block", "loop-strip-mine", \
                      "finite-math-only", "fp-contract=fast", "fast-math")
#endif

// Spells an OpenMP firstprivate() clause.
#define dt_omp_firstprivate(...) firstprivate(__VA_ARGS__)

// Function multi-versioning: one clone per listed x86 ISA level.
#define __DT_CLONE_TARGETS__ __attribute__((target_clones("default", "sse2", "sse3", "sse4.1", "sse4.2", "popcnt", "avx", "avx2", "avx512f", "fma4")))
// Requests 64-byte alignment for the annotated object or member.
#define DT_ALIGNED_ARRAY __attribute__((aligned(64)))

#define PIXEL_CHAN 8
#define UI_SAMPLES 256

// radial distances used for pixel ops
static const float centers_ops[PIXEL_CHAN] DT_ALIGNED_ARRAY = {
  -56.0f / 7.0f, // = -8.0f
  -48.0f / 7.0f,
  -40.0f / 7.0f,
  -32.0f / 7.0f,
  -24.0f / 7.0f,
  -16.0f / 7.0f,
   -8.0f / 7.0f,
    0.0f / 7.0f
};

typedef struct dt_iop_toneequalizer_gui_data_t
{
  // Mem arrays 64-byte aligned (DT_ALIGNED_ARRAY); because of these members
  // the struct itself has a 64-byte alignment requirement.
  float factors[PIXEL_CHAN] DT_ALIGNED_ARRAY;
  float gui_lut[UI_SAMPLES] DT_ALIGNED_ARRAY; // LUT for the UI graph
  float sigma;
} dt_iop_toneequalizer_gui_data_t;


// Clamp `value` into [bottom, top] using fminf/fmaxf (vectorizable).
#pragma omp declare simd
__DT_CLONE_TARGETS__
static inline float fast_clamp(const float value, const float bottom, const float top)
{
  return fmaxf(fminf(value, top), bottom);
}


// Gaussian function denominator such that y = exp(- radius^2 / denominator).
// This is the constant factor of the exponential, so we don't need to
// recompute it for every single pixel.
// Returns 2 * sigma^2.
#pragma omp declare simd
__DT_CLONE_TARGETS__
static float gaussian_denom(const float sigma)
{
  return 2.0f * sigma * sigma;
}


// Gaussian function without normalization: the variable part of the
// exponential. The denominator should be evaluated with `gaussian_denom`
// ahead of the array loop for optimal performance.
// Returns expf(-radius^2 / denominator).
#pragma omp declare simd
__DT_CLONE_TARGETS__
static float gaussian_func(const float radius, const float denominator)
{
  return expf(- radius * radius / denominator);
}


// Build the correction for the current pixel as the sum of the contribution
// of each luminance channel.
//   exposure: clamped to [-8, 0] before use
//   factors:  PIXEL_CHAN weights; the simd clause below asserts that this
//             pointer is 64-byte aligned
//   sigma:    passed to gaussian_denom()
// Returns the weighted sum clamped to [0.25, 4].
__DT_CLONE_TARGETS__
static inline float pixel_correction(const float exposure,
                                     const float *const restrict factors,
                                     const float sigma)
{
  float result = 0.0f;
  const float gauss_denom = gaussian_denom(sigma);
  const float expo = fast_clamp(exposure, -8.0f, 0.0f);

  #pragma omp simd aligned(centers_ops, factors:64) safelen(PIXEL_CHAN) reduction(+:result)
  for(int i = 0; i < PIXEL_CHAN; ++i)
    result += gaussian_func(expo - centers_ops[i], gauss_denom) * factors[i];

  return fast_clamp(result, 0.25f, 4.0f);
}


// Compute the LUT of the exposure corrections in EV, offset and scale it for
// display in the GUI widget graph.
// Reads g->factors and g->sigma, writes all UI_SAMPLES entries of g->gui_lut.
// The aligned() clause asserts that both arrays are 64-byte aligned, which
// only holds if *g itself was allocated with 64-byte alignment.
__DT_CLONE_TARGETS__
static inline void compute_lut_correction(struct dt_iop_toneequalizer_gui_data_t *g,
                                          const float offset,
                                          const float scaling)
{
  float *const restrict LUT = g->gui_lut;
  const float *const restrict factors = g->factors;
  const float sigma = g->sigma;

#pragma omp parallel for simd schedule(static) default(none) \
  dt_omp_firstprivate(factors, sigma, offset, scaling, LUT) \
  aligned(LUT, factors:64)
  for(int k = 0; k < UI_SAMPLES; k++)
  {
    // build the inset graph curve LUT
    // x sweeps [-8; 0] as k goes from 0 to UI_SAMPLES - 1
    const float x = (8.0f * (((float)k) / ((float)(UI_SAMPLES - 1)))) - 8.0f;
    LUT[k] = offset - log2f(pixel_correction(x, factors, sigma)) / scaling;
  }
}


// Allocate a zeroed GUI data block and run the LUT computation once.
// Returns EXIT_FAILURE if the allocation fails, EXIT_SUCCESS otherwise.
int main(void)
{
  // The struct requires 64-byte alignment and the OpenMP aligned() clauses
  // rely on it. calloc() only guarantees alignment suitable for max_align_t,
  // so allocate with the struct's own alignment and zero the block by hand.
  // sizeof *g is a multiple of the alignment, as aligned_alloc() requires.
  dt_iop_toneequalizer_gui_data_t *g
      = aligned_alloc(_Alignof(dt_iop_toneequalizer_gui_data_t), sizeof *g);
  if(g == NULL) return EXIT_FAILURE;
  memset(g, 0, sizeof *g);

  // NOTE(review): with an all-zero block sigma is 0, so gaussian_denom()
  // returns 0 and gaussian_func() divides by zero, although the optimize
  // pragma enables "finite-math-only" / "fast-math". Confirm whether the
  // reproducer should seed sigma and factors with realistic values.
  compute_lut_correction(g, 0.5f, 4.0f);

  free(g);
  return EXIT_SUCCESS;
}