From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <gcc-bugzilla@gcc.gnu.org>
Received: by sourceware.org (Postfix, from userid 48)
 id 5D5B6385483E; Tue, 29 Jun 2021 19:06:32 +0000 (GMT)
DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 5D5B6385483E
From: "johnnybit at gmail dot com" <gcc-bugzilla@gcc.gnu.org>
To: gcc-bugs@gcc.gnu.org
Subject: [Bug middle-end/101262] GCC11 OpenMP optimization causes sigsegv on
 aligned constant array in darktable
Date: Tue, 29 Jun 2021 19:06:32 +0000
X-Bugzilla-Reason: CC
X-Bugzilla-Type: changed
X-Bugzilla-Watch-Reason: None
X-Bugzilla-Product: gcc
X-Bugzilla-Component: middle-end
X-Bugzilla-Version: 11.0
X-Bugzilla-Keywords: 
X-Bugzilla-Severity: normal
X-Bugzilla-Who: johnnybit at gmail dot com
X-Bugzilla-Status: WAITING
X-Bugzilla-Resolution: 
X-Bugzilla-Priority: P3
X-Bugzilla-Assigned-To: unassigned at gcc dot gnu.org
X-Bugzilla-Target-Milestone: ---
X-Bugzilla-Flags: 
X-Bugzilla-Changed-Fields: 
Message-ID: <bug-101262-4-M4EiBZ3mN4@http.gcc.gnu.org/bugzilla/>
In-Reply-To: <bug-101262-4@http.gcc.gnu.org/bugzilla/>
References: <bug-101262-4@http.gcc.gnu.org/bugzilla/>
Content-Type: text/plain; charset="UTF-8"
Content-Transfer-Encoding: quoted-printable
X-Bugzilla-URL: http://gcc.gnu.org/bugzilla/
Auto-Submitted: auto-generated
MIME-Version: 1.0
X-BeenThere: gcc-bugs@gcc.gnu.org
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Gcc-bugs mailing list <gcc-bugs.gcc.gnu.org>
List-Unsubscribe: <https://gcc.gnu.org/mailman/options/gcc-bugs>,
 <mailto:gcc-bugs-request@gcc.gnu.org?subject=unsubscribe>
List-Archive: <https://gcc.gnu.org/pipermail/gcc-bugs/>
List-Post: <mailto:gcc-bugs@gcc.gnu.org>
List-Help: <mailto:gcc-bugs-request@gcc.gnu.org?subject=help>
List-Subscribe: <https://gcc.gnu.org/mailman/listinfo/gcc-bugs>,
 <mailto:gcc-bugs-request@gcc.gnu.org?subject=subscribe>
X-List-Received-Date: Tue, 29 Jun 2021 19:06:32 -0000

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=3D101262
--- Comment #2 from Hubert Kowalski <johnnybit at gmail dot com> ---
I've tried to produce a minimal reproducer in the form of the code below;
however, I run GCC 10.3 myself. Whether it crashes also depends on the
optimization level.

According to user reports, it is enough to compile darktable with GCC 11
using the RelWithDebInfo target (which applies -O2). Builds with the
Release target (-O3) are apparently "fine".

(The code below theoretically reproduces the issue, but AFAIK it might not
reproduce the problem reliably.)

#include <math.h>
#include <stdlib.h>
#include <omp.h>

#if defined(__GNUC__)
#pragma GCC optimize ("unroll-loops", "tree-loop-if-convert", \
                      "tree-loop-distribution", "no-strict-aliasing", \
                      "loop-interchange", "loop-nest-optimize", "tree-loop-=
im",
\
                      "unswitch-loops", "tree-loop-ivcanon",
"ira-loop-pressure", \
                      "split-ivs-in-unroller",
"variable-expansion-in-unroller", \
                      "split-loops", "ivopts", "predictive-commoning",\
                      "tree-loop-linear", "loop-block", "loop-strip-mine", \
                      "finite-math-only", "fp-contract=3Dfast", "fast-math")
#endif

// Expands to an OpenMP firstprivate() clause listing the given variables.
#define dt_omp_firstprivate(...) firstprivate(__VA_ARGS__)
// Function attribute requesting one clone per listed x86 target. The
// definition spans several lines, so each one but the last needs a
// trailing backslash.
#define __DT_CLONE_TARGETS__ \
  __attribute__((target_clones("default", "sse2", "sse3", "sse4.1", \
                               "sse4.2", "popcnt", "avx", "avx2", \
                               "avx512f", "fma4")))
// Aligns the annotated object on a 64-byte boundary.
#define DT_ALIGNED_ARRAY __attribute__((aligned(64)))
// Number of luminance channels summed for each pixel correction.
#define PIXEL_CHAN 8
// Number of samples in the GUI look-up table.
#define UI_SAMPLES 256

// Radial distances used for pixel ops: PIXEL_CHAN evenly spaced values,
// from -56/7 up to 0/7 in steps of 8/7. DT_ALIGNED_ARRAY requests 64-byte
// alignment for the table, which the aligned() clause in
// pixel_correction() relies on.
static const float centers_ops[PIXEL_CHAN] DT_ALIGNED_ARRAY =3D {-56.0f / 7=
.0f,
// =3D -8.0f
                                                               -48.0f / 7.0=
f,
                                                               -40.0f / 7.0=
f,
                                                               -32.0f / 7.0=
f,
                                                               -24.0f / 7.0=
f,
                                                               -16.0f / 7.0=
f,
                                                                -8.0f / 7.0=
f,
                                                                 0.0f / 7.0=
f};

// GUI-side data read and written by compute_lut_correction().
typedef struct dt_iop_toneequalizer_gui_data_t
{
  // Arrays aligned on 64 bytes (the attribute counts bytes, not bits);
  // this also raises the alignment requirement of the whole struct.
  // factors: per-channel multipliers summed by pixel_correction()
  float factors[PIXEL_CHAN] DT_ALIGNED_ARRAY;
  float gui_lut[UI_SAMPLES] DT_ALIGNED_ARRAY; // LUT for the UI graph
  float sigma; // Gaussian width passed to gaussian_denom()
} dt_iop_toneequalizer_gui_data_t;

// Clamp `value` between `bottom` and `top` using fminf/fmaxf only.
// The upper bound is applied first, then the lower one, so `bottom` wins
// if the bounds are given in the wrong order.
// `declare simd` requests SIMD variants of the function from OpenMP;
// __DT_CLONE_TARGETS__ additionally requests per-target clones.
#pragma omp declare simd
__DT_CLONE_TARGETS__
static inline float fast_clamp(const float value, const float bottom, const
float top)
{
  // vectorizable clamping between bottom and top values
  return fmaxf(fminf(value, top), bottom);
}

// Denominator of the Gaussian exponent for a given `sigma`.
// Returns 2 * sigma^2. pixel_correction() computes it once and passes it
// to gaussian_func() for every channel.
#pragma omp declare simd
__DT_CLONE_TARGETS__
static float gaussian_denom(const float sigma)
{
  // Gaussian function denominator such that
  // y =3D exp(- radius^2 / denominator)
  // this is the constant factor of the exponential, so we don't need to
  // recompute it for every single pixel
  return 2.0f * sigma * sigma;
}

// Unnormalized Gaussian: exp(-radius^2 / denominator).
// `denominator` is meant to come from gaussian_denom(); it is used as a
// divisor without any zero check.
#pragma omp declare simd
__DT_CLONE_TARGETS__
static float gaussian_func(const float radius, const float denominator)
{
  // Gaussian function without normalization
  // this is the variable part of the exponential
  // the denominator should be evaluated with `gaussian_denom`
  // ahead of the array loop for optimal performance
  return expf(- radius * radius / denominator);
}

__DT_CLONE_TARGETS__
static inline float pixel_correction(const float exposure,
                                     const float *const restrict factors,
                                     const float sigma)
{
  // build the correction for the current pixel
  // as the sum of the contribution of each luminance channel
  float result =3D 0.0f;
  const float gauss_denom =3D gaussian_denom(sigma);
  const float expo =3D fast_clamp(exposure, -8.0f, 0.0f);

#pragma omp simd aligned(centers_ops, factors:64) safelen(PIXEL_CHAN)
reduction(+:result)
  for(int i =3D 0; i < PIXEL_CHAN; ++i)
    result +=3D gaussian_func(expo - centers_ops[i], gauss_denom) * factors=
[i];

  return fast_clamp(result, 0.25f, 4.0f);
}

// Fill g->gui_lut with UI_SAMPLES values for the GUI graph.
// For each sample k, x sweeps linearly from -8 to 0 and LUT[k] is set to
// offset - log2(pixel_correction(x)) / scaling.
// Reads g->factors and g->sigma. `scaling` is used as a divisor with no
// zero check. The loop carries an OpenMP `parallel for simd` directive.
__DT_CLONE_TARGETS__
static inline void compute_lut_correction(struct
dt_iop_toneequalizer_gui_data_t *g,
                                          const float offset,
                                          const float scaling)
{
  // Compute the LUT of the exposure corrections in EV,
  // offset and scale it for display in GUI widget graph

  float *const restrict LUT =3D g->gui_lut;
  const float *const restrict factors =3D g->factors;
  const float sigma =3D g->sigma;

  // default(none) requires every variable used in the loop to be listed;
  // aligned() promises that LUT and factors are 64-byte aligned.
#pragma omp parallel for simd schedule(static) default(none) \
  dt_omp_firstprivate(factors, sigma, offset, scaling, LUT) \
  aligned(LUT, factors:64)
  for(int k =3D 0; k < UI_SAMPLES; k++)
  {
    // build the inset graph curve LUT
    // x goes from -8 (k at 0) up to 0 (k at UI_SAMPLES - 1), in EV
    const float x =3D (8.0f * (((float)k) / ((float)(UI_SAMPLES - 1)))) - 8=
.0f;
    LUT[k] =3D offset - log2f(pixel_correction(x, factors, sigma)) / scalin=
g;
  }
}

int main() {
    dt_iop_toneequalizer_gui_data_t *g =3D calloc(1,
sizeof(dt_iop_toneequalizer_gui_data_t));
    compute_lut_correction(g, 0.5f, 4.0f);
}=