public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
From: "zhangjungcc at gmail dot com" <gcc-bugzilla@gcc.gnu.org>
To: gcc-bugs@gcc.gnu.org
Subject: [Bug middle-end/110015] openjpeg is slower when built with gcc13 compared to clang16
Date: Tue, 31 Oct 2023 12:08:23 +0000	[thread overview]
Message-ID: <bug-110015-4-Xtv0QSvBo2@http.gcc.gnu.org/bugzilla/> (raw)
In-Reply-To: <bug-110015-4@http.gcc.gnu.org/bugzilla/>

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110015

jun zhang <zhangjungcc at gmail dot com> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |zhangjungcc at gmail dot com

--- Comment #2 from jun zhang <zhangjungcc at gmail dot com> ---
  GCC fails to vectorize the following loop, but LLVM vectorizes it, which
yields a 3% improvement.
For more information, please refer to: https://godbolt.org/z/zMbjq41h5

#include<string.h>
/* Integer type aliases. */
typedef signed int    OPJ_INT32;
typedef unsigned int  OPJ_UINT32;
typedef int           OPJ_BOOL;
typedef char          OPJ_CHAR;
typedef unsigned char OPJ_BYTE;

/* Floating-point type aliases. */
typedef float         OPJ_FLOAT32;
typedef double        OPJ_FLOAT64;

/* Truth values (presumably meant for OPJ_BOOL). */
#define OPJ_TRUE 1
#define OPJ_FALSE 0

/* Miscellaneous constants carried along with the snippet. */
#define T1_NMSEDEC_FRACBITS 6
#define OPJ_TLS_KEY_T1  0

/* Expands to the C99 `restrict` qualifier. */
#define OPJ_RESTRICT restrict
#include <stdio.h>
/* Alias for the standard size_t type. */
typedef size_t   OPJ_SIZE_T;

/* Encoder-side code-block description. The `layers` and `passes` members
 * are commented out in this snippet. */
typedef struct opj_tcd_cblk_enc {
    /* Data */
    OPJ_BYTE *data;
    // opj_tcd_layer_t* layers;      /* layer information */
    // opj_tcd_pass_t* passes;       /* information about the passes */

    /* Dimension of the code-block: upper-left corner (x0, y0),
     * lower-right corner (x1, y1). */
    OPJ_INT32 x0;
    OPJ_INT32 y0;
    OPJ_INT32 x1;
    OPJ_INT32 y1;

    OPJ_UINT32 numbps;
    OPJ_UINT32 numlenbits;
    /* Size of the allocated data buffer */
    OPJ_UINT32 data_size;
    /* Number of passes already done for the code-block */
    OPJ_UINT32 numpasses;
    /* Number of passes in the layer */
    OPJ_UINT32 numpassesinlayers;
    /* Total number of passes */
    OPJ_UINT32 totalpasses;
} opj_tcd_cblk_enc_t;
/* State structure handed to opj_t1_encode_cblk(). The `mqc` and `flags`
 * members are commented out in this snippet. */
typedef struct opj_t1 {
    /* MQC component */
    // opj_mqc_t mqc;

    OPJ_INT32 *data;

    /* Flags used by decoder and encoder.
     * Layout: flags[1+0] holds the state of col=0, row=0..3; flags[1+1]
     * holds col=1, row=0..3; flags[1+flags_stride] holds col=0, row=4..7;
     * and so on. This array avoids too much cache thrashing when processing
     * by 4 vertical samples, as done in the various decoding steps. */
    // opj_flag_t *flags;

    OPJ_UINT32 w;
    OPJ_UINT32 h;
    OPJ_UINT32 datasize;
    OPJ_UINT32 flagssize;
    OPJ_BOOL encoder;

    /* The 3 variables below are only used by the decoder. */

    /* Set to TRUE in multithreaded context */
    OPJ_BOOL mustuse_cblkdatabuffer;
    /* Temporary buffer to concatenate all chunks of a codeblock */
    OPJ_BYTE *cblkdatabuffer;
    /* Maximum size available in cblkdatabuffer */
    OPJ_UINT32 cblkdatabuffersize;
} opj_t1_t;

#define INLINE __inline__

/* Return the larger of a and b (b when they compare equal). */
static INLINE OPJ_INT32 opj_int_max(OPJ_INT32 a, OPJ_INT32 b)
{
    if (a > b) {
        return a;
    }
    return b;
}
/*
 * Convert x to an OPJ_UINT32: a non-negative x is passed through unchanged;
 * a negative x becomes its negated value with bit 31 (0x80000000) set.
 *
 * The definition is kept as one logical line via backslash continuations,
 * and the argument is parenthesized so that an expression argument such as
 * `a - b` is negated as a whole.
 *
 * NOTE: x is evaluated more than once, so avoid arguments with side effects.
 * NOTE: negating the most negative signed value overflows; that input is
 * not guarded against here.
 */
#define opj_to_smr(x) \
    ((x) >= 0 ? (OPJ_UINT32)(x) \
              : ((OPJ_UINT32)(-(x)) | 0x80000000U))
/**
 * Reduced test case for the loop under discussion.
 *
 * Walks the t1->h rows of t1->w samples stored contiguously at t1->data.
 * Each negative sample is rewritten in place with the result of
 * opj_to_smr(); non-negative samples are left untouched. The largest
 * absolute value seen is tracked in `max`, and cblk->numbps is set to 6
 * when that maximum is non-zero, otherwise to 0.
 *
 * @param t1    supplies data, w and h; the samples at t1->data are modified
 * @param cblk  receives the numbps result
 *
 * The remaining parameters (orient, compno, level, qmfbid, stepsize,
 * cblksty, numcomps, mct_norms, mct_numcomps) are not used in this snippet.
 *
 * @return always 0.0; this snippet computes no meaningful result value.
 */
OPJ_FLOAT64 opj_t1_encode_cblk(opj_t1_t *t1,
                               opj_tcd_cblk_enc_t *cblk,
                               OPJ_UINT32 orient,
                               OPJ_UINT32 compno,
                               OPJ_UINT32 level,
                               OPJ_UINT32 qmfbid,
                               OPJ_FLOAT64 stepsize,
                               OPJ_UINT32 cblksty,
                               OPJ_UINT32 numcomps,
                               const OPJ_FLOAT64 *mct_norms,
                               OPJ_UINT32 mct_numcomps)
{
    OPJ_INT32 max;
    OPJ_UINT32 i, j;
    OPJ_INT32* datap;

    max = 0;
    datap = t1->data;
    /* The loop below is the subject of the report; it is kept as posted. */
    for (j = 0; j < t1->h; ++j) {
        const OPJ_UINT32 w = t1->w;
        for (i = 0; i < w; ++i, ++datap) {
            OPJ_INT32 tmp = *datap;
            if (tmp < 0) {
                OPJ_UINT32 tmp_unsigned;
                /* NOTE: -tmp overflows if tmp is the most negative value. */
                max = opj_int_max(max, -tmp);
                tmp_unsigned = opj_to_smr(tmp);
                /* Store the unsigned bit pattern back into the int slot. */
                memcpy(datap, &tmp_unsigned, sizeof(OPJ_INT32));
            } else {
                max = opj_int_max(max, tmp);
            }
        }
    }
    cblk->numbps = max ? 6 : 0;

    /* The function is declared to return OPJ_FLOAT64; return a defined
     * value instead of falling off the end. */
    return 0.0;
}

  parent reply	other threads:[~2023-10-31 12:08 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-05-28 19:15 [Bug middle-end/110015] New: " hubicka at gcc dot gnu.org
2023-05-28 19:42 ` [Bug middle-end/110015] " hubicka at gcc dot gnu.org
2023-10-31 12:08 ` zhangjungcc at gmail dot com [this message]
2023-11-01  1:15 ` crazylht at gmail dot com
2023-11-01  1:28 ` crazylht at gmail dot com
2023-11-07  1:12 ` pinskia at gcc dot gnu.org
2023-11-07  2:14 ` pinskia at gcc dot gnu.org
2023-11-07  2:14 ` pinskia at gcc dot gnu.org
2023-11-07  2:42 ` pinskia at gcc dot gnu.org
2023-11-07  2:50 ` pinskia at gcc dot gnu.org
2023-11-24 23:22 ` hubicka at gcc dot gnu.org

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=bug-110015-4-Xtv0QSvBo2@http.gcc.gnu.org/bugzilla/ \
    --to=gcc-bugzilla@gcc.gnu.org \
    --cc=gcc-bugs@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).