* [5/7] Add DR_BASE_ALIGNMENT and DR_BASE_MISALIGNMENT
@ 2017-07-03 7:43 Richard Sandiford
2017-07-03 10:42 ` Richard Biener
0 siblings, 1 reply; 2+ messages in thread
From: Richard Sandiford @ 2017-07-03 7:43 UTC (permalink / raw)
To: gcc-patches
This patch records the base alignment and misalignment in
innermost_loop_behavior, to avoid the second-guessing that was
previously done in vect_compute_data_ref_alignment. It also makes
vect_analyze_data_refs use dr_analyze_innermost, instead of having an
almost-copy of the same code.
I wasn't sure whether the alignments should be measured in bits
(for consistency with most other interfaces) or in bytes (for consistency
with DR_ALIGNED_TO, now DR_OFFSET_ALIGNMENT, and with *_ptr_info_alignment).
I went for bytes because:
- I think in practice most consumers are going to want bytes.
E.g. using bytes avoids having to mix TYPE_ALIGN and TYPE_ALIGN_UNIT
in vect_compute_data_ref_alignment.
- It means that any bit-level paranoia is dealt with when building
the innermost_loop_behavior and doesn't get pushed down to consumers.
Tested on aarch64-linux-gnu and x86_64-linux-gnu. OK to install?
Richard
2017-07-03 Richard Sandiford <richard.sandiford@linaro.org>
gcc/
* tree-data-ref.h (innermost_loop_behavior): Add base_alignment
and base_misalignment fields.
(DR_BASE_ALIGNMENT, DR_BASE_MISALIGNMENT): New macros.
* tree-data-ref.c: Include builtins.h.
(dr_analyze_innermost): Set up the new innermost_loop_behavior fields.
* tree-vectorizer.h (STMT_VINFO_DR_BASE_ALIGNMENT): New macro.
(STMT_VINFO_DR_BASE_MISALIGNMENT): Likewise.
* tree-vect-data-refs.c: Include tree-cfg.h.
(vect_compute_data_ref_alignment): Use the new innermost_loop_behavior
fields instead of calculating an alignment here.
(vect_analyze_data_refs): Use dr_analyze_innermost. Dump the new
innermost_loop_behavior fields.
Index: gcc/tree-data-ref.h
===================================================================
--- gcc/tree-data-ref.h 2017-07-03 07:52:14.194782203 +0100
+++ gcc/tree-data-ref.h 2017-07-03 07:52:55.920272347 +0100
@@ -52,6 +52,42 @@ struct innermost_loop_behavior
tree init;
tree step;
+ /* BASE_ADDRESS is known to be misaligned by BASE_MISALIGNMENT bytes
+ from an alignment boundary of BASE_ALIGNMENT bytes. For example,
+ if we had:
+
+ struct S __attribute__((aligned(16))) { ... };
+
+ char *ptr;
+ ... *(struct S *) (ptr - 4) ...;
+
+ the information would be:
+
+ base_address: ptr
+ base_alignment: 16
+ base_misalignment: 4
+ init: -4
+
+ where init cancels the base misalignment. If instead we had a
+ reference to a particular field:
+
+ struct S __attribute__((aligned(16))) { ... int f; ... };
+
+ char *ptr;
+ ... ((struct S *) (ptr - 4))->f ...;
+
+ the information would be:
+
+ base_address: ptr
+ base_alignment: 16
+ base_misalignment: 4
+ init: -4 + offsetof (S, f)
+
+ where base_address + init might also be misaligned, and by a different
+ amount from base_address. */
+ unsigned int base_alignment;
+ unsigned int base_misalignment;
+
/* The largest power of two that divides OFFSET, capped to a suitably
high value if the offset is zero. This is a byte rather than a bit
quantity. */
@@ -147,6 +183,8 @@ #define DR_OFFSET(DR) (DR)-
#define DR_INIT(DR) (DR)->innermost.init
#define DR_STEP(DR) (DR)->innermost.step
#define DR_PTR_INFO(DR) (DR)->alias.ptr_info
+#define DR_BASE_ALIGNMENT(DR) (DR)->innermost.base_alignment
+#define DR_BASE_MISALIGNMENT(DR) (DR)->innermost.base_misalignment
#define DR_OFFSET_ALIGNMENT(DR) (DR)->innermost.offset_alignment
#define DR_STEP_ALIGNMENT(DR) (DR)->innermost.step_alignment
#define DR_INNERMOST(DR) (DR)->innermost
Index: gcc/tree-data-ref.c
===================================================================
--- gcc/tree-data-ref.c 2017-07-03 07:52:14.193782226 +0100
+++ gcc/tree-data-ref.c 2017-07-03 07:52:55.920272347 +0100
@@ -94,6 +94,7 @@ Software Foundation; either version 3, o
#include "dumpfile.h"
#include "tree-affine.h"
#include "params.h"
+#include "builtins.h"
static struct datadep_stats
{
@@ -802,11 +803,26 @@ dr_analyze_innermost (struct data_refere
return false;
}
+ /* Calculate the alignment and misalignment for the inner reference. */
+ unsigned int HOST_WIDE_INT base_misalignment;
+ unsigned int base_alignment;
+ get_object_alignment_1 (base, &base_alignment, &base_misalignment);
+
+ /* There are no bitfield references remaining in BASE, so the values
+ we got back must be whole bytes. */
+ gcc_assert (base_alignment % BITS_PER_UNIT == 0
+ && base_misalignment % BITS_PER_UNIT == 0);
+ base_alignment /= BITS_PER_UNIT;
+ base_misalignment /= BITS_PER_UNIT;
+
if (TREE_CODE (base) == MEM_REF)
{
if (!integer_zerop (TREE_OPERAND (base, 1)))
{
+ /* Subtract MOFF from the base and add it to POFFSET instead.
+ Adjust the misalignment to reflect the amount we subtracted. */
offset_int moff = mem_ref_offset (base);
+ base_misalignment -= moff.to_short_addr ();
tree mofft = wide_int_to_tree (sizetype, moff);
if (!poffset)
poffset = mofft;
@@ -855,20 +871,46 @@ dr_analyze_innermost (struct data_refere
}
init = ssize_int (pbitpos / BITS_PER_UNIT);
+
+ /* Subtract any constant component from the base and add it to INIT instead.
+ Adjust the misalignment to reflect the amount we subtracted. */
split_constant_offset (base_iv.base, &base_iv.base, &dinit);
- init = size_binop (PLUS_EXPR, init, dinit);
+ init = size_binop (PLUS_EXPR, init, dinit);
+ base_misalignment -= TREE_INT_CST_LOW (dinit);
+
split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
- init = size_binop (PLUS_EXPR, init, dinit);
+ init = size_binop (PLUS_EXPR, init, dinit);
step = size_binop (PLUS_EXPR,
fold_convert (ssizetype, base_iv.step),
fold_convert (ssizetype, offset_iv.step));
- drb->base_address = canonicalize_base_object_address (base_iv.base);
+ base = canonicalize_base_object_address (base_iv.base);
+
+ /* See if get_pointer_alignment can guarantee a higher alignment than
+ the one we calculated above. */
+ unsigned int HOST_WIDE_INT alt_misalignment;
+ unsigned int alt_alignment;
+ get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
+
+ /* As above, these values must be whole bytes. */
+ gcc_assert (alt_alignment % BITS_PER_UNIT == 0
+ && alt_misalignment % BITS_PER_UNIT == 0);
+ alt_alignment /= BITS_PER_UNIT;
+ alt_misalignment /= BITS_PER_UNIT;
+
+ if (base_alignment < alt_alignment)
+ {
+ base_alignment = alt_alignment;
+ base_misalignment = alt_misalignment;
+ }
+ drb->base_address = base;
drb->offset = fold_convert (ssizetype, offset_iv.base);
drb->init = init;
drb->step = step;
+ drb->base_alignment = base_alignment;
+ drb->base_misalignment = base_misalignment & (base_alignment - 1);
drb->offset_alignment = highest_pow2_factor (offset_iv.base);
drb->step_alignment = highest_pow2_factor (step);
@@ -1084,6 +1126,9 @@ create_data_ref (loop_p nest, loop_p loo
print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
fprintf (dump_file, "\n\tstep: ");
print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
+ fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
+ fprintf (dump_file, "\n\tbase misalignment: %d",
+ DR_BASE_MISALIGNMENT (dr));
fprintf (dump_file, "\n\toffset alignment: %d",
DR_OFFSET_ALIGNMENT (dr));
fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
Index: gcc/tree-vectorizer.h
===================================================================
--- gcc/tree-vectorizer.h 2017-07-03 07:52:14.196782157 +0100
+++ gcc/tree-vectorizer.h 2017-07-03 07:52:55.921272300 +0100
@@ -707,6 +707,9 @@ #define STMT_VINFO_DR_BASE_ADDRESS(S)
#define STMT_VINFO_DR_INIT(S) (S)->dr_wrt_vec_loop.init
#define STMT_VINFO_DR_OFFSET(S) (S)->dr_wrt_vec_loop.offset
#define STMT_VINFO_DR_STEP(S) (S)->dr_wrt_vec_loop.step
+#define STMT_VINFO_DR_BASE_ALIGNMENT(S) (S)->dr_wrt_vec_loop.base_alignment
+#define STMT_VINFO_DR_BASE_MISALIGNMENT(S) \
+ (S)->dr_wrt_vec_loop.base_misalignment
#define STMT_VINFO_DR_OFFSET_ALIGNMENT(S) \
(S)->dr_wrt_vec_loop.offset_alignment
#define STMT_VINFO_DR_STEP_ALIGNMENT(S) \
Index: gcc/tree-vect-data-refs.c
===================================================================
--- gcc/tree-vect-data-refs.c 2017-07-03 07:52:14.194782203 +0100
+++ gcc/tree-vect-data-refs.c 2017-07-03 07:52:55.921272300 +0100
@@ -50,6 +50,7 @@ Software Foundation; either version 3, o
#include "expr.h"
#include "builtins.h"
#include "params.h"
+#include "tree-cfg.h"
/* Return true if load- or store-lanes optab OPTAB is implemented for
COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */
@@ -667,8 +668,6 @@ vect_compute_data_ref_alignment (struct
struct loop *loop = NULL;
tree ref = DR_REF (dr);
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
- tree base;
- unsigned HOST_WIDE_INT alignment;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
@@ -728,48 +727,18 @@ vect_compute_data_ref_alignment (struct
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"step doesn't divide the vector-size.\n");
}
- tree base_addr = drb->base_address;
- /* To look at alignment of the base we have to preserve an inner MEM_REF
- as that carries alignment information of the actual access. */
- base = ref;
- while (handled_component_p (base))
- base = TREE_OPERAND (base, 0);
- unsigned int base_alignment = 0;
- unsigned HOST_WIDE_INT base_bitpos;
- get_object_alignment_1 (base, &base_alignment, &base_bitpos);
- /* As data-ref analysis strips the MEM_REF down to its base operand
- to form DR_BASE_ADDRESS and adds the offset to DR_INIT we have to
- adjust things to make base_alignment valid as the alignment of
- DR_BASE_ADDRESS. */
- if (TREE_CODE (base) == MEM_REF)
- {
- /* Note all this only works if DR_BASE_ADDRESS is the same as
- MEM_REF operand zero, otherwise DR/SCEV analysis might have factored
- in other offsets. We need to rework DR to compute the alingment
- of DR_BASE_ADDRESS as long as all information is still available. */
- if (operand_equal_p (TREE_OPERAND (base, 0), base_addr, 0))
- {
- base_bitpos -= mem_ref_offset (base).to_short_addr () * BITS_PER_UNIT;
- base_bitpos &= (base_alignment - 1);
- }
- else
- base_bitpos = BITS_PER_UNIT;
- }
- if (base_bitpos != 0)
- base_alignment = base_bitpos & -base_bitpos;
- /* Also look at the alignment of the base address DR analysis
- computed. */
- unsigned int base_addr_alignment = get_pointer_alignment (base_addr);
- if (base_addr_alignment > base_alignment)
- base_alignment = base_addr_alignment;
+ unsigned int base_alignment = drb->base_alignment;
+ unsigned int base_misalignment = drb->base_misalignment;
+ unsigned HOST_WIDE_INT vector_alignment = TYPE_ALIGN_UNIT (vectype);
+ unsigned HOST_WIDE_INT element_alignment
+ = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
- if (base_alignment >= TYPE_ALIGN (TREE_TYPE (vectype)))
+ if (base_alignment >= element_alignment
+ && (base_misalignment & (element_alignment - 1)) == 0)
DR_VECT_AUX (dr)->base_element_aligned = true;
- alignment = TYPE_ALIGN_UNIT (vectype);
-
- if (drb->offset_alignment < alignment
+ if (drb->offset_alignment < vector_alignment
|| !step_preserves_misalignment_p
/* We need to know whether the step wrt the vectorized loop is
negative when computing the starting misalignment below. */
@@ -785,12 +754,13 @@ vect_compute_data_ref_alignment (struct
return true;
}
- if (base_alignment < TYPE_ALIGN (vectype))
+ if (base_alignment < vector_alignment)
{
- base = base_addr;
+ tree base = drb->base_address;
if (TREE_CODE (base) == ADDR_EXPR)
base = TREE_OPERAND (base, 0);
- if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
+ if (!vect_can_force_dr_alignment_p (base,
+ vector_alignment * BITS_PER_UNIT))
{
if (dump_enabled_p ())
{
@@ -828,24 +798,20 @@ vect_compute_data_ref_alignment (struct
DR_VECT_AUX (dr)->base_decl = base;
DR_VECT_AUX (dr)->base_misaligned = true;
DR_VECT_AUX (dr)->base_element_aligned = true;
+ base_misalignment = 0;
}
+ unsigned int misalignment = (base_misalignment
+ + TREE_INT_CST_LOW (drb->init));
/* If this is a backward running DR then first access in the larger
vectype actually is N-1 elements before the address in the DR.
Adjust misalign accordingly. */
- tree misalign = drb->init;
if (tree_int_cst_sgn (drb->step) < 0)
- {
- tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
- /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
- otherwise we wouldn't be here. */
- offset = fold_build2 (MULT_EXPR, ssizetype, offset, drb->step);
- /* PLUS because STEP was negative. */
- misalign = size_binop (PLUS_EXPR, misalign, offset);
- }
+ /* PLUS because STEP is negative. */
+ misalignment += ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
+ * TREE_INT_CST_LOW (drb->step));
- SET_DR_MISALIGNMENT (dr,
- wi::mod_floor (misalign, alignment, SIGNED).to_uhwi ());
+ SET_DR_MISALIGNMENT (dr, misalignment & (vector_alignment - 1));
if (dump_enabled_p ())
{
@@ -3554,100 +3520,27 @@ vect_analyze_data_refs (vec_info *vinfo,
the outer-loop. */
if (loop && nested_in_vect_loop_p (loop, stmt))
{
- tree outer_step, outer_base, outer_init;
- HOST_WIDE_INT pbitsize, pbitpos;
- tree poffset;
- machine_mode pmode;
- int punsignedp, preversep, pvolatilep;
- affine_iv base_iv, offset_iv;
- tree dinit;
-
/* Build a reference to the first location accessed by the
- inner-loop: *(BASE+INIT). (The first location is actually
- BASE+INIT+OFFSET, but we add OFFSET separately later). */
- tree inner_base = build_fold_indirect_ref
- (fold_build_pointer_plus (base, init));
+ inner loop: *(BASE + INIT + OFFSET). By construction,
+ this address must be invariant in the inner loop, so we
+ can consider it as being used in the outer loop. */
+ tree init_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset),
+ init, offset);
+ tree init_addr = fold_build_pointer_plus (base, init_offset);
+ tree init_ref = build_fold_indirect_ref (init_addr);
if (dump_enabled_p ())
{
dump_printf_loc (MSG_NOTE, vect_location,
- "analyze in outer-loop: ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, inner_base);
+ "analyze in outer loop: ");
+ dump_generic_expr (MSG_NOTE, TDF_SLIM, init_ref);
dump_printf (MSG_NOTE, "\n");
}
- outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
- &poffset, &pmode, &punsignedp,
- &preversep, &pvolatilep);
- gcc_assert (outer_base != NULL_TREE);
-
- if (pbitpos % BITS_PER_UNIT != 0)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "failed: bit offset alignment.\n");
- return false;
- }
-
- if (preversep)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "failed: reverse storage order.\n");
- return false;
- }
-
- outer_base = build_fold_addr_expr (outer_base);
- if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
- &base_iv, false))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "failed: evolution of base is not affine.\n");
- return false;
- }
-
- if (offset)
- {
- if (poffset)
- poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
- poffset);
- else
- poffset = offset;
- }
-
- if (!poffset)
- {
- offset_iv.base = ssize_int (0);
- offset_iv.step = ssize_int (0);
- }
- else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
- &offset_iv, false))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "evolution of offset is not affine.\n");
- return false;
- }
-
- outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
- split_constant_offset (base_iv.base, &base_iv.base, &dinit);
- outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
- split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
- outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
-
- outer_step = size_binop (PLUS_EXPR,
- fold_convert (ssizetype, base_iv.step),
- fold_convert (ssizetype, offset_iv.step));
-
- STMT_VINFO_DR_STEP (stmt_info) = outer_step;
- /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
- STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
- STMT_VINFO_DR_INIT (stmt_info) = outer_init;
- STMT_VINFO_DR_OFFSET (stmt_info) =
- fold_convert (ssizetype, offset_iv.base);
- STMT_VINFO_DR_OFFSET_ALIGNMENT (stmt_info)
- = highest_pow2_factor (offset_iv.base);
+ if (!dr_analyze_innermost (&STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info),
+ init_ref, loop))
+ /* dr_analyze_innermost already explained the failure. */
+ return false;
if (dump_enabled_p ())
{
@@ -3665,6 +3558,10 @@ vect_analyze_data_refs (vec_info *vinfo,
dump_printf (MSG_NOTE, "\n\touter step: ");
dump_generic_expr (MSG_NOTE, TDF_SLIM,
STMT_VINFO_DR_STEP (stmt_info));
+ dump_printf (MSG_NOTE, "\n\touter base alignment: %d\n",
+ STMT_VINFO_DR_BASE_ALIGNMENT (stmt_info));
+ dump_printf (MSG_NOTE, "\n\touter base misalignment: %d\n",
+ STMT_VINFO_DR_BASE_MISALIGNMENT (stmt_info));
dump_printf (MSG_NOTE, "\n\touter offset alignment: %d\n",
STMT_VINFO_DR_OFFSET_ALIGNMENT (stmt_info));
dump_printf (MSG_NOTE, "\n\touter step alignment: %d\n",
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [5/7] Add DR_BASE_ALIGNMENT and DR_BASE_MISALIGNMENT
2017-07-03 7:43 [5/7] Add DR_BASE_ALIGNMENT and DR_BASE_MISALIGNMENT Richard Sandiford
@ 2017-07-03 10:42 ` Richard Biener
0 siblings, 0 replies; 2+ messages in thread
From: Richard Biener @ 2017-07-03 10:42 UTC (permalink / raw)
To: GCC Patches, Richard Sandiford
On Mon, Jul 3, 2017 at 9:38 AM, Richard Sandiford
<richard.sandiford@linaro.org> wrote:
> This patch records the base alignment and misalignment in
> innermost_loop_behavior, to avoid the second-guessing that was
> previously done in vect_compute_data_ref_alignment. It also makes
> vect_analyze_data_refs use dr_analyze_innermost, instead of having an
> almost-copy of the same code.
>
> I wasn't sure whether the alignments should be measured in bits
> (for consistency with most other interfaces) or in bytes (for consistency
> with DR_ALIGNED_TO, now DR_OFFSET_ALIGNMENT, and with *_ptr_info_alignment).
> I went for bytes because:
>
> - I think in practice most consumers are going to want bytes.
> E.g. using bytes avoids having to mix TYPE_ALIGN and TYPE_ALIGN_UNIT
> in vect_compute_data_ref_alignment.
>
> - It means that any bit-level paranoia is dealt with when building
> the innermost_loop_behavior and doesn't get pushed down to consumers.
>
> Tested on aarch64-linux-gnu and x86_64-linux-gnu. OK to install?
Ok.
Thanks,
Richard.
> Richard
>
>
> 2017-07-03 Richard Sandiford <richard.sandiford@linaro.org>
>
> gcc/
> * tree-data-ref.h (innermost_loop_behavior): Add base_alignment
> and base_misalignment fields.
> (DR_BASE_ALIGNMENT, DR_BASE_MISALIGNMENT): New macros.
> * tree-data-ref.c: Include builtins.h.
> (dr_analyze_innermost): Set up the new innermost_loop_behavior fields.
> * tree-vectorizer.h (STMT_VINFO_DR_BASE_ALIGNMENT): New macro.
> (STMT_VINFO_DR_BASE_MISALIGNMENT): Likewise.
> * tree-vect-data-refs.c: Include tree-cfg.h.
> (vect_compute_data_ref_alignment): Use the new innermost_loop_behavior
> fields instead of calculating an alignment here.
> (vect_analyze_data_refs): Use dr_analyze_innermost. Dump the new
> innermost_loop_behavior fields.
>
> Index: gcc/tree-data-ref.h
> ===================================================================
> --- gcc/tree-data-ref.h 2017-07-03 07:52:14.194782203 +0100
> +++ gcc/tree-data-ref.h 2017-07-03 07:52:55.920272347 +0100
> @@ -52,6 +52,42 @@ struct innermost_loop_behavior
> tree init;
> tree step;
>
> + /* BASE_ADDRESS is known to be misaligned by BASE_MISALIGNMENT bytes
> + from an alignment boundary of BASE_ALIGNMENT bytes. For example,
> + if we had:
> +
> + struct S __attribute__((aligned(16))) { ... };
> +
> + char *ptr;
> + ... *(struct S *) (ptr - 4) ...;
> +
> + the information would be:
> +
> + base_address: ptr
> + base_aligment: 16
> + base_misalignment: 4
> + init: -4
> +
> + where init cancels the base misalignment. If instead we had a
> + reference to a particular field:
> +
> + struct S __attribute__((aligned(16))) { ... int f; ... };
> +
> + char *ptr;
> + ... ((struct S *) (ptr - 4))->f ...;
> +
> + the information would be:
> +
> + base_address: ptr
> + base_aligment: 16
> + base_misalignment: 4
> + init: -4 + offsetof (S, f)
> +
> + where base_address + init might also be misaligned, and by a different
> + amount from base_address. */
> + unsigned int base_alignment;
> + unsigned int base_misalignment;
> +
> /* The largest power of two that divides OFFSET, capped to a suitably
> high value if the offset is zero. This is a byte rather than a bit
> quantity. */
> @@ -147,6 +183,8 @@ #define DR_OFFSET(DR) (DR)-
> #define DR_INIT(DR) (DR)->innermost.init
> #define DR_STEP(DR) (DR)->innermost.step
> #define DR_PTR_INFO(DR) (DR)->alias.ptr_info
> +#define DR_BASE_ALIGNMENT(DR) (DR)->innermost.base_alignment
> +#define DR_BASE_MISALIGNMENT(DR) (DR)->innermost.base_misalignment
> #define DR_OFFSET_ALIGNMENT(DR) (DR)->innermost.offset_alignment
> #define DR_STEP_ALIGNMENT(DR) (DR)->innermost.step_alignment
> #define DR_INNERMOST(DR) (DR)->innermost
> Index: gcc/tree-data-ref.c
> ===================================================================
> --- gcc/tree-data-ref.c 2017-07-03 07:52:14.193782226 +0100
> +++ gcc/tree-data-ref.c 2017-07-03 07:52:55.920272347 +0100
> @@ -94,6 +94,7 @@ Software Foundation; either version 3, o
> #include "dumpfile.h"
> #include "tree-affine.h"
> #include "params.h"
> +#include "builtins.h"
>
> static struct datadep_stats
> {
> @@ -802,11 +803,26 @@ dr_analyze_innermost (struct data_refere
> return false;
> }
>
> + /* Calculate the alignment and misalignment for the inner reference. */
> + unsigned int HOST_WIDE_INT base_misalignment;
> + unsigned int base_alignment;
> + get_object_alignment_1 (base, &base_alignment, &base_misalignment);
> +
> + /* There are no bitfield references remaining in BASE, so the values
> + we got back must be whole bytes. */
> + gcc_assert (base_alignment % BITS_PER_UNIT == 0
> + && base_misalignment % BITS_PER_UNIT == 0);
> + base_alignment /= BITS_PER_UNIT;
> + base_misalignment /= BITS_PER_UNIT;
> +
> if (TREE_CODE (base) == MEM_REF)
> {
> if (!integer_zerop (TREE_OPERAND (base, 1)))
> {
> + /* Subtract MOFF from the base and add it to POFFSET instead.
> + Adjust the misalignment to reflect the amount we subtracted. */
> offset_int moff = mem_ref_offset (base);
> + base_misalignment -= moff.to_short_addr ();
> tree mofft = wide_int_to_tree (sizetype, moff);
> if (!poffset)
> poffset = mofft;
> @@ -855,20 +871,46 @@ dr_analyze_innermost (struct data_refere
> }
>
> init = ssize_int (pbitpos / BITS_PER_UNIT);
> +
> + /* Subtract any constant component from the base and add it to INIT instead.
> + Adjust the misalignment to reflect the amount we subtracted. */
> split_constant_offset (base_iv.base, &base_iv.base, &dinit);
> - init = size_binop (PLUS_EXPR, init, dinit);
> + init = size_binop (PLUS_EXPR, init, dinit);
> + base_misalignment -= TREE_INT_CST_LOW (dinit);
> +
> split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
> - init = size_binop (PLUS_EXPR, init, dinit);
> + init = size_binop (PLUS_EXPR, init, dinit);
>
> step = size_binop (PLUS_EXPR,
> fold_convert (ssizetype, base_iv.step),
> fold_convert (ssizetype, offset_iv.step));
>
> - drb->base_address = canonicalize_base_object_address (base_iv.base);
> + base = canonicalize_base_object_address (base_iv.base);
> +
> + /* See if get_pointer_alignment can guarantee a higher alignment than
> + the one we calculated above. */
> + unsigned int HOST_WIDE_INT alt_misalignment;
> + unsigned int alt_alignment;
> + get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
> +
> + /* As above, these values must be whole bytes. */
> + gcc_assert (alt_alignment % BITS_PER_UNIT == 0
> + && alt_misalignment % BITS_PER_UNIT == 0);
> + alt_alignment /= BITS_PER_UNIT;
> + alt_misalignment /= BITS_PER_UNIT;
> +
> + if (base_alignment < alt_alignment)
> + {
> + base_alignment = alt_alignment;
> + base_misalignment = alt_misalignment;
> + }
>
> + drb->base_address = base;
> drb->offset = fold_convert (ssizetype, offset_iv.base);
> drb->init = init;
> drb->step = step;
> + drb->base_alignment = base_alignment;
> + drb->base_misalignment = base_misalignment & (base_alignment - 1);
> drb->offset_alignment = highest_pow2_factor (offset_iv.base);
> drb->step_alignment = highest_pow2_factor (step);
>
> @@ -1084,6 +1126,9 @@ create_data_ref (loop_p nest, loop_p loo
> print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
> fprintf (dump_file, "\n\tstep: ");
> print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
> + fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
> + fprintf (dump_file, "\n\tbase misalignment: %d",
> + DR_BASE_MISALIGNMENT (dr));
> fprintf (dump_file, "\n\toffset alignment: %d",
> DR_OFFSET_ALIGNMENT (dr));
> fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
> Index: gcc/tree-vectorizer.h
> ===================================================================
> --- gcc/tree-vectorizer.h 2017-07-03 07:52:14.196782157 +0100
> +++ gcc/tree-vectorizer.h 2017-07-03 07:52:55.921272300 +0100
> @@ -707,6 +707,9 @@ #define STMT_VINFO_DR_BASE_ADDRESS(S)
> #define STMT_VINFO_DR_INIT(S) (S)->dr_wrt_vec_loop.init
> #define STMT_VINFO_DR_OFFSET(S) (S)->dr_wrt_vec_loop.offset
> #define STMT_VINFO_DR_STEP(S) (S)->dr_wrt_vec_loop.step
> +#define STMT_VINFO_DR_BASE_ALIGNMENT(S) (S)->dr_wrt_vec_loop.base_alignment
> +#define STMT_VINFO_DR_BASE_MISALIGNMENT(S) \
> + (S)->dr_wrt_vec_loop.base_misalignment
> #define STMT_VINFO_DR_OFFSET_ALIGNMENT(S) \
> (S)->dr_wrt_vec_loop.offset_alignment
> #define STMT_VINFO_DR_STEP_ALIGNMENT(S) \
> Index: gcc/tree-vect-data-refs.c
> ===================================================================
> --- gcc/tree-vect-data-refs.c 2017-07-03 07:52:14.194782203 +0100
> +++ gcc/tree-vect-data-refs.c 2017-07-03 07:52:55.921272300 +0100
> @@ -50,6 +50,7 @@ Software Foundation; either version 3, o
> #include "expr.h"
> #include "builtins.h"
> #include "params.h"
> +#include "tree-cfg.h"
>
> /* Return true if load- or store-lanes optab OPTAB is implemented for
> COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */
> @@ -667,8 +668,6 @@ vect_compute_data_ref_alignment (struct
> struct loop *loop = NULL;
> tree ref = DR_REF (dr);
> tree vectype = STMT_VINFO_VECTYPE (stmt_info);
> - tree base;
> - unsigned HOST_WIDE_INT alignment;
>
> if (dump_enabled_p ())
> dump_printf_loc (MSG_NOTE, vect_location,
> @@ -728,48 +727,18 @@ vect_compute_data_ref_alignment (struct
> dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> "step doesn't divide the vector-size.\n");
> }
> - tree base_addr = drb->base_address;
>
> - /* To look at alignment of the base we have to preserve an inner MEM_REF
> - as that carries alignment information of the actual access. */
> - base = ref;
> - while (handled_component_p (base))
> - base = TREE_OPERAND (base, 0);
> - unsigned int base_alignment = 0;
> - unsigned HOST_WIDE_INT base_bitpos;
> - get_object_alignment_1 (base, &base_alignment, &base_bitpos);
> - /* As data-ref analysis strips the MEM_REF down to its base operand
> - to form DR_BASE_ADDRESS and adds the offset to DR_INIT we have to
> - adjust things to make base_alignment valid as the alignment of
> - DR_BASE_ADDRESS. */
> - if (TREE_CODE (base) == MEM_REF)
> - {
> - /* Note all this only works if DR_BASE_ADDRESS is the same as
> - MEM_REF operand zero, otherwise DR/SCEV analysis might have factored
> - in other offsets. We need to rework DR to compute the alingment
> - of DR_BASE_ADDRESS as long as all information is still available. */
> - if (operand_equal_p (TREE_OPERAND (base, 0), base_addr, 0))
> - {
> - base_bitpos -= mem_ref_offset (base).to_short_addr () * BITS_PER_UNIT;
> - base_bitpos &= (base_alignment - 1);
> - }
> - else
> - base_bitpos = BITS_PER_UNIT;
> - }
> - if (base_bitpos != 0)
> - base_alignment = base_bitpos & -base_bitpos;
> - /* Also look at the alignment of the base address DR analysis
> - computed. */
> - unsigned int base_addr_alignment = get_pointer_alignment (base_addr);
> - if (base_addr_alignment > base_alignment)
> - base_alignment = base_addr_alignment;
> + unsigned int base_alignment = drb->base_alignment;
> + unsigned int base_misalignment = drb->base_misalignment;
> + unsigned HOST_WIDE_INT vector_alignment = TYPE_ALIGN_UNIT (vectype);
> + unsigned HOST_WIDE_INT element_alignment
> + = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
>
> - if (base_alignment >= TYPE_ALIGN (TREE_TYPE (vectype)))
> + if (base_alignment >= element_alignment
> + && (base_misalignment & (element_alignment - 1)) == 0)
> DR_VECT_AUX (dr)->base_element_aligned = true;
>
> - alignment = TYPE_ALIGN_UNIT (vectype);
> -
> - if (drb->offset_alignment < alignment
> + if (drb->offset_alignment < vector_alignment
> || !step_preserves_misalignment_p
> /* We need to know whether the step wrt the vectorized loop is
> negative when computing the starting misalignment below. */
> @@ -785,12 +754,13 @@ vect_compute_data_ref_alignment (struct
> return true;
> }
>
> - if (base_alignment < TYPE_ALIGN (vectype))
> + if (base_alignment < vector_alignment)
> {
> - base = base_addr;
> + tree base = drb->base_address;
> if (TREE_CODE (base) == ADDR_EXPR)
> base = TREE_OPERAND (base, 0);
> - if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
> + if (!vect_can_force_dr_alignment_p (base,
> + vector_alignment * BITS_PER_UNIT))
> {
> if (dump_enabled_p ())
> {
> @@ -828,24 +798,20 @@ vect_compute_data_ref_alignment (struct
> DR_VECT_AUX (dr)->base_decl = base;
> DR_VECT_AUX (dr)->base_misaligned = true;
> DR_VECT_AUX (dr)->base_element_aligned = true;
> + base_misalignment = 0;
> }
> + unsigned int misalignment = (base_misalignment
> + + TREE_INT_CST_LOW (drb->init));
>
> /* If this is a backward running DR then first access in the larger
> vectype actually is N-1 elements before the address in the DR.
> Adjust misalign accordingly. */
> - tree misalign = drb->init;
> if (tree_int_cst_sgn (drb->step) < 0)
> - {
> - tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
> - /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
> - otherwise we wouldn't be here. */
> - offset = fold_build2 (MULT_EXPR, ssizetype, offset, drb->step);
> - /* PLUS because STEP was negative. */
> - misalign = size_binop (PLUS_EXPR, misalign, offset);
> - }
> + /* PLUS because STEP is negative. */
> + misalignment += ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
> + * TREE_INT_CST_LOW (drb->step));
>
> - SET_DR_MISALIGNMENT (dr,
> - wi::mod_floor (misalign, alignment, SIGNED).to_uhwi ());
> + SET_DR_MISALIGNMENT (dr, misalignment & (vector_alignment - 1));
>
> if (dump_enabled_p ())
> {
> @@ -3554,100 +3520,27 @@ vect_analyze_data_refs (vec_info *vinfo,
> the outer-loop. */
> if (loop && nested_in_vect_loop_p (loop, stmt))
> {
> - tree outer_step, outer_base, outer_init;
> - HOST_WIDE_INT pbitsize, pbitpos;
> - tree poffset;
> - machine_mode pmode;
> - int punsignedp, preversep, pvolatilep;
> - affine_iv base_iv, offset_iv;
> - tree dinit;
> -
> /* Build a reference to the first location accessed by the
> - inner-loop: *(BASE+INIT). (The first location is actually
> - BASE+INIT+OFFSET, but we add OFFSET separately later). */
> - tree inner_base = build_fold_indirect_ref
> - (fold_build_pointer_plus (base, init));
> + inner loop: *(BASE + INIT + OFFSET). By construction,
> + this address must be invariant in the inner loop, so we
> + can consider it as being used in the outer loop. */
> + tree init_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset),
> + init, offset);
> + tree init_addr = fold_build_pointer_plus (base, init_offset);
> + tree init_ref = build_fold_indirect_ref (init_addr);
>
> if (dump_enabled_p ())
> {
> dump_printf_loc (MSG_NOTE, vect_location,
> - "analyze in outer-loop: ");
> - dump_generic_expr (MSG_NOTE, TDF_SLIM, inner_base);
> + "analyze in outer loop: ");
> + dump_generic_expr (MSG_NOTE, TDF_SLIM, init_ref);
> dump_printf (MSG_NOTE, "\n");
> }
>
> - outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
> - &poffset, &pmode, &punsignedp,
> - &preversep, &pvolatilep);
> - gcc_assert (outer_base != NULL_TREE);
> -
> - if (pbitpos % BITS_PER_UNIT != 0)
> - {
> - if (dump_enabled_p ())
> - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> - "failed: bit offset alignment.\n");
> - return false;
> - }
> -
> - if (preversep)
> - {
> - if (dump_enabled_p ())
> - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> - "failed: reverse storage order.\n");
> - return false;
> - }
> -
> - outer_base = build_fold_addr_expr (outer_base);
> - if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
> - &base_iv, false))
> - {
> - if (dump_enabled_p ())
> - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> - "failed: evolution of base is not affine.\n");
> - return false;
> - }
> -
> - if (offset)
> - {
> - if (poffset)
> - poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
> - poffset);
> - else
> - poffset = offset;
> - }
> -
> - if (!poffset)
> - {
> - offset_iv.base = ssize_int (0);
> - offset_iv.step = ssize_int (0);
> - }
> - else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
> - &offset_iv, false))
> - {
> - if (dump_enabled_p ())
> - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> - "evolution of offset is not affine.\n");
> - return false;
> - }
> -
> - outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
> - split_constant_offset (base_iv.base, &base_iv.base, &dinit);
> - outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
> - split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
> - outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
> -
> - outer_step = size_binop (PLUS_EXPR,
> - fold_convert (ssizetype, base_iv.step),
> - fold_convert (ssizetype, offset_iv.step));
> -
> - STMT_VINFO_DR_STEP (stmt_info) = outer_step;
> - /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
> - STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
> - STMT_VINFO_DR_INIT (stmt_info) = outer_init;
> - STMT_VINFO_DR_OFFSET (stmt_info) =
> - fold_convert (ssizetype, offset_iv.base);
> - STMT_VINFO_DR_OFFSET_ALIGNMENT (stmt_info)
> - = highest_pow2_factor (offset_iv.base);
> + if (!dr_analyze_innermost (&STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info),
> + init_ref, loop))
> + /* dr_analyze_innermost already explained the failure. */
> + return false;
>
> if (dump_enabled_p ())
> {
> @@ -3665,6 +3558,10 @@ vect_analyze_data_refs (vec_info *vinfo,
> dump_printf (MSG_NOTE, "\n\touter step: ");
> dump_generic_expr (MSG_NOTE, TDF_SLIM,
> STMT_VINFO_DR_STEP (stmt_info));
> + dump_printf (MSG_NOTE, "\n\touter base alignment: %d\n",
> + STMT_VINFO_DR_BASE_ALIGNMENT (stmt_info));
> + dump_printf (MSG_NOTE, "\n\touter base misalignment: %d\n",
> + STMT_VINFO_DR_BASE_MISALIGNMENT (stmt_info));
> dump_printf (MSG_NOTE, "\n\touter offset alignment: %d\n",
> STMT_VINFO_DR_OFFSET_ALIGNMENT (stmt_info));
> dump_printf (MSG_NOTE, "\n\touter step alignment: %d\n",
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2017-07-03 10:42 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-07-03 7:43 [5/7] Add DR_BASE_ALIGNMENT and DR_BASE_MISALIGNMENT Richard Sandiford
2017-07-03 10:42 ` Richard Biener
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).