* [PATCH] [BRIGFE] Reduce the number of type conversions due to the untyped HSAIL regs
@ 2017-11-16 17:41 Pekka Jääskeläinen
2017-11-17 12:41 ` Rainer Orth
0 siblings, 1 reply; 3+ messages in thread
From: Pekka Jääskeläinen @ 2017-11-16 17:41 UTC (permalink / raw)
To: GCC Patches, Martin Jambor, Henry Linjamäki
[-- Attachment #1: Type: text/plain, Size: 339 bytes --]
Instead of always representing HSAIL's untyped registers as
unsigned int, gccbrig now pre-analyzes the BRIG code and
builds each register variable with the type that is used most often
when storing data to or reading data from that register. This reduces
the total number of conversions, which cannot always be optimized away.
Committed as r254837.
BR,
Pekka
[-- Attachment #2: 254837.patch --]
[-- Type: text/x-patch, Size: 49252 bytes --]
Index: gcc/brig/brigfrontend/brig-util.cc
===================================================================
--- gcc/brig/brigfrontend/brig-util.cc (revision 254836)
+++ gcc/brig/brigfrontend/brig-util.cc (revision 254837)
@@ -26,6 +26,7 @@
#include "brig-util.h"
#include "errors.h"
#include "diagnostic-core.h"
+#include "print-tree.h"
bool
group_variable_offset_index::has_variable (const std::string &name) const
@@ -473,3 +474,91 @@
/* Drop const qualifiers. */
return tree_type;
}
+
+/* Calculates a numeric identifier for the HSA register REG.
+
+   Identifiers are assigned kind by kind: control (c) registers come
+   first, followed by the single (s), double (d) and quad (q) registers.
+   For a register number that is valid for its kind, the returned value
+   is in the half-open range [0, BRIG_2_TREE_HSAIL_TOTAL_REG_COUNT).  */
+
+size_t
+gccbrig_hsa_reg_id (const BrigOperandRegister &reg)
+{
+  size_t offset = reg.regNum;
+  switch (reg.regKind)
+    {
+    case BRIG_REGISTER_KIND_QUAD:
+      offset
+        += BRIG_2_TREE_HSAIL_D_REG_COUNT + BRIG_2_TREE_HSAIL_S_REG_COUNT
+           + BRIG_2_TREE_HSAIL_C_REG_COUNT;
+      break;
+    case BRIG_REGISTER_KIND_DOUBLE:
+      offset += BRIG_2_TREE_HSAIL_S_REG_COUNT + BRIG_2_TREE_HSAIL_C_REG_COUNT;
+      break;
+    case BRIG_REGISTER_KIND_SINGLE:
+      offset += BRIG_2_TREE_HSAIL_C_REG_COUNT;
+      break;
+    case BRIG_REGISTER_KIND_CONTROL:
+      /* Control registers occupy the start of the range: no offset.  */
+      break;
+    default:
+      gcc_unreachable ();
+      break;
+    }
+  return offset;
+}
+
+/* Returns the HSAIL register name (for example "$s3") for REG_HASH, an
+   identifier as computed by gccbrig_hsa_reg_id ().  This is the inverse
+   of that mapping: c registers first, then s, d and q.
+
+   size_t is not unsigned long on every host, so the register number is
+   cast explicitly to match the %lu conversions below.  */
+
+std::string
+gccbrig_hsa_reg_name_from_id (size_t reg_hash)
+{
+  char reg_name[32];
+  if (reg_hash < BRIG_2_TREE_HSAIL_C_REG_COUNT)
+    {
+      sprintf (reg_name, "$c%lu", (unsigned long) reg_hash);
+      return reg_name;
+    }
+
+  reg_hash -= BRIG_2_TREE_HSAIL_C_REG_COUNT;
+  if (reg_hash < BRIG_2_TREE_HSAIL_S_REG_COUNT)
+    {
+      sprintf (reg_name, "$s%lu", (unsigned long) reg_hash);
+      return reg_name;
+    }
+
+  reg_hash -= BRIG_2_TREE_HSAIL_S_REG_COUNT;
+  if (reg_hash < BRIG_2_TREE_HSAIL_D_REG_COUNT)
+    {
+      sprintf (reg_name, "$d%lu", (unsigned long) reg_hash);
+      return reg_name;
+    }
+
+  reg_hash -= BRIG_2_TREE_HSAIL_D_REG_COUNT;
+  if (reg_hash < BRIG_2_TREE_HSAIL_Q_REG_COUNT)
+    {
+      sprintf (reg_name, "$q%lu", (unsigned long) reg_hash);
+      return reg_name;
+    }
+
+  /* REG_HASH is not a valid register identifier.  */
+  gcc_unreachable ();
+  return "$??";
+}
+
+/* Prints the register usage statistics in INFO to DUMP: for each
+   register its name, followed by one line per tree type the register
+   was used as, prefixed with the use count in parentheses.  */
+
+void
+gccbrig_print_reg_use_info (FILE *dump, const regs_use_index &info)
+{
+  regs_use_index::const_iterator begin_it = info.begin ();
+  regs_use_index::const_iterator end_it = info.end ();
+  for (regs_use_index::const_iterator it = begin_it; it != end_it; it++)
+    {
+      std::string hsa_reg = gccbrig_hsa_reg_name_from_id (it->first);
+      /* Write the header to DUMP as well (not to stdout) so that it
+         stays together with the per-type lines below.  */
+      fprintf (dump, "%s:\n", hsa_reg.c_str ());
+      /* Named differently from the INFO parameter to avoid shadowing.  */
+      const reg_use_info &reg_info = it->second;
+      typedef std::vector<std::pair<tree, size_t> >::const_iterator reg_use_it;
+      reg_use_it begin_it2 = reg_info.m_type_refs.begin ();
+      reg_use_it end_it2 = reg_info.m_type_refs.end ();
+      for (reg_use_it it2 = begin_it2; it2 != end_it2; it2++)
+        {
+          /* size_t is not unsigned long on every host; cast to match
+             the %lu conversion.  */
+          fprintf (dump, "(%lu) ", (unsigned long) it2->second);
+          print_node_brief (dump, "", it2->first, 0);
+          fprintf (dump, "\n");
+        }
+    }
+}
Index: gcc/brig/brigfrontend/brig-util.h
===================================================================
--- gcc/brig/brigfrontend/brig-util.h (revision 254836)
+++ gcc/brig/brigfrontend/brig-util.h (revision 254837)
@@ -23,6 +23,7 @@
#define GCC_BRIG_UTIL_H
#include <map>
+#include <vector>
#include "config.h"
#include "system.h"
@@ -31,6 +32,15 @@
#include "opts.h"
#include "tree.h"
+/* There are 128 c regs and 2048 s/d/q regs each in the HSAIL. */
+#define BRIG_2_TREE_HSAIL_C_REG_COUNT (128)
+#define BRIG_2_TREE_HSAIL_S_REG_COUNT (2048)
+#define BRIG_2_TREE_HSAIL_D_REG_COUNT (2048)
+#define BRIG_2_TREE_HSAIL_Q_REG_COUNT (2048)
+#define BRIG_2_TREE_HSAIL_TOTAL_REG_COUNT \
+ (BRIG_2_TREE_HSAIL_C_REG_COUNT + BRIG_2_TREE_HSAIL_S_REG_COUNT \
+ + BRIG_2_TREE_HSAIL_D_REG_COUNT + BRIG_2_TREE_HSAIL_Q_REG_COUNT)
+
/* Helper class for keeping book of group variable offsets. */
class group_variable_offset_index
@@ -76,4 +86,25 @@
/* From hsa.h. */
bool hsa_type_packed_p (BrigType16_t type);
+/* Per-register record of the tree types an HSAIL register has been
+   used as, together with how many times each type was seen.  */
+struct reg_use_info
+{
+ /* (type, use count) pairs: how many times the HSAIL register is used
+ as each tree type in generic expressions. The counts are used to
+ select the type of the 'register' variable in order to reduce the
+ emission of VIEW_CONVERT_EXPR nodes. The data is kept in a vector
+ (insertion order) for determinism in case several types have the
+ same count. */
+ std::vector<std::pair<tree, size_t> > m_type_refs;
+ /* Maps a tree type to the index of its entry in m_type_refs. */
+ std::map<tree, size_t> m_type_refs_lookup;
+};
+
+/* key = hsa register entry generated by gccbrig_hsa_reg_id (). */
+typedef std::map<size_t, reg_use_info> regs_use_index;
+
+size_t gccbrig_hsa_reg_id (const BrigOperandRegister &reg);
+std::string gccbrig_hsa_reg_name_from_id (size_t reg_hash);
+
+void gccbrig_print_reg_use_info (FILE *dump, const regs_use_index &info);
+
#endif
Index: gcc/brig/brigfrontend/brig-cvt-inst-handler.cc
===================================================================
--- gcc/brig/brigfrontend/brig-cvt-inst-handler.cc (revision 254836)
+++ gcc/brig/brigfrontend/brig-cvt-inst-handler.cc (revision 254837)
@@ -116,7 +116,7 @@
/* Flush the float operand to zero if indicated with 'ftz'. */
if (FTZ && SCALAR_FLOAT_TYPE_P (src_type))
{
- tree casted_input = build_reinterpret_cast (src_type, input);
+ tree casted_input = build_resize_convert_view (src_type, input);
input = flush_to_zero (src_is_fp16) (*this, casted_input);
}
@@ -158,7 +158,8 @@
}
else
gcc_unreachable ();
- tree casted_input = build_reinterpret_cast (unsigned_int_type, input);
+ tree casted_input = build_resize_convert_view (unsigned_int_type,
+ input);
tree masked_input
= build2 (BIT_AND_EXPR, unsigned_int_type, casted_input, and_mask);
conversion_result
@@ -172,7 +173,7 @@
}
else if (dest_is_fp16)
{
- tree casted_input = build_reinterpret_cast (src_type, input);
+ tree casted_input = build_resize_convert_view (src_type, input);
conversion_result
= convert_to_real (brig_to_generic::s_fp32_type, casted_input);
if (FTZ)
@@ -181,7 +182,7 @@
}
else if (SCALAR_FLOAT_TYPE_P (dest_type))
{
- tree casted_input = build_reinterpret_cast (src_type, input);
+ tree casted_input = build_resize_convert_view (src_type, input);
conversion_result = convert_to_real (dest_type, casted_input);
}
else if (INTEGRAL_TYPE_P (dest_type) && INTEGRAL_TYPE_P (src_type))
@@ -214,46 +215,47 @@
#include "brig-builtins.def"
gcc_unreachable ();
- tree casted_input = build_reinterpret_cast (src_type, input);
+ tree casted_input = build_resize_convert_view (src_type, input);
conversion_result
= call_builtin (builtin, 1, dest_type, src_type, casted_input);
}
else
{
- tree casted_input = build_reinterpret_cast (src_type, input);
+ tree casted_input = build_resize_convert_view (src_type, input);
- /* Perform the int to float conversion. */
+ /* Perform the float to int conversion. */
conversion_result = convert_to_integer (dest_type, casted_input);
}
- /* The converted result is finally extended to the target register
- width, using the same sign as the destination. */
- conversion_result
- = convert_to_integer (TREE_TYPE (output), conversion_result);
}
else
{
/* Just use CONVERT_EXPR and hope for the best. */
- tree casted_input = build_reinterpret_cast (dest_type, input);
+ tree casted_input = build_resize_convert_view (dest_type, input);
conversion_result = build1 (CONVERT_EXPR, dest_type, casted_input);
}
size_t dst_reg_size = int_size_in_bytes (TREE_TYPE (output));
- tree assign = NULL_TREE;
/* The output register can be of different type&size than the
- conversion output size. Cast it to the register variable type. */
- if (dst_reg_size > conv_dst_size)
+ conversion output size. Only need to handle signed integers, rest
+ is handled by reinterpret_cast. */
+ tree casted_output = conversion_result;
+ if (dst_reg_size > conv_dst_size &&
+ INTEGRAL_TYPE_P (TREE_TYPE (casted_output)))
{
- tree casted_output
- = build1 (CONVERT_EXPR, TREE_TYPE (output), conversion_result);
- assign = build2 (MODIFY_EXPR, TREE_TYPE (output), output, casted_output);
+ gcc_assert (!VECTOR_TYPE_P (casted_output));
+
+ bool unsignedp = TYPE_UNSIGNED (TREE_TYPE (casted_output));
+ tree resized_int_type
+ = build_nonstandard_integer_type (dst_reg_size * BITS_PER_UNIT,
+ unsignedp);
+ casted_output = build1 (CONVERT_EXPR, resized_int_type, casted_output);
}
- else
- {
- tree casted_output
- = build_reinterpret_cast (TREE_TYPE (output), conversion_result);
- assign = build2 (MODIFY_EXPR, TREE_TYPE (output), output, casted_output);
- }
+
+ casted_output
+ = build_resize_convert_view (TREE_TYPE (output), casted_output);
+ tree assign = build2 (MODIFY_EXPR, TREE_TYPE (output), output, casted_output);
+
m_parent.m_cf->append_statement (assign);
return base->byteCount;
Index: gcc/brig/brigfrontend/brig-code-entry-handler.cc
===================================================================
--- gcc/brig/brigfrontend/brig-code-entry-handler.cc (revision 254836)
+++ gcc/brig/brigfrontend/brig-code-entry-handler.cc (revision 254837)
@@ -137,14 +137,7 @@
correct size here so we don't need a separate unpack/pack for it.
fp16-fp32 conversion is done in build_operands (). */
if (is_input && TREE_TYPE (element) != operand_type)
- {
- if (int_size_in_bytes (TREE_TYPE (element))
- == int_size_in_bytes (operand_type)
- && !INTEGRAL_TYPE_P (operand_type))
- element = build1 (VIEW_CONVERT_EXPR, operand_type, element);
- else
- element = convert (operand_type, element);
- }
+ element = build_resize_convert_view (operand_type, element);
CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, element);
++operand_ptr;
@@ -368,7 +361,7 @@
to the array object. */
if (POINTER_TYPE_P (TREE_TYPE (arg_var_decl)))
- symbol_base = build_reinterpret_cast (ptype, arg_var_decl);
+ symbol_base = build_resize_convert_view (ptype, arg_var_decl);
else
{
/* In case we are referring to an array (the argument in
@@ -436,7 +429,8 @@
= (const BrigOperandRegister *) m_parent.get_brig_operand_entry
(addr_operand.reg);
tree base_reg_var = m_parent.m_cf->get_m_var_declfor_reg (mem_base_reg);
- var_offset = convert_to_pointer (ptr_type_node, base_reg_var);
+ tree as_uint = build_reinterpret_to_uint (base_reg_var);
+ var_offset = convert_to_pointer (ptr_type_node, as_uint);
gcc_assert (var_offset != NULL_TREE);
}
@@ -527,7 +521,10 @@
= ((const uint32_t *) &operand_entries->bytes)[operand_index];
const BrigBase *operand_data
= m_parent.get_brig_operand_entry (operand_offset);
- return build_tree_operand (*brig_inst, *operand_data, operand_type);
+
+ bool inputp = !gccbrig_hsa_opcode_op_output_p (brig_inst->opcode,
+ operand_index);
+ return build_tree_operand (*brig_inst, *operand_data, operand_type, inputp);
}
/* Builds a single (scalar) constant initialized element of type
@@ -991,8 +988,8 @@
call_operands.resize (4, NULL_TREE);
operand_types.resize (4, NULL_TREE);
for (size_t i = 0; i < operand_count; ++i)
- call_operands.at (i) = build_reinterpret_cast (operand_types.at (i),
- call_operands.at (i));
+ call_operands.at (i) = build_resize_convert_view (operand_types.at (i),
+ call_operands.at (i));
tree fnptr = build_fold_addr_expr (built_in);
return build_call_array (TREE_TYPE (TREE_TYPE (built_in)), fnptr,
@@ -1141,6 +1138,28 @@
tree_stl_vec
brig_code_entry_handler::build_operands (const BrigInstBase &brig_inst)
{
+ return build_or_analyze_operands (brig_inst, false);
+}
+
+void
+brig_code_entry_handler::analyze_operands (const BrigInstBase &brig_inst)
+{
+  /* Analysis-only mode; the returned operand list is not needed.  */
+  const bool analyze_only = true;
+  build_or_analyze_operands (brig_inst, analyze_only);
+}
+
+/* Implements both the build_operands () and analyze_operands () call
+ so changes go in tandem. Performs build_operands () when ANALYZE
+ is false. Otherwise, only analyze operands and return empty
+ list.
+
+ When analyzing, record each HSA register operand together with
+ its resolved operand tree type in
+ brig_to_generic::m_fn_regs_use_index. */
+
+tree_stl_vec
+brig_code_entry_handler::
+build_or_analyze_operands (const BrigInstBase &brig_inst, bool analyze)
+{
/* Flush to zero. */
bool ftz = false;
const BrigBase *base = &brig_inst.base;
@@ -1308,9 +1327,19 @@
/* Treat the operands as the storage type at this point. */
operand_type = half_storage_type;
+ if (analyze)
+ {
+ if (operand_data->kind == BRIG_KIND_OPERAND_REGISTER)
+ {
+ const BrigOperandRegister &brig_reg
+ = (const BrigOperandRegister &) *operand_data;
+ m_parent.add_reg_used_as_type (brig_reg, operand_type);
+ }
+ continue;
+ }
+
tree operand = build_tree_operand (brig_inst, *operand_data, operand_type,
!is_output);
-
gcc_assert (operand);
/* Cast/convert the inputs to correct types as expected by the GENERIC
@@ -1319,36 +1348,17 @@
{
if (half_to_float)
operand = build_h2f_conversion
- (build_reinterpret_cast (half_storage_type, operand));
+ (build_resize_convert_view (half_storage_type, operand));
else if (TREE_CODE (operand) != LABEL_DECL
&& TREE_CODE (operand) != TREE_VEC
&& operand_data->kind != BRIG_KIND_OPERAND_ADDRESS
- && !VECTOR_TYPE_P (TREE_TYPE (operand)))
+ && operand_data->kind != BRIG_KIND_OPERAND_OPERAND_LIST)
{
- size_t reg_width = int_size_in_bytes (TREE_TYPE (operand));
- size_t instr_width = int_size_in_bytes (operand_type);
- if (reg_width == instr_width)
- operand = build_reinterpret_cast (operand_type, operand);
- else if (reg_width > instr_width)
- {
- /* Clip the operand because the instruction's bitwidth
- is smaller than the HSAIL reg width. */
- if (INTEGRAL_TYPE_P (operand_type))
- operand
- = convert_to_integer (signed_or_unsigned_type_for
- (TYPE_UNSIGNED (operand_type),
- operand_type), operand);
- else
- operand = build_reinterpret_cast (operand_type, operand);
- }
- else if (reg_width < instr_width)
- /* At least shift amount operands can be read from smaller
- registers than the data operands. */
- operand = convert (operand_type, operand);
+ operand = build_resize_convert_view (operand_type, operand);
}
else if (brig_inst.opcode == BRIG_OPCODE_SHUFFLE)
/* Force the operand type to be treated as the raw type. */
- operand = build_reinterpret_cast (operand_type, operand);
+ operand = build_resize_convert_view (operand_type, operand);
if (brig_inst.opcode == BRIG_OPCODE_CMOV && i == 1)
{
@@ -1379,8 +1389,9 @@
brig_code_entry_handler::build_output_assignment (const BrigInstBase &brig_inst,
tree output, tree inst_expr)
{
- /* The destination type might be different from the output register
- variable type (which is always an unsigned integer type). */
+ /* The result/input type might be different from the output register
+ variable type (can be any type; see get_m_var_declfor_reg @
+ brig-function.cc). */
tree output_type = TREE_TYPE (output);
tree input_type = TREE_TYPE (inst_expr);
bool is_fp16 = (brig_inst.type & BRIG_TYPE_BASE_MASK) == BRIG_TYPE_F16
@@ -1421,12 +1432,12 @@
{
inst_expr = add_temp_var ("before_f2h", inst_expr);
tree f2h_output = build_f2h_conversion (inst_expr);
- tree conv_int = convert_to_integer (output_type, f2h_output);
- tree assign = build2 (MODIFY_EXPR, output_type, output, conv_int);
+ tree conv = build_resize_convert_view (output_type, f2h_output);
+ tree assign = build2 (MODIFY_EXPR, output_type, output, conv);
m_parent.m_cf->append_statement (assign);
return assign;
}
- else if (VECTOR_TYPE_P (TREE_TYPE (output)))
+ else if (VECTOR_TYPE_P (output_type) && TREE_CODE (output) == CONSTRUCTOR)
{
/* Expand/unpack the input value to the given vector elements. */
size_t i;
@@ -1454,22 +1465,21 @@
bitwidths. */
size_t src_width = int_size_in_bytes (input_type);
size_t dst_width = int_size_in_bytes (output_type);
-
- if (src_width == dst_width)
+ tree input = inst_expr;
+ /* Integer results are extended to the target register width, using
+ the same sign as the inst_expr. */
+ if (INTEGRAL_TYPE_P (TREE_TYPE (input)) && src_width != dst_width)
{
- /* A simple bitcast should do. */
- tree bitcast = build_reinterpret_cast (output_type, inst_expr);
- tree assign = build2 (MODIFY_EXPR, output_type, output, bitcast);
- m_parent.m_cf->append_statement (assign);
- return assign;
+ bool unsigned_p = TYPE_UNSIGNED (TREE_TYPE (input));
+ tree resized_type
+ = build_nonstandard_integer_type (dst_width * BITS_PER_UNIT,
+ unsigned_p);
+ input = convert_to_integer (resized_type, input);
}
- else
- {
- tree conv_int = convert_to_integer (output_type, inst_expr);
- tree assign = build2 (MODIFY_EXPR, output_type, output, conv_int);
- m_parent.m_cf->append_statement (assign);
- return assign;
- }
+ input = build_resize_convert_view (output_type, input);
+ tree assign = build2 (MODIFY_EXPR, output_type, output, input);
+ m_parent.m_cf->append_statement (assign);
+ return assign;
}
return NULL_TREE;
}
@@ -1672,7 +1682,7 @@
{
tree built_in = builtin_decl_explicit (BUILT_IN_HSAIL_F32_TO_F16);
- tree casted_operand = build_reinterpret_cast (uint32_type_node, operand);
+ tree casted_operand = build_resize_convert_view (uint32_type_node, operand);
tree call = call_builtin (built_in, 1, uint16_type_node, uint32_type_node,
casted_operand);
@@ -1701,7 +1711,7 @@
tree output = create_tmp_var (const_fp32_type, "fp32out");
tree casted_result
- = build_reinterpret_cast (brig_to_generic::s_fp32_type, call);
+ = build_resize_convert_view (brig_to_generic::s_fp32_type, call);
tree assign = build2 (MODIFY_EXPR, TREE_TYPE (output), output, casted_result);
Index: gcc/brig/brigfrontend/brig-function.cc
===================================================================
--- gcc/brig/brigfrontend/brig-function.cc (revision 254836)
+++ gcc/brig/brigfrontend/brig-function.cc (revision 254837)
@@ -272,32 +272,59 @@
return variable;
}
+/* Selects the tree type for the variable that represents the HSA
+   register REG.
+
+   HSA registers are untyped.  To reduce the number of (sometimes
+   unoptimizable) VIEW_CONVERT_EXPR nodes (these seem to occur when a
+   use or def reaches over the current BB), the type the register was
+   most often used as, according to the register use index recorded for
+   this function, is chosen.  The type can be anything (scalar, vector,
+   int, float, etc.), but only types whose size equals the register size
+   are considered; on equal counts the type recorded first wins.  When
+   there is no usable record, an unsigned integer type of the register
+   size is returned.  */
+
+tree
+brig_function::get_tree_type_for_hsa_reg (const BrigOperandRegister *reg) const
+{
+  const size_t reg_bits = gccbrig_reg_size (reg);
+
+  /* Fallback used when the analysis recorded nothing suitable.  */
+  tree best_type = build_nonstandard_integer_type (reg_bits, true);
+
+  std::map<std::string, regs_use_index>::const_iterator fn_uses
+    = m_parent->m_fn_regs_use_index.find (m_name);
+  if (fn_uses == m_parent->m_fn_regs_use_index.end ())
+    return best_type;
+
+  regs_use_index::const_iterator reg_uses
+    = fn_uses->second.find (gccbrig_hsa_reg_id (*reg));
+  if (reg_uses == fn_uses->second.end ())
+    return best_type;
+
+  const std::vector<std::pair<tree, size_t> > &candidates
+    = reg_uses->second.m_type_refs;
+  size_t best_count = 0;
+  for (size_t i = 0; i < candidates.size (); ++i)
+    {
+      tree candidate = candidates[i].first;
+      size_t use_count = candidates[i].second;
+      size_t candidate_bits = int_size_in_bytes (candidate) * BITS_PER_UNIT;
+      /* Strictly greater: the earliest recorded type wins a tie.  */
+      if (candidate_bits == reg_bits && use_count > best_count)
+        {
+          best_type = candidate;
+          best_count = use_count;
+        }
+    }
+
+  return best_type;
+}
+
/* Returns a DECL_VAR for the given HSAIL operand register.
If it has not been created yet for the function being generated,
- creates it as an unsigned int variable. */
+ creates it as a type determined by analysis phase. */
tree
brig_function::get_m_var_declfor_reg (const BrigOperandRegister *reg)
{
- size_t offset = reg->regNum;
- switch (reg->regKind)
- {
- case BRIG_REGISTER_KIND_QUAD:
- offset
- += BRIG_2_TREE_HSAIL_D_REG_COUNT + BRIG_2_TREE_HSAIL_S_REG_COUNT +
- BRIG_2_TREE_HSAIL_C_REG_COUNT;
- break;
- case BRIG_REGISTER_KIND_DOUBLE:
- offset += BRIG_2_TREE_HSAIL_S_REG_COUNT + BRIG_2_TREE_HSAIL_C_REG_COUNT;
- break;
- case BRIG_REGISTER_KIND_SINGLE:
- offset += BRIG_2_TREE_HSAIL_C_REG_COUNT;
- case BRIG_REGISTER_KIND_CONTROL:
- break;
- default:
- gcc_unreachable ();
- break;
- }
+ size_t offset = gccbrig_hsa_reg_id (*reg);
reg_decl_index_entry *regEntry = m_regs[offset];
if (regEntry == NULL)
@@ -305,7 +332,7 @@
size_t reg_size = gccbrig_reg_size (reg);
tree type;
if (reg_size > 1)
- type = build_nonstandard_integer_type (reg_size, true);
+ type = get_tree_type_for_hsa_reg (reg);
else
type = boolean_type_node;
Index: gcc/brig/brigfrontend/brig-code-entry-handler.h
===================================================================
--- gcc/brig/brigfrontend/brig-code-entry-handler.h (revision 254836)
+++ gcc/brig/brigfrontend/brig-code-entry-handler.h (revision 254837)
@@ -89,6 +89,7 @@
tree build_h2f_conversion (tree source);
tree_stl_vec build_operands (const BrigInstBase &brig_inst);
+ void analyze_operands (const BrigInstBase &brig_inst);
tree build_output_assignment (const BrigInstBase &brig_inst, tree output,
tree inst_expr);
@@ -102,6 +103,11 @@
/* HSAIL-specific builtin functions not yet integrated to gcc. */
static builtin_map s_custom_builtins;
+
+private:
+
+ tree_stl_vec build_or_analyze_operands (const BrigInstBase &brig_inst,
+ bool analyze);
};
/* Implement the Visitor software pattern for performing various actions on
Index: gcc/brig/brigfrontend/brig-function.h
===================================================================
--- gcc/brig/brigfrontend/brig-function.h (revision 254836)
+++ gcc/brig/brigfrontend/brig-function.h (revision 254837)
@@ -45,15 +45,6 @@
typedef std::map<const BrigDirectiveVariable *, tree> variable_index;
typedef std::vector<tree> tree_stl_vec;
-/* There are 128 c regs and 2048 s/d/q regs each in the HSAIL. */
-#define BRIG_2_TREE_HSAIL_C_REG_COUNT (128)
-#define BRIG_2_TREE_HSAIL_S_REG_COUNT (2048)
-#define BRIG_2_TREE_HSAIL_D_REG_COUNT (2048)
-#define BRIG_2_TREE_HSAIL_Q_REG_COUNT (2048)
-#define BRIG_2_TREE_HSAIL_TOTAL_REG_COUNT \
- (BRIG_2_TREE_HSAIL_C_REG_COUNT + BRIG_2_TREE_HSAIL_S_REG_COUNT \
- + BRIG_2_TREE_HSAIL_D_REG_COUNT + BRIG_2_TREE_HSAIL_Q_REG_COUNT)
-
/* Holds data for the currently built GENERIC function. */
class brig_function
@@ -222,6 +213,9 @@
phsa_descriptor m_descriptor;
private:
+
+ tree get_tree_type_for_hsa_reg (const BrigOperandRegister *reg) const;
+
/* Bookkeeping for the different HSA registers and their tree declarations
for the currently generated function. */
reg_decl_index_entry *m_regs[BRIG_2_TREE_HSAIL_TOTAL_REG_COUNT];
Index: gcc/brig/brigfrontend/brig-to-generic.cc
===================================================================
--- gcc/brig/brigfrontend/brig-to-generic.cc (revision 254836)
+++ gcc/brig/brigfrontend/brig-to-generic.cc (revision 254837)
@@ -124,6 +124,24 @@
}
};
+/* Code entry handler used for the register use analysis: for each
+   instruction entry it is invoked on, it runs analyze_operands () on
+   that instruction.  */
+class brig_reg_use_analyzer : public brig_code_entry_handler
+{
+public:
+  brig_reg_use_analyzer (brig_to_generic &parent)
+    : brig_code_entry_handler (parent)
+  {
+  }
+
+  /* Analyzes the operands of the instruction entry at BASE and returns
+     the entry's byteCount.  */
+  size_t
+  operator () (const BrigBase *base)
+  {
+    analyze_operands (*reinterpret_cast<const BrigInstBase *> (base));
+    return base->byteCount;
+  }
+
+};
+
/* Helper struct for pairing a BrigKind and a BrigCodeEntryHandler that
should handle its data. */
@@ -210,6 +228,7 @@
brig_directive_variable_handler var_handler (*this);
brig_directive_fbarrier_handler fbar_handler (*this);
brig_directive_function_handler func_handler (*this);
+ brig_reg_use_analyzer reg_use_analyzer (*this);
/* Need this for grabbing the module names for mangling the
group variable names. */
@@ -219,7 +238,21 @@
const BrigSectionHeader *csection_header = (const BrigSectionHeader *) m_code;
code_entry_handler_info handlers[]
- = {{BRIG_KIND_DIRECTIVE_VARIABLE, &var_handler},
+ = {{BRIG_KIND_INST_BASIC, &reg_use_analyzer},
+ {BRIG_KIND_INST_MOD, &reg_use_analyzer},
+ {BRIG_KIND_INST_CMP, &reg_use_analyzer},
+ {BRIG_KIND_INST_MEM, &reg_use_analyzer},
+ {BRIG_KIND_INST_CVT, &reg_use_analyzer},
+ {BRIG_KIND_INST_SEG_CVT, &reg_use_analyzer},
+ {BRIG_KIND_INST_SEG, &reg_use_analyzer},
+ {BRIG_KIND_INST_ADDR, &reg_use_analyzer},
+ {BRIG_KIND_INST_SOURCE_TYPE, &reg_use_analyzer},
+ {BRIG_KIND_INST_ATOMIC, &reg_use_analyzer},
+ {BRIG_KIND_INST_SIGNAL, &reg_use_analyzer},
+ {BRIG_KIND_INST_BR, &reg_use_analyzer},
+ {BRIG_KIND_INST_LANE, &reg_use_analyzer},
+ {BRIG_KIND_INST_QUEUE, &reg_use_analyzer},
+ {BRIG_KIND_DIRECTIVE_VARIABLE, &var_handler},
{BRIG_KIND_DIRECTIVE_FBARRIER, &fbar_handler},
{BRIG_KIND_DIRECTIVE_KERNEL, &func_handler},
{BRIG_KIND_DIRECTIVE_MODULE, &module_handler},
@@ -555,10 +588,14 @@
than the created reg var type in order to select correct instruction type
later on. This function creates the necessary reinterpret type cast from
a source variable to the destination type. In case no cast is needed to
- the same type, SOURCE is returned directly. */
+ the same type, SOURCE is returned directly.
+ In case of mismatched type sizes, casting:
+ - to narrower type the upper bits are clipped and
+ - to wider type the source value is zero extended. */
+
tree
-build_reinterpret_cast (tree destination_type, tree source)
+build_resize_convert_view (tree destination_type, tree source)
{
gcc_assert (source && destination_type && TREE_TYPE (source) != NULL_TREE
@@ -578,7 +615,7 @@
size_t dst_size = int_size_in_bytes (destination_type);
if (src_size == dst_size)
return build1 (VIEW_CONVERT_EXPR, destination_type, source);
- else if (src_size < dst_size)
+ else /* src_size != dst_size */
{
/* The src_size can be smaller at least with f16 scalars which are
stored to 32b register variables. First convert to an equivalent
@@ -585,18 +622,25 @@
size unsigned type, then extend to an unsigned type of the
target width, after which VIEW_CONVERT_EXPR can be used to
force to the target type. */
- tree unsigned_temp = build1 (VIEW_CONVERT_EXPR,
- get_unsigned_int_type (source_type),
- source);
- return build1 (VIEW_CONVERT_EXPR, destination_type,
- convert (get_unsigned_int_type (destination_type),
- unsigned_temp));
+ tree resized = convert (get_scalar_unsigned_int_type (destination_type),
+ build_reinterpret_to_uint (source));
+ gcc_assert ((size_t)int_size_in_bytes (TREE_TYPE (resized)) == dst_size);
+ return build_resize_convert_view (destination_type, resized);
}
- else
- gcc_unreachable ();
- return NULL_TREE;
}
+/* Returns SOURCE viewed as a scalar unsigned integer whose size
+   corresponds to that of the original type.  SOURCE is returned
+   unchanged when it already has an unsigned integral type; otherwise it
+   is wrapped in a VIEW_CONVERT_EXPR.  */
+
+tree
+build_reinterpret_to_uint (tree source)
+{
+  tree original_type = TREE_TYPE (source);
+  const bool already_uint
+    = INTEGRAL_TYPE_P (original_type) && TYPE_UNSIGNED (original_type);
+  if (already_uint)
+    return source;
+
+  return build1 (VIEW_CONVERT_EXPR,
+                 get_scalar_unsigned_int_type (original_type), source);
+}
+
+
/* Returns the finished brig_function for the given generic FUNC_DECL,
or NULL, if not found. */
@@ -775,7 +819,7 @@
{
types[i] = va_arg (ap, tree);
tree arg = va_arg (ap, tree);
- args[i] = build_reinterpret_cast (types[i], arg);
+ args[i] = build_resize_convert_view (types[i], arg);
if (types[i] == error_mark_node || args[i] == error_mark_node)
{
delete[] types;
@@ -879,6 +923,16 @@
true);
}
+/* Returns an unsigned integer type whose size in bits equals the size
+   of ORIGINAL_TYPE.  */
+
+tree
+get_scalar_unsigned_int_type (tree original_type)
+{
+  HOST_WIDE_INT size_in_bits
+    = int_size_in_bytes (original_type) * BITS_PER_UNIT;
+  return build_nonstandard_integer_type (size_in_bits, true);
+}
+
+
void
dump_function (FILE *dump_file, brig_function *f)
{
@@ -893,3 +947,22 @@
fprintf (dump_file, "\n");
}
}
+
+/* Records one use of the register BRIG_REG as TYPE in the function
+   currently being processed (m_cf).  The first use as a given type
+   appends a new entry with count 1; later uses increment its count.  */
+
+void
+brig_to_generic::add_reg_used_as_type (const BrigOperandRegister &brig_reg,
+                                       tree type)
+{
+  gcc_assert (m_cf);
+  reg_use_info &uses
+    = m_fn_regs_use_index[m_cf->m_name][gccbrig_hsa_reg_id (brig_reg)];
+
+  std::map<tree, size_t>::iterator known
+    = uses.m_type_refs_lookup.find (type);
+  if (known == uses.m_type_refs_lookup.end ())
+    {
+      /* First time this type is seen: remember where its entry lives.  */
+      uses.m_type_refs.push_back (std::pair<tree, size_t> (type, 1));
+      uses.m_type_refs_lookup[type] = uses.m_type_refs.size () - 1;
+      return;
+    }
+
+  uses.m_type_refs[known->second].second++;
+}
Index: gcc/brig/brigfrontend/brig-to-generic.h
===================================================================
--- gcc/brig/brigfrontend/brig-to-generic.h (revision 254836)
+++ gcc/brig/brigfrontend/brig-to-generic.h (revision 254837)
@@ -106,6 +106,9 @@
void add_group_variable (const std::string &name, size_t size,
size_t alignment, bool function_scope);
+ void add_reg_used_as_type (const BrigOperandRegister &brig_reg,
+ tree operand_type);
+
static tree s_fp16_type;
static tree s_fp32_type;
static tree s_fp64_type;
@@ -129,6 +132,9 @@
/* Accumulates the total group segment usage. */
size_t m_total_group_segment_usage;
+ /* Statistics about register uses per function. */
+ std::map<std::string, regs_use_index> m_fn_regs_use_index;
+
private:
void find_brig_sections ();
@@ -212,12 +218,15 @@
tree call_builtin (tree pdecl, int nargs, tree rettype, ...);
-tree build_reinterpret_cast (tree destination_type, tree source);
+tree build_resize_convert_view (tree destination_type, tree source);
+tree build_reinterpret_to_uint (tree source);
tree build_stmt (enum tree_code code, ...);
tree get_unsigned_int_type (tree type);
+tree get_scalar_unsigned_int_type (tree type);
+
void dump_function (FILE *dump_file, brig_function *f);
#endif
Index: gcc/brig/brigfrontend/brig-copy-move-inst-handler.cc
===================================================================
--- gcc/brig/brigfrontend/brig-copy-move-inst-handler.cc (revision 254836)
+++ gcc/brig/brigfrontend/brig-copy-move-inst-handler.cc (revision 254837)
@@ -53,12 +53,12 @@
tree input = build_tree_operand_from_brig (brig_inst, source_type, 1);
tree output = build_tree_operand_from_brig (brig_inst, dest_type, 0);
+
if (brig_inst->opcode == BRIG_OPCODE_COMBINE)
{
/* For combine, a simple reinterpret cast from the array constructor
works. */
-
- tree casted = build_reinterpret_cast (dest_type, input);
+ tree casted = build_resize_convert_view (TREE_TYPE (output), input);
tree assign = build2 (MODIFY_EXPR, TREE_TYPE (output), output, casted);
m_parent.m_cf->append_statement (assign);
}
Index: gcc/brig/brigfrontend/brig-mem-inst-handler.cc
===================================================================
--- gcc/brig/brigfrontend/brig-mem-inst-handler.cc (revision 254836)
+++ gcc/brig/brigfrontend/brig-mem-inst-handler.cc (revision 254837)
@@ -41,7 +41,11 @@
tree instr_type = gccbrig_tree_type_for_hsa_type (brig_inst->type);
- if (VECTOR_TYPE_P (TREE_TYPE (data)))
+ /* In case of {ld,st}_v{2,4}. Note: since 'register' variables may
+ be any type, even a vector type, we distinguish the registers
+ from operand lists by checking for constructor nodes (which
+ operand lists are represented as). */
+ if (VECTOR_TYPE_P (TREE_TYPE (data)) && TREE_CODE (data) == CONSTRUCTOR)
instr_type = TREE_TYPE (data);
tree ptype = build_pointer_type (instr_type);
@@ -151,14 +155,7 @@
address_base, ptr_offset);
if (is_store && TREE_TYPE (data) != instr_type)
- {
- if (int_size_in_bytes (TREE_TYPE (data))
- == int_size_in_bytes (instr_type)
- && !INTEGRAL_TYPE_P (instr_type))
- data = build1 (VIEW_CONVERT_EXPR, instr_type, data);
- else
- data = convert (instr_type, data);
- }
+ data = build_resize_convert_view (instr_type, data);
build_mem_access (brig_inst, address, data);
Index: gcc/brig/brigfrontend/brig-basic-inst-handler.cc
===================================================================
--- gcc/brig/brigfrontend/brig-basic-inst-handler.cc (revision 254836)
+++ gcc/brig/brigfrontend/brig-basic-inst-handler.cc (revision 254837)
@@ -184,8 +184,9 @@
tree and_mask_vec = build_constructor (vec_type, and_mask_vals);
tree perm = build3 (VEC_PERM_EXPR, vec_type,
- build_reinterpret_cast (vec_type, operands[0]),
- build_reinterpret_cast (vec_type, operands[0]), mask_vec);
+ build_resize_convert_view (vec_type, operands[0]),
+ build_resize_convert_view (vec_type, operands[0]),
+ mask_vec);
tree cleared = build2 (BIT_AND_EXPR, vec_type, perm, and_mask_vec);
@@ -192,7 +193,7 @@
size_t s = int_size_in_bytes (TREE_TYPE (cleared)) * BITS_PER_UNIT;
tree raw_type = build_nonstandard_integer_type (s, true);
- tree as_int = build_reinterpret_cast (raw_type, cleared);
+ tree as_int = build_resize_convert_view (raw_type, cleared);
if (int_size_in_bytes (src_element_type) < 4)
{
@@ -217,7 +218,7 @@
size_t vecsize = int_size_in_bytes (TREE_TYPE (operands[0])) * BITS_PER_UNIT;
tree wide_type = build_nonstandard_integer_type (vecsize, 1);
- tree src_vect = build_reinterpret_cast (wide_type, operands[0]);
+ tree src_vect = build_resize_convert_view (wide_type, operands[0]);
src_vect = add_temp_var ("src_vect", src_vect);
tree scalar = operands[1];
@@ -650,10 +651,10 @@
if (is_fp16_operation)
old_value = build_h2f_conversion
- (build_reinterpret_cast (half_storage_type, operands[0]));
+ (build_resize_convert_view (half_storage_type, operands[0]));
else
old_value
- = build_reinterpret_cast (TREE_TYPE (instr_expr), operands[0]);
+ = build_resize_convert_view (TREE_TYPE (instr_expr), operands[0]);
size_t esize = is_fp16_operation ? 32 : element_size_bits;
Index: gcc/brig/ChangeLog
===================================================================
--- gcc/brig/ChangeLog (revision 254836)
+++ gcc/brig/ChangeLog (revision 254837)
@@ -1,3 +1,27 @@
+2017-11-16 Henry Linjamäki <henry.linjamaki@parmance.com>
+
+ Change internal representation of HSA registers. Instead of
+ representing HSA's untyped registers as unsigned int, gccbrig
+ analyzes the BRIG code and builds each register variable with the
+ type it is most often used as in tree expressions. This gives a
+ better chance to optimize VIEW_CONVERT_EXPRs away.
+ * brigfrontend/brig-code-entry-handler.cc: Add analysis method for
+ register type usage. Handle any-typed register variables.
+ * brigfrontend/brig-code-entry-handler.h: New declarations for the
+ above.
+ * brigfrontend/brig-copy-move-inst-handler.cc: Handle any-typed
+ register variables.
+ * brigfrontend/brig-cvt-inst-handler.cc: Likewise.
+ * brigfrontend/brig-function.cc: Build register variables as a
+ type based on results of analysis phase.
+ * brigfrontend/brig-function.h: Move HSA register count defines to
+ brig-utils.h.
+ * brigfrontend/brig-to-generic.cc: New analysis handler. Analyze
+ HSA register usage.
+ * brigfrontend/brig-to-generic.h: New declarations.
+ * brigfrontend/brig-util.cc: New utility functions.
+ * brigfrontend/brig-util.h: New declarations for the above.
+
2017-11-16 Pekka Jääskeläinen <pekka.jaaskelainen@parmance.com>
* gccbrig.texi: Added some documentation.
Index: gcc/testsuite/ChangeLog
===================================================================
--- gcc/testsuite/ChangeLog (revision 254836)
+++ gcc/testsuite/ChangeLog (revision 254837)
@@ -1,3 +1,10 @@
+2017-11-16 Henry Linjamäki <henry.linjamaki@parmance.com>
+
+ * brig.dg/test/gimple/vector.hsail: Update for HSA registers' tree
+ representation changes in brig1.
+ * brig.dg/test/gimple/packed.hsail: Likewise.
+ * brig.dg/test/gimple/internal-casts.hsail: New.
+
2017-11-16 Jan Hubicka <hubicka@ucw.cz>
* gcc.dg/ipa/ipcp-2.c: Lower threshold.
Index: gcc/testsuite/brig.dg/test/gimple/vector.hsail
===================================================================
--- gcc/testsuite/brig.dg/test/gimple/vector.hsail (revision 254836)
+++ gcc/testsuite/brig.dg/test/gimple/vector.hsail (revision 254837)
@@ -32,18 +32,18 @@
/* { dg-final { scan-tree-dump " = MEM\\\[\\\(vector\\\(2\\\) <float:32> \\\*\\\)" "original"} } */
/* The v3 load is scalarized (at the moment) due to gcc requiring 2's exponent wide vectors. */
-/* { dg-final { scan-tree-dump "s0 = VIEW_CONVERT_EXPR<unsigned int>\\\(BIT_FIELD_REF <mem_read.\[0-9\]+, 32, 0>\\\);\[\n ]+s1 = VIEW_CONVERT_EXPR<unsigned int>\\\(BIT_FIELD_REF <mem_read.\[0-9\]+, 32, 32>\\\);" "original"} } */
+/* { dg-final { scan-tree-dump "s0 = .*BIT_FIELD_REF <mem_read.\[0-9\]+, 32, 0>\\\)?;\[\n ]+s1 = .*BIT_FIELD_REF <mem_read.\[0-9\]+, 32, 32>\\\)?;" "original"} } */
/* The v4 load is done via casting to a vector datatype ptr. */
/* { dg-final { scan-tree-dump " = MEM\\\[\\\(vector\\\(4\\\) <float:32> \\\*\\\)" "original"} } */
/* The combines are generated to vector constructors. */
-/* { dg-final { scan-tree-dump "{s1, s0}" "original"} } */
-/* { dg-final { scan-tree-dump "{s2, s3}" "original"} } */
+/* { dg-final { scan-tree-dump "{.*s1\\\)?, .*s0\\\)?}" "original"} } */
+/* { dg-final { scan-tree-dump "{.*s2\\\)?, .*s3\\\)?}" "original"} } */
/* Expands to BIT_FIELD_REFs. */
-/* { dg-final { scan-tree-dump "s0 = BIT_FIELD_REF <d4, 32, 0>;" "original"} } */
-/* { dg-final { scan-tree-dump "s3 = BIT_FIELD_REF <d4, 32, 32>;" "original"} } */
+/* { dg-final { scan-tree-dump "s0 = \(VIEW_CONVERT_EXPR.*\\\(\)?BIT_FIELD_REF <d4, 32, 0>\\\)?;" "original"} } */
+/* { dg-final { scan-tree-dump "s3 = \(VIEW_CONVERT_EXPR.*\\\(\)?BIT_FIELD_REF <d4, 32, 32>\\\)?;" "original"} } */
/* The v1 store is done via casting to a vector datatype ptr and constructing a vector from the inputs. */
/* { dg-final { scan-tree-dump "MEM\\\[\\\(vector\\\(2\\\) <float:32> \\\*\\\)\\\(<float:32> \\\*\\\) d1\\\] = " "original"} } */
Index: gcc/testsuite/brig.dg/test/gimple/internal-casts.hsail
===================================================================
--- gcc/testsuite/brig.dg/test/gimple/internal-casts.hsail (nonexistent)
+++ gcc/testsuite/brig.dg/test/gimple/internal-casts.hsail (revision 254837)
@@ -0,0 +1,146 @@
+module &module:1:0:$full:$large:$default;
+
+/* Test for casting from/to representation of HSA registers. */
+
+/* HSA registers are untyped but in gccbrig they are represented as */
+/* variables with a type selected by analysis. Currently, each */
+/* register variable, per function, gets the type it is most often */
+/* used as. Therefore, a register variable can be nearly any type. */
+/* The test makes sure the generic/tree expressions have the right */
+/* casts from/to the register variables. */
+
+
+/* { dg-do compile } */
+/* { dg-options "-fdump-tree-original" } */
+
+prog kernel &Kernel(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr)
+{
+ private_u64 %foo;
+ private_u64 %bar;
+ private_b128 %baz;
+
+ ld_kernarg_u64 $d0, [%input_ptr];
+ ld_global_u32 $s0, [$d0];
+
+ /* Trick gccbrig to set wanted type for the registers. */
+
+/* $s0 is selected as float... */
+/* { dg-final { scan-tree-dump "<float:32> s0;" "original"} } */
+/* ..., therefore, there should not be any casts. */
+/* { dg-final { scan-tree-dump "s10 = s0 \\\+ s0;" "original"} } */
+
+ add_f32 $s10, $s0, $s0;
+ add_f32 $s10, $s0, $s0;
+ add_f32 $s10, $s0, $s0;
+ add_f32 $s10, $s0, $s0;
+ add_f32 $s10, $s0, $s0;
+
+/* Expression with other type, a cast is needed. */
+/* { dg-final { scan-tree-dump "s1 = VIEW_CONVERT_EXPR<unsigned int>.s0. \\\+ 123;" "original"} } */
+
+ add_u32 $s1, $s0, 123;
+
+/* { dg-final { scan-tree-dump "unsigned int s1;" "original"} } */
+
+ add_u32 $s10, $s1, 0;
+ add_u32 $s10, $s1, 0;
+ add_u32 $s10, $s1, 0;
+ add_u32 $s10, $s1, 0;
+ add_u32 $s10, $s1, 0;
+
+/* { dg-final { scan-tree-dump "s0 = VIEW_CONVERT_EXPR<<float:32>>.s1.;" "original"} } */
+
+ mov_b32 $s0, $s1;
+
+/* Rig the election for $d0 to be double. */
+/* { dg-final { scan-tree-dump "<float:64> d0;" "original"} } */
+/* { dg-final { scan-tree-dump "d10 = d0 \\\+ d0;" "original"} } */
+
+ add_f64 $d10, $d0, $d0;
+ add_f64 $d10, $d0, $d0;
+ add_f64 $d10, $d0, $d0;
+ add_f64 $d10, $d0, $d0;
+ add_f64 $d10, $d0, $d0;
+
+/* Make $s2 to be vector type. */
+/* { dg-final { scan-tree-dump "vector.4. unsigned char s2;" "original"} } */
+/* { dg-final { scan-tree-dump "s2 = VIEW_CONVERT_EXPR<vector.4. unsigned char>\\\(s1\\\) \\\+ VIEW_CONVERT_EXPR<vector.4. unsigned char>\\\(s1\\\);" "original"} } */
+
+ add_pp_u8x4 $s2, $s1, $s1;
+
+/* { dg-final { scan-tree-dump "s20 = s2 \\\+ s2;" "original"} } */
+
+ add_pp_u8x4 $s20, $s2, $s2;
+ add_pp_u8x4 $s20, $s2, $s2;
+ add_pp_u8x4 $s20, $s2, $s2;
+ add_pp_u8x4 $s20, $s2, $s2;
+
+/* { dg-final { scan-tree-dump "d0 = VIEW_CONVERT_EXPR<<float:64>>.{VIEW_CONVERT_EXPR<unsigned int>.s0., VIEW_CONVERT_EXPR<unsigned int>.s2.}.;" "original"} } */
+
+ combine_v2_b64_b32 $d0, ($s0, $s2);
+
+/* { dg-final { scan-tree-dump "s2 = VIEW_CONVERT_EXPR<vector.4. unsigned char>.BIT_FIELD_REF <d0, 32, 0>.;" "original"} } */
+/* { dg-final { scan-tree-dump "s1 = BIT_FIELD_REF <d0, 32, 32>;" "original"} } */
+
+ expand_v2_b32_b64 ($s2, $s1), $d0;
+
+/* { dg-final { scan-tree-dump "s0 = VIEW_CONVERT_EXPR<<float:32>>\\\(.*VIEW_CONVERT_EXPR<unsigned int>.s0\[\)\]*;" "original"} } */
+
+ cvt_s16_s8 $s0, $s0;
+
+/* { dg-final { scan-tree-dump "c0 = .*VIEW_CONVERT_EXPR<<float:32>>.s2..* != 0;" "original"} } */
+
+ cvt_b1_f32 $c0, $s2;
+
+/* { dg-final { scan-tree-dump ".*__private_base_addr.* = .*\\\(unsigned char\\\) VIEW_CONVERT_EXPR<unsigned int>\\\(s0\\\)\[\)\]*;" "original"} } */
+
+ st_private_u8 $s0, [%foo];
+
+/* { dg-final { scan-tree-dump ".*__private_base_addr.* = .*\\\(unsigned short\\\) VIEW_CONVERT_EXPR<unsigned int>\\\(s2\\\)\[\)\]*;" "original"} } */
+
+ st_private_u16 $s2, [%bar];
+
+/* { dg-final { scan-tree-dump "mem_read.\[0-9\]* = \\\*\\\(signed char \\\*\\\) \\\(__private_base_addr .*\\\);\[ \n\]*s2 = VIEW_CONVERT_EXPR<vector.4. unsigned char>\\\(\\\(signed int\\\) mem_read.\[0-9\]*\\\);" "original"} } */
+
+ ld_private_s8 $s2, [%foo];
+
+/* { dg-final { scan-tree-dump "mem_read.\[0-9\]* = \\\*\\\(signed short \\\*\\\) \\\(__private_base_addr .*\\\);\[ \n\]*s0 = VIEW_CONVERT_EXPR<<float:32>>\\\(\\\(signed int\\\) mem_read.\[0-9\]*\\\);" "original"} } */
+
+ ld_private_s16 $s0, [%bar];
+
+/* { dg-final { scan-tree-dump "\\\*\\\(<float:32> \\\*\\\) \\\(__private_base_addr.*\\\) \\\+ 0 = s0;" "original"} } */
+/* { dg-final { scan-tree-dump "\\\*\\\(<float:32> \\\*\\\) \\\(__private_base_addr.*\\\) \\\+ 4 = VIEW_CONVERT_EXPR<<float:32>>\\\(s1\\\);" "original"} } */
+/* { dg-final { scan-tree-dump "\\\*\\\(<float:32> \\\*\\\) \\\(__private_base_addr.*\\\) \\\+ 8 = VIEW_CONVERT_EXPR<<float:32>>\\\(s2\\\);" "original"} } */
+
+ st_v3_private_f32 ($s0, $s1, $s2), [%baz];
+
+/* { dg-final { scan-tree-dump "mem_read.\[0-9\]* = \\\*\\\(signed short \\\*\\\) \\\(__private_base_addr.*\\\) \\\+ 0;\[ \n\]*s0 = VIEW_CONVERT_EXPR<<float:32>>\\\(\\\(signed int\\\) mem_read.\[0-9\]*\\\);" "original"} } */
+/* { dg-final { scan-tree-dump "mem_read.\[0-9\]* = \\\*\\\(signed short \\\*\\\) \\\(__private_base_addr.*\\\) \\\+ 2;\[ \n\]*s1 = VIEW_CONVERT_EXPR<unsigned int>\\\(\\\(signed int\\\) mem_read.\[0-9\]*\\\);" "original"} } */
+/* { dg-final { scan-tree-dump "mem_read.\[0-9\]* = \\\*\\\(signed short \\\*\\\) \\\(__private_base_addr.*\\\) \\\+ 4;\[ \n\]*s2 = VIEW_CONVERT_EXPR<vector.4. unsigned char>\\\(\\\(signed int\\\) mem_read.\[0-9\]*\\\);" "original"} } */
+
+ ld_v3_private_s16 ($s0, $s1, $s2), [%baz];
+
+/* { dg-final { scan-tree-dump "s5 = .*VIEW_CONVERT_EXPR<unsigned int>\\\(s0\\\) == VIEW_CONVERT_EXPR<unsigned int>\\\(s2\\\)\\\) .*;" "original"} } */
+
+ cmp_eq_s32_u32 $s5, $s0, $s2;
+
+/* { dg-final { scan-tree-dump "s6 = VIEW_CONVERT_EXPR<<float:32>>\\\(.*VIEW_CONVERT_EXPR<vector\\\(2\\\) unsigned short>\\\(s0\\\).*VIEW_CONVERT_EXPR<vector\\\(2\\\) unsigned short>\\\(s2\\\).*;" "original"} } */
+
+ cmp_eq_pp_u16x2_u16x2 $s6, $s0, $s2;
+
+/* { dg-final { scan-tree-dump "<float:32> s60;" "original"} } */
+
+ add_f32 $s60, $s6, $s6;
+ add_f32 $s60, $s6, $s6;
+ add_f32 $s60, $s6, $s6;
+ add_f32 $s60, $s6, $s6;
+
+ ld_kernarg_u64 $d0, [%output_ptr];
+ st_global_u32 $s0, [$d0];
+
+ ret;
+};
+
+
+
+
Index: gcc/testsuite/brig.dg/test/gimple/packed.hsail
===================================================================
--- gcc/testsuite/brig.dg/test/gimple/packed.hsail (revision 254836)
+++ gcc/testsuite/brig.dg/test/gimple/packed.hsail (revision 254837)
@@ -42,7 +42,7 @@
ret;
};
-/* The b128 load is done using uint128_t*.
+/* The b128 load is done using uint128_t*. */
/* { dg-final { scan-tree-dump "q0 = VIEW_CONVERT_EXPR<uint128_t>\\\(mem_read.\[0-9\]+\\\);" "original"} } */
/* Before arithmetics, the uint128_t is casted to a vector datatype. */
@@ -52,27 +52,25 @@
/* in comparison to the HSAIL syntax. */
/* { dg-final { scan-tree-dump "\\\+ { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }" "original"} } */
-/* After arithmetics, the vector DT is casted back to a uint128_t. */
-/* { dg-final { scan-tree-dump "q1 = VIEW_CONVERT_EXPR<uint128_t>" "original"} } */
-
/* Broadcasted the constant vector's lowest element and summed it up in the next line. */
-/* { dg-final { scan-tree-dump "= { 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15 };\[\n \]+_\[0-9\]+ = _\[0-9\]+ \\\+ _\[0-9\]+;" "gimple"} } */
+/* { dg-final { scan-tree-dump "= { 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15 };\[\n \]+\[a-z0-9_\]+ = \[a-z0-9_\]+ \\\+ \[a-z0-9_\]+;" "gimple"} } */
/* Broadcasted the registers lowest element via a VEC_PERM_EXPR that has an all-zeros mask. */
-/* { dg-final { scan-tree-dump "VEC_PERM_EXPR <_\[0-9\]+, _\[0-9\]+, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }>;" "gimple" } } */
+/* { dg-final { scan-tree-dump "VEC_PERM_EXPR <\[a-z0-9_\]+, \[a-z0-9_\]+, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }>;" "gimple" } } */
/* For the add_ss we assume performing the computation over the whole vector is cheaper than */
/* extracting the scalar and performing a scalar operation. This aims to stay in the vector
/* datapath as long as possible. */
-/* { dg-final { scan-tree-dump "_\[0-9\]+ = VIEW_CONVERT_EXPR<vector\\\(16\\\) unsigned char>\\\(q2\\\);\[\n \]+_\[0-9\]+ = VIEW_CONVERT_EXPR<vector\\\(16\\\) unsigned char>\\\(q3\\\);\[\n \]+_\[0-9\]+ = _\[0-9\]+ \\\+ _\[0-9\]+;" "gimple" } } */
+/* { dg-final { scan-tree-dump "_\[0-9\]+ = q2 \\\+ q3;" "gimple" } } */
/* Insert the lowest element of the result to the lowest element of the result register. */
-/* { dg-final { scan-tree-dump "= VEC_PERM_EXPR <_\[0-9\]+, new_output.\[0-9\]+_\[0-9\]+, { 16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }>;" "gimple" } } */
+/* { dg-final { scan-tree-dump "= VEC_PERM_EXPR <\[a-z0-9_\]+, new_output.\[0-9\]+_\[0-9\]+, { 16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }>;" "gimple" } } */
-/* { dg-final { scan-tree-dump "q4 = VIEW_CONVERT_EXPR<uint128_t>\\\(s_output.\[0-9\]+_\[0-9\]+\\\);" "gimple" } } */
+/* FIXME */
+/* { dg-final { scan-tree-dump "q4 = \(VIEW_CONVERT_EXPR<uint128_t>\\\()?s_output.\[0-9\]+\(_\[0-9\]+\)*\\\)?;" "gimple" } } */
/* The saturating arithmetics are (curently) implemented using scalar builtin calls. */
/* { dg-final { scan-tree-dump-times "= __builtin___hsail_sat_add_u8" 64 "gimple" } } */
/* A single operand vector instr (neg.) */
-/* { dg-final { scan-tree-dump " = VIEW_CONVERT_EXPR<vector\\\(8\\\) signed short>\\\(q8\\\);\[\n \]+_\[0-9\]+ = -_\[0-9\]+;\[\n \]+" "gimple" } } */
+/* { dg-final { scan-tree-dump "= VIEW_CONVERT_EXPR<vector\\\(8\\\) signed short>\\\(\(s_output.\[0-9\]+_\[0-9\]+|q8\)\\\);\[\n \]+q9 = -_\[0-9\]+;\[\n \]+" "gimple" } } */
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] [BRIGFE] Reduce the number of type conversions due to the untyped HSAIL regs
2017-11-16 17:41 [PATCH] [BRIGFE] Reduce the number of type conversions due to the untyped HSAIL regs Pekka Jääskeläinen
@ 2017-11-17 12:41 ` Rainer Orth
2017-11-17 14:09 ` Pekka Jääskeläinen
0 siblings, 1 reply; 3+ messages in thread
From: Rainer Orth @ 2017-11-17 12:41 UTC (permalink / raw)
To: Pekka Jääskeläinen
Cc: GCC Patches, Martin Jambor, Henry Linjamäki
Hi Pekka,
> Instead of always representing the HSAIL's untyped registers as
> unsigned int, the gccbrig now pre-analyzes the BRIG code and
> builds the register variables as a type used the most when storing
> or reading data to/from each register. This reduces the total
> conversions which cannot be always optimized away.
this patch broke i686-pc-linux-gnu bootstrap:
/vol/gcc/src/hg/trunk/local/gcc/brig/brigfrontend/brig-util.cc: In function 'std::__cxx11::string gccbrig_hsa_reg_name_from_id(size_t)':
/vol/gcc/src/hg/trunk/local/gcc/brig/brigfrontend/brig-util.cc:513:43: error: format '%lu' expects argument of type 'long unsigned int', but argument 3 has type 'size_t {aka unsigned int}' [-Werror=format=]
sprintf (reg_name, "$c%lu", reg_hash);
^
/vol/gcc/src/hg/trunk/local/gcc/brig/brigfrontend/brig-util.cc:520:43: error: format '%lu' expects argument of type 'long unsigned int', but argument 3 has type 'size_t {aka unsigned int}' [-Werror=format=]
sprintf (reg_name, "$s%lu", reg_hash);
^
/vol/gcc/src/hg/trunk/local/gcc/brig/brigfrontend/brig-util.cc:527:43: error: format '%lu' expects argument of type 'long unsigned int', but argument 3 has type 'size_t {aka unsigned int}' [-Werror=format=]
sprintf (reg_name, "$d%lu", reg_hash);
^
/vol/gcc/src/hg/trunk/local/gcc/brig/brigfrontend/brig-util.cc:534:43: error: format '%lu' expects argument of type 'long unsigned int', but argument 3 has type 'size_t {aka unsigned int}' [-Werror=format=]
sprintf (reg_name, "$q%lu", reg_hash);
^
/vol/gcc/src/hg/trunk/local/gcc/brig/brigfrontend/brig-util.cc: In function 'void gccbrig_print_reg_use_info(FILE*, const regs_use_index&)':
/vol/gcc/src/hg/trunk/local/gcc/brig/brigfrontend/brig-util.cc:559:40: error: format '%lu' expects argument of type 'long unsigned int', but argument 3 has type 'unsigned int' [-Werror=format=]
fprintf (dump, "(%lu) ", it2->second);
~~~~~~~~~~~^
/vol/gcc/src/hg/trunk/local/gcc/brig/Make-lang.in:250: recipe for target 'brig/brig-util.o' failed
make[3]: *** [brig/brig-util.o] Error 1
Please fix.
Rainer
--
-----------------------------------------------------------------------------
Rainer Orth, Center for Biotechnology, Bielefeld University
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] [BRIGFE] Reduce the number of type conversions due to the untyped HSAIL regs
2017-11-17 12:41 ` Rainer Orth
@ 2017-11-17 14:09 ` Pekka Jääskeläinen
0 siblings, 0 replies; 3+ messages in thread
From: Pekka Jääskeläinen @ 2017-11-17 14:09 UTC (permalink / raw)
To: Rainer Orth; +Cc: GCC Patches, Martin Jambor, Henry Linjamäki
Hi Rainer,
On Fri, Nov 17, 2017 at 1:32 PM, Rainer Orth
<ro@cebitec.uni-bielefeld.de> wrote:
> Please fix.
Fixed in r254870.
BR,
Pekka
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2017-11-17 14:02 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-11-16 17:41 [PATCH] [BRIGFE] Reduce the number of type conversions due to the untyped HSAIL regs Pekka Jääskeläinen
2017-11-17 12:41 ` Rainer Orth
2017-11-17 14:09 ` Pekka Jääskeläinen
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).