On 2023-03-08 13:57, Alan Modra wrote:
> On Wed, Mar 08, 2023 at 12:48:15PM +0100, Ulf Samuelsson via Binutils wrote:
>> Building using 32-bit bfd_vma makes life a lot more difficult since a lot of
>> the code uses the bfd_vma type.
> Right, there are a few obvious places in lddigest.c that need to use
> uint64_t rather than bfd_vma.
>
>> The expression calculator implemented in ldexp.c stores all integers in a
>> "bigint".
>>
>>    struct big_int
>>      {
>>        bfd_vma integer;
>>        char *str;
>>      } bigint;
>>
>> So if you do DIGEST POLY (64,0xD800000000000000ULL,x,y,z,v,w)
>> the expression calculator will return the wrong value for the all the 64 bit
>> constants larger than 0xFFFFFFFF.
> Yes, that is a pain.
>
>> You  do not get around this, except by redesigning a **lot**.
> I think you can do it by changing lang_add_digest to take etree_type*
> args.  I'm feeling generous so here's a first pass at it.  I haven't
> fixed the debug functions..
>
> It's late here so I'll commit this tomorrow after a little more
> testing.

I had a further look at this.
Please be aware that I am still a newbie on the code, so I might have 
misunderstood things.


If you specify

       DIGEST POLY (64, *$*42F0E1EBA9EA3693, $FFFFFFFFFFFFFFFF, ...)

The lexical analyzer sets the "str" field to NULL and the "integer" 
field becomes ULONG_MAX.

<MRI,EXPRESSION>"$"([0-9A-Fa-f])+ {
                 yylval.integer = bfd_scan_vma (yytext + 1, 0, 16);
                 yylval.bigint.str = NULL;
                 return INT;
             }

===

Reason for ULONG_MAX is:

bfd_vma
bfd_scan_vma (const char *string, const char **end, int base)
{
   bfd_vma value;
   bfd_vma cutoff;
   unsigned int cutlim;
   int overflow;

   /* Let the host do it if possible.  */
   if (sizeof (bfd_vma) <= sizeof (unsigned long))
     return strtoul (string, (char **) end, base);

   ...

bfd_scan_vma will scan the string "42F0E1EBA9EA3693".

Since sizeof(bfd_vma) == 4, strtoul will be called.

https://cplusplus.com/reference/cstdlib/strtoul/ => if value > ULONG_MAX 
return ULONG_MAX.

Since 0x42F0E1EBA9EA3693 > 0xFFFFFFFF, strtoul will return ULONG_MAX and 
set errno to ERANGE.

Anyway bfd_scan_vma returns a 32-bit integer on a 32-bit host so 
yylval.integer will be wrong.

======================================

If you specify

       DIGEST POLY (64, *0x*42F0E1EBA9EA3693, $FFFFFFFFFFFFFFFF, ...)

then the yylval.integer will be ULONG_MAX and yylval.bigint.str will be 
"42F0E1EBA9EA3693"

Here, I think your stuff might work.

======================================

If you specify

       DIGEST POLY (64, 0x42F0E1EBA9EA3693, *~0*, ...)

which does not seem to be unreasonable,

then the third parameter become 0x0000000000000000 instead of 
0xFFFFFFFFFFFFFFFF.

======================================

When you read a number, you call "exp_bigintop"

etree_type *
exp_bigintop (bfd_vma value, char *str)
{
   etree_type *new_e = stat_alloc (sizeof (new_e->value));
   new_e->type.node_code = INT;
   new_e->type.filename = ldlex_filename ();
   new_e->type.lineno = lineno;
   new_e->value.value = value;
   new_e->value.str = str;
*  new_e->type.node_class = etree_value;*
   return new_e;
}

This sets the node_class to etree_value;

=======

"get_int" will call "exp_fold_tree_no_dot" which will call "exp_fold_tree_1"

void
exp_fold_tree_no_dot (etree_type *tree)
{
   ...
   exp_fold_tree_1 (tree);
}

"exp_fold_tree" will find an "etree_value"  and call new_abs or new_number
which sets the expld.result.str to NULL, but it is immediately 
afterwards set to tree->value.str.

static void
exp_fold_tree_1 (etree_type *tree)
{
   ...
   switch (tree->type.node_class)
     {
         case etree_value:
            if (expld.section == bfd_abs_section_ptr && !config.sane_expr)
               new_abs (tree->value.value);
            else
               new_number (tree->value.value);
            expld.result.str = tree->value.str;
            break;

            ...
         case etree_unary:
           fold_unary (tree);
           break;

}

static void
fold_unary (etree_type *tree)
{
   exp_fold_tree_1 (tree->unary.child);
   if (expld.result.valid_p)
     {
       switch (tree->type.node_code)
         {
            case '~':
            expld.result.value = ~expld.result.value;
            break;

            ...

         }
       }
}

so fold_unary will result in

expld.result.value = 0x00000000 == ~ULONG_MAX  (due to overflow of 
0x42F0E1EBA9EA3693)

expld.result.str = "42F0E1EBA9EA3693" and not ~0x42F0E1EBA9EA3693

so get_int will get the wrong result.

=============

Since expld.result.value is a bfd_vma, the computation will fail,

typedef struct {
   bfd_vma value;
   char *str;
   ...
   bool valid_p;
} etree_value_type;

struct ldexp_control {

   ...
   etree_value_type result;
   ...
};

extern struct ldexp_control expld;

================================================

Conclusion.

Some parameters to POLY should not be expressions,
they must be valid integers using the "0xFFFF_FFFF_FFFF_FFFF" style

The parameters affected are:

* polynome
* initial value
* xor     value

The range of addresses covered can be expressions as long as they are 
below 4 GB.

I guess that if the documentation adds those restrictions, the patch 
will work


Best Regards
Ulf Samuelsson


>
> diff --git a/ld/lddigest.c b/ld/lddigest.c
> index d0bb4db73ab..846ece177d3 100644
> --- a/ld/lddigest.c
> +++ b/ld/lddigest.c
> @@ -103,10 +103,12 @@ lang_add_crc32_syndrome (algorithm_desc_t * a)
>   static void
>   lang_add_crc32_table (bool big_endian)
>   {
> -  uint32_t *crc32_table = algorithm.crc_tab;	/* Use a precomputed, if it exists */
> +  /* Use a precomputed table, if it exists.  */
> +  uint32_t *crc32_table = algorithm.crc_tab;
>     bool local_table = false;
>     if (crc32_table == NULL)
> -    {				/* No luck, create a table */
> +    {
> +      /* No luck, create a table.  */
>         crc32_table = init_crc32_tab (&algorithm);
>         if (crc32_table == NULL)
>   	{
> @@ -115,7 +117,7 @@ lang_add_crc32_table (bool big_endian)
>   	}
>         local_table = true;
>       }
> -  for (bfd_vma i = 0; i < 256; i++)
> +  for (int i = 0; i < 256; i++)
>       {
>         uint32_t elem = crc32_table[i];
>         if (big_endian)
> @@ -181,10 +183,12 @@ print_hash64_table (algorithm_desc_t * a)
>   static void
>   lang_add_crc64_table (bool big_endian)
>   {
> -  bfd_vma *crc64_table = algorithm.crc_tab;	/* Use a precomputed, if it exists */
> +  /* Use a precomputed table, if it exists.  */
> +  uint64_t *crc64_table = algorithm.crc_tab;
>     bool local_table = false;
>     if (crc64_table == NULL)
> -    {				/* No luck, create a table */
> +    {
> +      /* No luck, create a table.  */
>         crc64_table = init_crc64_tab (&algorithm);
>         if (crc64_table == NULL)
>   	{
> @@ -194,9 +198,9 @@ lang_add_crc64_table (bool big_endian)
>         local_table = true;
>       }
>     print_hash64_table (&algorithm);
> -  for (bfd_vma i = 0; i < 256; i++)
> +  for (int i = 0; i < 256; i++)
>       {
> -      bfd_vma elem = crc64_table[i];
> +      uint64_t elem = crc64_table[i];
>         if (big_endian)
>   	{
>   	  elem = __builtin_bswap64 (elem);
> @@ -214,12 +218,42 @@ lang_add_crc64_table (bool big_endian)
>   
>   /* ============ DIGEST COMMON functions ======================================*/
>   
> +static uint64_t
> +get_int (etree_type *tree, bool *err)
> +{
> +  if (tree != NULL)
> +    {
> +      exp_fold_tree_no_dot (tree);
> +
> +      if (expld.result.valid_p)
> +	{
> +	  if (expld.result.str != NULL)
> +	    {
> +	      char *end;
> +	      uint64_t val = strtoull (expld.result.str, &end, 16);
> +	      if (!*end)
> +		return val;
> +	    }
> +	  else
> +	    {
> +	      if (expld.result.section != NULL)
> +		expld.result.value += expld.result.section->vma;
> +	      return expld.result.value;
> +	    }
> +	}
> +    }
> +  *err = true;
> +  return 0;
> +}
> +
>   void
> -lang_add_digest (bfd_vma size,
> -		 bfd_vma poly,
> -		 bfd_vma initial,
> -		 bfd_vma xor_val,
> -		 bfd_vma ireflect, bfd_vma oreflect, bfd_vma reciprocal)
> +lang_add_digest (etree_type *size,
> +		 etree_type *poly,
> +		 etree_type *initial,
> +		 etree_type *xor_val,
> +		 etree_type *ireflect,
> +		 etree_type *oreflect,
> +		 etree_type *reciprocal)
>   {
>     if (algorithm.crc_algo != no_algo)	/* We only allow one CRC <polynom> */
>       {
> @@ -227,40 +261,44 @@ lang_add_digest (bfd_vma size,
>         return;
>       }
>   
> +  bool err = false;
>     algorithm.name = "CUSTOM";
>     algorithm.big_endian = digest_big_endian;
> -  if (size == 64)
> +  algorithm.crc_size = get_int (size, &err);
> +  algorithm.poly.d64 = get_int (poly, &err);
> +  algorithm.initial.d64 = get_int (initial, &err);
> +  algorithm.xor_val.d64 = get_int (xor_val, &err);
> +  algorithm.ireflect = get_int (ireflect, &err);
> +  algorithm.oreflect = get_int (oreflect, &err);
> +  algorithm.crc_tab = NULL;
> +  algorithm.reciprocal = get_int (reciprocal, &err);
> +  algorithm.expected.d64 = 0;
> +
> +  if (err)
> +    {
> +      einfo (_("%F%P: Invalid DIGEST arg\n"));
> +      return;
> +    }
> +
> +  if (algorithm.crc_size == 64)
>       {
>         algorithm.crc_algo = crc_algo_64;
> -      algorithm.crc_size = size;
> -      algorithm.poly.d64 = poly;	/* Set the polynom */
> -      algorithm.initial.d64 = initial;	/* Set seed */
> -      algorithm.xor_val.d64 = xor_val;	/* final XOR value */
> -      algorithm.ireflect = ireflect;
> -      algorithm.oreflect = oreflect;
> -      algorithm.crc_tab = NULL;
> -      algorithm.reciprocal = reciprocal;
> -      algorithm.expected.d64 = 0;
> -
>         lang_add_crc64_syndrome (&algorithm);
>       }
> -  else if (size == 32)
> +  else if (algorithm.crc_size == 32)
>       {
>         algorithm.crc_algo = crc_algo_32;
> -      algorithm.crc_size = size;
> -      algorithm.poly.d32._0 = poly;	/* Set the polynom */
> -      algorithm.initial.d32._0 = initial;	/* Set seed */
> -      algorithm.xor_val.d32._0 = xor_val;	/* final XOR value */
> -      algorithm.ireflect = ireflect;
> -      algorithm.oreflect = oreflect;
> -      algorithm.crc_tab = NULL;
> -      algorithm.reciprocal = reciprocal;
> -      algorithm.expected.d32._0 = 0;
> +      algorithm.poly.d32._0 = algorithm.poly.d64;
> +      algorithm.poly.d32._1 = 0;
> +      algorithm.initial.d32._0 = algorithm.initial.d64;
> +      algorithm.initial.d32._1 = 0;
> +      algorithm.xor_val.d32._0 = algorithm.xor_val.d64;
> +      algorithm.xor_val.d32._1 = 0;
>         lang_add_crc32_syndrome (&algorithm);
>       }
>     else
>       {
> -      einfo (_("%F%P: Illegal Size in DIGEST: %E\n"));
> +      einfo (_("%F%P: Invalid Size in DIGEST\n"));
>         return;
>       }
>   }
> @@ -660,7 +698,7 @@ set_crc64_checksum (uint64_t crc, bfd_vma addr)
>   }
>   
>   static bool
> -set_crc_checksum (bfd_vma crc, bfd_vma addr, bfd_vma size)
> +set_crc_checksum (uint64_t crc, bfd_vma addr, int size)
>   {
>     bool status;
>     if (size == 64)
> @@ -700,7 +738,7 @@ symbol_lookup (char *name, bfd_vma * val)
>    * Multiplexing function for calculating CRC with different algorithms
>    * 'algorithm.crc_algo'
>    */
> -static bfd_vma
> +static uint64_t
>   calculate_crc (const unsigned char *input_str, size_t num_bytes)
>   {
>     if (algorithm.crc_algo == crc_algo_64)
> @@ -782,7 +820,7 @@ void
>   lang_generate_crc (void)
>   {
>     bfd_vma crc_addr, crc_area_start, crc_area_end;
> -  bfd_vma crc;
> +  uint64_t crc;
>     bool can_do_crc;
>   
>     /* Return immediately, if CRC is not requested */
> diff --git a/ld/lddigest.h b/ld/lddigest.h
> index 9f5e5f3fbda..f5ae1ddad7e 100755
> --- a/ld/lddigest.h
> +++ b/ld/lddigest.h
> @@ -33,6 +33,7 @@
>   #include <stdint.h>
>   #include <stdbool.h>
>   #include "bfd.h"
> +#include "ldexp.h"
>   
>   #define	CRC_POLY_64_ECMA	0x42F0E1EBA9EA3693ull	/* Normal */
>   #define	CRC_POLY_64_ECMA_EXP	0x6C40DF5F0B497347ull	/* CRC when testing "123456789" */
> @@ -163,17 +164,18 @@ extern uint32_t calc_crc32
>   extern void lang_add_crc32_syndrome (algorithm_desc_t * a);
>   
>   /* CR-64 */
> -extern bfd_vma *init_crc64_tab (algorithm_desc_t * dsc);
> -extern bfd_vma calc_crc64
> +extern uint64_t *init_crc64_tab (algorithm_desc_t * dsc);
> +extern uint64_t calc_crc64
>     (algorithm_desc_t * dsc, const unsigned char *input_str, size_t num_bytes);
>   extern void lang_add_crc64_syndrome (algorithm_desc_t * a);
>   
> -extern void lang_add_digest (bfd_vma size,
> -			     bfd_vma poly,
> -			     bfd_vma initial,
> -			     bfd_vma xor_val,
> -			     bfd_vma ireflect,
> -			     bfd_vma oreflect, bfd_vma reciprocal);
> +extern void lang_add_digest (etree_type *size,
> +			     etree_type *poly,
> +			     etree_type *initial,
> +			     etree_type *xor_val,
> +			     etree_type *ireflect,
> +			     etree_type *oreflect,
> +			     etree_type *reciprocal);
>   extern bool lang_set_digest (char *digest);
>   extern void lang_add_digest_table (bool big_endian);
>   extern const char *lang_get_label (const char *label, bool *big_endian);
> diff --git a/ld/lddigest_tab.c b/ld/lddigest_tab.c
> index 4f5db10c997..b286a3c0161 100644
> --- a/ld/lddigest_tab.c
> +++ b/ld/lddigest_tab.c
> @@ -99,9 +99,9 @@ const algorithm_desc_t algorithms[MAXALGO+1] =
>     [CRC32] =
>   	{
>   		crc_algo_32, 32, "CRC32",
> -		.poly.d32._0 = CRC_POLY_32,
> +		.poly.d32 = { CRC_POLY_32, 0 },
>   		.initial.d64 = CRC_START_32_INV,
> -		.xor_val.d32._0 = CRC_END_32_INV,
> +		.xor_val.d32 = { CRC_END_32_INV, 0 },
>   		.ireflect = true,
>   		.oreflect = true,
>   		.reciprocal = false,
> diff --git a/ld/ldgram.y b/ld/ldgram.y
> index ea0c569279a..2bc2f5e24e2 100644
> --- a/ld/ldgram.y
> +++ b/ld/ldgram.y
> @@ -747,13 +747,13 @@ polynome:
>   		   mustbe_exp ',' mustbe_exp ',' mustbe_exp ')'
>   		{
>   		  lang_add_digest (
> -			$3->value.value,	/* size			*/
> -			$5->value.value,	/* polynome		*/
> -			$7->value.value,	/* initial value	*/
> -			$9->value.value,	/* xor     value	*/
> -			$11->value.value,	/* input   reflected	*/
> -			$13->value.value,	/* output  reflected	*/
> -			$15->value.value	/* reciprocal		*/
>
>   			);
>   		  polynome_valid = true;
>   		}
> diff --git a/ld/testsuite/ld-scripts/crc64-poly-size.d b/ld/testsuite/ld-scripts/crc64-poly-size.d
> index 6a3651cbff5..1c7b3c46881 100644
> --- a/ld/testsuite/ld-scripts/crc64-poly-size.d
> +++ b/ld/testsuite/ld-scripts/crc64-poly-size.d
> @@ -1,5 +1,5 @@
>   #source: crc64-poly-size.s
>   #ld: -T crc64-poly-size.t
> -# error: .*: Illegal Size in DIGEST: .*
> +# error: .*: Invalid Size in DIGEST
>   #target: [is_elf_format] [is_coff_format]
>   #skip: tic4x-*-* tic54x-*-*
>
>