From 1f183edc0428706d9839b1863240e7b5a2a6f036 Mon Sep 17 00:00:00 2001 From: Ulf Samuelsson Date: Wed, 15 Feb 2023 19:06:57 +0100 Subject: [PATCH] ASCII command Signed-off-by: Ulf Samuelsson --- ld/NEWS | 6 ++ ld/ld.texi | 24 ++++- ld/ldgram.y | 10 +- ld/ldlang.c | 65 +++++++++--- ld/ldlang.h | 3 +- ld/ldlex.l | 1 + ld/testsuite/ld-scripts/ascii.d | 155 +++++++++++++++++++++++++++++ ld/testsuite/ld-scripts/ascii.s | 9 ++ ld/testsuite/ld-scripts/ascii.t | 51 ++++++++++ ld/testsuite/ld-scripts/asciz.d | 17 ++-- ld/testsuite/ld-scripts/asciz.t | 23 ++--- ld/testsuite/ld-scripts/header.inc | 34 +++++++ ld/testsuite/ld-scripts/script.exp | 1 + 13 files changed, 349 insertions(+), 50 deletions(-) create mode 100644 ld/testsuite/ld-scripts/ascii.d create mode 100644 ld/testsuite/ld-scripts/ascii.s create mode 100644 ld/testsuite/ld-scripts/ascii.t create mode 100644 ld/testsuite/ld-scripts/header.inc diff --git a/ld/NEWS b/ld/NEWS index 4ce7e19d40b..38af9cba877 100644 --- a/ld/NEWS +++ b/ld/NEWS @@ -1,5 +1,11 @@ -*- text -*- +* The linker script syntax has a new command for output sections: + ASCII (size) "string" (Alt 1 = Working) + ASCII size, "string" (Alt 2 = Not Working) + This will reserve a zero filled block of bytes at the current + location and insert a zero-terminated string at the beginning of the block. + * The linker script syntax has a new command for output sections: ASCIZ "string" This will insert a zero-terminated string at the current location. diff --git a/ld/ld.texi b/ld/ld.texi index 335886d4e6b..e309eebfa43 100644 --- a/ld/ld.texi +++ b/ld/ld.texi @@ -5308,6 +5308,7 @@ C identifiers because they contain a @samp{.} character. 
@cindex data @cindex section data @cindex output section data +@kindex ASCII (@var{expression}) ``@var{string}'' @kindex ASCIZ ``@var{string}'' @kindex BYTE(@var{expression}) @kindex SHORT(@var{expression}) @@ -5345,14 +5346,27 @@ When the object file format does not have an explicit endianness, as is true of, for example, S-records, the value will be stored in the endianness of the first input object file. +You can include a fixed size string in an output section by using @code{ASCII}. +The keyword is followed by a size and a string which is stored at +the current value of the location counter, padded with zero bytes up to that size. + You can include a zero-terminated string in an output section by using @code{ASCIZ}. The keyword is followed by a string which is stored at -the current value of the location counter adding a zero byte at the -end. If the string includes spaces it must be enclosed in double -quotes. The string may contain '\n', '\r', '\t' and octal numbers. -Hex numbers are not supported. +the current value of the location counter adding a zero byte at the end. + +If the string in an @code{ASCII} or @code{ASCIZ} command includes spaces +it must be enclosed in double quotes. +If the string is too long, a warning is issued and the string is truncated. +The string can have C escape characters like '\n', '\r', '\t' and octal numbers. +The '\"' escape is not supported. 
+ +Example 1: This is a string of 16 characters and will create a 32 byte area +@smallexample + ASCII 32, "This is 16 bytes" + ASCII (32) "This is 16 bytes" +@end smallexample -For example, this string of 16 characters will create a 17 byte area +Example 2: This is a string of 16 characters and will create a 17 byte area @smallexample ASCIZ "This is 16 bytes" @end smallexample diff --git a/ld/ldgram.y b/ld/ldgram.y index 8240cf97327..8aa7749c1e8 100644 --- a/ld/ldgram.y +++ b/ld/ldgram.y @@ -125,7 +125,7 @@ static int error_index; %right UNARY %token END %left '(' -%token ALIGN_K BLOCK BIND QUAD SQUAD LONG SHORT BYTE ASCIZ +%token ALIGN_K BLOCK BIND QUAD SQUAD LONG SHORT BYTE ASCII ASCIZ %token SECTIONS PHDRS INSERT_K AFTER BEFORE %token DATA_SEGMENT_ALIGN DATA_SEGMENT_RELRO_END DATA_SEGMENT_END %token SORT_BY_NAME SORT_BY_ALIGNMENT SORT_NONE @@ -668,9 +668,15 @@ statement: { lang_add_data ((int) $1, $3); } + | ASCII '(' mustbe_exp ')' NAME + { + /* 'value' is a memory leak, do we care? */ + etree_type *value = $3; + lang_add_string (value->value.value, $5); + } | ASCIZ NAME { - lang_add_string ($2); + lang_add_string (0, $2); } | FILL '(' fill_exp ')' { diff --git a/ld/ldlang.c b/ld/ldlang.c index b20455c9373..2ba2980f082 100644 --- a/ld/ldlang.c +++ b/ld/ldlang.c @@ -8361,15 +8361,16 @@ lang_add_data (int type, union etree_union *exp) new_stmt->type = type; } -void -lang_add_string (const char *s) +static char * +convert_string (const char * s) { - bfd_vma len = strlen (s); - bfd_vma i; - bool escape = false; + int len = strlen (s); + int i; + bool escape = false; + char * buffer = malloc (len + 1); + char * b; - /* Add byte expressions until end of string. 
*/ - for (i = 0 ; i < len; i++) + for (i = 0, b = buffer; i < len; i++) { char c = *s++; @@ -8404,7 +8405,7 @@ lang_add_string (const char *s) value += (c - '0'); i++; s++; - + c = *s; if ((c >= '0') && (c <= '7')) { @@ -8422,26 +8423,58 @@ lang_add_string (const char *s) i--; s--; } - + c = value; } break; } - - lang_add_data (BYTE, exp_intop (c)); escape = false; } else { if (c == '\\') - escape = true; - else - lang_add_data (BYTE, exp_intop (c)); + { + escape = true; + continue; + } } + + * b ++ = c; } - /* Remeber to terminate the string. */ - lang_add_data (BYTE, exp_intop (0)); + * b = 0; + return buffer; +} + +void +lang_add_string (int size, const char *s) +{ + int len; + int i; + char * string; + + string = convert_string (s); + len = strlen (string); + + /* Check if it is ASCIZ command (len == 0) */ + if (size == 0) + size = len + 1; + else if (len > size) + { + /* We cannot fit the '\0' at the end. */ + len = size - 1; + + einfo (_("%P:%pS: warning: ASCII string does not fit in allocated space," + " truncated\n"), NULL); + } + + for (i = 0 ; i < len ; i++) + lang_add_data (BYTE, exp_intop (string[i])); + + while (i++ < size) + lang_add_data (BYTE, exp_intop ('\0')); + + free (string); } /* Create a new reloc statement. 
RELOC is the BFD relocation type to diff --git a/ld/ldlang.h b/ld/ldlang.h index 32819066b8a..fe85e159aa7 100644 --- a/ld/ldlang.h +++ b/ld/ldlang.h @@ -646,8 +646,9 @@ extern void pop_stat_ptr (void); extern void lang_add_data (int, union etree_union *); +extern bfd_vma charcount(const char *s); extern void lang_add_string - (const char *); + (int, const char *s); extern void lang_add_reloc (bfd_reloc_code_real_type, reloc_howto_type *, asection *, const char *, union etree_union *); diff --git a/ld/ldlex.l b/ld/ldlex.l index 32336cf0be2..910e7ea3b8b 100644 --- a/ld/ldlex.l +++ b/ld/ldlex.l @@ -309,6 +309,7 @@ V_IDENTIFIER [*?.$_a-zA-Z\[\]\-\!\^\\]([*?.$_a-zA-Z0-9\[\]\-\!\^\\]|::)* "LONG" { RTOKEN(LONG); } "SHORT" { RTOKEN(SHORT); } "BYTE" { RTOKEN(BYTE); } +"ASCII" { RTOKEN(ASCII); } "ASCIZ" { RTOKEN(ASCIZ); }