public inbox for annobin@sourceware.org
 help / color / mirror / Atom feed
From: Tulio Magno Quites Machado Filho <tuliom@ascii.art.br>
To: annobin@sourceware.org
Cc: grant@hcubed.com, Nick Clifton <nickc@redhat.com>
Subject: [PATCH] Add support for global-file-syms to the clang and llvm plugins
Date: Fri, 22 Mar 2024 17:17:36 -0300	[thread overview]
Message-ID: <20240322201736.421134-1-tuliom@ascii.art.br> (raw)
In-Reply-To: <a8509ec95a440a7bed816c9e5dcb9aea@hcubed.com>

From: Tulio Magno Quites Machado Filho <tuliom@redhat.com>

This part starts by moving the implementation of parse_env() to
annobin-common.cc in order to be shared between the gcc and the llvm
plugins.

This was required because the llvm plugin didn't have a mechanism to
receive arguments from the user. This commit adds support for using the
environment variable ANNOBIN for the LLVM plugin, although it's still
missing many of the basic features available in the other plugins.

Both clang and llvm plugins are now able to generate the same output as
the GCC plugin, e.g. _annobin_hello_c_1711138217_00204218_start.
---
 annobin-common.cc        | 61 ++++++++++++++++++++++++++++++++++++++++
 annobin-common.h         | 19 +++++++++++++
 clang-plugin/Makefile.in | 12 +++++++-
 clang-plugin/annobin.cpp | 45 +++++++++++++++++++++++++----
 gcc-plugin/Makefile.am   |  2 +-
 gcc-plugin/Makefile.in   |  7 +++--
 gcc-plugin/annobin.cc    | 45 ++---------------------------
 llvm-plugin/Makefile.in  | 21 ++++++++++++--
 llvm-plugin/annobin.cpp  | 53 ++++++++++++++++++++++++++++++++--
 9 files changed, 208 insertions(+), 57 deletions(-)
 create mode 100644 annobin-common.cc
 create mode 100644 annobin-common.h

diff --git a/annobin-common.cc b/annobin-common.cc
new file mode 100644
index 0000000..bbdba41
--- /dev/null
+++ b/annobin-common.cc
@@ -0,0 +1,61 @@
+/* annobin - Common functions used by plugins.
+   Copyright (c) 2024 Red Hat.
+
+  This is free software; you can redistribute it and/or modify it
+  under the terms of the GNU General Public License as published
+  by the Free Software Foundation; either version 3, or (at your
+  option) any later version.
+
+  It is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.  */
+
+#include "annobin-common.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+extern bool parse_argument (const char *, const char *);
+
+void
+parse_env (bool (* parse_argument)(const char *, const char *))
+{
+  char arg[2048];
+  const char * env;
+
+  if ((env = getenv ("ANNOBIN")) == NULL)
+    return;
+
+  while (*env != 0)
+    {
+      const char * comma;
+
+      comma = strchr (env, ',');
+      if (comma)
+	{
+	  strncpy (arg, env, comma - env);
+	  arg[comma - env] = 0;
+	  env = comma + 1;
+        }
+      else
+	{
+	  strncpy (arg, env, sizeof arg - 1);
+	  arg[sizeof arg - 1] = 0;
+	  env += strlen (env);
+	}
+
+      char * value = strchr (arg, '=');
+      if (value)
+	{
+	  * value = 0;
+	  value ++;
+	}
+      else
+	{
+	  value = (char *) "";
+	}
+
+      (void) (*parse_argument) (arg, value);
+    }
+}
diff --git a/annobin-common.h b/annobin-common.h
new file mode 100644
index 0000000..8007c6c
--- /dev/null
+++ b/annobin-common.h
@@ -0,0 +1,19 @@
+/* annobin - Common functions used by plugins.
+   Copyright (c) 2024 Red Hat.
+
+  This is free software; you can redistribute it and/or modify it
+  under the terms of the GNU General Public License as published
+  by the Free Software Foundation; either version 3, or (at your
+  option) any later version.
+
+  It is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.  */
+#ifndef ANNOBIN_COMMON_H_
+#define ANNOBIN_COMMON_H_
+
+#include <stdbool.h>
+
+void parse_env (bool (* parse_argument)(const char *, const char *));
+#endif // ANNOBIN-COMMON_H_
diff --git a/clang-plugin/Makefile.in b/clang-plugin/Makefile.in
index 1fc45c0..9ceba39 100644
--- a/clang-plugin/Makefile.in
+++ b/clang-plugin/Makefile.in
@@ -65,7 +65,9 @@ PLUGIN_TEST_OPTIONS = \
 #   -flto            # Not used because the object file generated is not an ELF format file.
 #   -fcf-protection  # Not used because not supported by all architectures
 
-check: @srcdir@/hello.c $(PLUGIN_NAME)
+TESTS = test-global-file-syms
+
+check: @srcdir@/hello.c $(PLUGIN_NAME) $(addsuffix .log,$(TESTS))
 	$(CLANG) -fplugin=$(PLUGIN) $(PLUGIN_TEST_OPTIONS) -c @srcdir@/hello.c
 	$(READELF) --wide --notes hello.o > clang-plugin-test.readelf.out
 	@ grep --silent -e "annobin built by clang version" clang-plugin-test.readelf.out
@@ -74,3 +76,11 @@ check: @srcdir@/hello.c $(PLUGIN_NAME)
 	@ grep --silent -e "SpecLoadHarden" clang-plugin-test.readelf.out
 	$(ANNOCHECK) --skip-all --test-optimization --test-fortify --test-stack-prot  hello.o --verbose
 	@ echo "PASS Clang plugin test"
+
+test-global-file-syms.log: @srcdir@/hello.c $(PLUGIN_NAME)
+	$(CLANG) -fplugin=$(PLUGIN) -fplugin-arg-annobin-global-file-syms -c $< -o test-global-file-syms.o
+	$(READELF) --wide --syms  test-global-file-syms.o > test-global-file-syms.readelf.out
+	@ grep --silent -e "_annobin.\+_hello_c_" test-global-file-syms.readelf.out
+	@ grep --silent -e '_annobin.\+_hello_c_.\+_start' test-global-file-syms.readelf.out
+	@ grep --silent -e '_annobin.\+_hello_c_.\+_end' test-global-file-syms.readelf.out
+	@ echo "PASS Clang global-file-syms test" | tee $@
diff --git a/clang-plugin/annobin.cpp b/clang-plugin/annobin.cpp
index 5426d22..6e1a8d4 100644
--- a/clang-plugin/annobin.cpp
+++ b/clang-plugin/annobin.cpp
@@ -31,12 +31,19 @@ using namespace llvm;
 #include <cstring>
 #include <cctype>
 #include <cstdarg>
+#include <iomanip>
 #include <sstream>
+#include <sys/time.h>
 
 namespace
 {
   static const unsigned int   annobin_version = (unsigned int) (ANNOBIN_VERSION * 100);
   bool                        be_verbose = false;
+  /* True if the symbols used to map addresses to file names should be global.
+     On some architectures these symbols have to be global so that they will
+     be preserved in object files.  But doing so can prevent the build-id
+     mechanism from working, since the symbols contain build-date information.  */
+  bool                        global_file_name_symbols = false;
 
   // Helper functions used throughout this file.
   template<class... Tys>
@@ -188,6 +195,30 @@ private:
       for( auto & c : name)
 	if (!isalnum (c))
 	  c = '_';
+
+      if (global_file_name_symbols)
+	{
+	  /* A program can have multiple source files with the same name.
+	     Or indeed the same source file can be included multiple times.
+	     Or a library can be built from a sources which include file names
+	     that match application file names.  Whatever the reason, we need
+	     to be ensure that we generate unique global symbol names.  So we
+	     append the time to the symbol name.  This will of course break
+	     the functionality of build-ids.  That is why this option is off
+	     by default.  */
+	  struct timeval tv;
+
+	  if (gettimeofday (& tv, NULL))
+	    {
+	      ice ("unable to get time of day.");
+	      tv.tv_sec = tv.tv_usec = 0;
+	    }
+	  std::ostringstream t;
+	  t << "_" << std::setfill('0') << std::setw(8) << (long) tv.tv_sec;
+	  t << "_" << std::setfill('0') << std::setw(8) << (long) tv.tv_usec;
+	  name += t.str();
+    }
+
     }
     
     void
@@ -574,11 +605,13 @@ private:
 	{
 	  if (args[i] == "help")
 	    inform ("supported options:\n\
-  help      Display this message\n\
-  disable   Disable the plugin\n\
-  enable    Reenable the plugin if it has been disabled\n\
-  version   Displays the version number\n\
-  verbose   Produce descriptive messages whilst working");
+  help               Display this message\n\
+  disable            Disable the plugin\n\
+  enable             Reenable the plugin if it has been disabled\n\
+  version            Displays the version number\n\
+  verbose            Produce descriptive messages whilst working\n\
+  \n\
+  global-file-syms   Create final name symbols that are global");
 	  else if (args[i] == "disable")
 	    enabled = false;
 	  else if (args[i] == "enable")
@@ -587,6 +620,8 @@ private:
 	    inform ("Annobin plugin version: %u", annobin_version);
 	  else if (args[i] == "verbose")
 	    be_verbose = true;
+	  else if (args[i] == "global-file-syms")
+	    global_file_name_symbols = true;
 	  else
 	    inform ("error: unknown option: %s", args[i].c_str());
 	}
diff --git a/gcc-plugin/Makefile.am b/gcc-plugin/Makefile.am
index eca5e10..9616018 100644
--- a/gcc-plugin/Makefile.am
+++ b/gcc-plugin/Makefile.am
@@ -8,7 +8,7 @@ plugin_LTLIBRARIES = annobin.la
 AM_CPPFLAGS = -I'$(top_builddir)' -I'$(top_srcdir)'
 AUTOMAKE_OPTIONS = no-dependencies
 
-annobin_la_SOURCES = annobin.cc
+annobin_la_SOURCES = annobin.cc ../annobin-common.cc
 EXTRA_annobin_la_SOURCES = aarch64.annobin.cc arm.annobin.cc dummy.annobin.cc powerpc.annobin.cc s390.annobin.cc x86_64.annobin.cc i686.annobin.cc riscv.annobin.cc mips.annobin.cc
 annobin_la_LIBADD = @target_plugin@
 annobin_la_DEPENDENCIES = @target_plugin@
diff --git a/gcc-plugin/Makefile.in b/gcc-plugin/Makefile.in
index d5604cc..6ecc51f 100644
--- a/gcc-plugin/Makefile.in
+++ b/gcc-plugin/Makefile.in
@@ -144,7 +144,7 @@ am__uninstall_files_from_dir = { \
   }
 am__installdirs = "$(DESTDIR)$(plugindir)"
 LTLIBRARIES = $(plugin_LTLIBRARIES)
-am_annobin_la_OBJECTS = annobin.lo
+am_annobin_la_OBJECTS = annobin.lo annobin-common.lo
 annobin_la_OBJECTS = $(am_annobin_la_OBJECTS)
 AM_V_P = $(am__v_P_@AM_V@)
 am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
@@ -347,7 +347,7 @@ top_srcdir = @top_srcdir@
 plugin_LTLIBRARIES = annobin.la
 AM_CPPFLAGS = -I'$(top_builddir)' -I'$(top_srcdir)'
 AUTOMAKE_OPTIONS = no-dependencies
-annobin_la_SOURCES = annobin.cc
+annobin_la_SOURCES = annobin.cc ../annobin-common.cc
 EXTRA_annobin_la_SOURCES = aarch64.annobin.cc arm.annobin.cc dummy.annobin.cc powerpc.annobin.cc s390.annobin.cc x86_64.annobin.cc i686.annobin.cc riscv.annobin.cc mips.annobin.cc
 annobin_la_LIBADD = @target_plugin@
 annobin_la_DEPENDENCIES = @target_plugin@
@@ -460,6 +460,9 @@ distclean-compile:
 .cc.lo:
 	$(AM_V_CXX)$(LTCXXCOMPILE) -c -o $@ $<
 
+annobin-common.lo: ../annobin-common.cc
+	$(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o annobin-common.lo `test -f '../annobin-common.cc' || echo '$(srcdir)/'`../annobin-common.cc
+
 mostlyclean-libtool:
 	-rm -f *.lo
 
diff --git a/gcc-plugin/annobin.cc b/gcc-plugin/annobin.cc
index 440029e..af1e2dd 100644
--- a/gcc-plugin/annobin.cc
+++ b/gcc-plugin/annobin.cc
@@ -16,6 +16,7 @@
 #include <stdio.h>
 #include <intl.h>
 
+#include "annobin-common.h"
 #include "annobin-global.h"
 #include "annobin.h"
 
@@ -3123,43 +3124,6 @@ parse_args (unsigned argc, struct plugin_argument * argv)
   return result;
 }
 
-static void
-parse_env (const char * env)
-{
-  while (*env != 0)
-    {
-      const char * comma;
-      char * arg = annobin_note_buffer;
-
-      comma = strchr (env, ',');
-      if (comma)
-	{
-	  strncpy (arg, env, comma - env);
-	  arg[comma - env] = 0;
-	  env = comma + 1;
-	}
-      else
-	{
-	  strncpy (arg, env, sizeof annobin_note_buffer - 1);
-	  arg[sizeof annobin_note_buffer - 1] = 0;
-	  env += strlen (env);
-	}
-
-      char * value = strchr (arg, '=');
-      if (value)
-	{
-	  * value = 0;
-	  value ++;
-	}
-      else
-	{
-	  value = (char *) "";
-	}
-
-      (void) parse_argument (arg, value);
-    }
-}
-
 int
 plugin_init (struct plugin_name_args *    plugin_info,
              struct plugin_gcc_version *  version)
@@ -3173,12 +3137,7 @@ plugin_init (struct plugin_name_args *    plugin_info,
       return 1;
     }
 
-  const char * env;
-  if ((env = getenv ("ANNOBIN")) != NULL)
-    {
-      parse_env (env);
-    }
-
+  parse_env (&parse_argument);
   if (plugins_active_p () && (annobin_extra_prefix[0] == 0))
     {
       /* See BZ 2162746 for an example of why this is needed.  */
diff --git a/llvm-plugin/Makefile.in b/llvm-plugin/Makefile.in
index bcee371..f9b4e26 100644
--- a/llvm-plugin/Makefile.in
+++ b/llvm-plugin/Makefile.in
@@ -28,10 +28,15 @@ PLUGIN_NAME = annobin-for-llvm.so
 
 all: $(PLUGIN_NAME) Makefile
 
-$(PLUGIN_NAME): annobin.cpp
+$(PLUGIN_NAME): annobin.cpp annobin-common.o
 	clang++ $(CLANG_TARGET_OPTIONS) $(LLVM_CXX_OPTIONS) \
 		$(PLUGIN_OPTIONS) $(LLVM_LD_OPTIONS) $(LLVM_SYS_LIBS) \
-		-I .. -I$(INCDIR) $< -o $@
+		-I .. -I$(INCDIR) $^ -o $@
+
+annobin-common.o: @top_srcdir@/annobin-common.cc
+	clang++ -c $(CLANG_TARGET_OPTIONS) $(LLVM_CXX_OPTIONS) \
+		$(PLUGIN_OPTIONS) $(LLVM_LD_OPTIONS) $(LLVM_SYS_LIBS) \
+		-I .. -I$(INCDIR) $^ -o $@
 
 install: $(PLUGIN_NAME)
 	install -Dpm0755 -t ${PLUGIN_INSTALL_DIR} $<
@@ -63,6 +68,8 @@ PLUGIN_TEST_OPTIONS = \
 #   -flto            # Not used because the object file generated is not an ELF format file.
 #   -fcf-protection  # Not used because not supported by all architectures
 
+TESTS = test-global-file-syms
+
 check: $(PLUGIN_NAME)
 	@ if [ `echo | clang -dM -E - | grep __clang_major__ | cut -f 3 -d ' '` -gt 14 ] ; \
 	then \
@@ -85,7 +92,7 @@ check-newpm:
 check-pre-clang-13:
 	$(MAKE) check-run LOAD_PLUGIN_ARG="-Xclang -load -Xclang $(PLUGIN)"
 
-check-run: @srcdir@/hello.c
+check-run: @srcdir@/hello.c $(addsuffix .log,$(TESTS))
 	echo Compiling with $(LOAD_PLUGIN_ARG) ...
 	$(CLANG) $(LOAD_PLUGIN_ARG)  $(PLUGIN_TEST_OPTIONS) -c @srcdir@/hello.c
 	echo Checking with readelf ...
@@ -97,3 +104,11 @@ check-run: @srcdir@/hello.c
 	$(ANNOCHECK) --skip-all --test-optimization --test-fortify --test-stack-prot hello.o || :
 	$(ANNOCHECK) --skip-all --test-optimization --test-fortify --test-stack-prot hello.o
 	echo "PASS LLVM plugin test [$(LOAD_PLUGIN_ARG)]"
+
+test-global-file-syms.log: @srcdir@/hello.c
+	ANNOBIN=global-file-syms $(CLANG) $(LOAD_PLUGIN_ARG) -c $< -o test-global-file-syms.o
+	$(READELF) --wide --syms  test-global-file-syms.o > test-global-file-syms.readelf.out
+	@ grep --silent -e "_annobin.\+_hello_c_" test-global-file-syms.readelf.out
+	@ grep --silent -e '_annobin.\+_hello_c_.\+_start' test-global-file-syms.readelf.out
+	@ grep --silent -e '_annobin.\+_hello_c_.\+_end' test-global-file-syms.readelf.out
+	@ echo "PASS LLVM global-file-syms test" | tee $@
diff --git a/llvm-plugin/annobin.cpp b/llvm-plugin/annobin.cpp
index fe3e036..08b6241 100644
--- a/llvm-plugin/annobin.cpp
+++ b/llvm-plugin/annobin.cpp
@@ -25,16 +25,24 @@
 #endif
 #include "llvm/LTO/legacy/LTOCodeGenerator.h"
 #include "annobin-global.h"
+#include "annobin-common.h"
 #include <cstring>
 #include <cctype>
 #include <cstdarg>
+#include <iomanip>
 #include <sstream>
+#include <sys/time.h>
 
 using namespace llvm;
 namespace
 {
   static bool                 be_verbose = false;
   static unsigned int         target_start_sym_bias = 0;
+  /* True if the symbols used to map addresses to file names should be global.
+     On some architectures these symbols have to be global so that they will
+     be preserved in object files.  But doing so can prevent the build-id
+     mechanism from working, since the symbols contain build-date information.  */
+  bool                        global_file_name_symbols = false;
 
   // Helper functions used throughout this file.
   template<class... Tys>
@@ -46,7 +54,6 @@ namespace
     (void) std::initializer_list<int>{((oss << args), 1)...};
     return strdup (oss.str().c_str());
   }
-#if 0
   static inline void
   inform (char const fmt[], ...)
   {
@@ -59,7 +66,6 @@ namespace
     fputc ('\n', stderr);
     va_end (args);
   }
-#endif
   static inline void
   verbose (char const fmt[], ...)
   {
@@ -278,6 +284,24 @@ namespace
       free (buffer);
     }
 
+    static bool
+    parse_argument (const char * key, const char * value)
+    {
+      if (streq (key, "verbose"))
+	be_verbose = true;
+      else if (streq (key, "global-file-syms"))
+	global_file_name_symbols = true;
+      else if (streq (key, "no-global-file-syms"))
+	global_file_name_symbols = false;
+      else
+	{
+	  inform ("annobin: unrecognised option: %s\n", key);
+	  return false;
+        }
+
+      return true;
+    }
+
   public:
 #if __clang_major__ > 12
     AnnobinModule()
@@ -289,6 +313,8 @@ namespace
       if (getenv ("ANNOBIN_VERBOSE") != NULL
 	  && ! streq (getenv ("ANNOBIN_VERBOSE"), "false"))
 	be_verbose = true;
+
+      parse_env (&parse_argument);
     }
 
     void
@@ -486,6 +512,29 @@ namespace
       for( auto & c : name)
 	if (!isalnum (c))
 	  c = '_';
+
+      if (global_file_name_symbols)
+	{
+	  /* A program can have multiple source files with the same name.
+	     Or indeed the same source file can be included multiple times.
+	     Or a library can be built from a sources which include file names
+	     that match application file names.  Whatever the reason, we need
+	     to be ensure that we generate unique global symbol names.  So we
+	     append the time to the symbol name.  This will of course break
+	     the functionality of build-ids.  That is why this option is off
+	     by default.  */
+	  struct timeval tv;
+
+	  if (gettimeofday (& tv, NULL))
+	    {
+	      ice ("unable to get time of day.");
+	      tv.tv_sec = tv.tv_usec = 0;
+	    }
+	  std::ostringstream t;
+	  t << "_" << std::setfill('0') << std::setw(8) << (long) tv.tv_sec;
+	  t << "_" << std::setfill('0') << std::setw(8) << (long) tv.tv_usec;
+	  name += t.str();
+	}
     }
 
     static void
-- 
2.44.0


  parent reply	other threads:[~2024-03-22 20:18 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-03-20 17:28 global-file-syms for clang grant
2024-03-21  8:35 ` Nick Clifton
2024-03-21 15:32   ` grant
2024-03-21 20:47     ` Tulio Magno Quites Machado Filho
2024-03-22 20:17     ` Tulio Magno Quites Machado Filho [this message]
2024-03-25 16:00       ` [PATCH] Add support for global-file-syms to the clang and llvm plugins Nick Clifton
2024-03-25 17:19     ` global-file-syms for clang Nick Clifton
2024-03-25 17:38       ` grant
2024-03-26 11:31         ` Nick Clifton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240322201736.421134-1-tuliom@ascii.art.br \
    --to=tuliom@ascii.art.br \
    --cc=annobin@sourceware.org \
    --cc=grant@hcubed.com \
    --cc=nickc@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).