public inbox for elfutils@sourceware.org
 help / color / mirror / Atom feed
* [PATCH] elfclassify tool
@ 2019-04-12 15:38 Florian Weimer
  2019-04-15 15:39 ` Mark Wielaard
  0 siblings, 1 reply; 36+ messages in thread
From: Florian Weimer @ 2019-04-12 15:38 UTC (permalink / raw)
  To: elfutils-devel

This patch adds an elfclassify tool, mainly for the benefit of RPM's
find-debuginfo.sh.

I still need to implement an --unstripped option and fix the iteration
over the dynamic section.

Suggestions for improving the argp/help output are welcome as well.  I'm
not familiar with argp at all.

I'm keeping a branch with these changes here:

  <https://pagure.io/fweimer/elfutils/commits/elfclassify>

Thanks,
Florian

diff --git a/src/Makefile.am b/src/Makefile.am
index 2b1c0dcb..966d1da7 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -26,7 +26,8 @@ AM_CPPFLAGS += -I$(srcdir)/../libelf -I$(srcdir)/../libebl \
 AM_LDFLAGS = -Wl,-rpath-link,../libelf:../libdw
 
 bin_PROGRAMS = readelf nm size strip elflint findtextrel addr2line \
-	       elfcmp objdump ranlib strings ar unstrip stack elfcompress
+	       elfcmp objdump ranlib strings ar unstrip stack elfcompress \
+	       elfclassify
 
 noinst_LIBRARIES = libar.a
 
@@ -83,6 +84,7 @@ ar_LDADD = libar.a $(libelf) $(libeu) $(argp_LDADD)
 unstrip_LDADD = $(libebl) $(libelf) $(libdw) $(libeu) $(argp_LDADD) -ldl
 stack_LDADD = $(libebl) $(libelf) $(libdw) $(libeu) $(argp_LDADD) -ldl $(demanglelib)
 elfcompress_LDADD = $(libebl) $(libelf) $(libdw) $(libeu) $(argp_LDADD)
+elfclassify_LDADD = $(libelf) $(libeu) $(argp_LDADD)
 
 installcheck-binPROGRAMS: $(bin_PROGRAMS)
 	bad=0; pid=$$$$; list="$(bin_PROGRAMS)"; for p in $$list; do \
diff --git a/src/elfclassify.c b/src/elfclassify.c
new file mode 100644
index 00000000..ead3260b
--- /dev/null
+++ b/src/elfclassify.c
@@ -0,0 +1,387 @@
+/* Classification of ELF files.
+   Copyright (C) 2019 Red Hat, Inc.
+   This file is part of elfutils.
+
+   This file is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   elfutils is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+#include <argp.h>
+#include <error.h>
+#include <fcntl.h>
+#include <gelf.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+
+#include ELFUTILS_HEADER(elf)
+#include "printversion.h"
+
+/* Name and version of program.  */
+ARGP_PROGRAM_VERSION_HOOK_DEF = print_version;
+
+/* Bug report address.  */
+ARGP_PROGRAM_BUG_ADDRESS_DEF = PACKAGE_BUGREPORT;
+
+enum classify_command
+{
+  classify_file = 1000,
+  classify_elf,
+  classify_executable,
+  classify_shared,
+  classify_loadable
+};
+
+/* Set by parse_opt.  */
+static enum classify_command command;
+static const char *command_path;
+static int verbose;
+
+/* Set by map_file.  */
+static int file_fd = -1;
+
+static void
+open_file (void)
+{
+  if (verbose > 1)
+    fprintf (stderr, "debug: processing file: %s\n", command_path);
+
+  file_fd = open (command_path, O_RDONLY);
+  if (file_fd < 0)
+    {
+      if (errno == ENOENT)
+        exit (1);
+      else
+        error (2, errno, N_("opening %s"), command_path);
+    }
+  struct stat st;
+  if (fstat (file_fd, &st) != 0)
+    error (2, errno, N_("reading %s\n"), command_path);
+  if (!S_ISREG (st.st_mode))
+    exit (1);
+}
+
+/* Set by open_elf.  */
+static Elf *elf;
+
+static void
+open_elf (void)
+{
+  open_file ();
+  elf = elf_begin (file_fd, ELF_C_READ, NULL);
+  if (elf == NULL)
+    error (2, 0, "%s: %s", command_path, elf_errmsg (-1));
+  if (elf_kind (elf) != ELF_K_ELF && elf_kind (elf) != ELF_K_AR)
+    exit (1);
+}
+
+static int elf_type;
+static bool has_program_interpreter;
+static bool has_dynamic;
+static bool has_soname;
+static bool has_pie_flag;
+static bool has_dt_debug;
+
+static void
+run_classify (void)
+{
+  if (elf_kind (elf) != ELF_K_ELF)
+    return;
+
+  GElf_Ehdr ehdr_storage;
+  GElf_Ehdr *ehdr = gelf_getehdr (elf, &ehdr_storage);
+  if (ehdr == NULL)
+    exit (1);
+  elf_type = ehdr->e_type;
+
+  /* Examine program headers.  */
+  {
+    size_t nphdrs;
+    if (elf_getphdrnum (elf, &nphdrs) != 0)
+      error (2, 0, "%s: program header: %s", command_path, elf_errmsg (-1));
+    if (nphdrs > INT_MAX)
+      error (2, 0, "%s: number of program headers is too large: %zu",
+             command_path, nphdrs);
+    for (size_t phdr_idx = 0; phdr_idx < nphdrs; ++phdr_idx)
+      {
+        GElf_Phdr phdr_storage;
+        GElf_Phdr *phdr = gelf_getphdr (elf, phdr_idx, &phdr_storage);
+        if (phdr == NULL)
+          error (2, 0, "%s: %s", command_path, elf_errmsg (-1));
+        if (phdr->p_type == PT_DYNAMIC)
+          has_dynamic = true;
+        if (phdr->p_type == PT_INTERP)
+          has_program_interpreter = true;
+      }
+  }
+
+  /* Examine the dynamic section.  */
+  if (has_dynamic)
+    {
+      Elf_Scn *dyn_section = NULL;
+      {
+        Elf_Scn *scn = NULL;
+        while (true)
+          {
+            scn = elf_nextscn (elf, scn);
+            if (scn == NULL)
+              break;
+            GElf_Shdr shdr_storage;
+            GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_storage);
+            if (shdr == NULL)
+              error (2, 0, N_("could not obtain section header: %s"),
+                     elf_errmsg (-1));
+            if (verbose > 2)
+              fprintf (stderr, "debug: section header %d found\n",
+                       shdr->sh_type);
+            if (shdr->sh_type == SHT_DYNAMIC)
+              {
+                if (verbose > 1)
+                  fputs ("debug: dynamic section found", stderr);
+                dyn_section = scn;
+                break;
+              }
+          }
+      }
+      if (dyn_section != NULL)
+        {
+          Elf_Data *data = elf_getdata (dyn_section, NULL);
+          if (verbose > 2)
+            fprintf (stderr, "debug: Elf_Data for dynamic section: %p\n",
+                     data);
+
+          if (data != NULL)
+            for (int dyn_idx = 0; ; ++dyn_idx)
+              {
+                GElf_Dyn dyn_storage;
+                GElf_Dyn *dyn = gelf_getdyn (data, dyn_idx, &dyn_storage);
+                if (dyn == NULL)
+                  break;
+                if (verbose > 2)
+                  fprintf (stderr, "debug: dynamic entry %d"
+                           " with tag %llu found\n",
+                           dyn_idx, (unsigned long long int) dyn->d_tag);
+                if (dyn->d_tag == DT_SONAME)
+                  has_soname = true;
+                if (dyn->d_tag == DT_FLAGS_1 && (dyn->d_un.d_val & DF_1_PIE))
+                  has_pie_flag = true;
+                if (dyn->d_tag == DT_DEBUG)
+                  has_dt_debug = true;
+                if (dyn->d_tag == DT_NULL)
+                  break;
+              }
+        }
+    }
+
+  if (verbose)
+    {
+      fprintf (stderr, "info: ELF type: %d\n", elf_type);
+      if (has_program_interpreter)
+        fputs ("info: program interpreter found\n", stderr);
+      if (has_dynamic)
+        fputs ("info: dynamic segment found\n", stderr);
+      if (has_soname)
+        fputs ("info: soname found\n", stderr);
+      if (has_pie_flag)
+        fputs ("info: PIE flag found\n", stderr);
+      if (has_dt_debug)
+        fputs ("info: DT_DEBUG found\n", stderr);
+    }
+}
+
+/* Return true if the file is a loadable object, which basically means
+   it is an ELF file, but not a relocatable object file.  (The kernel
+   and various userspace components can load ET_REL files, but we
+   disregard that for our classification purposes.)  */
+static bool
+is_loadable (void)
+{
+  return elf_kind (elf) == ELF_K_ELF && elf_type != ET_REL;
+}
+
+static bool
+is_shared (void)
+{
+  if (!is_loadable ())
+    return false;
+
+  /* The ELF type is very clear: this is an executable.  */
+  if (elf_type == ET_EXEC)
+    return false;
+
+  /* If the object is marked as PIE, it is definitely an executable,
+     and not a loadlable shared object.  */
+  if (has_pie_flag)
+    return false;
+
+  /* Treat a DT_SONAME tag as a strong indicator that this is a shared
+     object.  */
+  if (has_soname)
+    return true;
+
+  /* This is probably a PIE program: there is no soname, but a program
+     interpreter.  In theory, this file could be also  */
+  if (has_program_interpreter)
+    return false;
+
+  /* Roland McGrath mentions in
+     <https://www.sourceware.org/ml/libc-alpha/2015-03/msg00605.html>,
+     that “we defined a PIE as an ET_DYN with a DT_DEBUG”.  This
+     matches current binutils behavior (version 2.32).  DT_DEBUG is
+     added if bfd_link_executable returns true or if bfd_link_pic
+     returns false, depending on the architectures.  However, DT_DEBUG
+     is not documented as being specific to executables, therefore use
+     it only as a low-priority discriminator.  */
+  if (has_dt_debug)
+    return false;
+
+  /* If there is no dynamic section, the file cannot be loaded as a
+     shared object.  */
+  if (!has_dynamic)
+    return false;
+  return true;
+}
+
+static bool
+is_executable (void)
+{
+  if (!is_loadable ())
+    return false;
+
+  /* A loadable object which is not a shared object is treated as an
+     executable.  */
+  return !is_shared ();
+}
+
+static error_t
+parse_opt (int key, char *arg, struct argp_state *state)
+{
+  switch (key)
+    {
+    case classify_file:
+    case classify_elf:
+    case classify_executable:
+    case classify_shared:
+    case classify_loadable:
+      command = key;
+      command_path = arg;
+      break;
+
+    case 'v':
+      ++verbose;
+      break;
+
+    case ARGP_KEY_ARG:
+      argp_usage (state);
+      exit (2);
+    }
+
+  return 0;
+}
+
+int
+main (int argc, char **argv)
+{
+  const struct argp_option options[] =
+    {
+      { "file", classify_file, "PATH", 0,
+        N_("Check PATH is file that can be read"), 0 },
+      { "elf", classify_elf, "PATH", 0,
+        N_("Check if the file at PATH is a valid ELF object"), 0 },
+      { "executable", classify_executable, "PATH", 0,
+        N_("Check if the file at PATH is an ELF program executable"), 0 },
+      { "shared", classify_shared, "PATH", 0,
+        N_("Check if the file at PATH is an ELF shared object (DSO)"), 0 },
+      { "loadable", classify_loadable, "PATH", 0,
+        N_("Check if the file at PATH is a loadable object (program or shared object)"), 0 },
+      { "verbose", 'v', NULL, 0,
+        N_("Output additional information (can be specified multiple times)"), 0 },
+      { NULL, 0, NULL, 0, NULL, 0 }
+    };
+
+  const struct argp argp =
+    {
+      .options = options,
+      .parser = parse_opt,
+      .doc = N_("Determine the type of an ELF file.")
+    };
+
+  if (argp_parse (&argp, argc, argv, ARGP_NO_EXIT, NULL, NULL) != 0)
+    return 2;
+
+  elf_version (EV_CURRENT);
+
+  switch (command)
+    {
+    case classify_file:
+      open_file ();
+      return 0;
+    case classify_elf:
+      open_elf ();
+      return 0;
+
+    case classify_executable:
+      open_elf ();
+      run_classify ();
+      if (is_executable ())
+        {
+          if (verbose)
+            fputs ("info: executable\n", stderr);
+          return 0;
+        }
+      else
+        {
+          if (verbose)
+            fputs ("info: not an executable\n", stderr);
+          return 1;
+        }
+
+    case classify_shared:
+      open_elf ();
+      run_classify ();
+      if (is_shared ())
+        {
+          if (verbose)
+            fputs ("info: shared object\n", stderr);
+          return 0;
+        }
+      else
+        {
+          if (verbose)
+            fputs ("info: not a shared object\n", stderr);
+          return 1;
+        }
+
+    case classify_loadable:
+      open_elf ();
+      run_classify ();
+      if (is_loadable ())
+        {
+          if (verbose)
+            fputs ("info: loadable object\n", stderr);
+          return 0;
+        }
+      else
+        {
+          if (verbose)
+            fputs ("info: not a loadable object\n", stderr);
+          return 1;
+        }
+    }
+
+  return 2;
+}

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] elfclassify tool
  2019-04-12 15:38 [PATCH] elfclassify tool Florian Weimer
@ 2019-04-15 15:39 ` Mark Wielaard
  2019-04-16 11:38   ` Florian Weimer
  0 siblings, 1 reply; 36+ messages in thread
From: Mark Wielaard @ 2019-04-15 15:39 UTC (permalink / raw)
  To: Florian Weimer, elfutils-devel; +Cc: Panu Matilainen

Hi,

On Fri, 2019-04-12 at 17:38 +0200, Florian Weimer wrote:
> This patch adds an elfclassify tool, mainly for the benefit of RPM's
> find-debuginfo.sh.

I have CCed Panu to see if he has any input.

> I still need to implement an --unstripped option and fix the
> iteration over the dynamic section.

We did already discuss some of this off-list.

The basic idea is that we provide a replacement for using "file" as an
ELF file classifier. It currently provides the following options:

    --elf=PATH         Check if the file at PATH is a valid ELF object
    --executable=PATH  Check if the file at PATH is an ELF program
                       executable
    --file=PATH        Check PATH is file that can be read
    --loadable=PATH    Check if the file at PATH is a loadable object
                       (program or shared object)
    --shared=PATH      Check if the file at PATH is an ELF shared object
                       (DSO)
-v, --verbose          Output additional information (can be specified
                       multiple times)

The program returns 0 on success (the given PATH is of the requested
classification), returns 1 on failure (the given PATH isn't of the
requested classification), or returns 2 on error.

Note that only one PATH can be given (the = is optional).

--elf PATH returns 0 whenever the file can be opened and a minimal ELF
header can be read (it might not be a completely valid ELF file). Do we
want or need to do any more verification (e.g. try to get the full ELF
header, walk through all phdrs and shdrs)?

Only one of --executable and --shared can be true for an ELF file.
They indicate whether the primary purpose of an ELF file is to be an
executable or a shared library (this is for example how rpm can make a
decision to strip or keep the symtab table, you might want to keep it
for an ELF file that is used primarily as a common shared library, but
not if it is primarily used as executable).

--unstripped (not yet implemented) would be a classification that
indicates whether the ELF file can be stripped (further), that is, whether
it has a .symtab (symbol table), .debug_* sections (and possibly any non-
loadable sections -- "file" only detects the first two).

I am not sure --file=PATH is a useful option.
But maybe we need some way to indicate whether a file is a real file or
a symlink? But the current implementation returns 0 even for symlinks.
As does every other option (if the file is a symlink to an ELF file of
the requested classification). Is this what we want? I would suggest
that we return 1 for anything that is not a regular file. But that
would mean that for example eu-elfclassify --executable=/proc/$$/exe
would also return 1 (currently it returns 0, which might be helpful in
some cases).

--loadable basically checks whether the given ELF file is not an object
(ET_REL) file, so it will return 0 for either an executable, a shared
object or core file, but it does not check any other attribute (like
whether it has program headers and/or loadable segments). Personally I
would like it if this at least included a check for a PT_LOAD segment.

This does not classify kernel modules as loadable objects.
rpm does contain a check for that, it might make sense to include that
as a separate classification in elfclassify --kernel-module.

Kernel modules are also special because they can be compressed ELF
files. Do we want to support that? (It is easy with dwelf_elf_begin.)
Could that for example be a flag/option like --compressed, which can be
combined with any other classification option?

I think another useful classification would be --debugfile which
succeeds if the primary function of the given ELF file is being a
separate debug file (basically .debug, .dwo or dwz .multi file) which
cannot be linked and loaded on its own.

BTW. Florian, the extra options are certainly not required for you to
implement to get eu-elfclassify accepted. They are just suggestions,
which we might decide not to do/add. Or they can be added by others if
they think they are useful.

> Suggestions for improving the argp/help output are welcome as
> well.  I'm not familiar with argp at all.

Your usage of argp seems fine. But I think you don't want to use
ARGP_NO_EXIT. That causes standard options like --version and --help to
not exit (with success), which is what we generally want them to do.
We do want --version and --help to not return an error
indicator (this is actually checked by make distcheck).

I think we might want to avoid specific ELF concepts in the
classification descriptors though. For example people might have a
different concept of DSO.

> I'm keeping a branch with these changes here:
> 
>   <https://pagure.io/fweimer/elfutils/commits/elfclassify>
>

> +/* Name and version of program.  */
> +ARGP_PROGRAM_VERSION_HOOK_DEF = print_version;
> +
> +/* Bug report address.  */
> +ARGP_PROGRAM_BUG_ADDRESS_DEF = PACKAGE_BUGREPORT;
> +
> +enum classify_command
> +{
> +  classify_file = 1000,
> +  classify_elf,
> +  classify_executable,
> +  classify_shared,
> +  classify_loadable
> +};
> +
> +/* Set by parse_opt.  */
> +static enum classify_command command;
> +static const char *command_path;
> +static int verbose;
> +
> +/* Set by map_file.  */
> +static int file_fd = -1;

map_file?

> +static void
> +open_file (void)
> +{
> +  if (verbose > 1)
> +    fprintf (stderr, "debug: processing file: %s\n", command_path);
> +
> +  file_fd = open (command_path, O_RDONLY);
> +  if (file_fd < 0)
> +    {
> +      if (errno == ENOENT)
> +        exit (1);
> +      else
> +        error (2, errno, N_("opening %s"), command_path);
> +    }
> +  struct stat st;
> +  if (fstat (file_fd, &st) != 0)
> +    error (2, errno, N_("reading %s\n"), command_path);
> +  if (!S_ISREG (st.st_mode))
> +    exit (1);
> +}

That is odd, I assumed !S_ISREG would be true for symlinks.

> +  if (verbose)
> +    {
> +      fprintf (stderr, "info: ELF type: %d\n", elf_type);
> +      if (has_program_interpreter)
> +        fputs ("info: program interpreter found\n", stderr);

You might want to print the program interpreter here.

> +      if (has_dynamic)
> +        fputs ("info: dynamic segment found\n", stderr);
> +      if (has_soname)
> +        fputs ("info: soname found\n", stderr);

You might want to print the soname found here.

> +      if (has_pie_flag)
> +        fputs ("info: PIE flag found\n", stderr);

Maybe call it DF_1_PIE flag?

> +      if (has_dt_debug)
> +        fputs ("info: DT_DEBUG found\n", stderr);
> +    }
> +}

> +  /* This is probably a PIE program: there is no soname, but a program
> +     interpreter.  In theory, this file could be also  */
> +  if (has_program_interpreter)
> +    return false;

Comment seems to end abruptly.

> +static bool
> +is_executable (void)
> +{
> +  if (!is_loadable ())
> +    return false;
> +
> +  /* A loadable object which is not a shared object is treated as an
> +     executable.  */
> +  return !is_shared ();
> +}
> +
> +static error_t
> +parse_opt (int key, char *arg, struct argp_state *state)
> +{
> +  switch (key)
> +    {
> +    case classify_file:
> +    case classify_elf:
> +    case classify_executable:
> +    case classify_shared:
> +    case classify_loadable:
> +      command = key;
> +      command_path = arg;
> +      break;

If you want to only allow one classification at a time you should check
whether command is already set and call something like:
argp_error (state, N_("Can only use one classification at a time."));

> +    case 'v':
> +      ++verbose;
> +      break;
> +
> +    case ARGP_KEY_ARG:
> +      argp_usage (state);
> +      exit (2);
> +    }
> +
> +  return 0;
> +}

Thanks,

Mark

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] elfclassify tool
  2019-04-15 15:39 ` Mark Wielaard
@ 2019-04-16 11:38   ` Florian Weimer
  2019-04-18 11:17     ` Florian Weimer
  2019-07-19 13:24     ` Mark Wielaard
  0 siblings, 2 replies; 36+ messages in thread
From: Florian Weimer @ 2019-04-16 11:38 UTC (permalink / raw)
  To: Mark Wielaard; +Cc: elfutils-devel, Panu Matilainen

* Mark Wielaard:

>> I still need to implement an --unstripped option and fix the
>> iteration over the dynamic section.
>
> We did already discuss some of this off-list.

Thanks for summarizing the previous discussion.

> --elf PATH return 0 whenever the file can be opened and a minimal ELF
> header can be read (it might not be a completely valid ELF file). Do we
> want or need to do any more verification (e.g. try to get the full ELF
> header, walk through all phdrs and shdrs)?

If we ever need that, I think we should add it as separate options,
detecting both separate debuginfo and regular ELF files.

> --unstripped (not yet implemented) would be a classification that
> indicates whether the ELF file can be stripped (further), that is has a
> .symtab (symbol table), .debug_* sections (and possibly any non-
> loadable sections -- "file" only detects the first two).

Some non-allocated sections are expected in stripped binaries:
.gnu_debuglink, .shstrtab, .gnu.build.attributes look relevant in this
context.  I'm not sure if we should flag any other non-allocated section
in this way.

> I am not sure --file=PATH is a useful option.

It's useful for determining if the file exists and can be mapped.

> But maybe we need some way to indicate whether a file is a real file or
> a symlink? But the current implementation returns 0 even for symlinks.
> As do every other option (if the file is a symlink to an ELF file of
> the requested classification). Is this what we want? I would suggest
> that we return 1 for anything that is not a regular file. But that
> would mean that for example eu-elfclassify --executable=/proc/$$/exe
> would also return 1 (currently it returns 0, which might be helpful in
> some cases).

I don't know what RPM needs in this context.  I expect that it can
easily filter out non-regular files.  My problem with symbolic link
detection is that it is so inconsistent—it generally applies to the
final pathname component, and that does not look useful to me.

> --loadable basically checks whether the given ELF file is not an object
> (ET_REL) file, so it will return 0 for either an executable, a shared
> object or core file, but not check whether any other attribute (like
> whether it has program headers and/or loadable segments). Personally I
> would like it if this at least included a check for a PT_LOAD segment.

Is a PT_LOAD segment required to make the PT_DYNAMIC segment visible?
It is possible to have mostly empty objects, after all.

> This does not classify kernel modules as loadable objects.
> rpm does contain a check for that, it might make sense to include that
> as a separate classification in elfclassify --kernel-module.
>
> Kernel modules are also special because they can be compressed ELF
> files. Do we want to support that? (It is easy with gelf_elf_begin).
> That could for example be an flag/option like --compressed which can be
> combined with any other classification option?

How relevant are kernel modules to eu-elfclassify?

Is path-based detection feasible for kernel modules?

> I think another useful classification would be --debugfile which
> succeeds if the primary function of the given ELF file is being a
> separete debug file (basically .debug, .dwo or dwz .multi file) which
> cannot be linked and loaded on its own

That is very difficult to detect reliably, unfortunately, and would best
be implemented in lib(g)elf itself because it would be generally useful,
for all kinds of tools which expect to process real ELF files only.

> BTW. Florian, the extra options are certainly not required for you to
> implement to get eu-elfclassify accepted. They are just suggestions,
> which we might decide not to do/add. Or they can be added by others if
> they think they are useful.

Understood.  I would rather fix the command line syntax as a priority,
implement --unstripped, and add a test suite.

>> Suggestions for improving the argp/help output are welcome as
>> well.  I'm not familiar with argp at all.
>
> You usage of argp seems fine.

Trust me, it's been a struggle so far.

> But I think you don't want to use
> ARGP_NO_EXIT. That causes standard options like --version and --help to
> not exit (with success). Which is generally what we want.
> We do want to want --version and --help to not return an error
> indicator (this is actually checked by make distcheck).

I want to exit with status 2 on usage error.  I couldn't make that
happen without ARGP_NO_EXIT.  I'm open to different suggestions.

> I think we might want to avoid specific ELF concepts in the
> classification descriptors though. For example people might have a
> different concept of DSO.
>
>> I'm keeping a branch with these changes here:
>> 
>>   <https://pagure.io/fweimer/elfutils/commits/elfclassify>
>>
>
>> +/* Name and version of program.  */
>> +ARGP_PROGRAM_VERSION_HOOK_DEF = print_version;
>> +
>> +/* Bug report address.  */
>> +ARGP_PROGRAM_BUG_ADDRESS_DEF = PACKAGE_BUGREPORT;
>> +
>> +enum classify_command
>> +{
>> +  classify_file = 1000,
>> +  classify_elf,
>> +  classify_executable,
>> +  classify_shared,
>> +  classify_loadable
>> +};
>> +
>> +/* Set by parse_opt.  */
>> +static enum classify_command command;
>> +static const char *command_path;
>> +static int verbose;
>> +
>> +/* Set by map_file.  */
>> +static int file_fd = -1;
>
> map_file?
>
>> +static void
>> +open_file (void)
>> +{
>> +  if (verbose > 1)
>> +    fprintf (stderr, "debug: processing file: %s\n", command_path);
>> +
>> +  file_fd = open (command_path, O_RDONLY);
>> +  if (file_fd < 0)
>> +    {
>> +      if (errno == ENOENT)
>> +        exit (1);
>> +      else
>> +        error (2, errno, N_("opening %s"), command_path);
>> +    }
>> +  struct stat st;
>> +  if (fstat (file_fd, &st) != 0)
>> +    error (2, errno, N_("reading %s\n"), command_path);
>> +  if (!S_ISREG (st.st_mode))
>> +    exit (1);
>> +}
>
> That is odd, I assumed !S_ISREG would by true for symlinks.

No, the open followed the symbolic link.  This is needed for rejecting
directories.  I've added a comment.

>> +  if (verbose)
>> +    {
>> +      fprintf (stderr, "info: ELF type: %d\n", elf_type);
>> +      if (has_program_interpreter)
>> +        fputs ("info: program interpreter found\n", stderr);
>
> You might want to print the program interpreter here.

I can't do that without detecting first if the file is separated
debuginfo.  (Separated debuginfo has a program header that points
nowhere.)

>> +      if (has_dynamic)
>> +        fputs ("info: dynamic segment found\n", stderr);
>> +      if (has_soname)
>> +        fputs ("info: soname found\n", stderr);
>
> You might want to print the soname found here.

This needs access to the dynamic string table.  I don't know how easy
this is to implement.

>> +      if (has_pie_flag)
>> +        fputs ("info: PIE flag found\n", stderr);
>
> Maybe call it DF_1_PIE flag?

Changed.

>> +      if (has_dt_debug)
>> +        fputs ("info: DT_DEBUG found\n", stderr);
>> +    }
>> +}
>
>> +  /* This is probably a PIE program: there is no soname, but a program
>> +     interpreter.  In theory, this file could be also  */
>> +  if (has_program_interpreter)
>> +    return false;
>
> Comment seems to end abruptly.

Fixed.

>> +static error_t
>> +parse_opt (int key, char *arg, struct argp_state *state)
>> +{
>> +  switch (key)
>> +    {
>> +    case classify_file:
>> +    case classify_elf:
>> +    case classify_executable:
>> +    case classify_shared:
>> +    case classify_loadable:
>> +      command = key;
>> +      command_path = arg;
>> +      break;
>
> If you want to only allow one classification at a time you should check
> whether command is already set and call something like:
> argp_error (state, N_("Can only use one classification at a time."));

I tried that, but I ran into issues with that.  It also breaks --help
with multiple/conflicting flags.

I'm trying to come up with a different command line syntax anyway.

Thanks,
Florian

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] elfclassify tool
  2019-04-16 11:38   ` Florian Weimer
@ 2019-04-18 11:17     ` Florian Weimer
  2019-07-19 12:47       ` Mark Wielaard
  2019-07-19 13:24     ` Mark Wielaard
  1 sibling, 1 reply; 36+ messages in thread
From: Florian Weimer @ 2019-04-18 11:17 UTC (permalink / raw)
  To: Mark Wielaard; +Cc: elfutils-devel, Panu Matilainen

* Florian Weimer:

>> BTW. Florian, the extra options are certainly not required for you to
>> implement to get eu-elfclassify accepted. They are just suggestions,
>> which we might decide not to do/add. Or they can be added by others if
>> they think they are useful.
>
> Understood.  I would rather fix the command line syntax as a priority,
> implement --unstripped, and add a test suite.

The patch below, also available here:

  <https://pagure.io/fweimer/elfutils/commits/elfclassify>

reworks the command line parser, implements filtering of file lists, and
adds the --unstripped option.

I assume the next step is to write tests.

Thanks,
Florian

diff --git a/src/Makefile.am b/src/Makefile.am
index 2b1c0dcb..966d1da7 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -26,7 +26,8 @@ AM_CPPFLAGS += -I$(srcdir)/../libelf -I$(srcdir)/../libebl \
 AM_LDFLAGS = -Wl,-rpath-link,../libelf:../libdw
 
 bin_PROGRAMS = readelf nm size strip elflint findtextrel addr2line \
-	       elfcmp objdump ranlib strings ar unstrip stack elfcompress
+	       elfcmp objdump ranlib strings ar unstrip stack elfcompress \
+	       elfclassify
 
 noinst_LIBRARIES = libar.a
 
@@ -83,6 +84,7 @@ ar_LDADD = libar.a $(libelf) $(libeu) $(argp_LDADD)
 unstrip_LDADD = $(libebl) $(libelf) $(libdw) $(libeu) $(argp_LDADD) -ldl
 stack_LDADD = $(libebl) $(libelf) $(libdw) $(libeu) $(argp_LDADD) -ldl $(demanglelib)
 elfcompress_LDADD = $(libebl) $(libelf) $(libdw) $(libeu) $(argp_LDADD)
+elfclassify_LDADD = $(libelf) $(libeu) $(argp_LDADD)
 
 installcheck-binPROGRAMS: $(bin_PROGRAMS)
 	bad=0; pid=$$$$; list="$(bin_PROGRAMS)"; for p in $$list; do \
diff --git a/src/elfclassify.c b/src/elfclassify.c
new file mode 100644
index 00000000..d4b46b64
--- /dev/null
+++ b/src/elfclassify.c
@@ -0,0 +1,654 @@
+/* Classification of ELF files.
+   Copyright (C) 2019 Red Hat, Inc.
+   This file is part of elfutils.
+
+   This file is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   elfutils is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+#include <argp.h>
+#include <error.h>
+#include <fcntl.h>
+#include <gelf.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include ELFUTILS_HEADER(elf)
+#include "printversion.h"
+
+/* Name and version of program.  */
+ARGP_PROGRAM_VERSION_HOOK_DEF = print_version;
+
+/* Bug report address.  */
+ARGP_PROGRAM_BUG_ADDRESS_DEF = PACKAGE_BUGREPORT;
+
+/* Set by parse_opt.  */
+static int verbose;
+
+/* Set by the main function.  */
+static const char *current_path;
+
+/* Set by open_file.  */
+static int file_fd = -1;
+
+/* Open current_path as file_fd.  Return false if it should be skipped.  */
+static bool
+open_file (void)
+{
+  if (file_fd >= 0)
+    {
+      close (file_fd);
+      file_fd = -1;
+    }
+
+  if (verbose > 1)
+    fprintf (stderr, "debug: processing file: %s\n", current_path);
+
+  file_fd = open (current_path, O_RDONLY);
+  if (file_fd < 0)
+    {
+      if (errno == ENOENT)
+        {
+          if (verbose > 0)
+            fprintf (stderr, N_("warning: %s: file does not exist\n"),
+                     current_path);
+          return false;
+        }
+      error (2, errno, N_("opening %s"), current_path); /* Exits.  */
+    }
+  struct stat st;
+  if (fstat (file_fd, &st) != 0)
+    error (2, errno, N_("reading %s"), current_path);
+
+  /* Reject anything that is not a regular file (such as a directory)
+     here because processing those as ELF files would fail.  */
+  if (!S_ISREG (st.st_mode))
+    {
+      if (verbose > 0)
+        fprintf (stderr, N_("warning: %s: not a regular file\n"),
+                 current_path);
+      return false;
+    }
+  return true;
+}
+
+/* Set by open_elf.  */
+static Elf *elf;
+
+static bool
+open_elf (void)
+{
+  if (elf != NULL)
+    {
+      elf_end (elf);
+      elf = NULL;
+    }
+
+  if (!open_file ())
+    return false;
+
+  elf = elf_begin (file_fd, ELF_C_READ, NULL);
+  if (elf == NULL)
+    error (2, 0, "%s: %s", current_path, elf_errmsg (-1));
+  if (elf_kind (elf) != ELF_K_ELF && elf_kind (elf) != ELF_K_AR)
+    {
+      if (verbose > 0)
+        fprintf (stderr, N_("warning: %s: not an ELF file\n"),
+                 current_path);
+      return false;
+    }
+
+  return true;
+}
+
+static int elf_type;
+static bool has_program_interpreter;
+static bool has_dynamic;
+static bool has_soname;
+static bool has_pie_flag;
+static bool has_dt_debug;
+static bool has_symtab;
+static bool has_debug_sections;
+
+/* Analyze the ELF file opened by open_elf and record the results in
+   the elf_type and has_* variables above.  Libelf failures are fatal
+   (exit status 2).  Files that are not ELF_K_ELF are not examined.  */
+static void
+run_classify (void)
+{
+  /* Reset to unanalyzed default.  */
+  elf_type = 0;
+  has_program_interpreter = false;
+  has_dynamic = false;
+  has_soname = false;
+  has_pie_flag = false;
+  has_dt_debug = false;
+  has_symtab = false;
+  has_debug_sections = false;
+
+  if (elf_kind (elf) != ELF_K_ELF)
+    return;
+
+  GElf_Ehdr ehdr_storage;
+  GElf_Ehdr *ehdr = gelf_getehdr (elf, &ehdr_storage);
+  if (ehdr == NULL)
+    error (2, 0, N_("%s: ELF header: %s"), current_path, elf_errmsg (-1));
+  elf_type = ehdr->e_type;
+
+  /* Examine program headers.  */
+  {
+    size_t nphdrs;
+    if (elf_getphdrnum (elf, &nphdrs) != 0)
+      error (2, 0, "%s: program header: %s", current_path, elf_errmsg (-1));
+    if (nphdrs > INT_MAX)
+      error (2, 0, "%s: number of program headers is too large: %zu",
+             current_path, nphdrs);
+    for (size_t phdr_idx = 0; phdr_idx < nphdrs; ++phdr_idx)
+      {
+        GElf_Phdr phdr_storage;
+        GElf_Phdr *phdr = gelf_getphdr (elf, phdr_idx, &phdr_storage);
+        if (phdr == NULL)
+          error (2, 0, "%s: %s", current_path, elf_errmsg (-1));
+        if (phdr->p_type == PT_DYNAMIC)
+          has_dynamic = true;
+        if (phdr->p_type == PT_INTERP)
+          has_program_interpreter = true;
+      }
+  }
+
+  Elf_Scn *dyn_section = NULL;
+  {
+    size_t shstrndx;
+    if (unlikely (elf_getshdrstrndx (elf, &shstrndx) < 0))
+      error (2, 0, N_("%s: section header string table index: %s"),
+             current_path, elf_errmsg (-1));
+
+    Elf_Scn *scn = NULL;
+    while ((scn = elf_nextscn (elf, scn)) != NULL)
+      {
+        GElf_Shdr shdr_storage;
+        GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_storage);
+        if (shdr == NULL)
+          error (2, 0, N_("could not obtain section header: %s"),
+                 elf_errmsg (-1));
+        const char *section_name = elf_strptr (elf, shstrndx, shdr->sh_name);
+        if (section_name == NULL)
+          error (2, 0, N_("%s: could not obtain section name: %s"),
+                 current_path, elf_errmsg (-1));
+        if (verbose > 2)
+          fprintf (stderr, "debug: section header %s (type %d) found\n",
+                   section_name, shdr->sh_type);
+        if (shdr->sh_type == SHT_DYNAMIC)
+          {
+            if (verbose > 1)
+              fputs ("debug: dynamic section found\n", stderr);
+            dyn_section = scn;
+          }
+        if (shdr->sh_type == SHT_SYMTAB)
+          {
+            if (verbose > 1)
+              fputs ("debug: symtab section found\n", stderr);
+            has_symtab = true;
+          }
+        const char *debug_prefix = ".debug_";
+        if (strncmp (section_name, debug_prefix, strlen (debug_prefix)) == 0)
+          {
+            if (verbose > 1)
+              fputs ("debug: .debug_* section found\n", stderr);
+            has_debug_sections = true;
+          }
+      }
+  }
+
+  /* Examine the dynamic section.  */
+  if (has_dynamic)
+    {
+      if (dyn_section != NULL)
+        {
+          Elf_Data *data = elf_getdata (dyn_section, NULL);
+          if (verbose > 2)
+            fprintf (stderr, "debug: Elf_Data for dynamic section: %p\n",
+                     (void *) data);
+
+          if (data != NULL)
+            for (int dyn_idx = 0; ; ++dyn_idx)
+              {
+                GElf_Dyn dyn_storage;
+                GElf_Dyn *dyn = gelf_getdyn (data, dyn_idx, &dyn_storage);
+                if (dyn == NULL)
+                  break;
+                if (verbose > 2)
+                  fprintf (stderr, "debug: dynamic entry %d"
+                           " with tag %llu found\n",
+                           dyn_idx, (unsigned long long int) dyn->d_tag);
+                if (dyn->d_tag == DT_SONAME)
+                  has_soname = true;
+                if (dyn->d_tag == DT_FLAGS_1 && (dyn->d_un.d_val & DF_1_PIE))
+                  has_pie_flag = true;
+                if (dyn->d_tag == DT_DEBUG)
+                  has_dt_debug = true;
+                if (dyn->d_tag == DT_NULL)
+                  break;
+              }
+        }
+    }
+
+  if (verbose)
+    {
+      fprintf (stderr, "info: %s: ELF type: %d\n", current_path, elf_type);
+      if (has_program_interpreter)
+        fprintf (stderr, "info: %s: program interpreter found\n",
+                 current_path);
+      if (has_dynamic)
+        fprintf (stderr, "info: %s: dynamic segment found\n", current_path);
+      if (has_soname)
+        fprintf (stderr, "info: %s: soname found\n", current_path);
+      if (has_pie_flag)
+        fprintf (stderr, "info: %s: DF_1_PIE flag found\n", current_path);
+      if (has_dt_debug)
+        fprintf (stderr, "info: %s: DT_DEBUG found\n", current_path);
+      if (has_symtab)
+        fprintf (stderr, "info: %s: symbol table found\n", current_path);
+      if (has_debug_sections)
+        fprintf (stderr, "info: %s: .debug_* section found\n", current_path);
+    }
+}
+
+/* Return true if the file is a loadable object, which basically means
+   it is an ELF file, but not a relocatable object file.  (The kernel
+   and various userspace components can load ET_REL files, but we
+   disregard that for our classification purposes.)  */
+static bool
+is_loadable (void)
+{
+  return elf_kind (elf) == ELF_K_ELF && elf_type != ET_REL;
+}
+
+/* Return true if the file is an ELF file which has a symbol table or
+   .debug_* sections (and thus can be stripped further).  */
+static bool
+is_unstripped (void)
+{
+  return elf_kind (elf) != ELF_K_NONE && (has_symtab || has_debug_sections);
+}
+
+static bool
+is_shared (void)
+{
+  if (!is_loadable ())
+    return false;
+
+  /* The ELF type is very clear: this is an executable.  */
+  if (elf_type == ET_EXEC)
+    return false;
+
+  /* If the object is marked as PIE, it is definitely an executable,
+     and not a loadable shared object.  */
+  if (has_pie_flag)
+    return false;
+
+  /* Treat a DT_SONAME tag as a strong indicator that this is a shared
+     object.  */
+  if (has_soname)
+    return true;
+
+  /* This is probably a PIE program: there is no soname, but a program
+     interpreter.  In theory, this file could be also a DSO with a
+     soname implied by its file name that can be run as a program.
+     This situation is impossible to resolve in the general case. */
+  if (has_program_interpreter)
+    return false;
+
+  /* Roland McGrath mentions in
+     <https://www.sourceware.org/ml/libc-alpha/2015-03/msg00605.html>,
+     that “we defined a PIE as an ET_DYN with a DT_DEBUG”.  This
+     matches current binutils behavior (version 2.32).  DT_DEBUG is
+     added if bfd_link_executable returns true or if bfd_link_pic
+     returns false, depending on the architectures.  However, DT_DEBUG
+     is not documented as being specific to executables, therefore use
+     it only as a low-priority discriminator.  */
+  if (has_dt_debug)
+    return false;
+
+  /* If there is no dynamic section, the file cannot be loaded as a
+     shared object.  */
+  if (!has_dynamic)
+    return false;
+  return true;
+}
+
+static bool
+is_executable (void)
+{
+  if (!is_loadable ())
+    return false;
+
+  /* A loadable object which is not a shared object is treated as an
+     executable.  */
+  return !is_shared ();
+}
+
+enum classify_requirement { do_not_care, required, forbidden };
+
+enum classify_check
+{
+  classify_elf,
+  classify_unstripped,
+  classify_executable,
+  classify_shared,
+  classify_loadable,
+
+  classify_check_last = classify_loadable
+};
+
+enum
+{
+  classify_check_offset = 1000,
+  classify_check_not_offset = 2000,
+
+  classify_flag_stdin = 3000,
+  classify_flag_stdin0,
+  classify_flag_no_stdin,
+  classify_flag_print,
+  classify_flag_print0,
+  classify_flag_no_print,
+  classify_flag_matching,
+  classify_flag_not_matching,
+};
+
+static bool
+classify_check_positive (int key)
+{
+  return key >= classify_check_offset
+    && key <= classify_check_offset + classify_check_last;
+}
+
+static bool
+classify_check_negative (int key)
+{
+  return key >= classify_check_not_offset
+    && key <= classify_check_not_offset + classify_check_last;
+}
+
+/* Set by parse_opt.  */
+static enum classify_requirement requirements[classify_check_last + 1];
+static enum { no_stdin, do_stdin, do_stdin0 } flag_stdin;
+static enum { no_print, do_print, do_print0 } flag_print;
+static bool flag_print_matching = true;
+
+static error_t
+parse_opt (int key, char *arg __attribute__ ((unused)),
+           struct argp_state *state __attribute__ ((unused)))
+{
+  if (classify_check_positive (key))
+    requirements[key - classify_check_offset] = required;
+  else if (classify_check_negative (key))
+    requirements[key - classify_check_not_offset] = forbidden;
+  else
+    switch (key)
+      {
+      case 'v':
+        ++verbose;
+        break;
+
+      case classify_flag_stdin:
+        flag_stdin = do_stdin;
+        break;
+
+      case classify_flag_stdin0:
+        flag_stdin = do_stdin0;
+        break;
+
+      case classify_flag_no_stdin:
+        flag_stdin = no_stdin;
+        break;
+
+      case classify_flag_print:
+        flag_print = do_print;
+        break;
+
+      case classify_flag_print0:
+        flag_print = do_print0;
+        break;
+
+      case classify_flag_no_print:
+        flag_print = no_print;
+        break;
+
+      case classify_flag_matching:
+        flag_print_matching = true;
+        break;
+
+      case classify_flag_not_matching:
+        flag_print_matching = false;
+        break;
+
+      default:
+        return ARGP_ERR_UNKNOWN;
+      }
+
+  return 0;
+}
+
+/* Perform requested checks against the file at current_path.  If
+   not printing file names, sets *STATUS to 1 if checks failed.  */
+static void
+process_current_path (int *status)
+{
+  bool checks_passed = true;
+
+  if (!open_elf ())
+    {
+      for (enum classify_check check = 0;
+           check <= classify_check_last; ++check)
+        if (requirements[check] == required)
+          checks_passed = false;
+    }
+  else
+    {
+      run_classify ();
+
+      bool checks[] =
+        {
+         [classify_elf] = true,
+         [classify_unstripped] = is_unstripped (),
+         [classify_executable] = is_executable (),
+         [classify_shared] = is_shared (),
+         [classify_loadable] = is_loadable (),
+        };
+
+      if (verbose > 1)
+        {
+          if (checks[classify_unstripped])
+            fprintf (stderr, "debug: %s: unstripped\n", current_path);
+          if (checks[classify_executable])
+            fprintf (stderr, "debug: %s: executable\n", current_path);
+          if (checks[classify_shared])
+            fprintf (stderr, "debug: %s: shared\n", current_path);
+          if (checks[classify_loadable])
+            fprintf (stderr, "debug: %s: loadable\n", current_path);
+        }
+
+      for (enum classify_check check = 0;
+           check <= classify_check_last; ++check)
+        switch (requirements[check])
+          {
+          case required:
+            if (!checks[check])
+              checks_passed = false;
+            break;
+          case forbidden:
+            if (checks[check])
+              checks_passed = false;
+            break;
+          case do_not_care:
+            break;
+          }
+    }
+
+  switch (flag_print)
+    {
+    case do_print:
+      if (checks_passed == flag_print_matching)
+        puts (current_path);
+      break;
+    case do_print0:
+      if (checks_passed == flag_print_matching)
+        fwrite (current_path, strlen (current_path) + 1, 1, stdout);
+      break;
+    case no_print:
+      if (!checks_passed)
+        *status = 1;
+      break;
+    }
+}
+
+/* Called to process standard input if flag_stdin is not no_stdin.
+   Each delimiter-separated file name read is checked like an argument.  */
+static void
+process_stdin (int *status)
+{
+  char delim = flag_stdin == do_stdin0 ? '\0' : '\n';
+
+  char *buffer = NULL;
+  size_t buffer_size = 0;
+  while (true)
+    {
+      ssize_t ret = getdelim (&buffer, &buffer_size, delim, stdin);
+      if (ferror (stdin))
+        error (2, errno, N_("reading from standard input"));
+      if (ret < 0 && !feof (stdin))
+        abort ();           /* Cannot happen due to error check above.  */
+      if (ret < 0)
+        break;              /* End of input, no unterminated last name.  */
+      /* getdelim keeps the delimiter; drop the trailing newline.  */
+      if (delim == '\n' && ret > 0 && buffer[ret - 1] == '\n')
+        buffer[ret - 1] = '\0';
+      current_path = buffer;
+      process_current_path (status);
+    }
+
+  free (buffer);
+}
+
+/* Parse options, then check each FILE argument and any stdin paths.  */
+int
+main (int argc, char **argv)
+{
+  const struct argp_option options[] =
+    {
+      { NULL, 0, NULL, OPTION_DOC, N_("Classification options"), 1 },
+      { "elf", classify_check_offset + classify_elf, NULL, 0,
+        N_("File looks like an ELF object or archive/static library"), 1 },
+      { "unstripped", classify_check_offset + classify_unstripped, NULL, 0,
+        N_("File is an ELF file with symbol table or .debug_* sections \
+and can be stripped further"), 1 },
+      { "executable", classify_check_offset + classify_executable, NULL, 0,
+        N_("File is an ELF program executable"), 1 },
+      { "shared", classify_check_offset + classify_shared, NULL, 0,
+        N_("File is an ELF shared object (DSO)"), 1 },
+      { "loadable", classify_check_offset + classify_loadable, NULL, 0,
+        N_("File is a loadable ELF object (program or shared object)"), 1 },
+
+      /* Negated versions of the above, hidden from the --help output.  */
+      { "not-elf", classify_check_not_offset + classify_elf,
+        NULL, OPTION_HIDDEN, NULL, 1 },
+      { "not-unstripped", classify_check_not_offset + classify_unstripped,
+        NULL, OPTION_HIDDEN, NULL, 1 },
+      { "not-executable", classify_check_not_offset + classify_executable,
+        NULL, OPTION_HIDDEN, NULL, 1 },
+      { "not-shared", classify_check_not_offset + classify_shared,
+        NULL, OPTION_HIDDEN, NULL, 1 },
+      { "not-loadable", classify_check_not_offset + classify_loadable,
+        NULL, OPTION_HIDDEN, NULL, 1 },
+
+      { NULL, 0, NULL, OPTION_DOC, N_("Input flags"), 2 },
+      { "stdin", classify_flag_stdin, NULL, 0,
+        N_("Also read file names to process from standard input, \
+separated by newlines"), 2 },
+      { "stdin0", classify_flag_stdin0, NULL, 0,
+        N_("Also read file names to process from standard input, \
+separated by ASCII NUL bytes"), 2 },
+      { "no-stdin", classify_flag_no_stdin, NULL, 0,
+        N_("Do not read files from standard input (default)"), 2 },
+
+      { NULL, 0, NULL, OPTION_DOC, N_("Output flags"), 3 },
+      { "print", classify_flag_print, NULL, 0,
+        N_("Output names of files, separated by newline"), 3 },
+      { "print0", classify_flag_print0, NULL, 0,
+        N_("Output names of files, separated by ASCII NUL"), 3 },
+      { "no-print", classify_flag_no_print, NULL, 0,
+        N_("Do not output file names"), 3 },
+      { "matching", classify_flag_matching, NULL, 0,
+        N_("If printing file names, print matching files (default)"), 3 },
+      { "not-matching", classify_flag_not_matching, NULL, 0,
+        N_("If printing file names, print files that do not match"), 3 },
+
+      { NULL, 0, NULL, OPTION_DOC, N_("Additional flags"), 4 },
+      { "verbose", 'v', NULL, 0,
+        N_("Output additional information (can be specified multiple times)"), 4 },
+      { NULL, 0, NULL, 0, NULL, 0 }
+    };
+
+  const struct argp argp =
+    {
+      .options = options,
+      .parser = parse_opt,
+      .args_doc = N_("FILE..."),
+      .doc = N_("\
+Determine the type of an ELF file.\
+\n\n\
+Only one of the --shared and --executable checks can pass for one file.  \
+Since modern ELF does not clearly distinguish between programs and \
+dynamic shared objects, these options attempt to identify the primary \
+purpose of the file.\
+\n\n\
+All of the classification options must apply at the same time to a \
+particular file.  Classification options can be negated using a \
+\"--not-\" prefix.\
+\n\n\
+Without any of the --print options, the program exits with status 0 \
+if the requested checks pass for all input files, with 1 if a check \
+fails for any file, and 2 if there is an environmental issue (such \
+as a file read error or a memory allocation error).\
+\n\n\
+When printing file names, the program exits with status 0 even if \
+no file names are printed, and exits with status 2 if there is an \
+environmental issue.\
+")
+    };
+
+  int remaining;
+  if (argp_parse (&argp, argc, argv, ARGP_NO_EXIT, &remaining, NULL) != 0)
+    return 2;
+
+  elf_version (EV_CURRENT);
+
+  int status = 0;
+  for (int i = remaining; i < argc; ++i)
+    {
+      current_path = argv[i];
+      process_current_path (&status);
+    }
+
+  if (flag_stdin != no_stdin)
+    process_stdin (&status);
+
+  return status;
+}

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] elfclassify tool
  2019-04-18 11:17     ` Florian Weimer
@ 2019-07-19 12:47       ` Mark Wielaard
  2019-07-19 13:43         ` Dmitry V. Levin
                           ` (2 more replies)
  0 siblings, 3 replies; 36+ messages in thread
From: Mark Wielaard @ 2019-07-19 12:47 UTC (permalink / raw)
  To: Florian Weimer; +Cc: elfutils-devel, Panu Matilainen

[-- Attachment #1: Type: text/plain, Size: 2187 bytes --]

Hi,

Sorry, this took way too long. But I really like this code.

On Thu, 2019-04-18 at 13:17 +0200, Florian Weimer wrote:
> * Florian Weimer:
> 
> > > BTW. Florian, the extra options are certainly not required for you to
> > > implement to get eu-elfclassify accepted. They are just suggestions,
> > > which we might decide not to do/add. Or they can be added by others if
> > > they think they are useful.
> > 
> > Understood.  I would rather fix the command line syntax as a priority,
> > implement --unstripped, and add a test suite.
> 
> The patch below, also available here:
> 
>   <https://pagure.io/fweimer/elfutils/commits/elfclassify>
> 
> reworks the command line parser, implements filtering of file lists, and
> adds the --unstripped option.

That looks really good. I went ahead and fixed a couple of nits and
added some of my suggestions. I'll respond to your other email
explaining some of my reasoning. The changes I made are:

  elfclassify: Fix bre -> be typo in "unstripped" option help text.
  elfclassify: When reading stdin make sure paths don't include newline.
  elfclassify: Allow inspecting compressed or (kernel) image files with -z.
  elfclassify: Always clean up ELF file and descriptor if one is still open.
  elfclassify: Don't treat errors in elf_open or run_classify as fatal.
  elfclassify: Add --quiet/-q to suppress error messages.
  elfclassify: Add \n to fputs debug output.
  elfclassify: Add --file/-f for testing just regular files.
  elfclassify: Require --elf by default. Add more classifications.
  elfclassify: Add elf_kind and elf_type strings for verbose output.
  elfclassify: Require PT_LOAD for loadable classification.
  elfclassify: Add --program classification.
  elfclassify: Don't use ARGP_NO_EXIT and document exit code expectation.
  elfclassify: Add --linux-kernel-module classification.
  elfclassify: Add --debug-only classification.

Attached is the new version. The individual commits can be found here:
https://code.wildebeest.org/git/user/mjw/elfutils/log/?h=elfclassify

Please let me know if any of this looks bad or unusual.

I'll write some testcases.

Thanks,

Mark

[-- Attachment #2: elfclassify.patch --]
[-- Type: text/x-patch, Size: 30652 bytes --]

diff --git a/src/Makefile.am b/src/Makefile.am
index 2b1c0dcb..69ac4dbe 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -26,7 +26,8 @@ AM_CPPFLAGS += -I$(srcdir)/../libelf -I$(srcdir)/../libebl \
 AM_LDFLAGS = -Wl,-rpath-link,../libelf:../libdw
 
 bin_PROGRAMS = readelf nm size strip elflint findtextrel addr2line \
-	       elfcmp objdump ranlib strings ar unstrip stack elfcompress
+	       elfcmp objdump ranlib strings ar unstrip stack elfcompress \
+	       elfclassify
 
 noinst_LIBRARIES = libar.a
 
@@ -83,6 +84,7 @@ ar_LDADD = libar.a $(libelf) $(libeu) $(argp_LDADD)
 unstrip_LDADD = $(libebl) $(libelf) $(libdw) $(libeu) $(argp_LDADD) -ldl
 stack_LDADD = $(libebl) $(libelf) $(libdw) $(libeu) $(argp_LDADD) -ldl $(demanglelib)
 elfcompress_LDADD = $(libebl) $(libelf) $(libdw) $(libeu) $(argp_LDADD)
+elfclassify_LDADD = $(libelf) $(libdw) $(libeu) $(argp_LDADD)
 
 installcheck-binPROGRAMS: $(bin_PROGRAMS)
 	bad=0; pid=$$$$; list="$(bin_PROGRAMS)"; for p in $$list; do \
diff --git a/src/elfclassify.c b/src/elfclassify.c
new file mode 100644
index 00000000..83a97d47
--- /dev/null
+++ b/src/elfclassify.c
@@ -0,0 +1,989 @@
+/* Classification of ELF files.
+   Copyright (C) 2019 Red Hat, Inc.
+   This file is part of elfutils.
+
+   This file is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   elfutils is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+#include <argp.h>
+#include <error.h>
+#include <fcntl.h>
+#include <gelf.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include ELFUTILS_HEADER(elf)
+#include ELFUTILS_HEADER(dwelf)
+#include "printversion.h"
+
+/* Name and version of program.  */
+ARGP_PROGRAM_VERSION_HOOK_DEF = print_version;
+
+/* Bug report address.  */
+ARGP_PROGRAM_BUG_ADDRESS_DEF = PACKAGE_BUGREPORT;
+
+/* Set by parse_opt.  */
+static int verbose;
+
+/* Set by the main function.  */
+static const char *current_path;
+
+/* Set by open_file.  */
+static int file_fd = -1;
+
+/* Set by issue or elf_issue.  */
+static bool issue_found;
+
+/* Non-fatal issue occurred while processing the current_path.  */
+static void
+issue (int e, const char *msg)
+{
+  if (verbose >= 0)
+    {
+      if (current_path == NULL)
+	error (0, e, "%s", msg);
+      else
+	error (0, e, "%s '%s'", msg, current_path);
+    }
+  issue_found = true;
+}
+
+/* Non-fatal issue occurred while processing the current ELF.  */
+static void
+elf_issue (const char *msg)
+{
+  if (verbose >= 0)
+    error (0, 0, "%s: %s: '%s'", msg, elf_errmsg (-1), current_path);
+  issue_found = true;
+}
+
+/* Set by parse_opt.  */
+static bool flag_only_regular_files;
+
+static bool
+open_file (void)
+{
+  if (verbose > 1)
+    fprintf (stderr, "debug: processing file: %s\n", current_path);
+
+  file_fd = open (current_path, O_RDONLY | (flag_only_regular_files
+					    ? O_NOFOLLOW : 0));
+  if (file_fd < 0)
+    {
+      if (!flag_only_regular_files || errno != ELOOP)
+	issue (errno, N_("opening"));
+      return false;
+    }
+
+  struct stat st;
+  if (fstat (file_fd, &st) != 0)
+    {
+      issue (errno, N_("reading"));
+      return false;
+    }
+
+  /* Don't even bother with directories.  */
+  if (S_ISDIR (st.st_mode)
+      || (flag_only_regular_files && !S_ISREG (st.st_mode)))
+    return false;
+
+  return true;
+}
+
+static void
+close_file (void)
+{
+  if (file_fd >= 0)
+    {
+      close (file_fd);
+      file_fd = -1;
+    }
+}
+
+/* Set by open_elf.  */
+static Elf *elf;
+
+/* Set by parse_opt.  */
+static bool flag_compressed;
+
+static bool
+open_elf (void)
+{
+  if (!open_file ())
+    {
+      /* Make sure the file descriptor is gone.  */
+      close_file ();
+      return false;
+    }
+
+  if (flag_compressed)
+    elf = dwelf_elf_begin (file_fd);
+  else
+    elf = elf_begin (file_fd, ELF_C_READ, NULL);
+
+  if (elf == NULL)
+    {
+      /* This likely means it just isn't an ELF file, probably not a
+	 real issue, but warn if verbose reporting.  */
+      if (verbose > 0)
+	fprintf (stderr, "warning: %s: %s\n", current_path, elf_errmsg (-1));
+      return false;
+    }
+
+  return true;
+}
+
+static void
+close_elf (void)
+{
+  if (elf != NULL)
+    {
+      elf_end (elf);
+      elf = NULL;
+    }
+
+  close_file ();
+}
+
+static const char *
+elf_kind_string (int kind)
+{
+  switch (kind)
+    {
+    case ELF_K_NONE:
+      return "ELF_K_NONE";
+    case ELF_K_AR:
+      return "ELF_K_AR";
+    case ELF_K_COFF:
+      return "ELF_K_COFF"; /* libelf doesn't really support this.  */
+    case ELF_K_ELF:
+      return "ELF_K_ELF";
+    default:
+      return "<unknown>";
+    }
+}
+
+static const char *
+elf_type_string (int type)
+{
+  switch (type)
+    {
+    case ET_NONE:
+      return "ET_NONE";
+    case ET_REL:
+      return "ET_REL";
+    case ET_EXEC:
+      return "ET_EXEC";
+    case ET_DYN:
+      return "ET_DYN";
+    case ET_CORE:
+      return "ET_CORE";
+    default:
+      return "<unknown>";
+    }
+}
+
+static int elf_type;
+static bool has_program_load;
+static bool has_progbits_alloc;
+static bool has_program_interpreter;
+static bool has_dynamic;
+static bool has_soname;
+static bool has_pie_flag;
+static bool has_dt_debug;
+static bool has_symtab;
+static bool has_debug_sections;
+static bool has_modinfo;
+static bool has_gnu_linkonce_this_module;
+
+static bool
+run_classify (void)
+{
+  /* Reset to unanalyzed default.  */
+  elf_type = 0;
+  has_program_load = false;
+  has_progbits_alloc = false;
+  has_program_interpreter = false;
+  has_dynamic = false;
+  has_soname = false;
+  has_pie_flag = false;
+  has_dt_debug = false;
+  has_symtab = false;
+  has_debug_sections = false;
+  has_modinfo = false;
+  has_gnu_linkonce_this_module = false;
+
+  int kind = elf_kind (elf);
+  if (verbose > 0)
+    fprintf (stderr, "info: %s: ELF kind: %s (0x%x)\n", current_path,
+	     elf_kind_string (kind), kind);
+  if (kind != ELF_K_ELF)
+    return true;
+
+  GElf_Ehdr ehdr_storage;
+  GElf_Ehdr *ehdr = gelf_getehdr (elf, &ehdr_storage);
+  if (ehdr == NULL)
+    {
+      elf_issue (N_("ELF header"));
+      return false;
+    }
+  elf_type = ehdr->e_type;
+
+  /* Examine program headers.  */
+  {
+    size_t nphdrs;
+    if (elf_getphdrnum (elf, &nphdrs) != 0)
+      {
+	elf_issue (N_("program headers"));
+	return false;
+      }
+    for (size_t phdr_idx = 0; phdr_idx < nphdrs; ++phdr_idx)
+      {
+	GElf_Phdr phdr_storage;
+	GElf_Phdr *phdr = gelf_getphdr (elf, phdr_idx, &phdr_storage);
+	if (phdr == NULL)
+	  {
+	    elf_issue (N_("program header"));
+	    return false;
+	  }
+	if (phdr->p_type == PT_DYNAMIC)
+	  has_dynamic = true;
+	if (phdr->p_type == PT_INTERP)
+	  has_program_interpreter = true;
+	if (phdr->p_type == PT_LOAD)
+	  has_program_load = true;
+      }
+  }
+
+  Elf_Scn *dyn_section = NULL;
+  {
+    size_t shstrndx;
+    if (unlikely (elf_getshdrstrndx (elf, &shstrndx) < 0))
+      {
+	elf_issue (N_("section header string table index"));
+	return false;
+      }
+
+    Elf_Scn *scn = NULL;
+    while (true)
+      {
+        scn = elf_nextscn (elf, scn);
+        if (scn == NULL)
+          break;
+        GElf_Shdr shdr_storage;
+        GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_storage);
+        if (shdr == NULL)
+	  {
+            elf_issue (N_("could not obtain section header"));
+	    return false;
+	  }
+        const char *section_name = elf_strptr (elf, shstrndx, shdr->sh_name);
+        if (section_name == NULL)
+	  {
+            elf_issue(N_("could not obtain section name"));
+	    return false;
+	  }
+        if (verbose > 2)
+          fprintf (stderr, "debug: section header %s (type %d) found\n",
+                   section_name, shdr->sh_type);
+        if (shdr->sh_type == SHT_DYNAMIC)
+          {
+            if (verbose > 1)
+              fputs ("debug: dynamic section found\n", stderr);
+            dyn_section = scn;
+          }
+        if (shdr->sh_type == SHT_SYMTAB)
+          {
+            if (verbose > 1)
+              fputs ("debug: symtab section found\n", stderr);
+            has_symtab = true;
+          }
+	if (shdr->sh_type == SHT_PROGBITS && (shdr->sh_flags & SHF_ALLOC) != 0)
+	  {
+	    if (verbose > 1 && !has_progbits_alloc)
+	      fputs ("debug: allocated PROGBITS section found\n", stderr);
+	    has_progbits_alloc = true;
+	  }
+        const char *debug_prefix = ".debug_";
+        if (strncmp (section_name, debug_prefix, strlen (debug_prefix)) == 0)
+          {
+            if (verbose > 1 && !has_debug_sections)
+              fputs ("debug: .debug_* section found\n", stderr);
+            has_debug_sections = true;
+          }
+	if (strcmp (section_name, ".modinfo") == 0)
+	  {
+	    if (verbose > 1)
+	      fputs ("debug: .modinfo section found\n", stderr);
+	    has_modinfo = true;
+	  }
+	if (strcmp (section_name, ".gnu.linkonce.this_module") == 0)
+	  {
+	    if (verbose > 1)
+	      fputs ("debug: .gnu.linkonce.this_module section found\n",
+		     stderr);
+	    has_gnu_linkonce_this_module = true;
+	  }
+      }
+  }
+
+  /* Examine the dynamic section.  */
+  if (has_dynamic)
+    {
+      if (dyn_section != NULL)
+        {
+          Elf_Data *data = elf_getdata (dyn_section, NULL);
+          if (verbose > 2)
+            fprintf (stderr, "debug: Elf_Data for dynamic section: %p\n",
+                     data);
+
+          if (data != NULL)
+            for (int dyn_idx = 0; ; ++dyn_idx)
+              {
+                GElf_Dyn dyn_storage;
+                GElf_Dyn *dyn = gelf_getdyn (data, dyn_idx, &dyn_storage);
+                if (dyn == NULL)
+                  break;
+                if (verbose > 2)
+                  fprintf (stderr, "debug: dynamic entry %d"
+                           " with tag %llu found\n",
+                           dyn_idx, (unsigned long long int) dyn->d_tag);
+                if (dyn->d_tag == DT_SONAME)
+                  has_soname = true;
+                if (dyn->d_tag == DT_FLAGS_1 && (dyn->d_un.d_val & DF_1_PIE))
+                  has_pie_flag = true;
+                if (dyn->d_tag == DT_DEBUG)
+                  has_dt_debug = true;
+                if (dyn->d_tag == DT_NULL)
+                  break;
+              }
+        }
+    }
+
+  if (verbose > 0)
+    {
+      fprintf (stderr, "info: %s: ELF type: %s (0x%x)\n", current_path,
+	       elf_type_string (elf_type), elf_type);
+      if (has_program_load)
+        fprintf (stderr, "info: %s: PT_LOAD found\n", current_path);
+      if (has_progbits_alloc)
+	fprintf (stderr, "info: %s: allocated PROGBITS section found\n",
+		 current_path);
+      if (has_program_interpreter)
+        fprintf (stderr, "info: %s: program interpreter found\n",
+                 current_path);
+      if (has_dynamic)
+        fprintf (stderr, "info: %s: dynamic segment found\n", current_path);
+      if (has_soname)
+        fprintf (stderr, "info: %s: soname found\n", current_path);
+      if (has_pie_flag)
+        fprintf (stderr, "info: %s: DF_1_PIE flag found\n", current_path);
+      if (has_dt_debug)
+        fprintf (stderr, "info: %s: DT_DEBUG found\n", current_path);
+      if (has_symtab)
+        fprintf (stderr, "info: %s: symbol table found\n", current_path);
+      if (has_debug_sections)
+        fprintf (stderr, "info: %s: .debug_* section found\n", current_path);
+      if (has_modinfo)
+        fprintf (stderr, "info: %s: .modinfo section found\n", current_path);
+      if (has_gnu_linkonce_this_module)
+        fprintf (stderr,
+		 "info: %s: .gnu.linkonce.this_module section found\n",
+		 current_path);
+    }
+
+  return true;
+}
+
+/* Return true unless libelf reports the kind of the current file as
+   ELF_K_NONE.  */
+static bool
+is_elf (void)
+{
+  if (elf_kind (elf) == ELF_K_NONE)
+    return false;
+  return true;
+}
+
+/* Return true if libelf reports the current file as a plain ELF
+   object (ELF_K_ELF).  */
+static bool
+is_elf_file (void)
+{
+  if (elf_kind (elf) != ELF_K_ELF)
+    return false;
+  return true;
+}
+
+/* Return true if libelf reports the current file as an archive
+   (ELF_K_AR).  */
+static bool
+is_elf_archive (void)
+{
+  if (elf_kind (elf) != ELF_K_AR)
+    return false;
+  return true;
+}
+
+/* Return true if the current file is an ELF object whose ELF type is
+   ET_CORE.  */
+static bool
+is_core (void)
+{
+  if (elf_kind (elf) != ELF_K_ELF)
+    return false;
+  return elf_type == ET_CORE;
+}
+
+/* Return true if the file is a loadable object: an ELF file of type
+   ET_EXEC or ET_DYN for which a PT_LOAD segment was seen.
+   Relocatable objects and core dumps are excluded.  (The kernel and
+   various userspace components can load ET_REL files, but we
+   disregard that for our classification purposes.)  */
+static bool
+is_loadable (void)
+{
+  if (elf_kind (elf) != ELF_K_ELF)
+    return false;
+  if (elf_type != ET_EXEC && elf_type != ET_DYN)
+    return false;
+  return has_program_load;
+}
+
+/* Return true if the file can be stripped further: libelf recognizes
+   it, its type is ET_REL, ET_EXEC or ET_DYN, and it has a symbol
+   table or .debug_* sections.  */
+static bool
+is_unstripped (void)
+{
+  if (elf_kind (elf) == ELF_K_NONE)
+    return false;
+  if (elf_type != ET_REL && elf_type != ET_EXEC && elf_type != ET_DYN)
+    return false;
+  return has_symtab || has_debug_sections;
+}
+
+/* Return true if the file carries debuginfo but no loadable program
+   bits, i.e. it has .debug_* sections and no allocated PROGBITS
+   section.  Such a file is most likely a separate .debug file, a dwz
+   multi-file or a .dwo file.  Note that it can still be loadable,
+   but in that case the phdrs shouldn't be trusted.  */
+static bool
+is_debug_only (void)
+{
+  if (elf_kind (elf) == ELF_K_NONE)
+    return false;
+  if (elf_type != ET_REL && elf_type != ET_EXEC && elf_type != ET_DYN)
+    return false;
+  if (!has_debug_sections)
+    return false;
+  return !has_progbits_alloc;
+}
+
+/* Return true if the file is a loadable object that is considered a
+   shared object rather than an executable.  The indicators below are
+   consulted in order of decreasing priority; the first one that
+   applies decides the result, so their order matters.  */
+static bool
+is_shared (void)
+{
+  if (!is_loadable ())
+    return false;
+
+  /* The ELF type is very clear: this is an executable.  */
+  if (elf_type == ET_EXEC)
+    return false;
+
+  /* If the object is marked as PIE, it is definitely an executable,
+     and not a loadable shared object.  */
+  if (has_pie_flag)
+    return false;
+
+  /* Treat a DT_SONAME tag as a strong indicator that this is a shared
+     object.  */
+  if (has_soname)
+    return true;
+
+  /* This is probably a PIE program: there is no soname, but a program
+     interpreter.  In theory, this file could be also a DSO with a
+     soname implied by its file name that can be run as a program.
+     This situation is impossible to resolve in the general case. */
+  if (has_program_interpreter)
+    return false;
+
+  /* Roland McGrath mentions in
+     <https://www.sourceware.org/ml/libc-alpha/2015-03/msg00605.html>,
+     that “we defined a PIE as an ET_DYN with a DT_DEBUG”.  This
+     matches current binutils behavior (version 2.32).  DT_DEBUG is
+     added if bfd_link_executable returns true or if bfd_link_pic
+     returns false, depending on the architectures.  However, DT_DEBUG
+     is not documented as being specific to executables, therefore use
+     it only as a low-priority discriminator.  */
+  if (has_dt_debug)
+    return false;
+
+  /* If there is no dynamic section, the file cannot be loaded as a
+     shared object.  */
+  if (!has_dynamic)
+    return false;
+  return true;
+}
+
+/* Return true if the file is a loadable object which is not
+   classified as a shared object; such a file is treated as an
+   executable.  The explicit is_loadable check is needed because
+   is_shared also returns false for files that are not loadable.  */
+static bool
+is_executable (void)
+{
+  return is_loadable () && !is_shared ();
+}
+
+/* Like is_executable, but the object can also be a shared library at
+   the same time.  A loadable object is a program as soon as any one
+   of the following indicators is present:
+
+   - The ELF type is ET_EXEC, which very clearly is an executable.
+
+   - The object is marked as PIE, so it is definitely an executable.
+
+   - There is a program interpreter.  This is probably a PIE program.
+     In theory, this file could be also a DSO with a soname.  This
+     situation is impossible to resolve in the general case.  See
+     is_shared.  Unlike is_executable, a soname does not take
+     precedence here.
+
+   - There is a DT_DEBUG entry.  Roland McGrath mentions in
+     <https://www.sourceware.org/ml/libc-alpha/2015-03/msg00605.html>,
+     that “we defined a PIE as an ET_DYN with a DT_DEBUG”.  This
+     matches current binutils behavior (version 2.32).  DT_DEBUG is
+     added if bfd_link_executable returns true or if bfd_link_pic
+     returns false, depending on the architectures.  However, DT_DEBUG
+     is not documented as being specific to executables, therefore it
+     is only the last indicator consulted.  */
+static bool
+is_program (void)
+{
+  return (is_loadable ()
+	  && (elf_type == ET_EXEC
+	      || has_pie_flag
+	      || has_program_interpreter
+	      || has_dt_debug));
+}
+
+/* Return true if the file is a Linux kernel module: an ELF file of
+   type ET_REL which contains both of the magic sections .modinfo and
+   .gnu.linkonce.this_module.  */
+static bool
+is_linux_kernel_module (void)
+{
+  if (elf_kind (elf) != ELF_K_ELF)
+    return false;
+  if (elf_type != ET_REL)
+    return false;
+  return has_modinfo && has_gnu_linkonce_this_module;
+}
+
+/* How a classification check constrains the result for a file: it is
+   ignored, it must pass, or it must fail.  do_not_care comes first so
+   that the zero-initialized requirements array starts out with no
+   constraints.  */
+enum classify_requirement { do_not_care, required, forbidden };
+
+/* The individual classification checks.  The values are used as
+   indices into the requirements and checks arrays and are added to
+   classify_check_offset/classify_check_not_offset to form the argp
+   option keys.  classify_check_last must always name the final
+   enumerator.  */
+enum classify_check
+{
+  classify_elf,
+  classify_elf_file,
+  classify_elf_archive,
+  classify_core,
+  classify_unstripped,
+  classify_executable,
+  classify_program,
+  classify_shared,
+  classify_linux_kernel_module,
+  classify_debug_only,
+  classify_loadable,
+
+  classify_check_last = classify_loadable
+};
+
+/* Keys for the long-only argp options.  */
+enum
+{
+  /* Key of "--CHECK" is classify_check_offset plus the check value,
+     key of "--not-CHECK" is classify_check_not_offset plus the check
+     value.  */
+  classify_check_offset = 1000,
+  classify_check_not_offset = 2000,
+
+  /* Keys of the input and output flags, numbered consecutively.  */
+  classify_flag_stdin = 3000,
+  classify_flag_stdin0,
+  classify_flag_no_stdin,
+  classify_flag_print,
+  classify_flag_print0,
+  classify_flag_no_print,
+  classify_flag_matching,
+  classify_flag_not_matching,
+};
+
+/* Return true if KEY is the option key of a non-negated
+   classification option ("--CHECK").  */
+static bool
+classify_check_positive (int key)
+{
+  const int first = classify_check_offset;
+  const int last = classify_check_offset + classify_check_last;
+
+  if (key < first || key > last)
+    return false;
+  return true;
+}
+
+/* Return true if KEY is the option key of a negated classification
+   option ("--not-CHECK").  */
+static bool
+classify_check_negative (int key)
+{
+  const int first = classify_check_not_offset;
+  const int last = classify_check_not_offset + classify_check_last;
+
+  if (key < first || key > last)
+    return false;
+  return true;
+}
+
+/* Set by parse_opt.  */
+
+/* Requirement for each classification check, indexed by enum
+   classify_check.  Zero-initialized, i.e. do_not_care.  */
+static enum classify_requirement requirements[classify_check_last + 1];
+/* Whether and how file names are read from standard input; the
+   zero-initialized default is no_stdin.  */
+static enum { no_stdin, do_stdin, do_stdin0 } flag_stdin;
+/* Whether and how file names are printed; the zero-initialized
+   default is no_print.  */
+static enum { no_print, do_print, do_print0 } flag_print;
+/* When printing, print the files which pass the checks (true) or the
+   files which do not (false).  */
+static bool flag_print_matching = true;
+
+/* argp option parser.  Classification options record a requirement
+   for the corresponding check; all other recognized keys update one
+   of the global flags.  Returns 0 if KEY was handled and
+   ARGP_ERR_UNKNOWN otherwise.  */
+static error_t
+parse_opt (int key, char *arg __attribute__ ((unused)),
+           struct argp_state *state __attribute__ ((unused)))
+{
+  /* "--CHECK": the check has to pass.  */
+  if (classify_check_positive (key))
+    {
+      requirements[key - classify_check_offset] = required;
+      return 0;
+    }
+
+  /* "--not-CHECK": the check has to fail.  */
+  if (classify_check_negative (key))
+    {
+      requirements[key - classify_check_not_offset] = forbidden;
+      return 0;
+    }
+
+  error_t result = 0;
+  switch (key)
+    {
+    /* Verbosity.  */
+    case 'v':
+      ++verbose;
+      break;
+    case 'q':
+      --verbose;
+      break;
+
+    /* How files are opened.  */
+    case 'z':
+      flag_compressed = true;
+      break;
+    case 'f':
+      flag_only_regular_files = true;
+      break;
+
+    /* Reading file names from standard input.  */
+    case classify_flag_stdin:
+      flag_stdin = do_stdin;
+      break;
+    case classify_flag_stdin0:
+      flag_stdin = do_stdin0;
+      break;
+    case classify_flag_no_stdin:
+      flag_stdin = no_stdin;
+      break;
+
+    /* Printing file names.  */
+    case classify_flag_print:
+      flag_print = do_print;
+      break;
+    case classify_flag_print0:
+      flag_print = do_print0;
+      break;
+    case classify_flag_no_print:
+      flag_print = no_print;
+      break;
+
+    /* Which file names are printed.  */
+    case classify_flag_matching:
+      flag_print_matching = true;
+      break;
+    case classify_flag_not_matching:
+      flag_print_matching = false;
+      break;
+
+    default:
+      result = ARGP_ERR_UNKNOWN;
+      break;
+    }
+
+  return result;
+}
+
+/* Perform requested checks against the file at current_path.
+
+   The file is opened and classified, every check is evaluated, and
+   the results are compared with the requirements set by parse_opt.
+   If the file could not be opened at all (file_fd is -1), the checks
+   count as failed.  If it was opened but could not be classified,
+   the checks fail only if at least one check is required.
+
+   Without printing (no_print), *STATUS is set to 1 if the checks
+   failed.  With printing, the path is written to standard output if
+   the outcome equals flag_print_matching and *STATUS is left alone;
+   a failure to write is reported through issue.  */
+static void
+process_current_path (int *status)
+{
+  /* Labels used in the debug output, indexed by classification
+     check.  */
+  static const char *const check_names[] =
+    {
+      [classify_elf] = "elf",
+      [classify_elf_file] = "elf_file",
+      [classify_elf_archive] = "elf_archive",
+      [classify_core] = "core",
+      [classify_unstripped] = "unstripped",
+      [classify_executable] = "executable",
+      [classify_program] = "program",
+      [classify_shared] = "shared",
+      [classify_linux_kernel_module] = "linux kernel module",
+      [classify_debug_only] = "debug-only",
+      [classify_loadable] = "loadable",
+    };
+
+  bool checks_passed = true;
+
+  if (open_elf () && run_classify ())
+    {
+      bool checks[] =
+        {
+	 [classify_elf] = is_elf (),
+	 [classify_elf_file] = is_elf_file (),
+	 [classify_elf_archive] = is_elf_archive (),
+	 [classify_core] = is_core (),
+	 [classify_unstripped] = is_unstripped (),
+	 [classify_executable] = is_executable (),
+	 [classify_program] = is_program (),
+	 [classify_shared] = is_shared (),
+	 [classify_linux_kernel_module] = is_linux_kernel_module (),
+	 [classify_debug_only] = is_debug_only (),
+	 [classify_loadable] = is_loadable (),
+	};
+
+      if (verbose > 1)
+        {
+          for (enum classify_check check = 0;
+               check <= classify_check_last; ++check)
+            if (checks[check])
+              fprintf (stderr, "debug: %s: %s\n", current_path,
+                       check_names[check]);
+        }
+
+      for (enum classify_check check = 0;
+           check <= classify_check_last; ++check)
+        switch (requirements[check])
+          {
+          case required:
+            if (!checks[check])
+              checks_passed = false;
+            break;
+          case forbidden:
+            if (checks[check])
+              checks_passed = false;
+            break;
+          case do_not_care:
+            break;
+          }
+    }
+  else if (file_fd == -1)
+    checks_passed = false; /* There is nothing to check, bad file.  */
+  else
+    {
+      /* The file was opened, but could not be classified.  No check
+         can pass, so any required check fails; forbidden checks are
+         trivially satisfied.  */
+      for (enum classify_check check = 0;
+           check <= classify_check_last; ++check)
+        if (requirements[check] == required)
+          checks_passed = false;
+    }
+
+  close_elf ();
+
+  switch (flag_print)
+    {
+    case do_print:
+      /* The printed list is the result of the run, so a write
+         failure must not go unnoticed.  */
+      if (checks_passed == flag_print_matching
+          && puts (current_path) == EOF)
+        issue (errno, N_("writing to standard output"));
+      break;
+    case do_print0:
+      /* The terminating NUL byte is written as the separator.  */
+      if (checks_passed == flag_print_matching
+          && fwrite (current_path, strlen (current_path) + 1, 1, stdout) < 1)
+        issue (errno, N_("writing to standard output"));
+      break;
+    case no_print:
+      if (!checks_passed)
+        *status = 1;
+      break;
+    }
+}
+
+/* Called to process standard input if flag_stdin is not no_stdin.
+   Reads file names from standard input, separated by newlines (or by
+   NUL bytes for do_stdin0), and passes each of them to
+   process_current_path together with STATUS.  A final file name
+   which is not followed by a separator is processed as well.  Read
+   failures are reported through issue and end the processing.  */
+static void
+process_stdin (int *status)
+{
+  const char delim = flag_stdin == do_stdin0 ? '\0' : '\n';
+
+  char *buffer = NULL;
+  size_t buffer_size = 0;
+  while (true)
+    {
+      ssize_t ret = getdelim (&buffer, &buffer_size, delim, stdin);
+      if (ferror (stdin))
+	{
+	  current_path = NULL;
+	  issue (errno, N_("reading from standard input"));
+	  break;
+	}
+      if (ret < 0)
+	{
+	  /* Nothing was read.  Usually this is the end of the input,
+	     but getdelim can also fail without setting the stream
+	     error indicator, for instance if it cannot allocate the
+	     buffer.  Report that instead of aborting.  */
+	  if (!feof (stdin))
+	    {
+	      current_path = NULL;
+	      issue (errno, N_("reading from standard input"));
+	    }
+	  break;
+	}
+      /* Remove the newline, but only if it is there: the last file
+         name may end at end-of-file without one, and then its final
+         character must be kept.  (A NUL separator already terminates
+         the string.)  */
+      if (delim != '\0' && ret > 0 && buffer[ret - 1] == delim)
+        buffer[ret - 1] = '\0';
+      current_path = buffer;
+      process_current_path (status);
+    }
+
+  free (buffer);
+}
+
+/* Parse the command line, classify every file named on it and, if
+   requested, every file named on standard input.  Returns 2 if an
+   issue was recorded while processing (or argp_parse failed), and
+   otherwise the status collected by process_current_path: 0, or 1 if
+   a check failed while no --print option was in effect.  */
+int
+main (int argc, char **argv)
+{
+  const struct argp_option options[] =
+    {
+      { NULL, 0, NULL, OPTION_DOC, N_("Classification options"), 1 },
+      { "elf", classify_check_offset + classify_elf, NULL, 0,
+        N_("File looks like an ELF object or archive/static library (default)")
+	, 1 },
+      { "elf-file", classify_check_offset + classify_elf_file, NULL, 0,
+        N_("File is a regular ELF object (not an archive/static library)")
+	, 1 },
+      { "elf-archive", classify_check_offset + classify_elf_archive, NULL, 0,
+        N_("File is an ELF archive or static library")
+	, 1 },
+      { "core", classify_check_offset + classify_core, NULL, 0,
+        N_("File is an ELF core dump file")
+	, 1 },
+      { "unstripped", classify_check_offset + classify_unstripped, NULL, 0,
+        N_("File is an ELF file with symbol table or .debug_* sections \
+and can be stripped further"), 1 },
+      { "executable", classify_check_offset + classify_executable, NULL, 0,
+        N_("File is an ELF program executable \
+(and not also a shared library)"), 1 },
+      { "program", classify_check_offset + classify_program, NULL, 0,
+        N_("File is an ELF program executable \
+(might also be a shared library)"), 1 },
+      { "shared", classify_check_offset + classify_shared, NULL, 0,
+        N_("File is an ELF shared object (DSO)"), 1 },
+      { "linux-kernel-module", (classify_check_offset
+				+ classify_linux_kernel_module), NULL, 0,
+        N_("File is a linux kernel module"), 1 },
+      { "debug-only", (classify_check_offset + classify_debug_only), NULL, 0,
+        N_("File is a debug only ELF file \
+(separate .debug, .dwo or dwz multi-file)"), 1 },
+      { "loadable", classify_check_offset + classify_loadable, NULL, 0,
+        N_("File is a loadable ELF object (program or shared object)"), 1 },
+
+      /* Negated versions of the above.  */
+      { "not-elf", classify_check_not_offset + classify_elf,
+        NULL, OPTION_HIDDEN, NULL, 1 },
+      { "not-elf-file", classify_check_not_offset + classify_elf_file,
+        NULL, OPTION_HIDDEN, NULL, 1 },
+      { "not-elf-archive", classify_check_not_offset + classify_elf_archive,
+        NULL, OPTION_HIDDEN, NULL, 1 },
+      { "not-core", classify_check_not_offset + classify_core,
+        NULL, OPTION_HIDDEN, NULL, 1 },
+      { "not-unstripped", classify_check_not_offset + classify_unstripped,
+        NULL, OPTION_HIDDEN, NULL, 1 },
+      { "not-executable", classify_check_not_offset + classify_executable,
+        NULL, OPTION_HIDDEN, NULL, 1 },
+      { "not-program", classify_check_not_offset + classify_program,
+        NULL, OPTION_HIDDEN, NULL, 1 },
+      { "not-shared", classify_check_not_offset + classify_shared,
+        NULL, OPTION_HIDDEN, NULL, 1 },
+      { "not-linux-kernel-module", (classify_check_not_offset
+				    + classify_linux_kernel_module),
+        NULL, OPTION_HIDDEN, NULL, 1 },
+      { "not-debug-only", (classify_check_not_offset + classify_debug_only),
+        NULL, OPTION_HIDDEN, NULL, 1 },
+      { "not-loadable", classify_check_not_offset + classify_loadable,
+        NULL, OPTION_HIDDEN, NULL, 1 },
+
+      { NULL, 0, NULL, OPTION_DOC, N_("Input flags"), 2 },
+      { "file", 'f', NULL, 0,
+        N_("Only classify regular (not symlink nor special device) files"), 2 },
+      { "stdin", classify_flag_stdin, NULL, 0,
+        N_("Also read file names to process from standard input, \
+separated by newlines"), 2 },
+      { "stdin0", classify_flag_stdin0, NULL, 0,
+        N_("Also read file names to process from standard input, \
+separated by ASCII NUL bytes"), 2 },
+      /* The key has to be classify_flag_no_stdin; with the key of
+         --stdin this option would enable reading from standard input
+         instead of disabling it.  */
+      { "no-stdin", classify_flag_no_stdin, NULL, 0,
+        N_("Do not read files from standard input (default)"), 2 },
+      { "compressed", 'z', NULL, 0,
+	N_("Try to open compressed files or embedded (kernel) ELF images"),
+	2 },
+
+      { NULL, 0, NULL, OPTION_DOC, N_("Output flags"), 3 },
+      { "print", classify_flag_print, NULL, 0,
+        N_("Output names of files, separated by newline"), 3 },
+      { "print0", classify_flag_print0, NULL, 0,
+        N_("Output names of files, separated by ASCII NUL"), 3 },
+      { "no-print", classify_flag_no_print, NULL, 0,
+        N_("Do not output file names"), 3 },
+      { "matching", classify_flag_matching, NULL, 0,
+        N_("If printing file names, print matching files (default)"), 3 },
+      { "not-matching", classify_flag_not_matching, NULL, 0,
+        N_("If printing file names, print files that do not match"), 3 },
+
+      { NULL, 0, NULL, OPTION_DOC, N_("Additional flags"), 4 },
+      { "verbose", 'v', NULL, 0,
+        N_("Output additional information (can be specified multiple times)"), 4 },
+      { "quiet", 'q', NULL, 0,
+        N_("Suppress some error output (counterpart to --verbose)"), 4 },
+      { NULL, 0, NULL, 0, NULL, 0 }
+    };
+
+  const struct argp argp =
+    {
+      .options = options,
+      .parser = parse_opt,
+      .args_doc = N_("FILE..."),
+      .doc = N_("\
+Determine the type of an ELF file.\
+\n\n\
+All of the classification options must apply at the same time to a \
+particular file.  Classification options can be negated using a \
+\"--not-\" prefix.\
+\n\n\
+Since modern ELF does not clearly distinguish between programs and \
+dynamic shared objects, you should normally use either --executable or \
+--shared to identify the primary purpose of a file.  \
+Only one of the --shared and --executable checks can pass for a file.  \
+If you want to know whether an ELF object might be both a program and a \
+shared library at the same time use --program --shared.  \
+--executable is effectively the same as --program --not-shared.\
+\n\n\
+Without any of the --print options, the program exits with status 0 \
+if the requested checks pass for all input files, with 1 if a check \
+fails for any file, and 2 if there is an environmental issue (such \
+as a file read error or a memory allocation error).\
+\n\n\
+When printing file names, the program exits with status 0 even if \
+no file names are printed, and exits with status 2 if there is an \
+environmental issue.\
+\n\n\
+On usage error (e.g. a bad option was given), the program exits with \
+a status code of 64.\
+\n\n\
+The --quiet or -q option suppresses some error warning output, but \
+doesn't change the exit status.\
+")
+    };
+
+  /* Require that the file is an ELF file by default.  User can
+     disable with --not-elf.  */
+  requirements[classify_elf] = required;
+
+  int remaining;
+  if (argp_parse (&argp, argc, argv, 0, &remaining, NULL) != 0)
+    return 2;
+
+  elf_version (EV_CURRENT);
+
+  int status = 0;
+
+  /* Files named on the command line are processed first, then the
+     files named on standard input.  */
+  for (int i = remaining; i < argc; ++i)
+    {
+      current_path = argv[i];
+      process_current_path (&status);
+    }
+
+  if (flag_stdin != no_stdin)
+    process_stdin (&status);
+
+  /* An issue found while processing any file takes precedence over
+     the outcome of the checks.  */
+  if (issue_found)
+    return 2;
+
+  return status;
+}

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] elfclassify tool
  2019-04-16 11:38   ` Florian Weimer
  2019-04-18 11:17     ` Florian Weimer
@ 2019-07-19 13:24     ` Mark Wielaard
  1 sibling, 0 replies; 36+ messages in thread
From: Mark Wielaard @ 2019-07-19 13:24 UTC (permalink / raw)
  To: Florian Weimer; +Cc: elfutils-devel, Panu Matilainen

Hi,

Some answers to this older discussion to explain some of my recent
commits suggested for elfclassify.

On Tue, 2019-04-16 at 13:38 +0200, Florian Weimer wrote:
> * Mark Wielaard:
> > --elf PATH return 0 whenever the file can be opened and a minimal ELF
> > header can be read (it might not be a completely valid ELF file). Do we
> > want or need to do any more verification (e.g. try to get the full ELF
> > header, walk through all phdrs and shdrs)?
> If we ever need that, I think we should add it as separate options,
> detecting both separate debuginfo and regular ELF files.

You are probably right that a real verification isn't the task of
elfclassify. We already have elflint. But I did make --elf required by
default (the check is simply that libelf could open the file without
marking it as ELF_K_NONE). The user can disable it again with
--not-elf. It felt slightly odd to not have any filter active by default.

I did add two "sub" classifications --elf-file and --elf-archive
because I believe people often make a distinction between "normal" ELF
files and ELF archives (containers of ELF objects).

I also added detection of --debug-only ELF files using the heuristic
that Frank also came up with for the dbgserver (contains .debug_*
sections, but no allocated SHT_PROGBITS sections).

> > --unstripped (not yet implemented) would be a classification that
> > indicates whether the ELF file can be stripped (further), that is has a
> > .symtab (symbol table), .debug_* sections (and possibly any non-
> > loadable sections -- "file" only detects the first two).
> 
> Some non-allocated sections are expected in stripped binaries:
> .gnu_debuglink, .shstrtab, .gnu.build.attributes look relevant in this
> context.  I'm not sure if we should flag any other non-allocated section
> in this way.

I don't think those sections need special flagging.

> > I am not sure --file=PATH is a useful option.
> 
> It's useful for determining if the file exists and can be mapped.
> 
> > But maybe we need some way to indicate whether a file is a real file or
> > a symlink? But the current implementation returns 0 even for symlinks.
> > As do every other option (if the file is a symlink to an ELF file of
> > the requested classification). Is this what we want? I would suggest
> > that we return 1 for anything that is not a regular file. But that
> > would mean that for example eu-elfclassify --executable=/proc/$$/exe
> > would also return 1 (currently it returns 0, which might be helpful in
> > some cases).
> 
> I don't know what RPM needs in this context.  I expect that it can
> easily filter out non-regular files.  My problem with symbolic link
> detection is that it is so inconsistent—it generally applies to the
> final pathname component, and that does not look useful to me.

Since --file was available again I reused it to indicate that the final
pathname component should be a regular file (not a symlink or special
device). I think that is useful in general. But you are right there are
other tools to filter out symlinks/special device files. It was useful
for quick and dirty testing though.

> > --loadable basically checks whether the given ELF file is not an object
> > (ET_REL) file, so it will return 0 for either an executable, a shared
> > object or core file, but not check whether any other attribute (like
> > whether it has program headers and/or loadable segments). Personally I
> > would like it if this at least included a check for a PT_LOAD segment.
> 
> Is a PT_LOAD segment required to make the PT_DYNAMIC segment visible?
> It is possible to have mostly empty objects, after all.

I did add this extra check, because I am a little paranoid about having
to deal with totally broken ELF files. My reasoning was basically that
without a PT_LOAD there is nothing in memory to load/execute.

> > This does not classify kernel modules as loadable objects.
> > rpm does contain a check for that, it might make sense to include that
> > as a separate classification in elfclassify --kernel-module.
> > 
> > Kernel modules are also special because they can be compressed ELF
> > files. Do we want to support that? (It is easy with gelf_elf_begin).
> > That could for example be an flag/option like --compressed which can be
> > combined with any other classification option?
> 
> How relevant are kernel modules to eu-elfclassify?
> 
> Is path-based detection feasible for kernel modules?

Sadly kernel modules often need special treatment that you wouldn't
need for "normal" ET_REL files. path-based detection is only partially
possible (rpm used to use the extension name, but that was fragile). So
I added an option --linux-kernel-module that detects them. I also added
a -z option to try to detect ELF files inside compressed images because
it was easy and because these days kernel modules are often compressed.

> > I think another useful classification would be --debugfile which
> > succeeds if the primary function of the given ELF file is being a
> > separete debug file (basically .debug, .dwo or dwz .multi file) which
> > cannot be linked and loaded on its own
> 
> That is very difficult to detect reliably, unfortunately, and would best
> be implemented in lib(g)elf itself because it would be generally useful,
> for all kinds of tools which expect to process real ELF files only.

I think the heuristic mentioned above for --debug-only works pretty
well. I haven't spotted false positives yet. It is hard to do this in
libelf because the public interface is mostly locked and libelf treats
the segment and section views completely independently.

> > But I think you don't want to use
> > ARGP_NO_EXIT. That causes standard options like --version and --
> > help to
> > not exit (with success). Which is generally what we want.
> > We do want to want --version and --help to not return an error
> > indicator (this is actually checked by make distcheck).
> 
> I want to exit with status 2 on usage error.  I couldn't make that
> happen without ARGP_NO_EXIT.  I'm open to different suggestions.

There is one patch in my tree that does remove the ARGP_NO_EXIT
(because it breaks --help and friends). It documents that usage errors
produce a status code of 64. Processing errors cause a status of 2.
And matching results produce an exit status of 0 or 1.

If you really want to produce 2 for usage errors then we could try to
check things during ARGP_KEY_FINI. But I don't think it is too bad for
usage errors to produce a different status code from process errors.

Another change I made is that processing errors (either opening/reading
files or parsing the ELF structures) do not exit elfclassify directly.
The status is recorded and the next file is processed first. Only once
all inputs have been classified is the exit status set.

> > That is odd, I assumed !S_ISREG would by true for symlinks.
> 
> No, the open followed the symbolic link.  This is needed for rejecting
> directories.  I've added a comment.

I finally settled on treating directories specially (and immediately
"reject" them). I think this mostly matches how you treated them
originally.

> > > +  if (verbose)
> > > +    {
> > > +      fprintf (stderr, "info: ELF type: %d\n", elf_type);
> > > +      if (has_program_interpreter)
> > > +        fputs ("info: program interpreter found\n", stderr);
> > 
> > You might want to print the program interpreter here.
> 
> I can't do that without detecting first if the file is separated
> debuginfo.  (Separated debuginfo has a program header that points
> nowhere.)

Yes. That is a bit of a pain :{ I didn't try this myself. It does seem
too tricky.

> > > +      if (has_dynamic)
> > > +        fputs ("info: dynamic segment found\n", stderr);
> > > +      if (has_soname)
> > > +        fputs ("info: soname found\n", stderr);
> > 
> > You might want to print the soname found here.
> 
> This needs access to the dynamic string table.  I don't know how easy
> this is to implement.

I also didn't do this. It shouldn't be that hard. We should probably
use the PT headers instead of the sections. But it was indeed more work
than I had time for.

> > If you want to only allow one classification at a time you should check
> > whether command is already set and call something like:
> > argp_error (state, N_("Can only use one classification at a time."));
> 
> I tried that, but I ran into issues with that.  It also breaks --help
> with multiple/conflicting flags.
> 
> I'm trying to come up with a different command line syntax anyway.

I like the new command line syntax. Especially the --stdin[0] is very
powerful! I tried briefly to detect "conflicting" classification
options. But it isn't easy to do so consistently. So just let the user
do whatever they want, even if that means nothing will ever match all
classifications requested.

Cheers,

Mark

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] elfclassify tool
  2019-07-19 12:47       ` Mark Wielaard
@ 2019-07-19 13:43         ` Dmitry V. Levin
  2019-07-19 14:21           ` Mark
  2019-07-20 21:40         ` Mark Wielaard
  2019-07-22 15:55         ` Florian Weimer
  2 siblings, 1 reply; 36+ messages in thread
From: Dmitry V. Levin @ 2019-07-19 13:43 UTC (permalink / raw)
  To: elfutils-devel; +Cc: Mark Wielaard, Florian Weimer, Panu Matilainen

[-- Attachment #1: Type: text/plain, Size: 804 bytes --]

On Fri, Jul 19, 2019 at 02:47:09PM +0200, Mark Wielaard wrote:
[...]
> +static bool
> +is_shared (void)
> +{
> +  if (!is_loadable ())
> +    return false;
> +
> +  /* The ELF type is very clear: this is an executable.  */
> +  if (elf_type == ET_EXEC)
> +    return false;
> +
> +  /* If the object is marked as PIE, it is definitely an executable,
> +     and not a loadlable shared object.  */
> +  if (has_pie_flag)
> +    return false;
> +
> +  /* Treat a DT_SONAME tag as a strong indicator that this is a shared
> +     object.  */
> +  if (has_soname)
> +    return true;

I'm not sure DT_SONAME is a reliable indicator.

I've seen many cases of DT_SONAME being erroneously applied to 
non-libraries, e.g. lib.so was used as soname in openjdk executables.


-- 
ldv

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 801 bytes --]

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] elfclassify tool
  2019-07-19 13:43         ` Dmitry V. Levin
@ 2019-07-19 14:21           ` Mark
  2019-07-19 18:35             ` Dmitry V. Levin
  0 siblings, 1 reply; 36+ messages in thread
From: Mark @ 2019-07-19 14:21 UTC (permalink / raw)
  To: Dmitry V. Levin, elfutils-devel; +Cc: Florian Weimer, Panu Matilainen

On Fri, 2019-07-19 at 16:43 +0300, Dmitry V. Levin wrote:
> On Fri, Jul 19, 2019 at 02:47:09PM +0200, Mark Wielaard wrote:
> [...]
> > +static bool
> > +is_shared (void)
> > +{
> > +  if (!is_loadable ())
> > +    return false;
> > +
> > +  /* The ELF type is very clear: this is an executable.  */
> > +  if (elf_type == ET_EXEC)
> > +    return false;
> > +
> > +  /* If the object is marked as PIE, it is definitely an
> > executable,
> > +     and not a loadlable shared object.  */
> > +  if (has_pie_flag)
> > +    return false;
> > +
> > +  /* Treat a DT_SONAME tag as a strong indicator that this is a
> > shared
> > +     object.  */
> > +  if (has_soname)
> > +    return true;
> 
> I'm not sure DT_SONAME is a reliable indicator.
> 
> I've seen many cases of DT_SONAME being erroneously applied to 
> non-libraries, e.g. lib.so was used as soname in openjdk executables.

I didn't know. Is this really common?
I did find one java binary on my system that indeed has this problem.
$ eu-readelf -d /usr/lib/jvm/java-1.8.0-openjdk-1.8.0.212.b04-
0.el7_6.x86_64/jre/bin/policytool

Dynamic segment contains 39 entries:
 Addr: 0x0000000000600d88  Offset: 0x000d88  Link to section: [ 7]
'.dynstr'
  Type              Value
  NEEDED            Shared library: [libpthread.so.0]
  NEEDED            Shared library: [libz.so.1]
  NEEDED            Shared library: [libX11.so.6]
  NEEDED            Shared library: [libjli.so]
  NEEDED            Shared library: [libdl.so.2]
  NEEDED            Shared library: [libc.so.6]
  SONAME            Library soname: [lib.so]
  RPATH             Library rpath:
[$ORIGIN/../lib/amd64/jli:$ORIGIN/../lib/amd64]
[...]

But even so eu-elfclassify still doesn't treat it as a shared library,
because:
$ eu-elfclassify -v --shared policytool; echo $?
info: policytool: ELF kind: ELF_K_ELF (0x3)
info: policytool: ELF type: ET_EXEC (0x2)
info: policytool: PT_LOAD found
info: policytool: allocated PROGBITS section found
info: policytool: program interpreter found
info: policytool: dynamic segment found
info: policytool: soname found
info: policytool: DT_DEBUG found
1

So other characteristics like it being ET_EXEC mark it as an
executable. And I assume if it was PIE (ET_DYN) the PIE DT_FLAGS would
have caught it.

So, I don't think the code is wrong. We might want to tweak the comment
a bit though, to make it less definitive?

Cheers,

Mark

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] elfclassify tool
  2019-07-19 14:21           ` Mark
@ 2019-07-19 18:35             ` Dmitry V. Levin
  2019-07-19 21:00               ` Florian Weimer
  0 siblings, 1 reply; 36+ messages in thread
From: Dmitry V. Levin @ 2019-07-19 18:35 UTC (permalink / raw)
  To: Mark Wielaard; +Cc: elfutils-devel, Florian Weimer, Panu Matilainen

[-- Attachment #1: Type: text/plain, Size: 3065 bytes --]

On Fri, Jul 19, 2019 at 04:21:53PM +0200, Mark wrote:
> On Fri, 2019-07-19 at 16:43 +0300, Dmitry V. Levin wrote:
> > On Fri, Jul 19, 2019 at 02:47:09PM +0200, Mark Wielaard wrote:
> > [...]
> > > +static bool
> > > +is_shared (void)
> > > +{
> > > +  if (!is_loadable ())
> > > +    return false;
> > > +
> > > +  /* The ELF type is very clear: this is an executable.  */
> > > +  if (elf_type == ET_EXEC)
> > > +    return false;
> > > +
> > > +  /* If the object is marked as PIE, it is definitely an
> > > executable,
> > > +     and not a loadlable shared object.  */
> > > +  if (has_pie_flag)
> > > +    return false;
> > > +
> > > +  /* Treat a DT_SONAME tag as a strong indicator that this is a
> > > shared
> > > +     object.  */
> > > +  if (has_soname)
> > > +    return true;
> > 
> > I'm not sure DT_SONAME is a reliable indicator.
> > 
> > I've seen many cases of DT_SONAME being erroneously applied to 
> > non-libraries, e.g. lib.so was used as soname in openjdk executables.
> 
> I didn't know. Is this really common?

I don't think it is very common, but the mistake is very easy to make
(-Wl,-soname,lib.so) and it doesn't really break anything.  Apparently,
some projects apply the same linker flags that add DT_SONAME to all
generated files.

> I did find one java binary on my system that indeed has this problem.
> $ eu-readelf -d /usr/lib/jvm/java-1.8.0-openjdk-1.8.0.212.b04-
> 0.el7_6.x86_64/jre/bin/policytool
> 
> Dynamic segment contains 39 entries:
>  Addr: 0x0000000000600d88  Offset: 0x000d88  Link to section: [ 7]
> '.dynstr'
>   Type              Value
>   NEEDED            Shared library: [libpthread.so.0]
>   NEEDED            Shared library: [libz.so.1]
>   NEEDED            Shared library: [libX11.so.6]
>   NEEDED            Shared library: [libjli.so]
>   NEEDED            Shared library: [libdl.so.2]
>   NEEDED            Shared library: [libc.so.6]
>   SONAME            Library soname: [lib.so]
>   RPATH             Library rpath:
> [$ORIGIN/../lib/amd64/jli:$ORIGIN/../lib/amd64]
> [...]
> 
> But even so eu-elfclassify still doesn't treat it as a shared library,
> because:
> $ eu-elfclassify -v --shared policytool; echo $?
> info: policytool: ELF kind: ELF_K_ELF (0x3)
> info: policytool: ELF type: ET_EXEC (0x2)
> info: policytool: PT_LOAD found
> info: policytool: allocated PROGBITS section found
> info: policytool: program interpreter found
> info: policytool: dynamic segment found
> info: policytool: soname found
> info: policytool: DT_DEBUG found
> 1
> 
> So other characteristics like it being ET_EXEC mark it as an
> executable. And I assume if it was PIE (ET_DYN) the PIE DT_FLAGS would
> have caught it.

Yes, the checks above has_soname are much more definitive.

> So, I don't think the code is wrong. We might want to tweak the comment
> a bit though, to make it less definitive?

What I'm saying is that has_soname is just a hint which is probably even
less reliable than has_program_interpreter.


-- 
ldv

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 801 bytes --]

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] elfclassify tool
  2019-07-19 18:35             ` Dmitry V. Levin
@ 2019-07-19 21:00               ` Florian Weimer
  2019-07-19 21:23                 ` Dmitry V. Levin
  0 siblings, 1 reply; 36+ messages in thread
From: Florian Weimer @ 2019-07-19 21:00 UTC (permalink / raw)
  To: Dmitry V. Levin; +Cc: Mark Wielaard, elfutils-devel, Panu Matilainen

* Dmitry V. Levin:

>> So, I don't think the code is wrong. We might want to tweak the comment
>> a bit though, to make it less definitive?
>
> What I'm saying is that has_soname is just a hint which is probably even
> less reliable than has_program_interpreter.

If I recall correctly, I added the soname check to classify
/lib64/libc.so.6 as a library, not an executable.  So it didn't come
completely out of nowhere.

Thanks,
Florian

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] elfclassify tool
  2019-07-19 21:00               ` Florian Weimer
@ 2019-07-19 21:23                 ` Dmitry V. Levin
  2019-07-19 21:36                   ` Mark Wielaard
  0 siblings, 1 reply; 36+ messages in thread
From: Dmitry V. Levin @ 2019-07-19 21:23 UTC (permalink / raw)
  To: Florian Weimer; +Cc: Mark Wielaard, elfutils-devel, Panu Matilainen

[-- Attachment #1: Type: text/plain, Size: 762 bytes --]

On Fri, Jul 19, 2019 at 11:00:49PM +0200, Florian Weimer wrote:
> * Dmitry V. Levin:
> 
> >> So, I don't think the code is wrong. We might want to tweak the comment
> >> a bit though, to make it less definitive?
> >
> > What I'm saying is that has_soname is just a hint which is probably even
> > less reliable than has_program_interpreter.
> 
> If I recall correctly, I added the soname check to classify
> /lib64/libc.so.6 as a library, not an executable.  So it didn't come
> completely out of nowhere.

Well, /lib64/libc.so.6 is not just a library, it's also a valid executable.

If the ELF type is ET_DYN and the object is not marked as DF_1_PIE,
could we come up with a more reliable heuristic than DT_SONAME and PT_INTERP?


-- 
ldv

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 801 bytes --]

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] elfclassify tool
  2019-07-19 21:23                 ` Dmitry V. Levin
@ 2019-07-19 21:36                   ` Mark Wielaard
  2019-07-19 22:57                     ` Dmitry V. Levin
  0 siblings, 1 reply; 36+ messages in thread
From: Mark Wielaard @ 2019-07-19 21:36 UTC (permalink / raw)
  To: Dmitry V. Levin; +Cc: Florian Weimer, elfutils-devel, Panu Matilainen

On Sat, Jul 20, 2019 at 12:23:08AM +0300, Dmitry V. Levin wrote:
> On Fri, Jul 19, 2019 at 11:00:49PM +0200, Florian Weimer wrote:
> > * Dmitry V. Levin:
> > 
> > >> So, I don't think the code is wrong. We might want to tweak the comment
> > >> a bit though, to make it less definitive?
> > >
> > > What I'm saying is that has_soname is just a hint which is probably even
> > > less reliable than has_program_interpreter.
> > 
> > If I recall correctly, I added the soname check to classify
> > /lib64/libc.so.6 as a library, not an executable.  So it didn't come
> > completely out of nowhere.
> 
> Well, /lib64/libc.so.6 is not just a library, it's also a valid executable.
> 
> If the ELF type is ET_DYN and the object is not marked as DF_1_PIE,
> could we come up with a more reliable heuristics than DT_SONAME and PT_INTERP?

Why do you feel it is unreliable? Do you have any examples of files
misidentified? I tested a bit and --shared seems to correctly
identify all shared libraries. I did add --program as a counterpart
to --executable if you really want to identify such "libraries" as
programs. But in general it looks like --shared and --executable come
up with the correct classification.

The only two examples I could find were the glibc and Qt binaries
which have "dual use" library/executables. And I believe --shared
correctly identifies them as primarily shared libraries.

Cheers,

Mark

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] elfclassify tool
  2019-07-19 21:36                   ` Mark Wielaard
@ 2019-07-19 22:57                     ` Dmitry V. Levin
  2019-07-20 21:51                       ` Mark Wielaard
  0 siblings, 1 reply; 36+ messages in thread
From: Dmitry V. Levin @ 2019-07-19 22:57 UTC (permalink / raw)
  To: Mark Wielaard; +Cc: Florian Weimer, elfutils-devel, Panu Matilainen

[-- Attachment #1: Type: text/plain, Size: 1587 bytes --]

On Fri, Jul 19, 2019 at 11:36:53PM +0200, Mark Wielaard wrote:
> On Sat, Jul 20, 2019 at 12:23:08AM +0300, Dmitry V. Levin wrote:
> > On Fri, Jul 19, 2019 at 11:00:49PM +0200, Florian Weimer wrote:
> > > * Dmitry V. Levin:
> > > 
> > > >> So, I don't think the code is wrong. We might want to tweak the comment
> > > >> a bit though, to make it less definitive?
> > > >
> > > > What I'm saying is that has_soname is just a hint which is probably even
> > > > less reliable than has_program_interpreter.
> > > 
> > > If I recall correctly, I added the soname check to classify
> > > /lib64/libc.so.6 as a library, not an executable.  So it didn't come
> > > completely out of nowhere.
> > 
> > Well, /lib64/libc.so.6 is not just a library, it's also a valid executable.
> > 
> > If the ELF type is ET_DYN and the object is not marked as DF_1_PIE,
> > could we come up with a more reliable heuristics than DT_SONAME and PT_INTERP?
> 
> Why do you feel it is unreliable? Do you have any examples of files
> misidentified?

No, I don't have such examples because most (if not all) ET_DYN
non-DF_1_PIE objects we have nowadays are actually libraries regardless
of their DT_SONAME or PT_INTERP.

What actually disqualifies these objects from being libraries, besides
missing PT_DYNAMIC?

The only reason why I feel uncomfortable relying on this has_soname check
is that DT_SONAME is so easily added unnoticed by mistake.

btw, I think it would be appropriate to move the has_dynamic check before
the first check in is_shared that returns true.


-- 
ldv

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 801 bytes --]

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] elfclassify tool
  2019-07-19 12:47       ` Mark Wielaard
  2019-07-19 13:43         ` Dmitry V. Levin
@ 2019-07-20 21:40         ` Mark Wielaard
  2019-07-22 15:55         ` Florian Weimer
  2 siblings, 0 replies; 36+ messages in thread
From: Mark Wielaard @ 2019-07-20 21:40 UTC (permalink / raw)
  To: Florian Weimer; +Cc: elfutils-devel, Panu Matilainen

[-- Attachment #1: Type: text/plain, Size: 884 bytes --]

On Fri, Jul 19, 2019 at 02:47:09PM +0200, Mark Wielaard wrote:
> The individual commits can be found here:
> https://code.wildebeest.org/git/user/mjw/elfutils/log/?h=elfclassify
> 
> Please let me know if any of this looks bad or unusual.
> 
> I'll write some testcases.

And as always when writing tests you find some corner cases :)
So attached are some testcases plus some fixes:
- Handle files without sections (only phdrs), in particular handle
  PT_DYNAMIC directly (not through a section).
- is_loadable should check whether it isn't a debug-only file
  (but only if there are section headers, if not, it cannot be debug-only).
- For --debug-only we can also have just a .symtab (but no .debug sections).
  And any allocated section makes it not-debug-only (except for SHT_NOBITS
  or SHT_NOTE)
- Detect .zdebug sections too (old GNU compressed ELF sections).

Cheers,

Mark

[-- Attachment #2: 0001-elfclassify-Add-elfclassify-tests-and-fix-issues-fou.patch --]
[-- Type: text/x-diff, Size: 22129 bytes --]

From b389a9343193adb357500ccf018ff56495641697 Mon Sep 17 00:00:00 2001
From: Mark Wielaard <mark@klomp.org>
Date: Sat, 20 Jul 2019 23:30:35 +0200
Subject: [PATCH] elfclassify: Add elfclassify tests and fix issues found.

---
 src/elfclassify.c             | 113 +++++++-----
 tests/Makefile.am             |   7 +-
 tests/run-elfclassify-self.sh |  36 ++++
 tests/run-elfclassify.sh      | 327 ++++++++++++++++++++++++++++++++++
 4 files changed, 432 insertions(+), 51 deletions(-)
 create mode 100755 tests/run-elfclassify-self.sh
 create mode 100755 tests/run-elfclassify.sh

diff --git a/src/elfclassify.c b/src/elfclassify.c
index 83a97d47c..1df0789d2 100644
--- a/src/elfclassify.c
+++ b/src/elfclassify.c
@@ -202,7 +202,8 @@ elf_type_string (int type)
 
 static int elf_type;
 static bool has_program_load;
-static bool has_progbits_alloc;
+static bool has_sections;
+static bool has_bits_alloc;
 static bool has_program_interpreter;
 static bool has_dynamic;
 static bool has_soname;
@@ -219,7 +220,8 @@ run_classify (void)
   /* Reset to unanalyzed default.  */
   elf_type = 0;
   has_program_load = false;
-  has_progbits_alloc = false;
+  has_sections = false;
+  has_bits_alloc = false;
   has_program_interpreter = false;
   has_dynamic = false;
   has_soname = false;
@@ -247,6 +249,7 @@ run_classify (void)
   elf_type = ehdr->e_type;
 
   /* Examine program headers.  */
+  GElf_Phdr dyn_seg = {};
   {
     size_t nphdrs;
     if (elf_getphdrnum (elf, &nphdrs) != 0)
@@ -264,7 +267,10 @@ run_classify (void)
 	    return false;
 	  }
 	if (phdr->p_type == PT_DYNAMIC)
-	  has_dynamic = true;
+	  {
+	    dyn_seg = *phdr;
+	    has_dynamic = true;
+	  }
 	if (phdr->p_type == PT_INTERP)
 	  has_program_interpreter = true;
 	if (phdr->p_type == PT_LOAD)
@@ -272,7 +278,18 @@ run_classify (void)
       }
   }
 
-  Elf_Scn *dyn_section = NULL;
+  /* Do we have sections?  */
+  {
+    size_t nshdrs;
+    if (elf_getshdrnum (elf, &nshdrs) != 0)
+      {
+	elf_issue (N_("section headers"));
+	return false;
+      }
+    if (nshdrs > 0)
+      has_sections = true;
+  }
+
   {
     size_t shstrndx;
     if (unlikely (elf_getshdrstrndx (elf, &shstrndx) < 0))
@@ -303,26 +320,28 @@ run_classify (void)
         if (verbose > 2)
           fprintf (stderr, "debug: section header %s (type %d) found\n",
                    section_name, shdr->sh_type);
-        if (shdr->sh_type == SHT_DYNAMIC)
-          {
-            if (verbose > 1)
-              fputs ("debug: dynamic section found\n", stderr);
-            dyn_section = scn;
-          }
         if (shdr->sh_type == SHT_SYMTAB)
           {
             if (verbose > 1)
               fputs ("debug: symtab section found\n", stderr);
             has_symtab = true;
           }
-	if (shdr->sh_type == SHT_PROGBITS && (shdr->sh_flags & SHF_ALLOC) != 0)
+	/* NOBITS and NOTE sections can be in any file.  We want to be
+	   sure there is at least one other allocated section.  */
+	if (shdr->sh_type != SHT_NOBITS
+	    && shdr->sh_type != SHT_NOTE
+	    && (shdr->sh_flags & SHF_ALLOC) != 0)
 	  {
-	    if (verbose > 1 && !has_progbits_alloc)
-	      fputs ("debug: allocated PROGBITS section found\n", stderr);
-	    has_progbits_alloc = true;
+	    if (verbose > 1 && !has_bits_alloc)
+	      fputs ("debug: allocated (non-nobits/note) section found\n",
+		     stderr);
+	    has_bits_alloc = true;
 	  }
         const char *debug_prefix = ".debug_";
-        if (strncmp (section_name, debug_prefix, strlen (debug_prefix)) == 0)
+        const char *zdebug_prefix = ".zdebug_";
+        if (strncmp (section_name, debug_prefix, strlen (debug_prefix)) == 0
+	    || strncmp (section_name, zdebug_prefix,
+			strlen (zdebug_prefix)) == 0)
           {
             if (verbose > 1 && !has_debug_sections)
               fputs ("debug: .debug_* section found\n", stderr);
@@ -347,34 +366,29 @@ run_classify (void)
   /* Examine the dynamic section.  */
   if (has_dynamic)
     {
-      if (dyn_section != NULL)
-        {
-          Elf_Data *data = elf_getdata (dyn_section, NULL);
-          if (verbose > 2)
-            fprintf (stderr, "debug: Elf_Data for dynamic section: %p\n",
-                     data);
-
-          if (data != NULL)
-            for (int dyn_idx = 0; ; ++dyn_idx)
-              {
-                GElf_Dyn dyn_storage;
-                GElf_Dyn *dyn = gelf_getdyn (data, dyn_idx, &dyn_storage);
-                if (dyn == NULL)
-                  break;
-                if (verbose > 2)
-                  fprintf (stderr, "debug: dynamic entry %d"
-                           " with tag %llu found\n",
-                           dyn_idx, (unsigned long long int) dyn->d_tag);
-                if (dyn->d_tag == DT_SONAME)
-                  has_soname = true;
-                if (dyn->d_tag == DT_FLAGS_1 && (dyn->d_un.d_val & DF_1_PIE))
-                  has_pie_flag = true;
-                if (dyn->d_tag == DT_DEBUG)
-                  has_dt_debug = true;
-                if (dyn->d_tag == DT_NULL)
-                  break;
-              }
-        }
+      Elf_Data *data = elf_getdata_rawchunk (elf, dyn_seg.p_offset,
+					     dyn_seg.p_filesz,
+					     ELF_T_DYN);
+      if (data != NULL)
+	for (int dyn_idx = 0; ; ++dyn_idx)
+	  {
+	    GElf_Dyn dyn_storage;
+	    GElf_Dyn *dyn = gelf_getdyn (data, dyn_idx, &dyn_storage);
+	    if (dyn == NULL)
+	      break;
+	    if (verbose > 2)
+	      fprintf (stderr, "debug: dynamic entry %d"
+		       " with tag %llu found\n",
+		       dyn_idx, (unsigned long long int) dyn->d_tag);
+	    if (dyn->d_tag == DT_SONAME)
+	      has_soname = true;
+	    if (dyn->d_tag == DT_FLAGS_1 && (dyn->d_un.d_val & DF_1_PIE))
+	      has_pie_flag = true;
+	    if (dyn->d_tag == DT_DEBUG)
+	      has_dt_debug = true;
+	    if (dyn->d_tag == DT_NULL)
+	      break;
+	  }
     }
 
   if (verbose > 0)
@@ -383,8 +397,10 @@ run_classify (void)
 	       elf_type_string (elf_type), elf_type);
       if (has_program_load)
         fprintf (stderr, "info: %s: PT_LOAD found\n", current_path);
-      if (has_progbits_alloc)
-	fprintf (stderr, "info: %s: allocated PROGBITS section found\n",
+      if (has_sections)
+	fprintf (stderr, "info: %s: has sections\n", current_path);
+      if (has_bits_alloc)
+	fprintf (stderr, "info: %s: allocated (real) section found\n",
 		 current_path);
       if (has_program_interpreter)
         fprintf (stderr, "info: %s: program interpreter found\n",
@@ -445,7 +461,8 @@ is_loadable (void)
 {
   return elf_kind (elf) == ELF_K_ELF
     && (elf_type == ET_EXEC || elf_type == ET_DYN)
-    && has_program_load;
+    && has_program_load
+    && (!has_sections || has_bits_alloc); /* It isn't debug-only.  */
 }
 
 /* Return true if the file is an ELF file which has a symbol table or
@@ -467,8 +484,8 @@ is_debug_only (void)
 {
   return elf_kind (elf) != ELF_K_NONE
     && (elf_type == ET_REL || elf_type == ET_EXEC || elf_type == ET_DYN)
-    && has_debug_sections
-    && !has_progbits_alloc;
+    && (has_debug_sections || has_symtab)
+    && !has_bits_alloc;
 }
 
 static bool
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 0ac353152..2ff7dfc46 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -164,7 +164,8 @@ TESTS = run-arextract.sh run-arsymtest.sh run-ar.sh newfile test-nlist \
 	run-typeiter-many.sh run-strip-test-many.sh \
 	run-strip-version.sh run-xlate-note.sh \
 	run-readelf-discr.sh \
-	run-dwelf_elf_e_machine_string.sh
+	run-dwelf_elf_e_machine_string.sh \
+	run-elfclassify.sh run-elfclassify-self.sh
 
 if !BIARCH
 export ELFUTILS_DISABLE_BIARCH = 1
@@ -435,8 +436,8 @@ EXTRA_DIST = run-arextract.sh run-arsymtest.sh run-ar.sh \
 	     run-xlate-note.sh \
 	     run-readelf-discr.sh \
 	     testfile-rng.debug.bz2 testfile-urng.debug.bz2 \
-	     run-dwelf_elf_e_machine_string.sh
-
+	     run-dwelf_elf_e_machine_string.sh \
+	     run-elfclassify.sh run-elfclassify-self.sh
 
 if USE_VALGRIND
 valgrind_cmd='valgrind -q --leak-check=full --error-exitcode=1'
diff --git a/tests/run-elfclassify-self.sh b/tests/run-elfclassify-self.sh
new file mode 100755
index 000000000..c48ab9c93
--- /dev/null
+++ b/tests/run-elfclassify-self.sh
@@ -0,0 +1,36 @@
+#!/bin/sh
+# Copyright (C) 2019 Red Hat, Inc.
+# This file is part of elfutils.
+#
+# This file is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# elfutils is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+. $srcdir/test-subr.sh
+
+testrun_on_self ${abs_top_builddir}/src/elfclassify --elf-file
+testrun_on_self ${abs_top_builddir}/src/elfclassify --not-core
+testrun_on_self ${abs_top_builddir}/src/elfclassify --unstripped
+testrun_on_self ${abs_top_builddir}/src/elfclassify --not-linux-kernel-module
+
+testrun_on_self_lib ${abs_top_builddir}/src/elfclassify --shared
+testrun_on_self_lib ${abs_top_builddir}/src/elfclassify --loadable
+testrun_on_self_lib ${abs_top_builddir}/src/elfclassify --not-executable
+testrun_on_self_lib ${abs_top_builddir}/src/elfclassify --not-program
+
+testrun_on_self_exe ${abs_top_builddir}/src/elfclassify --executable
+testrun_on_self_exe ${abs_top_builddir}/src/elfclassify --program
+testrun_on_self_exe ${abs_top_builddir}/src/elfclassify --loadable
+testrun_on_self_exe ${abs_top_builddir}/src/elfclassify --not-shared
+
+testrun ${abs_top_builddir}/src/elfclassify --not-shared $self_test_files_obj
+testrun ${abs_top_builddir}/src/elfclassify --not-executable $self_test_files_obj
diff --git a/tests/run-elfclassify.sh b/tests/run-elfclassify.sh
new file mode 100755
index 000000000..5a849bbdd
--- /dev/null
+++ b/tests/run-elfclassify.sh
@@ -0,0 +1,327 @@
+#!/bin/sh
+# Copyright (C) 2019 Red Hat, Inc.
+# This file is part of elfutils.
+#
+# This file is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# elfutils is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+. $srcdir/test-subr.sh
+
+core_files=\
+"testfile_aarch64_core \
+ testfile-backtrace-demangle.core \
+ testfiledwarfinlines.core \
+ testfile_i686_core \
+ testfile-m68k-core \
+ testfile-riscv64-core \
+ backtrace.aarch64.core \
+ backtrace.i386.core \
+ backtrace.ppc.core \
+ backtrace.s390.core"
+
+testfiles $core_files
+
+echo "elfclassify --core"
+testrun ${abs_top_builddir}/src/elfclassify --core $core_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --core --print $core_files <<EOF
+$(echo $core_files | sed -e "s/ /\n/g")
+EOF
+
+echo "core files are not programs"
+testrun ${abs_top_builddir}/src/elfclassify --not-program $core_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-program --print $core_files <<EOF
+$(echo $core_files | sed -e "s/ /\n/g")
+EOF
+
+echo "core files are not shared"
+testrun ${abs_top_builddir}/src/elfclassify --not-shared $core_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-shared --print $core_files <<EOF
+$(echo $core_files | sed -e "s/ /\n/g")
+EOF
+
+echo "core files are not kernel-modules"
+testrun ${abs_top_builddir}/src/elfclassify --not-linux-kernel-module $core_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-linux-kernel-module --print $core_files <<EOF
+$(echo $core_files | sed -e "s/ /\n/g")
+EOF
+
+echo "core files are not debug-only"
+testrun ${abs_top_builddir}/src/elfclassify --not-debug-only $core_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-debug-only --print $core_files <<EOF
+$(echo $core_files | sed -e "s/ /\n/g")
+EOF
+
+object_files=\
+"debug-ranges-no-lowpc.o \
+ testfile-annobingroup-i386.o \
+ testfile-bpf-dis1.o \
+ testfile-debug-rel-g.o \
+ testfile-gnu-property-note.o"
+
+testfiles $object_files
+
+echo "elfclassify --elf-file"
+testrun ${abs_top_builddir}/src/elfclassify --elf-file $object_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --elf-file --print $object_files <<EOF
+$(echo $object_files | sed -e "s/ /\n/g")
+EOF
+
+echo "object files are not archives"
+testrun ${abs_top_builddir}/src/elfclassify --not-elf-archive $object_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-elf-archive --print $object_files <<EOF
+$(echo $object_files | sed -e "s/ /\n/g")
+EOF
+
+echo "object files are not core files"
+testrun ${abs_top_builddir}/src/elfclassify --not-core $object_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-core --print $object_files <<EOF
+$(echo $object_files | sed -e "s/ /\n/g")
+EOF
+
+echo "object files are not program files"
+testrun ${abs_top_builddir}/src/elfclassify --not-program $object_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-program --print $object_files <<EOF
+$(echo $object_files | sed -e "s/ /\n/g")
+EOF
+
+echo "object files are not shared files"
+testrun ${abs_top_builddir}/src/elfclassify --not-shared $object_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-shared --print $object_files <<EOF
+$(echo $object_files | sed -e "s/ /\n/g")
+EOF
+
+echo "object files are not kernel modules"
+testrun ${abs_top_builddir}/src/elfclassify --not-linux-kernel-module $object_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-linux-kernel-module --print $object_files <<EOF
+$(echo $object_files | sed -e "s/ /\n/g")
+EOF
+
+echo "object files are not debug-only files"
+testrun ${abs_top_builddir}/src/elfclassify --not-debug-only $object_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-debug-only --print $object_files <<EOF
+$(echo $object_files | sed -e "s/ /\n/g")
+EOF
+
+ar_files="testarchive64.a"
+
+testfiles $ar_files
+
+echo "elfclassify --elf-archive"
+testrun ${abs_top_builddir}/src/elfclassify --elf-archive $ar_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --elf-archive --print $ar_files <<EOF
+$(echo $ar_files | sed -e "s/ /\n/g")
+EOF
+
+echo "archives are not elf-files"
+testrun ${abs_top_builddir}/src/elfclassify --not-elf-file $ar_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-elf-file --print $ar_files <<EOF
+$(echo $ar_files | sed -e "s/ /\n/g")
+EOF
+
+echo "archives are not core files"
+testrun ${abs_top_builddir}/src/elfclassify --not-core $ar_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-core --print $ar_files <<EOF
+$(echo $ar_files | sed -e "s/ /\n/g")
+EOF
+
+echo "archives are not program files"
+testrun ${abs_top_builddir}/src/elfclassify --not-program $ar_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-program --print $ar_files <<EOF
+$(echo $ar_files | sed -e "s/ /\n/g")
+EOF
+
+echo "archives are not shared files"
+testrun ${abs_top_builddir}/src/elfclassify --not-shared $ar_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-shared --print $ar_files <<EOF
+$(echo $ar_files | sed -e "s/ /\n/g")
+EOF
+
+lib_files=\
+"testfile52-32.noshdrs.so \
+ libtestfile_multi_shared.so \
+ testfile52-32.prelink.so \
+ testfile52-32.so
+ testfile54-64.noshdrs.so \
+ testfile54-64.prelink.so \
+ testfile54-64.so \
+ testlib_dynseg.so"
+
+testfiles $lib_files
+
+echo "elfclassify --shared"
+testrun ${abs_top_builddir}/src/elfclassify --shared $lib_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --shared --print $lib_files <<EOF
+$(echo $lib_files | sed -e "s/ /\n/g")
+EOF
+
+echo "shared files are loadable"
+testrun ${abs_top_builddir}/src/elfclassify --loadable $lib_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --loadable --print $lib_files <<EOF
+$(echo $lib_files | sed -e "s/ /\n/g")
+EOF
+
+echo "shared files are not executables"
+testrun ${abs_top_builddir}/src/elfclassify --not-executable $lib_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-executable --print $lib_files <<EOF
+$(echo $lib_files | sed -e "s/ /\n/g")
+EOF
+
+echo "shared files are not debug-only"
+testrun ${abs_top_builddir}/src/elfclassify --not-debug-only $lib_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-debug-only --print $lib_files <<EOF
+$(echo $lib_files | sed -e "s/ /\n/g")
+EOF
+
+echo "shared files are not kernel modules"
+testrun ${abs_top_builddir}/src/elfclassify --not-linux-kernel-module $lib_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-linux-kernel-module --print $lib_files <<EOF
+$(echo $lib_files | sed -e "s/ /\n/g")
+EOF
+
+exe_files=\
+"backtrace.aarch64.exec \
+ backtrace.i386.exec \
+ backtrace.ppc.exec \
+ backtrace.s390x.exec \
+ testfile70.exec \
+ test-offset-loop \
+ testfilebaztab \
+ testfilebaztabppc64"
+
+testfiles $exe_files
+# Each check runs twice: once plain, then with --print under testrun_compare.
+echo "elfclassify --program"
+testrun ${abs_top_builddir}/src/elfclassify --program $exe_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --program --print $exe_files <<EOF
+$(echo $exe_files | sed -e "s/ /\n/g")
+EOF
+
+echo "programs are executables (in this case)"
+testrun ${abs_top_builddir}/src/elfclassify --executable $exe_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --executable --print $exe_files <<EOF
+$(echo $exe_files | sed -e "s/ /\n/g")
+EOF
+
+echo "programs are not shared libraries (in this case)"
+testrun ${abs_top_builddir}/src/elfclassify --not-shared $exe_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-shared --print $exe_files <<EOF
+$(echo $exe_files | sed -e "s/ /\n/g")
+EOF
+
+echo "programs are not kernel-modules"
+testrun ${abs_top_builddir}/src/elfclassify --not-linux-kernel-module $exe_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-linux-kernel-module --print $exe_files <<EOF
+$(echo $exe_files | sed -e "s/ /\n/g")
+EOF
+
+echo "programs are not debug-only"
+testrun ${abs_top_builddir}/src/elfclassify --not-debug-only $exe_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-debug-only --print $exe_files <<EOF
+$(echo $exe_files | sed -e "s/ /\n/g")
+EOF
+
+kmod_files=\
+"hello_aarch64.ko \
+ hello_csky.ko \
+ hello_i386.ko \
+ hello_m68k.ko \
+ hello_ppc64.ko \
+ hello_riscv64.ko \
+ hello_s390.ko \
+ hello_x86_64.ko"
+
+testfiles $kmod_files
+
+echo "elfclassify --linux-kernel-module"
+testrun ${abs_top_builddir}/src/elfclassify --linux-kernel-module $kmod_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --linux-kernel-module --print $kmod_files <<EOF
+$(echo $kmod_files | sed -e "s/ /\n/g")
+EOF
+
+echo "kmods are unstripped"
+testrun ${abs_top_builddir}/src/elfclassify --unstripped $kmod_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --unstripped --print $kmod_files <<EOF
+$(echo $kmod_files | sed -e "s/ /\n/g")
+EOF
+
+echo "kmods are not debug-only"
+testrun ${abs_top_builddir}/src/elfclassify --not-debug-only $kmod_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-debug-only --print $kmod_files <<EOF
+$(echo $kmod_files | sed -e "s/ /\n/g")
+EOF
+
+echo "kmods are not loadable (in the normal sense)"
+testrun ${abs_top_builddir}/src/elfclassify --not-loadable $kmod_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-loadable --print $kmod_files <<EOF
+$(echo $kmod_files | sed -e "s/ /\n/g")
+EOF
+# elfcompress goes through testrun like every other tool invocation here.
+echo "gnu compressed kmods are unstripped"
+testrun ${abs_top_builddir}/src/elfcompress -t gnu --force $kmod_files
+testrun ${abs_top_builddir}/src/elfclassify --unstripped $kmod_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --unstripped --print $kmod_files <<EOF
+$(echo $kmod_files | sed -e "s/ /\n/g")
+EOF
+
+debug_files=\
+"testfile15.debug \
+ testfile35.debug \
+ testfile40.debug \
+ testfile48.debug \
+ testfile53-32.debug \
+ testfile53-64.debug \
+ testfilebazdbg.debug \
+ testfilebazdbgppc64.debug \
+ addrx_constx-4.dwo \
+ addrx_constx-5.dwo"
+
+testfiles $debug_files
+
+echo "elfclassify --debug-only"
+testrun ${abs_top_builddir}/src/elfclassify --debug-only $debug_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --debug-only --print $debug_files <<EOF
+$(echo $debug_files | sed -e "s/ /\n/g")
+EOF
+
+echo "debug-only files are unstripped"
+testrun ${abs_top_builddir}/src/elfclassify --unstripped $debug_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --unstripped --print $debug_files <<EOF
+$(echo $debug_files | sed -e "s/ /\n/g")
+EOF
+
+echo "debug-only files are not programs"
+testrun ${abs_top_builddir}/src/elfclassify --not-program $debug_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-program --print $debug_files <<EOF
+$(echo $debug_files | sed -e "s/ /\n/g")
+EOF
+
+echo "debug-only files are not shared"
+testrun ${abs_top_builddir}/src/elfclassify --not-shared $debug_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-shared --print $debug_files <<EOF
+$(echo $debug_files | sed -e "s/ /\n/g")
+EOF
+# elfcompress goes through testrun like every other tool invocation here.
+echo "compress the debug sections and try again"
+testrun ${abs_top_builddir}/src/elfcompress -t gnu --force $debug_files
+
+echo "again unstripped"
+testrun ${abs_top_builddir}/src/elfclassify --unstripped $debug_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --unstripped --print $debug_files <<EOF
+$(echo $debug_files | sed -e "s/ /\n/g")
+EOF
+
+echo "again debug-only"
+testrun ${abs_top_builddir}/src/elfclassify --debug-only $debug_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --debug-only --print $debug_files <<EOF
+$(echo $debug_files | sed -e "s/ /\n/g")
+EOF
-- 
2.20.1


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] elfclassify tool
  2019-07-19 22:57                     ` Dmitry V. Levin
@ 2019-07-20 21:51                       ` Mark Wielaard
  2019-07-25 22:39                         ` [PATCH] elfclassify: Add --library classification Mark Wielaard
  2019-07-26 23:04                         ` [PATCH] elfclassify tool Dmitry V. Levin
  0 siblings, 2 replies; 36+ messages in thread
From: Mark Wielaard @ 2019-07-20 21:51 UTC (permalink / raw)
  To: Dmitry V. Levin; +Cc: Florian Weimer, elfutils-devel, Panu Matilainen

On Sat, Jul 20, 2019 at 01:57:27AM +0300, Dmitry V. Levin wrote:
> On Fri, Jul 19, 2019 at 11:36:53PM +0200, Mark Wielaard wrote:
> > > If the ELF type is ET_DYN and the object is not marked as DF_1_PIE,
> > > could we come up with a more reliable heuristic than DT_SONAME and PT_INTERP?
> > 
> > Why do you feel it is unreliable? Do you have any examples of files
> > misidentified?
> 
> No, I don't have such examples because most (if not all) ET_DYN
> non-DF_1_PIE objects we have nowadays are actually libraries regardless
> of their DT_SONAME or PT_INTERP.
> 
> What actually disqualifies these objects from being libraries, besides
> missing PT_DYNAMIC?

The goal here was to have a classification specifically to handle things
like glibc or Qt executable.so shared libraries, so that --shared would
mark them primarily as shared libraries (and so --executable would not
mark them as programs). This is because for shared libraries you might
want to do some processing you don't want for executables (or the other
way around). Think about deciding whether or not to keep the .symtab.

Maybe you are looking for another goal/classification?  For example I
added --program which does classify those special files as programs
(even though --shared also says they are shared libraries). Maybe you
are looking for a different classification similar/dual to that. Say
--library?

> The only reason why I feel uncomfortable to rely on this has_soname check
> is that DT_SONAME is so easily added unnoticed by mistake.

But it isn't used in isolation to mark something as --shared.

> btw, I think it would be appropriate to move the has_dynamic check before
> the first check in is_shared that returns true.

Yes, that is probably fine, but does it really matter?
Florian, what do you think?

Cheers,

Mark

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] elfclassify tool
  2019-07-19 12:47       ` Mark Wielaard
  2019-07-19 13:43         ` Dmitry V. Levin
  2019-07-20 21:40         ` Mark Wielaard
@ 2019-07-22 15:55         ` Florian Weimer
  2019-07-26 22:11           ` Mark Wielaard
  2 siblings, 1 reply; 36+ messages in thread
From: Florian Weimer @ 2019-07-22 15:55 UTC (permalink / raw)
  To: Mark Wielaard; +Cc: elfutils-devel, Panu Matilainen

* Mark Wielaard:

> On Thu, 2019-04-18 at 13:17 +0200, Florian Weimer wrote:
>> * Florian Weimer:
>> 
>> > > BTW. Florian, the extra options are certainly not required for you to
>> > > implement to get eu-elfclassify accepted. They are just suggestions,
>> > > which we might decide not to do/add. Or they can be added by others if
>> > > they think they are useful.
>> > 
>> > Understood.  I would rather fix the command line syntax as a priority,
>> > implement --unstripped, and add a test suite.
>> 
>> The patch below, also available here:
>> 
>>   <https://pagure.io/fweimer/elfutils/commits/elfclassify>
>> 
>> reworks the command line parser, implements filtering of file lists, and
>> adds the --unstripped option.
>
> That looks really good. I went ahead and fixed a couple of nits and
> added some of my suggestions. I'll respond to your other email
> explaining some of my reasoning. The changes I made are:

Wow, this is much more than I expected.  Thanks!

>   elfclassify: Fix bre -> be typo in "unstripped" option help text.
>   elfclassify: When reading stdin make sure paths don't include newline.
>   elfclassify: Allow inspecting compressed or (kernel) image files with -z.
>   elfclassify: Always clean up ELF file and descriptor if one is still open.
>   elfclassify: Don't treat errors in elf_open or run_classify as fatal.
>   elfclassify: Add --quiet/-q to suppress error messages.
>   elfclassify: Add \n to fputs debug output.
>   elfclassify: Add --file/-f for testing just regular files.
>   elfclassify: Require --elf by default. Add more classifications.
>   elfclassify: Add elf_kind and elf_type strings for verbose output.
>   elfclassify: Require PT_LOAD for loadable classification.
>   elfclassify: Add --program classification.
>   elfclassify: Don't use ARGP_NO_EXIT and document exit code expectation.
>   elfclassify: Add --linux-kernel-module classification.
>   elfclassify: Add --debug-only classification.

I went through these patches, albeit in a somewhat cursory fashion, and
they look okay to me.

Do you think this is enough to port over RPM's find-debuginfo.sh?

Thanks,
Florian

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [PATCH] elfclassify: Add --library classification.
  2019-07-20 21:51                       ` Mark Wielaard
@ 2019-07-25 22:39                         ` Mark Wielaard
  2019-07-26 22:53                           ` Dmitry V. Levin
  2019-07-26 23:04                         ` [PATCH] elfclassify tool Dmitry V. Levin
  1 sibling, 1 reply; 36+ messages in thread
From: Mark Wielaard @ 2019-07-25 22:39 UTC (permalink / raw)
  To: elfutils-devel
  Cc: Dmitry V. Levin, Florian Weimer, Panu Matilainen, Mark Wielaard

> Maybe you are looking for another goal/classification?  For example I
> added --program which does classify those special files as programs
> (even though --shared also says they are shared libraries). Maybe you
> are looking for a different classification similar/dual to that. Say
> --library?

This patch implements this and updates the --help text to better explain
the differences between --loadable, --shared/--executable and
--program/--library.

Does this look reasonable?

Cheers,

Mark
---
 src/elfclassify.c | 59 +++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 50 insertions(+), 9 deletions(-)

diff --git a/src/elfclassify.c b/src/elfclassify.c
index 1df0789d..ebd42c1d 100644
--- a/src/elfclassify.c
+++ b/src/elfclassify.c
@@ -583,6 +583,30 @@ is_program (void)
   return false;
 }
 
+/* Like is_shared, but without requiring that the object is primarily
+   a library: a loadable ET_DYN object with a dynamic segment and
+   neither the DF_1_PIE flag nor a DT_DEBUG entry.  Such a file may
+   also be usable as an executable.  */
+static bool
+is_library (void)
+{
+  /* Structural requirements, tested in order: shared libraries are
+     always ET_DYN, the file has to be loadable at all, and without a
+     PT_DYNAMIC segment the library cannot be loaded.  */
+  if (elf_type != ET_DYN || !is_loadable () || !has_dynamic)
+    return false;
+
+  /* Either marker means this really is a (PIE) executable.  See
+     is_shared.  */
+  bool marked_as_executable = has_pie_flag || has_dt_debug;
+  if (marked_as_executable)
+    return false;
+
+  /* It could still (also) be runnable as a program, but most likely
+     you can dlopen it just fine.  */
+  return true;
+}
+
 /* Returns true if the file is a linux kernel module (is ET_REL and
    has the two magic sections .modinfo and .gnu.linkonce.this_module).  */
 static bool
@@ -606,6 +630,7 @@ enum classify_check
   classify_executable,
   classify_program,
   classify_shared,
+  classify_library,
   classify_linux_kernel_module,
   classify_debug_only,
   classify_loadable,
@@ -733,6 +758,7 @@ process_current_path (int *status)
 	 [classify_executable] = is_executable (),
 	 [classify_program] = is_program (),
 	 [classify_shared] = is_shared (),
+	 [classify_library] = is_library (),
 	 [classify_linux_kernel_module] = is_linux_kernel_module (),
 	 [classify_debug_only] = is_debug_only (),
 	 [classify_loadable] = is_loadable (),
@@ -756,6 +782,8 @@ process_current_path (int *status)
             fprintf (stderr, "debug: %s: program\n", current_path);
           if (checks[classify_shared])
             fprintf (stderr, "debug: %s: shared\n", current_path);
+          if (checks[classify_library])
+            fprintf (stderr, "debug: %s: library\n", current_path);
 	  if (checks[classify_linux_kernel_module])
 	    fprintf (stderr, "debug: %s: linux kernel module\n", current_path);
 	  if (checks[classify_debug_only])
@@ -853,7 +881,7 @@ main (int argc, char **argv)
         N_("File looks like an ELF object or archive/static library (default)")
 	, 1 },
       { "elf-file", classify_check_offset + classify_elf_file, NULL, 0,
-        N_("File is an regular ELF object (not an archive/static library")
+        N_("File is an regular ELF object (not an archive/static library)")
 	, 1 },
       { "elf-archive", classify_check_offset + classify_elf_archive, NULL, 0,
         N_("File is an ELF archive or static library")
@@ -865,13 +893,17 @@ main (int argc, char **argv)
         N_("File is an ELF file with symbol table or .debug_* sections \
 and can be stripped further"), 1 },
       { "executable", classify_check_offset + classify_executable, NULL, 0,
-        N_("File is an ELF program executable \
-(and not also a shared library)"), 1 },
+        N_("File is (primarily) an ELF program executable \
+(not primarily a DSO)"), 1 },
       { "program", classify_check_offset + classify_program, NULL, 0,
         N_("File is an ELF program executable \
-(might also be a shared library)"), 1 },
+(might also be a DSO)"), 1 },
       { "shared", classify_check_offset + classify_shared, NULL, 0,
-        N_("File is an ELF shared object (DSO)"), 1 },
+        N_("File is (primarily) an ELF shared object (DSO) \
+(not primarily an executable)"), 1 },
+      { "library", classify_check_offset + classify_library, NULL, 0,
+        N_("File is an ELF shared object (DSO) \
+(might also be an executable)"), 1 },
       { "linux-kernel-module", (classify_check_offset
 				+ classify_linux_kernel_module), NULL, 0,
         N_("File is a linux kernel module"), 1 },
@@ -898,6 +930,8 @@ and can be stripped further"), 1 },
         NULL, OPTION_HIDDEN, NULL, 1 },
       { "not-shared", classify_check_not_offset + classify_shared,
         NULL, OPTION_HIDDEN, NULL, 1 },
+      { "not-library", classify_check_not_offset + classify_library,
+        NULL, OPTION_HIDDEN, NULL, 1 },
       { "not-linux-kernel-module", (classify_check_not_offset
 				    + classify_linux_kernel_module),
         NULL, OPTION_HIDDEN, NULL, 1 },
@@ -956,10 +990,17 @@ particular file.  Classification options can be negated using a \
 Since modern ELF does not clearly distinguish between programs and \
 dynamic shared objects, you should normally use either --executable or \
 --shared to identify the primary purpose of a file.  \
-Only one of the --shared and --executable checks can pass for a file.  \
-If you want to know whether an ELF object might be both a program and a \
-shared library at the same time use --program --shared.  \
---executable is effectively the same as --program --not-shared.\
+Only one of the --shared and --executable checks can pass for a file.\
+\n\n\
+If you want to know whether an ELF object might a program or a \
+shared library (but could be both), then use --program or --library. \
+Some ELF files will classify as both a program and a library.\
+\n\n\
+If you just want to know whether an ELF file is loadable (as program \
+or library) use --loadable.  Note that files that only contain \
+(separate) debug information (--debug-only) are never --loadable (even \
+though they might contain program headers).  Linux kernel modules are \
+also not --loadable (in the normal sense).\
 \n\n\
 Without any of the --print options, the program exits with status 0 \
 if the requested checks pass for all input files, with 1 if a check \
-- 
2.18.1

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] elfclassify tool
  2019-07-22 15:55         ` Florian Weimer
@ 2019-07-26 22:11           ` Mark Wielaard
  2019-07-29  8:44             ` Florian Weimer
                               ` (2 more replies)
  0 siblings, 3 replies; 36+ messages in thread
From: Mark Wielaard @ 2019-07-26 22:11 UTC (permalink / raw)
  To: Florian Weimer; +Cc: elfutils-devel, Panu Matilainen

[-- Attachment #1: Type: text/plain, Size: 651 bytes --]

Hi,

On Mon, Jul 22, 2019 at 05:54:57PM +0200, Florian Weimer wrote:
> I went through these patches, albeit in a somewhat cursory fashion, and
> they look okay to me.
> 
> Do you think this is enough to port over RPM's find-debuginfo.sh?

Yes, I think this would make it possible to drop reliance on 'file'
and replace it with more precise eu-elfclassify classifications.

I squashed your commits and mine together for a final commit as
attached. Could you look over it and see if it looks OK?  If so, I
would like to add your Signed-off-by if you agree with the Developer's
Certificate of Origin as expressed in our CONTRIBUTING file.

Thanks,

Mark

[-- Attachment #2: 0001-elfclassify-New-tool-to-analyze-ELF-objects.patch --]
[-- Type: text/x-diff, Size: 53303 bytes --]

From 990f0cf28d2bc50837172831f7b3c2bafe272265 Mon Sep 17 00:00:00 2001
From: Florian Weimer <fweimer@redhat.com>
Date: Thu, 11 Apr 2019 18:07:20 +0200
Subject: [PATCH] elfclassify: New tool to analyze ELF objects.

Usage: eu-elfclassify [OPTION...] FILE...
Determine the type of an ELF file.

All of the classification options must apply at the same time to a
particular file.  Classification options can be negated using a
"--not-" prefix.

Since modern ELF does not clearly distinguish between programs and
dynamic shared objects, you should normally use either --executable or
--shared to identify the primary purpose of a file.  Only one of the
--shared and --executable checks can pass for a file.

If you want to know whether an ELF object might be a program or a shared
library (but could be both), then use --program or --library. Some ELF
files will classify as both a program and a library.

If you just want to know whether an ELF file is loadable (as program
or library) use --loadable.  Note that files that only contain
(separate) debug information (--debug-only) are never --loadable (even
though they might contain program headers).  Linux kernel modules are
also not --loadable (in the normal sense).

Without any of the --print options, the program exits with status 0 if
the requested checks pass for all input files, with 1 if a check fails
for any file, and 2 if there is an environmental issue (such as a file
read error or a memory allocation error).

When printing file names, the program exits with status 0 even if no
file names are printed, and exits with status 2 if there is an
environmental issue.

On usage error (e.g. a bad option was given), the program exits with a
status code larger than 2.

The --quiet or -q option suppresses some error warning output, but
doesn't change the exit status.

 Classification options
  --core                 File is an ELF core dump file
  --debug-only           File is a debug only ELF file (separate .debug,
                         .dwo or dwz multi-file)
  --elf                  File looks like an ELF object or archive/static
                         library (default)
  --elf-archive          File is an ELF archive or static library
  --elf-file             File is a regular ELF object (not an
                         archive/static library)
  --executable           File is (primarily) an ELF program executable (not
                         primarily a DSO)
  --library              File is an ELF shared object (DSO) (might also be
                         an executable)
  --linux-kernel-module  File is a linux kernel module
  --loadable             File is a loadable ELF object (program or shared
                         object)
  --program              File is an ELF program executable (might also be a
                         DSO)
  --shared               File is (primarily) an ELF shared object (DSO)
                         (not primarily an executable)
  --unstripped           File is an ELF file with symbol table or .debug_*
                         sections and can be stripped further

 Input flags
  -f, --file             Only classify regular (not symlink nor special
                         device) files
      --no-stdin         Do not read files from standard input (default)
      --stdin            Also read file names to process from standard
                         input, separated by newlines
      --stdin0           Also read file names to process from standard
                         input, separated by ASCII NUL bytes
  -z, --compressed       Try to open compressed files or embedded (kernel)
                         ELF images

 Output flags
  --matching             If printing file names, print matching files
                         (default)
  --no-print             Do not output file names
  --not-matching         If printing file names, print files that do not
                         match
  --print                Output names of files, separated by newline
  --print0               Output names of files, separated by ASCII NUL

 Additional flags
  -q, --quiet            Suppress some error output (counterpart to
                         --verbose)
  -v, --verbose          Output additional information (can be specified
                         multiple times)

  -?, --help             Give this help list
      --usage            Give a short usage message
  -V, --version          Print program version

Signed-off-by: Mark Wielaard <mark@klomp.org>
---
 src/ChangeLog                 |    9 +-
 src/Makefile.am               |    4 +-
 src/elfclassify.c             | 1047 +++++++++++++++++++++++++++++++++
 tests/ChangeLog               |    9 +
 tests/Makefile.am             |    7 +-
 tests/run-elfclassify-self.sh |   36 ++
 tests/run-elfclassify.sh      |  327 ++++++++++
 7 files changed, 1434 insertions(+), 5 deletions(-)
 create mode 100644 src/elfclassify.c
 create mode 100755 tests/run-elfclassify-self.sh
 create mode 100755 tests/run-elfclassify.sh

diff --git a/src/ChangeLog b/src/ChangeLog
index 911ad26f..c2102fcd 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,4 +1,11 @@
-2019-07-13 Mao Han <han_mao@c-sky.com>
+2019-07-26  Florian Weimer  <fweimer@redhat.com>
+	    Mark Wielaard  <mark@klomp.org>
+
+	* Makefile.am (bin_PROGRAMS): Add elfclassify.
+	(elfclassify_LDADD): New variable.
+	* elfclassify.c: New tool.
+
+2019-07-13  Mao Han  <han_mao@c-sky.com>
 
 	* elflint.c: Add C-SKY.
 
diff --git a/src/Makefile.am b/src/Makefile.am
index 2b1c0dcb..69ac4dbe 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -26,7 +26,8 @@ AM_CPPFLAGS += -I$(srcdir)/../libelf -I$(srcdir)/../libebl \
 AM_LDFLAGS = -Wl,-rpath-link,../libelf:../libdw
 
 bin_PROGRAMS = readelf nm size strip elflint findtextrel addr2line \
-	       elfcmp objdump ranlib strings ar unstrip stack elfcompress
+	       elfcmp objdump ranlib strings ar unstrip stack elfcompress \
+	       elfclassify
 
 noinst_LIBRARIES = libar.a
 
@@ -83,6 +84,7 @@ ar_LDADD = libar.a $(libelf) $(libeu) $(argp_LDADD)
 unstrip_LDADD = $(libebl) $(libelf) $(libdw) $(libeu) $(argp_LDADD) -ldl
 stack_LDADD = $(libebl) $(libelf) $(libdw) $(libeu) $(argp_LDADD) -ldl $(demanglelib)
 elfcompress_LDADD = $(libebl) $(libelf) $(libdw) $(libeu) $(argp_LDADD)
+elfclassify_LDADD = $(libelf) $(libdw) $(libeu) $(argp_LDADD)
 
 installcheck-binPROGRAMS: $(bin_PROGRAMS)
 	bad=0; pid=$$$$; list="$(bin_PROGRAMS)"; for p in $$list; do \
diff --git a/src/elfclassify.c b/src/elfclassify.c
new file mode 100644
index 00000000..03655aea
--- /dev/null
+++ b/src/elfclassify.c
@@ -0,0 +1,1047 @@
+/* Classification of ELF files.
+   Copyright (C) 2019 Red Hat, Inc.
+   This file is part of elfutils.
+
+   This file is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   elfutils is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+#include <argp.h>
+#include <error.h>
+#include <fcntl.h>
+#include <gelf.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include ELFUTILS_HEADER(elf)
+#include ELFUTILS_HEADER(dwelf)
+#include "printversion.h"
+
+/* Name and version of program.  */
+ARGP_PROGRAM_VERSION_HOOK_DEF = print_version;
+
+/* Bug report address.  */
+ARGP_PROGRAM_BUG_ADDRESS_DEF = PACKAGE_BUGREPORT;
+
+/* Verbosity level; set by parse_opt.  Negative values silence issue ().  */
+static int verbose;
+
+/* Path of the file being processed; set by the main function.  */
+static const char *current_path;
+
+/* Descriptor for current_path, or -1 when closed; set by open_file.  */
+static int file_fd = -1;
+
+/* True once issue or elf_issue has recorded a non-fatal problem.  */
+static bool issue_found;
+
+/* Record a non-fatal issue hit while processing current_path; E is
+   passed to error () as the errnum.  Silent when verbose < 0.  */
+static void
+issue (int e, const char *msg)
+{
+  issue_found = true;
+  if (verbose < 0)
+    return;
+  if (current_path != NULL)
+    error (0, e, "%s '%s'", msg, current_path);
+  else
+    error (0, e, "%s", msg);
+}
+
+/* Record a non-fatal libelf failure for the current ELF file.  */
+static void
+elf_issue (const char *msg)
+{
+  issue_found = true;
+  if (verbose >= 0)
+    error (0, 0, "%s: %s: '%s'", msg, elf_errmsg (-1), current_path);
+}
+
+/* Set by parse_opt.  */
+static bool flag_only_regular_files;
+
+/* Open current_path read-only as file_fd.  On a false return the
+   caller is still responsible for closing any descriptor left open.  */
+static bool
+open_file (void)
+{
+  if (verbose > 1)
+    fprintf (stderr, "debug: processing file: %s\n", current_path);
+
+  int flags = flag_only_regular_files ? (O_RDONLY | O_NOFOLLOW) : O_RDONLY;
+  file_fd = open (current_path, flags);
+  if (file_fd < 0)
+    {
+      /* ELOOP from O_NOFOLLOW is an expected skip, not an issue.  */
+      if (!(flag_only_regular_files && errno == ELOOP))
+	issue (errno, N_("opening"));
+      return false;
+    }
+
+  struct stat st;
+  if (fstat (file_fd, &st) != 0)
+    {
+      issue (errno, N_("reading"));
+      return false;
+    }
+
+  /* Don't bother with directories, nor (on request) special files.  */
+  return !S_ISDIR (st.st_mode)
+	 && (!flag_only_regular_files || S_ISREG (st.st_mode));
+}
+
+/* Close file_fd if it is open and mark it as closed again.  */
+static void
+close_file (void)
+{
+  if (file_fd < 0)
+    return;
+  close (file_fd);
+  file_fd = -1;
+}
+
+/* Set by open_elf.  */
+static Elf *elf;
+
+/* Set by parse_opt.  */
+static bool flag_compressed;
+
+/* Open current_path via open_file and begin reading it as ELF into
+   elf.  Returns false if either step fails; the file descriptor may
+   then still be open when only the ELF step failed.  */
+static bool
+open_elf (void)
+{
+  if (!open_file ())
+    {
+      /* Make sure the file descriptor is gone.  */
+      close_file ();
+      return false;
+    }
+
+  /* Choose the open routine according to flag_compressed.  */
+  elf = (flag_compressed
+	 ? dwelf_elf_begin (file_fd)
+	 : elf_begin (file_fd, ELF_C_READ, NULL));
+  if (elf != NULL)
+    return true;
+
+  /* This likely means it just isn't an ELF file, probably not a real
+     issue, but warn if verbose reporting.  */
+  if (verbose > 0)
+    fprintf (stderr, "warning: %s: %s\n", current_path, elf_errmsg (-1));
+  return false;
+}
+
+/* Release the ELF descriptor (if one is open), then the file.  */
+static void
+close_elf (void)
+{
+  Elf *current = elf;
+  elf = NULL;
+  if (current != NULL)
+    elf_end (current);
+
+  close_file ();
+}
+
+/* Symbolic name for KIND, a value as returned by elf_kind, for
+   verbose output.  Unrecognized values yield "<unknown>".  */
+static const char *
+elf_kind_string (int kind)
+{
+  if (kind == ELF_K_NONE)
+    return "ELF_K_NONE";
+  if (kind == ELF_K_AR)
+    return "ELF_K_AR";
+  /* libelf doesn't really support this.  */
+  if (kind == ELF_K_COFF)
+    return "ELF_K_COFF";
+  if (kind == ELF_K_ELF)
+    return "ELF_K_ELF";
+
+  return "<unknown>";
+}
+
+/* Symbolic name for the ELF header e_type value TYPE; values without
+   a name here yield "<unknown>".  */
+static const char *
+elf_type_string (int type)
+{
+  static const char *const names[] =
+    {
+      [ET_NONE] = "ET_NONE",
+      [ET_REL] = "ET_REL",
+      [ET_EXEC] = "ET_EXEC",
+      [ET_DYN] = "ET_DYN",
+      [ET_CORE] = "ET_CORE",
+    };
+  const size_t count = sizeof names / sizeof names[0];
+
+  if (type < 0 || (size_t) type >= count || names[type] == NULL)
+    return "<unknown>";
+  return names[type];
+}
+
+static int elf_type;			/* e_type from the ELF header.  */
+static bool has_program_load;		/* A PT_LOAD program header exists.  */
+static bool has_sections;		/* Section header count is non-zero.  */
+static bool has_bits_alloc;		/* SHF_ALLOC section, not NOBITS/NOTE.  */
+static bool has_program_interpreter;	/* A PT_INTERP program header exists.  */
+static bool has_dynamic;		/* A PT_DYNAMIC program header exists.  */
+static bool has_soname;			/* Dynamic segment has DT_SONAME.  */
+static bool has_pie_flag;		/* DT_FLAGS_1 has DF_1_PIE set.  */
+static bool has_dt_debug;		/* Dynamic segment has DT_DEBUG.  */
+static bool has_symtab;			/* A SHT_SYMTAB section exists.  */
+static bool has_debug_sections;		/* A .debug_* or .zdebug_* section.  */
+static bool has_modinfo;		/* A .modinfo section exists.  */
+static bool has_gnu_linkonce_this_module; /* That section exists.  */
+
+static bool
+run_classify (void)
+{
+  /* Reset to unanalyzed default.  */
+  elf_type = 0;
+  has_program_load = false;
+  has_sections = false;
+  has_bits_alloc = false;
+  has_program_interpreter = false;
+  has_dynamic = false;
+  has_soname = false;
+  has_pie_flag = false;
+  has_dt_debug = false;
+  has_symtab = false;
+  has_debug_sections = false;
+  has_modinfo = false;
+  has_gnu_linkonce_this_module = false;
+
+  int kind = elf_kind (elf);
+  if (verbose > 0)
+    fprintf (stderr, "info: %s: ELF kind: %s (0x%x)\n", current_path,
+	     elf_kind_string (kind), kind);
+  if (kind != ELF_K_ELF)
+    return true;
+
+  GElf_Ehdr ehdr_storage;
+  GElf_Ehdr *ehdr = gelf_getehdr (elf, &ehdr_storage);
+  if (ehdr == NULL)
+    {
+      elf_issue (N_("ELF header"));
+      return false;
+    }
+  elf_type = ehdr->e_type;
+
+  /* Examine program headers.  */
+  GElf_Phdr dyn_seg = {};
+  {
+    size_t nphdrs;
+    if (elf_getphdrnum (elf, &nphdrs) != 0)
+      {
+	elf_issue (N_("program headers"));
+	return false;
+      }
+    for (size_t phdr_idx = 0; phdr_idx < nphdrs; ++phdr_idx)
+      {
+	GElf_Phdr phdr_storage;
+	GElf_Phdr *phdr = gelf_getphdr (elf, phdr_idx, &phdr_storage);
+	if (phdr == NULL)
+	  {
+	    elf_issue (N_("program header"));
+	    return false;
+	  }
+	if (phdr->p_type == PT_DYNAMIC)
+	  {
+	    dyn_seg = *phdr;
+	    has_dynamic = true;
+	  }
+	if (phdr->p_type == PT_INTERP)
+	  has_program_interpreter = true;
+	if (phdr->p_type == PT_LOAD)
+	  has_program_load = true;
+      }
+  }
+
+  /* Do we have sections?  */
+  {
+    size_t nshdrs;
+    if (elf_getshdrnum (elf, &nshdrs) != 0)
+      {
+	elf_issue (N_("section headers"));
+	return false;
+      }
+    if (nshdrs > 0)
+      has_sections = true;
+  }
+
+  {
+    size_t shstrndx;
+    if (unlikely (elf_getshdrstrndx (elf, &shstrndx) < 0))
+      {
+	elf_issue (N_("section header string table index"));
+	return false;
+      }
+
+    Elf_Scn *scn = NULL;
+    while (true)
+      {
+        scn = elf_nextscn (elf, scn);
+        if (scn == NULL)
+          break;
+        GElf_Shdr shdr_storage;
+        GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_storage);
+        if (shdr == NULL)
+	  {
+            elf_issue (N_("could not obtain section header"));
+	    return false;
+	  }
+        const char *section_name = elf_strptr (elf, shstrndx, shdr->sh_name);
+        if (section_name == NULL)
+	  {
+            elf_issue(N_("could not obtain section name"));
+	    return false;
+	  }
+        if (verbose > 2)
+          fprintf (stderr, "debug: section header %s (type %d) found\n",
+                   section_name, shdr->sh_type);
+        if (shdr->sh_type == SHT_SYMTAB)
+          {
+            if (verbose > 1)
+              fputs ("debug: symtab section found\n", stderr);
+            has_symtab = true;
+          }
+	/* NOBITS and NOTE sections can be in any file.  We want to be
+	   sure there is at least one other allocated section.  */
+	if (shdr->sh_type != SHT_NOBITS
+	    && shdr->sh_type != SHT_NOTE
+	    && (shdr->sh_flags & SHF_ALLOC) != 0)
+	  {
+	    if (verbose > 1 && !has_bits_alloc)
+	      fputs ("debug: allocated (non-nobits/note) section found\n",
+		     stderr);
+	    has_bits_alloc = true;
+	  }
+        const char *debug_prefix = ".debug_";
+        const char *zdebug_prefix = ".zdebug_";
+        if (strncmp (section_name, debug_prefix, strlen (debug_prefix)) == 0
+	    || strncmp (section_name, zdebug_prefix,
+			strlen (zdebug_prefix)) == 0)
+          {
+            if (verbose > 1 && !has_debug_sections)
+              fputs ("debug: .debug_* section found\n", stderr);
+            has_debug_sections = true;
+          }
+	if (strcmp (section_name, ".modinfo") == 0)
+	  {
+	    if (verbose > 1)
+	      fputs ("debug: .modinfo section found\n", stderr);
+	    has_modinfo = true;
+	  }
+	if (strcmp (section_name, ".gnu.linkonce.this_module") == 0)
+	  {
+	    if (verbose > 1)
+	      fputs ("debug: .gnu.linkonce.this_module section found\n",
+		     stderr);
+	    has_gnu_linkonce_this_module = true;
+	  }
+      }
+  }
+
+  /* Examine the dynamic section.  */
+  if (has_dynamic)
+    {
+      Elf_Data *data = elf_getdata_rawchunk (elf, dyn_seg.p_offset,
+					     dyn_seg.p_filesz,
+					     ELF_T_DYN);
+      if (data != NULL)
+	for (int dyn_idx = 0; ; ++dyn_idx)
+	  {
+	    GElf_Dyn dyn_storage;
+	    GElf_Dyn *dyn = gelf_getdyn (data, dyn_idx, &dyn_storage);
+	    if (dyn == NULL)
+	      break;
+	    if (verbose > 2)
+	      fprintf (stderr, "debug: dynamic entry %d"
+		       " with tag %llu found\n",
+		       dyn_idx, (unsigned long long int) dyn->d_tag);
+	    if (dyn->d_tag == DT_SONAME)
+	      has_soname = true;
+	    if (dyn->d_tag == DT_FLAGS_1 && (dyn->d_un.d_val & DF_1_PIE))
+	      has_pie_flag = true;
+	    if (dyn->d_tag == DT_DEBUG)
+	      has_dt_debug = true;
+	    if (dyn->d_tag == DT_NULL)
+	      break;
+	  }
+    }
+
+  if (verbose > 0)
+    {
+      fprintf (stderr, "info: %s: ELF type: %s (0x%x)\n", current_path,
+	       elf_type_string (elf_type), elf_type);
+      if (has_program_load)
+        fprintf (stderr, "info: %s: PT_LOAD found\n", current_path);
+      if (has_sections)
+	fprintf (stderr, "info: %s: has sections\n", current_path);
+      if (has_bits_alloc)
+	fprintf (stderr, "info: %s: allocated (real) section found\n",
+		 current_path);
+      if (has_program_interpreter)
+        fprintf (stderr, "info: %s: program interpreter found\n",
+                 current_path);
+      if (has_dynamic)
+        fprintf (stderr, "info: %s: dynamic segment found\n", current_path);
+      if (has_soname)
+        fprintf (stderr, "info: %s: soname found\n", current_path);
+      if (has_pie_flag)
+        fprintf (stderr, "info: %s: DF_1_PIE flag found\n", current_path);
+      if (has_dt_debug)
+        fprintf (stderr, "info: %s: DT_DEBUG found\n", current_path);
+      if (has_symtab)
+        fprintf (stderr, "info: %s: symbol table found\n", current_path);
+      if (has_debug_sections)
+        fprintf (stderr, "info: %s: .debug_* section found\n", current_path);
+      if (has_modinfo)
+        fprintf (stderr, "info: %s: .modinfo section found\n", current_path);
+      if (has_gnu_linkonce_this_module)
+        fprintf (stderr,
+		 "info: %s: .gnu.linkonce.this_module section found\n",
+		 current_path);
+    }
+
+  return true;
+}
+
+static bool
+is_elf (void)
+{
+  return elf_kind (elf) != ELF_K_NONE; /* Archives (ELF_K_AR) pass, too.  */
+}
+
+static bool
+is_elf_file (void)
+{
+  return elf_kind (elf) == ELF_K_ELF; /* Rejects archives (ELF_K_AR).  */
+}
+
+static bool
+is_elf_archive (void)
+{
+  return elf_kind (elf) == ELF_K_AR; /* Members are not inspected here.  */
+}
+
+static bool
+is_core (void)
+{
+  return elf_kind (elf) == ELF_K_ELF && elf_type == ET_CORE; /* Core dump.  */
+}
+
+/* Return true if the file is a loadable object: a regular ELF file of
+   type ET_EXEC or ET_DYN that has a PT_LOAD segment and, if it has any
+   section headers, an allocated section that is neither NOBITS nor NOTE.
+   (The kernel and others can load ET_REL files; that is disregarded.)  */
+static bool
+is_loadable (void)
+{
+  return elf_kind (elf) == ELF_K_ELF
+    && (elf_type == ET_EXEC || elf_type == ET_DYN)
+    && has_program_load
+    && (!has_sections || has_bits_alloc); /* It isn't debug-only.  */
+}
+
+/* Return true if the file is an ELF file which has a symbol table or
+   .debug_* sections (and thus can be stripped further).  */
+static bool
+is_unstripped (void)
+{
+  return elf_kind (elf) != ELF_K_NONE
+    && (elf_type == ET_REL || elf_type == ET_EXEC || elf_type == ET_DYN)
+    && (has_symtab || has_debug_sections);
+}
+
+/* Return true if the file has a symbol table or .debug_* sections but
+   no allocated sections other than NOBITS or NOTE ones.  Then it is
+   most likely a separate .debug file, a dwz multi-file or a .dwo file.
+   It can still have phdrs, but in that case they shouldn't be trusted.  */
+static bool
+is_debug_only (void)
+{
+  return elf_kind (elf) != ELF_K_NONE
+    && (elf_type == ET_REL || elf_type == ET_EXEC || elf_type == ET_DYN)
+    && (has_debug_sections || has_symtab)
+    && !has_bits_alloc;
+}
+
+static bool
+is_shared (void)
+{
+  if (!is_loadable ())
+    return false;
+
+  /* The ELF type is very clear: this is an executable.  */
+  if (elf_type == ET_EXEC)
+    return false;
+
+  /* If the object is marked as PIE, it is definitely an executable,
+     and not a loadable shared object.  */
+  if (has_pie_flag)
+    return false;
+
+  /* Treat a DT_SONAME tag as a strong indicator that this is a shared
+     object.  */
+  if (has_soname)
+    return true;
+
+  /* This is probably a PIE program: there is no soname, but a program
+     interpreter.  In theory, this file could be also a DSO with a
+     soname implied by its file name that can be run as a program.
+     This situation is impossible to resolve in the general case. */
+  if (has_program_interpreter)
+    return false;
+
+  /* Roland McGrath mentions in
+     <https://www.sourceware.org/ml/libc-alpha/2015-03/msg00605.html>,
+     that “we defined a PIE as an ET_DYN with a DT_DEBUG”.  This
+     matches current binutils behavior (version 2.32).  DT_DEBUG is
+     added if bfd_link_executable returns true or if bfd_link_pic
+     returns false, depending on the architectures.  However, DT_DEBUG
+     is not documented as being specific to executables, therefore use
+     it only as a low-priority discriminator.  */
+  if (has_dt_debug)
+    return false;
+
+  /* If there is no dynamic section, the file cannot be loaded as a
+     shared object.  */
+  if (!has_dynamic)
+    return false;
+  return true;
+}
+
+static bool
+is_executable (void)
+{
+  /* An executable is any loadable object that is_shared rejects.
+     The loadable test must come first: is_shared is also false
+     for files that are not loadable at all (relocatable objects,
+     core dumps, debug-only files), and those are not executables
+     either.  */
+  return is_loadable () && !is_shared ();
+}
+
+/* Like is_executable, but the object can also be a shared library at
+   the same time.  */
+static bool
+is_program (void)
+{
+  if (!is_loadable ())
+    return false;
+
+  /* The ELF type is very clear: this is an executable.  */
+  if (elf_type == ET_EXEC)
+    return true;
+
+  /* If the object is marked as PIE, it is definitely an executable,
+     and not a loadable shared object.  */
+  if (has_pie_flag)
+    return true;
+
+  /* This is probably a PIE program. It isn't ET_EXEC, but has a
+     program interpreter. In theory, this file could be also a DSO
+     with a soname. This situation is impossible to resolve in the
+     general case. See is_shared. This is different from
+     is_executable.  */
+  if (has_program_interpreter)
+    return true;
+
+  /* Roland McGrath mentions in
+     <https://www.sourceware.org/ml/libc-alpha/2015-03/msg00605.html>,
+     that “we defined a PIE as an ET_DYN with a DT_DEBUG”.  This
+     matches current binutils behavior (version 2.32).  DT_DEBUG is
+     added if bfd_link_executable returns true or if bfd_link_pic
+     returns false, depending on the architectures.  However, DT_DEBUG
+     is not documented as being specific to executables, therefore use
+     it only as a low-priority discriminator.  */
+  if (has_dt_debug)
+    return true;
+
+  return false;
+}
+
+/* Like is_shared but the library could also be an executable: a
+   program interpreter does not disqualify the file, only an explicit
+   DF_1_PIE flag or a DT_DEBUG entry does.  */
+static bool
+is_library (void)
+{
+  /* Only ET_DYN can be shared libraries, and the object has to be
+     loadable in the first place.  */
+  if (elf_type != ET_DYN || !is_loadable ())
+    return false;
+
+  /* Without a PT_DYNAMIC segment the library cannot be loaded.  */
+  if (!has_dynamic)
+    return false;
+
+  /* A DF_1_PIE flag or a DT_DEBUG entry means that this really is a
+     (PIE) executable.  See is_shared.  */
+  const bool marked_as_program = has_pie_flag || has_dt_debug;
+
+  /* Otherwise it could still (also) be a (PIE) executable, but most
+     likely you can dlopen it just fine.  */
+  return !marked_as_program;
+}
+
+/* Returns true if the file is a Linux kernel module (is ET_REL and
+   has the two magic sections .modinfo and .gnu.linkonce.this_module).  */
+static bool
+is_linux_kernel_module (void)
+{
+  return (elf_kind (elf) == ELF_K_ELF
+	  && elf_type == ET_REL
+	  && has_modinfo
+	  && has_gnu_linkonce_this_module);
+}
+
+enum classify_requirement { do_not_care, required, forbidden };
+
+enum classify_check
+{
+  classify_elf, /* The values are used as indices into requirements[].  */
+  classify_elf_file,
+  classify_elf_archive,
+  classify_core,
+  classify_unstripped,
+  classify_executable,
+  classify_program,
+  classify_shared,
+  classify_library,
+  classify_linux_kernel_module,
+  classify_debug_only,
+  classify_loadable,
+
+  classify_check_last = classify_loadable /* Sizes requirements[].  */
+};
+
+enum
+{
+  classify_check_offset = 1000, /* Key of --CHECK: this plus the check.  */
+  classify_check_not_offset = 2000, /* Likewise for --not-CHECK.  */
+
+  classify_flag_stdin = 3000, /* Keys of the remaining long options.  */
+  classify_flag_stdin0,
+  classify_flag_no_stdin,
+  classify_flag_print,
+  classify_flag_print0,
+  classify_flag_no_print,
+  classify_flag_matching,
+  classify_flag_not_matching,
+};
+
+static bool
+classify_check_positive (int key)
+{
+  const int last_key = classify_check_offset + classify_check_last;
+  return key >= classify_check_offset && key <= last_key;
+}
+
+static bool
+classify_check_negative (int key)
+{
+  const int last_key = classify_check_not_offset + classify_check_last;
+  return key >= classify_check_not_offset && key <= last_key;
+}
+
+/* Set by parse_opt (main presets requirements[classify_elf]).  */
+static enum classify_requirement requirements[classify_check_last + 1];
+static enum { no_stdin, do_stdin, do_stdin0 } flag_stdin;
+static enum { no_print, do_print, do_print0 } flag_print;
+static bool flag_print_matching = true; /* False: print non-matching.  */
+
+static error_t
+parse_opt (int key, char *arg __attribute__ ((unused)),
+           struct argp_state *state __attribute__ ((unused)))
+{
+  /* The keys of the --CHECK and --not-CHECK options encode the check
+     number, so they index requirements[] directly.  */
+  if (classify_check_positive (key))
+    {
+      requirements[key - classify_check_offset] = required;
+      return 0;
+    }
+  if (classify_check_negative (key))
+    {
+      requirements[key - classify_check_not_offset] = forbidden;
+      return 0;
+    }
+
+  switch (key)
+    {
+    /* Verbosity.  */
+    case 'v':
+      ++verbose;
+      return 0;
+    case 'q':
+      --verbose;
+      return 0;
+
+    /* Input selection.  */
+    case 'z':
+      flag_compressed = true;
+      return 0;
+    case 'f':
+      flag_only_regular_files = true;
+      return 0;
+    case classify_flag_stdin:
+      flag_stdin = do_stdin;
+      return 0;
+    case classify_flag_stdin0:
+      flag_stdin = do_stdin0;
+      return 0;
+    case classify_flag_no_stdin:
+      flag_stdin = no_stdin;
+      return 0;
+
+    /* Output selection.  */
+    case classify_flag_print:
+      flag_print = do_print;
+      return 0;
+    case classify_flag_print0:
+      flag_print = do_print0;
+      return 0;
+    case classify_flag_no_print:
+      flag_print = no_print;
+      return 0;
+    case classify_flag_matching:
+      flag_print_matching = true;
+      return 0;
+    case classify_flag_not_matching:
+      flag_print_matching = false;
+      return 0;
+
+    default:
+      return ARGP_ERR_UNKNOWN;
+    }
+}
+
+/* Perform requested checks against the file at current_path.  With
+   printing off, sets *STATUS to 1 if checks failed.  */
+static void
+process_current_path (int *status)
+{
+  bool checks_passed = true;
+
+  if (open_elf () && run_classify ())
+    {
+      bool checks[] = /* Indexed by enum classify_check.  */
+        {
+	 [classify_elf] = is_elf (),
+	 [classify_elf_file] = is_elf_file (),
+	 [classify_elf_archive] = is_elf_archive (),
+	 [classify_core] = is_core (),
+	 [classify_unstripped] = is_unstripped (),
+	 [classify_executable] = is_executable (),
+	 [classify_program] = is_program (),
+	 [classify_shared] = is_shared (),
+	 [classify_library] = is_library (),
+	 [classify_linux_kernel_module] = is_linux_kernel_module (),
+	 [classify_debug_only] = is_debug_only (),
+	 [classify_loadable] = is_loadable (),
+	};
+
+      if (verbose > 1)
+        {
+	  if (checks[classify_elf])
+	    fprintf (stderr, "debug: %s: elf\n", current_path);
+	  if (checks[classify_elf_file])
+	    fprintf (stderr, "debug: %s: elf_file\n", current_path);
+	  if (checks[classify_elf_archive])
+	    fprintf (stderr, "debug: %s: elf_archive\n", current_path);
+	  if (checks[classify_core])
+	    fprintf (stderr, "debug: %s: core\n", current_path);
+          if (checks[classify_unstripped])
+            fprintf (stderr, "debug: %s: unstripped\n", current_path);
+          if (checks[classify_executable])
+            fprintf (stderr, "debug: %s: executable\n", current_path);
+          if (checks[classify_program])
+            fprintf (stderr, "debug: %s: program\n", current_path);
+          if (checks[classify_shared])
+            fprintf (stderr, "debug: %s: shared\n", current_path);
+          if (checks[classify_library])
+            fprintf (stderr, "debug: %s: library\n", current_path);
+	  if (checks[classify_linux_kernel_module])
+	    fprintf (stderr, "debug: %s: linux kernel module\n", current_path);
+	  if (checks[classify_debug_only])
+	    fprintf (stderr, "debug: %s: debug-only\n", current_path);
+          if (checks[classify_loadable])
+            fprintf (stderr, "debug: %s: loadable\n", current_path);
+        }
+
+      for (enum classify_check check = 0;
+           check <= classify_check_last; ++check)
+        switch (requirements[check])
+          {
+          case required:
+            if (!checks[check])
+              checks_passed = false;
+            break;
+          case forbidden:
+            if (checks[check])
+              checks_passed = false;
+            break;
+          case do_not_care:
+            break;
+          }
+    }
+  else if (file_fd == -1)
+    checks_passed = false; /* There is nothing to check, bad file.  */
+  else /* No check results at all: every required check fails.  */
+    {
+      for (enum classify_check check = 0;
+           check <= classify_check_last; ++check)
+        if (requirements[check] == required)
+          checks_passed = false;
+    }
+
+  close_elf ();
+
+  switch (flag_print)
+    {
+    case do_print:
+      if (checks_passed == flag_print_matching)
+        puts (current_path);
+      break;
+    case do_print0: /* The + 1 also writes the terminating NUL.  */
+      if (checks_passed == flag_print_matching)
+        fwrite (current_path, strlen (current_path) + 1, 1, stdout);
+      break;
+    case no_print: /* The result is reported through *STATUS only.  */
+      if (!checks_passed)
+        *status = 1;
+      break;
+    }
+}
+
+/* Called to process standard input if flag_stdin is not no_stdin.
+   Paths are separated by newlines, or by NUL bytes for do_stdin0; a
+   final path that lacks the separator is processed as well.  */
+static void
+process_stdin (int *status)
+{
+  const char delim = flag_stdin == do_stdin0 ? '\0' : '\n';
+
+  char *buffer = NULL;
+  size_t buffer_size = 0;
+  while (true)
+    {
+      ssize_t ret = getdelim (&buffer, &buffer_size, delim, stdin);
+      /* A failure that is not plain end of input (for instance an
+	 allocation failure) must be reported instead of aborting.  */
+      if (ferror (stdin) || (ret < 0 && !feof (stdin)))
+	{
+	  current_path = NULL;
+	  issue (errno, N_("reading from standard input"));
+	  break;
+	}
+      if (ret < 0)
+        break;              /* End of input, nothing left to process.  */
+      /* The separator is missing if the input ended without one; only
+	 strip it when it is really there.  */
+      if (ret > 0 && buffer[ret - 1] == delim)
+        buffer[ret - 1] = '\0';
+      current_path = buffer;
+      process_current_path (status);
+    }
+
+  free (buffer);
+}
+
+/* Classify the FILE arguments, then any paths read from standard
+   input.  Returns 2 if an issue was reported, else the check status.  */
+int
+main (int argc, char **argv)
+{
+  const struct argp_option options[] =
+    {
+      { NULL, 0, NULL, OPTION_DOC, N_("Classification options"), 1 },
+      { "elf", classify_check_offset + classify_elf, NULL, 0,
+        N_("File looks like an ELF object or archive/static library (default)")
+	, 1 },
+      { "elf-file", classify_check_offset + classify_elf_file, NULL, 0,
+        N_("File is a regular ELF object (not an archive/static library)")
+	, 1 },
+      { "elf-archive", classify_check_offset + classify_elf_archive, NULL, 0,
+        N_("File is an ELF archive or static library"), 1 },
+      { "core", classify_check_offset + classify_core, NULL, 0,
+        N_("File is an ELF core dump file"), 1 },
+      { "unstripped", classify_check_offset + classify_unstripped, NULL, 0,
+        N_("File is an ELF file with symbol table or .debug_* sections \
+and can be stripped further"), 1 },
+      { "executable", classify_check_offset + classify_executable, NULL, 0,
+        N_("File is (primarily) an ELF program executable \
+(not primarily a DSO)"), 1 },
+      { "program", classify_check_offset + classify_program, NULL, 0,
+        N_("File is an ELF program executable \
+(might also be a DSO)"), 1 },
+      { "shared", classify_check_offset + classify_shared, NULL, 0,
+        N_("File is (primarily) an ELF shared object (DSO) \
+(not primarily an executable)"), 1 },
+      { "library", classify_check_offset + classify_library, NULL, 0,
+        N_("File is an ELF shared object (DSO) \
+(might also be an executable)"), 1 },
+      { "linux-kernel-module", (classify_check_offset
+				+ classify_linux_kernel_module), NULL, 0,
+        N_("File is a linux kernel module"), 1 },
+      { "debug-only", (classify_check_offset + classify_debug_only), NULL, 0,
+        N_("File is a debug only ELF file \
+(separate .debug, .dwo or dwz multi-file)"), 1 },
+      { "loadable", classify_check_offset + classify_loadable, NULL, 0,
+        N_("File is a loadable ELF object (program or shared object)"), 1 },
+
+      /* Negated versions of the above (hidden from the --help output).  */
+      { "not-elf", classify_check_not_offset + classify_elf,
+        NULL, OPTION_HIDDEN, NULL, 1 },
+      { "not-elf-file", classify_check_not_offset + classify_elf_file,
+        NULL, OPTION_HIDDEN, NULL, 1 },
+      { "not-elf-archive", classify_check_not_offset + classify_elf_archive,
+        NULL, OPTION_HIDDEN, NULL, 1 },
+      { "not-core", classify_check_not_offset + classify_core,
+        NULL, OPTION_HIDDEN, NULL, 1 },
+      { "not-unstripped", classify_check_not_offset + classify_unstripped,
+        NULL, OPTION_HIDDEN, NULL, 1 },
+      { "not-executable", classify_check_not_offset + classify_executable,
+        NULL, OPTION_HIDDEN, NULL, 1 },
+      { "not-program", classify_check_not_offset + classify_program,
+        NULL, OPTION_HIDDEN, NULL, 1 },
+      { "not-shared", classify_check_not_offset + classify_shared,
+        NULL, OPTION_HIDDEN, NULL, 1 },
+      { "not-library", classify_check_not_offset + classify_library,
+        NULL, OPTION_HIDDEN, NULL, 1 },
+      { "not-linux-kernel-module", (classify_check_not_offset
+				    + classify_linux_kernel_module),
+        NULL, OPTION_HIDDEN, NULL, 1 },
+      { "not-debug-only", (classify_check_not_offset + classify_debug_only),
+        NULL, OPTION_HIDDEN, NULL, 1 },
+      { "not-loadable", classify_check_not_offset + classify_loadable,
+        NULL, OPTION_HIDDEN, NULL, 1 },
+
+      { NULL, 0, NULL, OPTION_DOC, N_("Input flags"), 2 },
+      { "file", 'f', NULL, 0,
+        N_("Only classify regular (not symlink nor special device) files"), 2 },
+      { "stdin", classify_flag_stdin, NULL, 0,
+        N_("Also read file names to process from standard input, \
+separated by newlines"), 2 },
+      { "stdin0", classify_flag_stdin0, NULL, 0,
+        N_("Also read file names to process from standard input, \
+separated by ASCII NUL bytes"), 2 },
+      { "no-stdin", classify_flag_no_stdin, NULL, 0,
+        N_("Do not read files from standard input (default)"), 2 },
+      { "compressed", 'z', NULL, 0,
+	N_("Try to open compressed files or embedded (kernel) ELF images"),
+	2 },
+
+      { NULL, 0, NULL, OPTION_DOC, N_("Output flags"), 3 },
+      { "print", classify_flag_print, NULL, 0,
+        N_("Output names of files, separated by newline"), 3 },
+      { "print0", classify_flag_print0, NULL, 0,
+        N_("Output names of files, separated by ASCII NUL"), 3 },
+      { "no-print", classify_flag_no_print, NULL, 0,
+        N_("Do not output file names"), 3 },
+      { "matching", classify_flag_matching, NULL, 0,
+        N_("If printing file names, print matching files (default)"), 3 },
+      { "not-matching", classify_flag_not_matching, NULL, 0,
+        N_("If printing file names, print files that do not match"), 3 },
+
+      { NULL, 0, NULL, OPTION_DOC, N_("Additional flags"), 4 },
+      { "verbose", 'v', NULL, 0,
+        N_("Output additional information (can be specified multiple times)"), 4 },
+      { "quiet", 'q', NULL, 0,
+        N_("Suppress some error output (counterpart to --verbose)"), 4 },
+      { NULL, 0, NULL, 0, NULL, 0 }
+    };
+
+  const struct argp argp =
+    {
+      .options = options,
+      .parser = parse_opt,
+      .args_doc = N_("FILE..."),
+      .doc = N_("\
+Determine the type of an ELF file.\
+\n\n\
+All of the classification options must apply at the same time to a \
+particular file.  Classification options can be negated using a \
+\"--not-\" prefix.\
+\n\n\
+Since modern ELF does not clearly distinguish between programs and \
+dynamic shared objects, you should normally use either --executable or \
+--shared to identify the primary purpose of a file.  \
+Only one of the --shared and --executable checks can pass for a file.\
+\n\n\
+If you want to know whether an ELF object might be a program or a \
+shared library (but could be both), then use --program or --library. \
+Some ELF files will classify as both a program and a library.\
+\n\n\
+If you just want to know whether an ELF file is loadable (as program \
+or library) use --loadable.  Note that files that only contain \
+(separate) debug information (--debug-only) are never --loadable (even \
+though they might contain program headers).  Linux kernel modules are \
+also not --loadable (in the normal sense).\
+\n\n\
+Without any of the --print options, the program exits with status 0 \
+if the requested checks pass for all input files, with 1 if a check \
+fails for any file, and 2 if there is an environmental issue (such \
+as a file read error or a memory allocation error).\
+\n\n\
+When printing file names, the program exits with status 0 even if \
+no file names are printed, and exits with status 2 if there is an \
+environmental issue.\
+\n\n\
+On usage error (e.g. a bad option was given), the program exits with \
+a status code larger than 2.\
+\n\n\
+The --quiet or -q option suppresses some error warning output, but \
+doesn't change the exit status.\
+")
+    };
+
+  /* Require that the file is an ELF file by default.  User can
+     disable with --not-elf.  */
+  requirements[classify_elf] = required;
+
+  int remaining;
+  if (argp_parse (&argp, argc, argv, 0, &remaining, NULL) != 0)
+    return 2;
+
+  elf_version (EV_CURRENT);
+
+  int status = 0;
+
+  for (int i = remaining; i < argc; ++i)
+    {
+      current_path = argv[i];
+      process_current_path (&status);
+    }
+
+  if (flag_stdin != no_stdin)
+    process_stdin (&status);
+
+  if (issue_found)
+    return 2;
+
+  return status;
+}
diff --git a/tests/ChangeLog b/tests/ChangeLog
index 63d6af96..2900bd82 100644
--- a/tests/ChangeLog
+++ b/tests/ChangeLog
@@ -1,3 +1,12 @@
+2019-07-26  Florian Weimer  <fweimer@redhat.com>
+	    Mark Wielaard  <mark@klomp.org>
+
+	* run-elfclassify.sh: New test.
+	* run-elfclassify-self.sh: Likewise.
+	* Makefile.am (TESTS): Add run-elfclassify.sh and
+	run-elfclassify-self.sh.
+	(EXTRA_DIST): Likewise.
+
 2019-07-16  Mao Han  <han_mao@c-sky.com>
 
 	* hello_csky.ko.bz2: New testfile.
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 0ac35315..2ff7dfc4 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -164,7 +164,8 @@ TESTS = run-arextract.sh run-arsymtest.sh run-ar.sh newfile test-nlist \
 	run-typeiter-many.sh run-strip-test-many.sh \
 	run-strip-version.sh run-xlate-note.sh \
 	run-readelf-discr.sh \
-	run-dwelf_elf_e_machine_string.sh
+	run-dwelf_elf_e_machine_string.sh \
+	run-elfclassify.sh run-elfclassify-self.sh
 
 if !BIARCH
 export ELFUTILS_DISABLE_BIARCH = 1
@@ -435,8 +436,8 @@ EXTRA_DIST = run-arextract.sh run-arsymtest.sh run-ar.sh \
 	     run-xlate-note.sh \
 	     run-readelf-discr.sh \
 	     testfile-rng.debug.bz2 testfile-urng.debug.bz2 \
-	     run-dwelf_elf_e_machine_string.sh
-
+	     run-dwelf_elf_e_machine_string.sh \
+	     run-elfclassify.sh run-elfclassify-self.sh
 
 if USE_VALGRIND
 valgrind_cmd='valgrind -q --leak-check=full --error-exitcode=1'
diff --git a/tests/run-elfclassify-self.sh b/tests/run-elfclassify-self.sh
new file mode 100755
index 00000000..c48ab9c9
--- /dev/null
+++ b/tests/run-elfclassify-self.sh
@@ -0,0 +1,36 @@
+#!/bin/sh
+# Copyright (C) 2019 Red Hat, Inc.
+# This file is part of elfutils.
+#
+# This file is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# elfutils is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+. $srcdir/test-subr.sh
+
+testrun_on_self ${abs_top_builddir}/src/elfclassify --elf-file
+testrun_on_self ${abs_top_builddir}/src/elfclassify --not-core
+testrun_on_self ${abs_top_builddir}/src/elfclassify --unstripped
+testrun_on_self ${abs_top_builddir}/src/elfclassify --not-linux-kernel-module
+
+testrun_on_self_lib ${abs_top_builddir}/src/elfclassify --shared
+testrun_on_self_lib ${abs_top_builddir}/src/elfclassify --loadable
+testrun_on_self_lib ${abs_top_builddir}/src/elfclassify --not-executable
+testrun_on_self_lib ${abs_top_builddir}/src/elfclassify --not-program
+
+testrun_on_self_exe ${abs_top_builddir}/src/elfclassify --executable
+testrun_on_self_exe ${abs_top_builddir}/src/elfclassify --program
+testrun_on_self_exe ${abs_top_builddir}/src/elfclassify --loadable
+testrun_on_self_exe ${abs_top_builddir}/src/elfclassify --not-shared
+
+testrun ${abs_top_builddir}/src/elfclassify --not-shared $self_test_files_obj
+testrun ${abs_top_builddir}/src/elfclassify --not-executable $self_test_files_obj
diff --git a/tests/run-elfclassify.sh b/tests/run-elfclassify.sh
new file mode 100755
index 00000000..5a849bbd
--- /dev/null
+++ b/tests/run-elfclassify.sh
@@ -0,0 +1,327 @@
+#!/bin/sh
+# Copyright (C) 2019 Red Hat, Inc.
+# This file is part of elfutils.
+#
+# This file is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# elfutils is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+. $srcdir/test-subr.sh
+
+core_files=\
+"testfile_aarch64_core \
+ testfile-backtrace-demangle.core \
+ testfiledwarfinlines.core \
+ testfile_i686_core \
+ testfile-m68k-core \
+ testfile-riscv64-core \
+ backtrace.aarch64.core \
+ backtrace.i386.core \
+ backtrace.ppc.core \
+ backtrace.s390.core"
+
+testfiles $core_files
+
+echo "elfclassify --core"
+testrun ${abs_top_builddir}/src/elfclassify --core $core_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --core --print $core_files <<EOF
+$(echo $core_files | sed -e "s/ /\n/g")
+EOF
+
+echo "core files are not programs"
+testrun ${abs_top_builddir}/src/elfclassify --not-program $core_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-program --print $core_files <<EOF
+$(echo $core_files | sed -e "s/ /\n/g")
+EOF
+
+echo "core files are not shared"
+testrun ${abs_top_builddir}/src/elfclassify --not-shared $core_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-shared --print $core_files <<EOF
+$(echo $core_files | sed -e "s/ /\n/g")
+EOF
+
+echo "core files are not kernel-modules"
+testrun ${abs_top_builddir}/src/elfclassify --not-linux-kernel-module $core_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-linux-kernel-module --print $core_files <<EOF
+$(echo $core_files | sed -e "s/ /\n/g")
+EOF
+
+echo "core files are not debug-only"
+testrun ${abs_top_builddir}/src/elfclassify --not-debug-only $core_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-debug-only --print $core_files <<EOF
+$(echo $core_files | sed -e "s/ /\n/g")
+EOF
+
+object_files=\
+"debug-ranges-no-lowpc.o \
+ testfile-annobingroup-i386.o \
+ testfile-bpf-dis1.o \
+ testfile-debug-rel-g.o \
+ testfile-gnu-property-note.o"
+
+testfiles $object_files
+
+echo "elfclassify --elf-file"
+testrun ${abs_top_builddir}/src/elfclassify --elf-file $object_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --elf-file --print $object_files <<EOF
+$(echo $object_files | sed -e "s/ /\n/g")
+EOF
+
+echo "object files are not archives"
+testrun ${abs_top_builddir}/src/elfclassify --not-elf-archive $object_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-elf-archive --print $object_files <<EOF
+$(echo $object_files | sed -e "s/ /\n/g")
+EOF
+
+echo "object files are not core files"
+testrun ${abs_top_builddir}/src/elfclassify --not-core $object_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-core --print $object_files <<EOF
+$(echo $object_files | sed -e "s/ /\n/g")
+EOF
+
+echo "object files are not program files"
+testrun ${abs_top_builddir}/src/elfclassify --not-program $object_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-program --print $object_files <<EOF
+$(echo $object_files | sed -e "s/ /\n/g")
+EOF
+
+echo "object files are not shared files"
+testrun ${abs_top_builddir}/src/elfclassify --not-shared $object_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-shared --print $object_files <<EOF
+$(echo $object_files | sed -e "s/ /\n/g")
+EOF
+
+echo "object files are not kernel modules"
+testrun ${abs_top_builddir}/src/elfclassify --not-linux-kernel-module $object_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-linux-kernel-module --print $object_files <<EOF
+$(echo $object_files | sed -e "s/ /\n/g")
+EOF
+
+echo "object files are not debug-only files"
+testrun ${abs_top_builddir}/src/elfclassify --not-debug-only $object_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-debug-only --print $object_files <<EOF
+$(echo $object_files | sed -e "s/ /\n/g")
+EOF
+
+ar_files="testarchive64.a"
+
+testfiles $ar_files
+
+echo "elfclassify --elf-archive"
+testrun ${abs_top_builddir}/src/elfclassify --elf-archive $ar_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --elf-archive --print $ar_files <<EOF
+$(echo $ar_files | sed -e "s/ /\n/g")
+EOF
+
+echo "archives are not elf-files"
+testrun ${abs_top_builddir}/src/elfclassify --not-elf-file $ar_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-elf-file --print $ar_files <<EOF
+$(echo $ar_files | sed -e "s/ /\n/g")
+EOF
+
+echo "archives are not core files"
+testrun ${abs_top_builddir}/src/elfclassify --not-core $ar_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-core --print $ar_files <<EOF
+$(echo $ar_files | sed -e "s/ /\n/g")
+EOF
+
+echo "archives are not program files"
+testrun ${abs_top_builddir}/src/elfclassify --not-program $ar_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-program --print $ar_files <<EOF
+$(echo $ar_files | sed -e "s/ /\n/g")
+EOF
+
+echo "archives are not shared files"
+testrun ${abs_top_builddir}/src/elfclassify --not-shared $ar_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-shared --print $ar_files <<EOF
+$(echo $ar_files | sed -e "s/ /\n/g")
+EOF
+
+lib_files=\
+"testfile52-32.noshdrs.so \
+ libtestfile_multi_shared.so \
+ testfile52-32.prelink.so \
+ testfile52-32.so
+ testfile54-64.noshdrs.so \
+ testfile54-64.prelink.so \
+ testfile54-64.so \
+ testlib_dynseg.so"
+
+testfiles $lib_files
+
+echo "elfclassify --shared"
+testrun ${abs_top_builddir}/src/elfclassify --shared $lib_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --shared --print $lib_files <<EOF
+$(echo $lib_files | sed -e "s/ /\n/g")
+EOF
+
+echo "shared files are loadable"
+testrun ${abs_top_builddir}/src/elfclassify --loadable $lib_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --loadable --print $lib_files <<EOF
+$(echo $lib_files | sed -e "s/ /\n/g")
+EOF
+
+echo "shared files are not executables"
+testrun ${abs_top_builddir}/src/elfclassify --not-executable $lib_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-executable --print $lib_files <<EOF
+$(echo $lib_files | sed -e "s/ /\n/g")
+EOF
+
+echo "shared files are not debug-only"
+testrun ${abs_top_builddir}/src/elfclassify --not-debug-only $lib_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-debug-only --print $lib_files <<EOF
+$(echo $lib_files | sed -e "s/ /\n/g")
+EOF
+
+echo "shared files are not kernel modules"
+testrun ${abs_top_builddir}/src/elfclassify --not-linux-kernel-module $lib_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-linux-kernel-module --print $lib_files <<EOF
+$(echo $lib_files | sed -e "s/ /\n/g")
+EOF
+
+exe_files=\
+"backtrace.aarch64.exec \
+ backtrace.i386.exec \
+ backtrace.ppc.exec \
+ backtrace.s390x.exec \
+ testfile70.exec \
+ test-offset-loop \
+ testfilebaztab \
+ testfilebaztabppc64"
+
+testfiles $exe_files
+
+echo "elfclassify --program"
+testrun ${abs_top_builddir}/src/elfclassify --program $exe_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --program --print $exe_files <<EOF
+$(echo $exe_files | sed -e "s/ /\n/g")
+EOF
+
+echo "programs are executables (in this case)"
+testrun ${abs_top_builddir}/src/elfclassify --executable $exe_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --executable --print $exe_files <<EOF
+$(echo $exe_files | sed -e "s/ /\n/g")
+EOF
+
+echo "programs are not shared libraries (in this case)"
+testrun ${abs_top_builddir}/src/elfclassify --not-shared $exe_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-shared --print $exe_files <<EOF
+$(echo $exe_files | sed -e "s/ /\n/g")
+EOF
+
+echo "programs are not kernel-modules"
+testrun ${abs_top_builddir}/src/elfclassify --not-linux-kernel-module $exe_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-linux-kernel-module --print $exe_files <<EOF
+$(echo $exe_files | sed -e "s/ /\n/g")
+EOF
+
+echo "programs are not debug-only"
+testrun ${abs_top_builddir}/src/elfclassify --not-debug-only $exe_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-debug-only --print $exe_files <<EOF
+$(echo $exe_files | sed -e "s/ /\n/g")
+EOF
+
+kmod_files=\
+"hello_aarch64.ko \
+ hello_csky.ko \
+ hello_i386.ko \
+ hello_m68k.ko \
+ hello_ppc64.ko \
+ hello_riscv64.ko \
+ hello_s390.ko \
+ hello_x86_64.ko"
+
+testfiles $kmod_files
+
+echo "elfclassify --linux-kernel-module"
+testrun ${abs_top_builddir}/src/elfclassify --linux-kernel-module $kmod_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --linux-kernel-module --print $kmod_files <<EOF
+$(echo $kmod_files | sed -e "s/ /\n/g")
+EOF
+
+echo "kmods are unstripped"
+testrun ${abs_top_builddir}/src/elfclassify --unstripped $kmod_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --unstripped --print $kmod_files <<EOF
+$(echo $kmod_files | sed -e "s/ /\n/g")
+EOF
+
+echo "kmods are not debug-only"
+testrun ${abs_top_builddir}/src/elfclassify --not-debug-only $kmod_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-debug-only --print $kmod_files <<EOF
+$(echo $kmod_files | sed -e "s/ /\n/g")
+EOF
+
+echo "kmods are not loabable (in the normal sense)"
+testrun ${abs_top_builddir}/src/elfclassify --not-loadable $kmod_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-loadable --print $kmod_files <<EOF
+$(echo $kmod_files | sed -e "s/ /\n/g")
+EOF
+
+echo "gnu compressed kmods are unstripped"
+${abs_top_builddir}/src/elfcompress -t gnu --force $kmod_files
+testrun ${abs_top_builddir}/src/elfclassify --unstripped $kmod_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --unstripped --print $kmod_files <<EOF
+$(echo $kmod_files | sed -e "s/ /\n/g")
+EOF
+
+debug_files=\
+"testfile15.debug \
+ testfile35.debug \
+ testfile40.debug \
+ testfile48.debug \
+ testfile53-32.debug \
+ testfile53-64.debug \
+ testfilebazdbg.debug \
+ testfilebazdbgppc64.debug \
+ addrx_constx-4.dwo \
+ addrx_constx-5.dwo"
+
+testfiles $debug_files
+
+echo "elfclassify --debug-only"
+testrun ${abs_top_builddir}/src/elfclassify --debug-only $debug_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --debug-only --print $debug_files <<EOF
+$(echo $debug_files | sed -e "s/ /\n/g")
+EOF
+
+echo "debug-only files are unstripped"
+testrun ${abs_top_builddir}/src/elfclassify --unstripped $debug_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --unstripped --print $debug_files <<EOF
+$(echo $debug_files | sed -e "s/ /\n/g")
+EOF
+
+echo "debug-only files are not programs"
+testrun ${abs_top_builddir}/src/elfclassify --not-program $debug_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-program --print $debug_files <<EOF
+$(echo $debug_files | sed -e "s/ /\n/g")
+EOF
+
+echo "debug-only files are not shared"
+testrun ${abs_top_builddir}/src/elfclassify --not-shared $debug_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-shared --print $debug_files <<EOF
+$(echo $debug_files | sed -e "s/ /\n/g")
+EOF
+
+echo "compress the debug sections and try again"
+${abs_top_builddir}/src/elfcompress -t gnu --force $debug_files
+
+echo "again unstripped"
+testrun ${abs_top_builddir}/src/elfclassify --unstripped $debug_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --unstripped --print $debug_files <<EOF
+$(echo $debug_files | sed -e "s/ /\n/g")
+EOF
+
+echo "again debug-only"
+testrun ${abs_top_builddir}/src/elfclassify --debug-only $debug_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --debug-only --print $debug_files <<EOF
+$(echo $debug_files | sed -e "s/ /\n/g")
+EOF
-- 
2.18.1


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] elfclassify: Add --library classification.
  2019-07-25 22:39                         ` [PATCH] elfclassify: Add --library classification Mark Wielaard
@ 2019-07-26 22:53                           ` Dmitry V. Levin
  0 siblings, 0 replies; 36+ messages in thread
From: Dmitry V. Levin @ 2019-07-26 22:53 UTC (permalink / raw)
  To: Mark Wielaard; +Cc: elfutils-devel, Florian Weimer, Panu Matilainen

[-- Attachment #1: Type: text/plain, Size: 626 bytes --]

On Fri, Jul 26, 2019 at 12:39:32AM +0200, Mark Wielaard wrote:
> > Maybe you are looking for another goal/classification?  For example I
> > added --program which does classify those special files as programs
> > (even though --shared also says they are shared libraries). Maybe you
> > are looking for a different classification similar/dual to that. Say
> > --library?
> 
> This patch implements this and updates the --help text to better explain
> the differences between --loadable, --shared/--executable and
> --program/--library.
> 
> Does this look reasonable?

Yes, this looks fine, thanks!


-- 
ldv

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 801 bytes --]

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] elfclassify tool
  2019-07-20 21:51                       ` Mark Wielaard
  2019-07-25 22:39                         ` [PATCH] elfclassify: Add --library classification Mark Wielaard
@ 2019-07-26 23:04                         ` Dmitry V. Levin
  2019-07-27 11:54                           ` Mark Wielaard
  1 sibling, 1 reply; 36+ messages in thread
From: Dmitry V. Levin @ 2019-07-26 23:04 UTC (permalink / raw)
  To: Mark Wielaard; +Cc: Florian Weimer, elfutils-devel, Panu Matilainen

[-- Attachment #1: Type: text/plain, Size: 569 bytes --]

On Sat, Jul 20, 2019 at 11:51:16PM +0200, Mark Wielaard wrote:
> On Sat, Jul 20, 2019 at 01:57:27AM +0300, Dmitry V. Levin wrote:
[...]
> > btw, I think it would be appropriate to move the has_dynamic check before
> > the first check in is_shared that returns true.
> 
> Yes, that is probably fine, but does it really matter?

It doesn't matter unless the file has DT_SONAME but doesn't have PT_DYNAMIC.

If /lib64/ld-linux-x86-64.so.2 --verify doesn't like files without
PT_DYNAMIC, elfclassify --shared shouldn't classify them as DSOs, too.


-- 
ldv

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 801 bytes --]

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] elfclassify tool
  2019-07-26 23:04                         ` [PATCH] elfclassify tool Dmitry V. Levin
@ 2019-07-27 11:54                           ` Mark Wielaard
  0 siblings, 0 replies; 36+ messages in thread
From: Mark Wielaard @ 2019-07-27 11:54 UTC (permalink / raw)
  To: Dmitry V. Levin; +Cc: Florian Weimer, elfutils-devel, Panu Matilainen

On Sat, Jul 27, 2019 at 02:04:48AM +0300, Dmitry V. Levin wrote:
> On Sat, Jul 20, 2019 at 11:51:16PM +0200, Mark Wielaard wrote:
> > On Sat, Jul 20, 2019 at 01:57:27AM +0300, Dmitry V. Levin wrote:
> [...]
> > > btw, I think it would be appropriate to move the has_dynamic check before
> > > the first check in is_shared that returns true.
> > 
> > Yes, that is probably fine, but does it really matter?
> 
> It doesn't matter unless the file has DT_SONAME but doesn't have PT_DYNAMIC.
> 
> If /lib64/ld-linux-x86-64.so.2 --verify doesn't like files without
> PT_DYNAMIC, elfclassify --shared shouldn't classify them as DSOs, too.

Yes, I see how theoretically that is "more correct".  But if the file
doesn't have PT_DYNAMIC then it cannot have a DT_SONAME.  And there
are no other checks that return true. So in practice there is no
difference. Still, if it looks more correct, then let's just swap the
checks.

diff --git a/src/elfclassify.c b/src/elfclassify.c
index 03655aea..0b1bb63a 100644
--- a/src/elfclassify.c
+++ b/src/elfclassify.c
@@ -498,6 +498,11 @@ is_shared (void)
   if (elf_type == ET_EXEC)
     return false;
 
+  /* If there is no dynamic section, the file cannot be loaded as a
+     shared object.  */
+  if (!has_dynamic)
+    return false;
+
   /* If the object is marked as PIE, it is definitely an executable,
      and not a loadlable shared object.  */
   if (has_pie_flag)
@@ -526,10 +531,6 @@ is_shared (void)
   if (has_dt_debug)
     return false;
 
-  /* If there is no dynamic section, the file cannot be loaded as a
-     shared object.  */
-  if (!has_dynamic)
-    return false;
   return true;
 }
 
Thanks,

Mark

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] elfclassify tool
  2019-07-26 22:11           ` Mark Wielaard
@ 2019-07-29  8:44             ` Florian Weimer
  2019-07-29 14:24               ` Mark Wielaard
  2019-07-29  9:16             ` Florian Weimer
  2019-07-29  9:22             ` [PATCH] elfclassify tool Florian Weimer
  2 siblings, 1 reply; 36+ messages in thread
From: Florian Weimer @ 2019-07-29  8:44 UTC (permalink / raw)
  To: Mark Wielaard; +Cc: elfutils-devel, Panu Matilainen

* Mark Wielaard:

> +  if (elf == NULL)
> +    {
> +      /* This likely means it just isn't an ELF file, probably not a
> +	 real issue, but warn if verbose reporting.  */
> +      if (verbose > 0)
> +	fprintf (stderr, "warning: %s: %s\n", current_path, elf_errmsg (-1));
> +      return false;
> +    }

Is it possible to distinguish the error from a memory allocation error?
It would be wrong to mis-classify a file just because the system is low
on memory.

Thanks,
Florian

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] elfclassify tool
  2019-07-26 22:11           ` Mark Wielaard
  2019-07-29  8:44             ` Florian Weimer
@ 2019-07-29  9:16             ` Florian Weimer
  2019-07-29 14:34               ` Mark Wielaard
  2019-07-29  9:22             ` [PATCH] elfclassify tool Florian Weimer
  2 siblings, 1 reply; 36+ messages in thread
From: Florian Weimer @ 2019-07-29  9:16 UTC (permalink / raw)
  To: Mark Wielaard; +Cc: elfutils-devel, Panu Matilainen

* Mark Wielaard:

> +/* Called to process standard input if flag_stdin is not no_stdin.  */
> +static void
> +process_stdin (int *status)
> +{
> +  char delim;
> +  if (flag_stdin == do_stdin0)
> +    delim = '\0';
> +  else
> +    delim = '\n';
> +
> +  char *buffer = NULL;
> +  size_t buffer_size = 0;
> +  while (true)
> +    {
> +      ssize_t ret = getdelim (&buffer, &buffer_size, delim, stdin);
> +      if (ferror (stdin))
> +	{
> +	  current_path = NULL;
> +	  issue (errno, N_("reading from standard input"));
> +	  break;
> +	}
> +      if (feof (stdin))
> +        break;
> +      if (ret < 0)
> +        abort ();           /* Cannot happen due to error checks above.  */
> +      if (delim != '\0' && ret > 0)
> +        buffer[ret - 1] = '\0';

I think this can overwrite the last character of the last line if the
file does not end with '\n'.

Thanks,
Florian

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] elfclassify tool
  2019-07-26 22:11           ` Mark Wielaard
  2019-07-29  8:44             ` Florian Weimer
  2019-07-29  9:16             ` Florian Weimer
@ 2019-07-29  9:22             ` Florian Weimer
  2019-07-29 14:40               ` Mark Wielaard
  2 siblings, 1 reply; 36+ messages in thread
From: Florian Weimer @ 2019-07-29  9:22 UTC (permalink / raw)
  To: Mark Wielaard; +Cc: elfutils-devel, Panu Matilainen

* Mark Wielaard:

> Signed-off-by: Mark Wielaard <mark@klomp.org>

Does elfutils use DCO?  Then you have my signoff as well:

Signed-off-by: Florian Weimer <fweimer@redhat.com>

You should list yourself as an author somewhere in the commit
message.  This is so much more than what I wrote.

Regarding the test case, I think if the build target is ELF, it makes
sense to check that the elfutils binaries themselves are classified as
expected, with the current build flags.  This will detect changes
required due to the evolution of the toolchain.

Thanks,
Florian

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] elfclassify tool
  2019-07-29  8:44             ` Florian Weimer
@ 2019-07-29 14:24               ` Mark Wielaard
  2019-08-11 23:38                 ` Mark Wielaard
  0 siblings, 1 reply; 36+ messages in thread
From: Mark Wielaard @ 2019-07-29 14:24 UTC (permalink / raw)
  To: Florian Weimer; +Cc: elfutils-devel, Panu Matilainen

On Mon, Jul 29, 2019 at 10:43:56AM +0200, Florian Weimer wrote:
> * Mark Wielaard:
> 
> > +  if (elf == NULL)
> > +    {
> > +      /* This likely means it just isn't an ELF file, probably not a
> > +	 real issue, but warn if verbose reporting.  */
> > +      if (verbose > 0)
> > +	fprintf (stderr, "warning: %s: %s\n", current_path, elf_errmsg (-1));
> > +      return false;
> > +    }
> 
> Is it possible to distinguish the error from a memory allocation error?
> It would be wrong to mis-classify a file just because the system is low
> on memory.

You are right this is not the proper way to report the issue.
Normally, when just using elf_begin, a NULL return should be reported
through elf_issue (which will set the issues flag).

But, because I added -z, we are using either elf_begin or
dwelf_elf_begin. dwelf_elf_begin will return NULL (instead of an
empty (ELF_K_NONE) Elf descriptor) when there is an issue, or the
(decompressed) file wasn't an ELF file.

So we should split the error reporting. If we called elf_begin and get
NULL we should call elf_issue to report the proper issue.

If we called dwelf_elf_begin and we get NULL, I am not sure yet what
the proper way is to detect whether it is a real issue, or "just" not
a (decompressed) ELF file. I am afraid the current handling is the
best we can do.

Maybe we can fix dwelf_elf_begin to return an empty (ELF_K_NONE) Elf
descriptor if there was no issue, but the (decompressed) file wasn't
an ELF file.

Cheers,

Mark

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] elfclassify tool
  2019-07-29  9:16             ` Florian Weimer
@ 2019-07-29 14:34               ` Mark Wielaard
  2019-07-29 14:38                 ` Florian Weimer
  0 siblings, 1 reply; 36+ messages in thread
From: Mark Wielaard @ 2019-07-29 14:34 UTC (permalink / raw)
  To: Florian Weimer; +Cc: elfutils-devel, Panu Matilainen

On Mon, Jul 29, 2019 at 11:16:31AM +0200, Florian Weimer wrote:
> * Mark Wielaard:
> 
> > +/* Called to process standard input if flag_stdin is not no_stdin.  */
> > +static void
> > +process_stdin (int *status)
> > +{
> > +  char delim;
> > +  if (flag_stdin == do_stdin0)
> > +    delim = '\0';
> > +  else
> > +    delim = '\n';
> > +
> > +  char *buffer = NULL;
> > +  size_t buffer_size = 0;
> > +  while (true)
> > +    {
> > +      ssize_t ret = getdelim (&buffer, &buffer_size, delim, stdin);
> > +      if (ferror (stdin))
> > +	{
> > +	  current_path = NULL;
> > +	  issue (errno, N_("reading from standard input"));
> > +	  break;
> > +	}
> > +      if (feof (stdin))
> > +        break;
> > +      if (ret < 0)
> > +        abort ();           /* Cannot happen due to error checks above.  */
> > +      if (delim != '\0' && ret > 0)
> > +        buffer[ret - 1] = '\0';
> 
> I think this can overwrite the last character of the last line if the
> file does not end with '\n'.

I see.  "The buffer is null-terminated and includes the newline
character, if one was found."

So the test should be:

diff --git a/src/elfclassify.c b/src/elfclassify.c
index ebd42c1d5..b17d14d45 100644
--- a/src/elfclassify.c
+++ b/src/elfclassify.c
@@ -862,7 +862,7 @@ process_stdin (int *status)
         break;
       if (ret < 0)
         abort ();           /* Cannot happen due to error checks above.  */
-      if (delim != '\0' && ret > 0)
+      if (delim != '\0' && ret > 0 && buffer[ret - 1] == '\n')
         buffer[ret - 1] = '\0';
       current_path = buffer;
       process_current_path (status);

Thanks,

Mark

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] elfclassify tool
  2019-07-29 14:34               ` Mark Wielaard
@ 2019-07-29 14:38                 ` Florian Weimer
  2019-08-13  9:44                   ` Mark Wielaard
  0 siblings, 1 reply; 36+ messages in thread
From: Florian Weimer @ 2019-07-29 14:38 UTC (permalink / raw)
  To: Mark Wielaard; +Cc: elfutils-devel, Panu Matilainen

* Mark Wielaard:

> On Mon, Jul 29, 2019 at 11:16:31AM +0200, Florian Weimer wrote:
>> * Mark Wielaard:
>> 
>> > +/* Called to process standard input if flag_stdin is not no_stdin.  */
>> > +static void
>> > +process_stdin (int *status)
>> > +{
>> > +  char delim;
>> > +  if (flag_stdin == do_stdin0)
>> > +    delim = '\0';
>> > +  else
>> > +    delim = '\n';
>> > +
>> > +  char *buffer = NULL;
>> > +  size_t buffer_size = 0;
>> > +  while (true)
>> > +    {
>> > +      ssize_t ret = getdelim (&buffer, &buffer_size, delim, stdin);
>> > +      if (ferror (stdin))
>> > +	{
>> > +	  current_path = NULL;
>> > +	  issue (errno, N_("reading from standard input"));
>> > +	  break;
>> > +	}
>> > +      if (feof (stdin))
>> > +        break;
>> > +      if (ret < 0)
>> > +        abort ();           /* Cannot happen due to error checks above.  */
>> > +      if (delim != '\0' && ret > 0)
>> > +        buffer[ret - 1] = '\0';
>> 
>> I think this can overwrite the last character of the last line if the
>> file does not end with '\n'.
>
> I see.  "The buffer is null-terminated and includes the newline
> character, if one was found."
>
> So the test should be:
>
> diff --git a/src/elfclassify.c b/src/elfclassify.c
> index ebd42c1d5..b17d14d45 100644
> --- a/src/elfclassify.c
> +++ b/src/elfclassify.c
> @@ -862,7 +862,7 @@ process_stdin (int *status)
>          break;
>        if (ret < 0)
>          abort ();           /* Cannot happen due to error checks above.  */
> -      if (delim != '\0' && ret > 0)
> +      if (delim != '\0' && ret > 0 && buffer[ret - 1] == '\n')
>          buffer[ret - 1] = '\0';
>        current_path = buffer;
>        process_current_path (status);

Right.  But now I wonder why ret == 0 can ever happen.  Maybe on
OpenVMS, which doesn't use in-band signaling for line terminators?

Thanks,
Florian

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] elfclassify tool
  2019-07-29  9:22             ` [PATCH] elfclassify tool Florian Weimer
@ 2019-07-29 14:40               ` Mark Wielaard
  2019-07-29 14:42                 ` Florian Weimer
  0 siblings, 1 reply; 36+ messages in thread
From: Mark Wielaard @ 2019-07-29 14:40 UTC (permalink / raw)
  To: Florian Weimer; +Cc: elfutils-devel, Panu Matilainen

On Mon, Jul 29, 2019 at 11:22:13AM +0200, Florian Weimer wrote:
> * Mark Wielaard:
> 
> > Signed-off-by: Mark Wielaard <mark@klomp.org>
> 
> Does elfutils use DCO?  Then you have my signoff as well:
> 
> Signed-off-by: Florian Weimer <fweimer@redhat.com>

Thanks. Yes, elfutils uses a Developer Certificate of Origin based on
the linux kernel one (but clarified for the elfutils project details -
like some files having multiple licenses at the same time). We do
require all commits to have a Signed-off-by line from developers that
agree with the DCO as explained in the CONTRIBUTING document.

> You should list yourself as an author somewhere in the commit
> message.

The ChangeLog entries do mention both of us as authors.

> Regarding the test case, I think if the build target is ELF, it makes
> sense to check that the elfutils binaries themselves are classified as
> expected, with the current build flags.  This will detect changes
> required due to the evolution of the toolchain.

That is what the run-elfclassify-self.sh testcase does I believe.
Or do you believe it should be extended?

Thanks,

Mark

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] elfclassify tool
  2019-07-29 14:40               ` Mark Wielaard
@ 2019-07-29 14:42                 ` Florian Weimer
  0 siblings, 0 replies; 36+ messages in thread
From: Florian Weimer @ 2019-07-29 14:42 UTC (permalink / raw)
  To: Mark Wielaard; +Cc: elfutils-devel, Panu Matilainen

* Mark Wielaard:

>> Regarding the test case, I think if the build target is ELF, it makes
>> sense to check that the elfutils binaries themselves are classified as
>> expected, with the current build flags.  This will detect changes
>> required due to the evolution of the toolchain.
>
> That is what the run-elfclassify-self.sh testcase does I believe.

Ah, I had missed it.  Great, I think this one is covered.

Thanks,
Florian

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] elfclassify tool
  2019-07-29 14:24               ` Mark Wielaard
@ 2019-08-11 23:38                 ` Mark Wielaard
  2019-08-12  8:14                   ` Florian Weimer
  0 siblings, 1 reply; 36+ messages in thread
From: Mark Wielaard @ 2019-08-11 23:38 UTC (permalink / raw)
  To: Florian Weimer; +Cc: elfutils-devel, Panu Matilainen

[-- Attachment #1: Type: text/plain, Size: 3401 bytes --]

On Mon, 2019-07-29 at 16:24 +0200, Mark Wielaard wrote:
> On Mon, Jul 29, 2019 at 10:43:56AM +0200, Florian Weimer wrote:
> > * Mark Wielaard:
> > 
> > > +  if (elf == NULL)
> > > +    {
> > > +      /* This likely means it just isn't an ELF file, probably not a
> > > +	 real issue, but warn if verbose reporting.  */
> > > +      if (verbose > 0)
> > > +	fprintf (stderr, "warning: %s: %s\n", current_path, elf_errmsg (-1));
> > > +      return false;
> > > +    }
> > 
> > Is it possible to distinguish the error from a memory allocation error?
> > It would be wrong to mis-classify a file just because the system is low
> > on memory.
> 
> You are right this is not the proper way to report the issue.
> Normally, when just using elf_begin, a NULL return should be reported
> through elf_issue (which will set the issues flag).
> 
> But, because I added -z, we are using either elf_begin or
> dwelf_elf_begin. dwelf_elf_begin will return NULL (instead of an
> empty (ELF_K_NONE) Elf descriptor) when there is an issue, or the
> (decompressed) file wasn't an ELF file.
> 
> So we should split the error reporting. If we called elf_begin and get
> NULL we should call elf_issue to report the proper issue.
> 
> If we called dwelf_elf_begin and we get NULL, I am not sure yet what
> the proper way is to detect whether it is a real issue, or "just" not
> a (decompressed) ELF file. I am afraid the current handling is the
> best we can do.
> 
> Maybe we can fix dwelf_elf_begin to return an empty (ELF_K_NONE) Elf
> descriptor if there was no issue, but the (decompressed) file wasn't
> an ELF file.

Sorry this took so long. And this is indeed the last issue holding up
the release. But this is a tricky problem.

We made a mistake when we wrote the contract for dwelf_elf_begin by
saying it would never return ELF_K_NONE. That made it different from
elf_begin and made it impossible to distinguish between a real (file or
decompression) error and whether the file simply wasn't an ELF file and
also wasn't a compressed ELF file.

I think we should fix the contract. Technically it would be an API
break, but I think no user is really relying on the fact that the Elf
handle returned is never ELF_K_NONE. Users still need to distinguish
between ELF_K_ELF and ELF_K_AR (and theoretically any other ELF_K_type,
like COFF, which we currently don't support, but we do define it).

So that is what the attached patch does. I also audited all
decompression code to make sure it returns error codes consistently.
The decompression will either decompress successfully and return
DWFL_E_NOERROR, or if the file wasn't compressed (or an embedded image)
it will return DWFL_E_BADELF. In all other cases (file or decompression
error) it will set a different DWFL_E error.

This "only" leaves the problem that we don't have a good way to
translate those errors into "real" libelf error codes. So for now we
just fake one if it wasn't an elf_errno value. I don't intend to try to
solve this error translation issue before the release (I don't know how
to do it yet).

What do you think about this change to dwelf_elf_begin?
The change would make it possible to detect real errors in the
elfclassify code, whether elf_begin or dwelf_elf_begin was used. So we
would not misclassify files (but return an error status of 2).

Thanks,

Mark

[-- Attachment #2: 0001-libdwelf-Make-dwelf_elf_begin-return-NULL-only-when-.patch --]
[-- Type: text/x-patch, Size: 7796 bytes --]

From 648837a9f1be7628e9ceee6818bf56c80b9d3fa1 Mon Sep 17 00:00:00 2001
From: Mark Wielaard <mark@klomp.org>
Date: Mon, 12 Aug 2019 00:43:22 +0200
Subject: [PATCH] libdwelf: Make dwelf_elf_begin return NULL only when there is
 an error.

dwelf_elf_begin was slightly different from elf_begin in case the file
turned out to not be an ELF file. elf_begin would return an Elf handle
with ELF_K_NONE. But dwelf_elf_begin would return NULL. This made it
impossible to tell the difference between a file or decompression error
and a (decompressed) file not being an ELF file.

Since dwelf_elf_begin could still return different kinds of ELF files
(ELF_K_ELF or ELF_K_AR - and theoretically ELF_K_COFF) this was not
really useful anyway. So make it so that dwelf_elf_begin always returns
an Elf handle unless there was a real error reading or decompressing
the file. Otherwise return NULL to make clear there was a real error.

Make sure that the decompression function returns DWFL_E_BADELF only
when the file isn't compressed. In which case the Elf handle won't
be replaced and can be returned (as ELF_K_NONE).

Add a new version to dwelf_elf_begin so programs can rely on it
returning NULL only for real errors.

Signed-off-by: Mark Wielaard <mark@klomp.org>
---
 libdw/ChangeLog            |  4 ++++
 libdw/libdw.map            |  4 ++++
 libdwelf/ChangeLog         |  6 ++++++
 libdwelf/dwelf_elf_begin.c | 12 +++++++-----
 libdwelf/libdwelf.h        |  9 ++++++---
 libdwfl/ChangeLog          |  8 ++++++++
 libdwfl/gzip.c             |  5 +++--
 libdwfl/open.c             | 10 +++++++---
 8 files changed, 45 insertions(+), 13 deletions(-)

diff --git a/libdw/ChangeLog b/libdw/ChangeLog
index 6b779e77..bf1f4857 100644
--- a/libdw/ChangeLog
+++ b/libdw/ChangeLog
@@ -1,3 +1,7 @@
+2019-08-12  Mark Wielaard  <mark@klomp.org>
+
+	* libdw.map (ELFUTILS_0.177): Add new version of dwelf_elf_begin.
+
 2019-06-28  Mark Wielaard  <mark@klomp.org>
 
 	* libdw.map (ELFUTILS_0.177): New section. Add
diff --git a/libdw/libdw.map b/libdw/libdw.map
index 2e1c0e9e..decac05c 100644
--- a/libdw/libdw.map
+++ b/libdw/libdw.map
@@ -365,4 +365,8 @@ ELFUTILS_0.175 {
 ELFUTILS_0.177 {
   global:
     dwelf_elf_e_machine_string;
+    # Replaced ELFUTILS_0.175 versions.  Both versions point to the
+    # same implementation, but users of the new symbol version can
+    # presume that NULL is only returned on error (otherwise ELF_K_NONE).
+    dwelf_elf_begin;
 } ELFUTILS_0.175;
diff --git a/libdwelf/ChangeLog b/libdwelf/ChangeLog
index 29f9a509..5b48ed8f 100644
--- a/libdwelf/ChangeLog
+++ b/libdwelf/ChangeLog
@@ -1,3 +1,9 @@
+2019-08-12  Mark Wielaard  <mark@klomp.org>
+
+	* libdwelf.h (dwelf_elf_begin): Update documentation.
+	* dwelf_elf_begin.c (dwelf_elf_begin): Don't suppress ELF_K_NONE.
+	Mark old and new version.
+
 2019-06-28  Mark Wielaard  <mark@klomp.org>
 
 	* Makefile.am (libdwelf_a_SOURCES): Add dwelf_elf_e_machine_string.c.
diff --git a/libdwelf/dwelf_elf_begin.c b/libdwelf/dwelf_elf_begin.c
index 79825338..c7d63a1c 100644
--- a/libdwelf/dwelf_elf_begin.c
+++ b/libdwelf/dwelf_elf_begin.c
@@ -41,13 +41,13 @@ dwelf_elf_begin (int fd)
 {
   Elf *elf = NULL;
   Dwfl_Error e = __libdw_open_elf (fd, &elf);
-  if (elf != NULL && elf_kind (elf) != ELF_K_NONE)
+  if (e == DWFL_E_NOERROR)
     return elf;
 
-  /* Elf wasn't usable.  Make sure there is a proper elf error message.  */
-
-  if (elf != NULL)
-    elf_end (elf);
+  /* Elf wasn't usable.  Make sure there is a proper elf error
+     message.  This is probably not the real error, because there is
+     no good way to propagate errnos or decompression errors, but
+     better than nothing.  */
 
   if (e != DWFL_E_LIBELF)
     {
@@ -60,3 +60,5 @@ dwelf_elf_begin (int fd)
 
   return NULL;
 }
+OLD_VERSION (dwelf_elf_begin, ELFUTILS_0.175)
+NEW_VERSION (dwelf_elf_begin, ELFUTILS_0.177)
diff --git a/libdwelf/libdwelf.h b/libdwelf/libdwelf.h
index cb7ea091..dbb8f08c 100644
--- a/libdwelf/libdwelf.h
+++ b/libdwelf/libdwelf.h
@@ -128,9 +128,12 @@ extern void dwelf_strtab_free (Dwelf_Strtab *st)
 /* Creates a read-only Elf handle from the given file handle.  The
    file may be compressed and/or contain a linux kernel image header,
    in which case it is eagerly decompressed in full and the Elf handle
-   is created as if created with elf_memory ().  On error NULL is
-   returned.  The Elf handle should be closed with elf_end ().  The
-   file handle will not be closed.  Does not return ELF_K_NONE handles.  */
+   is created as if created with elf_memory ().  On decompression or
+   file errors NULL is returned (and elf_errno will be set).  If there
+   was no error, but the file is not an ELF file, then an ELF_K_NONE
+   Elf handle is returned (just like with elf_begin).  The Elf handle
+   should be closed with elf_end ().  The file handle will not be
+   closed.  */
 extern Elf *dwelf_elf_begin (int fd);
 
 /* Returns a human readable string for the given ELF header e_machine
diff --git a/libdwfl/ChangeLog b/libdwfl/ChangeLog
index 8cbe90c9..04a39637 100644
--- a/libdwfl/ChangeLog
+++ b/libdwfl/ChangeLog
@@ -1,3 +1,11 @@
+2019-08-12  Mark Wielaard  <mark@klomp.org>
+
+	* gzip.c (open_stream): Return DWFL_E_ERRNO on bad file operation.
+	* open.c (libdw_open_elf): New argument bad_elf_ok. Check it and
+	return DWFL_E_NOERROR in case it is set and error was DWFL_E_BADELF.
+	(__libdw_open_file): Call libdw_open_elf with bad_elf_ok false.
+	(__libdw_open_elf): Call libdw_open_elf with bad_elf_ok true.
+
 2019-08-05  Omar Sandoval  <osandov@fb.com>
 
 	* dwfl_segment_report_module.c (dwfl_segment_report_module): Assign
diff --git a/libdwfl/gzip.c b/libdwfl/gzip.c
index c2c13baf..043d0b6e 100644
--- a/libdwfl/gzip.c
+++ b/libdwfl/gzip.c
@@ -139,14 +139,14 @@ open_stream (int fd, off_t start_offset, struct unzip_state *state)
 {
     int d = dup (fd);
     if (unlikely (d < 0))
-      return DWFL_E_BADELF;
+      return DWFL_E_ERRNO;
     if (start_offset != 0)
       {
 	off_t off = lseek (d, start_offset, SEEK_SET);
 	if (off != start_offset)
 	  {
 	    close (d);
-	    return DWFL_E_BADELF;
+	    return DWFL_E_ERRNO;
 	  }
       }
     state->zf = gzdopen (d, "r");
@@ -288,6 +288,7 @@ unzip (int fd, off_t start_offset,
   if (result == DWFL_E_NOERROR && gzdirect (state.zf))
     {
       gzclose (state.zf);
+      /* Not a compressed stream after all.  */
       return fail (&state, DWFL_E_BADELF);
     }
 
diff --git a/libdwfl/open.c b/libdwfl/open.c
index 74367359..35fc5283 100644
--- a/libdwfl/open.c
+++ b/libdwfl/open.c
@@ -118,7 +118,7 @@ what_kind (int fd, Elf **elfp, Elf_Kind *kind, bool *may_close_fd)
 
 static Dwfl_Error
 libdw_open_elf (int *fdp, Elf **elfp, bool close_on_fail, bool archive_ok,
-		bool never_close_fd)
+		bool never_close_fd, bool bad_elf_ok)
 {
   bool may_close_fd = false;
 
@@ -164,6 +164,10 @@ libdw_open_elf (int *fdp, Elf **elfp, bool close_on_fail, bool archive_ok,
       && !(archive_ok && kind == ELF_K_AR))
     error = DWFL_E_BADELF;
 
+  /* This basically means, we keep an ELF_K_NONE Elf handle and return it.  */
+  if (bad_elf_ok && error == DWFL_E_BADELF)
+    error = DWFL_E_NOERROR;
+
   if (error != DWFL_E_NOERROR)
     {
       elf_end (elf);
@@ -184,11 +188,11 @@ libdw_open_elf (int *fdp, Elf **elfp, bool close_on_fail, bool archive_ok,
 Dwfl_Error internal_function
 __libdw_open_file (int *fdp, Elf **elfp, bool close_on_fail, bool archive_ok)
 {
-  return libdw_open_elf (fdp, elfp, close_on_fail, archive_ok, false);
+  return libdw_open_elf (fdp, elfp, close_on_fail, archive_ok, false, false);
 }
 
 Dwfl_Error internal_function
 __libdw_open_elf (int fd, Elf **elfp)
 {
-  return libdw_open_elf (&fd, elfp, false, true, true);
+  return libdw_open_elf (&fd, elfp, false, true, true, true);
 }
-- 
2.18.1


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] elfclassify tool
  2019-08-11 23:38                 ` Mark Wielaard
@ 2019-08-12  8:14                   ` Florian Weimer
  2019-08-12 15:18                     ` Mark Wielaard
  0 siblings, 1 reply; 36+ messages in thread
From: Florian Weimer @ 2019-08-12  8:14 UTC (permalink / raw)
  To: Mark Wielaard; +Cc: elfutils-devel, Panu Matilainen

* Mark Wielaard:

> What do you think about this change to dwelf_elf_begin?
> The change would make it possible to detect real errors in the
> elfclassify code, whether elf_begin or dwelf_elf_begin was used. So we
> would not misclassify files (but return an error status of 2).

I'm not really familiar with how these functions are used, sorry,
Viewed in isolation, the changes appear reasonable to me.

Thanks,
Florian

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] elfclassify tool
  2019-08-12  8:14                   ` Florian Weimer
@ 2019-08-12 15:18                     ` Mark Wielaard
  0 siblings, 0 replies; 36+ messages in thread
From: Mark Wielaard @ 2019-08-12 15:18 UTC (permalink / raw)
  To: Florian Weimer; +Cc: elfutils-devel, Panu Matilainen

On Mon, Aug 12, 2019 at 10:14:20AM +0200, Florian Weimer wrote:
> * Mark Wielaard:
> 
> > What do you think about this change to dwelf_elf_begin?
> > The change would make it possible to detect real errors in the
> > elfclassify code, whether elf_begin or dwelf_elf_begin was used. So we
> > would not misclassify files (but return an error status of 2).
> 
> I'm not really familiar with how these functions are used, sorry,

I think you are selling yourself short. You did spot the issue with
using them. And that using them as "advertised" could cause
problems. Thanks for that.

> Viewed in isolation, the changes appear reasonable to me.

Thanks. That certainly is helpful feedback.

Unless there are objections or questions from others I intend to
check this into master tomorrow.

Cheers,

Mark

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] elfclassify tool
  2019-07-29 14:38                 ` Florian Weimer
@ 2019-08-13  9:44                   ` Mark Wielaard
  2019-08-13 11:42                     ` Mark Wielaard
  0 siblings, 1 reply; 36+ messages in thread
From: Mark Wielaard @ 2019-08-13  9:44 UTC (permalink / raw)
  To: Florian Weimer; +Cc: elfutils-devel, Panu Matilainen

On Mon, Jul 29, 2019 at 04:38:17PM +0200, Florian Weimer wrote:
> > On Mon, Jul 29, 2019 at 11:16:31AM +0200, Florian Weimer wrote:
> > So the test should be:
> >
> > diff --git a/src/elfclassify.c b/src/elfclassify.c
> > index ebd42c1d5..b17d14d45 100644
> > --- a/src/elfclassify.c
> > +++ b/src/elfclassify.c
> > @@ -862,7 +862,7 @@ process_stdin (int *status)
> >          break;
> >        if (ret < 0)
> >          abort ();           /* Cannot happen due to error checks above.  */
> > -      if (delim != '\0' && ret > 0)
> > +      if (delim != '\0' && ret > 0 && buffer[ret - 1] == '\n')
> >          buffer[ret - 1] = '\0';
> >        current_path = buffer;
> >        process_current_path (status);
> 
> Right.  But now I wonder why ret == 0 can ever happen.  Maybe on
> OpenVMS, which doesn't use in-band signaling for line terminators?

I also couldn't create a situation where ret == 0.
But I still included the change because it feels more robust.

Thanks,

Mark

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] elfclassify tool
  2019-08-13  9:44                   ` Mark Wielaard
@ 2019-08-13 11:42                     ` Mark Wielaard
  2019-08-14 20:46                       ` [PATCH] config/elfutils.spec.in: package eu-elfclassify Dmitry V. Levin
  0 siblings, 1 reply; 36+ messages in thread
From: Mark Wielaard @ 2019-08-13 11:42 UTC (permalink / raw)
  To: Florian Weimer; +Cc: elfutils-devel, Panu Matilainen

[-- Attachment #1: Type: text/plain, Size: 282 bytes --]

Hi,

With the dwelf_elf_begin fix now committed I intend to commit the
elfclassify tool as attached. It now treats a NULL return from
[dwelf_]elf_begin always as error. It has the new --library
classification and various fixes that were pointed out during the
review.

Cheers,

Mark

[-- Attachment #2: 0001-elfclassify-New-tool-to-analyze-ELF-objects.patch --]
[-- Type: text/x-diff, Size: 53306 bytes --]

From 3f489b5c7c78df6d52f8982f79c36e9a220e8951 Mon Sep 17 00:00:00 2001
From: Florian Weimer <fweimer@redhat.com>
Date: Tue, 13 Aug 2019 13:27:15 +0200
Subject: [PATCH] elfclassify: New tool to analyze ELF objects.

Usage: elfclassify [OPTION...] FILE...
Determine the type of an ELF file.

All of the classification options must apply at the same time to a
particular file.  Classification options can be negated using a
"--not-" prefix.

Since modern ELF does not clearly distinguish between programs and
dynamic shared objects, you should normally use either --executable or
--shared to identify the primary purpose of a file.  Only one of the
--shared and --executable checks can pass for a file.

If you want to know whether an ELF object might be a program or a shared
library (but could be both), then use --program or --library. Some ELF
files will classify as both a program and a library.

If you just want to know whether an ELF file is loadable (as program
or library) use --loadable.  Note that files that only contain
(separate) debug information (--debug-only) are never --loadable (even
though they might contain program headers).  Linux kernel modules are
also not --loadable (in the normal sense).

Without any of the --print options, the program exits with status 0 if
the requested checks pass for all input files, with 1 if a check fails
for any file, and 2 if there is an environmental issue (such as a file
read error or a memory allocation error).

When printing file names, the program exits with status 0 even if no
file names are printed, and exits with status 2 if there is an
environmental issue.

On usage error (e.g. a bad option was given), the program exits with a
status code larger than 2.

The --quiet or -q option suppresses some error warning output,
but doesn't change the exit status.

Classification options
  --core                 File is an ELF core dump file
  --debug-only           File is a debug only ELF file (separate .debug,
                         .dwo or dwz multi-file)
  --elf                  File looks like an ELF object or archive/static
                         library (default)
  --elf-archive          File is an ELF archive or static library
  --elf-file             File is an regular ELF object (not an
                         archive/static library)
  --executable           File is (primarily) an ELF program executable (not
                         primarily a DSO)
  --library              File is an ELF shared object (DSO) (might also be
                         an executable)
  --linux-kernel-module  File is a linux kernel module
  --loadable             File is a loadable ELF object (program or shared
                         object)
  --program              File is an ELF program executable (might also be a
                         DSO)
  --shared               File is (primarily) an ELF shared object (DSO)
                         (not primarily an executable)
  --unstripped           File is an ELF file with symbol table or .debug_*
                         sections and can be stripped further

Input flags
  -f, --file             Only classify regular (not symlink nor special
                         device) files
  --no-stdin             Do not read files from standard input (default)
  --stdin                Also read file names to process from standard
                         input, separated by newlines
  --stdin0               Also read file names to process from standard
                         input, separated by ASCII NUL bytes
  -z, --compressed       Try to open compressed files or embedded (kernel)
                         ELF images

Output flags
  --matching             If printing file names, print matching files
                         (default)
  --no-print             Do not output file names
  --not-matching         If printing file names, print files that do not
                         match
  --print                Output names of files, separated by newline
  --print0               Output names of files, separated by ASCII NUL

Additional flags
  -q, --quiet            Suppress some error output (counterpart to
                         --verbose)
  -v, --verbose          Output additional information (can be specified
                         multiple times)

  -?, --help             Give this help list
      --usage            Give a short usage message
  -V, --version          Print program version

Report bugs to https://sourceware.org/bugzilla.

Signed-off-by: Florian Weimer <fweimer@redhat.com>
Signed-off-by: Mark Wielaard <mark@klomp.org>
---
 src/ChangeLog                 |    9 +-
 src/Makefile.am               |    4 +-
 src/elfclassify.c             | 1046 +++++++++++++++++++++++++++++++++
 tests/ChangeLog               |    9 +
 tests/Makefile.am             |    7 +-
 tests/run-elfclassify-self.sh |   36 ++
 tests/run-elfclassify.sh      |  327 +++++++++++
 7 files changed, 1433 insertions(+), 5 deletions(-)
 create mode 100644 src/elfclassify.c
 create mode 100755 tests/run-elfclassify-self.sh
 create mode 100755 tests/run-elfclassify.sh

diff --git a/src/ChangeLog b/src/ChangeLog
index 911ad26f8..c2102fcda 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,4 +1,11 @@
-2019-07-13 Mao Han <han_mao@c-sky.com>
+2019-07-26  Florian Weimer  <fweimer@redhat.com>
+	    Mark Wielaard  <mark@klomp.org>
+
+	* Makefile.am (bin_PROGRAMS): Add elfclassify.
+	(elfclassify_LDADD): New variable.
+	* elfclassify.c: New tool.
+
+2019-07-13  Mao Han  <han_mao@c-sky.com>
 
 	* elflint.c: Add C-SKY.
 
diff --git a/src/Makefile.am b/src/Makefile.am
index 2b1c0dcbc..69ac4dbe0 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -26,7 +26,8 @@ AM_CPPFLAGS += -I$(srcdir)/../libelf -I$(srcdir)/../libebl \
 AM_LDFLAGS = -Wl,-rpath-link,../libelf:../libdw
 
 bin_PROGRAMS = readelf nm size strip elflint findtextrel addr2line \
-	       elfcmp objdump ranlib strings ar unstrip stack elfcompress
+	       elfcmp objdump ranlib strings ar unstrip stack elfcompress \
+	       elfclassify
 
 noinst_LIBRARIES = libar.a
 
@@ -83,6 +84,7 @@ ar_LDADD = libar.a $(libelf) $(libeu) $(argp_LDADD)
 unstrip_LDADD = $(libebl) $(libelf) $(libdw) $(libeu) $(argp_LDADD) -ldl
 stack_LDADD = $(libebl) $(libelf) $(libdw) $(libeu) $(argp_LDADD) -ldl $(demanglelib)
 elfcompress_LDADD = $(libebl) $(libelf) $(libdw) $(libeu) $(argp_LDADD)
+elfclassify_LDADD = $(libelf) $(libdw) $(libeu) $(argp_LDADD)
 
 installcheck-binPROGRAMS: $(bin_PROGRAMS)
 	bad=0; pid=$$$$; list="$(bin_PROGRAMS)"; for p in $$list; do \
diff --git a/src/elfclassify.c b/src/elfclassify.c
new file mode 100644
index 000000000..535cc49fc
--- /dev/null
+++ b/src/elfclassify.c
@@ -0,0 +1,1046 @@
+/* Classification of ELF files.
+   Copyright (C) 2019 Red Hat, Inc.
+   This file is part of elfutils.
+
+   This file is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   elfutils is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+#include <argp.h>
+#include <error.h>
+#include <fcntl.h>
+#include <gelf.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include ELFUTILS_HEADER(elf)
+#include ELFUTILS_HEADER(dwelf)
+#include "printversion.h"
+
+/* Name and version of program.  */
+ARGP_PROGRAM_VERSION_HOOK_DEF = print_version;
+
+/* Bug report address.  */
+ARGP_PROGRAM_BUG_ADDRESS_DEF = PACKAGE_BUGREPORT;
+
+/* Set by parse_opt.  */
+static int verbose;
+
+/* Set by the main function.  */
+static const char *current_path;
+
+/* Set by open_file.  */
+static int file_fd = -1;
+
+/* Set by issue or elf_issue.  */
+static bool issue_found;
+
+/* Non-fatal issue occurred while processing the current_path.  */
+static void
+issue (int e, const char *msg)
+{
+  if (verbose >= 0)
+    {
+      if (current_path == NULL)
+	error (0, e, "%s", msg);
+      else
+	error (0, e, "%s '%s'", msg, current_path);
+    }
+  issue_found = true;
+}
+
+/* Non-fatal issue occurred while processing the current ELF.  */
+static void
+elf_issue (const char *msg)
+{
+  if (verbose >= 0)
+    error (0, 0, "%s: %s: '%s'", msg, elf_errmsg (-1), current_path);
+  issue_found = true;
+}
+
+/* Set by parse_opt.  */
+static bool flag_only_regular_files;
+
+static bool
+open_file (void)
+{
+  if (verbose > 1)
+    fprintf (stderr, "debug: processing file: %s\n", current_path);
+
+  file_fd = open (current_path, O_RDONLY | (flag_only_regular_files
+					    ? O_NOFOLLOW : 0));
+  if (file_fd < 0)
+    {
+      if (!flag_only_regular_files || errno != ELOOP)
+	issue (errno, N_("opening"));
+      return false;
+    }
+
+  struct stat st;
+  if (fstat (file_fd, &st) != 0)
+    {
+      issue (errno, N_("reading"));
+      return false;
+    }
+
+  /* Don't even bother with directories.  */
+  if (S_ISDIR (st.st_mode)
+      || (flag_only_regular_files && !S_ISREG (st.st_mode)))
+    return false;
+
+  return true;
+}
+
+static void
+close_file (void)
+{
+  if (file_fd >= 0)
+    {
+      close (file_fd);
+      file_fd = -1;
+    }
+}
+
+/* Set by open_elf.  */
+static Elf *elf;
+
+/* Set by parse_opt.  */
+static bool flag_compressed;
+
+static bool
+open_elf (void)
+{
+  if (!open_file ())
+    {
+      /* Make sure the file descriptor is gone.  */
+      close_file ();
+      return false;
+    }
+
+  if (flag_compressed)
+    elf = dwelf_elf_begin (file_fd);
+  else
+    elf = elf_begin (file_fd, ELF_C_READ, NULL);
+
+  if (elf == NULL)
+    {
+      elf_issue ("opening ELF file");
+      close_file ();
+      return false;
+    }
+
+  return true;
+}
+
+static void
+close_elf (void)
+{
+  if (elf != NULL)
+    {
+      elf_end (elf);
+      elf = NULL;
+    }
+
+  close_file ();
+}
+
+static const char *
+elf_kind_string (int kind)
+{
+  switch (kind)
+    {
+    case ELF_K_NONE:
+      return "ELF_K_NONE";
+    case ELF_K_AR:
+      return "ELF_K_AR";
+    case ELF_K_COFF:
+      return "ELF_K_COFF"; /* libelf doesn't really support this.  */
+    case ELF_K_ELF:
+      return "ELF_K_ELF";
+    default:
+      return "<unknown>";
+    }
+}
+
+static const char *
+elf_type_string (int type)
+{
+  switch (type)
+    {
+    case ET_NONE:
+      return "ET_NONE";
+    case ET_REL:
+      return "ET_REL";
+    case ET_EXEC:
+      return "ET_EXEC";
+    case ET_DYN:
+      return "ET_DYN";
+    case ET_CORE:
+      return "ET_CORE";
+    default:
+      return "<unknown>";
+    }
+}
+
+static int elf_type;
+static bool has_program_load;
+static bool has_sections;
+static bool has_bits_alloc;
+static bool has_program_interpreter;
+static bool has_dynamic;
+static bool has_soname;
+static bool has_pie_flag;
+static bool has_dt_debug;
+static bool has_symtab;
+static bool has_debug_sections;
+static bool has_modinfo;
+static bool has_gnu_linkonce_this_module;
+
+static bool
+run_classify (void)
+{
+  /* Reset to unanalyzed default.  */
+  elf_type = 0;
+  has_program_load = false;
+  has_sections = false;
+  has_bits_alloc = false;
+  has_program_interpreter = false;
+  has_dynamic = false;
+  has_soname = false;
+  has_pie_flag = false;
+  has_dt_debug = false;
+  has_symtab = false;
+  has_debug_sections = false;
+  has_modinfo = false;
+  has_gnu_linkonce_this_module = false;
+
+  int kind = elf_kind (elf);
+  if (verbose > 0)
+    fprintf (stderr, "info: %s: ELF kind: %s (0x%x)\n", current_path,
+	     elf_kind_string (kind), kind);
+  if (kind != ELF_K_ELF)
+    return true;
+
+  GElf_Ehdr ehdr_storage;
+  GElf_Ehdr *ehdr = gelf_getehdr (elf, &ehdr_storage);
+  if (ehdr == NULL)
+    {
+      elf_issue (N_("ELF header"));
+      return false;
+    }
+  elf_type = ehdr->e_type;
+
+  /* Examine program headers.  */
+  GElf_Phdr dyn_seg = { .p_type = 0 };
+  {
+    size_t nphdrs;
+    if (elf_getphdrnum (elf, &nphdrs) != 0)
+      {
+	elf_issue (N_("program headers"));
+	return false;
+      }
+    for (size_t phdr_idx = 0; phdr_idx < nphdrs; ++phdr_idx)
+      {
+	GElf_Phdr phdr_storage;
+	GElf_Phdr *phdr = gelf_getphdr (elf, phdr_idx, &phdr_storage);
+	if (phdr == NULL)
+	  {
+	    elf_issue (N_("program header"));
+	    return false;
+	  }
+	if (phdr->p_type == PT_DYNAMIC)
+	  {
+	    dyn_seg = *phdr;
+	    has_dynamic = true;
+	  }
+	if (phdr->p_type == PT_INTERP)
+	  has_program_interpreter = true;
+	if (phdr->p_type == PT_LOAD)
+	  has_program_load = true;
+      }
+  }
+
+  /* Do we have sections?  */
+  {
+    size_t nshdrs;
+    if (elf_getshdrnum (elf, &nshdrs) != 0)
+      {
+	elf_issue (N_("section headers"));
+	return false;
+      }
+    if (nshdrs > 0)
+      has_sections = true;
+  }
+
+  {
+    size_t shstrndx;
+    if (unlikely (elf_getshdrstrndx (elf, &shstrndx) < 0))
+      {
+	elf_issue (N_("section header string table index"));
+	return false;
+      }
+
+    Elf_Scn *scn = NULL;
+    while (true)
+      {
+        scn = elf_nextscn (elf, scn);
+        if (scn == NULL)
+          break;
+        GElf_Shdr shdr_storage;
+        GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_storage);
+        if (shdr == NULL)
+	  {
+            elf_issue (N_("could not obtain section header"));
+	    return false;
+	  }
+        const char *section_name = elf_strptr (elf, shstrndx, shdr->sh_name);
+        if (section_name == NULL)
+	  {
+            elf_issue(N_("could not obtain section name"));
+	    return false;
+	  }
+        if (verbose > 2)
+          fprintf (stderr, "debug: section header %s (type %d) found\n",
+                   section_name, shdr->sh_type);
+        if (shdr->sh_type == SHT_SYMTAB)
+          {
+            if (verbose > 1)
+              fputs ("debug: symtab section found\n", stderr);
+            has_symtab = true;
+          }
+	/* NOBITS and NOTE sections can be in any file.  We want to be
+	   sure there is at least one other allocated section.  */
+	if (shdr->sh_type != SHT_NOBITS
+	    && shdr->sh_type != SHT_NOTE
+	    && (shdr->sh_flags & SHF_ALLOC) != 0)
+	  {
+	    if (verbose > 1 && !has_bits_alloc)
+	      fputs ("debug: allocated (non-nobits/note) section found\n",
+		     stderr);
+	    has_bits_alloc = true;
+	  }
+        const char *debug_prefix = ".debug_";
+        const char *zdebug_prefix = ".zdebug_";
+        if (strncmp (section_name, debug_prefix, strlen (debug_prefix)) == 0
+	    || strncmp (section_name, zdebug_prefix,
+			strlen (zdebug_prefix)) == 0)
+          {
+            if (verbose > 1 && !has_debug_sections)
+              fputs ("debug: .debug_* section found\n", stderr);
+            has_debug_sections = true;
+          }
+	if (strcmp (section_name, ".modinfo") == 0)
+	  {
+	    if (verbose > 1)
+	      fputs ("debug: .modinfo section found\n", stderr);
+	    has_modinfo = true;
+	  }
+	if (strcmp (section_name, ".gnu.linkonce.this_module") == 0)
+	  {
+	    if (verbose > 1)
+	      fputs ("debug: .gnu.linkonce.this_module section found\n",
+		     stderr);
+	    has_gnu_linkonce_this_module = true;
+	  }
+      }
+  }
+
+  /* Examine the dynamic section.  */
+  if (has_dynamic)
+    {
+      Elf_Data *data = elf_getdata_rawchunk (elf, dyn_seg.p_offset,
+					     dyn_seg.p_filesz,
+					     ELF_T_DYN);
+      if (data != NULL)
+	for (int dyn_idx = 0; ; ++dyn_idx)
+	  {
+	    GElf_Dyn dyn_storage;
+	    GElf_Dyn *dyn = gelf_getdyn (data, dyn_idx, &dyn_storage);
+	    if (dyn == NULL)
+	      break;
+	    if (verbose > 2)
+	      fprintf (stderr, "debug: dynamic entry %d"
+		       " with tag %llu found\n",
+		       dyn_idx, (unsigned long long int) dyn->d_tag);
+	    if (dyn->d_tag == DT_SONAME)
+	      has_soname = true;
+	    if (dyn->d_tag == DT_FLAGS_1 && (dyn->d_un.d_val & DF_1_PIE))
+	      has_pie_flag = true;
+	    if (dyn->d_tag == DT_DEBUG)
+	      has_dt_debug = true;
+	    if (dyn->d_tag == DT_NULL)
+	      break;
+	  }
+    }
+
+  if (verbose > 0)
+    {
+      fprintf (stderr, "info: %s: ELF type: %s (0x%x)\n", current_path,
+	       elf_type_string (elf_type), elf_type);
+      if (has_program_load)
+        fprintf (stderr, "info: %s: PT_LOAD found\n", current_path);
+      if (has_sections)
+	fprintf (stderr, "info: %s: has sections\n", current_path);
+      if (has_bits_alloc)
+	fprintf (stderr, "info: %s: allocated (real) section found\n",
+		 current_path);
+      if (has_program_interpreter)
+        fprintf (stderr, "info: %s: program interpreter found\n",
+                 current_path);
+      if (has_dynamic)
+        fprintf (stderr, "info: %s: dynamic segment found\n", current_path);
+      if (has_soname)
+        fprintf (stderr, "info: %s: soname found\n", current_path);
+      if (has_pie_flag)
+        fprintf (stderr, "info: %s: DF_1_PIE flag found\n", current_path);
+      if (has_dt_debug)
+        fprintf (stderr, "info: %s: DT_DEBUG found\n", current_path);
+      if (has_symtab)
+        fprintf (stderr, "info: %s: symbol table found\n", current_path);
+      if (has_debug_sections)
+        fprintf (stderr, "info: %s: .debug_* section found\n", current_path);
+      if (has_modinfo)
+        fprintf (stderr, "info: %s: .modinfo section found\n", current_path);
+      if (has_gnu_linkonce_this_module)
+        fprintf (stderr,
+		 "info: %s: .gnu.linkonce.this_module section found\n",
+		 current_path);
+    }
+
+  return true;
+}
+
+static bool
+is_elf (void)
+{
+  return elf_kind (elf) != ELF_K_NONE;
+}
+
+static bool
+is_elf_file (void)
+{
+  return elf_kind (elf) == ELF_K_ELF;
+}
+
+static bool
+is_elf_archive (void)
+{
+  return elf_kind (elf) == ELF_K_AR;
+}
+
+static bool
+is_core (void)
+{
+  return elf_kind (elf) == ELF_K_ELF && elf_type == ET_CORE;
+}
+
+/* Return true if the file is a loadable object, which basically means
+   it is an ELF file, but not a relocatable object or a core dump
+   file.  (The kernel and various userspace components can load ET_REL
+   files, but we disregard that for our classification purposes.)  */
+static bool
+is_loadable (void)
+{
+  return elf_kind (elf) == ELF_K_ELF
+    && (elf_type == ET_EXEC || elf_type == ET_DYN)
+    && has_program_load
+    && (!has_sections || has_bits_alloc); /* It isn't debug-only.  */
+}
+
+/* Return true if the file is an ELF file which has a symbol table or
+   .debug_* sections (and thus can be stripped further).  */
+static bool
+is_unstripped (void)
+{
+  return elf_kind (elf) != ELF_K_NONE
+    && (elf_type == ET_REL || elf_type == ET_EXEC || elf_type == ET_DYN)
+    && (has_symtab || has_debug_sections);
+}
+
+/* Return true if the file contains only debuginfo, but no loadable
+   program bits.  Then it is most likely a separate .debug file, a dwz
+   multi-file or a .dwo file.  Note that it can still be loadable,
+   but in that case the phdrs shouldn't be trusted.  */
+static bool
+is_debug_only (void)
+{
+  return elf_kind (elf) != ELF_K_NONE
+    && (elf_type == ET_REL || elf_type == ET_EXEC || elf_type == ET_DYN)
+    && (has_debug_sections || has_symtab)
+    && !has_bits_alloc;
+}
+
+static bool
+is_shared (void)
+{
+  if (!is_loadable ())
+    return false;
+
+  /* The ELF type is very clear: this is an executable.  */
+  if (elf_type == ET_EXEC)
+    return false;
+
+  /* If there is no dynamic section, the file cannot be loaded as a
+     shared object.  */
+  if (!has_dynamic)
+    return false;
+
+  /* If the object is marked as PIE, it is definitely an executable,
+     and not a loadable shared object.  */
+  if (has_pie_flag)
+    return false;
+
+  /* Treat a DT_SONAME tag as a strong indicator that this is a shared
+     object.  */
+  if (has_soname)
+    return true;
+
+  /* This is probably a PIE program: there is no soname, but a program
+     interpreter.  In theory, this file could be also a DSO with a
+     soname implied by its file name that can be run as a program.
+     This situation is impossible to resolve in the general case. */
+  if (has_program_interpreter)
+    return false;
+
+  /* Roland McGrath mentions in
+     <https://www.sourceware.org/ml/libc-alpha/2015-03/msg00605.html>,
+     that “we defined a PIE as an ET_DYN with a DT_DEBUG”.  This
+     matches current binutils behavior (version 2.32).  DT_DEBUG is
+     added if bfd_link_executable returns true or if bfd_link_pic
+     returns false, depending on the architectures.  However, DT_DEBUG
+     is not documented as being specific to executables, therefore use
+     it only as a low-priority discriminator.  */
+  if (has_dt_debug)
+    return false;
+
+  return true;
+}
+
+static bool
+is_executable (void)
+{
+  if (!is_loadable ())
+    return false;
+
+  /* A loadable object which is not a shared object is treated as an
+     executable.  */
+  return !is_shared ();
+}
+
+/* Like is_executable, but the object can also be a shared library at
+   the same time.  */
+static bool
+is_program (void)
+{
+  if (!is_loadable ())
+    return false;
+
+  /* The ELF type is very clear: this is an executable.  */
+  if (elf_type == ET_EXEC)
+    return true;
+
+  /* If the object is marked as PIE, it is definitely an executable,
+     and not a loadable shared object.  */
+  if (has_pie_flag)
+    return true;
+
+  /* This is probably a PIE program. It isn't ET_EXEC, but has a
+     program interpreter. In theory, this file could be also a DSO
+     with a soname. This situation is impossible to resolve in the
+     general case. See is_shared. This is different from
+     is_executable.  */
+  if (has_program_interpreter)
+    return true;
+
+  /* Roland McGrath mentions in
+     <https://www.sourceware.org/ml/libc-alpha/2015-03/msg00605.html>,
+     that “we defined a PIE as an ET_DYN with a DT_DEBUG”.  This
+     matches current binutils behavior (version 2.32).  DT_DEBUG is
+     added if bfd_link_executable returns true or if bfd_link_pic
+     returns false, depending on the architectures.  However, DT_DEBUG
+     is not documented as being specific to executables, therefore use
+     it only as a low-priority discriminator.  */
+  if (has_dt_debug)
+    return true;
+
+  return false;
+}
+
+/* Counterpart of is_shared which tolerates libraries that can also
+   be run as an executable.  */
+static bool
+is_library (void)
+{
+  /* Prerequisites: only ET_DYN objects can be shared libraries, the
+     object has to be loadable, and without a PT_DYNAMIC segment the
+     library cannot be loaded.  */
+  bool candidate = (elf_type == ET_DYN
+                    && is_loadable ()
+                    && has_dynamic);
+  if (!candidate)
+    return false;
+
+  /* An object carrying the PIE flag or a DT_DEBUG tag really is a
+     (PIE) executable and is therefore not reported as a library.
+     See is_shared.  */
+  bool pie_executable = has_pie_flag || has_dt_debug;
+
+  /* What remains could still (also) be a (PIE) executable, but most
+     likely you can dlopen it just fine.  */
+  return !pie_executable;
+}
+
+/* A Linux kernel module is an ELF file (ELF_K_ELF) of type ET_REL which
+   has both magic sections, .modinfo and .gnu.linkonce.this_module.  */
+static bool
+is_linux_kernel_module (void)
+{
+  if (elf_kind (elf) != ELF_K_ELF || elf_type != ET_REL)
+    return false;
+
+  return has_modinfo && has_gnu_linkonce_this_module;
+}
+
+enum classify_requirement { do_not_care, required, forbidden };
+
+enum classify_check
+{
+  classify_elf,                  /* --elf (required by default) */
+  classify_elf_file,             /* --elf-file */
+  classify_elf_archive,          /* --elf-archive */
+  classify_core,                 /* --core */
+  classify_unstripped,           /* --unstripped */
+  classify_executable,           /* --executable */
+  classify_program,              /* --program */
+  classify_shared,               /* --shared */
+  classify_library,              /* --library */
+  classify_linux_kernel_module,  /* --linux-kernel-module */
+  classify_debug_only,           /* --debug-only */
+  classify_loadable,             /* --loadable */
+
+  classify_check_last = classify_loadable  /* Largest valid check.  */
+};
+
+enum
+{
+  classify_check_offset = 1000,      /* argp key of --CHECK: this + check.  */
+  classify_check_not_offset = 2000,  /* Likewise for --not-CHECK.  */
+
+  classify_flag_stdin = 3000,        /* Keys of the long-only flags.  */
+  classify_flag_stdin0,
+  classify_flag_no_stdin,
+  classify_flag_print,
+  classify_flag_print0,
+  classify_flag_no_print,
+  classify_flag_matching,
+  classify_flag_not_matching,
+};
+
+static bool
+classify_check_positive (int key)
+{
+  int first = classify_check_offset;  /* Key of the first --CHECK option.  */
+  return first <= key && key <= first + classify_check_last;
+}
+
+static bool
+classify_check_negative (int key)
+{
+  int first = classify_check_not_offset;  /* Key of the first --not-CHECK.  */
+  return first <= key && key <= first + classify_check_last;
+}
+
+/* Set by parse_opt.  requirements has one entry per classify_check.  */
+static enum classify_requirement requirements[classify_check_last + 1];
+static enum { no_stdin, do_stdin, do_stdin0 } flag_stdin;
+static enum { no_print, do_print, do_print0 } flag_print;
+static bool flag_print_matching = true;  /* --matching is the default.  */
+
+static error_t
+parse_opt (int key, char *arg __attribute__ ((unused)),
+           struct argp_state *state __attribute__ ((unused)))
+{
+  /* Keys in one of the two check ranges record a requirement for the
+     corresponding classify_check: a --CHECK option demands that the
+     check passes, a --not-CHECK option that it fails.  */
+  if (classify_check_positive (key))
+    {
+      requirements[key - classify_check_offset] = required;
+      return 0;
+    }
+  if (classify_check_negative (key))
+    {
+      requirements[key - classify_check_not_offset] = forbidden;
+      return 0;
+    }
+
+  /* Everything else is a flag.  -v and -q adjust the verbosity level,
+     the other flags overwrite their setting, so the last one wins.  */
+  switch (key)
+    {
+    case 'v':
+      ++verbose;
+      return 0;
+    case 'q':
+      --verbose;
+      return 0;
+    case 'z':
+      flag_compressed = true;
+      return 0;
+    case 'f':
+      flag_only_regular_files = true;
+      return 0;
+
+    case classify_flag_stdin:
+      flag_stdin = do_stdin;
+      return 0;
+    case classify_flag_stdin0:
+      flag_stdin = do_stdin0;
+      return 0;
+    case classify_flag_no_stdin:
+      flag_stdin = no_stdin;
+      return 0;
+
+    case classify_flag_print:
+      flag_print = do_print;
+      return 0;
+    case classify_flag_print0:
+      flag_print = do_print0;
+      return 0;
+    case classify_flag_no_print:
+      flag_print = no_print;
+      return 0;
+    case classify_flag_matching:
+      flag_print_matching = true;
+      return 0;
+    case classify_flag_not_matching:
+      flag_print_matching = false;
+      return 0;
+
+    default:
+      return ARGP_ERR_UNKNOWN;
+    }
+}
+
+/* Classify the file at current_path and compare the outcome with the
+   requirements.  If one of the print modes is active, the path is
+   printed when the outcome equals flag_print_matching.  Otherwise
+   (no_print), *STATUS is set to 1 if the file does not meet the
+   requirements.  */
+static void
+process_current_path (int *status)
+{
+  bool matches = true;
+
+  if (open_elf () && run_classify ())
+    {
+      /* Outcome of each check, indexed by classify_check.  */
+      bool result[classify_check_last + 1];
+      result[classify_elf] = is_elf ();
+      result[classify_elf_file] = is_elf_file ();
+      result[classify_elf_archive] = is_elf_archive ();
+      result[classify_core] = is_core ();
+      result[classify_unstripped] = is_unstripped ();
+      result[classify_executable] = is_executable ();
+      result[classify_program] = is_program ();
+      result[classify_shared] = is_shared ();
+      result[classify_library] = is_library ();
+      result[classify_linux_kernel_module] = is_linux_kernel_module ();
+      result[classify_debug_only] = is_debug_only ();
+      result[classify_loadable] = is_loadable ();
+
+      /* With verbose above 1, report every check that holds.  */
+      if (verbose > 1)
+        {
+          if (result[classify_elf])
+            fprintf (stderr, "debug: %s: elf\n", current_path);
+          if (result[classify_elf_file])
+            fprintf (stderr, "debug: %s: elf_file\n", current_path);
+          if (result[classify_elf_archive])
+            fprintf (stderr, "debug: %s: elf_archive\n", current_path);
+          if (result[classify_core])
+            fprintf (stderr, "debug: %s: core\n", current_path);
+          if (result[classify_unstripped])
+            fprintf (stderr, "debug: %s: unstripped\n", current_path);
+          if (result[classify_executable])
+            fprintf (stderr, "debug: %s: executable\n", current_path);
+          if (result[classify_program])
+            fprintf (stderr, "debug: %s: program\n", current_path);
+          if (result[classify_shared])
+            fprintf (stderr, "debug: %s: shared\n", current_path);
+          if (result[classify_library])
+            fprintf (stderr, "debug: %s: library\n", current_path);
+          if (result[classify_linux_kernel_module])
+            fprintf (stderr, "debug: %s: linux kernel module\n", current_path);
+          if (result[classify_debug_only])
+            fprintf (stderr, "debug: %s: debug-only\n", current_path);
+          if (result[classify_loadable])
+            fprintf (stderr, "debug: %s: loadable\n", current_path);
+        }
+
+      /* A required check has to hold and a forbidden check must not
+         hold, otherwise the file does not match.  */
+      for (int check = 0; check <= classify_check_last; ++check)
+        {
+          enum classify_requirement wanted = requirements[check];
+          if ((wanted == required && !result[check])
+              || (wanted == forbidden && result[check]))
+            matches = false;
+        }
+    }
+  else if (file_fd == -1)
+    matches = false;  /* Bad file, there is nothing to check.  */
+  else
+    {
+      /* open_elf or run_classify failed although file_fd is not -1.
+         No check is evaluated in this case, and the file is treated
+         as matching unless at least one check is required (this
+         includes the default requirement set up by main).  */
+      for (int check = 0; check <= classify_check_last; ++check)
+        if (requirements[check] == required)
+          matches = false;
+    }
+
+  close_elf ();
+
+  if (flag_print == no_print)
+    {
+      /* Without printing, a mismatch is reported via *STATUS only.  */
+      if (!matches)
+        *status = 1;
+    }
+  else if (matches == flag_print_matching)
+    {
+      /* do_print ends the name with a newline, do_print0 with the NUL
+         byte which terminates the string.  */
+      if (flag_print == do_print)
+        puts (current_path);
+      else if (flag_print == do_print0)
+        fwrite (current_path, strlen (current_path) + 1, 1, stdout);
+    }
+}
+
+/* Process the file names read from standard input (flag_stdin is not
+   no_stdin).  A last name without trailing delimiter is processed too.  */
+static void
+process_stdin (int *status)
+{
+  char delim = flag_stdin == do_stdin0 ? '\0' : '\n';
+  char *buffer = NULL;
+  size_t buffer_size = 0;
+  while (true)
+    {
+      ssize_t ret = getdelim (&buffer, &buffer_size, delim, stdin);
+      if (ferror (stdin))
+        {
+          current_path = NULL;
+          issue (errno, N_("reading from standard input"));
+          break;
+        }
+      /* Test RET rather than feof: the end-of-file indicator is also
+         set when the last name is not followed by a delimiter.  */
+      if (ret < 0)
+        {
+          if (!feof (stdin))
+            abort ();       /* Cannot happen due to error check above.  */
+          break;
+        }
+      if (delim != '\0' && ret > 0 && buffer[ret - 1] == '\n')
+        buffer[ret - 1] = '\0';
+      current_path = buffer;
+      process_current_path (status);
+    }
+
+  free (buffer);
+}
+
+/* Parse the command line, then classify each FILE argument and, with
+   --stdin or --stdin0, each name read from standard input.  Returns 2
+   if an issue was recorded, otherwise the accumulated check status.  */
+int
+main (int argc, char **argv)
+{
+  const struct argp_option options[] =
+    {
+      { NULL, 0, NULL, OPTION_DOC, N_("Classification options"), 1 },
+      { "elf", classify_check_offset + classify_elf, NULL, 0,
+        N_("File looks like an ELF object or archive/static library (default)")
+	, 1 },
+      { "elf-file", classify_check_offset + classify_elf_file, NULL, 0,
+        N_("File is an regular ELF object (not an archive/static library)")
+	, 1 },
+      { "elf-archive", classify_check_offset + classify_elf_archive, NULL, 0,
+        N_("File is an ELF archive or static library"), 1 },
+      { "core", classify_check_offset + classify_core, NULL, 0,
+        N_("File is an ELF core dump file"), 1 },
+      { "unstripped", classify_check_offset + classify_unstripped, NULL, 0,
+        N_("File is an ELF file with symbol table or .debug_* sections \
+and can be stripped further"), 1 },
+      { "executable", classify_check_offset + classify_executable, NULL, 0,
+        N_("File is (primarily) an ELF program executable \
+(not primarily a DSO)"), 1 },
+      { "program", classify_check_offset + classify_program, NULL, 0,
+        N_("File is an ELF program executable \
+(might also be a DSO)"), 1 },
+      { "shared", classify_check_offset + classify_shared, NULL, 0,
+        N_("File is (primarily) an ELF shared object (DSO) \
+(not primarily an executable)"), 1 },
+      { "library", classify_check_offset + classify_library, NULL, 0,
+        N_("File is an ELF shared object (DSO) \
+(might also be an executable)"), 1 },
+      { "linux-kernel-module", (classify_check_offset
+				+ classify_linux_kernel_module), NULL, 0,
+        N_("File is a linux kernel module"), 1 },
+      { "debug-only", (classify_check_offset + classify_debug_only), NULL, 0,
+        N_("File is a debug only ELF file \
+(separate .debug, .dwo or dwz multi-file)"), 1 },
+      { "loadable", classify_check_offset + classify_loadable, NULL, 0,
+        N_("File is a loadable ELF object (program or shared object)"), 1 },
+
+      /* Negated versions of the above.  */
+      { "not-elf", classify_check_not_offset + classify_elf,
+        NULL, OPTION_HIDDEN, NULL, 1 },
+      { "not-elf-file", classify_check_not_offset + classify_elf_file,
+        NULL, OPTION_HIDDEN, NULL, 1 },
+      { "not-elf-archive", classify_check_not_offset + classify_elf_archive,
+        NULL, OPTION_HIDDEN, NULL, 1 },
+      { "not-core", classify_check_not_offset + classify_core,
+        NULL, OPTION_HIDDEN, NULL, 1 },
+      { "not-unstripped", classify_check_not_offset + classify_unstripped,
+        NULL, OPTION_HIDDEN, NULL, 1 },
+      { "not-executable", classify_check_not_offset + classify_executable,
+        NULL, OPTION_HIDDEN, NULL, 1 },
+      { "not-program", classify_check_not_offset + classify_program,
+        NULL, OPTION_HIDDEN, NULL, 1 },
+      { "not-shared", classify_check_not_offset + classify_shared,
+        NULL, OPTION_HIDDEN, NULL, 1 },
+      { "not-library", classify_check_not_offset + classify_library,
+        NULL, OPTION_HIDDEN, NULL, 1 },
+      { "not-linux-kernel-module", (classify_check_not_offset
+				    + classify_linux_kernel_module),
+        NULL, OPTION_HIDDEN, NULL, 1 },
+      { "not-debug-only", (classify_check_not_offset + classify_debug_only),
+        NULL, OPTION_HIDDEN, NULL, 1 },
+      { "not-loadable", classify_check_not_offset + classify_loadable,
+        NULL, OPTION_HIDDEN, NULL, 1 },
+
+      { NULL, 0, NULL, OPTION_DOC, N_("Input flags"), 2 },
+      { "file", 'f', NULL, 0,
+        N_("Only classify regular (not symlink nor special device) files"), 2 },
+      { "stdin", classify_flag_stdin, NULL, 0,
+        N_("Also read file names to process from standard input, \
+separated by newlines"), 2 },
+      { "stdin0", classify_flag_stdin0, NULL, 0,
+        N_("Also read file names to process from standard input, \
+separated by ASCII NUL bytes"), 2 },
+      { "no-stdin", classify_flag_no_stdin, NULL, 0,
+        N_("Do not read files from standard input (default)"), 2 },
+      { "compressed", 'z', NULL, 0,
+	N_("Try to open compressed files or embedded (kernel) ELF images"),
+	2 },
+
+      { NULL, 0, NULL, OPTION_DOC, N_("Output flags"), 3 },
+      { "print", classify_flag_print, NULL, 0,
+        N_("Output names of files, separated by newline"), 3 },
+      { "print0", classify_flag_print0, NULL, 0,
+        N_("Output names of files, separated by ASCII NUL"), 3 },
+      { "no-print", classify_flag_no_print, NULL, 0,
+        N_("Do not output file names"), 3 },
+      { "matching", classify_flag_matching, NULL, 0,
+        N_("If printing file names, print matching files (default)"), 3 },
+      { "not-matching", classify_flag_not_matching, NULL, 0,
+        N_("If printing file names, print files that do not match"), 3 },
+
+      { NULL, 0, NULL, OPTION_DOC, N_("Additional flags"), 4 },
+      { "verbose", 'v', NULL, 0,
+        N_("Output additional information (can be specified multiple times)"), 4 },
+      { "quiet", 'q', NULL, 0,
+        N_("Suppress some error output (counterpart to --verbose)"), 4 },
+      { NULL, 0, NULL, 0, NULL, 0 }
+    };
+
+  const struct argp argp =
+    {
+      .options = options,
+      .parser = parse_opt,
+      .args_doc = N_("FILE..."),
+      .doc = N_("\
+Determine the type of an ELF file.\
+\n\n\
+All of the classification options must apply at the same time to a \
+particular file.  Classification options can be negated using a \
+\"--not-\" prefix.\
+\n\n\
+Since modern ELF does not clearly distinguish between programs and \
+dynamic shared objects, you should normally use either --executable or \
+--shared to identify the primary purpose of a file.  \
+Only one of the --shared and --executable checks can pass for a file.\
+\n\n\
+If you want to know whether an ELF object might a program or a \
+shared library (but could be both), then use --program or --library. \
+Some ELF files will classify as both a program and a library.\
+\n\n\
+If you just want to know whether an ELF file is loadable (as program \
+or library) use --loadable.  Note that files that only contain \
+(separate) debug information (--debug-only) are never --loadable (even \
+though they might contain program headers).  Linux kernel modules are \
+also not --loadable (in the normal sense).\
+\n\n\
+Without any of the --print options, the program exits with status 0 \
+if the requested checks pass for all input files, with 1 if a check \
+fails for any file, and 2 if there is an environmental issue (such \
+as a file read error or a memory allocation error).\
+\n\n\
+When printing file names, the program exits with status 0 even if \
+no file names are printed, and exits with status 2 if there is an \
+environmental issue.\
+\n\n\
+On usage error (e.g. a bad option was given), the program exits with \
+a status code larger than 2.\
+\n\n\
+The --quiet or -q option suppresses some error warning output, but \
+doesn't change the exit status.\
+")
+    };
+
+  /* By default, require an ELF file; --not-elf overrides this.  */
+  requirements[classify_elf] = required;
+
+  int remaining;
+  if (argp_parse (&argp, argc, argv, 0, &remaining, NULL) != 0)
+    return 2;
+
+  elf_version (EV_CURRENT);
+
+  int status = 0;
+
+  for (int i = remaining; i < argc; ++i)
+    {
+      current_path = argv[i];
+      process_current_path (&status);
+    }
+
+  if (flag_stdin != no_stdin)
+    process_stdin (&status);
+
+  if (issue_found)
+    return 2;
+
+  return status;
+}
diff --git a/tests/ChangeLog b/tests/ChangeLog
index 63d6af96d..2900bd828 100644
--- a/tests/ChangeLog
+++ b/tests/ChangeLog
@@ -1,3 +1,12 @@
+2019-07-26  Florian Weimer  <fweimer@redhat.com>
+	    Mark Wielaard  <mark@klomp.org>
+
+	* run-elfclassify.sh: New test.
+	* run-elfclassify-self.sh: Likewise.
+	* Makefile.am (TESTS): Add run-elfclassify.sh and
+	run-elfclassify-self.sh.
+	(EXTRA_DIST): Likewise.
+
 2019-07-16  Mao Han  <han_mao@c-sky.com>
 
 	* hello_csky.ko.bz2: New testfile.
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 0ac353152..2ff7dfc46 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -164,7 +164,8 @@ TESTS = run-arextract.sh run-arsymtest.sh run-ar.sh newfile test-nlist \
 	run-typeiter-many.sh run-strip-test-many.sh \
 	run-strip-version.sh run-xlate-note.sh \
 	run-readelf-discr.sh \
-	run-dwelf_elf_e_machine_string.sh
+	run-dwelf_elf_e_machine_string.sh \
+	run-elfclassify.sh run-elfclassify-self.sh
 
 if !BIARCH
 export ELFUTILS_DISABLE_BIARCH = 1
@@ -435,8 +436,8 @@ EXTRA_DIST = run-arextract.sh run-arsymtest.sh run-ar.sh \
 	     run-xlate-note.sh \
 	     run-readelf-discr.sh \
 	     testfile-rng.debug.bz2 testfile-urng.debug.bz2 \
-	     run-dwelf_elf_e_machine_string.sh
-
+	     run-dwelf_elf_e_machine_string.sh \
+	     run-elfclassify.sh run-elfclassify-self.sh
 
 if USE_VALGRIND
 valgrind_cmd='valgrind -q --leak-check=full --error-exitcode=1'
diff --git a/tests/run-elfclassify-self.sh b/tests/run-elfclassify-self.sh
new file mode 100755
index 000000000..c48ab9c93
--- /dev/null
+++ b/tests/run-elfclassify-self.sh
@@ -0,0 +1,36 @@
+#!/bin/sh
+# Copyright (C) 2019 Red Hat, Inc.
+# This file is part of elfutils.
+#
+# This file is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# elfutils is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+. $srcdir/test-subr.sh
+# Checks run via testrun_on_self (the helpers come from test-subr.sh).
+testrun_on_self ${abs_top_builddir}/src/elfclassify --elf-file
+testrun_on_self ${abs_top_builddir}/src/elfclassify --not-core
+testrun_on_self ${abs_top_builddir}/src/elfclassify --unstripped
+testrun_on_self ${abs_top_builddir}/src/elfclassify --not-linux-kernel-module
+# testrun_on_self_lib: presumably the self-built shared libraries (confirm).
+testrun_on_self_lib ${abs_top_builddir}/src/elfclassify --shared
+testrun_on_self_lib ${abs_top_builddir}/src/elfclassify --loadable
+testrun_on_self_lib ${abs_top_builddir}/src/elfclassify --not-executable
+testrun_on_self_lib ${abs_top_builddir}/src/elfclassify --not-program
+# testrun_on_self_exe: presumably the self-built executables (confirm).
+testrun_on_self_exe ${abs_top_builddir}/src/elfclassify --executable
+testrun_on_self_exe ${abs_top_builddir}/src/elfclassify --program
+testrun_on_self_exe ${abs_top_builddir}/src/elfclassify --loadable
+testrun_on_self_exe ${abs_top_builddir}/src/elfclassify --not-shared
+# $self_test_files_obj must be neither --shared nor --executable.
+testrun ${abs_top_builddir}/src/elfclassify --not-shared $self_test_files_obj
+testrun ${abs_top_builddir}/src/elfclassify --not-executable $self_test_files_obj
diff --git a/tests/run-elfclassify.sh b/tests/run-elfclassify.sh
new file mode 100755
index 000000000..5a849bbdd
--- /dev/null
+++ b/tests/run-elfclassify.sh
@@ -0,0 +1,327 @@
+#!/bin/sh
+# Copyright (C) 2019 Red Hat, Inc.
+# This file is part of elfutils.
+#
+# This file is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# elfutils is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+. $srcdir/test-subr.sh
+
+core_files=\
+"testfile_aarch64_core \
+ testfile-backtrace-demangle.core \
+ testfiledwarfinlines.core \
+ testfile_i686_core \
+ testfile-m68k-core \
+ testfile-riscv64-core \
+ backtrace.aarch64.core \
+ backtrace.i386.core \
+ backtrace.ppc.core \
+ backtrace.s390.core"
+# Core dumps: expected to be --core and none of the other kinds below.
+testfiles $core_files
+
+echo "elfclassify --core"
+testrun ${abs_top_builddir}/src/elfclassify --core $core_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --core --print $core_files <<EOF
+$(printf '%s\n' $core_files)
+EOF
+
+echo "core files are not programs"
+testrun ${abs_top_builddir}/src/elfclassify --not-program $core_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-program --print $core_files <<EOF
+$(printf '%s\n' $core_files)
+EOF
+
+echo "core files are not shared"
+testrun ${abs_top_builddir}/src/elfclassify --not-shared $core_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-shared --print $core_files <<EOF
+$(printf '%s\n' $core_files)
+EOF
+
+echo "core files are not kernel-modules"
+testrun ${abs_top_builddir}/src/elfclassify --not-linux-kernel-module $core_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-linux-kernel-module --print $core_files <<EOF
+$(printf '%s\n' $core_files)
+EOF
+
+echo "core files are not debug-only"
+testrun ${abs_top_builddir}/src/elfclassify --not-debug-only $core_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-debug-only --print $core_files <<EOF
+$(printf '%s\n' $core_files)
+EOF
+
+object_files=\
+"debug-ranges-no-lowpc.o \
+ testfile-annobingroup-i386.o \
+ testfile-bpf-dis1.o \
+ testfile-debug-rel-g.o \
+ testfile-gnu-property-note.o"
+# Relocatable objects: plain ELF files, none of the special kinds below.
+testfiles $object_files
+
+echo "elfclassify --elf-file"
+testrun ${abs_top_builddir}/src/elfclassify --elf-file $object_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --elf-file --print $object_files <<EOF
+$(printf '%s\n' $object_files)
+EOF
+
+echo "object files are not archives"
+testrun ${abs_top_builddir}/src/elfclassify --not-elf-archive $object_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-elf-archive --print $object_files <<EOF
+$(printf '%s\n' $object_files)
+EOF
+
+echo "object files are not core files"
+testrun ${abs_top_builddir}/src/elfclassify --not-core $object_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-core --print $object_files <<EOF
+$(printf '%s\n' $object_files)
+EOF
+
+echo "object files are not program files"
+testrun ${abs_top_builddir}/src/elfclassify --not-program $object_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-program --print $object_files <<EOF
+$(printf '%s\n' $object_files)
+EOF
+
+echo "object files are not shared files"
+testrun ${abs_top_builddir}/src/elfclassify --not-shared $object_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-shared --print $object_files <<EOF
+$(printf '%s\n' $object_files)
+EOF
+
+echo "object files are not kernel modules"
+testrun ${abs_top_builddir}/src/elfclassify --not-linux-kernel-module $object_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-linux-kernel-module --print $object_files <<EOF
+$(printf '%s\n' $object_files)
+EOF
+
+echo "object files are not debug-only files"
+testrun ${abs_top_builddir}/src/elfclassify --not-debug-only $object_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-debug-only --print $object_files <<EOF
+$(printf '%s\n' $object_files)
+EOF
+
+ar_files="testarchive64.a"
+# Archives: expected to be --elf-archive and none of the kinds below.
+testfiles $ar_files
+
+echo "elfclassify --elf-archive"
+testrun ${abs_top_builddir}/src/elfclassify --elf-archive $ar_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --elf-archive --print $ar_files <<EOF
+$(printf '%s\n' $ar_files)
+EOF
+
+echo "archives are not elf-files"
+testrun ${abs_top_builddir}/src/elfclassify --not-elf-file $ar_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-elf-file --print $ar_files <<EOF
+$(printf '%s\n' $ar_files)
+EOF
+
+echo "archives are not core files"
+testrun ${abs_top_builddir}/src/elfclassify --not-core $ar_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-core --print $ar_files <<EOF
+$(printf '%s\n' $ar_files)
+EOF
+
+echo "archives are not program files"
+testrun ${abs_top_builddir}/src/elfclassify --not-program $ar_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-program --print $ar_files <<EOF
+$(printf '%s\n' $ar_files)
+EOF
+
+echo "archives are not shared files"
+testrun ${abs_top_builddir}/src/elfclassify --not-shared $ar_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-shared --print $ar_files <<EOF
+$(printf '%s\n' $ar_files)
+EOF
+
+lib_files=\
+"testfile52-32.noshdrs.so \
+ libtestfile_multi_shared.so \
+ testfile52-32.prelink.so \
+ testfile52-32.so \
+ testfile54-64.noshdrs.so \
+ testfile54-64.prelink.so \
+ testfile54-64.so \
+ testlib_dynseg.so"
+# Shared libraries: --shared and --loadable, but not the kinds below.
+testfiles $lib_files
+
+echo "elfclassify --shared"
+testrun ${abs_top_builddir}/src/elfclassify --shared $lib_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --shared --print $lib_files <<EOF
+$(printf '%s\n' $lib_files)
+EOF
+
+echo "shared files are loadable"
+testrun ${abs_top_builddir}/src/elfclassify --loadable $lib_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --loadable --print $lib_files <<EOF
+$(printf '%s\n' $lib_files)
+EOF
+
+echo "shared files are not executables"
+testrun ${abs_top_builddir}/src/elfclassify --not-executable $lib_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-executable --print $lib_files <<EOF
+$(printf '%s\n' $lib_files)
+EOF
+
+echo "shared files are not debug-only"
+testrun ${abs_top_builddir}/src/elfclassify --not-debug-only $lib_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-debug-only --print $lib_files <<EOF
+$(printf '%s\n' $lib_files)
+EOF
+
+echo "shared files are not kernel modules"
+testrun ${abs_top_builddir}/src/elfclassify --not-linux-kernel-module $lib_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-linux-kernel-module --print $lib_files <<EOF
+$(printf '%s\n' $lib_files)
+EOF
+
+exe_files=\
+"backtrace.aarch64.exec \
+ backtrace.i386.exec \
+ backtrace.ppc.exec \
+ backtrace.s390x.exec \
+ testfile70.exec \
+ test-offset-loop \
+ testfilebaztab \
+ testfilebaztabppc64"
+# Programs: --program and --executable, but not the kinds below.
+testfiles $exe_files
+
+echo "elfclassify --program"
+testrun ${abs_top_builddir}/src/elfclassify --program $exe_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --program --print $exe_files <<EOF
+$(printf '%s\n' $exe_files)
+EOF
+
+echo "programs are executables (in this case)"
+testrun ${abs_top_builddir}/src/elfclassify --executable $exe_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --executable --print $exe_files <<EOF
+$(printf '%s\n' $exe_files)
+EOF
+
+echo "programs are not shared libraries (in this case)"
+testrun ${abs_top_builddir}/src/elfclassify --not-shared $exe_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-shared --print $exe_files <<EOF
+$(printf '%s\n' $exe_files)
+EOF
+
+echo "programs are not kernel-modules"
+testrun ${abs_top_builddir}/src/elfclassify --not-linux-kernel-module $exe_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-linux-kernel-module --print $exe_files <<EOF
+$(printf '%s\n' $exe_files)
+EOF
+
+echo "programs are not debug-only"
+testrun ${abs_top_builddir}/src/elfclassify --not-debug-only $exe_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-debug-only --print $exe_files <<EOF
+$(printf '%s\n' $exe_files)
+EOF
+
+kmod_files=\
+"hello_aarch64.ko \
+ hello_csky.ko \
+ hello_i386.ko \
+ hello_m68k.ko \
+ hello_ppc64.ko \
+ hello_riscv64.ko \
+ hello_s390.ko \
+ hello_x86_64.ko"
+# Kernel modules: unstripped, not debug-only and not --loadable.
+testfiles $kmod_files
+
+echo "elfclassify --linux-kernel-module"
+testrun ${abs_top_builddir}/src/elfclassify --linux-kernel-module $kmod_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --linux-kernel-module --print $kmod_files <<EOF
+$(printf '%s\n' $kmod_files)
+EOF
+
+echo "kmods are unstripped"
+testrun ${abs_top_builddir}/src/elfclassify --unstripped $kmod_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --unstripped --print $kmod_files <<EOF
+$(printf '%s\n' $kmod_files)
+EOF
+
+echo "kmods are not debug-only"
+testrun ${abs_top_builddir}/src/elfclassify --not-debug-only $kmod_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-debug-only --print $kmod_files <<EOF
+$(printf '%s\n' $kmod_files)
+EOF
+
+echo "kmods are not loadable (in the normal sense)"
+testrun ${abs_top_builddir}/src/elfclassify --not-loadable $kmod_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-loadable --print $kmod_files <<EOF
+$(printf '%s\n' $kmod_files)
+EOF
+
+echo "gnu compressed kmods are unstripped"
+testrun ${abs_top_builddir}/src/elfcompress -t gnu --force $kmod_files
+testrun ${abs_top_builddir}/src/elfclassify --unstripped $kmod_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --unstripped --print $kmod_files <<EOF
+$(printf '%s\n' $kmod_files)
+EOF
+
+debug_files=\
+"testfile15.debug \
+ testfile35.debug \
+ testfile40.debug \
+ testfile48.debug \
+ testfile53-32.debug \
+ testfile53-64.debug \
+ testfilebazdbg.debug \
+ testfilebazdbgppc64.debug \
+ addrx_constx-4.dwo \
+ addrx_constx-5.dwo"
+# Debug-only files: unstripped, not programs and not shared objects.
+testfiles $debug_files
+
+echo "elfclassify --debug-only"
+testrun ${abs_top_builddir}/src/elfclassify --debug-only $debug_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --debug-only --print $debug_files <<EOF
+$(printf '%s\n' $debug_files)
+EOF
+
+echo "debug-only files are unstripped"
+testrun ${abs_top_builddir}/src/elfclassify --unstripped $debug_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --unstripped --print $debug_files <<EOF
+$(printf '%s\n' $debug_files)
+EOF
+
+echo "debug-only files are not programs"
+testrun ${abs_top_builddir}/src/elfclassify --not-program $debug_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-program --print $debug_files <<EOF
+$(printf '%s\n' $debug_files)
+EOF
+
+echo "debug-only files are not shared"
+testrun ${abs_top_builddir}/src/elfclassify --not-shared $debug_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --not-shared --print $debug_files <<EOF
+$(printf '%s\n' $debug_files)
+EOF
+
+echo "compress the debug sections and try again"
+testrun ${abs_top_builddir}/src/elfcompress -t gnu --force $debug_files
+
+echo "again unstripped"
+testrun ${abs_top_builddir}/src/elfclassify --unstripped $debug_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --unstripped --print $debug_files <<EOF
+$(printf '%s\n' $debug_files)
+EOF
+
+echo "again debug-only"
+testrun ${abs_top_builddir}/src/elfclassify --debug-only $debug_files
+testrun_compare ${abs_top_builddir}/src/elfclassify --debug-only --print $debug_files <<EOF
+$(printf '%s\n' $debug_files)
+EOF
-- 
2.20.1


^ permalink raw reply	[flat|nested] 36+ messages in thread

* [PATCH] config/elfutils.spec.in: package eu-elfclassify
  2019-08-13 11:42                     ` Mark Wielaard
@ 2019-08-14 20:46                       ` Dmitry V. Levin
  2019-08-15  7:33                         ` Mark Wielaard
  0 siblings, 1 reply; 36+ messages in thread
From: Dmitry V. Levin @ 2019-08-14 20:46 UTC (permalink / raw)
  To: elfutils-devel

Package the new tool introduced by commit elfutils-0.177~1.

Signed-off-by: Dmitry V. Levin <ldv@altlinux.org>
---

I have no idea whether this file is actually used as a specfile.
If not, let's add eu-elfclassify there for consistency.

P.S. Thanks for the nice tool!

 config/ChangeLog        | 4 ++++
 config/elfutils.spec.in | 1 +
 2 files changed, 5 insertions(+)

diff --git a/config/ChangeLog b/config/ChangeLog
index 65658118..12a141b2 100644
--- a/config/ChangeLog
+++ b/config/ChangeLog
@@ -1,3 +1,7 @@
+2019-08-14  Dmitry V. Levin  <ldv@altlinux.org>
+
+	* elfutils.spec.in (%files): Add %{_bindir}/eu-elfclassify.
+
 2019-08-13  Mark Wielaard  <mark@klomp.org>
 
 	* Makefile.am (elfutils.spec.in): Use git --get user.name and
diff --git a/config/elfutils.spec.in b/config/elfutils.spec.in
index 19b4229e..c5896a41 100644
--- a/config/elfutils.spec.in
+++ b/config/elfutils.spec.in
@@ -169,6 +169,7 @@ rm -rf ${RPM_BUILD_ROOT}
 %{_bindir}/eu-strip
 %{_bindir}/eu-findtextrel
 %{_bindir}/eu-addr2line
+%{_bindir}/eu-elfclassify
 %{_bindir}/eu-elfcmp
 %{_bindir}/eu-ranlib
 %{_bindir}/eu-strings
-- 
ldv

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] config/elfutils.spec.in: package eu-elfclassify
  2019-08-14 20:46                       ` [PATCH] config/elfutils.spec.in: package eu-elfclassify Dmitry V. Levin
@ 2019-08-15  7:33                         ` Mark Wielaard
  0 siblings, 0 replies; 36+ messages in thread
From: Mark Wielaard @ 2019-08-15  7:33 UTC (permalink / raw)
  To: Dmitry V. Levin; +Cc: elfutils-devel

Hi Dmitry,

On Wed, Aug 14, 2019 at 11:45:56PM +0300, Dmitry V. Levin wrote:
> Package the new tool introduced by commit elfutils-0.177~1.
> 
> Signed-off-by: Dmitry V. Levin <ldv@altlinux.org>
> ---
> 
> I have no idea whether this file is actually used as a specfile.
> If not, let's add eu-elfclassify there for consistency.

Yes, we certainly should have it in the (example) spec file.
Pushed to master.

Thanks,

Mark

^ permalink raw reply	[flat|nested] 36+ messages in thread

end of thread, other threads:[~2019-08-15  7:33 UTC | newest]

Thread overview: 36+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-04-12 15:38 [PATCH] elfclassify tool Florian Weimer
2019-04-15 15:39 ` Mark Wielaard
2019-04-16 11:38   ` Florian Weimer
2019-04-18 11:17     ` Florian Weimer
2019-07-19 12:47       ` Mark Wielaard
2019-07-19 13:43         ` Dmitry V. Levin
2019-07-19 14:21           ` Mark
2019-07-19 18:35             ` Dmitry V. Levin
2019-07-19 21:00               ` Florian Weimer
2019-07-19 21:23                 ` Dmitry V. Levin
2019-07-19 21:36                   ` Mark Wielaard
2019-07-19 22:57                     ` Dmitry V. Levin
2019-07-20 21:51                       ` Mark Wielaard
2019-07-25 22:39                         ` [PATCH] elfclassify: Add --library classification Mark Wielaard
2019-07-26 22:53                           ` Dmitry V. Levin
2019-07-26 23:04                         ` [PATCH] elfclassify tool Dmitry V. Levin
2019-07-27 11:54                           ` Mark Wielaard
2019-07-20 21:40         ` Mark Wielaard
2019-07-22 15:55         ` Florian Weimer
2019-07-26 22:11           ` Mark Wielaard
2019-07-29  8:44             ` Florian Weimer
2019-07-29 14:24               ` Mark Wielaard
2019-08-11 23:38                 ` Mark Wielaard
2019-08-12  8:14                   ` Florian Weimer
2019-08-12 15:18                     ` Mark Wielaard
2019-07-29  9:16             ` Florian Weimer
2019-07-29 14:34               ` Mark Wielaard
2019-07-29 14:38                 ` Florian Weimer
2019-08-13  9:44                   ` Mark Wielaard
2019-08-13 11:42                     ` Mark Wielaard
2019-08-14 20:46                       ` [PATCH] config/elfutils.spec.in: package eu-elfclassify Dmitry V. Levin
2019-08-15  7:33                         ` Mark Wielaard
2019-07-29  9:22             ` [PATCH] elfclassify tool Florian Weimer
2019-07-29 14:40               ` Mark Wielaard
2019-07-29 14:42                 ` Florian Weimer
2019-07-19 13:24     ` Mark Wielaard

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).