public inbox for gcc@gcc.gnu.org
 help / color / mirror / Atom feed
* GSoC (Make cp-demangle non-recursive)
@ 2022-03-27 12:21 Juan Scerri
  2022-04-07 15:24 ` Martin Jambor
  0 siblings, 1 reply; 6+ messages in thread
From: Juan Scerri @ 2022-03-27 12:21 UTC (permalink / raw)
  To: gcc

Hello,

Lately, I have been working on a way to implement recursion on the heap to
deal with
the limits associated with recursion on the stack. If a good implementation
is reached
that should allow us to convert the code.

I have implemented the data structure and I have written a simple factorial
function
using the data structure. Currently, no error handling is being done.

Any guidance on what is expected with regards to error handling and how
errors should be
dealt with (push up the call stack or terminate the program?) would be
greatly appreciated.

Moreover, any feedback on the current way the data structure
is implemented and the code used to implement the simple factorial function
would be appreciated.

The current issue I have with this method is that it is really complex to
write a
function using the data structure to implement recursion on the heap. Plus
it requires
dealing and understanding the control flow of the function very well.

Any ideas on how I can make this, as much as possible, a drop in
replacement would be helpful.

The code:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define GET_FRAME_AS(work, frame_type) ((struct frame_type **)&work.frame)

enum d_work_type {
  CALL,
  RET,
};

struct d_work {
  /* Type of work (RET or CALL). */
  enum d_work_type type;
  /* Type of RET work. */
  unsigned int ret_type;
  /* Pointer to function frame. */
  void *frame;
};

/* Stack of operations to do after traversing a full branch. */
struct d_work_stack {
  /* Base pointer. */
  struct d_work *bp;
  /* Current length of data on the stack. */
  size_t len;
  /* Allocated size of buffer. */
  size_t alc;
  /* Set to 1 if we had a memory allocation failure. */
  int allocation_failure;
};

static inline struct d_work d_new_work(enum d_work_type type,
                                       unsigned int ret_work, void *frame) {
  return (struct d_work){type, ret_work, frame};
}

static inline void d_work_stack_resize(struct d_work_stack *dws, size_t
need) {
  size_t newalc;
  struct d_work *newbp;

  if (dws->allocation_failure)
    return;

  /* Start allocation at two bytes to avoid any possibility of confusion
     with the special value of 1 used as a return to indicate
     allocation failures.  */
  newalc = dws->alc > 0 ? dws->alc : 2;

  while (newalc < need)
    newalc <<= 1;

  newbp = (struct d_work *)realloc(dws->bp, newalc * sizeof(struct d_work));

  if (newbp == NULL) {
    free(dws->bp);
    dws->bp = NULL;
    dws->len = 0;
    dws->alc = 0;
    dws->allocation_failure = 1;

    return;
  }

  dws->bp = newbp;
  dws->alc = newalc;
}

static void d_work_stack_init(struct d_work_stack *dws, size_t estimate) {
  dws->bp = NULL;
  dws->len = 0;
  dws->alc = 0;
  dws->allocation_failure = 0;

  if (estimate > 0)
    d_work_stack_resize(dws, estimate);
}

static void d_work_stack_push(struct d_work_stack *dws, struct d_work dw) {
  size_t need = dws->len + 1;

  if (need > dws->alc)
    d_work_stack_resize(dws, need);

  if (dws->allocation_failure)
    return;

  dws->bp[(dws->len)++] = dw;
}

static int d_work_stack_peek(struct d_work_stack *dws, struct d_work *pdw) {
  if (dws->len == 0)
    return -1;

  *pdw = dws->bp[dws->len - 1];

  return dws->len; /* Return the number of elements. */
}

static int d_work_stack_pop(struct d_work_stack *dws, struct d_work *pdw) {
  if (dws->len == 0)
    return -1;

  *pdw = dws->bp[--(dws->len)];

  return dws->len; /* Return how many elements are left. */
}

static void d_work_stack_deinit(struct d_work_stack *dws) {
  if (dws->bp != NULL) {
    free(dws->bp);
    dws->bp = NULL;
  }

  dws->len = 0;
  dws->alc = 0;
  dws->allocation_failure = 0;
}

/*
 * This section of the code will contain examples of recursive
 * code implemented pseudo-recursively using the heap.
 */

/* Recursive implementation of factorial. */
long r_factorial(long n) {
  long ret; // Return of the function.
  long res; // Result of a recursive call.

  if (n == 0 || n == 1) {
    ret = 1;
  } else {
    res = r_factorial(n - 1);
    ret = n * res;
  }

  return ret;
}

struct p_factorial_frame {
  long n;
  long ret;
  long res;
};

enum p_factorial_ret_types { MULT_RET };

/* Pseudo-recursive implementation of factorial. */
long p_factorial(long n) {
  /* Return variable. */
  long ret;

  /* Setup the work stack. */
  struct d_work_stack work_stack;
  d_work_stack_init(&work_stack, 16);

  /* Initial frame. */
  struct p_factorial_frame *initial_frame =
      malloc(sizeof(struct p_factorial_frame));
  initial_frame->n = n;
  d_work_stack_push(&work_stack, d_new_work(CALL, 0, initial_frame));

  /*
   * These will be used to access work without a lot of
   * difficulty.
   */
  struct d_work current_work;
  struct d_work peek_work;

  /* Pointer to frame pointer for ease of writing. */
  struct p_factorial_frame **current_frame;
  struct p_factorial_frame **peek_frame;

  /*
   * When we get the final element we do not modify current_work
   * hence all we need to do is take the the ret value from the
   * block and free the state variable in current_work.
   */
  while (d_work_stack_pop(&work_stack, &current_work) != -1) {
    current_frame = GET_FRAME_AS(current_work, p_factorial_frame);

    if (current_work.type == CALL) {
      if ((*current_frame)->n == 0 || (*current_frame)->n == 1) {
        (*current_frame)->ret = 1;

        // Modify previous stack frame if it is present.
        if (d_work_stack_peek(&work_stack, &peek_work) != -1) {
          peek_frame = GET_FRAME_AS(peek_work, p_factorial_frame);
          (*peek_frame)->res = (*current_frame)->ret;
        }
      } else {
        /*
         * Make a new return frame, copy the current frame and
         * push onto the stack.
         */
        struct p_factorial_frame *ret_frame =
            malloc(sizeof(struct p_factorial_frame));
        memcpy(ret_frame, *current_frame, sizeof(struct p_factorial_frame));
        d_work_stack_push(&work_stack, d_new_work(RET, MULT_RET,
ret_frame));

        /*
         * Make a new call frame, copy the change function
         * parameter and push onto the stack.
         */
        struct p_factorial_frame *call_frame =
            malloc(sizeof(struct p_factorial_frame));
        call_frame->n = (*current_frame)->n - 1;
        d_work_stack_push(&work_stack, d_new_work(CALL, 0, call_frame));
      }
    } else {
      switch (current_work.ret_type) {
      case MULT_RET:
        (*current_frame)->ret = (*current_frame)->n * (*current_frame)->res;
        break;
      }

      // Modify the previous stack frame if it is present.
      if (d_work_stack_peek(&work_stack, &peek_work) != -1) {
        peek_frame = GET_FRAME_AS(peek_work, p_factorial_frame);
        (*peek_frame)->res = (*current_frame)->ret;
      }
    }

    /*
     * Do not free the last frame because that will be freed. At
     * the end.
     */
    if (work_stack.len > 0) {
      free(*(current_frame));
    }
  }

  ret = (*GET_FRAME_AS(current_work, p_factorial_frame))->ret;

  /*
   * Freeing of the last stack frame and freeing of the whole
   * stack
   */
  free(*GET_FRAME_AS(current_work, p_factorial_frame));
  d_work_stack_deinit(&work_stack);

  return ret;
}

int main(void) {
  long n = p_factorial(0);
  printf("%ld\n", n);

  long n2 = p_factorial(1);
  printf("%ld\n", n2);

  long n3 = p_factorial(5);
  printf("%ld\n", n3);

  return 0;
}

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: GSoC (Make cp-demangle non-recursive)
  2022-03-27 12:21 GSoC (Make cp-demangle non-recursive) Juan Scerri
@ 2022-04-07 15:24 ` Martin Jambor
  0 siblings, 0 replies; 6+ messages in thread
From: Martin Jambor @ 2022-04-07 15:24 UTC (permalink / raw)
  To: Juan Scerri; +Cc: gcc

Hello,

sorry for a late reply.

On Sun, Mar 27 2022, Juan Scerri wrote:
> Hello,
>
> Lately, I have been working on a way to implement recursion on the
> heap to deal with the limits associated with recursion on the
> stack. If a good implementation is reached that should allow us to
> convert the code.
>
> I have implemented the data structure and I have written a simple
> factorial function using the data structure. Currently, no error
> handling is being done.

Good.  Generally something quite similar to what you implemented for the
factorial computation will be needed.  I think that having a function
pointer instead of an enum for work type would make implementation
easier and cleaner (though theoretically less "safe," I guess, so maybe
there is a reason to have enums).  Similarly, I think that rather than
void pointers for "frames", we can use an inheritance-in-C approach,
i.e. to have different kinds of frames all have the first field of the
same type that will specify what kind of "frame" the rest are - very
much like all tree_<something> structures in gcc/tree-core.h have
tree_base as the first field, which specifies what the rest of the big
structure looks like.  But those are implementation details that are
easy to change.

>
> Any guidance on what is expected with regards to error handling and
> how errors should be dealt with (push up the call stack or terminate
> the program?) would be greatly appreciated.

In case of error you need to deallocate all allocated memory and
d_demangle will then report error as specified in the function
description and return.  You do not want to terminate the program for
non-catastrophic reasons because the demangler can be called as a library.

Deallocating memory will probably be easier if you do not call malloc
for every "frame" but rather allocate them from some bigger continuous
buffers.  But an initial implementation can definitely just go over the
stack and free everything manually, these are things that can be tweaked
after the core work is done.

>
> Moreover, any feedback on the current way the data structure
> is implemented and the code used to implement the simple factorial function
> would be appreciated.
>
> The current issue I have with this method is that it is really complex to
> write a
> function using the data structure to implement recursion on the heap. Plus
> it requires
> dealing and understanding the control flow of the function very well.

Right, code re-structuring might sometimes get ugly.  I do not think it
can be completely avoided but with consistent style, intelligent
division of code to different functions and good naming and comments, I
believe the issue can be mitigated to a reasonable level.

I think you understand the core issue of the project, you may now want
to look at the demangler and think about how to incrementally.  In the
past, we though that the first function to look at could be d_parmlist
to break the recursive cycle of:

d_parmlist -> cplus_demangle_type -> d_function_type -> d_parmlist.

d_parmlist would use the new scheme, but would recursively call into the
not-yet-converted other functions.  But that is just a suggestion.

Good luck and sorry for the late response again,

Martin

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: GSoC (Make cp-demangle non-recursive)
       [not found]   ` <CACJSivvYgNG+C2Y5ph=4yv5y0iaV+Ak7=vh3FGD7W6eQADxk9w@mail.gmail.com>
@ 2022-03-09 16:15     ` Martin Jambor
  0 siblings, 0 replies; 6+ messages in thread
From: Martin Jambor @ 2022-03-09 16:15 UTC (permalink / raw)
  To: Juan Scerri; +Cc: GCC Mailing List

Hello Juan,

please remember to CC the list when discussing technical problems, ideas
and approaches.  Others may help you also, perhaps even better than I do.

On Thu, Mar 03 2022, Juan Scerri wrote:
> Hi Martin,
>
> Firstly, thank you for your response.
>
> I think I have a good knowledge of C. It is part of my course at
> university.
>
> Moreover, I have figured out that the code which I need to change is in
> libiberty and more specifically cp-demangle.c and cp-demangle.h. Thank you
> for
> pointing out that I can view binutils. I will give the repository a look
> since
> compiling gcc takes a very long time, even with 16 threads.
>
> I have also read some of the code and I have an idea as to what I can do.
>
> In the code there are certain functions which use recursion which are very
> trivial to convert to an iterative function for example:
>
> static int
> has_return_type (struct demangle_component *dc)
> {
>   if (dc == NULL)
>     return 0;
>   switch (dc->type)
>     {
>   default:
>     return 0;
>   case DEMANGLE_COMPONENT_LOCAL_NAME:
>     return has_return_type (d_right (dc));
>   case DEMANGLE_COMPONENT_TEMPLATE:
>     return ! is_ctor_dtor_or_conversion (d_left (dc));
>   FNQUAL_COMPONENT_CASE:
>     return has_return_type (d_left (dc));
>     }
> }
>
> This can be converted into an iterative function through the introduction
> of a
> while-loop or a for-loop (whichever one is preferred in the codebase) as
> shown
> below.
>
> static int
> has_return_type (struct demangle_component *dc)
> {
>   while (1) {
>     if (dc == NULL)
>       return 0;
>     switch (dc->type)
>       {
>       default:
>         return 0;
>       case DEMANGLE_COMPONENT_LOCAL_NAME:
>         dc = d_right (dc);
>         continue;
>       case DEMANGLE_COMPONENT_TEMPLATE:
>         return ! is_ctor_dtor_or_conversion (d_left (dc));
>       FNQUAL_COMPONENT_CASE:
>         dc = d_left (dc);
>         continue;
>       }
>   }
> }

Right, those are the easy cases, the project of course is about the
non-trivial ones.

>
> For the more complicated functions like `d_print_comp` and
> `d_print_comp_inner`, there is very large number of cases. Moreover,
> different
> cases can have multiple recursive calls to `d_print_comp`. My idea to tackle
> these is to keep track of the number of calls which would be made if the
> algorithm
> was recursive. Basically, the count starts at 1 and for:
>
>> a base case we subtract 1
>> a single call we add 0
>> two calls we add 1
>> three calls we add 2
>> and so on.
>
> However, this is not enough. We need to deal with the instructions in
> between
> calls and afterwards. The best structure to keep track of the work which
> needs
> to be done is a stack (as hinted in the website). This is the implementation
> which I came up with (it is heavily inspired by `d_growable_string`):

You are correct about the stack, you definitely need a stack to drive
the whole process, I am not sure I understand the call counting above or
what purpose it would serve.

>
> #define EMPTY 0
>
> /* Stack of operations to do after traversing a full branch. */
> struct d_work_stack {
>   /* Base pointer. */
>   int *bp;
>   /* Current length of data on the stack. */
>   size_t len;
>   /* Allocated size of buffer. */
>   size_t alc;
>   /* Set to 1 if we had a memory allocation failure. */
>   int allocation_failure;
> };
>
> static void d_work_stack_init(struct d_work_stack *dws, size_t estimate) {
>   dws->bp = NULL;
>   dws->len = 0;
>   dws->alc = 0;
>   dws->allocation_failure = 0;
>
>   if (estimate > 0)
>     d_work_stack_resize(dws, estimate);
> }
>
> static inline void d_work_stack_resize(struct d_work_stack *dws, size_t
> need) {
>   size_t newalc;
>   int *newbp;
>
>   if (dws->allocation_failure)
>     return;
>
>   /* Start allocation at two bytes to avoid any possibility of confusion
>      with the special value of 1 used as a retrun to indicate
>      allocation failures.  */
>   newalc = dws->alc > 0 ? dws->alc : 2;
>   while (newalc < need)
>     newalc <<= 1;
>
>   newbp = (int *)realloc(dws->bp, newalc * sizeof(int));
>   if (newbp == NULL) {
>     free(dws->bp);
>     dws->bp = NULL;
>     dws->len = 0;
>     dws->alc = 0;
>     dws->allocation_failure = 1;
>     return;
>   }
>   dws->bp = newbp;
>   dws->alc = newalc;
> }
>
> static void d_work_stack_push(struct d_work_stack *dws, int dw) {
>   size_t need = dws->len + 1;
>   if (need > dws->alc)
>     d_work_stack_resize(dws, need);
>
>   if (dws->allocation_failure)
>     return;
>
>   dws->bp[(dws->len)++] = dw;
> }
>
> static int d_work_stack_pop(struct d_work_stack *dws) {
>   if (dws->len == 0)
>     return EMPTY;
>
>   return dws->bp[--(dws->len)];
> }
>
> static void d_work_stack_deinit(struct d_work_stack *dws) {
>   if (dws->bp != NULL) {
>     free(dws->bp);
>     dws->bp = NULL;
>   }
>
>   dws->len = 0;
>   dws->alc = 0;
>   dws->allocation_failure = 0;
> }

Note that the stack will have to hold intermediate data for many kinds
of mutually recursive functions in the demangler, so (either directly or
conceptually) it will have to hold items - probably structures - of
different types.  I do not think integers will cut it.

In the past when we were discussing the project, it was helpful to look
at function d_parmlist.  Making the indirect recursion explicit means
replacing the recursive calls, such as the call to cplus_demangle_type,
with a push on the stack of some structure encoding that a type has to
be demangled and immediately returning.  But that means that the rest of
the work has to be done later (when this and more stuff pushed on the
stack gets popped and the driving loop gets back to the same point in
the stack) and there is a lot of state - stored in local variables now -
which has to be carried over.  That state needs to be in the stack
(describing the current task) too, hence the need for a richer type.

>
> The only thing I need to iron out is the best way to use this structure. My
> crude idea at the moment is to give code which needs to be executed, after
> or in
> between recursive calls, an index. Then using a switch the algorithm can
> decide
> what to do depending on the index (only if the count is still greater than 0
> and the end of a branch was reached).
>
> Note: that you should think of a `branch` as the branch of the tree formed
> if
> you visualise the recursive calls until completion.
>
> However, this might not be the best approach. I might consider using the
> stack
> to hold function pointers instead.

I agree that function pointers are likely to be useful.  But note that often
you'll need to encode both something to identify the kind of stuff that
is being demangled (type, parmlist, component...) and the state how far
you already got (from what point you need to continue), so you may want
to use both.

Hope this is useful, good luck.

Martin

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: GSoC (Make cp-demangle non-recursive)
  2022-02-26 10:34 Juan Scerri
  2022-02-26 16:31 ` Juan Scerri
@ 2022-03-02 18:37 ` Martin Jambor
       [not found]   ` <CACJSivvYgNG+C2Y5ph=4yv5y0iaV+Ak7=vh3FGD7W6eQADxk9w@mail.gmail.com>
  1 sibling, 1 reply; 6+ messages in thread
From: Martin Jambor @ 2022-03-02 18:37 UTC (permalink / raw)
  To: Juan Scerri; +Cc: gcc

Hello Juan,

we are delighted you found contributing to GCC interesting.

On Sat, Feb 26 2022, Juan Scerri wrote:
> To whom this may concern,
>
> Firstly, I will introduce myself. I am Juan Scerri and I am a student at
> the University of Malta studying Computer Science and Mathematics for my
> undergraduate degree.
> I am currently in my first year of studies, which is technically my first
> formal introduction to GNU/Linux. However, I have been using GNU/Linux for
> over a year. I am
> mostly interested in programming which involves compilers, operating
> systems and things of a similar sort.
>
> I have looked at the selected project ideas by the GCC team and one of the
> problems which has caught my attention is making cp-demangle non-recursive.
> I am interested in this particular problem because as stated in the "Before
> you apply" section it is one of the selected problems which do not require
> extensive
> knowledge of the compiler theory (which I will be doing in my 3rd year of
> studies). However, I do understand the aim of the project.

Great.  The other skill necessary for the project is reading and writing
C code (not C++).

The bulk of the demangler is in libiberty which is shared between gcc
and binutils projects.  It is actually easier to hack on it in the
binutils repo, because there you get a utility to try it (c++filt) and
there are tests in the testsuite to check that code (still) works.

So I suggest you check out the binutils repo from git clone
git://sourceware.org/git/binutils-gdb.git and build it as described in
README (you do not need to run make install - you can run the configure
and make in a different working directory if you wish to keep things
clean).

The source code of interest to you is almost entirely in
libiberty/cp-demangle.c and libiberty/cp-demangle.h.

You will need to wade through some preprocessor cruft, it is there for
historical reasons and because the demangler can be used in utilities as
well as in libraries and some interface functions have different names
in those situations.  Some of that might need to be changed/adjusted, do
not be afraid of changing stuff.

Those are the files you need to study, identify the various recursion
cycles and think how to remove them and especially what would the data
structures to do so look like.  Ideally one by one, so that intermediate
results still can be checked.

I will have to refresh my own knowledge of the file so I will not go
into any more detail now, but if you have any questions about the source
code and would like to discuss approaches, priorities, anything, feel
free to ask again here on the mailing list.

When you build binutils, the simple utility to demangle stuff is called
binutils/cxxfilt (it is installed under the name of c++filt which is
what you'll find on Linux machines).  There is a testsuite, you need
package dejagnu for it, and you can run it by issuing

  make -C libiberty check

in the binutils build directory.

As I wrote above, if you have any further questions, feel free to ask
here on the list.

Good luck,

Martin



^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: GSoC (Make cp-demangle non-recursive)
  2022-02-26 10:34 Juan Scerri
@ 2022-02-26 16:31 ` Juan Scerri
  2022-03-02 18:37 ` Martin Jambor
  1 sibling, 0 replies; 6+ messages in thread
From: Juan Scerri @ 2022-02-26 16:31 UTC (permalink / raw)
  To: gcc

I have just realised that I have a typo.

I meant to say 'do not hesitate'.

Regards,
Juan Scerri

On Sat, 26 Feb 2022 at 11:34, Juan Scerri <juan.scerri.21@um.edu.mt> wrote:

> To whom this may concern,
>
> Firstly, I will introduce myself. I am Juan Scerri and I am a student at
> the University of Malta studying Computer Science and Mathematics for my
> undergraduate degree.
> I am currently in my first year of studies, which is technically my first
> formal introduction to GNU/Linux. However, I have been using GNU/Linux for
> over a year. I am
> mostly interested in programming which involves compilers, operating
> systems and things of a similar sort.
>
> I have looked at the selected project ideas by the GCC team and one of the
> problems which has caught my attention is making cp-demangle non-recursive.
> I am interested in this particular problem because as stated in the
> "Before you apply" section it is one of the selected problems which do not
> require extensive
> knowledge of the compiler theory (which I will be doing in my 3rd year of
> studies). However, I do understand the aim of the project.
>
> Any guidance would be much appreciated.
>
> If I was no clear in any way do hesitate to contact me.
>
> Regards,
> Juan Scerri
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* GSoC (Make cp-demangle non-recursive)
@ 2022-02-26 10:34 Juan Scerri
  2022-02-26 16:31 ` Juan Scerri
  2022-03-02 18:37 ` Martin Jambor
  0 siblings, 2 replies; 6+ messages in thread
From: Juan Scerri @ 2022-02-26 10:34 UTC (permalink / raw)
  To: gcc

To whom this may concern,

Firstly, I will introduce myself. I am Juan Scerri and I am a student at
the University of Malta studying Computer Science and Mathematics for my
undergraduate degree.
I am currently in my first year of studies, which is technically my first
formal introduction to GNU/Linux. However, I have been using GNU/Linux for
over a year. I am
mostly interested in programming which involves compilers, operating
systems and things of a similar sort.

I have looked at the selected project ideas by the GCC team and one of the
problems which has caught my attention is making cp-demangle non-recursive.
I am interested in this particular problem because as stated in the "Before
you apply" section it is one of the selected problems which do not require
extensive
knowledge of the compiler theory (which I will be doing in my 3rd year of
studies). However, I do understand the aim of the project.

Any guidance would be much appreciated.

If I was no clear in any way do hesitate to contact me.

Regards,
Juan Scerri

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2022-04-07 15:24 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-27 12:21 GSoC (Make cp-demangle non-recursive) Juan Scerri
2022-04-07 15:24 ` Martin Jambor
  -- strict thread matches above, loose matches on Subject: below --
2022-02-26 10:34 Juan Scerri
2022-02-26 16:31 ` Juan Scerri
2022-03-02 18:37 ` Martin Jambor
     [not found]   ` <CACJSivvYgNG+C2Y5ph=4yv5y0iaV+Ak7=vh3FGD7W6eQADxk9w@mail.gmail.com>
2022-03-09 16:15     ` Martin Jambor

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).