From 3d604c7ca75e6293be5a84546b7f34bee48d3d92 Mon Sep 17 00:00:00 2001 From: Bernhard Reutner-Fischer Date: Tue, 1 Dec 2015 13:55:01 +0100 Subject: [PATCH] RFC: Use Levenshtein spelling suggestions in Fortran FE (v2) gcc/fortran/ChangeLog 2015-11-29 Bernhard Reutner-Fischer David Malcolm * gfortran.h (gfc_lookup_function_fuzzy): New declaration. * interface.c (check_interface0): Call gfc_lookup_function_fuzzy and use it to potentially suggest a hint for misspelled names. * resolve.c: Include spellcheck.h. (lookup_function_fuzzy_find_candidates): New static function. (lookup_uop_fuzzy_find_candidates): Likewise. (lookup_uop_fuzzy): Likewise. (resolve_operator) : Call lookup_uop_fuzzy. (gfc_lookup_function_fuzzy): New definition. (resolve_unknown_f): Call gfc_lookup_function_fuzzy. * symbol.c: Include spellcheck.h. (lookup_symbol_fuzzy_find_candidates): New static function. (lookup_symbol_fuzzy): Likewise. (gfc_set_default_type): Call lookup_symbol_fuzzy. (lookup_component_fuzzy_find_candidates): New static function. (lookup_component_fuzzy): Likewise. (gfc_find_component): Call lookup_component_fuzzy. gcc/ChangeLog: David Malcolm * spellcheck.c (find_closest_string): New function. * spellcheck.h (find_closest_string): New decl. gcc/testsuite/ChangeLog 2015-11-29 Bernhard Reutner-Fischer * gfortran.dg/spellcheck-operator.f90: New testcase. * gfortran.dg/spellcheck-procedure.f90: New testcase. * gfortran.dg/spellcheck-structure.f90: New testcase. --- gcc/fortran/gfortran.h | 1 + gcc/fortran/interface.c | 16 +++- gcc/fortran/resolve.c | 89 +++++++++++++++++++++- gcc/fortran/symbol.c | 83 +++++++++++++++++++- gcc/spellcheck.c | 43 +++++++++++ gcc/spellcheck.h | 4 + gcc/testsuite/gfortran.dg/spellcheck-operator.f90 | 30 ++++++++ gcc/testsuite/gfortran.dg/spellcheck-procedure.f90 | 41 ++++++++++ gcc/testsuite/gfortran.dg/spellcheck-structure.f90 | 35 +++++++++ 9 files changed, 331 insertions(+), 11 deletions(-) create mode 100644 gcc/testsuite/gfortran.dg/spellcheck-operator.f90 create mode 100644 gcc/testsuite/gfortran.dg/spellcheck-procedure.f90 create mode 100644 gcc/testsuite/gfortran.dg/spellcheck-structure.f90 diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h index 9f61e45..7972c3c 100644 --- a/gcc/fortran/gfortran.h +++ b/gcc/fortran/gfortran.h @@ -3085,6 +3085,7 @@ bool gfc_type_is_extensible (gfc_symbol *); bool gfc_resolve_intrinsic (gfc_symbol *, locus *); bool gfc_explicit_interface_required (gfc_symbol *, char *, int); extern int gfc_do_concurrent_flag; +const char* gfc_lookup_function_fuzzy (const char *, gfc_symtree *); /* array.c */ diff --git a/gcc/fortran/interface.c b/gcc/fortran/interface.c index f74239d..3066d68 100644 --- a/gcc/fortran/interface.c +++ b/gcc/fortran/interface.c @@ -1590,10 +1590,18 @@ check_interface0 (gfc_interface *p, const char *interface_name) if (p->sym->attr.external) gfc_error ("Procedure %qs in %s at %L has no explicit interface", p->sym->name, interface_name, &p->sym->declared_at); - else - gfc_error ("Procedure %qs in %s at %L is neither function nor " - "subroutine", p->sym->name, interface_name, - &p->sym->declared_at); + else { + const char *guessed + = gfc_lookup_function_fuzzy (p->sym->name, p->sym->ns->sym_root); + if (guessed) + gfc_error ("Procedure %qs in %s at %L is neither function nor " + "subroutine; did you mean %qs?", p->sym->name, + interface_name, &p->sym->declared_at, guessed); + else + gfc_error ("Procedure %qs in %s at %L is neither function nor " + "subroutine", p->sym->name, interface_name, + &p->sym->declared_at); + } return 1; } diff --git a/gcc/fortran/resolve.c b/gcc/fortran/resolve.c index 10add62..547930f 100644 --- a/gcc/fortran/resolve.c +++ b/gcc/fortran/resolve.c @@ -29,6 +29,7 @@ along with GCC; see the file COPYING3. If not see #include "data.h" #include "target-memory.h" /* for gfc_simplify_transfer */ #include "constructor.h" +#include "spellcheck.h" /* Types used in equivalence statements. */ @@ -2682,6 +2683,38 @@ resolve_specific_f (gfc_expr *expr) return true; } +/* Recursively append candidate SYM to CANDIDATES. */ + +static void +lookup_function_fuzzy_find_candidates (gfc_symtree *sym, + vec *candidates) +{ + gfc_symtree *p; + for (p = sym->right; p; p = p->right) + { + lookup_function_fuzzy_find_candidates (p, candidates); + if (p->n.sym->ts.type != BT_UNKNOWN) + candidates->safe_push (p->name); + } + for (p = sym->left; p; p = p->left) + { + lookup_function_fuzzy_find_candidates (p, candidates); + if (p->n.sym->ts.type != BT_UNKNOWN) + candidates->safe_push (p->name); + } +} + + +/* Lookup function FN fuzzily, taking names in FUN into account. */ + +const char* +gfc_lookup_function_fuzzy (const char *fn, gfc_symtree *fun) +{ + auto_vec candidates; + lookup_function_fuzzy_find_candidates (fun, &candidates); + return find_closest_string (fn, &candidates); +} + /* Resolve a procedure call not known to be generic nor specific. */ @@ -2732,8 +2765,15 @@ set_type: if (ts->type == BT_UNKNOWN) { - gfc_error ("Function %qs at %L has no IMPLICIT type", - sym->name, &expr->where); + const char *guessed + = gfc_lookup_function_fuzzy (sym->name, sym->ns->sym_root); + if (guessed) + gfc_error ("Function %qs at %L has no IMPLICIT type" + "; did you mean %qs?", + sym->name, &expr->where, guessed); + else + gfc_error ("Function %qs at %L has no IMPLICIT type", + sym->name, &expr->where); return false; } else @@ -3504,6 +3544,40 @@ compare_shapes (gfc_expr *op1, gfc_expr *op2) return t; } +/* Recursively append candidate UOP to CANDIDATES. */ + +static void +lookup_uop_fuzzy_find_candidates (gfc_symtree *uop, + vec *candidates) +{ + gfc_symtree *p; + /* Not sure how to properly filter here. Use all for a start. + n.uop.op is NULL for empty interface operators (is that legal?) disregard + these as i suppose they don't make terribly sense. */ + for (p = uop->right; p; p = p->right) + { + lookup_function_fuzzy_find_candidates (p, candidates); + if (p->n.uop->op != NULL) + candidates->safe_push (p->name); + } + for (p = uop->left; p; p = p->left) + { + lookup_function_fuzzy_find_candidates (p, candidates); + if (p->n.uop->op != NULL) + candidates->safe_push (p->name); + } +} + +/* Lookup user-operator OP fuzzily, taking names in UOP into account. */ + +static const char* +lookup_uop_fuzzy (const char *op, gfc_symtree *uop) +{ + auto_vec candidates; + lookup_uop_fuzzy_find_candidates (uop, &candidates); + return find_closest_string (op, &candidates); +} + /* Resolve an operator expression node. This can involve replacing the operation with a user defined function call. */ @@ -3703,7 +3777,16 @@ resolve_operator (gfc_expr *e) case INTRINSIC_USER: if (e->value.op.uop->op == NULL) - sprintf (msg, _("Unknown operator '%s' at %%L"), e->value.op.uop->name); + { + const char *name = e->value.op.uop->name; + const char *guessed; + guessed = lookup_uop_fuzzy (name, e->value.op.uop->ns->uop_root); + if (guessed) + sprintf (msg, _("Unknown operator '%s' at %%L; did you mean '%s'?"), + name, guessed); + else + sprintf (msg, _("Unknown operator '%s' at %%L"), name); + } else if (op2 == NULL) sprintf (msg, _("Operand of user operator '%s' at %%L is %s"), e->value.op.uop->name, gfc_typename (&op1->ts)); diff --git a/gcc/fortran/symbol.c b/gcc/fortran/symbol.c index ff9aff9..75f9b6d 100644 --- a/gcc/fortran/symbol.c +++ b/gcc/fortran/symbol.c @@ -27,6 +27,7 @@ along with GCC; see the file COPYING3. If not see #include "parse.h" #include "match.h" #include "constructor.h" +#include "spellcheck.h" /* Strings for all symbol attributes. We use these for dumping the @@ -235,6 +236,39 @@ gfc_get_default_type (const char *name, gfc_namespace *ns) } +/* Recursively append candidate SYM to CANDIDATES. */ + +static void +lookup_symbol_fuzzy_find_candidates (gfc_symtree *sym, + vec *candidates) +{ + gfc_symtree *p; + for (p = sym->right; p; p = p->right) + { + lookup_symbol_fuzzy_find_candidates (p, candidates); + if (p->n.sym->ts.type != BT_UNKNOWN) + candidates->safe_push (p->name); + } + for (p = sym->left; p; p = p->left) + { + lookup_symbol_fuzzy_find_candidates (p, candidates); + if (p->n.sym->ts.type != BT_UNKNOWN) + candidates->safe_push (p->name); + } +} + + +/* Lookup symbol SYM fuzzily, taking names in SYMBOL into account. */ + +static const char* +lookup_symbol_fuzzy (const char *sym, gfc_symbol *symbol) +{ + auto_vec candidates; + lookup_symbol_fuzzy_find_candidates (symbol->ns->sym_root, &candidates); + return find_closest_string (sym, &candidates); +} + + /* Given a pointer to a symbol, set its type according to the first letter of its name. Fails if the letter in question has no default type. */ @@ -253,8 +287,15 @@ gfc_set_default_type (gfc_symbol *sym, int error_flag, gfc_namespace *ns) { if (error_flag && !sym->attr.untyped) { - gfc_error ("Symbol %qs at %L has no IMPLICIT type", - sym->name, &sym->declared_at); + const char *guessed + = lookup_symbol_fuzzy (sym->name, sym); + if (guessed) + gfc_error ("Symbol %qs at %L has no IMPLICIT type" + "; did you mean %qs?", + sym->name, &sym->declared_at, guessed); + else + gfc_error ("Symbol %qs at %L has no IMPLICIT type", + sym->name, &sym->declared_at); sym->attr.untyped = 1; /* Ensure we only give an error once. */ } @@ -2188,6 +2229,32 @@ bad: } +/* Recursively append candidate COMPONENT structures to CANDIDATES. */ + +static void +lookup_component_fuzzy_find_candidates (gfc_component *component, + vec *candidates) +{ + for (gfc_component *p = component; p; p = p->next) + { + if (00 && p->ts.type == BT_DERIVED) + /* ??? There's no (suitable) DERIVED_TYPE which would come in + handy throughout the frontend; Use CLASS_DATA here for brevity. */ + lookup_component_fuzzy_find_candidates (CLASS_DATA (p), candidates); + candidates->safe_push (p->name); + } +} + +/* Lookup component MEMBER fuzzily, taking names in COMPONENT into account. */ + +static const char* +lookup_component_fuzzy (const char *member, gfc_component *component) +{ + auto_vec candidates; + lookup_component_fuzzy_find_candidates (component, &candidates); + return find_closest_string (member, &candidates); +} + /* Given a derived type node and a component name, try to locate the component structure. Returns the NULL pointer if the component is not found or the components are private. If noaccess is set, no access @@ -2238,8 +2305,16 @@ gfc_find_component (gfc_symbol *sym, const char *name, } if (p == NULL && !silent) - gfc_error ("%qs at %C is not a member of the %qs structure", - name, sym->name); + { + const char *guessed = lookup_component_fuzzy (name, sym->components); + if (guessed) + gfc_error ("%qs at %C is not a member of the %qs structure" + "; did you mean %qs?", + name, sym->name, guessed); + else + gfc_error ("%qs at %C is not a member of the %qs structure", + name, sym->name); + } return p; } diff --git a/gcc/spellcheck.c b/gcc/spellcheck.c index 32854cf..be4cef9 100644 --- a/gcc/spellcheck.c +++ b/gcc/spellcheck.c @@ -119,3 +119,46 @@ levenshtein_distance (const char *s, const char *t) { return levenshtein_distance (s, strlen (s), t, strlen (t)); } + +/* Given TARGET, a non-NULL string, and CANDIDATES, a vec of non-NULL + strings, determine which element within CANDIDATES has the lowest edit + distance to TARGET. If there are multiple elements with the + same minimal distance, the first in the vector wins. + + If more than half of the letters were misspelled, the suggestion is + likely to be meaningless, so return NULL for this case. */ + +const char * +find_closest_string (const char *target, + const auto_vec *candidates) +{ + gcc_assert (target); + gcc_assert (candidates); + + int i; + const char *string, *best_string = NULL; + edit_distance_t best_distance = MAX_EDIT_DISTANCE; + size_t len_target = strlen (target); + FOR_EACH_VEC_ELT (*candidates, i, string) + { + gcc_assert (string); + edit_distance_t dist = levenshtein_distance (target, len_target, + string, strlen (string)); + if (dist < best_distance) + { + best_distance = dist; + best_string = string; + } + } + + /* If more than half of the letters were misspelled, the suggestion is + likely to be meaningless. */ + if (best_string) + { + unsigned int cutoff = MAX (len_target, strlen (best_string)) / 2; + if (best_distance > cutoff) + return NULL; + } + + return best_string; +} diff --git a/gcc/spellcheck.h b/gcc/spellcheck.h index ad02998..1100d15 100644 --- a/gcc/spellcheck.h +++ b/gcc/spellcheck.h @@ -31,6 +31,10 @@ levenshtein_distance (const char *s, int len_s, extern edit_distance_t levenshtein_distance (const char *s, const char *t); +extern const char * +find_closest_string (const char *target, + const auto_vec *candidates); + /* spellcheck-tree.c */ extern edit_distance_t diff --git a/gcc/testsuite/gfortran.dg/spellcheck-operator.f90 b/gcc/testsuite/gfortran.dg/spellcheck-operator.f90 new file mode 100644 index 0000000..810a770 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/spellcheck-operator.f90 @@ -0,0 +1,30 @@ +! { dg-do compile } +! test levenshtein based spelling suggestions + +module mymod1 + implicit none + contains + function something_good (iarg1) + integer :: something_good + integer, intent(in) :: iarg1 + something_good = iarg1 + 42 + end function something_good +end module mymod1 + +program spellchekc + use mymod1 + implicit none + + interface operator (.mywrong.) + module procedure something_wring ! { dg-error "Procedure .something_wring. in operator interface .mywrong. at .1. is neither function nor subroutine; did you mean .something_good.\\?|User operator procedure .something_wring. at .1. must be a FUNCTION" } + end interface + + interface operator (.mygood.) + module procedure something_good + end interface + + integer :: i, j, added + i = 0 + j = 0 + added = .mygoof. j ! { dg-error "Unknown operator .mygoof. at .1.; did you mean .mygood.\\?" } +end program spellchekc diff --git a/gcc/testsuite/gfortran.dg/spellcheck-procedure.f90 b/gcc/testsuite/gfortran.dg/spellcheck-procedure.f90 new file mode 100644 index 0000000..7923081 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/spellcheck-procedure.f90 @@ -0,0 +1,41 @@ +! { dg-do compile } +! test levenshtein based spelling suggestions + +module mymod1 + implicit none + contains + function something_good (iarg1) + integer :: something_good + integer, intent(in) :: iarg1 + something_good = iarg1 + 42 + end function something_good +end module mymod1 + +subroutine bark_unless_zero(iarg) + implicit none + integer, intent(in) :: iarg + if (iarg /= 0) call abort +end subroutine bark_unless_zero + +function myadd(iarg1, iarg2) + implicit none + integer :: myadd + integer, intent(in) :: iarg1, iarg2 + myadd = iarg1 + iarg2 +end function myadd + +program spellchekc + use mymod1 + implicit none + + integer :: i, j, myadd + i = 0 + j = 0 +! I suppose this cannot be made to work, no\\? +! call barf_unless_zero(i) ! { -dg-error "; did you mean .bark_unless_zero.\\?" } + j = something_goof(j) ! { dg-error "no IMPLICIT type; did you mean .something_good.\\?" } + j = myaddd(i, j) ! { dg-error "no IMPLICIT type; did you mean .myadd.\\?" } + j = mya(i, j) ! { dg-error "no IMPLICIT type; did you mean .myadd.\\?" } + if (j /= 42) call abort + +end program spellchekc diff --git a/gcc/testsuite/gfortran.dg/spellcheck-structure.f90 b/gcc/testsuite/gfortran.dg/spellcheck-structure.f90 new file mode 100644 index 0000000..929e05f --- /dev/null +++ b/gcc/testsuite/gfortran.dg/spellcheck-structure.f90 @@ -0,0 +1,35 @@ +! { dg-do compile } +! test levenshtein based spelling suggestions +implicit none + +!!!!!!!!!!!!!! structure tests !!!!!!!!!!!!!! +type type1 + real :: radius + integer :: i +end type type1 + +type type2 + integer :: myint + type(type1) :: mytype +end type type2 + +type type3 + type(type2) :: type_2 +end type type3 +type type4 + type(type3) :: type_3 +end type type4 + +type(type1) :: t1 +t1%radiuz = .0 ! { dg-error ".radiuz. at .1. is not a member of the .type1. structure; did you mean .radius.\\?" } +t1%x = .0 ! { dg-error ".x. at .1. is not a member of the .type1. structure" } +type(type2) :: t2 +t2%mytape%radius = .0 ! { dg-error ".mytape. at .1. is not a member of the .type2. structure; did you mean .mytype.\\?" } +t2%mytype%radious = .0 ! { dg-error ".radious. at .1. is not a member of the .type1. structure; did you mean .radius.\\?" } +type(type4) :: t4 +t4%type_3%type_2%mytype%radium = 88.0 ! { dg-error ".radium. at .1. is not a member of the .type1. structure; did you mean .radius.\\?" } + +!!!!!!!!!!!!!! symbol tests !!!!!!!!!!!!!! +integer :: iarg1 +iarg2 = 1 ! { dg-error "Symbol .iarg2. at .1. has no IMPLICIT type; did you mean .iarg1.\\?" } +end -- 1.8.5.3