Adds the logic to handle -finput-charset in location_get_source_line(), so
that source lines are converted from their input encodings prior to being
output by diagnostics machinery.

gcc/c-family/ChangeLog:

	PR other/93067
	* c-opts.c (c_common_post_options): Call new function
	input_initialize_cpp_context().

gcc/fortran/ChangeLog:

	PR other/93067
	* cpp.c (gfc_cpp_post_options): Call new function
	input_initialize_cpp_context().

gcc/ChangeLog:

	PR other/93067
	* input.c (input_initialize_cpp_context): New function.
	(read_data): Add prototype.
	(add_file_to_cache_tab): Use libcpp to convert input encoding when
	needed.
	(class fcache): Add new members to track input encoding conversion
	via libcpp.
	(fcache::fcache): Adapt for new members.
	(fcache::~fcache): Likewise.
	(maybe_grow): Likewise.
	(needs_read): Adapt to be aware that fp member may be NULL now.
	(get_next_line): Likewise.
	* input.h (struct cpp_reader): Forward declare for use...
	(input_initialize_cpp_context): ...here.  Declare new function.

libcpp/ChangeLog:

	PR other/93067
	* charset.c (init_iconv_desc): Adapt to permit PFILE argument to
	be NULL.
	(_cpp_convert_input): Likewise.  Also move UTF-8 BOM logic to...
	(cpp_check_utf8_bom): ...here.  New function.
	(cpp_input_conversion_is_trivial): New function.
	* files.c (read_file_guts): Allow PFILE argument to be NULL.  Add
	INPUT_CHARSET argument as an alternate source of this information.
	(cpp_get_converted_source): New function.
	* include/cpplib.h (struct cpp_converted_source): Declare.
	(cpp_get_converted_source): Declare.
	(cpp_input_conversion_is_trivial): Declare.
	(cpp_check_utf8_bom): Declare.

gcc/testsuite/ChangeLog:

	PR other/93067
	* gcc.dg/diagnostic-input-charset-1.c: New test.
	* gcc.dg/diagnostic-input-charset-2.c: New test.

diff --git a/gcc/c-family/c-opts.c b/gcc/c-family/c-opts.c
index 59cabd12407..d5aa7859cc1 100644
--- a/gcc/c-family/c-opts.c
+++ b/gcc/c-family/c-opts.c
@@ -1124,6 +1124,10 @@ c_common_post_options (const char **pfilename)
   cpp_post_options (parse_in);
   init_global_opts_from_cpp (&global_options, cpp_get_options (parse_in));
 
+  /* Let diagnostics infrastructure know we are using libcpp to read
+     the input.  */
+  input_initialize_cpp_context (parse_in);
+
   input_location = UNKNOWN_LOCATION;
 
   *pfilename = this_input_filename
diff --git a/gcc/fortran/cpp.c b/gcc/fortran/cpp.c
index 51baf141711..2b12a98afc0 100644
--- a/gcc/fortran/cpp.c
+++ b/gcc/fortran/cpp.c
@@ -493,6 +493,10 @@ gfc_cpp_post_options (void)
 
   cpp_post_options (cpp_in);
 
+  /* Let diagnostics infrastructure know we are using libcpp to read
+     the input.  */
+  input_initialize_cpp_context (cpp_in);
+
   gfc_cpp_register_include_paths ();
 }
 
diff --git a/gcc/input.c b/gcc/input.c
index 29d10f06b86..1dcdd464bc1 100644
--- a/gcc/input.c
+++ b/gcc/input.c
@@ -30,6 +30,24 @@ along with GCC; see the file COPYING3.  If not see
 #define HAVE_ICONV 0
 #endif
 
+/* Records whether libcpp is reading the input and how it was configured, so
+   that location_get_source_line() can read files back in consistently.  */
+static struct
+{
+  bool in_use;
+  bool conversion_is_trivial;
+  const char *charset;
+} input_cpp_context;
+
+void
+input_initialize_cpp_context (cpp_reader *cpp)
+{
+  input_cpp_context.charset = cpp_get_options (cpp)->input_charset;
+  input_cpp_context.conversion_is_trivial
+    = cpp_input_conversion_is_trivial (input_cpp_context.charset);
+  input_cpp_context.in_use = true;
+}
+
 /* This is a cache used by get_next_line to store the content of a
    file to be searched for file lines.  */
 class fcache
@@ -78,6 +96,10 @@ public:
      far.  */
   char *data;
 
+  /* The allocated buffer to be freed may start a little earlier than DATA,
+     e.g. if a UTF8 BOM was skipped at the beginning.  */
+  int alloc_offset;
+
   /*  The size of the DATA array above.*/
   size_t size;
 
@@ -118,6 +140,17 @@ public:
 
   fcache ();
   ~fcache ();
+
+  /* Shift DATA by OFFSET bytes, adjusting ALLOC_OFFSET and SIZE to match.  */
+  void offset_buffer (int offset)
+  {
+    gcc_assert (offset < 0 ? alloc_offset + offset >= 0
+		: (size_t) offset <= size);
+    gcc_assert (data);
+    data += offset;
+    size -= offset;
+    alloc_offset += offset;
+  }
 };
 
 /* Current position in real source file.  */
@@ -364,6 +397,9 @@ evicted_cache_tab_entry (unsigned *highest_use_count)
   return to_evict;
 }
 
+static bool
+read_data (fcache *c);
+
 /* Create the cache used for the content of a given file to be
    accessed by caret diagnostic.  This cache is added to an array of
    cache and can be retrieved by lookup_file_in_cache_tab.  This
@@ -384,6 +420,8 @@ add_file_to_cache_tab (const char *file_path)
   if (r->fp)
     fclose (r->fp);
   r->fp = fp;
+  if (r->alloc_offset)
+    r->offset_buffer (-r->alloc_offset);
   r->nb_read = 0;
   r->line_start_idx = 0;
   r->line_num = 0;
@@ -394,6 +432,42 @@ add_file_to_cache_tab (const char *file_path)
   r->total_lines = total_lines_num (file_path);
   r->missing_trailing_newline = true;
 
+  /* If libcpp is managing the reading, then there are two cases we need to
+     consider.  If -finput-charset is not in effect, then we just need to
+     strip a UTF-8 BOM, so do that ourselves rather than calling into libcpp so
+     as to avoid paying the penalty of using libcpp, namely that the entire file
+     must be read at once.  In the (generally rare) case that a non-trivial
+     -finput-charset is needed, then go ahead and use libcpp to read the whole
+     file and do the conversion.  */
+  if (input_cpp_context.in_use)
+    {
+      if (input_cpp_context.conversion_is_trivial)
+	{
+	  /* Strip the UTF8 BOM if present.  */
+	  if (read_data (r))
+	    {
+	      const int offset = cpp_check_utf8_bom (r->data, r->nb_read);
+	      r->offset_buffer (offset);
+	      r->nb_read -= offset;
+	    }
+	}
+      else
+	{
+	  /* Need a full-blown conversion of the input charset.  */
+	  fclose (r->fp);
+	  r->fp = NULL;
+	  const cpp_converted_source cs
+	    = cpp_get_converted_source (file_path, input_cpp_context.charset);
+	  if (!cs.data)
+	    return NULL;
+	  if (r->data)
+	    XDELETEVEC (r->data);
+	  r->data = cs.data;
+	  r->nb_read = r->size = cs.len;
+	  r->alloc_offset = cs.data - cs.to_free;
+	}
+    }
+
   return r;
 }
 
@@ -415,7 +489,7 @@ lookup_or_add_file_to_cache_tab (const char *file_path)
    diagnostic.  */
 
 fcache::fcache ()
-: use_count (0), file_path (NULL), fp (NULL), data (0),
+: use_count (0), file_path (NULL), fp (NULL), data (0), alloc_offset (0),
   size (0), nb_read (0), line_start_idx (0), line_num (0),
   total_lines (0), missing_trailing_newline (true)
 {
@@ -433,6 +507,7 @@ fcache::~fcache ()
     }
   if (data)
     {
+      offset_buffer (-alloc_offset);
       XDELETEVEC (data);
       data = 0;
     }
@@ -447,9 +522,9 @@ fcache::~fcache ()
 static bool
 needs_read (fcache *c)
 {
-  return (c->nb_read == 0
-	  || c->nb_read == c->size
-	  || (c->line_start_idx >= c->nb_read - 1));
+  return c->fp && (c->nb_read == 0
+		   || c->nb_read == c->size
+		   || (c->line_start_idx >= c->nb_read - 1));
 }
 
 /*  Return TRUE iff the cache is full and thus needs to be
@@ -469,9 +544,20 @@ maybe_grow (fcache *c)
   if (!needs_grow (c))
     return;
 
-  size_t size = c->size == 0 ? fcache_buffer_size : c->size * 2;
-  c->data = XRESIZEVEC (char, c->data, size);
-  c->size = size;
+  if (!c->data)
+    {
+      gcc_assert (c->size == 0 && c->alloc_offset == 0);
+      c->size = fcache_buffer_size;
+      c->data = XNEWVEC (char, c->size);
+    }
+  else
+    {
+      const int offset = c->alloc_offset;
+      c->offset_buffer (-offset);
+      c->size *= 2;
+      c->data = XRESIZEVEC (char, c->data, c->size);
+      c->offset_buffer (offset);
+    }
 }
 
 /*  Read more data into the cache.  Extends the cache if need be.
@@ -570,7 +656,7 @@ get_next_line (fcache *c, char **line, ssize_t *line_len)
       c->missing_trailing_newline = false;
     }
 
-  if (ferror (c->fp))
+  if (c->fp && ferror (c->fp))
     return false;
 
   /* At this point, we've found the end of the of line.  It either
diff --git a/gcc/input.h b/gcc/input.h
index 4790a571c6a..0f1c6dc1f27 100644
--- a/gcc/input.h
+++ b/gcc/input.h
@@ -214,4 +214,10 @@ class GTY(()) string_concat_db
   hash_map <location_hash, string_concat *> *m_table;
 };
 
+/* Because we may read files a 2nd time, after libcpp does, in order to emit
+   diagnostics, we need to be aware if libcpp is being used and how it has
+   been configured, e.g., to know the value of -finput-charset.  This function
+   needs to be called by any frontend that is using libcpp to read its data.  */
+struct cpp_reader;
+void input_initialize_cpp_context (cpp_reader *cpp);
 #endif
diff --git a/libcpp/charset.c b/libcpp/charset.c
index 3e5578b1390..d6e4e096d33 100644
--- a/libcpp/charset.c
+++ b/libcpp/charset.c
@@ -630,7 +630,11 @@ static const struct cpp_conversion conversion_tab[] = {
    cset_converter structure for conversion from FROM to TO.  If
    iconv_open() fails, issue an error and return an identity
    converter.  Silently return an identity converter if FROM and TO
-   are identical.  */
+   are identical.
+
+   PFILE is only used for generating diagnostics; setting it to NULL
+   suppresses diagnostics.  */
+
 static struct cset_converter
 init_iconv_desc (cpp_reader *pfile, const char *to, const char *from)
 {
@@ -672,25 +676,31 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from)
 
       if (ret.cd == (iconv_t) -1)
 	{
-	  if (errno == EINVAL)
-	    cpp_error (pfile, CPP_DL_ERROR, /* FIXME should be DL_SORRY */
-		       "conversion from %s to %s not supported by iconv",
-		       from, to);
-	  else
-	    cpp_errno (pfile, CPP_DL_ERROR, "iconv_open");
-
+	  if (pfile)
+	    {
+	      if (errno == EINVAL)
+		cpp_error (pfile, CPP_DL_ERROR, /* FIXME should be DL_SORRY */
+			   "conversion from %s to %s not supported by iconv",
+			   from, to);
+	      else
+		cpp_errno (pfile, CPP_DL_ERROR, "iconv_open");
+	    }
 	  ret.func = convert_no_conversion;
 	}
     }
   else
     {
-      cpp_error (pfile, CPP_DL_ERROR, /* FIXME: should be DL_SORRY */
-		 "no iconv implementation, cannot convert from %s to %s",
-		 from, to);
+      if (pfile)
+	{
+	  cpp_error (pfile, CPP_DL_ERROR, /* FIXME: should be DL_SORRY */
+		     "no iconv implementation, cannot convert from %s to %s",
+		     from, to);
+	}
       ret.func = convert_no_conversion;
       ret.cd = (iconv_t) -1;
       ret.width = -1;
     }
+
   return ret;
 }
 
@@ -2122,6 +2132,25 @@ _cpp_interpret_identifier (cpp_reader *pfile, const uchar *id, size_t len)
 				  buf, bufp - buf, HT_ALLOC));
 }
 
+
+/* Check whether the DATA_LENGTH bytes at DATA start with a UTF-8 byte order
+   mark.  Returns the number of leading bytes the caller should skip, which
+   currently will be either 0 or 3.  DATA itself is not modified.  */
+int
+cpp_check_utf8_bom (const char *data, size_t data_length)
+{
+#if HOST_CHARSET == HOST_CHARSET_ASCII
+  /* Compare as unsigned bytes, since plain char may be signed.  */
+  const unsigned char *bytes = (const unsigned char *) data;
+  if (data_length >= 3
+      && bytes[0] == 0xef && bytes[1] == 0xbb && bytes[2] == 0xbf)
+    return 3;
+#endif
+
+  return 0;
+}
+
+
 /* Convert an input buffer (containing the complete contents of one
    source file) from INPUT_CHARSET to the source character set.  INPUT
    points to the input buffer, SIZE is its allocated size, and LEN is
@@ -2135,7 +2164,11 @@ _cpp_interpret_identifier (cpp_reader *pfile, const uchar *id, size_t len)
    INPUT is expected to have been allocated with xmalloc.  This
    function will either set *BUFFER_START to INPUT, or free it and set
    *BUFFER_START to a pointer to another xmalloc-allocated block of
-   memory.  */
+   memory.
+
+   PFILE is only used to generate diagnostics; setting it to NULL suppresses
+   diagnostics, and causes a return of NULL if there was any error instead.  */
+
 uchar * 
 _cpp_convert_input (cpp_reader *pfile, const char *input_charset,
 		    uchar *input, size_t size, size_t len,
@@ -2158,17 +2191,28 @@ _cpp_convert_input (cpp_reader *pfile, const char *input_charset,
       to.text = XNEWVEC (uchar, to.asize);
       to.len = 0;
 
-      if (!APPLY_CONVERSION (input_cset, input, len, &to))
-	cpp_error (pfile, CPP_DL_ERROR,
-		   "failure to convert %s to %s",
-		   CPP_OPTION (pfile, input_charset), SOURCE_CHARSET);
+      const bool ok = APPLY_CONVERSION (input_cset, input, len, &to);
+      /* INPUT has been consumed; release it whether or not we succeeded.  */
+      free (input);
 
-      free (input);
-    }
+      /* Clean up the mess.  */
+      if (input_cset.func == convert_using_iconv)
+	iconv_close (input_cset.cd);
 
-  /* Clean up the mess.  */
-  if (input_cset.func == convert_using_iconv)
-    iconv_close (input_cset.cd);
+      /* Handle conversion failure.  */
+      if (!ok)
+	{
+	  if (!pfile)
+	    {
+	      XDELETEVEC (to.text);
+	      *buffer_start = NULL;
+	      *st_size = 0;
+	      return NULL;
+	    }
+	  cpp_error (pfile, CPP_DL_ERROR, "failure to convert %s to %s",
+		     input_charset, SOURCE_CHARSET);
+	}
+    }
 
   /* Resize buffer if we allocated substantially too much, or if we
      haven't enough space for the \n-terminator or following
@@ -2192,19 +2236,14 @@ _cpp_convert_input (cpp_reader *pfile, const char *input_charset,
 
   buffer = to.text;
   *st_size = to.len;
-#if HOST_CHARSET == HOST_CHARSET_ASCII
-  /* The HOST_CHARSET test just above ensures that the source charset
-     is UTF-8.  So, ignore a UTF-8 BOM if we see one.  Note that
-     glib'c UTF-8 iconv() provider (as of glibc 2.7) does not ignore a
+
+  /* Ignore a UTF-8 BOM if we see one and the source charset is UTF-8.  Note
+     that glibc's UTF-8 iconv() provider (as of glibc 2.7) does not ignore a
      BOM -- however, even if it did, we would still need this code due
      to the 'convert_no_conversion' case.  */
-  if (to.len >= 3 && to.text[0] == 0xef && to.text[1] == 0xbb
-      && to.text[2] == 0xbf)
-    {
-      *st_size -= 3;
-      buffer += 3;
-    }
-#endif
+  const int bom_len = cpp_check_utf8_bom ((const char *) to.text, to.len);
+  *st_size -= bom_len;
+  buffer += bom_len;
 
   *buffer_start = to.text;
   return buffer;
@@ -2244,6 +2283,13 @@ _cpp_default_encoding (void)
   return current_encoding;
 }
 
+/* True if INPUT_CHARSET is the source charset (at most a BOM to skip).  */
+bool
+cpp_input_conversion_is_trivial (const char *input_charset)
+{
+  return strcasecmp (input_charset, SOURCE_CHARSET) == 0;
+}
+
 /* Implementation of class cpp_string_location_reader.  */
 
 /* Constructor for cpp_string_location_reader.  */
diff --git a/libcpp/files.c b/libcpp/files.c
index 301b2379a23..178bb9ed1e6 100644
--- a/libcpp/files.c
+++ b/libcpp/files.c
@@ -173,7 +173,7 @@ static bool pch_open_file (cpp_reader *pfile, _cpp_file *file,
 static bool find_file_in_dir (cpp_reader *pfile, _cpp_file *file,
 			      bool *invalid_pch, location_t loc);
 static bool read_file_guts (cpp_reader *pfile, _cpp_file *file,
-			    location_t loc);
+			    location_t loc, const char *input_charset = NULL);
 static bool read_file (cpp_reader *pfile, _cpp_file *file,
 		       location_t loc);
 static struct cpp_dir *search_path_head (cpp_reader *, const char *fname,
@@ -671,18 +671,32 @@ _cpp_find_file (cpp_reader *pfile, const char *fname, cpp_dir *start_dir,
 
    Use LOC for any diagnostics.
 
+   The input charset may be specified in the INPUT_CHARSET argument, or
+   else it will be taken from PFILE.
+
+   PFILE may be NULL.  In this case, no diagnostics are issued, and the
+   input charset must be specified in the arguments.
+
    FIXME: Flush file cache and try again if we run out of memory.  */
 static bool
-read_file_guts (cpp_reader *pfile, _cpp_file *file, location_t loc)
+read_file_guts (cpp_reader *pfile, _cpp_file *file, location_t loc,
+		const char *input_charset)
 {
   ssize_t size, total, count;
   uchar *buf;
   bool regular;
 
+  if (!input_charset)
+    {
+      gcc_assert (pfile);
+      input_charset = CPP_OPTION (pfile, input_charset);
+    }
+
   if (S_ISBLK (file->st.st_mode))
     {
-      cpp_error_at (pfile, CPP_DL_ERROR, loc,
-		    "%s is a block device", file->path);
+      if (pfile)
+	cpp_error_at (pfile, CPP_DL_ERROR, loc,
+		      "%s is a block device", file->path);
       return false;
     }
 
@@ -699,8 +713,9 @@ read_file_guts (cpp_reader *pfile, _cpp_file *file, location_t loc)
 	 does not bite us.  */
       if (file->st.st_size > INTTYPE_MAXIMUM (ssize_t))
 	{
-	  cpp_error_at (pfile, CPP_DL_ERROR, loc,
-			"%s is too large", file->path);
+	  if (pfile)
+	    cpp_error_at (pfile, CPP_DL_ERROR, loc,
+			  "%s is too large", file->path);
 	  return false;
 	}
 
@@ -733,29 +748,29 @@ read_file_guts (cpp_reader *pfile, _cpp_file *file, location_t loc)
 
   if (count < 0)
     {
-      cpp_errno_filename (pfile, CPP_DL_ERROR, file->path, loc);
+      if (pfile)
+	cpp_errno_filename (pfile, CPP_DL_ERROR, file->path, loc);
       free (buf);
       return false;
     }
 
-  if (regular && total != size && STAT_SIZE_RELIABLE (file->st))
+  if (pfile && regular && total != size && STAT_SIZE_RELIABLE (file->st))
     cpp_error_at (pfile, CPP_DL_WARNING, loc,
 	       "%s is shorter than expected", file->path);
 
   file->buffer = _cpp_convert_input (pfile,
-				     CPP_OPTION (pfile, input_charset),
+				     input_charset,
 				     buf, size + 16, total,
 				     &file->buffer_start,
 				     &file->st.st_size);
-  file->buffer_valid = true;
-
-  return true;
+  file->buffer_valid = file->buffer;
+  return file->buffer_valid;
 }
 
 /* Convenience wrapper around read_file_guts that opens the file if
    necessary and closes the file descriptor after reading.  FILE must
    have been passed through find_file() at some stage.  Use LOC for
-   any diagnostics.  */
+   any diagnostics.  Unlike read_file_guts(), PFILE may not be NULL.  */
 static bool
 read_file (cpp_reader *pfile, _cpp_file *file, location_t loc)
 {
@@ -2118,3 +2133,25 @@ _cpp_has_header (cpp_reader *pfile, const char *fname, int angle_brackets,
   return file->err_no != ENOENT;
 }
 
+/* Read FNAME and convert it from INPUT_CHARSET to the source charset, as a
+   cpp_reader would.  Returns a zero-filled result on failure.  */
+
+cpp_converted_source
+cpp_get_converted_source (const char *fname, const char *input_charset)
+{
+  cpp_converted_source res = {};
+  _cpp_file file = {};
+  file.fd = -1;
+  file.name = lbasename (fname);
+  file.path = fname;
+  if (!open_file (&file))
+    return res;
+  const bool ok = read_file_guts (NULL, &file, 0, input_charset);
+  close (file.fd);
+  if (!ok)
+    return res;
+  res.to_free = (char *) file.buffer_start;
+  res.data = (char *) file.buffer;
+  res.len = file.st.st_size;
+  return res;
+}
diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
index 50d28dc9d5a..d38dd040367 100644
--- a/libcpp/include/cpplib.h
+++ b/libcpp/include/cpplib.h
@@ -1368,6 +1368,20 @@ extern struct _cpp_file *cpp_get_file (cpp_buffer *);
 extern cpp_buffer *cpp_get_prev (cpp_buffer *);
 extern void cpp_clear_file_cache (cpp_reader *);
 
+/* cpp_get_converted_source returns the contents of the given file, as it exists
+   after cpplib has read it and converted it from the input charset to the
+   source charset.  Return struct will be zero-filled if the data could not be
+   read for any reason.  The data starts at the DATA pointer, but the TO_FREE
+   pointer is what should be passed to free(), as there may be an offset.  */
+struct cpp_converted_source
+{
+  char *to_free;
+  char *data;
+  size_t len;
+};
+cpp_converted_source cpp_get_converted_source (const char *fname,
+					       const char *input_charset);
+
 /* In pch.c */
 struct save_macro_data;
 extern int cpp_save_state (cpp_reader *, FILE *);
@@ -1438,6 +1452,7 @@ class cpp_display_width_computation {
 /* Convenience functions that are simple use cases for class
    cpp_display_width_computation.  Tab characters will be expanded to spaces
    as determined by TABSTOP.  */
+
 int cpp_byte_column_to_display_column (const char *data, int data_length,
 				       int column, int tabstop);
 inline int cpp_display_width (const char *data, int data_length,
@@ -1450,4 +1465,7 @@ int cpp_display_column_to_byte_column (const char *data, int data_length,
 				       int display_col, int tabstop);
 int cpp_wcwidth (cppchar_t c);
 
+bool cpp_input_conversion_is_trivial (const char *input_charset);
+int cpp_check_utf8_bom (const char *data, size_t data_length);
+
 #endif /* ! LIBCPP_CPPLIB_H */