From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <gcc-patches-return-409267-listarch-gcc-patches=gcc.gnu.org@gcc.gnu.org>
Received: (qmail 131036 invoked by alias); 6 Oct 2015 00:33:32 -0000
Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm
Precedence: bulk
List-Id: <gcc-patches.gcc.gnu.org>
List-Archive: <http://gcc.gnu.org/ml/gcc-patches/>
List-Post: <mailto:gcc-patches@gcc.gnu.org>
List-Help: <mailto:gcc-patches-help@gcc.gnu.org>
Sender: gcc-patches-owner@gcc.gnu.org
Received: (qmail 131005 invoked by uid 89); 6 Oct 2015 00:33:28 -0000
Authentication-Results: sourceware.org; auth=none
X-Virus-Found: No
X-Spam-SWARE-Status: No, score=-2.2 required=5.0 tests=AWL,BAYES_00,RCVD_IN_DNSWL_LOW,SPF_PASS,T_RP_MATCHES_RCVD autolearn=ham version=3.3.2
X-HELO: mail-qk0-f178.google.com
Received: from mail-qk0-f178.google.com (HELO mail-qk0-f178.google.com) (209.85.220.178) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with (AES128-GCM-SHA256 encrypted) ESMTPS; Tue, 06 Oct 2015 00:33:27 +0000
Received: by qkap81 with SMTP id p81so76703113qka.2        for <gcc-patches@gcc.gnu.org>; Mon, 05 Oct 2015 17:33:25 -0700 (PDT)
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;        d=1e100.net; s=20130820;        h=x-gm-message-state:mime-version:in-reply-to:references:date         :message-id:subject:from:to:cc:content-type;        bh=R1Yh9cX/X828CxCMjLanKh1QMovvfJwgCf+JsnNEBqA=;        b=FWH8o0kcSUiPlIb7fqaItyKwpfBChzrTjNB7/9h4jyBevW5E9VZCYA8lPPGUDtQMSM         hzeq3E6+les3/ULJVqwehzLcprTLbeRAcY+OIY8G0/6tueZ0UbQ9ZoHKA8akbMtjULlm         bvo/Zwqkz/CErJlxI1cgfTxdBCemvM4/sbAc/41z5QIp6V/9VWWqLItLPIzxQ8OEyuFy         K5VkzRexroaKzbZtReLqtn9euhY5Ngx6MKBlXEdPAGLydL9f9Hb9yUvd63aZtZmd694p         IMkUcQ7riQTxUK2RuKyTf7GSektrgZdZT2S2WI12lvYrbP6M6w6iIx4VF4wfz6BGeaw6         68/g==
X-Gm-Message-State: ALoCoQkh/ajnmd/lciqWB9iGNosGMTmn+UEcqJGGbpQtxU8AupnWlNl21f/xQpTLOOWeEHsba/F+
MIME-Version: 1.0
X-Received: by 10.55.198.217 with SMTP id s86mr19926721qkl.75.1444091604964; Mon, 05 Oct 2015 17:33:24 -0700 (PDT)
Received: by 10.55.102.193 with HTTP; Mon, 5 Oct 2015 17:33:24 -0700 (PDT)
In-Reply-To: <CAF1bQ=QbkF0cjarT1YO_geh+tv=U9T0nHZGXEOZ=wjEwWjCBQA@mail.gmail.com>
References: <CAF1bQ=QbkF0cjarT1YO_geh+tv=U9T0nHZGXEOZ=wjEwWjCBQA@mail.gmail.com>
Date: Tue, 06 Oct 2015 00:33:00 -0000
Message-ID: <CAAkRFZJt5L+N9AnDqYJROJW00NpGU+M0GzQn3pPu-ECb_LQKhQ@mail.gmail.com>
Subject: Re: [google][gcc-4_9] encode and compress cc1 option strings in gcov_module_info
From: Xinliang David Li <davidxl@google.com>
To: Rong Xu <xur@google.com>
Cc: GCC Patches <gcc-patches@gcc.gnu.org>
Content-Type: text/plain; charset=UTF-8
X-IsSubscribed: yes
X-SW-Source: 2015-10/txt/msg00488.txt.bz2

       unsigned ggc_memory = gcov_read_unsigned ();
+      unsigned marker = 0, len = 0, k;
+      char **string_array, *saved_cc1_strings;
+
       for (unsigned j = 0; j < 7; j++)


Do not use hard coded number. Use the enum defined in coverage.c.


+        string_array[j] = xstrdup (gcov_read_string ());
+
+      k = 0;
+      for (unsigned j = 1; j < 7; j++)

Do not use hard coded number.


+        {
+          if (num_array[j] == 0)
+            continue;
+          marker += num_array[j];

It is better to read if the name of variable 'marker' is changed to
'j_end' or something similar

For all the substrings of 'j' kind, there should be just one marker,
right? It looks like here you introduce one marker per string, not one
marker per string kind.

+          len += 3; /* [[<FLAG>  */

Same here for hard coded value.

+          for (; k < marker; k++)
+            len += strlen (string_array[k]) + 1; /* 1 for delimter of ']'  */

Why do we need one ']' per string?


+        }
+      saved_cc1_strings = (char *) xmalloc (len + 1);
+      saved_cc1_strings[0] = 0;
+
+      marker = 0;
+      k = 0;
+      for (unsigned j = 1; j < 7; j++)

Same here for 7.

+        {
+          static const char lipo_string_flags[6] = {'Q', 'B', 'S',
'D','I', 'C'};
+          if (num_array[j] == 0)
+            continue;
+          marker += num_array[j];

Suggest changing marker to j_end

+          sprintf (saved_cc1_strings, "%s[[%c", saved_cc1_strings,
+                   lipo_string_flags[j - 1]);
+          for (; k < marker; k++)
+            {
+              sprintf (saved_cc1_strings, "%s%s]", saved_cc1_strings,
+                       string_array[k]);

+#define DELIMTER            "[["

Why double '[' ?

+#define DELIMTER2           "]"
+#define QUOTE_PATH_FLAG     'Q'
+#define BRACKET_PATH_FLAG   'B'
+#define SYSTEM_PATH_FLAG    'S'
+#define D_U_OPTION_FLAG     'D'
+#define INCLUDE_OPTION_FLAG 'I'
+#define COMMAND_ARG_FLAG    'C'
+
+enum lipo_cc1_string_kind {
+  k_quote_paths = 0,
+  k_bracket_paths,
+  k_system_paths,
+  k_cpp_defines,
+  k_cpp_includes,
+  k_lipo_cl_args,
+  num_lipo_cc1_string_kind
+};
+
+struct lipo_parsed_cc1_string {
+  const char* source_filename;
+  unsigned num[num_lipo_cc1_string_kind];
+  char **strings[num_lipo_cc1_string_kind];
+};
+
+struct lipo_parsed_cc1_string *
+lipo_parse_saved_cc1_string (const char *src, char *str,
+    bool parse_cl_args_only);
+void free_parsed_string (struct lipo_parsed_cc1_string *string);
+

Declare above in a header file.


 /* Returns true if the command-line arguments stored in the given module-infos
    are incompatible.  */
 bool
-incompatible_cl_args (struct gcov_module_info* mod_info1,
-      struct gcov_module_info* mod_info2)
+incompatible_cl_args (struct lipo_parsed_cc1_string* mod_info1,
+                      struct lipo_parsed_cc1_string* mod_info2)

Fix formating.

 {
     {
@@ -1647,7 +1679,7 @@ build_var (tree fn_decl, tree type, int counter)
 /* Creates the gcov_fn_info RECORD_TYPE.  */

                       NULL_TREE, get_gcov_unsigned_t ());
   DECL_CHAIN (field) = fields;
   fields = field;

-  /* Num bracket paths  */
+  /* cc1_uncompressed_strlen field */
   field = build_decl (BUILTINS_LOCATION, FIELD_DECL,
                       NULL_TREE, get_gcov_unsigned_t ());
   DECL_CHAIN (field) = fields;
   fields = field;


Why do we need to store uncompressed string length? If there is need
to do that, I suggest combine uncompressed length and compressed
length into one 32bit integer. (16/16, or 17/15 split)


 David

On Mon, Oct 5, 2015 at 3:51 PM, Rong Xu <xur@google.com> wrote:
> Hi,
>
> This patch is for google branch only.
>
> It encodes and compresses various cc1 option strings in
> gcov_module_info to reduce the lipo instrumented object size. The
> savings are from string compression and the reduced number of
> relocations.
>
> More specifically, we replace the following fields in gcov_module_info
>   gcov_unsigned_t num_quote_paths;
>   gcov_unsigned_t num_bracket_paths;
>   gcov_unsigned_t num_system_paths;
>   gcov_unsigned_t num_cpp_defines;
>   gcov_unsigned_t num_cpp_includes;
>   gcov_unsigned_t num_cl_args;
>   char *string_array[1];
> with
>   gcov_unsigned_t cc1_strlen;
>   gcov_unsigned_t cc1_uncompressed_strlen;
>   char *saved_cc1_strings;
>
> The new saved_cc1_strings are zlib compressed string.
>
> Tested with google internal benchmarks.
>
> Thanks,
>
> -Rong