From mboxrd@z Thu Jan  1 00:00:00 1970
Subject: Re: PR29472: debuginfod metadata query
From: Mark Wielaard
To: "Frank Ch. Eigler", elfutils-devel@sourceware.org
Date: Fri, 07 Jul 2023 15:59:58 +0200

Hi Frank,

And finally the debuginfod server code itself.

On Wed, 2023-04-12 at 16:31 -0400, Frank Ch. Eigler via Elfutils-devel wrote:
> +	* debuginfod.cxx (DEBUGINFOD_SQLITE_DDL): Add an index or two.
> +	(metadata_maxtime_s, parse_opt): New parameter for load control.
> +	(add_client_federation_headers): New refactored function.
> +	(handle_metadata): New function.
> +	(handler_cb): Call it for /metadata URL. Trace it.
> +	(groom): Tweak sqlite_ps object lifetimes.
> [...]
> diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx
> index 5ef6cc32189b..000820fec5ea 100644
> --- a/debuginfod/debuginfod.cxx
> +++ b/debuginfod/debuginfod.cxx
> @@ -1,5 +1,5 @@
>  /* Debuginfo-over-http server.
> -   Copyright (C) 2019-2021 Red Hat, Inc.
> +   Copyright (C) 2019-2023 Red Hat, Inc.
>     Copyright (C) 2021, 2022 Mark J. Wielaard
>     This file is part of elfutils.
>
> @@ -68,6 +68,7 @@ extern "C" {
>  #include
>  #include
>  #include
> +#include
>
>
>  /* If fts.h is included before config.h, its indirect inclusions may not
> @@ -127,6 +128,9 @@ using namespace std;
>  #define tid() pthread_self()
>  #endif
>
> +#ifdef HAVE_JSON_C
> +  #include
> +#endif
>
>  inline bool
>  string_endswith(const string& haystack, const string& needle)
> @@ -185,7 +189,7 @@ static const char DEBUGINFOD_SQLITE_DDL[] =
>    "        foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n"
>    "        primary key (buildid, file, mtime)\n"
>    "        ) " WITHOUT_ROWID ";\n"
> -  // Index for faster delete by file identifier
> +  // Index for faster delete by file identifier and metadata searches
>    "create index if not exists " BUILDIDS "_f_de_idx on " BUILDIDS "_f_de (file, mtime);\n"
>    "create table if not exists " BUILDIDS "_f_s (\n"
>    "        buildid integer not null,\n"
> @@ -211,6 +215,8 @@ static const char DEBUGINFOD_SQLITE_DDL[] =
>    "        ) " WITHOUT_ROWID ";\n"
>    // Index for faster delete by archive file identifier
>    "create index if not exists " BUILDIDS "_r_de_idx on " BUILDIDS "_r_de (file, mtime);\n"
> +  // Index for metadata searches
> +  "create index if not exists " BUILDIDS "_r_de_idx2 on " BUILDIDS "_r_de (content);\n"
>    "create table if not exists " BUILDIDS "_r_sref (\n" // outgoing dwarf sourcefile references from rpm
>    "        buildid integer not null,\n"
>    "        artifactsrc integer not null,\n"

OK, an extra index on content in the _r_de table, which is an index
into the _files table. In general I find it hard to keep the whole sql
structure in my head. Is this described somewhere apart from just
reading the whole DEBUGINFOD_SQLITE_DDL?

> @@ -398,6 +404,9 @@ static const struct argp_option options[] =
>    { "passive", ARGP_KEY_PASSIVE, NULL, 0, "Do not scan or groom, read-only database.", 0 },
>  #define ARGP_KEY_DISABLE_SOURCE_SCAN 0x1009
>    { "disable-source-scan", ARGP_KEY_DISABLE_SOURCE_SCAN, NULL, 0, "Do not scan dwarf source info.", 0 },
> +#define ARGP_KEY_METADATA_MAXTIME 0x100A
> +  { "metadata-maxtime", ARGP_KEY_METADATA_MAXTIME, "SECONDS", 0,
> +    "Number of seconds to limit metadata query run time, 0=unlimited.", 0 },
>    { NULL, 0, NULL, 0, NULL, 0 },
>  };

Ack. New --metadata-maxtime argument.

> @@ -452,6 +461,8 @@ static unsigned forwarded_ttl_limit = 8;
>  static bool scan_source_info = true;
>  static string tmpdir;
>  static bool passive_p = false;
> +static unsigned metadata_maxtime_s = 5;
> +

OK. Defaults to 5 seconds.

>  static void set_metric(const string& key, double value);
>  // static void inc_metric(const string& key);
> @@ -653,6 +664,9 @@ parse_opt (int key, char *arg,
>      case ARGP_KEY_DISABLE_SOURCE_SCAN:
>        scan_source_info = false;
>        break;
> +    case ARGP_KEY_METADATA_MAXTIME:
> +      metadata_maxtime_s = (unsigned) atoi(arg);
> +      break;
>      // case 'h': argp_state_help (state, stderr, ARGP_HELP_LONG|ARGP_HELP_EXIT_OK);
>      default: return ARGP_ERR_UNKNOWN;
>      }

OK. Can be set by user.
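One small thing: atoi silently turns a malformed SECONDS argument into
0, which here means unlimited. An untested sketch of a stricter parse
(parse_seconds is my name, nothing from the patch):

#include <cerrno>
#include <climits>
#include <cstdlib>

// Reject malformed or out-of-range values instead of letting atoi
// silently map them to 0 (= unlimited).
static bool
parse_seconds (const char *arg, unsigned &result)
{
  char *end = NULL;
  errno = 0;
  unsigned long v = strtoul (arg, &end, 10);
  if (errno != 0 || end == arg || *end != '\0' || v > UINT_MAX)
    return false;
  result = (unsigned) v;
  return true;
}

Then the ARGP_KEY_METADATA_MAXTIME case could argp_error when it
returns false. Not a blocker, just a thought.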
> @@ -2040,6 +2054,58 @@ handle_buildid_r_match (bool internal_req_p,
>    return r;
>  }
>
> +void
> +add_client_federation_headers(debuginfod_client *client, MHD_Connection* conn){
> +  // Transcribe incoming User-Agent:
> +  string ua = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "User-Agent") ?: "";
> +  string ua_complete = string("User-Agent: ") + ua;
> +  debuginfod_add_http_header (client, ua_complete.c_str());
> +
> +  // Compute larger XFF:, for avoiding info loss during
> +  // federation, and for future cyclicity detection.
> +  string xff = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "X-Forwarded-For") ?: "";
> +  if (xff != "")
> +    xff += string(", "); // comma separated list
> +
> +  unsigned int xff_count = 0;
> +  for (auto&& i : xff){
> +    if (i == ',') xff_count++;
> +  }
> +
> +  // if X-Forwarded-For: exceeds N hops,
> +  // do not delegate a local lookup miss to upstream debuginfods.
> +  if (xff_count >= forwarded_ttl_limit)
> +    throw reportable_exception(MHD_HTTP_NOT_FOUND, "not found, --forwared-ttl-limit reached \
> +and will not query the upstream servers");
> +
> +  // Compute the client's numeric IP address only - so can't merge with conninfo()
> +  const union MHD_ConnectionInfo *u = MHD_get_connection_info (conn,
> +                                                               MHD_CONNECTION_INFO_CLIENT_ADDRESS);
> +  struct sockaddr *so = u ? u->client_addr : 0;
> +  char hostname[256] = ""; // RFC1035
> +  if (so && so->sa_family == AF_INET) {
> +    (void) getnameinfo (so, sizeof (struct sockaddr_in), hostname, sizeof (hostname), NULL, 0,
> +                        NI_NUMERICHOST);
> +  } else if (so && so->sa_family == AF_INET6) {
> +    struct sockaddr_in6* addr6 = (struct sockaddr_in6*) so;
> +    if (IN6_IS_ADDR_V4MAPPED(&addr6->sin6_addr)) {
> +      struct sockaddr_in addr4;
> +      memset (&addr4, 0, sizeof(addr4));
> +      addr4.sin_family = AF_INET;
> +      addr4.sin_port = addr6->sin6_port;
> +      memcpy (&addr4.sin_addr.s_addr, addr6->sin6_addr.s6_addr+12, sizeof(addr4.sin_addr.s_addr));
> +      (void) getnameinfo ((struct sockaddr*) &addr4, sizeof (addr4),
> +                          hostname, sizeof (hostname), NULL, 0,
> +                          NI_NUMERICHOST);
> +    } else {
> +      (void) getnameinfo (so, sizeof (struct sockaddr_in6), hostname, sizeof (hostname), NULL, 0,
> +                          NI_NUMERICHOST);
> +    }
> +  }
> +
> +  string xff_complete = string("X-Forwarded-For: ")+xff+string(hostname);
> +  debuginfod_add_http_header (client, xff_complete.c_str());
> +}

OK, extracted from handle_buildid.

>
>  static struct MHD_Response*
>  handle_buildid_match (bool internal_req_p,
> @@ -2273,57 +2339,7 @@ handle_buildid (MHD_Connection* conn,
>    debuginfod_set_progressfn (client, & debuginfod_find_progress);
>
>    if (conn)
> -    {
> -      // Transcribe incoming User-Agent:
> -      string ua = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "User-Agent") ?: "";
> -      string ua_complete = string("User-Agent: ") + ua;
> -      debuginfod_add_http_header (client, ua_complete.c_str());
> -
> -      // Compute larger XFF:, for avoiding info loss during
> -      // federation, and for future cyclicity detection.
> -      string xff = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "X-Forwarded-For") ?: "";
> -      if (xff != "")
> -        xff += string(", "); // comma separated list
> -
> -      unsigned int xff_count = 0;
> -      for (auto&& i : xff){
> -        if (i == ',') xff_count++;
> -      }
> -
> -      // if X-Forwarded-For: exceeds N hops,
> -      // do not delegate a local lookup miss to upstream debuginfods.
> -      if (xff_count >= forwarded_ttl_limit)
> -        throw reportable_exception(MHD_HTTP_NOT_FOUND, "not found, --forwared-ttl-limit reached \
> -and will not query the upstream servers");
> -
> -      // Compute the client's numeric IP address only - so can't merge with conninfo()
> -      const union MHD_ConnectionInfo *u = MHD_get_connection_info (conn,
> -                                                                   MHD_CONNECTION_INFO_CLIENT_ADDRESS);
> -      struct sockaddr *so = u ? u->client_addr : 0;
> -      char hostname[256] = ""; // RFC1035
> -      if (so && so->sa_family == AF_INET) {
> -        (void) getnameinfo (so, sizeof (struct sockaddr_in), hostname, sizeof (hostname), NULL, 0,
> -                            NI_NUMERICHOST);
> -      } else if (so && so->sa_family == AF_INET6) {
> -        struct sockaddr_in6* addr6 = (struct sockaddr_in6*) so;
> -        if (IN6_IS_ADDR_V4MAPPED(&addr6->sin6_addr)) {
> -          struct sockaddr_in addr4;
> -          memset (&addr4, 0, sizeof(addr4));
> -          addr4.sin_family = AF_INET;
> -          addr4.sin_port = addr6->sin6_port;
> -          memcpy (&addr4.sin_addr.s_addr, addr6->sin6_addr.s6_addr+12, sizeof(addr4.sin_addr.s_addr));
> -          (void) getnameinfo ((struct sockaddr*) &addr4, sizeof (addr4),
> -                              hostname, sizeof (hostname), NULL, 0,
> -                              NI_NUMERICHOST);
> -        } else {
> -          (void) getnameinfo (so, sizeof (struct sockaddr_in6), hostname, sizeof (hostname), NULL, 0,
> -                              NI_NUMERICHOST);
> -        }
> -      }
> -
> -      string xff_complete = string("X-Forwarded-For: ")+xff+string(hostname);
> -      debuginfod_add_http_header (client, xff_complete.c_str());
> -    }
> +    add_client_federation_headers(client, conn);
>
>    if (artifacttype == "debuginfo")
>      fd = debuginfod_find_debuginfo (client,
> @@ -2535,6 +2551,176 @@ handle_metrics (off_t* size)
>    return r;
>  }

OK, moved into add_client_federation_headers.

> +
> +#ifdef HAVE_JSON_C
> +static struct MHD_Response*
> +handle_metadata (MHD_Connection* conn,
> +                 string key, string value, off_t* size)
> +{
> +  MHD_Response* r;
> +  sqlite3 *thisdb = dbq;
> +
> +  // Query locally for matching e, d files
> +  string op;
> +  if (key == "glob")
> +    op = "glob";
> +  else if (key == "file")
> +    op = "=";
> +  else
> +    throw reportable_exception("/metadata webapi error, unsupported key");
> +
> +  string sql = string(
> +      // explicit query r_de and f_de once here, rather than the query_d and query_e
> +      // separately, because they scan the same tables, so we'd double the work
> +      "select d1.executable_p, d1.debuginfo_p, 0 as source_p, b1.hex, f1.name as file, a1.name as archive "
> +      "from " BUILDIDS "_r_de d1, " BUILDIDS "_files f1, " BUILDIDS "_buildids b1, " BUILDIDS "_files a1 "
> +      "where f1.id = d1.content and a1.id = d1.file and d1.buildid = b1.id and f1.name " + op + " ? "
> +      "union all \n"
> +      "select d2.executable_p, d2.debuginfo_p, 0, b2.hex, f2.name, NULL "
> +      "from " BUILDIDS "_f_de d2, " BUILDIDS "_files f2, " BUILDIDS "_buildids b2 "
> +      "where f2.id = d2.file and d2.buildid = b2.id and f2.name " + op + " ? ");
> +  // NB: we could query source file names too, thusly:
> +  //
> +  //    select * from " BUILDIDS "_buildids b, " BUILDIDS "_files f1, " BUILDIDS "_r_sref sr
> +  //    where b.id = sr.buildid and f1.id = sr.artifactsrc and f1.name " + op + "?"
> +  //    UNION ALL something with BUILDIDS "_f_s"
> +  //
> +  // But the first part of this query cannot run fast without the same index temp-created
> +  // during "maxigroom":
> +  //    create index " BUILDIDS "_r_sref_arc on " BUILDIDS "_r_sref(artifactsrc);
> +  // and unfortunately this index is HUGE.
> +  // It's similar to the size of the _r_sref table, which is already the
> +  // largest part of a debuginfod index. Adding that index would nearly
> +  // double the .sqlite db size.

Magic sql query that looks ok. Thanks for the extra comments.

> +  sqlite_ps *pp = new sqlite_ps (thisdb, "mhd-query-meta-glob", sql);
> +  pp->reset();
> +  pp->bind(1, value);
> +  pp->bind(2, value);
> +  // pp->bind(3, value); // "source" query clause disabled
> +  unique_ptr<sqlite_ps> ps_closer(pp); // release pp if exception or return

OK.

> +  json_object *metadata = json_object_new_array();
> +  if (!metadata)
> +    throw libc_exception(ENOMEM, "json allocation");

If there is an exception below won't this leak?

> +
> +  // consume all the rows
> +  struct timespec ts_start;
> +  clock_gettime (CLOCK_MONOTONIC, &ts_start);
> +
> +  int rc;
> +  while (SQLITE_DONE != (rc = pp->step()))
> +    {
> +      // break out of loop if we have searched too long
> +      struct timespec ts_end;
> +      clock_gettime (CLOCK_MONOTONIC, &ts_end);
> +      double deltas = (ts_end.tv_sec - ts_start.tv_sec) + (ts_end.tv_nsec - ts_start.tv_nsec)/1.e9;
> +      if (metadata_maxtime_s > 0 && deltas > metadata_maxtime_s)
> +        break; // NB: no particular signal is given to the client about incompleteness

Why not? They get a partial result.

> +      if (rc != SQLITE_ROW) throw sqlite_exception(rc, "step");
> +
> +      int m_executable_p = sqlite3_column_int (*pp, 0);
> +      int m_debuginfo_p  = sqlite3_column_int (*pp, 1);
> +      int m_source_p     = sqlite3_column_int (*pp, 2);
> +      string m_buildid   = (const char*) sqlite3_column_text (*pp, 3) ?: ""; // should always be non-null
> +      string m_file      = (const char*) sqlite3_column_text (*pp, 4) ?: "";
> +      string m_archive   = (const char*) sqlite3_column_text (*pp, 5) ?: "";
> +
> +      // Confirm that m_file matches in the fnmatch(FNM_PATHNAME)
> +      // sense, since sqlite's GLOB operator is a looser filter.
> +      if (key == "glob" && fnmatch(value.c_str(), m_file.c_str(), FNM_PATHNAME) != 0)
> +        continue;

If I understand correctly when FNM_PATHNAME is set then slashes '/'
need to be explicitly matched. Is that deliberate? (See the small
fnmatch example below.)

> +      auto add_metadata = [metadata, m_buildid, m_file, m_archive](const string& type) {
> +        json_object* entry = json_object_new_object();
> +        if (NULL == entry) throw libc_exception (ENOMEM, "cannot allocate json");
> +        defer_dtor entry_d(entry, json_object_put);
> +
> +        auto add_entry_metadata = [entry](const char* k, string v) {
> +          json_object* s;
> +          if(v != "") {
> +            s = json_object_new_string(v.c_str());
> +            if (NULL == s) throw libc_exception (ENOMEM, "cannot allocate json");
> +            json_object_object_add(entry, k, s);
> +          }
> +        };
> +
> +        add_entry_metadata("type", type.c_str());
> +        add_entry_metadata("buildid", m_buildid);
> +        add_entry_metadata("file", m_file);
> +        if (m_archive != "") add_entry_metadata("archive", m_archive);
> +        if (verbose > 3)
> +          obatched(clog) << "metadata found local "
> +                         << json_object_to_json_string_ext(entry,
> +                                                           JSON_C_TO_STRING_PRETTY)
> +                         << endl;
> +
> +        // Increase ref count to switch its ownership
> +        json_object_array_add(metadata, json_object_get(entry));
> +      };
> +
> +      if (m_executable_p) add_metadata("executable");
> +      if (m_debuginfo_p)  add_metadata("debuginfo");
> +      if (m_source_p)     add_metadata("source");

Would it make sense to check the size of the created output here?
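To make the FNM_PATHNAME question concrete, here is a tiny standalone
example (mine, not from the patch) of where fnmatch and sqlite GLOB
disagree:

#include <fnmatch.h>
#include <cstdio>

int
main ()
{
  // sqlite GLOB lets '*' cross '/' boundaries, so "/usr/*/ls" matches
  // both paths below.  With FNM_PATHNAME, '*' cannot match a '/', so
  // only the first call matches (prints 0); the second prints
  // FNM_NOMATCH, i.e. non-zero.
  printf ("%d\n", fnmatch ("/usr/*/ls", "/usr/bin/ls", FNM_PATHNAME));
  printf ("%d\n", fnmatch ("/usr/*/ls", "/usr/local/bin/ls", FNM_PATHNAME));
  return 0;
}

So a user globbing for "/usr/*" would not see files in subdirectories.
If that is the intent it probably deserves a note in the docs.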
>
> +    }
> +  pp->reset();
> +
> +  unsigned num_local_results = json_object_array_length(metadata);
> +
> +  // Query upstream as well
> +  debuginfod_client *client = debuginfod_pool_begin();
> +  if (metadata && client != NULL)
> +    {
> +      add_client_federation_headers(client, conn);
> +
> +      int upstream_metadata_fd;
> +      upstream_metadata_fd = debuginfod_find_metadata(client, key.c_str(), value.c_str(), NULL);
> +      if (upstream_metadata_fd >= 0) {
> +        json_object *upstream_metadata_json = json_object_from_fd(upstream_metadata_fd);
> +        if (NULL != upstream_metadata_json)
> +          {
> +            for (int i = 0, n = json_object_array_length(upstream_metadata_json); i < n; i++) {
> +              json_object *entry = json_object_array_get_idx(upstream_metadata_json, i);
> +              if (verbose > 3)
> +                obatched(clog) << "metadata found remote "
> +                               << json_object_to_json_string_ext(entry,
> +                                                                 JSON_C_TO_STRING_PRETTY)
> +                               << endl;
> +
> +              json_object_get(entry); // increment reference count
> +              json_object_array_add(metadata, entry);
> +            }

What if the upstream json object isn't an array (or invalid)?

> +            json_object_put(upstream_metadata_json);
> +          }
> +        close(upstream_metadata_fd);
> +      }
> +      debuginfod_pool_end (client);
> +    }
> +
> +  unsigned num_total_results = json_object_array_length(metadata);
> +
> +  if (verbose > 2)
> +    obatched(clog) << "metadata found local=" << num_local_results
> +                   << " remote=" << (num_total_results-num_local_results)
> +                   << " total=" << num_total_results
> +                   << endl;
> +
> +  const char* metadata_str = (metadata != NULL) ?
> +    json_object_to_json_string(metadata) : "[ ]" ;
> +  if (! metadata_str)
> +    throw libc_exception (ENOMEM, "cannot allocate json");
> +  r = MHD_create_response_from_buffer (strlen(metadata_str),
> +                                       (void*) metadata_str,
> +                                       MHD_RESPMEM_MUST_COPY);
> +  *size = strlen(metadata_str);
> +  json_object_put(metadata);

I am not sure I totally follow the reference counting. There are a
couple of json_object_gets before adding things to an array to
increase their reference count (I assume they start at one). I assume
the json_object_put will also put/decrease the count of all json
objects contained (recursively)? Are the individual members released
then? It seems their reference count is one too high. Or am I missing
something? (The sketch below shows the model I am assuming.)
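For reference, the ownership model I am assuming json-c implements, as
a standalone sketch (my reading of the json-c docs, so please correct
me if it behaves differently):

#include <json-c/json.h>

int
main ()
{
  json_object *metadata = json_object_new_array ();  // refcount(metadata) == 1
  json_object *entry = json_object_new_object ();    // refcount(entry) == 1

  // json_object_array_add doesn't bump the count, it *steals* a
  // reference.  The explicit json_object_get compensates, so afterwards
  // one reference belongs to the array and one to us (in the patch: to
  // the defer_dtor).
  json_object_array_add (metadata, json_object_get (entry)); // refcount(entry) == 2
  json_object_put (entry);   // drop our reference; refcount(entry) == 1

  // put on the array recursively puts all elements, so both the array
  // and entry are freed here.
  json_object_put (metadata);
  return 0;
}

If that model is right, the counting in the patch does balance out
after all, but an explicit comment in the code would help the next
reader.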
> +  if (r)
> +    add_mhd_response_header(r, "Content-Type", "application/json");
> +  return r;
> +}
> +#endif
> +
> +
>  static struct MHD_Response*
>  handle_root (off_t* size)
>  {
> @@ -2601,6 +2787,7 @@ handler_cb (void * /*cls*/,
>    clock_gettime (CLOCK_MONOTONIC, &ts_start);
>    double afteryou = 0.0;
>    string artifacttype, suffix;
> +  string urlargs; // for logging
>
>    try
>      {
> @@ -2669,6 +2856,21 @@ handler_cb (void * /*cls*/,
>            inc_metric("http_requests_total", "type", artifacttype);
>            r = handle_metrics(& http_size);
>          }
> +#ifdef HAVE_JSON_C
> +      else if (url1 == "/metadata")
> +        {
> +          tmp_inc_metric m ("thread_busy", "role", "http-metadata");
> +          const char* key = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "key");
> +          const char* value = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "value");
> +          if (NULL == value || NULL == key)
> +            throw reportable_exception("/metadata webapi error, need key and value");
> +
> +          urlargs = string("?key=") + string(key) + string("&value=") + string(value); // apprx., for logging
> +          artifacttype = "metadata";
> +          inc_metric("http_requests_total", "type", artifacttype);
> +          r = handle_metadata(connection, key, value, &http_size);
> +        }
> +#endif

Are the key and value url decoded?

>        else if (url1 == "/")
>          {
>            artifacttype = "/";
> @@ -2705,7 +2907,7 @@ handler_cb (void * /*cls*/,
>    // afteryou: delay waiting for other client's identical query to complete
>    // deltas: total latency, including afteryou waiting
>    obatched(clog) << conninfo(connection)
> -                 << ' ' << method << ' ' << url
> +                 << ' ' << method << ' ' << url << urlargs
>                   << ' ' << http_code << ' ' << http_size
>                   << ' ' << (int)(afteryou*1000) << '+' << (int)((deltas-afteryou)*1000) << "ms"
>                   << endl;

OK, urlargs not set when there is no JSON_C.

> @@ -3956,12 +4158,13 @@ void groom()
>    if (interrupted) return;
>
>    // NB: "vacuum" is too heavy for even daily runs: it rewrites the entire db, so is done as maxigroom -G
> -  sqlite_ps g1 (db, "incremental vacuum", "pragma incremental_vacuum");
> -  g1.reset().step_ok_done();
> -  sqlite_ps g2 (db, "optimize", "pragma optimize");
> -  g2.reset().step_ok_done();
> -  sqlite_ps g3 (db, "wal checkpoint", "pragma wal_checkpoint=truncate");
> -  g3.reset().step_ok_done();
> +  { sqlite_ps g (db, "incremental vacuum", "pragma incremental_vacuum"); g.reset().step_ok_done(); }
> +  // https://www.sqlite.org/lang_analyze.html#approx
> +  { sqlite_ps g (db, "analyze setup", "pragma analysis_limit = 1000;\n"); g.reset().step_ok_done(); }
> +  { sqlite_ps g (db, "analyze", "analyze"); g.reset().step_ok_done(); }
> +  { sqlite_ps g (db, "analyze reload", "analyze sqlite_schema"); g.reset().step_ok_done(); }
> +  { sqlite_ps g (db, "optimize", "pragma optimize"); g.reset().step_ok_done(); }
> +  { sqlite_ps g (db, "wal checkpoint", "pragma wal_checkpoint=truncate"); g.reset().step_ok_done(); }

Reads nicer formatted this way. I don't fully understand why you run
both analyze and then optimize. Doesn't optimize call analyze itself?
(See the sketch at the end of this mail.)

>    database_stats_report();
>
> @@ -4333,6 +4536,8 @@ main (int argc, char *argv[])
>    if (maxigroom)
>      {
>        obatched(clog) << "maxigrooming database, please wait." << endl;
> +      // NB: this index alone can nearly double the database size!
> +      // NB: this index would be necessary to run source-file metadata searches fast
>        extra_ddl.push_back("create index if not exists " BUILDIDS "_r_sref_arc on " BUILDIDS "_r_sref(artifactsrc);");
>        extra_ddl.push_back("delete from " BUILDIDS "_r_sdef where not exists (select 1 from " BUILDIDS "_r_sref b where " BUILDIDS "_r_sdef.content = b.artifactsrc);");
>        extra_ddl.push_back("drop index if exists " BUILDIDS "_r_sref_arc;");
>
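And the sketch promised above: my reading of the "approximate ANALYZE"
recipe from the sqlite page the patch links to, as a plain sqlite3
snippet (not a patch suggestion; 'db' is just an open handle):

#include <sqlite3.h>

static void
groom_stats (sqlite3 *db)
{
  // Cap the number of rows ANALYZE examines per index, making it
  // cheap enough for routine grooming runs.
  sqlite3_exec (db, "pragma analysis_limit = 1000;", NULL, NULL, NULL);
  // "pragma optimize" runs ANALYZE itself, but only on tables whose
  // statistics look out of date -- which is why I'd expect the
  // explicit "analyze" in the patch to be redundant.
  sqlite3_exec (db, "pragma optimize;", NULL, NULL, NULL);
}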