public inbox for gdb-patches@sourceware.org
 help / color / mirror / Atom feed
* [PATCH] [gdb/testsuite] Add have_linux_btrace_bug
@ 2023-02-13 18:10 Tom de Vries
  2023-02-13 19:17 ` Metzger, Markus T
  0 siblings, 1 reply; 4+ messages in thread
From: Tom de Vries @ 2023-02-13 18:10 UTC (permalink / raw)
  To: gdb-patches; +Cc: Markus T . Metzger

The linux kernel commit 670638477aed ("perf/x86/intel/pt: Opportunistically
use single range output mode"), added in version v5.5.0 had a bug that was
fixed by commit ce0d998be927 ("perf/x86/intel/pt: Fix sampling using
single range output") in version 6.1.0.

The bug manifested for intel microarchitectures Rocket Lake, Raptor Lake and
Alder Lake.

Detect this set of conditions in a new proc have_linux_btrace_bug, and use it
in allow_btrace_tests.

I was initially planning to do just a require !have_linux_btrace_bug in the
failing test-cases, and that looked ok for PR30075 (with libipt) with just one
test-case failing, but there are a lot of fails for PR30073 (without libipt).

Tested on x86_64-linux.

PR testsuite/30073
PR testsuite/30075
Bug: https://sourceware.org/bugzilla/show_bug.cgi?id=30073
Bug: https://sourceware.org/bugzilla/show_bug.cgi?id=30075
---
 gdb/testsuite/lib/gdb.exp | 103 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 103 insertions(+)

diff --git a/gdb/testsuite/lib/gdb.exp b/gdb/testsuite/lib/gdb.exp
index 7f98f080328..a76397f50b8 100644
--- a/gdb/testsuite/lib/gdb.exp
+++ b/gdb/testsuite/lib/gdb.exp
@@ -3851,6 +3851,10 @@ gdb_caching_proc allow_btrace_tests {
     gdb_exit
     remote_file build delete $obj
 
+    if { $allow_btrace_tests } {
+	set allow_btrace_tests [expr ![have_linux_btrace_bug]]
+    }
+
     verbose "$me:  returning $allow_btrace_tests" 2
     return $allow_btrace_tests
 }
@@ -9374,5 +9378,104 @@ proc has_dependency { file dep } {
     return [regexp $dep $output]
 }
 
+# Return 1 if the linux kernel btrace bug introduced in kernel commit
+# 670638477aed ("perf/x86/intel/pt: Opportunistically use single range output
+# mode"), may manifest.
+
+gdb_caching_proc have_linux_btrace_bug {
+    set me "have_linux_btrace_bug"
+
+    if { ![istarget "i?86-*-*"] && ![istarget "x86_64-*-*"] } {
+	return 0
+    }
+
+    if { ![istarget *-*-linux*] } {
+	return 0
+    }
+
+    set res [remote_exec target "uname -r"]
+    set status [lindex $res 0]
+    set output [lindex $res 1]
+    if { $status != 0 } {
+	return 0
+    }
+
+    set re ^($::decimal)\\.($::decimal)\\.($::decimal)
+    if { [regexp $re $output dummy v1 v2 v3] != 1 } {
+	return 0
+    }
+    set v [list $v1 $v2 $v3]
+
+    set affected_version \
+	[expr [version_compare [list 5 5 0] <= $v] \
+	     && [version_compare $v < [list 6 1 0]]]
+    if { ! $affected_version } {
+	return 0
+    }
+
+    # Compile a test program.
+    set src {
+	#include "nat/x86-gcc-cpuid.h"
+
+	int main() {
+	  unsigned int eax, ebx, ecx, edx;
+
+	  if (!__get_cpuid (0, &eax, &ebx, &ecx, &edx))
+	    return 0;
+
+	  int intel_p = (ebx == signature_INTEL_ebx
+			 && ecx == signature_INTEL_ecx
+			 && edx == signature_INTEL_edx);
+
+	  if (!intel_p)
+	    return 0;
+
+	  if (! __get_cpuid (1, &eax, &ebx, &ecx, &edx))
+	    return 0;
+
+	  unsigned int ex_fam_id = (eax >> 20) & 0xff;
+	  unsigned int ex_mod_id = (eax >> 16) & 0xf;
+	  unsigned int fam_id = (eax >> 8) & 0xf;
+	  unsigned int model = (eax >> 4) & 0xf;
+
+	  if (fam_id == 6 || fam_id == 15)
+	    model = model + (ex_mod_id << 4);
+	  if (fam_id == 15)
+	    fam_id = fam_id + ex_fam_id;
+
+	  if (fam_id == 6)
+	    {
+	      /* Rocket Lake.  */
+	      if (model == 167)
+		return 1;
+	      /* Alder Lake.  */
+	      if (model == 151 || model == 154)
+		return 1;
+	      /* Raptor Lake.  */
+	      if (model == 183)
+		return 1;
+	  }
+
+	  return 0;
+	}
+    }
+
+    set flags "incdir=$::srcdir/.."
+    if { ! [gdb_simple_compile $me $src executable $flags] } {
+	return 0
+    }
+
+    set result [remote_exec target $obj]
+    set status [lindex $result 0]
+    set output [lindex $result 1]
+    if { $output != "" } {
+	set status 0
+    }
+
+    remote_file build delete $obj
+
+    return $status
+}
+
 # Always load compatibility stuff.
 load_lib future.exp

base-commit: 14d0e6818a022b72c265f15f63c8ccc2fc8c302a
-- 
2.35.3


^ permalink raw reply	[flat|nested] 4+ messages in thread

* RE: [PATCH] [gdb/testsuite] Add have_linux_btrace_bug
  2023-02-13 18:10 [PATCH] [gdb/testsuite] Add have_linux_btrace_bug Tom de Vries
@ 2023-02-13 19:17 ` Metzger, Markus T
  2023-02-14 10:59   ` [PATCH] [gdb/testsuite] Add xfail in gdb.python/py-record-btrace.exp Tom de Vries
  0 siblings, 1 reply; 4+ messages in thread
From: Metzger, Markus T @ 2023-02-13 19:17 UTC (permalink / raw)
  To: Tom de Vries; +Cc: gdb-patches

Hello Tom,

>The linux kernel commit 670638477aed ("perf/x86/intel/pt: Opportunistically
>use single range output mode"), added in version v5.5.0 had a bug that was
>fixed by commit ce0d998be927 ("perf/x86/intel/pt: Fix sampling using
>single range output") in version 6.1.0.
>
>The bug manifested for intel microarchitectures Rocket Lake, Raptor Lake and
>Alder Lake.

Actually, it's a h/w bug that got exposed by using single-range output.  It affects
Core processors starting from Ice Lake and it only affects Processor Trace.  Also,
it is only exposed by the py-record-btrace test, which does a lot of single-stepping.

It might be better to just add an XFAIL for that one test.  I'm not sure if maintaining
a processor list makes sense.  The kernel patch disables single-range for > 1 page
for all processors and does not try to maintain a list of affected processors.  We might
want to do the same in GDB and either disable that test for kernels between 5.5 and
6.1, or setup an XFAIL.

The Branch Trace Store issue you found seems to affect all btrace tests on ADL
E-cores.  This is a different issue.  I can reproduce it and I am currently debugging it.

Regards,
Markus.

Intel Deutschland GmbH
Registered Address: Am Campeon 10, 85579 Neubiberg, Germany
Tel: +49 89 99 8853-0, www.intel.de <http://www.intel.de>
Managing Directors: Christin Eisenschmid, Sharon Heck, Tiffany Doon Silva  
Chairperson of the Supervisory Board: Nicole Lau
Registered Office: Munich
Commercial Register: Amtsgericht Muenchen HRB 186928


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH] [gdb/testsuite] Add xfail in gdb.python/py-record-btrace.exp
  2023-02-13 19:17 ` Metzger, Markus T
@ 2023-02-14 10:59   ` Tom de Vries
  2023-02-14 11:34     ` Metzger, Markus T
  0 siblings, 1 reply; 4+ messages in thread
From: Tom de Vries @ 2023-02-14 10:59 UTC (permalink / raw)
  To: Metzger, Markus T; +Cc: gdb-patches

[-- Attachment #1: Type: text/plain, Size: 1410 bytes --]

[ was: Re: [PATCH] [gdb/testsuite] Add have_linux_btrace_bug ]
On 2/13/23 20:17, Metzger, Markus T wrote:
> Hello Tom,
> 
>> The linux kernel commit 670638477aed ("perf/x86/intel/pt: Opportunistically
>> use single range output mode"), added in version v5.5.0 had a bug that was
>> fixed by commit ce0d998be927 ("perf/x86/intel/pt: Fix sampling using
>> single range output") in version 6.1.0.
>>
>> The bug manifested for intel microarchitectures Rocket Lake, Raptor Lake and
>> Alder Lake.
> 
> Actually, it's a h/w bug that got exposed by using single-range output.  It affects
> Core processors starting from Ice Lake and it only affects Processor Trace.  Also,
> it is only exposed by the py-record-btrace test, which does a lot of single-stepping.
> 
> It might be better to just add an XFAIL for that one test.  I'm not sure if maintaining
> a processor list makes sense.  The kernel patch disables single-range for > 1 page
> for all processors and does not try to maintain a list of affected processors.  We might
> want to do the same in GDB and either disable that test for kernels between 5.5 and
> 6.1, or setup an XFAIL.
> 

I've gone with the XFAIL, see attached patch.  WDYT?

> The Branch Trace Store issue you found seems to affect all btrace tests on ADL
> E-cores.  This is a different issue.  I can reproduce it and I am currently debugging it.

I see, ack, thanks for the update.

- Tom


[-- Attachment #2: 0001-gdb-testsuite-Add-xfail-in-gdb.python-py-record-btra.patch --]
[-- Type: text/x-patch, Size: 3583 bytes --]

From 99ebac02397f19bc16cad6a6fef7828fbc716803 Mon Sep 17 00:00:00 2001
From: Tom de Vries <tdevries@suse.de>
Date: Mon, 13 Feb 2023 18:27:17 +0100
Subject: [PATCH] [gdb/testsuite] Add xfail in gdb.python/py-record-btrace.exp

There's a HW bug affecting Processor Trace on some Intel processors
(Ice Lake to Raptor Lake microarchitectures).

The bug was exposed by linux kernel commit 670638477aed
("perf/x86/intel/pt: Opportunistically use single range output mode"),
added in version v5.5.0, and was worked around by commit ce0d998be927
("perf/x86/intel/pt: Fix sampling using single range output") in version
6.1.0.

The bug manifests (on a Performance-core of an i7-1250U, an Alder Lake cpu) in
a single test-case:
...
(gdb) python insn = r.instruction_history^M
warning: Decode error (-20) at instruction 33 (offset = 0x3d6a, \
  pc = 0x400501): compressed return without call.^M
(gdb) FAIL: gdb.python/py-record-btrace.exp: prepare record: \
  python insn = r.instruction_history
...

Add a corresponding XFAIL.

Note that the i7-1250U has both Performance-cores and Efficient-cores, and on
an Efficient-core the test-case runs without any problems, so if the testsuite
run is not pinned to a specific cpu, the test may either PASS or XFAIL.

Tested on x86_64-linux:
- openSUSE Leap 15.4 with linux kernel version 5.14.21
- openSUSE Tumbleweed with linux kernel version 6.1.8

PR testsuite/30075
Bug: https://sourceware.org/bugzilla/show_bug.cgi?id=30075
---
 gdb/testsuite/gdb.python/py-record-btrace.exp | 44 ++++++++++++++++++-
 1 file changed, 43 insertions(+), 1 deletion(-)

diff --git a/gdb/testsuite/gdb.python/py-record-btrace.exp b/gdb/testsuite/gdb.python/py-record-btrace.exp
index 555b70ae336..7deb157b73c 100644
--- a/gdb/testsuite/gdb.python/py-record-btrace.exp
+++ b/gdb/testsuite/gdb.python/py-record-btrace.exp
@@ -54,7 +54,49 @@ with_test_prefix "prepare record" {
     gdb_test "python print(r.method)" "btrace"
     gdb_test "python print(r.format)" "pt|bts"
     gdb_test "stepi 100" ".*"
-    gdb_test_no_output "python insn = r.instruction_history"
+
+    # There's a HW bug affecting Processor Trace on some Intel processors.
+    # The bug was exposed by linux kernel commit 670638477aed
+    # ("perf/x86/intel/pt: Opportunistically use single range output mode"),
+    # added in version v5.5.0, and was worked around by commit ce0d998be927
+    # ("perf/x86/intel/pt: Fix sampling using single range output") in version
+    # 6.1.0.  Detect the kernel version range for which the problem may
+    # manifest.
+    set have_xfail 0
+    set v [linux_kernel_version]
+    if { $v == {} } {
+	set have_xfail 0
+    } else {
+	set have_xfail \
+	    [expr [version_compare [list 5 5 0] <= $v] \
+		 && [version_compare $v < [list 6 1 0]]]
+    }
+    set xfail_re \
+	[join \
+	     [list \
+		  "warning: Decode error \\(.*\\) at instruction $decimal" \
+		  "\\(offset = $hex, pc = $hex\\):" \
+		  "compressed return without call\\."]]
+
+    set got_xfail 0
+    set cmd "python insn = r.instruction_history"
+    gdb_test_multiple $cmd "" {
+	-re "^[string_to_regexp $cmd]\r\n$::gdb_prompt $" {
+	    pass $gdb_test_name
+	}
+	-re -wrap "$xfail_re" {
+	    if { $have_xfail } {
+		xfail $gdb_test_name
+		set got_xfail 1
+	    } else {
+		fail $gdb_test_name
+	    }
+	}
+    }
+    if { $got_xfail } {
+	return
+    }
+
     gdb_test_no_output "python call = r.function_call_history"
     gdb_test_no_output "python i = insn\[0\]"
     gdb_test_no_output "python c = call\[0\]"

base-commit: 37d75d4552d6b4d292ffb4dee92329a449288375
-- 
2.35.3


^ permalink raw reply	[flat|nested] 4+ messages in thread

* RE: [PATCH] [gdb/testsuite] Add xfail in gdb.python/py-record-btrace.exp
  2023-02-14 10:59   ` [PATCH] [gdb/testsuite] Add xfail in gdb.python/py-record-btrace.exp Tom de Vries
@ 2023-02-14 11:34     ` Metzger, Markus T
  0 siblings, 0 replies; 4+ messages in thread
From: Metzger, Markus T @ 2023-02-14 11:34 UTC (permalink / raw)
  To: Tom de Vries; +Cc: gdb-patches

Hello Tom,

+    set have_xfail 0
+    set v [linux_kernel_version]
+    if { $v == {} } {
+	set have_xfail 0

It is already initialized to 0 so we may turn this into

    if { $v != {} } {

+    } else {
+	set have_xfail \
+	    [expr [version_compare [list 5 5 0] <= $v] \
+		 && [version_compare $v < [list 6 1 0]]]
+    }


+    set xfail_re \
+	[join \
+	     [list \
+		  "warning: Decode error \\(.*\\) at instruction $decimal" \
+		  "\\(offset = $hex, pc = $hex\\):" \
+		  "compressed return without call\\."]]

The exact error symptom may vary.  On my system I got 'bad packet context'.
It should suffice to grep for the first part, without the actual error string.

Looks good to me.

Regards,
Markus.

Intel Deutschland GmbH
Registered Address: Am Campeon 10, 85579 Neubiberg, Germany
Tel: +49 89 99 8853-0, www.intel.de <http://www.intel.de>
Managing Directors: Christin Eisenschmid, Sharon Heck, Tiffany Doon Silva  
Chairperson of the Supervisory Board: Nicole Lau
Registered Office: Munich
Commercial Register: Amtsgericht Muenchen HRB 186928

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2023-02-14 11:34 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-02-13 18:10 [PATCH] [gdb/testsuite] Add have_linux_btrace_bug Tom de Vries
2023-02-13 19:17 ` Metzger, Markus T
2023-02-14 10:59   ` [PATCH] [gdb/testsuite] Add xfail in gdb.python/py-record-btrace.exp Tom de Vries
2023-02-14 11:34     ` Metzger, Markus T

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).