public inbox for systemtap@sourceware.org
 help / color / mirror / Atom feed
* [RFC] Systemtap translator support for hardware breakpoints on
@ 2010-01-07  8:35 Prerna Saxena
  2010-01-07 18:01 ` Frank Ch. Eigler
                   ` (3 more replies)
  0 siblings, 4 replies; 31+ messages in thread
From: Prerna Saxena @ 2010-01-07  8:35 UTC (permalink / raw)
  To: systemtap

[-- Attachment #1: Type: text/plain, Size: 851 bytes --]

Hi all,
This patch enables systemtap translator to probe hardware breakpoints on 
x86 and x86_64 (based on mainline kernel).

Syntax :
probe kernel.data(ADDRESS).write
probe kernel.data(ADDRESS).rw
probe kernel.data(ADDRESS).length(LEN).write
probe kernel.data(ADDRESS).length(LEN).rw
probe kernel.data("SYMBOL_NAME").write
probe kernel.data("SYMBOL_NAME").rw

The 'length' construct is at present only supported with an address, and
not extended to symbol names. Wherever 'length' is not specified, the 
translator requests a hardware breakpoint probe of length 1.
If an invalid length is specified which is not supported by the 
architecture, the translator skips registration for that probe with a 
warning about incompatible length.

Awaiting feedback,

-- 
Prerna Saxena

Linux Technology Centre,
IBM Systems and Technology Lab,
Bangalore, India

[-- Attachment #2: hwbkpt.patch --]
[-- Type: text/x-diff, Size: 17025 bytes --]

Index: systemtap/tapsets.cxx
===================================================================
--- systemtap.orig/tapsets.cxx
+++ systemtap/tapsets.cxx
@@ -5652,7 +5652,348 @@ kprobe_builder::build(systemtap_session 
     }
 }
 
+// ------------------------------------------------------------------------
+//  Hardware breakpoint based probes.
+// ------------------------------------------------------------------------
+
+static const string TOK_HWBKPT("data");
+static const string TOK_HWBKPT_WRITE("write");
+static const string TOK_HWBKPT_RW("rw");
+static const string TOK_LENGTH("length");
+
+#define HWBKPT_READ 0
+#define HWBKPT_WRITE 1
+#define HWBKPT_RW 2
+struct hwbkpt_derived_probe: public derived_probe
+{
+  hwbkpt_derived_probe (probe *base,
+                        probe_point *location,
+                        int64_t addr,
+			string symname,
+			int len,
+			bool has_only_read_access,
+			bool has_only_write_access,
+			bool has_rw_access
+                        );
+  Dwarf_Addr hwbkpt_addr;
+  string symbol_name;
+  unsigned int hwbkpt_access,hwbkpt_len;
+
+  void printsig (std::ostream &o) const;
+  void join_group (systemtap_session& s);
+};
+
+struct hwbkpt_derived_probe_group: public derived_probe_group
+{
+private:
+
+  vector<hwbkpt_derived_probe*> hwbkpt_probes_vector;
+  typedef vector<hwbkpt_derived_probe*>::iterator h_p_v_iterator;
+
+public:
+  void enroll (hwbkpt_derived_probe* probe);
+  void emit_module_decls (systemtap_session& s);
+  void emit_module_init (systemtap_session& s);
+  void emit_module_exit (systemtap_session& s);
+};
+
+hwbkpt_derived_probe::hwbkpt_derived_probe (probe *base,
+					    probe_point *location,
+					    int64_t addr,
+					    string symname,
+					    int len,
+					    bool has_only_read_access,
+					    bool has_only_write_access,
+					    bool has_rw_access
+					    ):
+  derived_probe (base, location),
+  hwbkpt_addr (addr),
+  symbol_name (symname),
+  hwbkpt_len (len)
+{
+  this->tok = base->tok;
+
+  vector<probe_point::component*> comps;
+  comps.push_back (new probe_point::component(TOK_KERNEL));
+
+  if (hwbkpt_addr)
+	  comps.push_back (new probe_point::component (TOK_HWBKPT, new literal_number(hwbkpt_addr)));
+  else
+	if (symbol_name.size())
+	  comps.push_back (new probe_point::component (TOK_HWBKPT, new literal_string(symbol_name)));
+
+  comps.push_back (new probe_point::component (TOK_LENGTH, new literal_number(hwbkpt_len)));
+
+  if (has_only_read_access)
+	this->hwbkpt_access = HWBKPT_READ ;
+//TODO add code for comps.push_back for read, since this flag is not for x86
 
+  else
+	{
+	  if (has_only_write_access)
+		{
+		  this->hwbkpt_access = HWBKPT_WRITE ;
+	  	  comps.push_back (new probe_point::component(TOK_HWBKPT_WRITE));
+		}
+	  else
+		{
+		  this->hwbkpt_access = HWBKPT_RW ;
+	  	  comps.push_back (new probe_point::component(TOK_HWBKPT_RW));
+		}
+	}
+
+  this->sole_location()->components = comps;
+}
+
+void hwbkpt_derived_probe::printsig (ostream& o) const
+{
+  sole_location()->print (o);
+  printsig_nested (o);
+}
+
+void hwbkpt_derived_probe::join_group (systemtap_session& s)
+{
+  if (! s.hwbkpt_derived_probes)
+        s.hwbkpt_derived_probes = new hwbkpt_derived_probe_group ();
+  s.hwbkpt_derived_probes->enroll (this);
+}
+
+void hwbkpt_derived_probe_group::enroll (hwbkpt_derived_probe* p)
+{
+  hwbkpt_probes_vector.push_back (p);
+}
+
+void
+hwbkpt_derived_probe_group::emit_module_decls (systemtap_session& s)
+{
+  if (hwbkpt_probes_vector.empty()) return;
+
+  s.op->newline() << "/* ---- hwbkpt-based probes ---- */";
+
+  // Warn of misconfigured kernels
+  s.op->newline() << "#ifndef CONFIG_HAVE_HW_BREAKPOINT";
+  s.op->newline() << "#error \"Need CONFIG_HAVE_HW_BREAKPOINT!\"";
+  s.op->newline() << "#endif";
+
+  s.op->newline() << "#include <linux/perf_event.h>";
+  s.op->newline() << "#include <linux/hw_breakpoint.h>";
+  s.op->newline();
+
+// Define macros for access type
+  s.op->newline() << "#define HWBKPT_READ 0";
+  s.op->newline() << "#define HWBKPT_WRITE 1";
+  s.op->newline() << "#define HWBKPT_RW 2";
+
+  // Forward declare the master entry functions
+  s.op->newline() << "static int enter_hwbkpt_probe (struct perf_event *bp,";
+  s.op->line() << " int nmi,";
+  s.op->line() << " struct perf_sample_data *data,";
+  s.op->line() << " struct pt_regs *regs);";
+
+  // Emit the actual probe list.
+
+  s.op->newline() << "static struct perf_event_attr ";
+  s.op->newline() << "stap_hwbkpt_probe_array[" << hwbkpt_probes_vector.size() << "];";
+
+  s.op->newline() << "static struct perf_event **";
+  s.op->newline() << "stap_hwbkpt_ret_array[" << hwbkpt_probes_vector.size() << "];";
+  s.op->newline() << "static struct stap_hwbkpt_probe {";
+  s.op->newline() << "int registered_p:1;";
+// registered_p = -1 signifies an invalid probe that must not be registered
+// registered_p =  0 signifies a probe that failed registration
+// registered_p =  1 signifies a probe that got registered successfully
+
+  // Probe point & Symbol Names are mostly small and uniform enough
+  // to justify putting char[MAX]'s into  the array instead of
+  // relocated char*'s.
+
+  size_t pp_name_max = 0 , symbol_name_max = 0;
+  size_t pp_name_tot = 0 , symbol_name_tot = 0;
+  for (unsigned int it = 0; it < hwbkpt_probes_vector.size(); it++)
+    {
+      hwbkpt_derived_probe* p = hwbkpt_probes_vector.at(it);
+#define DOIT(var,expr) do {                             \
+        size_t var##_size = (expr) + 1;                 \
+        var##_max = max (var##_max, var##_size);        \
+        var##_tot += var##_size; } while (0)
+      DOIT(pp_name, lex_cast_qstring(*p->sole_location()).size());
+      DOIT(symbol_name, p->symbol_name.size());
+#undef DOIT
+    }
+
+#define CALCIT(var)                                                     \
+  s.op->newline() << "const char " << #var << "[" << var##_name_max << "] ;";
+  CALCIT(pp);
+  CALCIT(symbol);
+#undef CALCIT
+
+  s.op->newline() << "const unsigned long address;";
+  s.op->newline() << "uint8_t atype;";
+  s.op->newline() << "uint8_t len;";
+  s.op->newline() << "void (* const ph) (struct context*);";
+  s.op->newline() << "} stap_hwbkpt_probes[] = {";
+  s.op->indent(1);
+
+  for (unsigned int it = 0; it < hwbkpt_probes_vector.size(); it++)
+    {
+      hwbkpt_derived_probe* p = hwbkpt_probes_vector.at(it);
+      s.op->newline() << "{";
+      if (p->symbol_name.size())
+      s.op->line() << " .address=(unsigned long)0x0" << "ULL,";
+      else
+      s.op->line() << " .address=(unsigned long)0x" << hex << p->hwbkpt_addr << dec << "ULL,";
+      s.op->line() << " .atype=" << p->hwbkpt_access << ",";
+      s.op->line() << " .len=" << p->hwbkpt_len << ",";
+      s.op->line() << " .pp=" << lex_cast_qstring (*p->sole_location()) << ",";
+      s.op->line() << " .symbol=\"" << p->symbol_name << "\",";
+      s.op->line() << " .ph=&" << p->name << "";
+      s.op->line() << " },";
+    }
+  s.op->newline() << "};";
+
+  // Emit the hwbkpt callback function
+  s.op->newline() ;
+  s.op->newline() << "static int enter_hwbkpt_probe (struct perf_event *bp,";
+  s.op->line() << " int nmi,";
+  s.op->line() << " struct perf_sample_data *data,";
+  s.op->line() << " struct pt_regs *regs) {";
+  s.op->newline() << "unsigned int i;";
+  s.op->newline() << "if (bp->attr.type != PERF_TYPE_BREAKPOINT )";
+  s.op->newline() << "return -1;";
+  s.op->newline() << "for ( i=0; i<" << hwbkpt_probes_vector.size() << "; i++) {";
+  s.op->newline() << "struct perf_event_attr *hp = & stap_hwbkpt_probe_array[i];";
+  s.op->newline() << "if (bp->attr.bp_addr==hp->bp_addr && bp->attr.bp_type==hp->bp_type && bp->attr.bp_len==hp->bp_len ) {";
+  s.op->newline() << "struct stap_hwbkpt_probe *sdp = &stap_hwbkpt_probes[i];";
+  common_probe_entryfn_prologue (s.op, "STAP_SESSION_RUNNING", "sdp->pp");
+  s.op->newline() << "c->regs = regs;";
+  s.op->newline() << "(*sdp->ph) (c);";
+  common_probe_entryfn_epilogue (s.op);
+  s.op->newline() << "return 0;";
+  s.op->newline() << "}";
+  s.op->newline(-1) << "}";
+  s.op->newline() << "return 0;";
+  s.op->newline() << "}";
+}
+
+void
+hwbkpt_derived_probe_group::emit_module_init (systemtap_session& s)
+{
+  s.op->newline() << "for (i=0; i<" << hwbkpt_probes_vector.size() << "; i++) {";
+  s.op->newline(1) << "struct stap_hwbkpt_probe *sdp = & stap_hwbkpt_probes[i];";
+  s.op->newline() << "struct perf_event_attr *hp = & stap_hwbkpt_probe_array[i];";
+  s.op->newline() << "void *addr = (void *) sdp->address;";
+  s.op->newline() << "const char *hwbkpt_symbol_name = addr ? NULL : sdp->symbol;";
+  s.op->newline() << "	hw_breakpoint_init(hp);";
+  s.op->newline() << "	if (addr) ";
+  s.op->newline() << "	    hp->bp_addr = (unsigned long) addr;";
+  s.op->newline() << "	else ";
+  s.op->newline() << "	   hp->bp_addr = kallsyms_lookup_name(hwbkpt_symbol_name);";
+  s.op->newline() << "#ifdef CONFIG_X86";
+  s.op->newline() << "  switch(sdp->len) {";
+  s.op->newline() << "	case HW_BREAKPOINT_LEN_1:";
+  s.op->newline() << "    hp->bp_len = HW_BREAKPOINT_LEN_1;";
+  s.op->newline() << "    break;";
+  s.op->newline() << "	case HW_BREAKPOINT_LEN_2:";
+  s.op->newline() << "    hp->bp_len = HW_BREAKPOINT_LEN_2;";
+  s.op->newline() << "    break;";
+  s.op->newline() << "	case HW_BREAKPOINT_LEN_4:";
+  s.op->newline() << "    hp->bp_len = HW_BREAKPOINT_LEN_4;";
+  s.op->newline() << "    break;";
+  s.op->newline() << "#ifdef CONFIG_X86_64";
+  s.op->newline() << "	case HW_BREAKPOINT_LEN_8:";
+  s.op->newline() << "    hp->bp_len = HW_BREAKPOINT_LEN_8;";
+  s.op->newline() << "    break;";
+  s.op->newline() << "#endif /*CONFIG_X86_64*/";
+  s.op->newline() << "	default:";
+  s.op->newline() << "  	_stp_warn(\"Probe %s registration skipped : unsupported length. Supported lengths for x86 are 1,2,4 {8 for x86_84 only} \",sdp->pp);";
+  s.op->newline() << "		sdp->registered_p = -1;";
+  s.op->newline() << "		stap_hwbkpt_ret_array[i] = ERR_PTR(-EINVAL);";
+  s.op->newline() << "	}";
+  s.op->newline() << "  switch(sdp->atype)";
+  s.op->newline() << "	{";
+  s.op->newline() << "    case HWBKPT_READ:";
+  s.op->newline() << "  	_stp_warn(\"Probe %s registration skipped : READ-ONLY hardware breakpoints not supported on x86\",sdp->pp);";
+  s.op->newline() << "		sdp->registered_p = -1;";
+  s.op->newline() << "		break;";
+  s.op->newline() << "    case HWBKPT_WRITE:";
+  s.op->newline() << "		hp->bp_type = HW_BREAKPOINT_W;";
+  s.op->newline() << "		break;";
+  s.op->newline() << "    case HWBKPT_RW:";
+  s.op->newline() << "		hp->bp_type = HW_BREAKPOINT_R | HW_BREAKPOINT_W;";
+  s.op->newline() << "		break;";
+  s.op->newline() << "  }";
+  s.op->newline() << "#endif /*CONFIG_X86*/";
+  s.op->newline() << "probe_point = sdp->pp;"; // for error messages
+  s.op->newline() << "if (sdp->registered_p >= 0) {";
+  s.op->newline() << " stap_hwbkpt_ret_array[i] = register_wide_hw_breakpoint( hp, (void *)&enter_hwbkpt_probe );";
+  s.op->newline() << " if (IS_ERR(stap_hwbkpt_ret_array[i])) {";
+  s.op->newline() << "     rc = PTR_ERR(stap_hwbkpt_ret_array[i]);";
+  s.op->newline() << "      sdp->registered_p = 0;";
+  s.op->newline(1) << "	    _stp_warn(\" Hwbkpt Probe %s : Registration error rc= %d, Addr = %p, name = %s\", probe_point, rc, addr, hwbkpt_symbol_name);";
+  s.op->newline(-1) << "  }";
+  s.op->newline() << "    else sdp->registered_p = 1;";
+  s.op->newline() << "}";
+  s.op->newline(-1) << "}"; // for loop
+}
+
+void
+hwbkpt_derived_probe_group::emit_module_exit (systemtap_session& s)
+{
+  //Unregister hwbkpt probes.
+  s.op->newline() << "for (i=0; i<" << hwbkpt_probes_vector.size() << "; i++) {";
+  s.op->newline(1) << "struct stap_hwbkpt_probe *sdp = & stap_hwbkpt_probes[i];";
+//  s.op->newline() << "if ( sdp->registered_p <= 0 ) continue;";
+  s.op->newline() << " if ( IS_ERR(stap_hwbkpt_ret_array[i]) ) continue;";
+  s.op->newline() << " unregister_wide_hw_breakpoint(stap_hwbkpt_ret_array[i]);";
+  s.op->newline() << "sdp->registered_p = 0;";
+  s.op->newline(-1) << "}";
+}
+
+struct hwbkpt_builder: public derived_probe_builder
+{
+  hwbkpt_builder() {}
+  virtual void build(systemtap_session & sess,
+		     probe * base,
+		     probe_point * location,
+		     literal_map_t const & parameters,
+		     vector<derived_probe *> & finished_results);
+};
+
+void
+hwbkpt_builder::build(systemtap_session & sess,
+		      probe * base,
+		      probe_point * location,
+		      literal_map_t const & parameters,
+		      vector<derived_probe *> & finished_results)
+{
+  string symbol_str_val;
+  int64_t hwbkpt_address, len;
+  bool has_addr, has_symbol_str, has_write, has_rw, has_len;
+
+  has_addr = get_param (parameters, TOK_HWBKPT, hwbkpt_address);
+  has_symbol_str = get_param (parameters, TOK_HWBKPT, symbol_str_val);
+  has_len = get_param (parameters, TOK_LENGTH, len);
+  has_write = (parameters.find(TOK_HWBKPT_WRITE) != parameters.end());
+  has_rw = (parameters.find(TOK_HWBKPT_RW) != parameters.end());
+
+  if (!has_len)
+	len = 1;
+
+  if (has_addr)
+      finished_results.push_back (new hwbkpt_derived_probe (base,
+							    location,
+							    hwbkpt_address,
+							    "",len,0,
+							    has_write,
+							    has_rw));
+  else // has symbol_str
+      finished_results.push_back (new hwbkpt_derived_probe (base,
+							    location,
+							    0,
+							    symbol_str_val,len,0,
+							    has_write,
+							    has_rw));
+}
 
 // ------------------------------------------------------------------------
 // statically inserted kernel-tracepoint derived probes
@@ -6143,7 +6484,7 @@ tracepoint_derived_probe_group::emit_mod
         }
       s.op->line() << ") {";
       s.op->indent(1);
-      common_probe_entryfn_prologue (s.op, "STAP_SESSION_RUNNING", 
+      common_probe_entryfn_prologue (s.op, "STAP_SESSION_RUNNING",
                                      lex_cast_qstring (*p->sole_location()));
       s.op->newline() << "c->marker_name = "
                       << lex_cast_qstring (p->tracepoint_name)
@@ -6470,7 +6811,6 @@ tracepoint_builder::init_dw(systemtap_se
   return true;
 }
 
-
 void
 tracepoint_builder::build(systemtap_session& s,
                           probe *base, probe_point *location,
@@ -6488,7 +6828,6 @@ tracepoint_builder::build(systemtap_sess
 }
 
 
-
 // ------------------------------------------------------------------------
 //  Standard tapset registry.
 // ------------------------------------------------------------------------
@@ -6535,6 +6874,21 @@ register_standard_tapsets(systemtap_sess
      ->bind_num(TOK_MAXACTIVE)->bind(new kprobe_builder());
   s.pattern_root->bind(TOK_KPROBE)->bind_num(TOK_STATEMENT)
       ->bind(TOK_ABSOLUTE)->bind(new kprobe_builder());
+
+  //Hwbkpt based probe
+  s.pattern_root->bind(TOK_KERNEL)->bind_num(TOK_HWBKPT)
+	->bind(TOK_HWBKPT_WRITE)->bind(new hwbkpt_builder());
+  s.pattern_root->bind(TOK_KERNEL)->bind_str(TOK_HWBKPT)
+	->bind(TOK_HWBKPT_WRITE)->bind(new hwbkpt_builder());
+  s.pattern_root->bind(TOK_KERNEL)->bind_num(TOK_HWBKPT)
+	->bind(TOK_HWBKPT_RW)->bind(new hwbkpt_builder());
+  s.pattern_root->bind(TOK_KERNEL)->bind_str(TOK_HWBKPT)
+	->bind(TOK_HWBKPT_RW)->bind(new hwbkpt_builder());
+  s.pattern_root->bind(TOK_KERNEL)->bind_num(TOK_HWBKPT)
+	->bind_num(TOK_LENGTH)->bind(TOK_HWBKPT_WRITE)->bind(new hwbkpt_builder());
+  s.pattern_root->bind(TOK_KERNEL)->bind_num(TOK_HWBKPT)
+	->bind_num(TOK_LENGTH)->bind(TOK_HWBKPT_RW)->bind(new hwbkpt_builder());
+  // length supported with address only, not symbol names
 }
 
 
@@ -6561,6 +6915,7 @@ all_session_groups(systemtap_session& s)
   DOONE(mark);
   DOONE(tracepoint);
   DOONE(kprobe);
+  DOONE(hwbkpt);
   DOONE(hrtimer);
   DOONE(perfmon);
   DOONE(procfs);
Index: systemtap/session.h
===================================================================
--- systemtap.orig/session.h
+++ systemtap/session.h
@@ -31,6 +31,7 @@ struct derived_probe;
 struct be_derived_probe_group;
 struct dwarf_derived_probe_group;
 struct kprobe_derived_probe_group;
+struct hwbkpt_derived_probe_group;
 struct uprobe_derived_probe_group;
 struct utrace_derived_probe_group;
 struct itrace_derived_probe_group;
@@ -173,6 +174,7 @@ struct systemtap_session
   be_derived_probe_group* be_derived_probes;
   dwarf_derived_probe_group* dwarf_derived_probes;
   kprobe_derived_probe_group* kprobe_derived_probes;
+  hwbkpt_derived_probe_group* hwbkpt_derived_probes;
   uprobe_derived_probe_group* uprobe_derived_probes;
   utrace_derived_probe_group* utrace_derived_probes;
   itrace_derived_probe_group* itrace_derived_probes;
Index: systemtap/elaborate.cxx
===================================================================
--- systemtap.orig/elaborate.cxx
+++ systemtap/elaborate.cxx
@@ -1523,6 +1523,7 @@ systemtap_session::systemtap_session ():
   be_derived_probes(0),
   dwarf_derived_probes(0),
   kprobe_derived_probes(0),
+  hwbkpt_derived_probes(0),
   uprobe_derived_probes(0),
   utrace_derived_probes(0),
   itrace_derived_probes(0),

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC] Systemtap translator support for hardware breakpoints on
  2010-01-07  8:35 [RFC] Systemtap translator support for hardware breakpoints on Prerna Saxena
@ 2010-01-07 18:01 ` Frank Ch. Eigler
  2010-01-07 21:57   ` Roland McGrath
                     ` (2 more replies)
  2010-01-07 23:15 ` [RFC] Systemtap translator support for hardware breakpoints on Roland McGrath
                   ` (2 subsequent siblings)
  3 siblings, 3 replies; 31+ messages in thread
From: Frank Ch. Eigler @ 2010-01-07 18:01 UTC (permalink / raw)
  To: Prerna Saxena; +Cc: systemtap


Prerna Saxena <prerna@linux.vnet.ibm.com> writes:

> [...]
> This patch enables systemtap translator to probe hardware breakpoints
> on x86 and x86_64 (based on mainline kernel).

Looks like a good start.

In order to merge this, it is essential to have a testsuite, posted
results on a variety of architectures and kernel versions (kfail for
older kernels), and documentation.


> Syntax :
> probe kernel.data(ADDRESS).write
> probe kernel.data(ADDRESS).rw
> probe kernel.data(ADDRESS).length(LEN).write
> probe kernel.data(ADDRESS).length(LEN).rw
> probe kernel.data("SYMBOL_NAME").write
> probe kernel.data("SYMBOL_NAME").rw

(I wonder if simple .read or .write would be sufficient as suffixes,
and let a script render both as 

   probe kernel.data(ADDRESS).write, probe kernel.data(ADDRESS).read

and let the translator collapse such duplication into the internal RW case.)


> +  // Warn of misconfigured kernels
> +  s.op->newline() << "#ifndef CONFIG_HAVE_HW_BREAKPOINT";
> +  s.op->newline() << "#error \"Need CONFIG_HAVE_HW_BREAKPOINT!\"";
> +  s.op->newline() << "#endif";

Instead or in addition, the translator could make the whole probe point
type conditional on session.kernel_config("CONFIG_HAVE_HW_BREAKPOINT").

> +// Define macros for access type
> +  s.op->newline() << "#define HWBKPT_READ 0";
> +  s.op->newline() << "#define HWBKPT_WRITE 1";
> +  s.op->newline() << "#define HWBKPT_RW 2";

(Are these necessary?  Could the translator not emit symbols like
HW_BREAKPOINT_W directly?)


> +  s.op->newline() << "static struct stap_hwbkpt_probe {";
> +  s.op->newline() << "int registered_p:1;";
> +// registered_p = -1 signifies an invalid probe that must not be registered
> +// registered_p =  0 signifies a probe that failed registration
> +// registered_p =  1 signifies a probe that got registered successfully
 
A one-bit value can't represent three different values.


> +      if (p->symbol_name.size())
> +      s.op->line() << " .address=(unsigned long)0x0" << "ULL,";
 
(You probably don't need explicit initialization to zero.)


> +  s.op->newline() << "static int enter_hwbkpt_probe (struct perf_event *bp,";
> [...]
> +  s.op->newline() << "for ( i=0; i<" << hwbkpt_probes_vector.size() << "; i++) {";
> +  s.op->newline() << "struct perf_event_attr *hp = & stap_hwbkpt_probe_array[i];";
> +  s.op->newline() << "if (bp->attr.bp_addr==hp->bp_addr && bp->attr.bp_type==hp->bp_type && bp->attr.bp_len==hp->bp_len ) {";
> [...]

To what extent does this handle multiple hw breakpoints on the same address?


> [...]
> +void
> +hwbkpt_derived_probe_group::emit_module_init (systemtap_session& s)
> +{ [...]
> +  s.op->newline() << "const char *hwbkpt_symbol_name = addr ? NULL : sdp->symbol;";
> +  s.op->newline() << "	hw_breakpoint_init(hp);";

Normally we use newline(1) or (-1) to indent instead of explicit
spaces like this.

> +  s.op->newline() << "	if (addr) ";
> +  s.op->newline() << "	    hp->bp_addr = (unsigned long) addr;";
> +  s.op->newline() << "	else ";
> +  s.op->newline() << "	   hp->bp_addr = kallsyms_lookup_name(hwbkpt_symbol_name);";

Is kallsyms_lookup_name actually module-exported these days?  Should the
translator try to resolve symbols at translate time?  Wildcards?


> +  s.op->newline() << "#ifdef CONFIG_X86";
> +  s.op->newline() << "  switch(sdp->len) {";
> +  s.op->newline() << "	case HW_BREAKPOINT_LEN_1:";
> +  s.op->newline() << "    hp->bp_len = HW_BREAKPOINT_LEN_1;";
> [...]

Is this sort of switch really necessary?  Why not just set

    hp->bp_len = sdp->len;

> +  s.op->newline() << "	default:";
> +  s.op->newline() << "  	_stp_warn(\"Probe %s registration skipped : unsupported length. Supported lengths for x86 are 1,2,4 {8 for x86_84 only} \",sdp->pp);";
> +  s.op->newline() << "		sdp->registered_p = -1;";
> +  s.op->newline() << "		stap_hwbkpt_ret_array[i] = ERR_PTR(-EINVAL);";

Why is this necessary ...



> +  s.op->newline() << "    case HWBKPT_READ:";
> +  s.op->newline() << "  	_stp_warn(\"Probe %s registration skipped : READ-ONLY hardware breakpoints not supported on x86\",sdp->pp);";
> +  s.op->newline() << "		sdp->registered_p = -1;";
> +  s.op->newline() << "		break;";

... and this too ...

> +  s.op->newline() << "if (sdp->registered_p >= 0) {";
> +  s.op->newline() << " stap_hwbkpt_ret_array[i] = register_wide_hw_breakpoint( hp, (void *)&enter_hwbkpt_probe );";

... considering that register_wide_hw_breakpoint() would surely
check its inputs and reject requests deemed inappropriate for
the current architecture?  

Note also that registered_p always >= 0 for your actual int:1 data
type.  Consider simply == 0 for unregistered, == 1 for registered.


> +  s.op->newline() << " if (IS_ERR(stap_hwbkpt_ret_array[i])) {";
> +  s.op->newline() << "     rc = PTR_ERR(stap_hwbkpt_ret_array[i]);";
> +  s.op->newline() << "      sdp->registered_p = 0;";
> +  s.op->newline(1) << "	    _stp_warn(\" Hwbkpt Probe %s : Registration error rc= %d, Addr = %p, name = %s\", probe_point, rc, addr, hwbkpt_symbol_name);";
> +  s.op->newline(-1) << "  }";
> +  s.op->newline() << "    else sdp->registered_p = 1;";
> +  s.op->newline() << "}";
> +  s.op->newline(-1) << "}"; // for loop

Not so simple: if you get a failure on the sixth registration, you have to
unregister the prior ones.


- FChE

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC] Systemtap translator support for hardware breakpoints on
  2010-01-07 18:01 ` Frank Ch. Eigler
@ 2010-01-07 21:57   ` Roland McGrath
  2010-01-07 22:39     ` Roland McGrath
  2010-01-08 14:35   ` [RFC] Systemtap translator support for hw breakpoints on x86 Prerna Saxena
  2010-01-25 19:00   ` [RFC V2] Systemtap translator support for kernel hardware breakpoints Prerna Saxena
  2 siblings, 1 reply; 31+ messages in thread
From: Roland McGrath @ 2010-01-07 21:57 UTC (permalink / raw)
  To: Frank Ch. Eigler; +Cc: Prerna Saxena, systemtap

> (I wonder if simple .read or .write would be sufficient as suffixes,
> and let a script render both as 
> 
>    probe kernel.data(ADDRESS).write, probe kernel.data(ADDRESS).read
> 
> and let the translator collapse such duplication into the internal RW case.)

That sounds reasonable to me.


Thanks,
Roland

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC] Systemtap translator support for hardware breakpoints on
  2010-01-07 21:57   ` Roland McGrath
@ 2010-01-07 22:39     ` Roland McGrath
  2010-01-07 22:45       ` Frank Ch. Eigler
  2010-01-08  5:01       ` Ananth N Mavinakayanahalli
  0 siblings, 2 replies; 31+ messages in thread
From: Roland McGrath @ 2010-01-07 22:39 UTC (permalink / raw)
  To: Frank Ch. Eigler, Prerna Saxena, systemtap

> > (I wonder if simple .read or .write would be sufficient as suffixes,
> > and let a script render both as 
> > 
> >    probe kernel.data(ADDRESS).write, probe kernel.data(ADDRESS).read
> > 
> > and let the translator collapse such duplication into the internal RW case.)
> 
> That sounds reasonable to me.

Let me add a wrinkle to this subject.  This may have been implied in what
you meant, but it's not obvious and it just occurred to me.

On x86 at least, you can't distinguish different kinds of hits.
If one debug register enables RW, then you just know that debug
register hit, not whether it was for a read or for a write.

So for:

probe kernel.data(0x1234).write, kernel.data(0x1234).read {
  same_stuff()
}

you can turn it into a RW breakpoint.  But for:

probe kernel.data(0x1234).write { one_thing() }
probe kernel.data(0x1234).read { another_thing() }

you cannot consolidate these into one RW breakpoint, because you
won't know which handler to run.  This is in contrast to e.g. two
kprobes at the same PC.  Those you can consolidate at translation
time so the module init code only inserts one kprobe and its handler
does both things.

Even for the same_stuff() case, it's not clear you can really 
consolidate them if something like pp() is used.  i.e., where the
name of the specific probe is part of the "stuff", it's not really
the same stuff any more.

On x86 you can distinguish this by just using two different debug
registers, one for the read and one for the write.  You can tell
which of the two hit.  Only if you don't care to distinguish them
can you use a single debug register set to RW.


Thanks,
Roland

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC] Systemtap translator support for hardware breakpoints on
  2010-01-07 22:39     ` Roland McGrath
@ 2010-01-07 22:45       ` Frank Ch. Eigler
  2010-01-08  5:01       ` Ananth N Mavinakayanahalli
  1 sibling, 0 replies; 31+ messages in thread
From: Frank Ch. Eigler @ 2010-01-07 22:45 UTC (permalink / raw)
  To: Roland McGrath; +Cc: Prerna Saxena, systemtap

Hi -

On Thu, Jan 07, 2010 at 02:38:41PM -0800, Roland McGrath wrote:
> [...]
> On x86 at least, you can't distinguish different kinds of hits.

If that means that, at callback time, we can't tell the two cases
apart, then ...

> probe kernel.data(0x1234).write { one_thing() }
> probe kernel.data(0x1234).read { another_thing() }
> 
> you cannot consolidate these into one RW breakpoint, because you
> won't know which handler to run.

... this is a real problem.  Otherwise, if we had a usable access-type
flag available at the callback, we could dispatch to the handlers
(including setting custom pp()'s) based upon that.


- FChE

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC] Systemtap translator support for hardware breakpoints on
  2010-01-07  8:35 [RFC] Systemtap translator support for hardware breakpoints on Prerna Saxena
  2010-01-07 18:01 ` Frank Ch. Eigler
@ 2010-01-07 23:15 ` Roland McGrath
  2010-01-08  1:03   ` Frank Ch. Eigler
  2010-01-08  1:19 ` Roland McGrath
  2010-01-08  1:53 ` Roland McGrath
  3 siblings, 1 reply; 31+ messages in thread
From: Roland McGrath @ 2010-01-07 23:15 UTC (permalink / raw)
  To: Prerna Saxena; +Cc: systemtap

> probe kernel.data(ADDRESS).write
> probe kernel.data(ADDRESS).rw
> probe kernel.data(ADDRESS).length(LEN).write
> probe kernel.data(ADDRESS).length(LEN).rw
> probe kernel.data("SYMBOL_NAME").write
> probe kernel.data("SYMBOL_NAME").rw
> 
> The 'length' construct is at present only supported with an address, and
> not extended to symbol names. Wherever 'length' is not specified, the 
> translator requests a hardware breakpoint probe of length 1.
> If an invalid length is specified which is not supported by the 
> architecture, the translator skips registration for that probe with a 
> warning about incompatible length.

I don't understand why the literal vs symbolic selection of an address has
anything to do with .length().  The syntax should be consistent:

probe kernel.data({ADDRESS,"NAME"})[.length(LEN)].{read,write}

I don't think the specification of the script-language feature should have
that exact wording about the length.  On other machines (IIRC all but x86,
in fact), there is no option for length, it's always aligned-word-size.

On other machines there is a hard alignment constraint on the address.
e.g. on powerpc, it doesn't even store the low two bits of the address.

On x86, there is a length-based implicit alignment constraint.  That is,
for .length(2) the address is considered % 2, for .length(4) % 4, for
.length(8) % 8.  

On x86, and I think also on powerpc and probably all others, the meaning of
the addr+length (i.e. fixed addr+8 on powerpc) range is that access to any
byte within that range triggers the watchpoint, not just an exact access
that specifies that address and length.

Nothing will complain about misalignment (unless maybe the hw_breakpoint
layer does, I don't recall), but the low bits will be ignored so
kernel.data(0x123).length(2) really means kernel.data(0x122).length(2).
The translator should prevent you from doing something that nonobvious.


Next issue.  So that's the basic primitive feature.  I take this to mean
that "SYMBOL" is just a way to specify the address with an ELF symbol.
i.e. it means the same as ADDRESS aside from runtime address bias.

What I think you'd really want is:

	probe kernel.data($variable).write { ... }

That means "variable" gets looked up by $ rules, but in some idea of the
"global" scope for kernel (search all CUs' top-level scopes, I guess).
That could be a C++ $foo::bar::baz without doing the mangling yourself,
etc.  It uses the type information to decide the size of $variable and
implicitly set the length for the breakpoint.

Note you can use the full expressivity of $expr here,
i.e. $variable->field1->field2.  (IIRC our $ syntax uses only ->
and never . though in this case it's only valid to use what should
be $variable.field1.field2, i.e. static offset calculations rather
than pointer indirections.)

IOW, we would elaborate:

	probe kernel.data($var::iable->second_int_field)

as:

	probe kernel.data("_Zmangle_var_iable"+4).length(4)

Note that with $ syntax:

	probe module("foo").data($foovar)

makes sense to indicate which "global" context $foovar resolves in.


Third issue.  So I mentioned that on powerpc, all you get is an aligned
8-byte watchpoint.  Consider:

	int a;	// 0x124
	int b;	// 0x128
	int c;	// 0x12c

If you wanted:

	probe kernel.data($c).write

i.e.:

	probe kernel.data(0x12c).write.length(4)

you can't get it.  To start with, the translator says .length(8) is your
only option, you can't have it.  Then it says 0x12c is not aligned to 8,
you can't have it.  For real .write semantics, this is true, and so be it.
All you can do is probe kernel.data(0x128).write and tell that $a and/or $b
were touched.  For .read, you are just SOL and that's all you can do.

But probably the common use is that you really wanted:

	probe kernel.data($c).change { ... }

For this, we can do it for you with some implicit magic.  
i.e., translate this into:

	probe begin { c_val = $c }
	probe kernel.data(0x128).length(8) {
	  new_c = $c;  
	  if ($c == c_val) next;
	  c_val = new_c;
	  ...
	}

but really do it with implicit runtime stuff rather through scriptese.

When you have:

	probe kernel.data($b).change { some_stuff() }
	probe kernel.data($c).change { other_stuff() }

then you can turn this into:

	probe begin { b_val = $b; c_val = $c }
	probe kernel.data(0x128).length(8) {
	  new_b = $b; new_c = $c;
	  if ($b != b_val) some_stuff();
	  if ($c != c_val) other_stuff();
	  b_val = new_b;
	  c_val = new_c;
	}

Note that consistent .change semantics would require doing this even when
there isn't a size/alignment mismatch, just so that you distinguish "value
changed" from "same value written again".


Next, a related tangent.  For:

	probe kernel.data(0x120).length(8)

on i386 you can implicitly turn that into:

	probe kernel.data(0x120).length(4), kernel.data(0x124).length(4)

It is probably worth doing that for length 8 on i386, since "long long"
does get used there.  You could theoretically do it for larger things too,
but that is probably not worth bothering with.  On x86 you get to set up to
4 breakpoints, so x86-64 could cover up to 32 bytes this way.  But
e.g. powerpc has just the one, so you'll never actually win doing this even
for a 16-byte range.


That's enough magic for one message, so I'll leave the really exciting
subject to a separate strand of the thread.


Thanks,
Roland

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC] Systemtap translator support for hardware breakpoints on
  2010-01-07 23:15 ` [RFC] Systemtap translator support for hardware breakpoints on Roland McGrath
@ 2010-01-08  1:03   ` Frank Ch. Eigler
  2010-01-08  1:28     ` Roland McGrath
  0 siblings, 1 reply; 31+ messages in thread
From: Frank Ch. Eigler @ 2010-01-08  1:03 UTC (permalink / raw)
  To: Roland McGrath; +Cc: Prerna Saxena, systemtap


roland wrote:

> [...]
> What I think you'd really want is:
> 	probe kernel.data($variable).write { ... }
> That means "variable" gets looked up by $ rules  [...]

This is sensible, but will require a parser-level language change.

> [...]
> If you wanted:
> 	probe kernel.data($c).write
> i.e.:
> 	probe kernel.data(0x12c).write.length(4)
> you can't get it.  To start with, the translator says .length(8) is your
> only option, you can't have it.  Then it says 0x12c is not aligned to 8,
> you can't have it.  [...]

The translator need not veto such a script request.  It should be able
to fulfill it by rounding up/aligning down, then filtering the
resulting callbacks.


> [...]
> 	probe kernel.data($c).change { other_stuff() }
> [...]

Neat idea.


- FChE

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC] Systemtap translator support for hardware breakpoints on
  2010-01-07  8:35 [RFC] Systemtap translator support for hardware breakpoints on Prerna Saxena
  2010-01-07 18:01 ` Frank Ch. Eigler
  2010-01-07 23:15 ` [RFC] Systemtap translator support for hardware breakpoints on Roland McGrath
@ 2010-01-08  1:19 ` Roland McGrath
  2010-01-08  1:53 ` Roland McGrath
  3 siblings, 0 replies; 31+ messages in thread
From: Roland McGrath @ 2010-01-08  1:19 UTC (permalink / raw)
  To: Prerna Saxena; +Cc: systemtap

As Frank said, this is a good start.  But it's really only the tip of the
iceberg of what you'd want to be able to use watchpoints for in practice.
That is, it does only permanent locations known statically at translation.
The really exciting uses of watchpoints are more dynamic than that.

I haven't thought much about the syntax or exact semantics for the cases
I've had in mind.  The syntax I'll use here is just a strawman to get our
minds onto the subject.  I have a feeling that there will be other kinds of
probes that are dynamic in the same sense I mean here and that we might
want the syntax for such things to be generalized across more such cases,
but nothing else in particular is coming to mind right now.

What is often the most worthwhile use of a watchpoint looks like this:

	probe watch_foo(loc) = kernel.data(loc)
	{
	  printf("touched %s: %s\n", describe(loc), backtrace())
	}

	probe kernel.function("foo")
	{
	  foo_watchers[tid()] = enable watch_foo($ptrarg->val)
	}

	probe kernel.function.return("foo")
	{
	  disable foo_watchers[tid()]
	  delete foo_watchers[tid()]
	}

This is dynamic in the sense that we cannot place "probe watch_foo" until
we know the replacement for "loc", and that address is not knowable until
we actually hit the "foo" entry probe and compute it as "&ptrarg->val".

The use of the array is meant to indicate that there could be multiple
independent instances of "probe watch_foo" at any given time, on
different addresses.  Hence it's not just "enable watch_foo" and
"disable watch_foo", which could correlate with the probe predicate
thingy we already have.  I have no idea what the actual method for that
would be, or, if it looked like that, what the type of the value in
foo_watchers[tid()] would be.

The "describe(loc)" is meant to indicate the idea that the parameterization
might apply to more than simply the mechanics of probe placement itself.
I'm not sure whether you'd want it to resolve to "0x1234" (final address)
or to "$ptrarg->val" (symbolic value of the "loc" parameter).

It's also not entirely clear what things like pp() should mean inside
watch_foo.  It could be "watch_foo".  It could be "watch_foo(0x1234)".
It could be "watch_foo($ptrarg->val)".  Or, instead it could be
"kernel.function(\"foo\")", i.e. the context of the "master" probe.
Perhaps we want ways to refer to both those things.

Or perhaps all of that can just be done as:

	probe watch_foo(loc) = kernel.data(loc)
	{
	  printf("touched %s: %s\n", foo_desc[tid()], backtrace())
	}
	probe kernel.function("foo")
	{
	  foo_desc[tid()] = sprintf("%s:ptrarg->val in %d", pp(), tid())
	  foo_watchers[tid()] = enable watch_foo($ptrarg->val)
	}

(For the general case, you'd want to use something beyond plain tid() as
the index there, for multiple separate watch_foo's active in one thread.)

Also, note that while a given instance of "watch_foo" is in some sense
tied to its "master probe", the "watch_foo" probe itself is a "rootless"
probe like timer et al.  It can happen anywhere, so you can't statically
assume any $ context for it.

I don't really have much clue about what the right way is to tackle
these kinds of issues in the script language.  But I know that we want
to enable these kinds of uses eventually, and make them as conceptually
easy to use in a script as they are for a programmer to describe in
words (i.e. "Watch ptrarg->val inside foo calls.").  So someone needs to
think harder about what it all means.


Thanks,
Roland

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC] Systemtap translator support for hardware breakpoints on
  2010-01-08  1:03   ` Frank Ch. Eigler
@ 2010-01-08  1:28     ` Roland McGrath
  0 siblings, 0 replies; 31+ messages in thread
From: Roland McGrath @ 2010-01-08  1:28 UTC (permalink / raw)
  To: Frank Ch. Eigler; +Cc: Prerna Saxena, systemtap

> The translator need not veto such a script request.  It should be able
> to fulfill it by rounding up/aligning down, then filtering the
> resulting callbacks.

... and that is exactly what I described in the next paragraph.
There is no way to "filter" for .read, and the only way for .write
is to track changed values rather than writes per se.  So this means
something different (i.e. .change) than what .write means in the
sense of the actual hw_breakpoint layer (and the hardware feature).
Hence it is IMHO wrong to claim to satisfy any .write probe request
by implementing .change instead.  It could just warn, but I don't
see why it shouldn't give a hard error that says "try .change instead".


Thanks,
Roland

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC] Systemtap translator support for hardware breakpoints on
  2010-01-07  8:35 [RFC] Systemtap translator support for hardware breakpoints on Prerna Saxena
                   ` (2 preceding siblings ...)
  2010-01-08  1:19 ` Roland McGrath
@ 2010-01-08  1:53 ` Roland McGrath
  2010-01-08 16:08   ` Prerna Saxena
  2010-01-09  1:11   ` K.Prasad
  3 siblings, 2 replies; 31+ messages in thread
From: Roland McGrath @ 2010-01-08  1:53 UTC (permalink / raw)
  To: Prerna Saxena; +Cc: systemtap

Another thing to note is that hw_breakpoint registrations can fail at
runtime for "normal" reasons.  The hardware watchpoints are a scarce global
resource (4 on x86, only 1 on powerpc).

The translator might encode some per-arch assumptions about the known
kernel implementation and so give you a warning/error if your script as
elaborated statically requires more registrations than the kernel ever
supports (i.e. more than 4 on x86, more than 1 on powerpc).

But even if you only use one, it might not be free at runtime.  What I
presume your code does is just fail module initialization if registration
fails, so staprun just fails quickly and never even tries to run the
script.

Here's an idea for being fancier:

	probe kernel.data($foo).change { ... }
	probe kernel.data($foo).change.unavailable {
	  pred = 1
	  println("NOTE: watchpt not available, using plan B")
	}

	probe kernel.function("foo1") if pred { ... }
	probe kernel.function("foo2") if pred { ... }
	probe kernel.function("foo3") if pred { ... }

If there is no .unavailable probe given for some watchpoint probe, then
the default is to fail at initialization time.  If there is one, it acts
like a .begin probe.

That brings up a similar tangential idea.  If we have the dynamic
watchpoint probes in some form, then:

	probe watch_foo(loc) = .data(loc).change { ... }
	probe watch_foo(loc) = .data(loc).change.begin { ... }
	probe watch_foo(loc) = .data(loc).change.end { ... }

Those .begin and .end would run when "enable watch_foo" and "disable
watch_foo" happens.  The idea is that a tapset could define watch_foo
for watching a particular kind of thing in some fancy way, and then use
these to initialize and destroy elements in global state arrays it might
keep for each watchpoint instance to use across multiple changes to the
same variable.  For example, when watching a pointer variable you could
be tracking in some tapset-global array of used/live pointers of that
sort.

On a tangent of that tangent, here I've used .data(...) with a leading
. to mean "in the context of the enabling probe".  If it's an instance
of 'enable watch_foo' in 'probe kernel.foo' then it means kernel.data,
if in 'probe module("xyz").foo' it means module("xyz").data, etc.
Really, you could say that context is part of "loc" and it doesn't make
sense for the static spec of a dynamic probe schema ever to indicate an
absolute context.


Thanks,
Roland

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC] Systemtap translator support for hardware breakpoints on
  2010-01-07 22:39     ` Roland McGrath
  2010-01-07 22:45       ` Frank Ch. Eigler
@ 2010-01-08  5:01       ` Ananth N Mavinakayanahalli
  2010-01-08 10:24         ` Roland McGrath
  1 sibling, 1 reply; 31+ messages in thread
From: Ananth N Mavinakayanahalli @ 2010-01-08  5:01 UTC (permalink / raw)
  To: Roland McGrath; +Cc: Frank Ch. Eigler, Prerna Saxena, systemtap

On Thu, Jan 07, 2010 at 02:38:41PM -0800, Roland McGrath wrote:
> 
> probe kernel.data(0x1234).write { one_thing() }
> probe kernel.data(0x1234).read { another_thing() }
> 
> you cannot consolidate these into one RW breakpoint, because you
> won't know which handler to run.  This is in contrast to e.g. two
> kprobes at the same PC.  Those you can consolidate at translation
> time so the module init code only inserts one kprobe and its handler
> does both things.
> 
> Even for the same_stuff() case, it's not clear you can really 
> consolidate them if something like pp() is used.  i.e., where the
> name of the specific probe is part of the "stuff", it's not really
> the same stuff any more.
> 
> On x86 you can distinguish this by just using two different debug
> registers, one for the read and one for the write.  You can tell
> which of the two hit.  Only if you don't care to distinguish them
> can you use a single debug register set to RW.

On x86, you imply something akin to using one debug register monitoring
"write" and the other monitoring "rw" for the same address, right?

We did try this sometime back. The event does trigger an (one) exception
and the only way to distinguish whether a 'read' happened is to look
at the debug status register (DR6) and see if one or both bits are set,
and take appropriate action.

Maybe, a better way to do it is to hide this complexity by stap taking
care of using 2 DRs underneath -- but there is no ironclad gurantee that
2 free debug registers are available for stap's use at all times.

Ananth

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC] Systemtap translator support for hardware breakpoints on
  2010-01-08  5:01       ` Ananth N Mavinakayanahalli
@ 2010-01-08 10:24         ` Roland McGrath
  2010-01-09  1:29           ` K.Prasad
  0 siblings, 1 reply; 31+ messages in thread
From: Roland McGrath @ 2010-01-08 10:24 UTC (permalink / raw)
  To: ananth; +Cc: Frank Ch. Eigler, Prerna Saxena, systemtap

> On x86, you imply something akin to using one debug register monitoring
> "write" and the other monitoring "rw" for the same address, right?

Right.

> We did try this sometime back. The event does trigger an (one) exception
> and the only way to distinguish whether a 'read' happened is to look
> at the debug status register (DR6) and see if one or both bits are set,
> and take appropriate action.

Right, that's what I meant.

> Maybe, a better way to do it is to hide this complexity by stap taking
> care of using 2 DRs underneath -- 

That's exactly what I was suggesting.

> but there is no ironclad gurantee that
> 2 free debug registers are available for stap's use at all times.

Indeed, nor that there is one.


Thanks,
Roland

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC] Systemtap translator support for hw breakpoints on x86
  2010-01-07 18:01 ` Frank Ch. Eigler
  2010-01-07 21:57   ` Roland McGrath
@ 2010-01-08 14:35   ` Prerna Saxena
  2010-01-08 15:21     ` Frank Ch. Eigler
  2010-01-25 19:00   ` [RFC V2] Systemtap translator support for kernel hardware breakpoints Prerna Saxena
  2 siblings, 1 reply; 31+ messages in thread
From: Prerna Saxena @ 2010-01-08 14:35 UTC (permalink / raw)
  To: Frank Ch. Eigler; +Cc: systemtap

Hi Frank,

Thanks for taking time to look. I'll respond to suggestions on my patch 
here. I'm sending a separate email to address valuable suggestions from 
you/ Roland on how hwbkpts could be utilized better by systemtap.


On 01/07/2010 11:30 PM, Frank Ch. Eigler wrote:
>
> Prerna Saxena<prerna@linux.vnet.ibm.com>  writes:
>
>> [...]
>> This patch enables systemtap translator to probe hardware breakpoints
>> on x86 and x86_64 (based on mainline kernel).
>
> Looks like a good start.
>
> In order to merge this, it is essential to have a testsuite, posted
> results on a variety of architectures and kernel versions (kfail for
> older kernels), and documentation.
>

This patch was meant to be an RFC. I'll add all this when this gets 
checked into the git tree.

>
>> Syntax :
>> probe kernel.data(ADDRESS).write
>> probe kernel.data(ADDRESS).rw
>> probe kernel.data(ADDRESS).length(LEN).write
>> probe kernel.data(ADDRESS).length(LEN).rw
>> probe kernel.data("SYMBOL_NAME").write
>> probe kernel.data("SYMBOL_NAME").rw
>
> (I wonder if simple .read or .write would be sufficient as suffixes,
> and let a script render both as
>
>     probe kernel.data(ADDRESS).write, probe kernel.data(ADDRESS).read
>
> and let the translator collapse such duplication into the internal RW case.)
>

There are issues here, as Roland pointed out. I'll explain in my next note.

>
>> +  // Warn of misconfigured kernels
>> +  s.op->newline()<<  "#ifndef CONFIG_HAVE_HW_BREAKPOINT";
>> +  s.op->newline()<<  "#error \"Need CONFIG_HAVE_HW_BREAKPOINT!\"";
>> +  s.op->newline()<<  "#endif";
>
> Instead or in addition, the translator could make the whole probe point
> type conditional on session.kernel_config("CONFIG_HAVE_HW_BREAKPOINT").

I'll add this in addition to the pass 4 check. Will this check take care 
of cross compilation for a different kernel ? (I think it should)

>
>> +// Define macros for access type
>> +  s.op->newline()<<  "#define HWBKPT_READ 0";
>> +  s.op->newline()<<  "#define HWBKPT_WRITE 1";
>> +  s.op->newline()<<  "#define HWBKPT_RW 2";
>
> (Are these necessary?  Could the translator not emit symbols like
> HW_BREAKPOINT_W directly?)
>

This had to be defined because some types are not defined on all 
architectures. X86 supports only HW_BREAKPOINT_W and HW_BREAKPOINT_RW, 
while ppc supports HW_BREAKPOINT_W and HW_BREAKPOINT_R. So, while a 
request to probe only reads to a location is acceptable on ppc, it 
cannot be employed on x86 for now (details in my next email ). So, I had 
to use a common mechanism to include all the 3 cases.

>
>> +  s.op->newline()<<  "static struct stap_hwbkpt_probe {";
>> +  s.op->newline()<<  "int registered_p:1;";
>> +// registered_p = -1 signifies an invalid probe that must not be registered
>> +// registered_p =  0 signifies a probe that failed registration
>> +// registered_p =  1 signifies a probe that got registered successfully
>
> A one-bit value can't represent three different values.
>

Wonder how such a blunder was missed. I'll fix this. :-)

>
>> +      if (p->symbol_name.size())
>> +      s.op->line()<<  " .address=(unsigned long)0x0"<<  "ULL,";
>
> (You probably don't need explicit initialization to zero.)
>
>
>> +  s.op->newline()<<  "static int enter_hwbkpt_probe (struct perf_event *bp,";
>> [...]
>> +  s.op->newline()<<  "for ( i=0; i<"<<  hwbkpt_probes_vector.size()<<  "; i++) {";
>> +  s.op->newline()<<  "struct perf_event_attr *hp =&  stap_hwbkpt_probe_array[i];";
>> +  s.op->newline()<<  "if (bp->attr.bp_addr==hp->bp_addr&&  bp->attr.bp_type==hp->bp_type&&  bp->attr.bp_len==hp->bp_len ) {";
>> [...]
>
> To what extent does this handle multiple hw breakpoints on the same address?
>

For now, the kernel API doesnt allow chaining handlers for identical 
hardware breakpoints. The kernel API registers each hardware breakpoint 
as a separate case, whether its for identical or different addresses. So 
I had to include a crude check to map a hwbkpt perf_event (returned by 
the API when a breakpoint is hit) to the perf_event attribute for which 
it was registered.

>
>> [...]
>> +void
>> +hwbkpt_derived_probe_group::emit_module_init (systemtap_session&  s)
>> +{ [...]
>> +  s.op->newline()<<  "const char *hwbkpt_symbol_name = addr ? NULL : sdp->symbol;";
>> +  s.op->newline()<<  "	hw_breakpoint_init(hp);";
>
> Normally we use newline(1) or (-1) to indent instead of explicit
> spaces like this.

I'll remember that, thanks :)

>
>> +  s.op->newline()<<  "	if (addr) ";
>> +  s.op->newline()<<  "	    hp->bp_addr = (unsigned long) addr;";
>> +  s.op->newline()<<  "	else ";
>> +  s.op->newline()<<  "	   hp->bp_addr = kallsyms_lookup_name(hwbkpt_symbol_name);";
>
> Is kallsyms_lookup_name actually module-exported these days?  Should the
> translator try to resolve symbols at translate time?  Wildcards?
>

I just checked ! kallsyms_lookup_name is exported in mainline kernel via 
commit  f60d24d2ad04977b0bd9e3eb35dba2d2fa569af9
...
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 8b6b8b6..8e5288a 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -181,6 +181,7 @@ unsigned long kallsyms_lookup_name(const char *name)
         }
         return module_kallsyms_lookup_name(name);
  }
+EXPORT_SYMBOL_GPL(kallsyms_lookup_name);
...

>
>> +  s.op->newline()<<  "#ifdef CONFIG_X86";
>> +  s.op->newline()<<  "  switch(sdp->len) {";
>> +  s.op->newline()<<  "	case HW_BREAKPOINT_LEN_1:";
>> +  s.op->newline()<<  "    hp->bp_len = HW_BREAKPOINT_LEN_1;";
>> [...]
>
> Is this sort of switch really necessary?  Why not just set
>
>      hp->bp_len = sdp->len;

I'll trim this.

>
>> +  s.op->newline()<<  "	default:";
>> +  s.op->newline()<<  "  	_stp_warn(\"Probe %s registration skipped : unsupported length. Supported lengths for x86 are 1,2,4 {8 for x86_84 only} \",sdp->pp);";
>> +  s.op->newline()<<  "		sdp->registered_p = -1;";
>> +  s.op->newline()<<  "		stap_hwbkpt_ret_array[i] = ERR_PTR(-EINVAL);";
>
> Why is this necessary ...
>

For now, I had designed the probes to error out if a user defined a 
length that was not supported by architecture (eg a length 3 on x86, as 
this supports lengths like 1,2,4 bytes only) However, I think your 
suggestion ( to let the translator override this with the closest length 
that the arch supports) is nice. I'll try implementing it.

>
>> +  s.op->newline()<<  "    case HWBKPT_READ:";
>> +  s.op->newline()<<  "  	_stp_warn(\"Probe %s registration skipped : READ-ONLY hardware breakpoints not supported on x86\",sdp->pp);";
>> +  s.op->newline()<<  "		sdp->registered_p = -1;";
>> +  s.op->newline()<<  "		break;";
>
> ... and this too ...
>
>> +  s.op->newline()<<  "if (sdp->registered_p>= 0) {";
>> +  s.op->newline()<<  " stap_hwbkpt_ret_array[i] = register_wide_hw_breakpoint( hp, (void *)&enter_hwbkpt_probe );";
>
> ... considering that register_wide_hw_breakpoint() would surely
> check its inputs and reject requests deemed inappropriate for
> the current architecture?
>

As explained earlier, there is no HW_BREAKPOINT_R flag defined for x86. 
I cannot define a read-only hardware breakpoint for x86 directly using 
the hw breakpoint api. The only way to do it using two debug registers, 
and setting up 2 separate breakpoints for the same address -- one with 
an RW flag, and the other with a write flag.
A read would happen when exactly one of these is hit and the other is not.

> Note also that registered_p always>= 0 for your actual int:1 data
> type.  Consider simply == 0 for unregistered, == 1 for registered.
>
>
I'll fix this.

>> +  s.op->newline()<<  " if (IS_ERR(stap_hwbkpt_ret_array[i])) {";
>> +  s.op->newline()<<  "     rc = PTR_ERR(stap_hwbkpt_ret_array[i]);";
>> +  s.op->newline()<<  "      sdp->registered_p = 0;";
>> +  s.op->newline(1)<<  "	    _stp_warn(\" Hwbkpt Probe %s : Registration error rc= %d, Addr = %p, name = %s\", probe_point, rc, addr, hwbkpt_symbol_name);";
>> +  s.op->newline(-1)<<  "  }";
>> +  s.op->newline()<<  "    else sdp->registered_p = 1;";
>> +  s.op->newline()<<  "}";
>> +  s.op->newline(-1)<<  "}"; // for loop
>
> Not so simple: if you get a failure on the sixth registration, you have to
> unregister the prior ones.
>

There is a glitch here. As Roland pointed out, debug registers are a 
scarce resource (1 on PPC, 4 on x86 ) I can modify the translator to 
throw an error in a script that tries to register more breakpoints than 
what that architecture allows. Eg, a script will not compile if it tries 
to probe 2 hw breakpoints on a ppc system, which can just have one 
breakpoint at a time.

However, in case less resources are available at runtime (say, only 3 
out of 4 debug registers are free on x86 when the stap-generated module 
is running), and the script has 4 hardware breakpoints -- the first 3 
will get registered while _only_ the 4th request will fail at 
registration. I thought this was an acceptable behaviour.
Pls let me know your views.

>
> - FChE

Regards,

-- 
Prerna Saxena

Linux Technology Centre,
IBM Systems and Technology Lab,
Bangalore, India

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC] Systemtap translator support for hw breakpoints on x86
  2010-01-08 14:35   ` [RFC] Systemtap translator support for hw breakpoints on x86 Prerna Saxena
@ 2010-01-08 15:21     ` Frank Ch. Eigler
  2010-01-09  1:41       ` K.Prasad
  0 siblings, 1 reply; 31+ messages in thread
From: Frank Ch. Eigler @ 2010-01-08 15:21 UTC (permalink / raw)
  To: Prerna Saxena; +Cc: systemtap

Prerna Saxena <prerna@linux.vnet.ibm.com> writes:

> [...]
>> Instead or in addition, the translator could make the whole probe point
>> type conditional on session.kernel_config("CONFIG_HAVE_HW_BREAKPOINT").
>
> I'll add this in addition to the pass 4 check. Will this check take
> care of cross compilation for a different kernel ? (I think it should)

Yes.

>>> +// Define macros for access type
>>> +  s.op->newline()<<  "#define HWBKPT_READ 0";
>>> +  s.op->newline()<<  "#define HWBKPT_WRITE 1";
>>> +  s.op->newline()<<  "#define HWBKPT_RW 2";
>>
>> (Are these necessary?  Could the translator not emit symbols like
>> HW_BREAKPOINT_W directly?)
>
> This had to be defined because some types are not defined on all
> architectures. X86 supports only HW_BREAKPOINT_W and HW_BREAKPOINT_RW,
> while ppc supports HW_BREAKPOINT_W and HW_BREAKPOINT_R. [...]

But does this support or lack of support manifest itself in a
different list of symbols defined by the kernel headers?  For example,
the symbol HW_BREAKPOINT_R is defined in linux/hw_breakpoint.h, so if
systemtap emits code that uses it, this will compile just fine on all
architectures.


> [...]  For now, I had designed the probes to error out if a user
> defined a length that was not supported by architecture (eg a length
> 3 on x86, as this supports lengths like 1,2,4 bytes only) [...]

My concern with the above was not the script language interface or
mappings, but only the cleanliness / simplicity of the generated code.
If the translator neglects to filter out unsupported lengths, and the
generated code propagates the value 3, will the kernel's registration
function check and reject such requests at run time?  If so, then
there is no need for the generated code to contain preemptive checks /
filters / error messages.  Just let it listen to the result code from
the kernel's registration function.

As to having the translator filter for parameters expected to be
valid, that is fine, but those checks need not show up redundantly in
generated C code.


>>> +  s.op->newline()<<  "     rc = PTR_ERR(stap_hwbkpt_ret_array[i]);";
>>> +  s.op->newline()<<  "      sdp->registered_p = 0;";
>>> +  s.op->newline(1)<<  "	    _stp_warn(\" Hwbkpt Probe %s : Registration error rc= %d, Addr = %p, name = %s\", probe_point, rc, addr, hwbkpt_symbol_name);";
>>> +  s.op->newline(-1)<<  "  }";
>>> +  s.op->newline()<<  "    else sdp->registered_p = 1;";
>>> +  s.op->newline()<<  "}";
>>> +  s.op->newline(-1)<<  "}"; // for loop
>>
>> Not so simple: if you get a failure on the sixth registration, you have to
>> unregister the prior ones.
> [...]
> However, in case less resources are available at runtime (say, only 3
> out of 4 debug registers are free on x86 when the stap-generated
> module is running), and the script has 4 hardware breakpoints -- the
> first 3 will get registered while _only_ the 4th request will fail at
> registration. I thought this was an acceptable behaviour.

OK, if you want to let the registration phase pass with only partial
successes, then the code must set rc = 0 around your stp_warn() call.
As the code is currently, rc != 0, so the registration loop assumes
that this particular derived_probe_group *has already cleaned up* any
partial registrations, and will not get a subsequent call.

- FChE

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC] Systemtap translator support for hardware breakpoints on
  2010-01-08  1:53 ` Roland McGrath
@ 2010-01-08 16:08   ` Prerna Saxena
  2010-01-08 18:52     ` Roland McGrath
  2010-01-09  1:11   ` K.Prasad
  1 sibling, 1 reply; 31+ messages in thread
From: Prerna Saxena @ 2010-01-08 16:08 UTC (permalink / raw)
  To: Roland McGrath, Frank Ch. Eigler; +Cc: systemtap

Hi Roland, Frank,

Thanks for the exhaustive review and nice suggestions. The discussion 
was spread across a multitude of emails -- I'm posting a summary of 
ideas on how to augment the hardware breakpoints with systemtap, with my 
understanding on how this could be implemented.

1. Probe points :
probe kernel.data(ADDRESS).write
probe kernel.data(ADDRESS).read
probe kernel.data(ADDRESS).change
probe kernel.data(ADDRESS).rw
	i) .read -this probe point probes read access ONLY to an address -- 
needs 2 debug registers on x86 : one for rw, and the other for write. 
Underlying implementation will need to check if DR6 flashes a hit for 
only the rw-based probe, and not the write probe -- in which case its a 
read request. I need to explore the existing hw breakpoint API better to 
implement this.
     ii.) write : probes write access to an address.
     iii.) change : checks every time a write request modifies the value 
at an address. It weeds out redundant writes that overwrite the location 
with same data and will execute only if the value at the specified 
address indeed undergoes a change.
	Roland had a cool suggestion for this. I was wondering if this can best 
be implemented in a tapset, that calls a .write probe underneath, with 
the correct semantics. Or is it better to enable the translator to emit 
such code.
	iv. rw : Coalescing handlers (by defining a common handler for both 
.read and .write request) is difficult. To implement this, far more 
intelligence needs to be built to check if probe handlers described in 
the script are identical or not -- I'll extend this later, for now ,I 
was wondering if this syntax may be retained for ease of use.

2. Script Optimizations if more hwbkpt requests are present in the 
script than the  hardware can support.
I'll add an elementary check that errors out if too many requests exist. 
(eg, more than 1 probe kernel.data on ppc or more than 4 such probes on 
x86 )

3. probe kernel.data($variable).write { ... }
Nice suggestion again ! The translator can be augmented to use this, and 
add dwarf lookup for $variable.

4. probe kernel.data(0x12c).write.length(4)
For now, ppc patches are not done, but I was planning to allow this as a 
valid construct.
For ppc, any length upto 8 is acceptable. IIRC, the hwbkpt kernel API 
takes care of filtering a hit if the defined hwbkpt probe has a separate 
address interval and the hit is at an address which is outside that 
interval. (I need to check its implementation with hw breakpoint kernel 
API).

5. Dealing with invalid requests, eg :

i)    probe kernel.data(0x120).length(8) on x86
translator intelligence to split an invalid request into valid pieces 
will turn it into a case like :
     probe kernel.data(0x120).length(4),
	  kernel.data(0x124).length(4)
     --> This is doable. But, is it not still easier for this to error 
out, prompting the user about supported lengths on that arch ? the user 
can then place a more appropriate probe which matches a narrower address 
interval of interest ? This conserves scarce resource like debug regs.

ii) Another case of invalid requests on x86 :
     probe kernel.data(0x120).length(3)
     this can be dealt with, by approximating the length to the next 
allowable size, such as placing a request for hwbkpt of length 4 bytes here.
I will edit the existing patch to allow translator to approximate the 
request to a bigger address interval instead of invalidating the 
request, as its done now.

6. Dynamic enabling/disabling of a hardware breakpoint from kprobe 
handler context , AND enabling a hardware breakpoint within a function:
	I am not sure of how to implement this, since kprobe handlers run in 
exception context. I'm trying to understand how this can be done, will 
send out an update asap.

Looking forward to more interesting possibilities of how we can exploit 
hardware breakpoints better with systemtap..

Regards,

-- 
Prerna Saxena

Linux Technology Centre,
IBM Systems and Technology Lab,
Bangalore, India

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC] Systemtap translator support for hardware breakpoints on
  2010-01-08 16:08   ` Prerna Saxena
@ 2010-01-08 18:52     ` Roland McGrath
  2010-01-09  0:54       ` K.Prasad
  0 siblings, 1 reply; 31+ messages in thread
From: Roland McGrath @ 2010-01-08 18:52 UTC (permalink / raw)
  To: Prerna Saxena; +Cc: Frank Ch. Eigler, systemtap

> For ppc, any length upto 8 is acceptable. IIRC, the hwbkpt kernel API 
> takes care of filtering a hit if the defined hwbkpt probe has a separate 
> address interval and the hit is at an address which is outside that 
> interval. (I need to check its implementation with hw breakpoint kernel 
> API).

I don't understand.  Do you mean that the trap handler gets the precise
address of the load/store, not just the address aligned to 8?


Thanks,
Roland

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC] Systemtap translator support for hardware breakpoints on
  2010-01-08 18:52     ` Roland McGrath
@ 2010-01-09  0:54       ` K.Prasad
  2010-01-09  1:07         ` Roland McGrath
  0 siblings, 1 reply; 31+ messages in thread
From: K.Prasad @ 2010-01-09  0:54 UTC (permalink / raw)
  To: Roland McGrath; +Cc: Prerna Saxena, Frank Ch. Eigler, systemtap

On Fri, Jan 08, 2010 at 10:52:31AM -0800, Roland McGrath wrote:
> > For ppc, any length upto 8 is acceptable. IIRC, the hwbkpt kernel API 
> > takes care of filtering a hit if the defined hwbkpt probe has a separate 
> > address interval and the hit is at an address which is outside that 
> > interval. (I need to check its implementation with hw breakpoint kernel 
> > API).
> 
> I don't understand.  Do you mean that the trap handler gets the precise
> address of the load/store, not just the address aligned to 8?
>

Yes, we put the Data Address Register (DAR) to use here and know the
exact address which was accessed to cause the breakpoint exception. The
value of DAR along with the length of the symbol (taken as input from
the user when registering a breakpoint request) is used to filter out
extraneous breakpoint exceptions.

For instance, a breakpoint request for a symbol with length for 4 bytes
is accepted (anything <= 8 is considered valid). Whenever a breakpoint
exception arises, the hw_breakpoint_handler() checks to see if the value
of DAR lies between the bp_addr and bp_addr+len. If yes, we know that
the exception is a result of a memory access in the address-range we're
interested in, otherwise it returns early.

Thanks,
K.Prasad

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC] Systemtap translator support for hardware breakpoints on
  2010-01-09  0:54       ` K.Prasad
@ 2010-01-09  1:07         ` Roland McGrath
  0 siblings, 0 replies; 31+ messages in thread
From: Roland McGrath @ 2010-01-09  1:07 UTC (permalink / raw)
  To: prasad; +Cc: Prerna Saxena, Frank Ch. Eigler, systemtap

> Yes, we put the Data Address Register (DAR) to use here and know the
> exact address which was accessed to cause the breakpoint exception. The
> value of DAR along with the length of the symbol (taken as input from
> the user when registering a breakpoint request) is used to filter out
> extraneous breakpoint exceptions.

Ah!  Excellent.  I didn't know the hardware made that possible.  That's
something the x86 hardware can't do, AFAICT.  Of course, it does give you 4
breakpoints to use vs 1, which is a pretty good compensation. ;-)

I guess technically this means that the hw_breakpoint layer could let you
register a second one if it happens to be a length 4 and the two are
adjacent (or up to 8 adjacent one-byte ones, etc.--i.e. the aligned address
of all is the same).  But it probably makes more sense just to have much
higher layers like the stap translator noticing that it can consolidate
adjacencies into a single registration before it decides if you are trying
for an unreasonable number of simultaneous registrations.


Thanks,
Roland

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC] Systemtap translator support for hardware breakpoints on
  2010-01-08  1:53 ` Roland McGrath
  2010-01-08 16:08   ` Prerna Saxena
@ 2010-01-09  1:11   ` K.Prasad
  2010-01-09  1:45     ` Roland McGrath
  1 sibling, 1 reply; 31+ messages in thread
From: K.Prasad @ 2010-01-09  1:11 UTC (permalink / raw)
  To: Roland McGrath; +Cc: Prerna Saxena, systemtap

On Thu, Jan 07, 2010 at 05:53:01PM -0800, Roland McGrath wrote:
> Another thing to note is that hw_breakpoint registrations can fail at
> runtime for "normal" reasons.  The hardware watchpoints are a scarce global
> resource (4 on x86, only 1 on powerpc).
> 
> The translator might encode some per-arch assumptions about the known
> kernel implementation and so give you a warning/error if your script as
> elaborated statically requires more registrations than the kernel ever
> supports (i.e. more than 4 on x86, more than 1 on powerpc).
> 
> But even if you only use one, it might not be free at runtime.  What I
> presume your code does is just fail module initialization if registration
> fails, so staprun just fails quickly and never even tries to run the
> script.
> 
> Here's an idea for being fancier:
> 
> 	probe kernel.data($foo).change { ... }
> 	probe kernel.data($foo).change.unavailable {
> 	  pred = 1
> 	  println("NOTE: watchpt not available, using plan B")
> 	}
> 
> 	probe kernel.function("foo1") if pred { ... }
> 	probe kernel.function("foo2") if pred { ... }
> 	probe kernel.function("foo3") if pred { ... }
> 
> If there is no .unavailable probe given for some watchpoint probe, then
> the default is to fail at initialization time.  If there is one, it acts
> like a .begin probe.
> 
> That brings up a similar tangential idea.  If we have the dynamic
> watchpoint probes in some form, then:
> 
> 	probe watch_foo(loc) = .data(loc).change { ... }
> 	probe watch_foo(loc) = .data(loc).change.begin { ... }
> 	probe watch_foo(loc) = .data(loc).change.end { ... }
> 
> Those .begin and .end would run when "enable watch_foo" and "disable
> watch_foo" happens.  The idea is that a tapset could define watch_foo
> for watching a particular kind of thing in some fancy way, and then use
> these to initialize and destroy elements in global state arrays it might
> keep for each watchpoint instance to use across multiple changes to the
> same variable.  For example, when watching a pointer variable you could
> be tracking in some tapset-global array of used/live pointers of that
> sort.
>

If my understanding is correct, this is a suggestion that demands an
'overcommit' feature (ability to accept requests more than the available
debug registers) in hw-breakpoints, right?

In its new form (post perf-events integration), hw-breakpoints can
indeed accept new requests that far exceed the number of underlying
debug registers. This can be achieved by making an 'un-pinned' breakpoint
request, where every such request gets a chance to use the debug
register in a round-robin fashion (all this is provided by perf-events
infrastructure anyway).

In other words, on x86, it is possible to have, say 3 'pinned'
breakpoint requests (which would dedicatedly consume 1 debug register
each) and any number of 'un-pinned' breakpoint requests (which would be
scheduled to 'use' the remaining 1 debug register in RR fashion).

Presently, the breakpoint infrastructure does not provide callbacks that
can be invoked whenever an 'un-pinned' breakpoint request is
scheduled-in/out (analogous to .enabled and .disabled). We could pursue
to get support for the same (of course, that would require a good
in-kernel user to convince the community!).

Thanks,
K.Prasad

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC] Systemtap translator support for hardware breakpoints on
  2010-01-08 10:24         ` Roland McGrath
@ 2010-01-09  1:29           ` K.Prasad
  2010-01-09  1:56             ` Roland McGrath
  0 siblings, 1 reply; 31+ messages in thread
From: K.Prasad @ 2010-01-09  1:29 UTC (permalink / raw)
  To: Roland McGrath; +Cc: ananth, Frank Ch. Eigler, Prerna Saxena, systemtap

On Fri, Jan 08, 2010 at 02:24:02AM -0800, Roland McGrath wrote:
> > On x86, you imply something akin to using one debug register monitoring
> > "write" and the other monitoring "rw" for the same address, right?
> 
> Right.
> 
> > We did try this sometime back. The event does trigger an (one) exception
> > and the only way to distinguish whether a 'read' happened is to look
> > at the debug status register (DR6) and see if one or both bits are set,
> > and take appropriate action.
> 
> Right, that's what I meant.
> 
> > Maybe, a better way to do it is to hide this complexity by stap taking
> > care of using 2 DRs underneath -- 
> 
> That's exactly what I was suggesting.
> 
> > but there is no ironclad gurantee that
> > 2 free debug registers are available for stap's use at all times.
> 
> Indeed, nor that there is one.
>

An easier (and less accurate, but more conservative with debug register
use) method to distinguish a .read (from a .write) in x86 would be to use
your suggested .change semantics. i.e.

Inside a hw-breakpoint exception, whenever "old_value == new_value", we
could interpret it as a .read (albeit inaccurately) operation.

Assuming that .write operations which re-write old_value will be fewer
in number, such a scheme would be really helpful; more so in debugging
context (although less-useful for read/write profiling).

Thanks,
K.Prasad

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC] Systemtap translator support for hw breakpoints on x86
  2010-01-08 15:21     ` Frank Ch. Eigler
@ 2010-01-09  1:41       ` K.Prasad
  0 siblings, 0 replies; 31+ messages in thread
From: K.Prasad @ 2010-01-09  1:41 UTC (permalink / raw)
  To: Frank Ch. Eigler; +Cc: Prerna Saxena, systemtap, Roland McGrath

On Fri, Jan 08, 2010 at 10:21:12AM -0500, Frank Ch. Eigler wrote:
> Prerna Saxena <prerna@linux.vnet.ibm.com> writes:
<snipped>
> 
> > [...]  For now, I had designed the probes to error out if a user
> > defined a length that was not supported by architecture (eg a length
> > 3 on x86, as this supports lengths like 1,2,4 bytes only) [...]
> 
> My concern with the above was not the script language interface or
> mappings, but only the cleanliness / simplicity of the generated code.
> If the translator neglects to filter out unsupported lengths, and the
> generated code propagates the value 3, will the kernel's registration
> function check and reject such requests at run time?  If so, then
> there is no need for the generated code to contain preemptive checks /
> filters / error messages.  Just let it listen to the result code from
> the kernel's registration function.
> 
> As to having the translator filter for parameters expected to be
> valid, that is fine, but those checks need not show up redundantly in
> generated C code.
>

The hw-breakpoint API for registration returns success only after a
successful sanity check for alignment, breakpoint length (supported or
not), improper addresses (user vs kernel-space) among a few others.

So unless the translator wants to show more meaningful error messages
that states the exact cause (given that the bkpt API will return -EINVAL
in all of these cases), it can rely upon the kernel's registration
function.

Thanks,
K.Prasad
 

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC] Systemtap translator support for hardware breakpoints on
  2010-01-09  1:11   ` K.Prasad
@ 2010-01-09  1:45     ` Roland McGrath
  2010-01-09  2:46       ` K.Prasad
  0 siblings, 1 reply; 31+ messages in thread
From: Roland McGrath @ 2010-01-09  1:45 UTC (permalink / raw)
  To: prasad; +Cc: Prerna Saxena, systemtap

> If my understanding is correct, this is a suggestion that demands an
> 'overcommit' feature (ability to accept requests more than the available
> debug registers) in hw-breakpoints, right?

Actually that's exactly not what I was talking about.  It's an
interesting subject to consider (I was the one who originally
proposed such complexity for hw_breakpoint at its inception).  

But all I was talking about here is what a stap module can do when a
"allocate it now and forever" call fails.  (This might be at script
startup time, or might be later during a module's lifetime if the
putative script-driven dynamic registration feature were used.)

> In its new form (post perf-events integration), hw-breakpoints can
> indeed accept new requests that far exceed the number of underlying
> debug registers. This can be achieved by making an 'un-pinned' breakpoint
> request, where every such request gets a chance to use the debug
> register in a round-robin fashion (all this is provided by perf-events
> infrastructure anyway).

I don't understand what "round-robin" means for breakpoints.  When
you let the kernel execute normally again after registration, either
my breakpoint is enabled or it isn't.  There is no meaningful sense
in which you can "time-share" a hardware breakpoint slot.  (That is,
except for doing per-task registrations, which is a different
semantics entirely.)

> Presently, the breakpoint infrastructure does not provide callbacks that
> can be invoked whenever an 'un-pinned' breakpoint request is
> scheduled-in/out (analogous to .enabled and .disabled). We could pursue
> to get support for the same (of course, that would require a good
> in-kernel user to convince the community!).

I am having trouble imagining what any kind of "scheduled-in/out"
could possibly useful to do at all if you don't notify me about
whether or not my breakpoint is in place.  A "best effort"
breakpoint, that might be caught and might not be and who knows
whether it's really installed, is just not useful.  I must be
missing the essence of what you mean.

The thing I had talked about before was each hw_breakpoint
registration having a priority number.  When another registration
comes along with a higher priority, it can boot yours and make a
callback so you know it's been stolen.  Conversely, when a competing
registration goes away, the highest-priority registration-in-waiting
gets installed and gets a callback to tell you it's now active.  (I
guess really there could just be a single notifier list that gets
called when a slot becomes free, so the suitors can try again to see
whose priority wins.  Whatever.)  But you've said this is not what
it does now.

Anyway, that kind of dynamicism is not what I was talking about here.
If we had it, the script-language features might look rather similar
and so work how you're talking about here.  i.e., the .enabled and
.disabled probes firing due to an hw_breakpoint layer callback that is
"spontaneous" to the stap module's eyes, i.e. driven by the ebb and
flow of external demand on the scarce shared resource.

In the examples I gave, the .unavailable sub-probe in the simplest
form is just a translation-time (tapset sugar) way to do some implicit
script-level stuff at 'probe begin' time, but conditional on whether
the associated hw_breakpoint registration at startup succeeded.

In the example with dynamic (i.e. runtime script-controlled)
registration, the .begin and .end sub-probes are just tapset sugar for
some script-level stuff to do implicitly when script code uses "enable
watch_foo" and "disable watch_foo".  For that use, you might have a
.unavailable sub-probe that runs when "enable" fails, or you might
just have "enable" return a testable success/failure to the script
code or whatnot.

Given all that, I can imagine wanting the tie-in for some kind of
"breakpoint scheduling" to be that "enable" has "enable-now-or-fail"
and "enable-now-or-later" variants.  Then in the "now or later"
variant, those same .begin and .end probes might get run during the
lifetime of a script (due to an hw_breakpoint layer callback) rather
than only at "enable foo" and "disable foo" time.


Thanks,
Roland

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC] Systemtap translator support for hardware breakpoints on
  2010-01-09  1:29           ` K.Prasad
@ 2010-01-09  1:56             ` Roland McGrath
  0 siblings, 0 replies; 31+ messages in thread
From: Roland McGrath @ 2010-01-09  1:56 UTC (permalink / raw)
  To: prasad; +Cc: ananth, Frank Ch. Eigler, Prerna Saxena, systemtap

> An easier (and less accurate, but more conservative with debug register
> use) method to distinguish a .read (from a .write) in x86 would be to use
> your suggested .change semantics. i.e.
> 
> Inside a hw-breakpoint exception, whenever "old_value == new_value", we
> could interpret it as a .read (albeit inaccurately) operation.

That's a thought.  I'm a stickler for not lying implicitly about what the
semantics are.  So I'd go for this being a .nonchange alongside .change,
described as "access without change" rather than calling it .read when it's
not.  If only powerpc can support a true .read low-level probe with the
semantics the name implies, then so be it--let the translator refuse to
perform .read on x86.

I think it's fine to have .read/.write/.rw be a low-level layer of probes.
(Perhaps even name them more obscurely.)  Make .change and .access be the
high-level probes that people normally use in scripts.

Btw, for a built-in .change it would be handy to give $$old and $$new or
somesuch magic bindings for the probe's script code to see the pre-change
and post-change values.  (You can of course track it yourself in script
variables, but that becomes error-prone boilerplate, while the .change
machinery will have it on hand under the covers anyway.)

OTOH, everything supports .write and that is probably what people would
actually use most.  So this may all be more thought than is really required.


Thanks,
Roland

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC] Systemtap translator support for hardware breakpoints on
  2010-01-09  1:45     ` Roland McGrath
@ 2010-01-09  2:46       ` K.Prasad
  2010-01-10 21:37         ` Roland McGrath
  0 siblings, 1 reply; 31+ messages in thread
From: K.Prasad @ 2010-01-09  2:46 UTC (permalink / raw)
  To: Roland McGrath; +Cc: Prerna Saxena, systemtap

On Fri, Jan 08, 2010 at 05:44:57PM -0800, Roland McGrath wrote:
> > If my understanding is correct, this is a suggestion that demands an
> > 'overcommit' feature (ability to accept requests more than the available
> > debug registers) in hw-breakpoints, right?
> 
> Actually that's exactly not what I was talking about.  It's an
> interesting subject to consider (I was the one who originally
> proposed such complexity for hw_breakpoint at its inception).  
> 
> But all I was talking about here is what a stap module can do when a
> "allocate it now and forever" call fails.  (This might be at script
> startup time, or might be later during a module's lifetime if the
> putative script-driven dynamic registration feature were used.)
> 

Yes, I remember reading-through the discussions you had with Alan Stern
as a result of which his code, originally, had the over-commit and
prioritisation feature but was subsequently removed during a re-design
as demanded by the community/maintainer. Post perf-events integration,
the over-commit is available in the form of 'un-pinned' requests.

> > In its new form (post perf-events integration), hw-breakpoints can
> > indeed accept new requests that far exceed the number of underlying
> > debug registers. This can be achieved by making an 'un-pinned' breakpoint
> > request, where every such request gets a chance to use the debug
> > register in a round-robin fashion (all this is provided by perf-events
> > infrastructure anyway).
> 
> I don't understand what "round-robin" means for breakpoints.  When
> you let the kernel execute normally again after registration, either
> my breakpoint is enabled or it isn't.  There is no meaningful sense
> in which you can "time-share" a hardware breakpoint slot.  (That is,
> except for doing per-task registrations, which is a different
> semantics entirely.)
>

As I said before, it is a 'feature' 'acquired' by breakpoints by virtue
of using the perf-events layer underneath. One can think of it remotely
being useful to profile (for reads/writes) a large number of addresses
simultaneously (reference:LKML message-id:1248856132.6987.3034.camel@twins
and related thread) to 'statistically' indicate a hit-count for the same;
however disastrous to use in debugging roles.

> > Presently, the breakpoint infrastructure does not provide callbacks that
> > can be invoked whenever an 'un-pinned' breakpoint request is
> > scheduled-in/out (analogous to .enabled and .disabled). We could pursue
> > to get support for the same (of course, that would require a good
> > in-kernel user to convince the community!).
> 
> I am having trouble imagining what any kind of "scheduled-in/out"
> could possibly useful to do at all if you don't notify me about
> whether or not my breakpoint is in place.  A "best effort"
> breakpoint, that might be caught and might not be and who knows
> whether it's really installed, is just not useful.  I must be
> missing the essence of what you mean.
> 

Unfortunately, what you've described above as "best effort" breakpoint
is what 'un-pinned' breakpoint now is. Again, it is a carry-over from
perf-events where the said behaviour suits fine for other performance
monitoring counters (provided by the PMU on Intel x86 processors).

There isn't an in-kernel user for 'un-pinned' breakpoint requests at
present and perhaps when such need arises, a notifier/callback
implementation during sched-in/out would also follow.

> The thing I had talked about before was each hw_breakpoint
> registration having a priority number.  When another registration
> comes along with a higher priority, it can boot yours and make a
> callback so you know it's been stolen.  Conversely, when a competing
> registration goes away, the highest-priority registration-in-waiting
> gets installed and gets a callback to tell you it's now active.  (I
> guess really there could just be a single notifier list that gets
> called when a slot becomes free, so the suitors can try again to see
> whose priority wins.  Whatever.)  But you've said this is not what
> it does now.
> 

Yes, priority based hw-breakpoint registration (and queueing of new
requests) is no longer there.

> Anyway, that kind of dynamicism is not what I was talking about here.
> If we had it, the script-language features might look rather similar
> and so work how you're talking about here.  i.e., the .enabled and
> .disabled probes firing due to an hw_breakpoint layer callback that is
> "spontaneous" to the stap module's eyes, i.e. driven by the ebb and
> flow of external demand on the scarce shared resource.
> 
> In the examples I gave, the .unavailable sub-probe in the simplest
> form is just a translation-time (tapset sugar) way to do some implicit
> script-level stuff at 'probe begin' time, but conditional on whether
> the associated hw_breakpoint registration at startup succeeded.
> 
> In the example with dynamic (i.e. runtime script-controlled)
> registration, the .begin and .end sub-probes are just tapset sugar for
> some script-level stuff to do implicitly when script code uses "enable
> watch_foo" and "disable watch_foo".  For that use, you might have a
> .unavailable sub-probe that runs when "enable" fails, or you might
> just have "enable" return a testable success/failure to the script
> code or whatnot.
> 

While hw-breakpoint APIs could originally be enabled from most contexts,
I'm unsure about the same post perf-integration. As Prerna stated, it
needs to be verified to know if (un)register_<> breakpoint interfaces
can be successfully invoked from a kprobe handler context.

> Given all that, I can imagine wanting the tie-in for some kind of
> "breakpoint scheduling" to be that "enable" has "enable-now-or-fail"
> and "enable-now-or-later" variants.  Then in the "now or later"
> variant, those same .begin and .end probes might get run during the
> lifetime of a script (due to an hw_breakpoint layer callback) rather
> than only at "enable foo" and "disable foo" time.
>

The kernel hw-breakpoint API supports only a "enable-now-or-fail"
variant and I think that the script cannot implement
"enable-now-or-later" without kernel support (to notify a vacated debug
register).

This means all 'pinned' breakpoint requests beyond HBP_NUM
(HBP_NUM = number of debug registers per-CPU) would always fail and
must fall-back to .unavailable probe. The failure can happen early in
case of pre-existing breakpoint requests.

So, is the .unavailable probe's role just to warn the user that his
request has failed? Or do you see more uses for the same (that isn't
obvious to me)?

Thanks,
K.Prasad

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC] Systemtap translator support for hardware breakpoints on
  2010-01-09  2:46       ` K.Prasad
@ 2010-01-10 21:37         ` Roland McGrath
  0 siblings, 0 replies; 31+ messages in thread
From: Roland McGrath @ 2010-01-10 21:37 UTC (permalink / raw)
  To: prasad; +Cc: Prerna Saxena, systemtap

> So, is the .unavailable probe's role just to warn the user that his
> request has failed? Or do you see more uses for the same (that isn't
> obvious to me)?

The example I had meant to suggest was writing a tapset or script that
would like to use a watchpoint to implement some particular higher-level
tracing, but is prepared to fall back to a different technique if it is not
possible to install a watchpoint.


Thanks,
Roland

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [RFC V2] Systemtap translator support for kernel hardware breakpoints
  2010-01-07 18:01 ` Frank Ch. Eigler
  2010-01-07 21:57   ` Roland McGrath
  2010-01-08 14:35   ` [RFC] Systemtap translator support for hw breakpoints on x86 Prerna Saxena
@ 2010-01-25 19:00   ` Prerna Saxena
  2010-01-26 21:16     ` Frank Ch. Eigler
  2 siblings, 1 reply; 31+ messages in thread
From: Prerna Saxena @ 2010-01-25 19:00 UTC (permalink / raw)
  To: systemtap; +Cc: Frank Ch. Eigler

[-- Attachment #1: Type: text/plain, Size: 2300 bytes --]

The previous version of patches courted a very informative discussion on 
what we could do with hardware breakpoints using systemtap. I'm 
attaching a new set of patches to enable systemtap translator supported 
for hardware breakpoints on x86 / x86_64. The basic constructs 
intorduced earlier still stand :

probe kernel.data(ADDRESS).write
probe kernel.data(ADDRESS).rw
probe kernel.data(ADDRESS).length(LEN).write
probe kernel.data(ADDRESS).length(LEN).rw
probe kernel.data("SYMBOL_NAME").write
probe kernel.data("SYMBOL_NAME").rw

The patch is based on the mainline kernel. The above constructs do not 
depend on dwarf, and hence can be used even on kernels lacking debuginfo.

Patches :
hwbkpt.patch : Code changes for systemtap translator.
hwbkpt-misc.patch : Manpage updates, NEWS, testcase.

Changes from earlier version :
1. The script approximates user-specified lengths to the next largest 
request the architecture can support, in case the length is found to be 
invalid. An error is reported only if the length is too large for the 
architecture to support. (The earlier response was to flag an error and 
ignore the probe for all non-supported lengths.)

2. The script translation fails if the number of hardware breakpoint 
probes exceeds the number of debug registers for a given architecture.

3. For now, probing of a kernel function's local variables is not 
possible. Also, with export of kallsyms_lookup_name(), systemtap 
generated module can directly decode symbol address at run-time. IMO, 
use of dwarf routines seems an overkill for now.

What remains to be done (TODO):
1. Enabling systemtap hardware breakpoint probes for other architectures 
(powerpc/S390, etc) (The kernel hardware breakpoint API for these is not 
frozen as yet)
2. New construct : probe kernel.data(ADDRESS).change{}, which gets 
triggered each time a write on ADDRESS actually updates its value.
3. Dynamic enabling / disabling a hardware breakpoint handler from a 
kprobe context. (This will in turn allow one to set breakpoints on local 
variables of kernel functions)
4. Updation of systemtap language reference with the above constructs.( 
hwbkpt-misc.patch updates man pages only)

Awaiting feedback,

-- 
Prerna Saxena

Linux Technology Centre,
IBM Systems and Technology Lab,
Bangalore, India

[-- Attachment #2: hwbkpt.patch --]
[-- Type: text/x-diff, Size: 18326 bytes --]

Signed-off-by: Prerna Saxena <prerna@linux.vnet.ibm.com>

Index: systemtap/tapsets.cxx
===================================================================
--- systemtap.orig/tapsets.cxx
+++ systemtap/tapsets.cxx
@@ -5319,7 +5319,373 @@ kprobe_builder::build(systemtap_session 
     }
 }
 
+// ------------------------------------------------------------------------
+//  Hardware breakpoint based probes.
+// ------------------------------------------------------------------------
+
+static const string TOK_HWBKPT("data");
+static const string TOK_HWBKPT_WRITE("write");
+static const string TOK_HWBKPT_RW("rw");
+static const string TOK_LENGTH("length");
+
+#define HWBKPT_READ 0
+#define HWBKPT_WRITE 1
+#define HWBKPT_RW 2
+struct hwbkpt_derived_probe: public derived_probe
+{
+  hwbkpt_derived_probe (probe *base,
+                        probe_point *location,
+                        uint64_t addr,
+			string symname,
+			unsigned int len,
+			bool has_only_read_access,
+			bool has_only_write_access,
+			bool has_rw_access
+                        );
+  Dwarf_Addr hwbkpt_addr;
+  string symbol_name;
+  unsigned int hwbkpt_access,hwbkpt_len;
+
+  void printsig (std::ostream &o) const;
+  void join_group (systemtap_session& s);
+};
+
+struct hwbkpt_derived_probe_group: public derived_probe_group
+{
+  unsigned int max_hwbkpt_probes_by_arch;
+
+private:
+  vector<hwbkpt_derived_probe*> hwbkpt_probes_vector;
+  typedef vector<hwbkpt_derived_probe*>::iterator h_p_v_iterator;
+
+public:
+  void enroll (hwbkpt_derived_probe* probe, systemtap_session& s);
+  void emit_module_decls (systemtap_session& s);
+  void emit_module_init (systemtap_session& s);
+  void emit_module_exit (systemtap_session& s);
+};
+
+hwbkpt_derived_probe::hwbkpt_derived_probe (probe *base,
+					    probe_point *location,
+					    uint64_t addr,
+					    string symname,
+					    unsigned int len,
+					    bool has_only_read_access,
+					    bool has_only_write_access,
+					    bool has_rw_access
+					    ):
+  derived_probe (base, location),
+  hwbkpt_addr (addr),
+  symbol_name (symname),
+  hwbkpt_len (len)
+{
+  this->tok = base->tok;
+
+  vector<probe_point::component*> comps;
+  comps.push_back (new probe_point::component(TOK_KERNEL));
+
+  if (hwbkpt_addr)
+	  comps.push_back (new probe_point::component (TOK_HWBKPT, new literal_number(hwbkpt_addr)));
+  else
+	if (symbol_name.size())
+	  comps.push_back (new probe_point::component (TOK_HWBKPT, new literal_string(symbol_name)));
+
+  comps.push_back (new probe_point::component (TOK_LENGTH, new literal_number(hwbkpt_len)));
+
+  if (has_only_read_access)
+	this->hwbkpt_access = HWBKPT_READ ;
+//TODO add code for comps.push_back for read, since this flag is not for x86
+
+  else
+	{
+	  if (has_only_write_access)
+		{
+		  this->hwbkpt_access = HWBKPT_WRITE ;
+	  	  comps.push_back (new probe_point::component(TOK_HWBKPT_WRITE));
+		}
+	  else
+		{
+		  this->hwbkpt_access = HWBKPT_RW ;
+	  	  comps.push_back (new probe_point::component(TOK_HWBKPT_RW));
+		}
+	}
+
+  this->sole_location()->components = comps;
+}
+
+void hwbkpt_derived_probe::printsig (ostream& o) const
+{
+  sole_location()->print (o);
+  printsig_nested (o);
+}
+
+void hwbkpt_derived_probe::join_group (systemtap_session& s)
+{
+  if (! s.hwbkpt_derived_probes) {
+        s.hwbkpt_derived_probes = new hwbkpt_derived_probe_group ();
+	if (s.architecture == "i386" || s.architecture == "x86_64" )
+			s.hwbkpt_derived_probes->max_hwbkpt_probes_by_arch = 4;
+	else {
+			s.hwbkpt_derived_probes->max_hwbkpt_probes_by_arch = 0;
+			throw semantic_error ("Hardware Breakpoints not supported on arch " + s.architecture);
+		}
+	}
+  s.hwbkpt_derived_probes->enroll (this, s);
+}
+
+void hwbkpt_derived_probe_group::enroll (hwbkpt_derived_probe* p, systemtap_session& s)
+{
+  if (hwbkpt_probes_vector.size() < max_hwbkpt_probes_by_arch )
+	hwbkpt_probes_vector.push_back (p);
+  else  {
+	  std::stringstream hwbkpt_err_msg;
+	  hwbkpt_err_msg << "No of Hardware breakpoint probes in script exceeds the number of hardware breakpoints that the architecture can support : max " << max_hwbkpt_probes_by_arch << " on " << s.architecture ;
+	  throw semantic_error (hwbkpt_err_msg.str());
+	}
+}
+
+void
+hwbkpt_derived_probe_group::emit_module_decls (systemtap_session& s)
+{
+  if (hwbkpt_probes_vector.empty()) return;
+
+  s.op->newline() << "/* ---- hwbkpt-based probes ---- */";
+
+  // Warn of misconfigured kernels
+  s.op->newline() << "#ifndef CONFIG_HAVE_HW_BREAKPOINT";
+  s.op->newline() << "#error \"Need CONFIG_HAVE_HW_BREAKPOINT!\"";
+  s.op->newline() << "#endif";
+
+  s.op->newline() << "#include <linux/perf_event.h>";
+  s.op->newline() << "#include <linux/hw_breakpoint.h>";
+  s.op->newline();
+
+// Define macros for access type
+  s.op->newline() << "#define HWBKPT_READ 0";
+  s.op->newline() << "#define HWBKPT_WRITE 1";
+  s.op->newline() << "#define HWBKPT_RW 2";
+
+  // Forward declare the master entry functions
+  s.op->newline() << "static int enter_hwbkpt_probe (struct perf_event *bp,";
+  s.op->line() << " int nmi,";
+  s.op->line() << " struct perf_sample_data *data,";
+  s.op->line() << " struct pt_regs *regs);";
+
+  // Emit the actual probe list.
+
+  s.op->newline() << "static struct perf_event_attr ";
+  s.op->newline() << "stap_hwbkpt_probe_array[" << hwbkpt_probes_vector.size() << "];";
+
+  s.op->newline() << "static struct perf_event **";
+  s.op->newline() << "stap_hwbkpt_ret_array[" << hwbkpt_probes_vector.size() << "];";
+  s.op->newline() << "static struct stap_hwbkpt_probe {";
+  s.op->newline() << "int registered_p:1;";
+// registered_p =  0 signifies a probe that failed registration
+// registered_p =  1 signifies a probe that got registered successfully
+
+  // Probe point & Symbol Names are mostly small and uniform enough
+  // to justify putting char[MAX]'s into  the array instead of
+  // relocated char*'s.
+
+  size_t pp_name_max = 0 , symbol_name_max = 0;
+  size_t pp_name_tot = 0 , symbol_name_tot = 0;
+  for (unsigned int it = 0; it < hwbkpt_probes_vector.size(); it++)
+    {
+      hwbkpt_derived_probe* p = hwbkpt_probes_vector.at(it);
+#define DOIT(var,expr) do {                             \
+        size_t var##_size = (expr) + 1;                 \
+        var##_max = max (var##_max, var##_size);        \
+        var##_tot += var##_size; } while (0)
+      DOIT(pp_name, lex_cast_qstring(*p->sole_location()).size());
+      DOIT(symbol_name, p->symbol_name.size());
+#undef DOIT
+    }
+
+#define CALCIT(var)                                                     \
+  s.op->newline() << "const char " << #var << "[" << var##_name_max << "] ;";
+  CALCIT(pp);
+  CALCIT(symbol);
+#undef CALCIT
+
+  s.op->newline() << "const unsigned long address;";
+  s.op->newline() << "uint8_t atype;";
+  s.op->newline() << "uint8_t len;";
+  s.op->newline() << "void (* const ph) (struct context*);";
+  s.op->newline() << "} stap_hwbkpt_probes[] = {";
+  s.op->indent(1);
+
+  for (unsigned int it = 0; it < hwbkpt_probes_vector.size(); it++)
+    {
+      hwbkpt_derived_probe* p = hwbkpt_probes_vector.at(it);
+      s.op->newline() << "{";
+//      s.op->line() << " .registered_p=1,";
+      if (p->symbol_name.size())
+      s.op->line() << " .address=(unsigned long)0x0" << "ULL,";
+      else
+      s.op->line() << " .address=(unsigned long)0x" << hex << p->hwbkpt_addr << dec << "ULL,";
+      s.op->line() << " .atype=" << p->hwbkpt_access << ",";
+      s.op->line() << " .len=" << p->hwbkpt_len << ",";
+      s.op->line() << " .pp=" << lex_cast_qstring (*p->sole_location()) << ",";
+      s.op->line() << " .symbol=\"" << p->symbol_name << "\",";
+      s.op->line() << " .ph=&" << p->name << "";
+      s.op->line() << " },";
+    }
+  s.op->newline() << "};";
+
+  // Emit the hwbkpt callback function
+  s.op->newline() ;
+  s.op->newline() << "static int enter_hwbkpt_probe (struct perf_event *bp,";
+  s.op->line() << " int nmi,";
+  s.op->line() << " struct perf_sample_data *data,";
+  s.op->line() << " struct pt_regs *regs) {";
+  s.op->newline() << "unsigned int i;";
+  s.op->newline() << "if (bp->attr.type != PERF_TYPE_BREAKPOINT )";
+  s.op->newline() << "return -1;";
+  s.op->newline() << "for ( i=0; i<" << hwbkpt_probes_vector.size() << "; i++) {";
+  s.op->newline() << "struct perf_event_attr *hp = & stap_hwbkpt_probe_array[i];";
+  s.op->newline() << "if (bp->attr.bp_addr==hp->bp_addr && bp->attr.bp_type==hp->bp_type && bp->attr.bp_len==hp->bp_len ) {";
+  s.op->newline() << "struct stap_hwbkpt_probe *sdp = &stap_hwbkpt_probes[i];";
+  common_probe_entryfn_prologue (s.op, "STAP_SESSION_RUNNING", "sdp->pp");
+  s.op->newline() << "c->regs = regs;";
+  s.op->newline() << "(*sdp->ph) (c);";
+  common_probe_entryfn_epilogue (s.op);
+  s.op->newline() << "return 0;";
+  s.op->newline() << "}";
+  s.op->newline(-1) << "}";
+  s.op->newline() << "return 0;";
+  s.op->newline() << "}";
+}
+
+void
+hwbkpt_derived_probe_group::emit_module_init (systemtap_session& s)
+{
+  s.op->newline() << "for (i=0; i<" << hwbkpt_probes_vector.size() << "; i++) {";
+  s.op->newline(1) << "struct stap_hwbkpt_probe *sdp = & stap_hwbkpt_probes[i];";
+  s.op->newline() << "struct perf_event_attr *hp = & stap_hwbkpt_probe_array[i];";
+  s.op->newline() << "void *addr = (void *) sdp->address;";
+  s.op->newline() << "const char *hwbkpt_symbol_name = addr ? NULL : sdp->symbol;";
+  s.op->newline() << "	hw_breakpoint_init(hp);";
+  s.op->newline() << "	if (addr) ";
+  s.op->newline() << "	    hp->bp_addr = (unsigned long) addr;";
+  s.op->newline() << "	else { ";
+  s.op->newline() << "	   hp->bp_addr = kallsyms_lookup_name(hwbkpt_symbol_name);";
+  s.op->newline() << "	if (!hp->bp_addr) { ";
+  s.op->newline() << " 	_stp_warn(\"Probe %s registration skipped : invalid symbol %s \",sdp->pp,hwbkpt_symbol_name);";
+  s.op->newline() << "	sdp->registered_p = 0;";
+  s.op->newline() << "	stap_hwbkpt_ret_array[i] = ERR_PTR(-EINVAL);";
+  s.op->newline() << "		} ";
+  s.op->newline() << "	} ";
+  s.op->newline() << "#ifdef CONFIG_X86";
+  s.op->newline() << "  switch(sdp->len) {";
+  s.op->newline() << "	case 1:";
+  s.op->newline() << "    hp->bp_len = HW_BREAKPOINT_LEN_1;";
+  s.op->newline() << "    break;";
+  s.op->newline() << "	case 2:";
+  s.op->newline() << "    hp->bp_len = HW_BREAKPOINT_LEN_2;";
+  s.op->newline() << "    break;";
+  s.op->newline() << "	case 3:";
+  s.op->newline() << "	case 4:";
+  s.op->newline() << "    hp->bp_len = HW_BREAKPOINT_LEN_4;";
+  s.op->newline() << "    break;";
+  s.op->newline() << "#ifdef CONFIG_X86_64";
+  s.op->newline() << "	case 5:";
+  s.op->newline() << "	case 6:";
+  s.op->newline() << "	case 7:";
+  s.op->newline() << "	case 8:";
+  s.op->newline() << "    hp->bp_len = HW_BREAKPOINT_LEN_8;";
+  s.op->newline() << "    break;";
+  s.op->newline() << "#endif /*CONFIG_X86_64*/";
+  s.op->newline() << "	default:";
+  s.op->newline() << " 	_stp_warn(\"Probe %s registration skipped : unsupported length. Supported lengths for x86 are 1,2,4 {8 for x86_84 only} \",sdp->pp);";
+  s.op->newline() << "	sdp->registered_p = 0;";
+  s.op->newline() << "	stap_hwbkpt_ret_array[i] = ERR_PTR(-EINVAL);";
+  s.op->newline() << "	}";
+  s.op->newline() << "  switch(sdp->atype)";
+  s.op->newline() << "	{";
+  s.op->newline() << "    case HWBKPT_READ:";
+  s.op->newline() << "  	_stp_warn(\"Probe %s registration skipped : READ-ONLY hardware breakpoints not supported on x86\",sdp->pp);";
+  s.op->newline() << "		sdp->registered_p = 0;";
+  s.op->newline() << "		stap_hwbkpt_ret_array[i] = ERR_PTR(-EINVAL);";
+  s.op->newline() << "		break;";
+  s.op->newline() << "    case HWBKPT_WRITE:";
+  s.op->newline() << "		hp->bp_type = HW_BREAKPOINT_W;";
+  s.op->newline() << "		break;";
+  s.op->newline() << "    case HWBKPT_RW:";
+  s.op->newline() << "		hp->bp_type = HW_BREAKPOINT_R | HW_BREAKPOINT_W;";
+  s.op->newline() << "		break;";
+  s.op->newline() << "  }";
+  s.op->newline() << "#endif /*CONFIG_X86*/";
+  s.op->newline() << "probe_point = sdp->pp;"; // for error messages
+  s.op->newline() << "if ( !IS_ERR(stap_hwbkpt_ret_array[i]) ) {";
+  s.op->newline() << " stap_hwbkpt_ret_array[i] = register_wide_hw_breakpoint( hp, (void *)&enter_hwbkpt_probe );";
+  s.op->newline() << " if (IS_ERR(stap_hwbkpt_ret_array[i])) {";
+  s.op->newline() << "     rc = PTR_ERR(stap_hwbkpt_ret_array[i]);";
+  s.op->newline() << "      sdp->registered_p = 0;";
+  s.op->newline(1) << "	    _stp_warn(\" Hwbkpt Probe %s : Registration error rc= %d, Addr = %p, name = %s\", probe_point, rc, addr, hwbkpt_symbol_name);";
+  s.op->newline(-1) << "  }";
+  s.op->newline() << "    else sdp->registered_p = 1;";
+  s.op->newline() << "}";
+  s.op->newline(-1) << "}"; // for loop
+}
+
+void
+hwbkpt_derived_probe_group::emit_module_exit (systemtap_session& s)
+{
+  //Unregister hwbkpt probes.
+  s.op->newline() << "for (i=0; i<" << hwbkpt_probes_vector.size() << "; i++) {";
+  s.op->newline(1) << "struct stap_hwbkpt_probe *sdp = & stap_hwbkpt_probes[i];";
+  s.op->newline() << " if ( IS_ERR(stap_hwbkpt_ret_array[i]) ) continue;";
+//  s.op->newline() << " if ( sdp->registered_p == 0 ) continue;";
+  s.op->newline() << " unregister_wide_hw_breakpoint(stap_hwbkpt_ret_array[i]);";
+  s.op->newline() << "sdp->registered_p = 0;";
+  s.op->newline(-1) << "}";
+}
 
+struct hwbkpt_builder: public derived_probe_builder
+{
+  hwbkpt_builder() {}
+  virtual void build(systemtap_session & sess,
+		     probe * base,
+		     probe_point * location,
+		     literal_map_t const & parameters,
+		     vector<derived_probe *> & finished_results);
+};
+
+void
+hwbkpt_builder::build(systemtap_session & sess,
+		      probe * base,
+		      probe_point * location,
+		      literal_map_t const & parameters,
+		      vector<derived_probe *> & finished_results)
+{
+  string symbol_str_val;
+  int64_t hwbkpt_address, len;
+  bool has_addr, has_symbol_str, has_write, has_rw, has_len;
+
+  has_addr = get_param (parameters, TOK_HWBKPT, hwbkpt_address);
+  has_symbol_str = get_param (parameters, TOK_HWBKPT, symbol_str_val);
+  has_len = get_param (parameters, TOK_LENGTH, len);
+  has_write = (parameters.find(TOK_HWBKPT_WRITE) != parameters.end());
+  has_rw = (parameters.find(TOK_HWBKPT_RW) != parameters.end());
+
+  if (!has_len)
+	len = 1;
+
+  if (has_addr)
+      finished_results.push_back (new hwbkpt_derived_probe (base,
+							    location,
+							    hwbkpt_address,
+							    "",len,0,
+							    has_write,
+							    has_rw));
+  else // has symbol_str
+      finished_results.push_back (new hwbkpt_derived_probe (base,
+							    location,
+							    0,
+							    symbol_str_val,len,0,
+							    has_write,
+							    has_rw));
+}
 
 // ------------------------------------------------------------------------
 // statically inserted kernel-tracepoint derived probes
@@ -5811,7 +6177,7 @@ tracepoint_derived_probe_group::emit_mod
         }
       s.op->line() << ") {";
       s.op->indent(1);
-      common_probe_entryfn_prologue (s.op, "STAP_SESSION_RUNNING", 
+      common_probe_entryfn_prologue (s.op, "STAP_SESSION_RUNNING",
                                      lex_cast_qstring (*p->sole_location()));
       s.op->newline() << "c->marker_name = "
                       << lex_cast_qstring (p->tracepoint_name)
@@ -6140,7 +6506,6 @@ tracepoint_builder::init_dw(systemtap_se
   return true;
 }
 
-
 void
 tracepoint_builder::build(systemtap_session& s,
                           probe *base, probe_point *location,
@@ -6158,7 +6523,6 @@ tracepoint_builder::build(systemtap_sess
 }
 
 
-
 // ------------------------------------------------------------------------
 //  Standard tapset registry.
 // ------------------------------------------------------------------------
@@ -6205,6 +6569,21 @@ register_standard_tapsets(systemtap_sess
      ->bind_num(TOK_MAXACTIVE)->bind(new kprobe_builder());
   s.pattern_root->bind(TOK_KPROBE)->bind_num(TOK_STATEMENT)
       ->bind(TOK_ABSOLUTE)->bind(new kprobe_builder());
+
+  //Hwbkpt based probe
+  s.pattern_root->bind(TOK_KERNEL)->bind_num(TOK_HWBKPT)
+	->bind(TOK_HWBKPT_WRITE)->bind(new hwbkpt_builder());
+  s.pattern_root->bind(TOK_KERNEL)->bind_str(TOK_HWBKPT)
+	->bind(TOK_HWBKPT_WRITE)->bind(new hwbkpt_builder());
+  s.pattern_root->bind(TOK_KERNEL)->bind_num(TOK_HWBKPT)
+	->bind(TOK_HWBKPT_RW)->bind(new hwbkpt_builder());
+  s.pattern_root->bind(TOK_KERNEL)->bind_str(TOK_HWBKPT)
+	->bind(TOK_HWBKPT_RW)->bind(new hwbkpt_builder());
+  s.pattern_root->bind(TOK_KERNEL)->bind_num(TOK_HWBKPT)
+	->bind_num(TOK_LENGTH)->bind(TOK_HWBKPT_WRITE)->bind(new hwbkpt_builder());
+  s.pattern_root->bind(TOK_KERNEL)->bind_num(TOK_HWBKPT)
+	->bind_num(TOK_LENGTH)->bind(TOK_HWBKPT_RW)->bind(new hwbkpt_builder());
+  // length supported with address only, not symbol names
 }
 
 
@@ -6231,6 +6610,7 @@ all_session_groups(systemtap_session& s)
   DOONE(mark);
   DOONE(tracepoint);
   DOONE(kprobe);
+  DOONE(hwbkpt);
   DOONE(hrtimer);
   DOONE(perfmon);
   DOONE(procfs);
Index: systemtap/session.h
===================================================================
--- systemtap.orig/session.h
+++ systemtap/session.h
@@ -31,6 +31,7 @@ struct derived_probe;
 struct be_derived_probe_group;
 struct dwarf_derived_probe_group;
 struct kprobe_derived_probe_group;
+struct hwbkpt_derived_probe_group;
 struct uprobe_derived_probe_group;
 struct utrace_derived_probe_group;
 struct itrace_derived_probe_group;
@@ -175,6 +176,7 @@ struct systemtap_session
   be_derived_probe_group* be_derived_probes;
   dwarf_derived_probe_group* dwarf_derived_probes;
   kprobe_derived_probe_group* kprobe_derived_probes;
+  hwbkpt_derived_probe_group* hwbkpt_derived_probes;
   uprobe_derived_probe_group* uprobe_derived_probes;
   utrace_derived_probe_group* utrace_derived_probes;
   itrace_derived_probe_group* itrace_derived_probes;
Index: systemtap/elaborate.cxx
===================================================================
--- systemtap.orig/elaborate.cxx
+++ systemtap/elaborate.cxx
@@ -1523,6 +1523,7 @@ systemtap_session::systemtap_session ():
   be_derived_probes(0),
   dwarf_derived_probes(0),
   kprobe_derived_probes(0),
+  hwbkpt_derived_probes(0),
   uprobe_derived_probes(0),
   utrace_derived_probes(0),
   itrace_derived_probes(0),

[-- Attachment #3: hwbkpt-misc.patch --]
[-- Type: text/x-diff, Size: 3349 bytes --]

Signed-off-by: Prerna Saxena <prerna@linux.vnet.ibm.com>

Index: systemtap/stapprobes.3stap.in
===================================================================
--- systemtap.orig/stapprobes.3stap.in
+++ systemtap/stapprobes.3stap.in
@@ -660,6 +660,55 @@ and a string of name=value pairs for all
 is available in
 .BR $$vars " or " $$parms .
 
+.SS HARDWARE BREAKPOINTS
+This family of probes is used to set hardware watchpoints for a given
+ (global) kernel symbol. The probes take three components as inputs :
+
+1. The
+.BR virtual address / name
+of the kernel symbol to be traced is supplied as argument to this class
+of probes. ( Probes for only data segment variables are supported. Probing
+local variables of a function cannot be done.)
+
+2. Nature of access to be probed :
+a.
+.I .write
+probe gets triggered when a write happens at the specified address/symbol
+name.
+b.
+.I rw
+probe is triggered when either a read or write happens.
+
+3.
+.BR .length
+(optional)
+Users have the option of specifying the address interval to be probed
+using "length" constructs. The user-specified length gets approximated
+to the closest possible address length that the architecture can
+support. If the specified length exceeds the limits imposed by
+architecture, an error message is flagged and probe registration fails.
+Wherever 'length' is not specified, the translator requests a hardware
+breakpoint probe of length 1. It should be noted that the "length"
+construct is not valid with symbol names.
+
+Following constructs are supported :
+.SAMPLE
+probe kernel.data(ADDRESS).write
+probe kernel.data(ADDRESS).rw
+probe kernel.data(ADDRESS).length(LEN).write
+probe kernel.data(ADDRESS).length(LEN).rw
+probe kernel.data("SYMBOL_NAME").write
+probe kernel.data("SYMBOL_NAME").rw
+.ESAMPLE
+
+This set of probes make use of the debug registers of the processor,
+which is a scarce resource. (4 on x86 , 1 on powerpc ) The script
+translation fails if a user sets more hardware breakpoint probes than
+the limits set by architecture. For example,a pass-2 error is flagged
+when an input script requests 5 hardware breakpoint probes on an x86
+system while x86 architecture supports a maximum of 4 breakpoints.
+Users are cautioned to set probes judiciously.
+
 .SH EXAMPLES
 .PP
 Here are some example probe points, defining the associated events.
@@ -696,6 +745,9 @@ refers to the statement of line 2917 wit
 kernel.statement("bio_init@fs/bio.c+3")
 refers to the statement at line bio_init+3 within "fs/bio.c".
 .TP
+kernel.data("pid_max").write
+refers to a hardware preakpoint of type "write" set on pid_max
+.TP
 syscall.*.return
 refers to the group of probe aliases with any name in the third position
 
Index: systemtap/testsuite/buildok/hwbkpt.stp
===================================================================
--- /dev/null
+++ systemtap/testsuite/buildok/hwbkpt.stp
@@ -0,0 +1,4 @@
+#! stap -wp4
+
+probe kernel.data("pid_max").write {}
+probe kernel.data("pid_max").rw {}
Index: systemtap/NEWS
===================================================================
--- systemtap.orig/NEWS
+++ systemtap/NEWS
@@ -1,3 +1,5 @@
+- New : systemtap now supports probes for Hardware Breakpoints. For syntax details, see man page for stapprobes.
+
 * What's new in version 1.1
 
 - New tracepoint based tapset for memory subsystem.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC V2] Systemtap translator support for kernel hardware breakpoints
  2010-01-25 19:00   ` [RFC V2] Systemtap translator support for kernel hardware breakpoints Prerna Saxena
@ 2010-01-26 21:16     ` Frank Ch. Eigler
  2010-01-27 15:55       ` Prerna Saxena
  0 siblings, 1 reply; 31+ messages in thread
From: Frank Ch. Eigler @ 2010-01-26 21:16 UTC (permalink / raw)
  To: Prerna Saxena; +Cc: systemtap

Prerna Saxena <prerna@linux.vnet.ibm.com> writes:

> [...] The basic constructs intorduced earlier still stand :
>
> probe kernel.data(ADDRESS).write
> probe kernel.data(ADDRESS).rw
> probe kernel.data(ADDRESS).length(LEN).write
> probe kernel.data(ADDRESS).length(LEN).rw
> probe kernel.data("SYMBOL_NAME").write
> probe kernel.data("SYMBOL_NAME").rw

OK.

> The patch is based on the mainline kernel. The above constructs do not
> depend on dwarf, and hence can be used even on kernels lacking
> debuginfo.

OK.  The translator can detect the absence or presence of this
functionality, and should preclude translation without it.  Use
systemtap_session.kernel_config["CONFIG_HAVE_HW_BREAKPOINTS"]

> 2. The script translation fails if the number of hardware breakpoint
> probes exceeds the number of debug registers for a given architecture.

OK, though I suggest making this part a warning only.  This code has
no way to know what the actual limits are at the moment of execution.
They could easily be lower, and if perf etc. get some sort of magical
multiplexing, the limits could be higher.  The runtime should (but
doesn't yet - see below) handle errors such as overreservations
correctly.


> 3. For now, probing of a kernel function's local variables is not
> possible. Also, with export of kallsyms_lookup_name(), systemtap
> generated module can directly decode symbol address at run-time. IMO,
> use of dwarf routines seems an overkill for now.

OK, assuming that this re-exported function stays re-exported, there
may be no need to do an autoconf-level check for it, if the code
already checks for CONFIG_HAVE_HW_BREAKPOINTS.


> [...]
> 3. Dynamic enabling / disabling a hardware breakpoint handler from a
> kprobe context. (This will in turn allow one to set breakpoints on
> local variables of kernel functions)

We hope to address this general area more comprehensively for systemtap-1.2.

> +void hwbkpt_derived_probe::join_group (systemtap_session& s)
> +{
> +  if (! s.hwbkpt_derived_probes) {
> +        s.hwbkpt_derived_probes = new hwbkpt_derived_probe_group ();
> +	if (s.architecture == "i386" || s.architecture == "x86_64" )
> +			s.hwbkpt_derived_probes->max_hwbkpt_probes_by_arch = 4;
> +	else {
> +			s.hwbkpt_derived_probes->max_hwbkpt_probes_by_arch = 0;
> +			throw semantic_error ("Hardware Breakpoints not supported on arch " + s.architecture);
> +		}
> +	}
> +  s.hwbkpt_derived_probes->enroll (this, s);
> +}

I suggest removing this error - or turning it into a (conditional) warning.


> +  // Warn of misconfigured kernels
> +  s.op->newline() << "#ifndef CONFIG_HAVE_HW_BREAKPOINT";
> +  s.op->newline() << "#error \"Need CONFIG_HAVE_HW_BREAKPOINT!\"";
> +  s.op->newline() << "#endif";

If the code also checks for the kernel_config[] above, this check
should never fail.


> +// Define macros for access type
> +  s.op->newline() << "#define HWBKPT_READ 0";
> +  s.op->newline() << "#define HWBKPT_WRITE 1";
> +  s.op->newline() << "#define HWBKPT_RW 2";

Why, instead of just using the symbols already defined in the
kernel headers?


> +  size_t pp_name_max = 0 , symbol_name_max = 0;
> +  size_t pp_name_tot = 0 , symbol_name_tot = 0;
> +  for (unsigned int it = 0; it < hwbkpt_probes_vector.size(); it++)
> +    {
> +      hwbkpt_derived_probe* p = hwbkpt_probes_vector.at(it);
> +#define DOIT(var,expr) do {                             \
> +        size_t var##_size = (expr) + 1;                 \
> +        var##_max = max (var##_max, var##_size);        \
> +        var##_tot += var##_size; } while (0)
> +      DOIT(pp_name, lex_cast_qstring(*p->sole_location()).size());
> +      DOIT(symbol_name, p->symbol_name.size());
> +#undef DOIT
> +    }
> +
> +#define CALCIT(var)                                                     \
> +  s.op->newline() << "const char " << #var << "[" << var##_name_max << "] ;";
> +  CALCIT(pp);
> +  CALCIT(symbol);
> +#undef CALCIT

Considering that we're likely to have a small handful of such probes
in a script, this is all unnecessary optimization.  Just plop a char*
in there.



> +  s.op->newline() << "#ifdef CONFIG_X86";
> +  s.op->newline() << "  switch(sdp->len) {";
> +  s.op->newline() << "	case 1:";
> +  s.op->newline() << "    hp->bp_len = HW_BREAKPOINT_LEN_1;";
> +  s.op->newline() << "    break;";
> +  s.op->newline() << "	case 2:";
> +  s.op->newline() << "    hp->bp_len = HW_BREAKPOINT_LEN_2;";
> +  s.op->newline() << "    break;";
> +  s.op->newline() << "	case 3:";
> +  s.op->newline() << "	case 4:";
> +  s.op->newline() << "    hp->bp_len = HW_BREAKPOINT_LEN_4;";
> +  s.op->newline() << "    break;";
> +  s.op->newline() << "#ifdef CONFIG_X86_64";
> +  s.op->newline() << "	case 5:";
> +  s.op->newline() << "	case 6:";
> +  s.op->newline() << "	case 7:";
> +  s.op->newline() << "	case 8:";
> +  s.op->newline() << "    hp->bp_len = HW_BREAKPOINT_LEN_8;";
> +  s.op->newline() << "    break;";
> +  s.op->newline() << "#endif /*CONFIG_X86_64*/";

Are you sure this CONFIG_arch* stuff is needed?  Let the kernel
try 8-byte watchpoints, and let us handle the error.


> +  s.op->newline() << "if ( !IS_ERR(stap_hwbkpt_ret_array[i]) ) {";
> +  s.op->newline() << " stap_hwbkpt_ret_array[i] = register_wide_hw_breakpoint( hp, (void *)&enter_hwbkpt_probe );";
> +  s.op->newline() << " if (IS_ERR(stap_hwbkpt_ret_array[i])) {";
> +  s.op->newline() << "     rc = PTR_ERR(stap_hwbkpt_ret_array[i]);";
> +  s.op->newline() << "      sdp->registered_p = 0;";
> +  s.op->newline(1) << "	    _stp_warn(\" Hwbkpt Probe %s : Registration error rc= %d, Addr = %p, name = %s\", probe_point, rc, addr, hwbkpt_symbol_name);";
> +  s.op->newline(-1) << "  }";
> +  s.op->newline() << "    else sdp->registered_p = 1;";
> +  s.op->newline() << "}";
> +  s.op->newline(-1) << "}"; // for loop

I mentioned this in the previous review, but this is insufficient.
(Please review my earlier comments in case something else was missed.)
If the [i]th registration attempt fails, and you return an rc != 0 to
the wrapping code, this code right here must ensure that all
already-registered entries ([0..i-1]) have been unregistered.


> +  s.op->newline() << " if ( IS_ERR(stap_hwbkpt_ret_array[i]) ) continue;";
> +//  s.op->newline() << " if ( sdp->registered_p == 0 ) continue;";

Just use registered_p = 0 or 1.  You don't need to archive the error
codes.

> +  //Hwbkpt based probe
> +  s.pattern_root->bind(TOK_KERNEL)->bind_num(TOK_HWBKPT)
> +	->bind(TOK_HWBKPT_WRITE)->bind(new hwbkpt_builder());
> +  s.pattern_root->bind(TOK_KERNEL)->bind_str(TOK_HWBKPT)
> +	->bind(TOK_HWBKPT_WRITE)->bind(new hwbkpt_builder());
> +  s.pattern_root->bind(TOK_KERNEL)->bind_num(TOK_HWBKPT)
> +	->bind(TOK_HWBKPT_RW)->bind(new hwbkpt_builder());
> +  s.pattern_root->bind(TOK_KERNEL)->bind_str(TOK_HWBKPT)
> +	->bind(TOK_HWBKPT_RW)->bind(new hwbkpt_builder());
> +  s.pattern_root->bind(TOK_KERNEL)->bind_num(TOK_HWBKPT)
> +	->bind_num(TOK_LENGTH)->bind(TOK_HWBKPT_WRITE)->bind(new hwbkpt_builder());
> +  s.pattern_root->bind(TOK_KERNEL)->bind_num(TOK_HWBKPT)
> +	->bind_num(TOK_LENGTH)->bind(TOK_HWBKPT_RW)->bind(new hwbkpt_builder());
> +  // length supported with address only, not symbol names

This is probably the easiest place to wrap with a kernel_config[...]
conditional to disable this probe point family on unsupporting kernels.


- FChE

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC V2] Systemtap translator support for kernel hardware breakpoints
  2010-01-26 21:16     ` Frank Ch. Eigler
@ 2010-01-27 15:55       ` Prerna Saxena
  2010-01-27 17:12         ` Frank Ch. Eigler
  0 siblings, 1 reply; 31+ messages in thread
From: Prerna Saxena @ 2010-01-27 15:55 UTC (permalink / raw)
  To: Frank Ch. Eigler; +Cc: systemtap

[-- Attachment #1: Type: text/plain, Size: 8492 bytes --]

Hi Frank,
Thanks for the prompt review. Attaching a new version of patches that 
address your comments.

On 01/27/2010 02:46 AM, Frank Ch. Eigler wrote:

> OK.  The translator can detect the absence or presence of this
> functionality, and should preclude translation without it.  Use
> systemtap_session.kernel_config["CONFIG_HAVE_HW_BREAKPOINTS"]

Done.

>
>> 2. The script translation fails if the number of hardware breakpoint
>> probes exceeds the number of debug registers for a given architecture.
>
> OK, though I suggest making this part a warning only.  This code has
> no way to know what the actual limits are at the moment of execution.
> They could easily be lower, and if perf etc. get some sort of magical
> multiplexing, the limits could be higher.  The runtime should (but
> doesn't yet - see below) handle errors such as overreservations
> correctly.
>

Perf allows multiplexing only for non-pinned breakpoints, not the pinned 
ones used by systemtap. If this script is allowed to run with a warning, 
the runtime will register the first 'n' possible requests and reject the 
later ones. Is it not better to notify to the user that they are making 
more requests than the maximum limit imposed by hardware ? (more on this 
later)

>
>> 3. For now, probing of a kernel function's local variables is not
>> possible. Also, with export of kallsyms_lookup_name(), systemtap
>> generated module can directly decode symbol address at run-time. IMO,
>> use of dwarf routines seems an overkill for now.
>
> OK, assuming that this re-exported function stays re-exported, there
> may be no need to do an autoconf-level check for it, if the code
> already checks for CONFIG_HAVE_HW_BREAKPOINTS.
>

Agree.

>
>> [...]
>> 3. Dynamic enabling / disabling a hardware breakpoint handler from a
>> kprobe context. (This will in turn allow one to set breakpoints on
>> local variables of kernel functions)
>
> We hope to address this general area more comprehensively for systemtap-1.2.
>
>> +void hwbkpt_derived_probe::join_group (systemtap_session&  s)
>> +{
>> +  if (! s.hwbkpt_derived_probes) {
>> +        s.hwbkpt_derived_probes = new hwbkpt_derived_probe_group ();
>> +	if (s.architecture == "i386" || s.architecture == "x86_64" )
>> +			s.hwbkpt_derived_probes->max_hwbkpt_probes_by_arch = 4;
>> +	else {
>> +			s.hwbkpt_derived_probes->max_hwbkpt_probes_by_arch = 0;
>> +			throw semantic_error ("Hardware Breakpoints not supported on arch " + s.architecture);
>> +		}
>> +	}
>> +  s.hwbkpt_derived_probes->enroll (this, s);
>> +}
>
> I suggest removing this error - or turning it into a (conditional) warning.

I've elucidated this below.

>
>
>> +  // Warn of misconfigured kernels
>> +  s.op->newline()<<  "#ifndef CONFIG_HAVE_HW_BREAKPOINT";
>> +  s.op->newline()<<  "#error \"Need CONFIG_HAVE_HW_BREAKPOINT!\"";
>> +  s.op->newline()<<  "#endif";
> If the code also checks for the kernel_config[] above, this check
> should never fail.

Done.

>
>
>> +// Define macros for access type
>> +  s.op->newline()<<  "#define HWBKPT_READ 0";
>> +  s.op->newline()<<  "#define HWBKPT_WRITE 1";
>> +  s.op->newline()<<  "#define HWBKPT_RW 2";
>
> Why, instead of just using the symbols already defined in the
> kernel headers?
>
>

Removed this redundancy.

>> +#define CALCIT(var)                                                     \
>> +  s.op->newline()<<  "const char "<<  #var<<  "["<<  var##_name_max<<  "] ;";
>> +  CALCIT(pp);
>> +  CALCIT(symbol);
>> +#undef CALCIT
>
> Considering that we're likely to have a small handful of such probes
> in a script, this is all unnecessary optimization.  Just plop a char*
> in there.
>

Done.

>> +  s.op->newline()<<  "#ifdef CONFIG_X86";
>> +  s.op->newline()<<  "  switch(sdp->len) {";
>> +  s.op->newline()<<  "	case 1:";
>> +  s.op->newline()<<  "    hp->bp_len = HW_BREAKPOINT_LEN_1;";
>> +  s.op->newline()<<  "    break;";
>> +  s.op->newline()<<  "	case 2:";
>> +  s.op->newline()<<  "    hp->bp_len = HW_BREAKPOINT_LEN_2;";
>> +  s.op->newline()<<  "    break;";
>> +  s.op->newline()<<  "	case 3:";
>> +  s.op->newline()<<  "	case 4:";
>> +  s.op->newline()<<  "    hp->bp_len = HW_BREAKPOINT_LEN_4;";
>> +  s.op->newline()<<  "    break;";
>> +  s.op->newline()<<  "#ifdef CONFIG_X86_64";
>> +  s.op->newline()<<  "	case 5:";
>> +  s.op->newline()<<  "	case 6:";
>> +  s.op->newline()<<  "	case 7:";
>> +  s.op->newline()<<  "	case 8:";
>> +  s.op->newline()<<  "    hp->bp_len = HW_BREAKPOINT_LEN_8;";
>> +  s.op->newline()<<  "    break;";
>> +  s.op->newline()<<  "#endif /*CONFIG_X86_64*/";
>
> Are you sure this CONFIG_arch* stuff is needed?  Let the kernel
> try 8-byte watchpoints, and let us handle the error.
>

I cannot foresee the advantage of requesting an erroneous breakpoint, 
only to see the request fail later. Could you pls let me know if there 
are any specific issues wrt above, apart from the hassle of 
arch-specific checks?

>> +  s.op->newline()<<  "if ( !IS_ERR(stap_hwbkpt_ret_array[i]) ) {";
>> +  s.op->newline()<<  " stap_hwbkpt_ret_array[i] = register_wide_hw_breakpoint( hp, (void *)&enter_hwbkpt_probe );";
>> +  s.op->newline()<<  " if (IS_ERR(stap_hwbkpt_ret_array[i])) {";
>> +  s.op->newline()<<  "     rc = PTR_ERR(stap_hwbkpt_ret_array[i]);";
>> +  s.op->newline()<<  "      sdp->registered_p = 0;";
>> +  s.op->newline(1)<<  "	    _stp_warn(\" Hwbkpt Probe %s : Registration error rc= %d, Addr = %p, name = %s\", probe_point, rc, addr, hwbkpt_symbol_name);";
>> +  s.op->newline(-1)<<  "  }";
>> +  s.op->newline()<<  "    else sdp->registered_p = 1;";
>> +  s.op->newline()<<  "}";
>> +  s.op->newline(-1)<<  "}"; // for loop
>
> I mentioned this in the previous review, but this is insufficient.
> (Please review my earlier comments in case something else was missed.)
> If the [i]th registration attempt fails, and you return an rc != 0 to
> the wrapping code, this code right here must ensure that all
> already-registered entries ([0..i-1]) have been unregistered.

I beg to differ here. As is the case with dwarfless probes, is it not 
better to ignore an erroneous probe(that failed registration) and 
continue running the other probes that got successfully registered ? 
There might be numerous reasons for probe registration to fail -- 
incorrect length, bad symbol name, unsupported type (eg a read-only 
breakpoint on x86) apart from the unavailability of a debug register. I 
  dont see sense in penalizing the other valid probes just because of 
this singular, incorrect request. The registration failure is notified 
to the user via a warning. If it defeats the purpose of the script, the 
user has the option of tweaking his script.

Going by your suggestion, we are essentially allowing the user to 
overcommit breakpoint requests in systemtap scripts, and rejecting the 
script completely (unregistering _all_ probes) even if a singular 
request fails. Is it not a better idea to ration the number of 
breakpoint requests to start with, and then allow as many breakpoints in 
the script to get registered as possible -- and at runtime, filter out 
only invalid requests / requests failing because some debug registers 
have already been consumed ?

>> +  s.op->newline()<<  " if ( IS_ERR(stap_hwbkpt_ret_array[i]) ) continue;";
> Just use registered_p = 0 or 1.  You don't need to archive the error
> codes.

Done.

>> +  //Hwbkpt based probe
>> +  s.pattern_root->bind(TOK_KERNEL)->bind_num(TOK_HWBKPT)
>> +	->bind(TOK_HWBKPT_WRITE)->bind(new hwbkpt_builder());
>> +  s.pattern_root->bind(TOK_KERNEL)->bind_str(TOK_HWBKPT)
>> +	->bind(TOK_HWBKPT_WRITE)->bind(new hwbkpt_builder());
>> +  s.pattern_root->bind(TOK_KERNEL)->bind_num(TOK_HWBKPT)
>> +	->bind(TOK_HWBKPT_RW)->bind(new hwbkpt_builder());
>> +  s.pattern_root->bind(TOK_KERNEL)->bind_str(TOK_HWBKPT)
>> +	->bind(TOK_HWBKPT_RW)->bind(new hwbkpt_builder());
>> +  s.pattern_root->bind(TOK_KERNEL)->bind_num(TOK_HWBKPT)
>> +	->bind_num(TOK_LENGTH)->bind(TOK_HWBKPT_WRITE)->bind(new hwbkpt_builder());
>> +  s.pattern_root->bind(TOK_KERNEL)->bind_num(TOK_HWBKPT)
>> +	->bind_num(TOK_LENGTH)->bind(TOK_HWBKPT_RW)->bind(new hwbkpt_builder());
>> +  // length supported with address only, not symbol names
>
> This is probably the easiest place to wrap with a kernel_config[...]
> conditional to disable this probe point family on unsupporting kernels.

Thanks for the suggestion, I added a kernel_config() check here.


-- 
Prerna Saxena

Linux Technology Centre,
IBM Systems and Technology Lab,
Bangalore, India

[-- Attachment #2: hwbkpt.patch --]
[-- Type: text/x-diff, Size: 17051 bytes --]

Signed-off-by: Prerna Saxena <prerna@linux.vnet.ibm.com>

Index: stap-git-jan-25/tapsets.cxx
===================================================================
--- stap-git-jan-25.orig/tapsets.cxx
+++ stap-git-jan-25/tapsets.cxx
@@ -5319,7 +5319,342 @@ kprobe_builder::build(systemtap_session 
     }
 }
 
+// ------------------------------------------------------------------------
+//  Hardware breakpoint based probes.
+// ------------------------------------------------------------------------
+
+static const string TOK_HWBKPT("data");
+static const string TOK_HWBKPT_WRITE("write");
+static const string TOK_HWBKPT_RW("rw");
+static const string TOK_LENGTH("length");
+
+#define HWBKPT_READ 0
+#define HWBKPT_WRITE 1
+#define HWBKPT_RW 2
+struct hwbkpt_derived_probe: public derived_probe
+{
+  hwbkpt_derived_probe (probe *base,
+                        probe_point *location,
+                        uint64_t addr,
+			string symname,
+			unsigned int len,
+			bool has_only_read_access,
+			bool has_only_write_access,
+			bool has_rw_access
+                        );
+  Dwarf_Addr hwbkpt_addr;
+  string symbol_name;
+  unsigned int hwbkpt_access,hwbkpt_len;
+
+  void printsig (std::ostream &o) const;
+  void join_group (systemtap_session& s);
+};
+
+struct hwbkpt_derived_probe_group: public derived_probe_group
+{
+  unsigned int max_hwbkpt_probes_by_arch;
+
+private:
+  vector<hwbkpt_derived_probe*> hwbkpt_probes_vector;
+  typedef vector<hwbkpt_derived_probe*>::iterator h_p_v_iterator;
 
+public:
+  void enroll (hwbkpt_derived_probe* probe, systemtap_session& s);
+  void emit_module_decls (systemtap_session& s);
+  void emit_module_init (systemtap_session& s);
+  void emit_module_exit (systemtap_session& s);
+};
+
+hwbkpt_derived_probe::hwbkpt_derived_probe (probe *base,
+					    probe_point *location,
+					    uint64_t addr,
+					    string symname,
+					    unsigned int len,
+					    bool has_only_read_access,
+					    bool has_only_write_access,
+					    bool has_rw_access
+					    ):
+  derived_probe (base, location),
+  hwbkpt_addr (addr),
+  symbol_name (symname),
+  hwbkpt_len (len)
+{
+  this->tok = base->tok;
+
+  vector<probe_point::component*> comps;
+  comps.push_back (new probe_point::component(TOK_KERNEL));
+
+  if (hwbkpt_addr)
+	  comps.push_back (new probe_point::component (TOK_HWBKPT, new literal_number(hwbkpt_addr)));
+  else
+	if (symbol_name.size())
+	  comps.push_back (new probe_point::component (TOK_HWBKPT, new literal_string(symbol_name)));
+
+  comps.push_back (new probe_point::component (TOK_LENGTH, new literal_number(hwbkpt_len)));
+
+  if (has_only_read_access)
+	this->hwbkpt_access = HWBKPT_READ ;
+//TODO add code for comps.push_back for read, since this flag is not for x86
+
+  else
+	{
+	  if (has_only_write_access)
+		{
+		  this->hwbkpt_access = HWBKPT_WRITE ;
+	  	  comps.push_back (new probe_point::component(TOK_HWBKPT_WRITE));
+		}
+	  else
+		{
+		  this->hwbkpt_access = HWBKPT_RW ;
+	  	  comps.push_back (new probe_point::component(TOK_HWBKPT_RW));
+		}
+	}
+
+  this->sole_location()->components = comps;
+}
+
+void hwbkpt_derived_probe::printsig (ostream& o) const
+{
+  sole_location()->print (o);
+  printsig_nested (o);
+}
+
+void hwbkpt_derived_probe::join_group (systemtap_session& s)
+{
+  if (! s.hwbkpt_derived_probes) {
+        s.hwbkpt_derived_probes = new hwbkpt_derived_probe_group ();
+	if (s.architecture == "i386" || s.architecture == "x86_64" )
+			s.hwbkpt_derived_probes->max_hwbkpt_probes_by_arch = 4;
+	else {
+			s.hwbkpt_derived_probes->max_hwbkpt_probes_by_arch = 0;
+			throw semantic_error ("Hardware Breakpoints not supported on arch " + s.architecture);
+		}
+	}
+  s.hwbkpt_derived_probes->enroll (this, s);
+}
+
+void hwbkpt_derived_probe_group::enroll (hwbkpt_derived_probe* p, systemtap_session& s)
+{
+  if (hwbkpt_probes_vector.size() < max_hwbkpt_probes_by_arch )
+	hwbkpt_probes_vector.push_back (p);
+  else  {
+	  std::stringstream hwbkpt_err_msg;
+	  hwbkpt_err_msg << "No of Hardware breakpoint probes in script exceeds the number of hardware breakpoints that the architecture can support : max " << max_hwbkpt_probes_by_arch << " on " << s.architecture ;
+	  throw semantic_error (hwbkpt_err_msg.str());
+	}
+}
+
+void
+hwbkpt_derived_probe_group::emit_module_decls (systemtap_session& s)
+{
+  if (hwbkpt_probes_vector.empty()) return;
+
+  s.op->newline() << "/* ---- hwbkpt-based probes ---- */";
+
+  s.op->newline() << "#include <linux/perf_event.h>";
+  s.op->newline() << "#include <linux/hw_breakpoint.h>";
+  s.op->newline();
+
+  // Forward declare the master entry functions
+  s.op->newline() << "static int enter_hwbkpt_probe (struct perf_event *bp,";
+  s.op->line() << " int nmi,";
+  s.op->line() << " struct perf_sample_data *data,";
+  s.op->line() << " struct pt_regs *regs);";
+
+  // Emit the actual probe list.
+
+  s.op->newline() << "static struct perf_event_attr ";
+  s.op->newline() << "stap_hwbkpt_probe_array[" << hwbkpt_probes_vector.size() << "];";
+
+  s.op->newline() << "static struct perf_event **";
+  s.op->newline() << "stap_hwbkpt_ret_array[" << hwbkpt_probes_vector.size() << "];";
+  s.op->newline() << "static struct stap_hwbkpt_probe {";
+  s.op->newline() << "int registered_p:1;";
+// registered_p =  0 signifies a probe that failed registration
+// registered_p =  1 signifies a probe that got registered successfully
+
+  // Probe point & Symbol Names are mostly small and uniform enough
+  // to justify putting const char*.
+#define CALCIT(var)                                                     \
+  s.op->newline() << "const char * const " << #var << ";";
+  CALCIT(pp);
+  CALCIT(symbol);
+#undef CALCIT
+
+  s.op->newline() << "const unsigned long address;";
+  s.op->newline() << "uint8_t atype;";
+  s.op->newline() << "uint8_t len;";
+  s.op->newline() << "void (* const ph) (struct context*);";
+  s.op->newline() << "} stap_hwbkpt_probes[] = {";
+  s.op->indent(1);
+
+  for (unsigned int it = 0; it < hwbkpt_probes_vector.size(); it++)
+    {
+      hwbkpt_derived_probe* p = hwbkpt_probes_vector.at(it);
+      s.op->newline() << "{";
+      s.op->line() << " .registered_p=1,";
+      if (p->symbol_name.size())
+      s.op->line() << " .address=(unsigned long)0x0" << "ULL,";
+      else
+      s.op->line() << " .address=(unsigned long)0x" << hex << p->hwbkpt_addr << dec << "ULL,";
+      switch(p->hwbkpt_access){
+      case HWBKPT_READ:
+		s.op->line() << " .atype=HW_BREAKPOINT_R ,";
+      case HWBKPT_WRITE:
+		s.op->line() << " .atype=HW_BREAKPOINT_W ,";
+      case HWBKPT_RW:
+		s.op->line() << " .atype=HW_BREAKPOINT_R|HW_BREAKPOINT_W ,";
+	};
+      s.op->line() << " .len=" << p->hwbkpt_len << ",";
+      s.op->line() << " .pp=" << lex_cast_qstring (*p->sole_location()) << ",";
+      s.op->line() << " .symbol=\"" << p->symbol_name << "\",";
+      s.op->line() << " .ph=&" << p->name << "";
+      s.op->line() << " },";
+    }
+  s.op->newline() << "};";
+
+  // Emit the hwbkpt callback function
+  s.op->newline() ;
+  s.op->newline() << "static int enter_hwbkpt_probe (struct perf_event *bp,";
+  s.op->line() << " int nmi,";
+  s.op->line() << " struct perf_sample_data *data,";
+  s.op->line() << " struct pt_regs *regs) {";
+  s.op->newline() << "unsigned int i;";
+  s.op->newline() << "if (bp->attr.type != PERF_TYPE_BREAKPOINT )";
+  s.op->newline() << "return -1;";
+  s.op->newline() << "for ( i=0; i<" << hwbkpt_probes_vector.size() << "; i++) {";
+  s.op->newline() << "struct perf_event_attr *hp = & stap_hwbkpt_probe_array[i];";
+  s.op->newline() << "if (bp->attr.bp_addr==hp->bp_addr && bp->attr.bp_type==hp->bp_type && bp->attr.bp_len==hp->bp_len ) {";
+  s.op->newline() << "struct stap_hwbkpt_probe *sdp = &stap_hwbkpt_probes[i];";
+  common_probe_entryfn_prologue (s.op, "STAP_SESSION_RUNNING", "sdp->pp");
+  s.op->newline() << "c->regs = regs;";
+  s.op->newline() << "(*sdp->ph) (c);";
+  common_probe_entryfn_epilogue (s.op);
+  s.op->newline() << "return 0;";
+  s.op->newline() << "}";
+  s.op->newline(-1) << "}";
+  s.op->newline() << "return 0;";
+  s.op->newline() << "}";
+}
+
+void
+hwbkpt_derived_probe_group::emit_module_init (systemtap_session& s)
+{
+  s.op->newline() << "for (i=0; i<" << hwbkpt_probes_vector.size() << "; i++) {";
+  s.op->newline(1) << "struct stap_hwbkpt_probe *sdp = & stap_hwbkpt_probes[i];";
+  s.op->newline() << "struct perf_event_attr *hp = & stap_hwbkpt_probe_array[i];";
+  s.op->newline() << "void *addr = (void *) sdp->address;";
+  s.op->newline() << "const char *hwbkpt_symbol_name = addr ? NULL : sdp->symbol;";
+  s.op->newline() << "	hw_breakpoint_init(hp);";
+  s.op->newline() << "	if (addr) ";
+  s.op->newline() << "	    hp->bp_addr = (unsigned long) addr;";
+  s.op->newline() << "	else { ";
+  s.op->newline() << "	   hp->bp_addr = kallsyms_lookup_name(hwbkpt_symbol_name);";
+  s.op->newline() << "	if (!hp->bp_addr) { ";
+  s.op->newline() << " 	_stp_warn(\"Probe %s registration skipped : invalid symbol %s \",sdp->pp,hwbkpt_symbol_name);";
+  s.op->newline() << "	sdp->registered_p = 0;";
+  s.op->newline() << "		} ";
+  s.op->newline() << "	} ";
+  s.op->newline() << "#ifdef CONFIG_X86";
+  s.op->newline() << "  switch(sdp->len) {";
+  s.op->newline() << "	case 1:";
+  s.op->newline() << "    hp->bp_len = HW_BREAKPOINT_LEN_1;";
+  s.op->newline() << "    break;";
+  s.op->newline() << "	case 2:";
+  s.op->newline() << "    hp->bp_len = HW_BREAKPOINT_LEN_2;";
+  s.op->newline() << "    break;";
+  s.op->newline() << "	case 3:";
+  s.op->newline() << "	case 4:";
+  s.op->newline() << "    hp->bp_len = HW_BREAKPOINT_LEN_4;";
+  s.op->newline() << "    break;";
+  s.op->newline() << "#ifdef CONFIG_X86_64";
+  s.op->newline() << "	case 5:";
+  s.op->newline() << "	case 6:";
+  s.op->newline() << "	case 7:";
+  s.op->newline() << "	case 8:";
+  s.op->newline() << "    hp->bp_len = HW_BREAKPOINT_LEN_8;";
+  s.op->newline() << "    break;";
+  s.op->newline() << "#endif /*CONFIG_X86_64*/";
+  s.op->newline() << "	default:";
+  s.op->newline() << " 	_stp_warn(\"Probe %s registration skipped : unsupported length. Supported lengths for x86 are 1,2,4 {8 for x86_84 only} \",sdp->pp);";
+  s.op->newline() << "	sdp->registered_p = 0;";
+  s.op->newline() << "	}";
+  s.op->newline() << "  hp->bp_type = sdp->atype;";
+  s.op->newline() << "  if (sdp->atype == HW_BREAKPOINT_R) {";
+  s.op->newline() << "  	_stp_warn(\"Probe %s registration skipped : READ-ONLY hardware breakpoints not supported on x86\",sdp->pp);";
+  s.op->newline() << "		sdp->registered_p = 0;";
+  s.op->newline() << "  }";
+  s.op->newline() << "#endif /*CONFIG_X86*/";
+  s.op->newline() << "probe_point = sdp->pp;"; // for error messages
+  s.op->newline() << "if ( sdp->registered_p ) {";
+  s.op->newline() << " stap_hwbkpt_ret_array[i] = register_wide_hw_breakpoint( hp, (void *)&enter_hwbkpt_probe );";
+  s.op->newline() << " if (IS_ERR(stap_hwbkpt_ret_array[i])) {";
+  s.op->newline() << "     rc = PTR_ERR(stap_hwbkpt_ret_array[i]);";
+  s.op->newline() << "      sdp->registered_p = 0;";
+  s.op->newline(1) << "	    _stp_warn(\" Hwbkpt Probe %s : Registration error rc= %d, Addr = %p, name = %s\", probe_point, rc, addr, hwbkpt_symbol_name);";
+  s.op->newline(-1) << "  }";
+  s.op->newline() << "    else sdp->registered_p = 1;";
+  s.op->newline() << "}";
+  s.op->newline(-1) << "}"; // for loop
+}
+
+void
+hwbkpt_derived_probe_group::emit_module_exit (systemtap_session& s)
+{
+  //Unregister hwbkpt probes.
+  s.op->newline() << "for (i=0; i<" << hwbkpt_probes_vector.size() << "; i++) {";
+  s.op->newline(1) << "struct stap_hwbkpt_probe *sdp = & stap_hwbkpt_probes[i];";
+  s.op->newline() << " if ( sdp->registered_p == 0 ) continue;";
+  s.op->newline() << " unregister_wide_hw_breakpoint(stap_hwbkpt_ret_array[i]);";
+  s.op->newline() << "sdp->registered_p = 0;";
+  s.op->newline(-1) << "}";
+}
+
+struct hwbkpt_builder: public derived_probe_builder
+{
+  hwbkpt_builder() {}
+  virtual void build(systemtap_session & sess,
+		     probe * base,
+		     probe_point * location,
+		     literal_map_t const & parameters,
+		     vector<derived_probe *> & finished_results);
+};
+
+void
+hwbkpt_builder::build(systemtap_session & sess,
+		      probe * base,
+		      probe_point * location,
+		      literal_map_t const & parameters,
+		      vector<derived_probe *> & finished_results)
+{
+  string symbol_str_val;
+  int64_t hwbkpt_address, len;
+  bool has_addr, has_symbol_str, has_write, has_rw, has_len;
+
+  has_addr = get_param (parameters, TOK_HWBKPT, hwbkpt_address);
+  has_symbol_str = get_param (parameters, TOK_HWBKPT, symbol_str_val);
+  has_len = get_param (parameters, TOK_LENGTH, len);
+  has_write = (parameters.find(TOK_HWBKPT_WRITE) != parameters.end());
+  has_rw = (parameters.find(TOK_HWBKPT_RW) != parameters.end());
+
+  if (!has_len)
+	len = 1;
+
+  if (has_addr)
+      finished_results.push_back (new hwbkpt_derived_probe (base,
+							    location,
+							    hwbkpt_address,
+							    "",len,0,
+							    has_write,
+							    has_rw));
+  else // has symbol_str
+      finished_results.push_back (new hwbkpt_derived_probe (base,
+							    location,
+							    0,
+							    symbol_str_val,len,0,
+							    has_write,
+							    has_rw));
+}
 
 // ------------------------------------------------------------------------
 // statically inserted kernel-tracepoint derived probes
@@ -5811,7 +6146,7 @@ tracepoint_derived_probe_group::emit_mod
         }
       s.op->line() << ") {";
       s.op->indent(1);
-      common_probe_entryfn_prologue (s.op, "STAP_SESSION_RUNNING", 
+      common_probe_entryfn_prologue (s.op, "STAP_SESSION_RUNNING",
                                      lex_cast_qstring (*p->sole_location()));
       s.op->newline() << "c->marker_name = "
                       << lex_cast_qstring (p->tracepoint_name)
@@ -6140,7 +6475,6 @@ tracepoint_builder::init_dw(systemtap_se
   return true;
 }
 
-
 void
 tracepoint_builder::build(systemtap_session& s,
                           probe *base, probe_point *location,
@@ -6158,7 +6492,6 @@ tracepoint_builder::build(systemtap_sess
 }
 
 
-
 // ------------------------------------------------------------------------
 //  Standard tapset registry.
 // ------------------------------------------------------------------------
@@ -6205,6 +6538,25 @@ register_standard_tapsets(systemtap_sess
      ->bind_num(TOK_MAXACTIVE)->bind(new kprobe_builder());
   s.pattern_root->bind(TOK_KPROBE)->bind_num(TOK_STATEMENT)
       ->bind(TOK_ABSOLUTE)->bind(new kprobe_builder());
+
+  //Hwbkpt based probe
+  if (s.kernel_config["CONFIG_PERF_EVENTS"] == string("y") &&
+	s.kernel_config["CONFIG_HAVE_HW_BREAKPOINT"] == string("y")) {
+	  s.pattern_root->bind(TOK_KERNEL)->bind_num(TOK_HWBKPT)
+		->bind(TOK_HWBKPT_WRITE)->bind(new hwbkpt_builder());
+	  s.pattern_root->bind(TOK_KERNEL)->bind_str(TOK_HWBKPT)
+		->bind(TOK_HWBKPT_WRITE)->bind(new hwbkpt_builder());
+	  s.pattern_root->bind(TOK_KERNEL)->bind_num(TOK_HWBKPT)
+		->bind(TOK_HWBKPT_RW)->bind(new hwbkpt_builder());
+	  s.pattern_root->bind(TOK_KERNEL)->bind_str(TOK_HWBKPT)
+		->bind(TOK_HWBKPT_RW)->bind(new hwbkpt_builder());
+	  s.pattern_root->bind(TOK_KERNEL)->bind_num(TOK_HWBKPT)
+		->bind_num(TOK_LENGTH)->bind(TOK_HWBKPT_WRITE)->bind(new hwbkpt_builder());
+	  s.pattern_root->bind(TOK_KERNEL)->bind_num(TOK_HWBKPT)
+		->bind_num(TOK_LENGTH)->bind(TOK_HWBKPT_RW)->bind(new hwbkpt_builder());
+	  // length supported with address only, not symbol names
+	}
+
 }
 
 
@@ -6231,6 +6583,7 @@ all_session_groups(systemtap_session& s)
   DOONE(mark);
   DOONE(tracepoint);
   DOONE(kprobe);
+  DOONE(hwbkpt);
   DOONE(hrtimer);
   DOONE(perfmon);
   DOONE(procfs);
Index: stap-git-jan-25/session.h
===================================================================
--- stap-git-jan-25.orig/session.h
+++ stap-git-jan-25/session.h
@@ -31,6 +31,7 @@ struct derived_probe;
 struct be_derived_probe_group;
 struct dwarf_derived_probe_group;
 struct kprobe_derived_probe_group;
+struct hwbkpt_derived_probe_group;
 struct uprobe_derived_probe_group;
 struct utrace_derived_probe_group;
 struct itrace_derived_probe_group;
@@ -175,6 +176,7 @@ struct systemtap_session
   be_derived_probe_group* be_derived_probes;
   dwarf_derived_probe_group* dwarf_derived_probes;
   kprobe_derived_probe_group* kprobe_derived_probes;
+  hwbkpt_derived_probe_group* hwbkpt_derived_probes;
   uprobe_derived_probe_group* uprobe_derived_probes;
   utrace_derived_probe_group* utrace_derived_probes;
   itrace_derived_probe_group* itrace_derived_probes;
Index: stap-git-jan-25/elaborate.cxx
===================================================================
--- stap-git-jan-25.orig/elaborate.cxx
+++ stap-git-jan-25/elaborate.cxx
@@ -1523,6 +1523,7 @@ systemtap_session::systemtap_session ():
   be_derived_probes(0),
   dwarf_derived_probes(0),
   kprobe_derived_probes(0),
+  hwbkpt_derived_probes(0),
   uprobe_derived_probes(0),
   utrace_derived_probes(0),
   itrace_derived_probes(0),

[-- Attachment #3: hwbkpt-misc.patch --]
[-- Type: text/x-diff, Size: 3339 bytes --]

Index: stap-git-jan-25/stapprobes.3stap.in
===================================================================
--- stap-git-jan-25.orig/stapprobes.3stap.in
+++ stap-git-jan-25/stapprobes.3stap.in
@@ -660,6 +660,55 @@ and a string of name=value pairs for all
 is available in
 .BR $$vars " or " $$parms .
 
+.SS HARDWARE BREAKPOINTS
+This family of probes is used to set hardware watchpoints for a given
+ (global) kernel symbol. The probes take three components as inputs :
+
+1. The
+.BR virtual address / name
+of the kernel symbol to be traced is supplied as argument to this class
+of probes. ( Probes for only data segment variables are supported. Probing
+local variables of a function cannot be done.)
+
+2. Nature of access to be probed :
+a.
+.I .write
+probe gets triggered when a write happens at the specified address/symbol
+name.
+b.
+.I rw
+probe is triggered when either a read or write happens.
+
+3.
+.BR .length
+(optional)
+Users have the option of specifying the address interval to be probed
+using "length" constructs. The user-specified length gets approximated
+to the closest possible address length that the architecture can
+support. If the specified length exceeds the limits imposed by
+architecture, an error message is flagged and probe registration fails.
+Wherever 'length' is not specified, the translator requests a hardware
+breakpoint probe of length 1. It should be noted that the "length"
+construct is not valid with symbol names.
+
+Following constructs are supported :
+.SAMPLE
+probe kernel.data(ADDRESS).write
+probe kernel.data(ADDRESS).rw
+probe kernel.data(ADDRESS).length(LEN).write
+probe kernel.data(ADDRESS).length(LEN).rw
+probe kernel.data("SYMBOL_NAME").write
+probe kernel.data("SYMBOL_NAME").rw
+.ESAMPLE
+
+This set of probes make use of the debug registers of the processor,
+which is a scarce resource. (4 on x86 , 1 on powerpc ) The script
+translation fails if a user sets more hardware breakpoint probes than
+the limits set by architecture. For example,a pass-2 error is flagged
+when an input script requests 5 hardware breakpoint probes on an x86
+system while x86 architecture supports a maximum of 4 breakpoints.
+Users are cautioned to set probes judiciously.
+
 .SH EXAMPLES
 .PP
 Here are some example probe points, defining the associated events.
@@ -696,6 +745,9 @@ refers to the statement of line 2917 wit
 kernel.statement("bio_init@fs/bio.c+3")
 refers to the statement at line bio_init+3 within "fs/bio.c".
 .TP
+kernel.data("pid_max").write
+refers to a hardware preakpoint of type "write" set on pid_max
+.TP
 syscall.*.return
 refers to the group of probe aliases with any name in the third position
 
Index: stap-git-jan-25/testsuite/buildok/hwbkpt.stp
===================================================================
--- /dev/null
+++ stap-git-jan-25/testsuite/buildok/hwbkpt.stp
@@ -0,0 +1,4 @@
+#! stap -wp4
+
+probe kernel.data("pid_max").write {}
+probe kernel.data("pid_max").rw {}
Index: stap-git-jan-25/NEWS
===================================================================
--- stap-git-jan-25.orig/NEWS
+++ stap-git-jan-25/NEWS
@@ -1,3 +1,5 @@
+- New : systemtap now supports probes for Hardware Breakpoints. For syntax details, see man page for stapprobes.
+
 * What's new in version 1.1
 
 - New tracepoint based tapset for memory subsystem.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC V2] Systemtap translator support for kernel hardware breakpoints
  2010-01-27 15:55       ` Prerna Saxena
@ 2010-01-27 17:12         ` Frank Ch. Eigler
  2010-01-28  9:59           ` Prerna Saxena
  0 siblings, 1 reply; 31+ messages in thread
From: Frank Ch. Eigler @ 2010-01-27 17:12 UTC (permalink / raw)
  To: Prerna Saxena; +Cc: systemtap

Prerna Saxena <prerna@linux.vnet.ibm.com> writes:

> [...]
> Thanks for the prompt review. Attaching a new version of patches that
> address your comments.

Thanks for the quick turnaround.

> Perf allows multiplexing only for non-pinned breakpoints, not the
> pinned ones used by systemtap. If this script is allowed to run with a
> warning, the runtime will register the first 'n' possible requests and
> reject the later ones. Is it not better to notify to the user that
> they are making more requests than the maximum limit imposed by
> hardware ? (more on this later)

I didn't say "don't notify", just "don't make it a translate-time
error".  It is possible that in the future, some of these limitations
will be relaxed by virtualization or improvements in hardware.  By
hard-coding them in systemtap, we are forcing ourselves to worry about
this again in the future.


> [...]
>> Are you sure this CONFIG_arch* stuff is needed?  Let the kernel
>> try 8-byte watchpoints, and let us handle the error.
>>
>
> I cannot foresee the advantage of requesting an erroneous breakpoint,
> only to see the request fail later. Could you pls let me know if there
> are any specific issues wrt above, apart from the hassle of
> arch-specific checks?

This becomes a porting burden in systemtap, when we could just rely on
the existing porting effort in the kernel.  The kernel will already
make the same checks and report an error.  Checks on our own side,
made immediately before the kernel checks, are redundant.


>>> +  s.op->newline()<<  "if ( !IS_ERR(stap_hwbkpt_ret_array[i]) ) {";
>>> +  s.op->newline()<<  " stap_hwbkpt_ret_array[i] = register_wide_hw_breakpoint( hp, (void *)&enter_hwbkpt_probe );";
>>> +  s.op->newline()<<  " if (IS_ERR(stap_hwbkpt_ret_array[i])) {";
>>> +  s.op->newline()<<  "     rc = PTR_ERR(stap_hwbkpt_ret_array[i]);";
>>> +  s.op->newline()<<  "      sdp->registered_p = 0;";
>>> +  s.op->newline(1)<<  "	    _stp_warn(\" Hwbkpt Probe %s : Registration error rc= %d, Addr = %p, name = %s\", probe_point, rc, addr, hwbkpt_symbol_name);";
>>> +  s.op->newline(-1)<<  "  }";
>>> +  s.op->newline()<<  "    else sdp->registered_p = 1;";
>>> +  s.op->newline()<<  "}";
>>> +  s.op->newline(-1)<<  "}"; // for loop
>>
>> I mentioned this in the previous review, but this is insufficient.
>> (Please review my earlier comments in case something else was missed.)
>> If the [i]th registration attempt fails, and you return an rc != 0 to
>> the wrapping code, this code right here must ensure that all
>> already-registered entries ([0..i-1]) have been unregistered.

> I beg to differ here. As is the case with dwarfless probes, is it
> not better to ignore an erroneous probe(that failed registration)
> and continue running the other probes that got successfully
> registered ? [...]

Sure, but then your code must keep rc zeroed instead of setting it to
an nonzero PTR_ERR.


- FChE

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC V2] Systemtap translator support for kernel hardware breakpoints
  2010-01-27 17:12         ` Frank Ch. Eigler
@ 2010-01-28  9:59           ` Prerna Saxena
  2010-01-29  9:15             ` Prerna Saxena
  0 siblings, 1 reply; 31+ messages in thread
From: Prerna Saxena @ 2010-01-28  9:59 UTC (permalink / raw)
  To: Frank Ch. Eigler, systemtap

[-- Attachment #1: Type: text/plain, Size: 3719 bytes --]

Attaching a new version. Hope this addresses the concerns raised. If ppl 
are fine with it, I'll check it in.

On 01/27/2010 10:42 PM, Frank Ch. Eigler wrote:
> Prerna Saxena<prerna@linux.vnet.ibm.com>  writes:
>
>> [...]
>> Thanks for the prompt review. Attaching a new version of patches that
>> address your comments.
>
> Thanks for the quick turnaround.
>
>> Perf allows multiplexing only for non-pinned breakpoints, not the
>> pinned ones used by systemtap. If this script is allowed to run with a
>> warning, the runtime will register the first 'n' possible requests and
>> reject the later ones. Is it not better to notify to the user that
>> they are making more requests than the maximum limit imposed by
>> hardware ? (more on this later)
>
> I didn't say "don't notify", just "don't make it a translate-time
> error".  It is possible that in the future, some of these limitations
> will be relaxed by virtualization or improvements in hardware.  By
> hard-coding them in systemtap, we are forcing ourselves to worry about
> this again in the future.
>
>

Right. So I'm only flashing a pass-2 warning for now.

>>> Are you sure this CONFIG_arch* stuff is needed?  Let the kernel
>>> try 8-byte watchpoints, and let us handle the error.
>>>
>>
>> I cannot foresee the advantage of requesting an erroneous breakpoint,
>> only to see the request fail later. Could you pls let me know if there
>> are any specific issues wrt above, apart from the hassle of
>> arch-specific checks?
>
> This becomes a porting burden in systemtap, when we could just rely on
> the existing porting effort in the kernel.  The kernel will already
> make the same checks and report an error.  Checks on our own side,
> made immediately before the kernel checks, are redundant.

I need to maintain some past of code thats specific to x86 only -- For 
example, the constraints wrt length. (Powerpc allows any length upto 8)
However, instead of using CONFIG options, I'm basing this check on 
systemtap_session.architecture, and generating systemtap module code 
specific to each arch, instead of generic code with CONFIG options to 
distinguish architectures.

>>>> +  s.op->newline()<<   "if ( !IS_ERR(stap_hwbkpt_ret_array[i]) ) {";
>>>> +  s.op->newline()<<   " stap_hwbkpt_ret_array[i] = register_wide_hw_breakpoint( hp, (void *)&enter_hwbkpt_probe );";
>>>> +  s.op->newline()<<   " if (IS_ERR(stap_hwbkpt_ret_array[i])) {";
>>>> +  s.op->newline()<<   "     rc = PTR_ERR(stap_hwbkpt_ret_array[i]);";
>>>> +  s.op->newline()<<   "      sdp->registered_p = 0;";
>>>> +  s.op->newline(1)<<   "	    _stp_warn(\" Hwbkpt Probe %s : Registration error rc= %d, Addr = %p, name = %s\", probe_point, rc, addr, hwbkpt_symbol_name);";
>>>> +  s.op->newline(-1)<<   "  }";
>>>> +  s.op->newline()<<   "    else sdp->registered_p = 1;";
>>>> +  s.op->newline()<<   "}";
>>>> +  s.op->newline(-1)<<   "}"; // for loop
>>>
>>> I mentioned this in the previous review, but this is insufficient.
>>> (Please review my earlier comments in case something else was missed.)
>>> If the [i]th registration attempt fails, and you return an rc != 0 to
>>> the wrapping code, this code right here must ensure that all
>>> already-registered entries ([0..i-1]) have been unregistered.
>
>> I beg to differ here. As is the case with dwarfless probes, is it
>> not better to ignore an erroneous probe(that failed registration)
>> and continue running the other probes that got successfully
>> registered ? [...]
>
> Sure, but then your code must keep rc zeroed instead of setting it to
> an nonzero PTR_ERR.

Thanks for the suggestion, I've modified this.

Regards,
-- 
Prerna Saxena

Linux Technology Centre,
IBM Systems and Technology Lab,
Bangalore, India

[-- Attachment #2: hwbkpt.patch --]
[-- Type: text/x-diff, Size: 16926 bytes --]

Signed-off-by: Prerna Saxena <prerna@linux.vnet.ibm.com>

Index: systemtap/tapsets.cxx
===================================================================
--- systemtap.orig/tapsets.cxx
+++ systemtap/tapsets.cxx
@@ -5319,7 +5319,341 @@ kprobe_builder::build(systemtap_session 
     }
 }
 
+// ------------------------------------------------------------------------
+//  Hardware breakpoint based probes.
+// ------------------------------------------------------------------------
+
+static const string TOK_HWBKPT("data");
+static const string TOK_HWBKPT_WRITE("write");
+static const string TOK_HWBKPT_RW("rw");
+static const string TOK_LENGTH("length");
+
+#define HWBKPT_READ 0
+#define HWBKPT_WRITE 1
+#define HWBKPT_RW 2
+struct hwbkpt_derived_probe: public derived_probe
+{
+  hwbkpt_derived_probe (probe *base,
+                        probe_point *location,
+                        uint64_t addr,
+			string symname,
+			unsigned int len,
+			bool has_only_read_access,
+			bool has_only_write_access,
+			bool has_rw_access
+                        );
+  Dwarf_Addr hwbkpt_addr;
+  string symbol_name;
+  unsigned int hwbkpt_access,hwbkpt_len;
+
+  void printsig (std::ostream &o) const;
+  void join_group (systemtap_session& s);
+};
+
+struct hwbkpt_derived_probe_group: public derived_probe_group
+{
+  unsigned int max_hwbkpt_probes_by_arch;
+
+private:
+  vector<hwbkpt_derived_probe*> hwbkpt_probes_vector;
+  typedef vector<hwbkpt_derived_probe*>::iterator h_p_v_iterator;
 
+public:
+  void enroll (hwbkpt_derived_probe* probe, systemtap_session& s);
+  void emit_module_decls (systemtap_session& s);
+  void emit_module_init (systemtap_session& s);
+  void emit_module_exit (systemtap_session& s);
+};
+
+hwbkpt_derived_probe::hwbkpt_derived_probe (probe *base,
+					    probe_point *location,
+					    uint64_t addr,
+					    string symname,
+					    unsigned int len,
+					    bool has_only_read_access,
+					    bool has_only_write_access,
+					    bool has_rw_access
+					    ):
+  derived_probe (base, location),
+  hwbkpt_addr (addr),
+  symbol_name (symname),
+  hwbkpt_len (len)
+{
+  this->tok = base->tok;
+
+  vector<probe_point::component*> comps;
+  comps.push_back (new probe_point::component(TOK_KERNEL));
+
+  if (hwbkpt_addr)
+	  comps.push_back (new probe_point::component (TOK_HWBKPT, new literal_number(hwbkpt_addr)));
+  else
+	if (symbol_name.size())
+	  comps.push_back (new probe_point::component (TOK_HWBKPT, new literal_string(symbol_name)));
+
+  comps.push_back (new probe_point::component (TOK_LENGTH, new literal_number(hwbkpt_len)));
+
+  if (has_only_read_access)
+	this->hwbkpt_access = HWBKPT_READ ;
+//TODO add code for comps.push_back for read, since this flag is not for x86
+
+  else
+	{
+	  if (has_only_write_access)
+		{
+		  this->hwbkpt_access = HWBKPT_WRITE ;
+	  	  comps.push_back (new probe_point::component(TOK_HWBKPT_WRITE));
+		}
+	  else
+		{
+		  this->hwbkpt_access = HWBKPT_RW ;
+	  	  comps.push_back (new probe_point::component(TOK_HWBKPT_RW));
+		}
+	}
+
+  this->sole_location()->components = comps;
+}
+
+void hwbkpt_derived_probe::printsig (ostream& o) const
+{
+  sole_location()->print (o);
+  printsig_nested (o);
+}
+
+void hwbkpt_derived_probe::join_group (systemtap_session& s)
+{
+  if (! s.hwbkpt_derived_probes) {
+        s.hwbkpt_derived_probes = new hwbkpt_derived_probe_group ();
+	if (s.architecture == "i386" || s.architecture == "x86_64" )
+			s.hwbkpt_derived_probes->max_hwbkpt_probes_by_arch = 4;
+	else {
+			//TODO: Add code for other archs too
+			s.hwbkpt_derived_probes->max_hwbkpt_probes_by_arch = 0;
+			throw semantic_error ("Hardware Breakpoints not supported on arch " + s.architecture);
+		}
+	}
+  s.hwbkpt_derived_probes->enroll (this, s);
+}
+
+void hwbkpt_derived_probe_group::enroll (hwbkpt_derived_probe* p, systemtap_session& s)
+{
+  if (hwbkpt_probes_vector.size() >= max_hwbkpt_probes_by_arch ) {
+	  clog  << "Warning:No of Hardware breakpoint probes in script exceeds "
+		<< "the number of hardware breakpoints that the architecture "
+		<< "can support : max " << max_hwbkpt_probes_by_arch
+		<< " on " << s.architecture ;
+	}
+  hwbkpt_probes_vector.push_back (p);
+}
+
+void
+hwbkpt_derived_probe_group::emit_module_decls (systemtap_session& s)
+{
+  if (hwbkpt_probes_vector.empty()) return;
+
+  s.op->newline() << "/* ---- hwbkpt-based probes ---- */";
+
+  s.op->newline() << "#include <linux/perf_event.h>";
+  s.op->newline() << "#include <linux/hw_breakpoint.h>";
+  s.op->newline();
+
+  // Forward declare the master entry functions
+  s.op->newline() << "static int enter_hwbkpt_probe (struct perf_event *bp,";
+  s.op->line() << " int nmi,";
+  s.op->line() << " struct perf_sample_data *data,";
+  s.op->line() << " struct pt_regs *regs);";
+
+  // Emit the actual probe list.
+
+  s.op->newline() << "static struct perf_event_attr ";
+  s.op->newline() << "stap_hwbkpt_probe_array[" << hwbkpt_probes_vector.size() << "];";
+
+  s.op->newline() << "static struct perf_event **";
+  s.op->newline() << "stap_hwbkpt_ret_array[" << hwbkpt_probes_vector.size() << "];";
+  s.op->newline() << "static struct stap_hwbkpt_probe {";
+  s.op->newline() << "int registered_p:1;";
+// registered_p =  0 signifies a probe that failed registration
+// registered_p =  1 signifies a probe that got registered successfully
+
+  // Probe point & Symbol Names are mostly small and uniform enough
+  // to justify putting const char*.
+#define CALCIT(var)                                                     \
+  s.op->newline() << "const char * const " << #var << ";";
+  CALCIT(pp);
+  CALCIT(symbol);
+#undef CALCIT
+
+  s.op->newline() << "const unsigned long address;";
+  s.op->newline() << "uint8_t atype;";
+  s.op->newline() << "uint8_t len;";
+  s.op->newline() << "void (* const ph) (struct context*);";
+  s.op->newline() << "} stap_hwbkpt_probes[] = {";
+  s.op->indent(1);
+
+  for (unsigned int it = 0; it < hwbkpt_probes_vector.size(); it++)
+    {
+      hwbkpt_derived_probe* p = hwbkpt_probes_vector.at(it);
+      s.op->newline() << "{";
+      s.op->line() << " .registered_p=1,";
+      if (p->symbol_name.size())
+      s.op->line() << " .address=(unsigned long)0x0" << "ULL,";
+      else
+      s.op->line() << " .address=(unsigned long)0x" << hex << p->hwbkpt_addr << dec << "ULL,";
+      switch(p->hwbkpt_access){
+      case HWBKPT_READ:
+		s.op->line() << " .atype=HW_BREAKPOINT_R ,";
+      case HWBKPT_WRITE:
+		s.op->line() << " .atype=HW_BREAKPOINT_W ,";
+      case HWBKPT_RW:
+		s.op->line() << " .atype=HW_BREAKPOINT_R|HW_BREAKPOINT_W ,";
+	};
+      s.op->line() << " .len=" << p->hwbkpt_len << ",";
+      s.op->line() << " .pp=" << lex_cast_qstring (*p->sole_location()) << ",";
+      s.op->line() << " .symbol=\"" << p->symbol_name << "\",";
+      s.op->line() << " .ph=&" << p->name << "";
+      s.op->line() << " },";
+    }
+  s.op->newline() << "};";
+
+  // Emit the hwbkpt callback function
+  s.op->newline() ;
+  s.op->newline() << "static int enter_hwbkpt_probe (struct perf_event *bp,";
+  s.op->line() << " int nmi,";
+  s.op->line() << " struct perf_sample_data *data,";
+  s.op->line() << " struct pt_regs *regs) {";
+  s.op->newline() << "unsigned int i;";
+  s.op->newline() << "if (bp->attr.type != PERF_TYPE_BREAKPOINT )";
+  s.op->newline() << "return -1;";
+  s.op->newline() << "for ( i=0; i<" << hwbkpt_probes_vector.size() << "; i++) {";
+  s.op->newline() << "struct perf_event_attr *hp = & stap_hwbkpt_probe_array[i];";
+  s.op->newline() << "if (bp->attr.bp_addr==hp->bp_addr && bp->attr.bp_type==hp->bp_type && bp->attr.bp_len==hp->bp_len ) {";
+  s.op->newline() << "struct stap_hwbkpt_probe *sdp = &stap_hwbkpt_probes[i];";
+  common_probe_entryfn_prologue (s.op, "STAP_SESSION_RUNNING", "sdp->pp");
+  s.op->newline() << "c->regs = regs;";
+  s.op->newline() << "(*sdp->ph) (c);";
+  common_probe_entryfn_epilogue (s.op);
+  s.op->newline() << "return 0;";
+  s.op->newline() << "}";
+  s.op->newline(-1) << "}";
+  s.op->newline() << "return 0;";
+  s.op->newline() << "}";
+}
+
+void
+hwbkpt_derived_probe_group::emit_module_init (systemtap_session& s)
+{
+  s.op->newline() << "for (i=0; i<" << hwbkpt_probes_vector.size() << "; i++) {";
+  s.op->newline(1) << "struct stap_hwbkpt_probe *sdp = & stap_hwbkpt_probes[i];";
+  s.op->newline() << "struct perf_event_attr *hp = & stap_hwbkpt_probe_array[i];";
+  s.op->newline() << "void *addr = (void *) sdp->address;";
+  s.op->newline() << "const char *hwbkpt_symbol_name = addr ? NULL : sdp->symbol;";
+  s.op->newline() << "	hw_breakpoint_init(hp);";
+  s.op->newline() << "	if (addr) ";
+  s.op->newline() << "	    hp->bp_addr = (unsigned long) addr;";
+  s.op->newline() << "	else { ";
+  s.op->newline() << "	   hp->bp_addr = kallsyms_lookup_name(hwbkpt_symbol_name);";
+  s.op->newline() << "	if (!hp->bp_addr) { ";
+  s.op->newline() << " 	_stp_warn(\"Probe %s registration skipped : invalid symbol %s \",sdp->pp,hwbkpt_symbol_name);";
+  s.op->newline() << "	sdp->registered_p = 0;";
+  s.op->newline() << "		} ";
+  s.op->newline() << "	} ";
+  if (s.architecture == "i386" || s.architecture == "x86_64" ) {
+	  s.op->newline() << "  switch(sdp->len) {";
+	  s.op->newline() << "	case 1:";
+	  s.op->newline() << "    hp->bp_len = HW_BREAKPOINT_LEN_1;";
+	  s.op->newline() << "    break;";
+	  s.op->newline() << "	case 2:";
+	  s.op->newline() << "    hp->bp_len = HW_BREAKPOINT_LEN_2;";
+	  s.op->newline() << "    break;";
+	  s.op->newline() << "	case 3:";
+	  s.op->newline() << "	case 4:";
+	  s.op->newline() << "    hp->bp_len = HW_BREAKPOINT_LEN_4;";
+	  s.op->newline() << "    break;";
+	  s.op->newline() << "	case 5:";
+	  s.op->newline() << "	case 6:";
+	  s.op->newline() << "	case 7:";
+	  s.op->newline() << "	case 8:";
+	  s.op->newline() << "    hp->bp_len = HW_BREAKPOINT_LEN_8;";
+	  s.op->newline() << "    break;";
+	  s.op->newline() << "	default:";
+	  s.op->newline() << " 	_stp_warn(\"Probe %s registration skipped : unsupported length. Supported lengths for x86 are 1,2,4 {8 for x86_84 only} \",sdp->pp);";
+	  s.op->newline() << "	sdp->registered_p = 0;";
+	  s.op->newline() << "	}";
+	  s.op->newline() << "  hp->bp_type = sdp->atype;";
+	  s.op->newline() << "  if (sdp->atype == HW_BREAKPOINT_R) {";
+	  s.op->newline() << "  	_stp_warn(\"Probe %s registration skipped : READ-ONLY hardware breakpoints not supported on x86\",sdp->pp);";
+	  s.op->newline() << "		sdp->registered_p = 0;";
+	  s.op->newline() << "  }";
+  } // end of x86 / x86_64 specific checks
+  s.op->newline() << "probe_point = sdp->pp;"; // for error messages
+  s.op->newline() << "if ( sdp->registered_p ) {";
+  s.op->newline() << " stap_hwbkpt_ret_array[i] = register_wide_hw_breakpoint( hp, (void *)&enter_hwbkpt_probe );";
+  s.op->newline() << " if (IS_ERR(stap_hwbkpt_ret_array[i])) {";
+  s.op->newline() << "      int err_code = PTR_ERR(stap_hwbkpt_ret_array[i]);";
+  s.op->newline() << "      sdp->registered_p = 0;";
+  s.op->newline(1) << "	    _stp_warn(\" Hwbkpt Probe %s : Registration error = %d, Addr = %p, name = %s\", probe_point, err_code, addr, hwbkpt_symbol_name);";
+  s.op->newline(-1) << "  }";
+  s.op->newline() << " else sdp->registered_p = 1;";
+  s.op->newline() << "}";
+  s.op->newline(-1) << "}"; // for loop
+}
+
+void
+hwbkpt_derived_probe_group::emit_module_exit (systemtap_session& s)
+{
+  //Unregister hwbkpt probes.
+  s.op->newline() << "for (i=0; i<" << hwbkpt_probes_vector.size() << "; i++) {";
+  s.op->newline(1) << "struct stap_hwbkpt_probe *sdp = & stap_hwbkpt_probes[i];";
+  s.op->newline() << " if ( sdp->registered_p == 0 ) continue;";
+  s.op->newline() << " unregister_wide_hw_breakpoint(stap_hwbkpt_ret_array[i]);";
+  s.op->newline() << "sdp->registered_p = 0;";
+  s.op->newline(-1) << "}";
+}
+
+struct hwbkpt_builder: public derived_probe_builder
+{
+  hwbkpt_builder() {}
+  virtual void build(systemtap_session & sess,
+		     probe * base,
+		     probe_point * location,
+		     literal_map_t const & parameters,
+		     vector<derived_probe *> & finished_results);
+};
+
+void
+hwbkpt_builder::build(systemtap_session & sess,
+		      probe * base,
+		      probe_point * location,
+		      literal_map_t const & parameters,
+		      vector<derived_probe *> & finished_results)
+{
+  string symbol_str_val;
+  int64_t hwbkpt_address, len;
+  bool has_addr, has_symbol_str, has_write, has_rw, has_len;
+
+  has_addr = get_param (parameters, TOK_HWBKPT, hwbkpt_address);
+  has_symbol_str = get_param (parameters, TOK_HWBKPT, symbol_str_val);
+  has_len = get_param (parameters, TOK_LENGTH, len);
+  has_write = (parameters.find(TOK_HWBKPT_WRITE) != parameters.end());
+  has_rw = (parameters.find(TOK_HWBKPT_RW) != parameters.end());
+
+  if (!has_len)
+	len = 1;
+
+  if (has_addr)
+      finished_results.push_back (new hwbkpt_derived_probe (base,
+							    location,
+							    hwbkpt_address,
+							    "",len,0,
+							    has_write,
+							    has_rw));
+  else // has symbol_str
+      finished_results.push_back (new hwbkpt_derived_probe (base,
+							    location,
+							    0,
+							    symbol_str_val,len,0,
+							    has_write,
+							    has_rw));
+}
 
 // ------------------------------------------------------------------------
 // statically inserted kernel-tracepoint derived probes
@@ -5811,7 +6145,7 @@ tracepoint_derived_probe_group::emit_mod
         }
       s.op->line() << ") {";
       s.op->indent(1);
-      common_probe_entryfn_prologue (s.op, "STAP_SESSION_RUNNING", 
+      common_probe_entryfn_prologue (s.op, "STAP_SESSION_RUNNING",
                                      lex_cast_qstring (*p->sole_location()));
       s.op->newline() << "c->marker_name = "
                       << lex_cast_qstring (p->tracepoint_name)
@@ -6140,7 +6474,6 @@ tracepoint_builder::init_dw(systemtap_se
   return true;
 }
 
-
 void
 tracepoint_builder::build(systemtap_session& s,
                           probe *base, probe_point *location,
@@ -6158,7 +6491,6 @@ tracepoint_builder::build(systemtap_sess
 }
 
 
-
 // ------------------------------------------------------------------------
 //  Standard tapset registry.
 // ------------------------------------------------------------------------
@@ -6205,6 +6537,25 @@ register_standard_tapsets(systemtap_sess
      ->bind_num(TOK_MAXACTIVE)->bind(new kprobe_builder());
   s.pattern_root->bind(TOK_KPROBE)->bind_num(TOK_STATEMENT)
       ->bind(TOK_ABSOLUTE)->bind(new kprobe_builder());
+
+  //Hwbkpt based probe
+  if (s.kernel_config["CONFIG_PERF_EVENTS"] == string("y") &&
+	s.kernel_config["CONFIG_HAVE_HW_BREAKPOINT"] == string("y")) {
+	  s.pattern_root->bind(TOK_KERNEL)->bind_num(TOK_HWBKPT)
+		->bind(TOK_HWBKPT_WRITE)->bind(new hwbkpt_builder());
+	  s.pattern_root->bind(TOK_KERNEL)->bind_str(TOK_HWBKPT)
+		->bind(TOK_HWBKPT_WRITE)->bind(new hwbkpt_builder());
+	  s.pattern_root->bind(TOK_KERNEL)->bind_num(TOK_HWBKPT)
+		->bind(TOK_HWBKPT_RW)->bind(new hwbkpt_builder());
+	  s.pattern_root->bind(TOK_KERNEL)->bind_str(TOK_HWBKPT)
+		->bind(TOK_HWBKPT_RW)->bind(new hwbkpt_builder());
+	  s.pattern_root->bind(TOK_KERNEL)->bind_num(TOK_HWBKPT)
+		->bind_num(TOK_LENGTH)->bind(TOK_HWBKPT_WRITE)->bind(new hwbkpt_builder());
+	  s.pattern_root->bind(TOK_KERNEL)->bind_num(TOK_HWBKPT)
+		->bind_num(TOK_LENGTH)->bind(TOK_HWBKPT_RW)->bind(new hwbkpt_builder());
+	  // length supported with address only, not symbol names
+	}
+
 }
 
 
@@ -6231,6 +6582,7 @@ all_session_groups(systemtap_session& s)
   DOONE(mark);
   DOONE(tracepoint);
   DOONE(kprobe);
+  DOONE(hwbkpt);
   DOONE(hrtimer);
   DOONE(perfmon);
   DOONE(procfs);
Index: systemtap/session.h
===================================================================
--- systemtap.orig/session.h
+++ systemtap/session.h
@@ -31,6 +31,7 @@ struct derived_probe;
 struct be_derived_probe_group;
 struct dwarf_derived_probe_group;
 struct kprobe_derived_probe_group;
+struct hwbkpt_derived_probe_group;
 struct uprobe_derived_probe_group;
 struct utrace_derived_probe_group;
 struct itrace_derived_probe_group;
@@ -175,6 +176,7 @@ struct systemtap_session
   be_derived_probe_group* be_derived_probes;
   dwarf_derived_probe_group* dwarf_derived_probes;
   kprobe_derived_probe_group* kprobe_derived_probes;
+  hwbkpt_derived_probe_group* hwbkpt_derived_probes;
   uprobe_derived_probe_group* uprobe_derived_probes;
   utrace_derived_probe_group* utrace_derived_probes;
   itrace_derived_probe_group* itrace_derived_probes;
Index: systemtap/elaborate.cxx
===================================================================
--- systemtap.orig/elaborate.cxx
+++ systemtap/elaborate.cxx
@@ -1523,6 +1523,7 @@ systemtap_session::systemtap_session ():
   be_derived_probes(0),
   dwarf_derived_probes(0),
   kprobe_derived_probes(0),
+  hwbkpt_derived_probes(0),
   uprobe_derived_probes(0),
   utrace_derived_probes(0),
   itrace_derived_probes(0),

[-- Attachment #3: hwbkpt-misc.patch --]
[-- Type: text/x-diff, Size: 3307 bytes --]

Index: systemtap/stapprobes.3stap.in
===================================================================
--- systemtap.orig/stapprobes.3stap.in
+++ systemtap/stapprobes.3stap.in
@@ -660,6 +660,55 @@ and a string of name=value pairs for all
 is available in
 .BR $$vars " or " $$parms .
 
+.SS HARDWARE BREAKPOINTS
+This family of probes is used to set hardware watchpoints for a given
+ (global) kernel symbol. The probes take three components as inputs :
+
+1. The
+.BR virtual address / name
+of the kernel symbol to be traced is supplied as argument to this class
+of probes. ( Probes for only data segment variables are supported. Probing
+local variables of a function cannot be done.)
+
+2. Nature of access to be probed :
+a.
+.I .write
+probe gets triggered when a write happens at the specified address/symbol
+name.
+b.
+.I rw
+probe is triggered when either a read or write happens.
+
+3.
+.BR .length
+(optional)
+Users have the option of specifying the address interval to be probed
+using "length" constructs. The user-specified length gets approximated
+to the closest possible address length that the architecture can
+support. If the specified length exceeds the limits imposed by
+architecture, an error message is flagged and probe registration fails.
+Wherever 'length' is not specified, the translator requests a hardware
+breakpoint probe of length 1. It should be noted that the "length"
+construct is not valid with symbol names.
+
+Following constructs are supported :
+.SAMPLE
+probe kernel.data(ADDRESS).write
+probe kernel.data(ADDRESS).rw
+probe kernel.data(ADDRESS).length(LEN).write
+probe kernel.data(ADDRESS).length(LEN).rw
+probe kernel.data("SYMBOL_NAME").write
+probe kernel.data("SYMBOL_NAME").rw
+.ESAMPLE
+
+This set of probes make use of the debug registers of the processor,
+which is a scarce resource. (4 on x86 , 1 on powerpc ) The script
+translation flags a warning if a user requests more hardware breakpoint probes
+than the limits set by architecture. For example,a pass-2 warning is flashed
+when an input script requests 5 hardware breakpoint probes on an x86
+system while x86 architecture supports a maximum of 4 breakpoints.
+Users are cautioned to set probes judiciously.
+
 .SH EXAMPLES
 .PP
 Here are some example probe points, defining the associated events.
@@ -696,6 +745,9 @@ refers to the statement of line 2917 wit
 kernel.statement("bio_init@fs/bio.c+3")
 refers to the statement at line bio_init+3 within "fs/bio.c".
 .TP
+kernel.data("pid_max").write
+refers to a hardware preakpoint of type "write" set on pid_max
+.TP
 syscall.*.return
 refers to the group of probe aliases with any name in the third position
 
Index: systemtap/testsuite/buildok/hwbkpt.stp
===================================================================
--- /dev/null
+++ systemtap/testsuite/buildok/hwbkpt.stp
@@ -0,0 +1,4 @@
+#! stap -wp4
+
+probe kernel.data("pid_max").write {}
+probe kernel.data("pid_max").rw {}
Index: systemtap/NEWS
===================================================================
--- systemtap.orig/NEWS
+++ systemtap/NEWS
@@ -1,3 +1,5 @@
+- New : systemtap now supports probes for Hardware Breakpoints. For syntax details, see man page for stapprobes.
+
 * What's new in version 1.1
 
 - New tracepoint based tapset for memory subsystem.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC V2] Systemtap translator support for kernel hardware breakpoints
  2010-01-28  9:59           ` Prerna Saxena
@ 2010-01-29  9:15             ` Prerna Saxena
  0 siblings, 0 replies; 31+ messages in thread
From: Prerna Saxena @ 2010-01-29  9:15 UTC (permalink / raw)
  To: systemtap

On 01/28/2010 03:29 PM, Prerna Saxena wrote:
> Attaching a new version. Hope this addresses the concerns raised. If ppl
> are fine with it, I'll check it in.

Checked in via commit dd225250dd8e76484fe9032e49cb0ef132180a59 and 
91036867d38153408de7b41c6d12d59f470f6bcd

Regards,
-- 
Prerna Saxena

Linux Technology Centre,
IBM Systems and Technology Lab,
Bangalore, India

^ permalink raw reply	[flat|nested] 31+ messages in thread

end of thread, other threads:[~2010-01-29  9:15 UTC | newest]

Thread overview: 31+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-01-07  8:35 [RFC] Systemtap translator support for hardware breakpoints on Prerna Saxena
2010-01-07 18:01 ` Frank Ch. Eigler
2010-01-07 21:57   ` Roland McGrath
2010-01-07 22:39     ` Roland McGrath
2010-01-07 22:45       ` Frank Ch. Eigler
2010-01-08  5:01       ` Ananth N Mavinakayanahalli
2010-01-08 10:24         ` Roland McGrath
2010-01-09  1:29           ` K.Prasad
2010-01-09  1:56             ` Roland McGrath
2010-01-08 14:35   ` [RFC] Systemtap translator support for hw breakpoints on x86 Prerna Saxena
2010-01-08 15:21     ` Frank Ch. Eigler
2010-01-09  1:41       ` K.Prasad
2010-01-25 19:00   ` [RFC V2] Systemtap translator support for kernel hardware breakpoints Prerna Saxena
2010-01-26 21:16     ` Frank Ch. Eigler
2010-01-27 15:55       ` Prerna Saxena
2010-01-27 17:12         ` Frank Ch. Eigler
2010-01-28  9:59           ` Prerna Saxena
2010-01-29  9:15             ` Prerna Saxena
2010-01-07 23:15 ` [RFC] Systemtap translator support for hardware breakpoints on Roland McGrath
2010-01-08  1:03   ` Frank Ch. Eigler
2010-01-08  1:28     ` Roland McGrath
2010-01-08  1:19 ` Roland McGrath
2010-01-08  1:53 ` Roland McGrath
2010-01-08 16:08   ` Prerna Saxena
2010-01-08 18:52     ` Roland McGrath
2010-01-09  0:54       ` K.Prasad
2010-01-09  1:07         ` Roland McGrath
2010-01-09  1:11   ` K.Prasad
2010-01-09  1:45     ` Roland McGrath
2010-01-09  2:46       ` K.Prasad
2010-01-10 21:37         ` Roland McGrath

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).