public inbox for systemtap@sourceware.org
 help / color / mirror / Atom feed
* [PATCH 1/6] New Infiniband (OFED) tapset
@ 2009-02-06  0:29 David J. Wilder
  2009-02-06  4:34 ` Josh Stone
  0 siblings, 1 reply; 3+ messages in thread
From: David J. Wilder @ 2009-02-06  0:29 UTC (permalink / raw)
  To: systemtap, xma, pradeep, prasad

This tapset is the first of several tapsets used to probe the ofed
infiniband stack.  Ofed.stp includes probes to several of the ib
"kernel" verbs.  Also included are core functions that users of the ofed
tapescripts may find helpful.

Signed-off-by: David Wilder <dwilder@us.ibm.com>

------------------------------------------------------
 tapset/ofed.stp |  501 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 501 insertions(+), 0 deletions(-)

diff --git a/tapset/ofed.stp b/tapset/ofed.stp
new file mode 100644
index 0000000..2b5e13f
--- /dev/null
+++ b/tapset/ofed.stp
@@ -0,0 +1,501 @@
+/*
+ *	OFED tapset
+ *	Copyright (C) 2009 IBM Corp.
+ *	
+ *	This file is part of systemtap, and is free software.  You can
+ *	redistribute it and/or modify it under the terms of the GNU General
+ *	Public License (GPL); either version 2, or (at your option) any
+ *	later version.
+ *	
+ *	Initial version: wilder@us.ibm.com   	2008/09/09
+ *	Version 1.0	wilder@us.ibm.com	2009/01/19
+ */
+
+%{
+#include <rdma/ib_verbs.h>
+%}
+
+/*
+ * Event and state helper functions
+ */
+global _ib_event_num2str[18]
+global _ib_event_str2num[18]
+global _ib_wc_status_num2str[22]
+global _ib_wc_status_str2num[22]
+global _ib_wr_opcode_num2str[8]
+global _ib_wr_opcode_str2num[8]
+global _ib_wr_bit2str[6]
+
+/**
+ * sfunction ib_event_num2str - String representation of an ib_event_type.
+ *
+ * Given an ib_event_type, return a string representation.
+ */
+function ib_event_num2str:string (event:long)
+{
+	return (event in _ib_event_num2str ? _ib_event_num2str[event] : "UNDEF")
+}
+
+/**
+ * sfunction ib_event_str2num - The numeric value of an ib_event_type.
+ *
+ * Given an ib_event_type (string), return the corresponding event number.
+ */
+function ib_event_str2num:long (event:string)
+{
+	return (event in _ib_event_str2num ? _ib_event_str2num[event] : -1)
+}
+
+/**
+ * sfunction ib_wc_status_num2str - String representation of an ib_wc_status.
+ *
+ * Given an ib_wc_status, return a string representation.
+ */
+function ib_wc_status_num2str:string (status:long)
+{
+	return (status in _ib_wc_status_num2str ?
+		_ib_wc_status_num2str[status] : "UNDEF")
+}
+
+/**
+ * sfunction ib_wc_status_str2num - The numeric value of an ib_wc_status.
+ * 
+ * Given an ib_wc_status (string), return the corresponding number.
+ */
+function ib_wc_status_str2num:long (status:string)
+{
+	return (status in _ib_wc_status_str2num ?
+		_ib_wc_status_str2num[status] : -1)
+}
+
+/**
+ * sfunction ib_wr_opcode_num2str - String representation of an ib_wr_opcode.
+ *
+ * Given an ib_wr_opcode, return a string representation.
+ */
+function ib_wr_opcode_num2str:string (opcode:long)
+{
+        return (opcode in _ib_wr_opcode_num2str ?
+		_ib_wr_opcode_num2str[opcode] : "UNDEF")
+}
+
+/**
+ * sfunction ib_wr_opcode_str2num - The numeric value of an ib_wr_opcode.
+ *
+ * Given an ib_wr_opcode_str2num (string), return the corresponding number.
+ */
+function ib_wr_opcode_str2num:long (opcode:string)
+{
+        return (opcode in _ib_wr_opcode_str2num ?
+		_ib_wr_opcode_str2num[opcode] : -1)
+}
+
+/* 	
+ * Initialize the state and event tables.
+ */
+
+probe begin(-1001)
+{
+	init_ib_wr_send_flags()
+	init_ib_event_types()
+	init_ib_wc_status()
+	init_ib_wr_opcodes()
+}
+
+function init_ib_wr_send_flags()
+{
+	/* From rdma/ib_verbs.h */
+	_ib_wr_bit2str[0] = "IB_SEND_FENCE"
+	_ib_wr_bit2str[1] = "IB_SEND_SIGNALED"
+	_ib_wr_bit2str[2] = "IB_SEND_SOLICITED"
+	_ib_wr_bit2str[3] = "IB_SEND_INLINE"
+	_ib_wr_bit2str[4] = "IB_SEND_IP_CSUM"
+	_ib_wr_bit2str[5] = "IB_SEND_UDP_LSO"
+}
+
+function init_ib_event_types()
+{
+	/* From rdma/ib_verbs.h
+	 * Mapping of ib_event_type enumeration
+	 */
+	_ib_event_num2str[0] = "IB_EVENT_CQ_ERR"
+	_ib_event_num2str[1] = "IB_EVENT_QP_FATAL"
+	_ib_event_num2str[2] = "IB_EVENT_QP_REQ_ERR"
+	_ib_event_num2str[3] = "IB_EVENT_QP_ACCESS_ERR"
+	_ib_event_num2str[4] = "IB_EVENT_COMM_EST"
+	_ib_event_num2str[5] = "IB_EVENT_SQ_DRAINED"
+	_ib_event_num2str[6] = "IB_EVENT_PATH_MIG"
+	_ib_event_num2str[7] = "IB_EVENT_PATH_MIG_ERR"
+	_ib_event_num2str[8] = "IB_EVENT_DEVICE_FATAL"
+	_ib_event_num2str[9] = "IB_EVENT_PORT_ACTIVE"
+	_ib_event_num2str[10] = "IB_EVENT_PORT_ERR"
+	_ib_event_num2str[11] = "IB_EVENT_LID_CHANGE"
+	_ib_event_num2str[12] = "IB_EVENT_PKEY_CHANGE"
+	_ib_event_num2str[13] = "IB_EVENT_SM_CHANGE"
+	_ib_event_num2str[14] = "IB_EVENT_SRQ_ERR"
+	_ib_event_num2str[15] = "IB_EVENT_SRQ_LIMIT_REACHED"
+	_ib_event_num2str[16] = "IB_EVENT_QP_LAST_WQE_REACHED"
+	_ib_event_num2str[17] = "IB_EVENT_CLIENT_REREGISTER"
+
+	_ib_event_str2num["IB_EVENT_CQ_ERR"]			= 0
+	_ib_event_str2num["IB_EVENT_QP_FATAL"]			= 1
+	_ib_event_str2num["IB_EVENT_QP_REQ_ERR"]		= 2
+	_ib_event_str2num["IB_EVENT_QP_ACCESS_ERR"]		= 3
+	_ib_event_str2num["IB_EVENT_COMM_EST"]			= 4
+	_ib_event_str2num["IB_EVENT_SQ_DRAINED"]		= 5
+	_ib_event_str2num["IB_EVENT_PATH_MIG"]			= 6
+	_ib_event_str2num["IB_EVENT_PATH_MIG_ERR"]		= 7
+	_ib_event_str2num["IB_EVENT_DEVICE_FATAL"]		= 8
+	_ib_event_str2num["IB_EVENT_PORT_ACTIVE"]		= 9
+	_ib_event_str2num["IB_EVENT_PORT_ERR"]			= 10
+	_ib_event_str2num["IB_EVENT_LID_CHANGE"]		= 11
+	_ib_event_str2num["IB_EVENT_PKEY_CHANGE"]		= 12
+	_ib_event_str2num["IB_EVENT_SM_CHANGE"]			= 13
+	_ib_event_str2num["IB_EVENT_SRQ_ERR"]			= 14
+	_ib_event_str2num["IB_EVENT_SRQ_LIMIT_REACHED"]		= 15
+	_ib_event_str2num["IB_EVENT_QP_LAST_WQE_REACHED"]	= 16
+	_ib_event_str2num["IB_EVENT_CLIENT_REREGISTER"]		= 17
+}
+
+function init_ib_wc_status()
+{
+	/* from ib_verbs.h */
+	_ib_wc_status_num2str[0]  = "IB_WC_SUCCESS"
+	_ib_wc_status_num2str[1]  = "IB_WC_LOC_LEN_ERR"
+	_ib_wc_status_num2str[2]  = "IB_WC_LOC_QP_OP_ERR"
+	_ib_wc_status_num2str[3]  = "IB_WC_LOC_EEC_OP_ERR"
+	_ib_wc_status_num2str[4]  = "IB_WC_LOC_PROT_ERR"
+	_ib_wc_status_num2str[5]  = "IB_WC_WR_FLUSH_ERR"
+	_ib_wc_status_num2str[6]  = "IB_WC_MW_BIND_ERR"
+	_ib_wc_status_num2str[7]  = "IB_WC_BAD_RESP_ERR"
+	_ib_wc_status_num2str[8]  = "IB_WC_LOC_ACCESS_ERR"
+	_ib_wc_status_num2str[9]  = "IB_WC_REM_INV_REQ_ERR"
+	_ib_wc_status_num2str[10] = "IB_WC_REM_ACCESS_ERR"
+	_ib_wc_status_num2str[11] = "IB_WC_REM_OP_ERR"
+	_ib_wc_status_num2str[12] = "IB_WC_RETRY_EXC_ERR"
+	_ib_wc_status_num2str[13] = "IB_WC_RNR_RETRY_EXC_ERR"
+	_ib_wc_status_num2str[14] = "IB_WC_LOC_RDD_VIOL_ERR"
+	_ib_wc_status_num2str[15] = "IB_WC_REM_INV_RD_REQ_ERR"
+	_ib_wc_status_num2str[16] = "IB_WC_REM_ABORT_ERR"
+	_ib_wc_status_num2str[17] = "IB_WC_INV_EECN_ERR"
+	_ib_wc_status_num2str[18] = "IB_WC_INV_EEC_STATE_ERR"
+	_ib_wc_status_num2str[19] = "IB_WC_FATAL_ERR"
+	_ib_wc_status_num2str[20] = "IB_WC_RESP_TIMEOUT_ERR"
+	_ib_wc_status_num2str[21] = "IB_WC_GENERAL_ERR"
+
+	_ib_wc_status_str2num["IB_WC_SUCCESS"]			=0
+	_ib_wc_status_str2num["IB_WC_LOC_LEN_ERR"]		=1
+	_ib_wc_status_str2num["IB_WC_LOC_QP_OP_ERR"]		=2
+	_ib_wc_status_str2num["IB_WC_LOC_EEC_OP_ERR"]		=3
+	_ib_wc_status_str2num["IB_WC_LOC_PROT_ERR"]		=4
+	_ib_wc_status_str2num["IB_WC_WR_FLUSH_ERR"]		=5
+	_ib_wc_status_str2num["IB_WC_MW_BIND_ERR"]		=6
+	_ib_wc_status_str2num["IB_WC_BAD_RESP_ERR"]		=7
+	_ib_wc_status_str2num["IB_WC_LOC_ACCESS_ERR"]		=8
+	_ib_wc_status_str2num["IB_WC_REM_INV_REQ_ERR"]		=9
+	_ib_wc_status_str2num["IB_WC_REM_ACCESS_ERR"]		=10
+	_ib_wc_status_str2num["IB_WC_REM_OP_ERR"]		=11
+	_ib_wc_status_str2num["IB_WC_RETRY_EXC_ERR"]		=12
+	_ib_wc_status_str2num["IB_WC_RNR_RETRY_EXC_ERR"]	=13
+	_ib_wc_status_str2num["IB_WC_LOC_RDD_VIOL_ERR"]		=14
+	_ib_wc_status_str2num["IB_WC_REM_INV_RD_REQ_ERR"]	=15
+	_ib_wc_status_str2num["IB_WC_REM_ABORT_ERR"]		=16
+	_ib_wc_status_str2num["IB_WC_INV_EECN_ERR"]		=17
+	_ib_wc_status_str2num["IB_WC_INV_EEC_STATE_ERR"]	=18
+	_ib_wc_status_str2num["IB_WC_FATAL_ERR"]		=19
+	_ib_wc_status_str2num["IB_WC_RESP_TIMEOUT_ERR"]		=20
+	_ib_wc_status_str2num["IB_WC_GENERAL_ERR"]		=21
+}
+
+function init_ib_wr_opcodes()
+{
+	/* from rdma/ib_verbs.h */
+	_ib_wr_opcode_num2str[0] = "IB_WR_RDMA_WRITE"
+	_ib_wr_opcode_num2str[1] = "IB_WR_RDMA_WRITE_WITH_IMM"
+	_ib_wr_opcode_num2str[2] = "IB_WR_SEND"
+	_ib_wr_opcode_num2str[3] = "IB_WR_SEND_WITH_IMM"
+	_ib_wr_opcode_num2str[4] = "IB_WR_RDMA_READ"
+	_ib_wr_opcode_num2str[5] = "IB_WR_ATOMIC_CMP_AND_SWP"
+	_ib_wr_opcode_num2str[6] = "IB_WR_ATOMIC_FETCH_AND_ADD"
+	_ib_wr_opcode_num2str[7] = "IB_WR_LSO"
+
+	_ib_wr_opcode_str2num["IB_WR_RDMA_WRITE"]		=0
+	_ib_wr_opcode_str2num["IB_WR_RDMA_WRITE_WITH_IMM"]	=1
+	_ib_wr_opcode_str2num["IB_WR_SEND"]			=2
+	_ib_wr_opcode_str2num["IB_WR_SEND_WITH_IMM"]		=3
+	_ib_wr_opcode_str2num["IB_WR_RDMA_READ"]		=4
+	_ib_wr_opcode_str2num["IB_WR_ATOMIC_CMP_AND_SWP"]	=5
+	_ib_wr_opcode_str2num["IB_WR_ATOMIC_FETCH_AND_ADD"]	=6
+	_ib_wr_opcode_str2num["IB_WR_LSO"]			=7
+}
+
+/*
+ *      Macros for safely accessing members of a structure.
+ *
+ * Example usage: using get_member() macro to read ib_cm_id->state.
+ * --------------------------------------------------------
+ * function ib_cm_id__state:long (A:long)
+ * %{ THIS->__retvalue  =  get_member(ib_cm_id, state, __be32) %}
+ *
+ * Example expansion of macro get_member():
+ * --------------------------------------------------------
+ * void ib_cm_id__state (A)
+ * {
+ * THIS->__retvalue = (long)(__be32)kread(&((struct ib_cm_id *)THIS->A)->state);
+ * goto end;
+ * deref_fault: 
+ *          printk("deref fault in ofed.stp, base pointer = %p\n",
+ *                         (struct ib_cm_id *)THIS->A);
+ *         THIS->__retvalue = -1;
+ * end:;
+ * }
+ */
+%{
+
+#define get_memberfault(STRUCT, MEMBER)				\
+	goto end;						\
+deref_fault:							\
+	printk("deref fault in ofed.stp, base pointer = %p\n",	\
+	(struct STRUCT *)THIS->A); 				\
+	THIS->__retvalue = -1;					\
+end: ;
+
+#define __get_member(STRUCT,MEMBER) kread(&((struct STRUCT *)THIS->A)->MEMBER); 
+#define	_get_member(STRUCT,MEMBER)	__get_member(STRUCT,MEMBER); get_memberfault(STRUCT,MEMBER);
+#define get_member(STRUCT,MEMBER,TYPE) (long)(TYPE)_get_member(STRUCT, MEMBER);
+%}
+
+/*      These functions return the value of some members
+ *      of a struct ib_qp.
+ *
+ */
+function ib_qp__device:long (A:long)
+ %{ THIS->__retvalue  =  get_member(ib_qp, device, void *)%}
+
+function ib_qp__pd:long (A:long)
+%{ THIS->__retvalue  =  get_member(ib_qp, pd, void *)%}
+
+function ib_qp__send_cq:long (A:long)
+%{ THIS->__retvalue  =  get_member(ib_qp, send_cq, void *)%}
+
+function ib_qp__recv_cq :long (A:long)
+%{ THIS->__retvalue  =  get_member(ib_qp, recv_cq, void *)%}
+
+function ib_qp__srq:long (A:long)
+%{ THIS->__retvalue  =  get_member(ib_qp, srq, void *)%}
+
+function ib_qp__uobject:long (A:long)
+%{ THIS->__retvalue  =  get_member(ib_qp, uobject, void *)%}
+
+function ib_qp__qp_context:long (A:long)
+%{ THIS->__retvalue  =  get_member(ib_qp, qp_context, void *)%}
+
+function ib_qp__qp_num:long (A:long)
+%{ THIS->__retvalue  =  get_member(ib_qp, qp_num, u32)%}
+
+function ib_qp__qp_type:long (A:long)
+%{ THIS->__retvalue  =  get_member(ib_qp, qp_type, int)%}
+
+/*
+ *      These functions return the value of some members
+ *      of a struct ib_send_wr. kread() is used for all de-references.
+ */
+function ib_send_wr__wr_id:long (A:long)
+%{ THIS->__retvalue  =  get_member(ib_send_wr, wr_id, u64) %}
+
+function ib_send_wr__sg_list:long (A:long)
+%{ THIS->__retvalue  =  get_member(ib_send_wr, sg_list, void *) %}
+
+function ib_send_wr__num_sge:long (A:long)
+%{ THIS->__retvalue  =  get_member(ib_send_wr, num_sge, int) %}
+
+function ib_send_wr__opcode:long (A:long)
+%{ THIS->__retvalue  =  get_member(ib_send_wr, opcode, int) %}
+
+function ib_send_wr__imm_data:long (A:long)
+%{ THIS->__retvalue  =  get_member(ib_send_wr, imm_data, __be32) %}
+
+function ib_send_wr__send_flags:long (A:long)
+%{ THIS->__retvalue  =  get_member(ib_send_wr, send_flags, __be32) %}
+
+/*
+ *	These functions return the value of some members
+ *	of a struct ib_send_wr. kread() is used for all de-references.
+ */
+function ib_recv_wr__wr_id:long (A:long)
+%{ THIS->__retvalue  =  get_member(ib_recv_wr, wr_id, u64) %}
+
+function ib_recv_wr__ib_sge:long (A:long)
+%{ THIS->__retvalue  =  get_member(ib_recv_wr, ib_sge, void *) %}
+
+function ib_recv_wr__num_sge:long (A:long)
+%{ THIS->__retvalue  =  get_member(ib_recv_wr, num_sge, int) %}
+
+/*
+ *	These functions return the value of some members
+ *	of a struct ib_sge. kread() is used for all de-references.
+ */
+function ib_sge__addr:long (A:long)
+%{ THIS->__retvalue  =  get_member(ib_sge, addr, u64) %}
+
+function ib_sge__length:long (A:long)
+%{ THIS->__retvalue  =  get_member(ib_sge, length, u32) %}
+
+function ib_sge__lkey:long (A:long)
+%{ THIS->__retvalue  =  get_member(ib_sge, lkey, u32) %}
+
+/*
+ * Helper functions
+ */
+
+/**
+ * sfunction hexdump - Print a buffer in hex.
+ *
+ * Given the address and the length of a buffer, print its contents in hex. 
+ */
+function hexdump (buff:long, len:long) {
+	printf("%s\n",_hexdump(buff, len))
+}
+
+function _hexdump:string (buff:long, _len:long)
+%{ /* pure */
+	char c;
+	uint i=0; // init to stop gcc warning
+	char *p = (char *)(long)THIS->buff;
+	int len = (int)(long)THIS->_len;
+	char *t = (char *) &(THIS->__retvalue);
+
+	for (i = 0; i < len; i++) {
+		//c = (u8)kread(p++); XXX
+		c = (u8)*p++;
+		t += sprintf(t, "%02x ", c);
+		if ((i & 0x0f) == 0x0f)
+			t += sprintf(t, "\n");
+		if (i >= MAXSTRINGLEN)
+			break;
+	}
+deref_fault: ;
+%}
+
+function next_sge:long (sg_list:long)
+%{
+	struct ib_sge *sge = (struct ib_sge *)THIS->sg_list;
+	THIS->__retvalue = (long) ++sge;
+%}
+
+/**
+ * sfunction ib_sg_dump - Print the contents of ib_sge in hex.
+ *
+ * Given a pointer to a list of ib_sge (ib scatter gather elements) 
+ * and the number of elements print each element in hex.
+ */
+function ib_sg_dump (sg_list:long, num_sge:long)
+{
+	if ( num_sge ) {
+		printf("SG buffer ")
+		for ( i=0; i<num_sge; ++i ) {
+			len = ib_sge__length(sg_list)
+			addr = ib_sge__addr(sg_list)
+			printf("#%d\n",i);
+			hexdump(addr,len)
+			sg_list=next_sge(sg_list)
+		}
+	} else 
+		printf("SG buffer is empty\n")
+}
+
+/*
+ *      Driver post probe definitions.
+ *
+ *	ib_post_send() and ib_post recv() are inlines.  Ideally
+ *	we want to probe all instances of these inlines by the 
+ *	modules we are probing. Due to a current limitations of the 
+ *	debug data the location of the arguments of inlines are not 
+ *	available.  To work-around this issue we set probes
+ *	on the corresponding functions for each of the hca drivers.
+ *	This approach has two disadvantages, 1) Performance will
+ *	be impacted as all call to these functions are probed.
+ *	2) The users of these probes must filter out the probe
+ *	hits of no interest, use the value of $qp to filter on. 
+ */
+
+/**
+ * probe ib_post_send - Posting a list of work requests to a send queue.
+ * @ib_post_send_qp: The QP (struct ib_qp *) the work request is to be posted.
+ * @ib_post_send_wr: A list of work requests (struct ib_recv_wr *).
+ *
+ * Context:
+ *  The requester, kernel only
+ *
+ * Fires when posting a list of WQEs to a send queue.
+ */
+probe ib_post_send = 	ib_ehca_post_send ?,
+			ib_mlx4_post_send ?,
+			ib_mthca_post_send ?,
+			ib_nes_post_send ? {}
+
+probe ib_ehca_post_send = module("*ib*").function("ehca_post_send") ?
+{
+		ib_post_send_qp = $qp
+		ib_post_send_wr = $send_wr
+}
+
+probe ib_mlx4_post_send = module("*ib*").function("mlx4_ib_post_send") ?
+{
+		ib_post_send_qp = $ibqp
+		ib_post_send_wr = $wr
+}
+
+probe ib_mthca_post_send = module("*ib*").function("mthca_tavor_post_send") ?,
+                        module("*ib*").function("mthca_arbel_post_send") ?
+{
+		ib_post_send_qp = $ibqp
+		ib_post_send_wr = $wr
+}
+
+probe ib_nes_post_send = module("*ib*").function("nes_post_send") ?
+{
+		ib_post_send_qp = $ibqp
+		ib_post_send_wr = $wr
+}
+
+/**
+ * probe ib_post_recv - Posting work requests to a to a receive queue.
+ * @ib_post_recv_qp: The QP (struct ib_qp *) the work request is to be posted.
+ * @ib_post_recv_wr: A list of work requests (struct ib_recv_wr *).
+ *
+ * Context:
+ *  The requester, kernel only
+ *
+ * Fires when posting a list of WQEs to a receive queue.
+ */
+probe ib_post_recv =	ib_ehca_post_recv ?,
+			ib_mlx4_post_recv ?,
+			ib_mthca_post_recv ?,
+			ib_nes_post_recv ? {}
+	
+
+probe ib_ehca_post_recv = module("*ib*").function("ehca_post_recv") ?
+{
+		ib_post_recv_qp = $qp
+		ib_post_recv_wr = $recv_wr
+}
+		
+probe ib_mlx4_post_recv = module("*ib*").function("mlx4_ib_post_recv") ?
+{
+		ib_post_recv_qp = $ibqp
+		ib_post_recv_wr = $wr
+}
+		
+probe ib_mthca_post_recv = module("*ib*").function("mthca_arbel_post_receive") ?,
+			module("*ib*").function("mthca_tavor_post_receive") ?
+{
+		ib_post_recv_qp = $ibqp
+		ib_post_recv_wr = $wr
+}
+		
+probe ib_nes_post_recv = module("*ib*").function("nes_post_recv") ?
+{
+		ib_post_recv_qp = $ibqp
+		ib_post_recv_wr = $wr
+}


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH 1/6] New Infiniband (OFED) tapset
  2009-02-06  0:29 [PATCH 1/6] New Infiniband (OFED) tapset David J. Wilder
@ 2009-02-06  4:34 ` Josh Stone
  2009-02-06 15:56   ` David J. Wilder
  0 siblings, 1 reply; 3+ messages in thread
From: Josh Stone @ 2009-02-06  4:34 UTC (permalink / raw)
  To: David J. Wilder; +Cc: systemtap, xma, pradeep, prasad

Hi -

David J. Wilder wrote:
> This tapset is the first of several tapsets used to probe the ofed
> infiniband stack.  Ofed.stp includes probes to several of the ib
> "kernel" verbs.  Also included are core functions that users of the ofed
> tapescripts may find helpful

I know little about infiniband, so I'm mostly peeking through here for
safety issues and adherence to conventions...

> +%{
> +
> +#define get_memberfault(STRUCT, MEMBER)				\
> +	goto end;						\
> +deref_fault:							\
> +	printk("deref fault in ofed.stp, base pointer = %p\n",	\
> +	(struct STRUCT *)THIS->A); 				\
> +	THIS->__retvalue = -1;					\
> +end: ;

The kread will prepare an error string for the normal stap error
channels (c->last_error), so you don't need another message in printk.
You can also use CATCH_DEREF_FAULT() to provide the deref_fault label,
unless you really want the retvalue set to -1.

> +#define __get_member(STRUCT,MEMBER) kread(&((struct STRUCT *)THIS->A)->MEMBER); 

I think you need to cast with (struct STRUCT *)(long) for 32-bit platforms.

> +#define	_get_member(STRUCT,MEMBER)	__get_member(STRUCT,MEMBER); get_memberfault(STRUCT,MEMBER);
> +#define get_member(STRUCT,MEMBER,TYPE) (long)(TYPE)_get_member(STRUCT, MEMBER);
> +%}

The kread will cast the value to typeof(STRUCT->MEMBER), so do you
really need the manual TYPE cast?

> +function ib_qp__device:long (A:long)
> + %{ THIS->__retvalue  =  get_member(ib_qp, device, void *)%}

Side-effect-free embedded-c like this should have a comment "/* pure */"
so our optimizer knows that it may be possible to remove it.

I'm working on bz5634 to support native type casting, so hopefully
really soon it will be possible to write such functions as:

   function ip_qp__device:long (A:long)
   { return @cast(A, "struct ib_qp")->device }

Not yet though...

> +/**
> + * sfunction hexdump - Print a buffer in hex.
> + *
> + * Given the address and the length of a buffer, print its contents in hex. 
> + */
> +function hexdump (buff:long, len:long) {
> +	printf("%s\n",_hexdump(buff, len))
> +}

I think a stap-native printf("%.*M\n", 2*len, buff) will work like this...


> +probe ib_post_send = 	ib_ehca_post_send ?,
> +			ib_mlx4_post_send ?,
> +			ib_mthca_post_send ?,
> +			ib_nes_post_send ? {}

Our convention is to use dots for separating probe namespaces, so this
should be something like "ib.post_send".  Then for the internal
variants, prefix it with an underscore like "_ib.ehca_post_send".

> +		ib_post_send_qp = $qp
> +		ib_post_send_wr = $send_wr

Since you're already in an ib-specific probe, I think these variables
should be named more simply, like "qp" and "wr".


Josh

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH 1/6] New Infiniband (OFED) tapset
  2009-02-06  4:34 ` Josh Stone
@ 2009-02-06 15:56   ` David J. Wilder
  0 siblings, 0 replies; 3+ messages in thread
From: David J. Wilder @ 2009-02-06 15:56 UTC (permalink / raw)
  To: Josh Stone; +Cc: systemtap, xma, pradeep, prasad

Thanks for the review Josh.
> 
> I'm working on bz5634 to support native type casting, so hopefully
> really soon it will be possible to write such functions as:
> 
>    function ip_qp__device:long (A:long)
>    { return @cast(A, "struct ib_qp")->device }
> 
> Not yet though...

Good to know, I will look forward to getting rid of my get_member mess.

> 
> > +/**
> > + * sfunction hexdump - Print a buffer in hex.
> > + *
> > + * Given the address and the length of a buffer, print its contents in hex. 
> > + */
> > +function hexdump (buff:long, len:long) {
> > +	printf("%s\n",_hexdump(buff, len))
> > +}
> 
> I think a stap-native printf("%.*M\n", 2*len, buff) will work like this.

Thanks, I will give it a try..

> ..
> 
> 
> > +probe ib_post_send = 	ib_ehca_post_send ?,
> > +			ib_mlx4_post_send ?,
> > +			ib_mthca_post_send ?,
> > +			ib_nes_post_send ? {}
> 
> Our convention is to use dots for separating probe namespaces, so this
> should be something like "ib.post_send".  Then for the internal
> variants, prefix it with an underscore like "_ib.ehca_post_send".
I will clean this up. 

What I wanted to do was place a probe on the inline ib_post_send()  (ib_
is just part of the name). But probing inlines has problems right now. 

My intention was for each user of the inline to sets a separate probe on
it's instance of ib_post_send() giving it a unique namespace.  Something
like: 
probe ib_ipoib.ib_post_send = module(ib_ipoib).inline(ib_post_send)
What you are seeing here is the results of working around the problem.

> > +		ib_post_send_qp = $qp
> > +		ib_post_send_wr = $send_wr
> 
> Since you're already in an ib-specific probe, I think these variables
> should be named more simply, like "qp" and "wr".
> 
> 
> Josh
> 

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2009-02-06  4:34 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-02-06  0:29 [PATCH 1/6] New Infiniband (OFED) tapset David J. Wilder
2009-02-06  4:34 ` Josh Stone
2009-02-06 15:56   ` David J. Wilder

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).