From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 24021 invoked by alias); 6 Feb 2009 00:29:46 -0000 Received: (qmail 23024 invoked by uid 22791); 6 Feb 2009 00:29:34 -0000 X-SWARE-Spam-Status: No, hits=0.3 required=5.0 tests=AWL,BAYES_00,J_CHICKENPOX_34,J_CHICKENPOX_46,J_CHICKENPOX_53,J_CHICKENPOX_54,J_CHICKENPOX_56,J_CHICKENPOX_64,J_CHICKENPOX_66,J_CHICKENPOX_71,SPF_PASS X-Spam-Check-By: sourceware.org Received: from e39.co.us.ibm.com (HELO e39.co.us.ibm.com) (32.97.110.160) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Fri, 06 Feb 2009 00:29:25 +0000 Received: from d03relay02.boulder.ibm.com (d03relay02.boulder.ibm.com [9.17.195.227]) by e39.co.us.ibm.com (8.13.1/8.13.1) with ESMTP id n160R7TR012986 for ; Thu, 5 Feb 2009 17:27:07 -0700 Received: from d03av04.boulder.ibm.com (d03av04.boulder.ibm.com [9.17.195.170]) by d03relay02.boulder.ibm.com (8.13.8/8.13.8/NCO v9.1) with ESMTP id n160TNO2209140 for ; Thu, 5 Feb 2009 17:29:23 -0700 Received: from d03av04.boulder.ibm.com (loopback [127.0.0.1]) by d03av04.boulder.ibm.com (8.12.11.20060308/8.13.3) with ESMTP id n160TNp1016610 for ; Thu, 5 Feb 2009 17:29:23 -0700 Received: from [9.65.29.205] (sig-9-65-29-205.mts.ibm.com [9.65.29.205]) by d03av04.boulder.ibm.com (8.12.11.20060308/8.12.11) with ESMTP id n160TLrt016587; Thu, 5 Feb 2009 17:29:22 -0700 Subject: [PATCH 3/6] New Infiniband (OFED) tapset From: "David J. 
Wilder" To: systemtap@sourceware.org, xma@us.ibm.com, pradeep@us.ibm.com, prasad@linux.vnet.ibm.com Content-Type: text/plain Date: Fri, 06 Feb 2009 01:13:00 -0000 Message-Id: <1233880161.23376.37.camel@wilder.ibm.com> Mime-Version: 1.0 Content-Transfer-Encoding: 7bit X-IsSubscribed: yes Mailing-List: contact systemtap-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Subscribe: List-Post: List-Help: , Sender: systemtap-owner@sourceware.org X-SW-Source: 2009-q1/txt/msg00380.txt.bz2 This tapset is used to probe the ip-over-ib module of the ofed infiniband stack (ib_ipoib.ko). Probe points are included to monitor callbacks from the ib verb and connection manager layers. Also included is a trace data formatting and display feature (dprint). Signed-off-by: David Wilder ------------------------------------------------------ tapset/ofed_ipoib.stp | 817 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 817 insertions(+), 0 deletions(-) diff --git a/tapset/ofed_ipoib.stp b/tapset/ofed_ipoib.stp new file mode 100644 index 0000000..8288d41 --- /dev/null +++ b/tapset/ofed_ipoib.stp @@ -0,0 +1,817 @@ +/* + * OFED ipoib tapset + * Copyright (C) 2009 IBM Corp. + * + * This file is part of systemtap, and is free software. You can + * redistribute it and/or modify it under the terms of the GNU General + * Public License (GPL); either version 2, or (at your option) any + * later version. + * + * Initial version: wilder@us.ibm.com 2008/09/09 + * Version 1.0 wilder@us.ibm.com 2009/01/19 + */ +%{ +#include +#include +%} + +/* back-porting */ +%( kernel_v < "2.6.19" %? +%{ + struct delayed_work { + struct work_struct work; + }; +%} +%) + +/* + * Several structure definitions are needed that are found in + * .../drivers/infiniband/ulp/ipoib/ipoib.h. If ipoib.h is located + * at /usr/src/linux/drivers/infiniband/ulp/ipoib/ipoib.h it will + * automatically be found. 
Otherwise the calling tapscript will need + * to specify the location in the tap script, for example: + * + * %{ + * #include "path-to-ipoib.h" + * %} + * + * Or, use a predefined search path by adding + * a matching #define. For example: + * + * %{ + * #define SLES_OFED_1_3 + * %} + */ +%( kernel_vr == "*-ppc64" %? +%{ +#ifdef SLES_OFED_1_3 +#include "/usr/src/debug/ofa_kernel-1.3/obj/ppc64/drivers/infiniband/ulp/ipoib/ipoib.h" +#endif +%} +%) + +%( kernel_vr == "*-default" %? +%{ +#ifdef SLES_OFED_1_3 +#include "/usr/src/debug/ofa_kernel-1.3/obj/default/drivers/infiniband/ulp/ipoib/ipoib.h" +#endif +%} +%) + +/* + * If the kernel source is located in or linked to /usr/src/linux the + * following include will cover you without adding anything to your tapscript. + */ +%{ +#ifndef _IPOIB_H +#include "/usr/src/linux/drivers/infiniband/ulp/ipoib/ipoib.h" +#endif /* _IPOIB_H */ +%} + +/* + * These functions return the value of some members + * of the struct ipoib_dev_priv. kread() is used for all de-references. 
+ */
+function ipoib_dev_priv__qp:long (A:long)
+%{ THIS->__retvalue = get_member(ipoib_dev_priv, qp, void *) %}
+
+/*
+ * Data formatting functions
+ */
+
+/*
+ * proper_name - build the label printed at the start of a trace line.
+ *
+ * Returns "<probemod()>.<probefunc()>(<note>):" when note is non-empty and
+ * "<probemod()>.<probefunc()>:" otherwise.
+ */
+function proper_name (note:string) {
+	if ( strlen(note) )
+		return sprintf("%s.%s(%s):",probemod(), probefunc(), note)
+	else
+		return sprintf("%s.%s:",probemod(), probefunc())
+}
+
+/*
+ * ipoib_netdev_priv - return netdev_priv() of the struct net_device whose
+ * address is passed in dev.
+ */
+function ipoib_netdev_priv:long (dev:long)
+%{ /* pure */
+	struct net_device *net_dev = (struct net_device *) THIS->dev;
+	THIS->__retvalue = (long) netdev_priv(net_dev);
+%}
+
+/*
+ * find_cm_skb - look up the receive sk_buff that belongs to a work completion.
+ *
+ * dev: address of the struct net_device
+ * wc:  address of the struct ib_wc
+ *
+ * wc->qp->qp_context is read as a struct ipoib_cm_rx.  The ring index is
+ * wc->wr_id with the IPOIB_OP_CM and IPOIB_OP_RECV bits cleared.  The skb is
+ * taken from priv->cm.srq_ring when priv->cm.srq is set, otherwise from the
+ * rx_ring of that ipoib_cm_rx.  Returns 0 when the index is not below
+ * IPOIB_MAX_QUEUE_SIZE.
+ *
+ * NOTE(review): every pointer here is dereferenced directly, without kread().
+ */
+function find_cm_skb:long (dev:long, wc:long)
+%{ /* XXX pure */
+	struct net_device *net_dev = (struct net_device *) THIS->dev;
+	struct ib_wc *wc = (struct ib_wc *) THIS->wc;
+	struct ipoib_cm_rx *p=wc->qp->qp_context;
+	struct ipoib_dev_priv *priv = netdev_priv(net_dev);
+	unsigned int wr_id = wc->wr_id & ~(IPOIB_OP_CM | IPOIB_OP_RECV);
+	struct sk_buff *skb = NULL;
+	struct ipoib_cm_rx_buf *rx_ring;
+
+	if (wr_id < IPOIB_MAX_QUEUE_SIZE ) {
+		if ( !!priv->cm.srq ) // Using srq?
+			rx_ring = priv->cm.srq_ring;
+		else
+			rx_ring = p->rx_ring;
+
+		skb = rx_ring[wr_id].skb;
+	}
+	THIS->__retvalue = (long) skb;
+%}
+
+/*
+ * find_ud_skb - look up the sk_buff in priv->rx_ring for a work completion.
+ *
+ * dev: address of the struct net_device
+ * wc:  address of the struct ib_wc
+ *
+ * The ring index is wc->wr_id with the IPOIB_OP_RECV bit cleared.  Returns 0
+ * when the index is not below IPOIB_MAX_QUEUE_SIZE.
+ *
+ * NOTE(review): pointers are dereferenced directly, without kread().
+ */
+function find_ud_skb:long (dev:long, wc:long)
+%{ /* XXX pure */
+	struct net_device *net_dev = (struct net_device *) THIS->dev;
+	struct ib_wc *wc = (struct ib_wc *) THIS->wc;
+	struct ipoib_dev_priv *priv = netdev_priv(net_dev);
+	struct ipoib_sg_rx_buf *rx_ring = priv->rx_ring;
+	unsigned int wr_id = wc->wr_id & ~IPOIB_OP_RECV;
+	struct sk_buff *skb = NULL;
+
+	if (wr_id < IPOIB_MAX_QUEUE_SIZE )
+		skb = rx_ring[wr_id].skb;
+
+	THIS->__retvalue = (long) skb;
+%}
+
+/*
+ * clean_wr_id - return wr_id with the IPOIB_OP_CM and IPOIB_OP_RECV bits
+ * cleared.
+ */
+function clean_wr_id:long (wr_id:long)
+%{
+	THIS->__retvalue = (THIS->wr_id) & ~(IPOIB_OP_CM | IPOIB_OP_RECV);
+%}
+
+/*
+ * skbdump - print the address of an sk_buff and, unless the address is 0 or
+ * -1, the hex dump produced by _skbdump().
+ */
+function skbdump (skb:long) {
+	if ( (skb != -1) && (skb != 0) )
+		printf("sk_buff=%p\n%s\n",skb,_skbdump(skb))
+	else
+		printf("sk_buff=%p\n",skb)
+}
+
+/*
+ * _skbdump - hex dump of the start of an sk_buff buffer, returned as a string.
+ *
+ * skb: address of a struct sk_buff; the caller must not pass 0 or -1
+ *      (skbdump() filters those out).
+ *
+ * The result starts with a "data=<skb->head> len=<n>" line, where n is
+ * skb->len - skb->data_len, followed by the bytes at skb->head in hex,
+ * sixteen per line.
+ *
+ * Every write is bounded by MAXSTRINGLEN, the size of the return string; the
+ * dump stops as soon as another byte would not fit.  At most n bytes are
+ * read.  Bytes are read as unsigned char so that values of 0x80 and above
+ * print as two hex digits.
+ *
+ * NOTE(review): skb is dereferenced directly, without kread().
+ */
+function _skbdump:string (skb:long)
+%{ /* pure */
+	struct sk_buff *skb = (struct sk_buff *)(long)THIS->skb;
+	char *out = THIS->__retvalue;
+	char *end = out + MAXSTRINGLEN;
+	const unsigned char *p = (const unsigned char *) skb->head;
+	unsigned int len = skb->len - skb->data_len;
+	unsigned int i;
+
+	out += scnprintf(out, end - out, "data=%p len=%u\n", p, len);
+
+	for (i = 0; i < len; i++) {
+		/* room needed: "xx " + a possible "\n" + the terminating NUL */
+		if (end - out < 5)
+			break;
+		out += scnprintf(out, end - out, "%02x ", p[i]);
+		if ((i & 0x0f) == 0x0f)
+			out += scnprintf(out, end - out, "\n");
+	}
+%}
+
+/*
+ * These functions return the value of some members
+ * of a struct ipoib_cm_rx. kread() is used for all de-references. 
+ *
+ * struct ipoib_cm_rx {
+ *     struct ib_cm_id *id;
+ *     struct ib_qp *qp;
+ *     struct ipoib_cm_rx_buf *rx_ring;
+ *     struct list_head list;
+ *     struct net_device *dev;
+ *     unsigned long jiffies;
+ *     enum ipoib_cm_state state;
+ *     int index;
+ *     int recv_count;
+ * };
+ *
+ * Each getter takes the address of a struct ipoib_cm_rx in A and returns
+ * the named member as a long.
+ */
+
+function ipoib_cm_rx__id:long (A:long)
+%{ THIS->__retvalue = get_member(ipoib_cm_rx, id, void *)%}
+
+function ipoib_cm_rx__qp:long (A:long)
+%{ THIS->__retvalue = get_member(ipoib_cm_rx, qp, void *)%}
+
+function ipoib_cm_rx__rx_ring:long (A:long)
+%{ THIS->__retvalue = get_member(ipoib_cm_rx, rx_ring, void *)%}
+
+function ipoib_cm_rx__dev:long (A:long)
+%{ THIS->__retvalue = get_member(ipoib_cm_rx, dev, void *)%}
+
+/*
+ * NOTE(review): state is an enum ipoib_cm_state in the layout quoted above,
+ * yet it is fetched with a void * type argument while the int members below
+ * use int.  get_member() is not defined in this part of the file -- confirm
+ * that the type argument does not change how many bytes are read.
+ */
+function ipoib_cm_rx__state:long (A:long)
+%{ THIS->__retvalue = get_member(ipoib_cm_rx, state, void *)%}
+
+function ipoib_cm_rx__index:long (A:long)
+%{ THIS->__retvalue = get_member(ipoib_cm_rx, index, int)%}
+
+function ipoib_cm_rx__recv_count:long (A:long)
+%{ THIS->__retvalue = get_member(ipoib_cm_rx, recv_count, int)%}
+
+/*
+ * Trace filtering
+ *
+ * ib_ipoib_[UD|CM]_qp_list holds lists of all qp that are being traced.
+ * At this time the filter list is only used to filter probe hits
+ * of ib_post_send and ib_post_recv. See the comment in ib_post_send().
+ *
+ * Both arrays are indexed by the qp value handed to the trace/rm functions;
+ * the stored value is always 1, only membership matters.
+ */
+global ib_ipoib_UD_qp_list;
+global ib_ipoib_CM_qp_list;
+
+/**
+ * sfunction ib_ipoib_trace_UD_qp - Add a QP to the list of traced QPs.
+ *
+ * Adds a new QP to the list of datagram mode (UD) QPs that are being traced.
+ */
+function ib_ipoib_trace_UD_qp (qp:long)
+{
+	ib_ipoib_UD_qp_list[qp]=1
+}
+
+/**
+ * sfunction ib_ipoib_rm_UD_qp - Remove a QP from the list of traced QPs.
+ *
+ * Removes a QP from the list of datagram mode (UD) QPs that are being traced.
+ */
+function ib_ipoib_rm_UD_qp (qp:long)
+{
+	delete ib_ipoib_UD_qp_list[qp]
+}
+
+/**
+ * sfunction ib_ipoib_UD_qp_is_traced - Tests if a QP is being traced.
+ *
+ * Returns 1 if the QP is in the list of datagram mode (UD) QPs that are traced. 
+ */
+function ib_ipoib_UD_qp_is_traced:long (qp:long)
+{
+	return (qp in ib_ipoib_UD_qp_list ? 1 : 0)
+}
+
+/**
+ * sfunction ib_ipoib_trace_CM_qp - Start tracing a connected mode QP.
+ *
+ * Records the QP in the list of connection mode (CM) QPs that are traced.
+ */
+function ib_ipoib_trace_CM_qp (qp:long)
+{
+	ib_ipoib_CM_qp_list[qp] = 1
+}
+
+/**
+ * sfunction ib_ipoib_rm_CM_qp - Stop tracing a connected mode QP.
+ *
+ * Drops the QP from the list of connection mode (CM) QPs that are traced.
+ */
+function ib_ipoib_rm_CM_qp (qp:long)
+{
+	delete ib_ipoib_CM_qp_list[qp]
+}
+
+/**
+ * sfunction ib_ipoib_CM_qp_is_traced - Tests if a QP is being traced.
+ *
+ * Returns 1 if the QP is in the list of connection mode (CM) traced QPs,
+ * 0 otherwise.
+ */
+function ib_ipoib_CM_qp_is_traced:long (qp:long)
+{
+	return (qp in ib_ipoib_CM_qp_list ? 1 : 0)
+}
+
+/*
+ * Feature switches.  Each global starts at 0 (off) and is set to 1 or 0 by
+ * the matching _on / _off function.
+ */
+global ib_use_dprint = 0
+
+/**
+ * sfunction ib_dprint_on - Enable the dprint facility
+ *
+ * Switches the function tracing print feature on.
+ */
+function ib_dprint_on ()
+{
+	ib_use_dprint = 1
+}
+
+/**
+ * sfunction ib_dprint_off - Disable the dprint facility
+ *
+ * Switches the function tracing print feature off.
+ */
+function ib_dprint_off ()
+{
+	ib_use_dprint = 0
+}
+
+global ib_ipoib_post_send_dump = 0
+
+/* Set the ib_ipoib_post_send_dump switch to 1. */
+function ib_ipoib_post_send_dump_on ()
+{
+	ib_ipoib_post_send_dump = 1
+}
+
+/* Set the ib_ipoib_post_send_dump switch back to 0. */
+function ib_ipoib_post_send_dump_off ()
+{
+	ib_ipoib_post_send_dump = 0
+}
+
+#
+# Trace data printing function for ib_ipoib. 
+#
+
+/*
+ * ib_ipoib_dprint - print one formatted trace line.
+ *
+ * Does nothing (returns 0) while ib_use_dprint is 0.  Otherwise the layout is
+ * selected by dprint_format:
+ *
+ *   "OUTPUT"  name, device_str, qp_num, clean_wr_id(wr_id), "--->" and status
+ *   "INPUT"   name, device_str, "<---", qp_num, clean_wr_id(wr_id) and status
+ *   "EVENT"   name, device_str and event; cm_id and status are appended
+ *             only when cm_id is neither -1 nor 0
+ *   other     name only
+ *
+ * The skb argument is accepted but not referenced in the body.
+ */
+function ib_ipoib_dprint (name:string,
+		cm_id:long,
+		device_str:string,
+		qp_num:long,
+		wr_id:long,
+		event:string,
+		status:string,
+		skb:long,
+		dprint_format:string)
+{
+	if ( !ib_use_dprint )
+		return 0;
+
+	if ( dprint_format == "OUTPUT" ) {
+		printf("%-58s(%3s):%7d(%03d)--->%-11s%s\n",
+			name, device_str, qp_num,
+			clean_wr_id(wr_id)," ",status);
+
+	} else if ( dprint_format == "INPUT" ) {
+		printf("%-58s(%3s):%16s %d(%03d)%-3s%s\n",
+			name, device_str,"<---", qp_num,
+			clean_wr_id(wr_id)," ",status);
+
+	} else if ( dprint_format == "EVENT" ) {
+		printf("%-58s(%3s): %s",
+			name, device_str,event);
+		if ( (cm_id != -1) && (cm_id != 0) )
+			printf("%s cm_id=%p state=%s"," ",cm_id,status);
+		printf("\n");
+	} else
+		printf("%-58s\n",name);
+}
+
+/*
+ * Event and state helper functions
+ */
+
+global _ipoib_cm_state_num2str[3]
+global _ipoib_cm_state_str2num[3]
+
+/**
+ * sfunction ipoib_cm_state_num2str - String representation of an ipoib_cm_state.
+ *
+ * Given an ipoib_cm_state, return a string representation.
+ * Returns "UNDEF" when the number is not a key of _ipoib_cm_state_num2str.
+ */
+function ipoib_cm_state_num2str:string (status:long)
+{
+	return (status in _ipoib_cm_state_num2str ? _ipoib_cm_state_num2str[status] : "UNDEF")
+}
+
+/**
+ * sfunction ipoib_cm_state_str2num - The numeric value of an ipoib_cm_state.
+ *
+ * Given an ipoib_cm_state (string), return the corresponding state number.
+ * Returns -1 when the string is not a key of _ipoib_cm_state_str2num.
+ */
+function ipoib_cm_state_str2num:long (status:string)
+{
+	return (status in _ipoib_cm_state_str2num ? _ipoib_cm_state_str2num[status] : -1)
+}
+
+/**
+ * sfunction ib_wr_flags2str - String representation of the ib_wr_flags
+ *
+ * Given a set of ib_wr_flags, return a string representation.
+ */
+/*
+ * NOTE(review): the text following "(1<" below is damaged in this copy of
+ * the patch -- the parentheses do not balance and the statement ends in
+ * "qp);".  The remainder of this function and whatever followed it appear to
+ * be missing; restore them from the original patch before applying.
+ */
+function ib_wr_flags2str:string (flags:long)
+{
+	str = "";
+	foreach (bit in _ib_wr_bit2str)
+		if ( flags & (1<qp);
+}
+
+/**
+ * probe ib_ipoib.UD.ipoib_ib_handle_tx_wc - TX handler has been called
+ *
+ * Context:
+ *  ib_ipoib UD mode
+ *
+ * ib_ipoib UD is about to process a CQE from its TX completion queue. 
+ */
+#
+# static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
+#
+# On kernel_vr "2.6.27.11-1-ppc64" no target variable is read at all; on
+# other kernels newer than "2.6.27" only $dev is left out.
+#
+probe ib_ipoib.UD.ipoib_ib_handle_tx_wc = module("ib_ipoib").function("ipoib_ib_handle_tx_wc")
+{
+%( kernel_vr == "2.6.27.11-1-ppc64" // XXX cant get at the arguments
+%?
+	ib_ipoib_dprint(
+		proper_name("ud send completion"), //probe name:string
+		-1, // *ib_cm_id:long
+		"", // device name:string
+		-1, // qp number:long
+		-1, // wr_id:long
+		"", // event:string
+		"", // status or state:str
+		-1, // *sk_buff:long
+		"OUTPUT" // print format:string
+	);
+%:
+	ib_ipoib_dprint(
+		proper_name("ud send completion"), //probe name:string
+		-1, // *ib_cm_id:long
+%( kernel_vr > "2.6.27"
+%?
+		"", // XXX cant get at $dev
+%:
+		kernel_string($dev->name), // device name:string
+%)
+		$wc->qp->qp_num, // qp number:long
+		$wc->wr_id, // wr_id:long
+		"", // event:string
+		ib_wc_status_num2str($wc->status), // status or state:str
+		-1, // *sk_buff:long
+		"OUTPUT" // print format:string
+	);
+%)
+}
+
+/**
+ * probe ib_ipoib.CM.ipoib_cm_handle_tx_wc - TX handler has been called
+ *
+ * Context:
+ *  ib_ipoib CM mode
+ *
+ * ib_ipoib CM is about to process a CQE from its TX completion queue.
+ */
+#
+# void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
+#
+# No sk_buff is looked up for a send completion: find_cm_skb() treats
+# wc->qp->qp_context as a struct ipoib_cm_rx and indexes the receive rings,
+# so it must only be used on the receive path.  -1 is passed instead, as in
+# the UD send completion probe; ib_ipoib_dprint() does not print the skb.
+#
+probe ib_ipoib.CM.ipoib_cm_handle_tx_wc = module("ib_ipoib").function("ipoib_cm_handle_tx_wc")
+{
+	ib_ipoib_dprint(
+		proper_name("cm send completion"), //probe name:string
+		-1, // *ib_cm_id:long
+		kernel_string($dev->name), // device name:string
+		$wc->qp->qp_num, // qp number:long
+		$wc->wr_id, // wr_id:long
+		"", // event:string
+		ib_wc_status_num2str($wc->status), // status or state:str
+		-1, // *sk_buff:long
+		"OUTPUT" // print format:string
+	);
+}
+
+/**
+ * probe ib_ipoib.UD.ipoib_ib_handle_rx_wc - RX handler has been called
+ *
+ * Context:
+ *  ib_ipoib UD mode
+ *
+ * ib_ipoib UD is about to process a CQE from its RX completion queue. 
+ */
+#
+# static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
+#
+# The QP of the completion is added to the UD filter list before the trace
+# line is printed.
+#
+probe ib_ipoib.UD.ipoib_ib_handle_rx_wc = module("ib_ipoib").function("ipoib_ib_handle_rx_wc")
+{
+	ib_ipoib_trace_UD_qp($wc->qp);
+	ib_ipoib_dprint(
+		proper_name("ud receive completion"), //probe name:string
+		-1, // *ib_cm_id:long
+		kernel_string($dev->name), // device name:string
+		$wc->qp->qp_num, // qp number:long
+		$wc->wr_id, // wr_id:long
+		"", // event:string
+		ib_wc_status_num2str($wc->status), // status or state:string
+		-1, // *sk_buff:long
+		"INPUT" // print format:string
+	);
+}
+
+/**
+ * probe ib_ipoib.CM.ipoib_cm_handle_rx_wc - RX handler has been called
+ *
+ * Context:
+ *  ib_ipoib CM mode
+ *
+ * ib_ipoib CM is about to process a CQE from its RX completion queue.
+ */
+#
+# void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
+#
+# The QP of the completion is added to the CM filter list before the trace
+# line is printed.
+#
+probe ib_ipoib.CM.ipoib_cm_handle_rx_wc = module("ib_ipoib").function("ipoib_cm_handle_rx_wc")
+{
+	ib_ipoib_trace_CM_qp($wc->qp);
+	ib_ipoib_dprint(
+		proper_name("cm receive completion"), //probe name:string
+		-1, // *ib_cm_id:long
+		kernel_string($dev->name), // device name:string
+		$wc->qp->qp_num, // qp number:long
+		$wc->wr_id, // wr_id:long
+		"", // event:string
+		ib_wc_status_num2str($wc->status), // status or state:string
+		find_cm_skb($dev, $wc), // *sk_buff:long
+		"INPUT" // print format:string
+	);
+}
+
+/**
+ * probe ib_ipoib.CM.ipoib_cm_tx_handler - A connection event has occurred
+ *
+ * Context:
+ *  ib_ipoib CM mode
+ *
+ * The connection manager is reporting the occurrence of an event. 
+ */
+#
+# int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
+#
+probe ib_ipoib.CM.ipoib_cm_tx_handler =module("ib_ipoib").function("ipoib_cm_tx_handler")
+{
+	ib_ipoib_dprint(
+		proper_name("TX connection event"), //probe name:string
+		$cm_id, // *ib_cm_id:long
+		"", // device name:string
+		-1, // qp number:long
+		-1, // wr_id:long
+		ib_cm_event_type_num2str($event->event),// event:string
+		ib_cm_state_num2str($cm_id->state), // status or state:string
+		-1, // *sk_buff:long
+		"EVENT" // print format:string
+	);
+}
+
+/**
+ * probe ib_ipoib.CM.ipoib_cm_rx_handler - A connection event has occurred
+ *
+ * Context:
+ *  ib_ipoib CM mode
+ *
+ * The connection manager is reporting the occurrence of an event.
+ */
+#
+# int ipoib_cm_rx_handler(struct ib_cm_id *cm_id,struct ib_cm_event *event)
+#
+probe ib_ipoib.CM.ipoib_cm_rx_handler =module("ib_ipoib").function("ipoib_cm_rx_handler")
+{
+	ib_ipoib_dprint(
+		proper_name("RX connection event"), //probe name:string
+		$cm_id, // *ib_cm_id:long
+		"", // device name:string
+		-1, // qp number:long
+		-1, // wr_id:long
+		ib_cm_event_type_num2str($event->event),// event:string
+		ib_cm_state_num2str($cm_id->state), // status or state:string
+		-1, // *sk_buff:long
+		"EVENT" // print format:string
+	);
+}
+
+#
+# static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
+#	struct ib_qp *qp, struct ib_cm_req_event_param *req,
+#	unsigned psn)
+#
+# XXX - stap can not find probe point ipoib_cm_send_rep. 
+#
+# psn is a plain unsigned value in the signature above, so it is passed as
+# is; it travels in the wr_id slot, which the "EVENT" format does not print.
+#
+probe XXXipoib.CM.send_rep = module("ib_ipoib").function("ipoib_cm_send_rep")
+{
+	ib_ipoib_dprint(
+		proper_name("send REP"), //probe name:string
+		$cm_id, // *ib_cm_id:long
+		kernel_string($dev->name), // device name:string
+		$qp->qp_num, // qp number:long
+		$psn, // starting psn, in the wr_id:long slot
+		"", // event:string
+		"", // status or state:string
+		-1, // *sk_buff:long
+		"EVENT" // print format:string
+	);
+}
+
+/**
+ * probe ib_ipoib.CM_send_req - Sending a REQ
+ *
+ * Context:
+ *  ib_ipoib CM mode
+ *
+ * Send a connection request (REQ).
+ */
+#
+# static int ipoib_cm_send_req(struct net_device *dev,
+#	struct ib_cm_id *id, struct ib_qp *qp,
+#	u32 qpn,
+#	struct ib_sa_path_rec *pathrec)
+#
+# The connection id parameter of this function is named "id", not "cm_id".
+# The QP is added to the CM filter list before the trace line is printed.
+#
+probe ib_ipoib.CM_send_req = module("ib_ipoib").function("ipoib_cm_send_req")
+{
+	ib_ipoib_trace_CM_qp($qp);
+	ib_ipoib_dprint(
+		proper_name("send REQ"), //probe name:string
+		$id, // *ib_cm_id:long
+		kernel_string($dev->name), // device name:string
+		$qpn, // qp number:long
+		-1, // wr_id:long
+		"", // event:string
+		"", // status or state:string
+		-1, // *sk_buff:long
+		"EVENT" // print format:string
+	);
+}
+
+/**
+ * probe ib_ipoib.CM.ipoib_cm_rx_event_handler - An event occurred.
+ *
+ * Context:
+ *  ib_ipoib CM mode
+ *
+ * An event has occurred on a queue.
+ */
+#
+# static void ipoib_cm_rx_event_handler(struct ib_event *event, void *ctx)
+#
+# ctx is handed to the ipoib_cm_rx__* getters, i.e. it is read as the
+# address of a struct ipoib_cm_rx.
+#
+probe ib_ipoib.CM.ipoib_cm_rx_event_handler = module("ib_ipoib").function("ipoib_cm_rx_event_handler")
+{
+	ib_ipoib_dprint(
+		proper_name("RX event"), //probe name:string
+		ipoib_cm_rx__id($ctx), // *ib_cm_id:long
+		"", // device name:string
+		ib_qp__qp_num(ipoib_cm_rx__qp($ctx)), // qp number:long
+		-1, // wr_id:long
+		ib_event_num2str($event->event), // event:string
+		ipoib_cm_state_num2str(ipoib_cm_rx__state($ctx)),//state
+		-1, // *sk_buff:long
+		"EVENT" // print format:string
+	);
+}