diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt index 1d6d02d6ba52..a5c784c89312 100644 --- a/Documentation/networking/timestamping.txt +++ b/Documentation/networking/timestamping.txt @@ -122,7 +122,7 @@ SOF_TIMESTAMPING_RAW_HARDWARE: 1.3.3 Timestamp Options -The interface supports one option +The interface supports the options SOF_TIMESTAMPING_OPT_ID: @@ -130,19 +130,36 @@ SOF_TIMESTAMPING_OPT_ID: have multiple concurrent timestamping requests outstanding. Packets can be reordered in the transmit path, for instance in the packet scheduler. In that case timestamps will be queued onto the error - queue out of order from the original send() calls. This option - embeds a counter that is incremented at send() time, to order - timestamps within a flow. + queue out of order from the original send() calls. It is not always + possible to uniquely match timestamps to the original send() calls + based on timestamp order or payload inspection alone, then. + + This option associates each packet at send() with a unique + identifier and returns that along with the timestamp. The identifier + is derived from a per-socket u32 counter (that wraps). For datagram + sockets, the counter increments with each sent packet. For stream + sockets, it increments with every byte. + + The counter starts at zero. It is initialized the first time that + the socket option is enabled. It is reset each time the option is + enabled after having been disabled. Resetting the counter does not + change the identifiers of existing packets in the system. This option is implemented only for transmit timestamps. There, the timestamp is always looped along with a struct sock_extended_err. The option modifies field ee_data to pass an id that is unique among all possibly concurrently outstanding timestamp requests for - that socket. In practice, it is a monotonically increasing u32 - (that wraps). + that socket. - In datagram sockets, the counter increments on each send call. In - stream sockets, it increments with every byte. + +SOF_TIMESTAMPING_OPT_CMSG: + + Support recv() cmsg for all timestamped packets. Control messages + are already supported unconditionally on all packets with receive + timestamps and on IPv6 packets with transmit timestamp. This option + extends them to IPv4 packets with transmit timestamp. One use case + is to correlate packets with their egress device, by enabling socket + option IP_PKTINFO simultaneously. 1.4 Bytestream Timestamps diff --git a/Documentation/networking/timestamping/txtimestamp.c b/Documentation/networking/timestamping/txtimestamp.c index b32fc2a07734..876f71c5625a 100644 --- a/Documentation/networking/timestamping/txtimestamp.c +++ b/Documentation/networking/timestamping/txtimestamp.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -58,6 +59,14 @@ #include #include +/* ugly hack to work around netinet/in.h and linux/ipv6.h conflicts */ +#ifndef in6_pktinfo +struct in6_pktinfo { + struct in6_addr ipi6_addr; + int ipi6_ifindex; +}; +#endif + /* command line parameters */ static int cfg_proto = SOCK_STREAM; static int cfg_ipproto = IPPROTO_TCP; @@ -65,6 +74,8 @@ static int cfg_num_pkts = 4; static int do_ipv4 = 1; static int do_ipv6 = 1; static int cfg_payload_len = 10; +static bool cfg_show_payload; +static bool cfg_do_pktinfo; static uint16_t dest_port = 9000; static struct sockaddr_in daddr; @@ -131,6 +142,30 @@ static void print_timestamp(struct scm_timestamping *tss, int tstype, __print_timestamp(tsname, &tss->ts[0], tskey, payload_len); } +/* TODO: convert to check_and_print payload once API is stable */ +static void print_payload(char *data, int len) +{ + int i; + + if (len > 70) + len = 70; + + fprintf(stderr, "payload: "); + for (i = 0; i < len; i++) + fprintf(stderr, "%02hhx ", data[i]); + fprintf(stderr, "\n"); +} + +static void print_pktinfo(int family, int ifindex, void *saddr, void *daddr) +{ + char sa[INET6_ADDRSTRLEN], da[INET6_ADDRSTRLEN]; + + fprintf(stderr, " pktinfo: ifindex=%u src=%s dst=%s\n", + ifindex, + saddr ? inet_ntop(family, saddr, sa, sizeof(sa)) : "unknown", + daddr ? inet_ntop(family, daddr, da, sizeof(da)) : "unknown"); +} + static void __poll(int fd) { struct pollfd pollfd; @@ -156,10 +191,9 @@ static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len) cm->cmsg_type == SCM_TIMESTAMPING) { tss = (void *) CMSG_DATA(cm); } else if ((cm->cmsg_level == SOL_IP && - cm->cmsg_type == IP_RECVERR) || - (cm->cmsg_level == SOL_IPV6 && - cm->cmsg_type == IPV6_RECVERR)) { - + cm->cmsg_type == IP_RECVERR) || + (cm->cmsg_level == SOL_IPV6 && + cm->cmsg_type == IPV6_RECVERR)) { serr = (void *) CMSG_DATA(cm); if (serr->ee_errno != ENOMSG || serr->ee_origin != SO_EE_ORIGIN_TIMESTAMPING) { @@ -168,6 +202,16 @@ static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len) serr->ee_origin); serr = NULL; } + } else if (cm->cmsg_level == SOL_IP && + cm->cmsg_type == IP_PKTINFO) { + struct in_pktinfo *info = (void *) CMSG_DATA(cm); + print_pktinfo(AF_INET, info->ipi_ifindex, + &info->ipi_spec_dst, &info->ipi_addr); + } else if (cm->cmsg_level == SOL_IPV6 && + cm->cmsg_type == IPV6_PKTINFO) { + struct in6_pktinfo *info6 = (void *) CMSG_DATA(cm); + print_pktinfo(AF_INET6, info6->ipi6_ifindex, + NULL, &info6->ipi6_addr); } else fprintf(stderr, "unknown cmsg %d,%d\n", cm->cmsg_level, cm->cmsg_type); @@ -206,7 +250,11 @@ static int recv_errmsg(int fd) if (ret == -1 && errno != EAGAIN) error(1, errno, "recvmsg"); - __recv_errmsg_cmsg(&msg, ret); + if (ret > 0) { + __recv_errmsg_cmsg(&msg, ret); + if (cfg_show_payload) + print_payload(data, cfg_payload_len); + } free(data); return ret == -1; @@ -215,9 +263,9 @@ static int recv_errmsg(int fd) static void do_test(int family, unsigned int opt) { char *buf; - int fd, i, val, total_len; + int fd, i, val = 1, total_len; - if (family == IPPROTO_IPV6 && cfg_proto != SOCK_STREAM) { + if (family == AF_INET6 && cfg_proto != SOCK_STREAM) { /* due to lack of checksum generation code */ fprintf(stderr, "test: skipping datagram over IPv6\n"); return; @@ -239,7 +287,6 @@ static void do_test(int family, unsigned int opt) error(1, errno, "socket"); if (cfg_proto == SOCK_STREAM) { - val = 1; if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char*) &val, sizeof(val))) error(1, 0, "setsockopt no nagle"); @@ -253,7 +300,20 @@ static void do_test(int family, unsigned int opt) } } + if (cfg_do_pktinfo) { + if (family == AF_INET6) { + if (setsockopt(fd, SOL_IPV6, IPV6_RECVPKTINFO, + &val, sizeof(val))) + error(1, errno, "setsockopt pktinfo ipv6"); + } else { + if (setsockopt(fd, SOL_IP, IP_PKTINFO, + &val, sizeof(val))) + error(1, errno, "setsockopt pktinfo ipv4"); + } + } + opt |= SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_OPT_CMSG | SOF_TIMESTAMPING_OPT_ID; if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, (char *) &opt, sizeof(opt))) @@ -262,8 +322,6 @@ static void do_test(int family, unsigned int opt) for (i = 0; i < cfg_num_pkts; i++) { memset(&ts_prev, 0, sizeof(ts_prev)); memset(buf, 'a' + i, total_len); - buf[total_len - 2] = '\n'; - buf[total_len - 1] = '\0'; if (cfg_proto == SOCK_RAW) { struct udphdr *udph; @@ -324,11 +382,13 @@ static void __attribute__((noreturn)) usage(const char *filepath) " -4: only IPv4\n" " -6: only IPv6\n" " -h: show this message\n" + " -I: request PKTINFO\n" " -l N: send N bytes at a time\n" " -r: use raw\n" " -R: use raw (IP_HDRINCL)\n" " -p N: connect to port N\n" - " -u: use udp\n", + " -u: use udp\n" + " -x: show payload (up to 70 bytes)\n", filepath); exit(1); } @@ -338,7 +398,7 @@ static void parse_opt(int argc, char **argv) int proto_count = 0; char c; - while ((c = getopt(argc, argv, "46hl:p:rRu")) != -1) { + while ((c = getopt(argc, argv, "46hIl:p:rRux")) != -1) { switch (c) { case '4': do_ipv6 = 0; @@ -346,6 +406,9 @@ static void parse_opt(int argc, char **argv) case '6': do_ipv4 = 0; break; + case 'I': + cfg_do_pktinfo = true; + break; case 'r': proto_count++; cfg_proto = SOCK_RAW; @@ -367,6 +430,9 @@ static void parse_opt(int argc, char **argv) case 'p': dest_port = strtoul(optarg, NULL, 10); break; + case 'x': + cfg_show_payload = true; + break; case 'h': default: usage(argv[0]); diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index ff354021bb69..edbc888ceb51 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -23,8 +23,9 @@ enum { SOF_TIMESTAMPING_OPT_ID = (1<<7), SOF_TIMESTAMPING_TX_SCHED = (1<<8), SOF_TIMESTAMPING_TX_ACK = (1<<9), + SOF_TIMESTAMPING_OPT_CMSG = (1<<10), - SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_TX_ACK, + SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_CMSG, SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) | SOF_TIMESTAMPING_LAST }; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index b7826575d215..640f26c6a9fe 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -399,6 +399,22 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf kfree_skb(skb); } +static bool ipv4_pktinfo_prepare_errqueue(const struct sock *sk, + const struct sk_buff *skb, + int ee_origin) +{ + struct in_pktinfo *info = PKTINFO_SKB_CB(skb); + + if ((ee_origin != SO_EE_ORIGIN_TIMESTAMPING) || + (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG)) || + (!skb->dev)) + return false; + + info->ipi_spec_dst.s_addr = ip_hdr(skb)->saddr; + info->ipi_ifindex = skb->dev->ifindex; + return true; +} + /* * Handle MSG_ERRQUEUE */ @@ -414,6 +430,8 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) int err; int copied; + WARN_ON_ONCE(sk->sk_family == AF_INET6); + err = -EAGAIN; skb = sock_dequeue_err_skb(sk); if (skb == NULL) @@ -444,7 +462,9 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); sin = &errhdr.offender; sin->sin_family = AF_UNSPEC; - if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP) { + + if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP || + ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin)) { struct inet_sock *inet = inet_sk(sk); sin->sin_family = AF_INET; @@ -1049,7 +1069,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, } /** - * ipv4_pktinfo_prepare - transfert some info from rtable to skb + * ipv4_pktinfo_prepare - transfer some info from rtable to skb * @sk: socket * @skb: buffer * diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index cc1139687fd7..2464a00e36ab 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -325,6 +325,16 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu) kfree_skb(skb); } +static void ip6_datagram_prepare_pktinfo_errqueue(struct sk_buff *skb) +{ + int ifindex = skb->dev ? skb->dev->ifindex : -1; + + if (skb->protocol == htons(ETH_P_IPV6)) + IP6CB(skb)->iif = ifindex; + else + PKTINFO_SKB_CB(skb)->ipi_ifindex = ifindex; +} + /* * Handle MSG_ERRQUEUE */ @@ -388,8 +398,12 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; sin->sin6_port = 0; - if (np->rxopt.all) + if (np->rxopt.all) { + if (serr->ee.ee_origin != SO_EE_ORIGIN_ICMP && + serr->ee.ee_origin != SO_EE_ORIGIN_ICMP6) + ip6_datagram_prepare_pktinfo_errqueue(skb); ip6_datagram_recv_common_ctl(sk, msg, skb); + } if (skb->protocol == htons(ETH_P_IPV6)) { sin->sin6_addr = ipv6_hdr(skb)->saddr; if (np->rxopt.all) @@ -491,7 +505,10 @@ void ip6_datagram_recv_common_ctl(struct sock *sk, struct msghdr *msg, ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr, &src_info.ipi6_addr); } - put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info); + + if (src_info.ipi6_ifindex >= 0) + put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, + sizeof(src_info), &src_info); } }