Commit b7aa0bf70c4afb9e38be25f5c0922498d0f8684c

Authored by Eric Dumazet
Committed by David S. Miller
1 parent 3927f2e8f9

[NET]: convert network timestamps to ktime_t

We currently use a special structure (struct skb_timeval) and plain
'struct timeval' to store packet timestamps in sk_buffs and struct
sock.

This has some drawbacks:
- Resolution is fixed at one microsecond.
- Space is wasted on 64-bit platforms, where sizeof(struct timeval) = 16.

I suggest using ktime_t, which is a nice abstraction of high resolution
time services, currently capable of nanosecond resolution.

As sizeof(ktime_t) is 8 bytes, using ktime_t in 'struct sock' permits
an 8-byte shrink of this structure on 64-bit architectures. Some other
structures also benefit from this size reduction (struct ipq in
ipv4/ip_fragment.c, struct frag_queue in ipv6/reassembly.c, ...)

Once this ktime infrastructure is adopted, we can more easily provide
nanosecond resolution on top of it (ioctl SIOCGSTAMPNS and/or
SO_TIMESTAMPNS/SCM_TIMESTAMPNS).

Note: this patch includes a bug fix in
compat_sock_get_timestamp(), where an "err = 0;" was missing (so this
syscall returned -ENOENT instead of 0).

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
CC: Stephen Hemminger <shemminger@linux-foundation.org>
CC: John find <linux.kernel@free.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 20 changed files with 85 additions and 98 deletions Side-by-side Diff

include/linux/skbuff.h
... ... @@ -27,6 +27,7 @@
27 27 #include <net/checksum.h>
28 28 #include <linux/rcupdate.h>
29 29 #include <linux/dmaengine.h>
  30 +#include <linux/hrtimer.h>
30 31  
31 32 #define HAVE_ALLOC_SKB /* For the drivers to know */
32 33 #define HAVE_ALIGNABLE_SKB /* Ditto 8) */
33 34  
... ... @@ -156,12 +157,7 @@
156 157 #define SKB_DATAREF_SHIFT 16
157 158 #define SKB_DATAREF_MASK ((1 << SKB_DATAREF_SHIFT) - 1)
158 159  
159   -struct skb_timeval {
160   - u32 off_sec;
161   - u32 off_usec;
162   -};
163 160  
164   -
165 161 enum {
166 162 SKB_FCLONE_UNAVAILABLE,
167 163 SKB_FCLONE_ORIG,
... ... @@ -233,7 +229,7 @@
233 229 struct sk_buff *prev;
234 230  
235 231 struct sock *sk;
236   - struct skb_timeval tstamp;
  232 + ktime_t tstamp;
237 233 struct net_device *dev;
238 234 int iif;
239 235 /* 4 byte hole on 64 bit*/
240 236  
241 237  
242 238  
... ... @@ -1365,26 +1361,14 @@
1365 1361 */
1366 1362 static inline void skb_get_timestamp(const struct sk_buff *skb, struct timeval *stamp)
1367 1363 {
1368   - stamp->tv_sec = skb->tstamp.off_sec;
1369   - stamp->tv_usec = skb->tstamp.off_usec;
  1364 + *stamp = ktime_to_timeval(skb->tstamp);
1370 1365 }
1371 1366  
1372   -/**
1373   - * skb_set_timestamp - set timestamp of a skb
1374   - * @skb: skb to set stamp of
1375   - * @stamp: pointer to struct timeval to get stamp from
1376   - *
1377   - * Timestamps are stored in the skb as offsets to a base timestamp.
1378   - * This function converts a struct timeval to an offset and stores
1379   - * it in the skb.
1380   - */
1381   -static inline void skb_set_timestamp(struct sk_buff *skb, const struct timeval *stamp)
  1367 +static inline void __net_timestamp(struct sk_buff *skb)
1382 1368 {
1383   - skb->tstamp.off_sec = stamp->tv_sec;
1384   - skb->tstamp.off_usec = stamp->tv_usec;
  1369 + skb->tstamp = ktime_get_real();
1385 1370 }
1386 1371  
1387   -extern void __net_timestamp(struct sk_buff *skb);
1388 1372  
1389 1373 extern __sum16 __skb_checksum_complete(struct sk_buff *skb);
1390 1374  
include/net/sock.h
... ... @@ -244,7 +244,7 @@
244 244 struct sk_filter *sk_filter;
245 245 void *sk_protinfo;
246 246 struct timer_list sk_timer;
247   - struct timeval sk_stamp;
  247 + ktime_t sk_stamp;
248 248 struct socket *sk_socket;
249 249 void *sk_user_data;
250 250 struct page *sk_sndmsg_page;
251 251  
252 252  
253 253  
254 254  
... ... @@ -1307,19 +1307,19 @@
1307 1307 static __inline__ void
1308 1308 sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
1309 1309 {
1310   - struct timeval stamp;
  1310 + ktime_t kt = skb->tstamp;
1311 1311  
1312   - skb_get_timestamp(skb, &stamp);
1313 1312 if (sock_flag(sk, SOCK_RCVTSTAMP)) {
  1313 + struct timeval tv;
1314 1314 /* Race occurred between timestamp enabling and packet
1315 1315 receiving. Fill in the current time for now. */
1316   - if (stamp.tv_sec == 0)
1317   - do_gettimeofday(&stamp);
1318   - skb_set_timestamp(skb, &stamp);
1319   - put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP, sizeof(struct timeval),
1320   - &stamp);
  1316 + if (kt.tv64 == 0)
  1317 + kt = ktime_get_real();
  1318 + skb->tstamp = kt;
  1319 + tv = ktime_to_timeval(kt);
  1320 + put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP, sizeof(tv), &tv);
1321 1321 } else
1322   - sk->sk_stamp = stamp;
  1322 + sk->sk_stamp = kt;
1323 1323 }
1324 1324  
1325 1325 /**
kernel/time.c
... ... @@ -469,6 +469,7 @@
469 469  
470 470 return tv;
471 471 }
  472 +EXPORT_SYMBOL(ns_to_timeval);
472 473  
473 474 /*
474 475 * Convert jiffies to milliseconds and back.
net/bridge/netfilter/ebt_ulog.c
... ... @@ -130,6 +130,7 @@
130 130 unsigned int group = uloginfo->nlgroup;
131 131 ebt_ulog_buff_t *ub = &ulog_buffers[group];
132 132 spinlock_t *lock = &ub->lock;
  133 + ktime_t kt;
133 134  
134 135 if ((uloginfo->cprange == 0) ||
135 136 (uloginfo->cprange > skb->len + ETH_HLEN))
136 137  
... ... @@ -164,9 +165,10 @@
164 165  
165 166 /* Fill in the ulog data */
166 167 pm->version = EBT_ULOG_VERSION;
167   - do_gettimeofday(&pm->stamp);
  168 + kt = ktime_get_real();
  169 + pm->stamp = ktime_to_timeval(kt);
168 170 if (ub->qlen == 1)
169   - skb_set_timestamp(ub->skb, &pm->stamp);
  171 + ub->skb->tstamp = kt;
170 172 pm->data_len = copy_len;
171 173 pm->mark = skb->mark;
172 174 pm->hook = hooknr;
net/compat.c
... ... @@ -545,15 +545,20 @@
545 545 struct compat_timeval __user *ctv =
546 546 (struct compat_timeval __user*) userstamp;
547 547 int err = -ENOENT;
  548 + struct timeval tv;
548 549  
549 550 if (!sock_flag(sk, SOCK_TIMESTAMP))
550 551 sock_enable_timestamp(sk);
551   - if (sk->sk_stamp.tv_sec == -1)
  552 + tv = ktime_to_timeval(sk->sk_stamp);
  553 + if (tv.tv_sec == -1)
552 554 return err;
553   - if (sk->sk_stamp.tv_sec == 0)
554   - do_gettimeofday(&sk->sk_stamp);
555   - if (put_user(sk->sk_stamp.tv_sec, &ctv->tv_sec) ||
556   - put_user(sk->sk_stamp.tv_usec, &ctv->tv_usec))
  555 + if (tv.tv_sec == 0) {
  556 + sk->sk_stamp = ktime_get_real();
  557 + tv = ktime_to_timeval(sk->sk_stamp);
  558 + }
  559 + err = 0;
  560 + if (put_user(tv.tv_sec, &ctv->tv_sec) ||
  561 + put_user(tv.tv_usec, &ctv->tv_usec))
557 562 err = -EFAULT;
558 563 return err;
559 564 }
net/core/dev.c
... ... @@ -1031,23 +1031,12 @@
1031 1031 atomic_dec(&netstamp_needed);
1032 1032 }
1033 1033  
1034   -void __net_timestamp(struct sk_buff *skb)
1035   -{
1036   - struct timeval tv;
1037   -
1038   - do_gettimeofday(&tv);
1039   - skb_set_timestamp(skb, &tv);
1040   -}
1041   -EXPORT_SYMBOL(__net_timestamp);
1042   -
1043 1034 static inline void net_timestamp(struct sk_buff *skb)
1044 1035 {
1045 1036 if (atomic_read(&netstamp_needed))
1046 1037 __net_timestamp(skb);
1047   - else {
1048   - skb->tstamp.off_sec = 0;
1049   - skb->tstamp.off_usec = 0;
1050   - }
  1038 + else
  1039 + skb->tstamp.tv64 = 0;
1051 1040 }
1052 1041  
1053 1042 /*
... ... @@ -1577,7 +1566,7 @@
1577 1566 if (netpoll_rx(skb))
1578 1567 return NET_RX_DROP;
1579 1568  
1580   - if (!skb->tstamp.off_sec)
  1569 + if (!skb->tstamp.tv64)
1581 1570 net_timestamp(skb);
1582 1571  
1583 1572 /*
... ... @@ -1769,7 +1758,7 @@
1769 1758 if (skb->dev->poll && netpoll_rx(skb))
1770 1759 return NET_RX_DROP;
1771 1760  
1772   - if (!skb->tstamp.off_sec)
  1761 + if (!skb->tstamp.tv64)
1773 1762 net_timestamp(skb);
1774 1763  
1775 1764 if (!skb->iif)
net/core/sock.c
... ... @@ -1512,8 +1512,7 @@
1512 1512 sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
1513 1513 sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
1514 1514  
1515   - sk->sk_stamp.tv_sec = -1L;
1516   - sk->sk_stamp.tv_usec = -1L;
  1515 + sk->sk_stamp = ktime_set(-1L, -1L);
1517 1516  
1518 1517 atomic_set(&sk->sk_refcnt, 1);
1519 1518 }
1520 1519  
1521 1520  
... ... @@ -1554,14 +1553,17 @@
1554 1553  
1555 1554 int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
1556 1555 {
  1556 + struct timeval tv;
1557 1557 if (!sock_flag(sk, SOCK_TIMESTAMP))
1558 1558 sock_enable_timestamp(sk);
1559   - if (sk->sk_stamp.tv_sec == -1)
  1559 + tv = ktime_to_timeval(sk->sk_stamp);
  1560 + if (tv.tv_sec == -1)
1560 1561 return -ENOENT;
1561   - if (sk->sk_stamp.tv_sec == 0)
1562   - do_gettimeofday(&sk->sk_stamp);
1563   - return copy_to_user(userstamp, &sk->sk_stamp, sizeof(struct timeval)) ?
1564   - -EFAULT : 0;
  1562 + if (tv.tv_sec == 0) {
  1563 + sk->sk_stamp = ktime_get_real();
  1564 + tv = ktime_to_timeval(sk->sk_stamp);
  1565 + }
  1566 + return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
1565 1567 }
1566 1568 EXPORT_SYMBOL(sock_get_timestamp);
1567 1569  
net/econet/af_econet.c
... ... @@ -162,7 +162,7 @@
162 162 err = memcpy_toiovec(msg->msg_iov, skb->data, copied);
163 163 if (err)
164 164 goto out_free;
165   - skb_get_timestamp(skb, &sk->sk_stamp);
  165 + sk->sk_stamp = skb->tstamp;
166 166  
167 167 if (msg->msg_name)
168 168 memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
net/ipv4/ip_fragment.c
... ... @@ -92,7 +92,7 @@
92 92 spinlock_t lock;
93 93 atomic_t refcnt;
94 94 struct timer_list timer; /* when will this queue expire? */
95   - struct timeval stamp;
  95 + ktime_t stamp;
96 96 int iif;
97 97 unsigned int rid;
98 98 struct inet_peer *peer;
... ... @@ -592,7 +592,7 @@
592 592 if (skb->dev)
593 593 qp->iif = skb->dev->ifindex;
594 594 skb->dev = NULL;
595   - skb_get_timestamp(skb, &qp->stamp);
  595 + qp->stamp = skb->tstamp;
596 596 qp->meat += skb->len;
597 597 atomic_add(skb->truesize, &ip_frag_mem);
598 598 if (offset == 0)
... ... @@ -674,7 +674,7 @@
674 674  
675 675 head->next = NULL;
676 676 head->dev = dev;
677   - skb_set_timestamp(head, &qp->stamp);
  677 + head->tstamp = qp->stamp;
678 678  
679 679 iph = head->nh.iph;
680 680 iph->frag_off = 0;
... ... @@ -734,7 +734,7 @@
734 734 return NULL;
735 735 }
736 736  
737   -void ipfrag_init(void)
  737 +void __init ipfrag_init(void)
738 738 {
739 739 ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
740 740 (jiffies ^ (jiffies >> 6)));
net/ipv4/netfilter/ip_queue.c
... ... @@ -197,6 +197,7 @@
197 197 struct sk_buff *skb;
198 198 struct ipq_packet_msg *pmsg;
199 199 struct nlmsghdr *nlh;
  200 + struct timeval tv;
200 201  
201 202 read_lock_bh(&queue_lock);
202 203  
... ... @@ -241,8 +242,9 @@
241 242  
242 243 pmsg->packet_id = (unsigned long )entry;
243 244 pmsg->data_len = data_len;
244   - pmsg->timestamp_sec = entry->skb->tstamp.off_sec;
245   - pmsg->timestamp_usec = entry->skb->tstamp.off_usec;
  245 + tv = ktime_to_timeval(entry->skb->tstamp);
  246 + pmsg->timestamp_sec = tv.tv_sec;
  247 + pmsg->timestamp_usec = tv.tv_usec;
246 248 pmsg->mark = entry->skb->mark;
247 249 pmsg->hook = entry->info->hook;
248 250 pmsg->hw_protocol = entry->skb->protocol;
net/ipv4/netfilter/ipt_ULOG.c
... ... @@ -187,6 +187,7 @@
187 187 ulog_packet_msg_t *pm;
188 188 size_t size, copy_len;
189 189 struct nlmsghdr *nlh;
  190 + struct timeval tv;
190 191  
191 192 /* ffs == find first bit set, necessary because userspace
192 193 * is already shifting groupnumber, but we need unshifted.
193 194  
... ... @@ -232,13 +233,14 @@
232 233 pm = NLMSG_DATA(nlh);
233 234  
234 235 /* We might not have a timestamp, get one */
235   - if (skb->tstamp.off_sec == 0)
  236 + if (skb->tstamp.tv64 == 0)
236 237 __net_timestamp((struct sk_buff *)skb);
237 238  
238 239 /* copy hook, prefix, timestamp, payload, etc. */
239 240 pm->data_len = copy_len;
240   - put_unaligned(skb->tstamp.off_sec, &pm->timestamp_sec);
241   - put_unaligned(skb->tstamp.off_usec, &pm->timestamp_usec);
  241 + tv = ktime_to_timeval(skb->tstamp);
  242 + put_unaligned(tv.tv_sec, &pm->timestamp_sec);
  243 + put_unaligned(tv.tv_usec, &pm->timestamp_usec);
242 244 put_unaligned(skb->mark, &pm->mark);
243 245 pm->hook = hooknum;
244 246 if (prefix != NULL)
net/ipv6/exthdrs.c
... ... @@ -255,7 +255,7 @@
255 255 ipv6_addr_copy(&ipv6h->saddr, &hao->addr);
256 256 ipv6_addr_copy(&hao->addr, &tmp_addr);
257 257  
258   - if (skb->tstamp.off_sec == 0)
  258 + if (skb->tstamp.tv64 == 0)
259 259 __net_timestamp(skb);
260 260  
261 261 return 1;
net/ipv6/netfilter/ip6_queue.c
... ... @@ -195,6 +195,7 @@
195 195 struct sk_buff *skb;
196 196 struct ipq_packet_msg *pmsg;
197 197 struct nlmsghdr *nlh;
  198 + struct timeval tv;
198 199  
199 200 read_lock_bh(&queue_lock);
200 201  
... ... @@ -239,8 +240,9 @@
239 240  
240 241 pmsg->packet_id = (unsigned long )entry;
241 242 pmsg->data_len = data_len;
242   - pmsg->timestamp_sec = entry->skb->tstamp.off_sec;
243   - pmsg->timestamp_usec = entry->skb->tstamp.off_usec;
  243 + tv = ktime_to_timeval(entry->skb->tstamp);
  244 + pmsg->timestamp_sec = tv.tv_sec;
  245 + pmsg->timestamp_usec = tv.tv_usec;
244 246 pmsg->mark = entry->skb->mark;
245 247 pmsg->hook = entry->info->hook;
246 248 pmsg->hw_protocol = entry->skb->protocol;
net/ipv6/netfilter/nf_conntrack_reasm.c
... ... @@ -82,7 +82,7 @@
82 82 struct sk_buff *fragments;
83 83 int len;
84 84 int meat;
85   - struct timeval stamp;
  85 + ktime_t stamp;
86 86 unsigned int csum;
87 87 __u8 last_in; /* has first/last segment arrived? */
88 88 #define COMPLETE 4
... ... @@ -542,7 +542,7 @@
542 542 fq->fragments = skb;
543 543  
544 544 skb->dev = NULL;
545   - skb_get_timestamp(skb, &fq->stamp);
  545 + fq->stamp = skb->tstamp;
546 546 fq->meat += skb->len;
547 547 atomic_add(skb->truesize, &nf_ct_frag6_mem);
548 548  
... ... @@ -648,7 +648,7 @@
648 648  
649 649 head->next = NULL;
650 650 head->dev = dev;
651   - skb_set_timestamp(head, &fq->stamp);
  651 + head->tstamp = fq->stamp;
652 652 head->nh.ipv6h->payload_len = htons(payload_len);
653 653  
654 654 /* Yes, and fold redundant checksum back. 8) */
net/ipv6/reassembly.c
... ... @@ -88,7 +88,7 @@
88 88 int len;
89 89 int meat;
90 90 int iif;
91   - struct timeval stamp;
  91 + ktime_t stamp;
92 92 unsigned int csum;
93 93 __u8 last_in; /* has first/last segment arrived? */
94 94 #define COMPLETE 4
... ... @@ -562,7 +562,7 @@
562 562 if (skb->dev)
563 563 fq->iif = skb->dev->ifindex;
564 564 skb->dev = NULL;
565   - skb_get_timestamp(skb, &fq->stamp);
  565 + fq->stamp = skb->tstamp;
566 566 fq->meat += skb->len;
567 567 atomic_add(skb->truesize, &ip6_frag_mem);
568 568  
... ... @@ -663,7 +663,7 @@
663 663  
664 664 head->next = NULL;
665 665 head->dev = dev;
666   - skb_set_timestamp(head, &fq->stamp);
  666 + head->tstamp = fq->stamp;
667 667 head->nh.ipv6h->payload_len = htons(payload_len);
668 668 IP6CB(head)->nhoff = nhoff;
669 669  
net/ipx/af_ipx.c
... ... @@ -1807,8 +1807,8 @@
1807 1807 copied);
1808 1808 if (rc)
1809 1809 goto out_free;
1810   - if (skb->tstamp.off_sec)
1811   - skb_get_timestamp(skb, &sk->sk_stamp);
  1810 + if (skb->tstamp.tv64)
  1811 + sk->sk_stamp = skb->tstamp;
1812 1812  
1813 1813 msg->msg_namelen = sizeof(*sipx);
1814 1814  
net/netfilter/nfnetlink_log.c
... ... @@ -509,11 +509,11 @@
509 509 NFA_PUT(inst->skb, NFULA_HWADDR, sizeof(phw), &phw);
510 510 }
511 511  
512   - if (skb->tstamp.off_sec) {
  512 + if (skb->tstamp.tv64) {
513 513 struct nfulnl_msg_packet_timestamp ts;
514   -
515   - ts.sec = cpu_to_be64(skb->tstamp.off_sec);
516   - ts.usec = cpu_to_be64(skb->tstamp.off_usec);
  514 + struct timeval tv = ktime_to_timeval(skb->tstamp);
  515 + ts.sec = cpu_to_be64(tv.tv_sec);
  516 + ts.usec = cpu_to_be64(tv.tv_usec);
517 517  
518 518 NFA_PUT(inst->skb, NFULA_TIMESTAMP, sizeof(ts), &ts);
519 519 }
net/netfilter/nfnetlink_queue.c
... ... @@ -495,11 +495,11 @@
495 495 NFA_PUT(skb, NFQA_HWADDR, sizeof(phw), &phw);
496 496 }
497 497  
498   - if (entskb->tstamp.off_sec) {
  498 + if (entskb->tstamp.tv64) {
499 499 struct nfqnl_msg_packet_timestamp ts;
500   -
501   - ts.sec = cpu_to_be64(entskb->tstamp.off_sec);
502   - ts.usec = cpu_to_be64(entskb->tstamp.off_usec);
  500 + struct timeval tv = ktime_to_timeval(entskb->tstamp);
  501 + ts.sec = cpu_to_be64(tv.tv_sec);
  502 + ts.usec = cpu_to_be64(tv.tv_usec);
503 503  
504 504 NFA_PUT(skb, NFQA_TIMESTAMP, sizeof(ts), &ts);
505 505 }
net/packet/af_packet.c
... ... @@ -582,6 +582,7 @@
582 582 unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
583 583 unsigned short macoff, netoff;
584 584 struct sk_buff *copy_skb = NULL;
  585 + struct timeval tv;
585 586  
586 587 if (skb->pkt_type == PACKET_LOOPBACK)
587 588 goto drop;
588 589  
... ... @@ -656,12 +657,13 @@
656 657 h->tp_snaplen = snaplen;
657 658 h->tp_mac = macoff;
658 659 h->tp_net = netoff;
659   - if (skb->tstamp.off_sec == 0) {
  660 + if (skb->tstamp.tv64 == 0) {
660 661 __net_timestamp(skb);
661 662 sock_enable_timestamp(sk);
662 663 }
663   - h->tp_sec = skb->tstamp.off_sec;
664   - h->tp_usec = skb->tstamp.off_usec;
  664 + tv = ktime_to_timeval(skb->tstamp);
  665 + h->tp_sec = tv.tv_sec;
  666 + h->tp_usec = tv.tv_usec;
665 667  
666 668 sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h)));
667 669 sll->sll_halen = 0;
net/sunrpc/svcsock.c
... ... @@ -798,16 +798,12 @@
798 798 dprintk("svc: recvfrom returned error %d\n", -err);
799 799 }
800 800 rqstp->rq_addrlen = sizeof(rqstp->rq_addr);
801   - if (skb->tstamp.off_sec == 0) {
802   - struct timeval tv;
803   -
804   - tv.tv_sec = xtime.tv_sec;
805   - tv.tv_usec = xtime.tv_nsec / NSEC_PER_USEC;
806   - skb_set_timestamp(skb, &tv);
  801 + if (skb->tstamp.tv64 == 0) {
  802 + skb->tstamp = ktime_get_real();
807 803 /* Don't enable netstamp, sunrpc doesn't
808 804 need that much accuracy */
809 805 }
810   - skb_get_timestamp(skb, &svsk->sk_sk->sk_stamp);
  806 + svsk->sk_sk->sk_stamp = skb->tstamp;
811 807 set_bit(SK_DATA, &svsk->sk_flags); /* there may be more data... */
812 808  
813 809 /*