Commit c19384b5b296905d4988c7c684ff540a0f9d65be

Authored by Pierre Peiffer
Committed by Linus Torvalds
1 parent ec92d08292

Make futex_wait() use an hrtimer for timeout

This patch modifies futex_wait() to use an hrtimer + schedule() in place of
schedule_timeout().

schedule_timeout() is tick-based, so its timeout granularity is one tick
(1 ms, 4 ms or 10 ms depending on HZ).  By using a high-resolution timer for
the timeout wakeup, we can attain a much finer timeout granularity (in the
microsecond range).  This parallels what is already done for futex_lock_pi().

The timeout passed to the syscall is no longer converted to jiffies.  Instead,
it is passed to do_futex() and futex_wait() as a pointer to an absolute
ktime_t, thereby keeping nanosecond resolution.

This also removes the need to pass the nanosecond part of the timeout to
futex_lock_pi() in val2.

In futex_wait(), if there is no timeout then a regular schedule() is
performed.  Otherwise, an hrtimer is started before schedule() is called.

[akpm@linux-foundation.org: fix `make headers_check']
Signed-off-by: Sebastien Dugue <sebastien.dugue@bull.net>
Signed-off-by: Pierre Peiffer <pierre.peiffer@bull.net>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Ulrich Drepper <drepper@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 3 changed files with 57 additions and 55 deletions Side-by-side Diff

include/linux/futex.h
... ... @@ -3,6 +3,8 @@
3 3  
4 4 #include <linux/sched.h>
5 5  
  6 +union ktime;
  7 +
6 8 /* Second argument to futex syscall */
7 9  
8 10  
... ... @@ -94,7 +96,7 @@
94 96 #define ROBUST_LIST_LIMIT 2048
95 97  
96 98 #ifdef __KERNEL__
97   -long do_futex(u32 __user *uaddr, int op, u32 val, unsigned long timeout,
  99 +long do_futex(u32 __user *uaddr, int op, u32 val, union ktime *timeout,
98 100 u32 __user *uaddr2, u32 val2, u32 val3);
99 101  
100 102 extern int
... ... @@ -1001,16 +1001,16 @@
1001 1001 }
1002 1002  
1003 1003 static long futex_wait_restart(struct restart_block *restart);
1004   -static int futex_wait_abstime(u32 __user *uaddr, u32 val,
1005   - int timed, unsigned long abs_time)
  1004 +static int futex_wait(u32 __user *uaddr, u32 val, ktime_t *abs_time)
1006 1005 {
1007 1006 struct task_struct *curr = current;
1008 1007 DECLARE_WAITQUEUE(wait, curr);
1009 1008 struct futex_hash_bucket *hb;
1010 1009 struct futex_q q;
1011   - unsigned long time_left = 0;
1012 1010 u32 uval;
1013 1011 int ret;
  1012 + struct hrtimer_sleeper t;
  1013 + int rem = 0;
1014 1014  
1015 1015 q.pi_state = NULL;
1016 1016 retry:
1017 1017  
1018 1018  
1019 1019  
... ... @@ -1088,20 +1088,29 @@
1088 1088 * !plist_node_empty() is safe here without any lock.
1089 1089 * q.lock_ptr != 0 is not safe, because of ordering against wakeup.
1090 1090 */
1091   - time_left = 0;
1092 1091 if (likely(!plist_node_empty(&q.list))) {
1093   - unsigned long rel_time;
  1092 + if (!abs_time)
  1093 + schedule();
  1094 + else {
  1095 + hrtimer_init(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
  1096 + hrtimer_init_sleeper(&t, current);
  1097 + t.timer.expires = *abs_time;
1094 1098  
1095   - if (timed) {
1096   - unsigned long now = jiffies;
1097   - if (time_after(now, abs_time))
1098   - rel_time = 0;
1099   - else
1100   - rel_time = abs_time - now;
1101   - } else
1102   - rel_time = MAX_SCHEDULE_TIMEOUT;
  1099 + hrtimer_start(&t.timer, t.timer.expires, HRTIMER_MODE_ABS);
1103 1100  
1104   - time_left = schedule_timeout(rel_time);
  1101 + /*
  1102 + * the timer could have already expired, in which
  1103 + * case current would be flagged for rescheduling.
  1104 + * Don't bother calling schedule.
  1105 + */
  1106 + if (likely(t.task))
  1107 + schedule();
  1108 +
  1109 + hrtimer_cancel(&t.timer);
  1110 +
  1111 + /* Flag if a timeout occured */
  1112 + rem = (t.task == NULL);
  1113 + }
1105 1114 }
1106 1115 __set_current_state(TASK_RUNNING);
1107 1116  
1108 1117  
... ... @@ -1113,14 +1122,14 @@
1113 1122 /* If we were woken (and unqueued), we succeeded, whatever. */
1114 1123 if (!unqueue_me(&q))
1115 1124 return 0;
1116   - if (time_left == 0)
  1125 + if (rem)
1117 1126 return -ETIMEDOUT;
1118 1127  
1119 1128 /*
1120 1129 * We expect signal_pending(current), but another thread may
1121 1130 * have handled it for us already.
1122 1131 */
1123   - if (time_left == MAX_SCHEDULE_TIMEOUT)
  1132 + if (!abs_time)
1124 1133 return -ERESTARTSYS;
1125 1134 else {
1126 1135 struct restart_block *restart;
... ... @@ -1128,8 +1137,7 @@
1128 1137 restart->fn = futex_wait_restart;
1129 1138 restart->arg0 = (unsigned long)uaddr;
1130 1139 restart->arg1 = (unsigned long)val;
1131   - restart->arg2 = (unsigned long)timed;
1132   - restart->arg3 = abs_time;
  1140 + restart->arg2 = (unsigned long)abs_time;
1133 1141 return -ERESTART_RESTARTBLOCK;
1134 1142 }
1135 1143  
1136 1144  
1137 1145  
... ... @@ -1141,21 +1149,15 @@
1141 1149 return ret;
1142 1150 }
1143 1151  
1144   -static int futex_wait(u32 __user *uaddr, u32 val, unsigned long rel_time)
1145   -{
1146   - int timed = (rel_time != MAX_SCHEDULE_TIMEOUT);
1147   - return futex_wait_abstime(uaddr, val, timed, jiffies+rel_time);
1148   -}
1149 1152  
1150 1153 static long futex_wait_restart(struct restart_block *restart)
1151 1154 {
1152 1155 u32 __user *uaddr = (u32 __user *)restart->arg0;
1153 1156 u32 val = (u32)restart->arg1;
1154   - int timed = (int)restart->arg2;
1155   - unsigned long abs_time = restart->arg3;
  1157 + ktime_t *abs_time = (ktime_t *)restart->arg2;
1156 1158  
1157 1159 restart->fn = do_no_restart_syscall;
1158   - return (long)futex_wait_abstime(uaddr, val, timed, abs_time);
  1160 + return (long)futex_wait(uaddr, val, abs_time);
1159 1161 }
1160 1162  
1161 1163  
... ... @@ -1165,8 +1167,8 @@
1165 1167 * if there are waiters then it will block, it does PI, etc. (Due to
1166 1168 * races the kernel might see a 0 value of the futex too.)
1167 1169 */
1168   -static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec,
1169   - long nsec, int trylock)
  1170 +static int futex_lock_pi(u32 __user *uaddr, int detect, ktime_t *time,
  1171 + int trylock)
1170 1172 {
1171 1173 struct hrtimer_sleeper timeout, *to = NULL;
1172 1174 struct task_struct *curr = current;
1173 1175  
... ... @@ -1178,11 +1180,11 @@
1178 1180 if (refill_pi_state_cache())
1179 1181 return -ENOMEM;
1180 1182  
1181   - if (sec != MAX_SCHEDULE_TIMEOUT) {
  1183 + if (time) {
1182 1184 to = &timeout;
1183 1185 hrtimer_init(&to->timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
1184 1186 hrtimer_init_sleeper(to, current);
1185   - to->timer.expires = ktime_set(sec, nsec);
  1187 + to->timer.expires = *time;
1186 1188 }
1187 1189  
1188 1190 q.pi_state = NULL;
... ... @@ -1818,7 +1820,7 @@
1818 1820 }
1819 1821 }
1820 1822  
1821   -long do_futex(u32 __user *uaddr, int op, u32 val, unsigned long timeout,
  1823 +long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
1822 1824 u32 __user *uaddr2, u32 val2, u32 val3)
1823 1825 {
1824 1826 int ret;
1825 1827  
... ... @@ -1844,13 +1846,13 @@
1844 1846 ret = futex_wake_op(uaddr, uaddr2, val, val2, val3);
1845 1847 break;
1846 1848 case FUTEX_LOCK_PI:
1847   - ret = futex_lock_pi(uaddr, val, timeout, val2, 0);
  1849 + ret = futex_lock_pi(uaddr, val, timeout, 0);
1848 1850 break;
1849 1851 case FUTEX_UNLOCK_PI:
1850 1852 ret = futex_unlock_pi(uaddr);
1851 1853 break;
1852 1854 case FUTEX_TRYLOCK_PI:
1853   - ret = futex_lock_pi(uaddr, 0, timeout, val2, 1);
  1855 + ret = futex_lock_pi(uaddr, 0, timeout, 1);
1854 1856 break;
1855 1857 default:
1856 1858 ret = -ENOSYS;
1857 1859  
1858 1860  
1859 1861  
1860 1862  
... ... @@ -1863,21 +1865,20 @@
1863 1865 struct timespec __user *utime, u32 __user *uaddr2,
1864 1866 u32 val3)
1865 1867 {
1866   - struct timespec t;
1867   - unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
  1868 + struct timespec ts;
  1869 + ktime_t t, *tp = NULL;
1868 1870 u32 val2 = 0;
1869 1871  
1870 1872 if (utime && (op == FUTEX_WAIT || op == FUTEX_LOCK_PI)) {
1871   - if (copy_from_user(&t, utime, sizeof(t)) != 0)
  1873 + if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
1872 1874 return -EFAULT;
1873   - if (!timespec_valid(&t))
  1875 + if (!timespec_valid(&ts))
1874 1876 return -EINVAL;
  1877 +
  1878 + t = timespec_to_ktime(ts);
1875 1879 if (op == FUTEX_WAIT)
1876   - timeout = timespec_to_jiffies(&t) + 1;
1877   - else {
1878   - timeout = t.tv_sec;
1879   - val2 = t.tv_nsec;
1880   - }
  1880 + t = ktime_add(ktime_get(), t);
  1881 + tp = &t;
1881 1882 }
1882 1883 /*
1883 1884 * requeue parameter in 'utime' if op == FUTEX_REQUEUE.
... ... @@ -1885,7 +1886,7 @@
1885 1886 if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE)
1886 1887 val2 = (u32) (unsigned long) utime;
1887 1888  
1888   - return do_futex(uaddr, op, val, timeout, uaddr2, val2, val3);
  1889 + return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
1889 1890 }
1890 1891  
1891 1892 static int futexfs_get_sb(struct file_system_type *fs_type,
kernel/futex_compat.c
... ... @@ -141,25 +141,24 @@
141 141 struct compat_timespec __user *utime, u32 __user *uaddr2,
142 142 u32 val3)
143 143 {
144   - struct timespec t;
145   - unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
  144 + struct timespec ts;
  145 + ktime_t t, *tp = NULL;
146 146 int val2 = 0;
147 147  
148 148 if (utime && (op == FUTEX_WAIT || op == FUTEX_LOCK_PI)) {
149   - if (get_compat_timespec(&t, utime))
  149 + if (get_compat_timespec(&ts, utime))
150 150 return -EFAULT;
151   - if (!timespec_valid(&t))
  151 + if (!timespec_valid(&ts))
152 152 return -EINVAL;
  153 +
  154 + t = timespec_to_ktime(ts);
153 155 if (op == FUTEX_WAIT)
154   - timeout = timespec_to_jiffies(&t) + 1;
155   - else {
156   - timeout = t.tv_sec;
157   - val2 = t.tv_nsec;
158   - }
  156 + t = ktime_add(ktime_get(), t);
  157 + tp = &t;
159 158 }
160 159 if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE)
161 160 val2 = (int) (unsigned long) utime;
162 161  
163   - return do_futex(uaddr, op, val, timeout, uaddr2, val2, val3);
  162 + return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
164 163 }