Commit c19384b5b296905d4988c7c684ff540a0f9d65be
Committed by
Linus Torvalds
1 parent
ec92d08292
Exists in
master
and in
20 other branches
Make futex_wait() use an hrtimer for timeout
This patch modifies futex_wait() to use an hrtimer + schedule() in place of schedule_timeout().

schedule_timeout() is tick based, therefore the timeout granularity is the tick (1 ms, 4 ms or 10 ms depending on HZ). By using a high resolution timer for the timeout wakeup, we can attain a much finer timeout granularity (in the microsecond range). This parallels what is already done for futex_lock_pi().

The timeout passed to the syscall is no longer converted to jiffies; it is instead passed to do_futex() and futex_wait() as an absolute ktime_t, thereby keeping nanosecond resolution.

This also removes the need to pass the nanoseconds part of the timeout to futex_lock_pi() in val2.

In futex_wait(), if there is no timeout then a regular schedule() is performed. Otherwise, an hrtimer is started before schedule() is called.

[akpm@linux-foundation.org: fix `make headers_check']
Signed-off-by: Sebastien Dugue <sebastien.dugue@bull.net>
Signed-off-by: Pierre Peiffer <pierre.peiffer@bull.net>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Ulrich Drepper <drepper@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 3 changed files with 57 additions and 55 deletions Side-by-side Diff
include/linux/futex.h
... | ... | @@ -3,6 +3,8 @@ |
3 | 3 | |
4 | 4 | #include <linux/sched.h> |
5 | 5 | |
6 | +union ktime; | |
7 | + | |
6 | 8 | /* Second argument to futex syscall */ |
7 | 9 | |
8 | 10 | |
... | ... | @@ -94,7 +96,7 @@ |
94 | 96 | #define ROBUST_LIST_LIMIT 2048 |
95 | 97 | |
96 | 98 | #ifdef __KERNEL__ |
97 | -long do_futex(u32 __user *uaddr, int op, u32 val, unsigned long timeout, | |
99 | +long do_futex(u32 __user *uaddr, int op, u32 val, union ktime *timeout, | |
98 | 100 | u32 __user *uaddr2, u32 val2, u32 val3); |
99 | 101 | |
100 | 102 | extern int |
kernel/futex.c
... | ... | @@ -1001,16 +1001,16 @@ |
1001 | 1001 | } |
1002 | 1002 | |
1003 | 1003 | static long futex_wait_restart(struct restart_block *restart); |
1004 | -static int futex_wait_abstime(u32 __user *uaddr, u32 val, | |
1005 | - int timed, unsigned long abs_time) | |
1004 | +static int futex_wait(u32 __user *uaddr, u32 val, ktime_t *abs_time) | |
1006 | 1005 | { |
1007 | 1006 | struct task_struct *curr = current; |
1008 | 1007 | DECLARE_WAITQUEUE(wait, curr); |
1009 | 1008 | struct futex_hash_bucket *hb; |
1010 | 1009 | struct futex_q q; |
1011 | - unsigned long time_left = 0; | |
1012 | 1010 | u32 uval; |
1013 | 1011 | int ret; |
1012 | + struct hrtimer_sleeper t; | |
1013 | + int rem = 0; | |
1014 | 1014 | |
1015 | 1015 | q.pi_state = NULL; |
1016 | 1016 | retry: |
1017 | 1017 | |
1018 | 1018 | |
1019 | 1019 | |
... | ... | @@ -1088,20 +1088,29 @@ |
1088 | 1088 | * !plist_node_empty() is safe here without any lock. |
1089 | 1089 | * q.lock_ptr != 0 is not safe, because of ordering against wakeup. |
1090 | 1090 | */ |
1091 | - time_left = 0; | |
1092 | 1091 | if (likely(!plist_node_empty(&q.list))) { |
1093 | - unsigned long rel_time; | |
1092 | + if (!abs_time) | |
1093 | + schedule(); | |
1094 | + else { | |
1095 | + hrtimer_init(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | |
1096 | + hrtimer_init_sleeper(&t, current); | |
1097 | + t.timer.expires = *abs_time; | |
1094 | 1098 | |
1095 | - if (timed) { | |
1096 | - unsigned long now = jiffies; | |
1097 | - if (time_after(now, abs_time)) | |
1098 | - rel_time = 0; | |
1099 | - else | |
1100 | - rel_time = abs_time - now; | |
1101 | - } else | |
1102 | - rel_time = MAX_SCHEDULE_TIMEOUT; | |
1099 | + hrtimer_start(&t.timer, t.timer.expires, HRTIMER_MODE_ABS); | |
1103 | 1100 | |
1104 | - time_left = schedule_timeout(rel_time); | |
1101 | + /* | |
1102 | + * the timer could have already expired, in which | |
1103 | + * case current would be flagged for rescheduling. | |
1104 | + * Don't bother calling schedule. | |
1105 | + */ | |
1106 | + if (likely(t.task)) | |
1107 | + schedule(); | |
1108 | + | |
1109 | + hrtimer_cancel(&t.timer); | |
1110 | + | |
1111 | + /* Flag if a timeout occurred */ | |
1112 | + rem = (t.task == NULL); | |
1113 | + } | |
1105 | 1114 | } |
1106 | 1115 | __set_current_state(TASK_RUNNING); |
1107 | 1116 | |
1108 | 1117 | |
... | ... | @@ -1113,14 +1122,14 @@ |
1113 | 1122 | /* If we were woken (and unqueued), we succeeded, whatever. */ |
1114 | 1123 | if (!unqueue_me(&q)) |
1115 | 1124 | return 0; |
1116 | - if (time_left == 0) | |
1125 | + if (rem) | |
1117 | 1126 | return -ETIMEDOUT; |
1118 | 1127 | |
1119 | 1128 | /* |
1120 | 1129 | * We expect signal_pending(current), but another thread may |
1121 | 1130 | * have handled it for us already. |
1122 | 1131 | */ |
1123 | - if (time_left == MAX_SCHEDULE_TIMEOUT) | |
1132 | + if (!abs_time) | |
1124 | 1133 | return -ERESTARTSYS; |
1125 | 1134 | else { |
1126 | 1135 | struct restart_block *restart; |
... | ... | @@ -1128,8 +1137,7 @@ |
1128 | 1137 | restart->fn = futex_wait_restart; |
1129 | 1138 | restart->arg0 = (unsigned long)uaddr; |
1130 | 1139 | restart->arg1 = (unsigned long)val; |
1131 | - restart->arg2 = (unsigned long)timed; | |
1132 | - restart->arg3 = abs_time; | |
1140 | + restart->arg2 = (unsigned long)abs_time; | |
1133 | 1141 | return -ERESTART_RESTARTBLOCK; |
1134 | 1142 | } |
1135 | 1143 | |
1136 | 1144 | |
1137 | 1145 | |
... | ... | @@ -1141,21 +1149,15 @@ |
1141 | 1149 | return ret; |
1142 | 1150 | } |
1143 | 1151 | |
1144 | -static int futex_wait(u32 __user *uaddr, u32 val, unsigned long rel_time) | |
1145 | -{ | |
1146 | - int timed = (rel_time != MAX_SCHEDULE_TIMEOUT); | |
1147 | - return futex_wait_abstime(uaddr, val, timed, jiffies+rel_time); | |
1148 | -} | |
1149 | 1152 | |
1150 | 1153 | static long futex_wait_restart(struct restart_block *restart) |
1151 | 1154 | { |
1152 | 1155 | u32 __user *uaddr = (u32 __user *)restart->arg0; |
1153 | 1156 | u32 val = (u32)restart->arg1; |
1154 | - int timed = (int)restart->arg2; | |
1155 | - unsigned long abs_time = restart->arg3; | |
1157 | + ktime_t *abs_time = (ktime_t *)restart->arg2; | |
1156 | 1158 | |
1157 | 1159 | restart->fn = do_no_restart_syscall; |
1158 | - return (long)futex_wait_abstime(uaddr, val, timed, abs_time); | |
1160 | + return (long)futex_wait(uaddr, val, abs_time); | |
1159 | 1161 | } |
1160 | 1162 | |
1161 | 1163 | |
... | ... | @@ -1165,8 +1167,8 @@ |
1165 | 1167 | * if there are waiters then it will block, it does PI, etc. (Due to |
1166 | 1168 | * races the kernel might see a 0 value of the futex too.) |
1167 | 1169 | */ |
1168 | -static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec, | |
1169 | - long nsec, int trylock) | |
1170 | +static int futex_lock_pi(u32 __user *uaddr, int detect, ktime_t *time, | |
1171 | + int trylock) | |
1170 | 1172 | { |
1171 | 1173 | struct hrtimer_sleeper timeout, *to = NULL; |
1172 | 1174 | struct task_struct *curr = current; |
1173 | 1175 | |
... | ... | @@ -1178,11 +1180,11 @@ |
1178 | 1180 | if (refill_pi_state_cache()) |
1179 | 1181 | return -ENOMEM; |
1180 | 1182 | |
1181 | - if (sec != MAX_SCHEDULE_TIMEOUT) { | |
1183 | + if (time) { | |
1182 | 1184 | to = &timeout; |
1183 | 1185 | hrtimer_init(&to->timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); |
1184 | 1186 | hrtimer_init_sleeper(to, current); |
1185 | - to->timer.expires = ktime_set(sec, nsec); | |
1187 | + to->timer.expires = *time; | |
1186 | 1188 | } |
1187 | 1189 | |
1188 | 1190 | q.pi_state = NULL; |
... | ... | @@ -1818,7 +1820,7 @@ |
1818 | 1820 | } |
1819 | 1821 | } |
1820 | 1822 | |
1821 | -long do_futex(u32 __user *uaddr, int op, u32 val, unsigned long timeout, | |
1823 | +long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, | |
1822 | 1824 | u32 __user *uaddr2, u32 val2, u32 val3) |
1823 | 1825 | { |
1824 | 1826 | int ret; |
1825 | 1827 | |
... | ... | @@ -1844,13 +1846,13 @@ |
1844 | 1846 | ret = futex_wake_op(uaddr, uaddr2, val, val2, val3); |
1845 | 1847 | break; |
1846 | 1848 | case FUTEX_LOCK_PI: |
1847 | - ret = futex_lock_pi(uaddr, val, timeout, val2, 0); | |
1849 | + ret = futex_lock_pi(uaddr, val, timeout, 0); | |
1848 | 1850 | break; |
1849 | 1851 | case FUTEX_UNLOCK_PI: |
1850 | 1852 | ret = futex_unlock_pi(uaddr); |
1851 | 1853 | break; |
1852 | 1854 | case FUTEX_TRYLOCK_PI: |
1853 | - ret = futex_lock_pi(uaddr, 0, timeout, val2, 1); | |
1855 | + ret = futex_lock_pi(uaddr, 0, timeout, 1); | |
1854 | 1856 | break; |
1855 | 1857 | default: |
1856 | 1858 | ret = -ENOSYS; |
1857 | 1859 | |
1858 | 1860 | |
1859 | 1861 | |
1860 | 1862 | |
... | ... | @@ -1863,21 +1865,20 @@ |
1863 | 1865 | struct timespec __user *utime, u32 __user *uaddr2, |
1864 | 1866 | u32 val3) |
1865 | 1867 | { |
1866 | - struct timespec t; | |
1867 | - unsigned long timeout = MAX_SCHEDULE_TIMEOUT; | |
1868 | + struct timespec ts; | |
1869 | + ktime_t t, *tp = NULL; | |
1868 | 1870 | u32 val2 = 0; |
1869 | 1871 | |
1870 | 1872 | if (utime && (op == FUTEX_WAIT || op == FUTEX_LOCK_PI)) { |
1871 | - if (copy_from_user(&t, utime, sizeof(t)) != 0) | |
1873 | + if (copy_from_user(&ts, utime, sizeof(ts)) != 0) | |
1872 | 1874 | return -EFAULT; |
1873 | - if (!timespec_valid(&t)) | |
1875 | + if (!timespec_valid(&ts)) | |
1874 | 1876 | return -EINVAL; |
1877 | + | |
1878 | + t = timespec_to_ktime(ts); | |
1875 | 1879 | if (op == FUTEX_WAIT) |
1876 | - timeout = timespec_to_jiffies(&t) + 1; | |
1877 | - else { | |
1878 | - timeout = t.tv_sec; | |
1879 | - val2 = t.tv_nsec; | |
1880 | - } | |
1880 | + t = ktime_add(ktime_get(), t); | |
1881 | + tp = &t; | |
1881 | 1882 | } |
1882 | 1883 | /* |
1883 | 1884 | * requeue parameter in 'utime' if op == FUTEX_REQUEUE. |
... | ... | @@ -1885,7 +1886,7 @@ |
1885 | 1886 | if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE) |
1886 | 1887 | val2 = (u32) (unsigned long) utime; |
1887 | 1888 | |
1888 | - return do_futex(uaddr, op, val, timeout, uaddr2, val2, val3); | |
1889 | + return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); | |
1889 | 1890 | } |
1890 | 1891 | |
1891 | 1892 | static int futexfs_get_sb(struct file_system_type *fs_type, |
kernel/futex_compat.c
... | ... | @@ -141,25 +141,24 @@ |
141 | 141 | struct compat_timespec __user *utime, u32 __user *uaddr2, |
142 | 142 | u32 val3) |
143 | 143 | { |
144 | - struct timespec t; | |
145 | - unsigned long timeout = MAX_SCHEDULE_TIMEOUT; | |
144 | + struct timespec ts; | |
145 | + ktime_t t, *tp = NULL; | |
146 | 146 | int val2 = 0; |
147 | 147 | |
148 | 148 | if (utime && (op == FUTEX_WAIT || op == FUTEX_LOCK_PI)) { |
149 | - if (get_compat_timespec(&t, utime)) | |
149 | + if (get_compat_timespec(&ts, utime)) | |
150 | 150 | return -EFAULT; |
151 | - if (!timespec_valid(&t)) | |
151 | + if (!timespec_valid(&ts)) | |
152 | 152 | return -EINVAL; |
153 | + | |
154 | + t = timespec_to_ktime(ts); | |
153 | 155 | if (op == FUTEX_WAIT) |
154 | - timeout = timespec_to_jiffies(&t) + 1; | |
155 | - else { | |
156 | - timeout = t.tv_sec; | |
157 | - val2 = t.tv_nsec; | |
158 | - } | |
156 | + t = ktime_add(ktime_get(), t); | |
157 | + tp = &t; | |
159 | 158 | } |
160 | 159 | if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE) |
161 | 160 | val2 = (int) (unsigned long) utime; |
162 | 161 | |
163 | - return do_futex(uaddr, op, val, timeout, uaddr2, val2, val3); | |
162 | + return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); | |
164 | 163 | } |