Commit 3278a2c20cb302d27e6f6ee45a3f57361176e426

Authored by Manfred Spraul
Committed by Linus Torvalds
1 parent 5ac893b8cb

ipc: conserve sequence numbers in ipcmni_extend mode

Rewrite, based on the patch from Waiman Long:

The mixing in of a sequence number into the IPC IDs is probably to avoid
ID reuse in userspace as much as possible.  With ipcmni_extend mode, the
number of usable sequence numbers is greatly reduced, leading to a higher
chance of ID reuse.

To address this issue, we need to conserve the sequence number space as
much as possible.  Right now, the sequence number is incremented for
every new ID created.  In reality, we only need to increment the
sequence number when the newly allocated index is not greater than the
last one allocated.  Only in that case can the new ID collide with an
existing one.  This is done irrespective of the ipcmni mode.

In order to avoid any races, the index is first allocated and then the
pointer is replaced.

Changes compared to the initial patch:
 - Handle failures from idr_alloc().
 - Prevent concurrent operations from seeing the wrong sequence number.
   (This is achieved by using idr_replace()).
 - IPCMNI_SEQ_SHIFT is not a constant, thus renamed to
   ipcmni_seq_shift().
 - IPCID_SEQ_MAX is not a constant, thus renamed to ipcid_seq_max().

Link: http://lkml.kernel.org/r/20190329204930.21620-2-longman@redhat.com
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Signed-off-by: Waiman Long <longman@redhat.com>
Suggested-by: Matthew Wilcox <willy@infradead.org>
Acked-by: Waiman Long <longman@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Davidlohr Bueso <dbueso@suse.de>
Cc: "Eric W . Biederman" <ebiederm@xmission.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Kees Cook <keescook@chromium.org>
Cc: "Luis R. Rodriguez" <mcgrof@kernel.org>
Cc: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 3 changed files with 35 additions and 9 deletions Side-by-side Diff

include/linux/ipc_namespace.h
... ... @@ -19,6 +19,7 @@
19 19 struct rw_semaphore rwsem;
20 20 struct idr ipcs_idr;
21 21 int max_idx;
  22 + int last_idx; /* For wrap around detection */
22 23 #ifdef CONFIG_CHECKPOINT_RESTORE
23 24 int next_id;
24 25 #endif
... ... @@ -119,6 +119,7 @@
119 119 rhashtable_init(&ids->key_ht, &ipc_kht_params);
120 120 idr_init(&ids->ipcs_idr);
121 121 ids->max_idx = -1;
  122 + ids->last_idx = -1;
122 123 #ifdef CONFIG_CHECKPOINT_RESTORE
123 124 ids->next_id = -1;
124 125 #endif
... ... @@ -192,6 +193,10 @@
192 193 *
193 194 * The caller must own kern_ipc_perm.lock of the new object.
194 195 * On error, the function returns a (negative) error code.
  196 + *
  197 + * To conserve sequence number space, especially with extended ipc_mni,
  198 + * the sequence number is incremented only when the returned index is not
  199 + * greater than the last one.
195 200 */
196 201 static inline int ipc_idr_alloc(struct ipc_ids *ids, struct kern_ipc_perm *new)
197 202 {
198 203  
... ... @@ -215,17 +220,37 @@
215 220 */
216 221  
217 222 if (next_id < 0) { /* !CHECKPOINT_RESTORE or next_id is unset */
218   - new->seq = ids->seq++;
219   - if (ids->seq > IPCID_SEQ_MAX)
220   - ids->seq = 0;
221   - idx = idr_alloc(&ids->ipcs_idr, new, 0, 0, GFP_NOWAIT);
  223 +
  224 + /* allocate the idx, with a NULL struct kern_ipc_perm */
  225 + idx = idr_alloc(&ids->ipcs_idr, NULL, 0, 0, GFP_NOWAIT);
  226 +
  227 + if (idx >= 0) {
  228 + /*
  229 + * idx got allocated successfully.
  230 + * Now calculate the sequence number and set the
  231 + * pointer for real.
  232 + */
  233 + if (idx <= ids->last_idx) {
  234 + ids->seq++;
  235 + if (ids->seq >= ipcid_seq_max())
  236 + ids->seq = 0;
  237 + }
  238 + ids->last_idx = idx;
  239 +
  240 + new->seq = ids->seq;
  241 + /* no need for smp_wmb(), this is done
  242 + * inside idr_replace, as part of
  243 + * rcu_assign_pointer
  244 + */
  245 + idr_replace(&ids->ipcs_idr, new, idx);
  246 + }
222 247 } else {
223 248 new->seq = ipcid_to_seqx(next_id);
224 249 idx = idr_alloc(&ids->ipcs_idr, new, ipcid_to_idx(next_id),
225 250 0, GFP_NOWAIT);
226 251 }
227 252 if (idx >= 0)
228   - new->id = (new->seq << IPCMNI_SEQ_SHIFT) + idx;
  253 + new->id = (new->seq << ipcmni_seq_shift()) + idx;
229 254 return idx;
230 255 }
231 256  
... ... @@ -34,13 +34,13 @@
34 34 extern int ipc_mni;
35 35 extern int ipc_mni_shift;
36 36  
37   -#define IPCMNI_SEQ_SHIFT ipc_mni_shift
  37 +#define ipcmni_seq_shift() ipc_mni_shift
38 38 #define IPCMNI_IDX_MASK ((1 << ipc_mni_shift) - 1)
39 39  
40 40 #else /* CONFIG_SYSVIPC_SYSCTL */
41 41  
42 42 #define ipc_mni IPCMNI
43   -#define IPCMNI_SEQ_SHIFT IPCMNI_SHIFT
  43 +#define ipcmni_seq_shift() IPCMNI_SHIFT
44 44 #define IPCMNI_IDX_MASK ((1 << IPCMNI_SHIFT) - 1)
45 45 #endif /* CONFIG_SYSVIPC_SYSCTL */
46 46  
... ... @@ -123,8 +123,8 @@
123 123 #define IPC_SHM_IDS 2
124 124  
125 125 #define ipcid_to_idx(id) ((id) & IPCMNI_IDX_MASK)
126   -#define ipcid_to_seqx(id) ((id) >> IPCMNI_SEQ_SHIFT)
127   -#define IPCID_SEQ_MAX (INT_MAX >> IPCMNI_SEQ_SHIFT)
  126 +#define ipcid_to_seqx(id) ((id) >> ipcmni_seq_shift())
  127 +#define ipcid_seq_max() (INT_MAX >> ipcmni_seq_shift())
128 128  
129 129 /* must be called with ids->rwsem acquired for writing */
130 130 int ipc_addid(struct ipc_ids *, struct kern_ipc_perm *, int);