Commit 90edf27fb89241917e91155bfdcc7c8e5a587222

Authored by Ingo Molnar

Merge branch 'linus' into perf/core

Conflicts:
	kernel/hw_breakpoint.c

Merge reason: resolve the conflict.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

Showing 26 changed files

... ... @@ -32,8 +32,9 @@
32 32  
33 33 config KPROBES
34 34 bool "Kprobes"
35   - depends on KALLSYMS && MODULES
  35 + depends on MODULES
36 36 depends on HAVE_KPROBES
  37 + select KALLSYMS
37 38 help
38 39 Kprobes allows you to trap at almost any kernel address and
39 40 execute a callback function. register_kprobe() establishes
... ... @@ -45,7 +46,6 @@
45 46 def_bool y
46 47 depends on KPROBES && HAVE_OPTPROBES
47 48 depends on !PREEMPT
48   - select KALLSYMS_ALL
49 49  
50 50 config HAVE_EFFICIENT_UNALIGNED_ACCESS
51 51 bool
arch/x86/include/asm/hw_breakpoint.h
... ... @@ -20,7 +20,7 @@
20 20 #include <linux/list.h>
21 21  
22 22 /* Available HW breakpoint length encodings */
23   -#define X86_BREAKPOINT_LEN_X 0x00
  23 +#define X86_BREAKPOINT_LEN_X 0x40
24 24 #define X86_BREAKPOINT_LEN_1 0x40
25 25 #define X86_BREAKPOINT_LEN_2 0x44
26 26 #define X86_BREAKPOINT_LEN_4 0x4c
arch/x86/kernel/hw_breakpoint.c
... ... @@ -206,11 +206,27 @@
206 206 int arch_bp_generic_fields(int x86_len, int x86_type,
207 207 int *gen_len, int *gen_type)
208 208 {
209   - /* Len */
210   - switch (x86_len) {
211   - case X86_BREAKPOINT_LEN_X:
  209 + /* Type */
  210 + switch (x86_type) {
  211 + case X86_BREAKPOINT_EXECUTE:
  212 + if (x86_len != X86_BREAKPOINT_LEN_X)
  213 + return -EINVAL;
  214 +
  215 + *gen_type = HW_BREAKPOINT_X;
212 216 *gen_len = sizeof(long);
  217 + return 0;
  218 + case X86_BREAKPOINT_WRITE:
  219 + *gen_type = HW_BREAKPOINT_W;
213 220 break;
  221 + case X86_BREAKPOINT_RW:
  222 + *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
  223 + break;
  224 + default:
  225 + return -EINVAL;
  226 + }
  227 +
  228 + /* Len */
  229 + switch (x86_len) {
214 230 case X86_BREAKPOINT_LEN_1:
215 231 *gen_len = HW_BREAKPOINT_LEN_1;
216 232 break;
... ... @@ -229,21 +245,6 @@
229 245 return -EINVAL;
230 246 }
231 247  
232   - /* Type */
233   - switch (x86_type) {
234   - case X86_BREAKPOINT_EXECUTE:
235   - *gen_type = HW_BREAKPOINT_X;
236   - break;
237   - case X86_BREAKPOINT_WRITE:
238   - *gen_type = HW_BREAKPOINT_W;
239   - break;
240   - case X86_BREAKPOINT_RW:
241   - *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
242   - break;
243   - default:
244   - return -EINVAL;
245   - }
246   -
247 248 return 0;
248 249 }
249 250  
... ... @@ -316,9 +317,6 @@
316 317 ret = -EINVAL;
317 318  
318 319 switch (info->len) {
319   - case X86_BREAKPOINT_LEN_X:
320   - align = sizeof(long) -1;
321   - break;
322 320 case X86_BREAKPOINT_LEN_1:
323 321 align = 0;
324 322 break;
arch/x86/lguest/boot.c
... ... @@ -324,9 +324,8 @@
324 324 }
325 325  
326 326 /*
327   - * For a single GDT entry which changes, we do the lazy thing: alter our GDT,
328   - * then tell the Host to reload the entire thing. This operation is so rare
329   - * that this naive implementation is reasonable.
  327 + * For a single GDT entry which changes, we simply change our copy and
  328 + * then tell the host about it.
330 329 */
331 330 static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum,
332 331 const void *desc, int type)
333 332  
... ... @@ -338,9 +337,13 @@
338 337 }
339 338  
340 339 /*
341   - * OK, I lied. There are three "thread local storage" GDT entries which change
  340 + * There are three "thread local storage" GDT entries which change
342 341 * on every context switch (these three entries are how glibc implements
343   - * __thread variables). So we have a hypercall specifically for this case.
  342 + * __thread variables). As an optimization, we have a hypercall
  343 + * specifically for this case.
  344 + *
  345 + * Wouldn't it be nicer to have a general LOAD_GDT_ENTRIES hypercall
  346 + * which took a range of entries?
344 347 */
345 348 static void lguest_load_tls(struct thread_struct *t, unsigned int cpu)
346 349 {
arch/x86/oprofile/nmi_int.c
... ... @@ -671,7 +671,9 @@
671 671 case 14:
672 672 *cpu_type = "i386/core";
673 673 break;
674   - case 15: case 23:
  674 + case 0x0f:
  675 + case 0x16:
  676 + case 0x17:
675 677 *cpu_type = "i386/core_2";
676 678 break;
677 679 case 0x1a:
drivers/char/agp/intel-agp.c
... ... @@ -806,6 +806,8 @@
806 806 "G45/G43", NULL, &intel_i965_driver },
807 807 { PCI_DEVICE_ID_INTEL_B43_HB, PCI_DEVICE_ID_INTEL_B43_IG,
808 808 "B43", NULL, &intel_i965_driver },
  809 + { PCI_DEVICE_ID_INTEL_B43_1_HB, PCI_DEVICE_ID_INTEL_B43_1_IG,
  810 + "B43", NULL, &intel_i965_driver },
809 811 { PCI_DEVICE_ID_INTEL_G41_HB, PCI_DEVICE_ID_INTEL_G41_IG,
810 812 "G41", NULL, &intel_i965_driver },
811 813 { PCI_DEVICE_ID_INTEL_IRONLAKE_D_HB, PCI_DEVICE_ID_INTEL_IRONLAKE_D_IG,
drivers/char/agp/intel-agp.h
... ... @@ -186,6 +186,8 @@
186 186 #define PCI_DEVICE_ID_INTEL_Q33_IG 0x29D2
187 187 #define PCI_DEVICE_ID_INTEL_B43_HB 0x2E40
188 188 #define PCI_DEVICE_ID_INTEL_B43_IG 0x2E42
  189 +#define PCI_DEVICE_ID_INTEL_B43_1_HB 0x2E90
  190 +#define PCI_DEVICE_ID_INTEL_B43_1_IG 0x2E92
189 191 #define PCI_DEVICE_ID_INTEL_GM45_HB 0x2A40
190 192 #define PCI_DEVICE_ID_INTEL_GM45_IG 0x2A42
191 193 #define PCI_DEVICE_ID_INTEL_EAGLELAKE_HB 0x2E00
drivers/char/virtio_console.c
... ... @@ -596,6 +596,10 @@
596 596 ssize_t ret;
597 597 bool nonblock;
598 598  
  599 + /* Userspace could be out to fool us */
  600 + if (!count)
  601 + return 0;
  602 +
599 603 port = filp->private_data;
600 604  
601 605 nonblock = filp->f_flags & O_NONBLOCK;
... ... @@ -642,7 +646,7 @@
642 646 poll_wait(filp, &port->waitqueue, wait);
643 647  
644 648 ret = 0;
645   - if (port->inbuf)
  649 + if (!will_read_block(port))
646 650 ret |= POLLIN | POLLRDNORM;
647 651 if (!will_write_block(port))
648 652 ret |= POLLOUT;
drivers/gpu/drm/i915/i915_drv.c
... ... @@ -170,6 +170,7 @@
170 170 INTEL_VGA_DEVICE(0x2e22, &intel_g45_info), /* G45_G */
171 171 INTEL_VGA_DEVICE(0x2e32, &intel_g45_info), /* G41_G */
172 172 INTEL_VGA_DEVICE(0x2e42, &intel_g45_info), /* B43_G */
  173 + INTEL_VGA_DEVICE(0x2e92, &intel_g45_info), /* B43_G.1 */
173 174 INTEL_VGA_DEVICE(0xa001, &intel_pineview_info),
174 175 INTEL_VGA_DEVICE(0xa011, &intel_pineview_info),
175 176 INTEL_VGA_DEVICE(0x0042, &intel_ironlake_d_info),
drivers/gpu/drm/i915/i915_gem.c
... ... @@ -2351,14 +2351,21 @@
2351 2351  
2352 2352 reg->obj = obj;
2353 2353  
2354   - if (IS_GEN6(dev))
  2354 + switch (INTEL_INFO(dev)->gen) {
  2355 + case 6:
2355 2356 sandybridge_write_fence_reg(reg);
2356   - else if (IS_I965G(dev))
  2357 + break;
  2358 + case 5:
  2359 + case 4:
2357 2360 i965_write_fence_reg(reg);
2358   - else if (IS_I9XX(dev))
  2361 + break;
  2362 + case 3:
2359 2363 i915_write_fence_reg(reg);
2360   - else
  2364 + break;
  2365 + case 2:
2361 2366 i830_write_fence_reg(reg);
  2367 + break;
  2368 + }
2362 2369  
2363 2370 trace_i915_gem_object_get_fence(obj, obj_priv->fence_reg,
2364 2371 obj_priv->tiling_mode);
2365 2372  
... ... @@ -2381,22 +2388,26 @@
2381 2388 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2382 2389 struct drm_i915_fence_reg *reg =
2383 2390 &dev_priv->fence_regs[obj_priv->fence_reg];
  2391 + uint32_t fence_reg;
2384 2392  
2385   - if (IS_GEN6(dev)) {
  2393 + switch (INTEL_INFO(dev)->gen) {
  2394 + case 6:
2386 2395 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 +
2387 2396 (obj_priv->fence_reg * 8), 0);
2388   - } else if (IS_I965G(dev)) {
  2397 + break;
  2398 + case 5:
  2399 + case 4:
2389 2400 I915_WRITE64(FENCE_REG_965_0 + (obj_priv->fence_reg * 8), 0);
2390   - } else {
2391   - uint32_t fence_reg;
2392   -
2393   - if (obj_priv->fence_reg < 8)
2394   - fence_reg = FENCE_REG_830_0 + obj_priv->fence_reg * 4;
  2401 + break;
  2402 + case 3:
  2403 + if (obj_priv->fence_reg >= 8)
  2404 + fence_reg = FENCE_REG_945_8 + (obj_priv->fence_reg - 8) * 4;
2395 2405 else
2396   - fence_reg = FENCE_REG_945_8 + (obj_priv->fence_reg -
2397   - 8) * 4;
  2406 + case 2:
  2407 + fence_reg = FENCE_REG_830_0 + obj_priv->fence_reg * 4;
2398 2408  
2399 2409 I915_WRITE(fence_reg, 0);
  2410 + break;
2400 2411 }
2401 2412  
2402 2413 reg->obj = NULL;
drivers/gpu/drm/i915/i915_gem_evict.c
... ... @@ -79,6 +79,7 @@
79 79 struct list_head *unwind)
80 80 {
81 81 list_add(&obj_priv->evict_list, unwind);
  82 + drm_gem_object_reference(&obj_priv->base);
82 83 return drm_mm_scan_add_block(obj_priv->gtt_space);
83 84 }
84 85  
... ... @@ -165,6 +166,7 @@
165 166 list_for_each_entry(obj_priv, &unwind_list, evict_list) {
166 167 ret = drm_mm_scan_remove_block(obj_priv->gtt_space);
167 168 BUG_ON(ret);
  169 + drm_gem_object_unreference(&obj_priv->base);
168 170 }
169 171  
170 172 /* We expect the caller to unpin, evict all and try again, or give up.
171 173  
172 174  
... ... @@ -181,18 +183,21 @@
181 183 * scanning, therefore store to be evicted objects on a
182 184 * temporary list. */
183 185 list_move(&obj_priv->evict_list, &eviction_list);
184   - }
  186 + } else
  187 + drm_gem_object_unreference(&obj_priv->base);
185 188 }
186 189  
187 190 /* Unbinding will emit any required flushes */
188 191 list_for_each_entry_safe(obj_priv, tmp_obj_priv,
189 192 &eviction_list, evict_list) {
190 193 #if WATCH_LRU
191   - DRM_INFO("%s: evicting %p\n", __func__, obj);
  194 + DRM_INFO("%s: evicting %p\n", __func__, &obj_priv->base);
192 195 #endif
193 196 ret = i915_gem_object_unbind(&obj_priv->base);
194 197 if (ret)
195 198 return ret;
  199 +
  200 + drm_gem_object_unreference(&obj_priv->base);
196 201 }
197 202  
198 203 /* The just created free hole should be on the top of the free stack
drivers/gpu/drm/i915/i915_suspend.c
... ... @@ -789,16 +789,25 @@
789 789 dev_priv->saveSWF2[i] = I915_READ(SWF30 + (i << 2));
790 790  
791 791 /* Fences */
792   - if (IS_I965G(dev)) {
  792 + switch (INTEL_INFO(dev)->gen) {
  793 + case 6:
793 794 for (i = 0; i < 16; i++)
  795 + dev_priv->saveFENCE[i] = I915_READ64(FENCE_REG_SANDYBRIDGE_0 + (i * 8));
  796 + break;
  797 + case 5:
  798 + case 4:
  799 + for (i = 0; i < 16; i++)
794 800 dev_priv->saveFENCE[i] = I915_READ64(FENCE_REG_965_0 + (i * 8));
795   - } else {
796   - for (i = 0; i < 8; i++)
797   - dev_priv->saveFENCE[i] = I915_READ(FENCE_REG_830_0 + (i * 4));
798   -
  801 + break;
  802 + case 3:
799 803 if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
800 804 for (i = 0; i < 8; i++)
801 805 dev_priv->saveFENCE[i+8] = I915_READ(FENCE_REG_945_8 + (i * 4));
  806 + case 2:
  807 + for (i = 0; i < 8; i++)
  808 + dev_priv->saveFENCE[i] = I915_READ(FENCE_REG_830_0 + (i * 4));
  809 + break;
  810 +
802 811 }
803 812  
804 813 return 0;
... ... @@ -815,15 +824,24 @@
815 824 I915_WRITE(HWS_PGA, dev_priv->saveHWS);
816 825  
817 826 /* Fences */
818   - if (IS_I965G(dev)) {
  827 + switch (INTEL_INFO(dev)->gen) {
  828 + case 6:
819 829 for (i = 0; i < 16; i++)
  830 + I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + (i * 8), dev_priv->saveFENCE[i]);
  831 + break;
  832 + case 5:
  833 + case 4:
  834 + for (i = 0; i < 16; i++)
820 835 I915_WRITE64(FENCE_REG_965_0 + (i * 8), dev_priv->saveFENCE[i]);
821   - } else {
822   - for (i = 0; i < 8; i++)
823   - I915_WRITE(FENCE_REG_830_0 + (i * 4), dev_priv->saveFENCE[i]);
  836 + break;
  837 + case 3:
  838 + case 2:
824 839 if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
825 840 for (i = 0; i < 8; i++)
826 841 I915_WRITE(FENCE_REG_945_8 + (i * 4), dev_priv->saveFENCE[i+8]);
  842 + for (i = 0; i < 8; i++)
  843 + I915_WRITE(FENCE_REG_830_0 + (i * 4), dev_priv->saveFENCE[i]);
  844 + break;
827 845 }
828 846  
829 847 i915_restore_display(dev);
drivers/gpu/drm/i915/intel_crt.c
... ... @@ -188,7 +188,7 @@
188 188  
189 189 if (wait_for((I915_READ(PCH_ADPA) & ADPA_CRT_HOTPLUG_FORCE_TRIGGER) == 0,
190 190 1000, 1))
191   - DRM_ERROR("timed out waiting for FORCE_TRIGGER");
  191 + DRM_DEBUG_KMS("timed out waiting for FORCE_TRIGGER");
192 192  
193 193 if (turn_off_dac) {
194 194 I915_WRITE(PCH_ADPA, temp);
... ... @@ -245,7 +245,7 @@
245 245 if (wait_for((I915_READ(PORT_HOTPLUG_EN) &
246 246 CRT_HOTPLUG_FORCE_DETECT) == 0,
247 247 1000, 1))
248   - DRM_ERROR("timed out waiting for FORCE_DETECT to go off");
  248 + DRM_DEBUG_KMS("timed out waiting for FORCE_DETECT to go off");
249 249 }
250 250  
251 251 stat = I915_READ(PORT_HOTPLUG_STAT);
drivers/gpu/drm/i915/intel_display.c
... ... @@ -2463,11 +2463,19 @@
2463 2463 struct drm_display_mode *adjusted_mode)
2464 2464 {
2465 2465 struct drm_device *dev = crtc->dev;
  2466 +
2466 2467 if (HAS_PCH_SPLIT(dev)) {
2467 2468 /* FDI link clock is fixed at 2.7G */
2468 2469 if (mode->clock * 3 > IRONLAKE_FDI_FREQ * 4)
2469 2470 return false;
2470 2471 }
  2472 +
  2473 + /* XXX some encoders set the crtcinfo, others don't.
  2474 + * Obviously we need some form of conflict resolution here...
  2475 + */
  2476 + if (adjusted_mode->crtc_htotal == 0)
  2477 + drm_mode_set_crtcinfo(adjusted_mode, 0);
  2478 +
2471 2479 return true;
2472 2480 }
2473 2481  
... ... @@ -3,6 +3,7 @@
3 3 depends on INET && EXPERIMENTAL
4 4 select LIBCRC32C
5 5 select CRYPTO_AES
  6 + select CRYPTO
6 7 help
7 8 Choose Y or M here to include support for mounting the
8 9 experimental Ceph distributed file system. Ceph is an extremely
... ... @@ -411,8 +411,8 @@
411 411 if (i_size < page_off + len)
412 412 len = i_size - page_off;
413 413  
414   - dout("writepage %p page %p index %lu on %llu~%u\n",
415   - inode, page, page->index, page_off, len);
  414 + dout("writepage %p page %p index %lu on %llu~%u snapc %p\n",
  415 + inode, page, page->index, page_off, len, snapc);
416 416  
417 417 writeback_stat = atomic_long_inc_return(&client->writeback_count);
418 418 if (writeback_stat >
... ... @@ -766,7 +766,8 @@
766 766 /* ok */
767 767 if (locked_pages == 0) {
768 768 /* prepare async write request */
769   - offset = page->index << PAGE_CACHE_SHIFT;
  769 + offset = (unsigned long long)page->index
  770 + << PAGE_CACHE_SHIFT;
770 771 len = wsize;
771 772 req = ceph_osdc_new_request(&client->osdc,
772 773 &ci->i_layout,
... ... @@ -814,7 +814,7 @@
814 814 used |= CEPH_CAP_PIN;
815 815 if (ci->i_rd_ref)
816 816 used |= CEPH_CAP_FILE_RD;
817   - if (ci->i_rdcache_ref || ci->i_rdcache_gen)
  817 + if (ci->i_rdcache_ref || ci->vfs_inode.i_data.nrpages)
818 818 used |= CEPH_CAP_FILE_CACHE;
819 819 if (ci->i_wr_ref)
820 820 used |= CEPH_CAP_FILE_WR;
821 821  
... ... @@ -1195,10 +1195,14 @@
1195 1195 * asynchronously back to the MDS once sync writes complete and dirty
1196 1196 * data is written out.
1197 1197 *
  1198 + * Unless @again is true, skip cap_snaps that were already sent to
  1199 + * the MDS (i.e., during this session).
  1200 + *
1198 1201 * Called under i_lock. Takes s_mutex as needed.
1199 1202 */
1200 1203 void __ceph_flush_snaps(struct ceph_inode_info *ci,
1201   - struct ceph_mds_session **psession)
  1204 + struct ceph_mds_session **psession,
  1205 + int again)
1202 1206 __releases(ci->vfs_inode->i_lock)
1203 1207 __acquires(ci->vfs_inode->i_lock)
1204 1208 {
... ... @@ -1227,7 +1231,7 @@
1227 1231 * pages to be written out.
1228 1232 */
1229 1233 if (capsnap->dirty_pages || capsnap->writing)
1230   - continue;
  1234 + break;
1231 1235  
1232 1236 /*
1233 1237 * if cap writeback already occurred, we should have dropped
... ... @@ -1240,6 +1244,13 @@
1240 1244 dout("no auth cap (migrating?), doing nothing\n");
1241 1245 goto out;
1242 1246 }
  1247 +
  1248 + /* only flush each capsnap once */
  1249 + if (!again && !list_empty(&capsnap->flushing_item)) {
  1250 + dout("already flushed %p, skipping\n", capsnap);
  1251 + continue;
  1252 + }
  1253 +
1243 1254 mds = ci->i_auth_cap->session->s_mds;
1244 1255 mseq = ci->i_auth_cap->mseq;
1245 1256  
... ... @@ -1276,8 +1287,8 @@
1276 1287 &session->s_cap_snaps_flushing);
1277 1288 spin_unlock(&inode->i_lock);
1278 1289  
1279   - dout("flush_snaps %p cap_snap %p follows %lld size %llu\n",
1280   - inode, capsnap, next_follows, capsnap->size);
  1290 + dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n",
  1291 + inode, capsnap, capsnap->follows, capsnap->flush_tid);
1281 1292 send_cap_msg(session, ceph_vino(inode).ino, 0,
1282 1293 CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0,
1283 1294 capsnap->dirty, 0, capsnap->flush_tid, 0, mseq,
... ... @@ -1314,7 +1325,7 @@
1314 1325 struct inode *inode = &ci->vfs_inode;
1315 1326  
1316 1327 spin_lock(&inode->i_lock);
1317   - __ceph_flush_snaps(ci, NULL);
  1328 + __ceph_flush_snaps(ci, NULL, 0);
1318 1329 spin_unlock(&inode->i_lock);
1319 1330 }
1320 1331  
... ... @@ -1477,7 +1488,7 @@
1477 1488  
1478 1489 /* flush snaps first time around only */
1479 1490 if (!list_empty(&ci->i_cap_snaps))
1480   - __ceph_flush_snaps(ci, &session);
  1491 + __ceph_flush_snaps(ci, &session, 0);
1481 1492 goto retry_locked;
1482 1493 retry:
1483 1494 spin_lock(&inode->i_lock);
... ... @@ -1894,7 +1905,7 @@
1894 1905 if (cap && cap->session == session) {
1895 1906 dout("kick_flushing_caps %p cap %p capsnap %p\n", inode,
1896 1907 cap, capsnap);
1897   - __ceph_flush_snaps(ci, &session);
  1908 + __ceph_flush_snaps(ci, &session, 1);
1898 1909 } else {
1899 1910 pr_err("%p auth cap %p not mds%d ???\n", inode,
1900 1911 cap, session->s_mds);
... ... @@ -1021,11 +1021,15 @@
1021 1021 static void ceph_dentry_release(struct dentry *dentry)
1022 1022 {
1023 1023 struct ceph_dentry_info *di = ceph_dentry(dentry);
1024   - struct inode *parent_inode = dentry->d_parent->d_inode;
1025   - u64 snapid = ceph_snap(parent_inode);
  1024 + struct inode *parent_inode = NULL;
  1025 + u64 snapid = CEPH_NOSNAP;
1026 1026  
  1027 + if (!IS_ROOT(dentry)) {
  1028 + parent_inode = dentry->d_parent->d_inode;
  1029 + if (parent_inode)
  1030 + snapid = ceph_snap(parent_inode);
  1031 + }
1027 1032 dout("dentry_release %p parent %p\n", dentry, parent_inode);
1028   -
1029 1033 if (parent_inode && snapid != CEPH_SNAPDIR) {
1030 1034 struct ceph_inode_info *ci = ceph_inode(parent_inode);
1031 1035  
... ... @@ -845,7 +845,7 @@
845 845 * the caller) if we fail.
846 846 */
847 847 static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
848   - bool *prehash)
  848 + bool *prehash, bool set_offset)
849 849 {
850 850 struct dentry *realdn;
851 851  
... ... @@ -877,7 +877,8 @@
877 877 }
878 878 if ((!prehash || *prehash) && d_unhashed(dn))
879 879 d_rehash(dn);
880   - ceph_set_dentry_offset(dn);
  880 + if (set_offset)
  881 + ceph_set_dentry_offset(dn);
881 882 out:
882 883 return dn;
883 884 }
... ... @@ -1062,7 +1063,7 @@
1062 1063 d_delete(dn);
1063 1064 goto done;
1064 1065 }
1065   - dn = splice_dentry(dn, in, &have_lease);
  1066 + dn = splice_dentry(dn, in, &have_lease, true);
1066 1067 if (IS_ERR(dn)) {
1067 1068 err = PTR_ERR(dn);
1068 1069 goto done;
... ... @@ -1105,7 +1106,7 @@
1105 1106 goto done;
1106 1107 }
1107 1108 dout(" linking snapped dir %p to dn %p\n", in, dn);
1108   - dn = splice_dentry(dn, in, NULL);
  1109 + dn = splice_dentry(dn, in, NULL, true);
1109 1110 if (IS_ERR(dn)) {
1110 1111 err = PTR_ERR(dn);
1111 1112 goto done;
... ... @@ -1237,7 +1238,7 @@
1237 1238 err = PTR_ERR(in);
1238 1239 goto out;
1239 1240 }
1240   - dn = splice_dentry(dn, in, NULL);
  1241 + dn = splice_dentry(dn, in, NULL, false);
1241 1242 if (IS_ERR(dn))
1242 1243 dn = NULL;
1243 1244 }
fs/ceph/mds_client.c
... ... @@ -2374,6 +2374,8 @@
2374 2374 num_fcntl_locks,
2375 2375 num_flock_locks);
2376 2376 unlock_kernel();
  2377 + } else {
  2378 + err = ceph_pagelist_append(pagelist, &rec, reclen);
2377 2379 }
2378 2380  
2379 2381 out_free:
... ... @@ -5,10 +5,18 @@
5 5  
6 6 #include "pagelist.h"
7 7  
  8 +static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl)
  9 +{
  10 + struct page *page = list_entry(pl->head.prev, struct page,
  11 + lru);
  12 + kunmap(page);
  13 +}
  14 +
8 15 int ceph_pagelist_release(struct ceph_pagelist *pl)
9 16 {
10 17 if (pl->mapped_tail)
11   - kunmap(pl->mapped_tail);
  18 + ceph_pagelist_unmap_tail(pl);
  19 +
12 20 while (!list_empty(&pl->head)) {
13 21 struct page *page = list_first_entry(&pl->head, struct page,
14 22 lru);
... ... @@ -26,7 +34,7 @@
26 34 pl->room += PAGE_SIZE;
27 35 list_add_tail(&page->lru, &pl->head);
28 36 if (pl->mapped_tail)
29   - kunmap(pl->mapped_tail);
  37 + ceph_pagelist_unmap_tail(pl);
30 38 pl->mapped_tail = kmap(page);
31 39 return 0;
32 40 }
... ... @@ -119,6 +119,7 @@
119 119 INIT_LIST_HEAD(&realm->children);
120 120 INIT_LIST_HEAD(&realm->child_item);
121 121 INIT_LIST_HEAD(&realm->empty_item);
  122 + INIT_LIST_HEAD(&realm->dirty_item);
122 123 INIT_LIST_HEAD(&realm->inodes_with_caps);
123 124 spin_lock_init(&realm->inodes_with_caps_lock);
124 125 __insert_snap_realm(&mdsc->snap_realms, realm);
... ... @@ -467,7 +468,7 @@
467 468 INIT_LIST_HEAD(&capsnap->ci_item);
468 469 INIT_LIST_HEAD(&capsnap->flushing_item);
469 470  
470   - capsnap->follows = snapc->seq - 1;
  471 + capsnap->follows = snapc->seq;
471 472 capsnap->issued = __ceph_caps_issued(ci, NULL);
472 473 capsnap->dirty = dirty;
473 474  
... ... @@ -604,6 +605,7 @@
604 605 struct ceph_snap_realm *realm;
605 606 int invalidate = 0;
606 607 int err = -ENOMEM;
  608 + LIST_HEAD(dirty_realms);
607 609  
608 610 dout("update_snap_trace deletion=%d\n", deletion);
609 611 more:
... ... @@ -626,24 +628,6 @@
626 628 }
627 629 }
628 630  
629   - if (le64_to_cpu(ri->seq) > realm->seq) {
630   - dout("update_snap_trace updating %llx %p %lld -> %lld\n",
631   - realm->ino, realm, realm->seq, le64_to_cpu(ri->seq));
632   - /*
633   - * if the realm seq has changed, queue a cap_snap for every
634   - * inode with open caps. we do this _before_ we update
635   - * the realm info so that we prepare for writeback under the
636   - * _previous_ snap context.
637   - *
638   - * ...unless it's a snap deletion!
639   - */
640   - if (!deletion)
641   - queue_realm_cap_snaps(realm);
642   - } else {
643   - dout("update_snap_trace %llx %p seq %lld unchanged\n",
644   - realm->ino, realm, realm->seq);
645   - }
646   -
647 631 /* ensure the parent is correct */
648 632 err = adjust_snap_realm_parent(mdsc, realm, le64_to_cpu(ri->parent));
649 633 if (err < 0)
... ... @@ -651,6 +635,8 @@
651 635 invalidate += err;
652 636  
653 637 if (le64_to_cpu(ri->seq) > realm->seq) {
  638 + dout("update_snap_trace updating %llx %p %lld -> %lld\n",
  639 + realm->ino, realm, realm->seq, le64_to_cpu(ri->seq));
654 640 /* update realm parameters, snap lists */
655 641 realm->seq = le64_to_cpu(ri->seq);
656 642 realm->created = le64_to_cpu(ri->created);
657 643  
658 644  
... ... @@ -668,9 +654,17 @@
668 654 if (err < 0)
669 655 goto fail;
670 656  
  657 + /* queue realm for cap_snap creation */
  658 + list_add(&realm->dirty_item, &dirty_realms);
  659 +
671 660 invalidate = 1;
672 661 } else if (!realm->cached_context) {
  662 + dout("update_snap_trace %llx %p seq %lld new\n",
  663 + realm->ino, realm, realm->seq);
673 664 invalidate = 1;
  665 + } else {
  666 + dout("update_snap_trace %llx %p seq %lld unchanged\n",
  667 + realm->ino, realm, realm->seq);
674 668 }
675 669  
676 670 dout("done with %llx %p, invalidated=%d, %p %p\n", realm->ino,
... ... @@ -683,6 +677,14 @@
683 677 if (invalidate)
684 678 rebuild_snap_realms(realm);
685 679  
  680 + /*
  681 + * queue cap snaps _after_ we've built the new snap contexts,
  682 + * so that i_head_snapc can be set appropriately.
  683 + */
  684 + list_for_each_entry(realm, &dirty_realms, dirty_item) {
  685 + queue_realm_cap_snaps(realm);
  686 + }
  687 +
686 688 __cleanup_empty_realms(mdsc);
687 689 return 0;
688 690  
... ... @@ -715,7 +717,7 @@
715 717 igrab(inode);
716 718 spin_unlock(&mdsc->snap_flush_lock);
717 719 spin_lock(&inode->i_lock);
718   - __ceph_flush_snaps(ci, &session);
  720 + __ceph_flush_snaps(ci, &session, 0);
719 721 spin_unlock(&inode->i_lock);
720 722 iput(inode);
721 723 spin_lock(&mdsc->snap_flush_lock);
... ... @@ -816,6 +818,7 @@
816 818 };
817 819 struct inode *inode = ceph_find_inode(sb, vino);
818 820 struct ceph_inode_info *ci;
  821 + struct ceph_snap_realm *oldrealm;
819 822  
820 823 if (!inode)
821 824 continue;
822 825  
823 826  
... ... @@ -841,18 +844,19 @@
841 844 dout(" will move %p to split realm %llx %p\n",
842 845 inode, realm->ino, realm);
843 846 /*
844   - * Remove the inode from the realm's inode
845   - * list, but don't add it to the new realm
846   - * yet. We don't want the cap_snap to be
847   - * queued (again) by ceph_update_snap_trace()
848   - * below. Queue it _now_, under the old context.
  847 + * Move the inode to the new realm
849 848 */
850 849 spin_lock(&realm->inodes_with_caps_lock);
851 850 list_del_init(&ci->i_snap_realm_item);
  851 + list_add(&ci->i_snap_realm_item,
  852 + &realm->inodes_with_caps);
  853 + oldrealm = ci->i_snap_realm;
  854 + ci->i_snap_realm = realm;
852 855 spin_unlock(&realm->inodes_with_caps_lock);
853 856 spin_unlock(&inode->i_lock);
854 857  
855   - ceph_queue_cap_snap(ci);
  858 + ceph_get_snap_realm(mdsc, realm);
  859 + ceph_put_snap_realm(mdsc, oldrealm);
856 860  
857 861 iput(inode);
858 862 continue;
859 863  
... ... @@ -880,43 +884,9 @@
880 884 ceph_update_snap_trace(mdsc, p, e,
881 885 op == CEPH_SNAP_OP_DESTROY);
882 886  
883   - if (op == CEPH_SNAP_OP_SPLIT) {
884   - /*
885   - * ok, _now_ add the inodes into the new realm.
886   - */
887   - for (i = 0; i < num_split_inos; i++) {
888   - struct ceph_vino vino = {
889   - .ino = le64_to_cpu(split_inos[i]),
890   - .snap = CEPH_NOSNAP,
891   - };
892   - struct inode *inode = ceph_find_inode(sb, vino);
893   - struct ceph_inode_info *ci;
894   -
895   - if (!inode)
896   - continue;
897   - ci = ceph_inode(inode);
898   - spin_lock(&inode->i_lock);
899   - if (list_empty(&ci->i_snap_realm_item)) {
900   - struct ceph_snap_realm *oldrealm =
901   - ci->i_snap_realm;
902   -
903   - dout(" moving %p to split realm %llx %p\n",
904   - inode, realm->ino, realm);
905   - spin_lock(&realm->inodes_with_caps_lock);
906   - list_add(&ci->i_snap_realm_item,
907   - &realm->inodes_with_caps);
908   - ci->i_snap_realm = realm;
909   - spin_unlock(&realm->inodes_with_caps_lock);
910   - ceph_get_snap_realm(mdsc, realm);
911   - ceph_put_snap_realm(mdsc, oldrealm);
912   - }
913   - spin_unlock(&inode->i_lock);
914   - iput(inode);
915   - }
916   -
  887 + if (op == CEPH_SNAP_OP_SPLIT)
917 888 /* we took a reference when we created the realm, above */
918 889 ceph_put_snap_realm(mdsc, realm);
919   - }
920 890  
921 891 __cleanup_empty_realms(mdsc);
922 892  
... ... @@ -690,6 +690,8 @@
690 690  
691 691 struct list_head empty_item; /* if i have ref==0 */
692 692  
  693 + struct list_head dirty_item; /* if realm needs new context */
  694 +
693 695 /* the current set of snaps for this realm */
694 696 struct ceph_snap_context *cached_context;
695 697  
... ... @@ -826,7 +828,8 @@
826 828 extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
827 829 struct ceph_snap_context *snapc);
828 830 extern void __ceph_flush_snaps(struct ceph_inode_info *ci,
829   - struct ceph_mds_session **psession);
  831 + struct ceph_mds_session **psession,
  832 + int again);
830 833 extern void ceph_check_caps(struct ceph_inode_info *ci, int flags,
831 834 struct ceph_mds_session *session);
832 835 extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc);
... ... @@ -1093,6 +1093,10 @@
1093 1093  
1094 1094 #include <linux/fcntl.h>
1095 1095  
  1096 +/* temporary stubs for BKL removal */
  1097 +#define lock_flocks() lock_kernel()
  1098 +#define unlock_flocks() unlock_kernel()
  1099 +
1096 1100 extern void send_sigio(struct fown_struct *fown, int fd, int band);
1097 1101  
1098 1102 #ifdef CONFIG_FILE_LOCKING
... ... @@ -3513,9 +3513,9 @@
3513 3513 rtime = nsecs_to_cputime(p->se.sum_exec_runtime);
3514 3514  
3515 3515 if (total) {
3516   - u64 temp;
  3516 + u64 temp = rtime;
3517 3517  
3518   - temp = (u64)(rtime * utime);
  3518 + temp *= utime;
3519 3519 do_div(temp, total);
3520 3520 utime = (cputime_t)temp;
3521 3521 } else
3522 3522  
... ... @@ -3546,9 +3546,9 @@
3546 3546 rtime = nsecs_to_cputime(cputime.sum_exec_runtime);
3547 3547  
3548 3548 if (total) {
3549   - u64 temp;
  3549 + u64 temp = rtime;
3550 3550  
3551   - temp = (u64)(rtime * cputime.utime);
  3551 + temp *= cputime.utime;
3552 3552 do_div(temp, total);
3553 3553 utime = (cputime_t)temp;
3554 3554 } else
... ... @@ -3630,7 +3630,7 @@
3630 3630 if (time_before(now, nohz.next_balance))
3631 3631 return 0;
3632 3632  
3633   - if (!rq->nr_running)
  3633 + if (rq->idle_at_tick)
3634 3634 return 0;
3635 3635  
3636 3636 first_pick_cpu = atomic_read(&nohz.first_pick_cpu);