Commit b45305fce5bb1abec263fcff9d81ebecd6306ede
1 parent
6547fbdbff
Exists in
master
and in
20 other branches
drm/i915: Implement workaround for broken CS tlb on i830/845
Now that Chris Wilson demonstrated that the key for stability on early gen 2 is to simply _never_ exchange the physical backing storage of batch buffers I've tried a stab at a kernel solution. Doesn't look too nefarious imho, now that I don't try to be too clever for my own good any more. v2: After discussing the various techniques, we've decided to always blit batches on the suspect devices, but allow userspace to opt out of the kernel workaround and assume full responsibility for providing coherent batches. The principal reason is that avoiding the blit does improve performance in a few key microbenchmarks and also in cairo-trace replays. Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> [danvet: - Drop the hunk which uses HAS_BROKEN_CS_TLB to implement the ring wrap w/a. Suggested by Chris Wilson. - Also add the ACTHD check from Chris Wilson for the error state dumping, so that we still catch batches when userspace opts out of the w/a.] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Showing 7 changed files with 100 additions and 8 deletions Side-by-side Diff
drivers/gpu/drm/i915/i915_dma.c
drivers/gpu/drm/i915/i915_drv.h
... | ... | @@ -1100,6 +1100,7 @@ |
1100 | 1100 | */ |
1101 | 1101 | atomic_t pending_flip; |
1102 | 1102 | }; |
1103 | +#define to_gem_object(obj) (&((struct drm_i915_gem_object *)(obj))->base) | |
1103 | 1104 | |
1104 | 1105 | #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base) |
1105 | 1106 | |
... | ... | @@ -1198,6 +1199,9 @@ |
1198 | 1199 | |
1199 | 1200 | #define HAS_OVERLAY(dev) (INTEL_INFO(dev)->has_overlay) |
1200 | 1201 | #define OVERLAY_NEEDS_PHYSICAL(dev) (INTEL_INFO(dev)->overlay_needs_physical) |
1202 | + | |
1203 | +/* Early gen2 have a totally busted CS tlb and require pinned batches. */ | |
1204 | +#define HAS_BROKEN_CS_TLB(dev) (IS_I830(dev) || IS_845G(dev)) | |
1201 | 1205 | |
1202 | 1206 | /* With the 945 and later, Y tiling got adjusted so that it was 32 128-byte |
1203 | 1207 | * rows, which changed the alignment requirements and fence programming. |
drivers/gpu/drm/i915/i915_gem_execbuffer.c
drivers/gpu/drm/i915/i915_irq.c
... | ... | @@ -1087,6 +1087,18 @@ |
1087 | 1087 | if (!ring->get_seqno) |
1088 | 1088 | return NULL; |
1089 | 1089 | |
1090 | + if (HAS_BROKEN_CS_TLB(dev_priv->dev)) { | |
1091 | + u32 acthd = I915_READ(ACTHD); | |
1092 | + | |
1093 | + if (WARN_ON(ring->id != RCS)) | |
1094 | + return NULL; | |
1095 | + | |
1096 | + obj = ring->private; | |
1097 | + if (acthd >= obj->gtt_offset && | |
1098 | + acthd < obj->gtt_offset + obj->base.size) | |
1099 | + return i915_error_object_create(dev_priv, obj); | |
1100 | + } | |
1101 | + | |
1090 | 1102 | seqno = ring->get_seqno(ring, false); |
1091 | 1103 | list_for_each_entry(obj, &dev_priv->mm.active_list, mm_list) { |
1092 | 1104 | if (obj->ring != ring) |
drivers/gpu/drm/i915/intel_ringbuffer.c
... | ... | @@ -547,9 +547,14 @@ |
547 | 547 | |
548 | 548 | static void render_ring_cleanup(struct intel_ring_buffer *ring) |
549 | 549 | { |
550 | + struct drm_device *dev = ring->dev; | |
551 | + | |
550 | 552 | if (!ring->private) |
551 | 553 | return; |
552 | 554 | |
555 | + if (HAS_BROKEN_CS_TLB(dev)) | |
556 | + drm_gem_object_unreference(to_gem_object(ring->private)); | |
557 | + | |
553 | 558 | cleanup_pipe_control(ring); |
554 | 559 | } |
555 | 560 | |
... | ... | @@ -969,6 +974,8 @@ |
969 | 974 | return 0; |
970 | 975 | } |
971 | 976 | |
977 | +/* Just userspace ABI convention to limit the wa batch bo to a reasonable size */ | |
978 | +#define I830_BATCH_LIMIT (256*1024) | |
972 | 979 | static int |
973 | 980 | i830_dispatch_execbuffer(struct intel_ring_buffer *ring, |
974 | 981 | u32 offset, u32 len, |
975 | 982 | |
976 | 983 | |
... | ... | @@ -976,16 +983,48 @@ |
976 | 983 | { |
977 | 984 | int ret; |
978 | 985 | |
979 | - ret = intel_ring_begin(ring, 4); | |
980 | - if (ret) | |
981 | - return ret; | |
986 | + if (flags & I915_DISPATCH_PINNED) { | |
987 | + ret = intel_ring_begin(ring, 4); | |
988 | + if (ret) | |
989 | + return ret; | |
982 | 990 | |
983 | - intel_ring_emit(ring, MI_BATCH_BUFFER); | |
984 | - intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE)); | |
985 | - intel_ring_emit(ring, offset + len - 8); | |
986 | - intel_ring_emit(ring, 0); | |
987 | - intel_ring_advance(ring); | |
991 | + intel_ring_emit(ring, MI_BATCH_BUFFER); | |
992 | + intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE)); | |
993 | + intel_ring_emit(ring, offset + len - 8); | |
994 | + intel_ring_emit(ring, MI_NOOP); | |
995 | + intel_ring_advance(ring); | |
996 | + } else { | |
997 | + struct drm_i915_gem_object *obj = ring->private; | |
998 | + u32 cs_offset = obj->gtt_offset; | |
988 | 999 | |
1000 | + if (len > I830_BATCH_LIMIT) | |
1001 | + return -ENOSPC; | |
1002 | + | |
1003 | + ret = intel_ring_begin(ring, 9+3); | |
1004 | + if (ret) | |
1005 | + return ret; | |
1006 | + /* Blit the batch (which has now all relocs applied) to the stable batch | |
1007 | + * scratch bo area (so that the CS never stumbles over its tlb | |
1008 | + * invalidation bug) ... */ | |
1009 | + intel_ring_emit(ring, XY_SRC_COPY_BLT_CMD | | |
1010 | + XY_SRC_COPY_BLT_WRITE_ALPHA | | |
1011 | + XY_SRC_COPY_BLT_WRITE_RGB); | |
1012 | + intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_GXCOPY | 4096); | |
1013 | + intel_ring_emit(ring, 0); | |
1014 | + intel_ring_emit(ring, (DIV_ROUND_UP(len, 4096) << 16) | 1024); | |
1015 | + intel_ring_emit(ring, cs_offset); | |
1016 | + intel_ring_emit(ring, 0); | |
1017 | + intel_ring_emit(ring, 4096); | |
1018 | + intel_ring_emit(ring, offset); | |
1019 | + intel_ring_emit(ring, MI_FLUSH); | |
1020 | + | |
1021 | + /* ... and execute it. */ | |
1022 | + intel_ring_emit(ring, MI_BATCH_BUFFER); | |
1023 | + intel_ring_emit(ring, cs_offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE)); | |
1024 | + intel_ring_emit(ring, cs_offset + len - 8); | |
1025 | + intel_ring_advance(ring); | |
1026 | + } | |
1027 | + | |
989 | 1028 | return 0; |
990 | 1029 | } |
991 | 1030 | |
... | ... | @@ -1595,6 +1634,27 @@ |
1595 | 1634 | ring->dispatch_execbuffer = i915_dispatch_execbuffer; |
1596 | 1635 | ring->init = init_render_ring; |
1597 | 1636 | ring->cleanup = render_ring_cleanup; |
1637 | + | |
1638 | + /* Workaround batchbuffer to combat CS tlb bug. */ | |
1639 | + if (HAS_BROKEN_CS_TLB(dev)) { | |
1640 | + struct drm_i915_gem_object *obj; | |
1641 | + int ret; | |
1642 | + | |
1643 | + obj = i915_gem_alloc_object(dev, I830_BATCH_LIMIT); | |
1644 | + if (obj == NULL) { | |
1645 | + DRM_ERROR("Failed to allocate batch bo\n"); | |
1646 | + return -ENOMEM; | |
1647 | + } | |
1648 | + | |
1649 | + ret = i915_gem_object_pin(obj, 0, true, false); | |
1650 | + if (ret != 0) { | |
1651 | + drm_gem_object_unreference(&obj->base); | |
1652 | + DRM_ERROR("Failed to pin batch bo\n"); | |
1653 | + return ret; | |
1654 | + } | |
1655 | + | |
1656 | + ring->private = obj; | |
1657 | + } | |
1598 | 1658 | |
1599 | 1659 | return intel_init_ring_buffer(dev, ring); |
1600 | 1660 | } |
drivers/gpu/drm/i915/intel_ringbuffer.h
include/uapi/drm/i915_drm.h
... | ... | @@ -307,6 +307,7 @@ |
307 | 307 | #define I915_PARAM_HAS_PRIME_VMAP_FLUSH 21 |
308 | 308 | #define I915_PARAM_RSVD_FOR_FUTURE_USE 22 |
309 | 309 | #define I915_PARAM_HAS_SECURE_BATCHES 23 |
310 | +#define I915_PARAM_HAS_PINNED_BATCHES 24 | |
310 | 311 | |
311 | 312 | typedef struct drm_i915_getparam { |
312 | 313 | int param; |
... | ... | @@ -676,6 +677,15 @@ |
676 | 677 | * DRM_ROOT_ONLY | DRM_MASTER processes. |
677 | 678 | */ |
678 | 679 | #define I915_EXEC_SECURE (1<<9) |
680 | + | |
681 | +/** Inform the kernel that the batch is and will always be pinned. This | |
682 | + * negates the requirement for a workaround to be performed to avoid | |
683 | + * an incoherent CS (such as can be found on 830/845). If this flag is | |
684 | + * not passed, the kernel will endeavour to make sure the batch is | |
685 | + * coherent with the CS before execution. If this flag is passed, | |
686 | + * userspace assumes the responsibility for ensuring the same. | |
687 | + */ | |
688 | +#define I915_EXEC_IS_PINNED (1<<10) | |
679 | 689 | |
680 | 690 | #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) |
681 | 691 | #define i915_execbuffer2_set_context_id(eb2, context) \ |
-
mentioned in commit d472fc
-
mentioned in commit d472fc
-
mentioned in commit d472fc
-
mentioned in commit d472fc
-
mentioned in commit d472fc
-
mentioned in commit d472fc
-
mentioned in commit d472fc
-
mentioned in commit d472fc
-
mentioned in commit d472fc
-
mentioned in commit d472fc
-
mentioned in commit d472fc
-
mentioned in commit d472fc
-
mentioned in commit d472fc