Commit d1442d85cc30ea75f7d399474ca738e0bc96f715

Authored by Nadav Amit
Committed by Paolo Bonzini
1 parent 234f3ce485

KVM: x86: Handle errors when RIP is set during far jumps

Far jmp/call/ret may fault while loading a new RIP.  Currently KVM does not
handle this case, which may result in a failed vm-entry once the assignment
is done.  The tricky part of handling it is that loading the new CS affects
the VMCS/VMCB state, so if loading the new RIP then fails, we are left in an
inconsistent state.  Therefore, on 64-bit this patch saves the old CS
descriptor and restores it if loading the new RIP fails.

This fixes CVE-2014-3647.

Cc: stable@vger.kernel.org
Signed-off-by: Nadav Amit <namit@cs.technion.ac.il>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

Showing 1 changed file with 88 additions and 30 deletions
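For context on the failure being handled: in 64-bit mode the new RIP of a far branch must be canonical, and without this patch a non-canonical value could be written into the guest RIP (and from there into the VMCS/VMCB), so the subsequent vm-entry would fail.  The check that rejects such a RIP lives in assign_eip_far(), which this diff calls but does not define; the patch is about what to do with the CS that has already been committed by the time that check fires.  Below is a minimal, self-contained sketch of the canonical-address test only; the names and types are illustrative stand-ins, not the KVM code.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/*
 * A 64-bit linear address is canonical when bits 63:47 are a sign
 * extension of bit 47 (assuming 48-bit virtual addresses).
 */
static bool is_canonical(uint64_t addr)
{
        return (uint64_t)((int64_t)(addr << 16) >> 16) == addr;
}

int main(void)
{
        /* A far-branch target like bad_rip must be rejected with #GP(0), */
        /* not stored into the guest RIP and later into the VMCS/VMCB.    */
        uint64_t bad_rip  = 0x0000800000000000ULL;  /* bit 47 set, 63:48 clear */
        uint64_t good_rip = 0xffff800000001000ULL;  /* properly sign-extended  */

        printf("%#llx canonical? %d\n",
               (unsigned long long)bad_rip, is_canonical(bad_rip));
        printf("%#llx canonical? %d\n",
               (unsigned long long)good_rip, is_canonical(good_rip));
        return 0;
}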

arch/x86/kvm/emulate.c
... ... @@ -1443,7 +1443,9 @@
1443 1443  
1444 1444 /* Does not support long mode */
1445 1445 static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1446   - u16 selector, int seg, u8 cpl, bool in_task_switch)
  1446 + u16 selector, int seg, u8 cpl,
  1447 + bool in_task_switch,
  1448 + struct desc_struct *desc)
1447 1449 {
1448 1450 struct desc_struct seg_desc, old_desc;
1449 1451 u8 dpl, rpl;
... ... @@ -1584,6 +1586,8 @@
1584 1586 }
1585 1587 load:
1586 1588 ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
  1589 + if (desc)
  1590 + *desc = seg_desc;
1587 1591 return X86EMUL_CONTINUE;
1588 1592 exception:
1589 1593 return emulate_exception(ctxt, err_vec, err_code, true);
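The desc out-parameter added above lets callers see the descriptor that set_segment just committed.  The far-branch emulation only needs its L bit: if the target code segment is a 64-bit one, the new RIP is a full 64-bit value that must be canonical; otherwise it is truncated to the operand size.  A stand-alone sketch of where the L and D/B bits sit in a raw 8-byte descriptor (bit positions per the SDM; the helper names and sample descriptors are made up for illustration, not Linux's struct desc_struct):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Bit 53 (L, 64-bit code) and bit 54 (D/B, default operand size). */
static bool desc_long_mode(uint64_t raw_desc)
{
        return (raw_desc >> 53) & 1;
}

static bool desc_default_big(uint64_t raw_desc)
{
        return (raw_desc >> 54) & 1;
}

int main(void)
{
        uint64_t cs64 = 0x00af9b000000ffffULL;  /* typical 64-bit code segment */
        uint64_t cs32 = 0x00cf9b000000ffffULL;  /* typical 32-bit code segment */

        printf("cs64: L=%d D=%d\n", desc_long_mode(cs64), desc_default_big(cs64));
        printf("cs32: L=%d D=%d\n", desc_long_mode(cs32), desc_default_big(cs32));
        return 0;
}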
... ... @@ -1593,7 +1597,7 @@
1593 1597 u16 selector, int seg)
1594 1598 {
1595 1599 u8 cpl = ctxt->ops->cpl(ctxt);
1596   - return __load_segment_descriptor(ctxt, selector, seg, cpl, false);
  1600 + return __load_segment_descriptor(ctxt, selector, seg, cpl, false, NULL);
1597 1601 }
1598 1602  
1599 1603 static void write_register_operand(struct operand *op)
... ... @@ -1987,17 +1991,31 @@
1987 1991 static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
1988 1992 {
1989 1993 int rc;
1990   - unsigned short sel;
  1994 + unsigned short sel, old_sel;
  1995 + struct desc_struct old_desc, new_desc;
  1996 + const struct x86_emulate_ops *ops = ctxt->ops;
  1997 + u8 cpl = ctxt->ops->cpl(ctxt);
1991 1998  
  1999 + /* Assignment of RIP may only fail in 64-bit mode */
  2000 + if (ctxt->mode == X86EMUL_MODE_PROT64)
  2001 + ops->get_segment(ctxt, &old_sel, &old_desc, NULL,
  2002 + VCPU_SREG_CS);
  2003 +
1992 2004 memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
1993 2005  
1994   - rc = load_segment_descriptor(ctxt, sel, VCPU_SREG_CS);
  2006 + rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false,
  2007 + &new_desc);
1995 2008 if (rc != X86EMUL_CONTINUE)
1996 2009 return rc;
1997 2010  
1998   - ctxt->_eip = 0;
1999   - memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);
2000   - return X86EMUL_CONTINUE;
  2011 + rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
  2012 + if (rc != X86EMUL_CONTINUE) {
  2013 + WARN_ON(!ctxt->mode != X86EMUL_MODE_PROT64);
  2014 + /* assigning eip failed; restore the old cs */
  2015 + ops->set_segment(ctxt, old_sel, &old_desc, 0, VCPU_SREG_CS);
  2016 + return rc;
  2017 + }
  2018 + return rc;
2001 2019 }
2002 2020  
2003 2021 static int em_grp45(struct x86_emulate_ctxt *ctxt)
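In em_jmp_far() the ordering is the delicate part: __load_segment_descriptor() has already pushed the new CS into the VMCS/VMCB-backed segment state by the time assign_eip_far() runs, so a failure there has to undo the CS load, and the old descriptor is snapshotted only in 64-bit mode because, as the comment notes, that is the only mode in which the RIP assignment can fail.  A compact user-space sketch of the same commit-then-rollback shape (stand-in types and helpers, not the emulator API):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct seg { uint16_t sel; uint64_t raw_desc; };  /* stand-in for CS state */

struct vcpu {
        struct seg cs;
        uint64_t rip;
        bool long_mode;
};

static bool is_canonical(uint64_t a)
{
        return (uint64_t)((int64_t)(a << 16) >> 16) == a;
}

static int emulate_jmp_far(struct vcpu *v, struct seg new_cs, uint64_t new_rip)
{
        struct seg old_cs = v->cs;      /* snapshot, needed only if RIP can fail */

        v->cs = new_cs;                 /* new CS is committed first             */
        if (v->long_mode && !is_canonical(new_rip)) {
                v->cs = old_cs;         /* RIP rejected: restore the old CS      */
                return -1;              /* caller would inject #GP(0)            */
        }
        v->rip = new_rip;
        return 0;
}

int main(void)
{
        struct vcpu v = { .cs = { 0x10, 0 }, .rip = 0x1000, .long_mode = true };
        struct seg new_cs = { 0x08, 0 };

        if (emulate_jmp_far(&v, new_cs, 0x0000800000000000ULL))
                printf("jump rejected, cs selector back to %#x\n",
                       (unsigned)v.cs.sel);
        return 0;
}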
... ... @@ -2064,21 +2082,34 @@
2064 2082 static int em_ret_far(struct x86_emulate_ctxt *ctxt)
2065 2083 {
2066 2084 int rc;
2067   - unsigned long cs;
  2085 + unsigned long eip, cs;
  2086 + u16 old_cs;
2068 2087 int cpl = ctxt->ops->cpl(ctxt);
  2088 + struct desc_struct old_desc, new_desc;
  2089 + const struct x86_emulate_ops *ops = ctxt->ops;
2069 2090  
2070   - rc = emulate_pop(ctxt, &ctxt->_eip, ctxt->op_bytes);
  2091 + if (ctxt->mode == X86EMUL_MODE_PROT64)
  2092 + ops->get_segment(ctxt, &old_cs, &old_desc, NULL,
  2093 + VCPU_SREG_CS);
  2094 +
  2095 + rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
2071 2096 if (rc != X86EMUL_CONTINUE)
2072 2097 return rc;
2073   - if (ctxt->op_bytes == 4)
2074   - ctxt->_eip = (u32)ctxt->_eip;
2075 2098 rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
2076 2099 if (rc != X86EMUL_CONTINUE)
2077 2100 return rc;
2078 2101 /* Outer-privilege level return is not implemented */
2079 2102 if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
2080 2103 return X86EMUL_UNHANDLEABLE;
2081   - rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
  2104 + rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, 0, false,
  2105 + &new_desc);
  2106 + if (rc != X86EMUL_CONTINUE)
  2107 + return rc;
  2108 + rc = assign_eip_far(ctxt, eip, new_desc.l);
  2109 + if (rc != X86EMUL_CONTINUE) {
  2110 + WARN_ON(!ctxt->mode != X86EMUL_MODE_PROT64);
  2111 + ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
  2112 + }
2082 2113 return rc;
2083 2114 }
2084 2115  
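In em_ret_far() the return RIP is now popped into a local instead of directly into ctxt->_eip, so nothing architectural changes until both the CS load and assign_eip_far() succeed; the old explicit ctxt->_eip = (u32)ctxt->_eip truncation disappears as well, presumably because assign_eip_far() applies the mode and operand-size truncation itself along with the canonical check.  A toy little-endian model of the far-return stack frame and the pop-into-temporaries step (stand-in helpers, not the emulator API):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/*
 * Toy model of the far-return stack frame: the return RIP and the CS
 * slot each occupy op_bytes (2, 4 or 8 depending on operand size), and
 * only the low 16 bits of the CS slot matter.
 */
static void emulate_pop(const uint8_t *stack, int *sp, uint64_t *dest,
                        int op_bytes)
{
        *dest = 0;
        memcpy(dest, stack + *sp, op_bytes);    /* assumes little-endian host */
        *sp += op_bytes;
}

int main(void)
{
        /* 64-bit frame: 8-byte RIP = 0x1000, then 8-byte CS slot = 0x33. */
        uint8_t stack[16] = {
                0x00, 0x10, 0, 0, 0, 0, 0, 0,
                0x33, 0x00, 0, 0, 0, 0, 0, 0,
        };
        int sp = 0, op_bytes = 8;
        uint64_t rip, cs;

        /* Pop into temporaries; nothing architectural is touched yet.    */
        emulate_pop(stack, &sp, &rip, op_bytes);
        emulate_pop(stack, &sp, &cs, op_bytes);
        printf("ret to cs=%#x rip=%#llx\n",
               (unsigned)(uint16_t)cs, (unsigned long long)rip);
        return 0;
}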
... ... @@ -2505,19 +2536,24 @@
2505 2536 * Now load segment descriptors. If fault happens at this stage
2506 2537 * it is handled in a context of new task
2507 2538 */
2508   - ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl, true);
  2539 + ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
  2540 + true, NULL);
2509 2541 if (ret != X86EMUL_CONTINUE)
2510 2542 return ret;
2511   - ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, true);
  2543 + ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
  2544 + true, NULL);
2512 2545 if (ret != X86EMUL_CONTINUE)
2513 2546 return ret;
2514   - ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, true);
  2547 + ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
  2548 + true, NULL);
2515 2549 if (ret != X86EMUL_CONTINUE)
2516 2550 return ret;
2517   - ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, true);
  2551 + ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
  2552 + true, NULL);
2518 2553 if (ret != X86EMUL_CONTINUE)
2519 2554 return ret;
2520   - ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, true);
  2555 + ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
  2556 + true, NULL);
2521 2557 if (ret != X86EMUL_CONTINUE)
2522 2558 return ret;
2523 2559  
... ... @@ -2642,25 +2678,32 @@
2642 2678 * Now load segment descriptors. If fault happenes at this stage
2643 2679 * it is handled in a context of new task
2644 2680 */
2645   - ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR, cpl, true);
  2681 + ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
  2682 + cpl, true, NULL);
2646 2683 if (ret != X86EMUL_CONTINUE)
2647 2684 return ret;
2648   - ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, true);
  2685 + ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
  2686 + true, NULL);
2649 2687 if (ret != X86EMUL_CONTINUE)
2650 2688 return ret;
2651   - ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, true);
  2689 + ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
  2690 + true, NULL);
2652 2691 if (ret != X86EMUL_CONTINUE)
2653 2692 return ret;
2654   - ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, true);
  2693 + ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
  2694 + true, NULL);
2655 2695 if (ret != X86EMUL_CONTINUE)
2656 2696 return ret;
2657   - ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, true);
  2697 + ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
  2698 + true, NULL);
2658 2699 if (ret != X86EMUL_CONTINUE)
2659 2700 return ret;
2660   - ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl, true);
  2701 + ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
  2702 + true, NULL);
2661 2703 if (ret != X86EMUL_CONTINUE)
2662 2704 return ret;
2663   - ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl, true);
  2705 + ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
  2706 + true, NULL);
2664 2707 if (ret != X86EMUL_CONTINUE)
2665 2708 return ret;
2666 2709  
... ... @@ -2942,24 +2985,39 @@
2942 2985 u16 sel, old_cs;
2943 2986 ulong old_eip;
2944 2987 int rc;
  2988 + struct desc_struct old_desc, new_desc;
  2989 + const struct x86_emulate_ops *ops = ctxt->ops;
  2990 + int cpl = ctxt->ops->cpl(ctxt);
2945 2991  
2946   - old_cs = get_segment_selector(ctxt, VCPU_SREG_CS);
2947 2992 old_eip = ctxt->_eip;
  2993 + ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);
2948 2994  
2949 2995 memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
2950   - if (load_segment_descriptor(ctxt, sel, VCPU_SREG_CS))
  2996 + rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false,
  2997 + &new_desc);
  2998 + if (rc != X86EMUL_CONTINUE)
2951 2999 return X86EMUL_CONTINUE;
2952 3000  
2953   - ctxt->_eip = 0;
2954   - memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);
  3001 + rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
  3002 + if (rc != X86EMUL_CONTINUE)
  3003 + goto fail;
2955 3004  
2956 3005 ctxt->src.val = old_cs;
2957 3006 rc = em_push(ctxt);
2958 3007 if (rc != X86EMUL_CONTINUE)
2959   - return rc;
  3008 + goto fail;
2960 3009  
2961 3010 ctxt->src.val = old_eip;
2962   - return em_push(ctxt);
  3011 + rc = em_push(ctxt);
  3012 + /* If we failed, we tainted the memory, but the very least we should
  3013 + restore cs */
  3014 + if (rc != X86EMUL_CONTINUE)
  3015 + goto fail;
  3016 + return rc;
  3017 +fail:
  3018 + ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
  3019 + return rc;
  3020 +
2963 3021 }
2964 3022  
2965 3023 static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
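em_call_far() has the most state to unwind: the new CS is committed and the new RIP assigned before the old CS and EIP are pushed, so a failure on either push leaves guest stack memory partially written ("tainted", as the comment above puts it) while CS has already changed; the fail path therefore restores only CS and gives up on the rest.  A condensed sketch of that single-exit rollback, with the second push simulated to fail (stand-in globals and helpers, not the emulator API):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static uint16_t cs = 0x10;      /* stand-in architectural state */
static uint64_t rip = 0x1000;

static bool push(uint64_t val)
{
        /* Pretend the second push faults (e.g. the stack page is gone). */
        static int calls;
        (void)val;
        return ++calls < 2;
}

static int emulate_call_far(uint16_t new_cs, uint64_t new_rip)
{
        uint16_t old_cs = cs;
        uint64_t old_rip = rip;
        int rc = 0;

        cs = new_cs;                    /* new CS committed first            */
        rip = new_rip;                  /* then the new RIP                  */

        if (!push(old_cs) || !push(old_rip)) {
                rc = -1;                /* memory may already be tainted...  */
                goto fail;              /* ...but at least restore CS        */
        }
        return rc;
fail:
        cs = old_cs;
        return rc;
}

int main(void)
{
        if (emulate_call_far(0x08, 0x2000))
                printf("call failed, cs restored to %#x\n", cs);
        return 0;
}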