Commit 34b85e3574424beb30e4cd163e6da2e2282d2683

Authored by Linus Torvalds

Merge tag 'powerpc-3.19-2' of git://git.kernel.org/pub/scm/linux/kernel/git/mpe/linux

Pull second batch of powerpc updates from Michael Ellerman:
 "The highlight is the series that reworks the idle management on
  powernv, which allows us to use deeper idle states on those machines.

  There's the fix from Anton for the "BUG at kernel/smpboot.c:134!"
  problem.

  An i2c driver for powernv.  This is acked by Wolfram Sang, and he
  asked that we take it through the powerpc tree.

  A fix for audit from rgb at Red Hat, acked by Paul Moore who is one of
  the audit maintainers.

  A patch from Ben to export the symbol map of our OPAL firmware as a
  sysfs file, so that tools can use it.

  Also some CXL fixes, a couple of powerpc perf fixes, a fix for
  smt-enabled, and the patch to add __force to get_user() so we can use
  bitwise types"

* tag 'powerpc-3.19-2' of git://git.kernel.org/pub/scm/linux/kernel/git/mpe/linux:
  powerpc/powernv: Ignore smt-enabled on Power8 and later
  powerpc/uaccess: Allow get_user() with bitwise types
  powerpc/powernv: Expose OPAL firmware symbol map
  powernv/powerpc: Add winkle support for offline cpus
  powernv/cpuidle: Redesign idle states management
  powerpc/powernv: Enable Offline CPUs to enter deep idle states
  powerpc/powernv: Switch off MMU before entering nap/sleep/rvwinkle mode
  i2c: Driver to expose PowerNV platform i2c busses
  powerpc: add little endian flag to syscall_get_arch()
  power/perf/hv-24x7: Use kmem_cache_free() instead of kfree
  powerpc/perf/hv-24x7: Use per-cpu page buffer
  cxl: Unmap MMIO regions when detaching a context
  cxl: Add timeout to process element commands
  cxl: Change contexts_lock to a mutex to fix sleep while atomic bug
  powerpc: Secondary CPUs must set cpu_callin_map after setting active and online

Showing 32 changed files

Documentation/devicetree/bindings/i2c/i2c-opal.txt
  1 +Device-tree bindings for I2C OPAL driver
  2 +----------------------------------------
  3 +
  4 +Most of the device node and properties layout is specific to the firmware and
  5 +used by the firmware itself for configuring the port. From the Linux
  6 +perspective, the properties of use are "ibm,port-name" and "ibm,opal-id".
  7 +
  8 +Required properties:
  9 +
  10 +- reg: Port-id within a given master
  11 +- compatible: must be "ibm,opal-i2c"
  12 +- ibm,opal-id: Refers to a specific bus and used to identify it when calling
  13 + the relevant OPAL functions.
  14 +- bus-frequency: Operating frequency of the i2c bus (in Hz). Informational
  15 + for Linux; it is used by the firmware to program the port.
  16 +
  17 +Optional properties:
  18 +- ibm,port-name: A firmware-provided name that uniquely identifies the i2c
  19 + port.
  20 +
  21 +The node contains a number of other properties that are used by the FW itself
  22 +and depend on the specific hardware implementation. The example below depicts
  23 +a P8 on-chip bus.
  24 +
  25 +Example:
  26 +
  27 +i2c-bus@0 {
  28 + reg = <0x0>;
  29 + bus-frequency = <0x61a80>;
  30 + compatible = "ibm,power8-i2c-port", "ibm,opal-i2c";
  31 + ibm,opal-id = <0x1>;
  32 + ibm,port-name = "p8_00000000_e1p0";
  33 + #address-cells = <0x1>;
  34 + phandle = <0x10000006>;
  35 + #size-cells = <0x0>;
  36 + linux,phandle = <0x10000006>;
  37 +};
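
An illustrative sketch (not part of the binding) of how a kernel driver
might consume these properties, mirroring the i2c-opal probe added later
in this series; "np" stands for the bus device node:

    u32 opal_id;
    const char *pname;

    /* "ibm,opal-id" identifies the bus to the OPAL calls; required */
    if (of_property_read_u32(np, "ibm,opal-id", &opal_id))
        return -EIO;

    /* "ibm,port-name" is optional and purely descriptive */
    pname = of_get_property(np, "ibm,port-name", NULL);
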
arch/powerpc/include/asm/cpuidle.h
  1 +#ifndef _ASM_POWERPC_CPUIDLE_H
  2 +#define _ASM_POWERPC_CPUIDLE_H
  3 +
  4 +#ifdef CONFIG_PPC_POWERNV
  5 +/* Used in powernv idle state management */
  6 +#define PNV_THREAD_RUNNING 0
  7 +#define PNV_THREAD_NAP 1
  8 +#define PNV_THREAD_SLEEP 2
  9 +#define PNV_THREAD_WINKLE 3
  10 +#define PNV_CORE_IDLE_LOCK_BIT 0x100
  11 +#define PNV_CORE_IDLE_THREAD_BITS 0x0FF
  12 +
  13 +#ifndef __ASSEMBLY__
  14 +extern u32 pnv_fastsleep_workaround_at_entry[];
  15 +extern u32 pnv_fastsleep_workaround_at_exit[];
  16 +#endif
  17 +
  18 +#endif
  19 +
  20 +#endif
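
The core_idle_state word these constants describe is one u32 per core,
updated with lwarx/stwcx. in idle_power7.S. As an illustration only, a C
paraphrase of the entry-side update (the helper name is made up):

    /* Sketch: clear our bit in the per-core word when going idle;
     * returns true if we were the last running thread in the core.
     * The real code does this inside a lwarx/stwcx. retry loop.
     */
    static bool pnv_clear_thread_bit(u32 *core_idle_state, u8 thread_mask)
    {
        u32 s = *core_idle_state;          /* really lwarx            */
        s &= ~thread_mask;                 /* clear this thread's bit */
        *core_idle_state = s;              /* really stwcx., retried  */
        return !(s & PNV_CORE_IDLE_THREAD_BITS);
    }
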
arch/powerpc/include/asm/opal.h
... ... @@ -56,6 +56,14 @@
56 56 #define OPAL_HARDWARE_FROZEN -13
57 57 #define OPAL_WRONG_STATE -14
58 58 #define OPAL_ASYNC_COMPLETION -15
  59 +#define OPAL_I2C_TIMEOUT -17
  60 +#define OPAL_I2C_INVALID_CMD -18
  61 +#define OPAL_I2C_LBUS_PARITY -19
  62 +#define OPAL_I2C_BKEND_OVERRUN -20
  63 +#define OPAL_I2C_BKEND_ACCESS -21
  64 +#define OPAL_I2C_ARBT_LOST -22
  65 +#define OPAL_I2C_NACK_RCVD -23
  66 +#define OPAL_I2C_STOP_ERR -24
59 67  
60 68 /* API Tokens (in r0) */
61 69 #define OPAL_INVALID_CALL -1
62 70  
63 71  
... ... @@ -152,13 +160,26 @@
152 160 #define OPAL_PCI_ERR_INJECT 96
153 161 #define OPAL_PCI_EEH_FREEZE_SET 97
154 162 #define OPAL_HANDLE_HMI 98
  163 +#define OPAL_CONFIG_CPU_IDLE_STATE 99
  164 +#define OPAL_SLW_SET_REG 100
155 165 #define OPAL_REGISTER_DUMP_REGION 101
156 166 #define OPAL_UNREGISTER_DUMP_REGION 102
157 167 #define OPAL_WRITE_TPO 103
158 168 #define OPAL_READ_TPO 104
159 169 #define OPAL_IPMI_SEND 107
160 170 #define OPAL_IPMI_RECV 108
  171 +#define OPAL_I2C_REQUEST 109
161 172  
  173 +/* Device tree flags */
  174 +
  175 +/* Flags set in power-mgmt nodes in the device tree if the
  176 + * respective idle states are supported on the platform.
  177 + */
  178 +#define OPAL_PM_NAP_ENABLED 0x00010000
  179 +#define OPAL_PM_SLEEP_ENABLED 0x00020000
  180 +#define OPAL_PM_WINKLE_ENABLED 0x00040000
  181 +#define OPAL_PM_SLEEP_ENABLED_ER1 0x00080000
  182 +
162 183 #ifndef __ASSEMBLY__
163 184  
164 185 #include <linux/notifier.h>
... ... @@ -712,6 +733,24 @@
712 733 uint64_t line_len;
713 734 } oppanel_line_t;
714 735  
  736 +/* OPAL I2C request */
  737 +struct opal_i2c_request {
  738 + uint8_t type;
  739 +#define OPAL_I2C_RAW_READ 0
  740 +#define OPAL_I2C_RAW_WRITE 1
  741 +#define OPAL_I2C_SM_READ 2
  742 +#define OPAL_I2C_SM_WRITE 3
  743 + uint8_t flags;
  744 +#define OPAL_I2C_ADDR_10 0x01 /* Not supported yet */
  745 + uint8_t subaddr_sz; /* Max 4 */
  746 + uint8_t reserved;
  747 + __be16 addr; /* 7 or 10 bit address */
  748 + __be16 reserved2;
  749 + __be32 subaddr; /* Sub-address if any */
  750 + __be32 size; /* Data size */
  751 + __be64 buffer_ra; /* Buffer real address */
  752 +};
  753 +
715 754 /* /sys/firmware/opal */
716 755 extern struct kobject *opal_kobj;
717 756  
718 757  
... ... @@ -876,11 +915,14 @@
876 915 int64_t opal_handle_hmi(void);
877 916 int64_t opal_register_dump_region(uint32_t id, uint64_t start, uint64_t end);
878 917 int64_t opal_unregister_dump_region(uint32_t id);
  918 +int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val);
879 919 int64_t opal_pci_set_phb_cxl_mode(uint64_t phb_id, uint64_t mode, uint64_t pe_number);
880 920 int64_t opal_ipmi_send(uint64_t interface, struct opal_ipmi_msg *msg,
881 921 uint64_t msg_len);
882 922 int64_t opal_ipmi_recv(uint64_t interface, struct opal_ipmi_msg *msg,
883 923 uint64_t *msg_len);
  924 +int64_t opal_i2c_request(uint64_t async_token, uint32_t bus_id,
  925 + struct opal_i2c_request *oreq);
884 926  
885 927 /* Internal functions */
886 928 extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
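
A hedged sketch of filling the opal_i2c_request structure above for a
one-byte register read, in the spirit of the i2c-opal driver later in
this series (token, bus_id, reg and buf are placeholders):

    struct opal_i2c_request req;

    memset(&req, 0, sizeof(req));
    req.type       = OPAL_I2C_SM_READ;
    req.addr       = cpu_to_be16(0x50);       /* 7-bit device address */
    req.subaddr    = cpu_to_be32(reg);        /* register offset      */
    req.subaddr_sz = 1;
    req.size       = cpu_to_be32(1);
    req.buffer_ra  = cpu_to_be64(__pa(buf));  /* real address of data */

    rc = opal_i2c_request(token, bus_id, &req);
    /* a successful submission returns OPAL_ASYNC_COMPLETION; the
     * result arrives later via the async token */
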
arch/powerpc/include/asm/paca.h
... ... @@ -152,6 +152,16 @@
152 152 u64 tm_scratch; /* TM scratch area for reclaim */
153 153 #endif
154 154  
  155 +#ifdef CONFIG_PPC_POWERNV
  156 + /* Per-core mask tracking idle threads and a lock bit - [L][TTTTTTTT] */
  157 + u32 *core_idle_state_ptr;
  158 + u8 thread_idle_state; /* PNV_THREAD_RUNNING/NAP/SLEEP */
  159 + /* Mask to indicate thread id in core */
  160 + u8 thread_mask;
  161 + /* Mask to denote subcore sibling threads */
  162 + u8 subcore_sibling_mask;
  163 +#endif
  164 +
155 165 #ifdef CONFIG_PPC_BOOK3S_64
156 166 /* Exclusive emergency stack pointer for machine check exception. */
157 167 void *mc_emergency_sp;
arch/powerpc/include/asm/ppc-opcode.h
... ... @@ -194,6 +194,7 @@
194 194  
195 195 #define PPC_INST_NAP 0x4c000364
196 196 #define PPC_INST_SLEEP 0x4c0003a4
  197 +#define PPC_INST_WINKLE 0x4c0003e4
197 198  
198 199 /* A2 specific instructions */
199 200 #define PPC_INST_ERATWE 0x7c0001a6
... ... @@ -375,6 +376,7 @@
375 376  
376 377 #define PPC_NAP stringify_in_c(.long PPC_INST_NAP)
377 378 #define PPC_SLEEP stringify_in_c(.long PPC_INST_SLEEP)
  379 +#define PPC_WINKLE stringify_in_c(.long PPC_INST_WINKLE)
378 380  
379 381 /* BHRB instructions */
380 382 #define PPC_CLRBHRB stringify_in_c(.long PPC_INST_CLRBHRB)
arch/powerpc/include/asm/processor.h
... ... @@ -452,7 +452,8 @@
452 452  
453 453 extern int powersave_nap; /* set if nap mode can be used in idle loop */
454 454 extern unsigned long power7_nap(int check_irq);
455   -extern void power7_sleep(void);
  455 +extern unsigned long power7_sleep(void);
  456 +extern unsigned long power7_winkle(void);
456 457 extern void flush_instruction_cache(void);
457 458 extern void hard_reset_now(void);
458 459 extern void poweroff_now(void);
arch/powerpc/include/asm/reg.h
... ... @@ -118,8 +118,10 @@
118 118 #define __MSR (MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_ISF |MSR_HV)
119 119 #ifdef __BIG_ENDIAN__
120 120 #define MSR_ __MSR
  121 +#define MSR_IDLE (MSR_ME | MSR_SF | MSR_HV)
121 122 #else
122 123 #define MSR_ (__MSR | MSR_LE)
  124 +#define MSR_IDLE (MSR_ME | MSR_SF | MSR_HV | MSR_LE)
123 125 #endif
124 126 #define MSR_KERNEL (MSR_ | MSR_64BIT)
125 127 #define MSR_USER32 (MSR_ | MSR_PR | MSR_EE)
... ... @@ -371,6 +373,7 @@
371 373 #define SPRN_DBAT7L 0x23F /* Data BAT 7 Lower Register */
372 374 #define SPRN_DBAT7U 0x23E /* Data BAT 7 Upper Register */
373 375 #define SPRN_PPR 0x380 /* SMT Thread status Register */
  376 +#define SPRN_TSCR 0x399 /* Thread Switch Control Register */
374 377  
375 378 #define SPRN_DEC 0x016 /* Decrement Register */
376 379 #define SPRN_DER 0x095 /* Debug Enable Register */
... ... @@ -728,6 +731,7 @@
728 731 #define SPRN_BESCR 806 /* Branch event status and control register */
729 732 #define BESCR_GE 0x8000000000000000ULL /* Global Enable */
730 733 #define SPRN_WORT 895 /* Workload optimization register - thread */
  734 +#define SPRN_WORC 863 /* Workload optimization register - core */
731 735  
732 736 #define SPRN_PMC1 787
733 737 #define SPRN_PMC2 788
arch/powerpc/include/asm/syscall.h
... ... @@ -90,7 +90,11 @@
90 90  
91 91 static inline int syscall_get_arch(void)
92 92 {
93   - return is_32bit_task() ? AUDIT_ARCH_PPC : AUDIT_ARCH_PPC64;
  93 + int arch = is_32bit_task() ? AUDIT_ARCH_PPC : AUDIT_ARCH_PPC64;
  94 +#ifdef __LITTLE_ENDIAN__
  95 + arch |= __AUDIT_ARCH_LE;
  96 +#endif
  97 + return arch;
94 98 }
95 99 #endif /* _ASM_SYSCALL_H */
arch/powerpc/include/asm/uaccess.h
... ... @@ -284,7 +284,7 @@
284 284 if (!is_kernel_addr((unsigned long)__gu_addr)) \
285 285 might_fault(); \
286 286 __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \
287   - (x) = (__typeof__(*(ptr)))__gu_val; \
  287 + (x) = (__force __typeof__(*(ptr)))__gu_val; \
288 288 __gu_err; \
289 289 })
290 290 #endif /* __powerpc64__ */
... ... @@ -297,7 +297,7 @@
297 297 might_fault(); \
298 298 if (access_ok(VERIFY_READ, __gu_addr, (size))) \
299 299 __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \
300   - (x) = (__typeof__(*(ptr)))__gu_val; \
  300 + (x) = (__force __typeof__(*(ptr)))__gu_val; \
301 301 __gu_err; \
302 302 })
303 303  
... ... @@ -308,7 +308,7 @@
308 308 const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \
309 309 __chk_user_ptr(ptr); \
310 310 __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \
311   - (x) = (__typeof__(*(ptr)))__gu_val; \
  311 + (x) = (__force __typeof__(*(ptr)))__gu_val; \
312 312 __gu_err; \
313 313 })
314 314  
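
For context, a sketch of the sparse problem the __force fixes; __le32 is
a __bitwise type, and without __force in the macro's final cast sparse
emitted its usual "cast to restricted __le32" warning here:

    __le32 v;

    if (get_user(v, (__le32 __user *)uptr))   /* now sparse-clean */
        return -EFAULT;
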
arch/powerpc/kernel/asm-offsets.c
... ... @@ -726,6 +726,17 @@
726 726 arch.timing_last_enter.tv32.tbl));
727 727 #endif
728 728  
  729 +#ifdef CONFIG_PPC_POWERNV
  730 + DEFINE(PACA_CORE_IDLE_STATE_PTR,
  731 + offsetof(struct paca_struct, core_idle_state_ptr));
  732 + DEFINE(PACA_THREAD_IDLE_STATE,
  733 + offsetof(struct paca_struct, thread_idle_state));
  734 + DEFINE(PACA_THREAD_MASK,
  735 + offsetof(struct paca_struct, thread_mask));
  736 + DEFINE(PACA_SUBCORE_SIBLING_MASK,
  737 + offsetof(struct paca_struct, subcore_sibling_mask));
  738 +#endif
  739 +
729 740 return 0;
730 741 }
arch/powerpc/kernel/exceptions-64s.S
... ... @@ -15,6 +15,7 @@
15 15 #include <asm/hw_irq.h>
16 16 #include <asm/exception-64s.h>
17 17 #include <asm/ptrace.h>
  18 +#include <asm/cpuidle.h>
18 19  
19 20 /*
20 21 * We layout physical memory as follows:
21 22  
22 23  
23 24  
24 25  
... ... @@ -101,24 +102,35 @@
101 102 #ifdef CONFIG_PPC_P7_NAP
102 103 BEGIN_FTR_SECTION
103 104 /* Running native on arch 2.06 or later, check if we are
104   - * waking up from nap. We only handle no state loss and
105   - * supervisor state loss. We do -not- handle hypervisor
106   - * state loss at this time.
  105 + * waking up from nap/sleep/winkle.
107 106 */
108 107 mfspr r13,SPRN_SRR1
109 108 rlwinm. r13,r13,47-31,30,31
110 109 beq 9f
111 110  
112   - /* waking up from powersave (nap) state */
113   - cmpwi cr1,r13,2
114   - /* Total loss of HV state is fatal, we could try to use the
115   - * PIR to locate a PACA, then use an emergency stack etc...
116   - * OPAL v3 based powernv platforms have new idle states
117   - * which fall in this catagory.
  111 + cmpwi cr3,r13,2
  112 +
  113 + /*
  114 + * Check if the last bit of HSPRG0 is set. This indicates whether we are
  115 + * waking up from winkle.
118 116 */
119   - bgt cr1,8f
120 117 GET_PACA(r13)
  118 + clrldi r5,r13,63
  119 + clrrdi r13,r13,1
  120 + cmpwi cr4,r5,1
  121 + mtspr SPRN_HSPRG0,r13
121 122  
  123 + lbz r0,PACA_THREAD_IDLE_STATE(r13)
  124 + cmpwi cr2,r0,PNV_THREAD_NAP
  125 + bgt cr2,8f /* Either sleep or Winkle */
  126 +
  127 + /* Waking up from nap should not cause hypervisor state loss */
  128 + bgt cr3,.
  129 +
  130 + /* Waking up from nap */
  131 + li r0,PNV_THREAD_RUNNING
  132 + stb r0,PACA_THREAD_IDLE_STATE(r13) /* Clear thread state */
  133 +
122 134 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
123 135 li r0,KVM_HWTHREAD_IN_KERNEL
124 136 stb r0,HSTATE_HWTHREAD_STATE(r13)
... ... @@ -133,7 +145,7 @@
133 145  
134 146 /* Return SRR1 from power7_nap() */
135 147 mfspr r3,SPRN_SRR1
136   - beq cr1,2f
  148 + beq cr3,2f
137 149 b power7_wakeup_noloss
138 150 2: b power7_wakeup_loss
139 151  
... ... @@ -1382,6 +1394,7 @@
1382 1394 MACHINE_CHECK_HANDLER_WINDUP
1383 1395 GET_PACA(r13)
1384 1396 ld r1,PACAR1(r13)
  1397 + li r3,PNV_THREAD_NAP
1385 1398 b power7_enter_nap_mode
1386 1399 4:
1387 1400 #endif
arch/powerpc/kernel/idle_power7.S
... ... @@ -18,9 +18,25 @@
18 18 #include <asm/hw_irq.h>
19 19 #include <asm/kvm_book3s_asm.h>
20 20 #include <asm/opal.h>
  21 +#include <asm/cpuidle.h>
  22 +#include <asm/mmu-hash64.h>
21 23  
22 24 #undef DEBUG
23 25  
  26 +/*
  27 + * Use unused space in the interrupt stack to save and restore
  28 + * registers for winkle support.
  29 + */
  30 +#define _SDR1 GPR3
  31 +#define _RPR GPR4
  32 +#define _SPURR GPR5
  33 +#define _PURR GPR6
  34 +#define _TSCR GPR7
  35 +#define _DSCR GPR8
  36 +#define _AMOR GPR9
  37 +#define _WORT GPR10
  38 +#define _WORC GPR11
  39 +
24 40 /* Idle state entry routines */
25 41  
26 42 #define IDLE_STATE_ENTER_SEQ(IDLE_INST) \
... ... @@ -37,8 +53,7 @@
37 53  
38 54 /*
39 55 * Pass requested state in r3:
40   - * 0 - nap
41   - * 1 - sleep
  56 + * r3 - PNV_THREAD_NAP/SLEEP/WINKLE
42 57 *
43 58 * To check IRQ_HAPPENED in r4
44 59 * 0 - don't check
45 60  
46 61  
47 62  
... ... @@ -101,19 +116,106 @@
101 116 std r9,_MSR(r1)
102 117 std r1,PACAR1(r13)
103 118  
104   -_GLOBAL(power7_enter_nap_mode)
  119 + /*
  120 + * Go to real mode to do the nap, as required by the architecture.
  121 + * Also, we need to be in real mode before setting hwthread_state,
  122 + * because as soon as we do that, another thread can switch
  123 + * the MMU context to the guest.
  124 + */
  125 + LOAD_REG_IMMEDIATE(r5, MSR_IDLE)
  126 + li r6, MSR_RI
  127 + andc r6, r9, r6
  128 + LOAD_REG_ADDR(r7, power7_enter_nap_mode)
  129 + mtmsrd r6, 1 /* clear RI before setting SRR0/1 */
  130 + mtspr SPRN_SRR0, r7
  131 + mtspr SPRN_SRR1, r5
  132 + rfid
  133 +
  134 + .globl power7_enter_nap_mode
  135 +power7_enter_nap_mode:
105 136 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
106 137 /* Tell KVM we're napping */
107 138 li r4,KVM_HWTHREAD_IN_NAP
108 139 stb r4,HSTATE_HWTHREAD_STATE(r13)
109 140 #endif
110   - cmpwi cr0,r3,1
111   - beq 2f
  141 + stb r3,PACA_THREAD_IDLE_STATE(r13)
  142 + cmpwi cr3,r3,PNV_THREAD_SLEEP
  143 + bge cr3,2f
112 144 IDLE_STATE_ENTER_SEQ(PPC_NAP)
113 145 /* No return */
114   -2: IDLE_STATE_ENTER_SEQ(PPC_SLEEP)
115   - /* No return */
  146 +2:
  147 + /* Sleep or winkle */
  148 + lbz r7,PACA_THREAD_MASK(r13)
  149 + ld r14,PACA_CORE_IDLE_STATE_PTR(r13)
  150 +lwarx_loop1:
  151 + lwarx r15,0,r14
  152 + andc r15,r15,r7 /* Clear thread bit */
116 153  
  154 + andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS
  155 +
  156 +/*
  157 + * If cr0 = 0, then current thread is the last thread of the core entering
  158 + * sleep. Last thread needs to execute the hardware bug workaround code if
  159 + * required by the platform.
  160 + * Make the workaround call unconditionally here. The branch below is
  161 + * patched out when the idle states are discovered if the platform does not
  162 + * require it.
  163 + */
  164 +.global pnv_fastsleep_workaround_at_entry
  165 +pnv_fastsleep_workaround_at_entry:
  166 + beq fastsleep_workaround_at_entry
  167 +
  168 + stwcx. r15,0,r14
  169 + bne- lwarx_loop1
  170 + isync
  171 +
  172 +common_enter: /* common code for all the threads entering sleep or winkle */
  173 + bgt cr3,enter_winkle
  174 + IDLE_STATE_ENTER_SEQ(PPC_SLEEP)
  175 +
  176 +fastsleep_workaround_at_entry:
  177 + ori r15,r15,PNV_CORE_IDLE_LOCK_BIT
  178 + stwcx. r15,0,r14
  179 + bne- lwarx_loop1
  180 + isync
  181 +
  182 + /* Fast sleep workaround */
  183 + li r3,1
  184 + li r4,1
  185 + li r0,OPAL_CONFIG_CPU_IDLE_STATE
  186 + bl opal_call_realmode
  187 +
  188 + /* Clear Lock bit */
  189 + li r0,0
  190 + lwsync
  191 + stw r0,0(r14)
  192 + b common_enter
  193 +
  194 +enter_winkle:
  195 + /*
  196 + * Note that all registers, whether per-core, per-subcore or per-thread,
  197 + * are saved here, since any thread in the core might wake up first.
  198 + */
  199 + mfspr r3,SPRN_SDR1
  200 + std r3,_SDR1(r1)
  201 + mfspr r3,SPRN_RPR
  202 + std r3,_RPR(r1)
  203 + mfspr r3,SPRN_SPURR
  204 + std r3,_SPURR(r1)
  205 + mfspr r3,SPRN_PURR
  206 + std r3,_PURR(r1)
  207 + mfspr r3,SPRN_TSCR
  208 + std r3,_TSCR(r1)
  209 + mfspr r3,SPRN_DSCR
  210 + std r3,_DSCR(r1)
  211 + mfspr r3,SPRN_AMOR
  212 + std r3,_AMOR(r1)
  213 + mfspr r3,SPRN_WORT
  214 + std r3,_WORT(r1)
  215 + mfspr r3,SPRN_WORC
  216 + std r3,_WORC(r1)
  217 + IDLE_STATE_ENTER_SEQ(PPC_WINKLE)
  218 +
117 219 _GLOBAL(power7_idle)
118 220 /* Now check if user or arch enabled NAP mode */
119 221 LOAD_REG_ADDRBASE(r3,powersave_nap)
120 222  
121 223  
122 224  
... ... @@ -125,49 +227,22 @@
125 227  
126 228 _GLOBAL(power7_nap)
127 229 mr r4,r3
128   - li r3,0
  230 + li r3,PNV_THREAD_NAP
129 231 b power7_powersave_common
130 232 /* No return */
131 233  
132 234 _GLOBAL(power7_sleep)
133   - li r3,1
  235 + li r3,PNV_THREAD_SLEEP
134 236 li r4,1
135 237 b power7_powersave_common
136 238 /* No return */
137 239  
138   -/*
139   - * Make opal call in realmode. This is a generic function to be called
140   - * from realmode from reset vector. It handles endianess.
141   - *
142   - * r13 - paca pointer
143   - * r1 - stack pointer
144   - * r3 - opal token
145   - */
146   -opal_call_realmode:
147   - mflr r12
148   - std r12,_LINK(r1)
149   - ld r2,PACATOC(r13)
150   - /* Set opal return address */
151   - LOAD_REG_ADDR(r0,return_from_opal_call)
152   - mtlr r0
153   - /* Handle endian-ness */
154   - li r0,MSR_LE
155   - mfmsr r12
156   - andc r12,r12,r0
157   - mtspr SPRN_HSRR1,r12
158   - mr r0,r3 /* Move opal token to r0 */
159   - LOAD_REG_ADDR(r11,opal)
160   - ld r12,8(r11)
161   - ld r2,0(r11)
162   - mtspr SPRN_HSRR0,r12
163   - hrfid
  240 +_GLOBAL(power7_winkle)
  241 + li r3,3
  242 + li r4,1
  243 + b power7_powersave_common
  244 + /* No return */
164 245  
165   -return_from_opal_call:
166   - FIXUP_ENDIAN
167   - ld r0,_LINK(r1)
168   - mtlr r0
169   - blr
170   -
171 246 #define CHECK_HMI_INTERRUPT \
172 247 mfspr r0,SPRN_SRR1; \
173 248 BEGIN_FTR_SECTION_NESTED(66); \
... ... @@ -181,7 +256,7 @@
181 256 ld r2,PACATOC(r13); \
182 257 ld r1,PACAR1(r13); \
183 258 std r3,ORIG_GPR3(r1); /* Save original r3 */ \
184   - li r3,OPAL_HANDLE_HMI; /* Pass opal token argument*/ \
  259 + li r0,OPAL_HANDLE_HMI; /* Pass opal token argument*/ \
185 260 bl opal_call_realmode; \
186 261 ld r3,ORIG_GPR3(r1); /* Restore original r3 */ \
187 262 20: nop;
188 263  
189 264  
190 265  
191 266  
192 267  
... ... @@ -190,16 +265,190 @@
190 265 _GLOBAL(power7_wakeup_tb_loss)
191 266 ld r2,PACATOC(r13);
192 267 ld r1,PACAR1(r13)
  268 + /*
  269 + * Before entering any idle state, the NVGPRs are saved in the stack
  270 + * and they are restored before switching to the process context. Hence
  271 + * until they are restored, they are free to be used.
  272 + *
  273 + * Save SRR1 in a NVGPR as it might be clobbered in opal_call_realmode
  274 + * (called in CHECK_HMI_INTERRUPT). SRR1 is required to determine the
  275 + * wakeup reason if we branch to kvm_start_guest.
  276 + */
193 277  
  278 + mfspr r16,SPRN_SRR1
194 279 BEGIN_FTR_SECTION
195 280 CHECK_HMI_INTERRUPT
196 281 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
  282 +
  283 + lbz r7,PACA_THREAD_MASK(r13)
  284 + ld r14,PACA_CORE_IDLE_STATE_PTR(r13)
  285 +lwarx_loop2:
  286 + lwarx r15,0,r14
  287 + andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT
  288 + /*
  289 + * The lock bit is set in one of two cases:
  290 + * a. In the sleep/winkle enter path, the last thread is executing
  291 + * fastsleep workaround code.
  292 + * b. In the wake up path, another thread is executing fastsleep
  293 + * workaround undo code or resyncing timebase or restoring context
  294 + * In either case loop until the lock bit is cleared.
  295 + */
  296 + bne core_idle_lock_held
  297 +
  298 + cmpwi cr2,r15,0
  299 + lbz r4,PACA_SUBCORE_SIBLING_MASK(r13)
  300 + and r4,r4,r15
  301 + cmpwi cr1,r4,0 /* Check if first in subcore */
  302 +
  303 + /*
  304 + * At this stage
  305 + * cr1 - 0b0100 if first thread to wakeup in subcore
  306 + * cr2 - 0b0100 if first thread to wakeup in core
  307 + * cr3 - 0b0010 if waking up from sleep or winkle
  308 + * cr4 - 0b0100 if waking up from winkle
  309 + */
  310 +
  311 + or r15,r15,r7 /* Set thread bit */
  312 +
  313 + beq cr1,first_thread_in_subcore
  314 +
  315 + /* Not first thread in subcore to wake up */
  316 + stwcx. r15,0,r14
  317 + bne- lwarx_loop2
  318 + isync
  319 + b common_exit
  320 +
  321 +core_idle_lock_held:
  322 + HMT_LOW
  323 +core_idle_lock_loop:
  324 + lwz r15,0(r14)
  325 + andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT
  326 + bne core_idle_lock_loop
  327 + HMT_MEDIUM
  328 + b lwarx_loop2
  329 +
  330 +first_thread_in_subcore:
  331 + /* First thread in subcore to wakeup */
  332 + ori r15,r15,PNV_CORE_IDLE_LOCK_BIT
  333 + stwcx. r15,0,r14
  334 + bne- lwarx_loop2
  335 + isync
  336 +
  337 + /*
  338 + * If waking up from sleep, subcore state is not lost. Hence
  339 + * skip subcore state restore
  340 + */
  341 + bne cr4,subcore_state_restored
  342 +
  343 + /* Restore per-subcore state */
  344 + ld r4,_SDR1(r1)
  345 + mtspr SPRN_SDR1,r4
  346 + ld r4,_RPR(r1)
  347 + mtspr SPRN_RPR,r4
  348 + ld r4,_AMOR(r1)
  349 + mtspr SPRN_AMOR,r4
  350 +
  351 +subcore_state_restored:
  352 + /*
  353 + * Check if the thread is also the first thread in the core. If not,
  354 + * skip to clear_lock.
  355 + */
  356 + bne cr2,clear_lock
  357 +
  358 +first_thread_in_core:
  359 +
  360 + /*
  361 + * First thread in the core waking up from fastsleep. It needs to
  362 + * call the fastsleep workaround code if the platform requires it.
  363 + * Call it unconditionally here. The branch instruction below will
  364 + * be patched out when the idle states are discovered if platform
  365 + * does not require workaround.
  366 + */
  367 +.global pnv_fastsleep_workaround_at_exit
  368 +pnv_fastsleep_workaround_at_exit:
  369 + b fastsleep_workaround_at_exit
  370 +
  371 +timebase_resync:
  372 + /* Do timebase resync if we are waking up from sleep. Use cr3 value
  373 + * set in exceptions-64s.S */
  374 + ble cr3,clear_lock
197 375 /* Time base re-sync */
198   - li r3,OPAL_RESYNC_TIMEBASE
  376 + li r0,OPAL_RESYNC_TIMEBASE
199 377 bl opal_call_realmode;
200   -
201 378 /* TODO: Check r3 for failure */
202 379  
  380 + /*
  381 + * If waking up from sleep, per-core state is not lost; skip to
  382 + * clear_lock.
  383 + */
  384 + bne cr4,clear_lock
  385 +
  386 + /* Restore per core state */
  387 + ld r4,_TSCR(r1)
  388 + mtspr SPRN_TSCR,r4
  389 + ld r4,_WORC(r1)
  390 + mtspr SPRN_WORC,r4
  391 +
  392 +clear_lock:
  393 + andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS
  394 + lwsync
  395 + stw r15,0(r14)
  396 +
  397 +common_exit:
  398 + /*
  399 + * Common to all threads.
  400 + *
  401 + * If waking up from sleep, hypervisor state is not lost. Hence
  402 + * skip hypervisor state restore.
  403 + */
  404 + bne cr4,hypervisor_state_restored
  405 +
  406 + /* Waking up from winkle */
  407 +
  408 + /* Restore per thread state */
  409 + bl __restore_cpu_power8
  410 +
  411 + /* Restore SLB from PACA */
  412 + ld r8,PACA_SLBSHADOWPTR(r13)
  413 +
  414 + .rept SLB_NUM_BOLTED
  415 + li r3, SLBSHADOW_SAVEAREA
  416 + LDX_BE r5, r8, r3
  417 + addi r3, r3, 8
  418 + LDX_BE r6, r8, r3
  419 + andis. r7,r5,SLB_ESID_V@h
  420 + beq 1f
  421 + slbmte r6,r5
  422 +1: addi r8,r8,16
  423 + .endr
  424 +
  425 + ld r4,_SPURR(r1)
  426 + mtspr SPRN_SPURR,r4
  427 + ld r4,_PURR(r1)
  428 + mtspr SPRN_PURR,r4
  429 + ld r4,_DSCR(r1)
  430 + mtspr SPRN_DSCR,r4
  431 + ld r4,_WORT(r1)
  432 + mtspr SPRN_WORT,r4
  433 +
  434 +hypervisor_state_restored:
  435 +
  436 + li r5,PNV_THREAD_RUNNING
  437 + stb r5,PACA_THREAD_IDLE_STATE(r13)
  438 +
  439 + mtspr SPRN_SRR1,r16
  440 +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
  441 + li r0,KVM_HWTHREAD_IN_KERNEL
  442 + stb r0,HSTATE_HWTHREAD_STATE(r13)
  443 + /* Order setting hwthread_state vs. testing hwthread_req */
  444 + sync
  445 + lbz r0,HSTATE_HWTHREAD_REQ(r13)
  446 + cmpwi r0,0
  447 + beq 6f
  448 + b kvm_start_guest
  449 +6:
  450 +#endif
  451 +
203 452 REST_NVGPRS(r1)
204 453 REST_GPR(2, r1)
205 454 ld r3,_CCR(r1)
... ... @@ -211,6 +460,13 @@
211 460 mtspr SPRN_SRR1,r4
212 461 mtspr SPRN_SRR0,r5
213 462 rfid
  463 +
  464 +fastsleep_workaround_at_exit:
  465 + li r3,1
  466 + li r4,0
  467 + li r0,OPAL_CONFIG_CPU_IDLE_STATE
  468 + bl opal_call_realmode
  469 + b timebase_resync
214 470  
215 471 /*
216 472 * R3 here contains the value that will be returned to the caller
arch/powerpc/kernel/smp.c
... ... @@ -700,7 +700,6 @@
700 700 smp_store_cpu_info(cpu);
701 701 set_dec(tb_ticks_per_jiffy);
702 702 preempt_disable();
703   - cpu_callin_map[cpu] = 1;
704 703  
705 704 if (smp_ops->setup_cpu)
706 705 smp_ops->setup_cpu(cpu);
... ... @@ -738,6 +737,14 @@
738 737 smp_wmb();
739 738 notify_cpu_starting(cpu);
740 739 set_cpu_online(cpu, true);
  740 +
  741 + /*
  742 + * CPU must be marked active and online before we signal back to the
  743 + * master, because the scheduler needs to see the cpu_online and
  744 + * cpu_active bits set.
  745 + */
  746 + smp_wmb();
  747 + cpu_callin_map[cpu] = 1;
741 748  
742 749 local_irq_enable();
743 750  
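
The other half of this handshake is the boot cpu in __cpu_up(), which
spins on cpu_callin_map[cpu]; a simplified sketch (the loop bounds are
illustrative):

    int c;

    /* master side: wait for the secondary to check in */
    for (c = 50000; c && !cpu_callin_map[cpu]; c--)
        udelay(100);

Storing to cpu_callin_map only after set_cpu_online() thus guarantees
the master, and with it the scheduler, never sees the new cpu before its
active and online bits are set.
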
arch/powerpc/perf/hv-24x7.c
... ... @@ -177,7 +177,7 @@
177 177 } \
178 178 ret = sprintf(buf, _fmt, _expr); \
179 179 e_free: \
180   - kfree(page); \
  180 + kmem_cache_free(hv_page_cache, page); \
181 181 return ret; \
182 182 } \
183 183 static DEVICE_ATTR_RO(_name)
184 184  
... ... @@ -217,11 +217,14 @@
217 217 domain == HV_24X7_PERF_DOMAIN_PHYSICAL_CORE;
218 218 }
219 219  
  220 +DEFINE_PER_CPU(char, hv_24x7_reqb[4096]) __aligned(4096);
  221 +DEFINE_PER_CPU(char, hv_24x7_resb[4096]) __aligned(4096);
  222 +
220 223 static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix,
221 224 u16 lpar, u64 *res,
222 225 bool success_expected)
223 226 {
224   - unsigned long ret = -ENOMEM;
  227 + unsigned long ret;
225 228  
226 229 /*
227 230 * request_buffer and result_buffer are not required to be 4k aligned,
228 231  
... ... @@ -243,13 +246,11 @@
243 246 BUILD_BUG_ON(sizeof(*request_buffer) > 4096);
244 247 BUILD_BUG_ON(sizeof(*result_buffer) > 4096);
245 248  
246   - request_buffer = kmem_cache_zalloc(hv_page_cache, GFP_USER);
247   - if (!request_buffer)
248   - goto out;
  249 + request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
  250 + result_buffer = (void *)get_cpu_var(hv_24x7_resb);
249 251  
250   - result_buffer = kmem_cache_zalloc(hv_page_cache, GFP_USER);
251   - if (!result_buffer)
252   - goto out_free_request_buffer;
  252 + memset(request_buffer, 0, 4096);
  253 + memset(result_buffer, 0, 4096);
253 254  
254 255 *request_buffer = (struct reqb) {
255 256 .buf = {
256 257  
... ... @@ -278,15 +279,11 @@
278 279 domain, offset, ix, lpar, ret, ret,
279 280 result_buffer->buf.detailed_rc,
280 281 result_buffer->buf.failing_request_ix);
281   - goto out_free_result_buffer;
  282 + goto out;
282 283 }
283 284  
284 285 *res = be64_to_cpu(result_buffer->result);
285 286  
286   -out_free_result_buffer:
287   - kfree(result_buffer);
288   -out_free_request_buffer:
289   - kfree(request_buffer);
290 287 out:
291 288 return ret;
292 289 }
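
For reference, the per-cpu buffer pattern used above: DEFINE_PER_CPU
with an explicit alignment gives every cpu its own page-sized,
page-aligned buffer with no runtime allocation, and get_cpu_var() takes
a preemption-safe reference to the current cpu's copy. A generic sketch
(names are illustrative):

    #include <linux/percpu.h>

    DEFINE_PER_CPU(char, my_buf[4096]) __aligned(4096);

    static void use_buf(void)
    {
        char *buf = get_cpu_var(my_buf);   /* disables preemption */

        memset(buf, 0, 4096);
        /* ... fill and use buf ... */
        put_cpu_var(my_buf);               /* re-enables preemption */
    }
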
arch/powerpc/platforms/powernv/opal-wrappers.S
... ... @@ -158,6 +158,43 @@
158 158 blr
159 159 #endif
160 160  
  161 +/*
  162 + * Make opal call in realmode. This is a generic function to be called
  163 + * from realmode. It handles endianness.
  164 + *
  165 + * r13 - paca pointer
  166 + * r1 - stack pointer
  167 + * r0 - opal token
  168 + */
  169 +_GLOBAL(opal_call_realmode)
  170 + mflr r12
  171 + std r12,PPC_LR_STKOFF(r1)
  172 + ld r2,PACATOC(r13)
  173 + /* Set opal return address */
  174 + LOAD_REG_ADDR(r12,return_from_opal_call)
  175 + mtlr r12
  176 +
  177 + mfmsr r12
  178 +#ifdef __LITTLE_ENDIAN__
  179 + /* Handle endianness */
  180 + li r11,MSR_LE
  181 + andc r12,r12,r11
  182 +#endif
  183 + mtspr SPRN_HSRR1,r12
  184 + LOAD_REG_ADDR(r11,opal)
  185 + ld r12,8(r11)
  186 + ld r2,0(r11)
  187 + mtspr SPRN_HSRR0,r12
  188 + hrfid
  189 +
  190 +return_from_opal_call:
  191 +#ifdef __LITTLE_ENDIAN__
  192 + FIXUP_ENDIAN
  193 +#endif
  194 + ld r12,PPC_LR_STKOFF(r1)
  195 + mtlr r12
  196 + blr
  197 +
161 198 OPAL_CALL(opal_invalid_call, OPAL_INVALID_CALL);
162 199 OPAL_CALL(opal_console_write, OPAL_CONSOLE_WRITE);
163 200 OPAL_CALL(opal_console_read, OPAL_CONSOLE_READ);
... ... @@ -247,6 +284,7 @@
247 284 OPAL_CALL(opal_get_param, OPAL_GET_PARAM);
248 285 OPAL_CALL(opal_set_param, OPAL_SET_PARAM);
249 286 OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI);
  287 +OPAL_CALL(opal_slw_set_reg, OPAL_SLW_SET_REG);
250 288 OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION);
251 289 OPAL_CALL(opal_unregister_dump_region, OPAL_UNREGISTER_DUMP_REGION);
252 290 OPAL_CALL(opal_pci_set_phb_cxl_mode, OPAL_PCI_SET_PHB_CXL_MODE);
... ... @@ -254,4 +292,5 @@
254 292 OPAL_CALL(opal_tpo_read, OPAL_READ_TPO);
255 293 OPAL_CALL(opal_ipmi_send, OPAL_IPMI_SEND);
256 294 OPAL_CALL(opal_ipmi_recv, OPAL_IPMI_RECV);
  295 +OPAL_CALL(opal_i2c_request, OPAL_I2C_REQUEST);
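
Note the convention change that comes with moving opal_call_realmode
here: the OPAL token is now passed in r0 rather than r3, leaving r3 and
up free for the call's actual arguments. That is why the
CHECK_HMI_INTERRUPT and timebase-resync call sites in idle_power7.S were
switched from "li r3,OPAL_..." to "li r0,OPAL_..." earlier in this diff.
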
arch/powerpc/platforms/powernv/opal.c
... ... @@ -9,8 +9,9 @@
9 9 * 2 of the License, or (at your option) any later version.
10 10 */
11 11  
12   -#undef DEBUG
  12 +#define pr_fmt(fmt) "opal: " fmt
13 13  
  14 +#include <linux/printk.h>
14 15 #include <linux/types.h>
15 16 #include <linux/of.h>
16 17 #include <linux/of_fdt.h>
... ... @@ -625,6 +626,39 @@
625 626 return 0;
626 627 }
627 628  
  629 +static ssize_t symbol_map_read(struct file *fp, struct kobject *kobj,
  630 + struct bin_attribute *bin_attr,
  631 + char *buf, loff_t off, size_t count)
  632 +{
  633 + return memory_read_from_buffer(buf, count, &off, bin_attr->private,
  634 + bin_attr->size);
  635 +}
  636 +
  637 +static BIN_ATTR_RO(symbol_map, 0);
  638 +
  639 +static void opal_export_symmap(void)
  640 +{
  641 + const __be64 *syms;
  642 + unsigned int size;
  643 + struct device_node *fw;
  644 + int rc;
  645 +
  646 + fw = of_find_node_by_path("/ibm,opal/firmware");
  647 + if (!fw)
  648 + return;
  649 + syms = of_get_property(fw, "symbol-map", &size);
  650 + if (!syms || size != 2 * sizeof(__be64))
  651 + return;
  652 +
  653 + /* Setup attributes */
  654 + bin_attr_symbol_map.private = __va(be64_to_cpu(syms[0]));
  655 + bin_attr_symbol_map.size = be64_to_cpu(syms[1]);
  656 +
  657 + rc = sysfs_create_bin_file(opal_kobj, &bin_attr_symbol_map);
  658 + if (rc)
  659 + pr_warn("Error %d creating OPAL symbols file\n", rc);
  660 +}
  661 +
628 662 static void __init opal_dump_region_init(void)
629 663 {
630 664 void *addr;
... ... @@ -653,6 +687,14 @@
653 687 of_platform_device_create(np, NULL, NULL);
654 688 }
655 689  
  690 +static void opal_i2c_create_devs(void)
  691 +{
  692 + struct device_node *np;
  693 +
  694 + for_each_compatible_node(np, NULL, "ibm,opal-i2c")
  695 + of_platform_device_create(np, NULL, NULL);
  696 +}
  697 +
656 698 static int __init opal_init(void)
657 699 {
658 700 struct device_node *np, *consoles;
... ... @@ -679,6 +721,9 @@
679 721 of_node_put(consoles);
680 722 }
681 723  
  724 + /* Create i2c platform devices */
  725 + opal_i2c_create_devs();
  726 +
682 727 /* Find all OPAL interrupts and request them */
683 728 irqs = of_get_property(opal_node, "opal-interrupts", &irqlen);
684 729 pr_debug("opal: Found %d interrupts reserved for OPAL\n",
... ... @@ -702,6 +747,8 @@
702 747 /* Create "opal" kobject under /sys/firmware */
703 748 rc = opal_sysfs_init();
704 749 if (rc == 0) {
  750 + /* Export symbol map to userspace */
  751 + opal_export_symmap();
705 752 /* Setup dump region interface */
706 753 opal_dump_region_init();
707 754 /* Setup error log interface */
... ... @@ -824,4 +871,5 @@
824 871 EXPORT_SYMBOL_GPL(opal_rtc_write);
825 872 EXPORT_SYMBOL_GPL(opal_tpo_read);
826 873 EXPORT_SYMBOL_GPL(opal_tpo_write);
  874 +EXPORT_SYMBOL_GPL(opal_i2c_request);
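
With this in place the firmware's symbol table is readable from
/sys/firmware/opal/symbol_map: the two be64 cells of the "symbol-map"
property supply the map's real address and size, which
opal_export_symmap() converts with __va() and exposes read-only so that
tools (for example, profilers resolving OPAL addresses) can consume it.
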
arch/powerpc/platforms/powernv/powernv.h
... ... @@ -29,6 +29,8 @@
29 29 }
30 30 #endif
31 31  
  32 +extern u32 pnv_get_supported_cpuidle_states(void);
  33 +
32 34 extern void pnv_lpc_init(void);
33 35  
34 36 bool cpu_core_split_required(void);
arch/powerpc/platforms/powernv/setup.c
... ... @@ -36,8 +36,12 @@
36 36 #include <asm/opal.h>
37 37 #include <asm/kexec.h>
38 38 #include <asm/smp.h>
  39 +#include <asm/cputhreads.h>
  40 +#include <asm/cpuidle.h>
  41 +#include <asm/code-patching.h>
39 42  
40 43 #include "powernv.h"
  44 +#include "subcore.h"
41 45  
42 46 static void __init pnv_setup_arch(void)
43 47 {
... ... @@ -287,6 +291,168 @@
287 291 ppc_md.halt = rtas_halt;
288 292 }
289 293 #endif /* CONFIG_PPC_POWERNV_RTAS */
  294 +
  295 +static u32 supported_cpuidle_states;
  296 +
  297 +int pnv_save_sprs_for_winkle(void)
  298 +{
  299 + int cpu;
  300 + int rc;
  301 +
  302 + /*
  303 + * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric across
  304 + * all cpus at boot. Read these register values on the current cpu and
  305 + * use the same values across all cpus.
  306 + */
  307 + uint64_t lpcr_val = mfspr(SPRN_LPCR);
  308 + uint64_t hid0_val = mfspr(SPRN_HID0);
  309 + uint64_t hid1_val = mfspr(SPRN_HID1);
  310 + uint64_t hid4_val = mfspr(SPRN_HID4);
  311 + uint64_t hid5_val = mfspr(SPRN_HID5);
  312 + uint64_t hmeer_val = mfspr(SPRN_HMEER);
  313 +
  314 + for_each_possible_cpu(cpu) {
  315 + uint64_t pir = get_hard_smp_processor_id(cpu);
  316 + uint64_t hsprg0_val = (uint64_t)&paca[cpu];
  317 +
  318 + /*
  319 + * HSPRG0 is used to store the cpu's paca pointer. Hence the last
  320 + * 3 bits are guaranteed to be 0. Program the slw to restore HSPRG0
  321 + * with the 63rd bit set, so that when a thread wakes up at 0x100 we
  322 + * can use this bit to distinguish between fastsleep and
  323 + * deep winkle.
  324 + */
  325 + hsprg0_val |= 1;
  326 +
  327 + rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val);
  328 + if (rc != 0)
  329 + return rc;
  330 +
  331 + rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
  332 + if (rc != 0)
  333 + return rc;
  334 +
  335 + /* HIDs are per core registers */
  336 + if (cpu_thread_in_core(cpu) == 0) {
  337 +
  338 + rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val);
  339 + if (rc != 0)
  340 + return rc;
  341 +
  342 + rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val);
  343 + if (rc != 0)
  344 + return rc;
  345 +
  346 + rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val);
  347 + if (rc != 0)
  348 + return rc;
  349 +
  350 + rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val);
  351 + if (rc != 0)
  352 + return rc;
  353 +
  354 + rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val);
  355 + if (rc != 0)
  356 + return rc;
  357 + }
  358 + }
  359 +
  360 + return 0;
  361 +}
  362 +
  363 +static void pnv_alloc_idle_core_states(void)
  364 +{
  365 + int i, j;
  366 + int nr_cores = cpu_nr_cores();
  367 + u32 *core_idle_state;
  368 +
  369 + /*
  370 + * core_idle_state - The low 8 bits track the idle state of each thread
  371 + * of the core. Bit 8 (0x100) is the lock bit. Initially all thread bits
  372 + * are set. A thread's bit is cleared when it enters a deep idle state
  373 + * such as sleep or winkle. Initially the lock bit is cleared.
  374 + * The lock bit has 2 purposes
  375 + * a. While the first thread is restoring core state, it prevents
  376 + * other threads in the core from switching to process context.
  377 + * b. While the last thread in the core is saving the core state, it
  378 + * prevents a different thread from waking up.
  379 + */
  380 + for (i = 0; i < nr_cores; i++) {
  381 + int first_cpu = i * threads_per_core;
  382 + int node = cpu_to_node(first_cpu);
  383 +
  384 + core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node);
  385 + *core_idle_state = PNV_CORE_IDLE_THREAD_BITS;
  386 +
  387 + for (j = 0; j < threads_per_core; j++) {
  388 + int cpu = first_cpu + j;
  389 +
  390 + paca[cpu].core_idle_state_ptr = core_idle_state;
  391 + paca[cpu].thread_idle_state = PNV_THREAD_RUNNING;
  392 + paca[cpu].thread_mask = 1 << j;
  393 + }
  394 + }
  395 +
  396 + update_subcore_sibling_mask();
  397 +
  398 + if (supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED)
  399 + pnv_save_sprs_for_winkle();
  400 +}
  401 +
  402 +u32 pnv_get_supported_cpuidle_states(void)
  403 +{
  404 + return supported_cpuidle_states;
  405 +}
  406 +EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states);
  407 +
  408 +static int __init pnv_init_idle_states(void)
  409 +{
  410 + struct device_node *power_mgt;
  411 + int dt_idle_states;
  412 + const __be32 *idle_state_flags;
  413 + u32 len_flags, flags;
  414 + int i;
  415 +
  416 + supported_cpuidle_states = 0;
  417 +
  418 + if (cpuidle_disable != IDLE_NO_OVERRIDE)
  419 + return 0;
  420 +
  421 + if (!firmware_has_feature(FW_FEATURE_OPALv3))
  422 + return 0;
  423 +
  424 + power_mgt = of_find_node_by_path("/ibm,opal/power-mgt");
  425 + if (!power_mgt) {
  426 + pr_warn("opal: PowerMgmt Node not found\n");
  427 + return 0;
  428 + }
  429 +
  430 + idle_state_flags = of_get_property(power_mgt,
  431 + "ibm,cpu-idle-state-flags", &len_flags);
  432 + if (!idle_state_flags) {
  433 + pr_warn("DT-PowerMgmt: missing ibm,cpu-idle-state-flags\n");
  434 + return 0;
  435 + }
  436 +
  437 + dt_idle_states = len_flags / sizeof(u32);
  438 +
  439 + for (i = 0; i < dt_idle_states; i++) {
  440 + flags = be32_to_cpu(idle_state_flags[i]);
  441 + supported_cpuidle_states |= flags;
  442 + }
  443 + if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
  444 + patch_instruction(
  445 + (unsigned int *)pnv_fastsleep_workaround_at_entry,
  446 + PPC_INST_NOP);
  447 + patch_instruction(
  448 + (unsigned int *)pnv_fastsleep_workaround_at_exit,
  449 + PPC_INST_NOP);
  450 + }
  451 + pnv_alloc_idle_core_states();
  452 + return 0;
  453 +}
  454 +
  455 +subsys_initcall(pnv_init_idle_states);
290 456  
291 457 static int __init pnv_probe(void)
292 458 {
arch/powerpc/platforms/powernv/smp.c
... ... @@ -150,6 +150,7 @@
150 150 {
151 151 unsigned int cpu;
152 152 unsigned long srr1;
  153 + u32 idle_states;
153 154  
154 155 /* Standard hot unplug procedure */
155 156 local_irq_disable();
156 157  
157 158  
... ... @@ -160,13 +161,23 @@
160 161 generic_set_cpu_dead(cpu);
161 162 smp_wmb();
162 163  
  164 + idle_states = pnv_get_supported_cpuidle_states();
163 165 /* We don't want to take decrementer interrupts while we are offline,
164 166 * so clear LPCR:PECE1. We keep PECE2 enabled.
165 167 */
166 168 mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1);
167 169 while (!generic_check_cpu_restart(cpu)) {
  170 +
168 171 ppc64_runlatch_off();
169   - srr1 = power7_nap(1);
  172 +
  173 + if (idle_states & OPAL_PM_WINKLE_ENABLED)
  174 + srr1 = power7_winkle();
  175 + else if ((idle_states & OPAL_PM_SLEEP_ENABLED) ||
  176 + (idle_states & OPAL_PM_SLEEP_ENABLED_ER1))
  177 + srr1 = power7_sleep();
  178 + else
  179 + srr1 = power7_nap(1);
  180 +
170 181 ppc64_runlatch_on();
171 182  
172 183 /*
173 184  
... ... @@ -198,13 +209,27 @@
198 209  
199 210 #endif /* CONFIG_HOTPLUG_CPU */
200 211  
  212 +static int pnv_cpu_bootable(unsigned int nr)
  213 +{
  214 + /*
  215 + * Starting with POWER8, the subcore logic relies on all threads of a
  216 + * core being booted so that they can participate in split mode
  217 + * switches. So on those machines we ignore the smt_enabled_at_boot
  218 + * setting (smt-enabled on the kernel command line).
  219 + */
  220 + if (cpu_has_feature(CPU_FTR_ARCH_207S))
  221 + return 1;
  222 +
  223 + return smp_generic_cpu_bootable(nr);
  224 +}
  225 +
201 226 static struct smp_ops_t pnv_smp_ops = {
202 227 .message_pass = smp_muxed_ipi_message_pass,
203 228 .cause_ipi = NULL, /* Filled at runtime by xics_smp_probe() */
204 229 .probe = xics_smp_probe,
205 230 .kick_cpu = pnv_smp_kick_cpu,
206 231 .setup_cpu = pnv_smp_setup_cpu,
207   - .cpu_bootable = smp_generic_cpu_bootable,
  232 + .cpu_bootable = pnv_cpu_bootable,
208 233 #ifdef CONFIG_HOTPLUG_CPU
209 234 .cpu_disable = pnv_smp_cpu_disable,
210 235 .cpu_die = generic_cpu_die,
arch/powerpc/platforms/powernv/subcore.c
... ... @@ -160,6 +160,18 @@
160 160 mb();
161 161 }
162 162  
  163 +static void update_hid_in_slw(u64 hid0)
  164 +{
  165 + u64 idle_states = pnv_get_supported_cpuidle_states();
  166 +
  167 + if (idle_states & OPAL_PM_WINKLE_ENABLED) {
  168 + /* OPAL call to patch slw with the new HID0 value */
  169 + u64 cpu_pir = hard_smp_processor_id();
  170 +
  171 + opal_slw_set_reg(cpu_pir, SPRN_HID0, hid0);
  172 + }
  173 +}
  174 +
163 175 static void unsplit_core(void)
164 176 {
165 177 u64 hid0, mask;
... ... @@ -179,6 +191,7 @@
179 191 hid0 = mfspr(SPRN_HID0);
180 192 hid0 &= ~HID0_POWER8_DYNLPARDIS;
181 193 mtspr(SPRN_HID0, hid0);
  194 + update_hid_in_slw(hid0);
182 195  
183 196 while (mfspr(SPRN_HID0) & mask)
184 197 cpu_relax();
... ... @@ -215,6 +228,7 @@
215 228 hid0 = mfspr(SPRN_HID0);
216 229 hid0 |= HID0_POWER8_DYNLPARDIS | split_parms[i].value;
217 230 mtspr(SPRN_HID0, hid0);
  231 + update_hid_in_slw(hid0);
218 232  
219 233 /* Wait for it to happen */
220 234 while (!(mfspr(SPRN_HID0) & split_parms[i].mask))
... ... @@ -251,6 +265,25 @@
251 265 return true;
252 266 }
253 267  
  268 +void update_subcore_sibling_mask(void)
  269 +{
  270 + int cpu;
  271 + /*
  272 + * Sibling mask for the first cpu. Left shift this by the required
  273 + * number of bits to get the sibling mask for the rest of the cpus.
  274 + */
  275 + int sibling_mask_first_cpu = (1 << threads_per_subcore) - 1;
  276 +
  277 + for_each_possible_cpu(cpu) {
  278 + int tid = cpu_thread_in_core(cpu);
  279 + int offset = (tid / threads_per_subcore) * threads_per_subcore;
  280 + int mask = sibling_mask_first_cpu << offset;
  281 +
  282 + paca[cpu].subcore_sibling_mask = mask;
  283 +
  284 + }
  285 +}
  286 +
254 287 static int cpu_update_split_mode(void *data)
255 288 {
256 289 int cpu, new_mode = *(int *)data;
... ... @@ -284,6 +317,7 @@
284 317 /* Make the new mode public */
285 318 subcores_per_core = new_mode;
286 319 threads_per_subcore = threads_per_core / subcores_per_core;
  320 + update_subcore_sibling_mask();
287 321  
288 322 /* Make sure the new mode is written before we exit */
289 323 mb();
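
A worked example of update_subcore_sibling_mask(): with threads_per_core
= 8 and threads_per_subcore = 2, the first-cpu mask is (1 << 2) - 1 =
0b11; thread 5 gets offset (5 / 2) * 2 = 4, so its subcore_sibling_mask
is 0b110000, covering threads 4 and 5 of the core.
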
arch/powerpc/platforms/powernv/subcore.h
... ... @@ -14,6 +14,13 @@
14 14 #define SYNC_STEP_FINISHED 3 /* Set by secondary when split/unsplit is done */
15 15  
16 16 #ifndef __ASSEMBLY__
  17 +
  18 +#ifdef CONFIG_SMP
17 19 void split_core_secondary_loop(u8 *state);
18   -#endif
  20 +extern void update_subcore_sibling_mask(void);
  21 +#else
  22 +static inline void update_subcore_sibling_mask(void) { };
  23 +#endif /* CONFIG_SMP */
  24 +
  25 +#endif /* __ASSEMBLY__ */
drivers/cpuidle/cpuidle-powernv.c
... ... @@ -16,13 +16,10 @@
16 16  
17 17 #include <asm/machdep.h>
18 18 #include <asm/firmware.h>
  19 +#include <asm/opal.h>
19 20 #include <asm/runlatch.h>
20 21  
21   -/* Flags and constants used in PowerNV platform */
22   -
23 22 #define MAX_POWERNV_IDLE_STATES 8
24   -#define IDLE_USE_INST_NAP 0x00010000 /* Use nap instruction */
25   -#define IDLE_USE_INST_SLEEP 0x00020000 /* Use sleep instruction */
26 23  
27 24 struct cpuidle_driver powernv_idle_driver = {
28 25 .name = "powernv_idle",
... ... @@ -197,7 +194,7 @@
197 194 * target residency to be 10x exit_latency
198 195 */
199 196 latency_ns = be32_to_cpu(idle_state_latency[i]);
200   - if (flags & IDLE_USE_INST_NAP) {
  197 + if (flags & OPAL_PM_NAP_ENABLED) {
201 198 /* Add NAP state */
202 199 strcpy(powernv_states[nr_idle_states].name, "Nap");
203 200 strcpy(powernv_states[nr_idle_states].desc, "Nap");
... ... @@ -210,7 +207,8 @@
210 207 nr_idle_states++;
211 208 }
212 209  
213   - if (flags & IDLE_USE_INST_SLEEP) {
  210 + if (flags & OPAL_PM_SLEEP_ENABLED ||
  211 + flags & OPAL_PM_SLEEP_ENABLED_ER1) {
214 212 /* Add FASTSLEEP state */
215 213 strcpy(powernv_states[nr_idle_states].name, "FastSleep");
216 214 strcpy(powernv_states[nr_idle_states].desc, "FastSleep");
drivers/i2c/busses/Kconfig
... ... @@ -1072,5 +1072,16 @@
1072 1072 This support is also available as a module. If so, the module
1073 1073 will be called scx200_acb.
1074 1074  
  1075 +config I2C_OPAL
  1076 + tristate "IBM OPAL I2C driver"
  1077 + depends on PPC_POWERNV
  1078 + default y
  1079 + help
  1080 + This exposes the PowerNV platform i2c busses to the Linux i2c layer;
  1081 + the driver is based on the OPAL interfaces.
  1082 +
  1083 + This driver can also be built as a module. If so, the module will be
  1084 + called i2c-opal.
  1085 +
1075 1086 endmenu
drivers/i2c/busses/Makefile
... ... @@ -102,6 +102,7 @@
102 102 obj-$(CONFIG_I2C_BCM_KONA) += i2c-bcm-kona.o
103 103 obj-$(CONFIG_I2C_CROS_EC_TUNNEL) += i2c-cros-ec-tunnel.o
104 104 obj-$(CONFIG_I2C_ELEKTOR) += i2c-elektor.o
  105 +obj-$(CONFIG_I2C_OPAL) += i2c-opal.o
105 106 obj-$(CONFIG_I2C_PCA_ISA) += i2c-pca-isa.o
106 107 obj-$(CONFIG_I2C_SIBYTE) += i2c-sibyte.o
107 108 obj-$(CONFIG_SCx200_ACB) += scx200_acb.o
drivers/i2c/busses/i2c-opal.c
  1 +/*
  2 + * IBM OPAL I2C driver
  3 + * Copyright (C) 2014 IBM
  4 + *
  5 + * This program is free software; you can redistribute it and/or modify
  6 + * it under the terms of the GNU General Public License as published by
  7 + * the Free Software Foundation; either version 2 of the License, or
  8 + * (at your option) any later version.
  9 + *
  10 + * This program is distributed in the hope that it will be useful,
  11 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13 + * GNU General Public License for more details.
  14 + *
  15 + * You should have received a copy of the GNU General Public License
  16 + * along with this program.
  17 + */
  18 +
  19 +#include <linux/device.h>
  20 +#include <linux/i2c.h>
  21 +#include <linux/kernel.h>
  22 +#include <linux/mm.h>
  23 +#include <linux/module.h>
  24 +#include <linux/of.h>
  25 +#include <linux/platform_device.h>
  26 +#include <linux/slab.h>
  27 +
  28 +#include <asm/firmware.h>
  29 +#include <asm/opal.h>
  30 +
  31 +static int i2c_opal_translate_error(int rc)
  32 +{
  33 + switch (rc) {
  34 + case OPAL_NO_MEM:
  35 + return -ENOMEM;
  36 + case OPAL_PARAMETER:
  37 + return -EINVAL;
  38 + case OPAL_I2C_ARBT_LOST:
  39 + return -EAGAIN;
  40 + case OPAL_I2C_TIMEOUT:
  41 + return -ETIMEDOUT;
  42 + case OPAL_I2C_NACK_RCVD:
  43 + return -ENXIO;
  44 + case OPAL_I2C_STOP_ERR:
  45 + return -EBUSY;
  46 + default:
  47 + return -EIO;
  48 + }
  49 +}
  50 +
  51 +static int i2c_opal_send_request(u32 bus_id, struct opal_i2c_request *req)
  52 +{
  53 + struct opal_msg msg;
  54 + int token, rc;
  55 +
  56 + token = opal_async_get_token_interruptible();
  57 + if (token < 0) {
  58 + if (token != -ERESTARTSYS)
  59 + pr_err("Failed to get the async token\n");
  60 +
  61 + return token;
  62 + }
  63 +
  64 + rc = opal_i2c_request(token, bus_id, req);
  65 + if (rc != OPAL_ASYNC_COMPLETION) {
  66 + rc = i2c_opal_translate_error(rc);
  67 + goto exit;
  68 + }
  69 +
  70 + rc = opal_async_wait_response(token, &msg);
  71 + if (rc)
  72 + goto exit;
  73 +
  74 + rc = be64_to_cpu(msg.params[1]);
  75 + if (rc != OPAL_SUCCESS) {
  76 + rc = i2c_opal_translate_error(rc);
  77 + goto exit;
  78 + }
  79 +
  80 +exit:
  81 + opal_async_release_token(token);
  82 + return rc;
  83 +}
  84 +
  85 +static int i2c_opal_master_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs,
  86 + int num)
  87 +{
  88 + unsigned long opal_id = (unsigned long)adap->algo_data;
  89 + struct opal_i2c_request req;
  90 + int rc, i;
  91 +
  92 + /* We only support fairly simple combinations here of one
  93 + * or two messages
  94 + */
  95 + memset(&req, 0, sizeof(req));
  96 + switch(num) {
  97 + case 0:
  98 + return 0;
  99 + case 1:
  100 + req.type = (msgs[0].flags & I2C_M_RD) ?
  101 + OPAL_I2C_RAW_READ : OPAL_I2C_RAW_WRITE;
  102 + req.addr = cpu_to_be16(msgs[0].addr);
  103 + req.size = cpu_to_be32(msgs[0].len);
  104 + req.buffer_ra = cpu_to_be64(__pa(msgs[0].buf));
  105 + break;
  106 + case 2:
  107 + /* For two messages, we basically support only simple
  108 + * smbus transactions of a write plus a read. We might
  109 + * want to allow also two writes but we'd have to bounce
  110 + * the data into a single buffer.
  111 + */
  112 + if ((msgs[0].flags & I2C_M_RD) || !(msgs[1].flags & I2C_M_RD))
  113 + return -EOPNOTSUPP;
  114 + if (msgs[0].len > 4)
  115 + return -EOPNOTSUPP;
  116 + if (msgs[0].addr != msgs[1].addr)
  117 + return -EOPNOTSUPP;
  118 + req.type = OPAL_I2C_SM_READ;
  119 + req.addr = cpu_to_be16(msgs[0].addr);
  120 + req.subaddr_sz = msgs[0].len;
  121 + for (i = 0; i < msgs[0].len; i++)
  122 + req.subaddr = (req.subaddr << 8) | msgs[0].buf[i];
  123 + req.subaddr = cpu_to_be32(req.subaddr);
  124 + req.size = cpu_to_be32(msgs[1].len);
  125 + req.buffer_ra = cpu_to_be64(__pa(msgs[1].buf));
  126 + break;
  127 + default:
  128 + return -EOPNOTSUPP;
  129 + }
  130 +
  131 + rc = i2c_opal_send_request(opal_id, &req);
  132 + if (rc)
  133 + return rc;
  134 +
  135 + return num;
  136 +}
  137 +
  138 +static int i2c_opal_smbus_xfer(struct i2c_adapter *adap, u16 addr,
  139 + unsigned short flags, char read_write,
  140 + u8 command, int size, union i2c_smbus_data *data)
  141 +{
  142 + unsigned long opal_id = (unsigned long)adap->algo_data;
  143 + struct opal_i2c_request req;
  144 + u8 local[2];
  145 + int rc;
  146 +
  147 + memset(&req, 0, sizeof(req));
  148 +
  149 + req.addr = cpu_to_be16(addr);
  150 + switch (size) {
  151 + case I2C_SMBUS_BYTE:
  152 + req.buffer_ra = cpu_to_be64(__pa(&data->byte));
  153 + req.size = cpu_to_be32(1);
  154 + /* Fall through */
  155 + case I2C_SMBUS_QUICK:
  156 + req.type = (read_write == I2C_SMBUS_READ) ?
  157 + OPAL_I2C_RAW_READ : OPAL_I2C_RAW_WRITE;
  158 + break;
  159 + case I2C_SMBUS_BYTE_DATA:
  160 + req.buffer_ra = cpu_to_be64(__pa(&data->byte));
  161 + req.size = cpu_to_be32(1);
  162 + req.subaddr = cpu_to_be32(command);
  163 + req.subaddr_sz = 1;
  164 + req.type = (read_write == I2C_SMBUS_READ) ?
  165 + OPAL_I2C_SM_READ : OPAL_I2C_SM_WRITE;
  166 + break;
  167 + case I2C_SMBUS_WORD_DATA:
  168 + if (!read_write) {
  169 + local[0] = data->word & 0xff;
  170 + local[1] = (data->word >> 8) & 0xff;
  171 + }
  172 + req.buffer_ra = cpu_to_be64(__pa(local));
  173 + req.size = cpu_to_be32(2);
  174 + req.subaddr = cpu_to_be32(command);
  175 + req.subaddr_sz = 1;
  176 + req.type = (read_write == I2C_SMBUS_READ) ?
  177 + OPAL_I2C_SM_READ : OPAL_I2C_SM_WRITE;
  178 + break;
  179 + case I2C_SMBUS_I2C_BLOCK_DATA:
  180 + req.buffer_ra = cpu_to_be64(__pa(&data->block[1]));
  181 + req.size = cpu_to_be32(data->block[0]);
  182 + req.subaddr = cpu_to_be32(command);
  183 + req.subaddr_sz = 1;
  184 + req.type = (read_write == I2C_SMBUS_READ) ?
  185 + OPAL_I2C_SM_READ : OPAL_I2C_SM_WRITE;
  186 + break;
  187 + default:
  188 + return -EINVAL;
  189 + }
  190 +
  191 + rc = i2c_opal_send_request(opal_id, &req);
  192 + if (!rc && read_write && size == I2C_SMBUS_WORD_DATA) {
  193 + data->word = ((u16)local[1]) << 8;
  194 + data->word |= local[0];
  195 + }
  196 +
  197 + return rc;
  198 +}
  199 +
  200 +static u32 i2c_opal_func(struct i2c_adapter *adapter)
  201 +{
  202 + return I2C_FUNC_I2C | I2C_FUNC_SMBUS_QUICK | I2C_FUNC_SMBUS_BYTE |
  203 + I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA |
  204 + I2C_FUNC_SMBUS_I2C_BLOCK;
  205 +}
  206 +
  207 +static const struct i2c_algorithm i2c_opal_algo = {
  208 + .master_xfer = i2c_opal_master_xfer,
  209 + .smbus_xfer = i2c_opal_smbus_xfer,
  210 + .functionality = i2c_opal_func,
  211 +};
  212 +
  213 +static int i2c_opal_probe(struct platform_device *pdev)
  214 +{
  215 + struct i2c_adapter *adapter;
  216 + const char *pname;
  217 + u32 opal_id;
  218 + int rc;
  219 +
  220 + if (!pdev->dev.of_node)
  221 + return -ENODEV;
  222 +
  223 + rc = of_property_read_u32(pdev->dev.of_node, "ibm,opal-id", &opal_id);
  224 + if (rc) {
  225 + dev_err(&pdev->dev, "Missing ibm,opal-id property !\n");
  226 + return -EIO;
  227 + }
  228 +
  229 + adapter = devm_kzalloc(&pdev->dev, sizeof(*adapter), GFP_KERNEL);
  230 + if (!adapter)
  231 + return -ENOMEM;
  232 +
  233 + adapter->algo = &i2c_opal_algo;
  234 + adapter->algo_data = (void *)(unsigned long)opal_id;
  235 + adapter->dev.parent = &pdev->dev;
  236 + adapter->dev.of_node = of_node_get(pdev->dev.of_node);
  237 + pname = of_get_property(pdev->dev.of_node, "ibm,port-name", NULL);
  238 + if (pname)
  239 + strlcpy(adapter->name, pname, sizeof(adapter->name));
  240 + else
  241 + strlcpy(adapter->name, "opal", sizeof(adapter->name));
  242 +
  243 + platform_set_drvdata(pdev, adapter);
  244 + rc = i2c_add_adapter(adapter);
  245 + if (rc)
  246 + dev_err(&pdev->dev, "Failed to register the i2c adapter\n");
  247 +
  248 + return rc;
  249 +}
  250 +
  251 +static int i2c_opal_remove(struct platform_device *pdev)
  252 +{
  253 + struct i2c_adapter *adapter = platform_get_drvdata(pdev);
  254 +
  255 + i2c_del_adapter(adapter);
  256 +
  257 + return 0;
  258 +}
  259 +
  260 +static const struct of_device_id i2c_opal_of_match[] = {
  261 + {
  262 + .compatible = "ibm,opal-i2c",
  263 + },
  264 + { }
  265 +};
  266 +MODULE_DEVICE_TABLE(of, i2c_opal_of_match);
  267 +
  268 +static struct platform_driver i2c_opal_driver = {
  269 + .probe = i2c_opal_probe,
  270 + .remove = i2c_opal_remove,
  271 + .driver = {
  272 + .name = "i2c-opal",
  273 + .of_match_table = i2c_opal_of_match,
  274 + },
  275 +};
  276 +
  277 +static int __init i2c_opal_init(void)
  278 +{
  279 + if (!firmware_has_feature(FW_FEATURE_OPAL))
  280 + return -ENODEV;
  281 +
  282 + return platform_driver_register(&i2c_opal_driver);
  283 +}
  284 +module_init(i2c_opal_init);
  285 +
  286 +static void __exit i2c_opal_exit(void)
  287 +{
  288 + return platform_driver_unregister(&i2c_opal_driver);
  289 +}
  290 +module_exit(i2c_opal_exit);
  291 +
  292 +MODULE_AUTHOR("Neelesh Gupta <neelegup@linux.vnet.ibm.com>");
  293 +MODULE_DESCRIPTION("IBM OPAL I2C driver");
  294 +MODULE_LICENSE("GPL");
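
Once probed, each OPAL bus appears as an ordinary Linux I2C adapter,
named after the "ibm,port-name" property when present (for example
"p8_00000000_e1p0"), so existing in-kernel clients and the usual
userspace i2c-dev interface work unchanged.
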
drivers/misc/cxl/context.c
... ... @@ -34,7 +34,8 @@
34 34 /*
35 35 * Initialises a CXL context.
36 36 */
37   -int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master)
  37 +int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master,
  38 + struct address_space *mapping)
38 39 {
39 40 int i;
40 41  
... ... @@ -42,6 +43,8 @@
42 43 ctx->afu = afu;
43 44 ctx->master = master;
44 45 ctx->pid = NULL; /* Set in start work ioctl */
  46 + mutex_init(&ctx->mapping_lock);
  47 + ctx->mapping = mapping;
45 48  
46 49 /*
47 50 * Allocate the segment table before we put it in the IDR so that we
... ... @@ -82,12 +85,12 @@
82 85 * Allocating the IDR! We'd better make sure everything that
83 86 * dereferences from it is set up.
84 87 */
  88 + mutex_lock(&afu->contexts_lock);
85 89 idr_preload(GFP_KERNEL);
86   - spin_lock(&afu->contexts_lock);
87 90 i = idr_alloc(&ctx->afu->contexts_idr, ctx, 0,
88 91 ctx->afu->num_procs, GFP_NOWAIT);
89   - spin_unlock(&afu->contexts_lock);
90 92 idr_preload_end();
  93 + mutex_unlock(&afu->contexts_lock);
91 94 if (i < 0)
92 95 return i;
93 96  
... ... @@ -147,6 +150,12 @@
147 150 afu_release_irqs(ctx);
148 151 flush_work(&ctx->fault_work); /* Only needed for dedicated process */
149 152 wake_up_all(&ctx->wq);
  153 +
  154 + /* Release Problem State Area mapping */
  155 + mutex_lock(&ctx->mapping_lock);
  156 + if (ctx->mapping)
  157 + unmap_mapping_range(ctx->mapping, 0, 0, 1);
  158 + mutex_unlock(&ctx->mapping_lock);
150 159 }
151 160  
152 161 /*
... ... @@ -168,21 +177,22 @@
168 177 struct cxl_context *ctx;
169 178 int tmp;
170 179  
171   - rcu_read_lock();
172   - idr_for_each_entry(&afu->contexts_idr, ctx, tmp)
  180 + mutex_lock(&afu->contexts_lock);
  181 + idr_for_each_entry(&afu->contexts_idr, ctx, tmp) {
173 182 /*
174 183 * Anything done in here needs to be set up before the IDR is
175 184 * created, and torn down after the IDR is removed
176 185 */
177 186 __detach_context(ctx);
178   - rcu_read_unlock();
  187 + }
  188 + mutex_unlock(&afu->contexts_lock);
179 189 }
180 190  
181 191 void cxl_context_free(struct cxl_context *ctx)
182 192 {
183   - spin_lock(&ctx->afu->contexts_lock);
  193 + mutex_lock(&ctx->afu->contexts_lock);
184 194 idr_remove(&ctx->afu->contexts_idr, ctx->pe);
185   - spin_unlock(&ctx->afu->contexts_lock);
  195 + mutex_unlock(&ctx->afu->contexts_lock);
186 196 synchronize_rcu();
187 197  
188 198 free_page((u64)ctx->sstp);
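
A subtlety in the allocation hunk of context.c above: idr_preload() disables preemption until idr_preload_end(), so the sleeping mutex must be taken before the preload section rather than inside it (the old spin_lock had no such ordering constraint). The resulting pattern, re-sketched with explanatory comments:

	mutex_lock(&afu->contexts_lock);	/* may sleep, so take it first */
	idr_preload(GFP_KERNEL);		/* disables preemption */
	i = idr_alloc(&ctx->afu->contexts_idr, ctx, 0,
		      ctx->afu->num_procs, GFP_NOWAIT);
	idr_preload_end();			/* re-enables preemption */
	mutex_unlock(&afu->contexts_lock);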
drivers/misc/cxl/cxl.h
... ... @@ -351,7 +351,7 @@
351 351 struct device *chardev_s, *chardev_m, *chardev_d;
352 352 struct idr contexts_idr;
353 353 struct dentry *debugfs;
354   - spinlock_t contexts_lock;
  354 + struct mutex contexts_lock;
355 355 struct mutex spa_mutex;
356 356 spinlock_t afu_cntl_lock;
357 357  
... ... @@ -398,6 +398,10 @@
398 398 phys_addr_t psn_phys;
399 399 u64 psn_size;
400 400  
  401 + /* Used to unmap any mmaps when force detaching */
  402 + struct address_space *mapping;
  403 + struct mutex mapping_lock;
  404 +
401 405 spinlock_t sste_lock; /* Protects segment table entries */
402 406 struct cxl_sste *sstp;
403 407 u64 sstp0, sstp1;
... ... @@ -599,7 +603,8 @@
599 603 void init_cxl_native(void);
600 604  
601 605 struct cxl_context *cxl_context_alloc(void);
602   -int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master);
  606 +int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master,
  607 + struct address_space *mapping);
603 608 void cxl_context_free(struct cxl_context *ctx);
604 609 int cxl_context_iomap(struct cxl_context *ctx, struct vm_area_struct *vma);
605 610  
drivers/misc/cxl/file.c
... ... @@ -77,7 +77,7 @@
77 77 goto err_put_afu;
78 78 }
79 79  
80   - if ((rc = cxl_context_init(ctx, afu, master)))
  80 + if ((rc = cxl_context_init(ctx, afu, master, inode->i_mapping)))
81 81 goto err_put_afu;
82 82  
83 83 pr_devel("afu_open pe: %i\n", ctx->pe);
... ... @@ -112,6 +112,10 @@
112 112 pr_devel("%s: closing cxl file descriptor. pe: %i\n",
113 113 __func__, ctx->pe);
114 114 cxl_context_detach(ctx);
  115 +
  116 + mutex_lock(&ctx->mapping_lock);
  117 + ctx->mapping = NULL;
  118 + mutex_unlock(&ctx->mapping_lock);
115 119  
116 120 put_device(&ctx->afu->dev);
117 121  
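The release-path hunk above clears ctx->mapping under mapping_lock because the inode's address_space is only valid while the file is open: a later forced detach checks the pointer under the same lock and skips unmap_mapping_range() once it is NULL. In sketch form, with comments added:

	/* Sketch: let a concurrent forced detach see that the mapping is gone */
	mutex_lock(&ctx->mapping_lock);
	ctx->mapping = NULL;		/* inode mapping dies with the fd */
	mutex_unlock(&ctx->mapping_lock);
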
drivers/misc/cxl/native.c
... ... @@ -277,6 +277,7 @@
277 277 u64 cmd, u64 pe_state)
278 278 {
279 279 u64 state;
  280 + unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT);
280 281  
281 282 WARN_ON(!ctx->afu->enabled);
282 283  
... ... @@ -286,6 +287,10 @@
286 287 smp_mb();
287 288 cxl_p1n_write(ctx->afu, CXL_PSL_LLCMD_An, cmd | ctx->pe);
288 289 while (1) {
  290 + if (time_after_eq(jiffies, timeout)) {
  291 + dev_warn(&ctx->afu->dev, "WARNING: Process Element Command timed out!\n");
  292 + return -EBUSY;
  293 + }
289 294 state = be64_to_cpup(ctx->afu->sw_command_status);
290 295 if (state == ~0ULL) {
291 296 pr_err("cxl: Error adding process element to AFU\n");
... ... @@ -610,13 +615,6 @@
610 615 return 0;
611 616 }
612 617  
613   -/*
614   - * TODO: handle case when this is called inside a rcu_read_lock() which may
615   - * happen when we unbind the driver (ie. cxl_context_detach_all()) . Terminate
616   - * & remove use a mutex lock and schedule which will not good with lock held.
617   - * May need to write do_process_element_cmd() that handles outstanding page
618   - * faults synchronously.
619   - */
620 618 static inline int detach_process_native_afu_directed(struct cxl_context *ctx)
621 619 {
622 620 if (!ctx->pe_inserted)
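
The timeout added to the process-element command loop above uses the standard jiffies-based bounded-poll idiom; time_after_eq() compares jiffies values in a wraparound-safe way. A generic sketch of the pattern, where condition_met() is a hypothetical stand-in for the command-status check:

	/* CXL_TIMEOUT is in seconds; HZ * CXL_TIMEOUT converts it to jiffies */
	unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT);

	while (!condition_met()) {
		if (time_after_eq(jiffies, timeout))
			return -EBUSY;	/* bail out rather than hang forever */
		cpu_relax();
	}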
drivers/misc/cxl/pci.c
... ... @@ -502,7 +502,7 @@
502 502 afu->dev.release = cxl_release_afu;
503 503 afu->slice = slice;
504 504 idr_init(&afu->contexts_idr);
505   - spin_lock_init(&afu->contexts_lock);
  505 + mutex_init(&afu->contexts_lock);
506 506 spin_lock_init(&afu->afu_cntl_lock);
507 507 mutex_init(&afu->spa_mutex);
508 508  
drivers/misc/cxl/sysfs.c
... ... @@ -121,7 +121,7 @@
121 121 int rc;
122 122  
123 123 /* Not safe to reset if it is currently in use */
124   - spin_lock(&afu->contexts_lock);
  124 + mutex_lock(&afu->contexts_lock);
125 125 if (!idr_is_empty(&afu->contexts_idr)) {
126 126 rc = -EBUSY;
127 127 goto err;
... ... @@ -132,7 +132,7 @@
132 132  
133 133 rc = count;
134 134 err:
135   - spin_unlock(&afu->contexts_lock);
  135 + mutex_unlock(&afu->contexts_lock);
136 136 return rc;
137 137 }
138 138  
... ... @@ -247,7 +247,7 @@
247 247 int rc = -EBUSY;
248 248  
249 249 /* can't change this if we have a user */
250   - spin_lock(&afu->contexts_lock);
  250 + mutex_lock(&afu->contexts_lock);
251 251 if (!idr_is_empty(&afu->contexts_idr))
252 252 goto err;
253 253  
... ... @@ -271,7 +271,7 @@
271 271 afu->current_mode = 0;
272 272 afu->num_procs = 0;
273 273  
274   - spin_unlock(&afu->contexts_lock);
  274 + mutex_unlock(&afu->contexts_lock);
275 275  
276 276 if ((rc = _cxl_afu_deactivate_mode(afu, old_mode)))
277 277 return rc;
... ... @@ -280,7 +280,7 @@
280 280  
281 281 return count;
282 282 err:
283   - spin_unlock(&afu->contexts_lock);
  283 + mutex_unlock(&afu->contexts_lock);
284 284 return rc;
285 285 }
286 286  
include/uapi/linux/audit.h
... ... @@ -371,7 +371,9 @@
371 371 #define AUDIT_ARCH_PARISC (EM_PARISC)
372 372 #define AUDIT_ARCH_PARISC64 (EM_PARISC|__AUDIT_ARCH_64BIT)
373 373 #define AUDIT_ARCH_PPC (EM_PPC)
  374 +/* do not define AUDIT_ARCH_PPCLE since it is not supported by audit */
374 375 #define AUDIT_ARCH_PPC64 (EM_PPC64|__AUDIT_ARCH_64BIT)
  376 +#define AUDIT_ARCH_PPC64LE (EM_PPC64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
375 377 #define AUDIT_ARCH_S390 (EM_S390)
376 378 #define AUDIT_ARCH_S390X (EM_S390|__AUDIT_ARCH_64BIT)
377 379 #define AUDIT_ARCH_SH (EM_SH)
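
These defines are reported to the audit subsystem by each architecture's syscall_get_arch() helper. A sketch of how the new value might be selected on powerpc follows; audit_arch() is a hypothetical name, not the code from this series, and per the comment above there is deliberately no 32-bit little-endian variant:

	static inline int audit_arch(void)
	{
		int arch = is_32bit_task() ? AUDIT_ARCH_PPC : AUDIT_ARCH_PPC64;

	#ifdef __LITTLE_ENDIAN__
		arch |= __AUDIT_ARCH_LE;	/* PPC64 | LE == AUDIT_ARCH_PPC64LE */
	#endif
		return arch;
	}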