Commit 6c52a96e6cacb35403b85c3b42db0faf26f3ed85

Authored by David S. Miller
1 parent bde4e4ee9f

[SPARC64]: Revamp Spitfire error trap handling.

Current uncorrectable error handling was poor enough
that the processor could just loop taking the same
trap over and over again.  Fix things up so that we
at least get a log message and perhaps even some register
state.

In the process, much consolidation became possible,
particularly with the correctable error handler.

Prefix assembler and C function names with "spitfire"
to indicate that these are for Ultra-I/II/IIi/IIe only.

More work is needed to make these routines robust and
featureful to the level of the Ultra-III error handlers.

Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 6 changed files with 418 additions and 245 deletions Side-by-side Diff

arch/sparc64/kernel/entry.S
... ... @@ -21,6 +21,7 @@
21 21 #include <asm/visasm.h>
22 22 #include <asm/estate.h>
23 23 #include <asm/auxio.h>
  24 +#include <asm/sfafsr.h>
24 25  
25 26 #define curptr g6
26 27  
... ... @@ -690,9 +691,159 @@
690 691 retl
691 692 nop
692 693  
693   - .globl __do_data_access_exception
694   - .globl __do_data_access_exception_tl1
695   -__do_data_access_exception_tl1:
  694 + /* We need to carefully read the error status, ACK
  695 + * the errors, prevent recursive traps, and pass the
  696 + * information on to C code for logging.
  697 + *
  698 + * We pass the AFAR in as-is, and we encode the status
  699 + * information as described in asm-sparc64/sfafsr.h
  700 + */
  701 + .globl __spitfire_access_error
  702 +__spitfire_access_error:
  703 + /* Disable ESTATE error reporting so that we do not
  704 + * take recursive traps and RED state the processor.
  705 + */
  706 + stxa %g0, [%g0] ASI_ESTATE_ERROR_EN
  707 + membar #Sync
  708 +
  709 + mov UDBE_UE, %g1 ! %g1 = UDB error bit to test and ACK below
  710 + ldxa [%g0] ASI_AFSR, %g4 ! Get AFSR
  711 +
  712 + /* __spitfire_cee_trap branches here with AFSR in %g4 and
  713 + * UDBE_CE in %g1. It only clears ESTATE_ERR_CE in the
  714 + * ESTATE Error Enable register.
  715 + */
  716 +__spitfire_cee_trap_continue:
  717 + ldxa [%g0] ASI_AFAR, %g5 ! Get AFAR
  718 +
  719 + rdpr %tt, %g3 ! Trap type
  720 + and %g3, 0x1ff, %g3 ! Paranoia
  721 + sllx %g3, SFSTAT_TRAP_TYPE_SHIFT, %g3 ! Move TT into its SFSTAT field
  722 + or %g4, %g3, %g4 ! %g4 = AFSR | TT field
  723 + rdpr %tl, %g3 ! Trap level
  724 + cmp %g3, 1
  725 + mov 1, %g3
  726 + bleu %xcc, 1f ! TL <= 1: do not record the TL>1 flag
  727 + sllx %g3, SFSTAT_TL_GT_ONE_SHIFT, %g3 ! (delay slot) build the TL>1 flag bit
  728 +
  729 + or %g4, %g3, %g4 ! TL > 1: record the flag in the status word
  730 +
  731 + /* Read in the UDB error register state, clearing the
  732 + * sticky error bits as-needed. We only clear them if
  733 + * the UE bit is set. Likewise, __spitfire_cee_trap
  734 + * below will only do so if the CE bit is set.
  735 + *
  736 + * NOTE: UltraSparc-I/II have high and low UDB error
  737 + * registers, corresponding to the two UDB units
  738 + * present on those chips. UltraSparc-IIi only
  739 + * has a single UDB, called "SDB" in the manual.
  740 + * For IIi the upper UDB register always reads
  741 + * as zero so for our purposes things will just
  742 + * work with the checks below.
  743 + */
  744 +1: ldxa [%g0] ASI_UDBH_ERROR_R, %g3
  745 + and %g3, 0x3ff, %g7 ! Paranoia
  746 + sllx %g7, SFSTAT_UDBH_SHIFT, %g7
  747 + or %g4, %g7, %g4 ! Merge UDBH status into the status word
  748 + andcc %g3, %g1, %g3 ! UDBE_UE or UDBE_CE
  749 + be,pn %xcc, 1f ! Selected bit clear: nothing to ACK in UDBH
  750 + nop
  751 + stxa %g3, [%g0] ASI_UDB_ERROR_W ! Write the selected bit back to UDBH
  752 + membar #Sync
  753 +
  754 +1: mov 0x18, %g3 ! Address 0x18 is used for the low UDB register
  755 + ldxa [%g3] ASI_UDBL_ERROR_R, %g3
  756 + and %g3, 0x3ff, %g7 ! Paranoia
  757 + sllx %g7, SFSTAT_UDBL_SHIFT, %g7
  758 + or %g4, %g7, %g4 ! Merge UDBL status into the status word
  759 + andcc %g3, %g1, %g3 ! UDBE_UE or UDBE_CE
  760 + be,pn %xcc, 1f ! Selected bit clear: nothing to ACK in UDBL
  761 + nop
  762 + mov 0x18, %g7
  763 + stxa %g3, [%g7] ASI_UDB_ERROR_W ! Write the selected bit back to UDBL
  764 + membar #Sync
  765 +
  766 +1: /* Ok, now that we've latched the error state,
  767 + * clear the sticky bits in the AFSR.
  768 + */
  769 + stxa %g4, [%g0] ASI_AFSR ! NOTE(review): %g4 also carries the TT/TL/UDB fields ORed in above
  770 + membar #Sync
  771 +
  772 + rdpr %tl, %g2
  773 + cmp %g2, 1
  774 + rdpr %pil, %g2 ! Read the current PIL into %g2 before raising it
  775 + bleu,pt %xcc, 1f ! TL <= 1: enter through etrap_irq
  776 + wrpr %g0, 15, %pil ! (delay slot) PIL = 15 on both paths
  777 +
  778 + ba,pt %xcc, etraptl1 ! TL > 1: enter through etraptl1
  779 + rd %pc, %g7 ! (delay slot) %g7 = PC here; presumably used by etrap to resume below - confirm
  780 +
  781 + ba,pt %xcc, 2f ! Skip the etrap_irq entry used for TL <= 1
  782 + nop
  783 +
  784 +1: ba,pt %xcc, etrap_irq
  785 + rd %pc, %g7 ! (delay slot) %g7 = PC here, as on the etraptl1 path
  786 +
  787 +2: mov %l4, %o1 ! arg1: encoded status (presumably %g4 as preserved by etrap - confirm)
  788 + mov %l5, %o2 ! arg2: AFAR (presumably %g5 as preserved by etrap - confirm)
  789 + call spitfire_access_error
  790 + add %sp, PTREGS_OFF, %o0 ! (delay slot) arg0: pt_regs pointer
  791 + ba,pt %xcc, rtrap
  792 + clr %l6
  793 +
  794 + /* This is the trap handler entry point for ECC correctable
  795 + * errors. They are corrected, but we listen for the trap
  796 + * so that the event can be logged.
  797 + *
  798 + * Disrupting errors are either:
  799 + * 1) single-bit ECC errors during UDB reads to system
  800 + * memory
  801 + * 2) data parity errors during write-back events
  802 + *
  803 + * As far as I can make out from the manual, the CEE trap
  804 + * is only for correctable errors during memory read
  805 + * accesses by the front-end of the processor.
  806 + *
  807 + * The code below is only for trap level 1 CEE events,
  808 + * as it is the only situation where we can safely record
  809 + * and log. For trap level >1 we just clear the CE bit
  810 + * in the AFSR and return.
  811 + *
  812 + * This is just like __spitfire_access_error above, but it
  813 + * specifically handles correctable errors. If an
  814 + * uncorrectable error is indicated in the AFSR we
  815 + * will branch directly above to __spitfire_access_error
  816 + * to handle it instead. Uncorrectable therefore takes
  817 + * priority over correctable, and the error logging
  818 + * C code will notice this case by inspecting the
  819 + * trap type.
  820 + */
  821 + .globl __spitfire_cee_trap
  822 +__spitfire_cee_trap:
  823 + ldxa [%g0] ASI_AFSR, %g4 ! Get AFSR
  824 + mov 1, %g3
  825 + sllx %g3, SFAFSR_UE_SHIFT, %g3 ! %g3 = AFSR UE bit mask
  826 + andcc %g4, %g3, %g0 ! Check for UE
  827 + bne,pn %xcc, __spitfire_access_error ! UE set: take the uncorrectable path instead
  828 + nop
  829 +
  830 + /* Ok, in this case we only have a correctable error.
  831 + * Indicate we only wish to capture that state in register
  832 + * %g1, and we only disable CE error reporting unlike UE
  833 + * handling which disables all errors.
  834 + */
  835 + ldxa [%g0] ASI_ESTATE_ERROR_EN, %g3 ! Current error enable bits
  836 + andn %g3, ESTATE_ERR_CE, %g3 ! Drop only the CE enable bit
  837 + stxa %g3, [%g0] ASI_ESTATE_ERROR_EN
  838 + membar #Sync
  839 +
  840 + /* Preserve AFSR in %g4, indicate UDB state to capture in %g1 */
  841 + ba,pt %xcc, __spitfire_cee_trap_continue
  842 + mov UDBE_CE, %g1 ! (delay slot) %g1 = UDBE_CE for the shared code
  843 +
  844 + .globl __spitfire_data_access_exception
  845 + .globl __spitfire_data_access_exception_tl1
  846 +__spitfire_data_access_exception_tl1:
696 847 rdpr %pstate, %g4
697 848 wrpr %g4, PSTATE_MG|PSTATE_AG, %pstate
698 849 mov TLB_SFSR, %g3
699 850  
... ... @@ -714,12 +865,12 @@
714 865 109: or %g7, %lo(109b), %g7
715 866 mov %l4, %o1
716 867 mov %l5, %o2
717   - call data_access_exception_tl1
  868 + call spitfire_data_access_exception_tl1
718 869 add %sp, PTREGS_OFF, %o0
719 870 ba,pt %xcc, rtrap
720 871 clr %l6
721 872  
722   -__do_data_access_exception:
  873 +__spitfire_data_access_exception:
723 874 rdpr %pstate, %g4
724 875 wrpr %g4, PSTATE_MG|PSTATE_AG, %pstate
725 876 mov TLB_SFSR, %g3
726 877  
... ... @@ -733,14 +884,14 @@
733 884 109: or %g7, %lo(109b), %g7
734 885 mov %l4, %o1
735 886 mov %l5, %o2
736   - call data_access_exception
  887 + call spitfire_data_access_exception
737 888 add %sp, PTREGS_OFF, %o0
738 889 ba,pt %xcc, rtrap
739 890 clr %l6
740 891  
741   - .globl __do_instruction_access_exception
742   - .globl __do_instruction_access_exception_tl1
743   -__do_instruction_access_exception_tl1:
  892 + .globl __spitfire_insn_access_exception
  893 + .globl __spitfire_insn_access_exception_tl1
  894 +__spitfire_insn_access_exception_tl1:
744 895 rdpr %pstate, %g4
745 896 wrpr %g4, PSTATE_MG|PSTATE_AG, %pstate
746 897 mov TLB_SFSR, %g3
747 898  
... ... @@ -753,12 +904,12 @@
753 904 109: or %g7, %lo(109b), %g7
754 905 mov %l4, %o1
755 906 mov %l5, %o2
756   - call instruction_access_exception_tl1
  907 + call spitfire_insn_access_exception_tl1
757 908 add %sp, PTREGS_OFF, %o0
758 909 ba,pt %xcc, rtrap
759 910 clr %l6
760 911  
761   -__do_instruction_access_exception:
  912 +__spitfire_insn_access_exception:
762 913 rdpr %pstate, %g4
763 914 wrpr %g4, PSTATE_MG|PSTATE_AG, %pstate
764 915 mov TLB_SFSR, %g3
765 916  
... ... @@ -771,101 +922,10 @@
771 922 109: or %g7, %lo(109b), %g7
772 923 mov %l4, %o1
773 924 mov %l5, %o2
774   - call instruction_access_exception
  925 + call spitfire_insn_access_exception
775 926 add %sp, PTREGS_OFF, %o0
776 927 ba,pt %xcc, rtrap
777 928 clr %l6
778   -
779   - /* This is the trap handler entry point for ECC correctable
780   - * errors. They are corrected, but we listen for the trap
781   - * so that the event can be logged.
782   - *
783   - * Disrupting errors are either:
784   - * 1) single-bit ECC errors during UDB reads to system
785   - * memory
786   - * 2) data parity errors during write-back events
787   - *
788   - * As far as I can make out from the manual, the CEE trap
789   - * is only for correctable errors during memory read
790   - * accesses by the front-end of the processor.
791   - *
792   - * The code below is only for trap level 1 CEE events,
793   - * as it is the only situation where we can safely record
794   - * and log. For trap level >1 we just clear the CE bit
795   - * in the AFSR and return.
796   - */
797   -
798   - /* Our trap handling infrastructure allows us to preserve
799   - * two 64-bit values during etrap for arguments to
800   - * subsequent C code. Therefore we encode the information
801   - * as follows:
802   - *
803   - * value 1) Full 64-bits of AFAR
804   - * value 2) Low 33-bits of AFSR, then bits 33-->42
805   - * are UDBL error status and bits 43-->52
806   - * are UDBH error status
807   - */
808   - .align 64
809   - .globl cee_trap
810   -cee_trap:
811   - ldxa [%g0] ASI_AFSR, %g1 ! Read AFSR
812   - ldxa [%g0] ASI_AFAR, %g2 ! Read AFAR
813   - sllx %g1, 31, %g1 ! Clear reserved bits
814   - srlx %g1, 31, %g1 ! in AFSR
815   -
816   - /* NOTE: UltraSparc-I/II have high and low UDB error
817   - * registers, corresponding to the two UDB units
818   - * present on those chips. UltraSparc-IIi only
819   - * has a single UDB, called "SDB" in the manual.
820   - * For IIi the upper UDB register always reads
821   - * as zero so for our purposes things will just
822   - * work with the checks below.
823   - */
824   - ldxa [%g0] ASI_UDBL_ERROR_R, %g3 ! Read UDB-Low error status
825   - andcc %g3, (1 << 8), %g4 ! Check CE bit
826   - sllx %g3, (64 - 10), %g3 ! Clear reserved bits
827   - srlx %g3, (64 - 10), %g3 ! in UDB-Low error status
828   -
829   - sllx %g3, (33 + 0), %g3 ! Shift up to encoding area
830   - or %g1, %g3, %g1 ! Or it in
831   - be,pn %xcc, 1f ! Branch if CE bit was clear
832   - nop
833   - stxa %g4, [%g0] ASI_UDB_ERROR_W ! Clear CE sticky bit in UDBL
834   - membar #Sync ! Synchronize ASI stores
835   -1: mov 0x18, %g5 ! Addr of UDB-High error status
836   - ldxa [%g5] ASI_UDBH_ERROR_R, %g3 ! Read it
837   -
838   - andcc %g3, (1 << 8), %g4 ! Check CE bit
839   - sllx %g3, (64 - 10), %g3 ! Clear reserved bits
840   - srlx %g3, (64 - 10), %g3 ! in UDB-High error status
841   - sllx %g3, (33 + 10), %g3 ! Shift up to encoding area
842   - or %g1, %g3, %g1 ! Or it in
843   - be,pn %xcc, 1f ! Branch if CE bit was clear
844   - nop
845   - nop
846   -
847   - stxa %g4, [%g5] ASI_UDB_ERROR_W ! Clear CE sticky bit in UDBH
848   - membar #Sync ! Synchronize ASI stores
849   -1: mov 1, %g5 ! AFSR CE bit is
850   - sllx %g5, 20, %g5 ! bit 20
851   - stxa %g5, [%g0] ASI_AFSR ! Clear CE sticky bit in AFSR
852   - membar #Sync ! Synchronize ASI stores
853   - sllx %g2, (64 - 41), %g2 ! Clear reserved bits
854   - srlx %g2, (64 - 41), %g2 ! in latched AFAR
855   -
856   - andn %g2, 0x0f, %g2 ! Finish resv bit clearing
857   - mov %g1, %g4 ! Move AFSR+UDB* into save reg
858   - mov %g2, %g5 ! Move AFAR into save reg
859   - rdpr %pil, %g2
860   - wrpr %g0, 15, %pil
861   - ba,pt %xcc, etrap_irq
862   - rd %pc, %g7
863   - mov %l4, %o0
864   -
865   - mov %l5, %o1
866   - call cee_log
867   - add %sp, PTREGS_OFF, %o2
868   - ba,a,pt %xcc, rtrap_irq
869 929  
870 930 /* Capture I/D/E-cache state into per-cpu error scoreboard.
871 931 *
arch/sparc64/kernel/traps.c
... ... @@ -33,6 +33,7 @@
33 33 #include <asm/dcu.h>
34 34 #include <asm/estate.h>
35 35 #include <asm/chafsr.h>
  36 +#include <asm/sfafsr.h>
36 37 #include <asm/psrcompat.h>
37 38 #include <asm/processor.h>
38 39 #include <asm/timer.h>
... ... @@ -143,8 +144,7 @@
143 144 }
144 145 #endif
145 146  
146   -void instruction_access_exception(struct pt_regs *regs,
147   - unsigned long sfsr, unsigned long sfar)
  147 +void spitfire_insn_access_exception(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar)
148 148 {
149 149 siginfo_t info;
150 150  
... ... @@ -153,8 +153,8 @@
153 153 return;
154 154  
155 155 if (regs->tstate & TSTATE_PRIV) {
156   - printk("instruction_access_exception: SFSR[%016lx] SFAR[%016lx], going.\n",
157   - sfsr, sfar);
  156 + printk("spitfire_insn_access_exception: SFSR[%016lx] "
  157 + "SFAR[%016lx], going.\n", sfsr, sfar);
158 158 die_if_kernel("Iax", regs);
159 159 }
160 160 if (test_thread_flag(TIF_32BIT)) {
161 161  
162 162  
... ... @@ -169,19 +169,17 @@
169 169 force_sig_info(SIGSEGV, &info, current);
170 170 }
171 171  
172   -void instruction_access_exception_tl1(struct pt_regs *regs,
173   - unsigned long sfsr, unsigned long sfar)
  172 +void spitfire_insn_access_exception_tl1(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar)
174 173 {
175 174 if (notify_die(DIE_TRAP_TL1, "instruction access exception tl1", regs,
176 175 0, 0x8, SIGTRAP) == NOTIFY_STOP)
177 176 return;
178 177  
179 178 dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
180   - instruction_access_exception(regs, sfsr, sfar);
  179 + spitfire_insn_access_exception(regs, sfsr, sfar);
181 180 }
182 181  
183   -void data_access_exception(struct pt_regs *regs,
184   - unsigned long sfsr, unsigned long sfar)
  182 +void spitfire_data_access_exception(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar)
185 183 {
186 184 siginfo_t info;
187 185  
... ... @@ -207,8 +205,8 @@
207 205 return;
208 206 }
209 207 /* Shit... */
210   - printk("data_access_exception: SFSR[%016lx] SFAR[%016lx], going.\n",
211   - sfsr, sfar);
  208 + printk("spitfire_data_access_exception: SFSR[%016lx] "
  209 + "SFAR[%016lx], going.\n", sfsr, sfar);
212 210 die_if_kernel("Dax", regs);
213 211 }
214 212  
215 213  
... ... @@ -220,15 +218,14 @@
220 218 force_sig_info(SIGSEGV, &info, current);
221 219 }
222 220  
223   -void data_access_exception_tl1(struct pt_regs *regs,
224   - unsigned long sfsr, unsigned long sfar)
  221 +void spitfire_data_access_exception_tl1(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar)
225 222 {
226 223 if (notify_die(DIE_TRAP_TL1, "data access exception tl1", regs,
227 224 0, 0x30, SIGTRAP) == NOTIFY_STOP)
228 225 return;
229 226  
230 227 dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
231   - data_access_exception(regs, sfsr, sfar);
  228 + spitfire_data_access_exception(regs, sfsr, sfar);
232 229 }
233 230  
234 231 #ifdef CONFIG_PCI
235 232  
236 233  
... ... @@ -264,56 +261,15 @@
264 261 : "memory");
265 262 }
266 263  
267   -void do_iae(struct pt_regs *regs)
  264 +static void spitfire_enable_estate_errors(void)
268 265 {
269   - siginfo_t info;
270   -
271   - spitfire_clean_and_reenable_l1_caches();
272   -
273   - if (notify_die(DIE_TRAP, "instruction access exception", regs,
274   - 0, 0x8, SIGTRAP) == NOTIFY_STOP)
275   - return;
276   -
277   - info.si_signo = SIGBUS;
278   - info.si_errno = 0;
279   - info.si_code = BUS_OBJERR;
280   - info.si_addr = (void *)0;
281   - info.si_trapno = 0;
282   - force_sig_info(SIGBUS, &info, current);
  266 + __asm__ __volatile__("stxa %0, [%%g0] %1\n\t"
  267 + "membar #Sync"
  268 + : /* no outputs */
  269 + : "r" (ESTATE_ERR_ALL),
  270 + "i" (ASI_ESTATE_ERROR_EN));
283 271 }
284 272  
285   -void do_dae(struct pt_regs *regs)
286   -{
287   - siginfo_t info;
288   -
289   -#ifdef CONFIG_PCI
290   - if (pci_poke_in_progress && pci_poke_cpu == smp_processor_id()) {
291   - spitfire_clean_and_reenable_l1_caches();
292   -
293   - pci_poke_faulted = 1;
294   -
295   - /* Why the fuck did they have to change this? */
296   - if (tlb_type == cheetah || tlb_type == cheetah_plus)
297   - regs->tpc += 4;
298   -
299   - regs->tnpc = regs->tpc + 4;
300   - return;
301   - }
302   -#endif
303   - spitfire_clean_and_reenable_l1_caches();
304   -
305   - if (notify_die(DIE_TRAP, "data access exception", regs,
306   - 0, 0x30, SIGTRAP) == NOTIFY_STOP)
307   - return;
308   -
309   - info.si_signo = SIGBUS;
310   - info.si_errno = 0;
311   - info.si_code = BUS_OBJERR;
312   - info.si_addr = (void *)0;
313   - info.si_trapno = 0;
314   - force_sig_info(SIGBUS, &info, current);
315   -}
316   -
317 273 static char ecc_syndrome_table[] = {
318 274 0x4c, 0x40, 0x41, 0x48, 0x42, 0x48, 0x48, 0x49,
319 275 0x43, 0x48, 0x48, 0x49, 0x48, 0x49, 0x49, 0x4a,
320 276  
321 277  
322 278  
... ... @@ -349,65 +305,15 @@
349 305 0x0b, 0x48, 0x48, 0x4b, 0x48, 0x4b, 0x4b, 0x4a
350 306 };
351 307  
352   -/* cee_trap in entry.S encodes AFSR/UDBH/UDBL error status
353   - * in the following format. The AFAR is left as is, with
354   - * reserved bits cleared, and is a raw 40-bit physical
355   - * address.
356   - */
357   -#define CE_STATUS_UDBH_UE (1UL << (43 + 9))
358   -#define CE_STATUS_UDBH_CE (1UL << (43 + 8))
359   -#define CE_STATUS_UDBH_ESYNDR (0xffUL << 43)
360   -#define CE_STATUS_UDBH_SHIFT 43
361   -#define CE_STATUS_UDBL_UE (1UL << (33 + 9))
362   -#define CE_STATUS_UDBL_CE (1UL << (33 + 8))
363   -#define CE_STATUS_UDBL_ESYNDR (0xffUL << 33)
364   -#define CE_STATUS_UDBL_SHIFT 33
365   -#define CE_STATUS_AFSR_MASK (0x1ffffffffUL)
366   -#define CE_STATUS_AFSR_ME (1UL << 32)
367   -#define CE_STATUS_AFSR_PRIV (1UL << 31)
368   -#define CE_STATUS_AFSR_ISAP (1UL << 30)
369   -#define CE_STATUS_AFSR_ETP (1UL << 29)
370   -#define CE_STATUS_AFSR_IVUE (1UL << 28)
371   -#define CE_STATUS_AFSR_TO (1UL << 27)
372   -#define CE_STATUS_AFSR_BERR (1UL << 26)
373   -#define CE_STATUS_AFSR_LDP (1UL << 25)
374   -#define CE_STATUS_AFSR_CP (1UL << 24)
375   -#define CE_STATUS_AFSR_WP (1UL << 23)
376   -#define CE_STATUS_AFSR_EDP (1UL << 22)
377   -#define CE_STATUS_AFSR_UE (1UL << 21)
378   -#define CE_STATUS_AFSR_CE (1UL << 20)
379   -#define CE_STATUS_AFSR_ETS (0xfUL << 16)
380   -#define CE_STATUS_AFSR_ETS_SHIFT 16
381   -#define CE_STATUS_AFSR_PSYND (0xffffUL << 0)
382   -#define CE_STATUS_AFSR_PSYND_SHIFT 0
383   -
384   -/* Layout of Ecache TAG Parity Syndrome of AFSR */
385   -#define AFSR_ETSYNDROME_7_0 0x1UL /* E$-tag bus bits <7:0> */
386   -#define AFSR_ETSYNDROME_15_8 0x2UL /* E$-tag bus bits <15:8> */
387   -#define AFSR_ETSYNDROME_21_16 0x4UL /* E$-tag bus bits <21:16> */
388   -#define AFSR_ETSYNDROME_24_22 0x8UL /* E$-tag bus bits <24:22> */
389   -
390 308 static char *syndrome_unknown = "<Unknown>";
391 309  
392   -asmlinkage void cee_log(unsigned long ce_status,
393   - unsigned long afar,
394   - struct pt_regs *regs)
  310 +static void spitfire_log_udb_syndrome(unsigned long afar, unsigned long udbh, unsigned long udbl, unsigned long bit)
395 311 {
396   - char memmod_str[64];
397   - char *p;
398   - unsigned short scode, udb_reg;
  312 + unsigned short scode;
  313 + char memmod_str[64], *p;
399 314  
400   - printk(KERN_WARNING "CPU[%d]: Correctable ECC Error "
401   - "AFSR[%lx] AFAR[%016lx] UDBL[%lx] UDBH[%lx]\n",
402   - smp_processor_id(),
403   - (ce_status & CE_STATUS_AFSR_MASK),
404   - afar,
405   - ((ce_status >> CE_STATUS_UDBL_SHIFT) & 0x3ffUL),
406   - ((ce_status >> CE_STATUS_UDBH_SHIFT) & 0x3ffUL));
407   -
408   - udb_reg = ((ce_status >> CE_STATUS_UDBL_SHIFT) & 0x3ffUL);
409   - if (udb_reg & (1 << 8)) {
410   - scode = ecc_syndrome_table[udb_reg & 0xff];
  315 + if (udbl & bit) {
  316 + scode = ecc_syndrome_table[udbl & 0xff];
411 317 if (prom_getunumber(scode, afar,
412 318 memmod_str, sizeof(memmod_str)) == -1)
413 319 p = syndrome_unknown;
... ... @@ -418,9 +324,8 @@
418 324 smp_processor_id(), scode, p);
419 325 }
420 326  
421   - udb_reg = ((ce_status >> CE_STATUS_UDBH_SHIFT) & 0x3ffUL);
422   - if (udb_reg & (1 << 8)) {
423   - scode = ecc_syndrome_table[udb_reg & 0xff];
  327 + if (udbh & bit) {
  328 + scode = ecc_syndrome_table[udbh & 0xff];
424 329 if (prom_getunumber(scode, afar,
425 330 memmod_str, sizeof(memmod_str)) == -1)
426 331 p = syndrome_unknown;
... ... @@ -429,6 +334,127 @@
429 334 printk(KERN_WARNING "CPU[%d]: UDBH Syndrome[%x] "
430 335 "Memory Module \"%s\"\n",
431 336 smp_processor_id(), scode, p);
  337 + }
  338 +
  339 +}
  340 +
  341 +static void spitfire_cee_log(unsigned long afsr, unsigned long afar, unsigned long udbh, unsigned long udbl, int tl1, struct pt_regs *regs)
  342 +{
  343 + /* Print the raw register state, log any UDB syndromes flagged with UDBE_CE, notify, then re-enable ESTATE errors. */
  344 + printk(KERN_WARNING "CPU[%d]: Correctable ECC Error "
  345 + "AFSR[%lx] AFAR[%016lx] UDBL[%lx] UDBH[%lx] TL>1[%d]\n",
  346 + smp_processor_id(), afsr, afar, udbl, udbh, tl1);
  347 +
  348 + spitfire_log_udb_syndrome(afar, udbh, udbl, UDBE_CE);
  349 +
  350 + /* We always log it, even if someone is listening for this
  351 + * trap.
  352 + */
  353 + notify_die(DIE_TRAP, "Correctable ECC Error", regs,
  354 + 0, TRAP_TYPE_CEE, SIGTRAP);
  355 +
  356 + /* The Correctable ECC Error trap does not disable I/D caches. So
  357 + * we only have to restore the ESTATE Error Enable register.
  358 + */
  359 + spitfire_enable_estate_errors();
  360 +}
  361 +
  362 +static void spitfire_ue_log(unsigned long afsr, unsigned long afar, unsigned long udbh, unsigned long udbl, unsigned long tt, int tl1, struct pt_regs *regs)
  363 +{
  364 + siginfo_t info;
  365 +
  366 + printk(KERN_WARNING "CPU[%d]: Uncorrectable Error AFSR[%lx] "
  367 + "AFAR[%lx] UDBL[%lx] UDBH[%ld] TT[%lx] TL>1[%d]\n",
  368 + smp_processor_id(), afsr, afar, udbl, udbh, tt, tl1);
  369 +
  370 + /* XXX add more human friendly logging of the error status
  371 + * XXX as is implemented for cheetah
  372 + */
  373 +
  374 + spitfire_log_udb_syndrome(afar, udbh, udbl, UDBE_UE);
  375 +
  376 + /* We always log it, even if someone is listening for this
  377 + * trap.
  378 + */
  379 + notify_die(DIE_TRAP, "Uncorrectable Error", regs,
  380 + 0, tt, SIGTRAP);
  381 +
  382 + if (regs->tstate & TSTATE_PRIV) {
  383 + if (tl1)
  384 + dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
  385 + die_if_kernel("UE", regs);
  386 + }
  387 +
  388 + /* XXX need more intelligent processing here, such as is implemented
  389 + * XXX for cheetah errors, in fact if the E-cache still holds the
  390 + * XXX line with bad parity this will loop
  391 + */
  392 +
  393 + spitfire_clean_and_reenable_l1_caches();
  394 + spitfire_enable_estate_errors();
  395 +
  396 + if (test_thread_flag(TIF_32BIT)) {
  397 + regs->tpc &= 0xffffffff;
  398 + regs->tnpc &= 0xffffffff;
  399 + }
  400 + info.si_signo = SIGBUS;
  401 + info.si_errno = 0;
  402 + info.si_code = BUS_OBJERR;
  403 + info.si_addr = (void *)0;
  404 + info.si_trapno = 0;
  405 + force_sig_info(SIGBUS, &info, current);
  406 +}
  407 +
  408 +void spitfire_access_error(struct pt_regs *regs, unsigned long status_encoded, unsigned long afar)
  409 +{
  410 + unsigned long afsr, tt, udbh, udbl;
  411 + int tl1;
  412 + /* Unpack the fields of the encoded status word using the SFSTAT_* layout macros from asm/sfafsr.h. */
  413 + afsr = (status_encoded & SFSTAT_AFSR_MASK) >> SFSTAT_AFSR_SHIFT;
  414 + tt = (status_encoded & SFSTAT_TRAP_TYPE) >> SFSTAT_TRAP_TYPE_SHIFT;
  415 + tl1 = (status_encoded & SFSTAT_TL_GT_ONE) ? 1 : 0;
  416 + udbl = (status_encoded & SFSTAT_UDBL_MASK) >> SFSTAT_UDBL_SHIFT;
  417 + udbh = (status_encoded & SFSTAT_UDBH_MASK) >> SFSTAT_UDBH_SHIFT;
  418 +
  419 +#ifdef CONFIG_PCI
  420 + if (tt == TRAP_TYPE_DAE &&
  421 + pci_poke_in_progress && pci_poke_cpu == smp_processor_id()) {
  422 + spitfire_clean_and_reenable_l1_caches();
  423 + spitfire_enable_estate_errors();
  424 +
  425 + pci_poke_faulted = 1;
  426 + regs->tnpc = regs->tpc + 4;
  427 + return;
  428 + }
  429 +#endif
  430 + /* UE set in AFSR: log it as an uncorrectable error, whatever the trap type. */
  431 + if (afsr & SFAFSR_UE)
  432 + spitfire_ue_log(afsr, afar, udbh, udbl, tt, tl1, regs);
  433 +
  434 + if (tt == TRAP_TYPE_CEE) {
  435 + /* Handle the case where we took a CEE trap, but ACK'd
  436 + * only the UE state in the UDB error registers.
  437 + */
  438 + if (afsr & SFAFSR_UE) {
  439 + if (udbh & UDBE_CE) {
  440 + __asm__ __volatile__(
  441 + "stxa %0, [%1] %2\n\t"
  442 + "membar #Sync"
  443 + : /* no outputs */
  444 + : "r" (udbh & UDBE_CE),
  445 + "r" (0x0), "i" (ASI_UDB_ERROR_W));
  446 + }
  447 + if (udbl & UDBE_CE) {
  448 + __asm__ __volatile__(
  449 + "stxa %0, [%1] %2\n\t"
  450 + "membar #Sync"
  451 + : /* no outputs */
  452 + : "r" (udbl & UDBE_CE),
  453 + "r" (0x18), "i" (ASI_UDB_ERROR_W));
  454 + }
  455 + }
  456 +
  457 + spitfire_cee_log(afsr, afar, udbh, udbl, tl1, regs);
432 458 }
433 459 }
434 460  
arch/sparc64/kernel/ttable.S
... ... @@ -18,9 +18,10 @@
18 18 tl0_resv000: BOOT_KERNEL BTRAP(0x1) BTRAP(0x2) BTRAP(0x3)
19 19 tl0_resv004: BTRAP(0x4) BTRAP(0x5) BTRAP(0x6) BTRAP(0x7)
20 20 tl0_iax: membar #Sync
21   - TRAP_NOSAVE_7INSNS(__do_instruction_access_exception)
  21 + TRAP_NOSAVE_7INSNS(__spitfire_insn_access_exception)
22 22 tl0_resv009: BTRAP(0x9)
23   -tl0_iae: TRAP(do_iae)
  23 +tl0_iae: membar #Sync
  24 + TRAP_NOSAVE_7INSNS(__spitfire_access_error)
24 25 tl0_resv00b: BTRAP(0xb) BTRAP(0xc) BTRAP(0xd) BTRAP(0xe) BTRAP(0xf)
25 26 tl0_ill: membar #Sync
26 27 TRAP_7INSNS(do_illegal_instruction)
27 28  
... ... @@ -36,9 +37,10 @@
36 37 tl0_div0: TRAP(do_div0)
37 38 tl0_resv029: BTRAP(0x29) BTRAP(0x2a) BTRAP(0x2b) BTRAP(0x2c) BTRAP(0x2d) BTRAP(0x2e)
38 39 tl0_resv02f: BTRAP(0x2f)
39   -tl0_dax: TRAP_NOSAVE(__do_data_access_exception)
  40 +tl0_dax: TRAP_NOSAVE(__spitfire_data_access_exception)
40 41 tl0_resv031: BTRAP(0x31)
41   -tl0_dae: TRAP(do_dae)
  42 +tl0_dae: membar #Sync
  43 + TRAP_NOSAVE_7INSNS(__spitfire_access_error)
42 44 tl0_resv033: BTRAP(0x33)
43 45 tl0_mna: TRAP_NOSAVE(do_mna)
44 46 tl0_lddfmna: TRAP_NOSAVE(do_lddfmna)
... ... @@ -73,7 +75,8 @@
73 75 tl0_ivec: TRAP_IVEC
74 76 tl0_paw: TRAP(do_paw)
75 77 tl0_vaw: TRAP(do_vaw)
76   -tl0_cee: TRAP_NOSAVE(cee_trap)
  78 +tl0_cee: membar #Sync
  79 + TRAP_NOSAVE_7INSNS(__spitfire_cee_trap)
77 80 tl0_iamiss:
78 81 #include "itlb_base.S"
79 82 tl0_damiss:
80 83  
... ... @@ -175,9 +178,10 @@
175 178 sparc64_ttable_tl1:
176 179 tl1_resv000: BOOT_KERNEL BTRAPTL1(0x1) BTRAPTL1(0x2) BTRAPTL1(0x3)
177 180 tl1_resv004: BTRAPTL1(0x4) BTRAPTL1(0x5) BTRAPTL1(0x6) BTRAPTL1(0x7)
178   -tl1_iax: TRAP_NOSAVE(__do_instruction_access_exception_tl1)
  181 +tl1_iax: TRAP_NOSAVE(__spitfire_insn_access_exception_tl1)
179 182 tl1_resv009: BTRAPTL1(0x9)
180   -tl1_iae: TRAPTL1(do_iae_tl1)
  183 +tl1_iae: membar #Sync
  184 + TRAP_NOSAVE_7INSNS(__spitfire_access_error)
181 185 tl1_resv00b: BTRAPTL1(0xb) BTRAPTL1(0xc) BTRAPTL1(0xd) BTRAPTL1(0xe) BTRAPTL1(0xf)
182 186 tl1_ill: TRAPTL1(do_ill_tl1)
183 187 tl1_privop: BTRAPTL1(0x11)
184 188  
... ... @@ -193,9 +197,10 @@
193 197 tl1_div0: TRAPTL1(do_div0_tl1)
194 198 tl1_resv029: BTRAPTL1(0x29) BTRAPTL1(0x2a) BTRAPTL1(0x2b) BTRAPTL1(0x2c)
195 199 tl1_resv02d: BTRAPTL1(0x2d) BTRAPTL1(0x2e) BTRAPTL1(0x2f)
196   -tl1_dax: TRAP_NOSAVE(__do_data_access_exception_tl1)
  200 +tl1_dax: TRAP_NOSAVE(__spitfire_data_access_exception_tl1)
197 201 tl1_resv031: BTRAPTL1(0x31)
198   -tl1_dae: TRAPTL1(do_dae_tl1)
  202 +tl1_dae: membar #Sync
  203 + TRAP_NOSAVE_7INSNS(__spitfire_access_error)
199 204 tl1_resv033: BTRAPTL1(0x33)
200 205 tl1_mna: TRAP_NOSAVE(do_mna)
201 206 tl1_lddfmna: TRAPTL1(do_lddfmna_tl1)
... ... @@ -219,8 +224,8 @@
219 224 tl1_vaw: TRAPTL1(do_vaw_tl1)
220 225  
221 226 /* The grotty trick to save %g1 into current->thread.cee_stuff
222   - * is because when we take this trap we could be interrupting trap
223   - * code already using the trap alternate global registers.
  227 + * is because when we take this trap we could be interrupting
  228 + * trap code already using the trap alternate global registers.
224 229 *
225 230 * We cross our fingers and pray that this store/load does
226 231 * not cause yet another CEE trap.
arch/sparc64/kernel/unaligned.c
... ... @@ -349,9 +349,9 @@
349 349  
350 350 extern void do_fpother(struct pt_regs *regs);
351 351 extern void do_privact(struct pt_regs *regs);
352   -extern void data_access_exception(struct pt_regs *regs,
353   - unsigned long sfsr,
354   - unsigned long sfar);
  352 +extern void spitfire_data_access_exception(struct pt_regs *regs,
  353 + unsigned long sfsr,
  354 + unsigned long sfar);
355 355  
356 356 int handle_ldf_stq(u32 insn, struct pt_regs *regs)
357 357 {
358 358  
... ... @@ -394,14 +394,14 @@
394 394 break;
395 395 }
396 396 default:
397   - data_access_exception(regs, 0, addr);
  397 + spitfire_data_access_exception(regs, 0, addr);
398 398 return 1;
399 399 }
400 400 if (put_user (first >> 32, (u32 __user *)addr) ||
401 401 __put_user ((u32)first, (u32 __user *)(addr + 4)) ||
402 402 __put_user (second >> 32, (u32 __user *)(addr + 8)) ||
403 403 __put_user ((u32)second, (u32 __user *)(addr + 12))) {
404   - data_access_exception(regs, 0, addr);
  404 + spitfire_data_access_exception(regs, 0, addr);
405 405 return 1;
406 406 }
407 407 } else {
... ... @@ -414,7 +414,7 @@
414 414 do_privact(regs);
415 415 return 1;
416 416 } else if (asi > ASI_SNFL) {
417   - data_access_exception(regs, 0, addr);
  417 + spitfire_data_access_exception(regs, 0, addr);
418 418 return 1;
419 419 }
420 420 switch (insn & 0x180000) {
... ... @@ -431,7 +431,7 @@
431 431 err |= __get_user (data[i], (u32 __user *)(addr + 4*i));
432 432 }
433 433 if (err && !(asi & 0x2 /* NF */)) {
434   - data_access_exception(regs, 0, addr);
  434 + spitfire_data_access_exception(regs, 0, addr);
435 435 return 1;
436 436 }
437 437 if (asi & 0x8) /* Little */ {
... ... @@ -534,7 +534,7 @@
534 534 *(u64 *)(f->regs + freg) = value;
535 535 current_thread_info()->fpsaved[0] |= flag;
536 536 } else {
537   -daex: data_access_exception(regs, sfsr, sfar);
  537 +daex: spitfire_data_access_exception(regs, sfsr, sfar);
538 538 return;
539 539 }
540 540 advance(regs);
... ... @@ -578,7 +578,7 @@
578 578 __put_user ((u32)value, (u32 __user *)(sfar + 4)))
579 579 goto daex;
580 580 } else {
581   -daex: data_access_exception(regs, sfsr, sfar);
  581 +daex: spitfire_data_access_exception(regs, sfsr, sfar);
582 582 return;
583 583 }
584 584 advance(regs);
arch/sparc64/kernel/winfixup.S
... ... @@ -318,7 +318,7 @@
318 318 nop
319 319 rdpr %pstate, %l1 ! Prepare to change globals.
320 320 mov %g4, %o1 ! Setup args for
321   - mov %g5, %o2 ! final call to data_access_exception.
  321 + mov %g5, %o2 ! final call to spitfire_data_access_exception.
322 322 andn %l1, PSTATE_MM, %l1 ! We want to be in RMO
323 323  
324 324 mov %g6, %o7 ! Stash away current.
... ... @@ -330,7 +330,7 @@
330 330 mov TSB_REG, %g1
331 331 ldxa [%g1] ASI_IMMU, %g5
332 332 #endif
333   - call data_access_exception
  333 + call spitfire_data_access_exception
334 334 add %sp, PTREGS_OFF, %o0
335 335  
336 336 b,pt %xcc, rtrap
... ... @@ -391,7 +391,7 @@
391 391 109: or %g7, %lo(109b), %g7
392 392 mov %l4, %o1
393 393 mov %l5, %o2
394   - call data_access_exception
  394 + call spitfire_data_access_exception
395 395 add %sp, PTREGS_OFF, %o0
396 396 ba,pt %xcc, rtrap
397 397 clr %l6
include/asm-sparc64/sfafsr.h
  1 +#ifndef _SPARC64_SFAFSR_H
  2 +#define _SPARC64_SFAFSR_H
  3 +
  4 +#include <asm/const.h>
  5 +
  6 +/* Spitfire Asynchronous Fault Status register, ASI=0x4C VA<63:0>=0x0 */
  7 +
  8 +#define SFAFSR_ME (_AC(1,UL) << SFAFSR_ME_SHIFT)
  9 +#define SFAFSR_ME_SHIFT 32
  10 +#define SFAFSR_PRIV (_AC(1,UL) << SFAFSR_PRIV_SHIFT)
  11 +#define SFAFSR_PRIV_SHIFT 31
  12 +#define SFAFSR_ISAP (_AC(1,UL) << SFAFSR_ISAP_SHIFT)
  13 +#define SFAFSR_ISAP_SHIFT 30
  14 +#define SFAFSR_ETP (_AC(1,UL) << SFAFSR_ETP_SHIFT)
  15 +#define SFAFSR_ETP_SHIFT 29
  16 +#define SFAFSR_IVUE (_AC(1,UL) << SFAFSR_IVUE_SHIFT)
  17 +#define SFAFSR_IVUE_SHIFT 28
  18 +#define SFAFSR_TO (_AC(1,UL) << SFAFSR_TO_SHIFT)
  19 +#define SFAFSR_TO_SHIFT 27
  20 +#define SFAFSR_BERR (_AC(1,UL) << SFAFSR_BERR_SHIFT)
  21 +#define SFAFSR_BERR_SHIFT 26
  22 +#define SFAFSR_LDP (_AC(1,UL) << SFAFSR_LDP_SHIFT)
  23 +#define SFAFSR_LDP_SHIFT 25
  24 +#define SFAFSR_CP (_AC(1,UL) << SFAFSR_CP_SHIFT)
  25 +#define SFAFSR_CP_SHIFT 24
  26 +#define SFAFSR_WP (_AC(1,UL) << SFAFSR_WP_SHIFT)
  27 +#define SFAFSR_WP_SHIFT 23
  28 +#define SFAFSR_EDP (_AC(1,UL) << SFAFSR_EDP_SHIFT)
  29 +#define SFAFSR_EDP_SHIFT 22
  30 +#define SFAFSR_UE (_AC(1,UL) << SFAFSR_UE_SHIFT)
  31 +#define SFAFSR_UE_SHIFT 21
  32 +#define SFAFSR_CE (_AC(1,UL) << SFAFSR_CE_SHIFT)
  33 +#define SFAFSR_CE_SHIFT 20
  34 +#define SFAFSR_ETS (_AC(0xf,UL) << SFAFSR_ETS_SHIFT)
  35 +#define SFAFSR_ETS_SHIFT 16
  36 +#define SFAFSR_PSYND (_AC(0xffff,UL) << SFAFSR_PSYND_SHIFT)
  37 +#define SFAFSR_PSYND_SHIFT 0
  38 +
  39 +/* UDB Error Register, ASI=0x7f VA<63:0>=0x0(High),0x18(Low) for read
  40 + * ASI=0x77 VA<63:0>=0x0(High),0x18(Low) for write
  41 + */
  42 +
  43 +#define UDBE_UE (_AC(1,UL) << 9)
  44 +#define UDBE_CE (_AC(1,UL) << 8)
  45 +#define UDBE_E_SYNDR (_AC(0xff,UL) << 0)
  46 +
  47 +/* The trap handlers for asynchronous errors encode the AFSR and
  48 + * other pieces of information into a 64-bit argument for C code
  49 + * encoded as follows:
  50 + *
  51 + * -----------------------------------------------
  52 + * | UDB_H | UDB_L | TL>1 | TT | AFSR |
  53 + * -----------------------------------------------
  54 + * 63 54 53 44 42 41 33 32 0
  55 + *
  56 + * The AFAR is passed in unchanged.
  57 + */
  58 +#define SFSTAT_UDBH_MASK (_AC(0x3ff,UL) << SFSTAT_UDBH_SHIFT)
  59 +#define SFSTAT_UDBH_SHIFT 54
  60 +#define SFSTAT_UDBL_MASK (_AC(0x3ff,UL) << SFSTAT_UDBH_SHIFT)
  61 +#define SFSTAT_UDBL_SHIFT 44
  62 +#define SFSTAT_TL_GT_ONE (_AC(1,UL) << SFSTAT_TL_GT_ONE_SHIFT)
  63 +#define SFSTAT_TL_GT_ONE_SHIFT 42
  64 +#define SFSTAT_TRAP_TYPE (_AC(0x1FF,UL) << SFSTAT_TRAP_TYPE_SHIFT)
  65 +#define SFSTAT_TRAP_TYPE_SHIFT 33
  66 +#define SFSTAT_AFSR_MASK (_AC(0x1ffffffff,UL) << SFSTAT_AFSR_SHIFT)
  67 +#define SFSTAT_AFSR_SHIFT 0
  68 +
  69 +/* ESTATE Error Enable Register, ASI=0x4b VA<63:0>=0x0 */
  70 +#define ESTATE_ERR_CE 0x1 /* Correctable errors */
  71 +#define ESTATE_ERR_NCE 0x2 /* TO, BERR, LDP, ETP, EDP, WP, UE, IVUE */
  72 +#define ESTATE_ERR_ISAP 0x4 /* System address parity error */
  73 +#define ESTATE_ERR_ALL (ESTATE_ERR_CE | \
  74 + ESTATE_ERR_NCE | \
  75 + ESTATE_ERR_ISAP)
  76 +
  77 +/* The various trap types that report using the above state. */
  78 +#define TRAP_TYPE_IAE 0x09 /* Instruction Access Error */
  79 +#define TRAP_TYPE_DAE 0x32 /* Data Access Error */
  80 +#define TRAP_TYPE_CEE 0x63 /* Correctable ECC Error */
  81 +
  82 +#endif /* _SPARC64_SFAFSR_H */