Commit 5c7707554858eca8903706b6df7cba5c0f802244

Authored by Chris Metcalf
1 parent d91c641233

drivers/edac: provide support for tile architecture

Add tile support for the EDAC driver, which provides unified system
error (memory, PCI, etc.) reporting. For now, the TILEPro port
reports only correctable memory errors (CE).

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>

Showing 7 changed files with 384 additions and 2 deletions Side-by-side Diff

... ... @@ -6082,6 +6082,7 @@
6082 6082 F: arch/tile/
6083 6083 F: drivers/char/hvc_tile.c
6084 6084 F: drivers/net/tile/
  6085 +F: drivers/edac/tile_edac.c
6085 6086  
6086 6087 TLAN NETWORK DRIVER
6087 6088 M: Samuel Chessman <chessman@tux.org>
arch/tile/include/asm/edac.h
  1 +/*
  2 + * Copyright 2011 Tilera Corporation. All Rights Reserved.
  3 + *
  4 + * This program is free software; you can redistribute it and/or
  5 + * modify it under the terms of the GNU General Public License
  6 + * as published by the Free Software Foundation, version 2.
  7 + *
  8 + * This program is distributed in the hope that it will be useful, but
  9 + * WITHOUT ANY WARRANTY; without even the implied warranty of
  10 + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
  11 + * NON INFRINGEMENT. See the GNU General Public License for
  12 + * more details.
  13 + */
  14 +
  15 +#ifndef _ASM_TILE_EDAC_H
  16 +#define _ASM_TILE_EDAC_H
  17 +
  18 +/* ECC atomic, DMA, SMP and interrupt safe scrub function */
  19 +
  20 +static inline void atomic_scrub(void *va, u32 size)
  21 +{
  22 + /*
  23 + * These is nothing to be done here because CE is
  24 + * corrected by the mshim.
  25 + */
  26 + return;
  27 +}
  28 +
  29 +#endif /* _ASM_TILE_EDAC_H */
arch/tile/include/hv/drv_mshim_intf.h
  1 +/*
  2 + * Copyright 2011 Tilera Corporation. All Rights Reserved.
  3 + *
  4 + * This program is free software; you can redistribute it and/or
  5 + * modify it under the terms of the GNU General Public License
  6 + * as published by the Free Software Foundation, version 2.
  7 + *
  8 + * This program is distributed in the hope that it will be useful, but
  9 + * WITHOUT ANY WARRANTY; without even the implied warranty of
  10 + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
  11 + * NON INFRINGEMENT. See the GNU General Public License for
  12 + * more details.
  13 + */
  14 +
  15 +/**
  16 + * @file drv_mshim_intf.h
  17 + * Interface definitions for the Linux EDAC memory controller driver.
  18 + */
  19 +
  20 +#ifndef _SYS_HV_INCLUDE_DRV_MSHIM_INTF_H
  21 +#define _SYS_HV_INCLUDE_DRV_MSHIM_INTF_H
  22 +
  23 +/** Number of memory controllers in the public API. */
  24 +#define TILE_MAX_MSHIMS 4
  25 +
/**
 * Memory info under each memory controller.
 * The field order and widths are part of the layout and must not change.
 */
struct mshim_mem_info {
	uint64_t mem_size;	/**< Total memory size in bytes. */
	uint8_t mem_type;	/**< Memory type, DDR2 or DDR3. */
	uint8_t mem_ecc;	/**< Memory supports ECC. */
};
  33 +
/**
 * DIMM error structure.
 * For now, only correctable errors are counted, and the mshim does not
 * record the physical address (PA) of the error. The hypervisor panics
 * on uncorrectable errors.
 */
struct mshim_mem_error {
	uint32_t sbe_count;	/**< Number of single-bit errors. */
};
  43 +
  44 +/** Read this offset to get the memory info per mshim. */
  45 +#define MSHIM_MEM_INFO_OFF 0x100
  46 +
  47 +/** Read this offset to check DIMM error. */
  48 +#define MSHIM_MEM_ERROR_OFF 0x200
  49 +
  50 +#endif /* _SYS_HV_INCLUDE_DRV_MSHIM_INTF_H */
arch/tile/include/hv/hypervisor.h
... ... @@ -338,9 +338,10 @@
338 338 #define HV_ENOTREADY -812 /**< Device not ready */
339 339 #define HV_EIO -813 /**< I/O error */
340 340 #define HV_ENOMEM -814 /**< Out of memory */
  341 +#define HV_EAGAIN -815 /**< Try again */
341 342  
342 343 #define HV_ERR_MAX -801 /**< Largest HV error code */
343   -#define HV_ERR_MIN -814 /**< Smallest HV error code */
  344 +#define HV_ERR_MIN -815 /**< Smallest HV error code */
344 345  
345 346 #ifndef __ASSEMBLER__
346 347  
347 348  
... ... @@ -867,7 +868,44 @@
867 868 */
868 869 HV_PhysAddrRange hv_inquire_physical(int idx);
869 870  
/** Possible DIMM types. */
typedef enum {
	NO_DIMM = 0,	/**< No DIMM */
	DDR2 = 1,	/**< DDR2 */
	DDR3 = 2,	/**< DDR3 */
} HV_DIMM_Type;
870 878  
#ifdef __tilegx__

/* Memory controller limits used when __tilegx__ is defined. */

/** Log2 of minimum DIMM bytes supported by the memory controller
 *  (1 << 29 bytes = 512 MB). */
#define HV_MSH_MIN_DIMM_SIZE_SHIFT 29

/** Max number of DIMMs contained by one memory controller. */
#define HV_MSH_MAX_DIMMS 8

#else

/* Memory controller limits for all other builds (presumably TILEPro -- confirm). */

/** Log2 of minimum DIMM bytes supported by the memory controller
 *  (1 << 26 bytes = 64 MB). */
#define HV_MSH_MIN_DIMM_SIZE_SHIFT 26

/** Max number of DIMMs contained by one memory controller. */
#define HV_MSH_MAX_DIMMS 2

#endif /* __tilegx__ */
  896 +
/*
 * Shift/mask pairs for decoding a packed DIMM descriptor value.
 * NOTE(review): the field that carries this value is not visible here;
 * given the values below, a sub-field is presumably extracted as
 * (value >> *_SHIFT) & *_MASK -- confirm against the users of these macros.
 */

/** Number of bits to right-shift to get the DIMM type. */
#define HV_DIMM_TYPE_SHIFT 0

/** Bits to mask to get the DIMM type. */
#define HV_DIMM_TYPE_MASK 0xf

/** Number of bits to right-shift to get the DIMM size. */
#define HV_DIMM_SIZE_SHIFT 4

/** Bits to mask to get the DIMM size. */
#define HV_DIMM_SIZE_MASK 0xf
  908 +
871 909 /** Memory controller information. */
872 910 typedef struct
873 911 {
... ... @@ -1043,6 +1081,7 @@
1043 1081 * downcall:
1044 1082 *
1045 1083 * INT_MESSAGE_RCV_DWNCL (hypervisor message available)
  1084 + * INT_DEV_INTR_DWNCL (device interrupt)
1046 1085 * INT_DMATLB_MISS_DWNCL (DMA TLB miss)
1047 1086 * INT_SNITLB_MISS_DWNCL (SNI TLB miss)
1048 1087 * INT_DMATLB_ACCESS_DWNCL (DMA TLB access violation)
drivers/edac/Kconfig
... ... @@ -7,7 +7,7 @@
7 7 menuconfig EDAC
8 8 bool "EDAC (Error Detection And Correction) reporting"
9 9 depends on HAS_IOMEM
10   - depends on X86 || PPC
  10 + depends on X86 || PPC || TILE
11 11 help
12 12 EDAC is designed to report errors in the core system.
13 13 These are low-level errors that are reported in the CPU or
... ... @@ -281,6 +281,14 @@
281 281 IBM CPC925 Bridge and Memory Controller, which is
282 282 a companion chip to the PowerPC 970 family of
283 283 processors.
  284 +
  285 +config EDAC_TILE
  286 + tristate "Tilera Memory Controller"
  287 + depends on EDAC_MM_EDAC && TILE
  288 + default y
  289 + help
  290 + Support for error detection and correction on the
  291 + Tilera memory controller.
284 292  
285 293 endif # EDAC
drivers/edac/Makefile
... ... @@ -53,4 +53,6 @@
53 53 obj-$(CONFIG_EDAC_PPC4XX) += ppc4xx_edac.o
54 54 obj-$(CONFIG_EDAC_AMD8111) += amd8111_edac.o
55 55 obj-$(CONFIG_EDAC_AMD8131) += amd8131_edac.o
  56 +
  57 +obj-$(CONFIG_EDAC_TILE) += tile_edac.o
drivers/edac/tile_edac.c
  1 +/*
  2 + * Copyright 2011 Tilera Corporation. All Rights Reserved.
  3 + *
  4 + * This program is free software; you can redistribute it and/or
  5 + * modify it under the terms of the GNU General Public License
  6 + * as published by the Free Software Foundation, version 2.
  7 + *
  8 + * This program is distributed in the hope that it will be useful, but
  9 + * WITHOUT ANY WARRANTY; without even the implied warranty of
  10 + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
  11 + * NON INFRINGEMENT. See the GNU General Public License for
  12 + * more details.
  13 + * Tilera-specific EDAC driver.
  14 + *
  15 + * This source code is derived from the following driver:
  16 + *
  17 + * Cell MIC driver for ECC counting
  18 + *
  19 + * Copyright 2007 Benjamin Herrenschmidt, IBM Corp.
  20 + * <benh@kernel.crashing.org>
  21 + *
  22 + */
  23 +
  24 +#include <linux/module.h>
  25 +#include <linux/init.h>
  26 +#include <linux/platform_device.h>
  27 +#include <linux/io.h>
  28 +#include <linux/uaccess.h>
  29 +#include <linux/edac.h>
  30 +#include <hv/hypervisor.h>
  31 +#include <hv/drv_mshim_intf.h>
  32 +
  33 +#include "edac_core.h"
  34 +
  35 +#define DRV_NAME "tile-edac"
  36 +
/*
 * Number of cs_rows needed per memory controller on TILEPro.
 * Passed to edac_mc_alloc() as the csrow count.
 */
#define TILE_EDAC_NR_CSROWS 1

/*
 * Number of channels per memory controller on TILEPro.
 * Passed to edac_mc_alloc() as the channel count.
 */
#define TILE_EDAC_NR_CHANS 1

/*
 * Granularity of reported error in bytes on TILEPro.
 * Stored in csrow->grain when the csrow table is initialized.
 */
#define TILE_EDAC_ERROR_GRAIN 8

/*
 * TILE processor has multiple independent memory controllers.
 *
 * One slot per mshim index; a slot is filled by tile_edac_init() when the
 * platform device for that mshim is registered and is walked again by
 * tile_edac_exit(). Slots for unconfigured mshims stay NULL.
 *
 * NOTE(review): this table is not declared static; confirm whether any
 * other translation unit references it before narrowing its linkage.
 */
struct platform_device *mshim_pdev[TILE_MAX_MSHIMS];
  48 +
/*
 * Per-memory-controller driver state, kept in the private area of the
 * mem_ctl_info (mci->pvt_info).
 */
struct tile_edac_priv {
	/* Hypervisor device handle. */
	int hv_devhdl;
	/* Memory controller instance #. */
	int node;
	/* Correctable-error counter kept by the driver. */
	unsigned int ce_count;
};
  57 +
  58 +static void tile_edac_check(struct mem_ctl_info *mci)
  59 +{
  60 + struct tile_edac_priv *priv = mci->pvt_info;
  61 + struct mshim_mem_error mem_error;
  62 +
  63 + if (hv_dev_pread(priv->hv_devhdl, 0, (HV_VirtAddr)&mem_error,
  64 + sizeof(struct mshim_mem_error), MSHIM_MEM_ERROR_OFF) !=
  65 + sizeof(struct mshim_mem_error)) {
  66 + pr_err(DRV_NAME ": MSHIM_MEM_ERROR_OFF pread failure.\n");
  67 + return;
  68 + }
  69 +
  70 + /* Check if the current error count is different from the saved one. */
  71 + if (mem_error.sbe_count != priv->ce_count) {
  72 + dev_dbg(mci->dev, "ECC CE err on node %d\n", priv->node);
  73 + priv->ce_count = mem_error.sbe_count;
  74 + edac_mc_handle_ce(mci, 0, 0, 0, 0, 0, mci->ctl_name);
  75 + }
  76 +}
  77 +
  78 +/*
  79 + * Initialize the 'csrows' table within the mci control structure with the
  80 + * addressing of memory.
  81 + */
  82 +static int __devinit tile_edac_init_csrows(struct mem_ctl_info *mci)
  83 +{
  84 + struct csrow_info *csrow = &mci->csrows[0];
  85 + struct tile_edac_priv *priv = mci->pvt_info;
  86 + struct mshim_mem_info mem_info;
  87 +
  88 + if (hv_dev_pread(priv->hv_devhdl, 0, (HV_VirtAddr)&mem_info,
  89 + sizeof(struct mshim_mem_info), MSHIM_MEM_INFO_OFF) !=
  90 + sizeof(struct mshim_mem_info)) {
  91 + pr_err(DRV_NAME ": MSHIM_MEM_INFO_OFF pread failure.\n");
  92 + return -1;
  93 + }
  94 +
  95 + if (mem_info.mem_ecc)
  96 + csrow->edac_mode = EDAC_SECDED;
  97 + else
  98 + csrow->edac_mode = EDAC_NONE;
  99 + switch (mem_info.mem_type) {
  100 + case DDR2:
  101 + csrow->mtype = MEM_DDR2;
  102 + break;
  103 +
  104 + case DDR3:
  105 + csrow->mtype = MEM_DDR3;
  106 + break;
  107 +
  108 + default:
  109 + return -1;
  110 + }
  111 +
  112 + csrow->first_page = 0;
  113 + csrow->nr_pages = mem_info.mem_size >> PAGE_SHIFT;
  114 + csrow->last_page = csrow->first_page + csrow->nr_pages - 1;
  115 + csrow->grain = TILE_EDAC_ERROR_GRAIN;
  116 + csrow->dtype = DEV_UNKNOWN;
  117 +
  118 + return 0;
  119 +}
  120 +
  121 +static int __devinit tile_edac_mc_probe(struct platform_device *pdev)
  122 +{
  123 + char hv_file[32];
  124 + int hv_devhdl;
  125 + struct mem_ctl_info *mci;
  126 + struct tile_edac_priv *priv;
  127 + int rc;
  128 +
  129 + sprintf(hv_file, "mshim/%d", pdev->id);
  130 + hv_devhdl = hv_dev_open((HV_VirtAddr)hv_file, 0);
  131 + if (hv_devhdl < 0)
  132 + return -EINVAL;
  133 +
  134 + /* A TILE MC has a single channel and one chip-select row. */
  135 + mci = edac_mc_alloc(sizeof(struct tile_edac_priv),
  136 + TILE_EDAC_NR_CSROWS, TILE_EDAC_NR_CHANS, pdev->id);
  137 + if (mci == NULL)
  138 + return -ENOMEM;
  139 + priv = mci->pvt_info;
  140 + priv->node = pdev->id;
  141 + priv->hv_devhdl = hv_devhdl;
  142 +
  143 + mci->dev = &pdev->dev;
  144 + mci->mtype_cap = MEM_FLAG_DDR2;
  145 + mci->edac_ctl_cap = EDAC_FLAG_SECDED;
  146 +
  147 + mci->mod_name = DRV_NAME;
  148 + mci->ctl_name = "TILEPro_Memory_Controller";
  149 + mci->dev_name = dev_name(&pdev->dev);
  150 + mci->edac_check = tile_edac_check;
  151 +
  152 + /*
  153 + * Initialize the MC control structure 'csrows' table
  154 + * with the mapping and control information.
  155 + */
  156 + if (tile_edac_init_csrows(mci)) {
  157 + /* No csrows found. */
  158 + mci->edac_cap = EDAC_FLAG_NONE;
  159 + } else {
  160 + mci->edac_cap = EDAC_FLAG_SECDED;
  161 + }
  162 +
  163 + platform_set_drvdata(pdev, mci);
  164 +
  165 + /* Register with EDAC core */
  166 + rc = edac_mc_add_mc(mci);
  167 + if (rc) {
  168 + dev_err(&pdev->dev, "failed to register with EDAC core\n");
  169 + edac_mc_free(mci);
  170 + return rc;
  171 + }
  172 +
  173 + return 0;
  174 +}
  175 +
  176 +static int __devexit tile_edac_mc_remove(struct platform_device *pdev)
  177 +{
  178 + struct mem_ctl_info *mci = platform_get_drvdata(pdev);
  179 +
  180 + edac_mc_del_mc(&pdev->dev);
  181 + if (mci)
  182 + edac_mc_free(mci);
  183 + return 0;
  184 +}
  185 +
  186 +static struct platform_driver tile_edac_mc_driver = {
  187 + .driver = {
  188 + .name = DRV_NAME,
  189 + .owner = THIS_MODULE,
  190 + },
  191 + .probe = tile_edac_mc_probe,
  192 + .remove = __devexit_p(tile_edac_mc_remove),
  193 +};
  194 +
  195 +/*
  196 + * Driver init routine.
  197 + */
  198 +static int __init tile_edac_init(void)
  199 +{
  200 + char hv_file[32];
  201 + struct platform_device *pdev;
  202 + int i, err, num = 0;
  203 +
  204 + /* Only support POLL mode. */
  205 + edac_op_state = EDAC_OPSTATE_POLL;
  206 +
  207 + err = platform_driver_register(&tile_edac_mc_driver);
  208 + if (err)
  209 + return err;
  210 +
  211 + for (i = 0; i < TILE_MAX_MSHIMS; i++) {
  212 + /*
  213 + * Not all memory controllers are configured such as in the
  214 + * case of a simulator. So we register only those mshims
  215 + * that are configured by the hypervisor.
  216 + */
  217 + sprintf(hv_file, "mshim/%d", i);
  218 + if (hv_dev_open((HV_VirtAddr)hv_file, 0) < 0)
  219 + continue;
  220 +
  221 + pdev = platform_device_register_simple(DRV_NAME, i, NULL, 0);
  222 + if (IS_ERR(pdev))
  223 + continue;
  224 + mshim_pdev[i] = pdev;
  225 + num++;
  226 + }
  227 +
  228 + if (num == 0) {
  229 + platform_driver_unregister(&tile_edac_mc_driver);
  230 + return -ENODEV;
  231 + }
  232 + return 0;
  233 +}
  234 +
  235 +/*
  236 + * Driver cleanup routine.
  237 + */
  238 +static void __exit tile_edac_exit(void)
  239 +{
  240 + int i;
  241 +
  242 + for (i = 0; i < TILE_MAX_MSHIMS; i++) {
  243 + struct platform_device *pdev = mshim_pdev[i];
  244 + if (!pdev)
  245 + continue;
  246 +
  247 + platform_set_drvdata(pdev, NULL);
  248 + platform_device_unregister(pdev);
  249 + }
  250 + platform_driver_unregister(&tile_edac_mc_driver);
  251 +}
  252 +
  253 +module_init(tile_edac_init);
  254 +module_exit(tile_edac_exit);