Commit f1a18a10566081abfce1649c2f3884b28fff7372

Authored by Srinivas Pandruvada
Committed by Zhang Rui
1 parent 25cdce170d

Thermal: CPU Package temperature thermal

This driver registers the CPU digital temperature sensor as a thermal
zone at package level.
Each package will show up as one zone with at most two trip points.
These trip points can be both read and updated. Once a non-zero
value is set in a trip point, if the package temperature
goes above or below this setting, a thermal notification is
generated.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>

Showing 3 changed files with 655 additions and 1 deletions Side-by-side Diff

drivers/thermal/Kconfig
... ... @@ -169,5 +169,17 @@
169 169 enforce idle time which results in more package C-state residency. The
170 170 user interface is exposed via generic thermal framework.
171 171  
  172 +config X86_PKG_TEMP_THERMAL
  173 + tristate "X86 package temperature thermal driver"
  174 + depends on THERMAL
  175 + depends on X86
  176 + select THERMAL_GOV_USER_SPACE
  177 + default m
  178 + help
  179 + Enable this to register the CPU digital sensor for package temperature
  180 + as a thermal zone. Each package will have its own thermal zone. There are
  181 + two trip points which can be set by the user to get notifications via
  182 + thermal notification methods.
  183 +
172 184 endif
drivers/thermal/Makefile
... ... @@ -23,4 +23,5 @@
23 23 obj-$(CONFIG_ARMADA_THERMAL) += armada_thermal.o
24 24 obj-$(CONFIG_DB8500_CPUFREQ_COOLING) += db8500_cpufreq_cooling.o
25 25 obj-$(CONFIG_INTEL_POWERCLAMP) += intel_powerclamp.o
  26 +obj-$(CONFIG_X86_PKG_TEMP_THERMAL) += x86_pkg_temp_thermal.o
drivers/thermal/x86_pkg_temp_thermal.c
  1 +/*
  2 + * x86_pkg_temp_thermal driver
  3 + * Copyright (c) 2013, Intel Corporation.
  4 + *
  5 + * This program is free software; you can redistribute it and/or modify it
  6 + * under the terms and conditions of the GNU General Public License,
  7 + * version 2, as published by the Free Software Foundation.
  8 + *
  9 + * This program is distributed in the hope it will be useful, but WITHOUT
  10 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  12 + * more details.
  13 + *
  14 + * You should have received a copy of the GNU General Public License along with
  15 + * this program; if not, write to the Free Software Foundation, Inc.
  16 + *
  17 + */
  18 +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  19 +
  20 +#include <linux/module.h>
  21 +#include <linux/init.h>
  22 +#include <linux/err.h>
  23 +#include <linux/param.h>
  24 +#include <linux/device.h>
  25 +#include <linux/platform_device.h>
  26 +#include <linux/cpu.h>
  27 +#include <linux/smp.h>
  28 +#include <linux/slab.h>
  29 +#include <linux/pm.h>
  30 +#include <linux/thermal.h>
  31 +#include <linux/debugfs.h>
  32 +#include <asm/cpu_device_id.h>
  33 +#include <asm/mce.h>
  34 +
  35 +/*
  36 +* Rate control delay: the idea is to introduce a debounce effect.
  37 +* This should be long enough to reduce the number of events when
  38 +* the threshold is set to a temperature which is constantly
  39 +* violated, but short enough to still take timely action.
  40 +* The action can be to remove the threshold or change it to the next
  41 +* interesting setting. Based on experiments, an interval of around
  42 +* 5 seconds under load gives a significant
  43 +* temperature change.
  44 +*/
  45 +#define PKG_TEMP_THERMAL_NOTIFY_DELAY 5000
  46 +static int notify_delay_ms = PKG_TEMP_THERMAL_NOTIFY_DELAY;
  47 +module_param(notify_delay_ms, int, 0644);
  48 +MODULE_PARM_DESC(notify_delay_ms,
  49 + "User space notification delay in milli seconds.");
  50 +
  51 +/* Number of trip points in thermal zone. Currently it can't
  52 +* be more than 2. The MSR allows setting and getting notifications
  53 +* for only 2 thresholds. This define enforces that limit in case
  54 +* cpuid returns a wrong value for the number of thresholds.
  55 +*/
  56 +#define MAX_NUMBER_OF_TRIPS 2
  57 +
  58 +struct phy_dev_entry {
  59 + struct list_head list;
  60 + u16 phys_proc_id;
  61 + u16 first_cpu;
  62 + u32 tj_max;
  63 + int ref_cnt;
  64 + u32 start_pkg_therm_low;
  65 + u32 start_pkg_therm_high;
  66 + struct thermal_zone_device *tzone;
  67 +};
  68 +
  69 +/* List maintaining number of package instances */
  70 +static LIST_HEAD(phy_dev_list);
  71 +static DEFINE_MUTEX(phy_dev_list_mutex);
  72 +
  73 +/* Interrupt to work function schedule queue */
  74 +static DEFINE_PER_CPU(struct delayed_work, pkg_temp_thermal_threshold_work);
  75 +
  76 +/* To track if the work is already scheduled on a package */
  77 +static u8 *pkg_work_scheduled;
  78 +
  79 +/* Spin lock to prevent races with pkg_work_scheduled */
  80 +static spinlock_t pkg_work_lock;
  81 +static u16 max_phy_id;
  82 +
  83 +/* Debug counters to show using debugfs */
  84 +static struct dentry *debugfs;
  85 +static unsigned int pkg_interrupt_cnt;
  86 +static unsigned int pkg_work_cnt;
  87 +
  88 +static int pkg_temp_debugfs_init(void)
  89 +{
  90 + struct dentry *d;
  91 +
  92 + debugfs = debugfs_create_dir("pkg_temp_thermal", NULL);
  93 + if (!debugfs)
  94 + return -ENOENT;
  95 +
  96 + d = debugfs_create_u32("pkg_thres_interrupt", S_IRUGO, debugfs,
  97 + (u32 *)&pkg_interrupt_cnt);
  98 + if (!d)
  99 + goto err_out;
  100 +
  101 + d = debugfs_create_u32("pkg_thres_work", S_IRUGO, debugfs,
  102 + (u32 *)&pkg_work_cnt);
  103 + if (!d)
  104 + goto err_out;
  105 +
  106 + return 0;
  107 +
  108 +err_out:
  109 + debugfs_remove_recursive(debugfs);
  110 + return -ENOENT;
  111 +}
  112 +
  113 +static struct phy_dev_entry
  114 + *pkg_temp_thermal_get_phy_entry(unsigned int cpu)
  115 +{
  116 + u16 phys_proc_id = topology_physical_package_id(cpu);
  117 + struct phy_dev_entry *phy_ptr;
  118 +
  119 + mutex_lock(&phy_dev_list_mutex);
  120 +
  121 + list_for_each_entry(phy_ptr, &phy_dev_list, list)
  122 + if (phy_ptr->phys_proc_id == phys_proc_id) {
  123 + mutex_unlock(&phy_dev_list_mutex);
  124 + return phy_ptr;
  125 + }
  126 +
  127 + mutex_unlock(&phy_dev_list_mutex);
  128 +
  129 + return NULL;
  130 +}
  131 +
  132 +/*
  133 +* tj-max is interesting because thresholds are set relative to this
  134 +* temperature.
  135 +*/
  136 +static int get_tj_max(int cpu, u32 *tj_max)
  137 +{
  138 + u32 eax, edx;
  139 + u32 val;
  140 + int err;
  141 +
  142 + err = rdmsr_safe_on_cpu(cpu, MSR_IA32_TEMPERATURE_TARGET, &eax, &edx);
  143 + if (err)
  144 + goto err_ret;
  145 + else {
  146 + val = (eax >> 16) & 0xff;
  147 + if (val)
  148 + *tj_max = val * 1000;
  149 + else {
  150 + err = -EINVAL;
  151 + goto err_ret;
  152 + }
  153 + }
  154 +
  155 + return 0;
  156 +err_ret:
  157 + *tj_max = 0;
  158 + return err;
  159 +}
  160 +
  161 +static int sys_get_curr_temp(struct thermal_zone_device *tzd, unsigned long *temp)
  162 +{
  163 + u32 eax, edx;
  164 + struct phy_dev_entry *phy_dev_entry;
  165 +
  166 + phy_dev_entry = tzd->devdata;
  167 + rdmsr_on_cpu(phy_dev_entry->first_cpu, MSR_IA32_PACKAGE_THERM_STATUS,
  168 + &eax, &edx);
  169 + if (eax & 0x80000000) {
  170 + *temp = phy_dev_entry->tj_max -
  171 + ((eax >> 16) & 0x7f) * 1000;
  172 + pr_debug("sys_get_curr_temp %ld\n", *temp);
  173 + return 0;
  174 + }
  175 +
  176 + return -EINVAL;
  177 +}
  178 +
  179 +static int sys_get_trip_temp(struct thermal_zone_device *tzd,
  180 + int trip, unsigned long *temp)
  181 +{
  182 + u32 eax, edx;
  183 + struct phy_dev_entry *phy_dev_entry;
  184 + u32 mask, shift;
  185 + unsigned long thres_reg_value;
  186 + int ret;
  187 +
  188 + if (trip >= MAX_NUMBER_OF_TRIPS)
  189 + return -EINVAL;
  190 +
  191 + phy_dev_entry = tzd->devdata;
  192 +
  193 + if (trip) {
  194 + mask = THERM_MASK_THRESHOLD1;
  195 + shift = THERM_SHIFT_THRESHOLD1;
  196 + } else {
  197 + mask = THERM_MASK_THRESHOLD0;
  198 + shift = THERM_SHIFT_THRESHOLD0;
  199 + }
  200 +
  201 + ret = rdmsr_on_cpu(phy_dev_entry->first_cpu,
  202 + MSR_IA32_PACKAGE_THERM_INTERRUPT, &eax, &edx);
  203 + if (ret < 0)
  204 + return -EINVAL;
  205 +
  206 + thres_reg_value = (eax & mask) >> shift;
  207 + if (thres_reg_value)
  208 + *temp = phy_dev_entry->tj_max - thres_reg_value * 1000;
  209 + else
  210 + *temp = 0;
  211 + pr_debug("sys_get_trip_temp %ld\n", *temp);
  212 +
  213 + return 0;
  214 +}
  215 +
  216 +int sys_set_trip_temp(struct thermal_zone_device *tzd, int trip,
  217 + unsigned long temp)
  218 +{
  219 + u32 l, h;
  220 + struct phy_dev_entry *phy_dev_entry;
  221 + u32 mask, shift, intr;
  222 + int ret;
  223 +
  224 + phy_dev_entry = tzd->devdata;
  225 +
  226 + if (trip >= MAX_NUMBER_OF_TRIPS || temp >= phy_dev_entry->tj_max)
  227 + return -EINVAL;
  228 +
  229 + ret = rdmsr_on_cpu(phy_dev_entry->first_cpu,
  230 + MSR_IA32_PACKAGE_THERM_INTERRUPT,
  231 + &l, &h);
  232 + if (ret < 0)
  233 + return -EINVAL;
  234 +
  235 + if (trip) {
  236 + mask = THERM_MASK_THRESHOLD1;
  237 + shift = THERM_SHIFT_THRESHOLD1;
  238 + intr = THERM_INT_THRESHOLD1_ENABLE;
  239 + } else {
  240 + mask = THERM_MASK_THRESHOLD0;
  241 + shift = THERM_SHIFT_THRESHOLD0;
  242 + intr = THERM_INT_THRESHOLD0_ENABLE;
  243 + }
  244 + l &= ~mask; /* drop the previously programmed threshold value */
  245 + /*
  246 + * When user space sets a trip temperature == 0, it is an indication
  247 + * that it is no longer interested in receiving notifications.
  248 + */
  249 + if (!temp)
  250 + l &= ~intr;
  251 + else {
  252 + l |= (phy_dev_entry->tj_max - temp)/1000 << shift; /* offset below tj_max */
  253 + l |= intr;
  254 + }
  255 +
  256 + return wrmsr_on_cpu(phy_dev_entry->first_cpu,
  257 + MSR_IA32_PACKAGE_THERM_INTERRUPT,
  258 + l, h);
  259 +}
  260 +
  261 +static int sys_get_trip_type(struct thermal_zone_device *thermal,
  262 + int trip, enum thermal_trip_type *type)
  263 +{
  264 +
  265 + *type = THERMAL_TRIP_PASSIVE;
  266 +
  267 + return 0;
  268 +}
  269 +
  270 +/* Thermal zone callback registry */
  271 +static struct thermal_zone_device_ops tzone_ops = {
  272 + .get_temp = sys_get_curr_temp,
  273 + .get_trip_temp = sys_get_trip_temp,
  274 + .get_trip_type = sys_get_trip_type,
  275 + .set_trip_temp = sys_set_trip_temp,
  276 +};
  277 +
  278 +static bool pkg_temp_thermal_platform_thermal_rate_control(void)
  279 +{
  280 + return true;
  281 +}
  282 +
  283 +/* Enable threshold interrupt on local package/cpu */
  284 +static inline void enable_pkg_thres_interrupt(void)
  285 +{
  286 + u32 l, h;
  287 + u8 thres_0, thres_1;
  288 +
  289 + rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
  290 + /* only enable/disable if it had valid threshold value */
  291 + thres_0 = (l & THERM_MASK_THRESHOLD0) >> THERM_SHIFT_THRESHOLD0;
  292 + thres_1 = (l & THERM_MASK_THRESHOLD1) >> THERM_SHIFT_THRESHOLD1;
  293 + if (thres_0)
  294 + l |= THERM_INT_THRESHOLD0_ENABLE;
  295 + if (thres_1)
  296 + l |= THERM_INT_THRESHOLD1_ENABLE;
  297 + wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
  298 +}
  299 +
  300 +/* Disable threshold interrupt on local package/cpu */
  301 +static inline void disable_pkg_thres_interrupt(void)
  302 +{
  303 + u32 l, h;
  304 + rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
  305 + wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
  306 + l & (~THERM_INT_THRESHOLD0_ENABLE) &
  307 + (~THERM_INT_THRESHOLD1_ENABLE), h);
  308 +}
  309 +
  310 +static void pkg_temp_thermal_threshold_work_fn(struct work_struct *work)
  311 +{
  312 + __u64 msr_val;
  313 + int cpu = smp_processor_id();
  314 + int phy_id = topology_physical_package_id(cpu);
  315 + struct phy_dev_entry *phdev = pkg_temp_thermal_get_phy_entry(cpu);
  316 + bool notify = false;
  317 +
  318 + if (!phdev)
  319 + return;
  320 +
  321 + spin_lock(&pkg_work_lock);
  322 + ++pkg_work_cnt;
  323 + if (unlikely(phy_id > max_phy_id)) {
  324 + spin_unlock(&pkg_work_lock);
  325 + return;
  326 + }
  327 + pkg_work_scheduled[phy_id] = 0;
  328 + spin_unlock(&pkg_work_lock);
  329 +
  330 + enable_pkg_thres_interrupt();
  331 + rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
  332 + if (msr_val & THERM_LOG_THRESHOLD0) {
  333 + wrmsrl(MSR_IA32_PACKAGE_THERM_STATUS,
  334 + msr_val & ~THERM_LOG_THRESHOLD0);
  335 + notify = true;
  336 + }
  337 + if (msr_val & THERM_LOG_THRESHOLD1) {
  338 + wrmsrl(MSR_IA32_PACKAGE_THERM_STATUS,
  339 + msr_val & ~THERM_LOG_THRESHOLD1);
  340 + notify = true;
  341 + }
  342 + if (notify) {
  343 + pr_debug("thermal_zone_device_update\n");
  344 + thermal_zone_device_update(phdev->tzone);
  345 + }
  346 +}
  347 +
  348 +static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val)
  349 +{
  350 + unsigned long flags;
  351 + int cpu = smp_processor_id();
  352 + int phy_id = topology_physical_package_id(cpu);
  353 +
  354 + /*
  355 + * When a package is in interrupted state, all CPU's in that package
  356 + * are in the same interrupt state. So scheduling on any one CPU in
  357 + * the package is enough and simply return for others.
  358 + */
  359 + spin_lock_irqsave(&pkg_work_lock, flags);
  360 + ++pkg_interrupt_cnt;
  361 + if (unlikely(phy_id > max_phy_id) || unlikely(!pkg_work_scheduled) ||
  362 + pkg_work_scheduled[phy_id]) {
  363 + disable_pkg_thres_interrupt();
  364 + spin_unlock_irqrestore(&pkg_work_lock, flags);
  365 + return -EINVAL;
  366 + }
  367 + pkg_work_scheduled[phy_id] = 1;
  368 + spin_unlock_irqrestore(&pkg_work_lock, flags);
  369 +
  370 + disable_pkg_thres_interrupt();
  371 + schedule_delayed_work_on(cpu,
  372 + &per_cpu(pkg_temp_thermal_threshold_work, cpu),
  373 + msecs_to_jiffies(notify_delay_ms));
  374 + return 0;
  375 +}
  376 +
  377 +static int find_siblings_cpu(int cpu)
  378 +{
  379 + int i;
  380 + int id = topology_physical_package_id(cpu);
  381 +
  382 + for_each_online_cpu(i)
  383 + if (i != cpu && topology_physical_package_id(i) == id)
  384 + return i;
  385 +
  386 + return 0;
  387 +}
  388 +
  389 +static int pkg_temp_thermal_device_add(unsigned int cpu)
  390 +{
  391 + int err;
  392 + u32 tj_max;
  393 + struct phy_dev_entry *phy_dev_entry;
  394 + char buffer[30];
  395 + int thres_count;
  396 + u32 eax, ebx, ecx, edx;
  397 +
  398 + cpuid(6, &eax, &ebx, &ecx, &edx);
  399 + thres_count = ebx & 0x07;
  400 + if (!thres_count)
  401 + return -ENODEV;
  402 +
  403 + thres_count = clamp_val(thres_count, 0, MAX_NUMBER_OF_TRIPS);
  404 +
  405 + err = get_tj_max(cpu, &tj_max);
  406 + if (err)
  407 + goto err_ret;
  408 +
  409 + mutex_lock(&phy_dev_list_mutex);
  410 +
  411 + phy_dev_entry = kzalloc(sizeof(*phy_dev_entry), GFP_KERNEL);
  412 + if (!phy_dev_entry) {
  413 + err = -ENOMEM;
  414 + goto err_ret_unlock;
  415 + }
  416 +
  417 + spin_lock(&pkg_work_lock);
  418 + if (topology_physical_package_id(cpu) > max_phy_id)
  419 + max_phy_id = topology_physical_package_id(cpu);
  420 + pkg_work_scheduled = krealloc(pkg_work_scheduled,
  421 + (max_phy_id+1) * sizeof(u8), GFP_ATOMIC);
  422 + if (!pkg_work_scheduled) {
  423 + spin_unlock(&pkg_work_lock);
  424 + err = -ENOMEM;
  425 + goto err_ret_free;
  426 + }
  427 + pkg_work_scheduled[topology_physical_package_id(cpu)] = 0;
  428 + spin_unlock(&pkg_work_lock);
  429 +
  430 + phy_dev_entry->phys_proc_id = topology_physical_package_id(cpu);
  431 + phy_dev_entry->first_cpu = cpu;
  432 + phy_dev_entry->tj_max = tj_max;
  433 + phy_dev_entry->ref_cnt = 1;
  434 + snprintf(buffer, sizeof(buffer), "pkg-temp-%d\n",
  435 + phy_dev_entry->phys_proc_id);
  436 + phy_dev_entry->tzone = thermal_zone_device_register(buffer,
  437 + thres_count,
  438 + (thres_count == MAX_NUMBER_OF_TRIPS) ?
  439 + 0x03 : 0x01,
  440 + phy_dev_entry, &tzone_ops, NULL, 0, 0);
  441 + if (IS_ERR(phy_dev_entry->tzone)) {
  442 + err = PTR_ERR(phy_dev_entry->tzone);
  443 + goto err_ret_free;
  444 + }
  445 + /* Store MSR value for package thermal interrupt, to restore at exit */
  446 + rdmsr_on_cpu(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
  447 + &phy_dev_entry->start_pkg_therm_low,
  448 + &phy_dev_entry->start_pkg_therm_high);
  449 +
  450 + list_add_tail(&phy_dev_entry->list, &phy_dev_list);
  451 + pr_debug("pkg_temp_thermal_device_add :phy_id %d cpu %d\n",
  452 + phy_dev_entry->phys_proc_id, cpu);
  453 +
  454 + mutex_unlock(&phy_dev_list_mutex);
  455 +
  456 + return 0;
  457 +
  458 +err_ret_free:
  459 + kfree(phy_dev_entry);
  460 +err_ret_unlock:
  461 + mutex_unlock(&phy_dev_list_mutex);
  462 +
  463 +err_ret:
  464 + return err;
  465 +}
  466 +
  467 +static int pkg_temp_thermal_device_remove(unsigned int cpu)
  468 +{
  469 + struct phy_dev_entry *n;
  470 + u16 phys_proc_id = topology_physical_package_id(cpu);
  471 + struct phy_dev_entry *phdev =
  472 + pkg_temp_thermal_get_phy_entry(cpu);
  473 +
  474 + if (!phdev)
  475 + return -ENODEV;
  476 +
  477 + mutex_lock(&phy_dev_list_mutex);
  478 + /* If we are losing the first cpu of this package, pick a sibling */
  479 + if (phdev->first_cpu == cpu) {
  480 + phdev->first_cpu = find_siblings_cpu(cpu);
  481 + pr_debug("thermal_device_remove: first cpu switched %d\n",
  482 + phdev->first_cpu);
  483 + }
  484 + /*
  485 + * It is possible that no siblings are left, as this was the last cpu
  486 + * going offline. We don't need to worry about this assignment,
  487 + * as the phydev entry will be removed in this case and the
  488 + * thermal zone is unregistered.
  489 + */
  490 + --phdev->ref_cnt;
  491 + pr_debug("thermal_device_remove: pkg: %d cpu %d ref_cnt %d\n",
  492 + phys_proc_id, cpu, phdev->ref_cnt);
  493 + if (!phdev->ref_cnt)
  494 + list_for_each_entry_safe(phdev, n, &phy_dev_list, list) {
  495 + if (phdev->phys_proc_id == phys_proc_id) {
  496 + thermal_zone_device_unregister(phdev->tzone);
  497 + list_del(&phdev->list);
  498 + kfree(phdev);
  499 + break;
  500 + }
  501 + }
  502 + mutex_unlock(&phy_dev_list_mutex);
  503 +
  504 + return 0;
  505 +}
  506 +
  507 +static int get_core_online(unsigned int cpu)
  508 +{
  509 + struct cpuinfo_x86 *c = &cpu_data(cpu);
  510 + struct phy_dev_entry *phdev = pkg_temp_thermal_get_phy_entry(cpu);
  511 +
  512 + /* Check if there is already an instance for this package */
  513 + if (!phdev) {
  514 + if (!cpu_has(c, X86_FEATURE_DTHERM) &&
  515 + !cpu_has(c, X86_FEATURE_PTS))
  516 + return -ENODEV;
  517 + if (pkg_temp_thermal_device_add(cpu))
  518 + return -ENODEV;
  519 + } else {
  520 + mutex_lock(&phy_dev_list_mutex);
  521 + ++phdev->ref_cnt;
  522 + pr_debug("get_core_online: cpu %d ref_cnt %d\n",
  523 + cpu, phdev->ref_cnt);
  524 + mutex_unlock(&phy_dev_list_mutex);
  525 + }
  526 + INIT_DELAYED_WORK(&per_cpu(pkg_temp_thermal_threshold_work, cpu),
  527 + pkg_temp_thermal_threshold_work_fn);
  528 +
  529 + pr_debug("get_core_online: cpu %d successful\n", cpu);
  530 +
  531 + return 0;
  532 +}
  533 +
  534 +static void put_core_offline(unsigned int cpu)
  535 +{
  536 + if (!pkg_temp_thermal_device_remove(cpu))
  537 + cancel_delayed_work_sync(
  538 + &per_cpu(pkg_temp_thermal_threshold_work, cpu));
  539 +
  540 + pr_debug("put_core_offline: cpu %d\n", cpu);
  541 +}
  542 +
  543 +static int pkg_temp_thermal_cpu_callback(struct notifier_block *nfb,
  544 + unsigned long action, void *hcpu)
  545 +{
  546 + unsigned int cpu = (unsigned long) hcpu;
  547 +
  548 + switch (action) {
  549 + case CPU_ONLINE:
  550 + case CPU_DOWN_FAILED:
  551 + get_core_online(cpu);
  552 + break;
  553 + case CPU_DOWN_PREPARE:
  554 + put_core_offline(cpu);
  555 + break;
  556 + }
  557 + return NOTIFY_OK;
  558 +}
  559 +
  560 +static struct notifier_block pkg_temp_thermal_notifier __refdata = {
  561 + .notifier_call = pkg_temp_thermal_cpu_callback,
  562 +};
  563 +
  564 +static const struct x86_cpu_id __initconst pkg_temp_thermal_ids[] = {
  565 + { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_DTHERM },
  566 + {}
  567 +};
  568 +MODULE_DEVICE_TABLE(x86cpu, pkg_temp_thermal_ids);
  569 +
  570 +static int __init pkg_temp_thermal_init(void)
  571 +{
  572 + int i;
  573 +
  574 + if (!x86_match_cpu(pkg_temp_thermal_ids))
  575 + return -ENODEV;
  576 +
  577 + spin_lock_init(&pkg_work_lock);
  578 + platform_thermal_package_notify =
  579 + pkg_temp_thermal_platform_thermal_notify;
  580 + platform_thermal_package_rate_control =
  581 + pkg_temp_thermal_platform_thermal_rate_control;
  582 +
  583 + get_online_cpus();
  584 + for_each_online_cpu(i)
  585 + if (get_core_online(i))
  586 + goto err_ret;
  587 + register_hotcpu_notifier(&pkg_temp_thermal_notifier);
  588 + put_online_cpus();
  589 +
  590 + pkg_temp_debugfs_init(); /* Don't care if fails */
  591 +
  592 + return 0;
  593 +
  594 +err_ret:
  595 + get_online_cpus();
  596 + for_each_online_cpu(i)
  597 + put_core_offline(i);
  598 + put_online_cpus();
  599 + kfree(pkg_work_scheduled);
  600 + platform_thermal_package_notify = NULL;
  601 + platform_thermal_package_rate_control = NULL;
  602 +
  603 + return -ENODEV;
  604 +}
  605 +
  606 +static void __exit pkg_temp_thermal_exit(void)
  607 +{
  608 + struct phy_dev_entry *phdev, *n;
  609 + int i;
  610 +
  611 + get_online_cpus();
  612 + unregister_hotcpu_notifier(&pkg_temp_thermal_notifier);
  613 + mutex_lock(&phy_dev_list_mutex);
  614 + list_for_each_entry_safe(phdev, n, &phy_dev_list, list) {
  615 + /* Restore the MSR value saved when the package was added */
  616 + wrmsr_on_cpu(phdev->first_cpu,
  617 + MSR_IA32_PACKAGE_THERM_INTERRUPT,
  618 + phdev->start_pkg_therm_low,
  619 + phdev->start_pkg_therm_high);
  620 + thermal_zone_device_unregister(phdev->tzone);
  621 + list_del(&phdev->list);
  622 + kfree(phdev);
  623 + }
  624 + mutex_unlock(&phy_dev_list_mutex);
  625 + platform_thermal_package_notify = NULL;
  626 + platform_thermal_package_rate_control = NULL;
  627 + for_each_online_cpu(i)
  628 + cancel_delayed_work_sync(
  629 + &per_cpu(pkg_temp_thermal_threshold_work, i));
  630 + put_online_cpus();
  631 +
  632 + kfree(pkg_work_scheduled);
  633 +
  634 + debugfs_remove_recursive(debugfs);
  635 +}
  636 +
  637 +module_init(pkg_temp_thermal_init)
  638 +module_exit(pkg_temp_thermal_exit)
  639 +
  640 +MODULE_DESCRIPTION("X86 PKG TEMP Thermal Driver");
  641 +MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>");
  642 +MODULE_LICENSE("GPL v2");