Patrick Georgi (patrick@georgi-clan.de) just uploaded a new patch set to gerrit, which you can find at http://review.coreboot.org/1658
-gerrit
commit 3fcc0bc02004f773b9bee1271bbb5ce72e64ee3a Author: Nico Huber <nico.huber@secunet.com> Date: Tue Oct 2 11:11:42 2012 +0200
Overhaul speedstep code
This adds proper support for turbo and super-low-frequency modes. Calculation of the p-states has been rewritten and moved into an extra file speedstep.c so it can be used for non-acpi stuff like EMTTM table generation.
It has been tested with a Core2Duo T9400 (Penryn) processor.
Change-Id: I5f7104fc921ba67d85794254f11d486b6688ecec Signed-off-by: Nico Huber <nico.huber@secunet.com> --- src/cpu/intel/speedstep/Makefile.inc | 2 +- src/cpu/intel/speedstep/acpi.c | 195 +++++++++++++++++++++-------------- src/cpu/intel/speedstep/speedstep.c | 171 ++++++++++++++++++++++++++++++ src/include/cpu/intel/speedstep.h | 74 +++++++++++++ 4 files changed, 361 insertions(+), 81 deletions(-)
diff --git a/src/cpu/intel/speedstep/Makefile.inc b/src/cpu/intel/speedstep/Makefile.inc index c717a33..753dbcd 100644 --- a/src/cpu/intel/speedstep/Makefile.inc +++ b/src/cpu/intel/speedstep/Makefile.inc @@ -1 +1 @@ -ramstage-$(CONFIG_GENERATE_ACPI_TABLES) += acpi.c +ramstage-$(CONFIG_GENERATE_ACPI_TABLES) += acpi.c speedstep.c diff --git a/src/cpu/intel/speedstep/acpi.c b/src/cpu/intel/speedstep/acpi.c index 070f8d5..910055d 100644 --- a/src/cpu/intel/speedstep/acpi.c +++ b/src/cpu/intel/speedstep/acpi.c @@ -2,6 +2,7 @@ * This file is part of the coreboot project. * * Copyright (C) 2009 coresystems GmbH + * 2012 secunet Security Networks AG * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as @@ -28,7 +29,18 @@ #include <cpu/intel/speedstep.h> #include <device/device.h>
-// XXX: PSS table values for power consumption are for Merom only +/** + * @brief Returns c-state entries for this system + * + * This function is usually overwritten in mainboard code. + * + * @return Number of c-states *entries will point to. + */ +int __attribute__((weak)) get_cst_entries(acpi_cstate_t **entries + __attribute__((unused))) +{ + return 0; +}
static int determine_total_number_of_cores(void) { @@ -47,110 +59,133 @@ static int determine_total_number_of_cores(void) return count; }
+/** + * @brief Returns three times the FSB clock in MHz + * + * The result of calculations with the returned value shall be divided by 3. + * This helps to avoid rounding errors. + */ static int get_fsb(void) { const u32 fsbcode = rdmsr(0xcd).lo & 7; switch (fsbcode) { - case 0: return 266; - case 1: return 133; - case 2: return 200; - case 3: return 166; - case 4: return 333; - case 5: return 100; - case 6: return 400; + case 0: return 800; /* / 3 == 266 */ + case 1: return 400; /* / 3 == 133 */ + case 2: return 600; /* / 3 == 200 */ + case 3: return 500; /* / 3 == 166 */ + case 4: return 1000; /* / 3 == 333 */ + case 5: return 300; /* / 3 == 100 */ + case 6: return 1200; /* / 3 == 400 */ } - printk(BIOS_DEBUG, "Warning: No supported FSB frequency. Assuming 200MHz\n"); - return 200; + printk(BIOS_WARNING, + "Warning: No supported FSB frequency. Assuming 200MHz\n"); + return 600; }
-int __attribute__((weak)) get_cst_entries(acpi_cstate_t **entries __attribute__((unused))) +static int gen_pstate_entries(const sst_table_t *const pstates, + const int cpuID, const int cores_per_package, + const uint8_t coordination) { - return 0; + int i; + int len, len_ps; + int frequency; + + len = acpigen_write_empty_PCT(); + len += acpigen_write_PSD_package( + cpuID, cores_per_package, coordination); + len += acpigen_write_name("_PSS"); + + const int fsb3 = get_fsb(); + const int min_ratio2 = SPEEDSTEP_DOUBLE_RATIO( + pstates->states[pstates->num_states - 1]); + const int max_ratio2 = SPEEDSTEP_DOUBLE_RATIO(pstates->states[0]); + printk(BIOS_DEBUG, "clocks between %d and %d MHz.\n", + (min_ratio2 * fsb3) + / (pstates->states[pstates->num_states - 1].is_slfm ? 12 : 6), + (max_ratio2 * fsb3) / 6); + + printk(BIOS_DEBUG, "adding %x P-States between " + "busratio %x and %x, ""incl. P0\n", + pstates->num_states, min_ratio2 / 2, max_ratio2 / 2); + len_ps = acpigen_write_package(pstates->num_states); + for (i = 0; i < pstates->num_states; ++i) { + const sst_state_t *const pstate = &pstates->states[i]; + /* Report frequency of turbo mode as that of HFM + 1. */ + if (pstate->is_turbo) + frequency = (SPEEDSTEP_DOUBLE_RATIO( + pstates->states[i + 1]) * fsb3) / 6 + 1; + /* Super-LFM runs at half frequency. */ + else if (pstate->is_slfm) + frequency = (SPEEDSTEP_DOUBLE_RATIO(*pstate)*fsb3)/12; + else + frequency = (SPEEDSTEP_DOUBLE_RATIO(*pstate)*fsb3)/6; + len_ps += acpigen_write_PSS_package( + frequency, pstate->power, 0, 0, + SPEEDSTEP_ENCODE_STATE(*pstate), + SPEEDSTEP_ENCODE_STATE(*pstate)); + } + len_ps--; + acpigen_patch_len(len_ps); + + len += acpigen_write_PPC(0); + + len += len_ps; + + return len; }
+/** + * @brief Generate ACPI entries for Speedstep for each cpu + */ void generate_cpu_entries(void) { - int len_pr, len_ps; + int len_pr; int coreID, cpuID, pcontrol_blk = PMB0_BASE, plen = 6; - msr_t msr; int totalcores = determine_total_number_of_cores(); int cores_per_package = (cpuid_ebx(1)>>16) & 0xff; - int numcpus = totalcores/cores_per_package; // this assumes that all CPUs share the same layout - int count; - acpi_cstate_t *cst_entries; + int numcpus = totalcores/cores_per_package; /* This assumes that all + CPUs share the same + layout. */ + int num_cstates; + acpi_cstate_t *cstates; + sst_table_t pstates; + uint8_t coordination; + + printk(BIOS_DEBUG, "Found %d CPU(s) with %d core(s) each.\n", + numcpus, cores_per_package);
- printk(BIOS_DEBUG, "Found %d CPU(s) with %d core(s) each.\n", numcpus, cores_per_package); + num_cstates = get_cst_entries(&cstates); + speedstep_gen_pstates(&pstates); + if (((cpuid_eax(1) >> 4) & 0xffff) == 0x1067) + /* For Penryn use HW_ALL. */ + coordination = HW_ALL; + else + /* Use SW_ANY as that was the default. */ + coordination = SW_ANY;
- for (cpuID=1; cpuID <=numcpus; cpuID++) { + for (cpuID = 0; cpuID < numcpus; ++cpuID) { for (coreID=1; coreID<=cores_per_package; coreID++) { if (coreID>1) { pcontrol_blk = 0; plen = 0; } + + /* Generate processor _PR.CPUx. */ len_pr = acpigen_write_processor( - (cpuID - 1) * cores_per_package + coreID - 1, pcontrol_blk, plen); - len_pr += acpigen_write_empty_PCT(); - len_pr += acpigen_write_PSD_package(cpuID-1,cores_per_package,SW_ANY); - if ((count = get_cst_entries(&cst_entries)) > 0) - len_pr += acpigen_write_CST_package(cst_entries, count); - len_pr += acpigen_write_name("_PSS"); - - int max_states=8; - int busratio_step=2; - msr = rdmsr(IA32_PERF_STS); - int busratio_min=(msr.lo >> 24) & 0x1f; - int busratio_max=(msr.hi >> (40-32)) & 0x1f; - int vid_min=msr.lo & 0x3f; - msr = rdmsr(IA32_PLATFORM_ID); - int vid_max=msr.lo & 0x3f; - int clock_max=get_fsb()*busratio_max; - int clock_min=get_fsb()*busratio_min; - printk(BIOS_DEBUG, "clocks between %d and %d MHz.\n", clock_min, clock_max); -#define MEROM_MIN_POWER 16000 -#define MEROM_MAX_POWER 35000 - int power_max=MEROM_MAX_POWER; - int power_min=MEROM_MIN_POWER; - - int num_states=(busratio_max-busratio_min)/busratio_step; - while (num_states > max_states-1) { - busratio_step <<= 1; - num_states >>= 1; - } - printk(BIOS_DEBUG, "adding %x P-States between busratio %x and %x, incl. P0\n", - num_states+1, busratio_min, busratio_max); - int vid_step=(vid_max-vid_min)/num_states; - int power_step=(power_max-power_min)/num_states; - int clock_step=(clock_max-clock_min)/num_states; - len_ps = acpigen_write_package(num_states + 1); /* For Super LFM, this must - be increases by another one. 
*/ - len_ps += acpigen_write_PSS_package( - clock_max /*mhz*/, power_max /*mW*/, 0 /*lat1*/, 0 /*lat2*/, - (busratio_max << 8) | vid_max /*control*/, - (busratio_max << 8) | vid_max /*status*/); - - int current_busratio=busratio_min+((num_states-1)*busratio_step); - int current_vid=vid_min+((num_states-1)*vid_step); - int current_power=power_min+((num_states-1)*power_step); - int current_clock=clock_min+((num_states-1)*clock_step); - int i; - for (i=0;i<num_states; i++) { - len_ps += acpigen_write_PSS_package( - current_clock /*mhz*/, current_power /*mW*/, - 0 /*lat1*/, 0 /*lat2*/, - (current_busratio << 8) | current_vid /*control*/, - (current_busratio << 8) | current_vid /*status*/); - current_busratio -= busratio_step; - current_vid -= vid_step; - current_power -= power_step; - current_clock -= clock_step; - } - len_ps--; - acpigen_patch_len(len_ps); - len_pr += acpigen_write_PPC(0); - len_pr += len_ps; + cpuID * cores_per_package + coreID - 1, + pcontrol_blk, plen); + + /* Generate p-state entries. */ + len_pr += gen_pstate_entries(&pstates, cpuID, + cores_per_package, coordination); + + /* Generate c-state entries. */ + if (num_cstates > 0) + len_pr += acpigen_write_CST_package( + cstates, num_cstates); + len_pr--; acpigen_patch_len(len_pr); } } } - diff --git a/src/cpu/intel/speedstep/speedstep.c b/src/cpu/intel/speedstep/speedstep.c new file mode 100644 index 0000000..e5509ec --- /dev/null +++ b/src/cpu/intel/speedstep/speedstep.c @@ -0,0 +1,171 @@ +/* + * This file is part of the coreboot project. + * + * Copyright (C) 2012 secunet Security Networks AG + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; version 2 of + * the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, + * MA 02110-1301 USA + */ + +#include <types.h> +#include <string.h> +#include <arch/cpu.h> +#include <cpu/x86/msr.h> +#include <console/console.h> +#include <cpu/intel/speedstep.h> + +/** + * @brief Gather speedstep limits for current processor + * + * At least power limits are processor type specific. Penryn introduced half + * steps in bus ratios. Don't know about Atom processors. + */ +static void speedstep_get_limits(sst_params_t *const params) +{ + msr_t msr; + + const uint16_t cpu_id = (cpuid_eax(1) >> 4) & 0xffff; + const uint32_t state_mask = + /* Penryn supports non integer (i.e. half) ratios. */ + ((cpu_id == 0x1067) ? SPEEDSTEP_RATIO_NONINT : 0) + | SPEEDSTEP_RATIO_VALUE_MASK | SPEEDSTEP_VID_MASK; + + /* Initialize params to zero. */ + memset(params, '\0', sizeof(*params)); + + /* Read Super-LFM parameters. */ + if (((rdmsr(MSR_EXTENDED_CONFIG).lo >> 27) & 3) == 3) {/*supported and + enabled bits */ + msr = rdmsr(MSR_FSB_CLOCK_VCC); + params->slfm = SPEEDSTEP_STATE_FROM_MSR(msr.lo, state_mask); + params->slfm.dynfsb = 1; + params->slfm.is_slfm = 1; + } + + /* Read normal minimum parameters. */ + msr = rdmsr(MSR_THERM2_CTL); + params->min = SPEEDSTEP_STATE_FROM_MSR(msr.lo, state_mask); + + /* Read normal maximum parameters. */ + msr = rdmsr(IA32_PLATFORM_ID); + params->max = SPEEDSTEP_STATE_FROM_MSR(msr.lo, state_mask); + + /* Read turbo parameters. */ + msr = rdmsr(MSR_FSB_CLOCK_VCC); + if ((msr.hi & (1 << (63 - 32))) && + /* supported and */ + !(rdmsr(IA32_MISC_ENABLES).hi & (1 << (38 - 32)))) { + /* not disabled */ + params->turbo = SPEEDSTEP_STATE_FROM_MSR(msr.hi, state_mask); + params->turbo.is_turbo = 1; + } + + /* Set power limits by processor type. 
*/ + /* Defined values match the normal voltage versions only. But + they are only a hint for OSPM, so this should not hurt much. */ + switch (cpu_id) { + case 0x1067: + /* Penryn */ + params->slfm.power = SPEEDSTEP_SLFM_POWER_PENRYN; + params->min.power = SPEEDSTEP_MIN_POWER_PENRYN; + params->max.power = SPEEDSTEP_MAX_POWER_PENRYN; + params->turbo.power = SPEEDSTEP_TURBO_POWER_PENRYN; + break; + default: + /* Use Merom values by default (as before). */ + params->slfm.power = SPEEDSTEP_SLFM_POWER_MEROM; + params->min.power = SPEEDSTEP_MIN_POWER_MEROM; + params->max.power = SPEEDSTEP_MAX_POWER_MEROM; + params->turbo.power = SPEEDSTEP_TURBO_POWER_MEROM; + break; + } +} + +/** + * @brief Generate full p-states table from processor parameters + * + * This is generic code and should work at least for Merom and Penryn + * processors. It is used to generate acpi tables and configure EMTTM. + */ +void speedstep_gen_pstates(sst_table_t *const table) +{ + sst_params_t params; + /* Gather speedstep limits. */ + speedstep_get_limits(&params); + + + /*\ First, find the number of normal states: */ + + /* Calculate with doubled values to work + around non-integer (.5) bus ratios. */ + const int power_diff2 = (params.max.power - params.min.power) * 2; + const int vid_diff2 = (params.max.vid - params.min.vid) * 2; + const int max_ratio2 = SPEEDSTEP_DOUBLE_RATIO(params.max); + const int min_ratio2 = SPEEDSTEP_DOUBLE_RATIO(params.min); + const int ratio_diff2 = max_ratio2 - min_ratio2; + /* Calculate number of normal states (LFM to HFM, min to max). */ + /* Increase step size, until all states fit into the table. + (Note: First try should always work, if + SPEEDSTEP_MAX_NORMAL_STATES is set correctly.) */ + int states, step2 = 0; + do { + step2 += 2 * 2; /* Must be a multiple of 2 (doubled). 
*/ + states = ratio_diff2 / step2 + 1; + } while (states > SPEEDSTEP_MAX_NORMAL_STATES); + if (step2 > 4) + printk(BIOS_INFO, "Enhanced Speedstep processor with " + "more than %d possible p-states.\n", + SPEEDSTEP_MAX_NORMAL_STATES); + if (states < 2) /* Report at least two normal states. */ + states = 2; + + + /*\ Now, fill the table: */ + + table->num_states = 0; + + /* Add turbo state if supported. */ + if (params.turbo.is_turbo) + table->states[table->num_states++] = params.turbo; + + /* Add HFM first. */ + table->states[table->num_states] = params.max; + /* Work around HFM and LFM having the same bus ratio. */ + if ((params.max.dynfsb == params.min.dynfsb) && + (params.max.nonint == params.min.nonint) && + (params.max.ratio == params.min.ratio)) + table->states[table->num_states].vid = params.min.vid; + ++table->num_states; + --states; + + /* Now, add all other normal states based on LFM (min). */ + const int power_step = power_diff2 / ratio_diff2; + const int vid_step = vid_diff2 / ratio_diff2; + const int ratio_step = step2 / 2; + int power = params.min.power + (states - 1) * power_step; + int vid = params.min.vid + (states - 1) * vid_step; + int ratio = params.min.ratio + (states - 1) * ratio_step; + for (; states > 0; --states) { + table->states[table->num_states++] = + (sst_state_t){ 0, 0, ratio, vid, 0, 0, power }; + power -= power_step; + vid -= vid_step; + ratio -= ratio_step; + } + + /* At last, add Super-LFM state if supported. */ + if (params.slfm.is_slfm) + table->states[table->num_states++] = params.slfm; +} diff --git a/src/include/cpu/intel/speedstep.h b/src/include/cpu/intel/speedstep.h index c3cd2d2..697c34a 100644 --- a/src/include/cpu/intel/speedstep.h +++ b/src/include/cpu/intel/speedstep.h @@ -2,6 +2,7 @@ * This file is part of the coreboot project. 
* * Copyright (C) 2007-2009 coresystems GmbH + * 2012 secunet Security Networks AG * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as @@ -19,6 +20,11 @@ * MA 02110-1301 USA */
+#ifndef CPU_INTEL_SPEEDSTEP_H +#define CPU_INTEL_SPEEDSTEP_H + +#include <stdint.h> + /* Magic value used to locate speedstep configuration in the device tree */ #define SPEEDSTEP_APIC_MAGIC 0xACAC
@@ -39,3 +45,71 @@ #define IA32_PERF_CTL 0x199 #define MSR_THERM2_CTL 0x19D #define IA32_MISC_ENABLES 0x1A0 +#define MSR_FSB_CLOCK_VCC 0xce +#define MSR_EXTENDED_CONFIG 0xee + + +typedef struct { + uint8_t dynfsb : 1; /* whether this is SLFM */ + uint8_t nonint : 1; /* add .5 to ratio */ + uint8_t ratio : 6; + uint8_t vid; + uint8_t is_turbo; + uint8_t is_slfm; + uint32_t power; +} sst_state_t; +#define SPEEDSTEP_RATIO_SHIFT 8 +#define SPEEDSTEP_RATIO_DYNFSB_SHIFT (7 + SPEEDSTEP_RATIO_SHIFT) +#define SPEEDSTEP_RATIO_DYNFSB (1 << SPEEDSTEP_RATIO_DYNFSB_SHIFT) +#define SPEEDSTEP_RATIO_NONINT_SHIFT (6 + SPEEDSTEP_RATIO_SHIFT) +#define SPEEDSTEP_RATIO_NONINT (1 << SPEEDSTEP_RATIO_NONINT_SHIFT) +#define SPEEDSTEP_RATIO_VALUE_MASK (0x1f << SPEEDSTEP_RATIO_SHIFT) +#define SPEEDSTEP_VID_MASK 0x3f +#define SPEEDSTEP_STATE_FROM_MSR(val, mask) ((sst_state_t){ \ + 0, /* dynfsb won't be read. */ \ + ((val & mask) & SPEEDSTEP_RATIO_NONINT) ? 1 : 0, \ + (((val & mask) & SPEEDSTEP_RATIO_VALUE_MASK) \ + >> SPEEDSTEP_RATIO_SHIFT), \ + (val & mask) & SPEEDSTEP_VID_MASK, \ + 0, /* not turbo by default */ \ + 0, /* not slfm by default */ \ + 0 /* power is hardcoded in software. */ \ + }) +#define SPEEDSTEP_ENCODE_STATE(state) ( \ + ((uint16_t)(state).dynfsb << SPEEDSTEP_RATIO_DYNFSB_SHIFT) | \ + ((uint16_t)(state).nonint << SPEEDSTEP_RATIO_NONINT_SHIFT) | \ + ((uint16_t)(state).ratio << SPEEDSTEP_RATIO_SHIFT) | \ + ((uint16_t)(state).vid & SPEEDSTEP_VID_MASK)) +#define SPEEDSTEP_DOUBLE_RATIO(state) ( \ + ((uint8_t)(state).ratio * 2) + (state).nonint) + +typedef struct { + sst_state_t slfm; + sst_state_t min; + sst_state_t max; + sst_state_t turbo; +} sst_params_t; + +/* Looking at core2's spec, the highest normal bus ratio for an eist enabled + processor is 14, the lowest is always 6. This makes 5 states with the + minimal step width of 2. With turbo mode and super LFM we have at most 7. 
*/ +#define SPEEDSTEP_MAX_NORMAL_STATES 5 +#define SPEEDSTEP_MAX_STATES (SPEEDSTEP_MAX_NORMAL_STATES + 2) +typedef struct { + /* Table of p-states for EMTTM and ACPI by decreasing performance. */ + sst_state_t states[SPEEDSTEP_MAX_STATES]; + int num_states; +} sst_table_t; + +void speedstep_gen_pstates(sst_table_t *); + +#define SPEEDSTEP_TURBO_POWER_MEROM 35000 +#define SPEEDSTEP_MAX_POWER_MEROM 35000 +#define SPEEDSTEP_MIN_POWER_MEROM 25000 +#define SPEEDSTEP_SLFM_POWER_MEROM 12000 +#define SPEEDSTEP_TURBO_POWER_PENRYN 35000 +#define SPEEDSTEP_MAX_POWER_PENRYN 35000 +#define SPEEDSTEP_MIN_POWER_PENRYN 15000 +#define SPEEDSTEP_SLFM_POWER_PENRYN 12000 + +#endif /* CPU_INTEL_SPEEDSTEP_H */