Skip to content

Commit

Permalink
x86/mce/AMD, EDAC: Enable error decoding of Scalable MCA errors
Browse files Browse the repository at this point in the history
For Scalable MCA enabled processors, errors are listed per IP block. And
since it is not required for an IP to map to a particular bank, we need
to use HWID and McaType values from the MCx_IPID register to figure out
which IP a given bank represents.

We also have a new bit (TCC) in the MCx_STATUS register to indicate Task
context is corrupt.

Add logic here to decode errors from all known IP blocks for Fam17h
Model 00-0fh and to print TCC errors.

[ Minor fixups. ]
Signed-off-by: Aravind Gopalakrishnan <[email protected]>
Signed-off-by: Borislav Petkov <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: linux-edac <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
  • Loading branch information
Aravind Gopalakrishnan authored and Ingo Molnar committed Mar 8, 2016
1 parent adc53f2 commit be0aec2
Show file tree
Hide file tree
Showing 3 changed files with 420 additions and 3 deletions.
59 changes: 59 additions & 0 deletions arch/x86/include/asm/mce.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,18 @@
/* AMD-specific bits */
#define MCI_STATUS_DEFERRED (1ULL<<44) /* declare an uncorrected error */
#define MCI_STATUS_POISON (1ULL<<43) /* access poisonous data */
#define MCI_STATUS_TCC (1ULL<<55) /* Task context corrupt */

/*
* McaX field if set indicates a given bank supports MCA extensions:
* - Deferred error interrupt type is specifiable by bank.
* - MCx_MISC0[BlkPtr] field indicates presence of extended MISC registers,
* But should not be used to determine MSR numbers.
* - TCC bit is present in MCx_STATUS.
*/
#define MCI_CONFIG_MCAX 0x1
#define MCI_IPID_MCATYPE 0xFFFF0000
#define MCI_IPID_HWID 0xFFF

/*
* Note that the full MCACOD field of IA32_MCi_STATUS MSR is
Expand Down Expand Up @@ -93,7 +105,9 @@

/* AMD Scalable MCA */
#define MSR_AMD64_SMCA_MC0_CONFIG 0xc0002004
#define MSR_AMD64_SMCA_MC0_IPID 0xc0002005
#define MSR_AMD64_SMCA_MCx_CONFIG(x) (MSR_AMD64_SMCA_MC0_CONFIG + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_IPID(x) (MSR_AMD64_SMCA_MC0_IPID + 0x10*(x))

/*
* This structure contains all data related to the MCE log. Also
Expand Down Expand Up @@ -291,4 +305,49 @@ struct cper_sec_mem_err;
extern void apei_mce_report_mem_error(int corrected,
struct cper_sec_mem_err *mem_err);

/*
* Enumerate new IP types and HWID values in AMD processors which support
* Scalable MCA.
*/
#ifdef CONFIG_X86_MCE_AMD
enum amd_ip_types {
SMCA_F17H_CORE = 0, /* Core errors */
SMCA_DF, /* Data Fabric */
SMCA_UMC, /* Unified Memory Controller */
SMCA_PB, /* Parameter Block */
SMCA_PSP, /* Platform Security Processor */
SMCA_SMU, /* System Management Unit */
N_AMD_IP_TYPES
};

struct amd_hwid {
const char *name;
unsigned int hwid;
};

extern struct amd_hwid amd_hwids[N_AMD_IP_TYPES];

enum amd_core_mca_blocks {
SMCA_LS = 0, /* Load Store */
SMCA_IF, /* Instruction Fetch */
SMCA_L2_CACHE, /* L2 cache */
SMCA_DE, /* Decoder unit */
RES, /* Reserved */
SMCA_EX, /* Execution unit */
SMCA_FP, /* Floating Point */
SMCA_L3_CACHE, /* L3 cache */
N_CORE_MCA_BLOCKS
};

extern const char * const amd_core_mcablock_names[N_CORE_MCA_BLOCKS];

enum amd_df_mca_blocks {
SMCA_CS = 0, /* Coherent Slave */
SMCA_PIE, /* Power management, Interrupts, etc */
N_DF_BLOCKS
};

extern const char * const amd_df_mcablock_names[N_DF_BLOCKS];
#endif

#endif /* _ASM_X86_MCE_H */
29 changes: 29 additions & 0 deletions arch/x86/kernel/cpu/mcheck/mce_amd.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,35 @@ static const char * const th_names[] = {
"execution_unit",
};

/* Define HWID to IP type mappings for Scalable MCA */
struct amd_hwid amd_hwids[] = {
[SMCA_F17H_CORE] = { "f17h_core", 0xB0 },
[SMCA_DF] = { "data_fabric", 0x2E },
[SMCA_UMC] = { "umc", 0x96 },
[SMCA_PB] = { "param_block", 0x5 },
[SMCA_PSP] = { "psp", 0xFF },
[SMCA_SMU] = { "smu", 0x1 },
};
EXPORT_SYMBOL_GPL(amd_hwids);

const char * const amd_core_mcablock_names[] = {
[SMCA_LS] = "load_store",
[SMCA_IF] = "insn_fetch",
[SMCA_L2_CACHE] = "l2_cache",
[SMCA_DE] = "decode_unit",
[RES] = "",
[SMCA_EX] = "execution_unit",
[SMCA_FP] = "floating_point",
[SMCA_L3_CACHE] = "l3_cache",
};
EXPORT_SYMBOL_GPL(amd_core_mcablock_names);

const char * const amd_df_mcablock_names[] = {
[SMCA_CS] = "coherent_slave",
[SMCA_PIE] = "pie",
};
EXPORT_SYMBOL_GPL(amd_df_mcablock_names);

static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */

Expand Down
Loading

0 comments on commit be0aec2

Please sign in to comment.