* [PATCH 7] MCA: simplify mce actoin, and some update of mem err handler for future SRAR extension
@ 2011-05-08 11:23 Liu, Jinsong
2011-05-13 14:34 ` Liu, Jinsong
0 siblings, 1 reply; 5+ messages in thread
From: Liu, Jinsong @ 2011-05-08 11:23 UTC (permalink / raw)
To: Keir Fraser, xen-devel; +Cc: Jiang, Yunhong, Li, Xin
[-- Attachment #1: Type: text/plain, Size: 11053 bytes --]
MCA: simplify mce actoin, and some update of mem err handler for future SRAR extension
This patch simplify mce_action(). Basically the 2 set of mce status flags MCA_... and MCER_... are highly functional similar. This patch remove the redundant middle-level flag MCA_..., and its related data structure and function, so that mce_action() logic is more clean and simpler.
This patch also update mem err handler. intel_memerr_dhandler() will be commonly used by SRAO and SRAR, so for the extension of future SRAR case, this patch remove the default fail flag from intel_memerr_dhandler() outside to SRAO/SRAR level, since only SRAO/SRAR handler itself has the knowledge of how to handle the failure when mem err handler fail.
Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
diff -r e7cd79cd6626 xen/arch/x86/cpu/mcheck/mce_intel.c
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c Sun May 08 13:39:40 2011 +0800
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Sun May 08 18:15:13 2011 +0800
@@ -172,23 +172,14 @@ static unsigned int __read_mostly mce_dh
static unsigned int __read_mostly mce_dhandler_num;
static unsigned int __read_mostly mce_uhandler_num;
-enum mce_result
-{
- MCER_NOERROR,
- MCER_RECOVERED,
- /* Not recoverd, but can continue */
- MCER_CONTINUE,
- MCER_RESET,
-};
-
/* Maybe called in MCE context, no lock, no printk */
static enum mce_result mce_action(struct cpu_user_regs *regs,
mctelem_cookie_t mctc)
{
struct mc_info *local_mi;
- enum mce_result ret = MCER_NOERROR;
+ enum mce_result bank_result = MCER_NOERROR;
+ enum mce_result worst_result = MCER_NOERROR;
struct mcinfo_common *mic = NULL;
- struct mca_handle_result mca_res;
struct mca_binfo binfo;
const struct mca_error_handler *handlers = mce_dhandlers;
unsigned int i, handler_num = mce_dhandler_num;
@@ -217,7 +208,7 @@ static enum mce_result mce_action(struct
/* Processing bank information */
x86_mcinfo_lookup(mic, local_mi, MC_TYPE_BANK);
- for ( ; ret != MCER_RESET && mic && mic->size;
+ for ( ; bank_result != MCER_RESET && mic && mic->size;
mic = x86_mcinfo_next(mic) )
{
if (mic->type != MC_TYPE_BANK) {
@@ -225,34 +216,20 @@ static enum mce_result mce_action(struct
}
binfo.mib = (struct mcinfo_bank*)mic;
binfo.bank = binfo.mib->mc_bank;
- memset(&mca_res, 0x0f, sizeof(mca_res));
+ bank_result = MCER_NOERROR;
for ( i = 0; i < handler_num; i++ ) {
if (handlers[i].owned_error(binfo.mib->mc_status))
{
- handlers[i].recovery_handler(&binfo, &mca_res);
-
- if (mca_res.result & MCA_OWNER)
- binfo.mib->mc_domid = mca_res.owner;
-
- if (mca_res.result == MCA_NEED_RESET)
- ret = MCER_RESET;
- else if (mca_res.result == MCA_RECOVERED)
- {
- if (ret < MCER_RECOVERED)
- ret = MCER_RECOVERED;
- }
- else if (mca_res.result == MCA_NO_ACTION)
- {
- if (ret < MCER_CONTINUE)
- ret = MCER_CONTINUE;
- }
+ handlers[i].recovery_handler(&binfo, &bank_result);
+ if (worst_result < bank_result)
+ worst_result = bank_result;
break;
}
}
ASSERT(i != handler_num);
}
- return ret;
+ return worst_result;
}
/*
@@ -608,7 +585,7 @@ struct mcinfo_recovery *mci_add_pageoff_
static void intel_memerr_dhandler(
struct mca_binfo *binfo,
- struct mca_handle_result *result)
+ enum mce_result *result)
{
struct mcinfo_bank *bank = binfo->mib;
struct mcinfo_global *global = binfo->mig;
@@ -618,7 +595,6 @@ static void intel_memerr_dhandler(
uint64_t mc_status, mc_misc;
mce_printk(MCE_VERBOSE, "MCE: Enter UCR recovery action\n");
- result->result = MCA_NEED_RESET;
mc_status = bank->mc_status;
mc_misc = bank->mc_misc;
@@ -626,7 +602,6 @@ static void intel_memerr_dhandler(
!(mc_status & MCi_STATUS_MISCV) ||
((mc_misc & MCi_MISC_ADDRMOD_MASK) != MCi_MISC_PHYSMOD) )
{
- result->result |= MCA_NO_ACTION;
dprintk(XENLOG_WARNING,
"No physical address provided for memory error\n");
return;
@@ -644,23 +619,19 @@ static void intel_memerr_dhandler(
/* This is free page */
if (status & PG_OFFLINE_OFFLINED)
- result->result = MCA_RECOVERED;
+ *result = MCER_RECOVERED;
else if (status & PG_OFFLINE_PENDING) {
/* This page has owner */
if (status & PG_OFFLINE_OWNED) {
- result->result |= MCA_OWNER;
- result->owner = status >> PG_OFFLINE_OWNER_SHIFT;
+ bank->mc_domid = status >> PG_OFFLINE_OWNER_SHIFT;
mce_printk(MCE_QUIET, "MCE: This error page is ownded"
- " by DOM %d\n", result->owner);
- /* Fill vMCE# injection and vMCE# MSR virtualization "
- * "related data */
- bank->mc_domid = result->owner;
+ " by DOM %d\n", bank->mc_domid);
/* XXX: Cannot handle shared pages yet
* (this should identify all domains and gfn mapping to
* the mfn in question) */
- BUG_ON( result->owner == DOMID_COW );
- if ( result->owner != DOMID_XEN ) {
- d = get_domain_by_id(result->owner);
+ BUG_ON( bank->mc_domid == DOMID_COW );
+ if ( bank->mc_domid != DOMID_XEN ) {
+ d = get_domain_by_id(bank->mc_domid);
ASSERT(d);
gfn = get_gpfn_from_mfn((bank->mc_addr) >> PAGE_SHIFT);
@@ -683,7 +654,7 @@ static void intel_memerr_dhandler(
global->mc_gstatus) == -1 )
{
mce_printk(MCE_QUIET, "Fill vMCE# data for DOM%d "
- "failed\n", result->owner);
+ "failed\n", bank->mc_domid);
goto vmce_failed;
}
@@ -699,7 +670,7 @@ static void intel_memerr_dhandler(
* For xen, it has contained the error and finished
* its own recovery job.
*/
- result->result = MCA_RECOVERED;
+ *result = MCER_RECOVERED;
put_domain(d);
return;
@@ -718,11 +689,13 @@ static int intel_srao_check(uint64_t sta
static void intel_srao_dhandler(
struct mca_binfo *binfo,
- struct mca_handle_result *result)
+ enum mce_result *result)
{
uint64_t status = binfo->mib->mc_status;
/* For unknown srao error code, no action required */
+ *result = MCER_CONTINUE;
+
if ( status & MCi_STATUS_VAL )
{
switch ( status & INTEL_MCCOD_MASK )
@@ -744,7 +717,7 @@ static int intel_default_check(uint64_t
static void intel_default_mce_dhandler(
struct mca_binfo *binfo,
- struct mca_handle_result *result)
+ enum mce_result *result)
{
uint64_t status = binfo->mib->mc_status;
enum intel_mce_type type;
@@ -752,9 +725,9 @@ static void intel_default_mce_dhandler(
type = intel_check_mce_type(status);
if (type == intel_mce_fatal || type == intel_mce_ucr_srar)
- result->result = MCA_NEED_RESET;
- else if (type == intel_mce_ucr_srao)
- result->result = MCA_NO_ACTION;
+ *result = MCER_RESET;
+ else
+ *result = MCER_CONTINUE;
}
static const struct mca_error_handler intel_mce_dhandlers[] = {
@@ -764,7 +737,7 @@ static const struct mca_error_handler in
static void intel_default_mce_uhandler(
struct mca_binfo *binfo,
- struct mca_handle_result *result)
+ enum mce_result *result)
{
uint64_t status = binfo->mib->mc_status;
enum intel_mce_type type;
@@ -776,10 +749,10 @@ static void intel_default_mce_uhandler(
/* Panic if no handler for SRAR error */
case intel_mce_ucr_srar:
case intel_mce_fatal:
- result->result = MCA_NEED_RESET;
+ *result = MCER_RESET;
break;
default:
- result->result = MCA_NO_ACTION;
+ *result = MCER_CONTINUE;
break;
}
}
diff -r e7cd79cd6626 xen/arch/x86/cpu/mcheck/x86_mca.h
--- a/xen/arch/x86/cpu/mcheck/x86_mca.h Sun May 08 13:39:40 2011 +0800
+++ b/xen/arch/x86/cpu/mcheck/x86_mca.h Sun May 08 18:15:13 2011 +0800
@@ -124,36 +124,6 @@ void mcabanks_free(struct mca_banks *ban
void mcabanks_free(struct mca_banks *banks);
extern struct mca_banks *mca_allbanks;
-/* Below interfaces are defined for MCA internal processing:
- * a. pre_handler will be called early in MCA ISR context, mainly for early
- * need_reset detection for avoiding log missing. Also, it is used to judge
- * impacted DOMAIN if possible.
- * b. mca_error_handler is actually a (error_action_index,
- * recovery_hanlder pointer) pair. The defined recovery_handler
- * performs the actual recovery operations such as page_offline, cpu_offline
- * in softIRQ context when the per_bank MCA error matching the corresponding
- * mca_code index. If pre_handler can't judge the impacted domain,
- * recovery_handler must figure it out.
-*/
-
-/* MCA error has been recovered successfully by the recovery action*/
-#define MCA_RECOVERED (0x1 << 0)
-/* MCA error impact the specified DOMAIN in owner field below */
-#define MCA_OWNER (0x1 << 1)
-/* MCA error can't be recovered and need reset */
-#define MCA_NEED_RESET (0x1 << 2)
-/* MCA error did not have any action yet */
-#define MCA_NO_ACTION (0x1 << 3)
-
-struct mca_handle_result
-{
- uint32_t result;
- /* Used one result & MCA_OWNER */
- domid_t owner;
- /* Used by mca_error_handler, result & MCA_RECOVRED */
- struct recovery_action *action;
-};
-
/*Keep bank so that we can get staus even if mib is NULL */
struct mca_binfo {
int bank;
@@ -163,8 +133,14 @@ struct mca_binfo {
struct cpu_user_regs *regs;
};
-extern void (*mca_prehandler)( struct cpu_user_regs *regs,
- struct mca_handle_result *result);
+enum mce_result
+{
+ MCER_NOERROR,
+ MCER_RECOVERED,
+ /* Not recoverd, but can continue */
+ MCER_CONTINUE,
+ MCER_RESET,
+};
struct mca_error_handler
{
@@ -175,7 +151,7 @@ struct mca_error_handler
*/
int (*owned_error)(uint64_t status);
void (*recovery_handler)(struct mca_binfo *binfo,
- struct mca_handle_result *result);
+ enum mce_result *result);
};
/* Global variables */
[-- Attachment #2: mca-cleanup-7.patch --]
[-- Type: application/octet-stream, Size: 10772 bytes --]
MCA: simplify mce actoin, and some update of mem err handler for future SRAR extension
This patch simplify mce_action(). Basically the 2 set of mce status flags MCA_... and MCER_... are highly functional similar. This patch remove the redundant middle-level flag MCA_..., and its related data structure and function, so that mce_action() logic is more clean and simpler.
This patch also update mem err handler. intel_memerr_dhandler() will be commonly used by SRAO and SRAR, so for the extension of future SRAR case, this patch remove the default fail flag from intel_memerr_dhandler() outside to SRAO/SRAR level, since only SRAO/SRAR handler itself has the knowledge of how to handle the failure when mem err handler fail.
Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
diff -r e7cd79cd6626 xen/arch/x86/cpu/mcheck/mce_intel.c
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c Sun May 08 13:39:40 2011 +0800
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Sun May 08 18:15:13 2011 +0800
@@ -172,23 +172,14 @@ static unsigned int __read_mostly mce_dh
static unsigned int __read_mostly mce_dhandler_num;
static unsigned int __read_mostly mce_uhandler_num;
-enum mce_result
-{
- MCER_NOERROR,
- MCER_RECOVERED,
- /* Not recoverd, but can continue */
- MCER_CONTINUE,
- MCER_RESET,
-};
-
/* Maybe called in MCE context, no lock, no printk */
static enum mce_result mce_action(struct cpu_user_regs *regs,
mctelem_cookie_t mctc)
{
struct mc_info *local_mi;
- enum mce_result ret = MCER_NOERROR;
+ enum mce_result bank_result = MCER_NOERROR;
+ enum mce_result worst_result = MCER_NOERROR;
struct mcinfo_common *mic = NULL;
- struct mca_handle_result mca_res;
struct mca_binfo binfo;
const struct mca_error_handler *handlers = mce_dhandlers;
unsigned int i, handler_num = mce_dhandler_num;
@@ -217,7 +208,7 @@ static enum mce_result mce_action(struct
/* Processing bank information */
x86_mcinfo_lookup(mic, local_mi, MC_TYPE_BANK);
- for ( ; ret != MCER_RESET && mic && mic->size;
+ for ( ; bank_result != MCER_RESET && mic && mic->size;
mic = x86_mcinfo_next(mic) )
{
if (mic->type != MC_TYPE_BANK) {
@@ -225,34 +216,20 @@ static enum mce_result mce_action(struct
}
binfo.mib = (struct mcinfo_bank*)mic;
binfo.bank = binfo.mib->mc_bank;
- memset(&mca_res, 0x0f, sizeof(mca_res));
+ bank_result = MCER_NOERROR;
for ( i = 0; i < handler_num; i++ ) {
if (handlers[i].owned_error(binfo.mib->mc_status))
{
- handlers[i].recovery_handler(&binfo, &mca_res);
-
- if (mca_res.result & MCA_OWNER)
- binfo.mib->mc_domid = mca_res.owner;
-
- if (mca_res.result == MCA_NEED_RESET)
- ret = MCER_RESET;
- else if (mca_res.result == MCA_RECOVERED)
- {
- if (ret < MCER_RECOVERED)
- ret = MCER_RECOVERED;
- }
- else if (mca_res.result == MCA_NO_ACTION)
- {
- if (ret < MCER_CONTINUE)
- ret = MCER_CONTINUE;
- }
+ handlers[i].recovery_handler(&binfo, &bank_result);
+ if (worst_result < bank_result)
+ worst_result = bank_result;
break;
}
}
ASSERT(i != handler_num);
}
- return ret;
+ return worst_result;
}
/*
@@ -608,7 +585,7 @@ struct mcinfo_recovery *mci_add_pageoff_
static void intel_memerr_dhandler(
struct mca_binfo *binfo,
- struct mca_handle_result *result)
+ enum mce_result *result)
{
struct mcinfo_bank *bank = binfo->mib;
struct mcinfo_global *global = binfo->mig;
@@ -618,7 +595,6 @@ static void intel_memerr_dhandler(
uint64_t mc_status, mc_misc;
mce_printk(MCE_VERBOSE, "MCE: Enter UCR recovery action\n");
- result->result = MCA_NEED_RESET;
mc_status = bank->mc_status;
mc_misc = bank->mc_misc;
@@ -626,7 +602,6 @@ static void intel_memerr_dhandler(
!(mc_status & MCi_STATUS_MISCV) ||
((mc_misc & MCi_MISC_ADDRMOD_MASK) != MCi_MISC_PHYSMOD) )
{
- result->result |= MCA_NO_ACTION;
dprintk(XENLOG_WARNING,
"No physical address provided for memory error\n");
return;
@@ -644,23 +619,19 @@ static void intel_memerr_dhandler(
/* This is free page */
if (status & PG_OFFLINE_OFFLINED)
- result->result = MCA_RECOVERED;
+ *result = MCER_RECOVERED;
else if (status & PG_OFFLINE_PENDING) {
/* This page has owner */
if (status & PG_OFFLINE_OWNED) {
- result->result |= MCA_OWNER;
- result->owner = status >> PG_OFFLINE_OWNER_SHIFT;
+ bank->mc_domid = status >> PG_OFFLINE_OWNER_SHIFT;
mce_printk(MCE_QUIET, "MCE: This error page is ownded"
- " by DOM %d\n", result->owner);
- /* Fill vMCE# injection and vMCE# MSR virtualization "
- * "related data */
- bank->mc_domid = result->owner;
+ " by DOM %d\n", bank->mc_domid);
/* XXX: Cannot handle shared pages yet
* (this should identify all domains and gfn mapping to
* the mfn in question) */
- BUG_ON( result->owner == DOMID_COW );
- if ( result->owner != DOMID_XEN ) {
- d = get_domain_by_id(result->owner);
+ BUG_ON( bank->mc_domid == DOMID_COW );
+ if ( bank->mc_domid != DOMID_XEN ) {
+ d = get_domain_by_id(bank->mc_domid);
ASSERT(d);
gfn = get_gpfn_from_mfn((bank->mc_addr) >> PAGE_SHIFT);
@@ -683,7 +654,7 @@ static void intel_memerr_dhandler(
global->mc_gstatus) == -1 )
{
mce_printk(MCE_QUIET, "Fill vMCE# data for DOM%d "
- "failed\n", result->owner);
+ "failed\n", bank->mc_domid);
goto vmce_failed;
}
@@ -699,7 +670,7 @@ static void intel_memerr_dhandler(
* For xen, it has contained the error and finished
* its own recovery job.
*/
- result->result = MCA_RECOVERED;
+ *result = MCER_RECOVERED;
put_domain(d);
return;
@@ -718,11 +689,13 @@ static int intel_srao_check(uint64_t sta
static void intel_srao_dhandler(
struct mca_binfo *binfo,
- struct mca_handle_result *result)
+ enum mce_result *result)
{
uint64_t status = binfo->mib->mc_status;
/* For unknown srao error code, no action required */
+ *result = MCER_CONTINUE;
+
if ( status & MCi_STATUS_VAL )
{
switch ( status & INTEL_MCCOD_MASK )
@@ -744,7 +717,7 @@ static int intel_default_check(uint64_t
static void intel_default_mce_dhandler(
struct mca_binfo *binfo,
- struct mca_handle_result *result)
+ enum mce_result *result)
{
uint64_t status = binfo->mib->mc_status;
enum intel_mce_type type;
@@ -752,9 +725,9 @@ static void intel_default_mce_dhandler(
type = intel_check_mce_type(status);
if (type == intel_mce_fatal || type == intel_mce_ucr_srar)
- result->result = MCA_NEED_RESET;
- else if (type == intel_mce_ucr_srao)
- result->result = MCA_NO_ACTION;
+ *result = MCER_RESET;
+ else
+ *result = MCER_CONTINUE;
}
static const struct mca_error_handler intel_mce_dhandlers[] = {
@@ -764,7 +737,7 @@ static const struct mca_error_handler in
static void intel_default_mce_uhandler(
struct mca_binfo *binfo,
- struct mca_handle_result *result)
+ enum mce_result *result)
{
uint64_t status = binfo->mib->mc_status;
enum intel_mce_type type;
@@ -776,10 +749,10 @@ static void intel_default_mce_uhandler(
/* Panic if no handler for SRAR error */
case intel_mce_ucr_srar:
case intel_mce_fatal:
- result->result = MCA_NEED_RESET;
+ *result = MCER_RESET;
break;
default:
- result->result = MCA_NO_ACTION;
+ *result = MCER_CONTINUE;
break;
}
}
diff -r e7cd79cd6626 xen/arch/x86/cpu/mcheck/x86_mca.h
--- a/xen/arch/x86/cpu/mcheck/x86_mca.h Sun May 08 13:39:40 2011 +0800
+++ b/xen/arch/x86/cpu/mcheck/x86_mca.h Sun May 08 18:15:13 2011 +0800
@@ -124,36 +124,6 @@ void mcabanks_free(struct mca_banks *ban
void mcabanks_free(struct mca_banks *banks);
extern struct mca_banks *mca_allbanks;
-/* Below interfaces are defined for MCA internal processing:
- * a. pre_handler will be called early in MCA ISR context, mainly for early
- * need_reset detection for avoiding log missing. Also, it is used to judge
- * impacted DOMAIN if possible.
- * b. mca_error_handler is actually a (error_action_index,
- * recovery_hanlder pointer) pair. The defined recovery_handler
- * performs the actual recovery operations such as page_offline, cpu_offline
- * in softIRQ context when the per_bank MCA error matching the corresponding
- * mca_code index. If pre_handler can't judge the impacted domain,
- * recovery_handler must figure it out.
-*/
-
-/* MCA error has been recovered successfully by the recovery action*/
-#define MCA_RECOVERED (0x1 << 0)
-/* MCA error impact the specified DOMAIN in owner field below */
-#define MCA_OWNER (0x1 << 1)
-/* MCA error can't be recovered and need reset */
-#define MCA_NEED_RESET (0x1 << 2)
-/* MCA error did not have any action yet */
-#define MCA_NO_ACTION (0x1 << 3)
-
-struct mca_handle_result
-{
- uint32_t result;
- /* Used one result & MCA_OWNER */
- domid_t owner;
- /* Used by mca_error_handler, result & MCA_RECOVRED */
- struct recovery_action *action;
-};
-
/*Keep bank so that we can get staus even if mib is NULL */
struct mca_binfo {
int bank;
@@ -163,8 +133,14 @@ struct mca_binfo {
struct cpu_user_regs *regs;
};
-extern void (*mca_prehandler)( struct cpu_user_regs *regs,
- struct mca_handle_result *result);
+enum mce_result
+{
+ MCER_NOERROR,
+ MCER_RECOVERED,
+ /* Not recoverd, but can continue */
+ MCER_CONTINUE,
+ MCER_RESET,
+};
struct mca_error_handler
{
@@ -175,7 +151,7 @@ struct mca_error_handler
*/
int (*owned_error)(uint64_t status);
void (*recovery_handler)(struct mca_binfo *binfo,
- struct mca_handle_result *result);
+ enum mce_result *result);
};
/* Global variables */
[-- Attachment #3: Type: text/plain, Size: 138 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
^ permalink raw reply [flat|nested] 5+ messages in thread
* RE: [PATCH 7] MCA: simplify mce actoin, and some update of mem err handler for future SRAR extension
2011-05-08 11:23 [PATCH 7] MCA: simplify mce actoin, and some update of mem err handler for future SRAR extension Liu, Jinsong
@ 2011-05-13 14:34 ` Liu, Jinsong
2011-05-14 9:45 ` Keir Fraser
0 siblings, 1 reply; 5+ messages in thread
From: Liu, Jinsong @ 2011-05-13 14:34 UTC (permalink / raw)
To: Jan Beulich, Keir Fraser, xen-devel; +Cc: Jiang, Yunhong, Li, Xin
Jan and Keir,
Any comments?
Thanks
Jinsong
Liu, Jinsong wrote:
> MCA: simplify mce actoin, and some update of mem err handler for
> future SRAR extension
>
> This patch simplify mce_action(). Basically the 2 set of mce status
> flags MCA_... and MCER_... are highly functional similar. This patch
> remove the redundant middle-level flag MCA_..., and its related data
> structure and function, so that mce_action() logic is more clean and
> simpler.
> This patch also update mem err handler. intel_memerr_dhandler() will
> be commonly used by SRAO and SRAR, so for the extension of future
> SRAR case, this patch remove the default fail flag from
> intel_memerr_dhandler() outside to SRAO/SRAR level, since only
> SRAO/SRAR handler itself has the knowledge of how to handle the
> failure when mem err handler fail.
>
> Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
>
> diff -r e7cd79cd6626 xen/arch/x86/cpu/mcheck/mce_intel.c
> --- a/xen/arch/x86/cpu/mcheck/mce_intel.c Sun May 08 13:39:40 2011
> +0800 +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Sun May 08 18:15:13
> 2011 +0800 @@ -172,23 +172,14 @@ static unsigned int __read_mostly
> mce_dh static unsigned int __read_mostly mce_dhandler_num;
> static unsigned int __read_mostly mce_uhandler_num;
>
> -enum mce_result
> -{
> - MCER_NOERROR,
> - MCER_RECOVERED,
> - /* Not recoverd, but can continue */
> - MCER_CONTINUE,
> - MCER_RESET,
> -};
> -
> /* Maybe called in MCE context, no lock, no printk */
> static enum mce_result mce_action(struct cpu_user_regs *regs,
> mctelem_cookie_t mctc)
> {
> struct mc_info *local_mi;
> - enum mce_result ret = MCER_NOERROR;
> + enum mce_result bank_result = MCER_NOERROR;
> + enum mce_result worst_result = MCER_NOERROR;
> struct mcinfo_common *mic = NULL;
> - struct mca_handle_result mca_res;
> struct mca_binfo binfo;
> const struct mca_error_handler *handlers = mce_dhandlers;
> unsigned int i, handler_num = mce_dhandler_num;
> @@ -217,7 +208,7 @@ static enum mce_result mce_action(struct
> /* Processing bank information */
> x86_mcinfo_lookup(mic, local_mi, MC_TYPE_BANK);
>
> - for ( ; ret != MCER_RESET && mic && mic->size;
> + for ( ; bank_result != MCER_RESET && mic && mic->size;
> mic = x86_mcinfo_next(mic) )
> {
> if (mic->type != MC_TYPE_BANK) {
> @@ -225,34 +216,20 @@ static enum mce_result mce_action(struct
> }
> binfo.mib = (struct mcinfo_bank*)mic;
> binfo.bank = binfo.mib->mc_bank;
> - memset(&mca_res, 0x0f, sizeof(mca_res));
> + bank_result = MCER_NOERROR;
> for ( i = 0; i < handler_num; i++ ) {
> if (handlers[i].owned_error(binfo.mib->mc_status))
> {
> - handlers[i].recovery_handler(&binfo, &mca_res);
> -
> - if (mca_res.result & MCA_OWNER)
> - binfo.mib->mc_domid = mca_res.owner;
> -
> - if (mca_res.result == MCA_NEED_RESET)
> - ret = MCER_RESET;
> - else if (mca_res.result == MCA_RECOVERED)
> - {
> - if (ret < MCER_RECOVERED)
> - ret = MCER_RECOVERED;
> - }
> - else if (mca_res.result == MCA_NO_ACTION)
> - {
> - if (ret < MCER_CONTINUE)
> - ret = MCER_CONTINUE;
> - }
> + handlers[i].recovery_handler(&binfo, &bank_result);
> + if (worst_result < bank_result)
> + worst_result = bank_result;
> break;
> }
> }
> ASSERT(i != handler_num);
> }
>
> - return ret;
> + return worst_result;
> }
>
> /*
> @@ -608,7 +585,7 @@ struct mcinfo_recovery *mci_add_pageoff_
>
> static void intel_memerr_dhandler(
> struct mca_binfo *binfo,
> - struct mca_handle_result *result)
> + enum mce_result *result)
> {
> struct mcinfo_bank *bank = binfo->mib;
> struct mcinfo_global *global = binfo->mig;
> @@ -618,7 +595,6 @@ static void intel_memerr_dhandler(
> uint64_t mc_status, mc_misc;
>
> mce_printk(MCE_VERBOSE, "MCE: Enter UCR recovery action\n");
> - result->result = MCA_NEED_RESET;
>
> mc_status = bank->mc_status;
> mc_misc = bank->mc_misc;
> @@ -626,7 +602,6 @@ static void intel_memerr_dhandler(
> !(mc_status & MCi_STATUS_MISCV) ||
> ((mc_misc & MCi_MISC_ADDRMOD_MASK) != MCi_MISC_PHYSMOD) )
> {
> - result->result |= MCA_NO_ACTION;
> dprintk(XENLOG_WARNING,
> "No physical address provided for memory error\n");
> return;
> @@ -644,23 +619,19 @@ static void intel_memerr_dhandler(
>
> /* This is free page */
> if (status & PG_OFFLINE_OFFLINED)
> - result->result = MCA_RECOVERED;
> + *result = MCER_RECOVERED;
> else if (status & PG_OFFLINE_PENDING) {
> /* This page has owner */
> if (status & PG_OFFLINE_OWNED) {
> - result->result |= MCA_OWNER;
> - result->owner = status >> PG_OFFLINE_OWNER_SHIFT;
> + bank->mc_domid = status >> PG_OFFLINE_OWNER_SHIFT;
> mce_printk(MCE_QUIET, "MCE: This error page is ownded"
> - " by DOM %d\n", result->owner);
> - /* Fill vMCE# injection and vMCE# MSR virtualization "
> - * "related data */
> - bank->mc_domid = result->owner;
> + " by DOM %d\n", bank->mc_domid);
> /* XXX: Cannot handle shared pages yet
> * (this should identify all domains and gfn mapping to
> * the mfn in question) */
> - BUG_ON( result->owner == DOMID_COW );
> - if ( result->owner != DOMID_XEN ) {
> - d = get_domain_by_id(result->owner);
> + BUG_ON( bank->mc_domid == DOMID_COW );
> + if ( bank->mc_domid != DOMID_XEN ) {
> + d = get_domain_by_id(bank->mc_domid);
> ASSERT(d);
> gfn = get_gpfn_from_mfn((bank->mc_addr) >>
> PAGE_SHIFT);
>
> @@ -683,7 +654,7 @@ static void intel_memerr_dhandler(
> global->mc_gstatus) == -1 )
> {
> mce_printk(MCE_QUIET, "Fill vMCE# data for DOM%d
> " - "failed\n", result->owner);
> + "failed\n", bank->mc_domid);
> goto vmce_failed;
> }
>
> @@ -699,7 +670,7 @@ static void intel_memerr_dhandler(
> * For xen, it has contained the error and finished
> * its own recovery job.
> */
> - result->result = MCA_RECOVERED;
> + *result = MCER_RECOVERED;
> put_domain(d);
>
> return;
> @@ -718,11 +689,13 @@ static int intel_srao_check(uint64_t sta
>
> static void intel_srao_dhandler(
> struct mca_binfo *binfo,
> - struct mca_handle_result *result)
> + enum mce_result *result)
> {
> uint64_t status = binfo->mib->mc_status;
>
> /* For unknown srao error code, no action required */
> + *result = MCER_CONTINUE;
> +
> if ( status & MCi_STATUS_VAL )
> {
> switch ( status & INTEL_MCCOD_MASK )
> @@ -744,7 +717,7 @@ static int intel_default_check(uint64_t
>
> static void intel_default_mce_dhandler(
> struct mca_binfo *binfo,
> - struct mca_handle_result *result)
> + enum mce_result *result)
> {
> uint64_t status = binfo->mib->mc_status;
> enum intel_mce_type type;
> @@ -752,9 +725,9 @@ static void intel_default_mce_dhandler(
> type = intel_check_mce_type(status);
>
> if (type == intel_mce_fatal || type == intel_mce_ucr_srar)
> - result->result = MCA_NEED_RESET;
> - else if (type == intel_mce_ucr_srao)
> - result->result = MCA_NO_ACTION;
> + *result = MCER_RESET;
> + else
> + *result = MCER_CONTINUE;
> }
>
> static const struct mca_error_handler intel_mce_dhandlers[] = {
> @@ -764,7 +737,7 @@ static const struct mca_error_handler in
>
> static void intel_default_mce_uhandler(
> struct mca_binfo *binfo,
> - struct mca_handle_result *result)
> + enum mce_result *result)
> {
> uint64_t status = binfo->mib->mc_status;
> enum intel_mce_type type;
> @@ -776,10 +749,10 @@ static void intel_default_mce_uhandler(
> /* Panic if no handler for SRAR error */
> case intel_mce_ucr_srar:
> case intel_mce_fatal:
> - result->result = MCA_NEED_RESET;
> + *result = MCER_RESET;
> break;
> default:
> - result->result = MCA_NO_ACTION;
> + *result = MCER_CONTINUE;
> break;
> }
> }
> diff -r e7cd79cd6626 xen/arch/x86/cpu/mcheck/x86_mca.h
> --- a/xen/arch/x86/cpu/mcheck/x86_mca.h Sun May 08 13:39:40 2011 +0800
> +++ b/xen/arch/x86/cpu/mcheck/x86_mca.h Sun May 08 18:15:13 2011 +0800
> @@ -124,36 +124,6 @@ void mcabanks_free(struct mca_banks *ban
> void mcabanks_free(struct mca_banks *banks);
> extern struct mca_banks *mca_allbanks;
>
> -/* Below interfaces are defined for MCA internal processing:
> - * a. pre_handler will be called early in MCA ISR context, mainly
> for early
> - * need_reset detection for avoiding log missing. Also, it is
> used to judge
> - * impacted DOMAIN if possible.
> - * b. mca_error_handler is actually a (error_action_index,
> - * recovery_hanlder pointer) pair. The defined recovery_handler
> - * performs the actual recovery operations such as page_offline,
> cpu_offline
> - * in softIRQ context when the per_bank MCA error matching the
> corresponding
> - * mca_code index. If pre_handler can't judge the impacted domain,
> - * recovery_handler must figure it out.
> -*/
> -
> -/* MCA error has been recovered successfully by the recovery action*/
> -#define MCA_RECOVERED (0x1 << 0)
> -/* MCA error impact the specified DOMAIN in owner field below */
> -#define MCA_OWNER (0x1 << 1)
> -/* MCA error can't be recovered and need reset */
> -#define MCA_NEED_RESET (0x1 << 2)
> -/* MCA error did not have any action yet */
> -#define MCA_NO_ACTION (0x1 << 3)
> -
> -struct mca_handle_result
> -{
> - uint32_t result;
> - /* Used one result & MCA_OWNER */
> - domid_t owner;
> - /* Used by mca_error_handler, result & MCA_RECOVRED */
> - struct recovery_action *action;
> -};
> -
> /*Keep bank so that we can get staus even if mib is NULL */
> struct mca_binfo {
> int bank;
> @@ -163,8 +133,14 @@ struct mca_binfo {
> struct cpu_user_regs *regs;
> };
>
> -extern void (*mca_prehandler)( struct cpu_user_regs *regs,
> - struct mca_handle_result *result);
> +enum mce_result
> +{
> + MCER_NOERROR,
> + MCER_RECOVERED,
> + /* Not recoverd, but can continue */
> + MCER_CONTINUE,
> + MCER_RESET,
> +};
>
> struct mca_error_handler
> {
> @@ -175,7 +151,7 @@ struct mca_error_handler
> */
> int (*owned_error)(uint64_t status);
> void (*recovery_handler)(struct mca_binfo *binfo,
> - struct mca_handle_result *result);
> + enum mce_result *result);
> };
>
> /* Global variables */
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 7] MCA: simplify mce actoin, and some update of mem err handler for future SRAR extension
2011-05-13 14:34 ` Liu, Jinsong
@ 2011-05-14 9:45 ` Keir Fraser
2011-05-14 10:01 ` Keir Fraser
0 siblings, 1 reply; 5+ messages in thread
From: Keir Fraser @ 2011-05-14 9:45 UTC (permalink / raw)
To: Liu, Jinsong, Jan Beulich, xen-devel; +Cc: Jiang, Yunhong, Li, Xin
I don't seem to have this one in my queue. You should resend.
-- Keir
On 13/05/2011 15:34, "Liu, Jinsong" <jinsong.liu@intel.com> wrote:
> Jan and Keir,
>
> Any comments?
>
> Thanks
> Jinsong
>
>
> Liu, Jinsong wrote:
>> MCA: simplify mce actoin, and some update of mem err handler for
>> future SRAR extension
>>
>> This patch simplify mce_action(). Basically the 2 set of mce status
>> flags MCA_... and MCER_... are highly functional similar. This patch
>> remove the redundant middle-level flag MCA_..., and its related data
>> structure and function, so that mce_action() logic is more clean and
>> simpler.
>> This patch also update mem err handler. intel_memerr_dhandler() will
>> be commonly used by SRAO and SRAR, so for the extension of future
>> SRAR case, this patch remove the default fail flag from
>> intel_memerr_dhandler() outside to SRAO/SRAR level, since only
>> SRAO/SRAR handler itself has the knowledge of how to handle the
>> failure when mem err handler fail.
>>
>> Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
>>
>> diff -r e7cd79cd6626 xen/arch/x86/cpu/mcheck/mce_intel.c
>> --- a/xen/arch/x86/cpu/mcheck/mce_intel.c Sun May 08 13:39:40 2011
>> +0800 +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Sun May 08 18:15:13
>> 2011 +0800 @@ -172,23 +172,14 @@ static unsigned int __read_mostly
>> mce_dh static unsigned int __read_mostly mce_dhandler_num;
>> static unsigned int __read_mostly mce_uhandler_num;
>>
>> -enum mce_result
>> -{
>> - MCER_NOERROR,
>> - MCER_RECOVERED,
>> - /* Not recoverd, but can continue */
>> - MCER_CONTINUE,
>> - MCER_RESET,
>> -};
>> -
>> /* Maybe called in MCE context, no lock, no printk */
>> static enum mce_result mce_action(struct cpu_user_regs *regs,
>> mctelem_cookie_t mctc)
>> {
>> struct mc_info *local_mi;
>> - enum mce_result ret = MCER_NOERROR;
>> + enum mce_result bank_result = MCER_NOERROR;
>> + enum mce_result worst_result = MCER_NOERROR;
>> struct mcinfo_common *mic = NULL;
>> - struct mca_handle_result mca_res;
>> struct mca_binfo binfo;
>> const struct mca_error_handler *handlers = mce_dhandlers;
>> unsigned int i, handler_num = mce_dhandler_num;
>> @@ -217,7 +208,7 @@ static enum mce_result mce_action(struct
>> /* Processing bank information */
>> x86_mcinfo_lookup(mic, local_mi, MC_TYPE_BANK);
>>
>> - for ( ; ret != MCER_RESET && mic && mic->size;
>> + for ( ; bank_result != MCER_RESET && mic && mic->size;
>> mic = x86_mcinfo_next(mic) )
>> {
>> if (mic->type != MC_TYPE_BANK) {
>> @@ -225,34 +216,20 @@ static enum mce_result mce_action(struct
>> }
>> binfo.mib = (struct mcinfo_bank*)mic;
>> binfo.bank = binfo.mib->mc_bank;
>> - memset(&mca_res, 0x0f, sizeof(mca_res));
>> + bank_result = MCER_NOERROR;
>> for ( i = 0; i < handler_num; i++ ) {
>> if (handlers[i].owned_error(binfo.mib->mc_status))
>> {
>> - handlers[i].recovery_handler(&binfo, &mca_res);
>> -
>> - if (mca_res.result & MCA_OWNER)
>> - binfo.mib->mc_domid = mca_res.owner;
>> -
>> - if (mca_res.result == MCA_NEED_RESET)
>> - ret = MCER_RESET;
>> - else if (mca_res.result == MCA_RECOVERED)
>> - {
>> - if (ret < MCER_RECOVERED)
>> - ret = MCER_RECOVERED;
>> - }
>> - else if (mca_res.result == MCA_NO_ACTION)
>> - {
>> - if (ret < MCER_CONTINUE)
>> - ret = MCER_CONTINUE;
>> - }
>> + handlers[i].recovery_handler(&binfo, &bank_result);
>> + if (worst_result < bank_result)
>> + worst_result = bank_result;
>> break;
>> }
>> }
>> ASSERT(i != handler_num);
>> }
>>
>> - return ret;
>> + return worst_result;
>> }
>>
>> /*
>> @@ -608,7 +585,7 @@ struct mcinfo_recovery *mci_add_pageoff_
>>
>> static void intel_memerr_dhandler(
>> struct mca_binfo *binfo,
>> - struct mca_handle_result *result)
>> + enum mce_result *result)
>> {
>> struct mcinfo_bank *bank = binfo->mib;
>> struct mcinfo_global *global = binfo->mig;
>> @@ -618,7 +595,6 @@ static void intel_memerr_dhandler(
>> uint64_t mc_status, mc_misc;
>>
>> mce_printk(MCE_VERBOSE, "MCE: Enter UCR recovery action\n");
>> - result->result = MCA_NEED_RESET;
>>
>> mc_status = bank->mc_status;
>> mc_misc = bank->mc_misc;
>> @@ -626,7 +602,6 @@ static void intel_memerr_dhandler(
>> !(mc_status & MCi_STATUS_MISCV) ||
>> ((mc_misc & MCi_MISC_ADDRMOD_MASK) != MCi_MISC_PHYSMOD) )
>> {
>> - result->result |= MCA_NO_ACTION;
>> dprintk(XENLOG_WARNING,
>> "No physical address provided for memory error\n");
>> return;
>> @@ -644,23 +619,19 @@ static void intel_memerr_dhandler(
>>
>> /* This is free page */
>> if (status & PG_OFFLINE_OFFLINED)
>> - result->result = MCA_RECOVERED;
>> + *result = MCER_RECOVERED;
>> else if (status & PG_OFFLINE_PENDING) {
>> /* This page has owner */
>> if (status & PG_OFFLINE_OWNED) {
>> - result->result |= MCA_OWNER;
>> - result->owner = status >> PG_OFFLINE_OWNER_SHIFT;
>> + bank->mc_domid = status >> PG_OFFLINE_OWNER_SHIFT;
>> mce_printk(MCE_QUIET, "MCE: This error page is ownded"
>> - " by DOM %d\n", result->owner);
>> - /* Fill vMCE# injection and vMCE# MSR virtualization "
>> - * "related data */
>> - bank->mc_domid = result->owner;
>> + " by DOM %d\n", bank->mc_domid);
>> /* XXX: Cannot handle shared pages yet
>> * (this should identify all domains and gfn mapping to
>> * the mfn in question) */
>> - BUG_ON( result->owner == DOMID_COW );
>> - if ( result->owner != DOMID_XEN ) {
>> - d = get_domain_by_id(result->owner);
>> + BUG_ON( bank->mc_domid == DOMID_COW );
>> + if ( bank->mc_domid != DOMID_XEN ) {
>> + d = get_domain_by_id(bank->mc_domid);
>> ASSERT(d);
>> gfn = get_gpfn_from_mfn((bank->mc_addr) >>
>> PAGE_SHIFT);
>>
>> @@ -683,7 +654,7 @@ static void intel_memerr_dhandler(
>> global->mc_gstatus) == -1 )
>> {
>> mce_printk(MCE_QUIET, "Fill vMCE# data for DOM%d
>> " - "failed\n", result->owner);
>> + "failed\n", bank->mc_domid);
>> goto vmce_failed;
>> }
>>
>> @@ -699,7 +670,7 @@ static void intel_memerr_dhandler(
>> * For xen, it has contained the error and finished
>> * its own recovery job.
>> */
>> - result->result = MCA_RECOVERED;
>> + *result = MCER_RECOVERED;
>> put_domain(d);
>>
>> return;
>> @@ -718,11 +689,13 @@ static int intel_srao_check(uint64_t sta
>>
>> static void intel_srao_dhandler(
>> struct mca_binfo *binfo,
>> - struct mca_handle_result *result)
>> + enum mce_result *result)
>> {
>> uint64_t status = binfo->mib->mc_status;
>>
>> /* For unknown srao error code, no action required */
>> + *result = MCER_CONTINUE;
>> +
>> if ( status & MCi_STATUS_VAL )
>> {
>> switch ( status & INTEL_MCCOD_MASK )
>> @@ -744,7 +717,7 @@ static int intel_default_check(uint64_t
>>
>> static void intel_default_mce_dhandler(
>> struct mca_binfo *binfo,
>> - struct mca_handle_result *result)
>> + enum mce_result *result)
>> {
>> uint64_t status = binfo->mib->mc_status;
>> enum intel_mce_type type;
>> @@ -752,9 +725,9 @@ static void intel_default_mce_dhandler(
>> type = intel_check_mce_type(status);
>>
>> if (type == intel_mce_fatal || type == intel_mce_ucr_srar)
>> - result->result = MCA_NEED_RESET;
>> - else if (type == intel_mce_ucr_srao)
>> - result->result = MCA_NO_ACTION;
>> + *result = MCER_RESET;
>> + else
>> + *result = MCER_CONTINUE;
>> }
>>
>> static const struct mca_error_handler intel_mce_dhandlers[] = {
>> @@ -764,7 +737,7 @@ static const struct mca_error_handler in
>>
>> static void intel_default_mce_uhandler(
>> struct mca_binfo *binfo,
>> - struct mca_handle_result *result)
>> + enum mce_result *result)
>> {
>> uint64_t status = binfo->mib->mc_status;
>> enum intel_mce_type type;
>> @@ -776,10 +749,10 @@ static void intel_default_mce_uhandler(
>> /* Panic if no handler for SRAR error */
>> case intel_mce_ucr_srar:
>> case intel_mce_fatal:
>> - result->result = MCA_NEED_RESET;
>> + *result = MCER_RESET;
>> break;
>> default:
>> - result->result = MCA_NO_ACTION;
>> + *result = MCER_CONTINUE;
>> break;
>> }
>> }
>> diff -r e7cd79cd6626 xen/arch/x86/cpu/mcheck/x86_mca.h
>> --- a/xen/arch/x86/cpu/mcheck/x86_mca.h Sun May 08 13:39:40 2011 +0800
>> +++ b/xen/arch/x86/cpu/mcheck/x86_mca.h Sun May 08 18:15:13 2011 +0800
>> @@ -124,36 +124,6 @@ void mcabanks_free(struct mca_banks *ban
>> void mcabanks_free(struct mca_banks *banks);
>> extern struct mca_banks *mca_allbanks;
>>
>> -/* Below interfaces are defined for MCA internal processing:
>> - * a. pre_handler will be called early in MCA ISR context, mainly
>> for early
>> - * need_reset detection for avoiding log missing. Also, it is
>> used to judge
>> - * impacted DOMAIN if possible.
>> - * b. mca_error_handler is actually a (error_action_index,
>> - * recovery_hanlder pointer) pair. The defined recovery_handler
>> - * performs the actual recovery operations such as page_offline,
>> cpu_offline
>> - * in softIRQ context when the per_bank MCA error matching the
>> corresponding
>> - * mca_code index. If pre_handler can't judge the impacted domain,
>> - * recovery_handler must figure it out.
>> -*/
>> -
>> -/* MCA error has been recovered successfully by the recovery action*/
>> -#define MCA_RECOVERED (0x1 << 0)
>> -/* MCA error impact the specified DOMAIN in owner field below */
>> -#define MCA_OWNER (0x1 << 1)
>> -/* MCA error can't be recovered and need reset */
>> -#define MCA_NEED_RESET (0x1 << 2)
>> -/* MCA error did not have any action yet */
>> -#define MCA_NO_ACTION (0x1 << 3)
>> -
>> -struct mca_handle_result
>> -{
>> - uint32_t result;
>> - /* Used one result & MCA_OWNER */
>> - domid_t owner;
>> - /* Used by mca_error_handler, result & MCA_RECOVRED */
>> - struct recovery_action *action;
>> -};
>> -
>> /*Keep bank so that we can get staus even if mib is NULL */
>> struct mca_binfo {
>> int bank;
>> @@ -163,8 +133,14 @@ struct mca_binfo {
>> struct cpu_user_regs *regs;
>> };
>>
>> -extern void (*mca_prehandler)( struct cpu_user_regs *regs,
>> - struct mca_handle_result *result);
>> +enum mce_result
>> +{
>> + MCER_NOERROR,
>> + MCER_RECOVERED,
>> + /* Not recoverd, but can continue */
>> + MCER_CONTINUE,
>> + MCER_RESET,
>> +};
>>
>> struct mca_error_handler
>> {
>> @@ -175,7 +151,7 @@ struct mca_error_handler
>> */
>> int (*owned_error)(uint64_t status);
>> void (*recovery_handler)(struct mca_binfo *binfo,
>> - struct mca_handle_result *result);
>> + enum mce_result *result);
>> };
>>
>> /* Global variables */
>
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 7] MCA: simplify mce actoin, and some update of mem err handler for future SRAR extension
2011-05-14 9:45 ` Keir Fraser
@ 2011-05-14 10:01 ` Keir Fraser
2011-05-15 5:08 ` Liu, Jinsong
0 siblings, 1 reply; 5+ messages in thread
From: Keir Fraser @ 2011-05-14 10:01 UTC (permalink / raw)
To: Liu, Jinsong, Jan Beulich, xen-devel; +Cc: Jiang, Yunhong, Li, Xin
Ah no, I found it! I'll take a look at it on Monday.
-- Keir
On 14/05/2011 10:45, "Keir Fraser" <keir@xen.org> wrote:
> I don't seem to have this one in my queue. You should resend.
>
> -- Keir
>
> On 13/05/2011 15:34, "Liu, Jinsong" <jinsong.liu@intel.com> wrote:
>
>> Jan and Keir,
>>
>> Any comments?
>>
>> Thanks
>> Jinsong
>>
>>
>> Liu, Jinsong wrote:
>>> MCA: simplify mce actoin, and some update of mem err handler for
>>> future SRAR extension
>>>
>>> This patch simplify mce_action(). Basically the 2 set of mce status
>>> flags MCA_... and MCER_... are highly functional similar. This patch
>>> remove the redundant middle-level flag MCA_..., and its related data
>>> structure and function, so that mce_action() logic is more clean and
>>> simpler.
>>> This patch also update mem err handler. intel_memerr_dhandler() will
>>> be commonly used by SRAO and SRAR, so for the extension of future
>>> SRAR case, this patch remove the default fail flag from
>>> intel_memerr_dhandler() outside to SRAO/SRAR level, since only
>>> SRAO/SRAR handler itself has the knowledge of how to handle the
>>> failure when mem err handler fail.
>>>
>>> Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
>>>
>>> diff -r e7cd79cd6626 xen/arch/x86/cpu/mcheck/mce_intel.c
>>> --- a/xen/arch/x86/cpu/mcheck/mce_intel.c Sun May 08 13:39:40 2011
>>> +0800 +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Sun May 08 18:15:13
>>> 2011 +0800 @@ -172,23 +172,14 @@ static unsigned int __read_mostly
>>> mce_dh static unsigned int __read_mostly mce_dhandler_num;
>>> static unsigned int __read_mostly mce_uhandler_num;
>>>
>>> -enum mce_result
>>> -{
>>> - MCER_NOERROR,
>>> - MCER_RECOVERED,
>>> - /* Not recoverd, but can continue */
>>> - MCER_CONTINUE,
>>> - MCER_RESET,
>>> -};
>>> -
>>> /* Maybe called in MCE context, no lock, no printk */
>>> static enum mce_result mce_action(struct cpu_user_regs *regs,
>>> mctelem_cookie_t mctc)
>>> {
>>> struct mc_info *local_mi;
>>> - enum mce_result ret = MCER_NOERROR;
>>> + enum mce_result bank_result = MCER_NOERROR;
>>> + enum mce_result worst_result = MCER_NOERROR;
>>> struct mcinfo_common *mic = NULL;
>>> - struct mca_handle_result mca_res;
>>> struct mca_binfo binfo;
>>> const struct mca_error_handler *handlers = mce_dhandlers;
>>> unsigned int i, handler_num = mce_dhandler_num;
>>> @@ -217,7 +208,7 @@ static enum mce_result mce_action(struct
>>> /* Processing bank information */
>>> x86_mcinfo_lookup(mic, local_mi, MC_TYPE_BANK);
>>>
>>> - for ( ; ret != MCER_RESET && mic && mic->size;
>>> + for ( ; bank_result != MCER_RESET && mic && mic->size;
>>> mic = x86_mcinfo_next(mic) )
>>> {
>>> if (mic->type != MC_TYPE_BANK) {
>>> @@ -225,34 +216,20 @@ static enum mce_result mce_action(struct
>>> }
>>> binfo.mib = (struct mcinfo_bank*)mic;
>>> binfo.bank = binfo.mib->mc_bank;
>>> - memset(&mca_res, 0x0f, sizeof(mca_res));
>>> + bank_result = MCER_NOERROR;
>>> for ( i = 0; i < handler_num; i++ ) {
>>> if (handlers[i].owned_error(binfo.mib->mc_status))
>>> {
>>> - handlers[i].recovery_handler(&binfo, &mca_res);
>>> -
>>> - if (mca_res.result & MCA_OWNER)
>>> - binfo.mib->mc_domid = mca_res.owner;
>>> -
>>> - if (mca_res.result == MCA_NEED_RESET)
>>> - ret = MCER_RESET;
>>> - else if (mca_res.result == MCA_RECOVERED)
>>> - {
>>> - if (ret < MCER_RECOVERED)
>>> - ret = MCER_RECOVERED;
>>> - }
>>> - else if (mca_res.result == MCA_NO_ACTION)
>>> - {
>>> - if (ret < MCER_CONTINUE)
>>> - ret = MCER_CONTINUE;
>>> - }
>>> + handlers[i].recovery_handler(&binfo, &bank_result);
>>> + if (worst_result < bank_result)
>>> + worst_result = bank_result;
>>> break;
>>> }
>>> }
>>> ASSERT(i != handler_num);
>>> }
>>>
>>> - return ret;
>>> + return worst_result;
>>> }
>>>
>>> /*
>>> @@ -608,7 +585,7 @@ struct mcinfo_recovery *mci_add_pageoff_
>>>
>>> static void intel_memerr_dhandler(
>>> struct mca_binfo *binfo,
>>> - struct mca_handle_result *result)
>>> + enum mce_result *result)
>>> {
>>> struct mcinfo_bank *bank = binfo->mib;
>>> struct mcinfo_global *global = binfo->mig;
>>> @@ -618,7 +595,6 @@ static void intel_memerr_dhandler(
>>> uint64_t mc_status, mc_misc;
>>>
>>> mce_printk(MCE_VERBOSE, "MCE: Enter UCR recovery action\n");
>>> - result->result = MCA_NEED_RESET;
>>>
>>> mc_status = bank->mc_status;
>>> mc_misc = bank->mc_misc;
>>> @@ -626,7 +602,6 @@ static void intel_memerr_dhandler(
>>> !(mc_status & MCi_STATUS_MISCV) ||
>>> ((mc_misc & MCi_MISC_ADDRMOD_MASK) != MCi_MISC_PHYSMOD) )
>>> {
>>> - result->result |= MCA_NO_ACTION;
>>> dprintk(XENLOG_WARNING,
>>> "No physical address provided for memory error\n");
>>> return;
>>> @@ -644,23 +619,19 @@ static void intel_memerr_dhandler(
>>>
>>> /* This is free page */
>>> if (status & PG_OFFLINE_OFFLINED)
>>> - result->result = MCA_RECOVERED;
>>> + *result = MCER_RECOVERED;
>>> else if (status & PG_OFFLINE_PENDING) {
>>> /* This page has owner */
>>> if (status & PG_OFFLINE_OWNED) {
>>> - result->result |= MCA_OWNER;
>>> - result->owner = status >> PG_OFFLINE_OWNER_SHIFT;
>>> + bank->mc_domid = status >> PG_OFFLINE_OWNER_SHIFT;
>>> mce_printk(MCE_QUIET, "MCE: This error page is ownded"
>>> - " by DOM %d\n", result->owner);
>>> - /* Fill vMCE# injection and vMCE# MSR virtualization "
>>> - * "related data */
>>> - bank->mc_domid = result->owner;
>>> + " by DOM %d\n", bank->mc_domid);
>>> /* XXX: Cannot handle shared pages yet
>>> * (this should identify all domains and gfn mapping to
>>> * the mfn in question) */
>>> - BUG_ON( result->owner == DOMID_COW );
>>> - if ( result->owner != DOMID_XEN ) {
>>> - d = get_domain_by_id(result->owner);
>>> + BUG_ON( bank->mc_domid == DOMID_COW );
>>> + if ( bank->mc_domid != DOMID_XEN ) {
>>> + d = get_domain_by_id(bank->mc_domid);
>>> ASSERT(d);
>>> gfn = get_gpfn_from_mfn((bank->mc_addr) >>
>>> PAGE_SHIFT);
>>>
>>> @@ -683,7 +654,7 @@ static void intel_memerr_dhandler(
>>> global->mc_gstatus) == -1 )
>>> {
>>> mce_printk(MCE_QUIET, "Fill vMCE# data for DOM%d
>>> " - "failed\n", result->owner);
>>> + "failed\n", bank->mc_domid);
>>> goto vmce_failed;
>>> }
>>>
>>> @@ -699,7 +670,7 @@ static void intel_memerr_dhandler(
>>> * For xen, it has contained the error and finished
>>> * its own recovery job.
>>> */
>>> - result->result = MCA_RECOVERED;
>>> + *result = MCER_RECOVERED;
>>> put_domain(d);
>>>
>>> return;
>>> @@ -718,11 +689,13 @@ static int intel_srao_check(uint64_t sta
>>>
>>> static void intel_srao_dhandler(
>>> struct mca_binfo *binfo,
>>> - struct mca_handle_result *result)
>>> + enum mce_result *result)
>>> {
>>> uint64_t status = binfo->mib->mc_status;
>>>
>>> /* For unknown srao error code, no action required */
>>> + *result = MCER_CONTINUE;
>>> +
>>> if ( status & MCi_STATUS_VAL )
>>> {
>>> switch ( status & INTEL_MCCOD_MASK )
>>> @@ -744,7 +717,7 @@ static int intel_default_check(uint64_t
>>>
>>> static void intel_default_mce_dhandler(
>>> struct mca_binfo *binfo,
>>> - struct mca_handle_result *result)
>>> + enum mce_result *result)
>>> {
>>> uint64_t status = binfo->mib->mc_status;
>>> enum intel_mce_type type;
>>> @@ -752,9 +725,9 @@ static void intel_default_mce_dhandler(
>>> type = intel_check_mce_type(status);
>>>
>>> if (type == intel_mce_fatal || type == intel_mce_ucr_srar)
>>> - result->result = MCA_NEED_RESET;
>>> - else if (type == intel_mce_ucr_srao)
>>> - result->result = MCA_NO_ACTION;
>>> + *result = MCER_RESET;
>>> + else
>>> + *result = MCER_CONTINUE;
>>> }
>>>
>>> static const struct mca_error_handler intel_mce_dhandlers[] = {
>>> @@ -764,7 +737,7 @@ static const struct mca_error_handler in
>>>
>>> static void intel_default_mce_uhandler(
>>> struct mca_binfo *binfo,
>>> - struct mca_handle_result *result)
>>> + enum mce_result *result)
>>> {
>>> uint64_t status = binfo->mib->mc_status;
>>> enum intel_mce_type type;
>>> @@ -776,10 +749,10 @@ static void intel_default_mce_uhandler(
>>> /* Panic if no handler for SRAR error */
>>> case intel_mce_ucr_srar:
>>> case intel_mce_fatal:
>>> - result->result = MCA_NEED_RESET;
>>> + *result = MCER_RESET;
>>> break;
>>> default:
>>> - result->result = MCA_NO_ACTION;
>>> + *result = MCER_CONTINUE;
>>> break;
>>> }
>>> }
>>> diff -r e7cd79cd6626 xen/arch/x86/cpu/mcheck/x86_mca.h
>>> --- a/xen/arch/x86/cpu/mcheck/x86_mca.h Sun May 08 13:39:40 2011 +0800
>>> +++ b/xen/arch/x86/cpu/mcheck/x86_mca.h Sun May 08 18:15:13 2011 +0800
>>> @@ -124,36 +124,6 @@ void mcabanks_free(struct mca_banks *ban
>>> void mcabanks_free(struct mca_banks *banks);
>>> extern struct mca_banks *mca_allbanks;
>>>
>>> -/* Below interfaces are defined for MCA internal processing:
>>> - * a. pre_handler will be called early in MCA ISR context, mainly
>>> for early
>>> - * need_reset detection for avoiding log missing. Also, it is
>>> used to judge
>>> - * impacted DOMAIN if possible.
>>> - * b. mca_error_handler is actually a (error_action_index,
>>> - * recovery_hanlder pointer) pair. The defined recovery_handler
>>> - * performs the actual recovery operations such as page_offline,
>>> cpu_offline
>>> - * in softIRQ context when the per_bank MCA error matching the
>>> corresponding
>>> - * mca_code index. If pre_handler can't judge the impacted domain,
>>> - * recovery_handler must figure it out.
>>> -*/
>>> -
>>> -/* MCA error has been recovered successfully by the recovery action*/
>>> -#define MCA_RECOVERED (0x1 << 0)
>>> -/* MCA error impact the specified DOMAIN in owner field below */
>>> -#define MCA_OWNER (0x1 << 1)
>>> -/* MCA error can't be recovered and need reset */
>>> -#define MCA_NEED_RESET (0x1 << 2)
>>> -/* MCA error did not have any action yet */
>>> -#define MCA_NO_ACTION (0x1 << 3)
>>> -
>>> -struct mca_handle_result
>>> -{
>>> - uint32_t result;
>>> - /* Used one result & MCA_OWNER */
>>> - domid_t owner;
>>> - /* Used by mca_error_handler, result & MCA_RECOVRED */
>>> - struct recovery_action *action;
>>> -};
>>> -
>>> /*Keep bank so that we can get staus even if mib is NULL */
>>> struct mca_binfo {
>>> int bank;
>>> @@ -163,8 +133,14 @@ struct mca_binfo {
>>> struct cpu_user_regs *regs;
>>> };
>>>
>>> -extern void (*mca_prehandler)( struct cpu_user_regs *regs,
>>> - struct mca_handle_result *result);
>>> +enum mce_result
>>> +{
>>> + MCER_NOERROR,
>>> + MCER_RECOVERED,
>>> + /* Not recoverd, but can continue */
>>> + MCER_CONTINUE,
>>> + MCER_RESET,
>>> +};
>>>
>>> struct mca_error_handler
>>> {
>>> @@ -175,7 +151,7 @@ struct mca_error_handler
>>> */
>>> int (*owned_error)(uint64_t status);
>>> void (*recovery_handler)(struct mca_binfo *binfo,
>>> - struct mca_handle_result *result);
>>> + enum mce_result *result);
>>> };
>>>
>>> /* Global variables */
>>
>
>
^ permalink raw reply [flat|nested] 5+ messages in thread
* RE: [PATCH 7] MCA: simplify mce actoin, and some update of mem err handler for future SRAR extension
2011-05-14 10:01 ` Keir Fraser
@ 2011-05-15 5:08 ` Liu, Jinsong
0 siblings, 0 replies; 5+ messages in thread
From: Liu, Jinsong @ 2011-05-15 5:08 UTC (permalink / raw)
To: Keir Fraser, Jan Beulich, xen-devel; +Cc: Jiang, Yunhong, Li, Xin
OK, thanks :)
Jinsong
Keir Fraser wrote:
> Ah no, I found it! I'll take a look at it on Monday.
>
> -- Keir
>
> On 14/05/2011 10:45, "Keir Fraser" <keir@xen.org> wrote:
>
>> I don't seem to have this one in my queue. You should resend.
>>
>> -- Keir
>>
>> On 13/05/2011 15:34, "Liu, Jinsong" <jinsong.liu@intel.com> wrote:
>>
>>> Jan and Keir,
>>>
>>> Any comments?
>>>
>>> Thanks
>>> Jinsong
>>>
>>>
>>> Liu, Jinsong wrote:
>>>> MCA: simplify mce actoin, and some update of mem err handler for
>>>> future SRAR extension
>>>>
>>>> This patch simplify mce_action(). Basically the 2 set of mce status
>>>> flags MCA_... and MCER_... are highly functional similar. This
>>>> patch remove the redundant middle-level flag MCA_..., and its
>>>> related data structure and function, so that mce_action() logic is
>>>> more clean and simpler. This patch also update mem err handler.
>>>> intel_memerr_dhandler() will be commonly used by SRAO and SRAR, so
>>>> for the extension of future SRAR case, this patch remove the
>>>> default fail flag from intel_memerr_dhandler() outside to
>>>> SRAO/SRAR level, since only SRAO/SRAR handler itself has the
>>>> knowledge of how to handle the failure when mem err handler fail.
>>>>
>>>> Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
>>>>
>>>> diff -r e7cd79cd6626 xen/arch/x86/cpu/mcheck/mce_intel.c
>>>> --- a/xen/arch/x86/cpu/mcheck/mce_intel.c Sun May 08 13:39:40 2011
>>>> +0800 +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Sun May 08 18:15:13
>>>> 2011 +0800 @@ -172,23 +172,14 @@ static unsigned int __read_mostly
>>>> mce_dh static unsigned int __read_mostly mce_dhandler_num;
>>>> static unsigned int __read_mostly mce_uhandler_num;
>>>>
>>>> -enum mce_result
>>>> -{
>>>> - MCER_NOERROR,
>>>> - MCER_RECOVERED,
>>>> - /* Not recoverd, but can continue */
>>>> - MCER_CONTINUE,
>>>> - MCER_RESET,
>>>> -};
>>>> -
>>>> /* Maybe called in MCE context, no lock, no printk */
>>>> static enum mce_result mce_action(struct cpu_user_regs *regs,
>>>> mctelem_cookie_t mctc)
>>>> {
>>>> struct mc_info *local_mi;
>>>> - enum mce_result ret = MCER_NOERROR;
>>>> + enum mce_result bank_result = MCER_NOERROR;
>>>> + enum mce_result worst_result = MCER_NOERROR;
>>>> struct mcinfo_common *mic = NULL;
>>>> - struct mca_handle_result mca_res;
>>>> struct mca_binfo binfo;
>>>> const struct mca_error_handler *handlers = mce_dhandlers;
>>>> unsigned int i, handler_num = mce_dhandler_num;
>>>> @@ -217,7 +208,7 @@ static enum mce_result mce_action(struct
>>>> /* Processing bank information */
>>>> x86_mcinfo_lookup(mic, local_mi, MC_TYPE_BANK);
>>>>
>>>> - for ( ; ret != MCER_RESET && mic && mic->size;
>>>> + for ( ; bank_result != MCER_RESET && mic && mic->size;
>>>> mic = x86_mcinfo_next(mic) )
>>>> {
>>>> if (mic->type != MC_TYPE_BANK) {
>>>> @@ -225,34 +216,20 @@ static enum mce_result mce_action(struct
>>>> } binfo.mib = (struct mcinfo_bank*)mic;
>>>> binfo.bank = binfo.mib->mc_bank;
>>>> - memset(&mca_res, 0x0f, sizeof(mca_res));
>>>> + bank_result = MCER_NOERROR;
>>>> for ( i = 0; i < handler_num; i++ ) {
>>>> if (handlers[i].owned_error(binfo.mib->mc_status))
>>>> {
>>>> - handlers[i].recovery_handler(&binfo, &mca_res); -
>>>> - if (mca_res.result & MCA_OWNER)
>>>> - binfo.mib->mc_domid = mca_res.owner; -
>>>> - if (mca_res.result == MCA_NEED_RESET)
>>>> - ret = MCER_RESET;
>>>> - else if (mca_res.result == MCA_RECOVERED)
>>>> - {
>>>> - if (ret < MCER_RECOVERED)
>>>> - ret = MCER_RECOVERED;
>>>> - }
>>>> - else if (mca_res.result == MCA_NO_ACTION)
>>>> - {
>>>> - if (ret < MCER_CONTINUE)
>>>> - ret = MCER_CONTINUE;
>>>> - }
>>>> + handlers[i].recovery_handler(&binfo,
>>>> &bank_result); + if (worst_result < bank_result)
>>>> + worst_result = bank_result;
>>>> break;
>>>> }
>>>> }
>>>> ASSERT(i != handler_num);
>>>> }
>>>>
>>>> - return ret;
>>>> + return worst_result;
>>>> }
>>>>
>>>> /*
>>>> @@ -608,7 +585,7 @@ struct mcinfo_recovery *mci_add_pageoff_
>>>>
>>>> static void intel_memerr_dhandler(
>>>> struct mca_binfo *binfo,
>>>> - struct mca_handle_result *result)
>>>> + enum mce_result *result)
>>>> {
>>>> struct mcinfo_bank *bank = binfo->mib;
>>>> struct mcinfo_global *global = binfo->mig;
>>>> @@ -618,7 +595,6 @@ static void intel_memerr_dhandler(
>>>> uint64_t mc_status, mc_misc;
>>>>
>>>> mce_printk(MCE_VERBOSE, "MCE: Enter UCR recovery action\n");
>>>> - result->result = MCA_NEED_RESET;
>>>>
>>>> mc_status = bank->mc_status;
>>>> mc_misc = bank->mc_misc;
>>>> @@ -626,7 +602,6 @@ static void intel_memerr_dhandler(
>>>> !(mc_status & MCi_STATUS_MISCV) ||
>>>> ((mc_misc & MCi_MISC_ADDRMOD_MASK) != MCi_MISC_PHYSMOD) )
>>>> { - result->result |= MCA_NO_ACTION;
>>>> dprintk(XENLOG_WARNING,
>>>> "No physical address provided for memory error\n");
>>>> return; @@ -644,23 +619,19 @@ static void intel_memerr_dhandler(
>>>>
>>>> /* This is free page */
>>>> if (status & PG_OFFLINE_OFFLINED)
>>>> - result->result = MCA_RECOVERED;
>>>> + *result = MCER_RECOVERED;
>>>> else if (status & PG_OFFLINE_PENDING) {
>>>> /* This page has owner */
>>>> if (status & PG_OFFLINE_OWNED) {
>>>> - result->result |= MCA_OWNER;
>>>> - result->owner = status >> PG_OFFLINE_OWNER_SHIFT;
>>>> + bank->mc_domid = status >> PG_OFFLINE_OWNER_SHIFT;
>>>> mce_printk(MCE_QUIET, "MCE: This error page is ownded"
>>>> - " by DOM %d\n", result->owner);
>>>> - /* Fill vMCE# injection and vMCE# MSR virtualization "
>>>> - * "related data */
>>>> - bank->mc_domid = result->owner;
>>>> + " by DOM %d\n", bank->mc_domid);
>>>> /* XXX: Cannot handle shared pages yet
>>>> * (this should identify all domains and gfn mapping
>>>> to
>>>> * the mfn in question) */
>>>> - BUG_ON( result->owner == DOMID_COW );
>>>> - if ( result->owner != DOMID_XEN ) {
>>>> - d = get_domain_by_id(result->owner);
>>>> + BUG_ON( bank->mc_domid == DOMID_COW );
>>>> + if ( bank->mc_domid != DOMID_XEN ) {
>>>> + d = get_domain_by_id(bank->mc_domid);
>>>> ASSERT(d); gfn =
>>>> get_gpfn_from_mfn((bank->mc_addr) >> PAGE_SHIFT);
>>>>
>>>> @@ -683,7 +654,7 @@ static void intel_memerr_dhandler(
>>>> global->mc_gstatus) == -1 )
>>>> {
>>>> mce_printk(MCE_QUIET, "Fill vMCE# data for
>>>> DOM%d " - "failed\n", result->owner);
>>>> + "failed\n", bank->mc_domid);
>>>> goto vmce_failed;
>>>> }
>>>>
>>>> @@ -699,7 +670,7 @@ static void intel_memerr_dhandler(
>>>> * For xen, it has contained the error and
>>>> finished
>>>> * its own recovery job.
>>>> */
>>>> - result->result = MCA_RECOVERED;
>>>> + *result = MCER_RECOVERED;
>>>> put_domain(d);
>>>>
>>>> return;
>>>> @@ -718,11 +689,13 @@ static int intel_srao_check(uint64_t sta
>>>>
>>>> static void intel_srao_dhandler(
>>>> struct mca_binfo *binfo,
>>>> - struct mca_handle_result *result)
>>>> + enum mce_result *result)
>>>> {
>>>> uint64_t status = binfo->mib->mc_status;
>>>>
>>>> /* For unknown srao error code, no action required */ +
>>>> *result = MCER_CONTINUE; +
>>>> if ( status & MCi_STATUS_VAL )
>>>> {
>>>> switch ( status & INTEL_MCCOD_MASK )
>>>> @@ -744,7 +717,7 @@ static int intel_default_check(uint64_t
>>>>
>>>> static void intel_default_mce_dhandler(
>>>> struct mca_binfo *binfo,
>>>> - struct mca_handle_result *result)
>>>> + enum mce_result *result)
>>>> {
>>>> uint64_t status = binfo->mib->mc_status;
>>>> enum intel_mce_type type;
>>>> @@ -752,9 +725,9 @@ static void intel_default_mce_dhandler(
>>>> type = intel_check_mce_type(status);
>>>>
>>>> if (type == intel_mce_fatal || type == intel_mce_ucr_srar)
>>>> - result->result = MCA_NEED_RESET;
>>>> - else if (type == intel_mce_ucr_srao)
>>>> - result->result = MCA_NO_ACTION;
>>>> + *result = MCER_RESET;
>>>> + else
>>>> + *result = MCER_CONTINUE;
>>>> }
>>>>
>>>> static const struct mca_error_handler intel_mce_dhandlers[] = {
>>>> @@ -764,7 +737,7 @@ static const struct mca_error_handler in
>>>>
>>>> static void intel_default_mce_uhandler(
>>>> struct mca_binfo *binfo,
>>>> - struct mca_handle_result *result)
>>>> + enum mce_result *result)
>>>> {
>>>> uint64_t status = binfo->mib->mc_status;
>>>> enum intel_mce_type type;
>>>> @@ -776,10 +749,10 @@ static void intel_default_mce_uhandler(
>>>> /* Panic if no handler for SRAR error */
>>>> case intel_mce_ucr_srar:
>>>> case intel_mce_fatal:
>>>> - result->result = MCA_NEED_RESET;
>>>> + *result = MCER_RESET;
>>>> break;
>>>> default:
>>>> - result->result = MCA_NO_ACTION;
>>>> + *result = MCER_CONTINUE;
>>>> break;
>>>> }
>>>> }
>>>> diff -r e7cd79cd6626 xen/arch/x86/cpu/mcheck/x86_mca.h
>>>> --- a/xen/arch/x86/cpu/mcheck/x86_mca.h Sun May 08 13:39:40 2011
>>>> +0800 +++ b/xen/arch/x86/cpu/mcheck/x86_mca.h Sun May 08 18:15:13
>>>> 2011 +0800 @@ -124,36 +124,6 @@ void mcabanks_free(struct
>>>> mca_banks *ban void mcabanks_free(struct mca_banks *banks);
>>>> extern struct mca_banks *mca_allbanks;
>>>>
>>>> -/* Below interfaces are defined for MCA internal processing:
>>>> - * a. pre_handler will be called early in MCA ISR context, mainly
>>>> for early
>>>> - * need_reset detection for avoiding log missing. Also, it is
>>>> used to judge
>>>> - * impacted DOMAIN if possible.
>>>> - * b. mca_error_handler is actually a (error_action_index,
>>>> - * recovery_hanlder pointer) pair. The defined recovery_handler
>>>> - * performs the actual recovery operations such as
>>>> page_offline, cpu_offline
>>>> - * in softIRQ context when the per_bank MCA error matching the
>>>> corresponding
>>>> - * mca_code index. If pre_handler can't judge the impacted
>>>> domain,
>>>> - * recovery_handler must figure it out.
>>>> -*/
>>>> -
>>>> -/* MCA error has been recovered successfully by the recovery
>>>> action*/
>>>> -#define MCA_RECOVERED (0x1 << 0)
>>>> -/* MCA error impact the specified DOMAIN in owner field below */
>>>> -#define MCA_OWNER (0x1 << 1)
>>>> -/* MCA error can't be recovered and need reset */
>>>> -#define MCA_NEED_RESET (0x1 << 2)
>>>> -/* MCA error did not have any action yet */
>>>> -#define MCA_NO_ACTION (0x1 << 3)
>>>> -
>>>> -struct mca_handle_result
>>>> -{
>>>> - uint32_t result;
>>>> - /* Used one result & MCA_OWNER */
>>>> - domid_t owner;
>>>> - /* Used by mca_error_handler, result & MCA_RECOVRED */
>>>> - struct recovery_action *action;
>>>> -};
>>>> -
>>>> /*Keep bank so that we can get staus even if mib is NULL */
>>>> struct mca_binfo { int bank;
>>>> @@ -163,8 +133,14 @@ struct mca_binfo {
>>>> struct cpu_user_regs *regs;
>>>> };
>>>>
>>>> -extern void (*mca_prehandler)( struct cpu_user_regs *regs,
>>>> - struct mca_handle_result *result); +enum
>>>> mce_result +{
>>>> + MCER_NOERROR,
>>>> + MCER_RECOVERED,
>>>> + /* Not recoverd, but can continue */
>>>> + MCER_CONTINUE,
>>>> + MCER_RESET,
>>>> +};
>>>>
>>>> struct mca_error_handler
>>>> {
>>>> @@ -175,7 +151,7 @@ struct mca_error_handler
>>>> */
>>>> int (*owned_error)(uint64_t status);
>>>> void (*recovery_handler)(struct mca_binfo *binfo,
>>>> - struct mca_handle_result *result);
>>>> + enum mce_result *result);
>>>> };
>>>>
>>>> /* Global variables */
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2011-05-15 5:08 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-05-08 11:23 [PATCH 7] MCA: simplify mce actoin, and some update of mem err handler for future SRAR extension Liu, Jinsong
2011-05-13 14:34 ` Liu, Jinsong
2011-05-14 9:45 ` Keir Fraser
2011-05-14 10:01 ` Keir Fraser
2011-05-15 5:08 ` Liu, Jinsong
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.