* [patch 1/8] 2.6.22-rc3 perfmon2 : IBS implementation for AMD64
2007-06-15 16:56 [patch 0/8] 2.6.22-rc3 perfmon2 : IBS implementation for AMD64 Robert Richter
@ 2007-06-15 16:57 ` Robert Richter
2007-06-15 18:54 ` David Rientjes
2007-06-19 12:39 ` Stephane Eranian
2007-06-15 16:58 ` [patch 2/8] " Robert Richter
` (6 subsequent siblings)
7 siblings, 2 replies; 19+ messages in thread
From: Robert Richter @ 2007-06-15 16:57 UTC (permalink / raw)
To: Stephane Eranian; +Cc: Andi Kleen, linux-kernel
This patch implements Barcelona CPU detection.
Signed-off-by: Robert Richter <robert.richter@amd.com>
Index: linux-2.6.22-rc3/arch/i386/perfmon/perfmon.c
===================================================================
--- linux-2.6.22-rc3.orig/arch/i386/perfmon/perfmon.c
+++ linux-2.6.22-rc3/arch/i386/perfmon/perfmon.c
@@ -1059,6 +1059,7 @@ char *pfm_arch_get_pmu_module_name(void)
goto try_arch;
}
case 15:
+ case 16:
/* All Opteron processors */
if (cpu_data->x86_vendor == X86_VENDOR_AMD)
return "perfmon_k8";
Index: linux-2.6.22-rc3/arch/x86_64/perfmon/perfmon_k8.c
===================================================================
--- linux-2.6.22-rc3.orig/arch/x86_64/perfmon/perfmon_k8.c
+++ linux-2.6.22-rc3/arch/x86_64/perfmon/perfmon_k8.c
@@ -307,7 +307,12 @@ static int pfm_k8_probe_pmu(void)
return -1;
}
- if (current_cpu_data.x86 != 15) {
+ switch (current_cpu_data.x86) {
+ case 15:
+ case 16:
+ PFM_INFO("found family=%d", current_cpu_data.x86);
+ break;
+ default:
PFM_INFO("unsupported family=%d", current_cpu_data.x86);
return -1;
}
--
AMD Saxony, Dresden, Germany
Operating System Research Center
email: robert.richter@amd.com
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [patch 1/8] 2.6.22-rc3 perfmon2 : IBS implementation for AMD64
2007-06-15 16:57 ` [patch 1/8] " Robert Richter
@ 2007-06-15 18:54 ` David Rientjes
2007-06-20 18:36 ` Robert Richter
2007-06-19 12:39 ` Stephane Eranian
1 sibling, 1 reply; 19+ messages in thread
From: David Rientjes @ 2007-06-15 18:54 UTC (permalink / raw)
To: Robert Richter; +Cc: Stephane Eranian, Andi Kleen, linux-kernel
On Fri, 15 Jun 2007, Robert Richter wrote:
> Index: linux-2.6.22-rc3/arch/i386/perfmon/perfmon.c
> ===================================================================
> --- linux-2.6.22-rc3.orig/arch/i386/perfmon/perfmon.c
> +++ linux-2.6.22-rc3/arch/i386/perfmon/perfmon.c
> @@ -1059,6 +1059,7 @@ char *pfm_arch_get_pmu_module_name(void)
> goto try_arch;
> }
> case 15:
> + case 16:
> /* All Opteron processors */
> if (cpu_data->x86_vendor == X86_VENDOR_AMD)
> return "perfmon_k8";
Constants like these should be #define'd in a header file but, admittedly,
it might become a rather intrusive change at this point for the above
case.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [patch 1/8] 2.6.22-rc3 perfmon2 : IBS implementation for AMD64
2007-06-15 18:54 ` David Rientjes
@ 2007-06-20 18:36 ` Robert Richter
0 siblings, 0 replies; 19+ messages in thread
From: Robert Richter @ 2007-06-20 18:36 UTC (permalink / raw)
To: David Rientjes; +Cc: Stephane Eranian, Andi Kleen, linux-kernel
David,
thanks for your comments.
On 15.06.07 11:54:24, David Rientjes wrote:
> On Fri, 15 Jun 2007, Robert Richter wrote:
>
> > Index: linux-2.6.22-rc3/arch/i386/perfmon/perfmon.c
> > ===================================================================
> > --- linux-2.6.22-rc3.orig/arch/i386/perfmon/perfmon.c
> > +++ linux-2.6.22-rc3/arch/i386/perfmon/perfmon.c
> > @@ -1059,6 +1059,7 @@ char *pfm_arch_get_pmu_module_name(void)
> > goto try_arch;
> > }
> > case 15:
> > + case 16:
> > /* All Opteron processors */
> > if (cpu_data->x86_vendor == X86_VENDOR_AMD)
> > return "perfmon_k8";
>
> Constants like these should be #define'd in a header file but, admittedly,
> it might become a rather intrusive change at this point for the above
> case.
All current x86 CPU detection code in the kernel is using hard coded
numbers. Introducing new defines for family and model numbers would
not be suitable here.
Thanks,
-Robert
--
AMD Saxony, Dresden, Germany
Operating System Research Center
email: robert.richter@amd.com
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [patch 1/8] 2.6.22-rc3 perfmon2 : IBS implementation for AMD64
2007-06-15 16:57 ` [patch 1/8] " Robert Richter
2007-06-15 18:54 ` David Rientjes
@ 2007-06-19 12:39 ` Stephane Eranian
1 sibling, 0 replies; 19+ messages in thread
From: Stephane Eranian @ 2007-06-19 12:39 UTC (permalink / raw)
To: Robert Richter; +Cc: Andi Kleen, linux-kernel
Robert,
I applied this patch to my tree.
Thanks.
On Fri, Jun 15, 2007 at 06:57:30PM +0200, Robert Richter wrote:
> This patch implements Barcelona CPU detection.
>
> Signed-off-by: Robert Richter <robert.richter@amd.com>
>
> Index: linux-2.6.22-rc3/arch/i386/perfmon/perfmon.c
> ===================================================================
> --- linux-2.6.22-rc3.orig/arch/i386/perfmon/perfmon.c
> +++ linux-2.6.22-rc3/arch/i386/perfmon/perfmon.c
> @@ -1059,6 +1059,7 @@ char *pfm_arch_get_pmu_module_name(void)
> goto try_arch;
> }
> case 15:
> + case 16:
> /* All Opteron processors */
> if (cpu_data->x86_vendor == X86_VENDOR_AMD)
> return "perfmon_k8";
> Index: linux-2.6.22-rc3/arch/x86_64/perfmon/perfmon_k8.c
> ===================================================================
> --- linux-2.6.22-rc3.orig/arch/x86_64/perfmon/perfmon_k8.c
> +++ linux-2.6.22-rc3/arch/x86_64/perfmon/perfmon_k8.c
> @@ -307,7 +307,12 @@ static int pfm_k8_probe_pmu(void)
> return -1;
> }
>
> - if (current_cpu_data.x86 != 15) {
> + switch (current_cpu_data.x86) {
> + case 15:
> + case 16:
> + PFM_INFO("found family=%d", current_cpu_data.x86);
> + break;
> + default:
> PFM_INFO("unsupported family=%d", current_cpu_data.x86);
> return -1;
> }
>
> --
> AMD Saxony, Dresden, Germany
> Operating System Research Center
> email: robert.richter@amd.com
>
>
>
--
-Stephane
^ permalink raw reply [flat|nested] 19+ messages in thread
* [patch 2/8] 2.6.22-rc3 perfmon2 : IBS implementation for AMD64
2007-06-15 16:56 [patch 0/8] 2.6.22-rc3 perfmon2 : IBS implementation for AMD64 Robert Richter
2007-06-15 16:57 ` [patch 1/8] " Robert Richter
@ 2007-06-15 16:58 ` Robert Richter
2007-06-19 12:39 ` Stephane Eranian
2007-06-15 16:58 ` [patch 3/8] " Robert Richter
` (5 subsequent siblings)
7 siblings, 1 reply; 19+ messages in thread
From: Robert Richter @ 2007-06-15 16:58 UTC (permalink / raw)
To: Stephane Eranian; +Cc: Andi Kleen, linux-kernel
Debug messages added to aid debugging.
Signed-off-by: Robert Richter <robert.richter@amd.com>
Index: linux-2.6.22-rc3/perfmon/perfmon_file.c
===================================================================
--- linux-2.6.22-rc3.orig/perfmon/perfmon_file.c
+++ linux-2.6.22-rc3/perfmon/perfmon_file.c
@@ -192,6 +192,8 @@ static int pfm_mmap(struct file *file, s
unsigned long flags;
int ret;
+ PFM_DBG("pfm_file_ops");
+
ctx = file->private_data;
size = (vma->vm_end - vma->vm_start);
@@ -332,6 +334,8 @@ static ssize_t pfm_read(struct file *fil
union pfarg_msg msg_buf;
int non_block, ret;
+ PFM_DBG("pfm_file_ops");
+
ctx = filp->private_data;
if (ctx == NULL) {
PFM_ERR("no ctx for pfm_read");
@@ -375,6 +379,8 @@ static unsigned int pfm_poll(struct file
unsigned long flags;
unsigned int mask = 0;
+ PFM_DBG("pfm_file_ops");
+
if (filp->f_op != &pfm_file_ops) {
PFM_ERR("pfm_poll bad magic");
return 0;
@@ -449,6 +455,8 @@ static int pfm_fasync(int fd, struct fil
struct pfm_context *ctx;
int ret;
+ PFM_DBG("pfm_file_ops");
+
ctx = filp->private_data;
if (ctx == NULL) {
PFM_ERR("pfm_fasync no ctx");
@@ -611,6 +619,8 @@ static int pfm_close(struct inode *inode
{
struct pfm_context *ctx;
+ PFM_DBG("pfm_file_ops");
+
ctx = filp->private_data;
if (ctx == NULL) {
PFM_ERR("no ctx");
@@ -621,6 +631,8 @@ static int pfm_close(struct inode *inode
static int pfm_no_open(struct inode *irrelevant, struct file *dontcare)
{
+ PFM_DBG("pfm_file_ops");
+
return -ENXIO;
}
@@ -637,6 +649,8 @@ static int pfm_flush(struct file *filp,
{
struct pfm_context *ctx;
+ PFM_DBG("pfm_file_ops");
+
ctx = filp->private_data;
if (ctx == NULL) {
PFM_ERR("pfm_flush no ctx");
Index: linux-2.6.22-rc3/perfmon/perfmon_syscalls.c
===================================================================
--- linux-2.6.22-rc3.orig/perfmon/perfmon_syscalls.c
+++ linux-2.6.22-rc3/perfmon/perfmon_syscalls.c
@@ -403,6 +403,8 @@ asmlinkage long sys_pfm_create_context(s
void *fmt_arg = NULL;
int ret;
+ PFM_DBG("syscall");
+
if (atomic_read(&perfmon_disabled))
return -ENOSYS;
@@ -433,8 +435,12 @@ asmlinkage long sys_pfm_write_pmcs(int f
size_t sz;
int ret, fput_needed;
- if (count < 0 || count >= PFM_MAX_ARG_COUNT(ureq))
+ PFM_DBG("syscall");
+
+ if (count < 0 || count >= PFM_MAX_ARG_COUNT(ureq)) {
+ PFM_DBG("invalid arg count %d", count);
return -EINVAL;
+ }
sz = count*sizeof(*ureq);
@@ -475,6 +481,8 @@ asmlinkage long sys_pfm_write_pmcs(int f
kfree(fptr);
error:
fput_light(filp, fput_needed);
+ if (ret)
+ PFM_DBG("failed: errno=%d", -ret);
return ret;
}
@@ -490,6 +498,8 @@ asmlinkage long sys_pfm_write_pmds(int f
size_t sz;
int ret, fput_needed;
+ PFM_DBG("syscall");
+
if (count < 0 || count >= PFM_MAX_ARG_COUNT(ureq))
return -EINVAL;
@@ -543,6 +553,8 @@ asmlinkage long sys_pfm_read_pmds(int fd
size_t sz;
int ret, fput_needed;
+ PFM_DBG("syscall");
+
if (count < 0 || count >= PFM_MAX_ARG_COUNT(ureq))
return -EINVAL;
@@ -591,6 +603,8 @@ asmlinkage long sys_pfm_restart(int fd)
unsigned long flags;
int ret, fput_needed, complete_needed;
+ PFM_DBG("syscall");
+
filp = fget_light(fd, &fput_needed);
if (unlikely(filp == NULL)) {
PFM_DBG("invalid fd %d", fd);
@@ -647,6 +661,8 @@ asmlinkage long sys_pfm_stop(int fd)
unsigned long flags;
int ret, fput_needed;
+ PFM_DBG("syscall");
+
filp = fget_light(fd, &fput_needed);
if (unlikely(filp == NULL)) {
PFM_DBG("invalid fd %d", fd);
@@ -682,6 +698,8 @@ asmlinkage long sys_pfm_start(int fd, st
unsigned long flags;
int ret, fput_needed;
+ PFM_DBG("syscall");
+
filp = fget_light(fd, &fput_needed);
if (unlikely(filp == NULL)) {
PFM_DBG("invalid fd %d", fd);
@@ -724,6 +742,8 @@ asmlinkage long sys_pfm_load_context(int
struct pfarg_load req;
int ret, fput_needed;
+ PFM_DBG("syscall");
+
if (copy_from_user(&req, ureq, sizeof(req)))
return -EFAULT;
@@ -792,6 +812,8 @@ asmlinkage long sys_pfm_unload_context(i
int is_system, can_release = 0;
u32 cpu;
+ PFM_DBG("syscall");
+
filp = fget_light(fd, &fput_needed);
if (unlikely(filp == NULL)) {
PFM_DBG("invalid fd %d", fd);
@@ -834,6 +856,8 @@ asmlinkage long sys_pfm_create_evtsets(i
size_t sz;
int ret, fput_needed;
+ PFM_DBG("syscall");
+
if (count < 0 || count >= PFM_MAX_ARG_COUNT(ureq))
return -EINVAL;
@@ -890,6 +914,8 @@ asmlinkage long sys_pfm_getinfo_evtsets
size_t sz;
int ret, fput_needed;
+ PFM_DBG("syscall");
+
if (count < 0 || count >= PFM_MAX_ARG_COUNT(ureq))
return -EINVAL;
@@ -932,6 +958,8 @@ asmlinkage long sys_pfm_getinfo_evtsets
kfree(fptr);
error:
fput_light(filp, fput_needed);
+ if (ret)
+ PFM_DBG("failed: errno=%d", -ret);
return ret;
}
@@ -945,6 +973,8 @@ asmlinkage long sys_pfm_delete_evtsets(i
size_t sz;
int ret, fput_needed;
+ PFM_DBG("syscall");
+
if (count < 0 || count >= PFM_MAX_ARG_COUNT(ureq))
return -EINVAL;
Index: linux-2.6.22-rc3/perfmon/perfmon_rw.c
===================================================================
--- linux-2.6.22-rc3.orig/perfmon/perfmon_rw.c
+++ linux-2.6.22-rc3/perfmon/perfmon_rw.c
@@ -474,7 +474,7 @@ int __pfm_write_pmcs(struct pfm_context
return 0;
error:
pfm_retflag_set(req->reg_flags, error_code);
- PFM_DBG("set%u pmc%u error=%d", set_id, cnum, error_code);
+ PFM_DBG("set%u pmc%u error=0x%08x", set_id, cnum, error_code);
return ret;
}
Index: linux-2.6.22-rc3/include/asm-i386/perfmon.h
===================================================================
--- linux-2.6.22-rc3.orig/include/asm-i386/perfmon.h
+++ linux-2.6.22-rc3/include/asm-i386/perfmon.h
@@ -140,6 +140,10 @@ static inline void pfm_arch_write_pmc(st
if (ctx && ctx->flags.started == 0)
return;
+ PFM_DBG_ovfl("pfm_arch_write_pmc(0x%016Lx, 0x%016Lx)",
+ (unsigned long long) pfm_pmu_conf->pmc_desc[cnum].hw_addr,
+ (unsigned long long) value);
+ BUG_ON(pfm_pmu_conf->pmc_desc[cnum].type & PFM_REG_V);
if (arch_info->pmu_style == PFM_X86_PMU_P4)
__pfm_write_reg_p4(&arch_info->pmc_addrs[cnum], value);
else
@@ -155,6 +159,10 @@ static inline void pfm_arch_write_pmd(st
if (pfm_pmu_conf->pmd_desc[cnum].type & PFM_REG_C64)
value |= ~pfm_pmu_conf->ovfl_mask;
+ PFM_DBG_ovfl("pfm_arch_write_pmd(0x%016Lx, 0x%016Lx)",
+ (unsigned long long) pfm_pmu_conf->pmd_desc[cnum].hw_addr,
+ (unsigned long long) value);
+ BUG_ON(pfm_pmu_conf->pmd_desc[cnum].type & PFM_REG_V);
if (arch_info->pmu_style == PFM_X86_PMU_P4)
__pfm_write_reg_p4(&arch_info->pmd_addrs[cnum], value);
else
@@ -165,10 +173,14 @@ static inline u64 pfm_arch_read_pmd(stru
{
struct pfm_arch_pmu_info *arch_info = pfm_pmu_conf->arch_info;
u64 tmp;
+ BUG_ON(pfm_pmu_conf->pmd_desc[cnum].type & PFM_REG_V);
if (arch_info->pmu_style == PFM_X86_PMU_P4)
__pfm_read_reg_p4(&arch_info->pmd_addrs[cnum], &tmp);
else
rdmsrl(pfm_pmu_conf->pmd_desc[cnum].hw_addr, tmp);
+ PFM_DBG_ovfl("pfm_arch_read_pmd(0x%016Lx) = 0x%016Lx",
+ (unsigned long long) pfm_pmu_conf->pmd_desc[cnum].hw_addr,
+ (unsigned long long) tmp);
return tmp;
}
@@ -176,10 +188,14 @@ static inline u64 pfm_arch_read_pmc(stru
{
struct pfm_arch_pmu_info *arch_info = pfm_pmu_conf->arch_info;
u64 tmp;
+ BUG_ON(pfm_pmu_conf->pmc_desc[cnum].type & PFM_REG_V);
if (arch_info->pmu_style == PFM_X86_PMU_P4)
__pfm_read_reg_p4(&arch_info->pmc_addrs[cnum], &tmp);
else
rdmsrl(pfm_pmu_conf->pmc_desc[cnum].hw_addr, tmp);
+ PFM_DBG_ovfl("pfm_arch_read_pmc(0x%016Lx) = 0x%016Lx",
+ (unsigned long long) pfm_pmu_conf->pmc_desc[cnum].hw_addr,
+ (unsigned long long) tmp);
return tmp;
}
Index: linux-2.6.22-rc3/perfmon/perfmon.c
===================================================================
--- linux-2.6.22-rc3.orig/perfmon/perfmon.c
+++ linux-2.6.22-rc3/perfmon/perfmon.c
@@ -865,7 +865,8 @@ int __init pfm_init(void)
{
int ret;
- PFM_LOG("version %u.%u", PFM_VERSION_MAJ, PFM_VERSION_MIN);
+ PFM_LOG("version %u.%u, compiled: " __DATE__ ", " __TIME__,
+ PFM_VERSION_MAJ, PFM_VERSION_MIN);
pfm_ctx_cachep = kmem_cache_create("pfm_context",
sizeof(struct pfm_context)+PFM_ARCH_CTX_SIZE,
Index: linux-2.6.22-rc3/perfmon/perfmon_intr.c
===================================================================
--- linux-2.6.22-rc3.orig/perfmon/perfmon_intr.c
+++ linux-2.6.22-rc3/perfmon/perfmon_intr.c
@@ -170,6 +170,7 @@ static void pfm_overflow_handler(struct
* check for overflow condition
*/
if (likely(old_val > new_val)) {
+ PFM_DBG_ovfl("64 bit overflow of PMD%d", i);
has_64b_ovfl = 1;
if (has_ovfl_sw && ovfl_thres > 0) {
if (ovfl_thres == 1)
@@ -188,11 +189,13 @@ static void pfm_overflow_handler(struct
max_pmd);
} else {
+ PFM_DBG_ovfl("Hardware counter overflow of PMD%d=0x%04llx",
+ i, new_val);
/* only keep track of 64-bit overflows */
__clear_bit(i, ulp(pend_ovfls));
/*
- * on some PMU, it may be necessary to re-arm the PMD
- */
+ * on some PMU, it may be necessary to re-arm the PMD
+ */
pfm_arch_ovfl_reset_pmd(ctx, i);
}
--
AMD Saxony, Dresden, Germany
Operating System Research Center
email: robert.richter@amd.com
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [patch 2/8] 2.6.22-rc3 perfmon2 : IBS implementation for AMD64
2007-06-15 16:58 ` [patch 2/8] " Robert Richter
@ 2007-06-19 12:39 ` Stephane Eranian
0 siblings, 0 replies; 19+ messages in thread
From: Stephane Eranian @ 2007-06-19 12:39 UTC (permalink / raw)
To: Robert Richter; +Cc: Andi Kleen, linux-kernel
Robert,
I applied this patch to my tree.
Thanks.
On Fri, Jun 15, 2007 at 06:58:11PM +0200, Robert Richter wrote:
> Debug messages added to aid debugging.
>
> Signed-off-by: Robert Richter <robert.richter@amd.com>
>
> Index: linux-2.6.22-rc3/perfmon/perfmon_file.c
> ===================================================================
> --- linux-2.6.22-rc3.orig/perfmon/perfmon_file.c
> +++ linux-2.6.22-rc3/perfmon/perfmon_file.c
> @@ -192,6 +192,8 @@ static int pfm_mmap(struct file *file, s
> unsigned long flags;
> int ret;
>
> + PFM_DBG("pfm_file_ops");
> +
> ctx = file->private_data;
> size = (vma->vm_end - vma->vm_start);
>
> @@ -332,6 +334,8 @@ static ssize_t pfm_read(struct file *fil
> union pfarg_msg msg_buf;
> int non_block, ret;
>
> + PFM_DBG("pfm_file_ops");
> +
> ctx = filp->private_data;
> if (ctx == NULL) {
> PFM_ERR("no ctx for pfm_read");
> @@ -375,6 +379,8 @@ static unsigned int pfm_poll(struct file
> unsigned long flags;
> unsigned int mask = 0;
>
> + PFM_DBG("pfm_file_ops");
> +
> if (filp->f_op != &pfm_file_ops) {
> PFM_ERR("pfm_poll bad magic");
> return 0;
> @@ -449,6 +455,8 @@ static int pfm_fasync(int fd, struct fil
> struct pfm_context *ctx;
> int ret;
>
> + PFM_DBG("pfm_file_ops");
> +
> ctx = filp->private_data;
> if (ctx == NULL) {
> PFM_ERR("pfm_fasync no ctx");
> @@ -611,6 +619,8 @@ static int pfm_close(struct inode *inode
> {
> struct pfm_context *ctx;
>
> + PFM_DBG("pfm_file_ops");
> +
> ctx = filp->private_data;
> if (ctx == NULL) {
> PFM_ERR("no ctx");
> @@ -621,6 +631,8 @@ static int pfm_close(struct inode *inode
>
> static int pfm_no_open(struct inode *irrelevant, struct file *dontcare)
> {
> + PFM_DBG("pfm_file_ops");
> +
> return -ENXIO;
> }
>
> @@ -637,6 +649,8 @@ static int pfm_flush(struct file *filp,
> {
> struct pfm_context *ctx;
>
> + PFM_DBG("pfm_file_ops");
> +
> ctx = filp->private_data;
> if (ctx == NULL) {
> PFM_ERR("pfm_flush no ctx");
> Index: linux-2.6.22-rc3/perfmon/perfmon_syscalls.c
> ===================================================================
> --- linux-2.6.22-rc3.orig/perfmon/perfmon_syscalls.c
> +++ linux-2.6.22-rc3/perfmon/perfmon_syscalls.c
> @@ -403,6 +403,8 @@ asmlinkage long sys_pfm_create_context(s
> void *fmt_arg = NULL;
> int ret;
>
> + PFM_DBG("syscall");
> +
> if (atomic_read(&perfmon_disabled))
> return -ENOSYS;
>
> @@ -433,8 +435,12 @@ asmlinkage long sys_pfm_write_pmcs(int f
> size_t sz;
> int ret, fput_needed;
>
> - if (count < 0 || count >= PFM_MAX_ARG_COUNT(ureq))
> + PFM_DBG("syscall");
> +
> + if (count < 0 || count >= PFM_MAX_ARG_COUNT(ureq)) {
> + PFM_DBG("invalid arg count %d", count);
> return -EINVAL;
> + }
>
> sz = count*sizeof(*ureq);
>
> @@ -475,6 +481,8 @@ asmlinkage long sys_pfm_write_pmcs(int f
> kfree(fptr);
> error:
> fput_light(filp, fput_needed);
> + if (ret)
> + PFM_DBG("failed: errno=%d", -ret);
>
> return ret;
> }
> @@ -490,6 +498,8 @@ asmlinkage long sys_pfm_write_pmds(int f
> size_t sz;
> int ret, fput_needed;
>
> + PFM_DBG("syscall");
> +
> if (count < 0 || count >= PFM_MAX_ARG_COUNT(ureq))
> return -EINVAL;
>
> @@ -543,6 +553,8 @@ asmlinkage long sys_pfm_read_pmds(int fd
> size_t sz;
> int ret, fput_needed;
>
> + PFM_DBG("syscall");
> +
> if (count < 0 || count >= PFM_MAX_ARG_COUNT(ureq))
> return -EINVAL;
>
> @@ -591,6 +603,8 @@ asmlinkage long sys_pfm_restart(int fd)
> unsigned long flags;
> int ret, fput_needed, complete_needed;
>
> + PFM_DBG("syscall");
> +
> filp = fget_light(fd, &fput_needed);
> if (unlikely(filp == NULL)) {
> PFM_DBG("invalid fd %d", fd);
> @@ -647,6 +661,8 @@ asmlinkage long sys_pfm_stop(int fd)
> unsigned long flags;
> int ret, fput_needed;
>
> + PFM_DBG("syscall");
> +
> filp = fget_light(fd, &fput_needed);
> if (unlikely(filp == NULL)) {
> PFM_DBG("invalid fd %d", fd);
> @@ -682,6 +698,8 @@ asmlinkage long sys_pfm_start(int fd, st
> unsigned long flags;
> int ret, fput_needed;
>
> + PFM_DBG("syscall");
> +
> filp = fget_light(fd, &fput_needed);
> if (unlikely(filp == NULL)) {
> PFM_DBG("invalid fd %d", fd);
> @@ -724,6 +742,8 @@ asmlinkage long sys_pfm_load_context(int
> struct pfarg_load req;
> int ret, fput_needed;
>
> + PFM_DBG("syscall");
> +
> if (copy_from_user(&req, ureq, sizeof(req)))
> return -EFAULT;
>
> @@ -792,6 +812,8 @@ asmlinkage long sys_pfm_unload_context(i
> int is_system, can_release = 0;
> u32 cpu;
>
> + PFM_DBG("syscall");
> +
> filp = fget_light(fd, &fput_needed);
> if (unlikely(filp == NULL)) {
> PFM_DBG("invalid fd %d", fd);
> @@ -834,6 +856,8 @@ asmlinkage long sys_pfm_create_evtsets(i
> size_t sz;
> int ret, fput_needed;
>
> + PFM_DBG("syscall");
> +
> if (count < 0 || count >= PFM_MAX_ARG_COUNT(ureq))
> return -EINVAL;
>
> @@ -890,6 +914,8 @@ asmlinkage long sys_pfm_getinfo_evtsets
> size_t sz;
> int ret, fput_needed;
>
> + PFM_DBG("syscall");
> +
> if (count < 0 || count >= PFM_MAX_ARG_COUNT(ureq))
> return -EINVAL;
>
> @@ -932,6 +958,8 @@ asmlinkage long sys_pfm_getinfo_evtsets
> kfree(fptr);
> error:
> fput_light(filp, fput_needed);
> + if (ret)
> + PFM_DBG("failed: errno=%d", -ret);
> return ret;
> }
>
> @@ -945,6 +973,8 @@ asmlinkage long sys_pfm_delete_evtsets(i
> size_t sz;
> int ret, fput_needed;
>
> + PFM_DBG("syscall");
> +
> if (count < 0 || count >= PFM_MAX_ARG_COUNT(ureq))
> return -EINVAL;
>
> Index: linux-2.6.22-rc3/perfmon/perfmon_rw.c
> ===================================================================
> --- linux-2.6.22-rc3.orig/perfmon/perfmon_rw.c
> +++ linux-2.6.22-rc3/perfmon/perfmon_rw.c
> @@ -474,7 +474,7 @@ int __pfm_write_pmcs(struct pfm_context
> return 0;
> error:
> pfm_retflag_set(req->reg_flags, error_code);
> - PFM_DBG("set%u pmc%u error=%d", set_id, cnum, error_code);
> + PFM_DBG("set%u pmc%u error=0x%08x", set_id, cnum, error_code);
> return ret;
> }
>
> Index: linux-2.6.22-rc3/include/asm-i386/perfmon.h
> ===================================================================
> --- linux-2.6.22-rc3.orig/include/asm-i386/perfmon.h
> +++ linux-2.6.22-rc3/include/asm-i386/perfmon.h
> @@ -140,6 +140,10 @@ static inline void pfm_arch_write_pmc(st
> if (ctx && ctx->flags.started == 0)
> return;
>
> + PFM_DBG_ovfl("pfm_arch_write_pmc(0x%016Lx, 0x%016Lx)",
> + (unsigned long long) pfm_pmu_conf->pmc_desc[cnum].hw_addr,
> + (unsigned long long) value);
> + BUG_ON(pfm_pmu_conf->pmc_desc[cnum].type & PFM_REG_V);
> if (arch_info->pmu_style == PFM_X86_PMU_P4)
> __pfm_write_reg_p4(&arch_info->pmc_addrs[cnum], value);
> else
> @@ -155,6 +159,10 @@ static inline void pfm_arch_write_pmd(st
> if (pfm_pmu_conf->pmd_desc[cnum].type & PFM_REG_C64)
> value |= ~pfm_pmu_conf->ovfl_mask;
>
> + PFM_DBG_ovfl("pfm_arch_write_pmd(0x%016Lx, 0x%016Lx)",
> + (unsigned long long) pfm_pmu_conf->pmd_desc[cnum].hw_addr,
> + (unsigned long long) value);
> + BUG_ON(pfm_pmu_conf->pmd_desc[cnum].type & PFM_REG_V);
> if (arch_info->pmu_style == PFM_X86_PMU_P4)
> __pfm_write_reg_p4(&arch_info->pmd_addrs[cnum], value);
> else
> @@ -165,10 +173,14 @@ static inline u64 pfm_arch_read_pmd(stru
> {
> struct pfm_arch_pmu_info *arch_info = pfm_pmu_conf->arch_info;
> u64 tmp;
> + BUG_ON(pfm_pmu_conf->pmd_desc[cnum].type & PFM_REG_V);
> if (arch_info->pmu_style == PFM_X86_PMU_P4)
> __pfm_read_reg_p4(&arch_info->pmd_addrs[cnum], &tmp);
> else
> rdmsrl(pfm_pmu_conf->pmd_desc[cnum].hw_addr, tmp);
> + PFM_DBG_ovfl("pfm_arch_read_pmd(0x%016Lx) = 0x%016Lx",
> + (unsigned long long) pfm_pmu_conf->pmd_desc[cnum].hw_addr,
> + (unsigned long long) tmp);
> return tmp;
> }
>
> @@ -176,10 +188,14 @@ static inline u64 pfm_arch_read_pmc(stru
> {
> struct pfm_arch_pmu_info *arch_info = pfm_pmu_conf->arch_info;
> u64 tmp;
> + BUG_ON(pfm_pmu_conf->pmc_desc[cnum].type & PFM_REG_V);
> if (arch_info->pmu_style == PFM_X86_PMU_P4)
> __pfm_read_reg_p4(&arch_info->pmc_addrs[cnum], &tmp);
> else
> rdmsrl(pfm_pmu_conf->pmc_desc[cnum].hw_addr, tmp);
> + PFM_DBG_ovfl("pfm_arch_read_pmc(0x%016Lx) = 0x%016Lx",
> + (unsigned long long) pfm_pmu_conf->pmc_desc[cnum].hw_addr,
> + (unsigned long long) tmp);
> return tmp;
> }
>
> Index: linux-2.6.22-rc3/perfmon/perfmon.c
> ===================================================================
> --- linux-2.6.22-rc3.orig/perfmon/perfmon.c
> +++ linux-2.6.22-rc3/perfmon/perfmon.c
> @@ -865,7 +865,8 @@ int __init pfm_init(void)
> {
> int ret;
>
> - PFM_LOG("version %u.%u", PFM_VERSION_MAJ, PFM_VERSION_MIN);
> + PFM_LOG("version %u.%u, compiled: " __DATE__ ", " __TIME__,
> + PFM_VERSION_MAJ, PFM_VERSION_MIN);
>
> pfm_ctx_cachep = kmem_cache_create("pfm_context",
> sizeof(struct pfm_context)+PFM_ARCH_CTX_SIZE,
> Index: linux-2.6.22-rc3/perfmon/perfmon_intr.c
> ===================================================================
> --- linux-2.6.22-rc3.orig/perfmon/perfmon_intr.c
> +++ linux-2.6.22-rc3/perfmon/perfmon_intr.c
> @@ -170,6 +170,7 @@ static void pfm_overflow_handler(struct
> * check for overflow condition
> */
> if (likely(old_val > new_val)) {
> + PFM_DBG_ovfl("64 bit overflow of PMD%d", i);
> has_64b_ovfl = 1;
> if (has_ovfl_sw && ovfl_thres > 0) {
> if (ovfl_thres == 1)
> @@ -188,11 +189,13 @@ static void pfm_overflow_handler(struct
> max_pmd);
>
> } else {
> + PFM_DBG_ovfl("Hardware counter overflow of PMD%d=0x%04llx",
> + i, new_val);
> /* only keep track of 64-bit overflows */
> __clear_bit(i, ulp(pend_ovfls));
> /*
> - * on some PMU, it may be necessary to re-arm the PMD
> - */
> + * on some PMU, it may be necessary to re-arm the PMD
> + */
> pfm_arch_ovfl_reset_pmd(ctx, i);
> }
>
>
> --
> AMD Saxony, Dresden, Germany
> Operating System Research Center
> email: robert.richter@amd.com
>
>
>
--
-Stephane
^ permalink raw reply [flat|nested] 19+ messages in thread
* [patch 3/8] 2.6.22-rc3 perfmon2 : IBS implementation for AMD64
2007-06-15 16:56 [patch 0/8] 2.6.22-rc3 perfmon2 : IBS implementation for AMD64 Robert Richter
2007-06-15 16:57 ` [patch 1/8] " Robert Richter
2007-06-15 16:58 ` [patch 2/8] " Robert Richter
@ 2007-06-15 16:58 ` Robert Richter
2007-06-19 12:40 ` Stephane Eranian
2007-06-15 16:59 ` [patch 4/8] " Robert Richter
` (4 subsequent siblings)
7 siblings, 1 reply; 19+ messages in thread
From: Robert Richter @ 2007-06-15 16:58 UTC (permalink / raw)
To: Stephane Eranian; +Cc: Andi Kleen, linux-kernel
Minor changes (whitespace, comments, ...)
Signed-off-by: Robert Richter <robert.richter@amd.com>
Index: linux-2.6.22-rc3/arch/x86_64/perfmon/perfmon_k8.c
===================================================================
--- linux-2.6.22-rc3.orig/arch/x86_64/perfmon/perfmon_k8.c
+++ linux-2.6.22-rc3/arch/x86_64/perfmon/perfmon_k8.c
@@ -32,23 +32,29 @@ static int force_nmi;
MODULE_PARM_DESC(force_nmi, "bool: force use of NMI for PMU interrupt");
module_param(force_nmi, bool, 0600);
-static struct pfm_arch_pmu_info pfm_k8_pmu_info={
+static struct pfm_arch_pmu_info pfm_k8_pmu_info = {
.pmc_addrs = {
- {{MSR_K7_EVNTSEL0, 0}, 0, PFM_REGT_EN},
- {{MSR_K7_EVNTSEL1, 0}, 1, PFM_REGT_EN},
- {{MSR_K7_EVNTSEL2, 0}, 2, PFM_REGT_EN},
- {{MSR_K7_EVNTSEL3, 0}, 3, PFM_REGT_EN},
+/* pmc0 */ {{MSR_K7_EVNTSEL0, 0}, 0, PFM_REGT_EN},
+/* pmc1 */ {{MSR_K7_EVNTSEL1, 0}, 1, PFM_REGT_EN},
+/* pmc2 */ {{MSR_K7_EVNTSEL2, 0}, 2, PFM_REGT_EN},
+/* pmc3 */ {{MSR_K7_EVNTSEL3, 0}, 3, PFM_REGT_EN},
},
.pmd_addrs = {
- {{MSR_K7_PERFCTR0, 0}, 0, PFM_REGT_CTR},
- {{MSR_K7_PERFCTR1, 0}, 0, PFM_REGT_CTR},
- {{MSR_K7_PERFCTR2, 0}, 0, PFM_REGT_CTR},
- {{MSR_K7_PERFCTR3, 0}, 0, PFM_REGT_CTR},
+/* pmd0 */ {{MSR_K7_PERFCTR0, 0}, 0, PFM_REGT_CTR},
+/* pmd1 */ {{MSR_K7_PERFCTR1, 0}, 0, PFM_REGT_CTR},
+/* pmd2 */ {{MSR_K7_PERFCTR2, 0}, 0, PFM_REGT_CTR},
+/* pmd3 */ {{MSR_K7_PERFCTR3, 0}, 0, PFM_REGT_CTR},
},
.pmu_style = PFM_X86_PMU_P6
};
/*
+ * force Local APIC interrupt on overflow
+ */
+#define PFM_K8_VAL (1ULL<<20)
+#define PFM_K8_NO64 (1ULL<<20)
+
+/*
* reserved bits must be zero
*
* - upper 32 bits are reserved
@@ -59,12 +65,6 @@ static struct pfm_arch_pmu_info pfm_k8_p
| (1ULL<<20) \
| (1ULL<<21))
-/*
- * force Local APIC interrupt on overflow
- */
-#define PFM_K8_VAL (1ULL<<20)
-#define PFM_K8_NO64 (1ULL<<20)
-
static struct pfm_reg_desc pfm_k8_pmc_desc[]={
/* pmc0 */ PMC_D(PFM_REG_I64, "PERFSEL0", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL0),
/* pmc1 */ PMC_D(PFM_REG_I64, "PERFSEL1", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL1),
@@ -73,11 +73,11 @@ static struct pfm_reg_desc pfm_k8_pmc_de
};
#define PFM_AMD_NUM_PMCS ARRAY_SIZE(pfm_k8_pmc_desc)
-static struct pfm_reg_desc pfm_k8_pmd_desc[]={
-/* pmd0 */ PMD_D(PFM_REG_C, "PERFCTR0", MSR_K7_PERFCTR0),
-/* pmd1 */ PMD_D(PFM_REG_C, "PERFCTR1", MSR_K7_PERFCTR1),
-/* pmd2 */ PMD_D(PFM_REG_C, "PERFCTR2", MSR_K7_PERFCTR2),
-/* pmd3 */ PMD_D(PFM_REG_C, "PERFCTR3", MSR_K7_PERFCTR3),
+static struct pfm_reg_desc pfm_k8_pmd_desc[] = {
+/* pmd0 */ PMD_D(PFM_REG_C, "PERFCTR0", MSR_K7_PERFCTR0),
+/* pmd1 */ PMD_D(PFM_REG_C, "PERFCTR1", MSR_K7_PERFCTR1),
+/* pmd2 */ PMD_D(PFM_REG_C, "PERFCTR2", MSR_K7_PERFCTR2),
+/* pmd3 */ PMD_D(PFM_REG_C, "PERFCTR3", MSR_K7_PERFCTR3),
};
#define PFM_AMD_NUM_PMDS ARRAY_SIZE(pfm_k8_pmd_desc)
Index: linux-2.6.22-rc3/include/asm-i386/perfmon.h
===================================================================
--- linux-2.6.22-rc3.orig/include/asm-i386/perfmon.h
+++ linux-2.6.22-rc3/include/asm-i386/perfmon.h
@@ -50,14 +50,14 @@
/*
* bitmask for reg_type
*/
-#define PFM_REGT_NA 0x00 /* not available */
-#define PFM_REGT_EN 0x01 /* has enable bit (cleared on ctxsw) */
-#define PFM_REGT_ESCR 0x02 /* P4: ESCR */
-#define PFM_REGT_CCCR 0x04 /* P4: CCCR */
-#define PFM_REGT_OTH 0x80 /* other type of register */
-#define PFM_REGT_PEBS 0x10 /* PEBS related */
-#define PFM_REGT_NOHT 0x20 /* unavailable with HT */
-#define PFM_REGT_CTR 0x40 /* counter */
+#define PFM_REGT_NA 0x0000 /* not available */
+#define PFM_REGT_EN 0x0001 /* has enable bit (cleared on ctxsw) */
+#define PFM_REGT_ESCR 0x0002 /* P4: ESCR */
+#define PFM_REGT_CCCR 0x0004 /* P4: CCCR */
+#define PFM_REGT_PEBS 0x0010 /* PEBS related */
+#define PFM_REGT_NOHT 0x0020 /* unavailable with HT */
+#define PFM_REGT_CTR 0x0040 /* counter */
+#define PFM_REGT_OTH 0x0080 /* other type of register */
/*
* This design and the partitioning of resources for SMT (hyper threads)
Index: linux-2.6.22-rc3/include/linux/perfmon_pmu.h
===================================================================
--- linux-2.6.22-rc3.orig/include/linux/perfmon_pmu.h
+++ linux-2.6.22-rc3/include/linux/perfmon_pmu.h
@@ -69,7 +69,7 @@ struct pfm_reg_desc {
#define PFM_REG_C64 0x04 /* PMD: 64-bit virtualization */
#define PFM_REG_RO 0x08 /* PMD: read-only (writes ignored) */
#define PFM_REG_V 0x10 /* PMD: virtual reg (provided by PMU description) */
-#define PFM_REG_NO64 0x100 /* PMC: supports REGFL_NOEMUL64 */
+#define PFM_REG_NO64 0x100 /* PMC: supports PFM_REGFL_NO_EMUL64 */
/*
* define some shortcuts for common types
--
AMD Saxony, Dresden, Germany
Operating System Research Center
email: robert.richter@amd.com
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [patch 3/8] 2.6.22-rc3 perfmon2 : IBS implementation for AMD64
2007-06-15 16:58 ` [patch 3/8] " Robert Richter
@ 2007-06-19 12:40 ` Stephane Eranian
0 siblings, 0 replies; 19+ messages in thread
From: Stephane Eranian @ 2007-06-19 12:40 UTC (permalink / raw)
To: Robert Richter; +Cc: Andi Kleen, linux-kernel
Robert,
Patch applied.
Thanks.
On Fri, Jun 15, 2007 at 06:58:49PM +0200, Robert Richter wrote:
> Minor changes (whitespace, comments, ...)
>
> Signed-off-by: Robert Richter <robert.richter@amd.com>
>
> Index: linux-2.6.22-rc3/arch/x86_64/perfmon/perfmon_k8.c
> ===================================================================
> --- linux-2.6.22-rc3.orig/arch/x86_64/perfmon/perfmon_k8.c
> +++ linux-2.6.22-rc3/arch/x86_64/perfmon/perfmon_k8.c
> @@ -32,23 +32,29 @@ static int force_nmi;
> MODULE_PARM_DESC(force_nmi, "bool: force use of NMI for PMU interrupt");
> module_param(force_nmi, bool, 0600);
>
> -static struct pfm_arch_pmu_info pfm_k8_pmu_info={
> +static struct pfm_arch_pmu_info pfm_k8_pmu_info = {
> .pmc_addrs = {
> - {{MSR_K7_EVNTSEL0, 0}, 0, PFM_REGT_EN},
> - {{MSR_K7_EVNTSEL1, 0}, 1, PFM_REGT_EN},
> - {{MSR_K7_EVNTSEL2, 0}, 2, PFM_REGT_EN},
> - {{MSR_K7_EVNTSEL3, 0}, 3, PFM_REGT_EN},
> +/* pmc0 */ {{MSR_K7_EVNTSEL0, 0}, 0, PFM_REGT_EN},
> +/* pmc1 */ {{MSR_K7_EVNTSEL1, 0}, 1, PFM_REGT_EN},
> +/* pmc2 */ {{MSR_K7_EVNTSEL2, 0}, 2, PFM_REGT_EN},
> +/* pmc3 */ {{MSR_K7_EVNTSEL3, 0}, 3, PFM_REGT_EN},
> },
> .pmd_addrs = {
> - {{MSR_K7_PERFCTR0, 0}, 0, PFM_REGT_CTR},
> - {{MSR_K7_PERFCTR1, 0}, 0, PFM_REGT_CTR},
> - {{MSR_K7_PERFCTR2, 0}, 0, PFM_REGT_CTR},
> - {{MSR_K7_PERFCTR3, 0}, 0, PFM_REGT_CTR},
> +/* pmd0 */ {{MSR_K7_PERFCTR0, 0}, 0, PFM_REGT_CTR},
> +/* pmd1 */ {{MSR_K7_PERFCTR1, 0}, 0, PFM_REGT_CTR},
> +/* pmd2 */ {{MSR_K7_PERFCTR2, 0}, 0, PFM_REGT_CTR},
> +/* pmd3 */ {{MSR_K7_PERFCTR3, 0}, 0, PFM_REGT_CTR},
> },
> .pmu_style = PFM_X86_PMU_P6
> };
>
> /*
> + * force Local APIC interrupt on overflow
> + */
> +#define PFM_K8_VAL (1ULL<<20)
> +#define PFM_K8_NO64 (1ULL<<20)
> +
> +/*
> * reserved bits must be zero
> *
> * - upper 32 bits are reserved
> @@ -59,12 +65,6 @@ static struct pfm_arch_pmu_info pfm_k8_p
> | (1ULL<<20) \
> | (1ULL<<21))
>
> -/*
> - * force Local APIC interrupt on overflow
> - */
> -#define PFM_K8_VAL (1ULL<<20)
> -#define PFM_K8_NO64 (1ULL<<20)
> -
> static struct pfm_reg_desc pfm_k8_pmc_desc[]={
> /* pmc0 */ PMC_D(PFM_REG_I64, "PERFSEL0", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL0),
> /* pmc1 */ PMC_D(PFM_REG_I64, "PERFSEL1", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL1),
> @@ -73,11 +73,11 @@ static struct pfm_reg_desc pfm_k8_pmc_de
> };
> #define PFM_AMD_NUM_PMCS ARRAY_SIZE(pfm_k8_pmc_desc)
>
> -static struct pfm_reg_desc pfm_k8_pmd_desc[]={
> -/* pmd0 */ PMD_D(PFM_REG_C, "PERFCTR0", MSR_K7_PERFCTR0),
> -/* pmd1 */ PMD_D(PFM_REG_C, "PERFCTR1", MSR_K7_PERFCTR1),
> -/* pmd2 */ PMD_D(PFM_REG_C, "PERFCTR2", MSR_K7_PERFCTR2),
> -/* pmd3 */ PMD_D(PFM_REG_C, "PERFCTR3", MSR_K7_PERFCTR3),
> +static struct pfm_reg_desc pfm_k8_pmd_desc[] = {
> +/* pmd0 */ PMD_D(PFM_REG_C, "PERFCTR0", MSR_K7_PERFCTR0),
> +/* pmd1 */ PMD_D(PFM_REG_C, "PERFCTR1", MSR_K7_PERFCTR1),
> +/* pmd2 */ PMD_D(PFM_REG_C, "PERFCTR2", MSR_K7_PERFCTR2),
> +/* pmd3 */ PMD_D(PFM_REG_C, "PERFCTR3", MSR_K7_PERFCTR3),
> };
> #define PFM_AMD_NUM_PMDS ARRAY_SIZE(pfm_k8_pmd_desc)
>
> Index: linux-2.6.22-rc3/include/asm-i386/perfmon.h
> ===================================================================
> --- linux-2.6.22-rc3.orig/include/asm-i386/perfmon.h
> +++ linux-2.6.22-rc3/include/asm-i386/perfmon.h
> @@ -50,14 +50,14 @@
> /*
> * bitmask for reg_type
> */
> -#define PFM_REGT_NA 0x00 /* not available */
> -#define PFM_REGT_EN 0x01 /* has enable bit (cleared on ctxsw) */
> -#define PFM_REGT_ESCR 0x02 /* P4: ESCR */
> -#define PFM_REGT_CCCR 0x04 /* P4: CCCR */
> -#define PFM_REGT_OTH 0x80 /* other type of register */
> -#define PFM_REGT_PEBS 0x10 /* PEBS related */
> -#define PFM_REGT_NOHT 0x20 /* unavailable with HT */
> -#define PFM_REGT_CTR 0x40 /* counter */
> +#define PFM_REGT_NA 0x0000 /* not available */
> +#define PFM_REGT_EN 0x0001 /* has enable bit (cleared on ctxsw) */
> +#define PFM_REGT_ESCR 0x0002 /* P4: ESCR */
> +#define PFM_REGT_CCCR 0x0004 /* P4: CCCR */
> +#define PFM_REGT_PEBS 0x0010 /* PEBS related */
> +#define PFM_REGT_NOHT 0x0020 /* unavailable with HT */
> +#define PFM_REGT_CTR 0x0040 /* counter */
> +#define PFM_REGT_OTH 0x0080 /* other type of register */
>
> /*
> * This design and the partitioning of resources for SMT (hyper threads)
> Index: linux-2.6.22-rc3/include/linux/perfmon_pmu.h
> ===================================================================
> --- linux-2.6.22-rc3.orig/include/linux/perfmon_pmu.h
> +++ linux-2.6.22-rc3/include/linux/perfmon_pmu.h
> @@ -69,7 +69,7 @@ struct pfm_reg_desc {
> #define PFM_REG_C64 0x04 /* PMD: 64-bit virtualization */
> #define PFM_REG_RO 0x08 /* PMD: read-only (writes ignored) */
> #define PFM_REG_V 0x10 /* PMD: virtual reg (provided by PMU description) */
> -#define PFM_REG_NO64 0x100 /* PMC: supports REGFL_NOEMUL64 */
> +#define PFM_REG_NO64 0x100 /* PMC: supports PFM_REGFL_NO_EMUL64 */
>
> /*
> * define some shortcuts for common types
>
> --
> AMD Saxony, Dresden, Germany
> Operating System Research Center
> email: robert.richter@amd.com
>
>
>
--
-Stephane
^ permalink raw reply [flat|nested] 19+ messages in thread
* [patch 4/8] 2.6.22-rc3 perfmon2 : IBS implementation for AMD64
2007-06-15 16:56 [patch 0/8] 2.6.22-rc3 perfmon2 : IBS implementation for AMD64 Robert Richter
` (2 preceding siblings ...)
2007-06-15 16:58 ` [patch 3/8] " Robert Richter
@ 2007-06-15 16:59 ` Robert Richter
2007-06-15 18:52 ` David Rientjes
2007-06-19 13:34 ` Stephane Eranian
2007-06-15 17:00 ` [patch 5/8] " Robert Richter
` (3 subsequent siblings)
7 siblings, 2 replies; 19+ messages in thread
From: Robert Richter @ 2007-06-15 16:59 UTC (permalink / raw)
To: Stephane Eranian; +Cc: Andi Kleen, linux-kernel
This patch rearranges AMD64 MSR definitions.
Signed-off-by: Robert Richter <robert.richter@amd.com>
Index: linux-2.6.22-rc3/include/asm-i386/msr-index.h
===================================================================
--- linux-2.6.22-rc3.orig/include/asm-i386/msr-index.h
+++ linux-2.6.22-rc3/include/asm-i386/msr-index.h
@@ -75,6 +75,18 @@
/* K7/K8 MSRs. Not complete. See the architecture manual for a more
complete list. */
+
+/* K8 MSRs */
+#define MSR_K8_TOP_MEM1 0xC001001A
+#define MSR_K8_TOP_MEM2 0xC001001D
+#define MSR_K8_SYSCFG 0xC0010010
+#define MSR_K8_HWCR 0xC0010015
+#define MSR_K8_ENABLE_C1E 0xc0010055
+#define K8_MTRRFIXRANGE_DRAM_ENABLE 0x00040000 /* MtrrFixDramEn bit */
+#define K8_MTRRFIXRANGE_DRAM_MODIFY 0x00080000 /* MtrrFixDramModEn bit */
+#define K8_MTRR_RDMEM_WRMEM_MASK 0x18181818 /* Mask: RdMem|WrMem */
+
+/* K7 MSRs */
#define MSR_K7_EVNTSEL0 0xc0010000
#define MSR_K7_PERFCTR0 0xc0010004
#define MSR_K7_EVNTSEL1 0xc0010001
@@ -83,20 +95,10 @@
#define MSR_K7_PERFCTR2 0xc0010006
#define MSR_K7_EVNTSEL3 0xc0010003
#define MSR_K7_PERFCTR3 0xc0010007
-#define MSR_K8_TOP_MEM1 0xc001001a
#define MSR_K7_CLK_CTL 0xc001001b
-#define MSR_K8_TOP_MEM2 0xc001001d
-#define MSR_K8_SYSCFG 0xc0010010
-
-#define K8_MTRRFIXRANGE_DRAM_ENABLE 0x00040000 /* MtrrFixDramEn bit */
-#define K8_MTRRFIXRANGE_DRAM_MODIFY 0x00080000 /* MtrrFixDramModEn bit */
-#define K8_MTRR_RDMEM_WRMEM_MASK 0x18181818 /* Mask: RdMem|WrMem */
-
#define MSR_K7_HWCR 0xc0010015
-#define MSR_K8_HWCR 0xc0010015
#define MSR_K7_FID_VID_CTL 0xc0010041
#define MSR_K7_FID_VID_STATUS 0xc0010042
-#define MSR_K8_ENABLE_C1E 0xc0010055
/* K6 MSRs */
#define MSR_K6_EFER 0xc0000080
--
AMD Saxony, Dresden, Germany
Operating System Research Center
email: robert.richter@amd.com
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [patch 4/8] 2.6.22-rc3 perfmon2 : IBS implementation for AMD64
2007-06-15 16:59 ` [patch 4/8] " Robert Richter
@ 2007-06-15 18:52 ` David Rientjes
2007-06-20 18:36 ` Robert Richter
2007-06-19 13:34 ` Stephane Eranian
1 sibling, 1 reply; 19+ messages in thread
From: David Rientjes @ 2007-06-15 18:52 UTC (permalink / raw)
To: Robert Richter; +Cc: Stephane Eranian, Andi Kleen, linux-kernel
On Fri, 15 Jun 2007, Robert Richter wrote:
> Index: linux-2.6.22-rc3/include/asm-i386/msr-index.h
> ===================================================================
> --- linux-2.6.22-rc3.orig/include/asm-i386/msr-index.h
> +++ linux-2.6.22-rc3/include/asm-i386/msr-index.h
> @@ -75,6 +75,18 @@
>
> /* K7/K8 MSRs. Not complete. See the architecture manual for a more
> complete list. */
> +
> +/* K8 MSRs */
> +#define MSR_K8_TOP_MEM1 0xC001001A
> +#define MSR_K8_TOP_MEM2 0xC001001D
> +#define MSR_K8_SYSCFG 0xC0010010
> +#define MSR_K8_HWCR 0xC0010015
> +#define MSR_K8_ENABLE_C1E 0xc0010055
Please don't include mixed cases of hex digits. This entire file has all
hex digits in lowercase type, so please conform to that.
> +#define K8_MTRRFIXRANGE_DRAM_ENABLE 0x00040000 /* MtrrFixDramEn bit */
> +#define K8_MTRRFIXRANGE_DRAM_MODIFY 0x00080000 /* MtrrFixDramModEn bit */
> +#define K8_MTRR_RDMEM_WRMEM_MASK 0x18181818 /* Mask: RdMem|WrMem */
Masks like K8_MTRR_RDMEM_WRMEM_MASK are prone to bugs when the values they
are testing change and somebody forgets to update the mask. Can you make
K8_MTRR_RDMEM_WRMEM_MASK defined to be the result of another preprocessor
macro expression? Or, even better, get rid of it completely and modify
set_fixed_range()?
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [patch 4/8] 2.6.22-rc3 perfmon2 : IBS implementation for AMD64
2007-06-15 18:52 ` David Rientjes
@ 2007-06-20 18:36 ` Robert Richter
0 siblings, 0 replies; 19+ messages in thread
From: Robert Richter @ 2007-06-20 18:36 UTC (permalink / raw)
To: David Rientjes; +Cc: Stephane Eranian, Andi Kleen, linux-kernel
David,
> Please don't include mixed cases of hex digits. This entire file has all
> hex digits in lowercase type, so please conform to that.
I fixed this in the 2nd version of the patch.
> > +#define K8_MTRRFIXRANGE_DRAM_ENABLE 0x00040000 /* MtrrFixDramEn bit */
> > +#define K8_MTRRFIXRANGE_DRAM_MODIFY 0x00080000 /* MtrrFixDramModEn bit */
> > +#define K8_MTRR_RDMEM_WRMEM_MASK 0x18181818 /* Mask: RdMem|WrMem */
>
> Masks like K8_MTRR_RDMEM_WRMEM_MASK are prone to bugs when the values they
> are testing change and somebody forgets to update the mask. Can you make
> K8_MTRR_RDMEM_WRMEM_MASK defined to be the result of another preprocessor
> macro expression? Or, even better, get rid of it completely and modify
> set_fixed_range()?
This is existing code, won't change that.
Thanks,
-Robert
--
AMD Saxony, Dresden, Germany
Operating System Research Center
email: robert.richter@amd.com
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [patch 4/8] 2.6.22-rc3 perfmon2 : IBS implementation for AMD64
2007-06-15 16:59 ` [patch 4/8] " Robert Richter
2007-06-15 18:52 ` David Rientjes
@ 2007-06-19 13:34 ` Stephane Eranian
1 sibling, 0 replies; 19+ messages in thread
From: Stephane Eranian @ 2007-06-19 13:34 UTC (permalink / raw)
To: Robert Richter; +Cc: Andi Kleen, linux-kernel
Robert,
This patch is not specific to perfmon. It modifies the general
infrastructure. As such it could be submitted independently.
I will include it in my "infrastructure" patch file for now.
On Fri, Jun 15, 2007 at 06:59:28PM +0200, Robert Richter wrote:
> This patch rearranges AMD64 MSR definitions.
>
> Signed-off-by: Robert Richter <robert.richter@amd.com>
>
> Index: linux-2.6.22-rc3/include/asm-i386/msr-index.h
> ===================================================================
> --- linux-2.6.22-rc3.orig/include/asm-i386/msr-index.h
> +++ linux-2.6.22-rc3/include/asm-i386/msr-index.h
> @@ -75,6 +75,18 @@
>
> /* K7/K8 MSRs. Not complete. See the architecture manual for a more
> complete list. */
> +
> +/* K8 MSRs */
> +#define MSR_K8_TOP_MEM1 0xC001001A
> +#define MSR_K8_TOP_MEM2 0xC001001D
> +#define MSR_K8_SYSCFG 0xC0010010
> +#define MSR_K8_HWCR 0xC0010015
> +#define MSR_K8_ENABLE_C1E 0xc0010055
> +#define K8_MTRRFIXRANGE_DRAM_ENABLE 0x00040000 /* MtrrFixDramEn bit */
> +#define K8_MTRRFIXRANGE_DRAM_MODIFY 0x00080000 /* MtrrFixDramModEn bit */
> +#define K8_MTRR_RDMEM_WRMEM_MASK 0x18181818 /* Mask: RdMem|WrMem */
> +
> +/* K7 MSRs */
> #define MSR_K7_EVNTSEL0 0xc0010000
> #define MSR_K7_PERFCTR0 0xc0010004
> #define MSR_K7_EVNTSEL1 0xc0010001
> @@ -83,20 +95,10 @@
> #define MSR_K7_PERFCTR2 0xc0010006
> #define MSR_K7_EVNTSEL3 0xc0010003
> #define MSR_K7_PERFCTR3 0xc0010007
> -#define MSR_K8_TOP_MEM1 0xc001001a
> #define MSR_K7_CLK_CTL 0xc001001b
> -#define MSR_K8_TOP_MEM2 0xc001001d
> -#define MSR_K8_SYSCFG 0xc0010010
> -
> -#define K8_MTRRFIXRANGE_DRAM_ENABLE 0x00040000 /* MtrrFixDramEn bit */
> -#define K8_MTRRFIXRANGE_DRAM_MODIFY 0x00080000 /* MtrrFixDramModEn bit */
> -#define K8_MTRR_RDMEM_WRMEM_MASK 0x18181818 /* Mask: RdMem|WrMem */
> -
> #define MSR_K7_HWCR 0xc0010015
> -#define MSR_K8_HWCR 0xc0010015
> #define MSR_K7_FID_VID_CTL 0xc0010041
> #define MSR_K7_FID_VID_STATUS 0xc0010042
> -#define MSR_K8_ENABLE_C1E 0xc0010055
>
> /* K6 MSRs */
> #define MSR_K6_EFER 0xc0000080
>
> --
> AMD Saxony, Dresden, Germany
> Operating System Research Center
> email: robert.richter@amd.com
>
>
>
--
-Stephane
^ permalink raw reply [flat|nested] 19+ messages in thread
* [patch 5/8] 2.6.22-rc3 perfmon2 : IBS implementation for AMD64
2007-06-15 16:56 [patch 0/8] 2.6.22-rc3 perfmon2 : IBS implementation for AMD64 Robert Richter
` (3 preceding siblings ...)
2007-06-15 16:59 ` [patch 4/8] " Robert Richter
@ 2007-06-15 17:00 ` Robert Richter
2007-06-15 17:00 ` [patch 6/8] " Robert Richter
` (2 subsequent siblings)
7 siblings, 0 replies; 19+ messages in thread
From: Robert Richter @ 2007-06-15 17:00 UTC (permalink / raw)
To: Stephane Eranian; +Cc: Andi Kleen, linux-kernel
This patch implements an initial AMD64 PMU configuration.
Signed-off-by: Robert Richter <robert.richter@amd.com>
Index: linux-2.6.22-rc3/arch/i386/perfmon/perfmon.c
===================================================================
--- linux-2.6.22-rc3.orig/arch/i386/perfmon/perfmon.c
+++ linux-2.6.22-rc3/arch/i386/perfmon/perfmon.c
@@ -325,6 +325,12 @@ static int pfm_stop_save_p6(struct pfm_c
return 0;
}
+static int pfm_stop_save_amd64(struct pfm_context *ctx,
+ struct pfm_event_set *set)
+{
+ return pfm_stop_save_p6(ctx, set);
+}
+
static int pfm_stop_save_core(struct pfm_context *ctx,
struct pfm_event_set *set)
{
@@ -843,6 +849,11 @@ static int __kprobes pfm_has_ovfl_p6(voi
return 0;
}
+static int __kprobes pfm_has_ovfl_amd64(void)
+{
+ return pfm_has_ovfl_p6();
+}
+
/*
* detect is counters have overflowed.
* return:
@@ -998,6 +1009,10 @@ int pfm_arch_pmu_config_init(struct _pfm
pfm_stop_save = pfm_stop_save_core;
pfm_has_ovfl = pfm_has_ovfl_core;
break;
+ case PFM_X86_PMU_AMD64:
+ pfm_stop_save = pfm_stop_save_amd64;
+ pfm_has_ovfl = pfm_has_ovfl_amd64;
+ break;
default:
PFM_INFO("unknown pmu_style=%d", arch_info->pmu_style);
return -EINVAL;
Index: linux-2.6.22-rc3/include/asm-i386/msr-index.h
===================================================================
--- linux-2.6.22-rc3.orig/include/asm-i386/msr-index.h
+++ linux-2.6.22-rc3/include/asm-i386/msr-index.h
@@ -73,7 +73,7 @@
#define MSR_P6_EVNTSEL0 0x00000186
#define MSR_P6_EVNTSEL1 0x00000187
-/* K7/K8 MSRs. Not complete. See the architecture manual for a more
+/* AMD64 MSRs. Not complete. See the architecture manual for a more
complete list. */
/* K8 MSRs */
Index: linux-2.6.22-rc3/arch/x86_64/perfmon/perfmon_k8.c
===================================================================
--- linux-2.6.22-rc3.orig/arch/x86_64/perfmon/perfmon_k8.c
+++ linux-2.6.22-rc3/arch/x86_64/perfmon/perfmon_k8.c
@@ -25,7 +25,7 @@
#include <asm/nmi.h>
MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
-MODULE_DESCRIPTION("Athlon/Opteron 64 (K8) PMU description table");
+MODULE_DESCRIPTION("AMD64 PMU description table");
MODULE_LICENSE("GPL");
static int force_nmi;
@@ -45,7 +45,7 @@ static struct pfm_arch_pmu_info pfm_k8_p
/* pmd2 */ {{MSR_K7_PERFCTR2, 0}, 0, PFM_REGT_CTR},
/* pmd3 */ {{MSR_K7_PERFCTR3, 0}, 0, PFM_REGT_CTR},
},
- .pmu_style = PFM_X86_PMU_P6
+ .pmu_style = PFM_X86_PMU_AMD64
};
/*
@@ -331,11 +331,13 @@ static int pfm_k8_probe_pmu(void)
if (current_cpu_data.x86_max_cores > 1)
pfm_k8_setup_nb_event_control();
+ PFM_INFO("Using AMD64 PMU");
+
return 0;
}
-static struct pfm_pmu_config pfm_k8_pmu_conf={
- .pmu_name = "AMD K8",
+static struct pfm_pmu_config pfm_k8_pmu_conf = {
+ .pmu_name = "AMD64",
.counter_width = 47,
.pmd_desc = pfm_k8_pmd_desc,
.pmc_desc = pfm_k8_pmc_desc,
Index: linux-2.6.22-rc3/include/asm-i386/perfmon.h
===================================================================
--- linux-2.6.22-rc3.orig/include/asm-i386/perfmon.h
+++ linux-2.6.22-rc3/include/asm-i386/perfmon.h
@@ -98,16 +98,17 @@ struct pfm_arch_pmu_info {
/*
* X86 PMU style
*/
-#define PFM_X86_PMU_P4 1 /* Intel P4/Xeon/EM64T processor PMU */
-#define PFM_X86_PMU_P6 2 /* Intel P6/Pentium M, AMD X86-64 processor PMU */
-#define PFM_X86_PMU_CORE 3 /* Intel Core PMU */
+#define PFM_X86_PMU_P4 1 /* Intel P4/Xeon/EM64T processor PMU */
+#define PFM_X86_PMU_P6 2 /* Intel P6/Pentium M */
+#define PFM_X86_PMU_CORE 3 /* Intel Core PMU */
+#define PFM_X86_PMU_AMD64 4 /* AMD64 PMU (K8, family 10h) */
/*
* PMU feature flags
*/
-#define PFM_X86_FL_PMU_DS 0x1 /* Intel: support for Data Save Area (DS) */
-#define PFM_X86_FL_PMU_PEBS 0x2 /* Intel: support PEBS (implies DS) */
-#define PFM_X86_FL_USE_NMI 0x4 /* must use NMI interrupt */
+#define PFM_X86_FL_PMU_DS 0x01 /* Intel: support for Data Save Area (DS) */
+#define PFM_X86_FL_PMU_PEBS 0x02 /* Intel: support PEBS (implies DS) */
+#define PFM_X86_FL_USE_NMI 0x04 /* must use NMI interrupt */
void __pfm_read_reg_p4(const struct pfm_arch_ext_reg *xreg, u64 *val);
void __pfm_write_reg_p4(const struct pfm_arch_ext_reg *xreg, u64 val);
--
AMD Saxony, Dresden, Germany
Operating System Research Center
email: robert.richter@amd.com
^ permalink raw reply [flat|nested] 19+ messages in thread
* [patch 6/8] 2.6.22-rc3 perfmon2 : IBS implementation for AMD64
2007-06-15 16:56 [patch 0/8] 2.6.22-rc3 perfmon2 : IBS implementation for AMD64 Robert Richter
` (4 preceding siblings ...)
2007-06-15 17:00 ` [patch 5/8] " Robert Richter
@ 2007-06-15 17:00 ` Robert Richter
2007-06-15 20:15 ` David Rientjes
2007-06-15 17:01 ` [patch 7/8] " Robert Richter
2007-06-15 17:02 ` [patch 8/8] " Robert Richter
7 siblings, 1 reply; 19+ messages in thread
From: Robert Richter @ 2007-06-15 17:00 UTC (permalink / raw)
To: Stephane Eranian; +Cc: Andi Kleen, linux-kernel
This patch implements Instruction Based Sampling (IBS).
Signed-off-by: Robert Richter <robert.richter@amd.com>
Index: linux-2.6.22-rc3/arch/i386/perfmon/perfmon.c
===================================================================
--- linux-2.6.22-rc3.orig/arch/i386/perfmon/perfmon.c
+++ linux-2.6.22-rc3/arch/i386/perfmon/perfmon.c
@@ -5,6 +5,9 @@
* Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
* Contributed by Stephane Eranian <eranian@hpl.hp.com>
*
+ * Copyright (c) 2007 Advanced Micro Devices, Inc.
+ * Contributed by Robert Richter <robert.richter@amd.com>
+ *
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
@@ -66,6 +69,49 @@ static inline int get_smt_id(void)
#endif
}
+static inline u64 __pfm_pmd_sread(unsigned int vpmd)
+{
+ struct pfm_arch_pmu_info *arch_info = pfm_pmu_conf->arch_info;
+ return arch_info->vpmds[get_smt_id()][vpmd];
+}
+
+static inline void __pfm_pmd_swrite(unsigned int vpmd, u64 val)
+{
+ struct pfm_arch_pmu_info *arch_info = pfm_pmu_conf->arch_info;
+ arch_info->vpmds[get_smt_id()][vpmd] = val;
+}
+
+/* Context must be locked for atomic read and write operations */
+
+u64 pfm_pmd_sread(struct pfm_context *ctx, unsigned int cnum)
+{
+ unsigned int vpmd = pfm_pmu_conf->pmd_desc[cnum].hw_addr;
+ BUG_ON(! spin_is_locked(&ctx->lock));
+ if (vpmd >= PFM_NUM_VPMDS)
+ return 0;
+ return __pfm_pmd_sread(vpmd);
+}
+
+void pfm_pmd_swrite(struct pfm_context *ctx, unsigned int cnum, u64 val)
+{
+ unsigned int vpmd = pfm_pmu_conf->pmd_desc[cnum].hw_addr;
+ BUG_ON(! spin_is_locked(&ctx->lock));
+ if (vpmd >= PFM_NUM_VPMDS)
+ return;
+ __pfm_pmd_swrite(vpmd, val);
+}
+
+static void pfm_pmd_sinc(struct pfm_context *ctx, unsigned int cnum)
+{
+ unsigned int vpmd = pfm_pmu_conf->pmd_desc[cnum].hw_addr;
+ u64 val = 0;
+ BUG_ON(! spin_is_locked(&ctx->lock));
+ if (vpmd >= PFM_NUM_VPMDS)
+ return;
+ val = __pfm_pmd_sread(vpmd);
+ __pfm_pmd_swrite(vpmd, val + 1);
+}
+
void __pfm_write_reg_p4(const struct pfm_arch_ext_reg *xreg, u64 val)
{
u64 pmi;
@@ -309,7 +355,13 @@ static int pfm_stop_save_p6(struct pfm_c
count = set->nused_pmds;
for (i = 0; count; i++) {
if (test_bit(i, ulp(set->used_pmds))) {
- val = pfm_arch_read_pmd(ctx, i);
+ count--;
+ /* Skip for IBS event counter */
+ if (unlikely(i == arch_info->ibsfetchctr_idx))
+ continue;
+ if (unlikely(i == arch_info->ibsopctr_idx))
+ continue;
+ val = pfm_read_pmd(ctx, i);
if (likely(test_bit(i, ulp(cnt_mask)))) {
if (!(val & wmask)) {
__set_bit(i, ulp(set->povfl_pmds));
@@ -318,7 +370,6 @@ static int pfm_stop_save_p6(struct pfm_c
val = (pmds[i] & ~ovfl_mask) | (val & ovfl_mask);
}
pmds[i] = val;
- count--;
}
}
/* 0 means: no need to save PMDs at upper level */
@@ -328,6 +379,55 @@ static int pfm_stop_save_p6(struct pfm_c
static int pfm_stop_save_amd64(struct pfm_context *ctx,
struct pfm_event_set *set)
{
+ struct pfm_arch_pmu_info *arch_info = pfm_pmu_conf->arch_info;
+ struct pfm_reg_desc *pmc_desc = pfm_pmu_conf->pmc_desc;
+ u64 val = 0;
+
+ if (arch_info->flags & PFM_X86_FL_IBS) {
+ /* Check for IBS events */
+ BUG_ON(!spin_is_locked(&ctx->lock));
+ do {
+ if (unlikely(test_bit(arch_info->ibsfetchctr_idx,
+ ulp(set->povfl_pmds)))) {
+ PFM_DBG("Warning: IBS fetch data already pending");
+ continue;
+ }
+ rdmsrl(pmc_desc[arch_info->ibsfetchctl_idx].hw_addr,
+ val);
+ if (!(val & PFM_AMD64_IBSFETCHVAL))
+ continue;
+ PFM_DBG_ovfl("New IBS fetch data available");
+ __set_bit(arch_info->ibsfetchctr_idx,
+ ulp(set->povfl_pmds));
+ set->npend_ovfls++;
+ /* Increment event counter */
+ pfm_pmd_sinc(ctx, arch_info->ibsfetchctr_idx);
+ /* Update PMD */
+ set->view->set_pmds[arch_info->ibsfetchctr_idx] =
+ pfm_read_pmd(ctx, arch_info->ibsfetchctr_idx);
+ } while (0);
+ do {
+ if (unlikely(test_bit(arch_info->ibsopctr_idx,
+ ulp(set->povfl_pmds)))) {
+ PFM_DBG("Warning: IBS execution data already pending");
+ continue;
+ }
+ rdmsrl(pmc_desc[arch_info->ibsopctl_idx].hw_addr, val);
+ if (!(val & PFM_AMD64_IBSOPVAL))
+ continue;
+ PFM_DBG_ovfl("New IBS execution data available");
+ __set_bit(arch_info->ibsopctr_idx,
+ ulp(set->povfl_pmds));
+ set->npend_ovfls++;
+ /* Increment event counter */
+ pfm_pmd_sinc(ctx, arch_info->ibsopctr_idx);
+ /* Update PMD */
+ set->view->set_pmds[arch_info->ibsopctr_idx] =
+ pfm_read_pmd(ctx, arch_info->ibsopctr_idx);
+ } while (0);
+ }
+
+ /* IbsFetchCtl/IbsOpCtl is cleard in pfm_stop_save_p6() */
return pfm_stop_save_p6(ctx, set);
}
@@ -637,6 +737,7 @@ void pfm_arch_start(struct task_struct *
struct pfm_event_set *set)
{
struct pfm_arch_context *ctx_arch;
+ struct pfm_reg_desc* pmc_desc;
u64 *mask;
u16 i, num;
@@ -673,11 +774,18 @@ void pfm_arch_start(struct task_struct *
*/
num = pfm_pmu_conf->num_pmcs;
mask = pfm_pmu_conf->impl_pmcs;
+ pmc_desc = pfm_pmu_conf->pmc_desc;
for (i = 0; num; i++) {
- if (test_bit(i, ulp(mask))) {
+ if (!test_bit(i, ulp(mask)))
+ continue;
+ num--;
+ if (!test_bit(i, ulp(set->used_pmcs)))
+ /* If the PMC is not used, we initialize with
+ * interrupts disabled. */
+ pfm_arch_write_pmc(ctx, i, set->pmcs[i]
+ & ~pmc_desc[i].no_emul64_msk);
+ else
pfm_arch_write_pmc(ctx, i, set->pmcs[i]);
- num--;
- }
}
/*
@@ -839,18 +947,37 @@ static int __kprobes pfm_has_ovfl_p6(voi
xrd = arch_info->pmd_addrs;
for (i = 0; num; i++) {
- if (test_bit(i, ulp(cnt_mask))) {
- rdmsrl(xrd[i].addrs[0], val);
- if (!(val & wmask))
- return 1;
- num--;
- }
+ if (!test_bit(i, ulp(cnt_mask)))
+ continue;
+ num--;
+ /* Skip for IBS event counter */
+ if (unlikely(i == arch_info->ibsfetchctr_idx))
+ continue;
+ if (unlikely(i == arch_info->ibsopctr_idx))
+ continue;
+ rdmsrl(xrd[i].addrs[0], val);
+ if (!(val & wmask))
+ return 1;
}
return 0;
}
static int __kprobes pfm_has_ovfl_amd64(void)
{
+ struct pfm_arch_pmu_info *arch_info = pfm_pmu_conf->arch_info;
+ u64 val = 0;
+
+ if (!(arch_info->flags & PFM_X86_FL_IBS))
+ return pfm_has_ovfl_p6();
+
+ /* Check for IBS events */
+ rdmsrl(pfm_pmu_conf->pmc_desc[arch_info->ibsfetchctl_idx].hw_addr, val);
+ if (val & PFM_AMD64_IBSFETCHVAL)
+ return 1;
+ rdmsrl(pfm_pmu_conf->pmc_desc[arch_info->ibsopctl_idx].hw_addr, val);
+ if (val & PFM_AMD64_IBSOPVAL)
+ return 1;
+
return pfm_has_ovfl_p6();
}
@@ -973,11 +1100,18 @@ static struct notifier_block pfm_nmi_nb=
int pfm_arch_pmu_config_init(struct _pfm_pmu_config *cfg)
{
struct pfm_arch_pmu_info *arch_info = cfg->arch_info;
- struct pfm_arch_ext_reg *pc;
+ struct pfm_arch_ext_reg *pc, *pd;
u64 *mask;
unsigned int i, num, ena;
+ unsigned int ibs_check = 0;
+#define IBS_CHECK_FETCHCTL 0x01
+#define IBS_CHECK_FETCHCTR 0x02
+#define IBS_CHECK_OPCTL 0x04
+#define IBS_CHECK_OPCTR 0x08
+#define IBS_CHECK_ALL 0x0f
pc = arch_info->pmc_addrs;
+ pd = arch_info->pmd_addrs;
/*
* ensure that PMU description is able to deal with NMI watchdog using
@@ -1023,18 +1157,86 @@ int pfm_arch_pmu_config_init(struct _pfm
num = cfg->num_pmcs;
mask = cfg->impl_pmcs;
ena = 0;
+ ibs_check = 0;
for (i = 0; num; i++) {
- if (test_bit(i, ulp(mask))) {
- if (pc[i].reg_type & PFM_REGT_EN) {
- __set_bit(i, ulp(arch_info->enable_mask));
- ena++;
- arch_info->max_ena = i + 1;
+ if (!test_bit(i, ulp(mask)))
+ continue;
+ num--;
+ if (!(pc[i].reg_type & PFM_REGT_EN))
+ continue;
+ if (pc[i].reg_type & PFM_REGT_IBS) {
+ if (!(arch_info->flags & PFM_X86_FL_IBS)) {
+ PFM_DBG("Skipping PMD%d, IBS not supported by the PMU", i);
+ continue;
+ }
+ if (arch_info->pmc_addrs[i].addrs[0] == MSR_AMD64_IBSFETCHCTL) {
+ PFM_DBG("IBS fetch control register detected (PMC%d)", i);
+ if (ibs_check & IBS_CHECK_FETCHCTL) {
+ PFM_INFO("Invalid PMU configuration (PMC%d), "
+ "IBS fetch control already configured", i);
+ return -EINVAL;
+ }
+ ibs_check |= IBS_CHECK_FETCHCTL;
+ arch_info->ibsfetchctl_idx = i;
+ }
+ else if (arch_info->pmc_addrs[i].addrs[0] == MSR_AMD64_IBSOPCTL) {
+ PFM_DBG("IBS execution control register detected (PMC%d)", i);
+ if (ibs_check & IBS_CHECK_OPCTL) {
+ PFM_INFO("Invalid PMU configuration (PMC%d), "
+ "IBS execution control already configured", i);
+ return -EINVAL;
+ }
+ ibs_check |= IBS_CHECK_OPCTL;
+ arch_info->ibsopctl_idx = i;
}
- num--;
}
+ __set_bit(i, ulp(arch_info->enable_mask));
+ ena++;
+ arch_info->max_ena = i + 1;
}
PFM_INFO("%u PMCs with enable capability", ena);
+ /* Configure IBS registers */
+ if (arch_info->flags & PFM_X86_FL_IBS) {
+ num = cfg->num_pmds;
+ mask = cfg->impl_pmds;
+ for (i = 0; num; i++) {
+ if (!test_bit(i, ulp(mask)))
+ continue;
+ num--;
+ if (!(pd[i].reg_type & PFM_REGT_IBS))
+ continue;
+ if (cfg->pmd_desc[i].type != PFM_REG_ICV)
+ /* No virtual IBS counter */
+ continue;
+ if (cfg->pmd_desc[i].hw_addr == PFM_VPMD_AMD64_IBSFETCHCTR) {
+ PFM_DBG("IBS fetch counter detected (PMD%d)", i);
+ if (ibs_check & IBS_CHECK_FETCHCTR) {
+ PFM_INFO("Invalid PMU configuration (PMD%d), "
+ "IBS fetch counter already configured", i);
+ return -EINVAL;
+ }
+ ibs_check |= IBS_CHECK_FETCHCTR;
+ arch_info->ibsfetchctr_idx = i;
+ } else if (cfg->pmd_desc[i].hw_addr == PFM_VPMD_AMD64_IBSOPCTR) {
+ PFM_DBG("IBS execution counter detected (PMD%d)", i);
+ if (ibs_check & IBS_CHECK_OPCTR) {
+ PFM_INFO("Invalid PMU configuration (PMD%d), "
+ "IBS execution counter already configured", i);
+ return -EINVAL;
+ }
+ ibs_check |= IBS_CHECK_OPCTR;
+ arch_info->ibsopctr_idx = i;
+ }
+ }
+ if ((ibs_check & IBS_CHECK_ALL) != IBS_CHECK_ALL) {
+ PFM_INFO("Invalid PMU configuration, "
+ "Missing settings: 0x%02x", ibs_check);
+ return -EINVAL;
+ }
+ PFM_DBG("IBS configuration successfully finished");
+ }
+
/*
* determine interrupt type to use
*/
Index: linux-2.6.22-rc3/arch/x86_64/perfmon/perfmon_k8.c
===================================================================
--- linux-2.6.22-rc3.orig/arch/x86_64/perfmon/perfmon_k8.c
+++ linux-2.6.22-rc3/arch/x86_64/perfmon/perfmon_k8.c
@@ -5,6 +5,9 @@
* Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
* Contributed by Stephane Eranian <eranian@hpl.hp.com>
*
+ * Copyright (c) 2007 Advanced Micro Devices, Inc.
+ * Contributed by Robert Richter <robert.richter@amd.com>
+ *
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
@@ -25,6 +28,7 @@
#include <asm/nmi.h>
MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
+MODULE_AUTHOR("Robert Richter <robert.richter@amd.com>");
MODULE_DESCRIPTION("AMD64 PMU description table");
MODULE_LICENSE("GPL");
@@ -38,12 +42,26 @@ static struct pfm_arch_pmu_info pfm_k8_p
/* pmc1 */ {{MSR_K7_EVNTSEL1, 0}, 1, PFM_REGT_EN},
/* pmc2 */ {{MSR_K7_EVNTSEL2, 0}, 2, PFM_REGT_EN},
/* pmc3 */ {{MSR_K7_EVNTSEL3, 0}, 3, PFM_REGT_EN},
+/* pmc4 */ {{MSR_AMD64_IBSFETCHCTL, 0}, 0, PFM_REGT_EN|PFM_REGT_IBS},
+/* pmc5 */ {{MSR_AMD64_IBSOPCTL, 0}, 0, PFM_REGT_EN|PFM_REGT_IBS},
},
.pmd_addrs = {
/* pmd0 */ {{MSR_K7_PERFCTR0, 0}, 0, PFM_REGT_CTR},
/* pmd1 */ {{MSR_K7_PERFCTR1, 0}, 0, PFM_REGT_CTR},
/* pmd2 */ {{MSR_K7_PERFCTR2, 0}, 0, PFM_REGT_CTR},
/* pmd3 */ {{MSR_K7_PERFCTR3, 0}, 0, PFM_REGT_CTR},
+/* pmd4 */ {{0, 0}, 0, PFM_REGT_CTR|PFM_REGT_IBS},
+/* pmd5 */ {{MSR_AMD64_IBSFETCHCTL, 0}, 0, PFM_REGT_IBS},
+/* pmd6 */ {{MSR_AMD64_IBSFETCHLINAD, 0}, 0, PFM_REGT_IBS},
+/* pmd7 */ {{MSR_AMD64_IBSFETCHPHYSAD, 0}, 0, PFM_REGT_IBS},
+/* pmd8 */ {{0, 0}, 0, PFM_REGT_CTR|PFM_REGT_IBS},
+/* pmd9 */ {{MSR_AMD64_IBSOPCTL, 0}, 0, PFM_REGT_IBS},
+/* pmd10 */ {{MSR_AMD64_IBSOPRIP, 0}, 0, PFM_REGT_IBS},
+/* pmd11 */ {{MSR_AMD64_IBSOPDATA, 0}, 0, PFM_REGT_IBS},
+/* pmd12 */ {{MSR_AMD64_IBSOPDATA2, 0}, 0, PFM_REGT_IBS},
+/* pmd13 */ {{MSR_AMD64_IBSOPDATA3, 0}, 0, PFM_REGT_IBS|PFM_REGT_IBS_EXT},
+/* pmd14 */ {{MSR_AMD64_IBSDCLINAD, 0}, 0, PFM_REGT_IBS|PFM_REGT_IBS_EXT},
+/* pmd15 */ {{MSR_AMD64_IBSDCPHYSAD, 0}, 0, PFM_REGT_IBS|PFM_REGT_IBS_EXT},
},
.pmu_style = PFM_X86_PMU_AMD64
};
@@ -65,11 +83,26 @@ static struct pfm_arch_pmu_info pfm_k8_p
| (1ULL<<20) \
| (1ULL<<21))
+/*
+ * We mark readonly bits as reserved and use the PMC for control
+ * operations only. Interrupt enable and clear bits are reserved too.
+ * IBSFETCHCTL is also implemented as PMD, where data can be read
+ * from. Same applies to IBSOPCTR.
+ */
+#define PFM_AMD64_IBSFETCHCTL_VAL PFM_AMD64_IBSFETCHEN
+#define PFM_AMD64_IBSFETCHCTL_NO64 PFM_AMD64_IBSFETCHEN
+#define PFM_AMD64_IBSFETCHCTL_RSVD (~((1ULL<<16)-1))
+#define PFM_AMD64_IBSOPCTL_VAL PFM_AMD64_IBSOPEN
+#define PFM_AMD64_IBSOPCTL_NO64 PFM_AMD64_IBSOPEN
+#define PFM_AMD64_IBSOPCTL_RSVD (~((1ULL<<16)-1))
+
static struct pfm_reg_desc pfm_k8_pmc_desc[]={
/* pmc0 */ PMC_D(PFM_REG_I64, "PERFSEL0", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL0),
/* pmc1 */ PMC_D(PFM_REG_I64, "PERFSEL1", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL1),
/* pmc2 */ PMC_D(PFM_REG_I64, "PERFSEL2", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL2),
/* pmc3 */ PMC_D(PFM_REG_I64, "PERFSEL3", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL3),
+/* pmc4 */ PMC_D(PFM_REG_I, "IBSFETCHCTL", PFM_AMD64_IBSFETCHCTL_VAL, PFM_AMD64_IBSFETCHCTL_RSVD, PFM_AMD64_IBSFETCHCTL_NO64, MSR_AMD64_IBSFETCHCTL),
+/* pmc5 */ PMC_D(PFM_REG_I, "IBSOPCTL", PFM_AMD64_IBSOPCTL_VAL, PFM_AMD64_IBSOPCTL_RSVD, PFM_AMD64_IBSOPCTL_NO64, MSR_AMD64_IBSOPCTL),
};
#define PFM_AMD_NUM_PMCS ARRAY_SIZE(pfm_k8_pmc_desc)
@@ -78,6 +111,18 @@ static struct pfm_reg_desc pfm_k8_pmd_de
/* pmd1 */ PMD_D(PFM_REG_C, "PERFCTR1", MSR_K7_PERFCTR1),
/* pmd2 */ PMD_D(PFM_REG_C, "PERFCTR2", MSR_K7_PERFCTR2),
/* pmd3 */ PMD_D(PFM_REG_C, "PERFCTR3", MSR_K7_PERFCTR3),
+/* pmd4 */ PMD_D(PFM_REG_ICV, "IBSFETCHCTR", PFM_VPMD_AMD64_IBSFETCHCTR),
+/* pmd5 */ PMD_D(PFM_REG_IRO, "IBSFETCHCTL", MSR_AMD64_IBSFETCHCTL),
+/* pmd6 */ PMD_D(PFM_REG_IRO, "IBSFETCHLINAD", MSR_AMD64_IBSFETCHLINAD),
+/* pmd7 */ PMD_D(PFM_REG_IRO, "IBSFETCHPHYSAD", MSR_AMD64_IBSFETCHPHYSAD),
+/* pmd8 */ PMD_D(PFM_REG_ICV, "IBSOPCTR", PFM_VPMD_AMD64_IBSOPCTR),
+/* pmd9 */ PMD_D(PFM_REG_IRO, "IBSOPCTL", MSR_AMD64_IBSOPCTL),
+/* pmd10 */ PMD_D(PFM_REG_IRO, "IBSOPRIP", MSR_AMD64_IBSOPRIP),
+/* pmd11 */ PMD_D(PFM_REG_IRO, "IBSOPDATA", MSR_AMD64_IBSOPDATA),
+/* pmd12 */ PMD_D(PFM_REG_IRO, "IBSOPDATA2", MSR_AMD64_IBSOPDATA2),
+/* pmd13 */ PMD_D(PFM_REG_IRO, "IBSOPDATA3", MSR_AMD64_IBSOPDATA3),
+/* pmd14 */ PMD_D(PFM_REG_IRO, "IBSDCLINAD", MSR_AMD64_IBSDCLINAD),
+/* pmd15 */ PMD_D(PFM_REG_IRO, "IBSDCPHYSAD", MSR_AMD64_IBSDCPHYSAD),
};
#define PFM_AMD_NUM_PMDS ARRAY_SIZE(pfm_k8_pmd_desc)
@@ -284,6 +329,9 @@ static int pfm_k8_detect_nmi(void)
* auto-detect which perfctr/eventsel is used by NMI watchdog
*/
for (i=0; i < PFM_AMD_NUM_PMDS; i++) {
+ /* skip IBS registers */
+ if (pfm_k8_pmu_info.pmc_addrs[i].reg_type & PFM_REGT_IBS)
+ continue;
if (avail_to_resrv_perfctr_nmi(pfm_k8_pmd_desc[i].hw_addr))
continue;
@@ -332,10 +380,75 @@ static int pfm_k8_probe_pmu(void)
pfm_k8_setup_nb_event_control();
PFM_INFO("Using AMD64 PMU");
+ if (pfm_k8_pmu_info.flags & PFM_X86_FL_IBS)
+ PFM_INFO("IBS is supported by processor");
+ if (pfm_k8_pmu_info.flags & PFM_X86_FL_IBS_EXT)
+ PFM_INFO("IBS extended registers are supported by processor");
return 0;
}
+static inline void
+pfm_amd64_check_register(struct pfm_pmu_config *cfg,
+ struct pfm_reg_desc *reg,
+ struct pfm_arch_ext_reg *ext_reg)
+{
+ struct pfm_arch_pmu_info *arch_info = cfg->arch_info;
+
+ if (!(ext_reg->reg_type & PFM_REGT_AMD64))
+ /* No special AMD64 PMU register */
+ return;
+
+ /* Disable register */
+ reg->type &= ~PFM_REG_I;
+
+ switch (ext_reg->reg_type & PFM_REGT_AMD64) {
+ case (PFM_REGT_IBS):
+ /* IBS register */
+ if (!(arch_info->flags & PFM_X86_FL_IBS))
+ return;
+ break;
+ case (PFM_REGT_IBS|PFM_REGT_IBS_EXT):
+ /* IBS extended register */
+ if (!(arch_info->flags & PFM_X86_FL_IBS_EXT))
+ return;
+ break;
+ default:
+ return;
+ }
+
+ /* Enable register */
+ reg->type |= PFM_REG_I;
+}
+
+static void pfm_amd64_setup_pmu(struct pfm_pmu_config *cfg)
+{
+ u16 i;
+ struct pfm_arch_pmu_info *arch_info = cfg->arch_info;
+
+ /* set PMU features depending on CPUID */
+ arch_info->flags &= ~(PFM_X86_FL_IBS|PFM_X86_FL_IBS_EXT);
+ switch (current_cpu_data.x86) {
+ case 15:
+ break;
+ case 16:
+ arch_info->flags |= PFM_X86_FL_IBS;
+ break;
+ default:
+ break;
+ }
+
+ /* Disable unsupported PMC/PMD registers */
+ for (i = 0; i < cfg->num_pmc_entries; i++) {
+ pfm_amd64_check_register(cfg, &cfg->pmc_desc[i],
+ &arch_info->pmc_addrs[i]);
+ }
+ for (i = 0; i < cfg->num_pmd_entries; i++) {
+ pfm_amd64_check_register(cfg, &cfg->pmd_desc[i],
+ &arch_info->pmd_addrs[i]);
+ }
+}
+
static struct pfm_pmu_config pfm_k8_pmu_conf = {
.pmu_name = "AMD64",
.counter_width = 47,
@@ -344,14 +457,17 @@ static struct pfm_pmu_config pfm_k8_pmu_
.num_pmc_entries = PFM_AMD_NUM_PMCS,
.num_pmd_entries = PFM_AMD_NUM_PMDS,
.probe_pmu = pfm_k8_probe_pmu,
- .version = "1.1",
+ .version = "1.2",
.arch_info = &pfm_k8_pmu_info,
.flags = PFM_PMU_BUILTIN_FLAG,
- .owner = THIS_MODULE
+ .owner = THIS_MODULE,
+ .pmd_sread = pfm_pmd_sread,
+ .pmd_swrite = pfm_pmd_swrite,
};
static int __init pfm_k8_pmu_init_module(void)
{
+ pfm_amd64_setup_pmu(&pfm_k8_pmu_conf);
return pfm_pmu_register(&pfm_k8_pmu_conf);
}
Index: linux-2.6.22-rc3/include/asm-i386/msr-index.h
===================================================================
--- linux-2.6.22-rc3.orig/include/asm-i386/msr-index.h
+++ linux-2.6.22-rc3/include/asm-i386/msr-index.h
@@ -76,6 +76,18 @@
/* AMD64 MSRs. Not complete. See the architecture manual for a more
complete list. */
+#define MSR_AMD64_IBSFETCHCTL 0xC0011030
+#define MSR_AMD64_IBSFETCHLINAD 0xC0011031
+#define MSR_AMD64_IBSFETCHPHYSAD 0xC0011032
+#define MSR_AMD64_IBSOPCTL 0xC0011033
+#define MSR_AMD64_IBSOPRIP 0xC0011034
+#define MSR_AMD64_IBSOPDATA 0xC0011035
+#define MSR_AMD64_IBSOPDATA2 0xC0011036
+#define MSR_AMD64_IBSOPDATA3 0xC0011037
+#define MSR_AMD64_IBSDCLINAD 0xC0011038
+#define MSR_AMD64_IBSDCPHYSAD 0xC0011039
+#define MSR_AMD64_IBSCTL 0xC001103A
+
/* K8 MSRs */
#define MSR_K8_TOP_MEM1 0xC001001A
#define MSR_K8_TOP_MEM2 0xC001001D
Index: linux-2.6.22-rc3/include/asm-i386/perfmon.h
===================================================================
--- linux-2.6.22-rc3.orig/include/asm-i386/perfmon.h
+++ linux-2.6.22-rc3/include/asm-i386/perfmon.h
@@ -2,6 +2,9 @@
* Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
* Contributed by Stephane Eranian <eranian@hpl.hp.com>
*
+ * Copyright (c) 2007 Advanced Micro Devices, Inc.
+ * Contributed by Robert Richter <robert.richter@amd.com>
+ *
* This file contains X86 Processor Family specific definitions
* for the perfmon interface. This covers P6, Pentium M, P4/Xeon
* (32-bit and 64-bit, i.e., EM64T) and AMD X86-64.
@@ -29,6 +32,7 @@
#include <asm/desc.h>
#include <asm/apic.h>
+#include <linux/threads.h> /* NR_CPUS */
#ifdef CONFIG_4KSTACKS
#define PFM_ARCH_PMD_STK_ARG 2
@@ -48,6 +52,15 @@
#define PFM_ESCR_RSVD ~0x000000007ffffffcULL
/*
+ * For AMD64
+ */
+/* Familiy 10h */
+#define PFM_AMD64_IBSFETCHEN (1ULL<<48)
+#define PFM_AMD64_IBSFETCHVAL (1ULL<<49)
+#define PFM_AMD64_IBSOPEN (1ULL<<17)
+#define PFM_AMD64_IBSOPVAL (1ULL<<18)
+
+/*
* bitmask for reg_type
*/
#define PFM_REGT_NA 0x0000 /* not available */
@@ -58,6 +71,17 @@
#define PFM_REGT_NOHT 0x0020 /* unavailable with HT */
#define PFM_REGT_CTR 0x0040 /* counter */
#define PFM_REGT_OTH 0x0080 /* other type of register */
+#define PFM_REGT_IBS 0x0100 /* IBS register set */
+#define PFM_REGT_IBS_EXT 0x0200 /* IBS extended register set */
+
+/* AMD64 PMU features */
+#define PFM_REGT_AMD64 (PFM_REGT_IBS|PFM_REGT_IBS_EXT)
+
+/* We count IBS events in virtual PMDs to use implemented sampling
+ * features */
+#define PFM_VPMD_AMD64_IBSFETCHCTR 0
+#define PFM_VPMD_AMD64_IBSOPCTR 1
+#define PFM_NUM_VPMDS 2
/*
* This design and the partitioning of resources for SMT (hyper threads)
@@ -93,6 +117,11 @@ struct pfm_arch_pmu_info {
u16 pebs_ctr_idx; /* index of PEBS IQ_CTR4 counter (for overflow) */
u16 reserved; /* for future use */
u8 pmu_style; /* type of PMU interface (P4, P6) */
+ u64 vpmds[NR_CPUS][PFM_NUM_VPMDS]; /* virt. sw PMDs per cpu */
+ u16 ibsfetchctl_idx; /* IBS: index of IBS fetch control register */
+ u16 ibsfetchctr_idx; /* IBS: index of IBS fetch counter (virtual) */
+ u16 ibsopctl_idx; /* IBS: index of IBS execution control register */
+ u16 ibsopctr_idx; /* IBS: index of IBS execution counter (virtual) */
};
/*
@@ -109,6 +138,12 @@ struct pfm_arch_pmu_info {
#define PFM_X86_FL_PMU_DS 0x01 /* Intel: support for Data Save Area (DS) */
#define PFM_X86_FL_PMU_PEBS 0x02 /* Intel: support PEBS (implies DS) */
#define PFM_X86_FL_USE_NMI 0x04 /* must use NMI interrupt */
+#define PFM_X86_FL_IBS 0x08 /* AMD64: support for IBS */
+#define PFM_X86_FL_IBS_EXT 0x10 /* AMD64: support for IBS extended registers */
+
+/* Virtual PMDs access functions */
+u64 pfm_pmd_sread(struct pfm_context *ctx, unsigned int cnum);
+void pfm_pmd_swrite(struct pfm_context *ctx, unsigned int cnum, u64 val);
void __pfm_read_reg_p4(const struct pfm_arch_ext_reg *xreg, u64 *val);
void __pfm_write_reg_p4(const struct pfm_arch_ext_reg *xreg, u64 val);
@@ -340,6 +375,9 @@ static inline int pfm_arch_context_initi
static inline void pfm_arch_ovfl_reset_pmd(struct pfm_context *ctx, unsigned int cnum)
{
u64 val;
+ if (unlikely(pfm_pmu_conf->pmd_desc[cnum].type & PFM_REG_V))
+ /* skip virtual counters */
+ return;
val = pfm_arch_read_pmd(ctx, cnum);
pfm_arch_write_pmd(ctx, cnum, val);
}
Index: linux-2.6.22-rc3/include/linux/perfmon_pmu.h
===================================================================
--- linux-2.6.22-rc3.orig/include/linux/perfmon_pmu.h
+++ linux-2.6.22-rc3/include/linux/perfmon_pmu.h
@@ -2,6 +2,9 @@
* Copyright (c) 2006 Hewlett-Packard Development Company, L.P.
* Contributed by Stephane Eranian <eranian@hpl.hp.com>
*
+ * Copyright (c) 2007 Advanced Micro Devices, Inc.
+ * Contributed by Robert Richter <robert.richter@amd.com>
+ *
* Interface for PMU description modules
*
* This program is free software; you can redistribute it and/or
@@ -78,6 +81,8 @@ struct pfm_reg_desc {
#define PFM_REG_W64 (PFM_REG_WC|PFM_REG_NO64|PFM_REG_I)
#define PFM_REG_C (PFM_REG_C64|PFM_REG_I)
#define PFM_REG_I64 (PFM_REG_NO64|PFM_REG_I)
+#define PFM_REG_IRO (PFM_REG_RO|PFM_REG_I)
+#define PFM_REG_ICV (PFM_REG_C64|PFM_REG_I|PFM_REG_V) /* virtual (sw) counter */
typedef int (*pfm_pmc_check_t)(struct pfm_context *ctx,
struct pfm_event_set *set,
Index: linux-2.6.22-rc3/perfmon/perfmon_intr.c
===================================================================
--- linux-2.6.22-rc3.orig/perfmon/perfmon_intr.c
+++ linux-2.6.22-rc3/perfmon/perfmon_intr.c
@@ -18,6 +18,9 @@
* Contributed by Stephane Eranian <eranian@hpl.hp.com>
* David Mosberger-Tang <davidm@hpl.hp.com>
*
+ * Copyright (c) 2007 Advanced Micro Devices, Inc.
+ * Contributed by Robert Richter <robert.richter@amd.com>
+ *
* More information about perfmon available at:
* http://perfmon2.sf.net
*
@@ -163,7 +166,19 @@ static void pfm_overflow_handler(struct
*/
old_val = new_val = pmds[i];
ovfl_thres = set->pmds[i].ovflsw_thres;
- new_val += 1 + ovfl_mask;
+ if (likely(!(pfm_pmu_conf->pmd_desc[i].type & PFM_REG_V)))
+ new_val += 1 + ovfl_mask;
+ else {
+ /* No hardware counter */
+ /*
+ * Since the previous value is unknown, 64 bit
+ * overflows can only detected for zero
+ * values. Thus, increments of more than 1
+ * will not be detected.
+ */
+ if (! new_val)
+ old_val = ~0;
+ }
pmds[i] = new_val;
/*
--
AMD Saxony, Dresden, Germany
Operating System Research Center
email: robert.richter@amd.com
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [patch 6/8] 2.6.22-rc3 perfmon2 : IBS implementation for AMD64
2007-06-15 17:00 ` [patch 6/8] " Robert Richter
@ 2007-06-15 20:15 ` David Rientjes
2007-06-20 18:38 ` Robert Richter
0 siblings, 1 reply; 19+ messages in thread
From: David Rientjes @ 2007-06-15 20:15 UTC (permalink / raw)
To: Robert Richter; +Cc: Stephane Eranian, Andi Kleen, linux-kernel
On Fri, 15 Jun 2007, Robert Richter wrote:
> Index: linux-2.6.22-rc3/arch/i386/perfmon/perfmon.c
> ===================================================================
> --- linux-2.6.22-rc3.orig/arch/i386/perfmon/perfmon.c
> +++ linux-2.6.22-rc3/arch/i386/perfmon/perfmon.c
> @@ -5,6 +5,9 @@
> * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
> * Contributed by Stephane Eranian <eranian@hpl.hp.com>
> *
> + * Copyright (c) 2007 Advanced Micro Devices, Inc.
> + * Contributed by Robert Richter <robert.richter@amd.com>
> + *
> * This program is free software; you can redistribute it and/or
> * modify it under the terms of version 2 of the GNU General Public
> * License as published by the Free Software Foundation.
> @@ -66,6 +69,49 @@ static inline int get_smt_id(void)
> #endif
> }
>
> +static inline u64 __pfm_pmd_sread(unsigned int vpmd)
> +{
> + struct pfm_arch_pmu_info *arch_info = pfm_pmu_conf->arch_info;
> + return arch_info->vpmds[get_smt_id()][vpmd];
> +}
> +
> +static inline void __pfm_pmd_swrite(unsigned int vpmd, u64 val)
> +{
> + struct pfm_arch_pmu_info *arch_info = pfm_pmu_conf->arch_info;
> + arch_info->vpmds[get_smt_id()][vpmd] = val;
> +}
> +
> +/* Context must be locked for atomic read and write operations */
> +
> +u64 pfm_pmd_sread(struct pfm_context *ctx, unsigned int cnum)
> +{
> + unsigned int vpmd = pfm_pmu_conf->pmd_desc[cnum].hw_addr;
> + BUG_ON(! spin_is_locked(&ctx->lock));
> + if (vpmd >= PFM_NUM_VPMDS)
> + return 0;
> + return __pfm_pmd_sread(vpmd);
> +}
If you're going to do error checking here even though you're already
checking for vpmd >= PFM_NUM_VPMDS in pfm_pmd_sinc(), then please return
an errno such as -EINVAL instead of 0 here. Otherwise a distinct caller
can still set pfm_pmu_conf->arch_info->vpmds[get_smt_id()][vpmd] to 1 if
it calls pfm_pmd_sread() outside of pfm_pmd_sinc().
> +void pfm_pmd_swrite(struct pfm_context *ctx, unsigned int cnum, u64 val)
> +{
> + unsigned int vpmd = pfm_pmu_conf->pmd_desc[cnum].hw_addr;
> + BUG_ON(! spin_is_locked(&ctx->lock));
> + if (vpmd >= PFM_NUM_VPMDS)
> + return;
> + __pfm_pmd_swrite(vpmd, val);
> +}
> +
> +static void pfm_pmd_sinc(struct pfm_context *ctx, unsigned int cnum)
> +{
> + unsigned int vpmd = pfm_pmu_conf->pmd_desc[cnum].hw_addr;
> + u64 val = 0;
> + BUG_ON(! spin_is_locked(&ctx->lock));
> + if (vpmd >= PFM_NUM_VPMDS)
> + return;
> + val = __pfm_pmd_sread(vpmd);
> + __pfm_pmd_swrite(vpmd, val + 1);
> +}
> +
> void __pfm_write_reg_p4(const struct pfm_arch_ext_reg *xreg, u64 val)
> {
> u64 pmi;
> @@ -309,7 +355,13 @@ static int pfm_stop_save_p6(struct pfm_c
> count = set->nused_pmds;
> for (i = 0; count; i++) {
> if (test_bit(i, ulp(set->used_pmds))) {
> - val = pfm_arch_read_pmd(ctx, i);
> + count--;
> + /* Skip for IBS event counter */
> + if (unlikely(i == arch_info->ibsfetchctr_idx))
> + continue;
> + if (unlikely(i == arch_info->ibsopctr_idx))
> + continue;
> + val = pfm_read_pmd(ctx, i);
> if (likely(test_bit(i, ulp(cnt_mask)))) {
> if (!(val & wmask)) {
> __set_bit(i, ulp(set->povfl_pmds));
Why are you moving the count-- ahead in this code block? Isn't it more
appropriate in the for() statement alongside i++ anyway? (There's
multiple instances of this in this patch.)
> @@ -318,7 +370,6 @@ static int pfm_stop_save_p6(struct pfm_c
> val = (pmds[i] & ~ovfl_mask) | (val & ovfl_mask);
> }
> pmds[i] = val;
> - count--;
> }
> }
> /* 0 means: no need to save PMDs at upper level */
> @@ -328,6 +379,55 @@ static int pfm_stop_save_p6(struct pfm_c
> static int pfm_stop_save_amd64(struct pfm_context *ctx,
> struct pfm_event_set *set)
> {
Does this need ctx->lock to be locked even in the !(arch_info->flags &
PFM_X86_FL_IBS) case? If so, it needs a comment.
> + struct pfm_arch_pmu_info *arch_info = pfm_pmu_conf->arch_info;
> + struct pfm_reg_desc *pmc_desc = pfm_pmu_conf->pmc_desc;
> + u64 val = 0;
> +
> + if (arch_info->flags & PFM_X86_FL_IBS) {
> + /* Check for IBS events */
> + BUG_ON(!spin_is_locked(&ctx->lock));
> + do {
> + if (unlikely(test_bit(arch_info->ibsfetchctr_idx,
> + ulp(set->povfl_pmds)))) {
> + PFM_DBG("Warning: IBS fetch data already pending");
> + continue;
> + }
> + rdmsrl(pmc_desc[arch_info->ibsfetchctl_idx].hw_addr,
> + val);
> + if (!(val & PFM_AMD64_IBSFETCHVAL))
> + continue;
> + PFM_DBG_ovfl("New IBS fetch data available");
> + __set_bit(arch_info->ibsfetchctr_idx,
> + ulp(set->povfl_pmds));
> + set->npend_ovfls++;
> + /* Increment event counter */
> + pfm_pmd_sinc(ctx, arch_info->ibsfetchctr_idx);
> + /* Update PMD */
> + set->view->set_pmds[arch_info->ibsfetchctr_idx] =
> + pfm_read_pmd(ctx, arch_info->ibsfetchctr_idx);
> + } while (0);
Please move this out of the do { ... } while (0); loop and change the
continue's to goto's.
> + do {
> + if (unlikely(test_bit(arch_info->ibsopctr_idx,
> + ulp(set->povfl_pmds)))) {
> + PFM_DBG("Warning: IBS execution data already pending");
> + continue;
> + }
> + rdmsrl(pmc_desc[arch_info->ibsopctl_idx].hw_addr, val);
> + if (!(val & PFM_AMD64_IBSOPVAL))
> + continue;
> + PFM_DBG_ovfl("New IBS execution data available");
> + __set_bit(arch_info->ibsopctr_idx,
> + ulp(set->povfl_pmds));
> + set->npend_ovfls++;
> + /* Increment event counter */
> + pfm_pmd_sinc(ctx, arch_info->ibsopctr_idx);
> + /* Update PMD */
> + set->view->set_pmds[arch_info->ibsopctr_idx] =
> + pfm_read_pmd(ctx, arch_info->ibsopctr_idx);
> + } while (0);
Likewise.
> @@ -637,6 +737,7 @@ void pfm_arch_start(struct task_struct *
> struct pfm_event_set *set)
> {
> struct pfm_arch_context *ctx_arch;
> + struct pfm_reg_desc* pmc_desc;
> u64 *mask;
> u16 i, num;
>
Kernel coding style is struct pfm_reg_desc *pmc_desc here.
> @@ -673,11 +774,18 @@ void pfm_arch_start(struct task_struct *
> */
> num = pfm_pmu_conf->num_pmcs;
> mask = pfm_pmu_conf->impl_pmcs;
> + pmc_desc = pfm_pmu_conf->pmc_desc;
> for (i = 0; num; i++) {
> - if (test_bit(i, ulp(mask))) {
> + if (!test_bit(i, ulp(mask)))
> + continue;
> + num--;
> + if (!test_bit(i, ulp(set->used_pmcs)))
> + /* If the PMC is not used, we initialize with
> + * interrupts disabled. */
> + pfm_arch_write_pmc(ctx, i, set->pmcs[i]
> + & ~pmc_desc[i].no_emul64_msk);
> + else
> pfm_arch_write_pmc(ctx, i, set->pmcs[i]);
> - num--;
> - }
> }
num-- should be placed alongside i++ in the for() statement.
> @@ -839,18 +947,37 @@ static int __kprobes pfm_has_ovfl_p6(voi
> xrd = arch_info->pmd_addrs;
>
> for (i = 0; num; i++) {
> - if (test_bit(i, ulp(cnt_mask))) {
> - rdmsrl(xrd[i].addrs[0], val);
> - if (!(val & wmask))
> - return 1;
> - num--;
> - }
> + if (!test_bit(i, ulp(cnt_mask)))
> + continue;
> + num--;
> + /* Skip for IBS event counter */
> + if (unlikely(i == arch_info->ibsfetchctr_idx))
> + continue;
> + if (unlikely(i == arch_info->ibsopctr_idx))
> + continue;
> + rdmsrl(xrd[i].addrs[0], val);
> + if (!(val & wmask))
> + return 1;
> }
Same as above.
> @@ -973,11 +1100,18 @@ static struct notifier_block pfm_nmi_nb=
> int pfm_arch_pmu_config_init(struct _pfm_pmu_config *cfg)
> {
> struct pfm_arch_pmu_info *arch_info = cfg->arch_info;
> - struct pfm_arch_ext_reg *pc;
> + struct pfm_arch_ext_reg *pc, *pd;
> u64 *mask;
> unsigned int i, num, ena;
> + unsigned int ibs_check = 0;
> +#define IBS_CHECK_FETCHCTL 0x01
> +#define IBS_CHECK_FETCHCTR 0x02
> +#define IBS_CHECK_OPCTL 0x04
> +#define IBS_CHECK_OPCTR 0x08
> +#define IBS_CHECK_ALL 0x0f
>
These cannot occur in the middle of a function. Please move them to an
appropriate header file.
> pc = arch_info->pmc_addrs;
> + pd = arch_info->pmd_addrs;
>
> /*
> * ensure that PMU description is able to deal with NMI watchdog using
> @@ -1023,18 +1157,86 @@ int pfm_arch_pmu_config_init(struct _pfm
> num = cfg->num_pmcs;
> mask = cfg->impl_pmcs;
> ena = 0;
> + ibs_check = 0;
Didn't you initialize this to 0?
> Index: linux-2.6.22-rc3/arch/x86_64/perfmon/perfmon_k8.c
> ===================================================================
> --- linux-2.6.22-rc3.orig/arch/x86_64/perfmon/perfmon_k8.c
> +++ linux-2.6.22-rc3/arch/x86_64/perfmon/perfmon_k8.c
> @@ -5,6 +5,9 @@
> * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
> * Contributed by Stephane Eranian <eranian@hpl.hp.com>
> *
> + * Copyright (c) 2007 Advanced Micro Devices, Inc.
> + * Contributed by Robert Richter <robert.richter@amd.com>
> + *
> * This program is free software; you can redistribute it and/or
> * modify it under the terms of version 2 of the GNU General Public
> * License as published by the Free Software Foundation.
> @@ -25,6 +28,7 @@
> #include <asm/nmi.h>
>
> MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
> +MODULE_AUTHOR("Robert Richter <robert.richter@amd.com>");
> MODULE_DESCRIPTION("AMD64 PMU description table");
> MODULE_LICENSE("GPL");
>
> @@ -38,12 +42,26 @@ static struct pfm_arch_pmu_info pfm_k8_p
> /* pmc1 */ {{MSR_K7_EVNTSEL1, 0}, 1, PFM_REGT_EN},
> /* pmc2 */ {{MSR_K7_EVNTSEL2, 0}, 2, PFM_REGT_EN},
> /* pmc3 */ {{MSR_K7_EVNTSEL3, 0}, 3, PFM_REGT_EN},
> +/* pmc4 */ {{MSR_AMD64_IBSFETCHCTL, 0}, 0, PFM_REGT_EN|PFM_REGT_IBS},
> +/* pmc5 */ {{MSR_AMD64_IBSOPCTL, 0}, 0, PFM_REGT_EN|PFM_REGT_IBS},
> },
> .pmd_addrs = {
> /* pmd0 */ {{MSR_K7_PERFCTR0, 0}, 0, PFM_REGT_CTR},
> /* pmd1 */ {{MSR_K7_PERFCTR1, 0}, 0, PFM_REGT_CTR},
> /* pmd2 */ {{MSR_K7_PERFCTR2, 0}, 0, PFM_REGT_CTR},
> /* pmd3 */ {{MSR_K7_PERFCTR3, 0}, 0, PFM_REGT_CTR},
> +/* pmd4 */ {{0, 0}, 0, PFM_REGT_CTR|PFM_REGT_IBS},
> +/* pmd5 */ {{MSR_AMD64_IBSFETCHCTL, 0}, 0, PFM_REGT_IBS},
> +/* pmd6 */ {{MSR_AMD64_IBSFETCHLINAD, 0}, 0, PFM_REGT_IBS},
> +/* pmd7 */ {{MSR_AMD64_IBSFETCHPHYSAD, 0}, 0, PFM_REGT_IBS},
> +/* pmd8 */ {{0, 0}, 0, PFM_REGT_CTR|PFM_REGT_IBS},
> +/* pmd9 */ {{MSR_AMD64_IBSOPCTL, 0}, 0, PFM_REGT_IBS},
> +/* pmd10 */ {{MSR_AMD64_IBSOPRIP, 0}, 0, PFM_REGT_IBS},
> +/* pmd11 */ {{MSR_AMD64_IBSOPDATA, 0}, 0, PFM_REGT_IBS},
> +/* pmd12 */ {{MSR_AMD64_IBSOPDATA2, 0}, 0, PFM_REGT_IBS},
> +/* pmd13 */ {{MSR_AMD64_IBSOPDATA3, 0}, 0, PFM_REGT_IBS|PFM_REGT_IBS_EXT},
> +/* pmd14 */ {{MSR_AMD64_IBSDCLINAD, 0}, 0, PFM_REGT_IBS|PFM_REGT_IBS_EXT},
> +/* pmd15 */ {{MSR_AMD64_IBSDCPHYSAD, 0}, 0, PFM_REGT_IBS|PFM_REGT_IBS_EXT},
> },
> .pmu_style = PFM_X86_PMU_AMD64
> };
> @@ -65,11 +83,26 @@ static struct pfm_arch_pmu_info pfm_k8_p
> | (1ULL<<20) \
> | (1ULL<<21))
>
> +/*
> + * We mark readonly bits as reserved and use the PMC for control
> + * operations only. Interrupt enable and clear bits are reserved too.
> + * IBSFETCHCTL is also implemented as PMD, where data can be read
> + * from. Same applies to IBSOPCTR.
> + */
> +#define PFM_AMD64_IBSFETCHCTL_VAL PFM_AMD64_IBSFETCHEN
> +#define PFM_AMD64_IBSFETCHCTL_NO64 PFM_AMD64_IBSFETCHEN
> +#define PFM_AMD64_IBSFETCHCTL_RSVD (~((1ULL<<16)-1))
> +#define PFM_AMD64_IBSOPCTL_VAL PFM_AMD64_IBSOPEN
> +#define PFM_AMD64_IBSOPCTL_NO64 PFM_AMD64_IBSOPEN
> +#define PFM_AMD64_IBSOPCTL_RSVD (~((1ULL<<16)-1))
> +
> static struct pfm_reg_desc pfm_k8_pmc_desc[]={
> /* pmc0 */ PMC_D(PFM_REG_I64, "PERFSEL0", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL0),
> /* pmc1 */ PMC_D(PFM_REG_I64, "PERFSEL1", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL1),
> /* pmc2 */ PMC_D(PFM_REG_I64, "PERFSEL2", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL2),
> /* pmc3 */ PMC_D(PFM_REG_I64, "PERFSEL3", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL3),
> +/* pmc4 */ PMC_D(PFM_REG_I, "IBSFETCHCTL", PFM_AMD64_IBSFETCHCTL_VAL, PFM_AMD64_IBSFETCHCTL_RSVD, PFM_AMD64_IBSFETCHCTL_NO64, MSR_AMD64_IBSFETCHCTL),
> +/* pmc5 */ PMC_D(PFM_REG_I, "IBSOPCTL", PFM_AMD64_IBSOPCTL_VAL, PFM_AMD64_IBSOPCTL_RSVD, PFM_AMD64_IBSOPCTL_NO64, MSR_AMD64_IBSOPCTL),
> };
> #define PFM_AMD_NUM_PMCS ARRAY_SIZE(pfm_k8_pmc_desc)
>
> @@ -78,6 +111,18 @@ static struct pfm_reg_desc pfm_k8_pmd_de
> /* pmd1 */ PMD_D(PFM_REG_C, "PERFCTR1", MSR_K7_PERFCTR1),
> /* pmd2 */ PMD_D(PFM_REG_C, "PERFCTR2", MSR_K7_PERFCTR2),
> /* pmd3 */ PMD_D(PFM_REG_C, "PERFCTR3", MSR_K7_PERFCTR3),
> +/* pmd4 */ PMD_D(PFM_REG_ICV, "IBSFETCHCTR", PFM_VPMD_AMD64_IBSFETCHCTR),
> +/* pmd5 */ PMD_D(PFM_REG_IRO, "IBSFETCHCTL", MSR_AMD64_IBSFETCHCTL),
> +/* pmd6 */ PMD_D(PFM_REG_IRO, "IBSFETCHLINAD", MSR_AMD64_IBSFETCHLINAD),
> +/* pmd7 */ PMD_D(PFM_REG_IRO, "IBSFETCHPHYSAD", MSR_AMD64_IBSFETCHPHYSAD),
> +/* pmd8 */ PMD_D(PFM_REG_ICV, "IBSOPCTR", PFM_VPMD_AMD64_IBSOPCTR),
> +/* pmd9 */ PMD_D(PFM_REG_IRO, "IBSOPCTL", MSR_AMD64_IBSOPCTL),
> +/* pmd10 */ PMD_D(PFM_REG_IRO, "IBSOPRIP", MSR_AMD64_IBSOPRIP),
> +/* pmd11 */ PMD_D(PFM_REG_IRO, "IBSOPDATA", MSR_AMD64_IBSOPDATA),
> +/* pmd12 */ PMD_D(PFM_REG_IRO, "IBSOPDATA2", MSR_AMD64_IBSOPDATA2),
> +/* pmd13 */ PMD_D(PFM_REG_IRO, "IBSOPDATA3", MSR_AMD64_IBSOPDATA3),
> +/* pmd14 */ PMD_D(PFM_REG_IRO, "IBSDCLINAD", MSR_AMD64_IBSDCLINAD),
> +/* pmd15 */ PMD_D(PFM_REG_IRO, "IBSDCPHYSAD", MSR_AMD64_IBSDCPHYSAD),
> };
> #define PFM_AMD_NUM_PMDS ARRAY_SIZE(pfm_k8_pmd_desc)
>
> @@ -284,6 +329,9 @@ static int pfm_k8_detect_nmi(void)
> * auto-detect which perfctr/eventsel is used by NMI watchdog
> */
> for (i=0; i < PFM_AMD_NUM_PMDS; i++) {
> + /* skip IBS registers */
> + if (pfm_k8_pmu_info.pmc_addrs[i].reg_type & PFM_REGT_IBS)
> + continue;
> if (avail_to_resrv_perfctr_nmi(pfm_k8_pmd_desc[i].hw_addr))
> continue;
>
> @@ -332,10 +380,75 @@ static int pfm_k8_probe_pmu(void)
> pfm_k8_setup_nb_event_control();
>
> PFM_INFO("Using AMD64 PMU");
> + if (pfm_k8_pmu_info.flags & PFM_X86_FL_IBS)
> + PFM_INFO("IBS is supported by processor");
> + if (pfm_k8_pmu_info.flags & PFM_X86_FL_IBS_EXT)
> + PFM_INFO("IBS extended registers are supported by processor");
>
> return 0;
> }
>
> +static inline void
> +pfm_amd64_check_register(struct pfm_pmu_config *cfg,
> + struct pfm_reg_desc *reg,
> + struct pfm_arch_ext_reg *ext_reg)
> +{
> + struct pfm_arch_pmu_info *arch_info = cfg->arch_info;
> +
> + if (!(ext_reg->reg_type & PFM_REGT_AMD64))
> + /* No special AMD64 PMU register */
> + return;
> +
> + /* Disable register */
> + reg->type &= ~PFM_REG_I;
> +
> + switch (ext_reg->reg_type & PFM_REGT_AMD64) {
> + case (PFM_REGT_IBS):
> + /* IBS register */
> + if (!(arch_info->flags & PFM_X86_FL_IBS))
> + return;
> + break;
> + case (PFM_REGT_IBS|PFM_REGT_IBS_EXT):
> + /* IBS extended register */
> + if (!(arch_info->flags & PFM_X86_FL_IBS_EXT))
> + return;
> + break;
> + default:
> + return;
> + }
> +
> + /* Enable register */
> + reg->type |= PFM_REG_I;
> +}
> +
> +static void pfm_amd64_setup_pmu(struct pfm_pmu_config *cfg)
> +{
> + u16 i;
> + struct pfm_arch_pmu_info *arch_info = cfg->arch_info;
> +
> + /* set PMU features depending on CPUID */
> + arch_info->flags &= ~(PFM_X86_FL_IBS|PFM_X86_FL_IBS_EXT);
> + switch (current_cpu_data.x86) {
> + case 15:
> + break;
> + case 16:
> + arch_info->flags |= PFM_X86_FL_IBS;
> + break;
> + default:
> + break;
> + }
Could you just collapse this into
if (current_cpu_data.x86 == 16)
arch_info->flags |= PFM_X86_FL_IBS;
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [patch 6/8] 2.6.22-rc3 perfmon2 : IBS implementation for AMD64
2007-06-15 20:15 ` David Rientjes
@ 2007-06-20 18:38 ` Robert Richter
0 siblings, 0 replies; 19+ messages in thread
From: Robert Richter @ 2007-06-20 18:38 UTC (permalink / raw)
To: David Rientjes; +Cc: Stephane Eranian, Andi Kleen, linux-kernel
David,
> > +u64 pfm_pmd_sread(struct pfm_context *ctx, unsigned int cnum)
> > +{
> > + unsigned int vpmd = pfm_pmu_conf->pmd_desc[cnum].hw_addr;
> > + BUG_ON(! spin_is_locked(&ctx->lock));
> > + if (vpmd >= PFM_NUM_VPMDS)
> > + return 0;
> > + return __pfm_pmd_sread(vpmd);
> > +}
>
> If you're going to do error checking here even though you're already
> checking for vpmd >= PFM_NUM_VPMDS in pfm_pmd_sinc(), then please return
> an errno such as -EINVAL instead of 0 here. Otherwise a distinct caller
> can still set pfm_pmu_conf->arch_info->vpmds[get_smt_id()][vpmd] to 1 if
> it calls pfm_pmd_sread() outside of pfm_pmd_sinc().
I modified the error handler, pls. see 2nd patch version.
> > @@ -309,7 +355,13 @@ static int pfm_stop_save_p6(struct pfm_c
> > count = set->nused_pmds;
> > for (i = 0; count; i++) {
> > if (test_bit(i, ulp(set->used_pmds))) {
> > - val = pfm_arch_read_pmd(ctx, i);
> > + count--;
> > + /* Skip for IBS event counter */
> > + if (unlikely(i == arch_info->ibsfetchctr_idx))
> > + continue;
> > + if (unlikely(i == arch_info->ibsopctr_idx))
> > + continue;
> > + val = pfm_read_pmd(ctx, i);
> > if (likely(test_bit(i, ulp(cnt_mask)))) {
> > if (!(val & wmask)) {
> > __set_bit(i, ulp(set->povfl_pmds));
>
> Why are you moving the count-- ahead in this code block? Isn't it more
> appropriate in the for() statement alongside i++ anyway? (There's
> multiple instances of this in this patch.)
'count' may only be decremented if the PMD is used. The loop finishes
after all used PMDs are served.
>
> > @@ -318,7 +370,6 @@ static int pfm_stop_save_p6(struct pfm_c
> > val = (pmds[i] & ~ovfl_mask) | (val & ovfl_mask);
> > }
> > pmds[i] = val;
> > - count--;
> > }
> > }
> > /* 0 means: no need to save PMDs at upper level */
> > @@ -328,6 +379,55 @@ static int pfm_stop_save_p6(struct pfm_c
> > static int pfm_stop_save_amd64(struct pfm_context *ctx,
> > struct pfm_event_set *set)
> > {
>
> Does this need ctx->lock to be locked even in the !(arch_info->flags &
> PFM_X86_FL_IBS) case? If so, it needs a comment.
It seems this function is only called if the lock is set, but I'm not
sure if the non-IBS code requires the lock. The IBS code requires the
lock, thus I added BUG_ON().
>
> > + struct pfm_arch_pmu_info *arch_info = pfm_pmu_conf->arch_info;
> > + struct pfm_reg_desc *pmc_desc = pfm_pmu_conf->pmc_desc;
> > + u64 val = 0;
> > +
> > + if (arch_info->flags & PFM_X86_FL_IBS) {
> > + /* Check for IBS events */
> > + BUG_ON(!spin_is_locked(&ctx->lock));
> > + do {
> > + if (unlikely(test_bit(arch_info->ibsfetchctr_idx,
> > + ulp(set->povfl_pmds)))) {
> > + PFM_DBG("Warning: IBS fetch data already pending");
> > + continue;
> > + }
> > + rdmsrl(pmc_desc[arch_info->ibsfetchctl_idx].hw_addr,
> > + val);
> > + if (!(val & PFM_AMD64_IBSFETCHVAL))
> > + continue;
> > + PFM_DBG_ovfl("New IBS fetch data available");
> > + __set_bit(arch_info->ibsfetchctr_idx,
> > + ulp(set->povfl_pmds));
> > + set->npend_ovfls++;
> > + /* Increment event counter */
> > + pfm_pmd_sinc(ctx, arch_info->ibsfetchctr_idx);
> > + /* Update PMD */
> > + set->view->set_pmds[arch_info->ibsfetchctr_idx] =
> > + pfm_read_pmd(ctx, arch_info->ibsfetchctr_idx);
> > + } while (0);
>
> Please move this out of the do { ... } while (0); loop and change the
> continue's to goto's.
Reworked in patch version 2.
>
> > + do {
> > + if (unlikely(test_bit(arch_info->ibsopctr_idx,
> > + ulp(set->povfl_pmds)))) {
> > + PFM_DBG("Warning: IBS execution data already pending");
> > + continue;
> > + }
> > + rdmsrl(pmc_desc[arch_info->ibsopctl_idx].hw_addr, val);
> > + if (!(val & PFM_AMD64_IBSOPVAL))
> > + continue;
> > + PFM_DBG_ovfl("New IBS execution data available");
> > + __set_bit(arch_info->ibsopctr_idx,
> > + ulp(set->povfl_pmds));
> > + set->npend_ovfls++;
> > + /* Increment event counter */
> > + pfm_pmd_sinc(ctx, arch_info->ibsopctr_idx);
> > + /* Update PMD */
> > + set->view->set_pmds[arch_info->ibsopctr_idx] =
> > + pfm_read_pmd(ctx, arch_info->ibsopctr_idx);
> > + } while (0);
>
> Likewise.
>
> > @@ -637,6 +737,7 @@ void pfm_arch_start(struct task_struct *
> > struct pfm_event_set *set)
> > {
> > struct pfm_arch_context *ctx_arch;
> > + struct pfm_reg_desc* pmc_desc;
> > u64 *mask;
> > u16 i, num;
> >
>
> Kernel coding style is struct pfm_reg_desc *pmc_desc here.
Changed.
>
> > @@ -673,11 +774,18 @@ void pfm_arch_start(struct task_struct *
> > */
> > num = pfm_pmu_conf->num_pmcs;
> > mask = pfm_pmu_conf->impl_pmcs;
> > + pmc_desc = pfm_pmu_conf->pmc_desc;
> > for (i = 0; num; i++) {
> > - if (test_bit(i, ulp(mask))) {
> > + if (!test_bit(i, ulp(mask)))
> > + continue;
> > + num--;
> > + if (!test_bit(i, ulp(set->used_pmcs)))
> > + /* If the PMC is not used, we initialize with
> > + * interrupts disabled. */
> > + pfm_arch_write_pmc(ctx, i, set->pmcs[i]
> > + & ~pmc_desc[i].no_emul64_msk);
> > + else
> > pfm_arch_write_pmc(ctx, i, set->pmcs[i]);
> > - num--;
> > - }
> > }
>
> num-- should be placed alongside i++ in the for() statement.
Same as above. See 'count' discussion of pfm_stop_save_p6() diff.
>
> > @@ -839,18 +947,37 @@ static int __kprobes pfm_has_ovfl_p6(voi
> > xrd = arch_info->pmd_addrs;
> >
> > for (i = 0; num; i++) {
> > - if (test_bit(i, ulp(cnt_mask))) {
> > - rdmsrl(xrd[i].addrs[0], val);
> > - if (!(val & wmask))
> > - return 1;
> > - num--;
> > - }
> > + if (!test_bit(i, ulp(cnt_mask)))
> > + continue;
> > + num--;
> > + /* Skip for IBS event counter */
> > + if (unlikely(i == arch_info->ibsfetchctr_idx))
> > + continue;
> > + if (unlikely(i == arch_info->ibsopctr_idx))
> > + continue;
> > + rdmsrl(xrd[i].addrs[0], val);
> > + if (!(val & wmask))
> > + return 1;
> > }
>
> Same as above.
>
> > @@ -973,11 +1100,18 @@ static struct notifier_block pfm_nmi_nb=
> > int pfm_arch_pmu_config_init(struct _pfm_pmu_config *cfg)
> > {
> > struct pfm_arch_pmu_info *arch_info = cfg->arch_info;
> > - struct pfm_arch_ext_reg *pc;
> > + struct pfm_arch_ext_reg *pc, *pd;
> > u64 *mask;
> > unsigned int i, num, ena;
> > + unsigned int ibs_check = 0;
> > +#define IBS_CHECK_FETCHCTL 0x01
> > +#define IBS_CHECK_FETCHCTR 0x02
> > +#define IBS_CHECK_OPCTL 0x04
> > +#define IBS_CHECK_OPCTR 0x08
> > +#define IBS_CHECK_ALL 0x0f
> >
>
> These cannot occur in the middle of a function. Please move them to an
> appropriate header file.
Changed.
>
> > pc = arch_info->pmc_addrs;
> > + pd = arch_info->pmd_addrs;
> >
> > /*
> > * ensure that PMU description is able to deal with NMI watchdog using
> > @@ -1023,18 +1157,86 @@ int pfm_arch_pmu_config_init(struct _pfm
> > num = cfg->num_pmcs;
> > mask = cfg->impl_pmcs;
> > ena = 0;
> > + ibs_check = 0;
>
> Didn't you initialize this to 0?
Changed.
>
> > Index: linux-2.6.22-rc3/arch/x86_64/perfmon/perfmon_k8.c
> > ===================================================================
> > --- linux-2.6.22-rc3.orig/arch/x86_64/perfmon/perfmon_k8.c
> > +++ linux-2.6.22-rc3/arch/x86_64/perfmon/perfmon_k8.c
> > @@ -5,6 +5,9 @@
> > * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
> > * Contributed by Stephane Eranian <eranian@hpl.hp.com>
> > *
> > + * Copyright (c) 2007 Advanced Micro Devices, Inc.
> > + * Contributed by Robert Richter <robert.richter@amd.com>
> > + *
> > * This program is free software; you can redistribute it and/or
> > * modify it under the terms of version 2 of the GNU General Public
> > * License as published by the Free Software Foundation.
> > @@ -25,6 +28,7 @@
> > #include <asm/nmi.h>
> >
> > MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
> > +MODULE_AUTHOR("Robert Richter <robert.richter@amd.com>");
> > MODULE_DESCRIPTION("AMD64 PMU description table");
> > MODULE_LICENSE("GPL");
> >
> > @@ -38,12 +42,26 @@ static struct pfm_arch_pmu_info pfm_k8_p
> > /* pmc1 */ {{MSR_K7_EVNTSEL1, 0}, 1, PFM_REGT_EN},
> > /* pmc2 */ {{MSR_K7_EVNTSEL2, 0}, 2, PFM_REGT_EN},
> > /* pmc3 */ {{MSR_K7_EVNTSEL3, 0}, 3, PFM_REGT_EN},
> > +/* pmc4 */ {{MSR_AMD64_IBSFETCHCTL, 0}, 0, PFM_REGT_EN|PFM_REGT_IBS},
> > +/* pmc5 */ {{MSR_AMD64_IBSOPCTL, 0}, 0, PFM_REGT_EN|PFM_REGT_IBS},
> > },
> > .pmd_addrs = {
> > /* pmd0 */ {{MSR_K7_PERFCTR0, 0}, 0, PFM_REGT_CTR},
> > /* pmd1 */ {{MSR_K7_PERFCTR1, 0}, 0, PFM_REGT_CTR},
> > /* pmd2 */ {{MSR_K7_PERFCTR2, 0}, 0, PFM_REGT_CTR},
> > /* pmd3 */ {{MSR_K7_PERFCTR3, 0}, 0, PFM_REGT_CTR},
> > +/* pmd4 */ {{0, 0}, 0, PFM_REGT_CTR|PFM_REGT_IBS},
> > +/* pmd5 */ {{MSR_AMD64_IBSFETCHCTL, 0}, 0, PFM_REGT_IBS},
> > +/* pmd6 */ {{MSR_AMD64_IBSFETCHLINAD, 0}, 0, PFM_REGT_IBS},
> > +/* pmd7 */ {{MSR_AMD64_IBSFETCHPHYSAD, 0}, 0, PFM_REGT_IBS},
> > +/* pmd8 */ {{0, 0}, 0, PFM_REGT_CTR|PFM_REGT_IBS},
> > +/* pmd9 */ {{MSR_AMD64_IBSOPCTL, 0}, 0, PFM_REGT_IBS},
> > +/* pmd10 */ {{MSR_AMD64_IBSOPRIP, 0}, 0, PFM_REGT_IBS},
> > +/* pmd11 */ {{MSR_AMD64_IBSOPDATA, 0}, 0, PFM_REGT_IBS},
> > +/* pmd12 */ {{MSR_AMD64_IBSOPDATA2, 0}, 0, PFM_REGT_IBS},
> > +/* pmd13 */ {{MSR_AMD64_IBSOPDATA3, 0}, 0, PFM_REGT_IBS|PFM_REGT_IBS_EXT},
> > +/* pmd14 */ {{MSR_AMD64_IBSDCLINAD, 0}, 0, PFM_REGT_IBS|PFM_REGT_IBS_EXT},
> > +/* pmd15 */ {{MSR_AMD64_IBSDCPHYSAD, 0}, 0, PFM_REGT_IBS|PFM_REGT_IBS_EXT},
> > },
> > .pmu_style = PFM_X86_PMU_AMD64
> > };
> > @@ -65,11 +83,26 @@ static struct pfm_arch_pmu_info pfm_k8_p
> > | (1ULL<<20) \
> > | (1ULL<<21))
> >
> > +/*
> > + * We mark readonly bits as reserved and use the PMC for control
> > + * operations only. Interrupt enable and clear bits are reserved too.
> > + * IBSFETCHCTL is also implemented as PMD, where data can be read
> > + * from. Same applies to IBSOPCTR.
> > + */
> > +#define PFM_AMD64_IBSFETCHCTL_VAL PFM_AMD64_IBSFETCHEN
> > +#define PFM_AMD64_IBSFETCHCTL_NO64 PFM_AMD64_IBSFETCHEN
> > +#define PFM_AMD64_IBSFETCHCTL_RSVD (~((1ULL<<16)-1))
> > +#define PFM_AMD64_IBSOPCTL_VAL PFM_AMD64_IBSOPEN
> > +#define PFM_AMD64_IBSOPCTL_NO64 PFM_AMD64_IBSOPEN
> > +#define PFM_AMD64_IBSOPCTL_RSVD (~((1ULL<<16)-1))
> > +
> > static struct pfm_reg_desc pfm_k8_pmc_desc[]={
> > /* pmc0 */ PMC_D(PFM_REG_I64, "PERFSEL0", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL0),
> > /* pmc1 */ PMC_D(PFM_REG_I64, "PERFSEL1", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL1),
> > /* pmc2 */ PMC_D(PFM_REG_I64, "PERFSEL2", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL2),
> > /* pmc3 */ PMC_D(PFM_REG_I64, "PERFSEL3", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL3),
> > +/* pmc4 */ PMC_D(PFM_REG_I, "IBSFETCHCTL", PFM_AMD64_IBSFETCHCTL_VAL, PFM_AMD64_IBSFETCHCTL_RSVD, PFM_AMD64_IBSFETCHCTL_NO64, MSR_AMD64_IBSFETCHCTL),
> > +/* pmc5 */ PMC_D(PFM_REG_I, "IBSOPCTL", PFM_AMD64_IBSOPCTL_VAL, PFM_AMD64_IBSOPCTL_RSVD, PFM_AMD64_IBSOPCTL_NO64, MSR_AMD64_IBSOPCTL),
> > };
> > #define PFM_AMD_NUM_PMCS ARRAY_SIZE(pfm_k8_pmc_desc)
> >
> > @@ -78,6 +111,18 @@ static struct pfm_reg_desc pfm_k8_pmd_de
> > /* pmd1 */ PMD_D(PFM_REG_C, "PERFCTR1", MSR_K7_PERFCTR1),
> > /* pmd2 */ PMD_D(PFM_REG_C, "PERFCTR2", MSR_K7_PERFCTR2),
> > /* pmd3 */ PMD_D(PFM_REG_C, "PERFCTR3", MSR_K7_PERFCTR3),
> > +/* pmd4 */ PMD_D(PFM_REG_ICV, "IBSFETCHCTR", PFM_VPMD_AMD64_IBSFETCHCTR),
> > +/* pmd5 */ PMD_D(PFM_REG_IRO, "IBSFETCHCTL", MSR_AMD64_IBSFETCHCTL),
> > +/* pmd6 */ PMD_D(PFM_REG_IRO, "IBSFETCHLINAD", MSR_AMD64_IBSFETCHLINAD),
> > +/* pmd7 */ PMD_D(PFM_REG_IRO, "IBSFETCHPHYSAD", MSR_AMD64_IBSFETCHPHYSAD),
> > +/* pmd8 */ PMD_D(PFM_REG_ICV, "IBSOPCTR", PFM_VPMD_AMD64_IBSOPCTR),
> > +/* pmd9 */ PMD_D(PFM_REG_IRO, "IBSOPCTL", MSR_AMD64_IBSOPCTL),
> > +/* pmd10 */ PMD_D(PFM_REG_IRO, "IBSOPRIP", MSR_AMD64_IBSOPRIP),
> > +/* pmd11 */ PMD_D(PFM_REG_IRO, "IBSOPDATA", MSR_AMD64_IBSOPDATA),
> > +/* pmd12 */ PMD_D(PFM_REG_IRO, "IBSOPDATA2", MSR_AMD64_IBSOPDATA2),
> > +/* pmd13 */ PMD_D(PFM_REG_IRO, "IBSOPDATA3", MSR_AMD64_IBSOPDATA3),
> > +/* pmd14 */ PMD_D(PFM_REG_IRO, "IBSDCLINAD", MSR_AMD64_IBSDCLINAD),
> > +/* pmd15 */ PMD_D(PFM_REG_IRO, "IBSDCPHYSAD", MSR_AMD64_IBSDCPHYSAD),
> > };
> > #define PFM_AMD_NUM_PMDS ARRAY_SIZE(pfm_k8_pmd_desc)
> >
> > @@ -284,6 +329,9 @@ static int pfm_k8_detect_nmi(void)
> > * auto-detect which perfctr/eventsel is used by NMI watchdog
> > */
> > for (i=0; i < PFM_AMD_NUM_PMDS; i++) {
> > + /* skip IBS registers */
> > + if (pfm_k8_pmu_info.pmc_addrs[i].reg_type & PFM_REGT_IBS)
> > + continue;
> > if (avail_to_resrv_perfctr_nmi(pfm_k8_pmd_desc[i].hw_addr))
> > continue;
> >
> > @@ -332,10 +380,75 @@ static int pfm_k8_probe_pmu(void)
> > pfm_k8_setup_nb_event_control();
> >
> > PFM_INFO("Using AMD64 PMU");
> > + if (pfm_k8_pmu_info.flags & PFM_X86_FL_IBS)
> > + PFM_INFO("IBS is supported by processor");
> > + if (pfm_k8_pmu_info.flags & PFM_X86_FL_IBS_EXT)
> > + PFM_INFO("IBS extended registers are supported by processor");
> >
> > return 0;
> > }
> >
> > +static inline void
> > +pfm_amd64_check_register(struct pfm_pmu_config *cfg,
> > + struct pfm_reg_desc *reg,
> > + struct pfm_arch_ext_reg *ext_reg)
> > +{
> > + struct pfm_arch_pmu_info *arch_info = cfg->arch_info;
> > +
> > + if (!(ext_reg->reg_type & PFM_REGT_AMD64))
> > + /* No special AMD64 PMU register */
> > + return;
> > +
> > + /* Disable register */
> > + reg->type &= ~PFM_REG_I;
> > +
> > + switch (ext_reg->reg_type & PFM_REGT_AMD64) {
> > + case (PFM_REGT_IBS):
> > + /* IBS register */
> > + if (!(arch_info->flags & PFM_X86_FL_IBS))
> > + return;
> > + break;
> > + case (PFM_REGT_IBS|PFM_REGT_IBS_EXT):
> > + /* IBS extended register */
> > + if (!(arch_info->flags & PFM_X86_FL_IBS_EXT))
> > + return;
> > + break;
> > + default:
> > + return;
> > + }
> > +
> > + /* Enable register */
> > + reg->type |= PFM_REG_I;
> > +}
> > +
> > +static void pfm_amd64_setup_pmu(struct pfm_pmu_config *cfg)
> > +{
> > + u16 i;
> > + struct pfm_arch_pmu_info *arch_info = cfg->arch_info;
> > +
> > + /* set PMU features depending on CPUID */
> > + arch_info->flags &= ~(PFM_X86_FL_IBS|PFM_X86_FL_IBS_EXT);
> > + switch (current_cpu_data.x86) {
> > + case 15:
> > + break;
> > + case 16:
> > + arch_info->flags |= PFM_X86_FL_IBS;
> > + break;
> > + default:
> > + break;
> > + }
>
> Could you just collapse this into
>
> if (current_cpu_data.x86 == 16)
> arch_info->flags |= PFM_X86_FL_IBS;
>
>
Since this code will be extended in the near future, a switch/case is
more appropriate and avoids nested if/else statements. You are
right that, to implement only this one flag, your suggested two lines
would be better.
-Robert
--
AMD Saxony, Dresden, Germany
Operating System Research Center
email: robert.richter@amd.com
^ permalink raw reply [flat|nested] 19+ messages in thread
* [patch 7/8] 2.6.22-rc3 perfmon2 : IBS implementation for AMD64
2007-06-15 16:56 [patch 0/8] 2.6.22-rc3 perfmon2 : IBS implementation for AMD64 Robert Richter
` (5 preceding siblings ...)
2007-06-15 17:00 ` [patch 6/8] " Robert Richter
@ 2007-06-15 17:01 ` Robert Richter
2007-06-15 17:02 ` [patch 8/8] " Robert Richter
7 siblings, 0 replies; 19+ messages in thread
From: Robert Richter @ 2007-06-15 17:01 UTC (permalink / raw)
To: Stephane Eranian; +Cc: Andi Kleen, linux-kernel
This patch renames *_k8_* symbols to *_amd64_*.
Signed-off-by: Robert Richter <robert.richter@amd.com>
Index: linux-2.6.22-rc3/arch/x86_64/perfmon/perfmon_k8.c
===================================================================
--- linux-2.6.22-rc3.orig/arch/x86_64/perfmon/perfmon_k8.c
+++ linux-2.6.22-rc3/arch/x86_64/perfmon/perfmon_k8.c
@@ -36,7 +36,7 @@ static int force_nmi;
MODULE_PARM_DESC(force_nmi, "bool: force use of NMI for PMU interrupt");
module_param(force_nmi, bool, 0600);
-static struct pfm_arch_pmu_info pfm_k8_pmu_info = {
+static struct pfm_arch_pmu_info pfm_amd64_pmu_info = {
.pmc_addrs = {
/* pmc0 */ {{MSR_K7_EVNTSEL0, 0}, 0, PFM_REGT_EN},
/* pmc1 */ {{MSR_K7_EVNTSEL1, 0}, 1, PFM_REGT_EN},
@@ -96,7 +96,7 @@ static struct pfm_arch_pmu_info pfm_k8_p
#define PFM_AMD64_IBSOPCTL_NO64 PFM_AMD64_IBSOPEN
#define PFM_AMD64_IBSOPCTL_RSVD (~((1ULL<<16)-1))
-static struct pfm_reg_desc pfm_k8_pmc_desc[]={
+static struct pfm_reg_desc pfm_amd64_pmc_desc[]={
/* pmc0 */ PMC_D(PFM_REG_I64, "PERFSEL0", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL0),
/* pmc1 */ PMC_D(PFM_REG_I64, "PERFSEL1", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL1),
/* pmc2 */ PMC_D(PFM_REG_I64, "PERFSEL2", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL2),
@@ -104,9 +104,9 @@ static struct pfm_reg_desc pfm_k8_pmc_de
/* pmc4 */ PMC_D(PFM_REG_I, "IBSFETCHCTL", PFM_AMD64_IBSFETCHCTL_VAL, PFM_AMD64_IBSFETCHCTL_RSVD, PFM_AMD64_IBSFETCHCTL_NO64, MSR_AMD64_IBSFETCHCTL),
/* pmc5 */ PMC_D(PFM_REG_I, "IBSOPCTL", PFM_AMD64_IBSOPCTL_VAL, PFM_AMD64_IBSOPCTL_RSVD, PFM_AMD64_IBSOPCTL_NO64, MSR_AMD64_IBSOPCTL),
};
-#define PFM_AMD_NUM_PMCS ARRAY_SIZE(pfm_k8_pmc_desc)
+#define PFM_AMD_NUM_PMCS ARRAY_SIZE(pfm_amd64_pmc_desc)
-static struct pfm_reg_desc pfm_k8_pmd_desc[] = {
+static struct pfm_reg_desc pfm_amd64_pmd_desc[] = {
/* pmd0 */ PMD_D(PFM_REG_C, "PERFCTR0", MSR_K7_PERFCTR0),
/* pmd1 */ PMD_D(PFM_REG_C, "PERFCTR1", MSR_K7_PERFCTR1),
/* pmd2 */ PMD_D(PFM_REG_C, "PERFCTR2", MSR_K7_PERFCTR2),
@@ -124,12 +124,12 @@ static struct pfm_reg_desc pfm_k8_pmd_de
/* pmd14 */ PMD_D(PFM_REG_IRO, "IBSDCLINAD", MSR_AMD64_IBSDCLINAD),
/* pmd15 */ PMD_D(PFM_REG_IRO, "IBSDCPHYSAD", MSR_AMD64_IBSDCPHYSAD),
};
-#define PFM_AMD_NUM_PMDS ARRAY_SIZE(pfm_k8_pmd_desc)
+#define PFM_AMD_NUM_PMDS ARRAY_SIZE(pfm_amd64_pmd_desc)
static struct pfm_context **pfm_nb_sys_owners;
static struct pfm_context *pfm_nb_task_owner;
-static struct pfm_pmu_config pfm_k8_pmu_conf;
+static struct pfm_pmu_config pfm_amd64_pmu_conf;
/*
* There can only one user per socket for the Northbridge (NB) events
@@ -148,7 +148,7 @@ static struct pfm_pmu_config pfm_k8_pmu_
* 0 : successfully acquire NB access
* < 0: errno, failed to acquire NB access
*/
-static int pfm_k8_acquire_nb(struct pfm_context *ctx)
+static int pfm_amd64_acquire_nb(struct pfm_context *ctx)
{
struct pfm_context **entry, *old;
int proc_id;
@@ -186,7 +186,7 @@ static int pfm_k8_acquire_nb(struct pfm_
*
* context is locked, interrupts are masked
*/
-static int pfm_k8_pmc_write_check(struct pfm_context *ctx,
+static int pfm_amd64_pmc_write_check(struct pfm_context *ctx,
struct pfm_event_set *set,
struct pfarg_pmc *req)
{
@@ -204,14 +204,14 @@ static int pfm_k8_pmc_write_check(struct
if (event < 0xee)
return 0;
- return pfm_k8_acquire_nb(ctx);
+ return pfm_amd64_acquire_nb(ctx);
}
/*
* invoked on pfm_load_context().
* context is locked, interrupts are masked
*/
-static int pfm_k8_load_context(struct pfm_context *ctx)
+static int pfm_amd64_load_context(struct pfm_context *ctx)
{
struct pfm_event_set *set;
unsigned int i, n;
@@ -231,13 +231,13 @@ static int pfm_k8_load_context(struct pf
}
return 0;
found:
- return pfm_k8_acquire_nb(ctx);
+ return pfm_amd64_acquire_nb(ctx);
}
/*
* invoked on pfm_unload_context()
*/
-static int pfm_k8_unload_context(struct pfm_context *ctx)
+static int pfm_amd64_unload_context(struct pfm_context *ctx)
{
struct pfm_context **entry, *old;
int proc_id;
@@ -269,7 +269,7 @@ static int pfm_k8_unload_context(struct
/*
* detect if we need to active NorthBridge event access control
*/
-static int pfm_k8_setup_nb_event_control(void)
+static int pfm_amd64_setup_nb_event_control(void)
{
unsigned int c, n = 0;
unsigned int max_phys = 0;
@@ -302,25 +302,25 @@ static int pfm_k8_setup_nb_event_control
* activate write-checker for PMC registers
*/
for(c=0; c < PFM_AMD_NUM_PMCS; c++) {
- pfm_k8_pmc_desc[c].type |= PFM_REG_WC;
+ pfm_amd64_pmc_desc[c].type |= PFM_REG_WC;
}
- pfm_k8_pmu_conf.load_context = pfm_k8_load_context;
- pfm_k8_pmu_conf.unload_context = pfm_k8_unload_context;
- pfm_k8_pmu_conf.pmc_write_check = pfm_k8_pmc_write_check;
+ pfm_amd64_pmu_conf.load_context = pfm_amd64_load_context;
+ pfm_amd64_pmu_conf.unload_context = pfm_amd64_unload_context;
+ pfm_amd64_pmu_conf.pmc_write_check = pfm_amd64_pmc_write_check;
PFM_INFO("NorthBridge event access control enabled");
return 0;
}
-static int pfm_k8_detect_nmi(void)
+static int pfm_amd64_detect_nmi(void)
{
unsigned int i;
if (nmi_watchdog != NMI_LOCAL_APIC) {
if (force_nmi)
- pfm_k8_pmu_info.flags |= PFM_X86_FL_USE_NMI;
+ pfm_amd64_pmu_info.flags |= PFM_X86_FL_USE_NMI;
return 0;
}
@@ -330,25 +330,25 @@ static int pfm_k8_detect_nmi(void)
*/
for (i=0; i < PFM_AMD_NUM_PMDS; i++) {
/* skip IBS registers */
- if (pfm_k8_pmu_info.pmc_addrs[i].reg_type & PFM_REGT_IBS)
+ if (pfm_amd64_pmu_info.pmc_addrs[i].reg_type & PFM_REGT_IBS)
continue;
- if (avail_to_resrv_perfctr_nmi(pfm_k8_pmd_desc[i].hw_addr))
+ if (avail_to_resrv_perfctr_nmi(pfm_amd64_pmd_desc[i].hw_addr))
continue;
PFM_INFO("NMI watchdog using %s/%s, disabling for perfmon",
- pfm_k8_pmc_desc[i].desc,
- pfm_k8_pmd_desc[i].desc);
+ pfm_amd64_pmc_desc[i].desc,
+ pfm_amd64_pmd_desc[i].desc);
- pfm_k8_pmc_desc[i].type = PFM_REG_NA;
- pfm_k8_pmd_desc[i].type = PFM_REG_NA;
- pfm_k8_pmu_info.pmc_addrs[i].reg_type = PFM_REGT_NA;
- pfm_k8_pmu_info.pmd_addrs[i].reg_type = PFM_REGT_NA;
+ pfm_amd64_pmc_desc[i].type = PFM_REG_NA;
+ pfm_amd64_pmd_desc[i].type = PFM_REG_NA;
+ pfm_amd64_pmu_info.pmc_addrs[i].reg_type = PFM_REGT_NA;
+ pfm_amd64_pmu_info.pmd_addrs[i].reg_type = PFM_REGT_NA;
}
- pfm_k8_pmu_info.flags |= PFM_X86_FL_USE_NMI;
+ pfm_amd64_pmu_info.flags |= PFM_X86_FL_USE_NMI;
return 0;
}
-static int pfm_k8_probe_pmu(void)
+static int pfm_amd64_probe_pmu(void)
{
if (current_cpu_data.x86_vendor != X86_VENDOR_AMD) {
PFM_INFO("not an AMD processor");
@@ -372,17 +372,17 @@ static int pfm_k8_probe_pmu(void)
PFM_INFO("no local APIC, unsupported");
return -1;
}
- if (pfm_k8_detect_nmi()) {
+ if (pfm_amd64_detect_nmi()) {
PFM_INFO("NMI detection failed");
return -1;
}
if (current_cpu_data.x86_max_cores > 1)
- pfm_k8_setup_nb_event_control();
+ pfm_amd64_setup_nb_event_control();
PFM_INFO("Using AMD64 PMU");
- if (pfm_k8_pmu_info.flags & PFM_X86_FL_IBS)
+ if (pfm_amd64_pmu_info.flags & PFM_X86_FL_IBS)
PFM_INFO("IBS is supported by processor");
- if (pfm_k8_pmu_info.flags & PFM_X86_FL_IBS_EXT)
+ if (pfm_amd64_pmu_info.flags & PFM_X86_FL_IBS_EXT)
PFM_INFO("IBS extended registers are supported by processor");
return 0;
@@ -449,35 +449,35 @@ static void pfm_amd64_setup_pmu(struct p
}
}
-static struct pfm_pmu_config pfm_k8_pmu_conf = {
+static struct pfm_pmu_config pfm_amd64_pmu_conf = {
.pmu_name = "AMD64",
.counter_width = 47,
- .pmd_desc = pfm_k8_pmd_desc,
- .pmc_desc = pfm_k8_pmc_desc,
+ .pmd_desc = pfm_amd64_pmd_desc,
+ .pmc_desc = pfm_amd64_pmc_desc,
.num_pmc_entries = PFM_AMD_NUM_PMCS,
.num_pmd_entries = PFM_AMD_NUM_PMDS,
- .probe_pmu = pfm_k8_probe_pmu,
+ .probe_pmu = pfm_amd64_probe_pmu,
.version = "1.2",
- .arch_info = &pfm_k8_pmu_info,
+ .arch_info = &pfm_amd64_pmu_info,
.flags = PFM_PMU_BUILTIN_FLAG,
.owner = THIS_MODULE,
.pmd_sread = pfm_pmd_sread,
.pmd_swrite = pfm_pmd_swrite,
};
-static int __init pfm_k8_pmu_init_module(void)
+static int __init pfm_amd64_pmu_init_module(void)
{
- pfm_amd64_setup_pmu(&pfm_k8_pmu_conf);
- return pfm_pmu_register(&pfm_k8_pmu_conf);
+ pfm_amd64_setup_pmu(&pfm_amd64_pmu_conf);
+ return pfm_pmu_register(&pfm_amd64_pmu_conf);
}
-static void __exit pfm_k8_pmu_cleanup_module(void)
+static void __exit pfm_amd64_pmu_cleanup_module(void)
{
if (pfm_nb_sys_owners)
vfree(pfm_nb_sys_owners);
- pfm_pmu_unregister(&pfm_k8_pmu_conf);
+ pfm_pmu_unregister(&pfm_amd64_pmu_conf);
}
-module_init(pfm_k8_pmu_init_module);
-module_exit(pfm_k8_pmu_cleanup_module);
+module_init(pfm_amd64_pmu_init_module);
+module_exit(pfm_amd64_pmu_cleanup_module);
--
AMD Saxony, Dresden, Germany
Operating System Research Center
email: robert.richter@amd.com
^ permalink raw reply [flat|nested] 19+ messages in thread
* [patch 8/8] 2.6.22-rc3 perfmon2 : IBS implementation for AMD64
2007-06-15 16:56 [patch 0/8] 2.6.22-rc3 perfmon2 : IBS implementation for AMD64 Robert Richter
` (6 preceding siblings ...)
2007-06-15 17:01 ` [patch 7/8] " Robert Richter
@ 2007-06-15 17:02 ` Robert Richter
7 siblings, 0 replies; 19+ messages in thread
From: Robert Richter @ 2007-06-15 17:02 UTC (permalink / raw)
To: Stephane Eranian; +Cc: Andi Kleen, linux-kernel
This patch renames module perfmon_k8 to perfmon_amd64.
Signed-off-by: Robert Richter <robert.richter@amd.com>
Index: linux-2.6.22-rc3/arch/i386/perfmon/Kconfig
===================================================================
--- linux-2.6.22-rc3.orig/arch/i386/perfmon/Kconfig
+++ linux-2.6.22-rc3/arch/i386/perfmon/Kconfig
@@ -55,7 +55,7 @@ config I386_PERFMON_GEN_IA32
Enables 32-bit support for Intel architectural performance counters. Enable
this option to support Intel Core Solo/Core Duo processors.
-config I386_PERFMON_K8
+config I386_PERFMON_AMD64
tristate "Support 32-bit mode AMD Athlon64/Opteron64 hardware performance counters"
depends on PERFMON
default n
Index: linux-2.6.22-rc3/arch/i386/perfmon/Makefile
===================================================================
--- linux-2.6.22-rc3.orig/arch/i386/perfmon/Makefile
+++ linux-2.6.22-rc3/arch/i386/perfmon/Makefile
@@ -8,7 +8,7 @@ obj-$(CONFIG_I386_PERFMON_P4) += perfmo
obj-$(CONFIG_I386_PERFMON_CORE) += perfmon_core.o
obj-$(CONFIG_I386_PERFMON_GEN_IA32) += perfmon_gen_ia32.o
obj-$(CONFIG_I386_PERFMON_PEBS) += perfmon_pebs_smpl.o
-obj-$(CONFIG_I386_PERFMON_K8) += perfmon_k8.o
+obj-$(CONFIG_I386_PERFMON_AMD64) += perfmon_amd64.o
-perfmon_k8-$(subst m,y,$(CONFIG_I386_PERFMON_K8)) += ../../x86_64/perfmon/perfmon_k8.o
+perfmon_amd64-$(subst m,y,$(CONFIG_I386_PERFMON_AMD64)) += ../../x86_64/perfmon/perfmon_amd64.o
perfmon_core-$(subst m,y,$(CONFIG_I386_PERFMON_CORE)) += ../../x86_64/perfmon/perfmon_core.o
Index: linux-2.6.22-rc3/arch/i386/perfmon/perfmon.c
===================================================================
--- linux-2.6.22-rc3.orig/arch/i386/perfmon/perfmon.c
+++ linux-2.6.22-rc3/arch/i386/perfmon/perfmon.c
@@ -931,7 +931,7 @@ fastcall void smp_pmu_interrupt(struct p
* 0 : no overflow
* 1 : at least one overflow
*
- * used by AMD K8 and Intel architectural PMU
+ * used by AMD64 and Intel architectural PMU
*/
static int __kprobes pfm_has_ovfl_p6(void)
{
@@ -1126,7 +1126,8 @@ int pfm_arch_pmu_config_init(struct _pfm
/*
* adust stop routine based on PMU model
*
- * P6 : P6, Pentium M, AMD K8, CoreDuo/CoreSolo
+ * P6 : P6, Pentium M, CoreDuo/CoreSolo
+ * AMD64: AMD64 (K8, family 10h)
* P4 : Xeon, EM64T, P4
* CORE: Core 2,
*/
@@ -1279,7 +1280,7 @@ char *pfm_arch_get_pmu_module_name(void)
case 16:
/* All Opteron processors */
if (cpu_data->x86_vendor == X86_VENDOR_AMD)
- return "perfmon_k8";
+ return "perfmon_amd64";
switch(cpu_data->x86_model) {
case 0 ... 6:
Index: linux-2.6.22-rc3/arch/x86_64/perfmon/Kconfig
===================================================================
--- linux-2.6.22-rc3.orig/arch/x86_64/perfmon/Kconfig
+++ linux-2.6.22-rc3/arch/x86_64/perfmon/Kconfig
@@ -14,7 +14,7 @@ config PERFMON_DEBUG
help
Enables perfmon debugging support
-config X86_64_PERFMON_K8
+config X86_64_PERFMON_AMD64
tristate "Support 64-bit mode AMD Athlon64 and Opteron64 hardware performance counters"
depends on PERFMON
default n
Index: linux-2.6.22-rc3/arch/x86_64/perfmon/Makefile
===================================================================
--- linux-2.6.22-rc3.orig/arch/x86_64/perfmon/Makefile
+++ linux-2.6.22-rc3/arch/x86_64/perfmon/Makefile
@@ -4,7 +4,7 @@
#
obj-$(CONFIG_PERFMON) += perfmon.o
-obj-$(CONFIG_X86_64_PERFMON_K8) += perfmon_k8.o
+obj-$(CONFIG_X86_64_PERFMON_AMD64) += perfmon_amd64.o
obj-$(CONFIG_X86_64_PERFMON_P4) += perfmon_p4.o
obj-$(CONFIG_X86_64_PERFMON_CORE) += perfmon_core.o
obj-$(CONFIG_X86_64_PERFMON_GEN_IA32) += perfmon_gen_ia32.o
Index: linux-2.6.22-rc3/arch/x86_64/perfmon/perfmon_amd64.c
===================================================================
--- /dev/null
+++ linux-2.6.22-rc3/arch/x86_64/perfmon/perfmon_amd64.c
@@ -0,0 +1,483 @@
+/*
+ * This file contains the PMU description for the Athlon64 and Opteron64
+ * processors. It supports 32 and 64-bit modes.
+ *
+ * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
+ *
+ * Copyright (c) 2007 Advanced Micro Devices, Inc.
+ * Contributed by Robert Richter <robert.richter@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ * 02111-1307 USA
+ */
+#include <linux/module.h>
+#include <linux/perfmon.h>
+#include <linux/vmalloc.h>
+#include <asm/nmi.h>
+
+MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
+MODULE_AUTHOR("Robert Richter <robert.richter@amd.com>");
+MODULE_DESCRIPTION("AMD64 PMU description table");
+MODULE_LICENSE("GPL");
+
+static int force_nmi;
+MODULE_PARM_DESC(force_nmi, "bool: force use of NMI for PMU interrupt");
+module_param(force_nmi, bool, 0600);
+
+static struct pfm_arch_pmu_info pfm_amd64_pmu_info = {
+ .pmc_addrs = {
+/* pmc0 */ {{MSR_K7_EVNTSEL0, 0}, 0, PFM_REGT_EN},
+/* pmc1 */ {{MSR_K7_EVNTSEL1, 0}, 1, PFM_REGT_EN},
+/* pmc2 */ {{MSR_K7_EVNTSEL2, 0}, 2, PFM_REGT_EN},
+/* pmc3 */ {{MSR_K7_EVNTSEL3, 0}, 3, PFM_REGT_EN},
+/* pmc4 */ {{MSR_AMD64_IBSFETCHCTL, 0}, 0, PFM_REGT_EN|PFM_REGT_IBS},
+/* pmc5 */ {{MSR_AMD64_IBSOPCTL, 0}, 0, PFM_REGT_EN|PFM_REGT_IBS},
+ },
+ .pmd_addrs = {
+/* pmd0 */ {{MSR_K7_PERFCTR0, 0}, 0, PFM_REGT_CTR},
+/* pmd1 */ {{MSR_K7_PERFCTR1, 0}, 0, PFM_REGT_CTR},
+/* pmd2 */ {{MSR_K7_PERFCTR2, 0}, 0, PFM_REGT_CTR},
+/* pmd3 */ {{MSR_K7_PERFCTR3, 0}, 0, PFM_REGT_CTR},
+/* pmd4 */ {{0, 0}, 0, PFM_REGT_CTR|PFM_REGT_IBS},
+/* pmd5 */ {{MSR_AMD64_IBSFETCHCTL, 0}, 0, PFM_REGT_IBS},
+/* pmd6 */ {{MSR_AMD64_IBSFETCHLINAD, 0}, 0, PFM_REGT_IBS},
+/* pmd7 */ {{MSR_AMD64_IBSFETCHPHYSAD, 0}, 0, PFM_REGT_IBS},
+/* pmd8 */ {{0, 0}, 0, PFM_REGT_CTR|PFM_REGT_IBS},
+/* pmd9 */ {{MSR_AMD64_IBSOPCTL, 0}, 0, PFM_REGT_IBS},
+/* pmd10 */ {{MSR_AMD64_IBSOPRIP, 0}, 0, PFM_REGT_IBS},
+/* pmd11 */ {{MSR_AMD64_IBSOPDATA, 0}, 0, PFM_REGT_IBS},
+/* pmd12 */ {{MSR_AMD64_IBSOPDATA2, 0}, 0, PFM_REGT_IBS},
+/* pmd13 */ {{MSR_AMD64_IBSOPDATA3, 0}, 0, PFM_REGT_IBS|PFM_REGT_IBS_EXT},
+/* pmd14 */ {{MSR_AMD64_IBSDCLINAD, 0}, 0, PFM_REGT_IBS|PFM_REGT_IBS_EXT},
+/* pmd15 */ {{MSR_AMD64_IBSDCPHYSAD, 0}, 0, PFM_REGT_IBS|PFM_REGT_IBS_EXT},
+ },
+ .pmu_style = PFM_X86_PMU_AMD64
+};
+
+/*
+ * force Local APIC interrupt on overflow
+ */
+#define PFM_K8_VAL (1ULL<<20)
+#define PFM_K8_NO64 (1ULL<<20)
+
+/*
+ * reserved bits must be zero
+ *
+ * - upper 32 bits are reserved
+ * - APIC enable bit is reserved (forced to 1)
+ * - bit 21 is reserved
+ */
+#define PFM_K8_RSVD ((~((1ULL<<32)-1)) \
+ | (1ULL<<20) \
+ | (1ULL<<21))
+
+/*
+ * We mark readonly bits as reserved and use the PMC for control
+ * operations only. Interrupt enable and clear bits are reserved too.
+ * IBSFETCHCTL is also implemented as PMD, where data can be read
+ * from. Same applies to IBSOPCTR.
+ */
+#define PFM_AMD64_IBSFETCHCTL_VAL PFM_AMD64_IBSFETCHEN
+#define PFM_AMD64_IBSFETCHCTL_NO64 PFM_AMD64_IBSFETCHEN
+#define PFM_AMD64_IBSFETCHCTL_RSVD (~((1ULL<<16)-1))
+#define PFM_AMD64_IBSOPCTL_VAL PFM_AMD64_IBSOPEN
+#define PFM_AMD64_IBSOPCTL_NO64 PFM_AMD64_IBSOPEN
+#define PFM_AMD64_IBSOPCTL_RSVD (~((1ULL<<16)-1))
+
+static struct pfm_reg_desc pfm_amd64_pmc_desc[]={
+/* pmc0 */ PMC_D(PFM_REG_I64, "PERFSEL0", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL0),
+/* pmc1 */ PMC_D(PFM_REG_I64, "PERFSEL1", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL1),
+/* pmc2 */ PMC_D(PFM_REG_I64, "PERFSEL2", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL2),
+/* pmc3 */ PMC_D(PFM_REG_I64, "PERFSEL3", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL3),
+/* pmc4 */ PMC_D(PFM_REG_I, "IBSFETCHCTL", PFM_AMD64_IBSFETCHCTL_VAL, PFM_AMD64_IBSFETCHCTL_RSVD, PFM_AMD64_IBSFETCHCTL_NO64, MSR_AMD64_IBSFETCHCTL),
+/* pmc5 */ PMC_D(PFM_REG_I, "IBSOPCTL", PFM_AMD64_IBSOPCTL_VAL, PFM_AMD64_IBSOPCTL_RSVD, PFM_AMD64_IBSOPCTL_NO64, MSR_AMD64_IBSOPCTL),
+};
+#define PFM_AMD_NUM_PMCS ARRAY_SIZE(pfm_amd64_pmc_desc)
+
+static struct pfm_reg_desc pfm_amd64_pmd_desc[] = {
+/* pmd0 */ PMD_D(PFM_REG_C, "PERFCTR0", MSR_K7_PERFCTR0),
+/* pmd1 */ PMD_D(PFM_REG_C, "PERFCTR1", MSR_K7_PERFCTR1),
+/* pmd2 */ PMD_D(PFM_REG_C, "PERFCTR2", MSR_K7_PERFCTR2),
+/* pmd3 */ PMD_D(PFM_REG_C, "PERFCTR3", MSR_K7_PERFCTR3),
+/* pmd4 */ PMD_D(PFM_REG_ICV, "IBSFETCHCTR", PFM_VPMD_AMD64_IBSFETCHCTR),
+/* pmd5 */ PMD_D(PFM_REG_IRO, "IBSFETCHCTL", MSR_AMD64_IBSFETCHCTL),
+/* pmd6 */ PMD_D(PFM_REG_IRO, "IBSFETCHLINAD", MSR_AMD64_IBSFETCHLINAD),
+/* pmd7 */ PMD_D(PFM_REG_IRO, "IBSFETCHPHYSAD", MSR_AMD64_IBSFETCHPHYSAD),
+/* pmd8 */ PMD_D(PFM_REG_ICV, "IBSOPCTR", PFM_VPMD_AMD64_IBSOPCTR),
+/* pmd9 */ PMD_D(PFM_REG_IRO, "IBSOPCTL", MSR_AMD64_IBSOPCTL),
+/* pmd10 */ PMD_D(PFM_REG_IRO, "IBSOPRIP", MSR_AMD64_IBSOPRIP),
+/* pmd11 */ PMD_D(PFM_REG_IRO, "IBSOPDATA", MSR_AMD64_IBSOPDATA),
+/* pmd12 */ PMD_D(PFM_REG_IRO, "IBSOPDATA2", MSR_AMD64_IBSOPDATA2),
+/* pmd13 */ PMD_D(PFM_REG_IRO, "IBSOPDATA3", MSR_AMD64_IBSOPDATA3),
+/* pmd14 */ PMD_D(PFM_REG_IRO, "IBSDCLINAD", MSR_AMD64_IBSDCLINAD),
+/* pmd15 */ PMD_D(PFM_REG_IRO, "IBSDCPHYSAD", MSR_AMD64_IBSDCPHYSAD),
+};
+#define PFM_AMD_NUM_PMDS ARRAY_SIZE(pfm_amd64_pmd_desc)
+
+static struct pfm_context **pfm_nb_sys_owners;
+static struct pfm_context *pfm_nb_task_owner;
+
+static struct pfm_pmu_config pfm_amd64_pmu_conf;
+
+/*
+ * There can be only one user per socket for the Northbridge (NB) events
+ * so we enforce mutual exclusion as follows:
+ * - per-thread : only one context machine-wide can use NB events
+ * - system-wide: only one context per processor socket
+ *
+ * Exclusion is enforced at:
+ * - pfm_load_context()
+ * - pfm_write_pmcs() for attached contexts
+ *
+ * Exclusion is released at:
+ * - pfm_unload_context() or any call that implicitly uses it
+ *
+ * return:
+ * 0 : successfully acquire NB access
+ * < 0: errno, failed to acquire NB access
+ */
+static int pfm_amd64_acquire_nb(struct pfm_context *ctx)
+{
+ struct pfm_context **entry, *old;
+ int proc_id;
+
+#ifdef CONFIG_SMP
+ proc_id = topology_physical_package_id(smp_processor_id());
+#else
+ proc_id = 0;
+#endif
+
+ if (ctx->flags.system)
+ entry = &pfm_nb_sys_owners[proc_id];
+ else
+ entry = &pfm_nb_task_owner;
+
+ old = cmpxchg(entry, NULL, ctx);
+ if (!old) {
+ if (ctx->flags.system)
+ PFM_DBG("acquired Northbridge event access on socket %u", proc_id);
+ else
+ PFM_DBG("acquired Northbridge event access globally");
+ } else if (old != ctx) {
+ if (ctx->flags.system)
+ PFM_DBG("NorthBridge event conflict on socket %u", proc_id);
+ else
+ PFM_DBG("global NorthBridge event conflict");
+ return -EBUSY;
+ }
+ return 0;
+}
+
+/*
+ * invoked from pfm_write_pmcs() when pfm_nb_sys_owners is not NULL,i.e.,
+ * when we have detected a multi-core processor.
+ *
+ * context is locked, interrupts are masked
+ */
+static int pfm_amd64_pmc_write_check(struct pfm_context *ctx,
+ struct pfm_event_set *set,
+ struct pfarg_pmc *req)
+{
+ unsigned int event;
+ /*
+ * delay checking NB event until we load the context
+ */
+ if (ctx->state == PFM_CTX_UNLOADED)
+ return 0;
+
+ /*
+ * check event is NB event
+ */
+ event = (unsigned int)(req->reg_value & 0xff);
+ if (event < 0xee)
+ return 0;
+
+ return pfm_amd64_acquire_nb(ctx);
+}
+
+/*
+ * invoked on pfm_load_context().
+ * context is locked, interrupts are masked
+ */
+static int pfm_amd64_load_context(struct pfm_context *ctx)
+{
+ struct pfm_event_set *set;
+ unsigned int i, n;
+
+ /*
+ * scan all sets for NB events
+ */
+ list_for_each_entry(set, &ctx->list, list) {
+ n = set->nused_pmcs;
+ for(i=0; n; i++) {
+ if (!test_bit(i, ulp(set->used_pmcs)))
+ continue;
+ if ((set->pmcs[i] & 0xff) >= 0xee)
+ goto found;
+ n--;
+ }
+ }
+ return 0;
+found:
+ return pfm_amd64_acquire_nb(ctx);
+}
+
+/*
+ * invoked on pfm_unload_context()
+ */
+static int pfm_amd64_unload_context(struct pfm_context *ctx)
+{
+ struct pfm_context **entry, *old;
+ int proc_id;
+
+#ifdef CONFIG_SMP
+ proc_id = topology_physical_package_id(smp_processor_id());
+#else
+ proc_id = 0;
+#endif
+
+ /*
+ * unload always happens on the monitored CPU in system-wide
+ */
+ if (ctx->flags.system)
+ entry = &pfm_nb_sys_owners[proc_id];
+ else
+ entry = &pfm_nb_task_owner;
+
+ old = cmpxchg(entry, ctx, NULL);
+ if (old == ctx) {
+ if (ctx->flags.system)
+ PFM_DBG("released NorthBridge on socket %u", proc_id);
+ else
+ PFM_DBG("released NorthBridge events globally");
+ }
+ return 0;
+}
+
+/*
+ * detect if we need to active NorthBridge event access control
+ */
+static int pfm_amd64_setup_nb_event_control(void)
+{
+ unsigned int c, n = 0;
+ unsigned int max_phys = 0;
+
+#ifdef CONFIG_SMP
+ for_each_present_cpu(c) {
+ if (cpu_data[c].phys_proc_id > max_phys)
+ max_phys = cpu_data[c].phys_proc_id;
+ }
+#else
+ max_phys = 0;
+#endif
+ if (max_phys > 255) {
+ PFM_INFO("socket id %d is too big to handle", max_phys);
+ return -ENOMEM;
+ }
+
+ n = max_phys + 1;
+ if (n < 2)
+ return 0;
+
+ pfm_nb_sys_owners = vmalloc(n * sizeof(*pfm_nb_sys_owners));
+ if (!pfm_nb_sys_owners)
+ return -ENOMEM;
+
+ memset(pfm_nb_sys_owners, 0, n * sizeof(*pfm_nb_sys_owners));
+ pfm_nb_task_owner = NULL;
+
+ /*
+ * activate write-checker for PMC registers
+ */
+ for(c=0; c < PFM_AMD_NUM_PMCS; c++) {
+ pfm_amd64_pmc_desc[c].type |= PFM_REG_WC;
+ }
+
+ pfm_amd64_pmu_conf.load_context = pfm_amd64_load_context;
+ pfm_amd64_pmu_conf.unload_context = pfm_amd64_unload_context;
+ pfm_amd64_pmu_conf.pmc_write_check = pfm_amd64_pmc_write_check;
+
+ PFM_INFO("NorthBridge event access control enabled");
+
+ return 0;
+}
+
+static int pfm_amd64_detect_nmi(void)
+{
+ unsigned int i;
+
+ if (nmi_watchdog != NMI_LOCAL_APIC) {
+ if (force_nmi)
+ pfm_amd64_pmu_info.flags |= PFM_X86_FL_USE_NMI;
+ return 0;
+ }
+
+ /*
+ * assume NMI watchdog is initialized before PMU description module
+ * auto-detect which perfctr/eventsel is used by NMI watchdog
+ */
+ for (i=0; i < PFM_AMD_NUM_PMDS; i++) {
+ /* skip IBS registers */
+ if (pfm_amd64_pmu_info.pmc_addrs[i].reg_type & PFM_REGT_IBS)
+ continue;
+ if (avail_to_resrv_perfctr_nmi(pfm_amd64_pmd_desc[i].hw_addr))
+ continue;
+
+ PFM_INFO("NMI watchdog using %s/%s, disabling for perfmon",
+ pfm_amd64_pmc_desc[i].desc,
+ pfm_amd64_pmd_desc[i].desc);
+
+ pfm_amd64_pmc_desc[i].type = PFM_REG_NA;
+ pfm_amd64_pmd_desc[i].type = PFM_REG_NA;
+ pfm_amd64_pmu_info.pmc_addrs[i].reg_type = PFM_REGT_NA;
+ pfm_amd64_pmu_info.pmd_addrs[i].reg_type = PFM_REGT_NA;
+ }
+ pfm_amd64_pmu_info.flags |= PFM_X86_FL_USE_NMI;
+ return 0;
+}
+
+static int pfm_amd64_probe_pmu(void)
+{
+ if (current_cpu_data.x86_vendor != X86_VENDOR_AMD) {
+ PFM_INFO("not an AMD processor");
+ return -1;
+ }
+
+ switch (current_cpu_data.x86) {
+ case 15:
+ case 16:
+ PFM_INFO("found family=%d", current_cpu_data.x86);
+ break;
+ default:
+ PFM_INFO("unsupported family=%d", current_cpu_data.x86);
+ return -1;
+ }
+
+ /*
+ * check for local APIC (required)
+ */
+ if (!cpu_has_apic) {
+ PFM_INFO("no local APIC, unsupported");
+ return -1;
+ }
+ if (pfm_amd64_detect_nmi()) {
+ PFM_INFO("NMI detection failed");
+ return -1;
+ }
+ if (current_cpu_data.x86_max_cores > 1)
+ pfm_amd64_setup_nb_event_control();
+
+ PFM_INFO("Using AMD64 PMU");
+ if (pfm_amd64_pmu_info.flags & PFM_X86_FL_IBS)
+ PFM_INFO("IBS is supported by processor");
+ if (pfm_amd64_pmu_info.flags & PFM_X86_FL_IBS_EXT)
+ PFM_INFO("IBS extended registers are supported by processor");
+
+ return 0;
+}
+
+static inline void
+pfm_amd64_check_register(struct pfm_pmu_config *cfg,
+ struct pfm_reg_desc *reg,
+ struct pfm_arch_ext_reg *ext_reg)
+{
+ struct pfm_arch_pmu_info *arch_info = cfg->arch_info;
+
+ if (!(ext_reg->reg_type & PFM_REGT_AMD64))
+ /* No special AMD64 PMU register */
+ return;
+
+ /* Disable register */
+ reg->type &= ~PFM_REG_I;
+
+ switch (ext_reg->reg_type & PFM_REGT_AMD64) {
+ case (PFM_REGT_IBS):
+ /* IBS register */
+ if (!(arch_info->flags & PFM_X86_FL_IBS))
+ return;
+ break;
+ case (PFM_REGT_IBS|PFM_REGT_IBS_EXT):
+ /* IBS extended register */
+ if (!(arch_info->flags & PFM_X86_FL_IBS_EXT))
+ return;
+ break;
+ default:
+ return;
+ }
+
+ /* Enable register */
+ reg->type |= PFM_REG_I;
+}
+
+static void pfm_amd64_setup_pmu(struct pfm_pmu_config *cfg)
+{
+ u16 i;
+ struct pfm_arch_pmu_info *arch_info = cfg->arch_info;
+
+ /* set PMU features depending on CPUID */
+ arch_info->flags &= ~(PFM_X86_FL_IBS|PFM_X86_FL_IBS_EXT);
+ switch (current_cpu_data.x86) {
+ case 15:
+ break;
+ case 16:
+ arch_info->flags |= PFM_X86_FL_IBS;
+ break;
+ default:
+ break;
+ }
+
+ /* Disable unsupported PMC/PMD registers */
+ for (i = 0; i < cfg->num_pmc_entries; i++) {
+ pfm_amd64_check_register(cfg, &cfg->pmc_desc[i],
+ &arch_info->pmc_addrs[i]);
+ }
+ for (i = 0; i < cfg->num_pmd_entries; i++) {
+ pfm_amd64_check_register(cfg, &cfg->pmd_desc[i],
+ &arch_info->pmd_addrs[i]);
+ }
+}
+
+static struct pfm_pmu_config pfm_amd64_pmu_conf = {
+ .pmu_name = "AMD64",
+ .counter_width = 47,
+ .pmd_desc = pfm_amd64_pmd_desc,
+ .pmc_desc = pfm_amd64_pmc_desc,
+ .num_pmc_entries = PFM_AMD_NUM_PMCS,
+ .num_pmd_entries = PFM_AMD_NUM_PMDS,
+ .probe_pmu = pfm_amd64_probe_pmu,
+ .version = "1.2",
+ .arch_info = &pfm_amd64_pmu_info,
+ .flags = PFM_PMU_BUILTIN_FLAG,
+ .owner = THIS_MODULE,
+ .pmd_sread = pfm_pmd_sread,
+ .pmd_swrite = pfm_pmd_swrite,
+};
+
+static int __init pfm_amd64_pmu_init_module(void)
+{
+ pfm_amd64_setup_pmu(&pfm_amd64_pmu_conf);
+ return pfm_pmu_register(&pfm_amd64_pmu_conf);
+}
+
+static void __exit pfm_amd64_pmu_cleanup_module(void)
+{
+ if (pfm_nb_sys_owners)
+ vfree(pfm_nb_sys_owners);
+
+ pfm_pmu_unregister(&pfm_amd64_pmu_conf);
+}
+
+module_init(pfm_amd64_pmu_init_module);
+module_exit(pfm_amd64_pmu_cleanup_module);
Index: linux-2.6.22-rc3/arch/x86_64/perfmon/perfmon_k8.c
===================================================================
--- linux-2.6.22-rc3.orig/arch/x86_64/perfmon/perfmon_k8.c
+++ /dev/null
@@ -1,483 +0,0 @@
-/*
- * This file contains the PMU description for the Athlon64 and Opteron64
- * processors. It supports 32 and 64-bit modes.
- *
- * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
- * Contributed by Stephane Eranian <eranian@hpl.hp.com>
- *
- * Copyright (c) 2007 Advanced Micro Devices, Inc.
- * Contributed by Robert Richter <robert.richter@amd.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- * 02111-1307 USA
- */
-#include <linux/module.h>
-#include <linux/perfmon.h>
-#include <linux/vmalloc.h>
-#include <asm/nmi.h>
-
-MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
-MODULE_AUTHOR("Robert Richter <robert.richter@amd.com>");
-MODULE_DESCRIPTION("AMD64 PMU description table");
-MODULE_LICENSE("GPL");
-
-static int force_nmi;
-MODULE_PARM_DESC(force_nmi, "bool: force use of NMI for PMU interrupt");
-module_param(force_nmi, bool, 0600);
-
-static struct pfm_arch_pmu_info pfm_amd64_pmu_info = {
- .pmc_addrs = {
-/* pmc0 */ {{MSR_K7_EVNTSEL0, 0}, 0, PFM_REGT_EN},
-/* pmc1 */ {{MSR_K7_EVNTSEL1, 0}, 1, PFM_REGT_EN},
-/* pmc2 */ {{MSR_K7_EVNTSEL2, 0}, 2, PFM_REGT_EN},
-/* pmc3 */ {{MSR_K7_EVNTSEL3, 0}, 3, PFM_REGT_EN},
-/* pmc4 */ {{MSR_AMD64_IBSFETCHCTL, 0}, 0, PFM_REGT_EN|PFM_REGT_IBS},
-/* pmc5 */ {{MSR_AMD64_IBSOPCTL, 0}, 0, PFM_REGT_EN|PFM_REGT_IBS},
- },
- .pmd_addrs = {
-/* pmd0 */ {{MSR_K7_PERFCTR0, 0}, 0, PFM_REGT_CTR},
-/* pmd1 */ {{MSR_K7_PERFCTR1, 0}, 0, PFM_REGT_CTR},
-/* pmd2 */ {{MSR_K7_PERFCTR2, 0}, 0, PFM_REGT_CTR},
-/* pmd3 */ {{MSR_K7_PERFCTR3, 0}, 0, PFM_REGT_CTR},
-/* pmd4 */ {{0, 0}, 0, PFM_REGT_CTR|PFM_REGT_IBS},
-/* pmd5 */ {{MSR_AMD64_IBSFETCHCTL, 0}, 0, PFM_REGT_IBS},
-/* pmd6 */ {{MSR_AMD64_IBSFETCHLINAD, 0}, 0, PFM_REGT_IBS},
-/* pmd7 */ {{MSR_AMD64_IBSFETCHPHYSAD, 0}, 0, PFM_REGT_IBS},
-/* pmd8 */ {{0, 0}, 0, PFM_REGT_CTR|PFM_REGT_IBS},
-/* pmd9 */ {{MSR_AMD64_IBSOPCTL, 0}, 0, PFM_REGT_IBS},
-/* pmd10 */ {{MSR_AMD64_IBSOPRIP, 0}, 0, PFM_REGT_IBS},
-/* pmd11 */ {{MSR_AMD64_IBSOPDATA, 0}, 0, PFM_REGT_IBS},
-/* pmd12 */ {{MSR_AMD64_IBSOPDATA2, 0}, 0, PFM_REGT_IBS},
-/* pmd13 */ {{MSR_AMD64_IBSOPDATA3, 0}, 0, PFM_REGT_IBS|PFM_REGT_IBS_EXT},
-/* pmd14 */ {{MSR_AMD64_IBSDCLINAD, 0}, 0, PFM_REGT_IBS|PFM_REGT_IBS_EXT},
-/* pmd15 */ {{MSR_AMD64_IBSDCPHYSAD, 0}, 0, PFM_REGT_IBS|PFM_REGT_IBS_EXT},
- },
- .pmu_style = PFM_X86_PMU_AMD64
-};
-
-/*
- * force Local APIC interrupt on overflow
- */
-#define PFM_K8_VAL (1ULL<<20)
-#define PFM_K8_NO64 (1ULL<<20)
-
-/*
- * reserved bits must be zero
- *
- * - upper 32 bits are reserved
- * - APIC enable bit is reserved (forced to 1)
- * - bit 21 is reserved
- */
-#define PFM_K8_RSVD ((~((1ULL<<32)-1)) \
- | (1ULL<<20) \
- | (1ULL<<21))
-
-/*
- * We mark readonly bits as reserved and use the PMC for control
- * operations only. Interrupt enable and clear bits are reserved too.
- * IBSFETCHCTL is also implemented as PMD, where data can be read
- * from. Same applies to IBSOPCTR.
- */
-#define PFM_AMD64_IBSFETCHCTL_VAL PFM_AMD64_IBSFETCHEN
-#define PFM_AMD64_IBSFETCHCTL_NO64 PFM_AMD64_IBSFETCHEN
-#define PFM_AMD64_IBSFETCHCTL_RSVD (~((1ULL<<16)-1))
-#define PFM_AMD64_IBSOPCTL_VAL PFM_AMD64_IBSOPEN
-#define PFM_AMD64_IBSOPCTL_NO64 PFM_AMD64_IBSOPEN
-#define PFM_AMD64_IBSOPCTL_RSVD (~((1ULL<<16)-1))
-
-static struct pfm_reg_desc pfm_amd64_pmc_desc[]={
-/* pmc0 */ PMC_D(PFM_REG_I64, "PERFSEL0", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL0),
-/* pmc1 */ PMC_D(PFM_REG_I64, "PERFSEL1", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL1),
-/* pmc2 */ PMC_D(PFM_REG_I64, "PERFSEL2", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL2),
-/* pmc3 */ PMC_D(PFM_REG_I64, "PERFSEL3", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL3),
-/* pmc4 */ PMC_D(PFM_REG_I, "IBSFETCHCTL", PFM_AMD64_IBSFETCHCTL_VAL, PFM_AMD64_IBSFETCHCTL_RSVD, PFM_AMD64_IBSFETCHCTL_NO64, MSR_AMD64_IBSFETCHCTL),
-/* pmc5 */ PMC_D(PFM_REG_I, "IBSOPCTL", PFM_AMD64_IBSOPCTL_VAL, PFM_AMD64_IBSOPCTL_RSVD, PFM_AMD64_IBSOPCTL_NO64, MSR_AMD64_IBSOPCTL),
-};
-#define PFM_AMD_NUM_PMCS ARRAY_SIZE(pfm_amd64_pmc_desc)
-
-static struct pfm_reg_desc pfm_amd64_pmd_desc[] = {
-/* pmd0 */ PMD_D(PFM_REG_C, "PERFCTR0", MSR_K7_PERFCTR0),
-/* pmd1 */ PMD_D(PFM_REG_C, "PERFCTR1", MSR_K7_PERFCTR1),
-/* pmd2 */ PMD_D(PFM_REG_C, "PERFCTR2", MSR_K7_PERFCTR2),
-/* pmd3 */ PMD_D(PFM_REG_C, "PERFCTR3", MSR_K7_PERFCTR3),
-/* pmd4 */ PMD_D(PFM_REG_ICV, "IBSFETCHCTR", PFM_VPMD_AMD64_IBSFETCHCTR),
-/* pmd5 */ PMD_D(PFM_REG_IRO, "IBSFETCHCTL", MSR_AMD64_IBSFETCHCTL),
-/* pmd6 */ PMD_D(PFM_REG_IRO, "IBSFETCHLINAD", MSR_AMD64_IBSFETCHLINAD),
-/* pmd7 */ PMD_D(PFM_REG_IRO, "IBSFETCHPHYSAD", MSR_AMD64_IBSFETCHPHYSAD),
-/* pmd8 */ PMD_D(PFM_REG_ICV, "IBSOPCTR", PFM_VPMD_AMD64_IBSOPCTR),
-/* pmd9 */ PMD_D(PFM_REG_IRO, "IBSOPCTL", MSR_AMD64_IBSOPCTL),
-/* pmd10 */ PMD_D(PFM_REG_IRO, "IBSOPRIP", MSR_AMD64_IBSOPRIP),
-/* pmd11 */ PMD_D(PFM_REG_IRO, "IBSOPDATA", MSR_AMD64_IBSOPDATA),
-/* pmd12 */ PMD_D(PFM_REG_IRO, "IBSOPDATA2", MSR_AMD64_IBSOPDATA2),
-/* pmd13 */ PMD_D(PFM_REG_IRO, "IBSOPDATA3", MSR_AMD64_IBSOPDATA3),
-/* pmd14 */ PMD_D(PFM_REG_IRO, "IBSDCLINAD", MSR_AMD64_IBSDCLINAD),
-/* pmd15 */ PMD_D(PFM_REG_IRO, "IBSDCPHYSAD", MSR_AMD64_IBSDCPHYSAD),
-};
-#define PFM_AMD_NUM_PMDS ARRAY_SIZE(pfm_amd64_pmd_desc)
-
-static struct pfm_context **pfm_nb_sys_owners;
-static struct pfm_context *pfm_nb_task_owner;
-
-static struct pfm_pmu_config pfm_amd64_pmu_conf;
-
-/*
- * There can be only one user per socket for the Northbridge (NB) events
- * so we enforce mutual exclusion as follows:
- * - per-thread : only one context machine-wide can use NB events
- * - system-wide: only one context per processor socket
- *
- * Exclusion is enforced at:
- * - pfm_load_context()
- * - pfm_write_pmcs() for attached contexts
- *
- * Exclusion is released at:
- * - pfm_unload_context() or any call that implicitly uses it
- *
- * return:
- * 0 : successfully acquire NB access
- * < 0: errno, failed to acquire NB access
- */
-static int pfm_amd64_acquire_nb(struct pfm_context *ctx)
-{
- struct pfm_context **entry, *old;
- int proc_id;
-
-#ifdef CONFIG_SMP
- proc_id = topology_physical_package_id(smp_processor_id());
-#else
- proc_id = 0;
-#endif
-
- if (ctx->flags.system)
- entry = &pfm_nb_sys_owners[proc_id];
- else
- entry = &pfm_nb_task_owner;
-
- old = cmpxchg(entry, NULL, ctx);
- if (!old) {
- if (ctx->flags.system)
- PFM_DBG("acquired Northbridge event access on socket %u", proc_id);
- else
- PFM_DBG("acquired Northbridge event access globally");
- } else if (old != ctx) {
- if (ctx->flags.system)
- PFM_DBG("NorthBridge event conflict on socket %u", proc_id);
- else
- PFM_DBG("global NorthBridge event conflict");
- return -EBUSY;
- }
- return 0;
-}
-
-/*
- * invoked from pfm_write_pmcs() when pfm_nb_sys_owners is not NULL, i.e.,
- * when we have detected a multi-core processor.
- *
- * context is locked, interrupts are masked
- */
-static int pfm_amd64_pmc_write_check(struct pfm_context *ctx,
- struct pfm_event_set *set,
- struct pfarg_pmc *req)
-{
- unsigned int event;
- /*
- * delay checking NB event until we load the context
- */
- if (ctx->state == PFM_CTX_UNLOADED)
- return 0;
-
- /*
- * check event is NB event
- */
- event = (unsigned int)(req->reg_value & 0xff);
- if (event < 0xee)
- return 0;
-
- return pfm_amd64_acquire_nb(ctx);
-}
-
-/*
- * invoked on pfm_load_context().
- * context is locked, interrupts are masked
- */
-static int pfm_amd64_load_context(struct pfm_context *ctx)
-{
- struct pfm_event_set *set;
- unsigned int i, n;
-
- /*
- * scan all sets for NB events
- */
- list_for_each_entry(set, &ctx->list, list) {
- n = set->nused_pmcs;
- for(i=0; n; i++) {
- if (!test_bit(i, ulp(set->used_pmcs)))
- continue;
- if ((set->pmcs[i] & 0xff) >= 0xee)
- goto found;
- n--;
- }
- }
- return 0;
-found:
- return pfm_amd64_acquire_nb(ctx);
-}
-
-/*
- * invoked on pfm_unload_context()
- */
-static int pfm_amd64_unload_context(struct pfm_context *ctx)
-{
- struct pfm_context **entry, *old;
- int proc_id;
-
-#ifdef CONFIG_SMP
- proc_id = topology_physical_package_id(smp_processor_id());
-#else
- proc_id = 0;
-#endif
-
- /*
- * unload always happens on the monitored CPU in system-wide
- */
- if (ctx->flags.system)
- entry = &pfm_nb_sys_owners[proc_id];
- else
- entry = &pfm_nb_task_owner;
-
- old = cmpxchg(entry, ctx, NULL);
- if (old == ctx) {
- if (ctx->flags.system)
- PFM_DBG("released NorthBridge on socket %u", proc_id);
- else
- PFM_DBG("released NorthBridge events globally");
- }
- return 0;
-}
-
-/*
- * detect if we need to activate NorthBridge event access control
- */
-static int pfm_amd64_setup_nb_event_control(void)
-{
- unsigned int c, n = 0;
- unsigned int max_phys = 0;
-
-#ifdef CONFIG_SMP
- for_each_present_cpu(c) {
- if (cpu_data[c].phys_proc_id > max_phys)
- max_phys = cpu_data[c].phys_proc_id;
- }
-#else
- max_phys = 0;
-#endif
- if (max_phys > 255) {
- PFM_INFO("socket id %d is too big to handle", max_phys);
- return -ENOMEM;
- }
-
- n = max_phys + 1;
- if (n < 2)
- return 0;
-
- pfm_nb_sys_owners = vmalloc(n * sizeof(*pfm_nb_sys_owners));
- if (!pfm_nb_sys_owners)
- return -ENOMEM;
-
- memset(pfm_nb_sys_owners, 0, n * sizeof(*pfm_nb_sys_owners));
- pfm_nb_task_owner = NULL;
-
- /*
- * activate write-checker for PMC registers
- */
- for(c=0; c < PFM_AMD_NUM_PMCS; c++) {
- pfm_amd64_pmc_desc[c].type |= PFM_REG_WC;
- }
-
- pfm_amd64_pmu_conf.load_context = pfm_amd64_load_context;
- pfm_amd64_pmu_conf.unload_context = pfm_amd64_unload_context;
- pfm_amd64_pmu_conf.pmc_write_check = pfm_amd64_pmc_write_check;
-
- PFM_INFO("NorthBridge event access control enabled");
-
- return 0;
-}
-
-static int pfm_amd64_detect_nmi(void)
-{
- unsigned int i;
-
- if (nmi_watchdog != NMI_LOCAL_APIC) {
- if (force_nmi)
- pfm_amd64_pmu_info.flags |= PFM_X86_FL_USE_NMI;
- return 0;
- }
-
- /*
- * assume NMI watchdog is initialized before PMU description module
- * auto-detect which perfctr/eventsel is used by NMI watchdog
- */
- for (i=0; i < PFM_AMD_NUM_PMDS; i++) {
- /* skip IBS registers */
- if (pfm_amd64_pmu_info.pmc_addrs[i].reg_type & PFM_REGT_IBS)
- continue;
- if (avail_to_resrv_perfctr_nmi(pfm_amd64_pmd_desc[i].hw_addr))
- continue;
-
- PFM_INFO("NMI watchdog using %s/%s, disabling for perfmon",
- pfm_amd64_pmc_desc[i].desc,
- pfm_amd64_pmd_desc[i].desc);
-
- pfm_amd64_pmc_desc[i].type = PFM_REG_NA;
- pfm_amd64_pmd_desc[i].type = PFM_REG_NA;
- pfm_amd64_pmu_info.pmc_addrs[i].reg_type = PFM_REGT_NA;
- pfm_amd64_pmu_info.pmd_addrs[i].reg_type = PFM_REGT_NA;
- }
- pfm_amd64_pmu_info.flags |= PFM_X86_FL_USE_NMI;
- return 0;
-}
-
-static int pfm_amd64_probe_pmu(void)
-{
- if (current_cpu_data.x86_vendor != X86_VENDOR_AMD) {
- PFM_INFO("not an AMD processor");
- return -1;
- }
-
- switch (current_cpu_data.x86) {
- case 15:
- case 16:
- PFM_INFO("found family=%d", current_cpu_data.x86);
- break;
- default:
- PFM_INFO("unsupported family=%d", current_cpu_data.x86);
- return -1;
- }
-
- /*
- * check for local APIC (required)
- */
- if (!cpu_has_apic) {
- PFM_INFO("no local APIC, unsupported");
- return -1;
- }
- if (pfm_amd64_detect_nmi()) {
- PFM_INFO("NMI detection failed");
- return -1;
- }
- if (current_cpu_data.x86_max_cores > 1)
- pfm_amd64_setup_nb_event_control();
-
- PFM_INFO("Using AMD64 PMU");
- if (pfm_amd64_pmu_info.flags & PFM_X86_FL_IBS)
- PFM_INFO("IBS is supported by processor");
- if (pfm_amd64_pmu_info.flags & PFM_X86_FL_IBS_EXT)
- PFM_INFO("IBS extended registers are supported by processor");
-
- return 0;
-}
-
-static inline void
-pfm_amd64_check_register(struct pfm_pmu_config *cfg,
- struct pfm_reg_desc *reg,
- struct pfm_arch_ext_reg *ext_reg)
-{
- struct pfm_arch_pmu_info *arch_info = cfg->arch_info;
-
- if (!(ext_reg->reg_type & PFM_REGT_AMD64))
- /* No special AMD64 PMU register */
- return;
-
- /* Disable register */
- reg->type &= ~PFM_REG_I;
-
- switch (ext_reg->reg_type & PFM_REGT_AMD64) {
- case (PFM_REGT_IBS):
- /* IBS register */
- if (!(arch_info->flags & PFM_X86_FL_IBS))
- return;
- break;
- case (PFM_REGT_IBS|PFM_REGT_IBS_EXT):
- /* IBS extended register */
- if (!(arch_info->flags & PFM_X86_FL_IBS_EXT))
- return;
- break;
- default:
- return;
- }
-
- /* Enable register */
- reg->type |= PFM_REG_I;
-}
-
-static void pfm_amd64_setup_pmu(struct pfm_pmu_config *cfg)
-{
- u16 i;
- struct pfm_arch_pmu_info *arch_info = cfg->arch_info;
-
- /* set PMU features depending on CPUID */
- arch_info->flags &= ~(PFM_X86_FL_IBS|PFM_X86_FL_IBS_EXT);
- switch (current_cpu_data.x86) {
- case 15:
- break;
- case 16:
- arch_info->flags |= PFM_X86_FL_IBS;
- break;
- default:
- break;
- }
-
- /* Disable unsupported PMC/PMD registers */
- for (i = 0; i < cfg->num_pmc_entries; i++) {
- pfm_amd64_check_register(cfg, &cfg->pmc_desc[i],
- &arch_info->pmc_addrs[i]);
- }
- for (i = 0; i < cfg->num_pmd_entries; i++) {
- pfm_amd64_check_register(cfg, &cfg->pmd_desc[i],
- &arch_info->pmd_addrs[i]);
- }
-}
-
-static struct pfm_pmu_config pfm_amd64_pmu_conf = {
- .pmu_name = "AMD64",
- .counter_width = 47,
- .pmd_desc = pfm_amd64_pmd_desc,
- .pmc_desc = pfm_amd64_pmc_desc,
- .num_pmc_entries = PFM_AMD_NUM_PMCS,
- .num_pmd_entries = PFM_AMD_NUM_PMDS,
- .probe_pmu = pfm_amd64_probe_pmu,
- .version = "1.2",
- .arch_info = &pfm_amd64_pmu_info,
- .flags = PFM_PMU_BUILTIN_FLAG,
- .owner = THIS_MODULE,
- .pmd_sread = pfm_pmd_sread,
- .pmd_swrite = pfm_pmd_swrite,
-};
-
-static int __init pfm_amd64_pmu_init_module(void)
-{
- pfm_amd64_setup_pmu(&pfm_amd64_pmu_conf);
- return pfm_pmu_register(&pfm_amd64_pmu_conf);
-}
-
-static void __exit pfm_amd64_pmu_cleanup_module(void)
-{
- if (pfm_nb_sys_owners)
- vfree(pfm_nb_sys_owners);
-
- pfm_pmu_unregister(&pfm_amd64_pmu_conf);
-}
-
-module_init(pfm_amd64_pmu_init_module);
-module_exit(pfm_amd64_pmu_cleanup_module);
--
AMD Saxony, Dresden, Germany
Operating System Research Center
email: robert.richter@amd.com
^ permalink raw reply [flat|nested] 19+ messages in thread