All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC 0/9] KVM: x86: break the xAPIC barrier
@ 2016-05-06 20:53 Radim Krčmář
  2016-05-06 20:53 ` [PATCH 1/9] KVM: x86: add kvm_apic_map_get_dest_lapic Radim Krčmář
                   ` (8 more replies)
  0 siblings, 9 replies; 19+ messages in thread
From: Radim Krčmář @ 2016-05-06 20:53 UTC (permalink / raw)
  To: kvm; +Cc: Paolo Bonzini, Lan, Tianyu, Igor Mammedov, Jan Kiszka, Peter Xu

Please give attention to [5/9], which introduces a new userpace
interface, and to [7/9], which codifies a very unfourtunate interface
choice that was made when introducing paravirtual x2APIC.

The rest makes APIC ID > 255 work in KVM.

I've tested latest upstream and rhel7 kernels as guests in physical and
cluster x2APIC modes and there were no issues that could be tracked to
these patches.  If interrupt remapping + split irqchip didn't screw up
EOI, then everything useful would have worked.

It was a setup from Igor's latest x2APIC QEMU series, so 2 VCPUs in
total, first has id 0, the second has 280.  I used v4 of Peter's IR
patches with Jan's EIM on top, because newer versions got stuck at boot
even without other patches.

See the qemu-devel-list for potential uses,
"[RFC 0/4] APIC, IOMMU, KVM: add x2APIC interface".


Radim Krčmář (9):
  KVM: x86: add kvm_apic_map_get_dest_lapic
  KVM: x86: dynamic kvm_apic_map
  KVM: x86: use u16 for logical VCPU mask in lapic
  KVM: x86: use generic function for MSI parsing
  KVM: support x2APIC ID in userspace routes
  KVM: x86: directly call recalculate_apic_map on lapic restore
  KVM: x86: use proper format of APIC ID register
  KVM: x86: reset lapic base in kvm_lapic_reset
  KVM: bump MAX_VCPUS

 Documentation/virtual/kvm/api.txt |  17 +-
 arch/x86/include/asm/kvm_host.h   |  11 +-
 arch/x86/kvm/irq_comm.c           |  26 +--
 arch/x86/kvm/lapic.c              | 354 ++++++++++++++++++--------------------
 arch/x86/kvm/lapic.h              |   9 +-
 arch/x86/kvm/vmx.c                |   7 +-
 arch/x86/kvm/x86.c                |   3 +
 include/uapi/linux/kvm.h          |   5 +
 virt/kvm/irqchip.c                |   6 +-
 9 files changed, 228 insertions(+), 210 deletions(-)

-- 
2.8.2


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH 1/9] KVM: x86: add kvm_apic_map_get_dest_lapic
  2016-05-06 20:53 [RFC 0/9] KVM: x86: break the xAPIC barrier Radim Krčmář
@ 2016-05-06 20:53 ` Radim Krčmář
  2016-05-19  6:36   ` Peter Xu
  2016-05-06 20:53 ` [PATCH 2/9] KVM: x86: dynamic kvm_apic_map Radim Krčmář
                   ` (7 subsequent siblings)
  8 siblings, 1 reply; 19+ messages in thread
From: Radim Krčmář @ 2016-05-06 20:53 UTC (permalink / raw)
  To: kvm; +Cc: Paolo Bonzini, Lan, Tianyu, Igor Mammedov, Jan Kiszka, Peter Xu

kvm_irq_delivery_to_apic_fast and kvm_intr_is_single_vcpu_fast both
compute the interrupt destination.  Factor the code.

'struct kvm_lapic **dst = NULL' had to be added to silence GCC.
GCC might complain about potential NULL access in the future, because it
missed conditions that avoided uninitialized uses of dst.

Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/lapic.c | 237 +++++++++++++++++++++------------------------------
 1 file changed, 99 insertions(+), 138 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 1a2da0e5a373..6812e61c12d4 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -694,14 +694,94 @@ static void kvm_apic_disabled_lapic_found(struct kvm *kvm)
 	}
 }
 
+/* If kvm_apic_map_get_dest_lapic returns true, then *bitmap encodes accessible
+ * elements in the *dst array.  Those are destinations for the interrupt.
+ */
+static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm, struct kvm_lapic *src,
+		struct kvm_lapic_irq *irq, struct kvm_apic_map *map,
+		struct kvm_lapic ***dst, unsigned long *bitmap)
+{
+	int i, lowest;
+	bool x2apic_ipi;
+	u16 cid;
+
+	if (irq->shorthand == APIC_DEST_SELF) {
+		*dst = &src;
+		*bitmap = 1;
+		return true;
+	} else if (irq->shorthand)
+		return false;
+
+	x2apic_ipi = src && apic_x2apic_mode(src);
+	if (irq->dest_id == (x2apic_ipi ? X2APIC_BROADCAST : APIC_BROADCAST))
+		return false;
+
+	if (!map)
+		return false;
+
+	if (irq->dest_mode == APIC_DEST_PHYSICAL) {
+		if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) {
+			*bitmap = 0;
+		} else {
+			*dst = &map->phys_map[irq->dest_id];
+			*bitmap = 1;
+		}
+		return true;
+	}
+
+	if (!kvm_apic_logical_map_valid(map))
+		return false;
+
+	apic_logical_id(map, irq->dest_id, &cid, (u16 *)bitmap);
+
+	if (cid >= ARRAY_SIZE(map->logical_map)) {
+		*bitmap = 0;
+		return true;
+	}
+
+	*dst = map->logical_map[cid];
+
+	if (!kvm_lowest_prio_delivery(irq))
+		return true;
+
+	if (!kvm_vector_hashing_enabled()) {
+		lowest = -1;
+		for_each_set_bit(i, bitmap, 16) {
+			if (!(*dst)[i])
+				continue;
+			if (lowest < 0)
+				lowest = i;
+			else if (kvm_apic_compare_prio((*dst)[i]->vcpu,
+						(*dst)[lowest]->vcpu) < 0)
+				lowest = i;
+		}
+	} else {
+		if (!*bitmap)
+			return true;
+
+		lowest = kvm_vector_to_index(irq->vector, hweight16(*bitmap),
+				bitmap, 16);
+
+		if (!(*dst)[lowest]) {
+			kvm_apic_disabled_lapic_found(kvm);
+			*bitmap = 0;
+			return true;
+		}
+	}
+
+	*bitmap = (lowest >= 0) ? 1 << lowest : 0;
+
+	return true;
+}
+
 bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 		struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map)
 {
 	struct kvm_apic_map *map;
-	unsigned long bitmap = 1;
-	struct kvm_lapic **dst;
+	unsigned long bitmap;
+	struct kvm_lapic **dst = NULL;
 	int i;
-	bool ret, x2apic_ipi;
+	bool ret;
 
 	*r = -1;
 
@@ -710,86 +790,19 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 		return true;
 	}
 
-	if (irq->shorthand)
-		return false;
-
-	x2apic_ipi = src && apic_x2apic_mode(src);
-	if (irq->dest_id == (x2apic_ipi ? X2APIC_BROADCAST : APIC_BROADCAST))
-		return false;
-
-	ret = true;
 	rcu_read_lock();
 	map = rcu_dereference(kvm->arch.apic_map);
 
-	if (!map) {
-		ret = false;
-		goto out;
-	}
-
-	if (irq->dest_mode == APIC_DEST_PHYSICAL) {
-		if (irq->dest_id >= ARRAY_SIZE(map->phys_map))
-			goto out;
-
-		dst = &map->phys_map[irq->dest_id];
-	} else {
-		u16 cid;
-
-		if (!kvm_apic_logical_map_valid(map)) {
-			ret = false;
-			goto out;
+	ret = kvm_apic_map_get_dest_lapic(kvm, src, irq, map, &dst, &bitmap);
+	if (ret)
+		for_each_set_bit(i, &bitmap, 16) {
+			if (!dst[i])
+				continue;
+			if (*r < 0)
+				*r = 0;
+			*r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
 		}
 
-		apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap);
-
-		if (cid >= ARRAY_SIZE(map->logical_map))
-			goto out;
-
-		dst = map->logical_map[cid];
-
-		if (!kvm_lowest_prio_delivery(irq))
-			goto set_irq;
-
-		if (!kvm_vector_hashing_enabled()) {
-			int l = -1;
-			for_each_set_bit(i, &bitmap, 16) {
-				if (!dst[i])
-					continue;
-				if (l < 0)
-					l = i;
-				else if (kvm_apic_compare_prio(dst[i]->vcpu,
-							dst[l]->vcpu) < 0)
-					l = i;
-			}
-			bitmap = (l >= 0) ? 1 << l : 0;
-		} else {
-			int idx;
-			unsigned int dest_vcpus;
-
-			dest_vcpus = hweight16(bitmap);
-			if (dest_vcpus == 0)
-				goto out;
-
-			idx = kvm_vector_to_index(irq->vector,
-				dest_vcpus, &bitmap, 16);
-
-			if (!dst[idx]) {
-				kvm_apic_disabled_lapic_found(kvm);
-				goto out;
-			}
-
-			bitmap = (idx >= 0) ? 1 << idx : 0;
-		}
-	}
-
-set_irq:
-	for_each_set_bit(i, &bitmap, 16) {
-		if (!dst[i])
-			continue;
-		if (*r < 0)
-			*r = 0;
-		*r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
-	}
-out:
 	rcu_read_unlock();
 	return ret;
 }
@@ -812,8 +825,9 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
 			struct kvm_vcpu **dest_vcpu)
 {
 	struct kvm_apic_map *map;
+	unsigned long bitmap;
+	struct kvm_lapic **dst = NULL;
 	bool ret = false;
-	struct kvm_lapic *dst = NULL;
 
 	if (irq->shorthand)
 		return false;
@@ -821,69 +835,16 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
 	rcu_read_lock();
 	map = rcu_dereference(kvm->arch.apic_map);
 
-	if (!map)
-		goto out;
+	if (kvm_apic_map_get_dest_lapic(kvm, NULL, irq, map, &dst, &bitmap) &&
+			hweight16(bitmap) == 1) {
+		unsigned long i = find_first_bit(&bitmap, 16);
 
-	if (irq->dest_mode == APIC_DEST_PHYSICAL) {
-		if (irq->dest_id == 0xFF)
-			goto out;
-
-		if (irq->dest_id >= ARRAY_SIZE(map->phys_map))
-			goto out;
-
-		dst = map->phys_map[irq->dest_id];
-		if (dst && kvm_apic_present(dst->vcpu))
-			*dest_vcpu = dst->vcpu;
-		else
-			goto out;
-	} else {
-		u16 cid;
-		unsigned long bitmap = 1;
-		int i, r = 0;
-
-		if (!kvm_apic_logical_map_valid(map))
-			goto out;
-
-		apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap);
-
-		if (cid >= ARRAY_SIZE(map->logical_map))
-			goto out;
-
-		if (kvm_vector_hashing_enabled() &&
-				kvm_lowest_prio_delivery(irq)) {
-			int idx;
-			unsigned int dest_vcpus;
-
-			dest_vcpus = hweight16(bitmap);
-			if (dest_vcpus == 0)
-				goto out;
-
-			idx = kvm_vector_to_index(irq->vector, dest_vcpus,
-						  &bitmap, 16);
-
-			dst = map->logical_map[cid][idx];
-			if (!dst) {
-				kvm_apic_disabled_lapic_found(kvm);
-				goto out;
-			}
-
-			*dest_vcpu = dst->vcpu;
-		} else {
-			for_each_set_bit(i, &bitmap, 16) {
-				dst = map->logical_map[cid][i];
-				if (++r == 2)
-					goto out;
-			}
-
-			if (dst && kvm_apic_present(dst->vcpu))
-				*dest_vcpu = dst->vcpu;
-			else
-				goto out;
+		if (dst[i]) {
+			*dest_vcpu = dst[i]->vcpu;
+			ret = true;
 		}
 	}
 
-	ret = true;
-out:
 	rcu_read_unlock();
 	return ret;
 }
-- 
2.8.2


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 2/9] KVM: x86: dynamic kvm_apic_map
  2016-05-06 20:53 [RFC 0/9] KVM: x86: break the xAPIC barrier Radim Krčmář
  2016-05-06 20:53 ` [PATCH 1/9] KVM: x86: add kvm_apic_map_get_dest_lapic Radim Krčmář
@ 2016-05-06 20:53 ` Radim Krčmář
  2016-05-23  8:04   ` Peter Xu
  2016-05-06 20:53 ` [PATCH 3/9] KVM: x86: use u16 for logical VCPU mask in lapic Radim Krčmář
                   ` (6 subsequent siblings)
  8 siblings, 1 reply; 19+ messages in thread
From: Radim Krčmář @ 2016-05-06 20:53 UTC (permalink / raw)
  To: kvm; +Cc: Paolo Bonzini, Lan, Tianyu, Igor Mammedov, Jan Kiszka, Peter Xu

x2APIC supports up to 2^32-1 LAPICs, but most guest in coming years will
have slighly less VCPUs.  Dynamic size saves memory at the cost of
turning one constant into a variable.

apic_map mutex had to be moved before allocation to avoid races with cpu
hotplug.

Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  9 +++--
 arch/x86/kvm/lapic.c            | 87 ++++++++++++++++++++++-------------------
 arch/x86/kvm/lapic.h            |  2 +-
 3 files changed, 54 insertions(+), 44 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b7e394485a5f..c93c4a7877ed 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -682,9 +682,12 @@ struct kvm_arch_memory_slot {
 struct kvm_apic_map {
 	struct rcu_head rcu;
 	u8 mode;
-	struct kvm_lapic *phys_map[256];
-	/* first index is cluster id second is cpu id in a cluster */
-	struct kvm_lapic *logical_map[16][16];
+	u32 size;
+	union {
+		struct kvm_lapic *xapic_flat_map[8];
+		struct kvm_lapic *xapic_cluster_map[16][4];
+	};
+	struct kvm_lapic *phys_map[];
 };
 
 /* Hyper-V emulation context */
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 6812e61c12d4..90e07c109dfe 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -129,26 +129,32 @@ static inline int apic_enabled(struct kvm_lapic *apic)
 	(LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
 	 APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
 
-/* The logical map is definitely wrong if we have multiple
- * modes at the same time.  (Physical map is always right.)
- */
-static inline bool kvm_apic_logical_map_valid(struct kvm_apic_map *map)
-{
-	return !(map->mode & (map->mode - 1));
-}
+static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
+		u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) {
+	switch (map->mode) {
+	case KVM_APIC_MODE_X2APIC: {
+		u32 offset = (dest_id >> 16) * 16;
+		// XXX: phys_map must be allocated as multiples of 16
+		if (offset < map->size) {
+			*cluster = &map->phys_map[offset];
+			*mask = dest_id & 0xffff;
+		} else
+			*mask = 0;
 
-static inline void
-apic_logical_id(struct kvm_apic_map *map, u32 dest_id, u16 *cid, u16 *lid)
-{
-	unsigned lid_bits;
-
-	BUILD_BUG_ON(KVM_APIC_MODE_XAPIC_CLUSTER !=  4);
-	BUILD_BUG_ON(KVM_APIC_MODE_XAPIC_FLAT    !=  8);
-	BUILD_BUG_ON(KVM_APIC_MODE_X2APIC        != 16);
-	lid_bits = map->mode;
-
-	*cid = dest_id >> lid_bits;
-	*lid = dest_id & ((1 << lid_bits) - 1);
+		return true;
+		}
+	case KVM_APIC_MODE_XAPIC_FLAT:
+		*cluster = map->xapic_flat_map;
+		*mask = dest_id & 0xff;
+		return true;
+	case KVM_APIC_MODE_XAPIC_CLUSTER:
+		*cluster = map->xapic_cluster_map[dest_id >> 4];
+		*mask = dest_id & 0xf;
+		return true;
+	default:
+		/* Not optimized. */
+		return false;
+	}
 }
 
 static void recalculate_apic_map(struct kvm *kvm)
@@ -156,17 +162,28 @@ static void recalculate_apic_map(struct kvm *kvm)
 	struct kvm_apic_map *new, *old = NULL;
 	struct kvm_vcpu *vcpu;
 	int i;
-
-	new = kzalloc(sizeof(struct kvm_apic_map), GFP_KERNEL);
+	u32 size = 255;
 
 	mutex_lock(&kvm->arch.apic_map_lock);
 
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		if (kvm_apic_present(vcpu))
+			size = max(size, kvm_apic_id(vcpu->arch.apic));
+
+	size++;
+	size += (16 - size) & 16;
+	new = kzalloc(sizeof(struct kvm_apic_map) +
+	              sizeof(struct kvm_lapic) * size, GFP_KERNEL);
+
 	if (!new)
 		goto out;
 
+	new->size = size;
+
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		struct kvm_lapic *apic = vcpu->arch.apic;
-		u16 cid, lid;
+		struct kvm_lapic **cluster;
+		u16 mask;
 		u32 ldr, aid;
 
 		if (!kvm_apic_present(vcpu))
@@ -175,7 +192,7 @@ static void recalculate_apic_map(struct kvm *kvm)
 		aid = kvm_apic_id(apic);
 		ldr = kvm_apic_get_reg(apic, APIC_LDR);
 
-		if (aid < ARRAY_SIZE(new->phys_map))
+		if (aid < size)
 			new->phys_map[aid] = apic;
 
 		if (apic_x2apic_mode(apic)) {
@@ -188,13 +205,11 @@ static void recalculate_apic_map(struct kvm *kvm)
 				new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER;
 		}
 
-		if (!kvm_apic_logical_map_valid(new))
+		if (!kvm_apic_map_get_logical_dest(new, ldr, &cluster, &mask))
 			continue;
 
-		apic_logical_id(new, ldr, &cid, &lid);
-
-		if (lid && cid < ARRAY_SIZE(new->logical_map))
-			new->logical_map[cid][ffs(lid) - 1] = apic;
+		if (mask)
+			cluster[ffs(mask) - 1] = apic;
 	}
 out:
 	old = rcu_dereference_protected(kvm->arch.apic_map,
@@ -703,7 +718,6 @@ static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm, struct kvm_lapic
 {
 	int i, lowest;
 	bool x2apic_ipi;
-	u16 cid;
 
 	if (irq->shorthand == APIC_DEST_SELF) {
 		*dst = &src;
@@ -720,7 +734,7 @@ static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm, struct kvm_lapic
 		return false;
 
 	if (irq->dest_mode == APIC_DEST_PHYSICAL) {
-		if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) {
+		if (irq->dest_id >= map->size) {
 			*bitmap = 0;
 		} else {
 			*dst = &map->phys_map[irq->dest_id];
@@ -729,18 +743,11 @@ static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm, struct kvm_lapic
 		return true;
 	}
 
-	if (!kvm_apic_logical_map_valid(map))
+	*bitmap = 0;
+	if (!kvm_apic_map_get_logical_dest(map, irq->dest_id, dst,
+				(u16 *)bitmap))
 		return false;
 
-	apic_logical_id(map, irq->dest_id, &cid, (u16 *)bitmap);
-
-	if (cid >= ARRAY_SIZE(map->logical_map)) {
-		*bitmap = 0;
-		return true;
-	}
-
-	*dst = map->logical_map[cid];
-
 	if (!kvm_lowest_prio_delivery(irq))
 		return true;
 
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index f71183e502ee..d818a36ce24e 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -167,7 +167,7 @@ static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu)
 	return lapic_in_kernel(vcpu) && test_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
 }
 
-static inline int kvm_apic_id(struct kvm_lapic *apic)
+static inline u32 kvm_apic_id(struct kvm_lapic *apic)
 {
 	return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
 }
-- 
2.8.2


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 3/9] KVM: x86: use u16 for logical VCPU mask in lapic
  2016-05-06 20:53 [RFC 0/9] KVM: x86: break the xAPIC barrier Radim Krčmář
  2016-05-06 20:53 ` [PATCH 1/9] KVM: x86: add kvm_apic_map_get_dest_lapic Radim Krčmář
  2016-05-06 20:53 ` [PATCH 2/9] KVM: x86: dynamic kvm_apic_map Radim Krčmář
@ 2016-05-06 20:53 ` Radim Krčmář
  2016-05-06 20:54 ` [PATCH 4/9] KVM: x86: use generic function for MSI parsing Radim Krčmář
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 19+ messages in thread
From: Radim Krčmář @ 2016-05-06 20:53 UTC (permalink / raw)
  To: kvm; +Cc: Paolo Bonzini, Lan, Tianyu, Igor Mammedov, Jan Kiszka, Peter Xu

for_all_bits() never were type safe, so we don't lose safety when
casting u16 * to unsigned long * and u16 is the size we want to avoid
mishaps when sending the pointer to functions that operate only on 16
bits.

Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/lapic.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 90e07c109dfe..6ae5e74031d6 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -714,7 +714,7 @@ static void kvm_apic_disabled_lapic_found(struct kvm *kvm)
  */
 static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm, struct kvm_lapic *src,
 		struct kvm_lapic_irq *irq, struct kvm_apic_map *map,
-		struct kvm_lapic ***dst, unsigned long *bitmap)
+		struct kvm_lapic ***dst, u16 *bitmap)
 {
 	int i, lowest;
 	bool x2apic_ipi;
@@ -743,9 +743,7 @@ static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm, struct kvm_lapic
 		return true;
 	}
 
-	*bitmap = 0;
-	if (!kvm_apic_map_get_logical_dest(map, irq->dest_id, dst,
-				(u16 *)bitmap))
+	if (!kvm_apic_map_get_logical_dest(map, irq->dest_id, dst, bitmap))
 		return false;
 
 	if (!kvm_lowest_prio_delivery(irq))
@@ -753,7 +751,7 @@ static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm, struct kvm_lapic
 
 	if (!kvm_vector_hashing_enabled()) {
 		lowest = -1;
-		for_each_set_bit(i, bitmap, 16) {
+		for_each_set_bit(i, (unsigned long *)bitmap, 16) {
 			if (!(*dst)[i])
 				continue;
 			if (lowest < 0)
@@ -767,7 +765,7 @@ static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm, struct kvm_lapic
 			return true;
 
 		lowest = kvm_vector_to_index(irq->vector, hweight16(*bitmap),
-				bitmap, 16);
+				(unsigned long *)bitmap, 16);
 
 		if (!(*dst)[lowest]) {
 			kvm_apic_disabled_lapic_found(kvm);
@@ -785,7 +783,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 		struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map)
 {
 	struct kvm_apic_map *map;
-	unsigned long bitmap;
+	u16 bitmap;
 	struct kvm_lapic **dst = NULL;
 	int i;
 	bool ret;
@@ -802,7 +800,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 
 	ret = kvm_apic_map_get_dest_lapic(kvm, src, irq, map, &dst, &bitmap);
 	if (ret)
-		for_each_set_bit(i, &bitmap, 16) {
+		for_each_set_bit(i, (unsigned long *)&bitmap, 16) {
 			if (!dst[i])
 				continue;
 			if (*r < 0)
@@ -832,7 +830,7 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
 			struct kvm_vcpu **dest_vcpu)
 {
 	struct kvm_apic_map *map;
-	unsigned long bitmap;
+	u16 bitmap;
 	struct kvm_lapic **dst = NULL;
 	bool ret = false;
 
@@ -844,7 +842,7 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
 
 	if (kvm_apic_map_get_dest_lapic(kvm, NULL, irq, map, &dst, &bitmap) &&
 			hweight16(bitmap) == 1) {
-		unsigned long i = find_first_bit(&bitmap, 16);
+		unsigned long i = find_first_bit((unsigned long *)&bitmap, 16);
 
 		if (dst[i]) {
 			*dest_vcpu = dst[i]->vcpu;
-- 
2.8.2


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 4/9] KVM: x86: use generic function for MSI parsing
  2016-05-06 20:53 [RFC 0/9] KVM: x86: break the xAPIC barrier Radim Krčmář
                   ` (2 preceding siblings ...)
  2016-05-06 20:53 ` [PATCH 3/9] KVM: x86: use u16 for logical VCPU mask in lapic Radim Krčmář
@ 2016-05-06 20:54 ` Radim Krčmář
  2016-05-06 20:54 ` [PATCH 5/9] KVM: support x2APIC ID in userspace routes Radim Krčmář
                   ` (4 subsequent siblings)
  8 siblings, 0 replies; 19+ messages in thread
From: Radim Krčmář @ 2016-05-06 20:54 UTC (permalink / raw)
  To: kvm; +Cc: Paolo Bonzini, Lan, Tianyu, Igor Mammedov, Jan Kiszka, Peter Xu

Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/irq_comm.c | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 54ead79e444b..3d17eee0987b 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -391,21 +391,16 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
 			       kvm->arch.nr_reserved_ioapic_pins);
 	for (i = 0; i < nr_ioapic_pins; ++i) {
 		hlist_for_each_entry(entry, &table->map[i], link) {
-			u32 dest_id, dest_mode;
-			bool level;
+			struct kvm_lapic_irq irq;
 
 			if (entry->type != KVM_IRQ_ROUTING_MSI)
 				continue;
-			dest_id = (entry->msi.address_lo >> 12) & 0xff;
-			dest_mode = (entry->msi.address_lo >> 2) & 0x1;
-			level = entry->msi.data & MSI_DATA_TRIGGER_LEVEL;
-			if (level && kvm_apic_match_dest(vcpu, NULL, 0,
-						dest_id, dest_mode)) {
-				u32 vector = entry->msi.data & 0xff;
 
-				__set_bit(vector,
-					  ioapic_handled_vectors);
-			}
+			kvm_set_msi_irq(entry, &irq);
+
+			if (irq.level && kvm_apic_match_dest(vcpu, NULL, 0,
+						irq.dest_id, irq.dest_mode))
+				__set_bit(irq.vector, ioapic_handled_vectors);
 		}
 	}
 	srcu_read_unlock(&kvm->irq_srcu, idx);
-- 
2.8.2


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 5/9] KVM: support x2APIC ID in userspace routes
  2016-05-06 20:53 [RFC 0/9] KVM: x86: break the xAPIC barrier Radim Krčmář
                   ` (3 preceding siblings ...)
  2016-05-06 20:54 ` [PATCH 4/9] KVM: x86: use generic function for MSI parsing Radim Krčmář
@ 2016-05-06 20:54 ` Radim Krčmář
  2016-05-06 20:54 ` [PATCH 6/9] KVM: x86: directly call recalculate_apic_map on lapic restore Radim Krčmář
                   ` (3 subsequent siblings)
  8 siblings, 0 replies; 19+ messages in thread
From: Radim Krčmář @ 2016-05-06 20:54 UTC (permalink / raw)
  To: kvm; +Cc: Paolo Bonzini, Lan, Tianyu, Igor Mammedov, Jan Kiszka, Peter Xu

We need a way to pass 32 bit APIC ID.  Intel® Virtualization Technology
for Directed I/O, 5.1.8 Programming in Intel® 64 x2APIC Mode, specifies
that Remapping Hardware is configured with APIC ID 31:8 in Upper Address
Register 31:8.  This patch adopts that method and applies it to
KVM_SIGNAL_MSI and GSI routes.

Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 Documentation/virtual/kvm/api.txt | 11 ++++++++++-
 arch/x86/kvm/irq_comm.c           |  9 +++++++--
 arch/x86/kvm/vmx.c                |  3 ++-
 arch/x86/kvm/x86.c                |  1 +
 include/uapi/linux/kvm.h          |  5 +++++
 virt/kvm/irqchip.c                |  6 +++++-
 6 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 4d0542c5206b..07bcedc0ba09 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1461,6 +1461,7 @@ struct kvm_irq_routing_entry {
 #define KVM_IRQ_ROUTING_MSI 2
 #define KVM_IRQ_ROUTING_S390_ADAPTER 3
 #define KVM_IRQ_ROUTING_HV_SINT 4
+#define KVM_IRQ_ROUTING_MSI_X2APIC 5
 
 No flags are specified so far, the corresponding field must be set to zero.
 
@@ -1489,6 +1490,11 @@ struct kvm_irq_routing_hv_sint {
 	__u32 sint;
 };
 
+KVM_IRQ_ROUTING_MSI_X2APIC can be used if KVM_CAP_MSI_X2APIC is present.
+The entry uses struct kvm_irq_routing_msi and stores APIC ID bits 8-31 in
+address_hi bits 8-31.
+
+
 4.53 KVM_ASSIGN_SET_MSIX_NR (deprecated)
 
 Capability: none
@@ -2166,7 +2172,10 @@ struct kvm_msi {
 	__u8  pad[16];
 };
 
-No flags are defined so far. The corresponding field must be 0.
+Valid flags are 0 and bitwise OR of any following:
+* KVM_SIGNAL_MSI_X2APIC
+  - valid with capability KVM_CAP_MSI_X2APIC
+  - address_hi bits 8-31 contain APIC ID bits 8-31
 
 
 4.71 KVM_CREATE_PIT2
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 3d17eee0987b..b9595251834b 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -117,6 +117,8 @@ void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
 
 	irq->dest_id = (e->msi.address_lo &
 			MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
+	if (e->type == KVM_IRQ_ROUTING_MSI_X2APIC)
+		irq->dest_id |= MSI_ADDR_EXT_DEST_ID(e->msi.address_hi);
 	irq->vector = (e->msi.data &
 			MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT;
 	irq->dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo;
@@ -150,7 +152,8 @@ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
 	struct kvm_lapic_irq irq;
 	int r;
 
-	if (unlikely(e->type != KVM_IRQ_ROUTING_MSI))
+	if (unlikely(e->type != KVM_IRQ_ROUTING_MSI &&
+	             e->type != KVM_IRQ_ROUTING_MSI_X2APIC))
 		return -EWOULDBLOCK;
 
 	kvm_set_msi_irq(e, &irq);
@@ -281,6 +284,7 @@ int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
 			goto out;
 		break;
 	case KVM_IRQ_ROUTING_MSI:
+	case KVM_IRQ_ROUTING_MSI_X2APIC:
 		e->set = kvm_set_msi;
 		e->msi.address_lo = ue->u.msi.address_lo;
 		e->msi.address_hi = ue->u.msi.address_hi;
@@ -393,7 +397,8 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
 		hlist_for_each_entry(entry, &table->map[i], link) {
 			struct kvm_lapic_irq irq;
 
-			if (entry->type != KVM_IRQ_ROUTING_MSI)
+			if (entry->type != KVM_IRQ_ROUTING_MSI &&
+			    entry->type != KVM_IRQ_ROUTING_MSI_X2APIC)
 				continue;
 
 			kvm_set_msi_irq(entry, &irq);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 133679d520af..35df8d757d1d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -10829,7 +10829,8 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
 	BUG_ON(guest_irq >= irq_rt->nr_rt_entries);
 
 	hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
-		if (e->type != KVM_IRQ_ROUTING_MSI)
+		if (e->type != KVM_IRQ_ROUTING_MSI &&
+		    e->type != KVM_IRQ_ROUTING_MSI_X2APIC)
 			continue;
 		/*
 		 * VT-d PI cannot support posting multicast/broadcast
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9b7798c7b210..86d1eb1c9d8b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2567,6 +2567,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_DISABLE_QUIRKS:
 	case KVM_CAP_SET_BOOT_CPU_ID:
  	case KVM_CAP_SPLIT_IRQCHIP:
+	case KVM_CAP_MSI_X2APIC:
 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
 	case KVM_CAP_ASSIGN_DEV_IRQ:
 	case KVM_CAP_PCI_2_3:
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index a7f1f8032ec1..af1abec5f41d 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -865,6 +865,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_SPAPR_TCE_64 125
 #define KVM_CAP_ARM_PMU_V3 126
 #define KVM_CAP_VCPU_ATTRIBUTES 127
+#define KVM_CAP_MSI_X2APIC 128
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -898,6 +899,7 @@ struct kvm_irq_routing_hv_sint {
 #define KVM_IRQ_ROUTING_MSI 2
 #define KVM_IRQ_ROUTING_S390_ADAPTER 3
 #define KVM_IRQ_ROUTING_HV_SINT 4
+#define KVM_IRQ_ROUTING_MSI_X2APIC 5 /* KVM_CAP_MSI_X2APIC */
 
 struct kvm_irq_routing_entry {
 	__u32 gsi;
@@ -1023,6 +1025,9 @@ struct kvm_one_reg {
 	__u64 addr;
 };
 
+#define KVM_SIGNAL_MSI_X2APIC  (1 <<  0) /* KVM_CAP_X2APIC */
+#define KVM_SIGNAL_MSI_FLAGS   KVM_SIGNAL_MSI_X2APIC
+
 struct kvm_msi {
 	__u32 address_lo;
 	__u32 address_hi;
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index fe84e1a95dd5..39b38b1a2156 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -62,9 +62,13 @@ int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
 {
 	struct kvm_kernel_irq_routing_entry route;
 
-	if (!irqchip_in_kernel(kvm) || msi->flags != 0)
+	if (!irqchip_in_kernel(kvm) ||
+	    (msi->flags & ~KVM_SIGNAL_MSI_FLAGS) != 0)
 		return -EINVAL;
 
+	/* assignment must copy kvm_set_routing_entry() */
+	route.type = msi->flags & KVM_SIGNAL_MSI_X2APIC ?
+		KVM_IRQ_ROUTING_MSI_X2APIC : KVM_IRQ_ROUTING_MSI;
 	route.msi.address_lo = msi->address_lo;
 	route.msi.address_hi = msi->address_hi;
 	route.msi.data = msi->data;
-- 
2.8.2


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 6/9] KVM: x86: directly call recalculate_apic_map on lapic restore
  2016-05-06 20:53 [RFC 0/9] KVM: x86: break the xAPIC barrier Radim Krčmář
                   ` (4 preceding siblings ...)
  2016-05-06 20:54 ` [PATCH 5/9] KVM: support x2APIC ID in userspace routes Radim Krčmář
@ 2016-05-06 20:54 ` Radim Krčmář
  2016-05-23  8:30   ` Peter Xu
  2016-05-06 20:54 ` [PATCH 7/9] KVM: x86: use proper format of APIC ID register Radim Krčmář
                   ` (2 subsequent siblings)
  8 siblings, 1 reply; 19+ messages in thread
From: Radim Krčmář @ 2016-05-06 20:54 UTC (permalink / raw)
  To: kvm; +Cc: Paolo Bonzini, Lan, Tianyu, Igor Mammedov, Jan Kiszka, Peter Xu

The get/set dance was just for that.

Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/lapic.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 6ae5e74031d6..615067b41dde 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1933,8 +1933,8 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
 	/* set SPIV separately to get count of SW disabled APICs right */
 	apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
 	memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
-	/* call kvm_apic_set_id() to put apic into apic_map */
-	kvm_apic_set_id(apic, kvm_apic_id(apic));
+	/* put apic into apic_map */
+	recalculate_apic_map(vcpu->kvm);
 	kvm_apic_set_version(vcpu);
 
 	apic_update_ppr(apic);
-- 
2.8.2


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 7/9] KVM: x86: use proper format of APIC ID register
  2016-05-06 20:53 [RFC 0/9] KVM: x86: break the xAPIC barrier Radim Krčmář
                   ` (5 preceding siblings ...)
  2016-05-06 20:54 ` [PATCH 6/9] KVM: x86: directly call recalculate_apic_map on lapic restore Radim Krčmář
@ 2016-05-06 20:54 ` Radim Krčmář
  2016-05-17 15:34   ` Paolo Bonzini
  2016-05-06 20:54 ` [PATCH 8/9] KVM: x86: reset lapic base in kvm_lapic_reset Radim Krčmář
  2016-05-06 20:54 ` [PATCH 9/9] KVM: bump MAX_VCPUS Radim Krčmář
  8 siblings, 1 reply; 19+ messages in thread
From: Radim Krčmář @ 2016-05-06 20:54 UTC (permalink / raw)
  To: kvm; +Cc: Paolo Bonzini, Lan, Tianyu, Igor Mammedov, Jan Kiszka, Peter Xu

We currently always shift APIC ID as if APIC was in xAPIC mode.
x2APIC mode wants to use more bits and storing a hardware-compabible
value is the the sanest option.  VMX can stop intercepting the APIC ID
register then.

KVM API to set the lapic expects that bottom 8 bits of APIC ID are in
top 8 bits of APIC_ID register.  Definite that x2APIC IDs are byte
swapped to keep compatibility without new toggles.

Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 Documentation/virtual/kvm/api.txt |  6 ++++++
 arch/x86/kvm/lapic.c              | 44 ++++++++++++++++++++++++++++-----------
 arch/x86/kvm/lapic.h              |  7 ++++++-
 arch/x86/kvm/vmx.c                |  4 ----
 arch/x86/kvm/x86.c                |  2 ++
 5 files changed, 46 insertions(+), 17 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 07bcedc0ba09..b1ddea38e6b9 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1583,6 +1583,11 @@ struct kvm_lapic_state {
 Reads the Local APIC registers and copies them into the input argument.  The
 data format and layout are the same as documented in the architecture manual.
 
+Note that the APIC ID is stored in APIC_ID register in big endian format.
+This makes no difference for xAPIC APIC ID, which is still in the top 8 bits,
+but x2APIC ID needs to be byteswapped.  The reason is compatibility with KVM's
+definition of x2APIC.  (The hardware stores x2APIC ID as little endian.)
+
 
 4.58 KVM_SET_LAPIC
 
@@ -1600,6 +1605,7 @@ struct kvm_lapic_state {
 Copies the input argument into the Local APIC registers.  The data format
 and layout are the same as documented in the architecture manual.
 
+See the note about APIC_ID register in KVM_GET_LAPIC.
 
 4.59 KVM_IOEVENTFD
 
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 615067b41dde..25702584d65c 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -239,7 +239,7 @@ static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
 	}
 }
 
-static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id)
+static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id)
 {
 	apic_set_reg(apic, APIC_ID, id << 24);
 	recalculate_apic_map(apic->vcpu->kvm);
@@ -251,11 +251,11 @@ static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
 	recalculate_apic_map(apic->vcpu->kvm);
 }
 
-static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u8 id)
+static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id)
 {
 	u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
 
-	apic_set_reg(apic, APIC_ID, id << 24);
+	apic_set_reg(apic, APIC_ID, id);
 	apic_set_reg(apic, APIC_LDR, ldr);
 	recalculate_apic_map(apic->vcpu->kvm);
 }
@@ -1116,12 +1116,6 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
 		return 0;
 
 	switch (offset) {
-	case APIC_ID:
-		if (apic_x2apic_mode(apic))
-			val = kvm_apic_id(apic);
-		else
-			val = kvm_apic_id(apic) << 24;
-		break;
 	case APIC_ARBPRI:
 		apic_debug("Access APIC ARBPRI register which is for P6\n");
 		break;
@@ -1400,7 +1394,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 	switch (reg) {
 	case APIC_ID:		/* Local APIC ID */
 		if (!apic_x2apic_mode(apic))
-			kvm_apic_set_id(apic, val >> 24);
+			kvm_apic_set_xapic_id(apic, val >> 24);
 		else
 			ret = 1;
 		break;
@@ -1671,8 +1665,10 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 		if (value & X2APIC_ENABLE) {
 			kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
 			kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true);
-		} else
+		} else {
+			kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
 			kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false);
+		}
 	}
 
 	apic->base_address = apic->vcpu->arch.apic_base &
@@ -1703,7 +1699,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
 	hrtimer_cancel(&apic->lapic_timer.timer);
 
 	if (!init_event)
-		kvm_apic_set_id(apic, vcpu->vcpu_id);
+		kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
 	kvm_apic_set_version(apic->vcpu);
 
 	for (i = 0; i < APIC_LVT_NUM; i++)
@@ -1924,6 +1920,30 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
 	return vector;
 }
 
+static void __kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
+		struct kvm_lapic_state *s, bool restore)
+{
+	if (apic_x2apic_mode(vcpu->arch.apic)) {
+		u32 *id = (u32 *)(s->regs + APIC_ID);
+		if (restore)
+			*id = be32_to_cpu(*id);
+		else
+			*id = cpu_to_be32(*id);
+	}
+}
+
+void kvm_apic_state_get_fixup(struct kvm_vcpu *vcpu,
+		struct kvm_lapic_state *s)
+{
+	__kvm_apic_state_fixup(vcpu, s, false);
+}
+
+void kvm_apic_state_set_fixup(struct kvm_vcpu *vcpu,
+		struct kvm_lapic_state *s)
+{
+	__kvm_apic_state_fixup(vcpu, s, true);
+}
+
 void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
 		struct kvm_lapic_state *s)
 {
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index d818a36ce24e..435606dd916f 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -71,6 +71,10 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 
 u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
 int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
+void kvm_apic_state_get_fixup(struct kvm_vcpu *vcpu,
+		struct kvm_lapic_state *s);
+void kvm_apic_state_set_fixup(struct kvm_vcpu *vcpu,
+		struct kvm_lapic_state *s);
 void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
 		struct kvm_lapic_state *s);
 int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
@@ -169,7 +173,8 @@ static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu)
 
 static inline u32 kvm_apic_id(struct kvm_lapic *apic)
 {
-	return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
+	u32 id = kvm_apic_get_reg(apic, APIC_ID);
+	return apic_x2apic_mode(apic) ? id : id >> 24;
 }
 
 bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 35df8d757d1d..12beb83c61d6 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6337,10 +6337,6 @@ static __init int hardware_setup(void)
 		for (msr = 0x800; msr <= 0x8ff; msr++)
 			vmx_disable_intercept_msr_read_x2apic(msr);
 
-		/* According SDM, in x2apic mode, the whole id reg is used.
-		 * But in KVM, it only use the highest eight bits. Need to
-		 * intercept it */
-		vmx_enable_intercept_msr_read_x2apic(0x802);
 		/* TMCCT */
 		vmx_enable_intercept_msr_read_x2apic(0x839);
 		/* TPR */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 86d1eb1c9d8b..2a2446240320 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2769,6 +2769,7 @@ static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
 		kvm_x86_ops->sync_pir_to_irr(vcpu);
 
 	memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
+	kvm_apic_state_get_fixup(vcpu, s);
 
 	return 0;
 }
@@ -2776,6 +2777,7 @@ static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
 static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
 				    struct kvm_lapic_state *s)
 {
+	kvm_apic_state_set_fixup(vcpu, s);
 	kvm_apic_post_state_restore(vcpu, s);
 	update_cr8_intercept(vcpu);
 
-- 
2.8.2


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 8/9] KVM: x86: reset lapic base in kvm_lapic_reset
  2016-05-06 20:53 [RFC 0/9] KVM: x86: break the xAPIC barrier Radim Krčmář
                   ` (6 preceding siblings ...)
  2016-05-06 20:54 ` [PATCH 7/9] KVM: x86: use proper format of APIC ID register Radim Krčmář
@ 2016-05-06 20:54 ` Radim Krčmář
  2016-05-06 20:54 ` [PATCH 9/9] KVM: bump MAX_VCPUS Radim Krčmář
  8 siblings, 0 replies; 19+ messages in thread
From: Radim Krčmář @ 2016-05-06 20:54 UTC (permalink / raw)
  To: kvm; +Cc: Paolo Bonzini, Lan, Tianyu, Igor Mammedov, Jan Kiszka, Peter Xu

LAPIC is reset in xAPIC mode and the surrounding code expects that.
KVM never resets after initialization, so this patch is just for sanity.

Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/lapic.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 25702584d65c..3669ed8a79d3 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1698,8 +1698,11 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
 	/* Stop the timer in case it's a reset to an active apic */
 	hrtimer_cancel(&apic->lapic_timer.timer);
 
-	if (!init_event)
+	if (!init_event) {
+		kvm_lapic_set_base(vcpu, APIC_DEFAULT_PHYS_BASE |
+		                         MSR_IA32_APICBASE_ENABLE);
 		kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
+	}
 	kvm_apic_set_version(apic->vcpu);
 
 	for (i = 0; i < APIC_LVT_NUM; i++)
@@ -1838,9 +1841,6 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
 	 * thinking that APIC satet has changed.
 	 */
 	vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
-	kvm_lapic_set_base(vcpu,
-			APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE);
-
 	static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
 	kvm_lapic_reset(vcpu, false);
 	kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
-- 
2.8.2


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 9/9] KVM: bump MAX_VCPUS
  2016-05-06 20:53 [RFC 0/9] KVM: x86: break the xAPIC barrier Radim Krčmář
                   ` (7 preceding siblings ...)
  2016-05-06 20:54 ` [PATCH 8/9] KVM: x86: reset lapic base in kvm_lapic_reset Radim Krčmář
@ 2016-05-06 20:54 ` Radim Krčmář
  8 siblings, 0 replies; 19+ messages in thread
From: Radim Krčmář @ 2016-05-06 20:54 UTC (permalink / raw)
  To: kvm; +Cc: Paolo Bonzini, Lan, Tianyu, Igor Mammedov, Jan Kiszka, Peter Xu

288 is in high demand because of Knights Landing CPU.
We cannot set the limit to 640k, because that would be wasting space.

Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c93c4a7877ed..c776b741b0db 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -34,7 +34,7 @@
 #include <asm/asm.h>
 #include <asm/kvm_page_track.h>
 
-#define KVM_MAX_VCPUS 255
+#define KVM_MAX_VCPUS 288
 #define KVM_SOFT_MAX_VCPUS 160
 #define KVM_USER_MEM_SLOTS 509
 /* memory slots that are not exposed to userspace */
-- 
2.8.2


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH 7/9] KVM: x86: use proper format of APIC ID register
  2016-05-06 20:54 ` [PATCH 7/9] KVM: x86: use proper format of APIC ID register Radim Krčmář
@ 2016-05-17 15:34   ` Paolo Bonzini
  2016-05-25 16:30     ` Radim Krčmář
  0 siblings, 1 reply; 19+ messages in thread
From: Paolo Bonzini @ 2016-05-17 15:34 UTC (permalink / raw)
  To: Radim Krčmář, kvm
  Cc: Lan, Tianyu, Igor Mammedov, Jan Kiszka, Peter Xu



On 06/05/2016 22:54, Radim Krčmář wrote:
> We currently always shift APIC ID as if APIC was in xAPIC mode.
> x2APIC mode wants to use more bits and storing a hardware-compabible
> value is the the sanest option.  VMX can stop intercepting the APIC ID
> register then.
> 
> KVM API to set the lapic expects that bottom 8 bits of APIC ID are in
> top 8 bits of APIC_ID register.  Definite that x2APIC IDs are byte
> swapped to keep compatibility without new toggles.

That's a bit too clever...  Can we make KVM_CAP_MSI_X2APIC an
enable-able capability (and then better rename it KVM_CAP_X2APIC_ID),
and then all ids become 32 bit?  This fixes the APIC ID issue here, and
avoids introducing a new routing type in patch 5.

The cost is a little extra complexity in QEMU, but I think it's bearable.

Paolo

> Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
> ---
>  Documentation/virtual/kvm/api.txt |  6 ++++++
>  arch/x86/kvm/lapic.c              | 44 ++++++++++++++++++++++++++++-----------
>  arch/x86/kvm/lapic.h              |  7 ++++++-
>  arch/x86/kvm/vmx.c                |  4 ----
>  arch/x86/kvm/x86.c                |  2 ++
>  5 files changed, 46 insertions(+), 17 deletions(-)
> 
> diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
> index 07bcedc0ba09..b1ddea38e6b9 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -1583,6 +1583,11 @@ struct kvm_lapic_state {
>  Reads the Local APIC registers and copies them into the input argument.  The
>  data format and layout are the same as documented in the architecture manual.
>  
> +Note that the APIC ID is stored in APIC_ID register in big endian format.
> +This makes no difference for xAPIC APIC ID, which is still in the top 8 bits,
> +but x2APIC ID needs to be byteswapped.  The reason is compatibility with KVM's
> +definition of x2APIC.  (The hardware stores x2APIC ID as little endian.)
> +

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/9] KVM: x86: add kvm_apic_map_get_dest_lapic
  2016-05-06 20:53 ` [PATCH 1/9] KVM: x86: add kvm_apic_map_get_dest_lapic Radim Krčmář
@ 2016-05-19  6:36   ` Peter Xu
  2016-05-25 16:02     ` Radim Krčmář
  0 siblings, 1 reply; 19+ messages in thread
From: Peter Xu @ 2016-05-19  6:36 UTC (permalink / raw)
  To: Radim Krčmář
  Cc: kvm, Paolo Bonzini, Lan, Tianyu, Igor Mammedov, Jan Kiszka

On Fri, May 06, 2016 at 10:53:57PM +0200, Radim Krčmář wrote:
> kvm_irq_delivery_to_apic_fast and kvm_intr_is_single_vcpu_fast both
> compute the interrupt destination.  Factor the code.
> 
> 'struct kvm_lapic **dst = NULL' had to be added to silence GCC.
> GCC might complain about potential NULL access in the future, because it
> missed conditions that avoided uninitialized uses of dst.
> 
> Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
> ---
>  arch/x86/kvm/lapic.c | 237 +++++++++++++++++++++------------------------------
>  1 file changed, 99 insertions(+), 138 deletions(-)
> 
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 1a2da0e5a373..6812e61c12d4 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -694,14 +694,94 @@ static void kvm_apic_disabled_lapic_found(struct kvm *kvm)
>  	}
>  }
>  
> +/* If kvm_apic_map_get_dest_lapic returns true, then *bitmap encodes accessible
> + * elements in the *dst array.  Those are destinations for the interrupt.

Here, we mentioned that "*bitmap" encodes accessible elements" if
returns true, while...

> + */
> +static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm, struct kvm_lapic *src,
> +		struct kvm_lapic_irq *irq, struct kvm_apic_map *map,
> +		struct kvm_lapic ***dst, unsigned long *bitmap)
> +{
> +	int i, lowest;
> +	bool x2apic_ipi;
> +	u16 cid;
> +
> +	if (irq->shorthand == APIC_DEST_SELF) {
> +		*dst = &src;
> +		*bitmap = 1;
> +		return true;
> +	} else if (irq->shorthand)
> +		return false;
> +
> +	x2apic_ipi = src && apic_x2apic_mode(src);
> +	if (irq->dest_id == (x2apic_ipi ? X2APIC_BROADCAST : APIC_BROADCAST))
> +		return false;
> +
> +	if (!map)
> +		return false;
> +
> +	if (irq->dest_mode == APIC_DEST_PHYSICAL) {
> +		if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) {
> +			*bitmap = 0;

... here we returned true, but actually we found no real candidates.
Would it be better that we directly return false in this case? There
are several similar places in this function as well. Not sure, so
asked.

[...]

> @@ -821,69 +835,16 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
>  	rcu_read_lock();
>  	map = rcu_dereference(kvm->arch.apic_map);
>  
> -	if (!map)
> -		goto out;
> +	if (kvm_apic_map_get_dest_lapic(kvm, NULL, irq, map, &dst, &bitmap) &&
> +			hweight16(bitmap) == 1) {

If we follow the rule in comment above (return true only if there
are valid candidates), we may avoid the hweight16(bitmap) == 1
check as well.

Thanks,

-- peterx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/9] KVM: x86: dynamic kvm_apic_map
  2016-05-06 20:53 ` [PATCH 2/9] KVM: x86: dynamic kvm_apic_map Radim Krčmář
@ 2016-05-23  8:04   ` Peter Xu
  2016-05-25 16:15     ` Radim Krčmář
  0 siblings, 1 reply; 19+ messages in thread
From: Peter Xu @ 2016-05-23  8:04 UTC (permalink / raw)
  To: Radim Krčmář
  Cc: kvm, Paolo Bonzini, Lan, Tianyu, Igor Mammedov, Jan Kiszka

Hi, Radim,

Got several questions inline.

On Fri, May 06, 2016 at 10:53:58PM +0200, Radim Krčmář wrote:
> x2APIC supports up to 2^32-1 LAPICs, but most guest in coming years will
> have slighly less VCPUs.  Dynamic size saves memory at the cost of
> turning one constant into a variable.

From the manual, I see x2apic only support 2^20-16 processors, not
2^32-1.  Which one is correct?

From below codes [1], I still see 2^20-16, since we are using high
16 bits of dest ID as cluster ID (0-0xfffe, while I guess 0xffff
should be reserved), and lower 16 bits as processor/lapic mask.

[...]

> +static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
> +		u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) {
> +	switch (map->mode) {
> +	case KVM_APIC_MODE_X2APIC: {
> +		u32 offset = (dest_id >> 16) * 16;
> +		// XXX: phys_map must be allocated as multiples of 16
> +		if (offset < map->size) {
> +			*cluster = &map->phys_map[offset];
> +			*mask = dest_id & 0xffff;

[1]

[...]

>  static void recalculate_apic_map(struct kvm *kvm)
> @@ -156,17 +162,28 @@ static void recalculate_apic_map(struct kvm *kvm)
>  	struct kvm_apic_map *new, *old = NULL;
>  	struct kvm_vcpu *vcpu;
>  	int i;
> -
> -	new = kzalloc(sizeof(struct kvm_apic_map), GFP_KERNEL);
> +	u32 size = 255;
>  
>  	mutex_lock(&kvm->arch.apic_map_lock);
>  
> +	kvm_for_each_vcpu(i, vcpu, kvm)
> +		if (kvm_apic_present(vcpu))
> +			size = max(size, kvm_apic_id(vcpu->arch.apic));
> +
> +	size++;
> +	size += (16 - size) & 16;

Is this same as:

  size = round_up(size, 16);

?

Thanks,

-- peterx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 6/9] KVM: x86: directly call recalculate_apic_map on lapic restore
  2016-05-06 20:54 ` [PATCH 6/9] KVM: x86: directly call recalculate_apic_map on lapic restore Radim Krčmář
@ 2016-05-23  8:30   ` Peter Xu
  0 siblings, 0 replies; 19+ messages in thread
From: Peter Xu @ 2016-05-23  8:30 UTC (permalink / raw)
  To: Radim Krčmář
  Cc: kvm, Paolo Bonzini, Lan, Tianyu, Igor Mammedov, Jan Kiszka

On Fri, May 06, 2016 at 10:54:02PM +0200, Radim Krčmář wrote:
> The get/set dance was just for that.
> 
> Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>

Reviewed-by: Peter Xu <peterx@redhat.com>

> ---
>  arch/x86/kvm/lapic.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 6ae5e74031d6..615067b41dde 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -1933,8 +1933,8 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
>  	/* set SPIV separately to get count of SW disabled APICs right */
>  	apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
>  	memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
> -	/* call kvm_apic_set_id() to put apic into apic_map */
> -	kvm_apic_set_id(apic, kvm_apic_id(apic));
> +	/* put apic into apic_map */
> +	recalculate_apic_map(vcpu->kvm);
>  	kvm_apic_set_version(vcpu);
>  
>  	apic_update_ppr(apic);
> -- 
> 2.8.2
> 

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/9] KVM: x86: add kvm_apic_map_get_dest_lapic
  2016-05-19  6:36   ` Peter Xu
@ 2016-05-25 16:02     ` Radim Krčmář
  2016-05-26 11:58       ` Peter Xu
  0 siblings, 1 reply; 19+ messages in thread
From: Radim Krčmář @ 2016-05-25 16:02 UTC (permalink / raw)
  To: Peter Xu; +Cc: kvm, Paolo Bonzini, Lan, Tianyu, Igor Mammedov, Jan Kiszka

2016-05-19 14:36+0800, Peter Xu:
> On Fri, May 06, 2016 at 10:53:57PM +0200, Radim Krčmář wrote:
> > kvm_irq_delivery_to_apic_fast and kvm_intr_is_single_vcpu_fast both
> > compute the interrupt destination.  Factor the code.
> > 
> > 'struct kvm_lapic **dst = NULL' had to be added to silence GCC.
> > GCC might complain about potential NULL access in the future, because it
> > missed conditions that avoided uninitialized uses of dst.
> > 
> > Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
> > ---
> >  arch/x86/kvm/lapic.c | 237 +++++++++++++++++++++------------------------------
> >  1 file changed, 99 insertions(+), 138 deletions(-)
> > 
> > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> > index 1a2da0e5a373..6812e61c12d4 100644
> > --- a/arch/x86/kvm/lapic.c
> > +++ b/arch/x86/kvm/lapic.c
> > @@ -694,14 +694,94 @@ static void kvm_apic_disabled_lapic_found(struct kvm *kvm)
> >  	}
> >  }
> >  
> > +/* If kvm_apic_map_get_dest_lapic returns true, then *bitmap encodes accessible
> > + * elements in the *dst array.  Those are destinations for the interrupt.
> 
> Here, we mentioned that "*bitmap" encodes accessible elements" if
> returns true, while...
> 
>> + */
>> +static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm, struct kvm_lapic *src,
>> +		struct kvm_lapic_irq *irq, struct kvm_apic_map *map,
>> +		struct kvm_lapic ***dst, unsigned long *bitmap)
>> +{
>> +	int i, lowest;
>> +	bool x2apic_ipi;
>> +	u16 cid;
>> +
>> +	if (irq->shorthand == APIC_DEST_SELF) {
>> +		*dst = &src;
>> +		*bitmap = 1;
>> +		return true;
>> +	} else if (irq->shorthand)
>> +		return false;
>> +
>> +	x2apic_ipi = src && apic_x2apic_mode(src);
>> +	if (irq->dest_id == (x2apic_ipi ? X2APIC_BROADCAST : APIC_BROADCAST))
>> +		return false;
>> +
>> +	if (!map)
>> +		return false;
>> +
>> +	if (irq->dest_mode == APIC_DEST_PHYSICAL) {
>> +		if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) {
>> +			*bitmap = 0;
> 
> ... here we returned true, but actually we found no real candidates.
> Would it be better that we directly return false in this case? There
> are several similar places in this function as well. Not sure, so
> asked.

kvm_apic_map_get_dest_lapic() returns false if it doesn't know how to
handle the irq, so another function has to be used to get destinations
of the interrupt in question.

'irq->dest_id >= ARRAY_SIZE(map->phys_map)' is a case where we know that
the destination doesn't exist, so asking another function to get
destinations would be pure waste. but the destination doesn't exist, so
*bitmap has to be 0 to avoid invalid accesses.

"return true;" means that **dst offsets encoded in *bitmap are correct
destination(s).  Ah, it is horribly convoluted.  I am not sure if I can
improve the comment, though ...

> [...]
> 
> > @@ -821,69 +835,16 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
> >  	rcu_read_lock();
> >  	map = rcu_dereference(kvm->arch.apic_map);
> >  
> > -	if (!map)
> > -		goto out;
> > +	if (kvm_apic_map_get_dest_lapic(kvm, NULL, irq, map, &dst, &bitmap) &&
> > +			hweight16(bitmap) == 1) {
> 
> If we follow the rule in comment above (return true only if there
> are valid candidates), we may avoid the hweight16(bitmap) == 1
> check as well.

hweight16(bitmap) ranges from 0 to 16.  Logical destination mode has
multicast (the maximal addressible unit is one cluster, which has 16
LAPICs), but multicast cannot be optimized by hardware, so we have a
special handling of cases where the destination is just one VCPU.

e.g. for bitmap = 0b100101, the interrupt is directed to dst[0], dst[2]
and dst[5].

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/9] KVM: x86: dynamic kvm_apic_map
  2016-05-23  8:04   ` Peter Xu
@ 2016-05-25 16:15     ` Radim Krčmář
  2016-05-30  5:24       ` Peter Xu
  0 siblings, 1 reply; 19+ messages in thread
From: Radim Krčmář @ 2016-05-25 16:15 UTC (permalink / raw)
  To: Peter Xu; +Cc: kvm, Paolo Bonzini, Lan, Tianyu, Igor Mammedov, Jan Kiszka

2016-05-23 16:04+0800, Peter Xu:
> Hi, Radim,
> 
> Got several questions inline.
> 
> On Fri, May 06, 2016 at 10:53:58PM +0200, Radim Krčmář wrote:
>> x2APIC supports up to 2^32-1 LAPICs, but most guest in coming years will
>> have slighly less VCPUs.  Dynamic size saves memory at the cost of
>> turning one constant into a variable.
> 
> From the manual, I see x2apic only support 2^20-16 processors, not
> 2^32-1.  Which one is correct?

"up to" is a crucial part.  Physical x2APIC can address 2^32-1, logical
x2APIC can address (2^16-1)*16 LAPICs and the OS can choose which mode
to use.

I think that machines with APIC IDs >2^20 will still be able to use
logical x2APIC, but higher APIC IDs are only be addressable with
physical x2APIC.

(Well, broadcasts would make even xAPIC "support" an unbounded amount of
 LAPICs. :])

> From below codes [1], I still see 2^20-16, since we are using high
> 16 bits of dest ID as cluster ID (0-0xfffe, while I guess 0xffff
> should be reserved), and lower 16 bits as processor/lapic mask.
> 
> [...]
> 
>> +static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
>> +		u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) {
>> +	switch (map->mode) {
>> +	case KVM_APIC_MODE_X2APIC: {
>> +		u32 offset = (dest_id >> 16) * 16;
>> +		// XXX: phys_map must be allocated as multiples of 16
>> +		if (offset < map->size) {
>> +			*cluster = &map->phys_map[offset];
>> +			*mask = dest_id & 0xffff;
> 
> [1]

This function is called ...get_LOGICAL_dest, so only logical addresses
are going to be passed to it.  Yes, it cannot handle APIC ID > 2^20-16.

> [...]
> 
>>  static void recalculate_apic_map(struct kvm *kvm)
>> @@ -156,17 +162,28 @@ static void recalculate_apic_map(struct kvm *kvm)
>>  	struct kvm_apic_map *new, *old = NULL;
>>  	struct kvm_vcpu *vcpu;
>>  	int i;
>> -
>> -	new = kzalloc(sizeof(struct kvm_apic_map), GFP_KERNEL);
>> +	u32 size = 255;
>>  
>>  	mutex_lock(&kvm->arch.apic_map_lock);
>>  
>> +	kvm_for_each_vcpu(i, vcpu, kvm)
>> +		if (kvm_apic_present(vcpu))
>> +			size = max(size, kvm_apic_id(vcpu->arch.apic));
>> +
>> +	size++;
>> +	size += (16 - size) & 16;
> 
> Is this same as:
> 
>   size = round_up(size, 16);
> 
> ?

It is, thank you.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 7/9] KVM: x86: use proper format of APIC ID register
  2016-05-17 15:34   ` Paolo Bonzini
@ 2016-05-25 16:30     ` Radim Krčmář
  0 siblings, 0 replies; 19+ messages in thread
From: Radim Krčmář @ 2016-05-25 16:30 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: kvm, Lan, Tianyu, Igor Mammedov, Jan Kiszka, Peter Xu

2016-05-17 17:34+0200, Paolo Bonzini:
> On 06/05/2016 22:54, Radim Krčmář wrote:
>> We currently always shift APIC ID as if APIC was in xAPIC mode.
>> x2APIC mode wants to use more bits and storing a hardware-compabible
>> value is the the sanest option.  VMX can stop intercepting the APIC ID
>> register then.
>> 
>> KVM API to set the lapic expects that bottom 8 bits of APIC ID are in
>> top 8 bits of APIC_ID register.  Definite that x2APIC IDs are byte
>> swapped to keep compatibility without new toggles.
> 
> That's a bit too clever...  Can we make KVM_CAP_MSI_X2APIC an
> enable-able capability (and then better rename it KVM_CAP_X2APIC_ID),
> and then all ids become 32 bit?  This fixes the APIC ID issue here, and
> avoids introducing a new routing type in patch 5.
> 
> The cost is a little extra complexity in QEMU, but I think it's bearable.

Will do, thanks.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/9] KVM: x86: add kvm_apic_map_get_dest_lapic
  2016-05-25 16:02     ` Radim Krčmář
@ 2016-05-26 11:58       ` Peter Xu
  0 siblings, 0 replies; 19+ messages in thread
From: Peter Xu @ 2016-05-26 11:58 UTC (permalink / raw)
  To: Radim Krčmář
  Cc: kvm, Paolo Bonzini, Lan, Tianyu, Igor Mammedov, Jan Kiszka

On Wed, May 25, 2016 at 06:02:46PM +0200, Radim Krčmář wrote:

[...]

> kvm_apic_map_get_dest_lapic() returns false if it doesn't know how to
> handle the irq, so another function has to be used to get destinations
> of the interrupt in question.
> 
> 'irq->dest_id >= ARRAY_SIZE(map->phys_map)' is a case where we know that
> the destination doesn't exist, so asking another function to get
> destinations would be pure waste. but the destination doesn't exist, so
> *bitmap has to be 0 to avoid invalid accesses.
> 
> "return true;" means that **dst offsets encoded in *bitmap are correct
> destination(s).  Ah, it is horribly convoluted.  I am not sure if I can
> improve the comment, though ...

Thanks for the explanation.

How about: "Return true if the interrupt can be handled by fast path,
false otherwise (in which case we still need to go through the slow
path). Note: we may have zero kvm_lapic candidate even when we return
true, which means the interrupt should be dropped. In this case,
*bitmap would be zero."

> 
> > [...]
> > 
> > > @@ -821,69 +835,16 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
> > >  	rcu_read_lock();
> > >  	map = rcu_dereference(kvm->arch.apic_map);
> > >  
> > > -	if (!map)
> > > -		goto out;
> > > +	if (kvm_apic_map_get_dest_lapic(kvm, NULL, irq, map, &dst, &bitmap) &&
> > > +			hweight16(bitmap) == 1) {
> > 
> > If we follow the rule in comment above (return true only if there
> > are valid candidates), we may avoid the hweight16(bitmap) == 1
> > check as well.
> 
> hweight16(bitmap) ranges from 0 to 16.  Logical destination mode has
> multicast (the maximal addressible unit is one cluster, which has 16
> LAPICs), but multicast cannot be optimized by hardware, so we have a
> special handling of cases where the destination is just one VCPU.
> 
> e.g. for bitmap = 0b100101, the interrupt is directed to dst[0], dst[2]
> and dst[5].

Yes, thanks!

-- peterx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/9] KVM: x86: dynamic kvm_apic_map
  2016-05-25 16:15     ` Radim Krčmář
@ 2016-05-30  5:24       ` Peter Xu
  0 siblings, 0 replies; 19+ messages in thread
From: Peter Xu @ 2016-05-30  5:24 UTC (permalink / raw)
  To: Radim Krčmář
  Cc: kvm, Paolo Bonzini, Lan, Tianyu, Igor Mammedov, Jan Kiszka

On Wed, May 25, 2016 at 06:15:00PM +0200, Radim Krčmář wrote:
> 2016-05-23 16:04+0800, Peter Xu:
> > Hi, Radim,
> > 
> > Got several questions inline.
> > 
> > On Fri, May 06, 2016 at 10:53:58PM +0200, Radim Krčmář wrote:
> >> x2APIC supports up to 2^32-1 LAPICs, but most guest in coming years will
> >> have slighly less VCPUs.  Dynamic size saves memory at the cost of
> >> turning one constant into a variable.
> > 
> > From the manual, I see x2apic only support 2^20-16 processors, not
> > 2^32-1.  Which one is correct?
> 
> "up to" is a crucial part.  Physical x2APIC can address 2^32-1, logical
> x2APIC can address (2^16-1)*16 LAPICs and the OS can choose which mode
> to use.
> 
> I think that machines with APIC IDs >2^20 will still be able to use
> logical x2APIC, but higher APIC IDs are only be addressable with
> physical x2APIC.
> 
> (Well, broadcasts would make even xAPIC "support" an unbounded amount of
>  LAPICs. :])
> 
> > From below codes [1], I still see 2^20-16, since we are using high
> > 16 bits of dest ID as cluster ID (0-0xfffe, while I guess 0xffff
> > should be reserved), and lower 16 bits as processor/lapic mask.
> > 
> > [...]
> > 
> >> +static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
> >> +		u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) {
> >> +	switch (map->mode) {
> >> +	case KVM_APIC_MODE_X2APIC: {
> >> +		u32 offset = (dest_id >> 16) * 16;
> >> +		// XXX: phys_map must be allocated as multiples of 16
> >> +		if (offset < map->size) {
> >> +			*cluster = &map->phys_map[offset];
> >> +			*mask = dest_id & 0xffff;
> > 
> > [1]
> 
> This function is called ...get_LOGICAL_dest, so only logical addresses
> are going to be passed to it.  Yes, it cannot handle APIC ID > 2^20-16.

Thanks for the explanation. I thought x2apic only support logical
mode, seems I was wrong. :)

-- peterx

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2016-05-30  5:24 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-05-06 20:53 [RFC 0/9] KVM: x86: break the xAPIC barrier Radim Krčmář
2016-05-06 20:53 ` [PATCH 1/9] KVM: x86: add kvm_apic_map_get_dest_lapic Radim Krčmář
2016-05-19  6:36   ` Peter Xu
2016-05-25 16:02     ` Radim Krčmář
2016-05-26 11:58       ` Peter Xu
2016-05-06 20:53 ` [PATCH 2/9] KVM: x86: dynamic kvm_apic_map Radim Krčmář
2016-05-23  8:04   ` Peter Xu
2016-05-25 16:15     ` Radim Krčmář
2016-05-30  5:24       ` Peter Xu
2016-05-06 20:53 ` [PATCH 3/9] KVM: x86: use u16 for logical VCPU mask in lapic Radim Krčmář
2016-05-06 20:54 ` [PATCH 4/9] KVM: x86: use generic function for MSI parsing Radim Krčmář
2016-05-06 20:54 ` [PATCH 5/9] KVM: support x2APIC ID in userspace routes Radim Krčmář
2016-05-06 20:54 ` [PATCH 6/9] KVM: x86: directly call recalculate_apic_map on lapic restore Radim Krčmář
2016-05-23  8:30   ` Peter Xu
2016-05-06 20:54 ` [PATCH 7/9] KVM: x86: use proper format of APIC ID register Radim Krčmář
2016-05-17 15:34   ` Paolo Bonzini
2016-05-25 16:30     ` Radim Krčmář
2016-05-06 20:54 ` [PATCH 8/9] KVM: x86: reset lapic base in kvm_lapic_reset Radim Krčmář
2016-05-06 20:54 ` [PATCH 9/9] KVM: bump MAX_VCPUS Radim Krčmář

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.