All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Li, Xin3" <xin3.li@intel.com>
To: Thomas Gleixner <tglx@linutronix.de>,
	"H. Peter Anvin" <hpa@zytor.com>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	"x86@kernel.org" <x86@kernel.org>,
	"kvm@vger.kernel.org" <kvm@vger.kernel.org>
Cc: "mingo@redhat.com" <mingo@redhat.com>,
	"bp@alien8.de" <bp@alien8.de>,
	"dave.hansen@linux.intel.com" <dave.hansen@linux.intel.com>,
	"peterz@infradead.org" <peterz@infradead.org>,
	"andrew.cooper3@citrix.com" <andrew.cooper3@citrix.com>,
	"Christopherson,, Sean" <seanjc@google.com>,
	"pbonzini@redhat.com" <pbonzini@redhat.com>,
	"Shankar, Ravi V" <ravi.v.shankar@intel.com>,
	"jiangshanlai@gmail.com" <jiangshanlai@gmail.com>,
	"Kang, Shan" <shan.kang@intel.com>
Subject: RE: [PATCH v8 01/33] x86/traps: let common_interrupt() handle IRQ_MOVE_CLEANUP_VECTOR
Date: Tue, 6 Jun 2023 23:16:20 +0000	[thread overview]
Message-ID: <SA1PR11MB673401D5E5027DADBC422ACFA852A@SA1PR11MB6734.namprd11.prod.outlook.com> (raw)
In-Reply-To: <877csgl5eo.ffs@tglx>

> The untested below should do the trick. Wants to be split in several patches, but
> you get the idea.

I will continue the work from what you posted.  Thanks a lot!

Xin




> ---
> Subject: x86/vector: Get rid of IRQ_MOVE_CLEANUP_VECTOR
> From: Thomas Gleixner <tglx@linutronix.de>
> 
> No point to waste a vector for cleaning up the leftovers of a moved interrupt.
> Aside of that this must be the lowest priority of all vectors which makes FRED
> systems utilizing vectors 0x10-0x1f more complicated than necessary.
> 
> Schedule a timer instead.
> 
> Not-Yet-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> ---
>  arch/x86/include/asm/hw_irq.h       |    4 -
>  arch/x86/include/asm/idtentry.h     |    1
>  arch/x86/include/asm/irq_vectors.h  |    7 ---
>  arch/x86/kernel/apic/vector.c       |   83 ++++++++++++++++++++++++++----------
>  arch/x86/kernel/idt.c               |    1
>  arch/x86/platform/uv/uv_irq.c       |    2
>  drivers/iommu/amd/iommu.c           |    2
>  drivers/iommu/hyperv-iommu.c        |    4 -
>  drivers/iommu/intel/irq_remapping.c |    2
>  9 files changed, 68 insertions(+), 38 deletions(-)
> 
> --- a/arch/x86/include/asm/hw_irq.h
> +++ b/arch/x86/include/asm/hw_irq.h
> @@ -97,10 +97,10 @@ extern struct irq_cfg *irqd_cfg(struct i  extern void
> lock_vector_lock(void);  extern void unlock_vector_lock(void);  #ifdef
> CONFIG_SMP -extern void send_cleanup_vector(struct irq_cfg *);
> +extern void vector_schedule_cleanup(struct irq_cfg *);
>  extern void irq_complete_move(struct irq_cfg *cfg);  #else -static inline void
> send_cleanup_vector(struct irq_cfg *c) { }
> +static inline void vector_schedule_cleanup(struct irq_cfg *c) { }
>  static inline void irq_complete_move(struct irq_cfg *c) { }  #endif
> 
> --- a/arch/x86/include/asm/idtentry.h
> +++ b/arch/x86/include/asm/idtentry.h
> @@ -648,7 +648,6 @@ DECLARE_IDTENTRY_SYSVEC(X86_PLATFORM_IPI
> 
>  #ifdef CONFIG_SMP
>  DECLARE_IDTENTRY(RESCHEDULE_VECTOR,
> 	sysvec_reschedule_ipi);
> -DECLARE_IDTENTRY_SYSVEC(IRQ_MOVE_CLEANUP_VECTOR,
> 	sysvec_irq_move_cleanup);
>  DECLARE_IDTENTRY_SYSVEC(REBOOT_VECTOR,
> 	sysvec_reboot);
>  DECLARE_IDTENTRY_SYSVEC(CALL_FUNCTION_SINGLE_VECTOR,
> 	sysvec_call_function_single);
>  DECLARE_IDTENTRY_SYSVEC(CALL_FUNCTION_VECTOR,
> 	sysvec_call_function);
> --- a/arch/x86/include/asm/irq_vectors.h
> +++ b/arch/x86/include/asm/irq_vectors.h
> @@ -35,13 +35,6 @@
>   */
>  #define FIRST_EXTERNAL_VECTOR		0x20
> 
> -/*
> - * Reserve the lowest usable vector (and hence lowest priority)  0x20 for
> - * triggering cleanup after irq migration. 0x21-0x2f will still be used
> - * for device interrupts.
> - */
> -#define IRQ_MOVE_CLEANUP_VECTOR		FIRST_EXTERNAL_VECTOR
> -
>  #define IA32_SYSCALL_VECTOR		0x80
> 
>  /*
> --- a/arch/x86/kernel/apic/vector.c
> +++ b/arch/x86/kernel/apic/vector.c
> @@ -44,7 +44,18 @@ static cpumask_var_t vector_searchmask;  static struct
> irq_chip lapic_controller;  static struct irq_matrix *vector_matrix;  #ifdef
> CONFIG_SMP -static DEFINE_PER_CPU(struct hlist_head, cleanup_list);
> +
> +static void vector_cleanup_callback(struct timer_list *tmr);
> +
> +struct vector_cleanup {
> +	struct hlist_head	head;
> +	struct timer_list	timer;
> +};
> +
> +static DEFINE_PER_CPU(struct vector_cleanup, vector_cleanup) = {
> +	.head	= HLIST_HEAD_INIT,
> +	.timer	= __TIMER_INITIALIZER(vector_cleanup_callback,
> TIMER_PINNED),
> +};
>  #endif
> 
>  void lock_vector_lock(void)
> @@ -843,8 +854,12 @@ void lapic_online(void)
> 
>  void lapic_offline(void)
>  {
> +	struct vector_cleanup *cl = this_cpu_ptr(&vector_cleanup);
> +
>  	lock_vector_lock();
>  	irq_matrix_offline(vector_matrix);
> +	WARN_ON_ONCE(try_to_del_timer_sync(&cl->timer) < 0);
> +	WARN_ON_ONCE(!hlist_empty(&cl->head));
>  	unlock_vector_lock();
>  }
> 
> @@ -934,62 +949,86 @@ static void free_moved_vector(struct api
>  	apicd->move_in_progress = 0;
>  }
> 
> -DEFINE_IDTENTRY_SYSVEC(sysvec_irq_move_cleanup)
> +static void vector_cleanup_callback(struct timer_list *tmr)
>  {
> -	struct hlist_head *clhead = this_cpu_ptr(&cleanup_list);
> +	struct vector_cleanup *cl = container_of(tmr, typeof(*cl), timer);
>  	struct apic_chip_data *apicd;
>  	struct hlist_node *tmp;
> +	bool rearm = false;
> 
> -	ack_APIC_irq();
>  	/* Prevent vectors vanishing under us */
> -	raw_spin_lock(&vector_lock);
> +	raw_spin_lock_irq(&vector_lock);
> 
> -	hlist_for_each_entry_safe(apicd, tmp, clhead, clist) {
> +	hlist_for_each_entry_safe(apicd, tmp, &cl->head, clist) {
>  		unsigned int irr, vector = apicd->prev_vector;
> 
>  		/*
>  		 * Paranoia: Check if the vector that needs to be cleaned
> -		 * up is registered at the APICs IRR. If so, then this is
> -		 * not the best time to clean it up. Clean it up in the
> -		 * next attempt by sending another
> IRQ_MOVE_CLEANUP_VECTOR
> -		 * to this CPU. IRQ_MOVE_CLEANUP_VECTOR is the lowest
> -		 * priority external vector, so on return from this
> -		 * interrupt the device interrupt will happen first.
> +		 * up is registered at the APICs IRR. That's clearly a
> +		 * hardware issue if the vector arrived on the old target
> +		 * _after_ interrupts were disabled above. Keep @apicd
> +		 * on the list and schedule the timer again to give the CPU
> +		 * a chance to handle the pending interrupt.
>  		 */
>  		irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
>  		if (irr & (1U << (vector % 32))) {
> -			apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
> +			pr_warn_once("Moved interrupt pending in old target
> APIC %u\n", apicd->irq);
> +			rearm = true;
>  			continue;
>  		}
>  		free_moved_vector(apicd);
>  	}
> 
> -	raw_spin_unlock(&vector_lock);
> +	/*
> +	 * Must happen under vector_lock to make the timer_pending() check
> +	 * in __vector_schedule_cleanup() race free against the rearm here.
> +	 */
> +	if (rearm)
> +		mod_timer(tmr, jiffies + 1);
> +
> +	raw_spin_unlock_irq(&vector_lock);
>  }
> 
> -static void __send_cleanup_vector(struct apic_chip_data *apicd)
> +static void __vector_schedule_cleanup(struct apic_chip_data *apicd)
>  {
> -	unsigned int cpu;
> +	unsigned int cpu = apicd->prev_cpu;
> 
>  	raw_spin_lock(&vector_lock);
>  	apicd->move_in_progress = 0;
> -	cpu = apicd->prev_cpu;
>  	if (cpu_online(cpu)) {
> -		hlist_add_head(&apicd->clist, per_cpu_ptr(&cleanup_list, cpu));
> -		apic->send_IPI(cpu, IRQ_MOVE_CLEANUP_VECTOR);
> +		struct vector_cleanup *cl = per_cpu_ptr(&vector_cleanup, cpu);
> +
> +		/*
> +		 * The lockless timer_pending() check is safe here. If it
> +		 * returns true, then the callback will observe this new
> +		 * apic data in the hlist as everything is serialized by
> +		 * vector lock.
> +		 *
> +		 * If it returns false then the timer is either not armed
> +		 * or the other CPU executes the callback, which again
> +		 * would be blocked on vector lock. Rearming it in the
> +		 * latter case makes it fire for nothing.
> +		 *
> +		 * This is also safe against the callback rearming the timer
> +		 * because that's serialized via vector lock too.
> +		 */
> +		if (!timer_pending(&cl->timer)) {
> +			cl->timer.expires = jiffies + 1;
> +			add_timer_on(&cl->timer, cpu);
> +		}
>  	} else {
>  		apicd->prev_vector = 0;
>  	}
>  	raw_spin_unlock(&vector_lock);
>  }
> 
> -void send_cleanup_vector(struct irq_cfg *cfg)
> +void vector_schedule_cleanup(struct irq_cfg *cfg)
>  {
>  	struct apic_chip_data *apicd;
> 
>  	apicd = container_of(cfg, struct apic_chip_data, hw_irq_cfg);
>  	if (apicd->move_in_progress)
> -		__send_cleanup_vector(apicd);
> +		__vector_schedule_cleanup(apicd);
>  }
> 
>  void irq_complete_move(struct irq_cfg *cfg) @@ -1007,7 +1046,7 @@ void
> irq_complete_move(struct irq_cfg *c
>  	 * on the same CPU.
>  	 */
>  	if (apicd->cpu == smp_processor_id())
> -		__send_cleanup_vector(apicd);
> +		__vector_schedule_cleanup(apicd);
>  }
> 
>  /*
> --- a/arch/x86/kernel/idt.c
> +++ b/arch/x86/kernel/idt.c
> @@ -131,7 +131,6 @@ static const __initconst struct idt_data
>  	INTG(RESCHEDULE_VECTOR,
> 	asm_sysvec_reschedule_ipi),
>  	INTG(CALL_FUNCTION_VECTOR,
> 	asm_sysvec_call_function),
>  	INTG(CALL_FUNCTION_SINGLE_VECTOR,
> 	asm_sysvec_call_function_single),
> -	INTG(IRQ_MOVE_CLEANUP_VECTOR,
> 	asm_sysvec_irq_move_cleanup),
>  	INTG(REBOOT_VECTOR,			asm_sysvec_reboot),
>  #endif
> 
> --- a/arch/x86/platform/uv/uv_irq.c
> +++ b/arch/x86/platform/uv/uv_irq.c
> @@ -58,7 +58,7 @@ uv_set_irq_affinity(struct irq_data *dat
>  	ret = parent->chip->irq_set_affinity(parent, mask, force);
>  	if (ret >= 0) {
>  		uv_program_mmr(cfg, data->chip_data);
> -		send_cleanup_vector(cfg);
> +		vector_schedule_cleanup(cfg);
>  	}
> 
>  	return ret;
> --- a/drivers/iommu/amd/iommu.c
> +++ b/drivers/iommu/amd/iommu.c
> @@ -3639,7 +3639,7 @@ static int amd_ir_set_affinity(struct ir
>  	 * at the new destination. So, time to cleanup the previous
>  	 * vector allocation.
>  	 */
> -	send_cleanup_vector(cfg);
> +	vector_schedule_cleanup(cfg);
> 
>  	return IRQ_SET_MASK_OK_DONE;
>  }
> --- a/drivers/iommu/hyperv-iommu.c
> +++ b/drivers/iommu/hyperv-iommu.c
> @@ -51,7 +51,7 @@ static int hyperv_ir_set_affinity(struct
>  	if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
>  		return ret;
> 
> -	send_cleanup_vector(cfg);
> +	vector_schedule_cleanup(cfg);
> 
>  	return 0;
>  }
> @@ -257,7 +257,7 @@ static int hyperv_root_ir_set_affinity(s
>  	if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
>  		return ret;
> 
> -	send_cleanup_vector(cfg);
> +	vector_schedule_cleanup(cfg);
> 
>  	return 0;
>  }
> --- a/drivers/iommu/intel/irq_remapping.c
> +++ b/drivers/iommu/intel/irq_remapping.c
> @@ -1180,7 +1180,7 @@ intel_ir_set_affinity(struct irq_data *d
>  	 * at the new destination. So, time to cleanup the previous
>  	 * vector allocation.
>  	 */
> -	send_cleanup_vector(cfg);
> +	vector_schedule_cleanup(cfg);
> 
>  	return IRQ_SET_MASK_OK_DONE;
>  }

  reply	other threads:[~2023-06-06 23:19 UTC|newest]

Thread overview: 96+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-04-10  8:14 [PATCH v8 00/33] x86: enable FRED for x86-64 Xin Li
2023-04-10  8:14 ` [PATCH v8 01/33] x86/traps: let common_interrupt() handle IRQ_MOVE_CLEANUP_VECTOR Xin Li
2023-05-07 11:59   ` Borislav Petkov
2023-06-03 19:19     ` Li, Xin3
2023-06-03 20:51   ` Thomas Gleixner
2023-06-05 17:07     ` Thomas Gleixner
2023-06-05 17:09       ` H. Peter Anvin
2023-06-06 20:09         ` Thomas Gleixner
2023-06-06 23:16           ` Li, Xin3 [this message]
2023-06-19  8:00           ` Li, Xin3
2023-06-19 14:22             ` Thomas Gleixner
2023-06-19 18:47               ` Li, Xin3
2023-06-19 19:16                 ` H. Peter Anvin
2023-06-20  0:04                   ` Li, Xin3
2023-04-10  8:14 ` [PATCH v8 02/33] x86/fred: make unions for the cs and ss fields in struct pt_regs Xin Li
2023-06-03  9:48   ` Borislav Petkov
2023-06-05 12:07   ` Thomas Gleixner
2023-06-05 17:12     ` H. Peter Anvin
2023-06-05 17:29       ` Thomas Gleixner
2023-04-10  8:14 ` [PATCH v8 03/33] x86/traps: add a system interrupt table for system interrupt dispatch Xin Li
2023-06-05  8:34   ` Thomas Gleixner
2023-06-06  8:05     ` Li, Xin3
2023-06-05  8:38   ` Thomas Gleixner
2023-06-05  8:39   ` Thomas Gleixner
2023-04-10  8:14 ` [PATCH v8 04/33] x86/traps: add install_system_interrupt_handler() Xin Li
2023-06-05  8:57   ` Thomas Gleixner
2023-06-06  5:46     ` Li, Xin3
2023-04-10  8:14 ` [PATCH v8 05/33] x86/traps: add external_interrupt() to dispatch external interrupts Xin Li
2023-06-05 11:56   ` Thomas Gleixner
2023-06-05 17:52     ` Thomas Gleixner
2023-06-19 19:16     ` Li, Xin3
2023-06-19 21:13       ` Thomas Gleixner
2023-06-20  0:16         ` Li, Xin3
2023-04-10  8:14 ` [PATCH v8 06/33] x86/cpufeature: add the cpu feature bit for FRED Xin Li
2023-04-10  8:14 ` [PATCH v8 07/33] x86/opcode: add ERETU, ERETS instructions to x86-opcode-map Xin Li
2023-04-10  8:14 ` [PATCH v8 08/33] x86/objtool: teach objtool about ERETU and ERETS Xin Li
2023-04-10  8:14 ` [PATCH v8 09/33] x86/cpu: add X86_CR4_FRED macro Xin Li
2023-06-05 12:01   ` Thomas Gleixner
2023-06-05 17:06     ` H. Peter Anvin
2023-06-05 17:19     ` H. Peter Anvin
2023-04-10  8:14 ` [PATCH v8 10/33] x86/fred: add Kconfig option for FRED (CONFIG_X86_FRED) Xin Li
2023-04-10  8:14 ` [PATCH v8 11/33] x86/fred: if CONFIG_X86_FRED is disabled, disable FRED support Xin Li
2023-04-10  8:14 ` [PATCH v8 12/33] x86/cpu: add MSR numbers for FRED configuration Xin Li
2023-04-10  8:14 ` [PATCH v8 13/33] x86/fred: header file for event types Xin Li
2023-04-10  8:14 ` [PATCH v8 14/33] x86/fred: header file with FRED definitions Xin Li
2023-04-10  8:14 ` [PATCH v8 15/33] x86/fred: reserve space for the FRED stack frame Xin Li
2023-04-10  8:14 ` [PATCH v8 16/33] x86/fred: add a page fault entry stub for FRED Xin Li
2023-04-10  8:14 ` [PATCH v8 17/33] x86/fred: add a debug " Xin Li
2023-04-10  8:14 ` [PATCH v8 18/33] x86/fred: add a NMI " Xin Li
2023-04-10  8:14 ` [PATCH v8 19/33] x86/fred: add a machine check " Xin Li
2023-04-10  8:14 ` [PATCH v8 20/33] x86/fred: FRED entry/exit and dispatch code Xin Li
2023-06-05 13:21   ` Thomas Gleixner
2023-04-10  8:14 ` [PATCH v8 21/33] x86/fred: FRED initialization code Xin Li
2023-06-05 12:15   ` Thomas Gleixner
2023-06-05 13:41   ` Thomas Gleixner
2023-04-10  8:14 ` [PATCH v8 22/33] x86/fred: update MSR_IA32_FRED_RSP0 during task switch Xin Li
2023-04-10  8:14 ` [PATCH v8 23/33] x86/fred: let ret_from_fork() jmp to fred_exit_user when FRED is enabled Xin Li
2023-04-10  8:14 ` [PATCH v8 24/33] x86/fred: disallow the swapgs instruction " Xin Li
2023-06-05 13:47   ` Thomas Gleixner
2023-04-10  8:14 ` [PATCH v8 25/33] x86/fred: no ESPFIX needed " Xin Li
2023-04-10  8:14 ` [PATCH v8 26/33] x86/fred: allow single-step trap and NMI when starting a new thread Xin Li
2023-06-05 13:50   ` Thomas Gleixner
2023-06-05 13:52     ` Thomas Gleixner
2023-04-10  8:14 ` [PATCH v8 27/33] x86/fred: fixup fault on ERETU by jumping to fred_entrypoint_user Xin Li
2023-04-10  8:14 ` [PATCH v8 28/33] x86/ia32: do not modify the DPL bits for a null selector Xin Li
2023-04-10  8:14 ` [PATCH v8 29/33] x86/fred: allow FRED systems to use interrupt vectors 0x10-0x1f Xin Li
2023-06-05 14:06   ` Thomas Gleixner
2023-06-05 16:55     ` H. Peter Anvin
2023-04-10  8:14 ` [PATCH v8 30/33] x86/fred: allow dynamic stack frame size Xin Li
2023-06-05 14:11   ` Thomas Gleixner
2023-06-06  6:18     ` Li, Xin3
2023-06-06 13:27       ` Thomas Gleixner
2023-06-06 23:08         ` H. Peter Anvin
2023-04-10  8:14 ` [PATCH v8 31/33] x86/fred: BUG() when ERETU with %rsp not equal to that when the ring 3 event was just delivered Xin Li
2023-06-05 14:15   ` Thomas Gleixner
2023-06-05 16:42     ` H. Peter Anvin
2023-06-05 17:16       ` Thomas Gleixner
2023-04-10  8:14 ` [PATCH v8 32/33] x86/fred: disable FRED by default in its early stage Xin Li
2023-04-10  8:14 ` [PATCH v8 33/33] KVM: x86/vmx: refactor VMX_DO_EVENT_IRQOFF to generate FRED stack frames Xin Li
2023-04-10 21:50   ` Sean Christopherson
2023-04-11  5:06     ` Li, Xin3
2023-04-11 18:34       ` Sean Christopherson
2023-04-11 22:50         ` Li, Xin3
2023-04-12 18:26     ` Li, Xin3
2023-04-12 19:37       ` Sean Christopherson
2023-04-10 18:37 ` [PATCH v8 00/33] x86: enable FRED for x86-64 Dave Hansen
2023-04-10 19:14   ` Li, Xin3
2023-04-10 19:32     ` Borislav Petkov
2023-04-10 19:38       ` Dave Hansen
2023-04-10 20:52         ` Li, Xin3
2023-04-11  4:14       ` Li, Xin3
2023-04-10 18:49 ` Dave Hansen
2023-04-10 19:16   ` Li, Xin3
2023-06-05 17:11     ` Thomas Gleixner
2023-06-05 17:22 ` H. Peter Anvin
2023-06-05 17:32   ` Thomas Gleixner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=SA1PR11MB673401D5E5027DADBC422ACFA852A@SA1PR11MB6734.namprd11.prod.outlook.com \
    --to=xin3.li@intel.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=bp@alien8.de \
    --cc=dave.hansen@linux.intel.com \
    --cc=hpa@zytor.com \
    --cc=jiangshanlai@gmail.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=peterz@infradead.org \
    --cc=ravi.v.shankar@intel.com \
    --cc=seanjc@google.com \
    --cc=shan.kang@intel.com \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.