The irq work queue is a per cpu object and it is sufficient for
synchronization if per cpu atomics are used. Doing so simplifies
the code and reduces the overhead of the code.
Before:
christoph@linux-2.6$ size kernel/irq_work.o
   text	   data	    bss	    dec	    hex	filename
    451	      8	      1	    460	    1cc	kernel/irq_work.o
After:
christoph@linux-2.6$ size kernel/irq_work.o 
   text	   data	    bss	    dec	    hex	filename
    438	      8	      1	    447	    1bf	kernel/irq_work.o
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Christoph Lameter <cl@linux.com>
		
	
			
		
			
				
	
	
		
			166 lines
		
	
	
	
		
			3.7 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			166 lines
		
	
	
	
		
			3.7 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
/*
 | 
						|
 * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 | 
						|
 *
 | 
						|
 * Provides a framework for enqueueing and running callbacks from hardirq
 | 
						|
 * context. The enqueueing is NMI-safe.
 | 
						|
 */
 | 
						|
 | 
						|
#include <linux/kernel.h>
 | 
						|
#include <linux/module.h>
 | 
						|
#include <linux/irq_work.h>
 | 
						|
#include <linux/hardirq.h>
 | 
						|
 | 
						|
/*
 | 
						|
 * An entry can be in one of four states:
 | 
						|
 *
 | 
						|
 * free	     NULL, 0 -> {claimed}       : free to be used
 | 
						|
 * claimed   NULL, 3 -> {pending}       : claimed to be enqueued
 | 
						|
 * pending   next, 3 -> {busy}          : queued, pending callback
 | 
						|
 * busy      NULL, 2 -> {free, claimed} : callback in progress, can be claimed
 | 
						|
 *
 | 
						|
 * We use the lower two bits of the next pointer to keep PENDING and BUSY
 | 
						|
 * flags.
 | 
						|
 */
 | 
						|
 | 
						|
#define IRQ_WORK_PENDING	1UL
 | 
						|
#define IRQ_WORK_BUSY		2UL
 | 
						|
#define IRQ_WORK_FLAGS		3UL
 | 
						|
 | 
						|
static inline bool irq_work_is_set(struct irq_work *entry, int flags)
 | 
						|
{
 | 
						|
	return (unsigned long)entry->next & flags;
 | 
						|
}
 | 
						|
 | 
						|
static inline struct irq_work *irq_work_next(struct irq_work *entry)
 | 
						|
{
 | 
						|
	unsigned long next = (unsigned long)entry->next;
 | 
						|
	next &= ~IRQ_WORK_FLAGS;
 | 
						|
	return (struct irq_work *)next;
 | 
						|
}
 | 
						|
 | 
						|
static inline struct irq_work *next_flags(struct irq_work *entry, int flags)
 | 
						|
{
 | 
						|
	unsigned long next = (unsigned long)entry;
 | 
						|
	next |= flags;
 | 
						|
	return (struct irq_work *)next;
 | 
						|
}
 | 
						|
 | 
						|
static DEFINE_PER_CPU(struct irq_work *, irq_work_list);
 | 
						|
 | 
						|
/*
 | 
						|
 * Claim the entry so that no one else will poke at it.
 | 
						|
 */
 | 
						|
static bool irq_work_claim(struct irq_work *entry)
 | 
						|
{
 | 
						|
	struct irq_work *next, *nflags;
 | 
						|
 | 
						|
	do {
 | 
						|
		next = entry->next;
 | 
						|
		if ((unsigned long)next & IRQ_WORK_PENDING)
 | 
						|
			return false;
 | 
						|
		nflags = next_flags(next, IRQ_WORK_FLAGS);
 | 
						|
	} while (cmpxchg(&entry->next, next, nflags) != next);
 | 
						|
 | 
						|
	return true;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
void __weak arch_irq_work_raise(void)
 | 
						|
{
 | 
						|
	/*
 | 
						|
	 * Lame architectures will get the timer tick callback
 | 
						|
	 */
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * Queue the entry and raise the IPI if needed.
 | 
						|
 */
 | 
						|
static void __irq_work_queue(struct irq_work *entry)
 | 
						|
{
 | 
						|
	struct irq_work *next;
 | 
						|
 | 
						|
	preempt_disable();
 | 
						|
 | 
						|
	do {
 | 
						|
		next = __this_cpu_read(irq_work_list);
 | 
						|
		/* Can assign non-atomic because we keep the flags set. */
 | 
						|
		entry->next = next_flags(next, IRQ_WORK_FLAGS);
 | 
						|
	} while (this_cpu_cmpxchg(irq_work_list, next, entry) != next);
 | 
						|
 | 
						|
	/* The list was empty, raise self-interrupt to start processing. */
 | 
						|
	if (!irq_work_next(entry))
 | 
						|
		arch_irq_work_raise();
 | 
						|
 | 
						|
	preempt_enable();
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * Enqueue the irq_work @entry, returns true on success, failure when the
 | 
						|
 * @entry was already enqueued by someone else.
 | 
						|
 *
 | 
						|
 * Can be re-enqueued while the callback is still in progress.
 | 
						|
 */
 | 
						|
bool irq_work_queue(struct irq_work *entry)
 | 
						|
{
 | 
						|
	if (!irq_work_claim(entry)) {
 | 
						|
		/*
 | 
						|
		 * Already enqueued, can't do!
 | 
						|
		 */
 | 
						|
		return false;
 | 
						|
	}
 | 
						|
 | 
						|
	__irq_work_queue(entry);
 | 
						|
	return true;
 | 
						|
}
 | 
						|
EXPORT_SYMBOL_GPL(irq_work_queue);
 | 
						|
 | 
						|
/*
 | 
						|
 * Run the irq_work entries on this cpu. Requires to be ran from hardirq
 | 
						|
 * context with local IRQs disabled.
 | 
						|
 */
 | 
						|
void irq_work_run(void)
 | 
						|
{
 | 
						|
	struct irq_work *list;
 | 
						|
 | 
						|
	if (this_cpu_read(irq_work_list) == NULL)
 | 
						|
		return;
 | 
						|
 | 
						|
	BUG_ON(!in_irq());
 | 
						|
	BUG_ON(!irqs_disabled());
 | 
						|
 | 
						|
	list = this_cpu_xchg(irq_work_list, NULL);
 | 
						|
 | 
						|
	while (list != NULL) {
 | 
						|
		struct irq_work *entry = list;
 | 
						|
 | 
						|
		list = irq_work_next(list);
 | 
						|
 | 
						|
		/*
 | 
						|
		 * Clear the PENDING bit, after this point the @entry
 | 
						|
		 * can be re-used.
 | 
						|
		 */
 | 
						|
		entry->next = next_flags(NULL, IRQ_WORK_BUSY);
 | 
						|
		entry->func(entry);
 | 
						|
		/*
 | 
						|
		 * Clear the BUSY bit and return to the free state if
 | 
						|
		 * no-one else claimed it meanwhile.
 | 
						|
		 */
 | 
						|
		(void)cmpxchg(&entry->next,
 | 
						|
			      next_flags(NULL, IRQ_WORK_BUSY),
 | 
						|
			      NULL);
 | 
						|
	}
 | 
						|
}
 | 
						|
EXPORT_SYMBOL_GPL(irq_work_run);
 | 
						|
 | 
						|
/*
 | 
						|
 * Synchronize against the irq_work @entry, ensures the entry is not
 | 
						|
 * currently in use.
 | 
						|
 */
 | 
						|
void irq_work_sync(struct irq_work *entry)
 | 
						|
{
 | 
						|
	WARN_ON_ONCE(irqs_disabled());
 | 
						|
 | 
						|
	while (irq_work_is_set(entry, IRQ_WORK_BUSY))
 | 
						|
		cpu_relax();
 | 
						|
}
 | 
						|
EXPORT_SYMBOL_GPL(irq_work_sync);
 |