task_work: Make task_work_add() lockless
Change task_work to use llist-like code to avoid taking pi_lock
in task_work_add(); this makes it usable under rq->lock.

task_work_cancel() and task_work_run() still use pi_lock to
synchronize with each other.

(This is in preparation for a deadlock fix.)

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20120826191209.GA4221@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
commit ac3d0da8f3
parent 15674868d6

1 changed file with 48 additions and 47 deletions
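The add side is the core of the change: task_work_add() now pushes the new work onto task->task_works with a cmpxchg() retry loop, the llist-style pattern the changelog refers to, so it no longer takes pi_lock and can be used under rq->lock. As a rough illustration before the diff, here is a minimal, self-contained userspace sketch of that push pattern; it substitutes C11 atomics for the kernel's ACCESS_ONCE()/cmpxchg(), and the names (sketch_work_add(), the file-scope task_works) are illustrative rather than taken from the patch.

/*
 * Sketch of the lockless push in task_work_add(). C11 atomics stand in
 * for the kernel's ACCESS_ONCE()/cmpxchg(); names are illustrative.
 */
#include <stdatomic.h>
#include <stddef.h>

struct callback_head {
	struct callback_head *next;
	void (*func)(struct callback_head *);
};

static _Atomic(struct callback_head *) task_works;

void sketch_work_add(struct callback_head *work)
{
	struct callback_head *head = atomic_load(&task_works);

	/*
	 * Publish the new head with a CAS; on failure the CAS refreshes
	 * 'head' with the current value and we retry with a fresh ->next.
	 */
	do {
		work->next = head;
	} while (!atomic_compare_exchange_weak(&task_works, &head, work));
}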
diff --git a/kernel/task_work.c b/kernel/task_work.c
--- a/kernel/task_work.c
+++ b/kernel/task_work.c
@@ -3,25 +3,18 @@
 #include <linux/tracehook.h>
 
 int
-task_work_add(struct task_struct *task, struct callback_head *twork, bool notify)
+task_work_add(struct task_struct *task, struct callback_head *work, bool notify)
 {
-	struct callback_head *last, *first;
-	unsigned long flags;
+	struct callback_head *head;
 
 	/*
 	 * Not inserting the new work if the task has already passed
 	 * exit_task_work() is the responsibility of callers.
 	 */
-	raw_spin_lock_irqsave(&task->pi_lock, flags);
-	last = task->task_works;
-	first = last ? last->next : twork;
-	twork->next = first;
-	if (last)
-		last->next = twork;
-	task->task_works = twork;
-	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+	do {
+		head = ACCESS_ONCE(task->task_works);
+		work->next = head;
+	} while (cmpxchg(&task->task_works, head, work) != head);
 
 	/* test_and_set_bit() implies mb(), see tracehook_notify_resume(). */
 	if (notify)
 		set_notify_resume(task);
 	return 0;
@@ -30,52 +23,60 @@ task_work_add(struct task_struct *task, struct callback_head *twork, bool notify
 struct callback_head *
 task_work_cancel(struct task_struct *task, task_work_func_t func)
 {
+	struct callback_head **pprev = &task->task_works;
+	struct callback_head *work = NULL;
 	unsigned long flags;
-	struct callback_head *last, *res = NULL;
-
+	/*
+	 * If cmpxchg() fails we continue without updating pprev.
+	 * Either we raced with task_work_add() which added the
+	 * new entry before this work, we will find it again. Or
+	 * we raced with task_work_run(), *pprev == NULL.
+	 */
 	raw_spin_lock_irqsave(&task->pi_lock, flags);
-	last = task->task_works;
-	if (last) {
-		struct callback_head *q = last, *p = q->next;
-		while (1) {
-			if (p->func == func) {
-				q->next = p->next;
-				if (p == last)
-					task->task_works = q == p ? NULL : q;
-				res = p;
-				break;
-			}
-			if (p == last)
-				break;
-			q = p;
-			p = q->next;
-		}
+	while ((work = ACCESS_ONCE(*pprev))) {
+		read_barrier_depends();
+		if (work->func != func)
+			pprev = &work->next;
+		else if (cmpxchg(pprev, work, work->next) == work)
+			break;
 	}
 	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
-	return res;
+
+	return work;
 }
 
 void task_work_run(void)
 {
 	struct task_struct *task = current;
-	struct callback_head *p, *q;
+	struct callback_head *work, *head, *next;
 
-	while (1) {
-		raw_spin_lock_irq(&task->pi_lock);
-		p = task->task_works;
-		task->task_works = NULL;
-		raw_spin_unlock_irq(&task->pi_lock);
+	for (;;) {
+		work = xchg(&task->task_works, NULL);
+		if (!work)
+			break;
+		/*
+		 * Synchronize with task_work_cancel(). It can't remove
+		 * the first entry == work, cmpxchg(task_works) should
+		 * fail, but it can play with *work and other entries.
+		 */
+		raw_spin_unlock_wait(&task->pi_lock);
+		smp_mb();
 
-		if (unlikely(!p))
-			return;
+		/* Reverse the list to run the works in fifo order */
+		head = NULL;
+		do {
+			next = work->next;
+			work->next = head;
+			head = work;
+			work = next;
+		} while (work);
 
-		q = p->next; /* head */
-		p->next = NULL; /* cut it */
-		while (q) {
-			p = q->next;
-			q->func(q);
-			q = p;
+		work = head;
+		do {
+			next = work->next;
+			work->func(work);
+			work = next;
 			cond_resched();
-		}
+		} while (work);
 	}
 }
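On the consumer side, task_work_run() now detaches the whole list in one atomic xchg(); since task_work_add() pushes in LIFO order, the detached chain is reversed before the callbacks run so they still execute FIFO. task_work_cancel() keeps pi_lock, and task_work_run() spins on it with raw_spin_unlock_wait() before touching the detached entries; that is how the two still synchronize, per the changelog above. Below is a minimal, self-contained userspace sketch of the detach-and-reverse pattern only; atomic_exchange() stands in for the kernel's xchg(), the names are illustrative, and the pi_lock handshake is deliberately omitted.

/*
 * Sketch of the detach-and-reverse pattern in task_work_run().
 * atomic_exchange() stands in for the kernel's xchg(); the pi_lock
 * synchronization with task_work_cancel() is intentionally omitted.
 */
#include <stdatomic.h>
#include <stddef.h>

struct callback_head {
	struct callback_head *next;
	void (*func)(struct callback_head *);
};

static _Atomic(struct callback_head *) task_works;

void sketch_work_run(void)
{
	struct callback_head *work, *head, *next;

	for (;;) {
		/* Grab the entire list atomically; adders can keep pushing. */
		work = atomic_exchange(&task_works, NULL);
		if (!work)
			break;

		/* The list was pushed LIFO; reverse it to get FIFO order. */
		head = NULL;
		do {
			next = work->next;
			work->next = head;
			head = work;
			work = next;
		} while (work);

		/* Run the callbacks oldest-first. */
		work = head;
		do {
			next = work->next;
			work->func(work);
			work = next;
		} while (work);
	}
}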