| 
									
										
										
										
											2006-01-18 09:30:29 +00:00
										 |  |  | /******************************************************************************
 | 
					
						
							|  |  |  | ******************************************************************************* | 
					
						
							|  |  |  | ** | 
					
						
							| 
									
										
										
										
											2007-09-27 15:53:38 -05:00
										 |  |  | **  Copyright (C) 2005-2007 Red Hat, Inc.  All rights reserved. | 
					
						
							| 
									
										
										
										
											2006-01-18 09:30:29 +00:00
										 |  |  | ** | 
					
						
							|  |  |  | **  This copyrighted material is made available to anyone wishing to use, | 
					
						
							|  |  |  | **  modify, copy, or redistribute it subject to the terms and conditions | 
					
						
							|  |  |  | **  of the GNU General Public License v.2. | 
					
						
							|  |  |  | ** | 
					
						
							|  |  |  | ******************************************************************************* | 
					
						
							|  |  |  | ******************************************************************************/ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include "dlm_internal.h"
 | 
					
						
							|  |  |  | #include "member.h"
 | 
					
						
							|  |  |  | #include "lock.h"
 | 
					
						
							|  |  |  | #include "dir.h"
 | 
					
						
							|  |  |  | #include "config.h"
 | 
					
						
							|  |  |  | #include "requestqueue.h"
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct rq_entry { | 
					
						
							|  |  |  | 	struct list_head list; | 
					
						
							| 
									
										
										
										
											2012-04-23 16:36:01 -05:00
										 |  |  | 	uint32_t recover_seq; | 
					
						
							| 
									
										
										
										
											2006-01-18 09:30:29 +00:00
										 |  |  | 	int nodeid; | 
					
						
							| 
									
										
										
										
											2008-01-25 00:28:28 -05:00
										 |  |  | 	struct dlm_message request; | 
					
						
							| 
									
										
										
										
											2006-01-18 09:30:29 +00:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * Requests received while the lockspace is in recovery get added to the | 
					
						
							|  |  |  |  * request queue and processed when recovery is complete.  This happens when | 
					
						
							|  |  |  |  * the lockspace is suspended on some nodes before it is on others, or the | 
					
						
							|  |  |  |  * lockspace is enabled on some while still suspended on others. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-01-25 00:28:28 -05:00
										 |  |  | void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_message *ms) | 
					
						
							| 
									
										
										
										
											2006-01-18 09:30:29 +00:00
										 |  |  | { | 
					
						
							|  |  |  | 	struct rq_entry *e; | 
					
						
							| 
									
										
										
										
											2008-01-25 00:28:28 -05:00
										 |  |  | 	int length = ms->m_header.h_length - sizeof(struct dlm_message); | 
					
						
							| 
									
										
										
										
											2006-01-18 09:30:29 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-11-30 16:34:43 -06:00
										 |  |  | 	e = kmalloc(sizeof(struct rq_entry) + length, GFP_NOFS); | 
					
						
							| 
									
										
										
										
											2006-01-18 09:30:29 +00:00
										 |  |  | 	if (!e) { | 
					
						
							| 
									
										
										
										
											2007-09-27 15:53:38 -05:00
										 |  |  | 		log_print("dlm_add_requestqueue: out of memory len %d", length); | 
					
						
							|  |  |  | 		return; | 
					
						
							| 
									
										
										
										
											2006-01-18 09:30:29 +00:00
										 |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-04-23 16:36:01 -05:00
										 |  |  | 	e->recover_seq = ls->ls_recover_seq & 0xFFFFFFFF; | 
					
						
							| 
									
										
										
										
											2006-01-18 09:30:29 +00:00
										 |  |  | 	e->nodeid = nodeid; | 
					
						
							| 
									
										
										
										
											2008-01-25 00:28:28 -05:00
										 |  |  | 	memcpy(&e->request, ms, ms->m_header.h_length); | 
					
						
							| 
									
										
										
										
											2006-01-18 09:30:29 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-01-20 08:47:07 +00:00
										 |  |  | 	mutex_lock(&ls->ls_requestqueue_mutex); | 
					
						
							| 
									
										
										
										
											2007-09-27 15:53:38 -05:00
										 |  |  | 	list_add_tail(&e->list, &ls->ls_requestqueue); | 
					
						
							| 
									
										
										
										
											2006-01-20 08:47:07 +00:00
										 |  |  | 	mutex_unlock(&ls->ls_requestqueue_mutex); | 
					
						
							| 
									
										
										
										
											2006-01-18 09:30:29 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-09-27 15:53:38 -05:00
										 |  |  | /*
 | 
					
						
							|  |  |  |  * Called by dlm_recoverd to process normal messages saved while recovery was | 
					
						
							|  |  |  |  * happening.  Normal locking has been enabled before this is called.  dlm_recv | 
					
						
							|  |  |  |  * upon receiving a message, will wait for all saved messages to be drained | 
					
						
							|  |  |  |  * here before processing the message it got.  If a new dlm_ls_stop() arrives | 
					
						
							|  |  |  |  * while we're processing these saved messages, it may block trying to suspend | 
					
						
							|  |  |  |  * dlm_recv if dlm_recv is waiting for us in dlm_wait_requestqueue.  In that | 
					
						
							|  |  |  |  * case, we don't abort since locking_stopped is still 0.  If dlm_recv is not | 
					
						
							|  |  |  |  * waiting for us, then this processing may be aborted due to locking_stopped. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-01-18 09:30:29 +00:00
										 |  |  | int dlm_process_requestqueue(struct dlm_ls *ls) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	struct rq_entry *e; | 
					
						
							| 
									
										
											  
											
												dlm: fixes for nodir mode
The "nodir" mode (statically assign master nodes instead
of using the resource directory) has always been highly
experimental, and never seriously used.  This commit
fixes a number of problems, making nodir much more usable.
- Major change to recovery: recover all locks and restart
  all in-progress operations after recovery.  In some
  cases it's not possible to know which in-progess locks
  to recover, so recover all.  (Most require recovery
  in nodir mode anyway since rehashing changes most
  master nodes.)
- Change the way nodir mode is enabled, from a command
  line mount arg passed through gfs2, into a sysfs
  file managed by dlm_controld, consistent with the
  other config settings.
- Allow recovering MSTCPY locks on an rsb that has not
  yet been turned into a master copy.
- Ignore RCOM_LOCK and RCOM_LOCK_REPLY recovery messages
  from a previous, aborted recovery cycle.  Base this
  on the local recovery status not being in the state
  where any nodes should be sending LOCK messages for the
  current recovery cycle.
- Hold rsb lock around dlm_purge_mstcpy_locks() because it
  may run concurrently with dlm_recover_master_copy().
- Maintain highbast on process-copy lkb's (in addition to
  the master as is usual), because the lkb can switch
  back and forth between being a master and being a
  process copy as the master node changes in recovery.
- When recovering MSTCPY locks, flag rsb's that have
  non-empty convert or waiting queues for granting
  at the end of recovery.  (Rename flag from LOCKS_PURGED
  to RECOVER_GRANT and similar for the recovery function,
  because it's not only resources with purged locks
  that need grant a grant attempt.)
- Replace a couple of unnecessary assertion panics with
  error messages.
Signed-off-by: David Teigland <teigland@redhat.com>
											
										 
											2012-04-26 15:54:29 -05:00
										 |  |  | 	struct dlm_message *ms; | 
					
						
							| 
									
										
										
										
											2006-01-18 09:30:29 +00:00
										 |  |  | 	int error = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-01-20 08:47:07 +00:00
										 |  |  | 	mutex_lock(&ls->ls_requestqueue_mutex); | 
					
						
							| 
									
										
										
										
											2006-01-18 09:30:29 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	for (;;) { | 
					
						
							|  |  |  | 		if (list_empty(&ls->ls_requestqueue)) { | 
					
						
							| 
									
										
										
										
											2006-01-20 08:47:07 +00:00
										 |  |  | 			mutex_unlock(&ls->ls_requestqueue_mutex); | 
					
						
							| 
									
										
										
										
											2006-01-18 09:30:29 +00:00
										 |  |  | 			error = 0; | 
					
						
							|  |  |  | 			break; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		e = list_entry(ls->ls_requestqueue.next, struct rq_entry, list); | 
					
						
							| 
									
										
										
										
											2006-01-20 08:47:07 +00:00
										 |  |  | 		mutex_unlock(&ls->ls_requestqueue_mutex); | 
					
						
							| 
									
										
										
										
											2006-01-18 09:30:29 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
											  
											
												dlm: fixes for nodir mode
The "nodir" mode (statically assign master nodes instead
of using the resource directory) has always been highly
experimental, and never seriously used.  This commit
fixes a number of problems, making nodir much more usable.
- Major change to recovery: recover all locks and restart
  all in-progress operations after recovery.  In some
  cases it's not possible to know which in-progess locks
  to recover, so recover all.  (Most require recovery
  in nodir mode anyway since rehashing changes most
  master nodes.)
- Change the way nodir mode is enabled, from a command
  line mount arg passed through gfs2, into a sysfs
  file managed by dlm_controld, consistent with the
  other config settings.
- Allow recovering MSTCPY locks on an rsb that has not
  yet been turned into a master copy.
- Ignore RCOM_LOCK and RCOM_LOCK_REPLY recovery messages
  from a previous, aborted recovery cycle.  Base this
  on the local recovery status not being in the state
  where any nodes should be sending LOCK messages for the
  current recovery cycle.
- Hold rsb lock around dlm_purge_mstcpy_locks() because it
  may run concurrently with dlm_recover_master_copy().
- Maintain highbast on process-copy lkb's (in addition to
  the master as is usual), because the lkb can switch
  back and forth between being a master and being a
  process copy as the master node changes in recovery.
- When recovering MSTCPY locks, flag rsb's that have
  non-empty convert or waiting queues for granting
  at the end of recovery.  (Rename flag from LOCKS_PURGED
  to RECOVER_GRANT and similar for the recovery function,
  because it's not only resources with purged locks
  that need grant a grant attempt.)
- Replace a couple of unnecessary assertion panics with
  error messages.
Signed-off-by: David Teigland <teigland@redhat.com>
											
										 
											2012-04-26 15:54:29 -05:00
										 |  |  | 		ms = &e->request; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		log_limit(ls, "dlm_process_requestqueue msg %d from %d " | 
					
						
							|  |  |  | 			  "lkid %x remid %x result %d seq %u", | 
					
						
							|  |  |  | 			  ms->m_type, ms->m_header.h_nodeid, | 
					
						
							|  |  |  | 			  ms->m_lkid, ms->m_remid, ms->m_result, | 
					
						
							|  |  |  | 			  e->recover_seq); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-04-23 16:36:01 -05:00
										 |  |  | 		dlm_receive_message_saved(ls, &e->request, e->recover_seq); | 
					
						
							| 
									
										
										
										
											2006-01-18 09:30:29 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-01-20 08:47:07 +00:00
										 |  |  | 		mutex_lock(&ls->ls_requestqueue_mutex); | 
					
						
							| 
									
										
										
										
											2006-01-18 09:30:29 +00:00
										 |  |  | 		list_del(&e->list); | 
					
						
							|  |  |  | 		kfree(e); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		if (dlm_locking_stopped(ls)) { | 
					
						
							|  |  |  | 			log_debug(ls, "process_requestqueue abort running"); | 
					
						
							| 
									
										
										
										
											2006-01-20 08:47:07 +00:00
										 |  |  | 			mutex_unlock(&ls->ls_requestqueue_mutex); | 
					
						
							| 
									
										
										
										
											2006-01-18 09:30:29 +00:00
										 |  |  | 			error = -EINTR; | 
					
						
							|  |  |  | 			break; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		schedule(); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return error; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * After recovery is done, locking is resumed and dlm_recoverd takes all the | 
					
						
							| 
									
										
										
										
											2007-09-27 15:53:38 -05:00
										 |  |  |  * saved requests and processes them as they would have been by dlm_recv.  At | 
					
						
							|  |  |  |  * the same time, dlm_recv will start receiving new requests from remote nodes. | 
					
						
							|  |  |  |  * We want to delay dlm_recv processing new requests until dlm_recoverd has | 
					
						
							|  |  |  |  * finished processing the old saved requests.  We don't check for locking | 
					
						
							|  |  |  |  * stopped here because dlm_ls_stop won't stop locking until it's suspended us | 
					
						
							|  |  |  |  * (dlm_recv). | 
					
						
							| 
									
										
										
										
											2006-01-18 09:30:29 +00:00
										 |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void dlm_wait_requestqueue(struct dlm_ls *ls) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	for (;;) { | 
					
						
							| 
									
										
										
										
											2006-01-20 08:47:07 +00:00
										 |  |  | 		mutex_lock(&ls->ls_requestqueue_mutex); | 
					
						
							| 
									
										
										
										
											2006-01-18 09:30:29 +00:00
										 |  |  | 		if (list_empty(&ls->ls_requestqueue)) | 
					
						
							|  |  |  | 			break; | 
					
						
							| 
									
										
										
										
											2006-01-20 08:47:07 +00:00
										 |  |  | 		mutex_unlock(&ls->ls_requestqueue_mutex); | 
					
						
							| 
									
										
										
										
											2006-01-18 09:30:29 +00:00
										 |  |  | 		schedule(); | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2006-01-20 08:47:07 +00:00
										 |  |  | 	mutex_unlock(&ls->ls_requestqueue_mutex); | 
					
						
							| 
									
										
										
										
											2006-01-18 09:30:29 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int purge_request(struct dlm_ls *ls, struct dlm_message *ms, int nodeid) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	uint32_t type = ms->m_type; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-11-27 11:31:22 -06:00
										 |  |  | 	/* the ls is being cleaned up and freed by release_lockspace */ | 
					
						
							|  |  |  | 	if (!ls->ls_count) | 
					
						
							|  |  |  | 		return 1; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-01-18 09:30:29 +00:00
										 |  |  | 	if (dlm_is_removed(ls, nodeid)) | 
					
						
							|  |  |  | 		return 1; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* directory operations are always purged because the directory is
 | 
					
						
							|  |  |  | 	   always rebuilt during recovery and the lookups resent */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if (type == DLM_MSG_REMOVE || | 
					
						
							|  |  |  | 	    type == DLM_MSG_LOOKUP || | 
					
						
							|  |  |  | 	    type == DLM_MSG_LOOKUP_REPLY) | 
					
						
							|  |  |  | 		return 1; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if (!dlm_no_directory(ls)) | 
					
						
							|  |  |  | 		return 0; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
											  
											
												dlm: fixes for nodir mode
The "nodir" mode (statically assign master nodes instead
of using the resource directory) has always been highly
experimental, and never seriously used.  This commit
fixes a number of problems, making nodir much more usable.
- Major change to recovery: recover all locks and restart
  all in-progress operations after recovery.  In some
  cases it's not possible to know which in-progess locks
  to recover, so recover all.  (Most require recovery
  in nodir mode anyway since rehashing changes most
  master nodes.)
- Change the way nodir mode is enabled, from a command
  line mount arg passed through gfs2, into a sysfs
  file managed by dlm_controld, consistent with the
  other config settings.
- Allow recovering MSTCPY locks on an rsb that has not
  yet been turned into a master copy.
- Ignore RCOM_LOCK and RCOM_LOCK_REPLY recovery messages
  from a previous, aborted recovery cycle.  Base this
  on the local recovery status not being in the state
  where any nodes should be sending LOCK messages for the
  current recovery cycle.
- Hold rsb lock around dlm_purge_mstcpy_locks() because it
  may run concurrently with dlm_recover_master_copy().
- Maintain highbast on process-copy lkb's (in addition to
  the master as is usual), because the lkb can switch
  back and forth between being a master and being a
  process copy as the master node changes in recovery.
- When recovering MSTCPY locks, flag rsb's that have
  non-empty convert or waiting queues for granting
  at the end of recovery.  (Rename flag from LOCKS_PURGED
  to RECOVER_GRANT and similar for the recovery function,
  because it's not only resources with purged locks
  that need grant a grant attempt.)
- Replace a couple of unnecessary assertion panics with
  error messages.
Signed-off-by: David Teigland <teigland@redhat.com>
											
										 
											2012-04-26 15:54:29 -05:00
										 |  |  | 	return 1; | 
					
						
							| 
									
										
										
										
											2006-01-18 09:30:29 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void dlm_purge_requestqueue(struct dlm_ls *ls) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	struct dlm_message *ms; | 
					
						
							|  |  |  | 	struct rq_entry *e, *safe; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-01-20 08:47:07 +00:00
										 |  |  | 	mutex_lock(&ls->ls_requestqueue_mutex); | 
					
						
							| 
									
										
										
										
											2006-01-18 09:30:29 +00:00
										 |  |  | 	list_for_each_entry_safe(e, safe, &ls->ls_requestqueue, list) { | 
					
						
							| 
									
										
										
										
											2008-01-25 00:28:28 -05:00
										 |  |  | 		ms =  &e->request; | 
					
						
							| 
									
										
										
										
											2006-01-18 09:30:29 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 		if (purge_request(ls, ms, e->nodeid)) { | 
					
						
							|  |  |  | 			list_del(&e->list); | 
					
						
							|  |  |  | 			kfree(e); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2006-01-20 08:47:07 +00:00
										 |  |  | 	mutex_unlock(&ls->ls_requestqueue_mutex); | 
					
						
							| 
									
										
										
										
											2006-01-18 09:30:29 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 |