libceph: request a new osdmap if lingering request maps to no osd
This commit does two things.  First, if there are any homeless
lingering requests, we now request a new osdmap even if the osdmap that
is being processed brought no changes, i.e. if a given lingering
request turned homeless in one of the previous epochs and remained
homeless in the current epoch.  Not doing so leaves us with a stale
osdmap and as a result we may miss our window for reestablishing the
watch and lose notifies.
MON=1 OSD=1:
    # cat linger-needmap.sh
    #!/bin/bash
    rbd create --size 1 test
    DEV=$(rbd map test)
    ceph osd out 0
    rbd map dne/dne # obtain a new osdmap as a side effect (!)
    sleep 1
    ceph osd in 0
    rbd resize --size 2 test
    # rbd info test | grep size -> 2M
    # blockdev --getsize $DEV -> 1M
N.B.: Not obtaining a new osdmap in between "osd out" and "osd in"
above is enough to make it miss that resize notify, but that is a
bug^Wlimitation of ceph watch/notify v1.
Second, homeless lingering requests are now kicked just like those
lingering requests whose mapping has changed.  This is mainly to
recognize that a homeless lingering request makes no sense and to
preserve the invariant that a registered lingering request is not
sitting on any of r_req_lru_item lists.  This spares us a WARN_ON,
which commit ba9d114ec5 ("libceph: clear r_req_lru_item in
__unregister_linger_request()") tried to fix the _wrong_ way.
Cc: stable@vger.kernel.org # 3.10+
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Sage Weil <sage@redhat.com>
	
	
This commit is contained in:
		
					parent
					
						
							
								e26081808e
							
						
					
				
			
			
				commit
				
					
						b049453221
					
				
			
		
					 1 changed files with 20 additions and 11 deletions
				
			
		|  | @ -2017,20 +2017,29 @@ static void kick_requests(struct ceph_osd_client *osdc, bool force_resend, | |||
| 		err = __map_request(osdc, req, | ||||
| 				    force_resend || force_resend_writes); | ||||
| 		dout("__map_request returned %d\n", err); | ||||
| 		if (err == 0) | ||||
| 			continue;  /* no change and no osd was specified */ | ||||
| 		if (err < 0) | ||||
| 			continue;  /* hrm! */ | ||||
| 		if (req->r_osd == NULL) { | ||||
| 			dout("tid %llu maps to no valid osd\n", req->r_tid); | ||||
| 			needmap++;  /* request a newer map */ | ||||
| 			continue; | ||||
| 		} | ||||
| 		if (req->r_osd == NULL || err > 0) { | ||||
| 			if (req->r_osd == NULL) { | ||||
| 				dout("lingering %p tid %llu maps to no osd\n", | ||||
| 				     req, req->r_tid); | ||||
| 				/*
 | ||||
| 				 * A homeless lingering request makes | ||||
| 				 * no sense, as its job is to keep | ||||
| 				 * a particular OSD connection open. | ||||
| 				 * Request a newer map and kick the | ||||
| 				 * request, knowing that it won't be | ||||
| 				 * resent until we actually get a map | ||||
| 				 * that can tell us where to send it. | ||||
| 				 */ | ||||
| 				needmap++; | ||||
| 			} | ||||
| 
 | ||||
| 		dout("kicking lingering %p tid %llu osd%d\n", req, req->r_tid, | ||||
| 		     req->r_osd ? req->r_osd->o_osd : -1); | ||||
| 		__register_request(osdc, req); | ||||
| 		__unregister_linger_request(osdc, req); | ||||
| 			dout("kicking lingering %p tid %llu osd%d\n", req, | ||||
| 			     req->r_tid, req->r_osd ? req->r_osd->o_osd : -1); | ||||
| 			__register_request(osdc, req); | ||||
| 			__unregister_linger_request(osdc, req); | ||||
| 		} | ||||
| 	} | ||||
| 	reset_changed_osds(osdc); | ||||
| 	mutex_unlock(&osdc->request_mutex); | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Ilya Dryomov
				Ilya Dryomov