starting to get a handle on how to detect that mad gleam in lustre's eye
This commit is contained in:
parent
9d52d653f6
commit
8efd3d71c8
2 changed files with 55 additions and 16 deletions
|
@ -132,27 +132,12 @@ tryLock lockfile = trySideLock lockfile $ \sidelock -> do
|
|||
-- open(2) suggests that link can sometimes appear to fail
|
||||
-- on NFS but have actually succeeded, and the way to find out is to stat
|
||||
-- the file and check its link count etc.
|
||||
--
|
||||
-- On a Lustre filesystem, link has been observed to incorrectly *succeed*,
|
||||
-- despite the dest already existing. A subsequent stat of the dest
|
||||
-- looked like it had been replaced with the src. The process proceeded to
|
||||
-- run and then deleted the dest, and after the process was done, the
|
||||
-- original file was observed to still be in place. This is horrible and we
|
||||
-- can't do anything about such a lying filesystem.
|
||||
-- At least the side lock file will prevent git-annex processes on the same
|
||||
-- host from running concurrently even on such a lying filesystem.
|
||||
linkToLock :: SideLockHandle -> FilePath -> FilePath -> IO Bool
|
||||
linkToLock Nothing _ _ = return False
|
||||
linkToLock (Just _) src dest = do
|
||||
-- This might make Lustre notice that a lock file that is already
|
||||
-- there is there?
|
||||
_ <- catchMaybeIO $ readFile dest
|
||||
_ <- tryIO $ createLink src dest
|
||||
ifM (catchBoolIO checklinked)
|
||||
( catchBoolIO $ do
|
||||
srccontent <- readFile src
|
||||
destcontent <- readFile dest
|
||||
return (srccontent == destcontent)
|
||||
( catchBoolIO $ not <$> checkInsaneLustre dest
|
||||
, return False
|
||||
)
|
||||
where
|
||||
|
@ -173,6 +158,27 @@ linkToLock (Just _) src dest = do
|
|||
, linkCount x == 2
|
||||
]
|
||||
|
||||
-- On a Lustre filesystem, link has been observed to incorrectly *succeed*,
|
||||
-- despite the dest already existing. A subsequent stat of the dest
|
||||
-- looked like it had been replaced with the src. The process proceeded to
|
||||
-- run and then deleted the dest, and after the process was done, the
|
||||
-- original file was observed to still be in place.
|
||||
--
|
||||
-- We can detect this insanity by getting the directory contents after
|
||||
-- making the link, and checking to see if 2 copies of the dest file,
|
||||
-- with the SAME FILENAME exist.
|
||||
-- | Checks whether the directory listing for a freshly linked lock file
-- looks inconsistent.
--
-- Lists the directory containing @dest@ and counts how many entries are
-- equal to @dest@:
--
-- * exactly one entry: returns 'False' (the listing is sane);
-- * no entry: returns 'True';
-- * more than one entry: makes a best-effort attempt to remove @dest@
--   and returns 'True'.
--
-- NOTE(review): the equality test against @dest@ assumes 'dirContents'
-- yields entries in the same path form as @dest@ (directory-prefixed,
-- not bare file names) -- confirm against its definition.
checkInsaneLustre :: FilePath -> IO Bool
checkInsaneLustre dest = do
    fs <- dirContents (takeDirectory dest)
    case filter (== dest) fs of
        [_] -> return False -- whew!
        [] -> return True -- wtf?
        _ -> do
            -- Try to clean up the extra copy we made
            -- that has the same name. Egads.
            -- The outcome of the removal is deliberately ignored; bind
            -- it explicitly so it is not an unused do-bind.
            _ <- tryIO $ removeFile dest
            return True
|
||||
|
||||
-- | Waits as necessary to take a lock.
|
||||
--
|
||||
-- Uses a 1 second wait-loop.
|
||||
|
|
|
@ -0,0 +1,33 @@
|
|||
[[!comment format=mdwn
|
||||
username="joey"
|
||||
subject="""comment 14"""
|
||||
date="2015-11-13T20:00:48Z"
|
||||
content="""
|
||||
Adding to the crazy Lustre fun, check this out:
|
||||
|
||||
$ ls -l .git/annex/
|
||||
total 56
|
||||
-rw-rw-r-- 1 hess root 18387 Nov 13 14:35 index
|
||||
-rw-rw-r-- 1 hess root 41 Nov 13 14:35 index.lck
|
||||
drwxrwsr-x 2 hess root 12288 Nov 13 14:35 journal
|
||||
-rw-rw-r-- 1 hess root 0 Nov 13 11:48 journal.lck
|
||||
drwxrwsr-x 2 hess root 4096 Nov 13 14:35 misctmp
|
||||
drwxrwsr-x 88 hess root 4096 Nov 13 14:35 objects
|
||||
-r--r--r-- 1 hess root 70 Nov 13 14:35 pidlock
|
||||
-r--r--r-- 1 hess root 70 Nov 13 14:35 pidlock
|
||||
-rw-rw-r-- 1 hess root 0 Nov 13 11:48 sentinal
|
||||
-rw-rw-r-- 1 hess root 23 Nov 13 11:48 sentinal.cache
|
||||
|
||||
There are 2 pidlock files in that directory listing. 2 files with the same name.
|
||||
I deleted one of them, and with no other changes, ls shows only 1 now.
|
||||
|
||||
-r--r--r-- 1 hess root 74 Nov 13 14:35 pidlock
|
||||
|
||||
Notice that the file stat has changed too.
|
||||
|
||||
So, Lustre has clearly thrown POSIX out the window, and then defenestrated
|
||||
sanity for good measure.
|
||||
|
||||
On the plus side, this may show how I can detect when rename() fails to
|
||||
preserve POSIX semantics.
|
||||
"""]]
|
Loading…
Reference in a new issue