starting to get a handle on how to detect that mad gleam in lustre's eye
This commit is contained in:
parent
9d52d653f6
commit
8efd3d71c8
2 changed files with 55 additions and 16 deletions
|
@ -132,27 +132,12 @@ tryLock lockfile = trySideLock lockfile $ \sidelock -> do
|
||||||
-- open(2) suggests that link can sometimes appear to fail
|
-- open(2) suggests that link can sometimes appear to fail
|
||||||
-- on NFS but have actually succeeded, and the way to find out is to stat
|
-- on NFS but have actually succeeded, and the way to find out is to stat
|
||||||
-- the file and check its link count etc.
|
-- the file and check its link count etc.
|
||||||
--
|
|
||||||
-- On a Lustre filesystem, link has been observed to incorrectly *succeed*,
|
|
||||||
-- despite the dest already existing. A subsequent stat of the dest
|
|
||||||
-- looked like it had been replaced with the src. The process proceeded to
|
|
||||||
-- run and then deleted the dest, and after the process was done, the
|
|
||||||
-- original file was observed to still be in place. This is horrible and we
|
|
||||||
-- can't do anything about such a lying filesystem.
|
|
||||||
-- At least the side lock file will prevent git-annex processes on the same
|
|
||||||
-- host from running concurrently even on such a lying filesystem.
|
|
||||||
linkToLock :: SideLockHandle -> FilePath -> FilePath -> IO Bool
|
linkToLock :: SideLockHandle -> FilePath -> FilePath -> IO Bool
|
||||||
linkToLock Nothing _ _ = return False
|
linkToLock Nothing _ _ = return False
|
||||||
linkToLock (Just _) src dest = do
|
linkToLock (Just _) src dest = do
|
||||||
-- This might make Lustre notice that a lock file that is already
|
|
||||||
-- there is there?
|
|
||||||
_ <- catchMaybeIO $ readFile dest
|
|
||||||
_ <- tryIO $ createLink src dest
|
_ <- tryIO $ createLink src dest
|
||||||
ifM (catchBoolIO checklinked)
|
ifM (catchBoolIO checklinked)
|
||||||
( catchBoolIO $ do
|
( catchBoolIO $ not <$> checkInsaneLustre dest
|
||||||
srccontent <- readFile src
|
|
||||||
destcontent <- readFile dest
|
|
||||||
return (srccontent == destcontent)
|
|
||||||
, return False
|
, return False
|
||||||
)
|
)
|
||||||
where
|
where
|
||||||
|
@ -173,6 +158,27 @@ linkToLock (Just _) src dest = do
|
||||||
, linkCount x == 2
|
, linkCount x == 2
|
||||||
]
|
]
|
||||||
|
|
||||||
|
-- | Checks whether the filesystem misbehaved after a hard link was made
-- to @dest@.
--
-- On a Lustre filesystem, link has been observed to incorrectly *succeed*,
-- despite the dest already existing. A subsequent stat of the dest
-- looked like it had been replaced with the src. The process proceeded to
-- run and then deleted the dest, and after the process was done, the
-- original file was observed to still be in place.
--
-- We can detect this insanity by getting the directory contents after
-- making the link, and checking to see if 2 copies of the dest file,
-- with the SAME FILENAME exist.
--
-- Returns False when dest shows up exactly once in the listing of its
-- directory. Returns True when it is missing from the listing, or when it
-- is listed more than once; in the latter case one removal of dest is
-- attempted before returning.
checkInsaneLustre :: FilePath -> IO Bool
checkInsaneLustre dest = do
    -- NOTE(review): entries are compared against dest exactly as given,
    -- so this presumably relies on dirContents returning paths prefixed
    -- with the directory -- confirm against its definition.
    fs <- dirContents (takeDirectory dest)
    case filter (== dest) fs of
        [_] -> return False -- whew!
        [] -> return True -- wtf?
        _ -> do
            -- Try to clean up the extra copy we made
            -- that has the same name. Egads.
            -- Best effort only: the outcome of the removal is
            -- deliberately discarded.
            _ <- tryIO $ removeFile dest
            return True
|
||||||
|
|
||||||
-- | Waits as necessary to take a lock.
|
-- | Waits as necessary to take a lock.
|
||||||
--
|
--
|
||||||
-- Uses a 1 second wait-loop.
|
-- Uses a 1 second wait-loop.
|
||||||
|
|
|
@ -0,0 +1,33 @@
|
||||||
|
[[!comment format=mdwn
|
||||||
|
username="joey"
|
||||||
|
subject="""comment 14"""
|
||||||
|
date="2015-11-13T20:00:48Z"
|
||||||
|
content="""
|
||||||
|
Adding to the crazy Lustre fun, check this out:
|
||||||
|
|
||||||
|
$ ls -l .git/annex/
|
||||||
|
total 56
|
||||||
|
-rw-rw-r-- 1 hess root 18387 Nov 13 14:35 index
|
||||||
|
-rw-rw-r-- 1 hess root 41 Nov 13 14:35 index.lck
|
||||||
|
drwxrwsr-x 2 hess root 12288 Nov 13 14:35 journal
|
||||||
|
-rw-rw-r-- 1 hess root 0 Nov 13 11:48 journal.lck
|
||||||
|
drwxrwsr-x 2 hess root 4096 Nov 13 14:35 misctmp
|
||||||
|
drwxrwsr-x 88 hess root 4096 Nov 13 14:35 objects
|
||||||
|
-r--r--r-- 1 hess root 70 Nov 13 14:35 pidlock
|
||||||
|
-r--r--r-- 1 hess root 70 Nov 13 14:35 pidlock
|
||||||
|
-rw-rw-r-- 1 hess root 0 Nov 13 11:48 sentinal
|
||||||
|
-rw-rw-r-- 1 hess root 23 Nov 13 11:48 sentinal.cache
|
||||||
|
|
||||||
|
There are 2 pidlock files in that directory listing. 2 files with the same name.
|
||||||
|
I deleted one of them, and with no other changes, ls shows only 1 now.
|
||||||
|
|
||||||
|
-r--r--r-- 1 hess root 74 Nov 13 14:35 pidlock
|
||||||
|
|
||||||
|
Notice that the file stat has changed too.
|
||||||
|
|
||||||
|
So, Lustre has clearly thrown POSIX out the window, and then defenestrated
|
||||||
|
sanity for good measure.
|
||||||
|
|
||||||
|
On the plus side, this may show how I can detect when rename() fails to
|
||||||
|
preserve POSIX semantics..
|
||||||
|
"""]]
|
Loading…
Reference in a new issue