add getFileSize, which can get the real size of a large file on Windows
Avoid using fileSize which maxes out at just 2 gb on Windows. Instead, use hFileSize, which doesn't have a bounded size. Fixes support for files > 2 gb on Windows. Note that the InodeCache code only needs to compare a file size, so it doesn't matter if the file size wraps. So it has been left as-is. This was necessary both to avoid invalidating existing inode caches, and because the code passed FileStatus around and would have become more expensive if it called getFileSize. This commit was sponsored by Christian Dietrich.
This commit is contained in:
parent
87c4f0e320
commit
4f657aa14e
22 changed files with 94 additions and 36 deletions
|
@ -234,8 +234,8 @@ prepGetViaTmpChecked key unabletoget getkey = do
|
||||||
tmp <- fromRepo $ gitAnnexTmpObjectLocation key
|
tmp <- fromRepo $ gitAnnexTmpObjectLocation key
|
||||||
|
|
||||||
e <- liftIO $ doesFileExist tmp
|
e <- liftIO $ doesFileExist tmp
|
||||||
alreadythere <- if e
|
alreadythere <- liftIO $ if e
|
||||||
then fromIntegral . fileSize <$> liftIO (getFileStatus tmp)
|
then getFileSize tmp
|
||||||
else return 0
|
else return 0
|
||||||
ifM (checkDiskSpace Nothing key alreadythere)
|
ifM (checkDiskSpace Nothing key alreadythere)
|
||||||
( do
|
( do
|
||||||
|
|
|
@ -132,8 +132,7 @@ runTransfer' ignorelock t file shouldretry a = do
|
||||||
liftIO $ readMVar metervar
|
liftIO $ readMVar metervar
|
||||||
| otherwise = do
|
| otherwise = do
|
||||||
f <- fromRepo $ gitAnnexTmpObjectLocation (transferKey t)
|
f <- fromRepo $ gitAnnexTmpObjectLocation (transferKey t)
|
||||||
liftIO $ catchDefaultIO 0 $
|
liftIO $ catchDefaultIO 0 $ getFileSize f
|
||||||
fromIntegral . fileSize <$> getFileStatus f
|
|
||||||
|
|
||||||
type RetryDecider = TransferInfo -> TransferInfo -> Bool
|
type RetryDecider = TransferInfo -> TransferInfo -> Bool
|
||||||
|
|
||||||
|
|
|
@ -140,8 +140,7 @@ repairStaleGitLocks r = do
|
||||||
repairStaleLocks :: [FilePath] -> Assistant ()
|
repairStaleLocks :: [FilePath] -> Assistant ()
|
||||||
repairStaleLocks lockfiles = go =<< getsizes
|
repairStaleLocks lockfiles = go =<< getsizes
|
||||||
where
|
where
|
||||||
getsize lf = catchMaybeIO $
|
getsize lf = catchMaybeIO $ (\s -> (lf, s)) <$> getFileSize lf
|
||||||
(\s -> (lf, fileSize s)) <$> getFileStatus lf
|
|
||||||
getsizes = liftIO $ catMaybes <$> mapM getsize lockfiles
|
getsizes = liftIO $ catMaybes <$> mapM getsize lockfiles
|
||||||
go [] = return ()
|
go [] = return ()
|
||||||
go l = ifM (liftIO $ null <$> Lsof.query ("--" : map fst l))
|
go l = ifM (liftIO $ null <$> Lsof.query ("--" : map fst l))
|
||||||
|
|
|
@ -225,7 +225,7 @@ checkLogSize :: Int -> Assistant ()
|
||||||
checkLogSize n = do
|
checkLogSize n = do
|
||||||
f <- liftAnnex $ fromRepo gitAnnexLogFile
|
f <- liftAnnex $ fromRepo gitAnnexLogFile
|
||||||
logs <- liftIO $ listLogs f
|
logs <- liftIO $ listLogs f
|
||||||
totalsize <- liftIO $ sum <$> mapM filesize logs
|
totalsize <- liftIO $ sum <$> mapM getFileSize logs
|
||||||
when (totalsize > 2 * oneMegabyte) $ do
|
when (totalsize > 2 * oneMegabyte) $ do
|
||||||
notice ["Rotated logs due to size:", show totalsize]
|
notice ["Rotated logs due to size:", show totalsize]
|
||||||
liftIO $ openLog f >>= handleToFd >>= redirLog
|
liftIO $ openLog f >>= handleToFd >>= redirLog
|
||||||
|
@ -237,9 +237,7 @@ checkLogSize n = do
|
||||||
checkLogSize (n + 1)
|
checkLogSize (n + 1)
|
||||||
_ -> noop
|
_ -> noop
|
||||||
where
|
where
|
||||||
filesize f = fromIntegral . fileSize <$> liftIO (getFileStatus f)
|
oneMegabyte :: Integer
|
||||||
|
|
||||||
oneMegabyte :: Int
|
|
||||||
oneMegabyte = 1000000
|
oneMegabyte = 1000000
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -36,8 +36,7 @@ transferPollerThread = namedThread "TransferPoller" $ do
|
||||||
- temp file being used for the transfer. -}
|
- temp file being used for the transfer. -}
|
||||||
| transferDirection t == Download = do
|
| transferDirection t == Download = do
|
||||||
let f = gitAnnexTmpObjectLocation (transferKey t) g
|
let f = gitAnnexTmpObjectLocation (transferKey t) g
|
||||||
sz <- liftIO $ catchMaybeIO $
|
sz <- liftIO $ catchMaybeIO $ getFileSize f
|
||||||
fromIntegral . fileSize <$> getFileStatus f
|
|
||||||
newsize t info sz
|
newsize t info sz
|
||||||
{- Uploads don't need to be polled for when the TransferWatcher
|
{- Uploads don't need to be polled for when the TransferWatcher
|
||||||
- thread can track file modifications. -}
|
- thread can track file modifications. -}
|
||||||
|
|
|
@ -68,8 +68,7 @@ hashNameE hash = hashName hash ++ "E"
|
||||||
keyValue :: Hash -> KeySource -> Annex (Maybe Key)
|
keyValue :: Hash -> KeySource -> Annex (Maybe Key)
|
||||||
keyValue hash source = do
|
keyValue hash source = do
|
||||||
let file = contentLocation source
|
let file = contentLocation source
|
||||||
stat <- liftIO $ getFileStatus file
|
filesize <- liftIO $ getFileSize file
|
||||||
let filesize = fromIntegral $ fileSize stat
|
|
||||||
s <- hashFile hash file filesize
|
s <- hashFile hash file filesize
|
||||||
return $ Just $ stubKey
|
return $ Just $ stubKey
|
||||||
{ keyName = s
|
{ keyName = s
|
||||||
|
@ -103,7 +102,7 @@ checkKeyChecksum hash key file = do
|
||||||
mstat <- liftIO $ catchMaybeIO $ getFileStatus file
|
mstat <- liftIO $ catchMaybeIO $ getFileStatus file
|
||||||
case (mstat, fast) of
|
case (mstat, fast) of
|
||||||
(Just stat, False) -> do
|
(Just stat, False) -> do
|
||||||
let filesize = fromIntegral $ fileSize stat
|
filesize <- liftIO $ getFileSize' file stat
|
||||||
showSideAction "checksum"
|
showSideAction "checksum"
|
||||||
check <$> hashFile hash file filesize
|
check <$> hashFile hash file filesize
|
||||||
_ -> return True
|
_ -> return True
|
||||||
|
|
|
@ -32,11 +32,13 @@ backend = Backend
|
||||||
-}
|
-}
|
||||||
keyValue :: KeySource -> Annex (Maybe Key)
|
keyValue :: KeySource -> Annex (Maybe Key)
|
||||||
keyValue source = do
|
keyValue source = do
|
||||||
stat <- liftIO $ getFileStatus $ contentLocation source
|
let f = contentLocation source
|
||||||
|
stat <- liftIO $ getFileStatus f
|
||||||
|
sz <- liftIO $ getFileSize' f stat
|
||||||
relf <- getTopFilePath <$> inRepo (toTopFilePath $ keyFilename source)
|
relf <- getTopFilePath <$> inRepo (toTopFilePath $ keyFilename source)
|
||||||
return $ Just $ stubKey
|
return $ Just $ stubKey
|
||||||
{ keyName = genKeyName relf
|
{ keyName = genKeyName relf
|
||||||
, keyBackendName = name backend
|
, keyBackendName = name backend
|
||||||
, keySize = Just $ fromIntegral $ fileSize stat
|
, keySize = Just sz
|
||||||
, keyMtime = Just $ modificationTime stat
|
, keyMtime = Just $ modificationTime stat
|
||||||
}
|
}
|
||||||
|
|
|
@ -303,8 +303,7 @@ checkKeySizeOr :: (Key -> Annex String) -> Key -> FilePath -> Annex Bool
|
||||||
checkKeySizeOr bad key file = case Types.Key.keySize key of
|
checkKeySizeOr bad key file = case Types.Key.keySize key of
|
||||||
Nothing -> return True
|
Nothing -> return True
|
||||||
Just size -> do
|
Just size -> do
|
||||||
size' <- fromIntegral . fileSize
|
size' <- liftIO $ getFileSize file
|
||||||
<$> liftIO (getFileStatus file)
|
|
||||||
comparesizes size size'
|
comparesizes size size'
|
||||||
where
|
where
|
||||||
comparesizes a b = do
|
comparesizes a b = do
|
||||||
|
|
|
@ -490,8 +490,7 @@ staleSize label dirspec = go =<< lift (dirKeys dirspec)
|
||||||
keysizes keys = do
|
keysizes keys = do
|
||||||
dir <- lift $ fromRepo dirspec
|
dir <- lift $ fromRepo dirspec
|
||||||
liftIO $ forM keys $ \k -> catchDefaultIO 0 $
|
liftIO $ forM keys $ \k -> catchDefaultIO 0 $
|
||||||
fromIntegral . fileSize
|
getFileSize (dir </> keyFile k)
|
||||||
<$> getFileStatus (dir </> keyFile k)
|
|
||||||
|
|
||||||
aside :: String -> String
|
aside :: String -> String
|
||||||
aside s = " (" ++ s ++ ")"
|
aside s = " (" ++ s ++ ")"
|
||||||
|
|
|
@ -62,8 +62,7 @@ start key = fieldTransfer Download key $ \_p ->
|
||||||
oksize <- case Types.Key.keySize key of
|
oksize <- case Types.Key.keySize key of
|
||||||
Nothing -> return True
|
Nothing -> return True
|
||||||
Just size -> do
|
Just size -> do
|
||||||
size' <- fromIntegral . fileSize
|
size' <- liftIO $ getFileSize tmp
|
||||||
<$> liftIO (getFileStatus tmp)
|
|
||||||
return $ size == size'
|
return $ size == size'
|
||||||
if oksize
|
if oksize
|
||||||
then case Backend.maybeLookupBackendName (Types.Key.keyBackendName key) of
|
then case Backend.maybeLookupBackendName (Types.Key.keyBackendName key) of
|
||||||
|
|
|
@ -30,7 +30,8 @@ import Utility.Monad as X
|
||||||
import Utility.Data as X
|
import Utility.Data as X
|
||||||
import Utility.Applicative as X
|
import Utility.Applicative as X
|
||||||
import Utility.FileSystemEncoding as X
|
import Utility.FileSystemEncoding as X
|
||||||
import Utility.PosixFiles as X
|
import Utility.PosixFiles as X hiding (fileSize)
|
||||||
|
import Utility.FileSize as X
|
||||||
import Utility.Network as X
|
import Utility.Network as X
|
||||||
|
|
||||||
import Utility.PartialPrelude as X
|
import Utility.PartialPrelude as X
|
||||||
|
|
4
Limit.hs
4
Limit.hs
|
@ -239,9 +239,7 @@ limitSize vs s = case readSize dataUnits s of
|
||||||
checkkey sz key = return $ keySize key `vs` Just sz
|
checkkey sz key = return $ keySize key `vs` Just sz
|
||||||
check _ sz (Just key) = checkkey sz key
|
check _ sz (Just key) = checkkey sz key
|
||||||
check fi sz Nothing = do
|
check fi sz Nothing = do
|
||||||
filesize <- liftIO $ catchMaybeIO $
|
filesize <- liftIO $ catchMaybeIO $ getFileSize (relFile fi)
|
||||||
fromIntegral . fileSize
|
|
||||||
<$> getFileStatus (relFile fi)
|
|
||||||
return $ filesize `vs` Just sz
|
return $ filesize `vs` Just sz
|
||||||
|
|
||||||
addMetaData :: String -> Annex ()
|
addMetaData :: String -> Annex ()
|
||||||
|
|
|
@ -563,9 +563,7 @@ rsyncOrCopyFile rsyncparams src dest p =
|
||||||
(const $ copyFileExternal CopyTimeStamps src dest)
|
(const $ copyFileExternal CopyTimeStamps src dest)
|
||||||
watchfilesize oldsz = do
|
watchfilesize oldsz = do
|
||||||
threadDelay 500000 -- 0.5 seconds
|
threadDelay 500000 -- 0.5 seconds
|
||||||
v <- catchMaybeIO $
|
v <- catchMaybeIO $ toBytesProcessed <$> getFileSize dest
|
||||||
toBytesProcessed . fileSize
|
|
||||||
<$> getFileStatus dest
|
|
||||||
case v of
|
case v of
|
||||||
Just sz
|
Just sz
|
||||||
| sz /= oldsz -> do
|
| sz /= oldsz -> do
|
||||||
|
|
|
@ -244,8 +244,7 @@ retrieveChunks retriever u chunkconfig encryptor basek dest basep sink
|
||||||
| otherwise = go =<< chunkKeys u chunkconfig basek
|
| otherwise = go =<< chunkKeys u chunkconfig basek
|
||||||
where
|
where
|
||||||
go ls = do
|
go ls = do
|
||||||
currsize <- liftIO $ catchMaybeIO $
|
currsize <- liftIO $ catchMaybeIO $ getFileSize dest
|
||||||
toInteger . fileSize <$> getFileStatus dest
|
|
||||||
let ls' = maybe ls (setupResume ls) currsize
|
let ls' = maybe ls (setupResume ls) currsize
|
||||||
if any null ls'
|
if any null ls'
|
||||||
then return True -- dest is already complete
|
then return True -- dest is already complete
|
||||||
|
|
|
@ -32,7 +32,7 @@ httpStorer a = fileStorer $ \k f m -> a k =<< liftIO (httpBodyStorer f m)
|
||||||
-- the meter as it's sent.
|
-- the meter as it's sent.
|
||||||
httpBodyStorer :: FilePath -> MeterUpdate -> IO RequestBody
|
httpBodyStorer :: FilePath -> MeterUpdate -> IO RequestBody
|
||||||
httpBodyStorer src m = do
|
httpBodyStorer src m = do
|
||||||
size <- fromIntegral . fileSize <$> getFileStatus src :: IO Integer
|
size <- getFileSize src
|
||||||
let streamer sink = withMeteredFile src m $ \b -> byteStringPopper b sink
|
let streamer sink = withMeteredFile src m $ \b -> byteStringPopper b sink
|
||||||
return $ RequestBodyStream (fromInteger size) streamer
|
return $ RequestBodyStream (fromInteger size) streamer
|
||||||
|
|
||||||
|
|
|
@ -159,7 +159,7 @@ store :: Remote -> S3Handle -> Storer
|
||||||
store r h = fileStorer $ \k f p -> do
|
store r h = fileStorer $ \k f p -> do
|
||||||
case partSize (hinfo h) of
|
case partSize (hinfo h) of
|
||||||
Just partsz | partsz > 0 -> do
|
Just partsz | partsz > 0 -> do
|
||||||
fsz <- fromIntegral . fileSize <$> liftIO (getFileStatus f)
|
fsz <- liftIO $ getFileSize f
|
||||||
if fsz > partsz
|
if fsz > partsz
|
||||||
then multipartupload fsz partsz k f p
|
then multipartupload fsz partsz k f p
|
||||||
else singlepartupload k f p
|
else singlepartupload k f p
|
||||||
|
|
33
Utility/FileSize.hs
Normal file
33
Utility/FileSize.hs
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
{- File size.
|
||||||
|
-
|
||||||
|
- License: BSD-2-clause
|
||||||
|
-}
|
||||||
|
|
||||||
|
{-# LANGUAGE CPP #-}
|
||||||
|
|
||||||
|
module Utility.FileSize where
|
||||||
|
|
||||||
|
import System.PosixCompat.Files
|
||||||
|
import Control.Exception (bracket)
|
||||||
|
import System.IO
|
||||||
|
|
||||||
|
{- Gets the size of a file.
|
||||||
|
-
|
||||||
|
- This is better than using fileSize, because on Windows that returns a
|
||||||
|
- FileOffset which maxes out at 2 gb.
|
||||||
|
- See https://github.com/jystic/unix-compat/issues/16
|
||||||
|
-}
|
||||||
|
getFileSize :: FilePath -> IO Integer
|
||||||
|
#ifndef mingw32_HOST_OS
|
||||||
|
getFileSize f = fromIntegral . fileSize <$> getFileStatus f
|
||||||
|
#else
|
||||||
|
getFileSize f = bracket (openFile f ReadMode) hClose hFileSize
|
||||||
|
#endif
|
||||||
|
|
||||||
|
{- Gets the size of the file, when its FileStatus is already known. -}
|
||||||
|
getFileSize' :: FilePath -> FileStatus -> IO Integer
|
||||||
|
#ifndef mingw32_HOST_OS
|
||||||
|
getFileSize' _ s = return $ fromIntegral $ fileSize s
|
||||||
|
#else
|
||||||
|
getFileSize' f _ = getFileSize f
|
||||||
|
#endif
|
|
@ -40,6 +40,9 @@ module Utility.InodeCache (
|
||||||
import Common
|
import Common
|
||||||
import System.PosixCompat.Types
|
import System.PosixCompat.Types
|
||||||
import Utility.QuickCheck
|
import Utility.QuickCheck
|
||||||
|
-- While fileSize overflows and wraps at 2gb on Windows,
|
||||||
|
-- it's ok for purposes of comparison.
|
||||||
|
import System.PosixCompat.Files (fileSize)
|
||||||
|
|
||||||
#ifdef mingw32_HOST_OS
|
#ifdef mingw32_HOST_OS
|
||||||
import Data.Word (Word64)
|
import Data.Word (Word64)
|
||||||
|
|
|
@ -102,9 +102,12 @@ exists url uo = case parseURIRelaxed url of
|
||||||
-- so fall back to reading files and using curl.
|
-- so fall back to reading files and using curl.
|
||||||
Nothing
|
Nothing
|
||||||
| uriScheme u == "file:" -> do
|
| uriScheme u == "file:" -> do
|
||||||
s <- catchMaybeIO $ getFileStatus (unEscapeString $ uriPath u)
|
let f = unEscapeString (uriPath u)
|
||||||
|
s <- catchMaybeIO $ getFileStatus f
|
||||||
case s of
|
case s of
|
||||||
Just stat -> return (True, Just $ fromIntegral $ fileSize stat)
|
Just stat -> do
|
||||||
|
sz <- getFileSize' f stat
|
||||||
|
return (True, Just sz)
|
||||||
Nothing -> dne
|
Nothing -> dne
|
||||||
| Build.SysConfig.curl -> do
|
| Build.SysConfig.curl -> do
|
||||||
output <- catchDefaultIO "" $
|
output <- catchDefaultIO "" $
|
||||||
|
|
3
debian/changelog
vendored
3
debian/changelog
vendored
|
@ -13,6 +13,9 @@ git-annex (5.20150114) UNRELEASED; urgency=medium
|
||||||
* Fix wording of message displayed when unable to get a file that
|
* Fix wording of message displayed when unable to get a file that
|
||||||
is available in untrusted repositories.
|
is available in untrusted repositories.
|
||||||
* Windows: Fix running of the pre-commit-annex hook.
|
* Windows: Fix running of the pre-commit-annex hook.
|
||||||
|
* Avoid using fileSize which maxes out at just 2 gb on Windows.
|
||||||
|
Instead, use hFileSize, which doesn't have a bounded size.
|
||||||
|
Fixes support for files > 2 gb on Windows.
|
||||||
|
|
||||||
-- Joey Hess <id@joeyh.name> Tue, 13 Jan 2015 17:03:39 -0400
|
-- Joey Hess <id@joeyh.name> Tue, 13 Jan 2015 17:03:39 -0400
|
||||||
|
|
||||||
|
|
|
@ -56,3 +56,5 @@ git-annex: sync: 1 failed
|
||||||
|
|
||||||
# End of transcript or log.
|
# End of transcript or log.
|
||||||
"""]]
|
"""]]
|
||||||
|
|
||||||
|
> [[fixed|done]] --[[Joey]]
|
||||||
|
|
|
@ -0,0 +1,25 @@
|
||||||
|
[[!comment format=mdwn
|
||||||
|
username="joey"
|
||||||
|
subject="""comment 1"""
|
||||||
|
date="2015-01-20T19:41:44Z"
|
||||||
|
content="""
|
||||||
|
In my own test, I made a 10 gb file, and the key git-annex came up with had
|
||||||
|
a size of -2147483648 which is clearly screwed up.. But that's what
|
||||||
|
getFileStatus reports the size as. This was in a 32 bit XP VM.
|
||||||
|
|
||||||
|
Hmm, unix-compat's getFileStatus calls getFileSize, which yields a
|
||||||
|
FileOffset. The maxBound of that on linux is a nice large
|
||||||
|
9223372036854775807, but on Windows, it appears to be 2147483647.
|
||||||
|
|
||||||
|
Compare with using hFileSize, which yields an Integer. So,
|
||||||
|
getFileSize and fileSize are unsafe on Windows due to FileOffset being so
|
||||||
|
small on Windows.
|
||||||
|
|
||||||
|
I have now corrected all places in git-annex that used the unsafe fileSize.
|
||||||
|
It will behave correctly on Windows now.
|
||||||
|
|
||||||
|
However, if you still have the repo with the big file, its key still has
|
||||||
|
the wrong size. To fix, you can "git annex unannex" the file, and then "git
|
||||||
|
annex add" it back, after upgrading to the current daily build, or the next
|
||||||
|
release of git-annex.
|
||||||
|
"""]]
|
Loading…
Reference in a new issue