add getFileSize, which can get the real size of a large file on Windows

Avoid using fileSize which maxes out at just 2 gb on Windows.
Instead, use hFileSize, which doesn't have a bounded size.
Fixes support for files > 2 gb on Windows.

Note that the InodeCache code only needs to compare a file size,
so it doesn't matter if the file size wraps. So it has been
left as-is. This was necessary both to avoid invalidating existing inode
caches, and because the code passed FileStatus around and would have become
more expensive if it called getFileSize.

This commit was sponsored by Christian Dietrich.
This commit is contained in:
Joey Hess 2015-01-20 16:58:48 -04:00
parent 87c4f0e320
commit 4f657aa14e
22 changed files with 94 additions and 36 deletions

View file

@ -234,8 +234,8 @@ prepGetViaTmpChecked key unabletoget getkey = do
tmp <- fromRepo $ gitAnnexTmpObjectLocation key tmp <- fromRepo $ gitAnnexTmpObjectLocation key
e <- liftIO $ doesFileExist tmp e <- liftIO $ doesFileExist tmp
alreadythere <- if e alreadythere <- liftIO $ if e
then fromIntegral . fileSize <$> liftIO (getFileStatus tmp) then getFileSize tmp
else return 0 else return 0
ifM (checkDiskSpace Nothing key alreadythere) ifM (checkDiskSpace Nothing key alreadythere)
( do ( do

View file

@ -132,8 +132,7 @@ runTransfer' ignorelock t file shouldretry a = do
liftIO $ readMVar metervar liftIO $ readMVar metervar
| otherwise = do | otherwise = do
f <- fromRepo $ gitAnnexTmpObjectLocation (transferKey t) f <- fromRepo $ gitAnnexTmpObjectLocation (transferKey t)
liftIO $ catchDefaultIO 0 $ liftIO $ catchDefaultIO 0 $ getFileSize f
fromIntegral . fileSize <$> getFileStatus f
type RetryDecider = TransferInfo -> TransferInfo -> Bool type RetryDecider = TransferInfo -> TransferInfo -> Bool

View file

@ -140,8 +140,7 @@ repairStaleGitLocks r = do
repairStaleLocks :: [FilePath] -> Assistant () repairStaleLocks :: [FilePath] -> Assistant ()
repairStaleLocks lockfiles = go =<< getsizes repairStaleLocks lockfiles = go =<< getsizes
where where
getsize lf = catchMaybeIO $ getsize lf = catchMaybeIO $ (\s -> (lf, s)) <$> getFileSize lf
(\s -> (lf, fileSize s)) <$> getFileStatus lf
getsizes = liftIO $ catMaybes <$> mapM getsize lockfiles getsizes = liftIO $ catMaybes <$> mapM getsize lockfiles
go [] = return () go [] = return ()
go l = ifM (liftIO $ null <$> Lsof.query ("--" : map fst l)) go l = ifM (liftIO $ null <$> Lsof.query ("--" : map fst l))

View file

@ -225,7 +225,7 @@ checkLogSize :: Int -> Assistant ()
checkLogSize n = do checkLogSize n = do
f <- liftAnnex $ fromRepo gitAnnexLogFile f <- liftAnnex $ fromRepo gitAnnexLogFile
logs <- liftIO $ listLogs f logs <- liftIO $ listLogs f
totalsize <- liftIO $ sum <$> mapM filesize logs totalsize <- liftIO $ sum <$> mapM getFileSize logs
when (totalsize > 2 * oneMegabyte) $ do when (totalsize > 2 * oneMegabyte) $ do
notice ["Rotated logs due to size:", show totalsize] notice ["Rotated logs due to size:", show totalsize]
liftIO $ openLog f >>= handleToFd >>= redirLog liftIO $ openLog f >>= handleToFd >>= redirLog
@ -237,9 +237,7 @@ checkLogSize n = do
checkLogSize (n + 1) checkLogSize (n + 1)
_ -> noop _ -> noop
where where
filesize f = fromIntegral . fileSize <$> liftIO (getFileStatus f) oneMegabyte :: Integer
oneMegabyte :: Int
oneMegabyte = 1000000 oneMegabyte = 1000000
#endif #endif

View file

@ -36,8 +36,7 @@ transferPollerThread = namedThread "TransferPoller" $ do
- temp file being used for the transfer. -} - temp file being used for the transfer. -}
| transferDirection t == Download = do | transferDirection t == Download = do
let f = gitAnnexTmpObjectLocation (transferKey t) g let f = gitAnnexTmpObjectLocation (transferKey t) g
sz <- liftIO $ catchMaybeIO $ sz <- liftIO $ catchMaybeIO $ getFileSize f
fromIntegral . fileSize <$> getFileStatus f
newsize t info sz newsize t info sz
{- Uploads don't need to be polled for when the TransferWatcher {- Uploads don't need to be polled for when the TransferWatcher
- thread can track file modifications. -} - thread can track file modifications. -}

View file

@ -68,8 +68,7 @@ hashNameE hash = hashName hash ++ "E"
keyValue :: Hash -> KeySource -> Annex (Maybe Key) keyValue :: Hash -> KeySource -> Annex (Maybe Key)
keyValue hash source = do keyValue hash source = do
let file = contentLocation source let file = contentLocation source
stat <- liftIO $ getFileStatus file filesize <- liftIO $ getFileSize file
let filesize = fromIntegral $ fileSize stat
s <- hashFile hash file filesize s <- hashFile hash file filesize
return $ Just $ stubKey return $ Just $ stubKey
{ keyName = s { keyName = s
@ -103,7 +102,7 @@ checkKeyChecksum hash key file = do
mstat <- liftIO $ catchMaybeIO $ getFileStatus file mstat <- liftIO $ catchMaybeIO $ getFileStatus file
case (mstat, fast) of case (mstat, fast) of
(Just stat, False) -> do (Just stat, False) -> do
let filesize = fromIntegral $ fileSize stat filesize <- liftIO $ getFileSize' file stat
showSideAction "checksum" showSideAction "checksum"
check <$> hashFile hash file filesize check <$> hashFile hash file filesize
_ -> return True _ -> return True

View file

@ -32,11 +32,13 @@ backend = Backend
-} -}
keyValue :: KeySource -> Annex (Maybe Key) keyValue :: KeySource -> Annex (Maybe Key)
keyValue source = do keyValue source = do
stat <- liftIO $ getFileStatus $ contentLocation source let f = contentLocation source
stat <- liftIO $ getFileStatus f
sz <- liftIO $ getFileSize' f stat
relf <- getTopFilePath <$> inRepo (toTopFilePath $ keyFilename source) relf <- getTopFilePath <$> inRepo (toTopFilePath $ keyFilename source)
return $ Just $ stubKey return $ Just $ stubKey
{ keyName = genKeyName relf { keyName = genKeyName relf
, keyBackendName = name backend , keyBackendName = name backend
, keySize = Just $ fromIntegral $ fileSize stat , keySize = Just sz
, keyMtime = Just $ modificationTime stat , keyMtime = Just $ modificationTime stat
} }

View file

@ -303,8 +303,7 @@ checkKeySizeOr :: (Key -> Annex String) -> Key -> FilePath -> Annex Bool
checkKeySizeOr bad key file = case Types.Key.keySize key of checkKeySizeOr bad key file = case Types.Key.keySize key of
Nothing -> return True Nothing -> return True
Just size -> do Just size -> do
size' <- fromIntegral . fileSize size' <- liftIO $ getFileSize file
<$> liftIO (getFileStatus file)
comparesizes size size' comparesizes size size'
where where
comparesizes a b = do comparesizes a b = do

View file

@ -490,8 +490,7 @@ staleSize label dirspec = go =<< lift (dirKeys dirspec)
keysizes keys = do keysizes keys = do
dir <- lift $ fromRepo dirspec dir <- lift $ fromRepo dirspec
liftIO $ forM keys $ \k -> catchDefaultIO 0 $ liftIO $ forM keys $ \k -> catchDefaultIO 0 $
fromIntegral . fileSize getFileSize (dir </> keyFile k)
<$> getFileStatus (dir </> keyFile k)
aside :: String -> String aside :: String -> String
aside s = " (" ++ s ++ ")" aside s = " (" ++ s ++ ")"

View file

@ -62,8 +62,7 @@ start key = fieldTransfer Download key $ \_p ->
oksize <- case Types.Key.keySize key of oksize <- case Types.Key.keySize key of
Nothing -> return True Nothing -> return True
Just size -> do Just size -> do
size' <- fromIntegral . fileSize size' <- liftIO $ getFileSize tmp
<$> liftIO (getFileStatus tmp)
return $ size == size' return $ size == size'
if oksize if oksize
then case Backend.maybeLookupBackendName (Types.Key.keyBackendName key) of then case Backend.maybeLookupBackendName (Types.Key.keyBackendName key) of

View file

@ -30,7 +30,8 @@ import Utility.Monad as X
import Utility.Data as X import Utility.Data as X
import Utility.Applicative as X import Utility.Applicative as X
import Utility.FileSystemEncoding as X import Utility.FileSystemEncoding as X
import Utility.PosixFiles as X import Utility.PosixFiles as X hiding (fileSize)
import Utility.FileSize as X
import Utility.Network as X import Utility.Network as X
import Utility.PartialPrelude as X import Utility.PartialPrelude as X

View file

@ -239,9 +239,7 @@ limitSize vs s = case readSize dataUnits s of
checkkey sz key = return $ keySize key `vs` Just sz checkkey sz key = return $ keySize key `vs` Just sz
check _ sz (Just key) = checkkey sz key check _ sz (Just key) = checkkey sz key
check fi sz Nothing = do check fi sz Nothing = do
filesize <- liftIO $ catchMaybeIO $ filesize <- liftIO $ catchMaybeIO $ getFileSize (relFile fi)
fromIntegral . fileSize
<$> getFileStatus (relFile fi)
return $ filesize `vs` Just sz return $ filesize `vs` Just sz
addMetaData :: String -> Annex () addMetaData :: String -> Annex ()

View file

@ -563,9 +563,7 @@ rsyncOrCopyFile rsyncparams src dest p =
(const $ copyFileExternal CopyTimeStamps src dest) (const $ copyFileExternal CopyTimeStamps src dest)
watchfilesize oldsz = do watchfilesize oldsz = do
threadDelay 500000 -- 0.5 seconds threadDelay 500000 -- 0.5 seconds
v <- catchMaybeIO $ v <- catchMaybeIO $ toBytesProcessed <$> getFileSize dest
toBytesProcessed . fileSize
<$> getFileStatus dest
case v of case v of
Just sz Just sz
| sz /= oldsz -> do | sz /= oldsz -> do

View file

@ -244,8 +244,7 @@ retrieveChunks retriever u chunkconfig encryptor basek dest basep sink
| otherwise = go =<< chunkKeys u chunkconfig basek | otherwise = go =<< chunkKeys u chunkconfig basek
where where
go ls = do go ls = do
currsize <- liftIO $ catchMaybeIO $ currsize <- liftIO $ catchMaybeIO $ getFileSize dest
toInteger . fileSize <$> getFileStatus dest
let ls' = maybe ls (setupResume ls) currsize let ls' = maybe ls (setupResume ls) currsize
if any null ls' if any null ls'
then return True -- dest is already complete then return True -- dest is already complete

View file

@ -32,7 +32,7 @@ httpStorer a = fileStorer $ \k f m -> a k =<< liftIO (httpBodyStorer f m)
-- the meter as it's sent. -- the meter as it's sent.
httpBodyStorer :: FilePath -> MeterUpdate -> IO RequestBody httpBodyStorer :: FilePath -> MeterUpdate -> IO RequestBody
httpBodyStorer src m = do httpBodyStorer src m = do
size <- fromIntegral . fileSize <$> getFileStatus src :: IO Integer size <- getFileSize src
let streamer sink = withMeteredFile src m $ \b -> byteStringPopper b sink let streamer sink = withMeteredFile src m $ \b -> byteStringPopper b sink
return $ RequestBodyStream (fromInteger size) streamer return $ RequestBodyStream (fromInteger size) streamer

View file

@ -159,7 +159,7 @@ store :: Remote -> S3Handle -> Storer
store r h = fileStorer $ \k f p -> do store r h = fileStorer $ \k f p -> do
case partSize (hinfo h) of case partSize (hinfo h) of
Just partsz | partsz > 0 -> do Just partsz | partsz > 0 -> do
fsz <- fromIntegral . fileSize <$> liftIO (getFileStatus f) fsz <- liftIO $ getFileSize f
if fsz > partsz if fsz > partsz
then multipartupload fsz partsz k f p then multipartupload fsz partsz k f p
else singlepartupload k f p else singlepartupload k f p

33
Utility/FileSize.hs Normal file
View file

@ -0,0 +1,33 @@
{- File size.
-
- License: BSD-2-clause
-}
{-# LANGUAGE CPP #-}
module Utility.FileSize where
import System.PosixCompat.Files
import Control.Exception (bracket)
import System.IO
{- Reports the size of the file at the given path as an Integer.
-
- Prefer this over fileSize: on Windows, fileSize yields a FileOffset,
- which maxes out at 2 gb.
- See https://github.com/jystic/unix-compat/issues/16
-
- Off Windows, the size is taken from the file's FileStatus.
- On Windows, the file is opened for reading, queried with hFileSize,
- and the handle is closed again by bracket.
-}
getFileSize :: FilePath -> IO Integer
#ifndef mingw32_HOST_OS
getFileSize path = do
  st <- getFileStatus path
  return (fromIntegral (fileSize st))
#else
getFileSize path = bracket acquire hClose hFileSize
  where
    acquire = openFile path ReadMode
#endif
{- Variant of getFileSize for callers that already hold the file's
- FileStatus.
-
- Off Windows, the size is read straight out of that FileStatus and the
- path is ignored. On Windows, the FileStatus is ignored and the size is
- obtained from getFileSize on the path instead. -}
getFileSize' :: FilePath -> FileStatus -> IO Integer
#ifndef mingw32_HOST_OS
getFileSize' _ = return . fromIntegral . fileSize
#else
getFileSize' path _ = getFileSize path
#endif

View file

@ -40,6 +40,9 @@ module Utility.InodeCache (
import Common import Common
import System.PosixCompat.Types import System.PosixCompat.Types
import Utility.QuickCheck import Utility.QuickCheck
-- While fileSize overflows and wraps at 2gb on Windows,
-- it's ok for purposes of comparison.
import System.PosixCompat.Files (fileSize)
#ifdef mingw32_HOST_OS #ifdef mingw32_HOST_OS
import Data.Word (Word64) import Data.Word (Word64)

View file

@ -102,9 +102,12 @@ exists url uo = case parseURIRelaxed url of
-- so fall back to reading files and using curl. -- so fall back to reading files and using curl.
Nothing Nothing
| uriScheme u == "file:" -> do | uriScheme u == "file:" -> do
s <- catchMaybeIO $ getFileStatus (unEscapeString $ uriPath u) let f = unEscapeString (uriPath u)
s <- catchMaybeIO $ getFileStatus f
case s of case s of
Just stat -> return (True, Just $ fromIntegral $ fileSize stat) Just stat -> do
sz <- getFileSize' f stat
return (True, Just sz)
Nothing -> dne Nothing -> dne
| Build.SysConfig.curl -> do | Build.SysConfig.curl -> do
output <- catchDefaultIO "" $ output <- catchDefaultIO "" $

3
debian/changelog vendored
View file

@ -13,6 +13,9 @@ git-annex (5.20150114) UNRELEASED; urgency=medium
* Fix wording of message displayed when unable to get a file that * Fix wording of message displayed when unable to get a file that
is available in untrusted repositories. is available in untrusted repositories.
* Windows: Fix running of the pre-commit-annex hook. * Windows: Fix running of the pre-commit-annex hook.
* Avoid using fileSize which maxes out at just 2 gb on Windows.
Instead, use hFileSize, which doesn't have a bounded size.
Fixes support for files > 2 gb on Windows.
-- Joey Hess <id@joeyh.name> Tue, 13 Jan 2015 17:03:39 -0400 -- Joey Hess <id@joeyh.name> Tue, 13 Jan 2015 17:03:39 -0400

View file

@ -56,3 +56,5 @@ git-annex: sync: 1 failed
# End of transcript or log. # End of transcript or log.
"""]] """]]
> [[fixed|done]] --[[Joey]]

View file

@ -0,0 +1,25 @@
[[!comment format=mdwn
username="joey"
subject="""comment 1"""
date="2015-01-20T19:41:44Z"
content="""
In my own test, I made a 10 gb file, and the key git-annex came up with had
a size of -2147483648 which is clearly screwed up.. But that's what
getFileStatus reports the size as. This was in a 32 bit XP VM.
Hmm, unix-compat's getFileStatus calls getFileSize, which yields a
FileOffset. The maxBound of that on linux is a nice large
9223372036854775807, but on Windows, it appears to be 2147483647.
Compare with using hFileSize, which yields an Integer. So,
getFileSize and fileSize are unsafe on Windows due to FileOffset being so
small on Windows.
I have now corrected all places in git-annex that used the unsafe fileSize.
It will behave correctly on Windows now.
However, if you still have the repo with the big file, its key still has
the wrong size. To fix, you can "git annex unannex" the file, and then "git
annex add" it back, after upgrading to the current daily build, or the next
release of git-annex.
"""]]