annex.thin
Decided it's too scary to make v6 unlocked files have 1 copy by default, but that should be available to those who need it. This is consistent with git-annex not dropping unused content without --force, etc. * Added annex.thin setting, which makes unlocked files in v6 repositories be hard linked to their content, instead of a copy. This saves disk space but means any modification of an unlocked file will lose the local (and possibly only) copy of the old version. * Enable annex.thin by default on upgrade from direct mode to v6, since direct mode made the same tradeoff. * fix: Adjusts unlocked files as configured by annex.thin.
This commit is contained in:
parent
bb6719678e
commit
121f5d5b0c
17 changed files with 259 additions and 146 deletions
131
Annex/Content.hs
131
Annex/Content.hs
|
@ -25,8 +25,8 @@ module Annex.Content (
|
||||||
checkDiskSpace,
|
checkDiskSpace,
|
||||||
moveAnnex,
|
moveAnnex,
|
||||||
populatePointerFile,
|
populatePointerFile,
|
||||||
linkAnnex,
|
linkToAnnex,
|
||||||
linkAnnex',
|
linkFromAnnex,
|
||||||
LinkAnnexResult(..),
|
LinkAnnexResult(..),
|
||||||
unlinkAnnex,
|
unlinkAnnex,
|
||||||
checkedCopyFile,
|
checkedCopyFile,
|
||||||
|
@ -469,13 +469,12 @@ moveAnnex key src = withObjectLoc key storeobject storedirect
|
||||||
storeobject dest = ifM (liftIO $ doesFileExist dest)
|
storeobject dest = ifM (liftIO $ doesFileExist dest)
|
||||||
( alreadyhave
|
( alreadyhave
|
||||||
, modifyContent dest $ do
|
, modifyContent dest $ do
|
||||||
|
freezeContent src
|
||||||
liftIO $ moveFile src dest
|
liftIO $ moveFile src dest
|
||||||
fs <- Database.Keys.getAssociatedFiles key
|
fs <- Database.Keys.getAssociatedFiles key
|
||||||
if null fs
|
unless (null fs) $ do
|
||||||
then freezeContent dest
|
mapM_ (populatePointerFile key dest) fs
|
||||||
else do
|
Database.Keys.storeInodeCaches key (dest:fs)
|
||||||
mapM_ (populatePointerFile key dest) fs
|
|
||||||
Database.Keys.storeInodeCaches key (dest:fs)
|
|
||||||
)
|
)
|
||||||
storeindirect = storeobject =<< calcRepo (gitAnnexLocation key)
|
storeindirect = storeobject =<< calcRepo (gitAnnexLocation key)
|
||||||
|
|
||||||
|
@ -510,48 +509,52 @@ populatePointerFile k obj f = go =<< isPointerFile f
|
||||||
where
|
where
|
||||||
go (Just k') | k == k' = do
|
go (Just k') | k == k' = do
|
||||||
liftIO $ nukeFile f
|
liftIO $ nukeFile f
|
||||||
unlessM (linkAnnex'' k obj f) $
|
ifM (linkOrCopy k obj f)
|
||||||
liftIO $ writeFile f (formatPointer k)
|
( thawContent f
|
||||||
|
, liftIO $ writeFile f (formatPointer k)
|
||||||
|
)
|
||||||
go _ = return ()
|
go _ = return ()
|
||||||
|
|
||||||
{- Hard links a file into .git/annex/objects/, falling back to a copy
|
data LinkAnnexResult = LinkAnnexOk | LinkAnnexFailed | LinkAnnexNoop
|
||||||
- if necessary. Does nothing if the object file already exists.
|
|
||||||
-
|
|
||||||
- Does not lock down the hard linked object, so that the user can modify
|
|
||||||
- the source file. So, adding an object to the annex this way can
|
|
||||||
- prevent losing the content if the source file is deleted, but does not
|
|
||||||
- guard against modifications.
|
|
||||||
-}
|
|
||||||
linkAnnex :: Key -> FilePath -> Maybe InodeCache -> Annex LinkAnnexResult
|
|
||||||
linkAnnex key src srcic = do
|
|
||||||
dest <- calcRepo (gitAnnexLocation key)
|
|
||||||
modifyContent dest $ linkAnnex' key src srcic dest
|
|
||||||
|
|
||||||
{- Hard links (or copies) src to dest, one of which should be the
|
{- Populates the annex object file by hard linking or copying a source
|
||||||
- annex object. Updates inode cache for src and for dest when it's
|
- file to it. -}
|
||||||
- changed. -}
|
linkToAnnex :: Key -> FilePath -> Maybe InodeCache -> Annex LinkAnnexResult
|
||||||
linkAnnex' :: Key -> FilePath -> Maybe InodeCache -> FilePath -> Annex LinkAnnexResult
|
linkToAnnex key src srcic = do
|
||||||
linkAnnex' _ _ Nothing _ = return LinkAnnexFailed
|
dest <- calcRepo (gitAnnexLocation key)
|
||||||
linkAnnex' key src (Just srcic) dest =
|
modifyContent dest $ linkAnnex To key src srcic dest
|
||||||
|
|
||||||
|
{- Makes a destination file be a link or copy from the annex object. -}
|
||||||
|
linkFromAnnex :: Key -> FilePath -> Annex LinkAnnexResult
|
||||||
|
linkFromAnnex key dest = do
|
||||||
|
src <- calcRepo (gitAnnexLocation key)
|
||||||
|
srcic <- withTSDelta (liftIO . genInodeCache src)
|
||||||
|
linkAnnex From key src srcic dest
|
||||||
|
|
||||||
|
data FromTo = From | To
|
||||||
|
|
||||||
|
{- Hard links or copies from or to the annex object location.
|
||||||
|
- Updates inode cache.
|
||||||
|
-
|
||||||
|
- Thaws the file that is not the annex object.
|
||||||
|
- When a hard link was made, this necessarily thaws
|
||||||
|
- the annex object too. So, adding an object to the annex this
|
||||||
|
- way can prevent losing the content if the source file
|
||||||
|
- is deleted, but does not guard against modifications.
|
||||||
|
-}
|
||||||
|
linkAnnex :: FromTo -> Key -> FilePath -> Maybe InodeCache -> FilePath -> Annex LinkAnnexResult
|
||||||
|
linkAnnex _ _ _ Nothing _ = return LinkAnnexFailed
|
||||||
|
linkAnnex fromto key src (Just srcic) dest =
|
||||||
ifM (liftIO $ doesFileExist dest)
|
ifM (liftIO $ doesFileExist dest)
|
||||||
( do
|
( do
|
||||||
Database.Keys.addInodeCaches key [srcic]
|
Database.Keys.addInodeCaches key [srcic]
|
||||||
return LinkAnnexNoop
|
return LinkAnnexNoop
|
||||||
, ifM (linkAnnex'' key src dest)
|
, ifM (linkOrCopy key src dest)
|
||||||
( do
|
( do
|
||||||
thawContent dest
|
thawContent $ case fromto of
|
||||||
-- src could have changed while being copied
|
From -> dest
|
||||||
-- to dest
|
To -> src
|
||||||
mcache <- withTSDelta (liftIO . genInodeCache src)
|
checksrcunchanged
|
||||||
case mcache of
|
|
||||||
Just srcic' | compareStrong srcic srcic' -> do
|
|
||||||
destic <- withTSDelta (liftIO . genInodeCache dest)
|
|
||||||
Database.Keys.addInodeCaches key $
|
|
||||||
catMaybes [destic, Just srcic]
|
|
||||||
return LinkAnnexOk
|
|
||||||
_ -> do
|
|
||||||
liftIO $ nukeFile dest
|
|
||||||
failed
|
|
||||||
, failed
|
, failed
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
@ -559,25 +562,41 @@ linkAnnex' key src (Just srcic) dest =
|
||||||
failed = do
|
failed = do
|
||||||
Database.Keys.addInodeCaches key [srcic]
|
Database.Keys.addInodeCaches key [srcic]
|
||||||
return LinkAnnexFailed
|
return LinkAnnexFailed
|
||||||
|
checksrcunchanged = do
|
||||||
|
mcache <- withTSDelta (liftIO . genInodeCache src)
|
||||||
|
case mcache of
|
||||||
|
Just srcic' | compareStrong srcic srcic' -> do
|
||||||
|
destic <- withTSDelta (liftIO . genInodeCache dest)
|
||||||
|
Database.Keys.addInodeCaches key $
|
||||||
|
catMaybes [destic, Just srcic]
|
||||||
|
return LinkAnnexOk
|
||||||
|
_ -> do
|
||||||
|
liftIO $ nukeFile dest
|
||||||
|
failed
|
||||||
|
|
||||||
data LinkAnnexResult = LinkAnnexOk | LinkAnnexFailed | LinkAnnexNoop
|
{- Hard links or copies src to dest. Only uses a hard link when annex.thin
|
||||||
|
- is enabled and when src is not already hardlinked to elsewhere.
|
||||||
{- Hard links or copies src to dest. Only uses a hard link if src
|
- Checks disk reserve before copying, and will fail if not enough space,
|
||||||
- is not already hardlinked to elsewhere. Checks disk reserve before
|
- or if the dest file already exists. -}
|
||||||
- copying, and will fail if not enough space, or if the dest file
|
linkOrCopy :: Key -> FilePath -> FilePath -> Annex Bool
|
||||||
- already exists. -}
|
linkOrCopy key src dest = catchBoolIO $
|
||||||
linkAnnex'' :: Key -> FilePath -> FilePath -> Annex Bool
|
ifM (annexThin <$> Annex.getGitConfig)
|
||||||
linkAnnex'' key src dest = catchBoolIO $ do
|
( hardlink
|
||||||
s <- liftIO $ getFileStatus src
|
, copy =<< getstat
|
||||||
let copy = checkedCopyFile' key src dest s
|
)
|
||||||
|
where
|
||||||
|
hardlink = do
|
||||||
#ifndef mingw32_HOST_OS
|
#ifndef mingw32_HOST_OS
|
||||||
if linkCount s > 1
|
s <- getstat
|
||||||
then copy
|
if linkCount s > 1
|
||||||
else liftIO (createLink src dest >> return True)
|
then copy s
|
||||||
`catchIO` const copy
|
else liftIO (createLink src dest >> return True)
|
||||||
|
`catchIO` const (copy s)
|
||||||
#else
|
#else
|
||||||
copy
|
copy s
|
||||||
#endif
|
#endif
|
||||||
|
copy = checkedCopyFile' key src dest
|
||||||
|
getstat = liftIO $ getFileStatus src
|
||||||
|
|
||||||
{- Removes the annex object file for a key. Lowlevel. -}
|
{- Removes the annex object file for a key. Lowlevel. -}
|
||||||
unlinkAnnex :: Key -> Annex ()
|
unlinkAnnex :: Key -> Annex ()
|
||||||
|
|
|
@ -142,11 +142,11 @@ ingest (Just (LockedDown lockingfile source)) = withTSDelta $ \delta -> do
|
||||||
|
|
||||||
gounlocked key (Just cache) s = do
|
gounlocked key (Just cache) s = do
|
||||||
-- Remove temp directory hard link first because
|
-- Remove temp directory hard link first because
|
||||||
-- linkAnnex falls back to copying if a file
|
-- linkToAnnex falls back to copying if a file
|
||||||
-- already has a hard link.
|
-- already has a hard link.
|
||||||
cleanCruft source
|
cleanCruft source
|
||||||
cleanOldKeys (keyFilename source) key
|
cleanOldKeys (keyFilename source) key
|
||||||
r <- linkAnnex key (keyFilename source) (Just cache)
|
r <- linkToAnnex key (keyFilename source) (Just cache)
|
||||||
case r of
|
case r of
|
||||||
LinkAnnexFailed -> failure "failed to link to annex"
|
LinkAnnexFailed -> failure "failed to link to annex"
|
||||||
_ -> do
|
_ -> do
|
||||||
|
@ -219,12 +219,12 @@ cleanOldKeys file newkey = do
|
||||||
<$> Database.Keys.getAssociatedFiles key
|
<$> Database.Keys.getAssociatedFiles key
|
||||||
fs' <- filterM (`sameInodeCache` caches) fs
|
fs' <- filterM (`sameInodeCache` caches) fs
|
||||||
case fs' of
|
case fs' of
|
||||||
-- If linkAnnex fails, the associated
|
-- If linkToAnnex fails, the associated
|
||||||
-- file with the content is still present,
|
-- file with the content is still present,
|
||||||
-- so no need for any recovery.
|
-- so no need for any recovery.
|
||||||
(f:_) -> do
|
(f:_) -> do
|
||||||
ic <- withTSDelta (liftIO . genInodeCache f)
|
ic <- withTSDelta (liftIO . genInodeCache f)
|
||||||
void $ linkAnnex key f ic
|
void $ linkToAnnex key f ic
|
||||||
_ -> lostcontent
|
_ -> lostcontent
|
||||||
where
|
where
|
||||||
lostcontent = logStatus key InfoMissing
|
lostcontent = logStatus key InfoMissing
|
||||||
|
|
|
@ -18,7 +18,7 @@ import Utility.Tmp
|
||||||
- which it can write to, and once done the temp file is moved into place
|
- which it can write to, and once done the temp file is moved into place
|
||||||
- and anything else in the temp directory is deleted.
|
- and anything else in the temp directory is deleted.
|
||||||
-
|
-
|
||||||
- The action can throw an IO exception, in which case the temp directory
|
- The action can throw an exception, in which case the temp directory
|
||||||
- will be deleted, and the existing file will be preserved.
|
- will be deleted, and the existing file will be preserved.
|
||||||
-
|
-
|
||||||
- Throws an IO exception when it was unable to replace the file.
|
- Throws an IO exception when it was unable to replace the file.
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
{- git-annex command
|
{- git-annex command
|
||||||
-
|
-
|
||||||
- Copyright 2010 Joey Hess <id@joeyh.name>
|
- Copyright 2010-2015 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU GPL version 3 or higher.
|
- Licensed under the GNU GPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
@ -11,7 +11,13 @@ module Command.Fix where
|
||||||
|
|
||||||
import Common.Annex
|
import Common.Annex
|
||||||
import Command
|
import Command
|
||||||
|
import Config
|
||||||
|
import qualified Annex
|
||||||
|
import Annex.Version
|
||||||
|
import Annex.ReplaceFile
|
||||||
|
import Annex.Content
|
||||||
import qualified Annex.Queue
|
import qualified Annex.Queue
|
||||||
|
import qualified Database.Keys
|
||||||
#ifdef WITH_CLIBS
|
#ifdef WITH_CLIBS
|
||||||
#ifndef __ANDROID__
|
#ifndef __ANDROID__
|
||||||
import Utility.Touch
|
import Utility.Touch
|
||||||
|
@ -21,22 +27,66 @@ import Utility.Touch
|
||||||
cmd :: Command
|
cmd :: Command
|
||||||
cmd = notDirect $ noCommit $ withGlobalOptions annexedMatchingOptions $
|
cmd = notDirect $ noCommit $ withGlobalOptions annexedMatchingOptions $
|
||||||
command "fix" SectionMaintenance
|
command "fix" SectionMaintenance
|
||||||
"fix up symlinks to point to annexed content"
|
"fix up links to annexed content"
|
||||||
paramPaths (withParams seek)
|
paramPaths (withParams seek)
|
||||||
|
|
||||||
seek :: CmdParams -> CommandSeek
|
seek :: CmdParams -> CommandSeek
|
||||||
seek = withFilesInGit $ whenAnnexed start
|
seek ps = unlessM crippledFileSystem $ do
|
||||||
|
fixwhat <- ifM versionSupportsUnlockedPointers
|
||||||
|
( return FixAll
|
||||||
|
, return FixSymlinks
|
||||||
|
)
|
||||||
|
flip withFilesInGit ps $ whenAnnexed $ start fixwhat
|
||||||
|
|
||||||
{- Fixes the symlink to an annexed file. -}
|
data FixWhat = FixSymlinks | FixAll
|
||||||
start :: FilePath -> Key -> CommandStart
|
|
||||||
start file key = do
|
start :: FixWhat -> FilePath -> Key -> CommandStart
|
||||||
link <- calcRepo $ gitAnnexLink file key
|
start fixwhat file key = do
|
||||||
stopUnless ((/=) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) $ do
|
currlink <- liftIO $ catchMaybeIO $ readSymbolicLink file
|
||||||
|
wantlink <- calcRepo $ gitAnnexLink file key
|
||||||
|
case currlink of
|
||||||
|
Just l
|
||||||
|
| l /= wantlink -> fixby $ fixSymlink file wantlink
|
||||||
|
| otherwise -> stop
|
||||||
|
Nothing -> case fixwhat of
|
||||||
|
FixAll -> fixthin
|
||||||
|
FixSymlinks -> stop
|
||||||
|
where
|
||||||
|
fixby a = do
|
||||||
showStart "fix" file
|
showStart "fix" file
|
||||||
next $ perform file link
|
next a
|
||||||
|
fixthin = do
|
||||||
|
obj <- calcRepo $ gitAnnexLocation key
|
||||||
|
stopUnless (isUnmodified key file <&&> isUnmodified key obj) $ do
|
||||||
|
thin <- annexThin <$> Annex.getGitConfig
|
||||||
|
fs <- liftIO $ catchMaybeIO $ getFileStatus file
|
||||||
|
os <- liftIO $ catchMaybeIO $ getFileStatus obj
|
||||||
|
case (linkCount <$> fs, linkCount <$> os, thin) of
|
||||||
|
(Just 1, Just 1, True) ->
|
||||||
|
fixby $ makeHardLink file key
|
||||||
|
(Just n, Just n', False) | n > 1 && n == n' ->
|
||||||
|
fixby $ breakHardLink file key obj
|
||||||
|
_ -> stop
|
||||||
|
|
||||||
perform :: FilePath -> FilePath -> CommandPerform
|
breakHardLink :: FilePath -> Key -> FilePath -> CommandPerform
|
||||||
perform file link = do
|
breakHardLink file key obj = do
|
||||||
|
replaceFile file $ \tmp ->
|
||||||
|
unlessM (checkedCopyFile key obj tmp) $
|
||||||
|
error "unable to break hard link"
|
||||||
|
Database.Keys.storeInodeCaches key [file]
|
||||||
|
next $ return True
|
||||||
|
|
||||||
|
makeHardLink :: FilePath -> Key -> CommandPerform
|
||||||
|
makeHardLink file key = do
|
||||||
|
replaceFile file $ \tmp -> do
|
||||||
|
r <- linkFromAnnex key tmp
|
||||||
|
case r of
|
||||||
|
LinkAnnexFailed -> error "unable to make hard link"
|
||||||
|
_ -> noop
|
||||||
|
next $ return True
|
||||||
|
|
||||||
|
fixSymlink :: FilePath -> FilePath -> CommandPerform
|
||||||
|
fixSymlink file link = do
|
||||||
liftIO $ do
|
liftIO $ do
|
||||||
#ifdef WITH_CLIBS
|
#ifdef WITH_CLIBS
|
||||||
#ifndef __ANDROID__
|
#ifndef __ANDROID__
|
||||||
|
@ -53,9 +103,9 @@ perform file link = do
|
||||||
maybe noop (\t -> touch file t False) mtime
|
maybe noop (\t -> touch file t False) mtime
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
next $ cleanup file
|
next $ cleanupSymlink file
|
||||||
|
|
||||||
cleanup :: FilePath -> CommandCleanup
|
cleanupSymlink :: FilePath -> CommandCleanup
|
||||||
cleanup file = do
|
cleanupSymlink file = do
|
||||||
Annex.Queue.addCommand "add" [Param "--force", Param "--"] [file]
|
Annex.Queue.addCommand "add" [Param "--force", Param "--"] [file]
|
||||||
return True
|
return True
|
||||||
|
|
|
@ -79,7 +79,7 @@ performNew file key filemodified = do
|
||||||
unlessM (sameInodeCache obj (maybeToList mfc)) $ do
|
unlessM (sameInodeCache obj (maybeToList mfc)) $ do
|
||||||
modifyContent obj $ replaceFile obj $ \tmp -> do
|
modifyContent obj $ replaceFile obj $ \tmp -> do
|
||||||
unlessM (checkedCopyFile key obj tmp) $
|
unlessM (checkedCopyFile key obj tmp) $
|
||||||
error "unable to lock file; need more free disk space"
|
error "unable to lock file"
|
||||||
Database.Keys.storeInodeCaches key [obj]
|
Database.Keys.storeInodeCaches key [obj]
|
||||||
|
|
||||||
-- Try to repopulate obj from an unmodified associated file.
|
-- Try to repopulate obj from an unmodified associated file.
|
||||||
|
|
|
@ -52,7 +52,7 @@ seek ps = lockPreCommitHook $ ifM isDirect
|
||||||
, do
|
, do
|
||||||
-- fix symlinks to files being committed
|
-- fix symlinks to files being committed
|
||||||
flip withFilesToBeCommitted ps $ \f ->
|
flip withFilesToBeCommitted ps $ \f ->
|
||||||
maybe stop (Command.Fix.start f)
|
maybe stop (Command.Fix.start Command.Fix.FixSymlinks f)
|
||||||
=<< isAnnexLink f
|
=<< isAnnexLink f
|
||||||
-- inject unlocked files into the annex
|
-- inject unlocked files into the annex
|
||||||
-- (not needed when repo version uses
|
-- (not needed when repo version uses
|
||||||
|
|
|
@ -100,7 +100,7 @@ ingestLocal file = do
|
||||||
-- Hard link (or copy) file content to annex object
|
-- Hard link (or copy) file content to annex object
|
||||||
-- to prevent it from being lost when git checks out
|
-- to prevent it from being lost when git checks out
|
||||||
-- a branch not containing this file.
|
-- a branch not containing this file.
|
||||||
r <- linkAnnex k file ic
|
r <- linkToAnnex k file ic
|
||||||
case r of
|
case r of
|
||||||
LinkAnnexFailed -> error "Problem adding file to the annex"
|
LinkAnnexFailed -> error "Problem adding file to the annex"
|
||||||
LinkAnnexOk -> logStatus k InfoPresent
|
LinkAnnexOk -> logStatus k InfoPresent
|
||||||
|
|
|
@ -14,8 +14,6 @@ import Annex.CatFile
|
||||||
import Annex.Version
|
import Annex.Version
|
||||||
import Annex.Link
|
import Annex.Link
|
||||||
import Annex.ReplaceFile
|
import Annex.ReplaceFile
|
||||||
import Annex.InodeSentinal
|
|
||||||
import Utility.InodeCache
|
|
||||||
import Utility.CopyFile
|
import Utility.CopyFile
|
||||||
|
|
||||||
cmd :: Command
|
cmd :: Command
|
||||||
|
@ -52,13 +50,11 @@ start file key = ifM (isJust <$> isAnnexLink file)
|
||||||
|
|
||||||
performNew :: FilePath -> Key -> CommandPerform
|
performNew :: FilePath -> Key -> CommandPerform
|
||||||
performNew dest key = do
|
performNew dest key = do
|
||||||
src <- calcRepo (gitAnnexLocation key)
|
|
||||||
srcic <- withTSDelta (liftIO . genInodeCache src)
|
|
||||||
replaceFile dest $ \tmp -> do
|
replaceFile dest $ \tmp -> do
|
||||||
r <- linkAnnex' key src srcic tmp
|
r <- linkFromAnnex key tmp
|
||||||
case r of
|
case r of
|
||||||
LinkAnnexOk -> return ()
|
LinkAnnexOk -> return ()
|
||||||
_ -> error "linkAnnex failed"
|
_ -> error "unlock failed"
|
||||||
next $ cleanupNew dest key
|
next $ cleanupNew dest key
|
||||||
|
|
||||||
cleanupNew :: FilePath -> Key -> CommandCleanup
|
cleanupNew :: FilePath -> Key -> CommandCleanup
|
||||||
|
|
|
@ -60,6 +60,7 @@ data GitConfig = GitConfig
|
||||||
, annexListen :: Maybe String
|
, annexListen :: Maybe String
|
||||||
, annexStartupScan :: Bool
|
, annexStartupScan :: Bool
|
||||||
, annexHardLink :: Bool
|
, annexHardLink :: Bool
|
||||||
|
, annexThin :: Bool
|
||||||
, annexDifferences :: Differences
|
, annexDifferences :: Differences
|
||||||
, annexUsedRefSpec :: Maybe RefSpec
|
, annexUsedRefSpec :: Maybe RefSpec
|
||||||
, annexVerify :: Bool
|
, annexVerify :: Bool
|
||||||
|
@ -104,6 +105,7 @@ extractGitConfig r = GitConfig
|
||||||
, annexListen = getmaybe (annex "listen")
|
, annexListen = getmaybe (annex "listen")
|
||||||
, annexStartupScan = getbool (annex "startupscan") True
|
, annexStartupScan = getbool (annex "startupscan") True
|
||||||
, annexHardLink = getbool (annex "hardlink") False
|
, annexHardLink = getbool (annex "hardlink") False
|
||||||
|
, annexThin = getbool (annex "thin") False
|
||||||
, annexDifferences = getDifferences r
|
, annexDifferences = getDifferences r
|
||||||
, annexUsedRefSpec = either (const Nothing) Just . parseRefSpec
|
, annexUsedRefSpec = either (const Nothing) Just . parseRefSpec
|
||||||
=<< getmaybe (annex "used-refspec")
|
=<< getmaybe (annex "used-refspec")
|
||||||
|
|
|
@ -8,18 +8,20 @@
|
||||||
module Upgrade.V5 where
|
module Upgrade.V5 where
|
||||||
|
|
||||||
import Common.Annex
|
import Common.Annex
|
||||||
|
import qualified Annex
|
||||||
import Config
|
import Config
|
||||||
import Annex.InodeSentinal
|
import Annex.InodeSentinal
|
||||||
import Annex.Link
|
import Annex.Link
|
||||||
import Annex.Direct
|
import Annex.Direct
|
||||||
import Annex.Content
|
import Annex.Content
|
||||||
import Annex.WorkTree
|
import Annex.CatFile
|
||||||
import qualified Database.Keys
|
import qualified Database.Keys
|
||||||
import qualified Annex.Content.Direct as Direct
|
import qualified Annex.Content.Direct as Direct
|
||||||
import qualified Git
|
import qualified Git
|
||||||
import qualified Git.LsFiles
|
import qualified Git.LsFiles
|
||||||
import qualified Git.Branch
|
import qualified Git.Branch
|
||||||
import Git.FileMode
|
import Git.FileMode
|
||||||
|
import Git.Config
|
||||||
import Utility.InodeCache
|
import Utility.InodeCache
|
||||||
|
|
||||||
upgrade :: Bool -> Annex Bool
|
upgrade :: Bool -> Annex Bool
|
||||||
|
@ -27,6 +29,11 @@ upgrade automatic = do
|
||||||
unless automatic $
|
unless automatic $
|
||||||
showAction "v5 to v6"
|
showAction "v5 to v6"
|
||||||
whenM isDirect $ do
|
whenM isDirect $ do
|
||||||
|
{- Direct mode makes the same tradeoff of using less disk
|
||||||
|
- space, with less preservation of old versions of files
|
||||||
|
- as does annex.thin. -}
|
||||||
|
setConfig (annexConfig "thin") (boolConfig True)
|
||||||
|
Annex.changeGitConfig $ \c -> c { annexThin = True }
|
||||||
{- Since upgrade from direct mode changes how files
|
{- Since upgrade from direct mode changes how files
|
||||||
- are represented in git, commit any changes in the
|
- are represented in git, commit any changes in the
|
||||||
- work tree first. -}
|
- work tree first. -}
|
||||||
|
@ -70,7 +77,9 @@ upgradeDirectWorkTree = do
|
||||||
void $ liftIO clean
|
void $ liftIO clean
|
||||||
where
|
where
|
||||||
go (f, Just _sha, Just mode) | isSymLink mode = do
|
go (f, Just _sha, Just mode) | isSymLink mode = do
|
||||||
mk <- lookupFile f
|
-- Cannot use lookupFile here, as we're in between direct
|
||||||
|
-- mode and v6.
|
||||||
|
mk <- catKeyFile f
|
||||||
case mk of
|
case mk of
|
||||||
Nothing -> noop
|
Nothing -> noop
|
||||||
Just k -> do
|
Just k -> do
|
||||||
|
@ -84,13 +93,13 @@ upgradeDirectWorkTree = do
|
||||||
go _ = noop
|
go _ = noop
|
||||||
|
|
||||||
fromdirect f k = do
|
fromdirect f k = do
|
||||||
-- If linkAnnex fails for some reason, the work tree file
|
-- If linkToAnnex fails for some reason, the work tree file
|
||||||
-- still has the content; the annex object file is just
|
-- still has the content; the annex object file is just
|
||||||
-- not populated with it. Since the work tree file
|
-- not populated with it. Since the work tree file
|
||||||
-- is recorded as an associated file, things will still
|
-- is recorded as an associated file, things will still
|
||||||
-- work that way, it's just not ideal.
|
-- work that way, it's just not ideal.
|
||||||
ic <- withTSDelta (liftIO . genInodeCache f)
|
ic <- withTSDelta (liftIO . genInodeCache f)
|
||||||
void $ linkAnnex k f ic
|
void $ linkToAnnex k f ic
|
||||||
writepointer f k = liftIO $ do
|
writepointer f k = liftIO $ do
|
||||||
nukeFile f
|
nukeFile f
|
||||||
writeFile f (formatPointer k)
|
writeFile f (formatPointer k)
|
||||||
|
|
7
debian/changelog
vendored
7
debian/changelog
vendored
|
@ -14,6 +14,13 @@ git-annex (6.20151219) UNRELEASED; urgency=medium
|
||||||
pointer file, and this change can be committed to the git repository.
|
pointer file, and this change can be committed to the git repository.
|
||||||
* assistant: In v6 mode, adds files in unlocked mode, so they can
|
* assistant: In v6 mode, adds files in unlocked mode, so they can
|
||||||
continue to be modified.
|
continue to be modified.
|
||||||
|
* Added annex.thin setting, which makes unlocked files in v6 repositories
|
||||||
|
be hard linked to their content, instead of a copy. This saves disk
|
||||||
|
space but means any modification of an unlocked file will lose the local
|
||||||
|
(and possibly only) copy of the old version.
|
||||||
|
* Enable annex.thin by default on upgrade from direct mode to v6, since
|
||||||
|
direct mode made the same tradeoff.
|
||||||
|
* fix: Adjusts unlocked files as configured by annex.thin.
|
||||||
* persistent-sqlite is now a hard build dependency, since v6 repository
|
* persistent-sqlite is now a hard build dependency, since v6 repository
|
||||||
mode needs it.
|
mode needs it.
|
||||||
* status: On crippled filesystems, was displaying M for all annexed files
|
* status: On crippled filesystems, was displaying M for all annexed files
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
# NAME
|
# NAME
|
||||||
|
|
||||||
git-annex fix - fix up symlinks to point to annexed content
|
git-annex fix - fix up links to annexed content
|
||||||
|
|
||||||
# SYNOPSIS
|
# SYNOPSIS
|
||||||
|
|
||||||
|
@ -11,8 +11,11 @@ git annex fix `[path ...]`
|
||||||
Fixes up symlinks that have become broken to again point to annexed
|
Fixes up symlinks that have become broken to again point to annexed
|
||||||
content.
|
content.
|
||||||
|
|
||||||
This is useful to run if you have been moving the symlinks around,
|
This is useful to run manually when you have been moving the symlinks
|
||||||
but is done automatically when committing a change with git too.
|
around, but is done automatically when committing a change with git too.
|
||||||
|
|
||||||
|
Also, adjusts unlocked files to be copies or hard links as
|
||||||
|
configured by annex.thin.
|
||||||
|
|
||||||
# OPTIONS
|
# OPTIONS
|
||||||
|
|
||||||
|
|
|
@ -10,7 +10,7 @@ git annex unlock `[path ...]`
|
||||||
|
|
||||||
Normally, the content of annexed files is protected from being changed.
|
Normally, the content of annexed files is protected from being changed.
|
||||||
Unlocking an annexed file allows it to be modified. This replaces the
|
Unlocking an annexed file allows it to be modified. This replaces the
|
||||||
symlink for each specified file with a copy of the file's content.
|
symlink for each specified file with the file's content.
|
||||||
You can then modify it and `git annex add` (or `git commit`) to save your
|
You can then modify it and `git annex add` (or `git commit`) to save your
|
||||||
changes.
|
changes.
|
||||||
|
|
||||||
|
@ -22,6 +22,14 @@ can use `git add` to add a fie to the annex in unlocked form. This allows
|
||||||
workflows where a file starts out unlocked, is modified as necessary, and
|
workflows where a file starts out unlocked, is modified as necessary, and
|
||||||
is locked once it reaches its final version.
|
is locked once it reaches its final version.
|
||||||
|
|
||||||
|
Normally, unlocking a file requires a copy to be made of its content,
|
||||||
|
so that its original content is preserved, while the copy can be modified.
|
||||||
|
To use less space, annex.thin can be set to true; this makes a hard link
|
||||||
|
to the content be made instead of a copy. (Only when supported by the file
|
||||||
|
system, and only in repository version 6.) While this can save considerable
|
||||||
|
disk space, any modification made to a file will cause the old version of the
|
||||||
|
file to be lost from the local repository. So, enable annex.thin with care.
|
||||||
|
|
||||||
# OPTIONS
|
# OPTIONS
|
||||||
|
|
||||||
* file matching options
|
* file matching options
|
||||||
|
|
|
@ -904,6 +904,14 @@ Here are all the supported configuration settings.
|
||||||
will automatically set annex.hardlink and mark the repository as
|
will automatically set annex.hardlink and mark the repository as
|
||||||
untrusted.
|
untrusted.
|
||||||
|
|
||||||
|
* `annex.thin`
|
||||||
|
|
||||||
|
Set this to `true` to make unlocked files be a hard link to their content
|
||||||
|
in the annex, rather than a second copy. (Only when supported by the file
|
||||||
|
system, and only in repository version 6.) This can save considerable
|
||||||
|
disk space, but modification made to a file will lose the local (and
|
||||||
|
possibly only) copy of the old version. So, enable with care.
|
||||||
|
|
||||||
* `annex.delayadd`
|
* `annex.delayadd`
|
||||||
|
|
||||||
Makes the watch and assistant commands delay for the specified number of
|
Makes the watch and assistant commands delay for the specified number of
|
||||||
|
|
|
@ -49,9 +49,11 @@ Or, you can init a new repository in v6 mode.
|
||||||
# git init
|
# git init
|
||||||
# git annex init --version=6
|
# git annex init --version=6
|
||||||
|
|
||||||
|
## using it
|
||||||
|
|
||||||
Using a v6 repository is easy! Just use regular git commands to add
|
Using a v6 repository is easy! Just use regular git commands to add
|
||||||
and commit files. Under the hood, git will use git-annex to store the file
|
and commit files. git will use git-annex to store the file contents,
|
||||||
contents.
|
and the files will be left unlocked.
|
||||||
|
|
||||||
[[!template id=note text="""
|
[[!template id=note text="""
|
||||||
Want `git add` to add some file contents to the annex, but store the contents of
|
Want `git add` to add some file contents to the annex, but store the contents of
|
||||||
|
@ -70,8 +72,8 @@ smaller files in git itself? Configure annex.largefiles to match the former.
|
||||||
# git annex find
|
# git annex find
|
||||||
my_cool_big_file
|
my_cool_big_file
|
||||||
|
|
||||||
You can make whatever changes you like to committed files, and commit your
|
You can make whatever modifications you want to unlocked files, and commit
|
||||||
changes.
|
your changes.
|
||||||
|
|
||||||
# echo more stuff >> my_cool_big_file
|
# echo more stuff >> my_cool_big_file
|
||||||
# git mv my_cool_big_file my_cool_bigger_file
|
# git mv my_cool_big_file my_cool_bigger_file
|
||||||
|
@ -81,47 +83,62 @@ changes.
|
||||||
delete mode 100644 my_cool_big_file
|
delete mode 100644 my_cool_big_file
|
||||||
create mode 100644 my_cool_bigger_file
|
create mode 100644 my_cool_bigger_file
|
||||||
|
|
||||||
Under the hood, this uses git's [[todo/smudge]] filter interface,
|
Under the hood, this uses git's [[todo/smudge]] filter interface, and
|
||||||
and git-annex converts between the content of the big file and a pointer file,
|
git-annex converts between the content of the big file and a pointer file,
|
||||||
which is what gets committed to git.
|
which is what gets committed to git.
|
||||||
|
|
||||||
A v6 repository can have both locked and unlocked files. You can switch
|
A v6 repository can contain both locked and unlocked files. You can switch
|
||||||
a file back and forth using the `git annex lock` and `git annex unlock`
|
a file back and forth using the `git annex lock` and `git annex unlock`
|
||||||
commands. This changes what's stored in git between a git-annex symlink
|
commands. This changes what's stored in git between a git-annex symlink
|
||||||
(locked) and a git-annex pointer file (unlocked).
|
(locked) and a git-annex pointer file (unlocked). To add a file to
|
||||||
|
the repository in locked mode, use `git annex add`; to add a file in
|
||||||
|
unlocked mode, use `git add`.
|
||||||
|
|
||||||
## danger will robinson
|
## using less disk space
|
||||||
|
|
||||||
|
Unlocked files are handy, but they have one significant disadvantage
|
||||||
|
compared with locked files: They use more disk space.
|
||||||
|
While only one copy of a locked file has to be stored, normally,
|
||||||
|
two copies of an unlocked file are stored on disk. One copy is in
|
||||||
|
the git work tree, where you can use and modify it,
|
||||||
|
and the other is stashed away in `.git/annex/objects` (see [[internals]]).
|
||||||
|
|
||||||
|
The reason for that second copy is to preserve the old version of the file,
|
||||||
|
if you modify the unlocked file in the work tree. Being able to access
|
||||||
|
old versions of files is an important part of git after all.
|
||||||
|
|
||||||
|
That's a good safe default. But there are ways to use git-annex that
|
||||||
|
make the second copy not be worth keeping:
|
||||||
|
|
||||||
[[!template id=note text="""
|
[[!template id=note text="""
|
||||||
Double the disk space is used on systems like Windows that don't support
|
When a [[direct_mode]] repository is upgraded, annex.thin is automatically
|
||||||
hard links.
|
set, because direct mode made the same single-copy tradeoff.
|
||||||
"""]]
|
"""]]
|
||||||
|
|
||||||
In contrast with locked files, which are quite safe, using unlocked files is a
|
* When you're using git-annex to sync the current version of files acrosss
|
||||||
little bit dangerous. git-annex tries to avoid storing a duplicate copy of an
|
devices, and don't care much about previous versions.
|
||||||
unlocked file in your local repository, in order to not use double the disk
|
* When you have set up a backup repository, and use git-annex to copy
|
||||||
space. But this means that an unlocked file can be the only copy of that
|
your files to the backup.
|
||||||
version of the file's content. Modify it, and oops, you lost the old version!
|
|
||||||
|
|
||||||
In fact, that happened in the examples above, and you probably didn't notice
|
In situations like these, you may want to avoid the overhead of the second
|
||||||
until now.
|
local copy of unlocked files. There's config setting for that.
|
||||||
|
|
||||||
# git checkout HEAD^
|
git config annex.thin true
|
||||||
HEAD is now at 92f2725 added my_cool_big_file to the annex
|
|
||||||
# cat my_cool_big_file
|
|
||||||
/annex/objects/SHA256E-s30--e7aaf46f227886c10c98f8f76cae681afd0521438c78f958fc27114674b391a4
|
|
||||||
|
|
||||||
Woah, what's all that?! Well, it's the pointer file that gets checked into
|
After changing annex.thin, you'll want to fix up the work tree to
|
||||||
git. You'd see the same thing if you had used `git annex drop` to drop
|
match the new setting:
|
||||||
the content of the file from your repository.
|
|
||||||
|
|
||||||
In the example above, the content wasn't explicitly dropped, but it was
|
git annex fix
|
||||||
modified while it was unlocked... and so the old version of the content
|
|
||||||
was lost.
|
|
||||||
|
|
||||||
If this is worrying -- and it should be -- you'll want to keep files locked
|
Note that setting annex.thin only has any effect on systems that support
|
||||||
most of the time, or set up a remote and have git-annex copy the content of
|
hard links. Ie, not Windows, and not FAT filesystems.
|
||||||
files to the remote as a backup.
|
|
||||||
|
|
||||||
By the way, don't worry about deleting an unlocked file. That *won't* lose
|
## tradeoffs
|
||||||
its content.
|
|
||||||
|
Setting annex.thin can save a lot of disk space, but it's a tradeoff
|
||||||
|
between disk usage and safety.
|
||||||
|
|
||||||
|
Keeping files locked is safer and also avoids using unnecessary
|
||||||
|
disk space, but trades off easy modification of files.
|
||||||
|
|
||||||
|
Pick the tradeoff that's right for you.
|
||||||
|
|
|
@ -13,10 +13,11 @@ git-annex should use smudge/clean filters.
|
||||||
# because it doesn't know it has that name
|
# because it doesn't know it has that name
|
||||||
# git commit clears up this mess
|
# git commit clears up this mess
|
||||||
* Interaction with shared clones. Should avoid hard linking from/to a
|
* Interaction with shared clones. Should avoid hard linking from/to a
|
||||||
object in a shared clone if either repository has the object unlocked.
|
object in a shared clone if either repository has the object unlocked
|
||||||
(And should avoid unlocking an object if it's hard linked to a shared clone,
|
with a hard link in place.
|
||||||
but that's already accomplished because it avoids unlocking an object if
|
(And should avoid unlocking an object with a hard link if it's hard
|
||||||
it's hard linked at all)
|
linked to a shared clone, but that's already accomplished because it
|
||||||
|
avoids unlocking an object if it's hard linked at all)
|
||||||
* Make automatic merge conflict resolution work for pointer files.
|
* Make automatic merge conflict resolution work for pointer files.
|
||||||
- Should probably automatically handle merge conflicts between annex
|
- Should probably automatically handle merge conflicts between annex
|
||||||
symlinks and pointer files too. Maybe by always resulting in a pointer
|
symlinks and pointer files too. Maybe by always resulting in a pointer
|
||||||
|
@ -46,7 +47,7 @@ git-annex should use smudge/clean filters.
|
||||||
|
|
||||||
* Eventually (but not yet), make v6 the default for new repositories.
|
* Eventually (but not yet), make v6 the default for new repositories.
|
||||||
Note that the assistant forces repos into direct mode; that will need to
|
Note that the assistant forces repos into direct mode; that will need to
|
||||||
be changed then.
|
be changed then, and it should enable annex.thin.
|
||||||
* Later still, remove support for direct mode, and enable automatic
|
* Later still, remove support for direct mode, and enable automatic
|
||||||
v5 to v6 upgrades.
|
v5 to v6 upgrades.
|
||||||
|
|
||||||
|
@ -158,7 +159,7 @@ cannot directly write to the file or git gets unhappy.
|
||||||
.. Are very important, otherwise a repo can't scale past the size of the
|
.. Are very important, otherwise a repo can't scale past the size of the
|
||||||
smallest client's disk!
|
smallest client's disk!
|
||||||
|
|
||||||
It would be nice if the smudge filter could hard link or symlink a work
|
It would be nice if the smudge filter could hard link a work
|
||||||
tree file to the annex object.
|
tree file to the annex object.
|
||||||
|
|
||||||
But currently, the smudge filter can't modify the work tree file on its own
|
But currently, the smudge filter can't modify the work tree file on its own
|
||||||
|
@ -184,7 +185,9 @@ smudged file in the work tree when renaming it. It instead deletes the old
|
||||||
file and asks the smudge filter to smudge the new filename.
|
file and asks the smudge filter to smudge the new filename.
|
||||||
|
|
||||||
So, copies need to be maintained in .git/annex/objects, though it's ok
|
So, copies need to be maintained in .git/annex/objects, though it's ok
|
||||||
to use hard links to the work tree files.
|
to use hard links to the work tree files. (Although somewhat unsafe
|
||||||
|
since modification of the file will lose the old version. annex.thin
|
||||||
|
setting can enable this.)
|
||||||
|
|
||||||
Even if hard links are used, smudge needs to output the content of an
|
Even if hard links are used, smudge needs to output the content of an
|
||||||
annexed file, which will result in duplication when merging in renames of
|
annexed file, which will result in duplication when merging in renames of
|
||||||
|
@ -241,21 +244,16 @@ git-annex clean:
|
||||||
|
|
||||||
Generate annex key from filename and content from stdin.
|
Generate annex key from filename and content from stdin.
|
||||||
|
|
||||||
Hard link .git/annex/objects to the file, if it doesn't already exist.
|
Hard link (annex.thin) or copy .git/annex/objects to the file,
|
||||||
(On platforms not supporting hardlinks, copy the file to
|
if it doesn't already exist.
|
||||||
.git/annex/objects.)
|
|
||||||
|
|
||||||
This is done to prevent losing the only copy of a file when eg
|
This is done to prevent losing the only copy of a file when eg
|
||||||
doing a git checkout of a different branch, or merging a commit that
|
doing a git checkout of a different branch, or merging a commit that
|
||||||
renames or deletes a file. But, no attempt is made to
|
renames or deletes a file. But, with annex.thin no attempt is made to
|
||||||
protect the object from being modified. If a user wants to
|
protect the object from being modified. If a user wants to
|
||||||
protect object contents from modification, they should use
|
protect object contents from modification, they should use
|
||||||
`git annex add`, not `git add`, or they can `git annex lock` after adding,.
|
`git annex add`, not `git add`, or they can `git annex lock` after adding,
|
||||||
|
or not enable annex.thin.
|
||||||
There could be a configuration knob to cause a copy to be made to
|
|
||||||
.git/annex/objects -- useful for those crippled filesystems. It might
|
|
||||||
also drop that copy once the object gets uploaded to another repo ...
|
|
||||||
But that gets complicated quickly.
|
|
||||||
|
|
||||||
Update file map.
|
Update file map.
|
||||||
|
|
||||||
|
|
|
@ -72,10 +72,6 @@ The behavior of some commands changes in an upgraded repository:
|
||||||
* `git annex unlock` and `git annex lock` change how the pointer to
|
* `git annex unlock` and `git annex lock` change how the pointer to
|
||||||
the annexed content is stored in git.
|
the annexed content is stored in git.
|
||||||
|
|
||||||
If a repository is only used in indirect mode, you can use git-annex
|
|
||||||
v5 and v6 in different clones of the same indirect mode repository without
|
|
||||||
problems.
|
|
||||||
|
|
||||||
On upgrade, all files in a direct mode repository will be converted to
|
On upgrade, all files in a direct mode repository will be converted to
|
||||||
unlocked files. The upgrade will stage changes to all annexed files in
|
unlocked files. The upgrade will stage changes to all annexed files in
|
||||||
the git repository, which you can then commit.
|
the git repository, which you can then commit.
|
||||||
|
|
Loading…
Reference in a new issue