annex.thin
Decided it's too scary to make v6 unlocked files have 1 copy by default, but that should be available to those who need it. This is consistent with git-annex not dropping unused content without --force, etc. * Added annex.thin setting, which makes unlocked files in v6 repositories be hard linked to their content, instead of a copy. This saves disk space but means any modification of an unlocked file will lose the local (and possibly only) copy of the old version. * Enable annex.thin by default on upgrade from direct mode to v6, since direct mode made the same tradeoff. * fix: Adjusts unlocked files as configured by annex.thin.
This commit is contained in:
parent
bb6719678e
commit
121f5d5b0c
17 changed files with 259 additions and 146 deletions
115
Annex/Content.hs
115
Annex/Content.hs
|
@ -25,8 +25,8 @@ module Annex.Content (
|
|||
checkDiskSpace,
|
||||
moveAnnex,
|
||||
populatePointerFile,
|
||||
linkAnnex,
|
||||
linkAnnex',
|
||||
linkToAnnex,
|
||||
linkFromAnnex,
|
||||
LinkAnnexResult(..),
|
||||
unlinkAnnex,
|
||||
checkedCopyFile,
|
||||
|
@ -469,11 +469,10 @@ moveAnnex key src = withObjectLoc key storeobject storedirect
|
|||
storeobject dest = ifM (liftIO $ doesFileExist dest)
|
||||
( alreadyhave
|
||||
, modifyContent dest $ do
|
||||
freezeContent src
|
||||
liftIO $ moveFile src dest
|
||||
fs <- Database.Keys.getAssociatedFiles key
|
||||
if null fs
|
||||
then freezeContent dest
|
||||
else do
|
||||
unless (null fs) $ do
|
||||
mapM_ (populatePointerFile key dest) fs
|
||||
Database.Keys.storeInodeCaches key (dest:fs)
|
||||
)
|
||||
|
@ -510,38 +509,60 @@ populatePointerFile k obj f = go =<< isPointerFile f
|
|||
where
|
||||
go (Just k') | k == k' = do
|
||||
liftIO $ nukeFile f
|
||||
unlessM (linkAnnex'' k obj f) $
|
||||
liftIO $ writeFile f (formatPointer k)
|
||||
ifM (linkOrCopy k obj f)
|
||||
( thawContent f
|
||||
, liftIO $ writeFile f (formatPointer k)
|
||||
)
|
||||
go _ = return ()
|
||||
|
||||
{- Hard links a file into .git/annex/objects/, falling back to a copy
|
||||
- if necessary. Does nothing if the object file already exists.
|
||||
-
|
||||
- Does not lock down the hard linked object, so that the user can modify
|
||||
- the source file. So, adding an object to the annex this way can
|
||||
- prevent losing the content if the source file is deleted, but does not
|
||||
- guard against modifications.
|
||||
-}
|
||||
linkAnnex :: Key -> FilePath -> Maybe InodeCache -> Annex LinkAnnexResult
|
||||
linkAnnex key src srcic = do
|
||||
dest <- calcRepo (gitAnnexLocation key)
|
||||
modifyContent dest $ linkAnnex' key src srcic dest
|
||||
data LinkAnnexResult = LinkAnnexOk | LinkAnnexFailed | LinkAnnexNoop
|
||||
|
||||
{- Hard links (or copies) src to dest, one of which should be the
|
||||
- annex object. Updates inode cache for src and for dest when it's
|
||||
- changed. -}
|
||||
linkAnnex' :: Key -> FilePath -> Maybe InodeCache -> FilePath -> Annex LinkAnnexResult
|
||||
linkAnnex' _ _ Nothing _ = return LinkAnnexFailed
|
||||
linkAnnex' key src (Just srcic) dest =
|
||||
{- Populates the annex object file by hard linking or copying a source
|
||||
- file to it. -}
|
||||
linkToAnnex :: Key -> FilePath -> Maybe InodeCache -> Annex LinkAnnexResult
|
||||
linkToAnnex key src srcic = do
|
||||
dest <- calcRepo (gitAnnexLocation key)
|
||||
modifyContent dest $ linkAnnex To key src srcic dest
|
||||
|
||||
{- Makes a destination file be a link or copy from the annex object. -}
|
||||
linkFromAnnex :: Key -> FilePath -> Annex LinkAnnexResult
|
||||
linkFromAnnex key dest = do
|
||||
src <- calcRepo (gitAnnexLocation key)
|
||||
srcic <- withTSDelta (liftIO . genInodeCache src)
|
||||
linkAnnex From key src srcic dest
|
||||
|
||||
data FromTo = From | To
|
||||
|
||||
{- Hard links or copies from or to the annex object location.
|
||||
- Updates inode cache.
|
||||
-
|
||||
- Thaws the file that is not the annex object.
|
||||
- When a hard link was made, this necessarily thaws
|
||||
- the annex object too. So, adding an object to the annex this
|
||||
- way can prevent losing the content if the source file
|
||||
- is deleted, but does not guard against modifications.
|
||||
-}
|
||||
linkAnnex :: FromTo -> Key -> FilePath -> Maybe InodeCache -> FilePath -> Annex LinkAnnexResult
|
||||
linkAnnex _ _ _ Nothing _ = return LinkAnnexFailed
|
||||
linkAnnex fromto key src (Just srcic) dest =
|
||||
ifM (liftIO $ doesFileExist dest)
|
||||
( do
|
||||
Database.Keys.addInodeCaches key [srcic]
|
||||
return LinkAnnexNoop
|
||||
, ifM (linkAnnex'' key src dest)
|
||||
, ifM (linkOrCopy key src dest)
|
||||
( do
|
||||
thawContent dest
|
||||
-- src could have changed while being copied
|
||||
-- to dest
|
||||
thawContent $ case fromto of
|
||||
From -> dest
|
||||
To -> src
|
||||
checksrcunchanged
|
||||
, failed
|
||||
)
|
||||
)
|
||||
where
|
||||
failed = do
|
||||
Database.Keys.addInodeCaches key [srcic]
|
||||
return LinkAnnexFailed
|
||||
checksrcunchanged = do
|
||||
mcache <- withTSDelta (liftIO . genInodeCache src)
|
||||
case mcache of
|
||||
Just srcic' | compareStrong srcic srcic' -> do
|
||||
|
@ -552,32 +573,30 @@ linkAnnex' key src (Just srcic) dest =
|
|||
_ -> do
|
||||
liftIO $ nukeFile dest
|
||||
failed
|
||||
, failed
|
||||
)
|
||||
|
||||
{- Hard links or copies src to dest. Only uses a hard link when annex.thin
|
||||
- is enabled and when src is not already hardlinked to elsewhere.
|
||||
- Checks disk reserve before copying, and will fail if not enough space,
|
||||
- or if the dest file already exists. -}
|
||||
linkOrCopy :: Key -> FilePath -> FilePath -> Annex Bool
|
||||
linkOrCopy key src dest = catchBoolIO $
|
||||
ifM (annexThin <$> Annex.getGitConfig)
|
||||
( hardlink
|
||||
, copy =<< getstat
|
||||
)
|
||||
where
|
||||
failed = do
|
||||
Database.Keys.addInodeCaches key [srcic]
|
||||
return LinkAnnexFailed
|
||||
|
||||
data LinkAnnexResult = LinkAnnexOk | LinkAnnexFailed | LinkAnnexNoop
|
||||
|
||||
{- Hard links or copies src to dest. Only uses a hard link if src
|
||||
- is not already hardlinked to elsewhere. Checks disk reserve before
|
||||
- copying, and will fail if not enough space, or if the dest file
|
||||
- already exists. -}
|
||||
linkAnnex'' :: Key -> FilePath -> FilePath -> Annex Bool
|
||||
linkAnnex'' key src dest = catchBoolIO $ do
|
||||
s <- liftIO $ getFileStatus src
|
||||
let copy = checkedCopyFile' key src dest s
|
||||
hardlink = do
|
||||
#ifndef mingw32_HOST_OS
|
||||
s <- getstat
|
||||
if linkCount s > 1
|
||||
then copy
|
||||
then copy s
|
||||
else liftIO (createLink src dest >> return True)
|
||||
`catchIO` const copy
|
||||
`catchIO` const (copy s)
|
||||
#else
|
||||
copy
|
||||
copy s
|
||||
#endif
|
||||
copy = checkedCopyFile' key src dest
|
||||
getstat = liftIO $ getFileStatus src
|
||||
|
||||
{- Removes the annex object file for a key. Lowlevel. -}
|
||||
unlinkAnnex :: Key -> Annex ()
|
||||
|
|
|
@ -142,11 +142,11 @@ ingest (Just (LockedDown lockingfile source)) = withTSDelta $ \delta -> do
|
|||
|
||||
gounlocked key (Just cache) s = do
|
||||
-- Remove temp directory hard link first because
|
||||
-- linkAnnex falls back to copying if a file
|
||||
-- linkToAnnex falls back to copying if a file
|
||||
-- already has a hard link.
|
||||
cleanCruft source
|
||||
cleanOldKeys (keyFilename source) key
|
||||
r <- linkAnnex key (keyFilename source) (Just cache)
|
||||
r <- linkToAnnex key (keyFilename source) (Just cache)
|
||||
case r of
|
||||
LinkAnnexFailed -> failure "failed to link to annex"
|
||||
_ -> do
|
||||
|
@ -219,12 +219,12 @@ cleanOldKeys file newkey = do
|
|||
<$> Database.Keys.getAssociatedFiles key
|
||||
fs' <- filterM (`sameInodeCache` caches) fs
|
||||
case fs' of
|
||||
-- If linkAnnex fails, the associated
|
||||
-- If linkToAnnex fails, the associated
|
||||
-- file with the content is still present,
|
||||
-- so no need for any recovery.
|
||||
(f:_) -> do
|
||||
ic <- withTSDelta (liftIO . genInodeCache f)
|
||||
void $ linkAnnex key f ic
|
||||
void $ linkToAnnex key f ic
|
||||
_ -> lostcontent
|
||||
where
|
||||
lostcontent = logStatus key InfoMissing
|
||||
|
|
|
@ -18,7 +18,7 @@ import Utility.Tmp
|
|||
- which it can write to, and once done the temp file is moved into place
|
||||
- and anything else in the temp directory is deleted.
|
||||
-
|
||||
- The action can throw an IO exception, in which case the temp directory
|
||||
- The action can throw an exception, in which case the temp directory
|
||||
- will be deleted, and the existing file will be preserved.
|
||||
-
|
||||
- Throws an IO exception when it was unable to replace the file.
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{- git-annex command
|
||||
-
|
||||
- Copyright 2010 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2010-2015 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
@ -11,7 +11,13 @@ module Command.Fix where
|
|||
|
||||
import Common.Annex
|
||||
import Command
|
||||
import Config
|
||||
import qualified Annex
|
||||
import Annex.Version
|
||||
import Annex.ReplaceFile
|
||||
import Annex.Content
|
||||
import qualified Annex.Queue
|
||||
import qualified Database.Keys
|
||||
#ifdef WITH_CLIBS
|
||||
#ifndef __ANDROID__
|
||||
import Utility.Touch
|
||||
|
@ -21,22 +27,66 @@ import Utility.Touch
|
|||
cmd :: Command
|
||||
cmd = notDirect $ noCommit $ withGlobalOptions annexedMatchingOptions $
|
||||
command "fix" SectionMaintenance
|
||||
"fix up symlinks to point to annexed content"
|
||||
"fix up links to annexed content"
|
||||
paramPaths (withParams seek)
|
||||
|
||||
seek :: CmdParams -> CommandSeek
|
||||
seek = withFilesInGit $ whenAnnexed start
|
||||
seek ps = unlessM crippledFileSystem $ do
|
||||
fixwhat <- ifM versionSupportsUnlockedPointers
|
||||
( return FixAll
|
||||
, return FixSymlinks
|
||||
)
|
||||
flip withFilesInGit ps $ whenAnnexed $ start fixwhat
|
||||
|
||||
{- Fixes the symlink to an annexed file. -}
|
||||
start :: FilePath -> Key -> CommandStart
|
||||
start file key = do
|
||||
link <- calcRepo $ gitAnnexLink file key
|
||||
stopUnless ((/=) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) $ do
|
||||
data FixWhat = FixSymlinks | FixAll
|
||||
|
||||
start :: FixWhat -> FilePath -> Key -> CommandStart
|
||||
start fixwhat file key = do
|
||||
currlink <- liftIO $ catchMaybeIO $ readSymbolicLink file
|
||||
wantlink <- calcRepo $ gitAnnexLink file key
|
||||
case currlink of
|
||||
Just l
|
||||
| l /= wantlink -> fixby $ fixSymlink file wantlink
|
||||
| otherwise -> stop
|
||||
Nothing -> case fixwhat of
|
||||
FixAll -> fixthin
|
||||
FixSymlinks -> stop
|
||||
where
|
||||
fixby a = do
|
||||
showStart "fix" file
|
||||
next $ perform file link
|
||||
next a
|
||||
fixthin = do
|
||||
obj <- calcRepo $ gitAnnexLocation key
|
||||
stopUnless (isUnmodified key file <&&> isUnmodified key obj) $ do
|
||||
thin <- annexThin <$> Annex.getGitConfig
|
||||
fs <- liftIO $ catchMaybeIO $ getFileStatus file
|
||||
os <- liftIO $ catchMaybeIO $ getFileStatus obj
|
||||
case (linkCount <$> fs, linkCount <$> os, thin) of
|
||||
(Just 1, Just 1, True) ->
|
||||
fixby $ makeHardLink file key
|
||||
(Just n, Just n', False) | n > 1 && n == n' ->
|
||||
fixby $ breakHardLink file key obj
|
||||
_ -> stop
|
||||
|
||||
perform :: FilePath -> FilePath -> CommandPerform
|
||||
perform file link = do
|
||||
breakHardLink :: FilePath -> Key -> FilePath -> CommandPerform
|
||||
breakHardLink file key obj = do
|
||||
replaceFile file $ \tmp ->
|
||||
unlessM (checkedCopyFile key obj tmp) $
|
||||
error "unable to break hard link"
|
||||
Database.Keys.storeInodeCaches key [file]
|
||||
next $ return True
|
||||
|
||||
makeHardLink :: FilePath -> Key -> CommandPerform
|
||||
makeHardLink file key = do
|
||||
replaceFile file $ \tmp -> do
|
||||
r <- linkFromAnnex key tmp
|
||||
case r of
|
||||
LinkAnnexFailed -> error "unable to make hard link"
|
||||
_ -> noop
|
||||
next $ return True
|
||||
|
||||
fixSymlink :: FilePath -> FilePath -> CommandPerform
|
||||
fixSymlink file link = do
|
||||
liftIO $ do
|
||||
#ifdef WITH_CLIBS
|
||||
#ifndef __ANDROID__
|
||||
|
@ -53,9 +103,9 @@ perform file link = do
|
|||
maybe noop (\t -> touch file t False) mtime
|
||||
#endif
|
||||
#endif
|
||||
next $ cleanup file
|
||||
next $ cleanupSymlink file
|
||||
|
||||
cleanup :: FilePath -> CommandCleanup
|
||||
cleanup file = do
|
||||
cleanupSymlink :: FilePath -> CommandCleanup
|
||||
cleanupSymlink file = do
|
||||
Annex.Queue.addCommand "add" [Param "--force", Param "--"] [file]
|
||||
return True
|
||||
|
|
|
@ -79,7 +79,7 @@ performNew file key filemodified = do
|
|||
unlessM (sameInodeCache obj (maybeToList mfc)) $ do
|
||||
modifyContent obj $ replaceFile obj $ \tmp -> do
|
||||
unlessM (checkedCopyFile key obj tmp) $
|
||||
error "unable to lock file; need more free disk space"
|
||||
error "unable to lock file"
|
||||
Database.Keys.storeInodeCaches key [obj]
|
||||
|
||||
-- Try to repopulate obj from an unmodified associated file.
|
||||
|
|
|
@ -52,7 +52,7 @@ seek ps = lockPreCommitHook $ ifM isDirect
|
|||
, do
|
||||
-- fix symlinks to files being committed
|
||||
flip withFilesToBeCommitted ps $ \f ->
|
||||
maybe stop (Command.Fix.start f)
|
||||
maybe stop (Command.Fix.start Command.Fix.FixSymlinks f)
|
||||
=<< isAnnexLink f
|
||||
-- inject unlocked files into the annex
|
||||
-- (not needed when repo version uses
|
||||
|
|
|
@ -100,7 +100,7 @@ ingestLocal file = do
|
|||
-- Hard link (or copy) file content to annex object
|
||||
-- to prevent it from being lost when git checks out
|
||||
-- a branch not containing this file.
|
||||
r <- linkAnnex k file ic
|
||||
r <- linkToAnnex k file ic
|
||||
case r of
|
||||
LinkAnnexFailed -> error "Problem adding file to the annex"
|
||||
LinkAnnexOk -> logStatus k InfoPresent
|
||||
|
|
|
@ -14,8 +14,6 @@ import Annex.CatFile
|
|||
import Annex.Version
|
||||
import Annex.Link
|
||||
import Annex.ReplaceFile
|
||||
import Annex.InodeSentinal
|
||||
import Utility.InodeCache
|
||||
import Utility.CopyFile
|
||||
|
||||
cmd :: Command
|
||||
|
@ -52,13 +50,11 @@ start file key = ifM (isJust <$> isAnnexLink file)
|
|||
|
||||
performNew :: FilePath -> Key -> CommandPerform
|
||||
performNew dest key = do
|
||||
src <- calcRepo (gitAnnexLocation key)
|
||||
srcic <- withTSDelta (liftIO . genInodeCache src)
|
||||
replaceFile dest $ \tmp -> do
|
||||
r <- linkAnnex' key src srcic tmp
|
||||
r <- linkFromAnnex key tmp
|
||||
case r of
|
||||
LinkAnnexOk -> return ()
|
||||
_ -> error "linkAnnex failed"
|
||||
_ -> error "unlock failed"
|
||||
next $ cleanupNew dest key
|
||||
|
||||
cleanupNew :: FilePath -> Key -> CommandCleanup
|
||||
|
|
|
@ -60,6 +60,7 @@ data GitConfig = GitConfig
|
|||
, annexListen :: Maybe String
|
||||
, annexStartupScan :: Bool
|
||||
, annexHardLink :: Bool
|
||||
, annexThin :: Bool
|
||||
, annexDifferences :: Differences
|
||||
, annexUsedRefSpec :: Maybe RefSpec
|
||||
, annexVerify :: Bool
|
||||
|
@ -104,6 +105,7 @@ extractGitConfig r = GitConfig
|
|||
, annexListen = getmaybe (annex "listen")
|
||||
, annexStartupScan = getbool (annex "startupscan") True
|
||||
, annexHardLink = getbool (annex "hardlink") False
|
||||
, annexThin = getbool (annex "thin") False
|
||||
, annexDifferences = getDifferences r
|
||||
, annexUsedRefSpec = either (const Nothing) Just . parseRefSpec
|
||||
=<< getmaybe (annex "used-refspec")
|
||||
|
|
|
@ -8,18 +8,20 @@
|
|||
module Upgrade.V5 where
|
||||
|
||||
import Common.Annex
|
||||
import qualified Annex
|
||||
import Config
|
||||
import Annex.InodeSentinal
|
||||
import Annex.Link
|
||||
import Annex.Direct
|
||||
import Annex.Content
|
||||
import Annex.WorkTree
|
||||
import Annex.CatFile
|
||||
import qualified Database.Keys
|
||||
import qualified Annex.Content.Direct as Direct
|
||||
import qualified Git
|
||||
import qualified Git.LsFiles
|
||||
import qualified Git.Branch
|
||||
import Git.FileMode
|
||||
import Git.Config
|
||||
import Utility.InodeCache
|
||||
|
||||
upgrade :: Bool -> Annex Bool
|
||||
|
@ -27,6 +29,11 @@ upgrade automatic = do
|
|||
unless automatic $
|
||||
showAction "v5 to v6"
|
||||
whenM isDirect $ do
|
||||
{- Direct mode makes the same tradeoff of using less disk
|
||||
- space, with less preservation of old versions of files
|
||||
- as does annex.thin. -}
|
||||
setConfig (annexConfig "thin") (boolConfig True)
|
||||
Annex.changeGitConfig $ \c -> c { annexThin = True }
|
||||
{- Since upgrade from direct mode changes how files
|
||||
- are represented in git, commit any changes in the
|
||||
- work tree first. -}
|
||||
|
@ -70,7 +77,9 @@ upgradeDirectWorkTree = do
|
|||
void $ liftIO clean
|
||||
where
|
||||
go (f, Just _sha, Just mode) | isSymLink mode = do
|
||||
mk <- lookupFile f
|
||||
-- Cannot use lookupFile here, as we're in between direct
|
||||
-- mode and v6.
|
||||
mk <- catKeyFile f
|
||||
case mk of
|
||||
Nothing -> noop
|
||||
Just k -> do
|
||||
|
@ -84,13 +93,13 @@ upgradeDirectWorkTree = do
|
|||
go _ = noop
|
||||
|
||||
fromdirect f k = do
|
||||
-- If linkAnnex fails for some reason, the work tree file
|
||||
-- If linkToAnnex fails for some reason, the work tree file
|
||||
-- still has the content; the annex object file is just
|
||||
-- not populated with it. Since the work tree file
|
||||
-- is recorded as an associated file, things will still
|
||||
-- work that way, it's just not ideal.
|
||||
ic <- withTSDelta (liftIO . genInodeCache f)
|
||||
void $ linkAnnex k f ic
|
||||
void $ linkToAnnex k f ic
|
||||
writepointer f k = liftIO $ do
|
||||
nukeFile f
|
||||
writeFile f (formatPointer k)
|
||||
|
|
7
debian/changelog
vendored
7
debian/changelog
vendored
|
@ -14,6 +14,13 @@ git-annex (6.20151219) UNRELEASED; urgency=medium
|
|||
pointer file, and this change can be committed to the git repository.
|
||||
* assistant: In v6 mode, adds files in unlocked mode, so they can
|
||||
continue to be modified.
|
||||
* Added annex.thin setting, which makes unlocked files in v6 repositories
|
||||
be hard linked to their content, instead of a copy. This saves disk
|
||||
space but means any modification of an unlocked file will lose the local
|
||||
(and possibly only) copy of the old version.
|
||||
* Enable annex.thin by default on upgrade from direct mode to v6, since
|
||||
direct mode made the same tradeoff.
|
||||
* fix: Adjusts unlocked files as configured by annex.thin.
|
||||
* persistent-sqlite is now a hard build dependency, since v6 repository
|
||||
mode needs it.
|
||||
* status: On crippled filesystems, was displaying M for all annexed files
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
# NAME
|
||||
|
||||
git-annex fix - fix up symlinks to point to annexed content
|
||||
git-annex fix - fix up links to annexed content
|
||||
|
||||
# SYNOPSIS
|
||||
|
||||
|
@ -11,8 +11,11 @@ git annex fix `[path ...]`
|
|||
Fixes up symlinks that have become broken to again point to annexed
|
||||
content.
|
||||
|
||||
This is useful to run if you have been moving the symlinks around,
|
||||
but is done automatically when committing a change with git too.
|
||||
This is useful to run manually when you have been moving the symlinks
|
||||
around, but is done automatically when committing a change with git too.
|
||||
|
||||
Also, adjusts unlocked files to be copies or hard links as
|
||||
configured by annex.thin.
|
||||
|
||||
# OPTIONS
|
||||
|
||||
|
|
|
@ -10,7 +10,7 @@ git annex unlock `[path ...]`
|
|||
|
||||
Normally, the content of annexed files is protected from being changed.
|
||||
Unlocking an annexed file allows it to be modified. This replaces the
|
||||
symlink for each specified file with a copy of the file's content.
|
||||
symlink for each specified file with the file's content.
|
||||
You can then modify it and `git annex add` (or `git commit`) to save your
|
||||
changes.
|
||||
|
||||
|
@ -22,6 +22,14 @@ can use `git add` to add a fie to the annex in unlocked form. This allows
|
|||
workflows where a file starts out unlocked, is modified as necessary, and
|
||||
is locked once it reaches its final version.
|
||||
|
||||
Normally, unlocking a file requires a copy to be made of its content,
|
||||
so that its original content is preserved, while the copy can be modified.
|
||||
To use less space, annex.thin can be set to true; this makes a hard link
|
||||
to the content be made instead of a copy. (Only when supported by the file
|
||||
system, and only in repository version 6.) While this can save considerable
|
||||
disk space, any modification made to a file will cause the old version of the
|
||||
file to be lost from the local repository. So, enable annex.thin with care.
|
||||
|
||||
# OPTIONS
|
||||
|
||||
* file matching options
|
||||
|
|
|
@ -904,6 +904,14 @@ Here are all the supported configuration settings.
|
|||
will automatically set annex.hardlink and mark the repository as
|
||||
untrusted.
|
||||
|
||||
* `annex.thin`
|
||||
|
||||
Set this to `true` to make unlocked files be a hard link to their content
|
||||
in the annex, rather than a second copy. (Only when supported by the file
|
||||
system, and only in repository version 6.) This can save considerable
|
||||
disk space, but modification made to a file will lose the local (and
|
||||
possibly only) copy of the old version. So, enable with care.
|
||||
|
||||
* `annex.delayadd`
|
||||
|
||||
Makes the watch and assistant commands delay for the specified number of
|
||||
|
|
|
@ -49,9 +49,11 @@ Or, you can init a new repository in v6 mode.
|
|||
# git init
|
||||
# git annex init --version=6
|
||||
|
||||
## using it
|
||||
|
||||
Using a v6 repository is easy! Just use regular git commands to add
|
||||
and commit files. Under the hood, git will use git-annex to store the file
|
||||
contents.
|
||||
and commit files. git will use git-annex to store the file contents,
|
||||
and the files will be left unlocked.
|
||||
|
||||
[[!template id=note text="""
|
||||
Want `git add` to add some file contents to the annex, but store the contents of
|
||||
|
@ -70,8 +72,8 @@ smaller files in git itself? Configure annex.largefiles to match the former.
|
|||
# git annex find
|
||||
my_cool_big_file
|
||||
|
||||
You can make whatever changes you like to committed files, and commit your
|
||||
changes.
|
||||
You can make whatever modifications you want to unlocked files, and commit
|
||||
your changes.
|
||||
|
||||
# echo more stuff >> my_cool_big_file
|
||||
# git mv my_cool_big_file my_cool_bigger_file
|
||||
|
@ -81,47 +83,62 @@ changes.
|
|||
delete mode 100644 my_cool_big_file
|
||||
create mode 100644 my_cool_bigger_file
|
||||
|
||||
Under the hood, this uses git's [[todo/smudge]] filter interface,
|
||||
and git-annex converts between the content of the big file and a pointer file,
|
||||
Under the hood, this uses git's [[todo/smudge]] filter interface, and
|
||||
git-annex converts between the content of the big file and a pointer file,
|
||||
which is what gets committed to git.
|
||||
|
||||
A v6 repository can have both locked and unlocked files. You can switch
|
||||
A v6 repository can contain both locked and unlocked files. You can switch
|
||||
a file back and forth using the `git annex lock` and `git annex unlock`
|
||||
commands. This changes what's stored in git between a git-annex symlink
|
||||
(locked) and a git-annex pointer file (unlocked).
|
||||
(locked) and a git-annex pointer file (unlocked). To add a file to
|
||||
the repository in locked mode, use `git annex add`; to add a file in
|
||||
unlocked mode, use `git add`.
|
||||
|
||||
## danger will robinson
|
||||
## using less disk space
|
||||
|
||||
Unlocked files are handy, but they have one significant disadvantage
|
||||
compared with locked files: They use more disk space.
|
||||
While only one copy of a locked file has to be stored, normally,
|
||||
two copies of an unlocked file are stored on disk. One copy is in
|
||||
the git work tree, where you can use and modify it,
|
||||
and the other is stashed away in `.git/annex/objects` (see [[internals]]).
|
||||
|
||||
The reason for that second copy is to preserve the old version of the file,
|
||||
if you modify the unlocked file in the work tree. Being able to access
|
||||
old versions of files is an important part of git after all.
|
||||
|
||||
That's a good safe default. But there are ways to use git-annex that
|
||||
make the second copy not be worth keeping:
|
||||
|
||||
[[!template id=note text="""
|
||||
Double the disk space is used on systems like Windows that don't support
|
||||
hard links.
|
||||
When a [[direct_mode]] repository is upgraded, annex.thin is automatically
|
||||
set, because direct mode made the same single-copy tradeoff.
|
||||
"""]]
|
||||
|
||||
In contrast with locked files, which are quite safe, using unlocked files is a
|
||||
little bit dangerous. git-annex tries to avoid storing a duplicate copy of an
|
||||
unlocked file in your local repository, in order to not use double the disk
|
||||
space. But this means that an unlocked file can be the only copy of that
|
||||
version of the file's content. Modify it, and oops, you lost the old version!
|
||||
* When you're using git-annex to sync the current version of files acrosss
|
||||
devices, and don't care much about previous versions.
|
||||
* When you have set up a backup repository, and use git-annex to copy
|
||||
your files to the backup.
|
||||
|
||||
In fact, that happened in the examples above, and you probably didn't notice
|
||||
until now.
|
||||
In situations like these, you may want to avoid the overhead of the second
|
||||
local copy of unlocked files. There's config setting for that.
|
||||
|
||||
# git checkout HEAD^
|
||||
HEAD is now at 92f2725 added my_cool_big_file to the annex
|
||||
# cat my_cool_big_file
|
||||
/annex/objects/SHA256E-s30--e7aaf46f227886c10c98f8f76cae681afd0521438c78f958fc27114674b391a4
|
||||
git config annex.thin true
|
||||
|
||||
Woah, what's all that?! Well, it's the pointer file that gets checked into
|
||||
git. You'd see the same thing if you had used `git annex drop` to drop
|
||||
the content of the file from your repository.
|
||||
After changing annex.thin, you'll want to fix up the work tree to
|
||||
match the new setting:
|
||||
|
||||
In the example above, the content wasn't explicitly dropped, but it was
|
||||
modified while it was unlocked... and so the old version of the content
|
||||
was lost.
|
||||
git annex fix
|
||||
|
||||
If this is worrying -- and it should be -- you'll want to keep files locked
|
||||
most of the time, or set up a remote and have git-annex copy the content of
|
||||
files to the remote as a backup.
|
||||
Note that setting annex.thin only has any effect on systems that support
|
||||
hard links. Ie, not Windows, and not FAT filesystems.
|
||||
|
||||
By the way, don't worry about deleting an unlocked file. That *won't* lose
|
||||
its content.
|
||||
## tradeoffs
|
||||
|
||||
Setting annex.thin can save a lot of disk space, but it's a tradeoff
|
||||
between disk usage and safety.
|
||||
|
||||
Keeping files locked is safer and also avoids using unnecessary
|
||||
disk space, but trades off easy modification of files.
|
||||
|
||||
Pick the tradeoff that's right for you.
|
||||
|
|
|
@ -13,10 +13,11 @@ git-annex should use smudge/clean filters.
|
|||
# because it doesn't know it has that name
|
||||
# git commit clears up this mess
|
||||
* Interaction with shared clones. Should avoid hard linking from/to a
|
||||
object in a shared clone if either repository has the object unlocked.
|
||||
(And should avoid unlocking an object if it's hard linked to a shared clone,
|
||||
but that's already accomplished because it avoids unlocking an object if
|
||||
it's hard linked at all)
|
||||
object in a shared clone if either repository has the object unlocked
|
||||
with a hard link in place.
|
||||
(And should avoid unlocking an object with a hard link if it's hard
|
||||
linked to a shared clone, but that's already accomplished because it
|
||||
avoids unlocking an object if it's hard linked at all)
|
||||
* Make automatic merge conflict resolution work for pointer files.
|
||||
- Should probably automatically handle merge conflicts between annex
|
||||
symlinks and pointer files too. Maybe by always resulting in a pointer
|
||||
|
@ -46,7 +47,7 @@ git-annex should use smudge/clean filters.
|
|||
|
||||
* Eventually (but not yet), make v6 the default for new repositories.
|
||||
Note that the assistant forces repos into direct mode; that will need to
|
||||
be changed then.
|
||||
be changed then, and it should enable annex.thin.
|
||||
* Later still, remove support for direct mode, and enable automatic
|
||||
v5 to v6 upgrades.
|
||||
|
||||
|
@ -158,7 +159,7 @@ cannot directly write to the file or git gets unhappy.
|
|||
.. Are very important, otherwise a repo can't scale past the size of the
|
||||
smallest client's disk!
|
||||
|
||||
It would be nice if the smudge filter could hard link or symlink a work
|
||||
It would be nice if the smudge filter could hard link a work
|
||||
tree file to the annex object.
|
||||
|
||||
But currently, the smudge filter can't modify the work tree file on its own
|
||||
|
@ -184,7 +185,9 @@ smudged file in the work tree when renaming it. It instead deletes the old
|
|||
file and asks the smudge filter to smudge the new filename.
|
||||
|
||||
So, copies need to be maintained in .git/annex/objects, though it's ok
|
||||
to use hard links to the work tree files.
|
||||
to use hard links to the work tree files. (Although somewhat unsafe
|
||||
since modification of the file will lose the old version. annex.thin
|
||||
setting can enable this.)
|
||||
|
||||
Even if hard links are used, smudge needs to output the content of an
|
||||
annexed file, which will result in duplication when merging in renames of
|
||||
|
@ -241,21 +244,16 @@ git-annex clean:
|
|||
|
||||
Generate annex key from filename and content from stdin.
|
||||
|
||||
Hard link .git/annex/objects to the file, if it doesn't already exist.
|
||||
(On platforms not supporting hardlinks, copy the file to
|
||||
.git/annex/objects.)
|
||||
Hard link (annex.thin) or copy .git/annex/objects to the file,
|
||||
if it doesn't already exist.
|
||||
|
||||
This is done to prevent losing the only copy of a file when eg
|
||||
doing a git checkout of a different branch, or merging a commit that
|
||||
renames or deletes a file. But, no attempt is made to
|
||||
renames or deletes a file. But, with annex.thin no attempt is made to
|
||||
protect the object from being modified. If a user wants to
|
||||
protect object contents from modification, they should use
|
||||
`git annex add`, not `git add`, or they can `git annex lock` after adding,.
|
||||
|
||||
There could be a configuration knob to cause a copy to be made to
|
||||
.git/annex/objects -- useful for those crippled filesystems. It might
|
||||
also drop that copy once the object gets uploaded to another repo ...
|
||||
But that gets complicated quickly.
|
||||
`git annex add`, not `git add`, or they can `git annex lock` after adding,
|
||||
or not enable annex.thin.
|
||||
|
||||
Update file map.
|
||||
|
||||
|
|
|
@ -72,10 +72,6 @@ The behavior of some commands changes in an upgraded repository:
|
|||
* `git annex unlock` and `git annex lock` change how the pointer to
|
||||
the annexed content is stored in git.
|
||||
|
||||
If a repository is only used in indirect mode, you can use git-annex
|
||||
v5 and v6 in different clones of the same indirect mode repository without
|
||||
problems.
|
||||
|
||||
On upgrade, all files in a direct mode repository will be converted to
|
||||
unlocked files. The upgrade will stage changes to all annexed files in
|
||||
the git repository, which you can then commit.
|
||||
|
|
Loading…
Reference in a new issue