annex.thin

Decided it's too scary to make v6 unlocked files have 1 copy by default,
but that should be available to those who need it. This is consistent with
git-annex not dropping unused content without --force, etc.

* Added annex.thin setting, which makes unlocked files in v6 repositories
  be hard linked to their content, instead of a copy. This saves disk
  space but means any modification of an unlocked file will lose the local
  (and possibly only) copy of the old version.
* Enable annex.thin by default on upgrade from direct mode to v6, since
  direct mode made the same tradeoff.
* fix: Adjusts unlocked files as configured by annex.thin.
This commit is contained in:
Joey Hess 2015-12-27 15:59:59 -04:00
parent bb6719678e
commit 121f5d5b0c
Failed to extract signature
17 changed files with 259 additions and 146 deletions

View file

@ -25,8 +25,8 @@ module Annex.Content (
checkDiskSpace, checkDiskSpace,
moveAnnex, moveAnnex,
populatePointerFile, populatePointerFile,
linkAnnex, linkToAnnex,
linkAnnex', linkFromAnnex,
LinkAnnexResult(..), LinkAnnexResult(..),
unlinkAnnex, unlinkAnnex,
checkedCopyFile, checkedCopyFile,
@ -469,13 +469,12 @@ moveAnnex key src = withObjectLoc key storeobject storedirect
storeobject dest = ifM (liftIO $ doesFileExist dest) storeobject dest = ifM (liftIO $ doesFileExist dest)
( alreadyhave ( alreadyhave
, modifyContent dest $ do , modifyContent dest $ do
freezeContent src
liftIO $ moveFile src dest liftIO $ moveFile src dest
fs <- Database.Keys.getAssociatedFiles key fs <- Database.Keys.getAssociatedFiles key
if null fs unless (null fs) $ do
then freezeContent dest mapM_ (populatePointerFile key dest) fs
else do Database.Keys.storeInodeCaches key (dest:fs)
mapM_ (populatePointerFile key dest) fs
Database.Keys.storeInodeCaches key (dest:fs)
) )
storeindirect = storeobject =<< calcRepo (gitAnnexLocation key) storeindirect = storeobject =<< calcRepo (gitAnnexLocation key)
@ -510,48 +509,52 @@ populatePointerFile k obj f = go =<< isPointerFile f
where where
go (Just k') | k == k' = do go (Just k') | k == k' = do
liftIO $ nukeFile f liftIO $ nukeFile f
unlessM (linkAnnex'' k obj f) $ ifM (linkOrCopy k obj f)
liftIO $ writeFile f (formatPointer k) ( thawContent f
, liftIO $ writeFile f (formatPointer k)
)
go _ = return () go _ = return ()
{- Hard links a file into .git/annex/objects/, falling back to a copy data LinkAnnexResult = LinkAnnexOk | LinkAnnexFailed | LinkAnnexNoop
- if necessary. Does nothing if the object file already exists.
-
- Does not lock down the hard linked object, so that the user can modify
- the source file. So, adding an object to the annex this way can
- prevent losing the content if the source file is deleted, but does not
- guard against modifications.
-}
linkAnnex :: Key -> FilePath -> Maybe InodeCache -> Annex LinkAnnexResult
linkAnnex key src srcic = do
dest <- calcRepo (gitAnnexLocation key)
modifyContent dest $ linkAnnex' key src srcic dest
{- Hard links (or copies) src to dest, one of which should be the {- Populates the annex object file by hard linking or copying a source
- annex object. Updates inode cache for src and for dest when it's - file to it. -}
- changed. -} linkToAnnex :: Key -> FilePath -> Maybe InodeCache -> Annex LinkAnnexResult
linkAnnex' :: Key -> FilePath -> Maybe InodeCache -> FilePath -> Annex LinkAnnexResult linkToAnnex key src srcic = do
linkAnnex' _ _ Nothing _ = return LinkAnnexFailed dest <- calcRepo (gitAnnexLocation key)
linkAnnex' key src (Just srcic) dest = modifyContent dest $ linkAnnex To key src srcic dest
{- Makes a destination file be a link or copy from the annex object. -}
linkFromAnnex :: Key -> FilePath -> Annex LinkAnnexResult
linkFromAnnex key dest = do
src <- calcRepo (gitAnnexLocation key)
srcic <- withTSDelta (liftIO . genInodeCache src)
linkAnnex From key src srcic dest
data FromTo = From | To
{- Hard links or copies from or to the annex object location.
- Updates inode cache.
-
- Thaws the file that is not the annex object.
- When a hard link was made, this necessarily thaws
- the annex object too. So, adding an object to the annex this
- way can prevent losing the content if the source file
- is deleted, but does not guard against modifications.
-}
linkAnnex :: FromTo -> Key -> FilePath -> Maybe InodeCache -> FilePath -> Annex LinkAnnexResult
linkAnnex _ _ _ Nothing _ = return LinkAnnexFailed
linkAnnex fromto key src (Just srcic) dest =
ifM (liftIO $ doesFileExist dest) ifM (liftIO $ doesFileExist dest)
( do ( do
Database.Keys.addInodeCaches key [srcic] Database.Keys.addInodeCaches key [srcic]
return LinkAnnexNoop return LinkAnnexNoop
, ifM (linkAnnex'' key src dest) , ifM (linkOrCopy key src dest)
( do ( do
thawContent dest thawContent $ case fromto of
-- src could have changed while being copied From -> dest
-- to dest To -> src
mcache <- withTSDelta (liftIO . genInodeCache src) checksrcunchanged
case mcache of
Just srcic' | compareStrong srcic srcic' -> do
destic <- withTSDelta (liftIO . genInodeCache dest)
Database.Keys.addInodeCaches key $
catMaybes [destic, Just srcic]
return LinkAnnexOk
_ -> do
liftIO $ nukeFile dest
failed
, failed , failed
) )
) )
@ -559,25 +562,41 @@ linkAnnex' key src (Just srcic) dest =
failed = do failed = do
Database.Keys.addInodeCaches key [srcic] Database.Keys.addInodeCaches key [srcic]
return LinkAnnexFailed return LinkAnnexFailed
checksrcunchanged = do
mcache <- withTSDelta (liftIO . genInodeCache src)
case mcache of
Just srcic' | compareStrong srcic srcic' -> do
destic <- withTSDelta (liftIO . genInodeCache dest)
Database.Keys.addInodeCaches key $
catMaybes [destic, Just srcic]
return LinkAnnexOk
_ -> do
liftIO $ nukeFile dest
failed
data LinkAnnexResult = LinkAnnexOk | LinkAnnexFailed | LinkAnnexNoop {- Hard links or copies src to dest. Only uses a hard link when annex.thin
- is enabled and when src is not already hardlinked to elsewhere.
{- Hard links or copies src to dest. Only uses a hard link if src - Checks disk reserve before copying, and will fail if not enough space,
- is not already hardlinked to elsewhere. Checks disk reserve before - or if the dest file already exists. -}
- copying, and will fail if not enough space, or if the dest file linkOrCopy :: Key -> FilePath -> FilePath -> Annex Bool
- already exists. -} linkOrCopy key src dest = catchBoolIO $
linkAnnex'' :: Key -> FilePath -> FilePath -> Annex Bool ifM (annexThin <$> Annex.getGitConfig)
linkAnnex'' key src dest = catchBoolIO $ do ( hardlink
s <- liftIO $ getFileStatus src , copy =<< getstat
let copy = checkedCopyFile' key src dest s )
where
hardlink = do
#ifndef mingw32_HOST_OS #ifndef mingw32_HOST_OS
if linkCount s > 1 s <- getstat
then copy if linkCount s > 1
else liftIO (createLink src dest >> return True) then copy s
`catchIO` const copy else liftIO (createLink src dest >> return True)
`catchIO` const (copy s)
#else #else
copy copy s
#endif #endif
copy = checkedCopyFile' key src dest
getstat = liftIO $ getFileStatus src
{- Removes the annex object file for a key. Lowlevel. -} {- Removes the annex object file for a key. Lowlevel. -}
unlinkAnnex :: Key -> Annex () unlinkAnnex :: Key -> Annex ()

View file

@ -142,11 +142,11 @@ ingest (Just (LockedDown lockingfile source)) = withTSDelta $ \delta -> do
gounlocked key (Just cache) s = do gounlocked key (Just cache) s = do
-- Remove temp directory hard link first because -- Remove temp directory hard link first because
-- linkAnnex falls back to copying if a file -- linkToAnnex falls back to copying if a file
-- already has a hard link. -- already has a hard link.
cleanCruft source cleanCruft source
cleanOldKeys (keyFilename source) key cleanOldKeys (keyFilename source) key
r <- linkAnnex key (keyFilename source) (Just cache) r <- linkToAnnex key (keyFilename source) (Just cache)
case r of case r of
LinkAnnexFailed -> failure "failed to link to annex" LinkAnnexFailed -> failure "failed to link to annex"
_ -> do _ -> do
@ -219,12 +219,12 @@ cleanOldKeys file newkey = do
<$> Database.Keys.getAssociatedFiles key <$> Database.Keys.getAssociatedFiles key
fs' <- filterM (`sameInodeCache` caches) fs fs' <- filterM (`sameInodeCache` caches) fs
case fs' of case fs' of
-- If linkAnnex fails, the associated -- If linkToAnnex fails, the associated
-- file with the content is still present, -- file with the content is still present,
-- so no need for any recovery. -- so no need for any recovery.
(f:_) -> do (f:_) -> do
ic <- withTSDelta (liftIO . genInodeCache f) ic <- withTSDelta (liftIO . genInodeCache f)
void $ linkAnnex key f ic void $ linkToAnnex key f ic
_ -> lostcontent _ -> lostcontent
where where
lostcontent = logStatus key InfoMissing lostcontent = logStatus key InfoMissing

View file

@ -18,7 +18,7 @@ import Utility.Tmp
- which it can write to, and once done the temp file is moved into place - which it can write to, and once done the temp file is moved into place
- and anything else in the temp directory is deleted. - and anything else in the temp directory is deleted.
- -
- The action can throw an IO exception, in which case the temp directory - The action can throw an exception, in which case the temp directory
- will be deleted, and the existing file will be preserved. - will be deleted, and the existing file will be preserved.
- -
- Throws an IO exception when it was unable to replace the file. - Throws an IO exception when it was unable to replace the file.

View file

@ -1,6 +1,6 @@
{- git-annex command {- git-annex command
- -
- Copyright 2010 Joey Hess <id@joeyh.name> - Copyright 2010-2015 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU GPL version 3 or higher. - Licensed under the GNU GPL version 3 or higher.
-} -}
@ -11,7 +11,13 @@ module Command.Fix where
import Common.Annex import Common.Annex
import Command import Command
import Config
import qualified Annex
import Annex.Version
import Annex.ReplaceFile
import Annex.Content
import qualified Annex.Queue import qualified Annex.Queue
import qualified Database.Keys
#ifdef WITH_CLIBS #ifdef WITH_CLIBS
#ifndef __ANDROID__ #ifndef __ANDROID__
import Utility.Touch import Utility.Touch
@ -21,22 +27,66 @@ import Utility.Touch
cmd :: Command cmd :: Command
cmd = notDirect $ noCommit $ withGlobalOptions annexedMatchingOptions $ cmd = notDirect $ noCommit $ withGlobalOptions annexedMatchingOptions $
command "fix" SectionMaintenance command "fix" SectionMaintenance
"fix up symlinks to point to annexed content" "fix up links to annexed content"
paramPaths (withParams seek) paramPaths (withParams seek)
seek :: CmdParams -> CommandSeek seek :: CmdParams -> CommandSeek
seek = withFilesInGit $ whenAnnexed start seek ps = unlessM crippledFileSystem $ do
fixwhat <- ifM versionSupportsUnlockedPointers
( return FixAll
, return FixSymlinks
)
flip withFilesInGit ps $ whenAnnexed $ start fixwhat
{- Fixes the symlink to an annexed file. -} data FixWhat = FixSymlinks | FixAll
start :: FilePath -> Key -> CommandStart
start file key = do start :: FixWhat -> FilePath -> Key -> CommandStart
link <- calcRepo $ gitAnnexLink file key start fixwhat file key = do
stopUnless ((/=) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) $ do currlink <- liftIO $ catchMaybeIO $ readSymbolicLink file
wantlink <- calcRepo $ gitAnnexLink file key
case currlink of
Just l
| l /= wantlink -> fixby $ fixSymlink file wantlink
| otherwise -> stop
Nothing -> case fixwhat of
FixAll -> fixthin
FixSymlinks -> stop
where
fixby a = do
showStart "fix" file showStart "fix" file
next $ perform file link next a
fixthin = do
obj <- calcRepo $ gitAnnexLocation key
stopUnless (isUnmodified key file <&&> isUnmodified key obj) $ do
thin <- annexThin <$> Annex.getGitConfig
fs <- liftIO $ catchMaybeIO $ getFileStatus file
os <- liftIO $ catchMaybeIO $ getFileStatus obj
case (linkCount <$> fs, linkCount <$> os, thin) of
(Just 1, Just 1, True) ->
fixby $ makeHardLink file key
(Just n, Just n', False) | n > 1 && n == n' ->
fixby $ breakHardLink file key obj
_ -> stop
perform :: FilePath -> FilePath -> CommandPerform breakHardLink :: FilePath -> Key -> FilePath -> CommandPerform
perform file link = do breakHardLink file key obj = do
replaceFile file $ \tmp ->
unlessM (checkedCopyFile key obj tmp) $
error "unable to break hard link"
Database.Keys.storeInodeCaches key [file]
next $ return True
makeHardLink :: FilePath -> Key -> CommandPerform
makeHardLink file key = do
replaceFile file $ \tmp -> do
r <- linkFromAnnex key tmp
case r of
LinkAnnexFailed -> error "unable to make hard link"
_ -> noop
next $ return True
fixSymlink :: FilePath -> FilePath -> CommandPerform
fixSymlink file link = do
liftIO $ do liftIO $ do
#ifdef WITH_CLIBS #ifdef WITH_CLIBS
#ifndef __ANDROID__ #ifndef __ANDROID__
@ -53,9 +103,9 @@ perform file link = do
maybe noop (\t -> touch file t False) mtime maybe noop (\t -> touch file t False) mtime
#endif #endif
#endif #endif
next $ cleanup file next $ cleanupSymlink file
cleanup :: FilePath -> CommandCleanup cleanupSymlink :: FilePath -> CommandCleanup
cleanup file = do cleanupSymlink file = do
Annex.Queue.addCommand "add" [Param "--force", Param "--"] [file] Annex.Queue.addCommand "add" [Param "--force", Param "--"] [file]
return True return True

View file

@ -79,7 +79,7 @@ performNew file key filemodified = do
unlessM (sameInodeCache obj (maybeToList mfc)) $ do unlessM (sameInodeCache obj (maybeToList mfc)) $ do
modifyContent obj $ replaceFile obj $ \tmp -> do modifyContent obj $ replaceFile obj $ \tmp -> do
unlessM (checkedCopyFile key obj tmp) $ unlessM (checkedCopyFile key obj tmp) $
error "unable to lock file; need more free disk space" error "unable to lock file"
Database.Keys.storeInodeCaches key [obj] Database.Keys.storeInodeCaches key [obj]
-- Try to repopulate obj from an unmodified associated file. -- Try to repopulate obj from an unmodified associated file.

View file

@ -52,7 +52,7 @@ seek ps = lockPreCommitHook $ ifM isDirect
, do , do
-- fix symlinks to files being committed -- fix symlinks to files being committed
flip withFilesToBeCommitted ps $ \f -> flip withFilesToBeCommitted ps $ \f ->
maybe stop (Command.Fix.start f) maybe stop (Command.Fix.start Command.Fix.FixSymlinks f)
=<< isAnnexLink f =<< isAnnexLink f
-- inject unlocked files into the annex -- inject unlocked files into the annex
-- (not needed when repo version uses -- (not needed when repo version uses

View file

@ -100,7 +100,7 @@ ingestLocal file = do
-- Hard link (or copy) file content to annex object -- Hard link (or copy) file content to annex object
-- to prevent it from being lost when git checks out -- to prevent it from being lost when git checks out
-- a branch not containing this file. -- a branch not containing this file.
r <- linkAnnex k file ic r <- linkToAnnex k file ic
case r of case r of
LinkAnnexFailed -> error "Problem adding file to the annex" LinkAnnexFailed -> error "Problem adding file to the annex"
LinkAnnexOk -> logStatus k InfoPresent LinkAnnexOk -> logStatus k InfoPresent

View file

@ -14,8 +14,6 @@ import Annex.CatFile
import Annex.Version import Annex.Version
import Annex.Link import Annex.Link
import Annex.ReplaceFile import Annex.ReplaceFile
import Annex.InodeSentinal
import Utility.InodeCache
import Utility.CopyFile import Utility.CopyFile
cmd :: Command cmd :: Command
@ -52,13 +50,11 @@ start file key = ifM (isJust <$> isAnnexLink file)
performNew :: FilePath -> Key -> CommandPerform performNew :: FilePath -> Key -> CommandPerform
performNew dest key = do performNew dest key = do
src <- calcRepo (gitAnnexLocation key)
srcic <- withTSDelta (liftIO . genInodeCache src)
replaceFile dest $ \tmp -> do replaceFile dest $ \tmp -> do
r <- linkAnnex' key src srcic tmp r <- linkFromAnnex key tmp
case r of case r of
LinkAnnexOk -> return () LinkAnnexOk -> return ()
_ -> error "linkAnnex failed" _ -> error "unlock failed"
next $ cleanupNew dest key next $ cleanupNew dest key
cleanupNew :: FilePath -> Key -> CommandCleanup cleanupNew :: FilePath -> Key -> CommandCleanup

View file

@ -60,6 +60,7 @@ data GitConfig = GitConfig
, annexListen :: Maybe String , annexListen :: Maybe String
, annexStartupScan :: Bool , annexStartupScan :: Bool
, annexHardLink :: Bool , annexHardLink :: Bool
, annexThin :: Bool
, annexDifferences :: Differences , annexDifferences :: Differences
, annexUsedRefSpec :: Maybe RefSpec , annexUsedRefSpec :: Maybe RefSpec
, annexVerify :: Bool , annexVerify :: Bool
@ -104,6 +105,7 @@ extractGitConfig r = GitConfig
, annexListen = getmaybe (annex "listen") , annexListen = getmaybe (annex "listen")
, annexStartupScan = getbool (annex "startupscan") True , annexStartupScan = getbool (annex "startupscan") True
, annexHardLink = getbool (annex "hardlink") False , annexHardLink = getbool (annex "hardlink") False
, annexThin = getbool (annex "thin") False
, annexDifferences = getDifferences r , annexDifferences = getDifferences r
, annexUsedRefSpec = either (const Nothing) Just . parseRefSpec , annexUsedRefSpec = either (const Nothing) Just . parseRefSpec
=<< getmaybe (annex "used-refspec") =<< getmaybe (annex "used-refspec")

View file

@ -8,18 +8,20 @@
module Upgrade.V5 where module Upgrade.V5 where
import Common.Annex import Common.Annex
import qualified Annex
import Config import Config
import Annex.InodeSentinal import Annex.InodeSentinal
import Annex.Link import Annex.Link
import Annex.Direct import Annex.Direct
import Annex.Content import Annex.Content
import Annex.WorkTree import Annex.CatFile
import qualified Database.Keys import qualified Database.Keys
import qualified Annex.Content.Direct as Direct import qualified Annex.Content.Direct as Direct
import qualified Git import qualified Git
import qualified Git.LsFiles import qualified Git.LsFiles
import qualified Git.Branch import qualified Git.Branch
import Git.FileMode import Git.FileMode
import Git.Config
import Utility.InodeCache import Utility.InodeCache
upgrade :: Bool -> Annex Bool upgrade :: Bool -> Annex Bool
@ -27,6 +29,11 @@ upgrade automatic = do
unless automatic $ unless automatic $
showAction "v5 to v6" showAction "v5 to v6"
whenM isDirect $ do whenM isDirect $ do
{- Direct mode makes the same tradeoff of using less disk
- space, with less preservation of old versions of files
- as does annex.thin. -}
setConfig (annexConfig "thin") (boolConfig True)
Annex.changeGitConfig $ \c -> c { annexThin = True }
{- Since upgrade from direct mode changes how files {- Since upgrade from direct mode changes how files
- are represented in git, commit any changes in the - are represented in git, commit any changes in the
- work tree first. -} - work tree first. -}
@ -70,7 +77,9 @@ upgradeDirectWorkTree = do
void $ liftIO clean void $ liftIO clean
where where
go (f, Just _sha, Just mode) | isSymLink mode = do go (f, Just _sha, Just mode) | isSymLink mode = do
mk <- lookupFile f -- Cannot use lookupFile here, as we're in between direct
-- mode and v6.
mk <- catKeyFile f
case mk of case mk of
Nothing -> noop Nothing -> noop
Just k -> do Just k -> do
@ -84,13 +93,13 @@ upgradeDirectWorkTree = do
go _ = noop go _ = noop
fromdirect f k = do fromdirect f k = do
-- If linkAnnex fails for some reason, the work tree file -- If linkToAnnex fails for some reason, the work tree file
-- still has the content; the annex object file is just -- still has the content; the annex object file is just
-- not populated with it. Since the work tree file -- not populated with it. Since the work tree file
-- is recorded as an associated file, things will still -- is recorded as an associated file, things will still
-- work that way, it's just not ideal. -- work that way, it's just not ideal.
ic <- withTSDelta (liftIO . genInodeCache f) ic <- withTSDelta (liftIO . genInodeCache f)
void $ linkAnnex k f ic void $ linkToAnnex k f ic
writepointer f k = liftIO $ do writepointer f k = liftIO $ do
nukeFile f nukeFile f
writeFile f (formatPointer k) writeFile f (formatPointer k)

7
debian/changelog vendored
View file

@ -14,6 +14,13 @@ git-annex (6.20151219) UNRELEASED; urgency=medium
pointer file, and this change can be committed to the git repository. pointer file, and this change can be committed to the git repository.
* assistant: In v6 mode, adds files in unlocked mode, so they can * assistant: In v6 mode, adds files in unlocked mode, so they can
continue to be modified. continue to be modified.
* Added annex.thin setting, which makes unlocked files in v6 repositories
be hard linked to their content, instead of a copy. This saves disk
space but means any modification of an unlocked file will lose the local
(and possibly only) copy of the old version.
* Enable annex.thin by default on upgrade from direct mode to v6, since
direct mode made the same tradeoff.
* fix: Adjusts unlocked files as configured by annex.thin.
* persistent-sqlite is now a hard build dependency, since v6 repository * persistent-sqlite is now a hard build dependency, since v6 repository
mode needs it. mode needs it.
* status: On crippled filesystems, was displaying M for all annexed files * status: On crippled filesystems, was displaying M for all annexed files

View file

@ -1,6 +1,6 @@
# NAME # NAME
git-annex fix - fix up symlinks to point to annexed content git-annex fix - fix up links to annexed content
# SYNOPSIS # SYNOPSIS
@ -11,8 +11,11 @@ git annex fix `[path ...]`
Fixes up symlinks that have become broken to again point to annexed Fixes up symlinks that have become broken to again point to annexed
content. content.
This is useful to run if you have been moving the symlinks around, This is useful to run manually when you have been moving the symlinks
but is done automatically when committing a change with git too. around, but is done automatically when committing a change with git too.
Also, adjusts unlocked files to be copies or hard links as
configured by annex.thin.
# OPTIONS # OPTIONS

View file

@ -10,7 +10,7 @@ git annex unlock `[path ...]`
Normally, the content of annexed files is protected from being changed. Normally, the content of annexed files is protected from being changed.
Unlocking an annexed file allows it to be modified. This replaces the Unlocking an annexed file allows it to be modified. This replaces the
symlink for each specified file with a copy of the file's content. symlink for each specified file with the file's content.
You can then modify it and `git annex add` (or `git commit`) to save your You can then modify it and `git annex add` (or `git commit`) to save your
changes. changes.
@ -22,6 +22,14 @@ can use `git add` to add a fie to the annex in unlocked form. This allows
workflows where a file starts out unlocked, is modified as necessary, and workflows where a file starts out unlocked, is modified as necessary, and
is locked once it reaches its final version. is locked once it reaches its final version.
Normally, unlocking a file requires a copy to be made of its content,
so that its original content is preserved, while the copy can be modified.
To use less space, annex.thin can be set to true; this makes a hard link
to the content be made instead of a copy. (Only when supported by the file
system, and only in repository version 6.) While this can save considerable
disk space, any modification made to a file will cause the old version of the
file to be lost from the local repository. So, enable annex.thin with care.
# OPTIONS # OPTIONS
* file matching options * file matching options

View file

@ -904,6 +904,14 @@ Here are all the supported configuration settings.
will automatically set annex.hardlink and mark the repository as will automatically set annex.hardlink and mark the repository as
untrusted. untrusted.
* `annex.thin`
Set this to `true` to make unlocked files be a hard link to their content
in the annex, rather than a second copy. (Only when supported by the file
system, and only in repository version 6.) This can save considerable
disk space, but modification made to a file will lose the local (and
possibly only) copy of the old version. So, enable with care.
* `annex.delayadd` * `annex.delayadd`
Makes the watch and assistant commands delay for the specified number of Makes the watch and assistant commands delay for the specified number of

View file

@ -49,9 +49,11 @@ Or, you can init a new repository in v6 mode.
# git init # git init
# git annex init --version=6 # git annex init --version=6
## using it
Using a v6 repository is easy! Just use regular git commands to add Using a v6 repository is easy! Just use regular git commands to add
and commit files. Under the hood, git will use git-annex to store the file and commit files. git will use git-annex to store the file contents,
contents. and the files will be left unlocked.
[[!template id=note text=""" [[!template id=note text="""
Want `git add` to add some file contents to the annex, but store the contents of Want `git add` to add some file contents to the annex, but store the contents of
@ -70,8 +72,8 @@ smaller files in git itself? Configure annex.largefiles to match the former.
# git annex find # git annex find
my_cool_big_file my_cool_big_file
You can make whatever changes you like to committed files, and commit your You can make whatever modifications you want to unlocked files, and commit
changes. your changes.
# echo more stuff >> my_cool_big_file # echo more stuff >> my_cool_big_file
# git mv my_cool_big_file my_cool_bigger_file # git mv my_cool_big_file my_cool_bigger_file
@ -81,47 +83,62 @@ changes.
delete mode 100644 my_cool_big_file delete mode 100644 my_cool_big_file
create mode 100644 my_cool_bigger_file create mode 100644 my_cool_bigger_file
Under the hood, this uses git's [[todo/smudge]] filter interface, Under the hood, this uses git's [[todo/smudge]] filter interface, and
and git-annex converts between the content of the big file and a pointer file, git-annex converts between the content of the big file and a pointer file,
which is what gets committed to git. which is what gets committed to git.
A v6 repository can have both locked and unlocked files. You can switch A v6 repository can contain both locked and unlocked files. You can switch
a file back and forth using the `git annex lock` and `git annex unlock` a file back and forth using the `git annex lock` and `git annex unlock`
commands. This changes what's stored in git between a git-annex symlink commands. This changes what's stored in git between a git-annex symlink
(locked) and a git-annex pointer file (unlocked). (locked) and a git-annex pointer file (unlocked). To add a file to
the repository in locked mode, use `git annex add`; to add a file in
unlocked mode, use `git add`.
## danger will robinson ## using less disk space
Unlocked files are handy, but they have one significant disadvantage
compared with locked files: They use more disk space.
While only one copy of a locked file has to be stored, normally,
two copies of an unlocked file are stored on disk. One copy is in
the git work tree, where you can use and modify it,
and the other is stashed away in `.git/annex/objects` (see [[internals]]).
The reason for that second copy is to preserve the old version of the file,
if you modify the unlocked file in the work tree. Being able to access
old versions of files is an important part of git after all.
That's a good safe default. But there are ways to use git-annex that
make the second copy not be worth keeping:
[[!template id=note text=""" [[!template id=note text="""
Double the disk space is used on systems like Windows that don't support When a [[direct_mode]] repository is upgraded, annex.thin is automatically
hard links. set, because direct mode made the same single-copy tradeoff.
"""]] """]]
In contrast with locked files, which are quite safe, using unlocked files is a * When you're using git-annex to sync the current version of files acrosss
little bit dangerous. git-annex tries to avoid storing a duplicate copy of an devices, and don't care much about previous versions.
unlocked file in your local repository, in order to not use double the disk * When you have set up a backup repository, and use git-annex to copy
space. But this means that an unlocked file can be the only copy of that your files to the backup.
version of the file's content. Modify it, and oops, you lost the old version!
In fact, that happened in the examples above, and you probably didn't notice In situations like these, you may want to avoid the overhead of the second
until now. local copy of unlocked files. There's config setting for that.
# git checkout HEAD^ git config annex.thin true
HEAD is now at 92f2725 added my_cool_big_file to the annex
# cat my_cool_big_file
/annex/objects/SHA256E-s30--e7aaf46f227886c10c98f8f76cae681afd0521438c78f958fc27114674b391a4
Woah, what's all that?! Well, it's the pointer file that gets checked into After changing annex.thin, you'll want to fix up the work tree to
git. You'd see the same thing if you had used `git annex drop` to drop match the new setting:
the content of the file from your repository.
In the example above, the content wasn't explicitly dropped, but it was git annex fix
modified while it was unlocked... and so the old version of the content
was lost.
If this is worrying -- and it should be -- you'll want to keep files locked Note that setting annex.thin only has any effect on systems that support
most of the time, or set up a remote and have git-annex copy the content of hard links. Ie, not Windows, and not FAT filesystems.
files to the remote as a backup.
By the way, don't worry about deleting an unlocked file. That *won't* lose ## tradeoffs
its content.
Setting annex.thin can save a lot of disk space, but it's a tradeoff
between disk usage and safety.
Keeping files locked is safer and also avoids using unnecessary
disk space, but trades off easy modification of files.
Pick the tradeoff that's right for you.

View file

@ -13,10 +13,11 @@ git-annex should use smudge/clean filters.
# because it doesn't know it has that name # because it doesn't know it has that name
# git commit clears up this mess # git commit clears up this mess
* Interaction with shared clones. Should avoid hard linking from/to a * Interaction with shared clones. Should avoid hard linking from/to a
object in a shared clone if either repository has the object unlocked. object in a shared clone if either repository has the object unlocked
(And should avoid unlocking an object if it's hard linked to a shared clone, with a hard link in place.
but that's already accomplished because it avoids unlocking an object if (And should avoid unlocking an object with a hard link if it's hard
it's hard linked at all) linked to a shared clone, but that's already accomplished because it
avoids unlocking an object if it's hard linked at all)
* Make automatic merge conflict resolution work for pointer files. * Make automatic merge conflict resolution work for pointer files.
- Should probably automatically handle merge conflicts between annex - Should probably automatically handle merge conflicts between annex
symlinks and pointer files too. Maybe by always resulting in a pointer symlinks and pointer files too. Maybe by always resulting in a pointer
@ -46,7 +47,7 @@ git-annex should use smudge/clean filters.
* Eventually (but not yet), make v6 the default for new repositories. * Eventually (but not yet), make v6 the default for new repositories.
Note that the assistant forces repos into direct mode; that will need to Note that the assistant forces repos into direct mode; that will need to
be changed then. be changed then, and it should enable annex.thin.
* Later still, remove support for direct mode, and enable automatic * Later still, remove support for direct mode, and enable automatic
v5 to v6 upgrades. v5 to v6 upgrades.
@ -158,7 +159,7 @@ cannot directly write to the file or git gets unhappy.
.. Are very important, otherwise a repo can't scale past the size of the .. Are very important, otherwise a repo can't scale past the size of the
smallest client's disk! smallest client's disk!
It would be nice if the smudge filter could hard link or symlink a work It would be nice if the smudge filter could hard link a work
tree file to the annex object. tree file to the annex object.
But currently, the smudge filter can't modify the work tree file on its own But currently, the smudge filter can't modify the work tree file on its own
@ -184,7 +185,9 @@ smudged file in the work tree when renaming it. It instead deletes the old
file and asks the smudge filter to smudge the new filename. file and asks the smudge filter to smudge the new filename.
So, copies need to be maintained in .git/annex/objects, though it's ok So, copies need to be maintained in .git/annex/objects, though it's ok
to use hard links to the work tree files. to use hard links to the work tree files. (Although somewhat unsafe
since modification of the file will lose the old version. annex.thin
setting can enable this.)
Even if hard links are used, smudge needs to output the content of an Even if hard links are used, smudge needs to output the content of an
annexed file, which will result in duplication when merging in renames of annexed file, which will result in duplication when merging in renames of
@ -241,21 +244,16 @@ git-annex clean:
Generate annex key from filename and content from stdin. Generate annex key from filename and content from stdin.
Hard link .git/annex/objects to the file, if it doesn't already exist. Hard link (annex.thin) or copy .git/annex/objects to the file,
(On platforms not supporting hardlinks, copy the file to if it doesn't already exist.
.git/annex/objects.)
This is done to prevent losing the only copy of a file when eg This is done to prevent losing the only copy of a file when eg
doing a git checkout of a different branch, or merging a commit that doing a git checkout of a different branch, or merging a commit that
renames or deletes a file. But, no attempt is made to renames or deletes a file. But, with annex.thin no attempt is made to
protect the object from being modified. If a user wants to protect the object from being modified. If a user wants to
protect object contents from modification, they should use protect object contents from modification, they should use
`git annex add`, not `git add`, or they can `git annex lock` after adding,. `git annex add`, not `git add`, or they can `git annex lock` after adding,
or not enable annex.thin.
There could be a configuration knob to cause a copy to be made to
.git/annex/objects -- useful for those crippled filesystems. It might
also drop that copy once the object gets uploaded to another repo ...
But that gets complicated quickly.
Update file map. Update file map.

View file

@ -72,10 +72,6 @@ The behavior of some commands changes in an upgraded repository:
* `git annex unlock` and `git annex lock` change how the pointer to * `git annex unlock` and `git annex lock` change how the pointer to
the annexed content is stored in git. the annexed content is stored in git.
If a repository is only used in indirect mode, you can use git-annex
v5 and v6 in different clones of the same indirect mode repository without
problems.
On upgrade, all files in a direct mode repository will be converted to On upgrade, all files in a direct mode repository will be converted to
unlocked files. The upgrade will stage changes to all annexed files in unlocked files. The upgrade will stage changes to all annexed files in
the git repository, which you can then commit. the git repository, which you can then commit.