Merge branch 'v7'

This commit is contained in:
Joey Hess 2018-10-26 13:52:09 -04:00
commit 6fd37fb016
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
37 changed files with 528 additions and 280 deletions

View file

@ -46,6 +46,7 @@ module Annex.Content (
staleKeysPrune,
pruneTmpWorkDirBefore,
isUnmodified,
isUnmodifiedCheap,
verifyKeyContent,
VerifyConfig(..),
Verification(..),
@ -746,9 +747,7 @@ isUnmodified :: Key -> FilePath -> Annex Bool
isUnmodified key f = go =<< geti
where
go Nothing = return False
go (Just fc) = cheapcheck fc <||> expensivecheck fc
cheapcheck fc = anyM (compareInodeCaches fc)
=<< Database.Keys.getInodeCaches key
go (Just fc) = isUnmodifiedCheap' key fc <||> expensivecheck fc
expensivecheck fc = ifM (verifyKeyContent RetrievalAllKeysSecure AlwaysVerify UnVerified key f)
( do
-- The file could have been modified while it was
@ -765,6 +764,17 @@ isUnmodified key f = go =<< geti
)
geti = withTSDelta (liftIO . genInodeCache f)
{- Quick test of whether a file still holds the unmodified content of
 - the key. Only an InodeCache generated for the file is compared; the
 - content itself is not verified here. When no InodeCache can be
 - generated for the file, the result is False.
 -}
isUnmodifiedCheap :: Key -> FilePath -> Annex Bool
isUnmodifiedCheap key f = do
    mcache <- withTSDelta (liftIO . genInodeCache f)
    case mcache of
        Nothing -> return False
        Just cache -> isUnmodifiedCheap' key cache
{- True when the given InodeCache compares equal (per compareInodeCaches)
 - to any InodeCache that Database.Keys has recorded for the key. -}
isUnmodifiedCheap' :: Key -> InodeCache -> Annex Bool
isUnmodifiedCheap' key fc = do
    known <- Database.Keys.getInodeCaches key
    anyM (compareInodeCaches fc) known
{- Moves a key out of .git/annex/objects/ into .git/annex/bad, and
- returns the file it was moved to. -}
moveBad :: Key -> Annex FilePath

View file

@ -49,7 +49,6 @@ linkOrCopy = linkOrCopy' (annexThin <$> Annex.getGitConfig)
linkOrCopy' :: Annex Bool -> Key -> FilePath -> FilePath -> Maybe FileMode -> Annex (Maybe LinkedOrCopied)
linkOrCopy' canhardlink key src dest destmode
| maybe False isExecutable destmode = copy =<< getstat
| otherwise = catchDefaultIO Nothing $
ifM canhardlink
( hardlink

View file

@ -32,6 +32,17 @@ postReceiveHook = Git.Hook "post-receive"
[ mkHookScript "git annex post-receive"
]
-- Script shared by the post-checkout and post-merge hooks.
--
-- git-annex smudge --update is only run when git-annex supports it;
-- older versions of git-annex didn't need this hook. The script probes
-- for support by running the command once with its output discarded,
-- and runs it again only when that probe succeeded.
smudgeHook :: String
smudgeHook = mkHookScript probeThenUpdate
  where
    probeThenUpdate = "if git annex smudge --update >/dev/null 2>&1; then git-annex smudge --update; fi"

-- Hook run by git after a checkout.
postCheckoutHook :: Git.Hook
postCheckoutHook = Git.Hook "post-checkout" smudgeHook []

-- Hook run by git after a merge.
postMergeHook :: Git.Hook
postMergeHook = Git.Hook "post-merge" smudgeHook []
preCommitAnnexHook :: Git.Hook
preCommitAnnexHook = Git.Hook "pre-commit-annex" "" []

View file

@ -38,7 +38,6 @@ import Logs.Location
import qualified Annex
import qualified Annex.Queue
import qualified Database.Keys
import qualified Git.Branch
import Config
import Utility.InodeCache
import Annex.ReplaceFile

View file

@ -21,7 +21,6 @@ module Annex.Init (
import Annex.Common
import qualified Annex
import qualified Git
import qualified Git.LsFiles
import qualified Git.Config
import qualified Git.Objects
import qualified Annex.Branch
@ -29,10 +28,10 @@ import Logs.UUID
import Logs.Trust.Basic
import Logs.Config
import Types.TrustLevel
import Types.RepoVersion
import Annex.Version
import Annex.Difference
import Annex.UUID
import Annex.Link
import Annex.WorkTree
import Config
import Config.Smudge
@ -78,7 +77,7 @@ genDescription Nothing = do
Right username -> [username, at, hostname, ":", reldir]
Left _ -> [hostname, ":", reldir]
initialize :: AutoInit -> Maybe String -> Maybe Version -> Annex ()
initialize :: AutoInit -> Maybe String -> Maybe RepoVersion -> Annex ()
initialize ai mdescription mversion = checkCanInitialize ai $ do
{- Has to come before any commits are made as the shared
- clone heuristic expects no local objects. -}
@ -98,7 +97,7 @@ initialize ai mdescription mversion = checkCanInitialize ai $ do
-- Everything except for uuid setup, shared clone setup, and initial
-- description.
initialize' :: AutoInit -> Maybe Version -> Annex ()
initialize' :: AutoInit -> Maybe RepoVersion -> Annex ()
initialize' ai mversion = checkCanInitialize ai $ do
checkLockSupport
checkFifoSupport
@ -112,18 +111,16 @@ initialize' ai mversion = checkCanInitialize ai $ do
whenM versionSupportsUnlockedPointers $ do
configureSmudgeFilter
scanUnlockedFiles
unlessM isBareRepo $ do
hookWrite postCheckoutHook
hookWrite postMergeHook
checkAdjustedClone >>= \case
NeedUpgradeForAdjustedClone ->
void $ upgrade True versionForAdjustedClone
void $ upgrade True versionForAdjustedClone
InAdjustedClone -> return ()
NotInAdjustedClone ->
ifM (crippledFileSystem <&&> (not <$> isBareRepo))
( ifM versionSupportsUnlockedPointers
( adjustToCrippledFileSystem
, do
enableDirectMode
setDirect True
)
( adjustToCrippledFileSystem
-- Handle case where this repo was cloned from a
-- direct mode repo
, unlessM isBareRepo
@ -266,15 +263,6 @@ checkFifoSupport = unlessM probeFifoSupport $ do
warning "Disabling ssh connection caching."
setConfig (annexConfig "sshcaching") (Git.Config.boolConfig False)
enableDirectMode :: Annex ()
enableDirectMode = unlessM isDirect $ do
warning "Enabling direct mode."
top <- fromRepo Git.repoPath
(l, clean) <- inRepo $ Git.LsFiles.inRepo [top]
forM_ l $ \f ->
maybe noop (`toDirect` f) =<< isAnnexLink f
void $ liftIO clean
checkSharedClone :: Annex Bool
checkSharedClone = inRepo Git.Objects.isSharedClone

View file

@ -38,6 +38,8 @@ module Annex.Locations (
gitAnnexFsckDbDir,
gitAnnexFsckDbLock,
gitAnnexFsckResultsLog,
gitAnnexSmudgeLog,
gitAnnexSmudgeLock,
gitAnnexExportDbDir,
gitAnnexExportLock,
gitAnnexScheduleState,
@ -312,6 +314,14 @@ gitAnnexFsckDbLock u r = gitAnnexFsckDir u r </> "fsck.lck"
gitAnnexFsckResultsLog :: UUID -> Git.Repo -> FilePath
gitAnnexFsckResultsLog u r = gitAnnexDir r </> "fsckresults" </> fromUUID u
{- .git/annex/smudge.log is used to log smudged worktree files that need
 - to be updated. -}
gitAnnexSmudgeLog :: Git.Repo -> FilePath
gitAnnexSmudgeLog = (</> "smudge.log") . gitAnnexDir

{- Lock file that accompanies the smudge log; it lives next to it in the
 - annex directory as smudge.lck. -}
gitAnnexSmudgeLock :: Git.Repo -> FilePath
gitAnnexSmudgeLock = (</> "smudge.lck") . gitAnnexDir
{- .git/annex/export/uuid/ is used to store information about
- exports to special remotes. -}
gitAnnexExportDir :: UUID -> Git.Repo -> FilePath

View file

@ -1,8 +1,8 @@
{- git-annex repository versioning
-
- Copyright 2010,2013 Joey Hess <id@joeyh.name>
- Copyright 2010-2018 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
@ -11,48 +11,53 @@ module Annex.Version where
import Annex.Common
import Config
import Types.RepoVersion
import qualified Annex
type Version = String
import qualified Data.Map as M
defaultVersion :: Version
defaultVersion = "5"
defaultVersion :: RepoVersion
defaultVersion = RepoVersion 5
latestVersion :: Version
latestVersion = "6"
latestVersion :: RepoVersion
latestVersion = RepoVersion 7
supportedVersions :: [Version]
supportedVersions = ["3", "5", "6"]
supportedVersions :: [RepoVersion]
supportedVersions = map RepoVersion [5, 7]
versionForAdjustedClone :: Version
versionForAdjustedClone = "6"
versionForAdjustedClone :: RepoVersion
versionForAdjustedClone = RepoVersion 7
upgradableVersions :: [Version]
upgradableVersions :: [RepoVersion]
#ifndef mingw32_HOST_OS
upgradableVersions = ["0", "1", "2", "3", "4", "5"]
upgradableVersions = map RepoVersion [0..6]
#else
upgradableVersions = ["2", "3", "4", "5"]
upgradableVersions = map RepoVersion [2..6]
#endif
autoUpgradeableVersions :: [Version]
autoUpgradeableVersions = ["3", "4"]
-- Repository versions that get upgraded automatically, each mapped to
-- the version it is upgraded to.
autoUpgradeableVersions :: M.Map RepoVersion RepoVersion
autoUpgradeableVersions = M.fromList $ map torepoversions
    [ (3, 5)
    , (4, 5)
    , (6, 7)
    ]
  where
    torepoversions (old, new) = (RepoVersion old, RepoVersion new)
versionField :: ConfigKey
versionField = annexConfig "version"
getVersion :: Annex (Maybe Version)
getVersion :: Annex (Maybe RepoVersion)
getVersion = annexVersion <$> Annex.getGitConfig
versionSupportsDirectMode :: Annex Bool
versionSupportsDirectMode = go <$> getVersion
where
go (Just "6") = False
go (Just v) | v >= RepoVersion 6 = False
go _ = True
versionSupportsUnlockedPointers :: Annex Bool
versionSupportsUnlockedPointers = go <$> getVersion
where
go (Just "6") = True
go (Just v) | v >= RepoVersion 6 = True
go _ = False
versionSupportsAdjustedBranch :: Annex Bool
@ -61,8 +66,8 @@ versionSupportsAdjustedBranch = versionSupportsUnlockedPointers
versionUsesKeysDatabase :: Annex Bool
versionUsesKeysDatabase = versionSupportsUnlockedPointers
setVersion :: Version -> Annex ()
setVersion = setConfig versionField
-- Stores the repository version in the versionField config setting,
-- written as the decimal form of the version number.
setVersion :: RepoVersion -> Annex ()
setVersion = setConfig versionField . show . fromRepoVersion
removeVersion :: Annex ()
removeVersion = unsetConfig versionField

View file

@ -1,5 +1,13 @@
git-annex (6.20181012) UNRELEASED; urgency=medium
git-annex (7.20181025) UNRELEASED; urgency=medium
* Added v7 repository mode. v6 upgrades automatically to v7, but
v5 is still the default for now. While v6 was always experimental
to some degree, its successor v7 is ready for production use!
* Direct mode repositories are deprecated; they have many problems
that v7 fixes, so upgrading them now is recommended (but not yet
required): git annex upgrade --version=7
* init: When in a crippled filesystem, initialize a v7 repository
using an adjusted unlocked branch, instead of a direct mode repository.
* At long last there's a way to hide annexed files whose content
is missing from the working tree: git-annex adjust --hide-missing
See https://git-annex.branchable.com/tips/hiding_missing_files/
@ -10,17 +18,34 @@ git-annex (6.20181012) UNRELEASED; urgency=medium
* git-annex sync --content supports --hide-missing; it can
be used to get the content of hidden files, and it updates the
adjusted branch to hide/unhide files as necessary.
* smudge: The smudge filter no longer provides git with annexed
file content, to avoid a git memory leak, and because that did not
honor annex.thin. Now git annex smudge --update has to be run
after a checkout to update unlocked files in the working tree
with annexed file contents.
* v7 init, upgrade: Install git post-checkout and post-merge hooks that run
git annex smudge --update.
* precommit: Run git annex smudge --update, because the post-merge
hook is not run when there is a merge conflict. So the work tree will
be updated when a commit is made to resolve the merge conflict.
* Note that git has no hooks run after git stash or git cherry-pick,
so the user will have to manually run git annex smudge --update
after such commands.
* Removed the old Android app.
* Removed support for building with very old ghc < 8.0.1,
and with yesod < 1.4.3, and without concurrent-output,
which were onyl being used for the Android cross build.
which were only being used for the Android cross build.
* Webapp: Fix termux detection.
* runshell: Use system locales when built with
GIT_ANNEX_PACKAGE_INSTALL set. (For Neurodebian packages.)
* v6: Fix database inconsistency that could cause git-annex to
* Fix database inconsistency that could cause git-annex to
get confused about whether a locked file's content was present.
* Fix concurrency bug that occurred on the first download from an
exporttree remote.
* init --version=6 will still work, but the repository is auto-upgraded
immediately to v7.
* When annex.thin is set, allow hard links to be made between executable
work tree files and annex objects.
-- Joey Hess <id@joeyh.name> Sat, 13 Oct 2018 00:52:02 -0400

View file

@ -10,7 +10,10 @@ module Command.Init where
import Command
import Annex.Init
import Annex.Version
import Types.RepoVersion
import qualified Annex.SpecialRemote
import qualified Data.Map as M
cmd :: Command
cmd = dontCheck repoExists $
@ -19,21 +22,25 @@ cmd = dontCheck repoExists $
data InitOptions = InitOptions
{ initDesc :: String
, initVersion :: Maybe Version
, initVersion :: Maybe RepoVersion
}
optParser :: CmdParamsDesc -> Parser InitOptions
optParser desc = InitOptions
<$> (unwords <$> cmdParams desc)
<*> optional (option (str >>= parseVersion)
<*> optional (option (str >>= parseRepoVersion)
( long "version" <> metavar paramValue
<> help "Override default annex.version"
))
parseVersion :: Monad m => String -> m Version
parseVersion v
| v `elem` supportedVersions = return v
| otherwise = fail $ v ++ " is not a currently supported repository version"
-- Parses the value given to --version.
--
-- A supported version is returned as-is. A version that is not
-- supported but can be automatically upgraded is replaced by the
-- version it upgrades to. Anything else fails.
parseRepoVersion :: Monad m => String -> m RepoVersion
parseRepoVersion s = maybe (fail "version parse error") check (readish s)
  where
    check n
        | v `elem` supportedVersions = return v
        | otherwise = maybe unsupported return $
            M.lookup v autoUpgradeableVersions
      where
        v = RepoVersion n
    unsupported = fail $ s ++ " is not a currently supported repository version"
seek :: InitOptions -> CommandSeek
seek = commandAction . start

View file

@ -13,6 +13,7 @@ import Command
import Config
import qualified Command.Add
import qualified Command.Fix
import qualified Command.Smudge
import Annex.Direct
import Annex.Hook
import Annex.Link
@ -54,11 +55,21 @@ seek ps = lockPreCommitHook $ ifM isDirect
flip withFilesToBeCommitted l $ \f -> commandAction $
maybe stop (Command.Fix.start Command.Fix.FixSymlinks f)
=<< isAnnexLink f
-- inject unlocked files into the annex
-- (not needed when repo version uses
-- unlocked pointer files)
unlessM versionSupportsUnlockedPointers $
withFilesOldUnlockedToBeCommitted (commandAction . startInjectUnlocked) l
ifM versionSupportsUnlockedPointers
-- after a merge conflict or git
-- cherry-pick or stash, pointer
-- files in the worktree won't
-- be populated, so populate them
-- here
( Command.Smudge.updateSmudged
-- When there's a false index,
-- restaging the files won't work.
. Restage =<< liftIO Git.haveFalseIndex
-- inject unlocked files into the annex
-- (not needed when repo version uses
-- unlocked pointer files)
, withFilesOldUnlockedToBeCommitted (commandAction . startInjectUnlocked) l
)
)
runAnnexHook preCommitAnnexHook
-- committing changes to a view updates metadata

View file

@ -8,12 +8,12 @@
module Command.Smudge where
import Command
import qualified Annex
import Annex.Content
import Annex.Link
import Annex.FileMatcher
import Annex.Ingest
import Annex.CatFile
import Logs.Smudge
import Logs.Location
import qualified Database.Keys
import qualified Git.BuildVersion
@ -29,43 +29,46 @@ cmd = noCommit $ noMessages $
"git smudge filter"
paramFile (seek <$$> optParser)
data SmudgeOptions = SmudgeOptions
data SmudgeOptions = UpdateOption | SmudgeOptions
{ smudgeFile :: FilePath
, cleanOption :: Bool
}
optParser :: CmdParamsDesc -> Parser SmudgeOptions
optParser desc = SmudgeOptions
<$> argument str ( metavar desc )
<*> switch ( long "clean" <> help "clean filter" )
optParser desc = smudgeoptions <|> updateoption
where
smudgeoptions = SmudgeOptions
<$> argument str ( metavar desc )
<*> switch ( long "clean" <> help "clean filter" )
updateoption = flag' UpdateOption
( long "update" <> help "populate annexed worktree files" )
seek :: SmudgeOptions -> CommandSeek
seek o = commandAction $
(if cleanOption o then clean else smudge) (smudgeFile o)
seek (SmudgeOptions f False) = commandAction (smudge f)
seek (SmudgeOptions f True) = commandAction (clean f)
seek UpdateOption = commandAction update
-- Smudge filter is fed git file content, and if it's a pointer to an
-- available annex object, should output its content.
-- available annex object, git expects it to output its content.
--
-- However, this does not do that. It outputs the pointer, and records
-- the filename in the smudge log. Git hooks run after commands like checkout
-- then run git annex smudge --update which populates the work tree files
-- with annex content. This is done for several reasons:
--
-- * To support annex.thin
-- * Because git currently buffers the whole object received from the
-- smudge filter in memory, which is a problem with large files.
smudge :: FilePath -> CommandStart
smudge file = do
b <- liftIO $ B.hGetContents stdin
case parseLinkOrPointer b of
Nothing -> liftIO $ B.putStr b
Nothing -> noop
Just k -> do
Database.Keys.addAssociatedFile k =<< inRepo (toTopFilePath file)
-- A previous unlocked checkout of the file may have
-- led to the annex object getting modified;
-- don't provide such modified content as it
-- will be confusing. inAnnex will detect such
-- modifications.
ifM (inAnnex k)
( do
content <- calcRepo (gitAnnexLocation k)
whenM (annexThin <$> Annex.getGitConfig) $
warning $ "Not able to honor annex.thin when git is checking out " ++ file ++ " (run git annex fix to re-thin files)"
liftIO $ B.putStr . fromMaybe b
=<< catchMaybeIO (B.readFile content)
, liftIO $ B.putStr b
)
topfile <- inRepo (toTopFilePath file)
Database.Keys.addAssociatedFile k topfile
void $ smudgeLog k topfile
liftIO $ B.putStr b
stop
-- Clean filter is fed file content on stdin, decides if a file
@ -92,19 +95,30 @@ clean file = do
if Git.BuildVersion.older "2.5"
then B.length b `seq` return ()
else liftIO $ hClose stdin
-- Look up the backend that was used for this file
-- before, so that when git re-cleans a file its
-- backend does not change.
let oldbackend = maybe Nothing (maybeLookupBackendVariety . keyVariety) oldkey
-- Can't restage associated files because git add
-- runs this and has the index locked.
let norestage = Restage False
liftIO . emitPointer
=<< postingest
=<< (\ld -> ingest' oldbackend ld Nothing norestage)
=<< lockDown cfg file
-- Optimization when the file is already annexed
-- and is unmodified.
case oldkey of
Nothing -> doingest oldkey
Just ko -> ifM (isUnmodifiedCheap ko file)
( liftIO $ emitPointer ko
, doingest oldkey
)
, liftIO $ B.hPut stdout b
)
doingest oldkey = do
-- Look up the backend that was used for this file
-- before, so that when git re-cleans a file its
-- backend does not change.
let oldbackend = maybe Nothing (maybeLookupBackendVariety . keyVariety) oldkey
-- Can't restage associated files because git add
-- runs this and has the index locked.
let norestage = Restage False
liftIO . emitPointer
=<< postingest
=<< (\ld -> ingest' oldbackend ld Nothing norestage)
=<< lockDown cfg file
postingest (Just k, _) = do
logStatus k InfoPresent
@ -151,3 +165,19 @@ getMoveRaceRecovery k file = void $ tryNonAsync $
populatePointerFile (Restage False) k obj file >>= \case
Nothing -> return ()
Just ic -> Database.Keys.addInodeCaches k [ic]
-- Handles smudge --update: processes the smudge log, restaging the
-- files, and then stops since there is nothing further to perform.
update :: CommandStart
update = updateSmudged (Restage True) >> stop
-- Goes through every entry in the smudge log. When the key's content is
-- in the annex, the worktree file is populated from the annex object.
-- A warning is displayed when populating did not happen and the
-- worktree file is still a pointer file for the same key.
updateSmudged :: Restage -> Annex ()
updateSmudged restage = streamSmudged populate
  where
    populate k topf = do
        f <- fromRepo (fromTopFilePath topf)
        whenM (inAnnex k) $ do
            obj <- calcRepo (gitAnnexLocation k)
            populated <- populatePointerFile restage k obj f
            unlessM (return (isJust populated)) $
                warnIfStillPointer k f
    -- Only complain when the file still points at this key; otherwise
    -- the file has changed and there is nothing to report.
    warnIfStillPointer k f = do
        mk <- liftIO (isPointerFile f)
        case mk of
            Just k' | k' == k -> toplevelWarning False $
                "unable to populate worktree file " ++ f
            _ -> noop

View file

@ -12,6 +12,7 @@ import Annex.Version
import BuildInfo
import BuildFlags
import Types.Key
import Types.RepoVersion
import qualified Types.Backend as B
import qualified Types.Remote as R
import qualified Remote
@ -49,7 +50,7 @@ seekNoRepo o
showVersion :: Annex ()
showVersion = do
liftIO showPackageVersion
maybe noop (liftIO . vinfo "local repository version")
maybe noop (liftIO . vinfo "local repository version" . showRepoVersion)
=<< getVersion
showPackageVersion :: IO ()
@ -62,9 +63,14 @@ showPackageVersion = do
vinfo "remote types" $ unwords $ map R.typename Remote.remoteTypes
vinfo "operating system" $ unwords [os, arch]
vinfo "supported repository versions" $
unwords supportedVersions
verlist supportedVersions
vinfo "upgrade supported from repository versions" $
unwords upgradableVersions
verlist upgradableVersions
where
verlist = unwords . map showRepoVersion
-- Renders a repository version as its bare number, for display.
showRepoVersion :: RepoVersion -> String
showRepoVersion (RepoVersion n) = show n
showRawVersion :: IO ()
showRawVersion = do

View file

@ -2,26 +2,59 @@
-
- Copyright 2018 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
- Licensed under the GNU AGPL version 3 or higher.
-}
module Logs.File where
module Logs.File (writeLogFile, appendLogFile, streamLogFile) where
import Annex.Common
import Annex.Perms
import Annex.LockFile
import qualified Git
import Utility.Tmp
-- | Writes content to a file, replacing the file atomically, and
-- making the new file have whatever permissions the git repository is
-- configured to use. Creates the parent directory when necessary.
writeLogFile :: FilePath -> String -> Annex ()
writeLogFile f c = go `catchNonAsync` \_e -> do
-- Most of the time, the directory will exist, so this is only
-- done if writing the file fails.
createAnnexDirectory (parentDir f)
go
writeLogFile f c = createDirWhenNeeded f $ viaTmp writelog f c
where
go = viaTmp writelog f c
writelog f' c' = do
liftIO $ writeFile f' c'
setAnnexFilePerm f'
-- | Appends a line to a log file. An exclusive lock is taken first, to
-- prevent concurrent writers. If the attempt fails, the parent
-- directory is created and the append is tried once more.
appendLogFile :: FilePath -> (Git.Repo -> FilePath) -> String -> Annex ()
appendLogFile f lck c = createDirWhenNeeded f (withExclusiveLock lck appendline)
  where
    appendline = do
        liftIO $ withFile f AppendMode (`hPutStrLn` c)
        setAnnexFilePerm f
-- | Feeds each line of a log file to the action, and then empties the
-- file at the end.
--
-- If the action is interrupted or throws an exception, the log file is
-- left unchanged.
--
-- Does nothing if the log file does not exist.
--
-- Locking is used to prevent writes to the log file while this
-- is running.
streamLogFile :: FilePath -> (Git.Repo -> FilePath) -> (String -> Annex ()) -> Annex ()
streamLogFile f lck a = withExclusiveLock lck $ bracketOnError setup cleanup go
  where
    setup = liftIO $ tryWhenExists $ openFile f ReadMode
    -- Only reached on error; the success path closes the handle itself.
    cleanup = maybe noop (liftIO . hClose)
    go = maybe noop process
    process h = do
        ls <- liftIO (lines <$> hGetContents h)
        mapM_ a ls
        -- The handle is closed before the file is truncated.
        liftIO $ do
            hClose h
            writeFile f ""
        setAnnexFilePerm f
-- | Runs the action; if it throws a non-async exception, creates the
-- parent directory of the file and runs the action a second time.
--
-- Most of the time, the directory will exist, so the directory is only
-- created after a failure.
createDirWhenNeeded :: FilePath -> Annex () -> Annex ()
createDirWhenNeeded f a = catchNonAsync a retry
  where
    retry _ = createAnnexDirectory (parentDir f) >> a

40
Logs/Smudge.hs Normal file
View file

@ -0,0 +1,40 @@
{- git-annex smudge log file
-
- Copyright 2018 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Logs.Smudge where
import Annex.Common
import Git.FilePath
import Logs.File
-- | Records a smudged file in the smudge log, as a line holding the
-- serialized key, a space, and the file's path.
smudgeLog :: Key -> TopFilePath -> Annex ()
smudgeLog k f = do
    logf <- fromRepo gitAnnexSmudgeLog
    appendLogFile logf gitAnnexSmudgeLock logline
  where
    logline = unwords [key2file k, getTopFilePath f]
-- | Runs the action on every smudged file recorded in the log, and then
-- empties the log at the end. Lines whose key cannot be parsed are
-- skipped.
--
-- If the action is interrupted or throws an exception, the log file is
-- left unchanged.
--
-- Locking is used to prevent new items being added to the log while this
-- is running.
streamSmudged :: (Key -> TopFilePath -> Annex ()) -> Annex ()
streamSmudged a = do
    logf <- fromRepo gitAnnexSmudgeLog
    streamLogFile logf gitAnnexSmudgeLock process
  where
    process = maybe noop (uncurry a) . parse
    -- A line is split at its first space: key before, file path after.
    parse l = pair f <$> file2key ks
      where
        (ks, f) = separate (== ' ') l
    pair f k = (k, asTopFilePath f)

41
Test.hs
View file

@ -1,6 +1,6 @@
{- git-annex test suite
-
- Copyright 2010-2017 Joey Hess <id@joeyh.name>
- Copyright 2010-2018 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@ -10,6 +10,7 @@
module Test where
import Types.Test
import Types.RepoVersion
import Test.Framework
import Options.Applicative.Types
@ -148,10 +149,10 @@ tests crippledfilesystem opts = testGroup "Tests" $ properties :
map (\(d, te) -> withTestMode te initTests (unitTests d)) testmodes
where
testmodes = catMaybes
[ Just ("v6 unlocked", (testMode opts "6") { unlockedFiles = True })
, unlesscrippled ("v5", testMode opts "5")
, unlesscrippled ("v6 locked", testMode opts "6")
, Just ("v5 direct", (testMode opts "5") { forceDirect = True })
[ Just ("v7 unlocked", (testMode opts (RepoVersion 7)) { unlockedFiles = True })
, unlesscrippled ("v5", testMode opts (RepoVersion 5))
, unlesscrippled ("v7 locked", testMode opts (RepoVersion 7))
, Just ("v5 direct", (testMode opts (RepoVersion 5)) { forceDirect = True })
]
unlesscrippled v
| crippledfilesystem = Nothing
@ -225,7 +226,7 @@ unitTests note = testGroup ("Unit Tests " ++ note)
, testCase "move (ssh remote)" test_move_ssh_remote
, testCase "copy" test_copy
, testCase "lock" test_lock
, testCase "lock (v6 --force)" test_lock_v6_force
, testCase "lock (v7 --force)" test_lock_v7_force
, testCase "edit (no pre-commit)" test_edit
, testCase "edit (pre-commit)" test_edit_precommit
, testCase "partial commit" test_partial_commit
@ -280,7 +281,7 @@ test_init = innewrepo $ do
ver <- annexVersion <$> getTestMode
if ver == Annex.Version.defaultVersion
then git_annex "init" [reponame] @? "init failed"
else git_annex "init" [reponame, "--version", ver] @? "init failed"
else git_annex "init" [reponame, "--version", show (fromRepoVersion ver)] @? "init failed"
setupTestMode
where
reponame = "test repo"
@ -601,11 +602,11 @@ test_lock = intmpclonerepoInDirect $ do
annexed_notpresent annexedfile
-- regression test: unlock of newly added, not committed file
-- should fail in v5 mode. In v6 mode, this is allowed.
-- should fail in v5 mode. In v7 mode, this is allowed.
writeFile "newfile" "foo"
git_annex "add" ["newfile"] @? "add new file failed"
ifM (annexeval Annex.Version.versionSupportsUnlockedPointers)
( git_annex "unlock" ["newfile"] @? "unlock failed on newly added, never committed file in v6 repository"
( git_annex "unlock" ["newfile"] @? "unlock failed on newly added, never committed file in v7 repository"
, not <$> git_annex "unlock" ["newfile"] @? "unlock failed to fail on newly added, never committed file in v5 repository"
)
@ -619,7 +620,7 @@ test_lock = intmpclonerepoInDirect $ do
writeFile annexedfile $ content annexedfile ++ "foo"
not <$> git_annex "lock" [annexedfile] @? "lock failed to fail without --force"
git_annex "lock" ["--force", annexedfile] @? "lock --force failed"
-- In v6 mode, the original content of the file is not always
-- In v7 mode, the original content of the file is not always
-- preserved after modification, so re-get it.
git_annex "get" [annexedfile] @? "get of file failed after lock --force"
annexed_present_locked annexedfile
@ -642,19 +643,19 @@ test_lock = intmpclonerepoInDirect $ do
-- Regression test: lock --force when work tree file
-- was modified lost the (unmodified) annex object.
-- (Only occurred when the keys database was out of sync.)
test_lock_v6_force :: Assertion
test_lock_v6_force = intmpclonerepoInDirect $ do
test_lock_v7_force :: Assertion
test_lock_v7_force = intmpclonerepoInDirect $ do
git_annex "upgrade" [] @? "upgrade failed"
whenM (annexeval Annex.Version.versionSupportsUnlockedPointers) $ do
git_annex "get" [annexedfile] @? "get of file failed"
git_annex "unlock" [annexedfile] @? "unlock failed in v6 mode"
git_annex "unlock" [annexedfile] @? "unlock failed in v7 mode"
annexeval $ do
Database.Keys.closeDb
dbdir <- Annex.fromRepo Annex.Locations.gitAnnexKeysDb
liftIO $ renameDirectory dbdir (dbdir ++ ".old")
writeFile annexedfile "test_lock_v6_force content"
not <$> git_annex "lock" [annexedfile] @? "lock of modified file failed to fail in v6 mode"
git_annex "lock" ["--force", annexedfile] @? "lock --force of modified file failed in v6 mode"
writeFile annexedfile "test_lock_v7_force content"
not <$> git_annex "lock" [annexedfile] @? "lock of modified file failed to fail in v7 mode"
git_annex "lock" ["--force", annexedfile] @? "lock --force of modified file failed in v7 mode"
annexed_present_locked annexedfile
test_edit :: Assertion
@ -693,7 +694,7 @@ test_partial_commit = intmpclonerepoInDirect $ do
changecontent annexedfile
ifM (annexeval Annex.Version.versionSupportsUnlockedPointers)
( boolSystem "git" [Param "commit", Param "-q", Param "-m", Param "test", File annexedfile]
@? "partial commit of unlocked file should be allowed in v6 repository"
@? "partial commit of unlocked file should be allowed in v7 repository"
, not <$> boolSystem "git" [Param "commit", Param "-q", Param "-m", Param "test", File annexedfile]
@? "partial commit of unlocked file not blocked by pre-commit hook"
)
@ -723,7 +724,7 @@ test_direct = intmpclonerepoInDirect $ do
git_annex "get" [annexedfile] @? "get of file failed"
annexed_present annexedfile
ifM (annexeval Annex.Version.versionSupportsUnlockedPointers)
( not <$> git_annex "direct" [] @? "switch to direct mode failed to fail in v6 repository"
( not <$> git_annex "direct" [] @? "switch to direct mode failed to fail in v7 repository"
, do
git_annex "direct" [] @? "switch to direct mode failed"
annexed_present annexedfile
@ -1111,7 +1112,7 @@ test_conflict_resolution_adjusted_branch = whenM (annexeval Annex.AdjustedBranch
writeFile conflictor "conflictor2"
add_annex conflictor @? "add conflicter failed"
git_annex "sync" [] @? "sync failed in r2"
-- need v6 to use adjust
-- need v7 to use adjust
git_annex "upgrade" [] @? "upgrade failed"
-- We might be in an adjusted branch
-- already, when eg on a crippled
@ -1405,7 +1406,7 @@ test_conflict_resolution_symlink_bit = unlessM (unlockedFiles <$> getTestMode) $
all (\i -> Git.Types.toTreeItemType (Git.LsTree.mode i) == Just Git.Types.TreeSymlink) l
@? (what ++ " " ++ f ++ " lost symlink bit after merge: " ++ show l)
{- A v6 unlocked file that conflicts with a locked file should be resolved
{- A v7 unlocked file that conflicts with a locked file should be resolved
- in favor of the unlocked file, with no variant files, as long as they
- both point to the same key. -}
test_mixed_lock_conflict_resolution :: Assertion

View file

@ -17,6 +17,7 @@ import Types.Test
import qualified Annex
import qualified Annex.UUID
import qualified Annex.Version
import qualified Types.RepoVersion
import qualified Backend
import qualified Git.CurrentRepo
import qualified Git.Construct
@ -198,7 +199,7 @@ clonerepo old new cfg = do
ver <- annexVersion <$> getTestMode
if ver == Annex.Version.defaultVersion
then git_annex "init" ["-q", new] @? "git annex init failed"
else git_annex "init" ["-q", new, "--version", ver] @? "git annex init failed"
else git_annex "init" ["-q", new, "--version", show (Types.RepoVersion.fromRepoVersion ver)] @? "git annex init failed"
unless (bareClone cfg) $
indir new $
setupTestMode
@ -387,11 +388,11 @@ add_annex f = ifM (unlockedFiles <$> getTestMode)
data TestMode = TestMode
{ forceDirect :: Bool
, unlockedFiles :: Bool
, annexVersion :: Annex.Version.Version
, annexVersion :: Types.RepoVersion.RepoVersion
, keepFailures :: Bool
} deriving (Read, Show)
testMode :: TestOptions -> Annex.Version.Version -> TestMode
testMode :: TestOptions -> Types.RepoVersion.RepoVersion -> TestMode
testMode opts v = TestMode
{ forceDirect = False
, unlockedFiles = False

View file

@ -30,6 +30,7 @@ import Types.Concurrency
import Types.NumCopies
import Types.Difference
import Types.RefSpec
import Types.RepoVersion
import Config.DynamicConfig
import Utility.HumanTime
import Utility.Gpg (GpgCmd, mkGpgCmd)
@ -52,7 +53,7 @@ data Configurable a
{- Main git-annex settings. Each setting corresponds to a git-config key
- such as annex.foo -}
data GitConfig = GitConfig
{ annexVersion :: Maybe String
{ annexVersion :: Maybe RepoVersion
, annexUUID :: UUID
, annexNumCopies :: Maybe NumCopies
, annexDiskReserve :: Integer
@ -110,7 +111,7 @@ data GitConfig = GitConfig
extractGitConfig :: Git.Repo -> GitConfig
extractGitConfig r = GitConfig
{ annexVersion = notempty $ getmaybe (annex "version")
{ annexVersion = RepoVersion <$> getmayberead (annex "version")
, annexUUID = maybe NoUUID toUUID $ getmaybe (annex "uuid")
, annexNumCopies = NumCopies <$> getmayberead (annex "numcopies")
, annexDiskReserve = fromMaybe onemegabyte $

11
Types/RepoVersion.hs Normal file
View file

@ -0,0 +1,11 @@
{- git-annex repository versioning
-
- Copyright 2010-2018 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Types.RepoVersion where
-- | A git-annex repository version number. The derived Ord instance
-- orders versions by their number.
newtype RepoVersion = RepoVersion
    { fromRepoVersion :: Int
    }
    deriving (Eq, Ord, Read, Show)

View file

@ -11,6 +11,7 @@ module Upgrade where
import Annex.Common
import Annex.Version
import Types.RepoVersion
#ifndef mingw32_HOST_OS
import qualified Upgrade.V0
import qualified Upgrade.V1
@ -19,25 +20,33 @@ import qualified Upgrade.V2
import qualified Upgrade.V3
import qualified Upgrade.V4
import qualified Upgrade.V5
import qualified Upgrade.V6
checkUpgrade :: Version -> Annex ()
import qualified Data.Map as M
checkUpgrade :: RepoVersion -> Annex ()
checkUpgrade = maybe noop giveup <=< needsUpgrade
needsUpgrade :: Version -> Annex (Maybe String)
needsUpgrade :: RepoVersion -> Annex (Maybe String)
needsUpgrade v
| v `elem` supportedVersions = ok
| v `elem` autoUpgradeableVersions = ifM (upgrade True defaultVersion)
( ok
, err "Automatic upgrade failed!"
)
| v `elem` upgradableVersions = err "Upgrade this repository: git-annex upgrade"
| otherwise = err "Upgrade git-annex."
| otherwise = case M.lookup v autoUpgradeableVersions of
Nothing
| v `elem` upgradableVersions ->
err "Upgrade this repository: git-annex upgrade"
| otherwise ->
err "Upgrade git-annex."
Just newv -> ifM (upgrade True newv)
( ok
, err "Automatic upgrade failed!"
)
where
err msg = return $ Just $ "Repository version " ++ v ++
err msg = return $ Just $ "Repository version " ++
show (fromRepoVersion v) ++
" is not supported. " ++ msg
ok = return Nothing
upgrade :: Bool -> Version -> Annex Bool
upgrade :: Bool -> RepoVersion -> Annex Bool
upgrade automatic destversion = do
upgraded <- go =<< getVersion
when upgraded $
@ -46,14 +55,15 @@ upgrade automatic destversion = do
where
go (Just v) | v >= destversion = return True
#ifndef mingw32_HOST_OS
go (Just "0") = Upgrade.V0.upgrade
go (Just "1") = Upgrade.V1.upgrade
go (Just (RepoVersion 0)) = Upgrade.V0.upgrade
go (Just (RepoVersion 1)) = Upgrade.V1.upgrade
#else
go (Just "0") = giveup "upgrade from v0 on Windows not supported"
go (Just "1") = giveup "upgrade from v1 on Windows not supported"
go (Just (RepoVersion 0)) = giveup "upgrade from V0 on Windows not supported"
go (Just (RepoVersion 1)) = giveup "upgrade from V1 on Windows not supported"
#endif
go (Just "2") = Upgrade.V2.upgrade
go (Just "3") = Upgrade.V3.upgrade automatic
go (Just "4") = Upgrade.V4.upgrade automatic
go (Just "5") = Upgrade.V5.upgrade automatic
go (Just (RepoVersion 2)) = Upgrade.V2.upgrade
go (Just (RepoVersion 3)) = Upgrade.V3.upgrade automatic
go (Just (RepoVersion 4)) = Upgrade.V4.upgrade automatic
go (Just (RepoVersion 5)) = Upgrade.V5.upgrade automatic
go (Just (RepoVersion 6)) = Upgrade.V6.upgrade automatic
go _ = return True

21
Upgrade/V6.hs Normal file
View file

@ -0,0 +1,21 @@
{- git-annex v6 -> v7 upgrade support
-
- Copyright 2018 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Upgrade.V6 where
import Annex.Common
import Config
import Annex.Hook
{- Upgrades a v6 repository to v7.
 -
 - Unless this is an automatic upgrade, the "v6 to v7" action is displayed.
 - In a repository that is not bare, the post-checkout and post-merge hooks
 - are written. The result is always True.
 -}
upgrade :: Bool -> Annex Bool
upgrade automatic = do
    unless automatic announce
    unlessM isBareRepo installhooks
    return True
  where
    announce = showAction "v6 to v7"
    installhooks = mapM_ hookWrite [postCheckoutHook, postMergeHook]

View file

@ -22,7 +22,6 @@ import Yesod.Default.Util
import Language.Haskell.TH.Syntax (Q, Exp)
import Data.Default (def)
import Text.Hamlet hiding (Html)
import Data.Text (Text)
widgetFile :: String -> Q Exp
widgetFile = widgetFileNoReload $ def

View file

@ -77,3 +77,5 @@ file_%subdir%
### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders)
Yep! I already use it to move files between my laptop's HDD and SSD, and to copy files between my many SD cards. I was trying this to see if I could not have to scroll as far on my 3D printer's menu.
> [[done]] see comments --[[Joey]]

View file

@ -0,0 +1,17 @@
[[!comment format=mdwn
username="joey"
subject="""comment 4"""
date="2018-10-26T16:53:47Z"
content="""
`git annex adjust --hide-missing` is now available to do what you want
re hiding missing files.
`git annex view` doesn't currently unlock files in a v6 repo, so it's not
usable on a crippled filesystem. That's why the cat in the transcript above
shows the symlink content which git writes to a regular file when in a
crippled filesystem.
I would like to eventually unify adjust with view, so `git annex adjust
--unlock` can be used with a view, which would support that.
See [[todo/unify_adjust_with_view]].
"""]]

View file

@ -0,0 +1,11 @@
[[!comment format=mdwn
username="joey"
subject="""comment 1"""
date="2018-10-26T17:04:09Z"
content="""
Have you ever seen this again or have any more information about how to
reproduce it?
This seems similar to the problem fixed by [[!commit a13c0ce66c6dd5d8cf5b09ee2fc5a58f43db4b14]]
but the version you were using already had that commit in it.
"""]]

View file

@ -377,3 +377,5 @@ total 12
lil' positive end note mode on:
git-annex is the only thing to which I trust my archive of most valuable documents and memories!
> [[done]]; see comments --[[Joey]]

View file

@ -5,6 +5,7 @@
subject="comment 4"
date="2018-10-18T23:34:26Z"
content="""
I am stupid talking about executable files hardlinking. I think I just chmod-ed already hardlinking files, that's how I got it. No surprise.
I am ok with this quirk (executable files are not thinned), but just curious: what exactly influenced such design decision?

View file

@ -0,0 +1,24 @@
[[!comment format=mdwn
username="joey"
subject="""comment 5"""
date="2018-10-26T17:17:35Z"
content="""
[[!commit b7c8bf5274a64389ac87d6ce0388b8708c261971]] is where that was
implemented. Interestingly, its commit message does say that the annex
object file is made executable when using annex.thin.
And indeed, git add of an executable file with annex.thin set does
make the object executable and hard link to it.
But that commit contains this line that avoids hard linking:
| maybe False isExecutable destmode = copy =<< getstat
Which is what I based my earlier comment on. But without that line,
AFAIK it will behave the way you want, with the annex object and
executable worktree file being hard linked. The code also removes the
execute bit if the annex object file later ends up getting hard linked
instead to a non-executable file.
So, based on this analysis, I'm going to remove that line. And improve the
annex.thin docs slightly, and I think that's sufficient to close this bug.
"""]]

View file

@ -11,9 +11,9 @@ understand how to update its working tree.
## deprecated
Direct mode is deprecated! Intead, git-annex v6 repositories can simply
Direct mode is deprecated! Instead, git-annex v7 repositories can simply
have files that are unlocked and thus can be directly accessed and
modified. See [[upgrades]] for details about the transition to v6
modified. See [[upgrades]] for details about the transition to v7
repositories.
## enabling (and disabling) direct mode

View file

@ -6,11 +6,13 @@ git-annex smudge - git filter driver for git-annex
git annex smudge [--clean] file
git annex smudge --update
# DESCRIPTION
This command lets git-annex be used as a git filter driver which allows
annexed files in the git repository to be unlocked at all times, instead
of being symlinks.
annexed files in the git repository to be unlocked, instead
of being symlinks, and lets `git add` store files in the annex.
When adding a file with `git add`, the annex.largefiles config is
consulted to decide if a given file should be added to git as-is,
@ -32,6 +34,16 @@ contents:
* filter=annex
.* !filter
The smudge filter does not provide git with the content of annexed files,
because that would be slow and triggers memory leaks in git. Instead,
it records which worktree files need to be updated, and
`git annex smudge --update` later updates the work tree to contain
the content. That is run by several git hooks, including post-checkout
and post-merge. However, a few git commands, notably `git stash` and
`git cherry-pick`, do not run any hooks, so after using those commands
you can manually run `git annex smudge --update` to update the working
tree.
# SEE ALSO
[[git-annex]](1)

View file

@ -1024,18 +1024,16 @@ Here are all the supported configuration settings.
* `annex.thin`
Set this to `true` to make unlocked files be a hard link to their content
in the annex, rather than a second copy. (Only when supported by the file
system, and only in repository version 6.) This can save considerable
in the annex, rather than a second copy. This can save considerable
disk space, but when a modification is made to a file, you will lose the
local (and possibly only) copy of the old version. So, enable with care.
After setting (or unsetting) this, you should run `git annex fix` to
fix up the annexed files in the work tree to be hard links (or copies).
Note that `annex.thin` is not honored when git updates an annexed file
in the working tree. So when `git checkout` or `git merge` updates the
working tree, a second copy of annexed files will result. You can run
`git-annex fix` to fix up the hard links after running such git commands.
Note that this has no effect when the filesystem does not support hard links.
And when multiple files in the work tree have the same content, only
one of them gets hard linked to the annex.
* `annex.delayadd`

View file

@ -8,10 +8,10 @@ but it needs some different workflows of using git-annex.
## getting started
To get started, your repository needs to be upgraded to v6, since the
To get started, your repository needs to be upgraded to v7, since the
feature does not work in v5 repositories.
git annex upgrade --version=6
git annex upgrade --version=7
The [[git-annex adjust|git-annex-adjust]] command sets up an adjusted form
of a git branch, in this case we'll ask it to hide missing files.
@ -124,7 +124,7 @@ I set up the repository like this:
git clone server:/path/to/podcasts
cd podcasts
git annex upgrade --version=6
git annex upgrade --version=7
git annex adjust --hide-missing
git annex group here client
git annex wanted here standard

View file

@ -15,7 +15,7 @@ by running `git annex unlock`.
# git annex unlock some_file
# echo "new content" > some_file
Back before git-annex version 6, and its v6 repository mode, unlocking a file
Back before git-annex version 7, and its v7 repository mode, unlocking a file
like this was a transient thing. You'd modify it and then `git annex add` the
modified version to the annex, and finally `git commit`. The new version of
the file was then back to being locked.
@ -29,31 +29,28 @@ to edit files repeatedly, without manually having to unlock them every time.
The [[direct_mode]] made all files be unlocked all the time, but it
had many problems of its own.
## enter v6 mode
## enter v7 mode
/!\ This is a new feature; see its [[todo_list|todo/smudge]]
for known issues.
This led to the v6 repository mode, which makes unlocked files remain
This led to the v7 repository mode, which makes unlocked files remain
unlocked after they're committed, so you can keep changing them and
committing the changes whenever you'd like. It also lets you use more
normal git commands (or even interfaces on top of git) for handling
annexed files.
To get a repository into v6 mode, you can [[upgrade|upgrades]] it.
To get a repository into v7 mode, you can [[upgrade|upgrades]] it.
This will eventually happen automatically, but for now it's a manual process
(be sure to read [[upgrades]] before doing this):
# git annex upgrade
Or, you can init a new repository in v6 mode.
Or, you can init a new repository in v7 mode.
# git init
# git annex init --version=6
# git annex init --version=7
## using it
Using a v6 repository is easy! Simply use regular git commands to add
Using a v7 repository is easy! Simply use regular git commands to add
and commit files. In a git-annex repository, git will use git-annex
to store the file contents, and the files will be left unlocked.
@ -97,7 +94,7 @@ mode is used. To make them always use unlocked mode, run:
## mixing locked and unlocked files
A v6 repository can contain both locked and unlocked files. You can switch
A v7 repository can contain both locked and unlocked files. You can switch
a file back and forth using the `git annex lock` and `git annex unlock`
commands. This changes what's stored in git between a git-annex symlink
(locked) and a git-annex pointer file (unlocked). To add a file to
@ -108,28 +105,34 @@ If you want to mostly keep files locked, but be able to locally switch
to having them all unlocked, you can do so using `git annex adjust
--unlock`. See [[git-annex-adjust]] for details. This is particularly
useful when using filesystems like FAT, and OS's like Windows that don't
support symlinks.
support symlinks. Indeed, `git-annex init` detects such filesystems and
automatically sets up a repository to use all unlocked files.
## index gotchas
## imperfections
When git-annex gets or drops the content of an unlocked file, it updates
the file in git's worktree accordingly. That makes `git status` show
the file as modified, even though there are no changes to commit.
So git-annex then updates the index file to reflect the change to the
worktree, and prevent the file from appearing to be modified.
Unlocked files in v7 repositories mostly work very well, but there are a
few imperfections which you should be aware of when using them.
This means that when git-annex is running a command that gets or drops the
content of an unlocked file, the index will sometimes be locked. This might
prevent you from `git commit` at the same time. Or, if you have a git
commit in progress, or are running multiple git-annex processes, git-annex
may complain that the index is locked.
1. `git stash`, `git cherry-pick` and `git reset --hard` don't update
the working tree with the content of unlocked files. The files
will contain pointers, the same as if the content was not in the
repository. So after running these commands, you will need to manually
run `git annex smudge --update`.
Also, interrupting git-annex (eg with ctrl-c) before it can update the
index will leave `git status` showing modifications.
2. When git-annex is running a command that gets or drops the content
of an unlocked file, git's index will briefly be locked, which might
prevent you from running a `git commit` at the same time.
To manually update the index when git-annex was not able to, you can run:
3. Conversely, if you have a git commit in progress, running git-annex may
complain that the index is locked, though this will not prevent it from
working.
git update-index -q --refresh $file
4. When an operation such as a checkout or merge needs to update a large
number of unlocked files, it can become slow. So can `git add` of
a large number of files (`git annex add` is faster).
(The technical reasons behind these imperfections are explained in
detail in [[todo/git_smudge_clean_interface_suboptiomal]].)
## using less disk space
@ -168,15 +171,6 @@ match the new setting:
git annex fix
Unfortunately, git's smudge interface does not let git-annex honor
the annex.thin configuration when git is checking out a file.
So, using `git checkout` to check out a different branch, or even
`git merge` can result in some non-thin files making their way into the
working tree, and using more disk space. A warning will be printed out in
this situation. You can always run `git annex fix` to re-thin such files.
## annex.thin tradeoffs
[[!template id=note text="""
When a [[direct_mode]] repository is upgraded, annex.thin is automatically
set, because direct mode made the same single-copy tradeoff.

View file

@ -0,0 +1,21 @@
[[!comment format=mdwn
username="joey"
subject="""comment 3"""
date="2018-10-26T16:21:28Z"
content="""
While `git add` would be a lot slower when using this interface to add
large files, it would make `git checkout` and other commands that update
the work tree a lot faster.
Since the smudge filter is not providing git with the file content any more,
using filterdriver would avoid git running many git-annex smudge processes,
greatly speeding up large checkouts.
Unfortunately, `git annex smudge --update` ends up running the smudge filter
on all files that the clean filter earlier acted on, so even if filterdriver were
used to speed up the clean filter, there would still be one process spawned per
file for the smudge filter.
So some interface improvement is needed before git-annex can usefully use
this.
"""]]

View file

@ -1,82 +1,13 @@
git-annex should use smudge/clean filters. v6 mode
git-annex should use smudge/clean filters. v7 mode
### problems keeping v6 experimental
## warts
* Checking out a different branch causes git to smudge all changed files,
and write their content. This does not honor annex.thin. A warning
message is printed in this case.
This is particularly wasteful when checking out an adjusted unlocked
branch, which causes 2x the space to be used.
"git annex proxy" could be used to handle this.
Make it run the git command with smudge filter set to not output content
but only pointers, and then at the end populate the pointer files, hard
when appropriate. (As an optimization, the smudge filter could also be
made to use the long-running filter interface when run this way.)
git-annex adjust and git-annex sync could both use that internally
when checking out the adjusted branch, and merging a branch into HEAD.
Or: Make the smudge filter never provide the actual file content, but the
pointer. Install post-checkout and post-merge hooks that populate
the worktree files that were checked out. Of course, they will also
need to update the index.
Problem: post-merge hook is not run when there's a merge conflict.
Git does not actually run the smudge filter in this case;
the conflicting file becomes a text file containing a merge conflict
between the two annex pointers. When the user resolves the conflict
and git add's the result, git runs the smudge filter.
So, if the smudge filter then provides the pointer, the file would not be
populated. The post-commit hook would then need to populate the file,
once the merge got committed.
Problem: No hook seems to be run for git stash / git stash apply
or for git reset --hard or git cherry-pick.
Fatal or can we live with needing to run a
git-annex command to populate the files after those commands?
> implemented on the `delaysmudge` branch now
(My enhanced smudge/clean patch set also fixed this problem, in a much
nicer way...)
* Optionally: Use the filterdriver interface during checkout. Unfortunately that
interface is slower for cleaning during git add (see
[[todo/Long_Running_Filter_Process]]), but since the smudge filter is not
providing git with the file content any more, using filterdriver would
avoid git running many git-annex smudge processes, greatly speeding up large
checkouts. git add could be left slow, with git-annex add being the fast path,
until the filterdriver interface is improved. Or, make "git annex proxy"
use the filterdriver interface for checkout.
* When git runs the smudge filter, it buffers all its output in ram before
writing it to a file. So, checking out a branch with a large v6 unlocked files
can cause git to use a lot of memory.
This needs to be fixed in git, but my proposed interface in
<http://thread.gmane.org/gmane.comp.version-control.git/294425> would
avoid the problem for git checkout, since it would use the new interface
and not the smudge filter.
Last verified with git 2.18 in 2018.
Note that the long-running filter process interface has the same problem.
The annex.thin idea above could work around this problem.
> implemented on the `delaysmudge` branch now
## other warts
* There are several v6 bugs that are edge cases and
* There are several bugs that are edge cases and
need more info or analysis. None of these seem like blockers
to keep v6 experimental or to replacing direct mode with v6.
to keep v7 experimental or to replacing direct mode with v7.
- <http://git-annex.branchable.com/bugs/assistant_crashes_in_TransferScanner/>
- <http://git-annex.branchable.com/bugs/v6_appears_to_not_thin/>
- <http://git-annex.branchable.com/bugs/Metadata_views_in_v6_repo_upgraded_from_direct_mode_act_strangely/>
- <http://git-annex.branchable.com/bugs/git-annex-sync_sometimes_fails_in_submodule_in_V6_adjusted_branch/>
### long term todos
@ -86,14 +17,14 @@ git-annex should use smudge/clean filters. v6 mode
multiple files, and so should be faster.
See [[todo/Long_Running_Filter_Process]] .. it's not currently actually a
win but might be a good way to improve git to work better with v6.
win but might be a good way to improve git to work better with v7.
* Eventually (but not yet), make v6 the default for new repositories.
* Eventually (but not yet), make v7 the default for new repositories.
Note that the assistant forces repos into direct mode; that will need to
be changed then, and it should enable annex.thin instead.
* Later still, remove support for direct mode, and enable automatic
v5 to v6 upgrades.
v5 to v7 upgrades.
### historical notes
@ -395,7 +326,7 @@ just look at the repo content in the first place..
#### Upgrading
annex.version changes to 6
annex.version changes to 7
git config for filter.annex.smudge and filter.annex.clean is set up.

View file

@ -0,0 +1,7 @@
`git annex adjust` and `git annex view` (et al.) both derive a branch from
the main branch and enter it. They have different capabilities. It would be
useful to be able to compose them. For example, to enter a view based on
metadata that also has all files unlocked.
There's also probably a fair amount of overlap in their implementations.
--[[Joey]]

View file

@ -46,11 +46,18 @@ the upgrade would need to be run in a copy of the repository.
The upgrade events, so far:
## v5 -> v6 (git-annex version 6.x)
## v6 -> v7 (git-annex version 7.x)
The upgrade from v5 to v6 is handled manually for now.
The upgrade from v5 to v7 is handled manually for now.
Run `git-annex upgrade` to perform the upgrade.
v6 repositories are automatically upgraded to v7.
The only difference between v6 and v7 is that some additional git hooks
were added in v7.
## v5 -> v6 (git-annex version 6.x)
A v6 git-annex repository can have some files locked while other files are
unlocked, and all git and git-annex commands can be used on both locked and
unlocked files. (Although for locked files to be accessible, the filesystem

View file

@ -1,5 +1,5 @@
Name: git-annex
Version: 6.20181011
Version: 7.20181011
Cabal-Version: >= 1.8
License: GPL-3
Maintainer: Joey Hess <id@joeyh.name>
@ -880,6 +880,7 @@ Executable git-annex
Logs.Schedule
Logs.SingleValue
Logs.SingleValue.Pure
Logs.Smudge
Logs.TimeStamp
Logs.Transfer
Logs.Transitions
@ -969,6 +970,7 @@ Executable git-annex
Types.NumCopies
Types.RefSpec
Types.Remote
Types.RepoVersion
Types.ScheduledActivity
Types.StandardGroups
Types.StoreRetrieve
@ -985,6 +987,7 @@ Executable git-annex
Upgrade.V3
Upgrade.V4
Upgrade.V5
Upgrade.V6
Utility.Aeson
Utility.Android
Utility.Applicative