import: Added --no-content option, which avoids downloading files from a special remote

Only supported by some special remotes: directory
I need to check the rest and they're currently missing methods until I do.

git-annex sync --no-content does not yet use this to do imports
Joey Hess 2020-07-03 13:41:57 -04:00
parent a8099b9896
commit 85506a7015
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
12 changed files with 204 additions and 60 deletions

View file

@@ -20,6 +20,7 @@ module Annex.Content (
 	getViaTmp,
 	getViaTmpFromDisk,
 	checkDiskSpaceToGet,
+	checkSecureHashes,
 	prepTmp,
 	withTmp,
 	checkDiskSpace,
@@ -473,7 +474,7 @@ withTmp key action = do
  - case. May also throw exceptions in some cases.
  -}
 moveAnnex :: Key -> FilePath -> Annex Bool
-moveAnnex key src = ifM (checkSecureHashes key)
+moveAnnex key src = ifM (checkSecureHashes' key)
 	( do
 		withObjectLoc key storeobject
 		return True
@@ -496,22 +497,27 @@ moveAnnex key src = ifM (checkSecureHashes key)
 	dest' = fromRawFilePath dest
 	alreadyhave = liftIO $ removeFile src
 
-checkSecureHashes :: Key -> Annex Bool
+checkSecureHashes :: Key -> Annex (Maybe String)
 checkSecureHashes key
-	| cryptographicallySecure (fromKey keyVariety key) = return True
+	| cryptographicallySecure (fromKey keyVariety key) = return Nothing
 	| otherwise = ifM (annexSecureHashesOnly <$> Annex.getGitConfig)
-		( do
-			warning $ "annex.securehashesonly blocked adding " ++ decodeBS (formatKeyVariety (fromKey keyVariety key)) ++ " key to annex objects"
-			return False
-		, return True
+		( return $ Just $ "annex.securehashesonly blocked adding " ++ decodeBS (formatKeyVariety (fromKey keyVariety key)) ++ " key"
+		, return Nothing
 		)
 
+checkSecureHashes' :: Key -> Annex Bool
+checkSecureHashes' key = checkSecureHashes key >>= \case
+	Nothing -> return True
+	Just msg -> do
+		warning $ msg ++ " to annex objects"
+		return False
+
 data LinkAnnexResult = LinkAnnexOk | LinkAnnexFailed | LinkAnnexNoop
 
 {- Populates the annex object file by hard linking or copying a source
 - file to it. -}
 linkToAnnex :: Key -> FilePath -> Maybe InodeCache -> Annex LinkAnnexResult
-linkToAnnex key src srcic = ifM (checkSecureHashes key)
+linkToAnnex key src srcic = ifM (checkSecureHashes' key)
 	( do
 		dest <- fromRawFilePath <$> calcRepo (gitAnnexLocation key)
 		modifyContent dest $ linkAnnex To key src srcic dest Nothing
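
An aside for readers following the control flow: the `( do ..., ... )` tuple syntax above is git-annex's `ifM` combinator from its Utility.Monad module, which takes a monadic condition and a tuple of then/else actions. A minimal sketch of it:

    -- The tuple-taking ifM combinator, as in git-annex's Utility.Monad:
    -- run the monadic test, then run one of the two supplied actions.
    ifM :: Monad m => m Bool -> (m a, m a) -> m a
    ifM cond (thenclause, elseclause) = do
        c <- cond
        if c then thenclause else elseclause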

View file

@@ -12,7 +12,7 @@ module Annex.Import (
 	ImportCommitConfig(..),
 	buildImportCommit,
 	buildImportTrees,
-	downloadImport,
+	importKeys,
 	filterImportableContents,
 	makeImportMatcher,
 	listImportableContents,
@@ -280,44 +280,57 @@ buildImportTrees basetree msubdir importable = History
 	topf = asTopFilePath $
 		maybe lf (\sd -> getTopFilePath sd P.</> lf) msubdir
 
-{- Downloads all new ContentIdentifiers. Supports concurrency when enabled.
- -
- - If any download fails, the whole thing fails with Nothing,
- - but it will resume where it left off.
+{- Downloads all new ContentIdentifiers, or when importcontent is False,
+ - generates Keys without downloading.
  -
  - Generates either a Key or a git Sha, depending on annex.largefiles.
- - Note that, when a ContentIdentifiers has been imported before,
- - annex.largefiles is not reapplied, so will result in how ever that
- - content was stored in the repo before.
+ - But when importcontent is False, it cannot match on annex.largefiles
+ - (or generate a git Sha), so always generates Keys.
+ -
+ - Supports concurrency when enabled.
+ -
+ - If it fails on any file, the whole thing fails with Nothing,
+ - but it will resume where it left off.
+ -
+ - Note that, when a ContentIdentifier has been imported before,
+ - generates the same thing that was imported before, so annex.largefiles
+ - is not reapplied.
  -}
-downloadImport :: Remote -> ImportTreeConfig -> ImportableContents (ContentIdentifier, ByteSize) -> Annex (Maybe (ImportableContents (Either Sha Key)))
-downloadImport remote importtreeconfig importablecontents = do
+importKeys
+	:: Remote
+	-> ImportTreeConfig
+	-> Bool
+	-> ImportableContents (ContentIdentifier, ByteSize)
+	-> Annex (Maybe (ImportableContents (Either Sha Key)))
+importKeys remote importtreeconfig importcontent importablecontents = do
+	when (not importcontent && isNothing (Remote.importKey ia)) $
+		giveup "This remote does not support importing without downloading content."
 	-- This map is used to remember content identifiers that
-	-- were just downloaded, before they have necessarily been
+	-- were just imported, before they have necessarily been
 	-- stored in the database. This way, if the same content
 	-- identifier appears multiple times in the
 	-- importablecontents (eg when it has a history),
-	-- they will only be downloaded once.
+	-- they will only be imported once.
 	cidmap <- liftIO $ newTVarIO M.empty
 	-- When concurrency is enabled, this set is needed to
-	-- avoid two threads both downloading the same content identifier.
-	downloading <- liftIO $ newTVarIO S.empty
+	-- avoid two threads both importing the same content identifier.
+	importing <- liftIO $ newTVarIO S.empty
 	withExclusiveLock gitAnnexContentIdentifierLock $
 		bracket CIDDb.openDb CIDDb.closeDb $ \db -> do
 			CIDDb.needsUpdateFromLog db
 				>>= maybe noop (CIDDb.updateFromLog db)
-			go False cidmap downloading importablecontents db
+			go False cidmap importing importablecontents db
   where
-	go oldversion cidmap downloading (ImportableContents l h) db = do
+	go oldversion cidmap importing (ImportableContents l h) db = do
 		largematcher <- largeFilesMatcher
 		jobs <- forM l $ \i ->
-			startdownload cidmap downloading db i oldversion largematcher
+			startimport cidmap importing db i oldversion largematcher
 		l' <- liftIO $ forM jobs $
 			either pure (atomically . takeTMVar)
 		if any isNothing l'
 			then return Nothing
 			else do
-				h' <- mapM (\ic -> go True cidmap downloading ic db) h
+				h' <- mapM (\ic -> go True cidmap importing ic db) h
 				if any isNothing h'
 					then return Nothing
 					else return $ Just $
@@ -325,17 +338,17 @@ downloadImport remote importtreeconfig importablecontents = do
 					(catMaybes l')
 					(catMaybes h')
 
-	waitstart downloading cid = liftIO $ atomically $ do
-		s <- readTVar downloading
+	waitstart importing cid = liftIO $ atomically $ do
+		s <- readTVar importing
 		if S.member cid s
 			then retry
-			else writeTVar downloading $ S.insert cid s
+			else writeTVar importing $ S.insert cid s
 
-	signaldone downloading cid = liftIO $ atomically $ do
-		s <- readTVar downloading
-		writeTVar downloading $ S.delete cid s
+	signaldone importing cid = liftIO $ atomically $ do
+		s <- readTVar importing
+		writeTVar importing $ S.delete cid s
 
-	startdownload cidmap downloading db i@(loc, (cid, _sz)) oldversion largematcher = getcidkey cidmap db cid >>= \case
+	startimport cidmap importing db i@(loc, (cid, _sz)) oldversion largematcher = getcidkey cidmap db cid >>= \case
 		(k:ks) ->
 			-- If the same content was imported before
 			-- yeilding multiple different keys, it's not clear
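
The waitstart/signaldone pair above is a standard STM claim-and-release idiom: a TVar holds a Set of in-flight content identifiers, and a thread blocks via retry until the identifier it wants is no longer claimed. A self-contained sketch of the same pattern (withClaim is a hypothetical name, not git-annex code):

    import Control.Concurrent.STM
    import Control.Exception (bracket_)
    import qualified Data.Set as S

    -- Hypothetical helper: claim x for exclusive processing; concurrent
    -- callers passing the same x block (via STM retry) until the first
    -- one finishes. bracket_ releases the claim even if the action
    -- throws, mirroring how the code above wraps waitstart/signaldone.
    withClaim :: Ord a => TVar (S.Set a) -> a -> IO r -> IO r
    withClaim inflight x = bracket_ claim release
      where
        claim = atomically $ do
            s <- readTVar inflight
            if S.member x s
                then retry
                else writeTVar inflight (S.insert x s)
        release = atomically $ modifyTVar' inflight (S.delete x)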
@@ -350,10 +363,10 @@ downloadImport remote importtreeconfig importablecontents = do
 		[] -> do
 			job <- liftIO $ newEmptyTMVarIO
 			let ai = ActionItemOther (Just (fromRawFilePath (fromImportLocation loc)))
-			let downloadaction = starting ("import " ++ Remote.name remote) ai $ do
+			let importaction = starting ("import " ++ Remote.name remote) ai $ do
 				when oldversion $
 					showNote "old version"
-				tryNonAsync (download cidmap db i largematcher) >>= \case
+				tryNonAsync (importordownload cidmap db i largematcher) >>= \case
 					Left e -> next $ do
 						warning (show e)
 						liftIO $ atomically $
@@ -364,12 +377,36 @@ downloadImport remote importtreeconfig importablecontents = do
 							putTMVar job r
 							return True
 			commandAction $ bracket_
-				(waitstart downloading cid)
-				(signaldone downloading cid)
-				downloadaction
+				(waitstart importing cid)
+				(signaldone importing cid)
+				importaction
 			return (Right job)
 
-	download cidmap db (loc, (cid, sz)) largematcher = do
+	importordownload
+		| not importcontent = doimport
+		| otherwise = dodownload
+
+	doimport cidmap db (loc, (cid, sz)) _largematcher =
+		case Remote.importKey ia of
+			Nothing -> error "internal" -- checked earlier
+			Just a -> do
+				let importer p = do
+					k <- a loc cid sz p
+					checkSecureHashes k >>= \case
+						Nothing -> do
+							recordcidkey cidmap db cid k
+							logChange k (Remote.uuid remote) InfoPresent
+							return (Right k)
+						Just msg -> giveup (msg ++ " to import")
+				let runimport p = tryNonAsync (importer p) >>= \case
+					Right k -> return $ Just (loc, k)
+					Left e -> do
+						warning (show e)
+						return Nothing
+				metered Nothing sz $
+					const runimport
+
+	dodownload cidmap db (loc, (cid, sz)) largematcher = do
 		let downloader tmpfile p = do
 			k <- Remote.retrieveExportWithContentIdentifier ia loc cid tmpfile (mkkey loc tmpfile largematcher) p
 			case keyGitSha k of
@@ -394,9 +431,10 @@ downloadImport remote importtreeconfig importablecontents = do
 		metered Nothing tmpkey $
 			const (rundownload tmpfile)
   where
+	ia = Remote.importActions remote
 	tmpkey = importKey cid sz
-	ia = Remote.importActions remote
 	mkkey loc tmpfile largematcher = do
 		f <- fromRepo $ fromTopFilePath $ locworktreefilename loc
 		matcher <- largematcher (fromRawFilePath f)

View file

@@ -1,6 +1,10 @@
 git-annex (8.20200618) UNRELEASED; urgency=medium
 
+  * import: Added --no-content option, which avoids downloading files
+    from a special remote. Only supported by some special remotes:
+    directory
   * Honor annex.largefiles when importing a tree from a special remote.
+    (Except for when --no-content is used.)
   * Fix a deadlock that could occur after git-annex got an unlocked
     file, causing the command to hang indefinitely. Known to happen on
     vfat filesystems, possibly others.
@@ -15,7 +19,7 @@ git-annex (8.20200618) UNRELEASED; urgency=medium
     itempubminute, itempubsecond.
   * Made several special remotes support locking content on them,
     which allows dropping from other special remotes in some situations
-    where it was not possible before.
+    where it was not possible before. Supported special remotes:
     S3 (with versioning=yes), git-lfs, tahoe
   * Fix reversion that broke passing annex.* and remote.*.annex-*
     git configs with -c. (Since version 8.20200330.)

View file

@@ -1,6 +1,6 @@
 {- git-annex command
  -
- - Copyright 2012-2019 Joey Hess <id@joeyh.name>
+ - Copyright 2012-2020 Joey Hess <id@joeyh.name>
  -
  - Licensed under the GNU AGPL version 3 or higher.
  -}
@@ -17,6 +17,7 @@ import qualified Command.Reinject
 import qualified Types.Remote as Remote
 import qualified Git.Ref
 import Utility.CopyFile
+import Utility.OptParse
 import Backend
 import Types.KeySource
 import Annex.CheckIgnore
@@ -53,12 +54,16 @@ data ImportOptions
 	{ importFromRemote :: DeferredParse Remote
 	, importToBranch :: Branch
 	, importToSubDir :: Maybe FilePath
+	, importContent :: Bool
 	}
 
 optParser :: CmdParamsDesc -> Parser ImportOptions
 optParser desc = do
 	ps <- cmdParams desc
 	mfromremote <- optional $ parseRemoteOption <$> parseFromOption
+	content <- invertableSwitch "content" True
+		( help "do not get contents of imported files"
+		)
 	dupmode <- fromMaybe Default <$> optional duplicateModeParser
 	return $ case mfromremote of
 		Nothing -> LocalImportOptions ps dupmode
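
The invertableSwitch helper (from git-annex's Utility.OptParse, imported above) is what makes both --content and --no-content available from this one declaration. A rough self-contained sketch of such a combinator in optparse-applicative terms, with assumed semantics (the real helper may differ in details such as where the help text attaches):

    import Data.Maybe (fromMaybe)
    import Options.Applicative

    -- Sketch: --NAME parses to True, --no-NAME to False, and the given
    -- default is used when neither flag appears. The help text here is
    -- attached to the --no-NAME form, matching the call above where the
    -- help string describes --no-content.
    invertableSwitch :: String -> Bool -> Mod FlagFields Bool -> Parser Bool
    invertableSwitch name def helpmod = fromMaybe def <$> optional
        ( flag' True (long name)
        <|> flag' False (long ("no-" ++ name) <> helpmod)
        )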
@@ -68,6 +73,7 @@ optParser desc = do
 			in RemoteImportOptions r
 				(Ref (encodeBS' branch))
 				(if null subdir then Nothing else Just subdir)
+				content
 		_ -> giveup "expected BRANCH[:SUBDIR]"
 
 data DuplicateMode = Default | Duplicate | DeDuplicate | CleanDuplicates | SkipDuplicates | ReinjectDuplicates
@@ -114,7 +120,7 @@ seek o@(RemoteImportOptions {}) = startConcurrency commandStages $ do
 		(pure Nothing)
 		(Just <$$> inRepo . toTopFilePath . toRawFilePath)
 		(importToSubDir o)
-	seekRemote r (importToBranch o) subdir
+	seekRemote r (importToBranch o) subdir (importContent o)
 
 startLocal :: AddUnlockedMatcher -> GetFileMatcher -> DuplicateMode -> (FilePath, FilePath) -> CommandStart
 startLocal addunlockedmatcher largematcher mode (srcfile, destfile) =
@@ -258,8 +264,8 @@ verifyExisting key destfile (yes, no) = do
 	verifyEnoughCopiesToDrop [] key Nothing need [] preverified tocheck
 		(const yes) no
 
-seekRemote :: Remote -> Branch -> Maybe TopFilePath -> CommandSeek
-seekRemote remote branch msubdir = do
+seekRemote :: Remote -> Branch -> Maybe TopFilePath -> Bool -> CommandSeek
+seekRemote remote branch msubdir importcontent = do
 	importtreeconfig <- case msubdir of
 		Nothing -> return ImportTree
 		Just subdir ->
@@ -279,7 +285,7 @@ seekRemote remote branch msubdir = do
 	void $ includeCommandAction (listContents remote importabletvar)
 	liftIO (atomically (readTVar importabletvar)) >>= \case
 		Nothing -> return ()
-		Just importable -> downloadImport remote importtreeconfig importable >>= \case
+		Just importable -> importKeys remote importtreeconfig importcontent importable >>= \case
 			Nothing -> warning $ concat
 				[ "Failed to import some files from "
 				, Remote.name remote

View file

@@ -455,7 +455,7 @@ importRemote o mergeconfig remote currbranch
 		let subdir = if S.null p
 			then Nothing
 			else Just (asTopFilePath p)
-		Command.Import.seekRemote remote branch subdir
+		Command.Import.seekRemote remote branch subdir True
 		void $ mergeRemote remote currbranch mergeconfig o
   where
 	wantpull = remoteAnnexPull (Remote.gitconfig remote)

View file

@@ -32,6 +32,8 @@ import Types.Import
 import qualified Remote.Directory.LegacyChunked as Legacy
 import Annex.Content
 import Annex.UUID
+import Backend
+import Types.KeySource
 import Utility.Metered
 import Utility.Tmp
 import Utility.InodeCache
@@ -88,6 +90,7 @@ gen r u rc gc rs = do
 		}
 	, importActions = ImportActions
 		{ listImportableContents = listImportableContentsM dir
+		, importKey = Just (importKeyM dir)
 		, retrieveExportWithContentIdentifier = retrieveExportWithContentIdentifierM dir
 		, storeExportWithContentIdentifier = storeExportWithContentIdentifierM dir
 		, removeExportWithContentIdentifier = removeExportWithContentIdentifierM dir
@@ -342,6 +345,26 @@ mkContentIdentifier f st =
 	fmap (ContentIdentifier . encodeBS . showInodeCache)
 		<$> toInodeCache noTSDelta f st
 
+guardSameContentIdentifiers :: a -> ContentIdentifier -> Maybe ContentIdentifier -> a
+guardSameContentIdentifiers cont old new
+	| new == Just old = cont
+	| otherwise = giveup "file content has changed"
+
+importKeyM :: FilePath -> ExportLocation -> ContentIdentifier -> ByteSize -> MeterUpdate -> Annex Key
+importKeyM dir loc cid sz p = do
+	backend <- chooseBackend (fromRawFilePath f)
+	k <- fst <$> genKey ks p backend
+	currcid <- liftIO $ mkContentIdentifier absf =<< getFileStatus absf
+	guardSameContentIdentifiers (return k) cid currcid
+  where
+	f = fromExportLocation loc
+	absf = dir </> fromRawFilePath f
+	ks = KeySource
+		{ keyFilename = f
+		, contentLocation = toRawFilePath absf
+		, inodeCache = Nothing
+		}
+
 retrieveExportWithContentIdentifierM :: FilePath -> ExportLocation -> ContentIdentifier -> FilePath -> Annex Key -> MeterUpdate -> Annex Key
 retrieveExportWithContentIdentifierM dir loc cid dest mkkey p =
 	precheck $ docopy postcheck
@@ -376,7 +399,7 @@ retrieveExportWithContentIdentifierM dir loc cid dest mkkey p =
 	-- Check before copy, to avoid expensive copy of wrong file
 	-- content.
-	precheck cont = comparecid cont
+	precheck cont = guardSameContentIdentifiers cont cid
 		=<< liftIO . mkContentIdentifier f
 		=<< liftIO (getFileStatus f)
@@ -404,11 +427,7 @@ retrieveExportWithContentIdentifierM dir loc cid dest mkkey p =
 #else
 		=<< getFileStatus f
 #endif
-		comparecid cont currcid
-
-	comparecid cont currcid
-		| currcid == Just cid = cont
-		| otherwise = giveup "file content has changed"
+		guardSameContentIdentifiers cont cid currcid
 
 storeExportWithContentIdentifierM :: FilePath -> FilePath -> Key -> ExportLocation -> [ContentIdentifier] -> MeterUpdate -> Annex ContentIdentifier
 storeExportWithContentIdentifierM dir src _k loc overwritablecids p = do

View file

@@ -56,6 +56,7 @@ instance HasImportUnsupported (ParsedRemoteConfig -> RemoteGitConfig -> Annex Bool) where
 instance HasImportUnsupported (ImportActions Annex) where
 	importUnsupported = ImportActions
 		{ listImportableContents = return Nothing
+		, importKey = Nothing
 		, retrieveExportWithContentIdentifier = nope
 		, storeExportWithContentIdentifier = nope
 		, removeExportWithContentIdentifier = nope

View file

@@ -276,6 +276,18 @@ data ImportActions a = ImportActions
 	-- May also find old versions of files that are still stored in the
 	-- remote.
 	{ listImportableContents :: a (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
+	-- Imports a file from the remote, without downloading it,
+	-- by generating a Key (of any type).
+	--
+	-- May update the progress meter if it needs to perform an
+	-- expensive operation, such as hashing a local file.
+	--
+	-- Ensures that the key corresponds to the ContentIdentifier,
+	-- bearing in mind that the file on the remote may have changed
+	-- since the ContentIdentifier was generated.
+	--
+	-- Throws exception on failure.
+	, importKey :: Maybe (ExportLocation -> ContentIdentifier -> ByteSize -> MeterUpdate -> a Key)
 	-- Retrieves a file from the remote. Ensures that the file
 	-- it retrieves has the requested ContentIdentifier.
 	--
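
Wrapping importKey in Maybe is how ImportActions models optional capabilities: a remote that cannot generate a key without downloading leaves it Nothing (as importUnsupported does elsewhere in this commit), and callers such as importKeys must check for support up front. A stripped-down illustration of the pattern, using hypothetical types rather than git-annex's:

    -- Hypothetical capability record: importKeyAction is present only
    -- for remotes that can produce a key without downloading content.
    data RemoteActions m = RemoteActions
        { downloadFile :: FilePath -> m ()
        , importKeyAction :: Maybe (FilePath -> m String)
        }

    -- Callers check the capability before relying on it, just as
    -- importKeys does with Remote.importKey before doimport runs.
    importWithoutContent :: Monad m => RemoteActions m -> FilePath -> m String
    importWithoutContent acts f = case importKeyAction acts of
        Nothing -> error "remote does not support importing without downloading"
        Just gen -> gen f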

View file

@@ -14,11 +14,11 @@ or from a directory.
 # IMPORTING FROM A SPECIAL REMOTE
 
-Importing from a special remote first downloads all new content from it,
-and then constructs a git commit that reflects files that have changed on
-the special remote since the last time git-annex looked at it. Merging that
-commit into your repository will update it to reflect changes made on the
-special remote.
+Importing from a special remote first downloads or hashes all new content
+from it, and then constructs a git commit that reflects files that have
+changed on the special remote since the last time git-annex looked at it.
+Merging that commit into your repository will update it to reflect changes
+made on the special remote.
 
 This way, something can be using the special remote for file storage,
 adding files, modifying files, and deleting files, and you can track those
@@ -84,6 +84,38 @@ set. This includes expressions containing "copies=", "metadata=", and other
 things that depend on the key. Preferred content expressions containing
 "include=", "exclude=" "smallerthan=", "largerthan=" will work.
 
+# OPTIONS FOR IMPORTING FROM A SPECIAL REMOTE
+
+* `--content`, `--no-content`
+
+  Controls whether content is downloaded from the special remote.
+  The default is to download content into the git-annex repository.
+
+  With --no-content, git-annex keys are generated from information
+  provided by the special remote, without downloading it. Commands like
+  `git-annex get` can later be used to download files, as desired.
+
+  The --no-content option is not supported by all special remotes,
+  and the kind of git-annex key that is generated is left up to
+  each special remote. So while the directory special remote hashes
+  the file and generates the same key it usually would, other
+  special remotes may use unusual keys like SHA1, or WORM, depending
+  on the limitations of the special remote.
+
+  The annex.securehashesonly configuration, if set, will prevent
+  --no-content importing from a special remote that uses insecure keys.
+
+  Using --no-content prevents annex.largefiles from being checked,
+  because the files are not downloaded. So, when using --no-content,
+  files that would usually be considered non-large will be added to the
+  annex, rather than adding them directly to the git repository.
+
+  Note that a different git tree will often be generated when using
+  --no-content than would be generated when using --content, because
+  the options cause different kinds of keys to be used when importing
+  new/changed files. So mixing uses of --content and --no-content can
+  lead to merge conflicts in some situations.
+
 # IMPORTING FROM A DIRECTORY
 
 When run with a path, `git annex import` moves files from somewhere outside

View file

@@ -856,8 +856,10 @@ Like other git commands, git-annex is configured via `.git/config`.
 	When this is set, the contents of files using cryptographically
 	insecure hashes will not be allowed to be added to the repository.
 
-	Also, git-annex fsck` will complain about any files present in
-	the repository that use insecure hashes.
+	Also, `git-annex fsck` will complain about any files present in
+	the repository that use insecure hashes. And,
+	`git-annex import --no-content` will refuse to import files
+	from special remotes using insecure hashes.
 
 	To configure the behavior in new clones of the repository,
 	this can be set using [[git-annex-config]].

View file

@@ -0,0 +1,12 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 7"""
+ date="2020-07-03T16:27:10Z"
+ content="""
+Hmm, --fast is not very descriptive for this when it's used with a
+directory special remote, because hashing is almost as slow as copying.
+
+Probably better to use --no-content and --content, same as sync.
+(Though unfortunately with an opposite default; iirc there are plans
+somewhere to transition sync to default to --content.)
+"""]]

View file

@@ -0,0 +1,12 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 8"""
+ date="2020-07-03T17:39:19Z"
+ content="""
+Note that, since exporttree remotes are always untrusted, after importing
+--no-content from one, fsck is going to complain about it being the only
+location with the content.
+
+Which seems right.. That content could be overwritten at any time and the
+only copy lost. But still worth keeping in mind.
+"""]]