import --no-content: Check annex.largefiles
Import small files into git, the same as is done when importing with content. This means that, for small files, --no-content does download them. If the largefiles expression needs the file content available (due to mimetype or mimeencoding being used), the import will fail. This commit was sponsored by Jake Vosloo on Patreon.
This commit is contained in:
parent
8b74f01a26
commit
15c1ee16d9
6 changed files with 106 additions and 53 deletions
119
Annex/Import.hs
119
Annex/Import.hs
|
@ -48,7 +48,7 @@ import Logs.Location
|
||||||
import Logs.PreferredContent
|
import Logs.PreferredContent
|
||||||
import Types.FileMatcher
|
import Types.FileMatcher
|
||||||
import Annex.FileMatcher
|
import Annex.FileMatcher
|
||||||
import Utility.Matcher (isEmpty)
|
import qualified Utility.Matcher
|
||||||
import qualified Database.Export as Export
|
import qualified Database.Export as Export
|
||||||
import qualified Database.ContentIdentifier as CIDDb
|
import qualified Database.ContentIdentifier as CIDDb
|
||||||
import qualified Logs.ContentIdentifier as CIDLog
|
import qualified Logs.ContentIdentifier as CIDLog
|
||||||
|
@ -388,29 +388,70 @@ importKeys remote importtreeconfig importcontent importablecontents = do
|
||||||
| not importcontent = doimport
|
| not importcontent = doimport
|
||||||
| otherwise = dodownload
|
| otherwise = dodownload
|
||||||
|
|
||||||
doimport cidmap db (loc, (cid, sz)) _largematcher =
|
doimport cidmap db (loc, (cid, sz)) largematcher =
|
||||||
case Remote.importKey ia of
|
case Remote.importKey ia of
|
||||||
Nothing -> error "internal" -- checked earlier
|
Nothing -> error "internal" -- checked earlier
|
||||||
Just a -> do
|
Just importkey -> do
|
||||||
let importer p = do
|
f <- locworktreefile loc
|
||||||
unsizedk <- a loc cid p
|
matcher <- largematcher (fromRawFilePath f)
|
||||||
-- This avoids every remote needing
|
when (Utility.Matcher.introspect matchNeedsFileContent matcher) $
|
||||||
-- to add the size.
|
giveup "annex.largefiles configuration examines file contents, so cannot import without content."
|
||||||
let k = alterKey unsizedk $ \kd -> kd
|
let mi = MatchingInfo ProvidedInfo
|
||||||
{ keySize = keySize kd <|> Just sz }
|
{ providedFilePath = f
|
||||||
checkSecureHashes k >>= \case
|
, providedKey = Nothing
|
||||||
Nothing -> do
|
, providedFileSize = sz
|
||||||
recordcidkey cidmap db cid k
|
, providedMimeType = Nothing
|
||||||
logChange k (Remote.uuid remote) InfoPresent
|
, providedMimeEncoding = Nothing
|
||||||
return (Right k)
|
}
|
||||||
Just msg -> giveup (msg ++ " to import")
|
islargefile <- checkMatcher' matcher mi mempty
|
||||||
let runimport p = tryNonAsync (importer p) >>= \case
|
metered Nothing sz $ const $ if islargefile
|
||||||
Right k -> return $ Just (loc, k)
|
then doimportlarge importkey cidmap db loc cid sz
|
||||||
|
else doimportsmall cidmap db loc cid sz
|
||||||
|
|
||||||
|
doimportlarge importkey cidmap db loc cid sz p =
|
||||||
|
tryNonAsync importer >>= \case
|
||||||
|
Right k -> return $ Just (loc, k)
|
||||||
|
Left e -> do
|
||||||
|
warning (show e)
|
||||||
|
return Nothing
|
||||||
|
where
|
||||||
|
importer = do
|
||||||
|
unsizedk <- importkey loc cid p
|
||||||
|
-- This avoids every remote needing
|
||||||
|
-- to add the size.
|
||||||
|
let k = alterKey unsizedk $ \kd -> kd
|
||||||
|
{ keySize = keySize kd <|> Just sz }
|
||||||
|
checkSecureHashes k >>= \case
|
||||||
|
Nothing -> do
|
||||||
|
recordcidkey cidmap db cid k
|
||||||
|
logChange k (Remote.uuid remote) InfoPresent
|
||||||
|
return (Right k)
|
||||||
|
Just msg -> giveup (msg ++ " to import")
|
||||||
|
|
||||||
|
-- The file is small, so is added to git, so while importing
|
||||||
|
-- without content does not retrieve annexed files, it does
|
||||||
|
-- need to retrieve this file.
|
||||||
|
doimportsmall cidmap db loc cid sz p = do
|
||||||
|
let downloader tmpfile = do
|
||||||
|
k <- Remote.retrieveExportWithContentIdentifier
|
||||||
|
ia loc cid tmpfile
|
||||||
|
(mkkey tmpfile)
|
||||||
|
p
|
||||||
|
case keyGitSha k of
|
||||||
|
Just sha -> do
|
||||||
|
recordcidkey cidmap db cid k
|
||||||
|
return (Left sha)
|
||||||
|
Nothing -> error "internal"
|
||||||
|
checkDiskSpaceToGet tmpkey Nothing $
|
||||||
|
withTmp tmpkey $ \tmpfile ->
|
||||||
|
tryNonAsync (downloader tmpfile) >>= \case
|
||||||
|
Right v -> return $ Just (loc, v)
|
||||||
Left e -> do
|
Left e -> do
|
||||||
warning (show e)
|
warning (show e)
|
||||||
return Nothing
|
return Nothing
|
||||||
metered Nothing sz $
|
where
|
||||||
const runimport
|
tmpkey = importKey cid sz
|
||||||
|
mkkey tmpfile = gitShaKey <$> hashFile tmpfile
|
||||||
|
|
||||||
dodownload cidmap db (loc, (cid, sz)) largematcher = do
|
dodownload cidmap db (loc, (cid, sz)) largematcher = do
|
||||||
f <- locworktreefile loc
|
f <- locworktreefile loc
|
||||||
|
@ -418,7 +459,7 @@ importKeys remote importtreeconfig importcontent importablecontents = do
|
||||||
let downloader tmpfile p = do
|
let downloader tmpfile p = do
|
||||||
k <- Remote.retrieveExportWithContentIdentifier
|
k <- Remote.retrieveExportWithContentIdentifier
|
||||||
ia loc cid tmpfile
|
ia loc cid tmpfile
|
||||||
(mkkey f tmpfile largematcher)
|
(mkkey f tmpfile)
|
||||||
p
|
p
|
||||||
case keyGitSha k of
|
case keyGitSha k of
|
||||||
Nothing -> do
|
Nothing -> do
|
||||||
|
@ -446,25 +487,25 @@ importKeys remote importtreeconfig importcontent importablecontents = do
|
||||||
where
|
where
|
||||||
tmpkey = importKey cid sz
|
tmpkey = importKey cid sz
|
||||||
|
|
||||||
ia = Remote.importActions remote
|
mkkey f tmpfile = do
|
||||||
|
matcher <- largematcher (fromRawFilePath f)
|
||||||
|
let mi = MatchingFile FileInfo
|
||||||
|
{ matchFile = f
|
||||||
|
, contentFile = Just (toRawFilePath tmpfile)
|
||||||
|
}
|
||||||
|
islargefile <- checkMatcher' matcher mi mempty
|
||||||
|
if islargefile
|
||||||
|
then do
|
||||||
|
backend <- chooseBackend (fromRawFilePath f)
|
||||||
|
let ks = KeySource
|
||||||
|
{ keyFilename = f
|
||||||
|
, contentLocation = toRawFilePath tmpfile
|
||||||
|
, inodeCache = Nothing
|
||||||
|
}
|
||||||
|
fst <$> genKey ks nullMeterUpdate backend
|
||||||
|
else gitShaKey <$> hashFile tmpfile
|
||||||
|
|
||||||
mkkey f tmpfile largematcher = do
|
ia = Remote.importActions remote
|
||||||
matcher <- largematcher (fromRawFilePath f)
|
|
||||||
let mi = MatchingFile FileInfo
|
|
||||||
{ matchFile = f
|
|
||||||
, contentFile = Just (toRawFilePath tmpfile)
|
|
||||||
}
|
|
||||||
islargefile <- checkMatcher' matcher mi mempty
|
|
||||||
if islargefile
|
|
||||||
then do
|
|
||||||
backend <- chooseBackend (fromRawFilePath f)
|
|
||||||
let ks = KeySource
|
|
||||||
{ keyFilename = f
|
|
||||||
, contentLocation = toRawFilePath tmpfile
|
|
||||||
, inodeCache = Nothing
|
|
||||||
}
|
|
||||||
fst <$> genKey ks nullMeterUpdate backend
|
|
||||||
else gitShaKey <$> hashFile tmpfile
|
|
||||||
|
|
||||||
locworktreefile loc = fromRepo $ fromTopFilePath $ asTopFilePath $
|
locworktreefile loc = fromRepo $ fromTopFilePath $ asTopFilePath $
|
||||||
case importtreeconfig of
|
case importtreeconfig of
|
||||||
|
@ -563,7 +604,7 @@ shouldImport dbhandle matcher loc sz =
|
||||||
|
|
||||||
filterImportableContents :: Remote -> FileMatcher Annex -> ImportableContents (ContentIdentifier, ByteSize) -> Annex (ImportableContents (ContentIdentifier, ByteSize))
|
filterImportableContents :: Remote -> FileMatcher Annex -> ImportableContents (ContentIdentifier, ByteSize) -> Annex (ImportableContents (ContentIdentifier, ByteSize))
|
||||||
filterImportableContents r matcher importable
|
filterImportableContents r matcher importable
|
||||||
| isEmpty matcher = return importable
|
| Utility.Matcher.isEmpty matcher = return importable
|
||||||
| otherwise = do
|
| otherwise = do
|
||||||
dbhandle <- Export.openDb (Remote.uuid r)
|
dbhandle <- Export.openDb (Remote.uuid r)
|
||||||
go dbhandle importable
|
go dbhandle importable
|
||||||
|
|
|
@ -23,6 +23,10 @@ git-annex (8.20200909) UNRELEASED; urgency=medium
|
||||||
local repo and remotes don't use include=/exclude=.
|
local repo and remotes don't use include=/exclude=.
|
||||||
* Sped up seeking for files to operate on, when using options like
|
* Sped up seeking for files to operate on, when using options like
|
||||||
--copies or --in, by around 20%
|
--copies or --in, by around 20%
|
||||||
|
* import --no-content: Check annex.largefiles, and import small
|
||||||
|
files into git, the same as is done when importing with content.
|
||||||
|
If the largefiles expression needs the file content available
|
||||||
|
(due to mimetype or mimeencoding being used), the import will fail.
|
||||||
|
|
||||||
-- Joey Hess <id@joeyh.name> Mon, 14 Sep 2020 18:34:37 -0400
|
-- Joey Hess <id@joeyh.name> Mon, 14 Sep 2020 18:34:37 -0400
|
||||||
|
|
||||||
|
|
2
Limit.hs
2
Limit.hs
|
@ -69,7 +69,7 @@ getMatcher' = go =<< Annex.getState Annex.limit
|
||||||
{- Checks if the user-specified limits contains anything that meets the
|
{- Checks if the user-specified limits contains anything that meets the
|
||||||
- condition. -}
|
- condition. -}
|
||||||
introspect :: (MatchFiles Annex -> Bool) -> Annex Bool
|
introspect :: (MatchFiles Annex -> Bool) -> Annex Bool
|
||||||
introspect c = any c <$> getMatcher'
|
introspect c = Utility.Matcher.introspect c <$> getMatcher'
|
||||||
|
|
||||||
{- Adds something to the limit list, which is built up reversed. -}
|
{- Adds something to the limit list, which is built up reversed. -}
|
||||||
add :: Utility.Matcher.Token (MatchFiles Annex) -> Annex ()
|
add :: Utility.Matcher.Token (MatchFiles Annex) -> Annex ()
|
||||||
|
|
|
@ -27,6 +27,7 @@ module Utility.Matcher (
|
||||||
matchMrun,
|
matchMrun,
|
||||||
isEmpty,
|
isEmpty,
|
||||||
combineMatchers,
|
combineMatchers,
|
||||||
|
introspect,
|
||||||
|
|
||||||
prop_matcher_sane
|
prop_matcher_sane
|
||||||
) where
|
) where
|
||||||
|
@ -147,6 +148,10 @@ combineMatchers a b
|
||||||
| isEmpty b = a
|
| isEmpty b = a
|
||||||
| otherwise = a `MOr` b
|
| otherwise = a `MOr` b
|
||||||
|
|
||||||
|
{- Checks if anything in the matcher meets the condition. -}
|
||||||
|
introspect :: (a -> Bool) -> Matcher a -> Bool
|
||||||
|
introspect = any
|
||||||
|
|
||||||
prop_matcher_sane :: Bool
|
prop_matcher_sane :: Bool
|
||||||
prop_matcher_sane = all (\m -> match dummy m ()) $ map generate
|
prop_matcher_sane = all (\m -> match dummy m ()) $ map generate
|
||||||
[ [Operation True]
|
[ [Operation True]
|
||||||
|
|
|
@ -91,7 +91,8 @@ the tree of files on the remote, even when importing into a subdirectory.
|
||||||
|
|
||||||
* `--content`, `--no-content`
|
* `--content`, `--no-content`
|
||||||
|
|
||||||
Controls whether content is downloaded from the special remote.
|
Controls whether annexed content is downloaded from the special remote.
|
||||||
|
|
||||||
The default is to download content into the git-annex repository.
|
The default is to download content into the git-annex repository.
|
||||||
|
|
||||||
With --no-content, git-annex keys are generated from information
|
With --no-content, git-annex keys are generated from information
|
||||||
|
@ -108,16 +109,12 @@ the tree of files on the remote, even when importing into a subdirectory.
|
||||||
The annex.securehashesonly configuration, if set, will prevent
|
The annex.securehashesonly configuration, if set, will prevent
|
||||||
--no-content importing from a special remote that uses insecure keys.
|
--no-content importing from a special remote that uses insecure keys.
|
||||||
|
|
||||||
Using --no-content prevents annex.largefiles from being checked,
|
Note that a different git tree may be generated when using
|
||||||
because the files are not downloaded. So, when using --no-content,
|
|
||||||
files that would usually be considered non-large will be added to the
|
|
||||||
annex, rather than adding them directly to the git repository.
|
|
||||||
|
|
||||||
Note that a different git tree will often be generated when using
|
|
||||||
--no-content than would be generated when using --content, because
|
--no-content than would be generated when using --content, because
|
||||||
the options cause different kinds of keys to be used when importing
|
the options cause different kinds of keys to be used when importing
|
||||||
new/changed files. So mixing uses of --content and --no-content can
|
new/changed files. So mixing uses of --content and --no-content can
|
||||||
lead to merge conflicts in some situations.
|
lead to merge conflicts in some situations. Some special remotes,
|
||||||
|
notably the directory special remote, avoid this problem.
|
||||||
|
|
||||||
# IMPORTING FROM A DIRECTORY
|
# IMPORTING FROM A DIRECTORY
|
||||||
|
|
||||||
|
|
|
@ -13,6 +13,13 @@ expression needs the file content, when importing from a special remote.
|
||||||
Or could detect when those are used, and only allow
|
Or could detect when those are used, and only allow
|
||||||
importing with --content in that case.
|
importing with --content in that case.
|
||||||
|
|
||||||
|
> So this needs a way to introspect a preferred content expression
|
||||||
|
> to see if the terms used in it
|
||||||
|
> match some criteria. Another todo that also needs that is
|
||||||
|
> [[faster_key_lookup_for_limits]] --[[Joey]]
|
||||||
|
|
||||||
|
> > That introspection is implemented now.
|
||||||
|
|
||||||
Which is better? The repo may have annex.largefiles set in gitattributes
|
Which is better? The repo may have annex.largefiles set in gitattributes
|
||||||
for good workflow reasons, so it would be very annoying to have importing
|
for good workflow reasons, so it would be very annoying to have importing
|
||||||
error out. And if importing ignores the configuration, the user is likely
|
error out. And if importing ignores the configuration, the user is likely
|
||||||
|
@ -21,9 +28,8 @@ and say "sorry, I can't, need the file content", the user can then choose
|
||||||
between changing largefiles or using --content, and it's clear how they're
|
between changing largefiles or using --content, and it's clear how they're
|
||||||
asking for contradictory things.
|
asking for contradictory things.
|
||||||
|
|
||||||
> So this needs a way to introspect a preferred content expression
|
Hmm, if largefiles does not match, it would have to download the file
|
||||||
> to see if the terms used in it
|
content to add it to git, even though --no-content is used. A little weird,
|
||||||
> match some criteria. Another todo that also needs that is
|
but it's a small file, presumably.
|
||||||
> [[faster_key_lookup_for_limits]] --[[Joey]]
|
|
||||||
|
|
||||||
> > That introspection is implemented now.
|
[[done]] --[[Joey]]
|
||||||
|
|
Loading…
Reference in a new issue