diff --git a/Annex/Import.hs b/Annex/Import.hs index 2d2526a544..b9c1b74e87 100644 --- a/Annex/Import.hs +++ b/Annex/Import.hs @@ -1,6 +1,6 @@ {- git-annex import from remotes - - - Copyright 2019-2024 Joey Hess + - Copyright 2019-2025 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -64,6 +64,7 @@ import qualified Utility.Matcher import qualified Database.Export as Export import qualified Database.ContentIdentifier as CIDDb import qualified Logs.ContentIdentifier as CIDLog +import qualified Utility.OsString as OS import Backend.Utilities import Control.Concurrent.STM @@ -1048,6 +1049,10 @@ pruneImportMatcher = Utility.Matcher.pruneMatcher matchNeedsKey - write a git tree that contains that, git will complain and refuse to - check it out. - + - Filters out any paths that contain an empty filename, because git cannot + - represent an empty filename in a tree, but some special remotes do + - support empty filenames. + - - Filters out new things not matching the FileMatcher or that are - gitignored. However, files that are already in git get imported - regardless. (Similar to how git add behaves on gitignored files.) @@ -1094,19 +1099,35 @@ getImportableContents r importtreeconfig ci matcher = do wanted dbhandle (loc, (_cid, sz)) | ingitdir = pure False + | OS.null (fromImportLocation loc) = do + warning $ UnquotedString "Cannot import a file with an empty filename" + return False + | isdirectory = do + warning $ UnquotedString "Cannot import a file with a name that appears to be a directory: " + <> QuotedPath (fromImportLocation loc) + return False | otherwise = isknown <||> (matches <&&> notignored) where -- Checks, from least to most expensive. 
#ifdef mingw32_HOST_OS - ingitdir = ".git" `elem` Posix.splitDirectories (fromOsPath (fromImportLocation loc)) + ingitdir = ".git" `elem` Posix.splitDirectories loc' #else ingitdir = literalOsPath ".git" `elem` splitDirectories (fromImportLocation loc) +#endif +#ifdef mingw32_HOST_OS + isdirectory = Posix.dropFileName loc' == loc' +#else + isdirectory = dropFileName (fromImportLocation loc) == fromImportLocation loc #endif matches = matchesImportLocation matcher loc sz isknown = isKnownImportLocation dbhandle loc notignored = notIgnoredImportLocation importtreeconfig ci loc - + +#ifdef mingw32_HOST_OS + loc' = fromOsPath (fromImportLocation loc) +#endif + wantedunder dbhandle root (loc, v) = wanted dbhandle (importableContentsChunkFullLocation root loc, v) diff --git a/CHANGELOG b/CHANGELOG index a2036e20fd..2b2823d998 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,10 @@ +git-annex (10.20250606) UNRELEASED; urgency=medium + + * Skip and warn when a tree import includes empty filenames, + which can happen with eg a S3 bucket. 
+ + -- Joey Hess Mon, 23 Jun 2025 11:11:29 -0400 + git-annex (10.20250605) upstream; urgency=medium * sync: Push the current branch first, rather than a synced branch, diff --git a/doc/bugs/import__58_____34__fatal__58___empty_filename_in_tree_entry__34__.mdwn b/doc/bugs/import__58_____34__fatal__58___empty_filename_in_tree_entry__34__.mdwn index f0ffec47fb..497f4bdbb5 100644 --- a/doc/bugs/import__58_____34__fatal__58___empty_filename_in_tree_entry__34__.mdwn +++ b/doc/bugs/import__58_____34__fatal__58___empty_filename_in_tree_entry__34__.mdwn @@ -44,3 +44,5 @@ the version from pypi @mih started to build recently [[!meta author=yoh]] [[!tag projects/dandi]] + +> [[fixed|done]] --[[Joey]] diff --git a/doc/bugs/import__58_____34__fatal__58___empty_filename_in_tree_entry__34__/comment_1_85ea14723ade98ed24658ee13b42814f._comment b/doc/bugs/import__58_____34__fatal__58___empty_filename_in_tree_entry__34__/comment_1_85ea14723ade98ed24658ee13b42814f._comment new file mode 100644 index 0000000000..e61e32ea65 --- /dev/null +++ b/doc/bugs/import__58_____34__fatal__58___empty_filename_in_tree_entry__34__/comment_1_85ea14723ade98ed24658ee13b42814f._comment @@ -0,0 +1,29 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2025-06-23T14:32:13Z" + content=""" +Your hypothesis is right, it's items in the bucket with names ending in "/". 
+ +After fixing git-annex to skip and warn about those, it looks like this: + + list s3-origin + Cannot import a file with a name that appears to be a directory: models/smartspim_production_models/ + + Cannot import a file with a name that appears to be a directory: models/smartspim_production_models/model_2_12202024/ + + Cannot import a file with a name that appears to be a directory: point_annotations/ + + Cannot import a file with a name that appears to be a directory: point_annotations/06-21-2024/ + ok + +Note that "models/smartspim_production_models/config.json" is a file in the +bucket located "inside" the first path. So this is not a case of an empty +directory being somehow stored to an S3 bucket as a file, but of something else. +I have not looked at the contents of these objects, as I would likely not +understand them anyway. + +I couldn't think of a better method than to warn and skip them. Any name mangling +would take a name that could be used by some other file. And not warning risks the user +being surprised when not all of the data in the bucket gets imported. +"""]]