Skip and warn when a tree import includes empty filenames
Which can happen with e.g. an S3 bucket. Sponsored-by: Dartmouth College's DANDI project
This commit is contained in:
parent
0f4531e9a7
commit
6818e69b81
4 changed files with 62 additions and 3 deletions
|
@ -1,6 +1,6 @@
|
|||
{- git-annex import from remotes
|
||||
-
|
||||
- Copyright 2019-2024 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2019-2025 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
@ -64,6 +64,7 @@ import qualified Utility.Matcher
|
|||
import qualified Database.Export as Export
|
||||
import qualified Database.ContentIdentifier as CIDDb
|
||||
import qualified Logs.ContentIdentifier as CIDLog
|
||||
import qualified Utility.OsString as OS
|
||||
import Backend.Utilities
|
||||
|
||||
import Control.Concurrent.STM
|
||||
|
@ -1048,6 +1049,10 @@ pruneImportMatcher = Utility.Matcher.pruneMatcher matchNeedsKey
|
|||
- write a git tree that contains that, git will complain and refuse to
|
||||
- check it out.
|
||||
-
|
||||
- Filters out any paths that contain an empty filename, because git cannot
|
||||
- represent an empty filename in a tree, but some special remotes do
|
||||
- support empty filenames.
|
||||
-
|
||||
- Filters out new things not matching the FileMatcher or that are
|
||||
- gitignored. However, files that are already in git get imported
|
||||
- regardless. (Similar to how git add behaves on gitignored files.)
|
||||
|
@ -1094,19 +1099,35 @@ getImportableContents r importtreeconfig ci matcher = do
|
|||
|
||||
wanted dbhandle (loc, (_cid, sz))
|
||||
| ingitdir = pure False
|
||||
| OS.null (fromImportLocation loc) = do
|
||||
warning $ UnquotedString "Cannot import a file with an empty filename"
|
||||
return False
|
||||
| isdirectory = do
|
||||
warning $ UnquotedString "Cannot import a file with a name that appears to be a directory: "
|
||||
<> QuotedPath (fromImportLocation loc)
|
||||
return False
|
||||
| otherwise =
|
||||
isknown <||> (matches <&&> notignored)
|
||||
where
|
||||
-- Checks, from least to most expensive.
|
||||
#ifdef mingw32_HOST_OS
|
||||
ingitdir = ".git" `elem` Posix.splitDirectories (fromOsPath (fromImportLocation loc))
|
||||
ingitdir = ".git" `elem` Posix.splitDirectories loc'
|
||||
#else
|
||||
ingitdir = literalOsPath ".git" `elem` splitDirectories (fromImportLocation loc)
|
||||
#endif
|
||||
#ifdef mingw32_HOST_OS
|
||||
isdirectory = Posix.dropFileName loc' == loc'
|
||||
#else
|
||||
isdirectory = dropFileName (fromImportLocation loc) == fromImportLocation loc
|
||||
#endif
|
||||
matches = matchesImportLocation matcher loc sz
|
||||
isknown = isKnownImportLocation dbhandle loc
|
||||
notignored = notIgnoredImportLocation importtreeconfig ci loc
|
||||
|
||||
|
||||
#ifdef mingw32_HOST_OS
|
||||
loc' = fromOsPath (fromImportLocation loc)
|
||||
#endif
|
||||
|
||||
wantedunder dbhandle root (loc, v) =
|
||||
wanted dbhandle (importableContentsChunkFullLocation root loc, v)
|
||||
|
||||
|
|
|
@ -1,3 +1,10 @@
|
|||
git-annex (10.20250606) UNRELEASED; urgency=medium
|
||||
|
||||
* Skip and warn when a tree import includes empty filenames,
|
||||
which can happen with e.g. an S3 bucket.
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Mon, 23 Jun 2025 11:11:29 -0400
|
||||
|
||||
git-annex (10.20250605) upstream; urgency=medium
|
||||
|
||||
* sync: Push the current branch first, rather than a synced branch,
|
||||
|
|
|
@ -44,3 +44,5 @@ the version from pypi @mih started to build recently
|
|||
|
||||
[[!meta author=yoh]]
|
||||
[[!tag projects/dandi]]
|
||||
|
||||
> [[fixed|done]] --[[Joey]]
|
||||
|
|
|
@ -0,0 +1,29 @@
|
|||
[[!comment format=mdwn
|
||||
username="joey"
|
||||
subject="""comment 1"""
|
||||
date="2025-06-23T14:32:13Z"
|
||||
content="""
|
||||
Your hypothesis is right: the culprits are items in the bucket with names ending in "/".
|
||||
|
||||
After fixing git-annex to skip and warn about those, it looks like this:
|
||||
|
||||
list s3-origin
|
||||
Cannot import a file with a name that appears to be a directory: models/smartspim_production_models/
|
||||
|
||||
Cannot import a file with a name that appears to be a directory: models/smartspim_production_models/model_2_12202024/
|
||||
|
||||
Cannot import a file with a name that appears to be a directory: point_annotations/
|
||||
|
||||
Cannot import a file with a name that appears to be a directory: point_annotations/06-21-2024/
|
||||
ok
|
||||
|
||||
Note that "models/smartspim_production_models/config.json" is a file in the
|
||||
bucket located "inside" the first path. So this is not a case of an empty
|
||||
directory being somehow stored to an S3 bucket as a file, but of something else.
|
||||
I have not looked at the contents of these objects, as I would likely not
|
||||
understand them anyway.
|
||||
|
||||
I couldn't think of a better method than to warn and skip them. Any name mangling
|
||||
would take a name that could be used by some other file. And not warning risks the user
|
||||
being surprised when not all of the data in the bucket gets imported.
|
||||
"""]]
|
Loading…
Add table
Add a link
Reference in a new issue