Skip and warn when a tree import includes empty filenames
Which can happen with e.g. an S3 bucket. Sponsored-by: Dartmouth College's DANDI project
This commit is contained in:
parent
0f4531e9a7
commit
6818e69b81
4 changed files with 62 additions and 3 deletions
|
@ -1,6 +1,6 @@
|
||||||
{- git-annex import from remotes
|
{- git-annex import from remotes
|
||||||
-
|
-
|
||||||
- Copyright 2019-2024 Joey Hess <id@joeyh.name>
|
- Copyright 2019-2025 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU AGPL version 3 or higher.
|
- Licensed under the GNU AGPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
@ -64,6 +64,7 @@ import qualified Utility.Matcher
|
||||||
import qualified Database.Export as Export
|
import qualified Database.Export as Export
|
||||||
import qualified Database.ContentIdentifier as CIDDb
|
import qualified Database.ContentIdentifier as CIDDb
|
||||||
import qualified Logs.ContentIdentifier as CIDLog
|
import qualified Logs.ContentIdentifier as CIDLog
|
||||||
|
import qualified Utility.OsString as OS
|
||||||
import Backend.Utilities
|
import Backend.Utilities
|
||||||
|
|
||||||
import Control.Concurrent.STM
|
import Control.Concurrent.STM
|
||||||
|
@ -1048,6 +1049,10 @@ pruneImportMatcher = Utility.Matcher.pruneMatcher matchNeedsKey
|
||||||
- write a git tree that contains that, git will complain and refuse to
|
- write a git tree that contains that, git will complain and refuse to
|
||||||
- check it out.
|
- check it out.
|
||||||
-
|
-
|
||||||
|
- Filters out any paths that contain an empty filename, because git cannot
|
||||||
|
- represent an empty filename in a tree, but some special remotes do
|
||||||
|
- support empty filenames.
|
||||||
|
-
|
||||||
- Filters out new things not matching the FileMatcher or that are
|
- Filters out new things not matching the FileMatcher or that are
|
||||||
- gitignored. However, files that are already in git get imported
|
- gitignored. However, files that are already in git get imported
|
||||||
- regardless. (Similar to how git add behaves on gitignored files.)
|
- regardless. (Similar to how git add behaves on gitignored files.)
|
||||||
|
@ -1094,19 +1099,35 @@ getImportableContents r importtreeconfig ci matcher = do
|
||||||
|
|
||||||
wanted dbhandle (loc, (_cid, sz))
|
wanted dbhandle (loc, (_cid, sz))
|
||||||
| ingitdir = pure False
|
| ingitdir = pure False
|
||||||
|
| OS.null (fromImportLocation loc) = do
|
||||||
|
warning $ UnquotedString "Cannot import a file with an empty filename"
|
||||||
|
return False
|
||||||
|
| isdirectory = do
|
||||||
|
warning $ UnquotedString "Cannot import a file with a name that appears to be a directory: "
|
||||||
|
<> QuotedPath (fromImportLocation loc)
|
||||||
|
return False
|
||||||
| otherwise =
|
| otherwise =
|
||||||
isknown <||> (matches <&&> notignored)
|
isknown <||> (matches <&&> notignored)
|
||||||
where
|
where
|
||||||
-- Checks, from least to most expensive.
|
-- Checks, from least to most expensive.
|
||||||
#ifdef mingw32_HOST_OS
|
#ifdef mingw32_HOST_OS
|
||||||
ingitdir = ".git" `elem` Posix.splitDirectories (fromOsPath (fromImportLocation loc))
|
ingitdir = ".git" `elem` Posix.splitDirectories loc'
|
||||||
#else
|
#else
|
||||||
ingitdir = literalOsPath ".git" `elem` splitDirectories (fromImportLocation loc)
|
ingitdir = literalOsPath ".git" `elem` splitDirectories (fromImportLocation loc)
|
||||||
|
#endif
|
||||||
|
#ifdef mingw32_HOST_OS
|
||||||
|
isdirectory = Posix.dropFileName loc' == loc'
|
||||||
|
#else
|
||||||
|
isdirectory = dropFileName (fromImportLocation loc) == fromImportLocation loc
|
||||||
#endif
|
#endif
|
||||||
matches = matchesImportLocation matcher loc sz
|
matches = matchesImportLocation matcher loc sz
|
||||||
isknown = isKnownImportLocation dbhandle loc
|
isknown = isKnownImportLocation dbhandle loc
|
||||||
notignored = notIgnoredImportLocation importtreeconfig ci loc
|
notignored = notIgnoredImportLocation importtreeconfig ci loc
|
||||||
|
|
||||||
|
#ifdef mingw32_HOST_OS
|
||||||
|
loc' = fromOsPath (fromImportLocation loc)
|
||||||
|
#endif
|
||||||
|
|
||||||
wantedunder dbhandle root (loc, v) =
|
wantedunder dbhandle root (loc, v) =
|
||||||
wanted dbhandle (importableContentsChunkFullLocation root loc, v)
|
wanted dbhandle (importableContentsChunkFullLocation root loc, v)
|
||||||
|
|
||||||
|
|
|
@ -1,3 +1,10 @@
|
||||||
|
git-annex (10.20250606) UNRELEASED; urgency=medium
|
||||||
|
|
||||||
|
* Skip and warn when a tree import includes empty filenames,
|
||||||
|
which can happen with e.g. an S3 bucket.
|
||||||
|
|
||||||
|
-- Joey Hess <id@joeyh.name> Mon, 23 Jun 2025 11:11:29 -0400
|
||||||
|
|
||||||
git-annex (10.20250605) upstream; urgency=medium
|
git-annex (10.20250605) upstream; urgency=medium
|
||||||
|
|
||||||
* sync: Push the current branch first, rather than a synced branch,
|
* sync: Push the current branch first, rather than a synced branch,
|
||||||
|
|
|
@ -44,3 +44,5 @@ the version from pypi @mih started to build recently
|
||||||
|
|
||||||
[[!meta author=yoh]]
|
[[!meta author=yoh]]
|
||||||
[[!tag projects/dandi]]
|
[[!tag projects/dandi]]
|
||||||
|
|
||||||
|
> [[fixed|done]] --[[Joey]]
|
||||||
|
|
|
@ -0,0 +1,29 @@
|
||||||
|
[[!comment format=mdwn
|
||||||
|
username="joey"
|
||||||
|
subject="""comment 1"""
|
||||||
|
date="2025-06-23T14:32:13Z"
|
||||||
|
content="""
|
||||||
|
Your hypothesis is right, it's items in the bucket with names ending in "/".
|
||||||
|
|
||||||
|
After fixing git-annex to skip and warn about those, it looks like this:
|
||||||
|
|
||||||
|
list s3-origin
|
||||||
|
Cannot import a file with a name that appears to be a directory: models/smartspim_production_models/
|
||||||
|
|
||||||
|
Cannot import a file with a name that appears to be a directory: models/smartspim_production_models/model_2_12202024/
|
||||||
|
|
||||||
|
Cannot import a file with a name that appears to be a directory: point_annotations/
|
||||||
|
|
||||||
|
Cannot import a file with a name that appears to be a directory: point_annotations/06-21-2024/
|
||||||
|
ok
|
||||||
|
|
||||||
|
Note that "models/smartspim_production_models/config.json" is a file in the
|
||||||
|
bucket located "inside" the first path. So this is not a case of an empty
|
||||||
|
directory being somehow stored to an S3 bucket as a file, but of something else.
|
||||||
|
I have not looked at the contents of these objects, as I would likely not
|
||||||
|
understand them anyway.
|
||||||
|
|
||||||
|
I couldn't think of a better method than to warn and skip them. Any name mangling
|
||||||
|
would take a name that could be used by some other file. And not warning risks the user
|
||||||
|
being surprised when all the data in the bucket does not get imported.
|
||||||
|
"""]]
|
Loading…
Add table
Add a link
Reference in a new issue