handle keys with extensions consistently in all locales
Fix some cases where handling of keys with extensions varied depending on the locale. A filename with a unicode extension would previously generate a key with an extension in a unicode locale, but not in LANG=C, because the extension was not all alphanumeric. Also, the length of the extension could be counted differently depending on the locale. In a non-unicode locale, git-annex migrate would see that the extension was not all alphanumeric and want to "upgrade" it. Now that doesn't happen. As far as backwards compatibility goes, this does mean that unicode extensions are counted by the number of bytes, not the number of characters. So, if someone is using unicode extensions, they may find git-annex stops using them when adding files, because their extensions are too long. Keys already in their repo with the "too long" extensions will still work though, so this only prevents adding the same content with the same extension from generating the same key. Documented this by documenting that annex.maxextensionlength is a number of bytes. Also, if a filename has an extension that is not valid utf-8 and the locale is utf-8, the extension will be allowed now, whereas an old git-annex in the same locale would not allow it, and would also want to "upgrade" that.
This commit is contained in:
parent
547e1f29b1
commit
09df58c4ea
3 changed files with 28 additions and 20 deletions
|
@ -1,6 +1,6 @@
|
|||
{- git-annex hashing backends
|
||||
-
|
||||
- Copyright 2011-2019 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2011-2020 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
@ -24,7 +24,9 @@ import Utility.Metered
|
|||
import qualified Data.ByteString as S
|
||||
import qualified Data.ByteString.Char8 as S8
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
import qualified System.FilePath.ByteString as P
|
||||
import Data.Char
|
||||
import Data.Word
|
||||
import Control.DeepSeq
|
||||
import Control.Exception (evaluate)
|
||||
|
||||
|
@ -104,22 +106,22 @@ keyValueE hash source meterupdate =
|
|||
where
|
||||
addE k = do
|
||||
maxlen <- annexMaxExtensionLength <$> Annex.getGitConfig
|
||||
let ext = selectExtension maxlen (keyFilename source)
|
||||
let ext = selectExtension maxlen (toRawFilePath (keyFilename source))
|
||||
return $ Just $ alterKey k $ \d -> d
|
||||
{ keyName = keyName d <> encodeBS ext
|
||||
{ keyName = keyName d <> ext
|
||||
, keyVariety = hashKeyVariety hash (HasExt True)
|
||||
}
|
||||
|
||||
selectExtension :: Maybe Int -> FilePath -> String
|
||||
selectExtension :: Maybe Int -> RawFilePath -> S.ByteString
|
||||
selectExtension maxlen f
|
||||
| null es = ""
|
||||
| otherwise = intercalate "." ("":es)
|
||||
| otherwise = S.intercalate "." ("":es)
|
||||
where
|
||||
es = filter (not . null) $ reverse $
|
||||
take 2 $ filter (all validInExtension) $
|
||||
es = filter (not . S.null) $ reverse $
|
||||
take 2 $ filter (S.all validInExtension) $
|
||||
takeWhile shortenough $
|
||||
reverse $ splitc '.' $ takeExtensions f
|
||||
shortenough e = length e <= fromMaybe maxExtensionLen maxlen
|
||||
reverse $ S.split (fromIntegral (ord '.')) (P.takeExtensions f)
|
||||
shortenough e = S.length e <= fromMaybe maxExtensionLen maxlen
|
||||
|
||||
maxExtensionLen :: Int
|
||||
maxExtensionLen = 4 -- long enough for "jpeg"
|
||||
|
@ -152,11 +154,13 @@ checkKeyChecksum hash key file = catchIOErrorType HardwareFault hwfault $ do
|
|||
keyHash :: Key -> S.ByteString
|
||||
keyHash = fst . splitKeyNameExtension
|
||||
|
||||
validInExtension :: Char -> Bool
|
||||
validInExtension :: Word8 -> Bool
|
||||
validInExtension c
|
||||
| isAlphaNum c = True
|
||||
| c == '.' = True
|
||||
| otherwise = False
|
||||
| c >= 48 && c <= 57 = True -- numbers
|
||||
| c >= 65 && c <= 90 = True -- A-Z
|
||||
| c >= 97 && c <= 122 = True -- a-z
|
||||
| c <= 127 = False -- other ascii, spaces, punctuation, control chars
|
||||
| otherwise = True -- utf8 is allowed, also other encodings
|
||||
|
||||
{- Upgrade keys that have the \ prefix on their hash due to a bug, or
|
||||
- that contain non-alphanumeric characters in their extension.
|
||||
|
@ -168,7 +172,7 @@ validInExtension c
|
|||
needsUpgrade :: Key -> Bool
|
||||
needsUpgrade key = or
|
||||
[ "\\" `S8.isPrefixOf` keyHash key
|
||||
, any (not . validInExtension) (decodeBS $ snd $ splitKeyNameExtension key)
|
||||
, S.any (not . validInExtension) (snd $ splitKeyNameExtension key)
|
||||
, not (hasExt (fromKey keyVariety key)) && keyHash key /= fromKey keyName key
|
||||
]
|
||||
|
||||
|
@ -188,7 +192,7 @@ trivialMigrate' oldkey newbackend afile maxextlen
|
|||
AssociatedFile Nothing -> Nothing
|
||||
AssociatedFile (Just file) -> Just $ alterKey oldkey $ \d -> d
|
||||
{ keyName = keyHash oldkey
|
||||
<> encodeBS' (selectExtension maxextlen (fromRawFilePath file))
|
||||
<> selectExtension maxextlen file
|
||||
, keyVariety = newvariety
|
||||
}
|
||||
{- Upgrade to fix bad previous migration that created a
|
||||
|
|
|
@ -25,6 +25,10 @@ git-annex (8.20200221) UNRELEASED; urgency=medium
|
|||
the remote cannot be fetched from by git, so git fetch --all
|
||||
will not try to use it.
|
||||
* Makefile: Support newer versions of cabal that use the new-build system.
|
||||
* Fix some cases where handling of keys with extensions varied depending
|
||||
on the locale.
|
||||
* annex.maxextensionlength used to be the number of characters, not
|
||||
bytes, when in a utf-8 locale. It's now always the number of bytes.
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Wed, 19 Feb 2020 12:48:58 -0400
|
||||
|
||||
|
|
|
@ -871,11 +871,11 @@ Like other git commands, git-annex is configured via `.git/config`.
|
|||
|
||||
* `annex.maxextensionlength`
|
||||
|
||||
Maximum length of what is considered a filename extension when adding a
|
||||
file to a backend that preserves filename extensions. The default length
|
||||
is 4, which allows extensions like "jpeg". The dot before the extension
|
||||
is not counted part of its length. At most two extensions at the end of
|
||||
a filename will be preserved, e.g. .gz or .tar.gz .
|
||||
Maximum length, in bytes, of what is considered a filename extension when
|
||||
adding a file to a backend that preserves filename extensions. The
|
||||
default length is 4, which allows extensions like "jpeg". The dot before
|
||||
the extension is not counted part of its length. At most two extensions
|
||||
at the end of a filename will be preserved, e.g. .gz or .tar.gz .
|
||||
|
||||
* `annex.diskreserve`
|
||||
|
||||
|
|
Loading…
Reference in a new issue