Fix storing of filenames of v6 unlocked files when the filename is not representable in the current locale.
This is a mostly backwards compatible change. I broke backwards compatibility in the case where a filename starts with a double-quote. That seems likely to be very rare, and v6 unlocked files are a new feature anyway, and fsck needs to fix missing associated file mappings anyway. So, I decided that is good enough. The encoding used is to just show the String when it contains a problem character. While that adds some overhead to addAssociatedFile and removeAssociatedFile, those are not called very often. This approach has minimal decode overhead, because most filenames won't be encoded that way, and it only has to look for the leading double-quote to skip the expensive read. So, getAssociatedFiles remains fast. I did consider using ByteString instead, but getting a FilePath converted with all chars intact, even surrogates, is difficult, and it looks like instance PersistField ByteString uses Text, which I don't trust for problem encoded data. It would probably be slower too, and it would make the database less easy to inspect manually.
This commit is contained in:
parent
613d6056f5
commit
cf260d9a15
5 changed files with 85 additions and 9 deletions
|
@ -174,7 +174,7 @@ scanAssociatedFiles = whenM (isJust <$> inRepo Git.Branch.current) $
|
||||||
add h i k = liftIO $ flip SQL.queueDb h $
|
add h i k = liftIO $ flip SQL.queueDb h $
|
||||||
void $ insertUnique $ SQL.Associated
|
void $ insertUnique $ SQL.Associated
|
||||||
(toIKey k)
|
(toIKey k)
|
||||||
(getTopFilePath $ Git.LsTree.file i)
|
(toSFilePath $ getTopFilePath $ Git.LsTree.file i)
|
||||||
|
|
||||||
{- Stats the files, and stores their InodeCaches. -}
|
{- Stats the files, and stores their InodeCaches. -}
|
||||||
storeInodeCaches :: Key -> [FilePath] -> Annex ()
|
storeInodeCaches :: Key -> [FilePath] -> Annex ()
|
||||||
|
|
|
@ -26,7 +26,7 @@ import Control.Monad
|
||||||
share [mkPersist sqlSettings, mkMigrate "migrateKeysDb"] [persistLowerCase|
|
share [mkPersist sqlSettings, mkMigrate "migrateKeysDb"] [persistLowerCase|
|
||||||
Associated
|
Associated
|
||||||
key IKey
|
key IKey
|
||||||
file FilePath
|
file SFilePath
|
||||||
KeyFileIndex key file
|
KeyFileIndex key file
|
||||||
FileKeyIndex file key
|
FileKeyIndex file key
|
||||||
Content
|
Content
|
||||||
|
@ -63,8 +63,10 @@ addAssociatedFile ik f = queueDb $ do
|
||||||
-- If the same file was associated with a different key before,
|
-- If the same file was associated with a different key before,
|
||||||
-- remove that.
|
-- remove that.
|
||||||
delete $ from $ \r -> do
|
delete $ from $ \r -> do
|
||||||
where_ (r ^. AssociatedFile ==. val (getTopFilePath f) &&. not_ (r ^. AssociatedKey ==. val ik))
|
where_ (r ^. AssociatedFile ==. val af &&. not_ (r ^. AssociatedKey ==. val ik))
|
||||||
void $ insertUnique $ Associated ik (getTopFilePath f)
|
void $ insertUnique $ Associated ik af
|
||||||
|
where
|
||||||
|
af = toSFilePath (getTopFilePath f)
|
||||||
|
|
||||||
{- Note that the files returned were once associated with the key, but
|
{- Note that the files returned were once associated with the key, but
|
||||||
- some of them may not be any longer. -}
|
- some of them may not be any longer. -}
|
||||||
|
@ -73,21 +75,25 @@ getAssociatedFiles ik = readDb $ do
|
||||||
l <- select $ from $ \r -> do
|
l <- select $ from $ \r -> do
|
||||||
where_ (r ^. AssociatedKey ==. val ik)
|
where_ (r ^. AssociatedKey ==. val ik)
|
||||||
return (r ^. AssociatedFile)
|
return (r ^. AssociatedFile)
|
||||||
return $ map (asTopFilePath . unValue) l
|
return $ map (asTopFilePath . fromSFilePath . unValue) l
|
||||||
|
|
||||||
{- Gets any keys that are on record as having a particular associated file.
|
{- Gets any keys that are on record as having a particular associated file.
|
||||||
- (Should be one or none but the database doesn't enforce that.) -}
|
- (Should be one or none but the database doesn't enforce that.) -}
|
||||||
getAssociatedKey :: TopFilePath -> ReadHandle -> IO [IKey]
|
getAssociatedKey :: TopFilePath -> ReadHandle -> IO [IKey]
|
||||||
getAssociatedKey f = readDb $ do
|
getAssociatedKey f = readDb $ do
|
||||||
l <- select $ from $ \r -> do
|
l <- select $ from $ \r -> do
|
||||||
where_ (r ^. AssociatedFile ==. val (getTopFilePath f))
|
where_ (r ^. AssociatedFile ==. val af)
|
||||||
return (r ^. AssociatedKey)
|
return (r ^. AssociatedKey)
|
||||||
return $ map unValue l
|
return $ map unValue l
|
||||||
|
where
|
||||||
|
af = toSFilePath (getTopFilePath f)
|
||||||
|
|
||||||
removeAssociatedFile :: IKey -> TopFilePath -> WriteHandle -> IO ()
|
removeAssociatedFile :: IKey -> TopFilePath -> WriteHandle -> IO ()
|
||||||
removeAssociatedFile ik f = queueDb $
|
removeAssociatedFile ik f = queueDb $
|
||||||
delete $ from $ \r -> do
|
delete $ from $ \r -> do
|
||||||
where_ (r ^. AssociatedKey ==. val ik &&. r ^. AssociatedFile ==. val (getTopFilePath f))
|
where_ (r ^. AssociatedKey ==. val ik &&. r ^. AssociatedFile ==. val af)
|
||||||
|
where
|
||||||
|
af = toSFilePath (getTopFilePath f)
|
||||||
|
|
||||||
addInodeCaches :: IKey -> [InodeCache] -> WriteHandle -> IO ()
|
addInodeCaches :: IKey -> [InodeCache] -> WriteHandle -> IO ()
|
||||||
addInodeCaches ik is = queueDb $
|
addInodeCaches ik is = queueDb $
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
{- types for SQL databases
|
{- types for SQL databases
|
||||||
-
|
-
|
||||||
- Copyright 2015 Joey Hess <id@joeyh.name>
|
- Copyright 2015-2016 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU GPL version 3 or higher.
|
- Licensed under the GNU GPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
@ -11,7 +11,9 @@ module Database.Types where
|
||||||
|
|
||||||
import Database.Persist.TH
|
import Database.Persist.TH
|
||||||
import Data.Maybe
|
import Data.Maybe
|
||||||
|
import Data.Char
|
||||||
|
|
||||||
|
import Utility.PartialPrelude
|
||||||
import Types.Key
|
import Types.Key
|
||||||
import Utility.InodeCache
|
import Utility.InodeCache
|
||||||
|
|
||||||
|
@ -53,6 +55,41 @@ toSInodeCache :: InodeCache -> SInodeCache
|
||||||
toSInodeCache = I . showInodeCache
|
toSInodeCache = I . showInodeCache
|
||||||
|
|
||||||
fromSInodeCache :: SInodeCache -> InodeCache
|
fromSInodeCache :: SInodeCache -> InodeCache
|
||||||
fromSInodeCache (I s) = fromMaybe (error $ "bad serialied InodeCache " ++ s) (readInodeCache s)
|
fromSInodeCache (I s) = fromMaybe (error $ "bad serialized InodeCache " ++ s) (readInodeCache s)
|
||||||
|
|
||||||
derivePersistField "SInodeCache"
|
derivePersistField "SInodeCache"
|
||||||
|
|
||||||
|
-- A serialized FilePath.
|
||||||
|
--
|
||||||
|
-- Not all unicode characters round-trip through sqlite. In particular,
|
||||||
|
-- surrogate code points do not. So, escape the FilePath. But, only when
|
||||||
|
-- it contains such characters.
|
||||||
|
newtype SFilePath = SFilePath String -- holds the string form built by toSFilePath: either the raw path or its show-escaped rendering
|
||||||
|
|
||||||
|
-- Note that Read instance does not work when used in any kind of complex
|
||||||
|
-- data structure.
|
||||||
|
instance Read SFilePath where
|
||||||
|
readsPrec _ s = [(SFilePath s, "")] -- wraps the entire input verbatim and leaves no remainder; no unescaping happens here
|
||||||
|
|
||||||
|
instance Show SFilePath where
|
||||||
|
show (SFilePath s) = s -- emits the wrapped string as-is, without adding any quoting of its own
|
||||||
|
|
||||||
|
toSFilePath :: FilePath -> SFilePath -- serializes a FilePath, escaping it with show only when necessary
|
||||||
|
toSFilePath s@('"':_) = SFilePath (show s) -- a leading double-quote is always escaped, because fromSFilePath treats one as the marker of the escaped form
|
||||||
|
toSFilePath s
|
||||||
|
| any needsescape s = SFilePath (show s) -- escaped form: the String as rendered by show
|
||||||
|
| otherwise = SFilePath s -- no problem characters: kept unchanged
|
||||||
|
where
|
||||||
|
needsescape c = case generalCategory c of -- True for the character categories that trigger escaping
|
||||||
|
Surrogate -> True
|
||||||
|
PrivateUse -> True
|
||||||
|
NotAssigned -> True
|
||||||
|
_ -> False
|
||||||
|
|
||||||
|
fromSFilePath :: SFilePath -> FilePath
|
||||||
|
fromSFilePath (SFilePath s@('"':_)) = -- a leading double-quote marks the show-escaped form written by toSFilePath
|
||||||
|
fromMaybe (error $ "bad serialized SFilePath " ++ s) (readish s) -- ($) keeps the offending value inside the error message; without it, (error "...") ++ s drops s
|
||||||
|
fromSFilePath (SFilePath s) = s -- unescaped form: the string is the path itself
|
||||||
|
|
||||||
|
derivePersistField "SFilePath"
|
||||||
|
|
||||||
|
|
2
debian/changelog
vendored
2
debian/changelog
vendored
|
@ -6,6 +6,8 @@ git-annex (6.20160212) UNRELEASED; urgency=medium
|
||||||
* Work around problem with concurrent-output when in a non-unicode locale
|
* Work around problem with concurrent-output when in a non-unicode locale
|
||||||
by avoiding use of it in such a locale. Instead -J will behave as if
|
by avoiding use of it in such a locale. Instead -J will behave as if
|
||||||
it was built without concurrent-output support in this situation.
|
it was built without concurrent-output support in this situation.
|
||||||
|
* Fix storing of filenames of v6 unlocked files when the filename is not
|
||||||
|
representable in the current locale.
|
||||||
|
|
||||||
-- Joey Hess <id@joeyh.name> Fri, 12 Feb 2016 14:03:46 -0400
|
-- Joey Hess <id@joeyh.name> Fri, 12 Feb 2016 14:03:46 -0400
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,31 @@
|
||||||
|
[[!comment format=mdwn
|
||||||
|
username="joey"
|
||||||
|
subject="""comment 1"""
|
||||||
|
date="2016-02-14T19:19:46Z"
|
||||||
|
content="""
|
||||||
|
Reproduced using LANG=C.
|
||||||
|
|
||||||
|
This is a problem with the filename stored in the keys db. In the first
|
||||||
|
repo, it has:
|
||||||
|
|
||||||
|
VALUES(1,'SHA256E-s8--d1d0c59000f7c0d71485b051c9ca3f25f7afa84f0be5fea98fe1e12f3f898f44','test_öüä');
|
||||||
|
|
||||||
|
However, in the clone:
|
||||||
|
|
||||||
|
VALUES(1,'SHA256E-s8--d1d0c59000f7c0d71485b051c9ca3f25f7afa84f0be5fea98fe1e12f3f898f44','test_<74><5F><EFBFBD><EFBFBD><EFBFBD><EFBFBD>');
|
||||||
|
|
||||||
|
So, it's lost the correct filename there. Since it doesn't
|
||||||
|
find the file with the messed up name, it doesn't replace the file content.
|
||||||
|
|
||||||
|
The problem is not with decoding git's C-style character encoding; that
|
||||||
|
happens ok yielding `"test_\56515\56502\56515\56508\56515\56484"`.
|
||||||
|
But, that does not seem to get stored in the database correctly.
|
||||||
|
|
||||||
|
Seems that these unicode surrogates are not handled by the sqlite layer.
|
||||||
|
The surrogates are being used because LANG=C does not support
|
||||||
|
unicode. This could also happen when in a (working) utf-8 locale, when
|
||||||
|
the filename is not utf-8 encoded.
|
||||||
|
|
||||||
|
So, need to escape strings containing such surrogates before passing to
|
||||||
|
SQL. In a backwards-compatible way. Done.
|
||||||
|
"""]]
|
Loading…
Reference in a new issue