update for things already fixed in this branch
This commit is contained in:
parent
9085a2cfec
commit
09c7cbbaa8
3 changed files with 22 additions and 100 deletions
|
@ -1,6 +1,10 @@
|
|||
git-annex (7.20191025) UNRELEASED; urgency=medium
|
||||
|
||||
* benchmark: Add --databases to benchmark sqlite databases.
|
||||
* Add some missing indexes to sqlite databases. This will speed up
|
||||
some things involving export and import remotes, and git-annex smudge.
|
||||
* Improved serialization of filenames and keys to the sqlite databases,
|
||||
avoiding some encoding problems.
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Tue, 29 Oct 2019 15:13:03 -0400
|
||||
|
||||
|
|
|
@ -32,3 +32,7 @@ TransferScanner crashed: fd:47: commitBuffer: invalid argument (invalid characte
|
|||
"""]]
|
||||
|
||||
[[!meta title="v6 repository problems with filename encoding when not in unicode locale"]]
|
||||
|
||||
> This bug grew to encompass several problems. I believe all of them are
|
||||
> now [[fixed|done]]. Please reopen a new bug report if you see a problem like this
|
||||
> with current git-annex. --[[Joey]]
|
||||
|
|
|
@ -10,12 +10,18 @@ process.
|
|||
Eg, store the size and allowable mtimes in a separate table that is
|
||||
queried when necessary.
|
||||
|
||||
* Several selects were not able to use indexes, so would be slow.
|
||||
|
||||
Fixed by adding indexes.
|
||||
|
||||
* Database.Types has some suboptimal encodings for Key and InodeCache.
|
||||
They are both slow due to being implemented using String
|
||||
(which may be fixable w/o changing the DB schema),
|
||||
and the VARCHARs they generate are longer than necessary
|
||||
since they look like eg `SKey "whatever"` and `I "whatever"`
|
||||
|
||||
Fixed.
|
||||
|
||||
* SFilePath is stored efficiently, and has to be a String anyway,
|
||||
(until ByteStringFilePath is used)
|
||||
but since it's stored as a VARCHAR, which sqlite interprets using the
|
||||
|
@ -35,6 +41,8 @@ process.
|
|||
And it seems likely that a query by filename would fail if the filename
|
||||
was in the database but with a different encoding.
|
||||
|
||||
Fixed by converting to blob.
|
||||
|
||||
* IKey could fail to round-trip as well, when a Key contains something
|
||||
(eg, a filename extension) that is not valid in the current locale,
|
||||
for similar reasons to SFilePath. Using BLOB would be better.
|
||||
|
@ -49,6 +57,12 @@ process.
|
|||
INSERT INTO associated VALUES(8,'SHA256E-s30--59594eea8d6f64156b3ce6530cc3a661739abf2a0b72443de8683c34b0b19344.ü','foo.ü');
|
||||
INSERT INTO associated VALUES(9,'SHA256E-s30--59594eea8d6f64156b3ce6530cc3a661739abf2a0b72443de8683c34b0b19344.<2E><>','"foo.\56515\56508"');
|
||||
|
||||
Fixed by converting to blob.
|
||||
|
||||
----
|
||||
|
||||
migration
|
||||
|
||||
> Investigated this in more detail, and I can't find a way to
|
||||
> solve the encoding problem other than changing the encoding
|
||||
> SKey, IKey, and SFilePath in a non-backwards-compatible way.
|
||||
|
@ -102,103 +116,3 @@ process.
|
|||
> And, what about users who use a mix of old and new git-annex versions?
|
||||
>
|
||||
> Seems this needs an annex.version bump from v7 to v8.
|
||||
|
||||
----
|
||||
|
||||
[[!format patch """
|
||||
diff --git a/Database/Types.hs b/Database/Types.hs
|
||||
index f08cf4e9d..3e9c9e267 100644
|
||||
--- a/Database/Types.hs
|
||||
+++ b/Database/Types.hs
|
||||
@@ -14,11 +14,12 @@ import Database.Persist.TH
|
||||
import Database.Persist.Class hiding (Key)
|
||||
import Database.Persist.Sql hiding (Key)
|
||||
import Data.Maybe
|
||||
-import Data.Char
|
||||
import qualified Data.ByteString as S
|
||||
+import qualified Data.ByteString.Lazy as L
|
||||
import qualified Data.Text as T
|
||||
|
||||
import Utility.PartialPrelude
|
||||
+import Utility.FileSystemEncoding
|
||||
import Key
|
||||
import Utility.InodeCache
|
||||
import Git.Types (Ref(..))
|
||||
@@ -37,23 +38,18 @@ fromSKey (SKey s) = fromMaybe (error $ "bad serialized Key " ++ s) (deserializeK
|
||||
|
||||
derivePersistField "SKey"
|
||||
|
||||
--- A Key index. More efficient than SKey, but its Read instance does not
|
||||
--- work when it's used in any kind of complex data structure.
|
||||
-newtype IKey = IKey String
|
||||
-
|
||||
-instance Read IKey where
|
||||
- readsPrec _ s = [(IKey s, "")]
|
||||
-
|
||||
-instance Show IKey where
|
||||
- show (IKey s) = s
|
||||
+-- A Key index. More efficient than SKey.
|
||||
+newtype IKey = IKey S.ByteString
|
||||
+ deriving (Eq, Show, PersistField, PersistFieldSql)
|
||||
|
||||
+-- FIXME: toStrict copies, not efficient
|
||||
toIKey :: Key -> IKey
|
||||
-toIKey = IKey . serializeKey
|
||||
+toIKey = IKey . L.toStrict . serializeKey'
|
||||
|
||||
fromIKey :: IKey -> Key
|
||||
-fromIKey (IKey s) = fromMaybe (error $ "bad serialized Key " ++ s) (deserializeKey s)
|
||||
-
|
||||
-derivePersistField "IKey"
|
||||
+fromIKey (IKey b) = fromMaybe
|
||||
+ (error $ "bad serialized Key " ++ show b)
|
||||
+ (deserializeKey' b)
|
||||
|
||||
-- A serialized InodeCache
|
||||
newtype SInodeCache = I String
|
||||
@@ -67,39 +63,15 @@ fromSInodeCache (I s) = fromMaybe (error $ "bad serialized InodeCache " ++ s) (r
|
||||
|
||||
derivePersistField "SInodeCache"
|
||||
|
||||
--- A serialized FilePath.
|
||||
---
|
||||
--- Not all unicode characters round-trip through sqlite. In particular,
|
||||
--- surrigate code points do not. So, escape the FilePath. But, only when
|
||||
--- it contains such characters.
|
||||
-newtype SFilePath = SFilePath String
|
||||
-
|
||||
--- Note that Read instance does not work when used in any kind of complex
|
||||
--- data structure.
|
||||
-instance Read SFilePath where
|
||||
- readsPrec _ s = [(SFilePath s, "")]
|
||||
-
|
||||
-instance Show SFilePath where
|
||||
- show (SFilePath s) = s
|
||||
+-- A serialized FilePath. Stored as a ByteString to avoid encoding problems.
|
||||
+newtype SFilePath = SFilePath S.ByteString
|
||||
+ deriving (Eq, Show, PersistField, PersistFieldSql)
|
||||
|
||||
toSFilePath :: FilePath -> SFilePath
|
||||
-toSFilePath s@('"':_) = SFilePath (show s)
|
||||
-toSFilePath s
|
||||
- | any needsescape s = SFilePath (show s)
|
||||
- | otherwise = SFilePath s
|
||||
- where
|
||||
- needsescape c = case generalCategory c of
|
||||
- Surrogate -> True
|
||||
- PrivateUse -> True
|
||||
- NotAssigned -> True
|
||||
- _ -> False
|
||||
+toSFilePath = SFilePath . encodeBS
|
||||
|
||||
fromSFilePath :: SFilePath -> FilePath
|
||||
-fromSFilePath (SFilePath s@('"':_)) =
|
||||
- fromMaybe (error "bad serialized SFilePath " ++ s) (readish s)
|
||||
-fromSFilePath (SFilePath s) = s
|
||||
-
|
||||
-derivePersistField "SFilePath"
|
||||
+fromSFilePath (SFilePath b) = decodeBS b
|
||||
|
||||
-- A serialized Ref
|
||||
newtype SRef = SRef Ref
|
||||
"""]]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue