implemented mkImportableContentsVersioned
This commit is contained in:
parent
1968f6d9c6
commit
55a5d9679a
2 changed files with 94 additions and 66 deletions
109
Remote/S3.hs
109
Remote/S3.hs
|
@ -199,20 +199,20 @@ s3Setup' ss u mcreds c gc
|
|||
writeUUIDFile archiveconfig u info
|
||||
use archiveconfig info
|
||||
|
||||
checkexportimportsafe c' info =
|
||||
checkexportimportsafe' c' info
|
||||
=<< Annex.getState Annex.force
|
||||
checkexportimportsafe' c' info force
|
||||
| force = return ()
|
||||
checkexportimportsafe c' info =
|
||||
unlessM (Annex.getState Annex.force) $
|
||||
checkexportimportsafe' c' info
|
||||
checkexportimportsafe' c' info
|
||||
| versioning info = return ()
|
||||
| exportTree c' && importTree c' = giveup $ unwords
|
||||
[ "Combining exporttree=yes and importtree=yes"
|
||||
, "with an unversioned S3 bucket is not safe;"
|
||||
, "exporting can overwrite other modifications"
|
||||
, "to files in the bucket."
|
||||
, "Recommend you add versioning=yes."
|
||||
, "(Or use --force if you don't mind possibly losing data.)"
|
||||
]
|
||||
| otherwise = when (exportTree c' && importTree c') $
|
||||
giveup $ unwords
|
||||
[ "Combining exporttree=yes and importtree=yes"
|
||||
, "with an unversioned S3 bucket is not safe;"
|
||||
, "exporting can overwrite other modifications"
|
||||
, "to files in the bucket."
|
||||
, "Recommend you add versioning=yes."
|
||||
, "(Or use --force if you don't mind possibly losing data.)"
|
||||
]
|
||||
|
||||
store :: S3HandleVar -> Remote -> S3Info -> Maybe Magic -> Storer
|
||||
store mh r info magic = fileStorer $ \k f p -> withS3HandleOrFail (uuid r) mh $ \h -> do
|
||||
|
@ -479,7 +479,8 @@ listImportableContentsS3 hv r info =
|
|||
}
|
||||
liftIO $ print rsp'
|
||||
continuelistunversioned h (rsp:l) rsp'
|
||||
| otherwise = return (mklistunversioned (reverse (rsp:l)))
|
||||
| otherwise = return $
|
||||
mkImportableContentsUnversioned info (reverse (rsp:l))
|
||||
|
||||
continuelistversioned h l rsp
|
||||
| S3.gbovrIsTruncated rsp = do
|
||||
|
@ -490,23 +491,71 @@ listImportableContentsS3 hv r info =
|
|||
}
|
||||
liftIO $ print rsp
|
||||
continuelistversioned h (rsp:l) rsp'
|
||||
| otherwise = return (mklistversioned (reverse (rsp:l)))
|
||||
| otherwise = do
|
||||
let v = mkImportableContentsVersioned info (reverse (rsp:l))
|
||||
liftIO $ print v
|
||||
return v
|
||||
|
||||
mkImportableContentsUnversioned :: S3Info -> [S3.GetBucketResponse] -> ImportableContents (ContentIdentifier, ByteSize)
|
||||
mkImportableContentsUnversioned info l = ImportableContents
|
||||
{ importableContents = concatMap (mapMaybe extract . S3.gbrContents) l
|
||||
, importableHistory = []
|
||||
}
|
||||
where
|
||||
extract oi = do
|
||||
loc <- bucketImportLocation info $
|
||||
T.unpack $ S3.objectKey oi
|
||||
let sz = S3.objectSize oi
|
||||
let cid = mkS3UnversionedContentIdentifier $
|
||||
S3.objectETag oi
|
||||
return (loc, (cid, sz))
|
||||
|
||||
mkImportableContentsVersioned :: S3Info -> [S3.GetBucketObjectVersionsResponse] -> ImportableContents (ContentIdentifier, ByteSize)
|
||||
mkImportableContentsVersioned info = build . groupfiles
|
||||
where
|
||||
build [] = ImportableContents [] []
|
||||
build l =
|
||||
let (l', v) = latestversion l
|
||||
in ImportableContents
|
||||
{ importableContents = mapMaybe extract v
|
||||
, importableHistory = case build l' of
|
||||
ImportableContents [] [] -> []
|
||||
h -> [h]
|
||||
}
|
||||
|
||||
extract ovi@(S3.ObjectVersion {}) = do
|
||||
loc <- bucketImportLocation info $
|
||||
T.unpack $ S3.oviKey ovi
|
||||
let sz = S3.oviSize ovi
|
||||
let cid = mkS3UnversionedContentIdentifier $
|
||||
S3.oviETag ovi
|
||||
return (loc, (cid, sz))
|
||||
extract (S3.DeleteMarker {}) = Nothing
|
||||
|
||||
mklistunversioned l = ImportableContents
|
||||
{ importableContents =
|
||||
concatMap (mapMaybe go . S3.gbrContents) l
|
||||
, importableHistory = []
|
||||
}
|
||||
-- group files so all versions of a file are in a sublist,
|
||||
-- with the newest first. S3 uses such an order, so it's just a
|
||||
-- matter of breaking up the response list into sublists.
|
||||
groupfiles = groupBy (\a b -> S3.oviKey a == S3.oviKey b)
|
||||
. concatMap S3.gbovrContents
|
||||
|
||||
latestversion [] = ([], [])
|
||||
latestversion ([]:rest) = latestversion rest
|
||||
latestversion l@((first:_old):remainder) =
|
||||
go (S3.oviLastModified first) [first] remainder
|
||||
where
|
||||
go oi = do
|
||||
loc <- bucketImportLocation info $
|
||||
T.unpack $ S3.objectKey oi
|
||||
let sz = S3.objectSize oi
|
||||
let cid = mkS3UnversionedContentIdentifier $
|
||||
S3.objectETag oi
|
||||
return (loc, (cid, sz))
|
||||
go mtime c [] = (removemostrecent mtime l, reverse c)
|
||||
go mtime c ([]:rest) = go mtime c rest
|
||||
go mtime c ((latest:_old):rest) =
|
||||
let !mtime' = max mtime (S3.oviLastModified latest)
|
||||
in go mtime' (latest:c) rest
|
||||
|
||||
mklistversioned l = ImportableContents [] [] -- FIXME
|
||||
removemostrecent _ [] = []
|
||||
removemostrecent mtime ([]:rest) = removemostrecent mtime rest
|
||||
removemostrecent mtime (i@(curr:old):rest)
|
||||
| S3.oviLastModified curr == mtime =
|
||||
old : removemostrecent mtime rest
|
||||
| otherwise =
|
||||
i : removemostrecent mtime rest
|
||||
|
||||
retrieveExportWithContentIdentifierS3 :: S3HandleVar -> S3Info -> ExportLocation -> ContentIdentifier -> FilePath -> Annex (Maybe Key) -> MeterUpdate -> Annex (Maybe Key)
|
||||
retrieveExportWithContentIdentifierS3 hv info loc cid dest mkkey p = undefined
|
||||
|
@ -977,13 +1026,15 @@ mkS3VersionedContentIdentifier :: S3VersionID -> ContentIdentifier
|
|||
mkS3VersionedContentIdentifier (S3VersionID _ v) =
|
||||
ContentIdentifier $ T.encodeUtf8 v
|
||||
|
||||
-- S3 returns etags surrounded by double quotes, and the quotes are
|
||||
-- included here.
|
||||
type S3Etag = T.Text
|
||||
|
||||
-- For an unversioned bucket, the S3Etag is instead used as the
|
||||
-- ContentIdentifier.
|
||||
mkS3UnversionedContentIdentifier :: S3Etag -> ContentIdentifier
|
||||
mkS3UnversionedContentIdentifier t =
|
||||
ContentIdentifier $ T.encodeUtf8 t
|
||||
ContentIdentifier $ T.encodeUtf8 $ T.filter (/= '"') t
|
||||
|
||||
setS3VersionID :: S3Info -> UUID -> Key -> Maybe S3VersionID -> Annex ()
|
||||
setS3VersionID info u k vid
|
||||
|
|
|
@ -237,49 +237,26 @@ But l needs to be the same as the l above to avoid git trees proliferation.
|
|||
|
||||
What is the algorythm here?
|
||||
|
||||
It's got two parts, the first finds the current file tree that's
|
||||
in the bucket:
|
||||
1. Build a list of files with historical versions ([[a]]).
|
||||
2. Extract a snapshot from the list
|
||||
3. Remove too new versions from the list
|
||||
4. Recurse with the new list.
|
||||
|
||||
1. Remove the first item from the list, and add it to the file tree.
|
||||
(This is the most recently changed item.)
|
||||
(If the item is a deletion, remove from list but don't add anything to
|
||||
file tree.)
|
||||
2. Skip forward past past versions of the file from #1 to another file.
|
||||
3. Go through the rest of the list, and the first time a file is seen, add
|
||||
it to the file tree. (Unless it's a deletion.)
|
||||
Don't remove these from the list, unless their
|
||||
modification time is the same as the modification time of the item in
|
||||
#1.
|
||||
Extracting a snapshot:
|
||||
|
||||
The second part takes the remaining list from the first part, and
|
||||
recursively generates past file trees:
|
||||
Map over the list, taking the head version of each item and tracking
|
||||
the most recent modification time. Add the filenames to a snapshot list
|
||||
(unless the item is a deletion).
|
||||
|
||||
1. Find the most recently modified item in the list.
|
||||
2. Remove the most recently modified item from the list, and add it to the
|
||||
file tree.
|
||||
(If the item is a deletion, remove from list but don't add anything to
|
||||
file tree.)
|
||||
3. Skip forward past past versions of the file from #1 to another file.
|
||||
4. Go through the rest of the list, and the first time a file is seen, add
|
||||
it to the file tree. (Unless it's a deletion.)
|
||||
Don't remove these from the list, unless their
|
||||
modification time is the same as the modification time of the item in
|
||||
#1.
|
||||
5. The file tree now corresponds to the most recent past version of the S3
|
||||
bucket, so make a ImportableContents that has it as the
|
||||
importableContents. For the importableHistory, recurse this function
|
||||
again, with the remaining contents of the list.
|
||||
Removing too new versions:
|
||||
|
||||
The only expensive op here is finding the most recently modified item
|
||||
in the list. There are only two possibilities for where that is in the
|
||||
list:
|
||||
Map over the list, and when the head version of a file matches the most
|
||||
recent modification time, pop it off.
|
||||
|
||||
1. It may be the first item in the list.
|
||||
2. It may be the first mention of some other file than the first
|
||||
one mentioned in the list.
|
||||
This results in a list that is only versions before the snapshot.
|
||||
|
||||
So that only needs a small scan forward to the next file,
|
||||
and a single date comparison.
|
||||
Overall this is perhaps a bit better than O(n^2) because the size of the list
|
||||
decreases as it goes?
|
||||
|
||||
---
|
||||
|
||||
|
|
Loading…
Reference in a new issue