avoid storing same filename repeatedly in versioned import from S3
Logically, this should make it need a lot less memory when files have been changed many times. In my tests, it didn't seem to change memory use at all. Unsure why, it is working. It's possible the Response is not getting garbage collected due to pinning. But as far as I can see, all parts of it that are retained get copied in a way that won't keep the whole thing pinned in memory.
This commit is contained in:
parent
dc5bf24823
commit
51b2d6d8c5
1 changed files with 29 additions and 15 deletions
44
Remote/S3.hs
44
Remote/S3.hs
|
@ -582,7 +582,7 @@ listImportableContentsS3 hv r info c =
|
||||||
| versioning info = do
|
| versioning info = do
|
||||||
rsp <- sendS3Handle h $
|
rsp <- sendS3Handle h $
|
||||||
S3.getBucketObjectVersions (bucket info)
|
S3.getBucketObjectVersions (bucket info)
|
||||||
continuelistversioned 0 h [] rsp
|
continuelistversioned Nothing h [] rsp
|
||||||
| otherwise = do
|
| otherwise = do
|
||||||
rsp <- sendS3Handle h $
|
rsp <- sendS3Handle h $
|
||||||
(S3.getBucket (bucket info))
|
(S3.getBucket (bucket info))
|
||||||
|
@ -608,11 +608,13 @@ listImportableContentsS3 hv r info c =
|
||||||
Nothing -> nomore
|
Nothing -> nomore
|
||||||
| otherwise = nomore
|
| otherwise = nomore
|
||||||
where
|
where
|
||||||
nomore = return $
|
nomore = do
|
||||||
mkImportableContentsUnversioned
|
l' <- extractFromResourceT $
|
||||||
(reverse (extractunversioned rsp:l))
|
extractunversioned rsp
|
||||||
|
return $ mkImportableContentsUnversioned
|
||||||
|
(reverse (l':l))
|
||||||
|
|
||||||
continuelistversioned n h l rsp
|
continuelistversioned reuse h l rsp
|
||||||
| S3.gbovrIsTruncated rsp = do
|
| S3.gbovrIsTruncated rsp = do
|
||||||
rsp' <- sendS3Handle h $
|
rsp' <- sendS3Handle h $
|
||||||
(S3.getBucketObjectVersions (bucket info))
|
(S3.getBucketObjectVersions (bucket info))
|
||||||
|
@ -620,12 +622,14 @@ listImportableContentsS3 hv r info c =
|
||||||
, S3.gbovVersionIdMarker = S3.gbovrNextVersionIdMarker rsp
|
, S3.gbovVersionIdMarker = S3.gbovrNextVersionIdMarker rsp
|
||||||
, S3.gbovPrefix = fileprefix
|
, S3.gbovPrefix = fileprefix
|
||||||
}
|
}
|
||||||
l' <- extractFromResourceT $
|
(l', reuse') <- extractFromResourceT $
|
||||||
extractversioned rsp
|
extractversioned reuse rsp
|
||||||
continuelistversioned (length l' + n) h (l':l) rsp'
|
continuelistversioned reuse' h (l':l) rsp'
|
||||||
| otherwise = return $
|
| otherwise = do
|
||||||
mkImportableContentsVersioned
|
(l', _) <- extractFromResourceT $
|
||||||
(reverse (extractversioned rsp:l))
|
extractversioned reuse rsp
|
||||||
|
return $ mkImportableContentsVersioned
|
||||||
|
(reverse (l':l))
|
||||||
|
|
||||||
extractunversioned = mapMaybe extractunversioned' . S3.gbrContents
|
extractunversioned = mapMaybe extractunversioned' . S3.gbrContents
|
||||||
extractunversioned' oi = do
|
extractunversioned' oi = do
|
||||||
|
@ -635,14 +639,24 @@ listImportableContentsS3 hv r info c =
|
||||||
let cid = mkS3UnversionedContentIdentifier $ S3.objectETag oi
|
let cid = mkS3UnversionedContentIdentifier $ S3.objectETag oi
|
||||||
return (loc, (cid, sz))
|
return (loc, (cid, sz))
|
||||||
|
|
||||||
extractversioned = mapMaybe extractversioned' . S3.gbovrContents
|
extractversioned reuse = extractversioned' reuse . S3.gbovrContents
|
||||||
extractversioned' ovi@(S3.ObjectVersion {}) = do
|
extractversioned' reuse [] = ([], reuse)
|
||||||
|
extractversioned' reuse (x:xs) = case extractversioned'' reuse x of
|
||||||
|
Just (v, reuse') ->
|
||||||
|
let (l, reuse'') = extractversioned' reuse' xs
|
||||||
|
in (v:l, reuse'')
|
||||||
|
Nothing -> extractversioned' reuse xs
|
||||||
|
extractversioned'' reuse ovi@(S3.ObjectVersion {}) = do
|
||||||
loc <- bucketImportLocation info $
|
loc <- bucketImportLocation info $
|
||||||
T.unpack $ S3.oviKey ovi
|
T.unpack $ S3.oviKey ovi
|
||||||
|
-- Avoid storing the same filename in memory repeatedly.
|
||||||
|
let loc' = case reuse of
|
||||||
|
Just reuseloc | reuseloc == loc -> reuseloc
|
||||||
|
_ -> loc
|
||||||
let sz = S3.oviSize ovi
|
let sz = S3.oviSize ovi
|
||||||
let cid = mkS3VersionedContentIdentifier' ovi
|
let cid = mkS3VersionedContentIdentifier' ovi
|
||||||
return ((loc, (cid, sz)), S3.oviLastModified ovi)
|
return (((loc', (cid, sz)), S3.oviLastModified ovi), Just loc')
|
||||||
extractversioned' (S3.DeleteMarker {}) = Nothing
|
extractversioned'' _ (S3.DeleteMarker {}) = Nothing
|
||||||
|
|
||||||
mkImportableContentsUnversioned :: [[(ImportLocation, (ContentIdentifier, ByteSize))]] -> ImportableContents (ContentIdentifier, ByteSize)
|
mkImportableContentsUnversioned :: [[(ImportLocation, (ContentIdentifier, ByteSize))]] -> ImportableContents (ContentIdentifier, ByteSize)
|
||||||
mkImportableContentsUnversioned l = ImportableContents
|
mkImportableContentsUnversioned l = ImportableContents
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue