From 922621301ae750e9ee86807ae21709e2e5b2d352 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 17 Sep 2020 17:27:42 -0400 Subject: [PATCH] Serialize use of C magic library, which is not thread safe. This fixes failures uploading to S3 when using -J. This commit was sponsored by Denis Dzyubenko on Patreon. --- Annex/Magic.hs | 18 +++++++++++++++-- CHANGELOG | 2 ++ ..._d2943f675bbfe86b78b30e4fb17b7fb4._comment | 20 +++++++++++++++++++ 3 files changed, 38 insertions(+), 2 deletions(-) create mode 100644 doc/bugs/concurrent_git-annex-copy_to_s3_special_remote_fails/comment_7_d2943f675bbfe86b78b30e4fb17b7fb4._comment diff --git a/Annex/Magic.hs b/Annex/Magic.hs index 8569381f9e..c408cd50d0 100644 --- a/Annex/Magic.hs +++ b/Annex/Magic.hs @@ -1,6 +1,6 @@ {- Interface to libmagic - - - Copyright 2019 Joey Hess + - Copyright 2019-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -21,6 +21,8 @@ import Control.Monad.IO.Class #ifdef WITH_MAGICMIME import Magic import Utility.Env +import Control.Concurrent +import System.IO.Unsafe (unsafePerformIO) import Common #else type Magic = () @@ -41,7 +43,7 @@ initMagicMime = return Nothing getMagicMime :: Magic -> FilePath -> IO (Maybe (MimeType, MimeEncoding)) #ifdef WITH_MAGICMIME -getMagicMime m f = Just . parse <$> magicFile m f +getMagicMime m f = Just . parse <$> magicConcurrentSafe (magicFile m f) where parse s = let (mimetype, rest) = separate (== ';') s @@ -58,3 +60,15 @@ getMagicMimeType m f = liftIO $ fmap fst <$> getMagicMime m f getMagicMimeEncoding :: MonadIO m => Magic -> FilePath -> m(Maybe MimeEncoding) getMagicMimeEncoding m f = liftIO $ fmap snd <$> getMagicMime m f + +#ifdef WITH_MAGICMIME +{-# NOINLINE mutex #-} +mutex :: MVar () +mutex = unsafePerformIO $ newMVar () + +-- Work around a bug, the library is not concurrency safe and will +-- sometimes access the wrong memory if multiple ones are called at the +-- same time. +magicConcurrentSafe :: IO a -> IO a +magicConcurrentSafe = bracket_ (takeMVar mutex) (putMVar mutex ()) +#endif diff --git a/CHANGELOG b/CHANGELOG index 34c033d83a..0a88a5a030 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -9,6 +9,8 @@ git-annex (8.20200909) UNRELEASED; urgency=medium to determine which batch request each response corresponds to. * aws-0.22 improved its support for setting etags, which improves support for versioned S3 buckets. + * Serialize use of C magic library, which is not thread safe. + This fixes failures uploading to S3 when using -J. -- Joey Hess Mon, 14 Sep 2020 18:34:37 -0400 diff --git a/doc/bugs/concurrent_git-annex-copy_to_s3_special_remote_fails/comment_7_d2943f675bbfe86b78b30e4fb17b7fb4._comment b/doc/bugs/concurrent_git-annex-copy_to_s3_special_remote_fails/comment_7_d2943f675bbfe86b78b30e4fb17b7fb4._comment new file mode 100644 index 0000000000..9331b32306 --- /dev/null +++ b/doc/bugs/concurrent_git-annex-copy_to_s3_special_remote_fails/comment_7_d2943f675bbfe86b78b30e4fb17b7fb4._comment @@ -0,0 +1,20 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 7""" + date="2020-09-17T21:03:27Z" + content=""" +I instrumented git-annex to output the content-type it's sending, +and it IS corrupted before hitting the http stack. It seems that libmagic +is returning garbage. + +Perhaps it's not thread safe? If so, it might be *writing* to the wrong +location too in some cases, so could explain all this weird behavior. +And S3 remote is almost the only part of git-annex that uses libmagic, and the +only part that uses it concurrently... + +So, I added a mutex around uses of it. Problem went away. Bug sent to +library maintainer, and for now I hope this fixes the problem. + +If you were seeing hangs, crashes, etc, please try with a git-annex +autobuild made after this comment.. +"""]]