From 1610d9477698c6be1bc27ce7782a89b066771656 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 29 Sep 2020 13:00:41 -0400 Subject: [PATCH] addurl: Avoid a redundant git ignores check for speed Ensure that checkCanAdd is used everywhere a file is added to git, so git add is run with -f, presumably avoiding the work it would usually do to check ignores. --- CHANGELOG | 1 + Command/AddUrl.hs | 50 ++++++++-------- Command/ImportFeed.hs | 57 +++++++++++-------- ..._8e931e54738640cecf0bbaf80a3e8660._comment | 8 +++ 4 files changed, 69 insertions(+), 47 deletions(-) create mode 100644 doc/todo/make_git_check-ignore_less___34__cpu_heavy__34___while_addurl_--batch_--fast___63__/comment_5_8e931e54738640cecf0bbaf80a3e8660._comment diff --git a/CHANGELOG b/CHANGELOG index ad4b04d294..905f163944 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -30,6 +30,7 @@ git-annex (8.20200909) UNRELEASED; urgency=medium * sync: When run without --content, import without copying from importtree=yes directory special remotes. (Other special remotes may support this later as well.) + * addurl: Avoid a redundant git ignores check for speed. -- Joey Hess Mon, 14 Sep 2020 18:34:37 -0400 diff --git a/Command/AddUrl.hs b/Command/AddUrl.hs index b77ad06111..3267369965 100644 --- a/Command/AddUrl.hs +++ b/Command/AddUrl.hs @@ -180,12 +180,12 @@ performRemote addunlockedmatcher r o uri file sz = ifAnnexed (toRawFilePath file geturi = next $ isJust <$> downloadRemoteFile addunlockedmatcher r (downloadOptions o) uri file sz downloadRemoteFile :: AddUnlockedMatcher -> Remote -> DownloadOptions -> URLString -> FilePath -> Maybe Integer -> Annex (Maybe Key) -downloadRemoteFile addunlockedmatcher r o uri file sz = checkCanAdd o file $ do +downloadRemoteFile addunlockedmatcher r o uri file sz = checkCanAdd o file $ \canadd -> do let urlkey = Backend.URL.fromUrl uri sz createWorkTreeDirectory (parentDir file) ifM (Annex.getState Annex.fast <||> pure (relaxedOption o)) ( do - addWorkTree o addunlockedmatcher (Remote.uuid r) loguri file urlkey Nothing + addWorkTree canadd addunlockedmatcher (Remote.uuid r) loguri file urlkey Nothing return (Just urlkey) , do -- Set temporary url for the urlkey @@ -194,7 +194,7 @@ downloadRemoteFile addunlockedmatcher r o uri file sz = checkCanAdd o file $ do setTempUrl urlkey loguri let downloader = \dest p -> fst <$> Remote.verifiedAction (Remote.retrieveKeyFile r urlkey af dest p) - ret <- downloadWith o addunlockedmatcher downloader urlkey (Remote.uuid r) loguri file + ret <- downloadWith canadd addunlockedmatcher downloader urlkey (Remote.uuid r) loguri file removeTempUrl urlkey return ret ) @@ -311,10 +311,10 @@ downloadWeb addunlockedmatcher o url urlinfo file = ( tryyoutubedl tmp , normalfinish tmp ) - normalfinish tmp = checkCanAdd o file $ do + normalfinish tmp = checkCanAdd o file $ \canadd -> do showDestinationFile file createWorkTreeDirectory (parentDir file) - Just <$> finishDownloadWith o addunlockedmatcher tmp webUUID url file + Just <$> finishDownloadWith canadd addunlockedmatcher tmp webUUID url file -- Ask youtube-dl what filename it will download first, -- so it's only used when the file contains embedded media. tryyoutubedl tmp = youtubeDlFileNameHtmlOnly url >>= \case @@ -332,9 +332,9 @@ downloadWeb addunlockedmatcher o url urlinfo file = youtubeDl url workdir >>= \case Right (Just mediafile) -> do cleanuptmp - checkCanAdd o dest $ do + checkCanAdd o dest $ \canadd -> do showDestinationFile dest - addWorkTree o addunlockedmatcher webUUID mediaurl dest mediakey (Just mediafile) + addWorkTree canadd addunlockedmatcher webUUID mediaurl dest mediakey (Just mediafile) return $ Just mediakey Right Nothing -> normalfinish tmp Left msg -> do @@ -377,13 +377,13 @@ showDestinationFile file = do - Downloads the url, sets up the worktree file, and returns the - real key. -} -downloadWith :: DownloadOptions -> AddUnlockedMatcher -> (FilePath -> MeterUpdate -> Annex Bool) -> Key -> UUID -> URLString -> FilePath -> Annex (Maybe Key) -downloadWith o addunlockedmatcher downloader dummykey u url file = +downloadWith :: CanAddFile -> AddUnlockedMatcher -> (FilePath -> MeterUpdate -> Annex Bool) -> Key -> UUID -> URLString -> FilePath -> Annex (Maybe Key) +downloadWith canadd addunlockedmatcher downloader dummykey u url file = go =<< downloadWith' downloader dummykey u url afile where afile = AssociatedFile (Just (toRawFilePath file)) go Nothing = return Nothing - go (Just tmp) = Just <$> finishDownloadWith o addunlockedmatcher tmp u url file + go (Just tmp) = Just <$> finishDownloadWith canadd addunlockedmatcher tmp u url file {- Like downloadWith, but leaves the dummy key content in - the returned location. -} @@ -399,8 +399,8 @@ downloadWith' downloader dummykey u url afile = then return (Just tmp) else return Nothing -finishDownloadWith :: DownloadOptions -> AddUnlockedMatcher -> FilePath -> UUID -> URLString -> FilePath -> Annex Key -finishDownloadWith o addunlockedmatcher tmp u url file = do +finishDownloadWith :: CanAddFile -> AddUnlockedMatcher -> FilePath -> UUID -> URLString -> FilePath -> Annex Key +finishDownloadWith canadd addunlockedmatcher tmp u url file = do backend <- chooseBackend file let source = KeySource { keyFilename = toRawFilePath file @@ -408,7 +408,7 @@ finishDownloadWith o addunlockedmatcher tmp u url file = do , inodeCache = Nothing } key <- fst <$> genKey source nullMeterUpdate backend - addWorkTree o addunlockedmatcher u url file key (Just tmp) + addWorkTree canadd addunlockedmatcher u url file key (Just tmp) return key {- Adds the url size to the Key. -} @@ -418,8 +418,8 @@ addSizeUrlKey urlinfo key = alterKey key $ \d -> d } {- Adds worktree file to the repository. -} -addWorkTree :: DownloadOptions -> AddUnlockedMatcher -> UUID -> URLString -> FilePath -> Key -> Maybe FilePath -> Annex () -addWorkTree o addunlockedmatcher u url file key mtmp = case mtmp of +addWorkTree :: CanAddFile -> AddUnlockedMatcher -> UUID -> URLString -> FilePath -> Key -> Maybe FilePath -> Annex () +addWorkTree _ addunlockedmatcher u url file key mtmp = case mtmp of Nothing -> go Just tmp -> do -- Move to final location for large file check. @@ -435,20 +435,22 @@ addWorkTree o addunlockedmatcher u url file key mtmp = case mtmp of -- than the work tree file. liftIO $ renameFile file tmp go - else void $ Command.Add.addSmall - (checkGitIgnoreOption o) - (toRawFilePath file) + else void $ Command.Add.addSmall noci (toRawFilePath file) where go = do maybeShowJSON $ JSONChunk [("key", serializeKey key)] setUrlPresent key url logChange key u InfoPresent - ifM (addAnnexedFile (checkGitIgnoreOption o) addunlockedmatcher file key mtmp) + ifM (addAnnexedFile noci addunlockedmatcher file key mtmp) ( do when (isJust mtmp) $ logStatus key InfoPresent , maybe noop (\tmp -> pruneTmpWorkDirBefore tmp (liftIO . nukeFile)) mtmp ) + + -- git does not need to check ignores, because that has already + -- been done, as witnessed by the CannAddFile. + noci = CheckGitIgnore False nodownloadWeb :: AddUnlockedMatcher -> DownloadOptions -> URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key) nodownloadWeb addunlockedmatcher o url urlinfo file @@ -475,10 +477,10 @@ youtubeDlDestFile o destfile mediafile | otherwise = takeFileName mediafile nodownloadWeb' :: DownloadOptions -> AddUnlockedMatcher -> URLString -> Key -> FilePath -> Annex (Maybe Key) -nodownloadWeb' o addunlockedmatcher url key file = checkCanAdd o file $ do +nodownloadWeb' o addunlockedmatcher url key file = checkCanAdd o file $ \canadd -> do showDestinationFile file createWorkTreeDirectory (parentDir file) - addWorkTree o addunlockedmatcher webUUID url file key Nothing + addWorkTree canadd addunlockedmatcher webUUID url file key Nothing return (Just key) url2file :: URI -> Maybe Int -> Int -> FilePath @@ -510,7 +512,9 @@ adjustFile o = addprefix . addsuffix addprefix f = maybe f (++ f) (prefixOption o) addsuffix f = maybe f (f ++) (suffixOption o) -checkCanAdd :: DownloadOptions -> FilePath -> Annex (Maybe a) -> Annex (Maybe a) +data CanAddFile = CanAddFile + +checkCanAdd :: DownloadOptions -> FilePath -> (CanAddFile -> Annex (Maybe a)) -> Annex (Maybe a) checkCanAdd o file a = ifM (isJust <$> (liftIO $ catchMaybeIO $ getSymbolicLinkStatus file)) ( do warning $ file ++ " already exists; not overwriting" @@ -519,6 +523,6 @@ checkCanAdd o file a = ifM (isJust <$> (liftIO $ catchMaybeIO $ getSymbolicLinkS ( do warning $ "not adding " ++ file ++ " which is .gitignored (use --no-check-gitignore to override)" return Nothing - , a + , a CanAddFile ) ) diff --git a/Command/ImportFeed.hs b/Command/ImportFeed.hs index 16281b4291..9dd379019c 100644 --- a/Command/ImportFeed.hs +++ b/Command/ImportFeed.hs @@ -34,7 +34,7 @@ import Logs.File import qualified Utility.Format import Utility.Tmp import Utility.Metered -import Command.AddUrl (addUrlFile, downloadRemoteFile, parseDownloadOptions, DownloadOptions(..)) +import Command.AddUrl (addUrlFile, downloadRemoteFile, parseDownloadOptions, DownloadOptions(..), checkCanAdd) import Annex.UUID import Backend.URL (fromUrl) import Annex.Content @@ -198,28 +198,28 @@ performDownload addunlockedmatcher opts cache todownload = case location todownl -- don't use youtube-dl , rawOption = True } - let go urlinfo = maybeToList <$> addUrlFile addunlockedmatcher dlopts url urlinfo f + let go urlinfo = Just . maybeToList <$> addUrlFile addunlockedmatcher dlopts url urlinfo f if relaxedOption (downloadOptions opts) then go Url.assumeUrlExists else Url.withUrlOptions (Url.getUrlInfo url) >>= \case Right urlinfo -> go urlinfo Left err -> do warning err - return [] + return (Just []) else do res <- tryNonAsync $ maybe (error $ "unable to checkUrl of " ++ Remote.name r) (flip id url) (Remote.checkUrl r) case res of - Left _ -> return [] + Left _ -> return (Just []) Right (UrlContents sz _) -> - maybeToList <$> + Just . maybeToList <$> downloadRemoteFile addunlockedmatcher r (downloadOptions opts) url f sz Right (UrlMulti l) -> do kl <- forM l $ \(url', sz, subf) -> downloadRemoteFile addunlockedmatcher r (downloadOptions opts) url' (f sanitizeFilePath subf) sz - return $ if all isJust kl + return $ Just $ if all isJust kl then catMaybes kl else [] @@ -257,19 +257,26 @@ performDownload addunlockedmatcher opts cache todownload = case location todownl Nothing -> return True Just f -> do showStartOther "addurl" (Just url) (SeekInput []) - ks <- getter f - if null ks - then do + getter f >>= \case + Just ks + -- Download problem. + | null ks -> do + showEndFail + checkFeedBroken (feedurl todownload) + | otherwise -> do + forM_ ks $ \key -> + ifM (annexGenMetaData <$> Annex.getGitConfig) + ( addMetaData key $ extractMetaData todownload + , addMetaData key $ minimalMetaData todownload + ) + showEndOk + return True + -- Was not able to add anything, + -- but not because of a download + -- problem. + Nothing -> do showEndFail - checkFeedBroken (feedurl todownload) - else do - forM_ ks $ \key -> - ifM (annexGenMetaData <$> Annex.getGitConfig) - ( addMetaData key $ extractMetaData todownload - , addMetaData key $ minimalMetaData todownload - ) - showEndOk - return True + return False {- Find a unique filename to save the url to. - If the file exists, prefixes it with a number. @@ -306,9 +313,10 @@ performDownload addunlockedmatcher opts cache todownload = case location todownl let ext = case takeExtension mediafile of [] -> ".m" s -> s - ok <- rundownload linkurl ext $ \f -> do - addWorkTree (downloadOptions opts) addunlockedmatcher webUUID mediaurl f mediakey (Just mediafile) - return [mediakey] + ok <- rundownload linkurl ext $ \f -> + checkCanAdd (downloadOptions opts) f $ \canadd -> do + addWorkTree canadd addunlockedmatcher webUUID mediaurl f mediakey (Just mediafile) + return (Just [mediakey]) return (Just ok) -- youtude-dl didn't support it, so -- download it as if the link were @@ -325,9 +333,10 @@ performDownload addunlockedmatcher opts cache todownload = case location todownl addmediafast linkurl mediaurl mediakey = ifM (pure (not (rawOption (downloadOptions opts))) <&&> youtubeDlSupported linkurl) - ( rundownload linkurl ".m" $ \f -> do - addWorkTree (downloadOptions opts) addunlockedmatcher webUUID mediaurl f mediakey Nothing - return [mediakey] + ( rundownload linkurl ".m" $ \f -> + checkCanAdd (downloadOptions opts) f $ \canadd -> do + addWorkTree canadd addunlockedmatcher webUUID mediaurl f mediakey Nothing + return (Just [mediakey]) , performDownload addunlockedmatcher opts cache todownload { location = Enclosure linkurl } ) diff --git a/doc/todo/make_git_check-ignore_less___34__cpu_heavy__34___while_addurl_--batch_--fast___63__/comment_5_8e931e54738640cecf0bbaf80a3e8660._comment b/doc/todo/make_git_check-ignore_less___34__cpu_heavy__34___while_addurl_--batch_--fast___63__/comment_5_8e931e54738640cecf0bbaf80a3e8660._comment new file mode 100644 index 0000000000..ade7b88321 --- /dev/null +++ b/doc/todo/make_git_check-ignore_less___34__cpu_heavy__34___while_addurl_--batch_--fast___63__/comment_5_8e931e54738640cecf0bbaf80a3e8660._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 5""" + date="2020-09-29T16:58:18Z" + content=""" +Implemented the git add -f optimisation. Unsure how much of an optimisation +it really is tho. +"""]]