git-annex/Annex/Link.hs

465 lines
16 KiB
Haskell
Raw Normal View History

fully support core.symlinks=false in all relevant symlink handling code Refactored annex link code into nice clean new library. Audited and dealt with calls to createSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ createSymbolicLink linktarget file only when core.symlinks=true Assistant/WebApp/Configurators/Local.hs: createSymbolicLink link link test if symlinks can be made Command/Fix.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/FromKey.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/Indirect.hs: liftIO $ createSymbolicLink l f refuses to run if core.symlinks=false Init.hs: createSymbolicLink f f2 test if symlinks can be made Remote/Directory.hs: go [file] = catchBoolIO $ createSymbolicLink file f >> return True fast key linking; catches failure to make symlink and falls back to copy Remote/Git.hs: liftIO $ catchBoolIO $ createSymbolicLink loc file >> return True ditto Upgrade/V1.hs: liftIO $ createSymbolicLink link f v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to readSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ catchMaybeIO $ readSymbolicLink file only when core.symlinks=true Assistant/Threads/Watcher.hs: ifM ((==) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) code that fixes real symlinks when inotify sees them It's ok to not fix psdueo-symlinks. Assistant/Threads/Watcher.hs: mlink <- liftIO (catchMaybeIO $ readSymbolicLink file) ditto Command/Fix.hs: stopUnless ((/=) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) $ do command only works in indirect mode Upgrade/V1.hs: getsymlink = takeFileName <$> readSymbolicLink file v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to isSymbolicLink. (Typically used with getSymbolicLinkStatus, but that is just used because getFileStatus is not as robust; it also works on pseudolinks.) Remaining calls are all safe, because: Assistant/Threads/SanityChecker.hs: | isSymbolicLink s -> addsymlink file ms only handles staging of symlinks that were somehow not staged (might need to be updated to support pseudolinks, but this is only a belt-and-suspenders check anyway, and I've never seen the code run) Command/Add.hs: if isSymbolicLink s || not (isRegularFile s) avoids adding symlinks to the annex, so not relevant Command/Indirect.hs: | isSymbolicLink s -> void $ flip whenAnnexed f $ only allowed on systems that support symlinks Command/Indirect.hs: whenM (liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f) $ do ditto Seek.hs:notSymlink f = liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f used to find unlocked files, only relevant in indirect mode Utility/FSEvents.hs: | Files.isSymbolicLink s = runhook addSymlinkHook $ Just s Utility/FSEvents.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: checkfiletype Files.isSymbolicLink addSymlinkHook f Utility/Kqueue.hs: | Files.isSymbolicLink s = callhook addSymlinkHook (Just s) change all above are lower-level, not relevant Audited and dealt with calls to isSymLink. Remaining calls are all safe, because: Annex/Direct.hs: | isSymLink (getmode item) = This is looking at git diff-tree objects, not files on disk Command/Unused.hs: | isSymLink (LsTree.mode l) = do This is looking at git ls-tree, not file on disk Utility/FileMode.hs:isSymLink :: FileMode -> Bool Utility/FileMode.hs:isSymLink = checkMode symbolicLinkMode low-level Done!!
2013-02-17 19:05:55 +00:00
{- git-annex links to content
-
- On file systems that support them, symlinks are used.
-
- On other filesystems, git instead stores the symlink target in a regular
- file.
-
- Pointer files are used instead of symlinks for unlocked files.
-
- Copyright 2013-2022 Joey Hess <id@joeyh.name>
fully support core.symlinks=false in all relevant symlink handling code Refactored annex link code into nice clean new library. Audited and dealt with calls to createSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ createSymbolicLink linktarget file only when core.symlinks=true Assistant/WebApp/Configurators/Local.hs: createSymbolicLink link link test if symlinks can be made Command/Fix.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/FromKey.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/Indirect.hs: liftIO $ createSymbolicLink l f refuses to run if core.symlinks=false Init.hs: createSymbolicLink f f2 test if symlinks can be made Remote/Directory.hs: go [file] = catchBoolIO $ createSymbolicLink file f >> return True fast key linking; catches failure to make symlink and falls back to copy Remote/Git.hs: liftIO $ catchBoolIO $ createSymbolicLink loc file >> return True ditto Upgrade/V1.hs: liftIO $ createSymbolicLink link f v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to readSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ catchMaybeIO $ readSymbolicLink file only when core.symlinks=true Assistant/Threads/Watcher.hs: ifM ((==) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) code that fixes real symlinks when inotify sees them It's ok to not fix psdueo-symlinks. Assistant/Threads/Watcher.hs: mlink <- liftIO (catchMaybeIO $ readSymbolicLink file) ditto Command/Fix.hs: stopUnless ((/=) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) $ do command only works in indirect mode Upgrade/V1.hs: getsymlink = takeFileName <$> readSymbolicLink file v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to isSymbolicLink. (Typically used with getSymbolicLinkStatus, but that is just used because getFileStatus is not as robust; it also works on pseudolinks.) Remaining calls are all safe, because: Assistant/Threads/SanityChecker.hs: | isSymbolicLink s -> addsymlink file ms only handles staging of symlinks that were somehow not staged (might need to be updated to support pseudolinks, but this is only a belt-and-suspenders check anyway, and I've never seen the code run) Command/Add.hs: if isSymbolicLink s || not (isRegularFile s) avoids adding symlinks to the annex, so not relevant Command/Indirect.hs: | isSymbolicLink s -> void $ flip whenAnnexed f $ only allowed on systems that support symlinks Command/Indirect.hs: whenM (liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f) $ do ditto Seek.hs:notSymlink f = liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f used to find unlocked files, only relevant in indirect mode Utility/FSEvents.hs: | Files.isSymbolicLink s = runhook addSymlinkHook $ Just s Utility/FSEvents.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: checkfiletype Files.isSymbolicLink addSymlinkHook f Utility/Kqueue.hs: | Files.isSymbolicLink s = callhook addSymlinkHook (Just s) change all above are lower-level, not relevant Audited and dealt with calls to isSymLink. Remaining calls are all safe, because: Annex/Direct.hs: | isSymLink (getmode item) = This is looking at git diff-tree objects, not files on disk Command/Unused.hs: | isSymLink (LsTree.mode l) = do This is looking at git ls-tree, not file on disk Utility/FileMode.hs:isSymLink :: FileMode -> Bool Utility/FileMode.hs:isSymLink = checkMode symbolicLinkMode low-level Done!!
2013-02-17 19:05:55 +00:00
-
- Licensed under the GNU AGPL version 3 or higher.
fully support core.symlinks=false in all relevant symlink handling code Refactored annex link code into nice clean new library. Audited and dealt with calls to createSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ createSymbolicLink linktarget file only when core.symlinks=true Assistant/WebApp/Configurators/Local.hs: createSymbolicLink link link test if symlinks can be made Command/Fix.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/FromKey.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/Indirect.hs: liftIO $ createSymbolicLink l f refuses to run if core.symlinks=false Init.hs: createSymbolicLink f f2 test if symlinks can be made Remote/Directory.hs: go [file] = catchBoolIO $ createSymbolicLink file f >> return True fast key linking; catches failure to make symlink and falls back to copy Remote/Git.hs: liftIO $ catchBoolIO $ createSymbolicLink loc file >> return True ditto Upgrade/V1.hs: liftIO $ createSymbolicLink link f v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to readSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ catchMaybeIO $ readSymbolicLink file only when core.symlinks=true Assistant/Threads/Watcher.hs: ifM ((==) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) code that fixes real symlinks when inotify sees them It's ok to not fix psdueo-symlinks. Assistant/Threads/Watcher.hs: mlink <- liftIO (catchMaybeIO $ readSymbolicLink file) ditto Command/Fix.hs: stopUnless ((/=) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) $ do command only works in indirect mode Upgrade/V1.hs: getsymlink = takeFileName <$> readSymbolicLink file v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to isSymbolicLink. (Typically used with getSymbolicLinkStatus, but that is just used because getFileStatus is not as robust; it also works on pseudolinks.) Remaining calls are all safe, because: Assistant/Threads/SanityChecker.hs: | isSymbolicLink s -> addsymlink file ms only handles staging of symlinks that were somehow not staged (might need to be updated to support pseudolinks, but this is only a belt-and-suspenders check anyway, and I've never seen the code run) Command/Add.hs: if isSymbolicLink s || not (isRegularFile s) avoids adding symlinks to the annex, so not relevant Command/Indirect.hs: | isSymbolicLink s -> void $ flip whenAnnexed f $ only allowed on systems that support symlinks Command/Indirect.hs: whenM (liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f) $ do ditto Seek.hs:notSymlink f = liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f used to find unlocked files, only relevant in indirect mode Utility/FSEvents.hs: | Files.isSymbolicLink s = runhook addSymlinkHook $ Just s Utility/FSEvents.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: checkfiletype Files.isSymbolicLink addSymlinkHook f Utility/Kqueue.hs: | Files.isSymbolicLink s = callhook addSymlinkHook (Just s) change all above are lower-level, not relevant Audited and dealt with calls to isSymLink. Remaining calls are all safe, because: Annex/Direct.hs: | isSymLink (getmode item) = This is looking at git diff-tree objects, not files on disk Command/Unused.hs: | isSymLink (LsTree.mode l) = do This is looking at git ls-tree, not file on disk Utility/FileMode.hs:isSymLink :: FileMode -> Bool Utility/FileMode.hs:isSymLink = checkMode symbolicLinkMode low-level Done!!
2013-02-17 19:05:55 +00:00
-}
{-# LANGUAGE CPP, BangPatterns, OverloadedStrings #-}
fully support core.symlinks=false in all relevant symlink handling code Refactored annex link code into nice clean new library. Audited and dealt with calls to createSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ createSymbolicLink linktarget file only when core.symlinks=true Assistant/WebApp/Configurators/Local.hs: createSymbolicLink link link test if symlinks can be made Command/Fix.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/FromKey.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/Indirect.hs: liftIO $ createSymbolicLink l f refuses to run if core.symlinks=false Init.hs: createSymbolicLink f f2 test if symlinks can be made Remote/Directory.hs: go [file] = catchBoolIO $ createSymbolicLink file f >> return True fast key linking; catches failure to make symlink and falls back to copy Remote/Git.hs: liftIO $ catchBoolIO $ createSymbolicLink loc file >> return True ditto Upgrade/V1.hs: liftIO $ createSymbolicLink link f v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to readSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ catchMaybeIO $ readSymbolicLink file only when core.symlinks=true Assistant/Threads/Watcher.hs: ifM ((==) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) code that fixes real symlinks when inotify sees them It's ok to not fix psdueo-symlinks. Assistant/Threads/Watcher.hs: mlink <- liftIO (catchMaybeIO $ readSymbolicLink file) ditto Command/Fix.hs: stopUnless ((/=) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) $ do command only works in indirect mode Upgrade/V1.hs: getsymlink = takeFileName <$> readSymbolicLink file v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to isSymbolicLink. (Typically used with getSymbolicLinkStatus, but that is just used because getFileStatus is not as robust; it also works on pseudolinks.) Remaining calls are all safe, because: Assistant/Threads/SanityChecker.hs: | isSymbolicLink s -> addsymlink file ms only handles staging of symlinks that were somehow not staged (might need to be updated to support pseudolinks, but this is only a belt-and-suspenders check anyway, and I've never seen the code run) Command/Add.hs: if isSymbolicLink s || not (isRegularFile s) avoids adding symlinks to the annex, so not relevant Command/Indirect.hs: | isSymbolicLink s -> void $ flip whenAnnexed f $ only allowed on systems that support symlinks Command/Indirect.hs: whenM (liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f) $ do ditto Seek.hs:notSymlink f = liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f used to find unlocked files, only relevant in indirect mode Utility/FSEvents.hs: | Files.isSymbolicLink s = runhook addSymlinkHook $ Just s Utility/FSEvents.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: checkfiletype Files.isSymbolicLink addSymlinkHook f Utility/Kqueue.hs: | Files.isSymbolicLink s = callhook addSymlinkHook (Just s) change all above are lower-level, not relevant Audited and dealt with calls to isSymLink. Remaining calls are all safe, because: Annex/Direct.hs: | isSymLink (getmode item) = This is looking at git diff-tree objects, not files on disk Command/Unused.hs: | isSymLink (LsTree.mode l) = do This is looking at git ls-tree, not file on disk Utility/FileMode.hs:isSymLink :: FileMode -> Bool Utility/FileMode.hs:isSymLink = checkMode symbolicLinkMode low-level Done!!
2013-02-17 19:05:55 +00:00
module Annex.Link where
import Annex.Common
fully support core.symlinks=false in all relevant symlink handling code Refactored annex link code into nice clean new library. Audited and dealt with calls to createSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ createSymbolicLink linktarget file only when core.symlinks=true Assistant/WebApp/Configurators/Local.hs: createSymbolicLink link link test if symlinks can be made Command/Fix.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/FromKey.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/Indirect.hs: liftIO $ createSymbolicLink l f refuses to run if core.symlinks=false Init.hs: createSymbolicLink f f2 test if symlinks can be made Remote/Directory.hs: go [file] = catchBoolIO $ createSymbolicLink file f >> return True fast key linking; catches failure to make symlink and falls back to copy Remote/Git.hs: liftIO $ catchBoolIO $ createSymbolicLink loc file >> return True ditto Upgrade/V1.hs: liftIO $ createSymbolicLink link f v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to readSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ catchMaybeIO $ readSymbolicLink file only when core.symlinks=true Assistant/Threads/Watcher.hs: ifM ((==) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) code that fixes real symlinks when inotify sees them It's ok to not fix psdueo-symlinks. Assistant/Threads/Watcher.hs: mlink <- liftIO (catchMaybeIO $ readSymbolicLink file) ditto Command/Fix.hs: stopUnless ((/=) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) $ do command only works in indirect mode Upgrade/V1.hs: getsymlink = takeFileName <$> readSymbolicLink file v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to isSymbolicLink. (Typically used with getSymbolicLinkStatus, but that is just used because getFileStatus is not as robust; it also works on pseudolinks.) Remaining calls are all safe, because: Assistant/Threads/SanityChecker.hs: | isSymbolicLink s -> addsymlink file ms only handles staging of symlinks that were somehow not staged (might need to be updated to support pseudolinks, but this is only a belt-and-suspenders check anyway, and I've never seen the code run) Command/Add.hs: if isSymbolicLink s || not (isRegularFile s) avoids adding symlinks to the annex, so not relevant Command/Indirect.hs: | isSymbolicLink s -> void $ flip whenAnnexed f $ only allowed on systems that support symlinks Command/Indirect.hs: whenM (liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f) $ do ditto Seek.hs:notSymlink f = liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f used to find unlocked files, only relevant in indirect mode Utility/FSEvents.hs: | Files.isSymbolicLink s = runhook addSymlinkHook $ Just s Utility/FSEvents.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: checkfiletype Files.isSymbolicLink addSymlinkHook f Utility/Kqueue.hs: | Files.isSymbolicLink s = callhook addSymlinkHook (Just s) change all above are lower-level, not relevant Audited and dealt with calls to isSymLink. Remaining calls are all safe, because: Annex/Direct.hs: | isSymLink (getmode item) = This is looking at git diff-tree objects, not files on disk Command/Unused.hs: | isSymLink (LsTree.mode l) = do This is looking at git ls-tree, not file on disk Utility/FileMode.hs:isSymLink :: FileMode -> Bool Utility/FileMode.hs:isSymLink = checkMode symbolicLinkMode low-level Done!!
2013-02-17 19:05:55 +00:00
import qualified Annex
import qualified Annex.Queue
import qualified Git.Queue
import qualified Git.UpdateIndex
import qualified Git.Index
import qualified Git.LockFile
import qualified Git.Env
import qualified Git
add restage log When pointer files need to be restaged, they're first written to the log, and then when the restage operation runs, it reads the log. This way, if the git-annex process is interrupted before it can do the restaging, a later git-annex process can do it. Currently, this lets a git-annex get/drop command be interrupted and then re-ran, and as long as it gets/drops additional files, it will clean up after the interrupted command. But more changes are needed to make it easier to restage after an interrupted process. Kept using the git queue to run the restage action, even though the list of files that it builds up for that action is not actually used by the action. This could perhaps be simplified to make restaging a cleanup action that gets registered, rather than using the git queue for it. But I wasn't sure if that would cause visible behavior changes, when eg dropping a large number of files, currently the git queue flushes periodically, and so it restages incrementally, rather than all at the end. In restagePointerFiles, it reads the restage log twice, once to get the number of files and size, and a second time to process it. This seemed better than reading the whole file into memory, since potentially a huge number of files could be in there. Probably the OS will cache the file in memory and there will not be much performance impact. It might be better to keep running tallies in another file though. But updating that atomically with the log seems hard. Also note that it's possible for calcRestageLog to see a different file than streamRestageLog does. More files may be added to the log in between. That is ok, it will only cause the filterprocessfaster heuristic to operate with slightly out of date information, so it may make the wrong choice for the files that got added and be a little slower than ideal. Sponsored-by: Dartmouth College's DANDI project
2022-09-23 18:38:59 +00:00
import Logs.Restage
fully support core.symlinks=false in all relevant symlink handling code Refactored annex link code into nice clean new library. Audited and dealt with calls to createSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ createSymbolicLink linktarget file only when core.symlinks=true Assistant/WebApp/Configurators/Local.hs: createSymbolicLink link link test if symlinks can be made Command/Fix.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/FromKey.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/Indirect.hs: liftIO $ createSymbolicLink l f refuses to run if core.symlinks=false Init.hs: createSymbolicLink f f2 test if symlinks can be made Remote/Directory.hs: go [file] = catchBoolIO $ createSymbolicLink file f >> return True fast key linking; catches failure to make symlink and falls back to copy Remote/Git.hs: liftIO $ catchBoolIO $ createSymbolicLink loc file >> return True ditto Upgrade/V1.hs: liftIO $ createSymbolicLink link f v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to readSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ catchMaybeIO $ readSymbolicLink file only when core.symlinks=true Assistant/Threads/Watcher.hs: ifM ((==) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) code that fixes real symlinks when inotify sees them It's ok to not fix psdueo-symlinks. Assistant/Threads/Watcher.hs: mlink <- liftIO (catchMaybeIO $ readSymbolicLink file) ditto Command/Fix.hs: stopUnless ((/=) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) $ do command only works in indirect mode Upgrade/V1.hs: getsymlink = takeFileName <$> readSymbolicLink file v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to isSymbolicLink. (Typically used with getSymbolicLinkStatus, but that is just used because getFileStatus is not as robust; it also works on pseudolinks.) Remaining calls are all safe, because: Assistant/Threads/SanityChecker.hs: | isSymbolicLink s -> addsymlink file ms only handles staging of symlinks that were somehow not staged (might need to be updated to support pseudolinks, but this is only a belt-and-suspenders check anyway, and I've never seen the code run) Command/Add.hs: if isSymbolicLink s || not (isRegularFile s) avoids adding symlinks to the annex, so not relevant Command/Indirect.hs: | isSymbolicLink s -> void $ flip whenAnnexed f $ only allowed on systems that support symlinks Command/Indirect.hs: whenM (liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f) $ do ditto Seek.hs:notSymlink f = liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f used to find unlocked files, only relevant in indirect mode Utility/FSEvents.hs: | Files.isSymbolicLink s = runhook addSymlinkHook $ Just s Utility/FSEvents.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: checkfiletype Files.isSymbolicLink addSymlinkHook f Utility/Kqueue.hs: | Files.isSymbolicLink s = callhook addSymlinkHook (Just s) change all above are lower-level, not relevant Audited and dealt with calls to isSymLink. Remaining calls are all safe, because: Annex/Direct.hs: | isSymLink (getmode item) = This is looking at git diff-tree objects, not files on disk Command/Unused.hs: | isSymLink (LsTree.mode l) = do This is looking at git ls-tree, not file on disk Utility/FileMode.hs:isSymLink :: FileMode -> Bool Utility/FileMode.hs:isSymLink = checkMode symbolicLinkMode low-level Done!!
2013-02-17 19:05:55 +00:00
import Git.Types
import Git.FilePath
import Git.Config
import Annex.HashObject
import Annex.InodeSentinal
2020-08-26 16:20:56 +00:00
import Annex.PidLock
import Utility.FileMode
import Utility.InodeCache
import Utility.Tmp.Dir
import Utility.CopyFile
import qualified Database.Keys.Handle
import qualified Utility.RawFilePath as R
import qualified Data.ByteString as S
import qualified Data.ByteString.Char8 as S8
import qualified Data.ByteString.Lazy as L
import qualified System.FilePath.ByteString as P
fully support core.symlinks=false in all relevant symlink handling code Refactored annex link code into nice clean new library. Audited and dealt with calls to createSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ createSymbolicLink linktarget file only when core.symlinks=true Assistant/WebApp/Configurators/Local.hs: createSymbolicLink link link test if symlinks can be made Command/Fix.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/FromKey.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/Indirect.hs: liftIO $ createSymbolicLink l f refuses to run if core.symlinks=false Init.hs: createSymbolicLink f f2 test if symlinks can be made Remote/Directory.hs: go [file] = catchBoolIO $ createSymbolicLink file f >> return True fast key linking; catches failure to make symlink and falls back to copy Remote/Git.hs: liftIO $ catchBoolIO $ createSymbolicLink loc file >> return True ditto Upgrade/V1.hs: liftIO $ createSymbolicLink link f v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to readSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ catchMaybeIO $ readSymbolicLink file only when core.symlinks=true Assistant/Threads/Watcher.hs: ifM ((==) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) code that fixes real symlinks when inotify sees them It's ok to not fix psdueo-symlinks. Assistant/Threads/Watcher.hs: mlink <- liftIO (catchMaybeIO $ readSymbolicLink file) ditto Command/Fix.hs: stopUnless ((/=) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) $ do command only works in indirect mode Upgrade/V1.hs: getsymlink = takeFileName <$> readSymbolicLink file v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to isSymbolicLink. (Typically used with getSymbolicLinkStatus, but that is just used because getFileStatus is not as robust; it also works on pseudolinks.) Remaining calls are all safe, because: Assistant/Threads/SanityChecker.hs: | isSymbolicLink s -> addsymlink file ms only handles staging of symlinks that were somehow not staged (might need to be updated to support pseudolinks, but this is only a belt-and-suspenders check anyway, and I've never seen the code run) Command/Add.hs: if isSymbolicLink s || not (isRegularFile s) avoids adding symlinks to the annex, so not relevant Command/Indirect.hs: | isSymbolicLink s -> void $ flip whenAnnexed f $ only allowed on systems that support symlinks Command/Indirect.hs: whenM (liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f) $ do ditto Seek.hs:notSymlink f = liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f used to find unlocked files, only relevant in indirect mode Utility/FSEvents.hs: | Files.isSymbolicLink s = runhook addSymlinkHook $ Just s Utility/FSEvents.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: checkfiletype Files.isSymbolicLink addSymlinkHook f Utility/Kqueue.hs: | Files.isSymbolicLink s = callhook addSymlinkHook (Just s) change all above are lower-level, not relevant Audited and dealt with calls to isSymLink. Remaining calls are all safe, because: Annex/Direct.hs: | isSymLink (getmode item) = This is looking at git diff-tree objects, not files on disk Command/Unused.hs: | isSymLink (LsTree.mode l) = do This is looking at git ls-tree, not file on disk Utility/FileMode.hs:isSymLink :: FileMode -> Bool Utility/FileMode.hs:isSymLink = checkMode symbolicLinkMode low-level Done!!
2013-02-17 19:05:55 +00:00
type LinkTarget = S.ByteString
2013-02-18 06:35:38 +00:00
fully support core.symlinks=false in all relevant symlink handling code Refactored annex link code into nice clean new library. Audited and dealt with calls to createSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ createSymbolicLink linktarget file only when core.symlinks=true Assistant/WebApp/Configurators/Local.hs: createSymbolicLink link link test if symlinks can be made Command/Fix.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/FromKey.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/Indirect.hs: liftIO $ createSymbolicLink l f refuses to run if core.symlinks=false Init.hs: createSymbolicLink f f2 test if symlinks can be made Remote/Directory.hs: go [file] = catchBoolIO $ createSymbolicLink file f >> return True fast key linking; catches failure to make symlink and falls back to copy Remote/Git.hs: liftIO $ catchBoolIO $ createSymbolicLink loc file >> return True ditto Upgrade/V1.hs: liftIO $ createSymbolicLink link f v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to readSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ catchMaybeIO $ readSymbolicLink file only when core.symlinks=true Assistant/Threads/Watcher.hs: ifM ((==) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) code that fixes real symlinks when inotify sees them It's ok to not fix psdueo-symlinks. Assistant/Threads/Watcher.hs: mlink <- liftIO (catchMaybeIO $ readSymbolicLink file) ditto Command/Fix.hs: stopUnless ((/=) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) $ do command only works in indirect mode Upgrade/V1.hs: getsymlink = takeFileName <$> readSymbolicLink file v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to isSymbolicLink. (Typically used with getSymbolicLinkStatus, but that is just used because getFileStatus is not as robust; it also works on pseudolinks.) Remaining calls are all safe, because: Assistant/Threads/SanityChecker.hs: | isSymbolicLink s -> addsymlink file ms only handles staging of symlinks that were somehow not staged (might need to be updated to support pseudolinks, but this is only a belt-and-suspenders check anyway, and I've never seen the code run) Command/Add.hs: if isSymbolicLink s || not (isRegularFile s) avoids adding symlinks to the annex, so not relevant Command/Indirect.hs: | isSymbolicLink s -> void $ flip whenAnnexed f $ only allowed on systems that support symlinks Command/Indirect.hs: whenM (liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f) $ do ditto Seek.hs:notSymlink f = liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f used to find unlocked files, only relevant in indirect mode Utility/FSEvents.hs: | Files.isSymbolicLink s = runhook addSymlinkHook $ Just s Utility/FSEvents.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: checkfiletype Files.isSymbolicLink addSymlinkHook f Utility/Kqueue.hs: | Files.isSymbolicLink s = callhook addSymlinkHook (Just s) change all above are lower-level, not relevant Audited and dealt with calls to isSymLink. Remaining calls are all safe, because: Annex/Direct.hs: | isSymLink (getmode item) = This is looking at git diff-tree objects, not files on disk Command/Unused.hs: | isSymLink (LsTree.mode l) = do This is looking at git ls-tree, not file on disk Utility/FileMode.hs:isSymLink :: FileMode -> Bool Utility/FileMode.hs:isSymLink = checkMode symbolicLinkMode low-level Done!!
2013-02-17 19:05:55 +00:00
{- Checks if a file is a link to a key. -}
isAnnexLink :: RawFilePath -> Annex (Maybe Key)
isAnnexLink file = maybe Nothing parseLinkTargetOrPointer <$> getAnnexLinkTarget file
fully support core.symlinks=false in all relevant symlink handling code Refactored annex link code into nice clean new library. Audited and dealt with calls to createSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ createSymbolicLink linktarget file only when core.symlinks=true Assistant/WebApp/Configurators/Local.hs: createSymbolicLink link link test if symlinks can be made Command/Fix.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/FromKey.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/Indirect.hs: liftIO $ createSymbolicLink l f refuses to run if core.symlinks=false Init.hs: createSymbolicLink f f2 test if symlinks can be made Remote/Directory.hs: go [file] = catchBoolIO $ createSymbolicLink file f >> return True fast key linking; catches failure to make symlink and falls back to copy Remote/Git.hs: liftIO $ catchBoolIO $ createSymbolicLink loc file >> return True ditto Upgrade/V1.hs: liftIO $ createSymbolicLink link f v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to readSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ catchMaybeIO $ readSymbolicLink file only when core.symlinks=true Assistant/Threads/Watcher.hs: ifM ((==) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) code that fixes real symlinks when inotify sees them It's ok to not fix psdueo-symlinks. Assistant/Threads/Watcher.hs: mlink <- liftIO (catchMaybeIO $ readSymbolicLink file) ditto Command/Fix.hs: stopUnless ((/=) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) $ do command only works in indirect mode Upgrade/V1.hs: getsymlink = takeFileName <$> readSymbolicLink file v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to isSymbolicLink. (Typically used with getSymbolicLinkStatus, but that is just used because getFileStatus is not as robust; it also works on pseudolinks.) Remaining calls are all safe, because: Assistant/Threads/SanityChecker.hs: | isSymbolicLink s -> addsymlink file ms only handles staging of symlinks that were somehow not staged (might need to be updated to support pseudolinks, but this is only a belt-and-suspenders check anyway, and I've never seen the code run) Command/Add.hs: if isSymbolicLink s || not (isRegularFile s) avoids adding symlinks to the annex, so not relevant Command/Indirect.hs: | isSymbolicLink s -> void $ flip whenAnnexed f $ only allowed on systems that support symlinks Command/Indirect.hs: whenM (liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f) $ do ditto Seek.hs:notSymlink f = liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f used to find unlocked files, only relevant in indirect mode Utility/FSEvents.hs: | Files.isSymbolicLink s = runhook addSymlinkHook $ Just s Utility/FSEvents.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: checkfiletype Files.isSymbolicLink addSymlinkHook f Utility/Kqueue.hs: | Files.isSymbolicLink s = callhook addSymlinkHook (Just s) change all above are lower-level, not relevant Audited and dealt with calls to isSymLink. Remaining calls are all safe, because: Annex/Direct.hs: | isSymLink (getmode item) = This is looking at git diff-tree objects, not files on disk Command/Unused.hs: | isSymLink (LsTree.mode l) = do This is looking at git ls-tree, not file on disk Utility/FileMode.hs:isSymLink :: FileMode -> Bool Utility/FileMode.hs:isSymLink = checkMode symbolicLinkMode low-level Done!!
2013-02-17 19:05:55 +00:00
{- Gets the link target of a symlink.
-
- On a filesystem that does not support symlinks, fall back to getting the
- link target by looking inside the file.
fully support core.symlinks=false in all relevant symlink handling code Refactored annex link code into nice clean new library. Audited and dealt with calls to createSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ createSymbolicLink linktarget file only when core.symlinks=true Assistant/WebApp/Configurators/Local.hs: createSymbolicLink link link test if symlinks can be made Command/Fix.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/FromKey.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/Indirect.hs: liftIO $ createSymbolicLink l f refuses to run if core.symlinks=false Init.hs: createSymbolicLink f f2 test if symlinks can be made Remote/Directory.hs: go [file] = catchBoolIO $ createSymbolicLink file f >> return True fast key linking; catches failure to make symlink and falls back to copy Remote/Git.hs: liftIO $ catchBoolIO $ createSymbolicLink loc file >> return True ditto Upgrade/V1.hs: liftIO $ createSymbolicLink link f v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to readSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ catchMaybeIO $ readSymbolicLink file only when core.symlinks=true Assistant/Threads/Watcher.hs: ifM ((==) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) code that fixes real symlinks when inotify sees them It's ok to not fix psdueo-symlinks. Assistant/Threads/Watcher.hs: mlink <- liftIO (catchMaybeIO $ readSymbolicLink file) ditto Command/Fix.hs: stopUnless ((/=) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) $ do command only works in indirect mode Upgrade/V1.hs: getsymlink = takeFileName <$> readSymbolicLink file v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to isSymbolicLink. (Typically used with getSymbolicLinkStatus, but that is just used because getFileStatus is not as robust; it also works on pseudolinks.) Remaining calls are all safe, because: Assistant/Threads/SanityChecker.hs: | isSymbolicLink s -> addsymlink file ms only handles staging of symlinks that were somehow not staged (might need to be updated to support pseudolinks, but this is only a belt-and-suspenders check anyway, and I've never seen the code run) Command/Add.hs: if isSymbolicLink s || not (isRegularFile s) avoids adding symlinks to the annex, so not relevant Command/Indirect.hs: | isSymbolicLink s -> void $ flip whenAnnexed f $ only allowed on systems that support symlinks Command/Indirect.hs: whenM (liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f) $ do ditto Seek.hs:notSymlink f = liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f used to find unlocked files, only relevant in indirect mode Utility/FSEvents.hs: | Files.isSymbolicLink s = runhook addSymlinkHook $ Just s Utility/FSEvents.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: checkfiletype Files.isSymbolicLink addSymlinkHook f Utility/Kqueue.hs: | Files.isSymbolicLink s = callhook addSymlinkHook (Just s) change all above are lower-level, not relevant Audited and dealt with calls to isSymLink. Remaining calls are all safe, because: Annex/Direct.hs: | isSymLink (getmode item) = This is looking at git diff-tree objects, not files on disk Command/Unused.hs: | isSymLink (LsTree.mode l) = do This is looking at git ls-tree, not file on disk Utility/FileMode.hs:isSymLink :: FileMode -> Bool Utility/FileMode.hs:isSymLink = checkMode symbolicLinkMode low-level Done!!
2013-02-17 19:05:55 +00:00
-
- Returns Nothing if the file is not a symlink, or not a link to annex
- content.
-}
getAnnexLinkTarget :: RawFilePath -> Annex (Maybe LinkTarget)
getAnnexLinkTarget f = getAnnexLinkTarget' f
=<< (coreSymlinks <$> Annex.getGitConfig)
{- Pass False to force looking inside file, for when git checks out
- symlinks as plain files. -}
getAnnexLinkTarget' :: RawFilePath -> Bool -> Annex (Maybe S.ByteString)
getAnnexLinkTarget' file coresymlinks = if coresymlinks
then check probesymlink $
return Nothing
else check probesymlink $
check probefilecontent $
return Nothing
where
2017-12-05 19:00:50 +00:00
check getlinktarget fallback =
liftIO (catchMaybeIO getlinktarget) >>= \case
Just l
| isLinkToAnnex l -> return (Just l)
| otherwise -> return Nothing
Nothing -> fallback
probesymlink = R.readSymbolicLink file
probefilecontent = withFile (fromRawFilePath file) ReadMode $ \h -> do
s <- S.hGet h maxSymlinkSz
-- If we got the full amount, the file is too large
-- to be a symlink target.
return $ if S.length s == maxSymlinkSz
then mempty
else
-- If there are any NUL or newline
-- characters, or whitespace, we
-- certainly don't have a symlink to a
-- git-annex key.
if any (`S8.elem` s) ("\0\n\r \t" :: [Char])
then mempty
2013-09-25 07:09:06 +00:00
else s
fully support core.symlinks=false in all relevant symlink handling code Refactored annex link code into nice clean new library. Audited and dealt with calls to createSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ createSymbolicLink linktarget file only when core.symlinks=true Assistant/WebApp/Configurators/Local.hs: createSymbolicLink link link test if symlinks can be made Command/Fix.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/FromKey.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/Indirect.hs: liftIO $ createSymbolicLink l f refuses to run if core.symlinks=false Init.hs: createSymbolicLink f f2 test if symlinks can be made Remote/Directory.hs: go [file] = catchBoolIO $ createSymbolicLink file f >> return True fast key linking; catches failure to make symlink and falls back to copy Remote/Git.hs: liftIO $ catchBoolIO $ createSymbolicLink loc file >> return True ditto Upgrade/V1.hs: liftIO $ createSymbolicLink link f v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to readSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ catchMaybeIO $ readSymbolicLink file only when core.symlinks=true Assistant/Threads/Watcher.hs: ifM ((==) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) code that fixes real symlinks when inotify sees them It's ok to not fix psdueo-symlinks. Assistant/Threads/Watcher.hs: mlink <- liftIO (catchMaybeIO $ readSymbolicLink file) ditto Command/Fix.hs: stopUnless ((/=) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) $ do command only works in indirect mode Upgrade/V1.hs: getsymlink = takeFileName <$> readSymbolicLink file v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to isSymbolicLink. (Typically used with getSymbolicLinkStatus, but that is just used because getFileStatus is not as robust; it also works on pseudolinks.) Remaining calls are all safe, because: Assistant/Threads/SanityChecker.hs: | isSymbolicLink s -> addsymlink file ms only handles staging of symlinks that were somehow not staged (might need to be updated to support pseudolinks, but this is only a belt-and-suspenders check anyway, and I've never seen the code run) Command/Add.hs: if isSymbolicLink s || not (isRegularFile s) avoids adding symlinks to the annex, so not relevant Command/Indirect.hs: | isSymbolicLink s -> void $ flip whenAnnexed f $ only allowed on systems that support symlinks Command/Indirect.hs: whenM (liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f) $ do ditto Seek.hs:notSymlink f = liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f used to find unlocked files, only relevant in indirect mode Utility/FSEvents.hs: | Files.isSymbolicLink s = runhook addSymlinkHook $ Just s Utility/FSEvents.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: checkfiletype Files.isSymbolicLink addSymlinkHook f Utility/Kqueue.hs: | Files.isSymbolicLink s = callhook addSymlinkHook (Just s) change all above are lower-level, not relevant Audited and dealt with calls to isSymLink. Remaining calls are all safe, because: Annex/Direct.hs: | isSymLink (getmode item) = This is looking at git diff-tree objects, not files on disk Command/Unused.hs: | isSymLink (LsTree.mode l) = do This is looking at git ls-tree, not file on disk Utility/FileMode.hs:isSymLink :: FileMode -> Bool Utility/FileMode.hs:isSymLink = checkMode symbolicLinkMode low-level Done!!
2013-02-17 19:05:55 +00:00
makeAnnexLink :: LinkTarget -> RawFilePath -> Annex ()
makeAnnexLink = makeGitLink
fully support core.symlinks=false in all relevant symlink handling code Refactored annex link code into nice clean new library. Audited and dealt with calls to createSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ createSymbolicLink linktarget file only when core.symlinks=true Assistant/WebApp/Configurators/Local.hs: createSymbolicLink link link test if symlinks can be made Command/Fix.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/FromKey.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/Indirect.hs: liftIO $ createSymbolicLink l f refuses to run if core.symlinks=false Init.hs: createSymbolicLink f f2 test if symlinks can be made Remote/Directory.hs: go [file] = catchBoolIO $ createSymbolicLink file f >> return True fast key linking; catches failure to make symlink and falls back to copy Remote/Git.hs: liftIO $ catchBoolIO $ createSymbolicLink loc file >> return True ditto Upgrade/V1.hs: liftIO $ createSymbolicLink link f v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to readSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ catchMaybeIO $ readSymbolicLink file only when core.symlinks=true Assistant/Threads/Watcher.hs: ifM ((==) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) code that fixes real symlinks when inotify sees them It's ok to not fix psdueo-symlinks. Assistant/Threads/Watcher.hs: mlink <- liftIO (catchMaybeIO $ readSymbolicLink file) ditto Command/Fix.hs: stopUnless ((/=) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) $ do command only works in indirect mode Upgrade/V1.hs: getsymlink = takeFileName <$> readSymbolicLink file v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to isSymbolicLink. (Typically used with getSymbolicLinkStatus, but that is just used because getFileStatus is not as robust; it also works on pseudolinks.) Remaining calls are all safe, because: Assistant/Threads/SanityChecker.hs: | isSymbolicLink s -> addsymlink file ms only handles staging of symlinks that were somehow not staged (might need to be updated to support pseudolinks, but this is only a belt-and-suspenders check anyway, and I've never seen the code run) Command/Add.hs: if isSymbolicLink s || not (isRegularFile s) avoids adding symlinks to the annex, so not relevant Command/Indirect.hs: | isSymbolicLink s -> void $ flip whenAnnexed f $ only allowed on systems that support symlinks Command/Indirect.hs: whenM (liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f) $ do ditto Seek.hs:notSymlink f = liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f used to find unlocked files, only relevant in indirect mode Utility/FSEvents.hs: | Files.isSymbolicLink s = runhook addSymlinkHook $ Just s Utility/FSEvents.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: checkfiletype Files.isSymbolicLink addSymlinkHook f Utility/Kqueue.hs: | Files.isSymbolicLink s = callhook addSymlinkHook (Just s) change all above are lower-level, not relevant Audited and dealt with calls to isSymLink. Remaining calls are all safe, because: Annex/Direct.hs: | isSymLink (getmode item) = This is looking at git diff-tree objects, not files on disk Command/Unused.hs: | isSymLink (LsTree.mode l) = do This is looking at git ls-tree, not file on disk Utility/FileMode.hs:isSymLink :: FileMode -> Bool Utility/FileMode.hs:isSymLink = checkMode symbolicLinkMode low-level Done!!
2013-02-17 19:05:55 +00:00
{- Creates a link on disk.
-
- On a filesystem that does not support symlinks, writes the link target
- to a file. Note that git will only treat the file as a symlink if
- it's staged as such, so use addAnnexLink when adding a new file or
- modified link to git.
-}
makeGitLink :: LinkTarget -> RawFilePath -> Annex ()
makeGitLink linktarget file = ifM (coreSymlinks <$> Annex.getGitConfig)
( liftIO $ do
void $ tryIO $ R.removeLink file
R.createSymbolicLink linktarget file
, liftIO $ S.writeFile (fromRawFilePath file) linktarget
fully support core.symlinks=false in all relevant symlink handling code Refactored annex link code into nice clean new library. Audited and dealt with calls to createSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ createSymbolicLink linktarget file only when core.symlinks=true Assistant/WebApp/Configurators/Local.hs: createSymbolicLink link link test if symlinks can be made Command/Fix.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/FromKey.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/Indirect.hs: liftIO $ createSymbolicLink l f refuses to run if core.symlinks=false Init.hs: createSymbolicLink f f2 test if symlinks can be made Remote/Directory.hs: go [file] = catchBoolIO $ createSymbolicLink file f >> return True fast key linking; catches failure to make symlink and falls back to copy Remote/Git.hs: liftIO $ catchBoolIO $ createSymbolicLink loc file >> return True ditto Upgrade/V1.hs: liftIO $ createSymbolicLink link f v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to readSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ catchMaybeIO $ readSymbolicLink file only when core.symlinks=true Assistant/Threads/Watcher.hs: ifM ((==) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) code that fixes real symlinks when inotify sees them It's ok to not fix psdueo-symlinks. Assistant/Threads/Watcher.hs: mlink <- liftIO (catchMaybeIO $ readSymbolicLink file) ditto Command/Fix.hs: stopUnless ((/=) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) $ do command only works in indirect mode Upgrade/V1.hs: getsymlink = takeFileName <$> readSymbolicLink file v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to isSymbolicLink. (Typically used with getSymbolicLinkStatus, but that is just used because getFileStatus is not as robust; it also works on pseudolinks.) Remaining calls are all safe, because: Assistant/Threads/SanityChecker.hs: | isSymbolicLink s -> addsymlink file ms only handles staging of symlinks that were somehow not staged (might need to be updated to support pseudolinks, but this is only a belt-and-suspenders check anyway, and I've never seen the code run) Command/Add.hs: if isSymbolicLink s || not (isRegularFile s) avoids adding symlinks to the annex, so not relevant Command/Indirect.hs: | isSymbolicLink s -> void $ flip whenAnnexed f $ only allowed on systems that support symlinks Command/Indirect.hs: whenM (liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f) $ do ditto Seek.hs:notSymlink f = liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f used to find unlocked files, only relevant in indirect mode Utility/FSEvents.hs: | Files.isSymbolicLink s = runhook addSymlinkHook $ Just s Utility/FSEvents.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: checkfiletype Files.isSymbolicLink addSymlinkHook f Utility/Kqueue.hs: | Files.isSymbolicLink s = callhook addSymlinkHook (Just s) change all above are lower-level, not relevant Audited and dealt with calls to isSymLink. Remaining calls are all safe, because: Annex/Direct.hs: | isSymLink (getmode item) = This is looking at git diff-tree objects, not files on disk Command/Unused.hs: | isSymLink (LsTree.mode l) = do This is looking at git ls-tree, not file on disk Utility/FileMode.hs:isSymLink :: FileMode -> Bool Utility/FileMode.hs:isSymLink = checkMode symbolicLinkMode low-level Done!!
2013-02-17 19:05:55 +00:00
)
{- Creates a link on disk, and additionally stages it in git. -}
addAnnexLink :: LinkTarget -> RawFilePath -> Annex ()
fully support core.symlinks=false in all relevant symlink handling code Refactored annex link code into nice clean new library. Audited and dealt with calls to createSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ createSymbolicLink linktarget file only when core.symlinks=true Assistant/WebApp/Configurators/Local.hs: createSymbolicLink link link test if symlinks can be made Command/Fix.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/FromKey.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/Indirect.hs: liftIO $ createSymbolicLink l f refuses to run if core.symlinks=false Init.hs: createSymbolicLink f f2 test if symlinks can be made Remote/Directory.hs: go [file] = catchBoolIO $ createSymbolicLink file f >> return True fast key linking; catches failure to make symlink and falls back to copy Remote/Git.hs: liftIO $ catchBoolIO $ createSymbolicLink loc file >> return True ditto Upgrade/V1.hs: liftIO $ createSymbolicLink link f v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to readSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ catchMaybeIO $ readSymbolicLink file only when core.symlinks=true Assistant/Threads/Watcher.hs: ifM ((==) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) code that fixes real symlinks when inotify sees them It's ok to not fix psdueo-symlinks. Assistant/Threads/Watcher.hs: mlink <- liftIO (catchMaybeIO $ readSymbolicLink file) ditto Command/Fix.hs: stopUnless ((/=) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) $ do command only works in indirect mode Upgrade/V1.hs: getsymlink = takeFileName <$> readSymbolicLink file v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to isSymbolicLink. (Typically used with getSymbolicLinkStatus, but that is just used because getFileStatus is not as robust; it also works on pseudolinks.) Remaining calls are all safe, because: Assistant/Threads/SanityChecker.hs: | isSymbolicLink s -> addsymlink file ms only handles staging of symlinks that were somehow not staged (might need to be updated to support pseudolinks, but this is only a belt-and-suspenders check anyway, and I've never seen the code run) Command/Add.hs: if isSymbolicLink s || not (isRegularFile s) avoids adding symlinks to the annex, so not relevant Command/Indirect.hs: | isSymbolicLink s -> void $ flip whenAnnexed f $ only allowed on systems that support symlinks Command/Indirect.hs: whenM (liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f) $ do ditto Seek.hs:notSymlink f = liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f used to find unlocked files, only relevant in indirect mode Utility/FSEvents.hs: | Files.isSymbolicLink s = runhook addSymlinkHook $ Just s Utility/FSEvents.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: checkfiletype Files.isSymbolicLink addSymlinkHook f Utility/Kqueue.hs: | Files.isSymbolicLink s = callhook addSymlinkHook (Just s) change all above are lower-level, not relevant Audited and dealt with calls to isSymLink. Remaining calls are all safe, because: Annex/Direct.hs: | isSymLink (getmode item) = This is looking at git diff-tree objects, not files on disk Command/Unused.hs: | isSymLink (LsTree.mode l) = do This is looking at git ls-tree, not file on disk Utility/FileMode.hs:isSymLink :: FileMode -> Bool Utility/FileMode.hs:isSymLink = checkMode symbolicLinkMode low-level Done!!
2013-02-17 19:05:55 +00:00
addAnnexLink linktarget file = do
makeAnnexLink linktarget file
stageSymlink file =<< hashSymlink linktarget
{- Injects a symlink target into git, returning its Sha. -}
2013-02-18 06:35:38 +00:00
hashSymlink :: LinkTarget -> Annex Sha
hashSymlink = hashBlob . toInternalGitPath
{- Stages a symlink to an annexed object, using a Sha of its target. -}
stageSymlink :: RawFilePath -> Sha -> Annex ()
fully support core.symlinks=false in all relevant symlink handling code Refactored annex link code into nice clean new library. Audited and dealt with calls to createSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ createSymbolicLink linktarget file only when core.symlinks=true Assistant/WebApp/Configurators/Local.hs: createSymbolicLink link link test if symlinks can be made Command/Fix.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/FromKey.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/Indirect.hs: liftIO $ createSymbolicLink l f refuses to run if core.symlinks=false Init.hs: createSymbolicLink f f2 test if symlinks can be made Remote/Directory.hs: go [file] = catchBoolIO $ createSymbolicLink file f >> return True fast key linking; catches failure to make symlink and falls back to copy Remote/Git.hs: liftIO $ catchBoolIO $ createSymbolicLink loc file >> return True ditto Upgrade/V1.hs: liftIO $ createSymbolicLink link f v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to readSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ catchMaybeIO $ readSymbolicLink file only when core.symlinks=true Assistant/Threads/Watcher.hs: ifM ((==) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) code that fixes real symlinks when inotify sees them It's ok to not fix psdueo-symlinks. Assistant/Threads/Watcher.hs: mlink <- liftIO (catchMaybeIO $ readSymbolicLink file) ditto Command/Fix.hs: stopUnless ((/=) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) $ do command only works in indirect mode Upgrade/V1.hs: getsymlink = takeFileName <$> readSymbolicLink file v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to isSymbolicLink. (Typically used with getSymbolicLinkStatus, but that is just used because getFileStatus is not as robust; it also works on pseudolinks.) Remaining calls are all safe, because: Assistant/Threads/SanityChecker.hs: | isSymbolicLink s -> addsymlink file ms only handles staging of symlinks that were somehow not staged (might need to be updated to support pseudolinks, but this is only a belt-and-suspenders check anyway, and I've never seen the code run) Command/Add.hs: if isSymbolicLink s || not (isRegularFile s) avoids adding symlinks to the annex, so not relevant Command/Indirect.hs: | isSymbolicLink s -> void $ flip whenAnnexed f $ only allowed on systems that support symlinks Command/Indirect.hs: whenM (liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f) $ do ditto Seek.hs:notSymlink f = liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f used to find unlocked files, only relevant in indirect mode Utility/FSEvents.hs: | Files.isSymbolicLink s = runhook addSymlinkHook $ Just s Utility/FSEvents.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: checkfiletype Files.isSymbolicLink addSymlinkHook f Utility/Kqueue.hs: | Files.isSymbolicLink s = callhook addSymlinkHook (Just s) change all above are lower-level, not relevant Audited and dealt with calls to isSymLink. Remaining calls are all safe, because: Annex/Direct.hs: | isSymLink (getmode item) = This is looking at git diff-tree objects, not files on disk Command/Unused.hs: | isSymLink (LsTree.mode l) = do This is looking at git ls-tree, not file on disk Utility/FileMode.hs:isSymLink :: FileMode -> Bool Utility/FileMode.hs:isSymLink = checkMode symbolicLinkMode low-level Done!!
2013-02-17 19:05:55 +00:00
stageSymlink file sha =
Annex.Queue.addUpdateIndex =<<
2020-10-30 19:55:59 +00:00
inRepo (Git.UpdateIndex.stageSymlink file sha)
{- Injects a pointer file content into git, returning its Sha. -}
hashPointerFile :: Key -> Annex Sha
hashPointerFile key = hashBlob $ formatPointer key
{- Stages a pointer file, using a Sha of its content -}
stagePointerFile :: RawFilePath -> Maybe FileMode -> Sha -> Annex ()
stagePointerFile file mode sha =
Annex.Queue.addUpdateIndex =<<
inRepo (Git.UpdateIndex.stageFile sha treeitemtype $ fromRawFilePath file)
where
treeitemtype
| maybe False isExecutable mode = TreeExecutable
| otherwise = TreeFile
writePointerFile :: RawFilePath -> Key -> Maybe FileMode -> IO ()
writePointerFile file k mode = do
S.writeFile (fromRawFilePath file) (formatPointer k)
2022-06-22 20:11:03 +00:00
maybe noop (R.setFileMode file) mode
newtype Restage = Restage Bool
{- Restage pointer file. This is used after updating a worktree file
- when content is added/removed, to prevent git status from showing
- it as modified.
-
add restage log When pointer files need to be restaged, they're first written to the log, and then when the restage operation runs, it reads the log. This way, if the git-annex process is interrupted before it can do the restaging, a later git-annex process can do it. Currently, this lets a git-annex get/drop command be interrupted and then re-ran, and as long as it gets/drops additional files, it will clean up after the interrupted command. But more changes are needed to make it easier to restage after an interrupted process. Kept using the git queue to run the restage action, even though the list of files that it builds up for that action is not actually used by the action. This could perhaps be simplified to make restaging a cleanup action that gets registered, rather than using the git queue for it. But I wasn't sure if that would cause visible behavior changes, when eg dropping a large number of files, currently the git queue flushes periodically, and so it restages incrementally, rather than all at the end. In restagePointerFiles, it reads the restage log twice, once to get the number of files and size, and a second time to process it. This seemed better than reading the whole file into memory, since potentially a huge number of files could be in there. Probably the OS will cache the file in memory and there will not be much performance impact. It might be better to keep running tallies in another file though. But updating that atomically with the log seems hard. Also note that it's possible for calcRestageLog to see a different file than streamRestageLog does. More files may be added to the log in between. That is ok, it will only cause the filterprocessfaster heuristic to operate with slightly out of date information, so it may make the wrong choice for the files that got added and be a little slower than ideal. Sponsored-by: Dartmouth College's DANDI project
2022-09-23 18:38:59 +00:00
- The InodeCache is for the worktree file. It is used to detect when
- the worktree file is changed by something else before git update-index
- gets to look at it.
-
- Asks git to refresh its index information for the file.
- That in turn runs the clean filter on the file; when the clean
- filter produces the same pointer that was in the index before, git
- realizes that the file has not actually been modified.
-
- Note that, if the pointer file is staged for deletion, or has different
- content than the current worktree content staged, this won't change
- that. So it's safe to call at any time and any situation.
-
- If the index is known to be locked (eg, git add has run git-annex),
add restage log When pointer files need to be restaged, they're first written to the log, and then when the restage operation runs, it reads the log. This way, if the git-annex process is interrupted before it can do the restaging, a later git-annex process can do it. Currently, this lets a git-annex get/drop command be interrupted and then re-ran, and as long as it gets/drops additional files, it will clean up after the interrupted command. But more changes are needed to make it easier to restage after an interrupted process. Kept using the git queue to run the restage action, even though the list of files that it builds up for that action is not actually used by the action. This could perhaps be simplified to make restaging a cleanup action that gets registered, rather than using the git queue for it. But I wasn't sure if that would cause visible behavior changes, when eg dropping a large number of files, currently the git queue flushes periodically, and so it restages incrementally, rather than all at the end. In restagePointerFiles, it reads the restage log twice, once to get the number of files and size, and a second time to process it. This seemed better than reading the whole file into memory, since potentially a huge number of files could be in there. Probably the OS will cache the file in memory and there will not be much performance impact. It might be better to keep running tallies in another file though. But updating that atomically with the log seems hard. Also note that it's possible for calcRestageLog to see a different file than streamRestageLog does. More files may be added to the log in between. That is ok, it will only cause the filterprocessfaster heuristic to operate with slightly out of date information, so it may make the wrong choice for the files that got added and be a little slower than ideal. Sponsored-by: Dartmouth College's DANDI project
2022-09-23 18:38:59 +00:00
- that would fail. Restage False will prevent the index being updated,
- and will store it in the restage log. Displays a message to help the
- user understand why the file will appear to be modified.
-
- This uses the git queue, so the update is not performed immediately,
add restage log When pointer files need to be restaged, they're first written to the log, and then when the restage operation runs, it reads the log. This way, if the git-annex process is interrupted before it can do the restaging, a later git-annex process can do it. Currently, this lets a git-annex get/drop command be interrupted and then re-ran, and as long as it gets/drops additional files, it will clean up after the interrupted command. But more changes are needed to make it easier to restage after an interrupted process. Kept using the git queue to run the restage action, even though the list of files that it builds up for that action is not actually used by the action. This could perhaps be simplified to make restaging a cleanup action that gets registered, rather than using the git queue for it. But I wasn't sure if that would cause visible behavior changes, when eg dropping a large number of files, currently the git queue flushes periodically, and so it restages incrementally, rather than all at the end. In restagePointerFiles, it reads the restage log twice, once to get the number of files and size, and a second time to process it. This seemed better than reading the whole file into memory, since potentially a huge number of files could be in there. Probably the OS will cache the file in memory and there will not be much performance impact. It might be better to keep running tallies in another file though. But updating that atomically with the log seems hard. Also note that it's possible for calcRestageLog to see a different file than streamRestageLog does. More files may be added to the log in between. That is ok, it will only cause the filterprocessfaster heuristic to operate with slightly out of date information, so it may make the wrong choice for the files that got added and be a little slower than ideal. Sponsored-by: Dartmouth College's DANDI project
2022-09-23 18:38:59 +00:00
- and this can be run multiple times cheaply. Using the git queue also
- prevents building up too large a number of updates when many files
- are being processed. It's also recorded in the restage log so that,
- if the process is interrupted before the git queue is fulushed, the
- restage will be taken care of later.
-}
restagePointerFile :: Restage -> RawFilePath -> InodeCache -> Annex ()
restagePointerFile (Restage False) f orig = do
add restage log When pointer files need to be restaged, they're first written to the log, and then when the restage operation runs, it reads the log. This way, if the git-annex process is interrupted before it can do the restaging, a later git-annex process can do it. Currently, this lets a git-annex get/drop command be interrupted and then re-ran, and as long as it gets/drops additional files, it will clean up after the interrupted command. But more changes are needed to make it easier to restage after an interrupted process. Kept using the git queue to run the restage action, even though the list of files that it builds up for that action is not actually used by the action. This could perhaps be simplified to make restaging a cleanup action that gets registered, rather than using the git queue for it. But I wasn't sure if that would cause visible behavior changes, when eg dropping a large number of files, currently the git queue flushes periodically, and so it restages incrementally, rather than all at the end. In restagePointerFiles, it reads the restage log twice, once to get the number of files and size, and a second time to process it. This seemed better than reading the whole file into memory, since potentially a huge number of files could be in there. Probably the OS will cache the file in memory and there will not be much performance impact. It might be better to keep running tallies in another file though. But updating that atomically with the log seems hard. Also note that it's possible for calcRestageLog to see a different file than streamRestageLog does. More files may be added to the log in between. That is ok, it will only cause the filterprocessfaster heuristic to operate with slightly out of date information, so it may make the wrong choice for the files that got added and be a little slower than ideal. Sponsored-by: Dartmouth College's DANDI project
2022-09-23 18:38:59 +00:00
flip writeRestageLog orig =<< inRepo (toTopFilePath f)
toplevelWarning True $ unableToRestage $ Just $ fromRawFilePath f
add restage log When pointer files need to be restaged, they're first written to the log, and then when the restage operation runs, it reads the log. This way, if the git-annex process is interrupted before it can do the restaging, a later git-annex process can do it. Currently, this lets a git-annex get/drop command be interrupted and then re-ran, and as long as it gets/drops additional files, it will clean up after the interrupted command. But more changes are needed to make it easier to restage after an interrupted process. Kept using the git queue to run the restage action, even though the list of files that it builds up for that action is not actually used by the action. This could perhaps be simplified to make restaging a cleanup action that gets registered, rather than using the git queue for it. But I wasn't sure if that would cause visible behavior changes, when eg dropping a large number of files, currently the git queue flushes periodically, and so it restages incrementally, rather than all at the end. In restagePointerFiles, it reads the restage log twice, once to get the number of files and size, and a second time to process it. This seemed better than reading the whole file into memory, since potentially a huge number of files could be in there. Probably the OS will cache the file in memory and there will not be much performance impact. It might be better to keep running tallies in another file though. But updating that atomically with the log seems hard. Also note that it's possible for calcRestageLog to see a different file than streamRestageLog does. More files may be added to the log in between. That is ok, it will only cause the filterprocessfaster heuristic to operate with slightly out of date information, so it may make the wrong choice for the files that got added and be a little slower than ideal. Sponsored-by: Dartmouth College's DANDI project
2022-09-23 18:38:59 +00:00
restagePointerFile (Restage True) f orig = do
flip writeRestageLog orig =<< inRepo (toTopFilePath f)
-- Avoid refreshing the index if run by the
-- smudge clean filter, because git uses that when
-- it's already refreshing the index, probably because
-- this very action is running. Running it again would likely
-- deadlock.
add restage log When pointer files need to be restaged, they're first written to the log, and then when the restage operation runs, it reads the log. This way, if the git-annex process is interrupted before it can do the restaging, a later git-annex process can do it. Currently, this lets a git-annex get/drop command be interrupted and then re-ran, and as long as it gets/drops additional files, it will clean up after the interrupted command. But more changes are needed to make it easier to restage after an interrupted process. Kept using the git queue to run the restage action, even though the list of files that it builds up for that action is not actually used by the action. This could perhaps be simplified to make restaging a cleanup action that gets registered, rather than using the git queue for it. But I wasn't sure if that would cause visible behavior changes, when eg dropping a large number of files, currently the git queue flushes periodically, and so it restages incrementally, rather than all at the end. In restagePointerFiles, it reads the restage log twice, once to get the number of files and size, and a second time to process it. This seemed better than reading the whole file into memory, since potentially a huge number of files could be in there. Probably the OS will cache the file in memory and there will not be much performance impact. It might be better to keep running tallies in another file though. But updating that atomically with the log seems hard. Also note that it's possible for calcRestageLog to see a different file than streamRestageLog does. More files may be added to the log in between. That is ok, it will only cause the filterprocessfaster heuristic to operate with slightly out of date information, so it may make the wrong choice for the files that got added and be a little slower than ideal. Sponsored-by: Dartmouth College's DANDI project
2022-09-23 18:38:59 +00:00
unlessM (Annex.getState Annex.insmudgecleanfilter) $
Annex.Queue.addFlushAction restagePointerFileRunner [f]
restagePointerFileRunner :: Git.Queue.FlushActionRunner Annex
restagePointerFileRunner =
Git.Queue.FlushActionRunner "restagePointerFiles" $ \r _fs ->
restagePointerFiles r
add restage log When pointer files need to be restaged, they're first written to the log, and then when the restage operation runs, it reads the log. This way, if the git-annex process is interrupted before it can do the restaging, a later git-annex process can do it. Currently, this lets a git-annex get/drop command be interrupted and then re-ran, and as long as it gets/drops additional files, it will clean up after the interrupted command. But more changes are needed to make it easier to restage after an interrupted process. Kept using the git queue to run the restage action, even though the list of files that it builds up for that action is not actually used by the action. This could perhaps be simplified to make restaging a cleanup action that gets registered, rather than using the git queue for it. But I wasn't sure if that would cause visible behavior changes, when eg dropping a large number of files, currently the git queue flushes periodically, and so it restages incrementally, rather than all at the end. In restagePointerFiles, it reads the restage log twice, once to get the number of files and size, and a second time to process it. This seemed better than reading the whole file into memory, since potentially a huge number of files could be in there. Probably the OS will cache the file in memory and there will not be much performance impact. It might be better to keep running tallies in another file though. But updating that atomically with the log seems hard. Also note that it's possible for calcRestageLog to see a different file than streamRestageLog does. More files may be added to the log in between. That is ok, it will only cause the filterprocessfaster heuristic to operate with slightly out of date information, so it may make the wrong choice for the files that got added and be a little slower than ideal. Sponsored-by: Dartmouth College's DANDI project
2022-09-23 18:38:59 +00:00
-- Restage all files in the restage log that have not been modified.
--
-- Other changes to the files may have been staged before this
-- gets a chance to run. To avoid a race with any staging of
-- changes, first lock the index file. Then run git update-index
-- on all still-unmodified files, using a copy of the index file,
-- to bypass the lock. Then replace the old index file with the new
-- updated index file.
add restage log When pointer files need to be restaged, they're first written to the log, and then when the restage operation runs, it reads the log. This way, if the git-annex process is interrupted before it can do the restaging, a later git-annex process can do it. Currently, this lets a git-annex get/drop command be interrupted and then re-ran, and as long as it gets/drops additional files, it will clean up after the interrupted command. But more changes are needed to make it easier to restage after an interrupted process. Kept using the git queue to run the restage action, even though the list of files that it builds up for that action is not actually used by the action. This could perhaps be simplified to make restaging a cleanup action that gets registered, rather than using the git queue for it. But I wasn't sure if that would cause visible behavior changes, when eg dropping a large number of files, currently the git queue flushes periodically, and so it restages incrementally, rather than all at the end. In restagePointerFiles, it reads the restage log twice, once to get the number of files and size, and a second time to process it. This seemed better than reading the whole file into memory, since potentially a huge number of files could be in there. Probably the OS will cache the file in memory and there will not be much performance impact. It might be better to keep running tallies in another file though. But updating that atomically with the log seems hard. Also note that it's possible for calcRestageLog to see a different file than streamRestageLog does. More files may be added to the log in between. That is ok, it will only cause the filterprocessfaster heuristic to operate with slightly out of date information, so it may make the wrong choice for the files that got added and be a little slower than ideal. Sponsored-by: Dartmouth College's DANDI project
2022-09-23 18:38:59 +00:00
restagePointerFiles :: Git.Repo -> Annex ()
restagePointerFiles r = unlessM (Annex.getState Annex.insmudgecleanfilter) $ do
-- Flush any queued changes to the keys database, so they
-- are visible to child processes.
-- The database is closed because that may improve behavior
-- when run in Windows's WSL1, which has issues with
-- multiple writers to SQL databases.
liftIO . Database.Keys.Handle.closeDbHandle
=<< Annex.getRead Annex.keysdbhandle
realindex <- liftIO $ Git.Index.currentIndexFile r
numsz@(numfiles, _) <- calcRestageLog (0, 0) $ \(_f, ic) (numfiles, sizefiles) ->
(numfiles+1, sizefiles + inodeCacheFileSize ic)
let lock = fromRawFilePath (Git.Index.indexFileLock realindex)
lockindex = liftIO $ catchMaybeIO $ Git.LockFile.openLock' lock
unlockindex = liftIO . maybe noop Git.LockFile.closeLock
showwarning = warning $ unableToRestage Nothing
go Nothing = showwarning
go (Just _) = withTmpDirIn (fromRawFilePath $ Git.localGitDir r) "annexindex" $ \tmpdir -> do
add restage log When pointer files need to be restaged, they're first written to the log, and then when the restage operation runs, it reads the log. This way, if the git-annex process is interrupted before it can do the restaging, a later git-annex process can do it. Currently, this lets a git-annex get/drop command be interrupted and then re-ran, and as long as it gets/drops additional files, it will clean up after the interrupted command. But more changes are needed to make it easier to restage after an interrupted process. Kept using the git queue to run the restage action, even though the list of files that it builds up for that action is not actually used by the action. This could perhaps be simplified to make restaging a cleanup action that gets registered, rather than using the git queue for it. But I wasn't sure if that would cause visible behavior changes, when eg dropping a large number of files, currently the git queue flushes periodically, and so it restages incrementally, rather than all at the end. In restagePointerFiles, it reads the restage log twice, once to get the number of files and size, and a second time to process it. This seemed better than reading the whole file into memory, since potentially a huge number of files could be in there. Probably the OS will cache the file in memory and there will not be much performance impact. It might be better to keep running tallies in another file though. But updating that atomically with the log seems hard. Also note that it's possible for calcRestageLog to see a different file than streamRestageLog does. More files may be added to the log in between. That is ok, it will only cause the filterprocessfaster heuristic to operate with slightly out of date information, so it may make the wrong choice for the files that got added and be a little slower than ideal. Sponsored-by: Dartmouth College's DANDI project
2022-09-23 18:38:59 +00:00
tsd <- getTSDelta
let tmpindex = toRawFilePath (tmpdir </> "index")
add restage log When pointer files need to be restaged, they're first written to the log, and then when the restage operation runs, it reads the log. This way, if the git-annex process is interrupted before it can do the restaging, a later git-annex process can do it. Currently, this lets a git-annex get/drop command be interrupted and then re-ran, and as long as it gets/drops additional files, it will clean up after the interrupted command. But more changes are needed to make it easier to restage after an interrupted process. Kept using the git queue to run the restage action, even though the list of files that it builds up for that action is not actually used by the action. This could perhaps be simplified to make restaging a cleanup action that gets registered, rather than using the git queue for it. But I wasn't sure if that would cause visible behavior changes, when eg dropping a large number of files, currently the git queue flushes periodically, and so it restages incrementally, rather than all at the end. In restagePointerFiles, it reads the restage log twice, once to get the number of files and size, and a second time to process it. This seemed better than reading the whole file into memory, since potentially a huge number of files could be in there. Probably the OS will cache the file in memory and there will not be much performance impact. It might be better to keep running tallies in another file though. But updating that atomically with the log seems hard. Also note that it's possible for calcRestageLog to see a different file than streamRestageLog does. More files may be added to the log in between. That is ok, it will only cause the filterprocessfaster heuristic to operate with slightly out of date information, so it may make the wrong choice for the files that got added and be a little slower than ideal. Sponsored-by: Dartmouth College's DANDI project
2022-09-23 18:38:59 +00:00
let replaceindex = liftIO $
moveFile tmpindex realindex
let updatetmpindex = do
r' <- liftIO $ Git.Env.addGitEnv r Git.Index.indexEnv
=<< Git.Index.indexEnvVal tmpindex
-- Avoid git warning about CRLF munging.
let r'' = r' { gitGlobalOpts = gitGlobalOpts r' ++
[ Param "-c"
, Param $ "core.safecrlf=" ++ boolConfig False
] }
add restage log When pointer files need to be restaged, they're first written to the log, and then when the restage operation runs, it reads the log. This way, if the git-annex process is interrupted before it can do the restaging, a later git-annex process can do it. Currently, this lets a git-annex get/drop command be interrupted and then re-ran, and as long as it gets/drops additional files, it will clean up after the interrupted command. But more changes are needed to make it easier to restage after an interrupted process. Kept using the git queue to run the restage action, even though the list of files that it builds up for that action is not actually used by the action. This could perhaps be simplified to make restaging a cleanup action that gets registered, rather than using the git queue for it. But I wasn't sure if that would cause visible behavior changes, when eg dropping a large number of files, currently the git queue flushes periodically, and so it restages incrementally, rather than all at the end. In restagePointerFiles, it reads the restage log twice, once to get the number of files and size, and a second time to process it. This seemed better than reading the whole file into memory, since potentially a huge number of files could be in there. Probably the OS will cache the file in memory and there will not be much performance impact. It might be better to keep running tallies in another file though. But updating that atomically with the log seems hard. Also note that it's possible for calcRestageLog to see a different file than streamRestageLog does. More files may be added to the log in between. That is ok, it will only cause the filterprocessfaster heuristic to operate with slightly out of date information, so it may make the wrong choice for the files that got added and be a little slower than ideal. Sponsored-by: Dartmouth College's DANDI project
2022-09-23 18:38:59 +00:00
configfilterprocess numsz $ runsGitAnnexChildProcessViaGit' r'' $ \r''' ->
Git.UpdateIndex.refreshIndex r''' $ \feeder -> do
let atend = do
-- wait for index write
liftIO $ feeder Nothing
replaceindex
streamRestageLog atend $ \topf ic -> do
let f = fromTopFilePath topf r'''
liftIO $ whenM (isunmodified tsd f ic) $
feedupdateindex f feeder
return True
ok <- liftIO (createLinkOrCopy realindex tmpindex)
add restage log When pointer files need to be restaged, they're first written to the log, and then when the restage operation runs, it reads the log. This way, if the git-annex process is interrupted before it can do the restaging, a later git-annex process can do it. Currently, this lets a git-annex get/drop command be interrupted and then re-ran, and as long as it gets/drops additional files, it will clean up after the interrupted command. But more changes are needed to make it easier to restage after an interrupted process. Kept using the git queue to run the restage action, even though the list of files that it builds up for that action is not actually used by the action. This could perhaps be simplified to make restaging a cleanup action that gets registered, rather than using the git queue for it. But I wasn't sure if that would cause visible behavior changes, when eg dropping a large number of files, currently the git queue flushes periodically, and so it restages incrementally, rather than all at the end. In restagePointerFiles, it reads the restage log twice, once to get the number of files and size, and a second time to process it. This seemed better than reading the whole file into memory, since potentially a huge number of files could be in there. Probably the OS will cache the file in memory and there will not be much performance impact. It might be better to keep running tallies in another file though. But updating that atomically with the log seems hard. Also note that it's possible for calcRestageLog to see a different file than streamRestageLog does. More files may be added to the log in between. That is ok, it will only cause the filterprocessfaster heuristic to operate with slightly out of date information, so it may make the wrong choice for the files that got added and be a little slower than ideal. Sponsored-by: Dartmouth College's DANDI project
2022-09-23 18:38:59 +00:00
<&&> catchBoolIO updatetmpindex
unless ok showwarning
when (numfiles > 0) $
bracket lockindex unlockindex go
where
add restage log When pointer files need to be restaged, they're first written to the log, and then when the restage operation runs, it reads the log. This way, if the git-annex process is interrupted before it can do the restaging, a later git-annex process can do it. Currently, this lets a git-annex get/drop command be interrupted and then re-ran, and as long as it gets/drops additional files, it will clean up after the interrupted command. But more changes are needed to make it easier to restage after an interrupted process. Kept using the git queue to run the restage action, even though the list of files that it builds up for that action is not actually used by the action. This could perhaps be simplified to make restaging a cleanup action that gets registered, rather than using the git queue for it. But I wasn't sure if that would cause visible behavior changes, when eg dropping a large number of files, currently the git queue flushes periodically, and so it restages incrementally, rather than all at the end. In restagePointerFiles, it reads the restage log twice, once to get the number of files and size, and a second time to process it. This seemed better than reading the whole file into memory, since potentially a huge number of files could be in there. Probably the OS will cache the file in memory and there will not be much performance impact. It might be better to keep running tallies in another file though. But updating that atomically with the log seems hard. Also note that it's possible for calcRestageLog to see a different file than streamRestageLog does. More files may be added to the log in between. That is ok, it will only cause the filterprocessfaster heuristic to operate with slightly out of date information, so it may make the wrong choice for the files that got added and be a little slower than ideal. Sponsored-by: Dartmouth College's DANDI project
2022-09-23 18:38:59 +00:00
isunmodified tsd f orig =
genInodeCache f tsd >>= return . \case
Nothing -> False
Just new -> compareStrong orig new
{- update-index is documented as picky about "./file" and it
- fails on "../../repo/path/file" when cwd is not in the repo
- being acted on. Avoid these problems with an absolute path.
-}
feedupdateindex f feeder = do
absf <- absPath f
feeder (Just absf)
{- filter.annex.process configured to use git-annex filter-process
- is sometimes faster and sometimes slower than using
- git-annex smudge. The latter is run once per file, while
- the former has the content of files piped to it.
-}
add restage log When pointer files need to be restaged, they're first written to the log, and then when the restage operation runs, it reads the log. This way, if the git-annex process is interrupted before it can do the restaging, a later git-annex process can do it. Currently, this lets a git-annex get/drop command be interrupted and then re-ran, and as long as it gets/drops additional files, it will clean up after the interrupted command. But more changes are needed to make it easier to restage after an interrupted process. Kept using the git queue to run the restage action, even though the list of files that it builds up for that action is not actually used by the action. This could perhaps be simplified to make restaging a cleanup action that gets registered, rather than using the git queue for it. But I wasn't sure if that would cause visible behavior changes, when eg dropping a large number of files, currently the git queue flushes periodically, and so it restages incrementally, rather than all at the end. In restagePointerFiles, it reads the restage log twice, once to get the number of files and size, and a second time to process it. This seemed better than reading the whole file into memory, since potentially a huge number of files could be in there. Probably the OS will cache the file in memory and there will not be much performance impact. It might be better to keep running tallies in another file though. But updating that atomically with the log seems hard. Also note that it's possible for calcRestageLog to see a different file than streamRestageLog does. More files may be added to the log in between. That is ok, it will only cause the filterprocessfaster heuristic to operate with slightly out of date information, so it may make the wrong choice for the files that got added and be a little slower than ideal. Sponsored-by: Dartmouth College's DANDI project
2022-09-23 18:38:59 +00:00
filterprocessfaster :: (Integer, FileSize) -> Bool
filterprocessfaster (numfiles, sizefiles) =
let estimate_enabled = sizefiles `div` 191739611
estimate_disabled = numfiles `div` 7
in estimate_enabled <= estimate_disabled
{- This disables filter.annex.process if it's set when it would
- probably not be faster to use it. Unfortunately, simply
- passing -c filter.annex.process= also prevents git from
- running the smudge filter, so .git/config has to be modified
- to disable it. The modification is reversed at the end. In
- case this process is terminated early, the next time this
- runs it will take care of reversing the modification.
-}
add restage log When pointer files need to be restaged, they're first written to the log, and then when the restage operation runs, it reads the log. This way, if the git-annex process is interrupted before it can do the restaging, a later git-annex process can do it. Currently, this lets a git-annex get/drop command be interrupted and then re-ran, and as long as it gets/drops additional files, it will clean up after the interrupted command. But more changes are needed to make it easier to restage after an interrupted process. Kept using the git queue to run the restage action, even though the list of files that it builds up for that action is not actually used by the action. This could perhaps be simplified to make restaging a cleanup action that gets registered, rather than using the git queue for it. But I wasn't sure if that would cause visible behavior changes, when eg dropping a large number of files, currently the git queue flushes periodically, and so it restages incrementally, rather than all at the end. In restagePointerFiles, it reads the restage log twice, once to get the number of files and size, and a second time to process it. This seemed better than reading the whole file into memory, since potentially a huge number of files could be in there. Probably the OS will cache the file in memory and there will not be much performance impact. It might be better to keep running tallies in another file though. But updating that atomically with the log seems hard. Also note that it's possible for calcRestageLog to see a different file than streamRestageLog does. More files may be added to the log in between. That is ok, it will only cause the filterprocessfaster heuristic to operate with slightly out of date information, so it may make the wrong choice for the files that got added and be a little slower than ideal. Sponsored-by: Dartmouth College's DANDI project
2022-09-23 18:38:59 +00:00
configfilterprocess numsz = bracket setup cleanup . const
where
setup
add restage log When pointer files need to be restaged, they're first written to the log, and then when the restage operation runs, it reads the log. This way, if the git-annex process is interrupted before it can do the restaging, a later git-annex process can do it. Currently, this lets a git-annex get/drop command be interrupted and then re-ran, and as long as it gets/drops additional files, it will clean up after the interrupted command. But more changes are needed to make it easier to restage after an interrupted process. Kept using the git queue to run the restage action, even though the list of files that it builds up for that action is not actually used by the action. This could perhaps be simplified to make restaging a cleanup action that gets registered, rather than using the git queue for it. But I wasn't sure if that would cause visible behavior changes, when eg dropping a large number of files, currently the git queue flushes periodically, and so it restages incrementally, rather than all at the end. In restagePointerFiles, it reads the restage log twice, once to get the number of files and size, and a second time to process it. This seemed better than reading the whole file into memory, since potentially a huge number of files could be in there. Probably the OS will cache the file in memory and there will not be much performance impact. It might be better to keep running tallies in another file though. But updating that atomically with the log seems hard. Also note that it's possible for calcRestageLog to see a different file than streamRestageLog does. More files may be added to the log in between. That is ok, it will only cause the filterprocessfaster heuristic to operate with slightly out of date information, so it may make the wrong choice for the files that got added and be a little slower than ideal. Sponsored-by: Dartmouth College's DANDI project
2022-09-23 18:38:59 +00:00
| filterprocessfaster numsz = return Nothing
| otherwise = fromRepo (Git.Config.getMaybe ck) >>= \case
Nothing -> return Nothing
Just v -> do
void $ inRepo (Git.Config.change ckd (fromConfigValue v))
void $ inRepo (Git.Config.unset ck)
return (Just v)
cleanup (Just v) = do
void $ inRepo $ Git.Config.change ck (fromConfigValue v)
void $ inRepo (Git.Config.unset ckd)
cleanup Nothing = fromRepo (Git.Config.getMaybe ckd) >>= \case
Nothing -> return ()
Just v -> do
whenM (isNothing <$> fromRepo (Git.Config.getMaybe ck)) $
void $ inRepo (Git.Config.change ck (fromConfigValue v))
void $ inRepo (Git.Config.unset ckd)
ck = ConfigKey "filter.annex.process"
ckd = ConfigKey "filter.annex.process-temp-disabled"
unableToRestage :: Maybe FilePath -> String
unableToRestage mf = unwords
[ "git status will show " ++ fromMaybe "some files" mf
, "to be modified, since content availability has changed"
, "and git-annex was unable to update the index."
, "This is only a cosmetic problem affecting git status; git add,"
, "git commit, etc won't be affected."
, "To fix the git status display, you can run:"
, "git update-index -q --refresh " ++ fromMaybe "<file>" mf
]
{- Parses a symlink target or a pointer file to a Key.
-
- Makes sure that the pointer file is valid, including not being longer
- than the maximum allowed size of a valid pointer file, and that any
- subsequent lines after the first contain the validPointerLineTag.
- If a valid pointer file gets some other data appended to it, it should
- never be considered valid, unless that data happened to itself be a
- valid pointer file.
-}
parseLinkTargetOrPointer :: S.ByteString -> Maybe Key
parseLinkTargetOrPointer = either (const Nothing) id
. parseLinkTargetOrPointer'
data InvalidAppendedPointerFile = InvalidAppendedPointerFile
parseLinkTargetOrPointer' :: S.ByteString -> Either InvalidAppendedPointerFile (Maybe Key)
parseLinkTargetOrPointer' b =
let (firstline, rest) = S8.span (/= '\n') b
in case parsekey $ droptrailing '\r' firstline of
Just k
| S.length b > maxValidPointerSz -> Left InvalidAppendedPointerFile
| restvalid (dropleading '\n' rest) -> Right (Just k)
| otherwise -> Left InvalidAppendedPointerFile
Nothing -> Right Nothing
where
parsekey l
| isLinkToAnnex l = fileKey $ snd $ S8.breakEnd pathsep l
| otherwise = Nothing
restvalid r
| S.null r = True
| otherwise =
let (l, r') = S8.span (/= '\n') r
in validPointerLineTag `S.isInfixOf` l
&& (not (S8.null r') && S8.head r' == '\n')
&& restvalid (S8.tail r')
dropleading c l
| S.null l = l
| S8.head l == c = S8.tail l
| otherwise = l
droptrailing c l
| S.null l = l
| S8.last l == c = S8.init l
| otherwise = l
pathsep '/' = True
#ifdef mingw32_HOST_OS
pathsep '\\' = True
#endif
pathsep _ = False
{- Avoid looking at more of the lazy ByteString than necessary since it
- could be reading from a large file that is not a pointer file. -}
parseLinkTargetOrPointerLazy :: L.ByteString -> Maybe Key
parseLinkTargetOrPointerLazy = either (const Nothing) id
. parseLinkTargetOrPointerLazy'
parseLinkTargetOrPointerLazy' :: L.ByteString -> Either InvalidAppendedPointerFile (Maybe Key)
parseLinkTargetOrPointerLazy' b =
let b' = L.take (fromIntegral maxPointerSz) b
in parseLinkTargetOrPointer' (L.toStrict b')
formatPointer :: Key -> S.ByteString
formatPointer k = prefix <> keyFile k <> nl
where
2022-06-22 20:08:49 +00:00
prefix = toInternalGitPath $ P.pathSeparator `S.cons` objectDir
nl = S8.singleton '\n'
{- Maximum size of a file that could be a pointer to a key.
- Check to avoid buffering really big files in git into
- memory when reading files that may be pointers.
-
- 8192 bytes is plenty for a pointer to a key. This adds some additional
- padding to allow for pointer files that have lines of additional data
- after the key.
-
- One additional byte is used to detect when a valid pointer file
- got something else appended to it.
-}
maxPointerSz :: Int
maxPointerSz = maxValidPointerSz + 1
{- Maximum size of a valid pointer files is 32kb. -}
maxValidPointerSz :: Int
maxValidPointerSz = 32768
maxSymlinkSz :: Int
maxSymlinkSz = 8192
{- Checks if a worktree file is a pointer to a key.
-
- Unlocked files whose content is present are not detected by this.
-
- It's possible, though unlikely, that an annex symlink points to
- an object that looks like a pointer file. Or that a non-annex
- symlink does. Avoids a false positive in those cases.
- -}
isPointerFile :: RawFilePath -> IO (Maybe Key)
isPointerFile f = catchDefaultIO Nothing $
#if defined(mingw32_HOST_OS)
checkcontentfollowssymlinks -- no symlinks supported on windows
#else
#if MIN_VERSION_unix(2,8,0)
bracket
(openFd (fromRawFilePath f) ReadOnly (defaultFileFlags { nofollow = True }) Nothing)
closeFd
(\fd -> readhandle =<< fdToHandle fd)
#else
ifM (isSymbolicLink <$> R.getSymbolicLinkStatus f)
( return Nothing
, checkcontentfollowssymlinks
)
#endif
#endif
where
checkcontentfollowssymlinks =
withFile (fromRawFilePath f) ReadMode readhandle
readhandle h = parseLinkTargetOrPointer <$> S.hGet h maxPointerSz
{- Checks a symlink target or pointer file first line to see if it
- appears to point to annexed content.
-
- We only look for paths inside the .git directory, and not at the .git
- directory itself, because GIT_DIR may cause a directory name other
- than .git to be used.
-}
isLinkToAnnex :: S.ByteString -> Bool
isLinkToAnnex s = p `S.isInfixOf` s
#ifdef mingw32_HOST_OS
-- '/' is used inside pointer files on Windows, not the native '\'
|| p' `S.isInfixOf` s
#endif
where
2022-06-22 20:08:49 +00:00
p = P.pathSeparator `S.cons` objectDir
#ifdef mingw32_HOST_OS
p' = toInternalGitPath p
#endif
{- String that must appear on every line of a valid pointer file. -}
validPointerLineTag :: S.ByteString
validPointerLineTag = "/annex/"