diff --git a/.forgejo/patches/ghc-9.8.patch b/.forgejo/patches/ghc-9.8.patch deleted file mode 100644 index 85796d787d..0000000000 --- a/.forgejo/patches/ghc-9.8.patch +++ /dev/null @@ -1,18 +0,0 @@ -Support ghc-9.8 by widening a lot of constraints. - -This patch can be removed once upstream supports ghc 9.8 offically. - -diff -uprN git-annex-10.20240227.orig/cabal.project git-annex-10.20240227/cabal.project ---- git-annex-10.20240227.orig/cabal.project 1970-01-01 01:00:00.000000000 +0100 -+++ git-annex-10.20240227/cabal.project 2024-04-28 13:30:14.061706299 +0200 -@@ -0,0 +1,10 @@ -+packages: *.cabal -+ -+allow-newer: dav -+allow-newer: haskeline:filepath -+allow-newer: haskeline:directory -+allow-newer: xml-hamlet -+allow-newer: aws:filepath -+allow-newer: dbus:network -+allow-newer: dbus:filepath -+allow-newer: microstache:filepath diff --git a/.forgejo/workflows/generate-lockfile.yml b/.forgejo/workflows/generate-lockfile.yml deleted file mode 100644 index 8dbb579e67..0000000000 --- a/.forgejo/workflows/generate-lockfile.yml +++ /dev/null @@ -1,89 +0,0 @@ -on: - workflow_dispatch: - inputs: - ref_name: - description: 'Tag or commit' - required: true - type: string - - push: - tags: - - '*' - -jobs: - cabal-config-edge: - name: Generate cabal config for edge - runs-on: x86_64 - container: - image: alpine:edge - env: - CI_ALPINE_TARGET_RELEASE: edge - steps: - - name: Environment setup - run: | - apk upgrade -a - apk add nodejs git cabal patch - - name: Repo pull - uses: actions/checkout@v4 - with: - fetch-depth: 1 - ref: ${{ inputs.ref_name }} - - name: Config generation - run: | - patch -p1 -i .forgejo/patches/ghc-9.8.patch - HOME="${{ github.workspace}}"/cabal_cache cabal update - HOME="${{ github.workspace}}"/cabal_cache cabal v2-freeze --shadow-installed-packages --strong-flags --flags="+assistant +webapp +pairing +production +torrentparser +magicmime +benchmark -debuglocks +dbus +networkbsd +gitlfs +httpclientrestricted" - mv cabal.project.freeze 
git-annex.config - - name: Package upload - uses: forgejo/upload-artifact@v3 - with: - name: cabalconfigedge - path: git-annex*.config - cabal-config-v322: - name: Generate cabal config for v3.22 - runs-on: x86_64 - container: - image: alpine:3.22 - env: - CI_ALPINE_TARGET_RELEASE: v3.22 - steps: - - name: Environment setup - run: | - apk upgrade -a - apk add nodejs git cabal patch - - name: Repo pull - uses: actions/checkout@v4 - with: - fetch-depth: 1 - ref: ${{ inputs.ref_name }} - - name: Config generation - run: | - patch -p1 -i .forgejo/patches/ghc-9.8.patch - HOME="${{ github.workspace }}"/cabal_cache cabal update - HOME="${{ github.workspace }}"/cabal_cache cabal v2-freeze --shadow-installed-packages --strong-flags --flags="+assistant +webapp +pairing +production +torrentparser +magicmime +benchmark -debuglocks +dbus +networkbsd +gitlfs +httpclientrestricted" - mv cabal.project.freeze git-annex.config - - name: Package upload - uses: forgejo/upload-artifact@v3 - with: - name: cabalconfig322 - path: git-annex*.config - upload-tarball: - name: Upload to generic repo - runs-on: x86_64 - needs: [cabal-config-edge,cabal-config-v322] - container: - image: alpine:latest - steps: - - name: Environment setup - run: apk add nodejs curl findutils - - name: Package download - uses: forgejo/download-artifact@v3 - - name: Package deployment - run: | - if test $GITHUB_REF_NAME == "ci" ; then - CI_REF_NAME=${{ inputs.ref_name }} - else - CI_REF_NAME=$GITHUB_REF_NAME - fi - curl --user ${{ vars.CODE_FORGEJO_USER }}:${{ secrets.CODE_FORGEJO_TOKEN }} --upload-file ./cabalconfigedge/git-annex.config ${{ github.server_url }}/api/packages/mirrors/generic/git-annex/$CI_REF_NAME/git-annex-$CI_REF_NAME-edge.cabal - curl --user ${{ vars.CODE_FORGEJO_USER }}:${{ secrets.CODE_FORGEJO_TOKEN }} --upload-file ./cabalconfig322/git-annex.config ${{ github.server_url }}/api/packages/mirrors/generic/git-annex/$CI_REF_NAME/git-annex-$CI_REF_NAME-v322.cabal diff --git 
a/.forgejo/workflows/mirror-repository.yml b/.forgejo/workflows/mirror-repository.yml deleted file mode 100644 index f44c4668cf..0000000000 --- a/.forgejo/workflows/mirror-repository.yml +++ /dev/null @@ -1,50 +0,0 @@ -on: - workflow_dispatch: - - schedule: - - cron: '@hourly' - -jobs: - mirror: - name: Pull from upstream - runs-on: x86_64 - container: - image: alpine:latest - env: - upstream: https://git.joeyh.name/git/git-annex.git - tags: '10.2025*' - steps: - - name: Environment setup - run: apk add grep git sed coreutils bash nodejs - - name: Fetch destination - uses: actions/checkout@v4 - with: - fetch_depth: 1 - ref: ci - token: ${{ secrets.CODE_FORGEJO_TOKEN }} - - name: Missing tag detecting - run: | - git ls-remote $upstream "refs/tags/$tags" | grep -v '{' | sed 's|.*/||' | sort > upstream_tags - git ls-remote ${{ github.server_url}}/${{ github.repository }} "refs/tags/$tags" | grep -v '{' | sed 's|.*/||' | sort > destination_tags - comm -23 upstream_tags destination_tags > missing_tags - echo "Missing tags:" - cat missing_tags - - name: Missing tag fetch - run: | - git remote add upstream $upstream - while read tag; do - git fetch upstream tag $tag --no-tags - done < missing_tags - - name: Packaging workflow injection - run: | - while read tag; do - git checkout $tag - git tag -d $tag - git checkout ci -- ./.forgejo - git config user.name "forgejo-actions[bot]" - git config user.email "dev@ayakael.net" - git commit -m 'Inject custom workflow' - git tag -a $tag -m $tag - done < missing_tags - - name: Push to destination - run: git push --force origin refs/tags/*:refs/tags/* --tags diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000000..5d425843f2 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +debian/changelog merge=dpkg-mergechangelogs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000..7d2504de6f --- /dev/null +++ b/.gitignore @@ -0,0 +1,19 @@ +*.hi +*.o +test +configure +Build/SysConfig.hs 
+git-annex +git-annex-shell +git-union-merge +git-annex.1 +git-annex-shell.1 +git-union-merge.1 +doc/.ikiwiki +html +*.tix +.hpc +Utility/Touch.hs +Utility/StatFS.hs +Remote/S3.hs +dist diff --git a/Annex.hs b/Annex.hs new file mode 100644 index 0000000000..e82ffc5d1f --- /dev/null +++ b/Annex.hs @@ -0,0 +1,134 @@ +{- git-annex monad + - + - Copyright 2010-2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +{-# LANGUAGE GeneralizedNewtypeDeriving #-} + +module Annex ( + Annex, + AnnexState(..), + OutputType(..), + new, + newState, + run, + eval, + getState, + changeState, + gitRepo, + inRepo, + fromRepo, +) where + +import Control.Monad.IO.Control +import Control.Monad.State + +import Common +import qualified Git +import qualified Git.Config +import Git.CatFile +import qualified Git.Queue +import Types.Backend +import qualified Types.Remote +import Types.Crypto +import Types.BranchState +import Types.TrustLevel +import Types.UUID +import qualified Utility.Matcher +import qualified Utility.Format +import qualified Data.Map as M + +-- git-annex's monad +newtype Annex a = Annex { runAnnex :: StateT AnnexState IO a } + deriving ( + Monad, + MonadIO, + MonadControlIO, + MonadState AnnexState, + Functor, + Applicative + ) + +data OutputType = NormalOutput | QuietOutput | JSONOutput + +-- internal state storage +data AnnexState = AnnexState + { repo :: Git.Repo + , backends :: [Backend Annex] + , remotes :: [Types.Remote.Remote Annex] + , repoqueue :: Git.Queue.Queue + , output :: OutputType + , force :: Bool + , fast :: Bool + , auto :: Bool + , format :: Maybe Utility.Format.Format + , branchstate :: BranchState + , catfilehandle :: Maybe CatFileHandle + , forcebackend :: Maybe String + , forcenumcopies :: Maybe Int + , toremote :: Maybe String + , fromremote :: Maybe String + , limit :: Either [Utility.Matcher.Token (FilePath -> Annex Bool)] (Utility.Matcher.Matcher (FilePath -> Annex Bool)) + , forcetrust :: [(UUID, TrustLevel)] + , trustmap 
:: Maybe TrustMap + , ciphers :: M.Map EncryptedCipher Cipher + } + +newState :: Git.Repo -> AnnexState +newState gitrepo = AnnexState + { repo = gitrepo + , backends = [] + , remotes = [] + , repoqueue = Git.Queue.new + , output = NormalOutput + , force = False + , fast = False + , auto = False + , format = Nothing + , branchstate = startBranchState + , catfilehandle = Nothing + , forcebackend = Nothing + , forcenumcopies = Nothing + , toremote = Nothing + , fromremote = Nothing + , limit = Left [] + , forcetrust = [] + , trustmap = Nothing + , ciphers = M.empty + } + +{- Creates and returns an Annex state object for the specified git repo. -} +new :: Git.Repo -> IO AnnexState +new gitrepo = newState <$> Git.Config.read gitrepo + +{- performs an action in the Annex monad -} +run :: AnnexState -> Annex a -> IO (a, AnnexState) +run s a = runStateT (runAnnex a) s +eval :: AnnexState -> Annex a -> IO a +eval s a = evalStateT (runAnnex a) s + +{- Gets a value from the internal state, selected by the passed value + - constructor. -} +getState :: (AnnexState -> a) -> Annex a +getState = gets + +{- Applies a state mutation function to change the internal state. + - + - Example: changeState $ \s -> s { output = QuietOutput } + -} +changeState :: (AnnexState -> AnnexState) -> Annex () +changeState = modify + +{- Returns the annex's git repository. -} +gitRepo :: Annex Git.Repo +gitRepo = getState repo + +{- Runs an IO action in the annex's git repository. -} +inRepo :: (Git.Repo -> IO a) -> Annex a +inRepo a = liftIO . a =<< gitRepo + +{- Extracts a value from the annex's git repository. -} +fromRepo :: (Git.Repo -> a) -> Annex a +fromRepo a = a <$> gitRepo diff --git a/Annex/Branch.hs b/Annex/Branch.hs new file mode 100644 index 0000000000..5f678b9d30 --- /dev/null +++ b/Annex/Branch.hs @@ -0,0 +1,312 @@ +{- management of the git-annex branch + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. 
+ -} + +module Annex.Branch ( + name, + hasOrigin, + hasSibling, + create, + update, + updateTo, + get, + change, + commit, + files, +) where + +import qualified Data.ByteString.Lazy.Char8 as L + +import Common.Annex +import Annex.Exception +import Annex.BranchState +import Annex.Journal +import qualified Git +import qualified Git.Command +import qualified Git.Ref +import qualified Git.Branch +import qualified Git.UnionMerge +import qualified Git.HashObject +import qualified Git.Index +import Annex.CatFile + +{- Name of the branch that is used to store git-annex's information. -} +name :: Git.Ref +name = Git.Ref "git-annex" + +{- Fully qualified name of the branch. -} +fullname :: Git.Ref +fullname = Git.Ref $ "refs/heads/" ++ show name + +{- Branch's name in origin. -} +originname :: Git.Ref +originname = Git.Ref $ "origin/" ++ show name + +{- Does origin/git-annex exist? -} +hasOrigin :: Annex Bool +hasOrigin = inRepo $ Git.Ref.exists originname + +{- Does the git-annex branch or a sibling foo/git-annex branch exist? -} +hasSibling :: Annex Bool +hasSibling = not . null <$> siblingBranches + +{- List of git-annex (refs, branches), including the main one and any + - from remotes. Duplicate refs are filtered out. -} +siblingBranches :: Annex [(Git.Ref, Git.Branch)] +siblingBranches = inRepo $ Git.Ref.matching name + +{- Creates the branch, if it does not already exist. -} +create :: Annex () +create = do + _ <- getBranch + return () + +{- Returns the ref of the branch, creating it first if necessary. 
-} +getBranch :: Annex (Git.Ref) +getBranch = maybe (hasOrigin >>= go >>= use) (return) =<< branchsha + where + go True = do + inRepo $ Git.Command.run "branch" + [Param $ show name, Param $ show originname] + fromMaybe (error $ "failed to create " ++ show name) + <$> branchsha + go False = withIndex' True $ do + inRepo $ Git.Branch.commit "branch created" fullname [] + use sha = do + setIndexSha sha + return sha + branchsha = inRepo $ Git.Ref.sha fullname + +{- Ensures that the branch and index are up-to-date; should be + - called before data is read from it. Runs only once per git-annex run. + -} +update :: Annex () +update = runUpdateOnce $ updateTo =<< siblingBranches + +{- Merges the specified Refs into the index, if they have any changes not + - already in it. The Branch names are only used in the commit message; + - it's even possible that the provided Branches have not been updated to + - point to the Refs yet. + - + - Before refs are merged into the index, it's important to first stage the + - journal into the index. Otherwise, any changes in the journal would + - later get staged, and might overwrite changes made during the merge. + - If no Refs are provided, the journal is still staged and committed. + - + - (It would be cleaner to handle the merge by updating the journal, not the + - index, with changes from the branches.) + - + - The branch is fast-forwarded if possible, otherwise a merge commit is + - made. 
+ -} +updateTo :: [(Git.Ref, Git.Branch)] -> Annex () +updateTo pairs = do + -- ensure branch exists, and get its current ref + branchref <- getBranch + -- check what needs updating before taking the lock + dirty <- journalDirty + (refs, branches) <- unzip <$> filterM isnewer pairs + if (not dirty && null refs) + then updateIndex branchref + else withIndex $ lockJournal $ do + when dirty stageJournal + let merge_desc = if null branches + then "update" + else "merging " ++ + unwords (map Git.Ref.describe branches) ++ + " into " ++ show name + unless (null branches) $ do + showSideAction merge_desc + mergeIndex refs + ff <- if dirty + then return False + else inRepo $ Git.Branch.fastForward fullname refs + if ff + then updateIndex branchref + else commitBranch branchref merge_desc + (nub $ fullname:refs) + invalidateCache + where + isnewer (r, _) = inRepo $ Git.Branch.changed fullname r + +{- Gets the content of a file on the branch, or content from the journal, or + - staged in the index. + - + - Returns an empty string if the file doesn't exist yet. -} +get :: FilePath -> Annex String +get = get' False + +{- Like get, but does not merge the branch, so the info returned may not + - reflect changes in remotes. (Changing the value this returns, and then + - merging is always the same as using get, and then changing its value.) -} +getStale :: FilePath -> Annex String +getStale = get' True + +get' :: Bool -> FilePath -> Annex String +get' staleok file = fromcache =<< getCache file + where + fromcache (Just content) = return content + fromcache Nothing = fromjournal =<< getJournalFile file + fromjournal (Just content) = cache content + fromjournal Nothing + | staleok = withIndex frombranch + | otherwise = withIndexUpdate $ frombranch >>= cache + frombranch = L.unpack <$> catFile fullname file + cache content = do + setCache file content + return content + +{- Applies a function to modify the content of a file. 
+ - + - Note that this does not cause the branch to be merged, it only + - modifies the current content of the file on the branch. + -} +change :: FilePath -> (String -> String) -> Annex () +change file a = lockJournal $ getStale file >>= return . a >>= set file + +{- Records new content of a file into the journal and cache. -} +set :: FilePath -> String -> Annex () +set file content = do + setJournalFile file content + setCache file content + +{- Stages the journal, and commits staged changes to the branch. -} +commit :: String -> Annex () +commit message = whenM journalDirty $ lockJournal $ do + stageJournal + ref <- getBranch + withIndex $ commitBranch ref message [fullname] + +{- Commits the staged changes in the index to the branch. + - + - Ensures that the branch's index file is first updated to the state + - of the branch at branchref, before running the commit action. This + - is needed because the branch may have had changes pushed to it, that + - are not yet reflected in the index. + - + - Also safely handles a race that can occur if a change is being pushed + - into the branch at the same time. When the race happens, the commit will + - be made on top of the newly pushed change, but without the index file + - being updated to include it. The result is that the newly pushed + - change is reverted. This race is detected and another commit made + - to fix it. + - + - The branchref value can have been obtained using getBranch at any + - previous point, though getting it a long time ago makes the race + - more likely to occur. 
+ -} +commitBranch :: Git.Ref -> String -> [Git.Ref] -> Annex () +commitBranch branchref message parents = do + updateIndex branchref + committedref <- inRepo $ Git.Branch.commit message fullname parents + setIndexSha committedref + parentrefs <- commitparents <$> catObject committedref + when (racedetected branchref parentrefs) $ + fixrace committedref parentrefs + where + -- look for "parent ref" lines and return the refs + commitparents = map (Git.Ref . snd) . filter isparent . + map (toassoc . L.unpack) . L.lines + toassoc = separate (== ' ') + isparent (k,_) = k == "parent" + + {- The race can be detected by checking the commit's + - parent, which will be the newly pushed branch, + - instead of the expected ref that the index was updated to. -} + racedetected expectedref parentrefs + | expectedref `elem` parentrefs = False -- good parent + | otherwise = True -- race! + + {- To recover from the race, union merge the lost refs + - into the index, and recommit on top of the bad commit. -} + fixrace committedref lostrefs = do + mergeIndex lostrefs + commitBranch committedref racemessage [committedref] + + racemessage = message ++ " (recovery from race)" + +{- Lists all files on the branch. There may be duplicates in the list. -} +files :: Annex [FilePath] +files = withIndexUpdate $ do + bfiles <- inRepo $ Git.Command.pipeNullSplit + [Params "ls-tree --name-only -r -z", Param $ show fullname] + jfiles <- getJournalledFiles + return $ jfiles ++ bfiles + + +{- Populates the branch's index file with the current branch contents. + - + - This is only done when the index doesn't yet exist, and the index + - is used to build up changes to be committed to the branch, and merge + - in changes from other branches. + -} +genIndex :: Git.Repo -> IO () +genIndex g = Git.UnionMerge.stream_update_index g + [Git.UnionMerge.ls_tree fullname g] + +{- Merges the specified refs into the index. + - Any changes staged in the index will be preserved. 
-} +mergeIndex :: [Git.Ref] -> Annex () +mergeIndex branches = do + h <- catFileHandle + inRepo $ \g -> Git.UnionMerge.merge_index h g branches + +{- Runs an action using the branch's index file. -} +withIndex :: Annex a -> Annex a +withIndex = withIndex' False +withIndex' :: Bool -> Annex a -> Annex a +withIndex' bootstrapping a = do + f <- fromRepo gitAnnexIndex + bracketIO (Git.Index.override f) id $ do + unlessM (liftIO $ doesFileExist f) $ do + unless bootstrapping create + liftIO $ createDirectoryIfMissing True $ takeDirectory f + unless bootstrapping $ inRepo genIndex + a + +{- Runs an action using the branch's index file, first making sure that + - the branch and index are up-to-date. -} +withIndexUpdate :: Annex a -> Annex a +withIndexUpdate a = update >> withIndex a + +{- Updates the branch's index to reflect the current contents of the branch. + - Any changes staged in the index will be preserved. + - + - Compares the ref stored in the lock file with the current + - ref of the branch to see if an update is needed. + -} +updateIndex :: Git.Ref -> Annex () +updateIndex branchref = do + lock <- fromRepo gitAnnexIndexLock + lockref <- Git.Ref . firstLine <$> + liftIO (catchDefaultIO (readFileStrict lock) "") + when (lockref /= branchref) $ do + withIndex $ mergeIndex [fullname] + setIndexSha branchref + +{- Record that the branch's index has been updated to correspond to a + - given ref of the branch. -} +setIndexSha :: Git.Ref -> Annex () +setIndexSha ref = do + lock <- fromRepo gitAnnexIndexLock + liftIO $ writeFile lock $ show ref ++ "\n" + +{- Stages the journal into the index. -} +stageJournal :: Annex () +stageJournal = do + fs <- getJournalFiles + g <- gitRepo + withIndex $ liftIO $ do + let dir = gitAnnexJournalDir g + let paths = map (dir </>) fs + (shas, cleanup) <- Git.HashObject.hashFiles paths g + Git.UnionMerge.update_index g $ + index_lines shas (map fileJournal fs) + cleanup + mapM_ removeFile paths + where + index_lines shas = map genline . 
zip shas + genline (sha, file) = Git.UnionMerge.update_index_line sha file diff --git a/Annex/BranchState.hs b/Annex/BranchState.hs new file mode 100644 index 0000000000..0950e9a967 --- /dev/null +++ b/Annex/BranchState.hs @@ -0,0 +1,56 @@ +{- git-annex branch state management + - + - Runtime state about the git-annex branch, including a small read cache. + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.BranchState where + +import Common.Annex +import Types.BranchState +import qualified Annex + +getState :: Annex BranchState +getState = Annex.getState Annex.branchstate + +setState :: BranchState -> Annex () +setState state = Annex.changeState $ \s -> s { Annex.branchstate = state } + +setCache :: FilePath -> String -> Annex () +setCache file content = do + state <- getState + setState state { cachedFile = Just file, cachedContent = content } + +getCache :: FilePath -> Annex (Maybe String) +getCache file = getState >>= go + where + go state + | cachedFile state == Just file = + return $ Just $ cachedContent state + | otherwise = return Nothing + +invalidateCache :: Annex () +invalidateCache = do + state <- getState + setState state { cachedFile = Nothing, cachedContent = "" } + +{- Runs an action to update the branch, if it's not been updated before + - in this run of git-annex. -} +runUpdateOnce :: Annex () -> Annex () +runUpdateOnce a = unlessM (branchUpdated <$> getState) $ do + a + disableUpdate + +{- Avoids updating the branch. A useful optimisation when the branch + - is known to have not changed, or git-annex won't be relying on info + - from it. 
-} +disableUpdate :: Annex () +disableUpdate = Annex.changeState setupdated + where + setupdated s = s { Annex.branchstate = new } + where + new = old { branchUpdated = True } + old = Annex.branchstate s diff --git a/Annex/CatFile.hs b/Annex/CatFile.hs new file mode 100644 index 0000000000..bcf44551e2 --- /dev/null +++ b/Annex/CatFile.hs @@ -0,0 +1,37 @@ +{- git cat-file interface, with handle automatically stored in the Annex monad + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.CatFile ( + catFile, + catObject, + catFileHandle +) where + +import qualified Data.ByteString.Lazy.Char8 as L + +import Common.Annex +import qualified Git +import qualified Git.CatFile +import qualified Annex + +catFile :: Git.Branch -> FilePath -> Annex L.ByteString +catFile branch file = do + h <- catFileHandle + liftIO $ Git.CatFile.catFile h branch file + +catObject :: Git.Ref -> Annex L.ByteString +catObject ref = do + h <- catFileHandle + liftIO $ Git.CatFile.catObject h ref + +catFileHandle :: Annex Git.CatFile.CatFileHandle +catFileHandle = maybe startup return =<< Annex.getState Annex.catfilehandle + where + startup = do + h <- inRepo Git.CatFile.catFileStart + Annex.changeState $ \s -> s { Annex.catfilehandle = Just h } + return h diff --git a/Annex/Content.hs b/Annex/Content.hs new file mode 100644 index 0000000000..3f1db37b53 --- /dev/null +++ b/Annex/Content.hs @@ -0,0 +1,283 @@ +{- git-annex file content managing + - + - Copyright 2010 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. 
+ -} + +module Annex.Content ( + inAnnex, + inAnnexSafe, + lockContent, + calcGitLink, + logStatus, + getViaTmp, + getViaTmpUnchecked, + withTmp, + checkDiskSpace, + moveAnnex, + removeAnnex, + fromAnnex, + moveBad, + getKeysPresent, + saveState +) where + +import System.IO.Error (try) +import Control.Exception (bracket_) +import System.Posix.Types + +import Common.Annex +import Logs.Location +import Annex.UUID +import qualified Git +import qualified Annex +import qualified Annex.Queue +import qualified Annex.Branch +import Utility.StatFS +import Utility.FileMode +import Types.Key +import Utility.DataUnits +import Config +import Annex.Exception + +{- Checks if a given key's content is currently present. -} +inAnnex :: Key -> Annex Bool +inAnnex = inAnnex' doesFileExist +inAnnex' :: (FilePath -> IO a) -> Key -> Annex a +inAnnex' a key = do + whenM (fromRepo Git.repoIsUrl) $ + error "inAnnex cannot check remote repo" + inRepo $ \g -> gitAnnexLocation key g >>= a + +{- A safer check; the key's content must not only be present, but + - is not in the process of being removed. -} +inAnnexSafe :: Key -> Annex (Maybe Bool) +inAnnexSafe = inAnnex' $ \f -> openForLock f False >>= check + where + check Nothing = return is_missing + check (Just h) = do + v <- getLock h (ReadLock, AbsoluteSeek, 0, 0) + closeFd h + return $ case v of + Just _ -> is_locked + Nothing -> is_unlocked + is_locked = Nothing + is_unlocked = Just True + is_missing = Just False + +{- Content is exclusively locked while running an action that might remove + - it. (If the content is not present, no locking is done.) 
-} +lockContent :: Key -> Annex a -> Annex a +lockContent key a = do + file <- inRepo $ gitAnnexLocation key + bracketIO (openForLock file True >>= lock) unlock a + where + lock Nothing = return Nothing + lock (Just l) = do + v <- try $ setLock l (WriteLock, AbsoluteSeek, 0, 0) + case v of + Left _ -> error "content is locked" + Right _ -> return $ Just l + unlock Nothing = return () + unlock (Just l) = closeFd l + +openForLock :: FilePath -> Bool -> IO (Maybe Fd) +openForLock file writelock = bracket_ prep cleanup go + where + go = catchMaybeIO $ openFd file mode Nothing defaultFileFlags + mode = if writelock then ReadWrite else ReadOnly + {- Since files are stored with the write bit disabled, + - have to fiddle with permissions to open for an + - exclusive lock. -} + forwritelock a = + when writelock $ whenM (doesFileExist file) a + prep = forwritelock $ allowWrite file + cleanup = forwritelock $ preventWrite file + +{- Calculates the relative path to use to link a file to a key. -} +calcGitLink :: FilePath -> Key -> Annex FilePath +calcGitLink file key = do + cwd <- liftIO getCurrentDirectory + let absfile = fromMaybe whoops $ absNormPath cwd file + loc <- inRepo $ gitAnnexLocation key + return $ relPathDirToFile (parentDir absfile) loc + where + whoops = error $ "unable to normalize " ++ file + +{- Updates the Logs.Location when a key's presence changes in the current + - repository. -} +logStatus :: Key -> LogStatus -> Annex () +logStatus key status = do + u <- getUUID + logChange key u status + +{- Runs an action, passing it a temporary filename to get, + - and if the action succeeds, moves the temp file into + - the annex as a key's content. -} +getViaTmp :: Key -> (FilePath -> Annex Bool) -> Annex Bool +getViaTmp key action = do + tmp <- fromRepo $ gitAnnexTmpLocation key + + -- Check that there is enough free disk space. + -- When the temp file already exists, count the space + -- it is using as free. 
+ e <- liftIO $ doesFileExist tmp + if e + then do + stat <- liftIO $ getFileStatus tmp + checkDiskSpace' (fromIntegral $ fileSize stat) key + else checkDiskSpace key + + when e $ liftIO $ allowWrite tmp + + getViaTmpUnchecked key action + +prepTmp :: Key -> Annex FilePath +prepTmp key = do + tmp <- fromRepo $ gitAnnexTmpLocation key + liftIO $ createDirectoryIfMissing True (parentDir tmp) + return tmp + +{- Like getViaTmp, but does not check that there is enough disk space + - for the incoming key. For use when the key content is already on disk + - and not being copied into place. -} +getViaTmpUnchecked :: Key -> (FilePath -> Annex Bool) -> Annex Bool +getViaTmpUnchecked key action = do + tmp <- prepTmp key + success <- action tmp + if success + then do + moveAnnex key tmp + logStatus key InfoPresent + return True + else do + -- the tmp file is left behind, in case caller wants + -- to resume its transfer + return False + +{- Creates a temp file, runs an action on it, and cleans up the temp file. -} +withTmp :: Key -> (FilePath -> Annex a) -> Annex a +withTmp key action = do + tmp <- prepTmp key + res <- action tmp + liftIO $ whenM (doesFileExist tmp) $ liftIO $ removeFile tmp + return res + +{- Checks that there is disk space available to store a given key, + - throwing an error if not. 
-} +checkDiskSpace :: Key -> Annex () +checkDiskSpace = checkDiskSpace' 0 + +checkDiskSpace' :: Integer -> Key -> Annex () +checkDiskSpace' adjustment key = do + g <- gitRepo + r <- getConfig g "diskreserve" "" + let reserve = fromMaybe megabyte $ readSize dataUnits r + stats <- liftIO $ getFileSystemStats (gitAnnexDir g) + case (stats, keySize key) of + (Nothing, _) -> return () + (_, Nothing) -> return () + (Just (FileSystemStats { fsStatBytesAvailable = have }), Just need) -> + when (need + reserve > have + adjustment) $ + needmorespace (need + reserve - have - adjustment) + where + megabyte :: Integer + megabyte = 1000000 + needmorespace n = unlessM (Annex.getState Annex.force) $ + error $ "not enough free space, need " ++ + roughSize storageUnits True n ++ + " more (use --force to override this check or adjust annex.diskreserve)" + +{- Moves a file into .git/annex/objects/ + - + - What if the key there already has content? This could happen for + - various reasons; perhaps the same content is being annexed again. + - Perhaps there has been a hash collision generating the keys. + - + - The current strategy is to assume that in this case it's safe to delete + - one of the two copies of the content; and the one already in the annex + - is left there, assuming it's the original, canonical copy. + - + - I considered being more paranoid, and checking that both files had + - the same content. Decided against it because A) users explicitly choose + - a backend based on its hashing properties and so if they're dealing + - with colliding files it's their own fault and B) adding such a check + - would not catch all cases of colliding keys. For example, perhaps + - a remote has a key; if it's then added again with different content then + - the overall system now has two different pieces of content for that + - key, and one of them will probably get deleted later. So, adding the + - check here would only raise expectations that git-annex cannot truly + - meet. 
+ -} +moveAnnex :: Key -> FilePath -> Annex () +moveAnnex key src = do + dest <- inRepo $ gitAnnexLocation key + let dir = parentDir dest + e <- liftIO $ doesFileExist dest + if e + then liftIO $ removeFile src + else liftIO $ do + createDirectoryIfMissing True dir + allowWrite dir -- in case the directory already exists + moveFile src dest + preventWrite dest + preventWrite dir + +withObjectLoc :: Key -> ((FilePath, FilePath) -> Annex a) -> Annex a +withObjectLoc key a = do + file <- inRepo $ gitAnnexLocation key + let dir = parentDir file + a (dir, file) + +{- Removes a key's file from .git/annex/objects/ -} +removeAnnex :: Key -> Annex () +removeAnnex key = withObjectLoc key $ \(dir, file) -> liftIO $ do + allowWrite dir + removeFile file + removeDirectory dir + +{- Moves a key's file out of .git/annex/objects/ -} +fromAnnex :: Key -> FilePath -> Annex () +fromAnnex key dest = withObjectLoc key $ \(dir, file) -> liftIO $ do + allowWrite dir + allowWrite file + moveFile file dest + removeDirectory dir + +{- Moves a key out of .git/annex/objects/ into .git/annex/bad, and + - returns the file it was moved to. 
-} +moveBad :: Key -> Annex FilePath +moveBad key = do + src <- inRepo $ gitAnnexLocation key + bad <- fromRepo gitAnnexBadDir + let dest = bad </> takeFileName src + liftIO $ do + createDirectoryIfMissing True (parentDir dest) + allowWrite (parentDir src) + moveFile src dest + removeDirectory (parentDir src) + logStatus key InfoMissing + return dest + +{- List of keys whose content exists in .git/annex/objects/ -} +getKeysPresent :: Annex [Key] +getKeysPresent = getKeysPresent' =<< fromRepo gitAnnexObjectDir +getKeysPresent' :: FilePath -> Annex [Key] +getKeysPresent' dir = do + exists <- liftIO $ doesDirectoryExist dir + if not exists + then return [] + else liftIO $ do + -- 2 levels of hashing + levela <- dirContents dir + levelb <- mapM dirContents levela + contents <- mapM dirContents (concat levelb) + let files = concat contents + return $ mapMaybe (fileKey . takeFileName) files + +{- Things to do to record changes to content. -} +saveState :: Annex () +saveState = do + Annex.Queue.flush False + Annex.Branch.commit "update" diff --git a/Annex/Exception.hs b/Annex/Exception.hs new file mode 100644 index 0000000000..c147439a1c --- /dev/null +++ b/Annex/Exception.hs @@ -0,0 +1,27 @@ +{- exception handling in the git-annex monad + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.Exception ( + bracketIO, + handle, + throw, +) where + +import Control.Exception.Control (handle) +import Control.Monad.IO.Control (liftIOOp) +import Control.Exception hiding (handle, throw) + +import Common.Annex + +{- Runs an Annex action, with setup and cleanup both in the IO monad. -} +bracketIO :: IO c -> (c -> IO b) -> Annex a -> Annex a +bracketIO setup cleanup go = + liftIOOp (Control.Exception.bracket setup cleanup) (const go) + +{- Throws an exception in the Annex monad. -} +throw :: Control.Exception.Exception e => e -> Annex a +throw = liftIO . 
throwIO diff --git a/Annex/Journal.hs b/Annex/Journal.hs new file mode 100644 index 0000000000..9c5be89b19 --- /dev/null +++ b/Annex/Journal.hs @@ -0,0 +1,94 @@ +{- management of the git-annex journal and cache + - + - The journal is used to queue up changes before they are committed to the + - git-annex branch. Amoung other things, it ensures that if git-annex is + - interrupted, its recorded data is not lost. + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.Journal where + +import System.IO.Binary + +import Common.Annex +import Annex.Exception +import qualified Git + +{- Records content for a file in the branch to the journal. + - + - Using the journal, rather than immediatly staging content to the index + - avoids git needing to rewrite the index after every change. -} +setJournalFile :: FilePath -> String -> Annex () +setJournalFile file content = do + g <- gitRepo + liftIO $ doRedo (write g) $ do + createDirectoryIfMissing True $ gitAnnexJournalDir g + createDirectoryIfMissing True $ gitAnnexTmpDir g + where + -- journal file is written atomically + write g = do + let jfile = journalFile g file + let tmpfile = gitAnnexTmpDir g takeFileName jfile + writeBinaryFile tmpfile content + moveFile tmpfile jfile + +{- Gets any journalled content for a file in the branch. -} +getJournalFile :: FilePath -> Annex (Maybe String) +getJournalFile file = inRepo $ \g -> catchMaybeIO $ + readFileStrict $ journalFile g file + +{- List of files that have updated content in the journal. -} +getJournalledFiles :: Annex [FilePath] +getJournalledFiles = map fileJournal <$> getJournalFiles + +{- List of existing journal files. -} +getJournalFiles :: Annex [FilePath] +getJournalFiles = do + g <- gitRepo + fs <- liftIO $ + catchDefaultIO (getDirectoryContents $ gitAnnexJournalDir g) [] + return $ filter (`notElem` [".", ".."]) fs + +{- Checks if there are changes in the journal. 
-} +journalDirty :: Annex Bool +journalDirty = not . null <$> getJournalFiles + +{- Produces a filename to use in the journal for a file on the branch. + - + - The journal typically won't have a lot of files in it, so the hashing + - used in the branch is not necessary, and all the files are put directly + - in the journal directory. + -} +journalFile :: Git.Repo -> FilePath -> FilePath +journalFile repo file = gitAnnexJournalDir repo concatMap mangle file + where + mangle '/' = "_" + mangle '_' = "__" + mangle c = [c] + +{- Converts a journal file (relative to the journal dir) back to the + - filename on the branch. -} +fileJournal :: FilePath -> FilePath +fileJournal = replace "//" "_" . replace "_" "/" + +{- Runs an action that modifies the journal, using locking to avoid + - contention with other git-annex processes. -} +lockJournal :: Annex a -> Annex a +lockJournal a = do + file <- fromRepo gitAnnexJournalLock + bracketIO (lock file) unlock a + where + lock file = do + l <- doRedo (createFile file stdFileMode) $ + createDirectoryIfMissing True $ takeDirectory file + waitToSetLock l (WriteLock, AbsoluteSeek, 0, 0) + return l + unlock = closeFd + +{- Runs an action, catching failure and running something to fix it up, and + - retrying if necessary. -} +doRedo :: IO a -> IO b -> IO a +doRedo a b = catch a $ const $ b >> a diff --git a/Annex/Queue.hs b/Annex/Queue.hs new file mode 100644 index 0000000000..f611cf02eb --- /dev/null +++ b/Annex/Queue.hs @@ -0,0 +1,41 @@ +{- git-annex command queue + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.Queue ( + add, + flush, + flushWhenFull +) where + +import Common.Annex +import Annex +import qualified Git.Queue + +{- Adds a git command to the queue. -} +add :: String -> [CommandParam] -> [FilePath] -> Annex () +add command params files = do + q <- getState repoqueue + store $ Git.Queue.add q command params files + +{- Runs the queue if it is full. 
Should be called periodically. -} +flushWhenFull :: Annex () +flushWhenFull = do + q <- getState repoqueue + when (Git.Queue.full q) $ flush False + +{- Runs (and empties) the queue. -} +flush :: Bool -> Annex () +flush silent = do + q <- getState repoqueue + unless (0 == Git.Queue.size q) $ do + unless silent $ + showSideAction "Recording state in git" + q' <- inRepo $ Git.Queue.flush q + store q' + +store :: Git.Queue.Queue -> Annex () +store q = changeState $ \s -> s { repoqueue = q } diff --git a/Annex/Ssh.hs b/Annex/Ssh.hs new file mode 100644 index 0000000000..81e488b41d --- /dev/null +++ b/Annex/Ssh.hs @@ -0,0 +1,65 @@ +{- git-annex remote access with ssh + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.Ssh where + +import Common +import qualified Git +import qualified Git.Url +import Types +import Config +import Annex.UUID + +{- Generates parameters to ssh to a repository's host and run a command. + - Caller is responsible for doing any neccessary shellEscaping of the + - passed command. -} +sshToRepo :: Git.Repo -> [CommandParam] -> Annex [CommandParam] +sshToRepo repo sshcmd = do + s <- getConfig repo "ssh-options" "" + let sshoptions = map Param (words s) + let sshport = case Git.Url.port repo of + Nothing -> [] + Just p -> [Param "-p", Param (show p)] + let sshhost = Param $ Git.Url.hostuser repo + return $ sshoptions ++ sshport ++ [sshhost] ++ sshcmd + +{- Generates parameters to run a git-annex-shell command on a remote + - repository. 
-} +git_annex_shell :: Git.Repo -> String -> [CommandParam] -> Annex (Maybe (FilePath, [CommandParam])) +git_annex_shell r command params + | not $ Git.repoIsUrl r = return $ Just (shellcmd, shellopts) + | Git.repoIsSsh r = do + uuid <- getRepoUUID r + sshparams <- sshToRepo r [Param $ sshcmd uuid ] + return $ Just ("ssh", sshparams) + | otherwise = return Nothing + where + dir = Git.workTree r + shellcmd = "git-annex-shell" + shellopts = Param command : File dir : params + sshcmd uuid = unwords $ + shellcmd : map shellEscape (toCommand shellopts) ++ + uuidcheck uuid + uuidcheck NoUUID = [] + uuidcheck (UUID u) = ["--uuid", u] + +{- Uses a supplied function (such as boolSystem) to run a git-annex-shell + - command on a remote. + - + - Or, if the remote does not support running remote commands, returns + - a specified error value. -} +onRemote + :: Git.Repo + -> (FilePath -> [CommandParam] -> IO a, a) + -> String + -> [CommandParam] + -> Annex a +onRemote r (with, errorval) command params = do + s <- git_annex_shell r command params + case s of + Just (c, ps) -> liftIO $ with c ps + Nothing -> return errorval diff --git a/Annex/UUID.hs b/Annex/UUID.hs new file mode 100644 index 0000000000..48bf71f104 --- /dev/null +++ b/Annex/UUID.hs @@ -0,0 +1,74 @@ +{- git-annex uuids + - + - Each git repository used by git-annex has an annex.uuid setting that + - uniquely identifies that repository. + - + - UUIDs of remotes are cached in git config, using keys named + - remote..annex-uuid + - + - Copyright 2010-2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.UUID ( + getUUID, + getRepoUUID, + getUncachedUUID, + prepUUID, + genUUID +) where + +import Common.Annex +import qualified Git +import qualified Git.Config +import qualified Build.SysConfig as SysConfig +import Config + +configkey :: String +configkey = "annex.uuid" + +{- Generates a UUID. There is a library for this, but it's not packaged, + - so use the command line tool. 
-} +genUUID :: IO UUID +genUUID = pOpen ReadFromPipe command params $ liftM toUUID . hGetLine + where + command = SysConfig.uuid + params = if command == "uuid" + -- request a random uuid be generated + then ["-m"] + -- uuidgen generates random uuid by default + else [] + +{- Get current repository's UUID. -} +getUUID :: Annex UUID +getUUID = getRepoUUID =<< gitRepo + +{- Looks up a repo's UUID, caching it in .git/config if it's not already. -} +getRepoUUID :: Git.Repo -> Annex UUID +getRepoUUID r = do + c <- fromRepo cached + let u = getUncachedUUID r + + if c /= u && u /= NoUUID + then do + updatecache u + return u + else return c + where + cached = toUUID . Git.Config.get cachekey "" + updatecache u = do + g <- gitRepo + when (g /= r) $ storeUUID cachekey u + cachekey = remoteConfig r "uuid" + +getUncachedUUID :: Git.Repo -> UUID +getUncachedUUID = toUUID . Git.Config.get configkey "" + +{- Make sure that the repo has an annex.uuid setting. -} +prepUUID :: Annex () +prepUUID = whenM ((==) NoUUID <$> getUUID) $ + storeUUID configkey =<< liftIO genUUID + +storeUUID :: String -> UUID -> Annex () +storeUUID configfield = setConfig configfield . fromUUID diff --git a/Annex/Version.hs b/Annex/Version.hs new file mode 100644 index 0000000000..917859eae4 --- /dev/null +++ b/Annex/Version.hs @@ -0,0 +1,44 @@ +{- git-annex repository versioning + - + - Copyright 2010 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. 
+ -} + +module Annex.Version where + +import Common.Annex +import qualified Git.Config +import Config + +type Version = String + +defaultVersion :: Version +defaultVersion = "3" + +supportedVersions :: [Version] +supportedVersions = [defaultVersion] + +upgradableVersions :: [Version] +upgradableVersions = ["0", "1", "2"] + +versionField :: String +versionField = "annex.version" + +getVersion :: Annex (Maybe Version) +getVersion = handle <$> fromRepo (Git.Config.get versionField "") + where + handle [] = Nothing + handle v = Just v + +setVersion :: Annex () +setVersion = setConfig versionField defaultVersion + +checkVersion :: Version -> Annex () +checkVersion v + | v `elem` supportedVersions = return () + | v `elem` upgradableVersions = err "Upgrade this repository: git-annex upgrade" + | otherwise = err "Upgrade git-annex." + where + err msg = error $ "Repository version " ++ v ++ + " is not supported. " ++ msg diff --git a/Backend.hs b/Backend.hs new file mode 100644 index 0000000000..2f788fcd00 --- /dev/null +++ b/Backend.hs @@ -0,0 +1,120 @@ +{- git-annex key/value backends + - + - Copyright 2010 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Backend ( + BackendFile, + list, + orderedList, + genKey, + lookupFile, + chooseBackends, + lookupBackendName, + maybeLookupBackendName +) where + +import System.IO.Error (try) +import System.Posix.Files + +import Common.Annex +import qualified Git.Config +import qualified Git.CheckAttr +import qualified Annex +import Types.Key +import qualified Types.Backend as B + +-- When adding a new backend, import it here and add it to the list. +import qualified Backend.SHA +import qualified Backend.WORM +import qualified Backend.URL + +list :: [Backend Annex] +list = Backend.SHA.backends ++ Backend.WORM.backends ++ Backend.URL.backends + +{- List of backends in the order to try them when storing a new key. 
-} +orderedList :: Annex [Backend Annex] +orderedList = do + l <- Annex.getState Annex.backends -- list is cached here + if not $ null l + then return l + else handle =<< Annex.getState Annex.forcebackend + where + handle Nothing = standard + handle (Just "") = standard + handle (Just name) = do + l' <- (lookupBackendName name :) <$> standard + Annex.changeState $ \s -> s { Annex.backends = l' } + return l' + standard = fromRepo $ parseBackendList . Git.Config.get "annex.backends" "" + parseBackendList [] = list + parseBackendList s = map lookupBackendName $ words s + +{- Generates a key for a file, trying each backend in turn until one + - accepts it. -} +genKey :: FilePath -> Maybe (Backend Annex) -> Annex (Maybe (Key, Backend Annex)) +genKey file trybackend = do + bs <- orderedList + let bs' = maybe bs (: bs) trybackend + genKey' bs' file +genKey' :: [Backend Annex] -> FilePath -> Annex (Maybe (Key, Backend Annex)) +genKey' [] _ = return Nothing +genKey' (b:bs) file = do + r <- (B.getKey b) file + case r of + Nothing -> genKey' bs file + Just k -> return $ Just (makesane k, b) + where + -- keyNames should not contain newline characters. + makesane k = k { keyName = map fixbadchar (keyName k) } + fixbadchar c + | c == '\n' = '_' + | otherwise = c + +{- Looks up the key and backend corresponding to an annexed file, + - by examining what the file symlinks to. 
-} +lookupFile :: FilePath -> Annex (Maybe (Key, Backend Annex)) +lookupFile file = do + tl <- liftIO $ try getsymlink + case tl of + Left _ -> return Nothing + Right l -> makekey l + where + getsymlink = takeFileName <$> readSymbolicLink file + makekey l = maybe (return Nothing) (makeret l) (fileKey l) + makeret l k = let bname = keyBackendName k in + case maybeLookupBackendName bname of + Just backend -> return $ Just (k, backend) + Nothing -> do + when (isLinkToAnnex l) $ warning $ + "skipping " ++ file ++ + " (unknown backend " ++ + bname ++ ")" + return Nothing + +type BackendFile = (Maybe (Backend Annex), FilePath) + +{- Looks up the backends that should be used for each file in a list. + - That can be configured on a per-file basis in the gitattributes file. + -} +chooseBackends :: [FilePath] -> Annex [BackendFile] +chooseBackends fs = Annex.getState Annex.forcebackend >>= go + where + go Nothing = do + pairs <- inRepo $ Git.CheckAttr.lookup "annex.backend" fs + return $ map (\(f,b) -> (maybeLookupBackendName b, f)) pairs + go (Just _) = do + l <- orderedList + return $ map (\f -> (Just $ Prelude.head l, f)) fs + +{- Looks up a backend by name. May fail if unknown. -} +lookupBackendName :: String -> Backend Annex +lookupBackendName s = fromMaybe unknown $ maybeLookupBackendName s + where + unknown = error $ "unknown backend " ++ s +maybeLookupBackendName :: String -> Maybe (Backend Annex) +maybeLookupBackendName s = headMaybe matches + where + matches = filter (\b -> s == B.name b) list diff --git a/Backend/SHA.hs b/Backend/SHA.hs new file mode 100644 index 0000000000..eca312944e --- /dev/null +++ b/Backend/SHA.hs @@ -0,0 +1,114 @@ +{- git-annex SHA backend + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. 
+ -} + +module Backend.SHA (backends) where + +import Common.Annex +import qualified Annex +import Annex.Content +import Types.Backend +import Types.Key +import qualified Build.SysConfig as SysConfig + +type SHASize = Int + +-- order is slightly significant; want SHA256 first, and more general +-- sizes earlier +sizes :: [Int] +sizes = [256, 1, 512, 224, 384] + +backends :: [Backend Annex] +backends = catMaybes $ map genBackend sizes ++ map genBackendE sizes + +genBackend :: SHASize -> Maybe (Backend Annex) +genBackend size + | isNothing (shaCommand size) = Nothing + | otherwise = Just b + where + b = Types.Backend.Backend + { name = shaName size + , getKey = keyValue size + , fsckKey = checkKeyChecksum size + } + +genBackendE :: SHASize -> Maybe (Backend Annex) +genBackendE size = + case genBackend size of + Nothing -> Nothing + Just b -> Just $ b + { name = shaNameE size + , getKey = keyValueE size + } + +shaCommand :: SHASize -> Maybe String +shaCommand 1 = SysConfig.sha1 +shaCommand 256 = SysConfig.sha256 +shaCommand 224 = SysConfig.sha224 +shaCommand 384 = SysConfig.sha384 +shaCommand 512 = SysConfig.sha512 +shaCommand _ = Nothing + +shaName :: SHASize -> String +shaName size = "SHA" ++ show size + +shaNameE :: SHASize -> String +shaNameE size = shaName size ++ "E" + +shaN :: SHASize -> FilePath -> Annex String +shaN size file = do + showAction "checksum" + liftIO $ pOpen ReadFromPipe command (toCommand [File file]) $ \h -> do + sha <- fst . separate (== ' ') <$> hGetLine h + if null sha + then error $ command ++ " parse error" + else return sha + where + command = fromJust $ shaCommand size + +{- A key is a checksum of its contents. -} +keyValue :: SHASize -> FilePath -> Annex (Maybe Key) +keyValue size file = do + s <- shaN size file + stat <- liftIO $ getFileStatus file + return $ Just $ stubKey + { keyName = s + , keyBackendName = shaName size + , keySize = Just $ fromIntegral $ fileSize stat + } + +{- Extension preserving keys. 
-} +keyValueE :: SHASize -> FilePath -> Annex (Maybe Key) +keyValueE size file = keyValue size file >>= maybe (return Nothing) addE + where + addE k = return $ Just $ k + { keyName = keyName k ++ extension + , keyBackendName = shaNameE size + } + naiveextension = takeExtension file + extension + -- long or newline containing extensions are + -- probably not really an extension + | length naiveextension > 6 || + '\n' `elem` naiveextension = "" + | otherwise = naiveextension + +{- A key's checksum is checked during fsck. -} +checkKeyChecksum :: SHASize -> Key -> Annex Bool +checkKeyChecksum size key = do + fast <- Annex.getState Annex.fast + file <- inRepo $ gitAnnexLocation key + present <- liftIO $ doesFileExist file + if not present || fast + then return True + else check =<< shaN size file + where + check s + | s == dropExtension (keyName key) = return True + | otherwise = do + dest <- moveBad key + warning $ "Bad file content; moved to " ++ dest + return False diff --git a/Backend/URL.hs b/Backend/URL.hs new file mode 100644 index 0000000000..32a72335a5 --- /dev/null +++ b/Backend/URL.hs @@ -0,0 +1,28 @@ +{- git-annex "URL" backend -- keys whose content is available from urls. + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Backend.URL ( + backends, + fromUrl +) where + +import Common.Annex +import Types.Backend +import Types.Key + +backends :: [Backend Annex] +backends = [backend] + +backend :: Backend Annex +backend = Types.Backend.Backend { + name = "URL", + getKey = const (return Nothing), + fsckKey = const (return True) +} + +fromUrl :: String -> Key +fromUrl url = stubKey { keyName = url, keyBackendName = "URL" } diff --git a/Backend/WORM.hs b/Backend/WORM.hs new file mode 100644 index 0000000000..5a3e2d694c --- /dev/null +++ b/Backend/WORM.hs @@ -0,0 +1,39 @@ +{- git-annex "WORM" backend -- Write Once, Read Many + - + - Copyright 2010 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. 
+ -} + +module Backend.WORM (backends) where + +import Common.Annex +import Types.Backend +import Types.Key + +backends :: [Backend Annex] +backends = [backend] + +backend :: Backend Annex +backend = Types.Backend.Backend { + name = "WORM", + getKey = keyValue, + fsckKey = const (return True) +} + +{- The key includes the file size, modification time, and the + - basename of the filename. + - + - That allows multiple files with the same names to have different keys, + - while also allowing a file to be moved around while retaining the + - same key. + -} +keyValue :: FilePath -> Annex (Maybe Key) +keyValue file = do + stat <- liftIO $ getFileStatus file + return $ Just Key { + keyName = takeFileName file, + keyBackendName = name backend, + keySize = Just $ fromIntegral $ fileSize stat, + keyMtime = Just $ modificationTime stat + } diff --git a/Build/TestConfig.hs b/Build/TestConfig.hs new file mode 100644 index 0000000000..e8a0d13368 --- /dev/null +++ b/Build/TestConfig.hs @@ -0,0 +1,114 @@ +{- Tests the system and generates Build.SysConfig.hs. 
-} + +module Build.TestConfig where + +import System.IO +import System.Cmd +import System.Exit + +type ConfigKey = String +data ConfigValue = + BoolConfig Bool | + StringConfig String | + MaybeStringConfig (Maybe String) +data Config = Config ConfigKey ConfigValue + +type Test = IO Config +type TestName = String +data TestCase = TestCase TestName Test + +instance Show ConfigValue where + show (BoolConfig b) = show b + show (StringConfig s) = show s + show (MaybeStringConfig s) = show s + +instance Show Config where + show (Config key value) = unlines + [ key ++ " :: " ++ valuetype value + , key ++ " = " ++ show value + ] + where + valuetype (BoolConfig _) = "Bool" + valuetype (StringConfig _) = "String" + valuetype (MaybeStringConfig _) = "Maybe String" + +writeSysConfig :: [Config] -> IO () +writeSysConfig config = writeFile "Build/SysConfig.hs" body + where + body = unlines $ header ++ map show config ++ footer + header = [ + "{- Automatically generated. -}" + , "module Build.SysConfig where" + , "" + ] + footer = [] + +runTests :: [TestCase] -> IO [Config] +runTests [] = return [] +runTests (TestCase tname t : ts) = do + testStart tname + c <- t + testEnd c + rest <- runTests ts + return $ c:rest + +{- Tests that a command is available, aborting if not. -} +requireCmd :: ConfigKey -> String -> Test +requireCmd k cmdline = do + ret <- testCmd k cmdline + handle ret + where + handle r@(Config _ (BoolConfig True)) = return r + handle r = do + testEnd r + error $ "** the " ++ c ++ " command is required" + c = head $ words cmdline + +{- Checks if a command is available by running a command line. -} +testCmd :: ConfigKey -> String -> Test +testCmd k cmdline = do + ret <- system $ quiet cmdline + return $ Config k (BoolConfig $ ret == ExitSuccess) + +{- Ensures that one of a set of commands is available by running each in + - turn. The Config is set to the first one found. -} +selectCmd :: ConfigKey -> [String] -> String -> Test +selectCmd k = searchCmd + (return . 
Config k . StringConfig) + (\cmds -> do + testEnd $ Config k $ BoolConfig False + error $ "* need one of these commands, but none are available: " ++ show cmds + ) + +maybeSelectCmd :: ConfigKey -> [String] -> String -> Test +maybeSelectCmd k = searchCmd + (return . Config k . MaybeStringConfig . Just) + (\_ -> return $ Config k $ MaybeStringConfig Nothing) + +searchCmd :: (String -> Test) -> ([String] -> Test) -> [String] -> String -> Test +searchCmd success failure cmds param = search cmds + where + search [] = failure cmds + search (c:cs) = do + ret <- system $ quiet c ++ " " ++ param + if ret == ExitSuccess + then success c + else search cs + +quiet :: String -> String +quiet s = s ++ " >/dev/null 2>&1" + +testStart :: TestName -> IO () +testStart s = do + putStr $ " checking " ++ s ++ "..." + hFlush stdout + +testEnd :: Config -> IO () +testEnd (Config _ (BoolConfig True)) = status "yes" +testEnd (Config _ (BoolConfig False)) = status "no" +testEnd (Config _ (StringConfig s)) = status s +testEnd (Config _ (MaybeStringConfig (Just s))) = status s +testEnd (Config _ (MaybeStringConfig Nothing)) = status "not available" + +status :: String -> IO () +status s = putStrLn $ ' ':s diff --git a/CHANGELOG b/CHANGELOG new file mode 120000 index 0000000000..d526672ce2 --- /dev/null +++ b/CHANGELOG @@ -0,0 +1 @@ +debian/changelog \ No newline at end of file diff --git a/Checks.hs b/Checks.hs new file mode 100644 index 0000000000..e443811cdc --- /dev/null +++ b/Checks.hs @@ -0,0 +1,42 @@ +{- git-annex command checks + - + - Common sanity checks for commands, and an interface to selectively + - remove them, or add others. + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. 
+ -} + +module Checks where + +import Common.Annex +import Types.Command +import Init +import qualified Annex + +commonChecks :: [CommandCheck] +commonChecks = [fromOpt, toOpt, repoExists] + +repoExists :: CommandCheck +repoExists = CommandCheck 0 ensureInitialized + +fromOpt :: CommandCheck +fromOpt = CommandCheck 1 $ do + v <- Annex.getState Annex.fromremote + unless (isNothing v) $ error "cannot use --from with this command" + +toOpt :: CommandCheck +toOpt = CommandCheck 2 $ do + v <- Annex.getState Annex.toremote + unless (isNothing v) $ error "cannot use --to with this command" + +dontCheck :: CommandCheck -> Command -> Command +dontCheck check cmd = mutateCheck cmd $ \c -> filter (/= check) c + +addCheck :: Annex () -> Command -> Command +addCheck check cmd = mutateCheck cmd $ + \c -> CommandCheck (length c + 100) check : c + +mutateCheck :: Command -> ([CommandCheck] -> [CommandCheck]) -> Command +mutateCheck cmd@(Command { cmdcheck = c }) a = cmd { cmdcheck = a c } diff --git a/CmdLine.hs b/CmdLine.hs new file mode 100644 index 0000000000..7f708f15a1 --- /dev/null +++ b/CmdLine.hs @@ -0,0 +1,106 @@ +{- git-annex command line parsing and dispatch + - + - Copyright 2010 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module CmdLine ( + dispatch, + usage, + shutdown +) where + +import qualified System.IO.Error as IO +import qualified Control.Exception as E +import Control.Exception (throw) +import System.Console.GetOpt + +import Common.Annex +import qualified Annex +import qualified Annex.Queue +import qualified Git +import qualified Git.Command +import Annex.Content +import Command + +type Params = [String] +type Flags = [Annex ()] + +{- Runs the passed command line. 
-} +dispatch :: Params -> [Command] -> [Option] -> String -> IO Git.Repo -> IO () +dispatch args cmds options header getgitrepo = do + setupConsole + r <- E.try getgitrepo :: IO (Either E.SomeException Git.Repo) + case r of + Left e -> fromMaybe (throw e) (cmdnorepo cmd) + Right g -> do + state <- Annex.new g + (actions, state') <- Annex.run state $ do + sequence_ flags + prepCommand cmd params + tryRun state' cmd $ [startup] ++ actions ++ [shutdown] + where + (flags, cmd, params) = parseCmd args cmds options header + +{- Parses command line, and returns actions to run to configure flags, + - the Command being run, and the remaining parameters for the command. -} +parseCmd :: Params -> [Command] -> [Option] -> String -> (Flags, Command, Params) +parseCmd argv cmds options header = check $ getOpt Permute options argv + where + check (_, [], []) = err "missing command" + check (flags, name:rest, []) + | null matches = err $ "unknown command " ++ name + | otherwise = (flags, Prelude.head matches, rest) + where + matches = filter (\c -> name == cmdname c) cmds + check (_, _, errs) = err $ concat errs + err msg = error $ msg ++ "\n\n" ++ usage header cmds options + +{- Usage message with lists of commands and options. -} +usage :: String -> [Command] -> [Option] -> String +usage header cmds options = usageInfo top options ++ commands + where + top = header ++ "\n\nOptions:" + commands = "\nCommands:\n" ++ cmddescs + cmddescs = unlines $ map (indent . showcmd) cmds + showcmd c = + cmdname c ++ + pad (longest cmdname + 1) (cmdname c) ++ + cmdparams c ++ + pad (longest cmdparams + 2) (cmdparams c) ++ + cmddesc c + pad n s = replicate (n - length s) ' ' + longest f = foldl max 0 $ map (length . f) cmds + +{- Runs a list of Annex actions. Catches IO errors and continues + - (but explicitly thrown errors terminate the whole command). 
+ -} +tryRun :: Annex.AnnexState -> Command -> [CommandCleanup] -> IO () +tryRun = tryRun' 0 +tryRun' :: Integer -> Annex.AnnexState -> Command -> [CommandCleanup] -> IO () +tryRun' errnum _ cmd [] + | errnum > 0 = error $ cmdname cmd ++ ": " ++ show errnum ++ " failed" + | otherwise = return () +tryRun' errnum state cmd (a:as) = run >>= handle + where + run = IO.try $ Annex.run state $ do + Annex.Queue.flushWhenFull + a + handle (Left err) = showerr err >> cont False state + handle (Right (success, state')) = cont success state' + cont success s = tryRun' (if success then errnum else errnum + 1) s cmd as + showerr err = Annex.eval state $ do + showErr err + showEndFail + +{- Actions to perform each time ran. -} +startup :: Annex Bool +startup = return True + +{- Cleanup actions. -} +shutdown :: Annex Bool +shutdown = do + saveState + liftIO Git.Command.reap -- zombies from long-running git processes + return True diff --git a/Command.hs b/Command.hs new file mode 100644 index 0000000000..813a239cb0 --- /dev/null +++ b/Command.hs @@ -0,0 +1,108 @@ +{- git-annex command infrastructure + - + - Copyright 2010-2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Command ( + command, + noRepo, + next, + stop, + stopUnless, + prepCommand, + doCommand, + whenAnnexed, + ifAnnexed, + notBareRepo, + isBareRepo, + autoCopies, + module ReExported +) where + +import Common.Annex +import qualified Backend +import qualified Annex +import qualified Git +import Types.Command as ReExported +import Seek as ReExported +import Checks as ReExported +import Options as ReExported +import Logs.Trust +import Logs.Location +import Config + +{- Generates a normal command -} +command :: String -> String -> [CommandSeek] -> String -> Command +command = Command Nothing commonChecks + +{- Adds a fallback action to a command, that will be run if it's used + - outside a git repository. 
-} +noRepo :: IO () -> Command -> Command +noRepo a c = c { cmdnorepo = Just a } + +{- For start and perform stages to indicate what step to run next. -} +next :: a -> Annex (Maybe a) +next a = return $ Just a + +{- Or to indicate nothing needs to be done. -} +stop :: Annex (Maybe a) +stop = return Nothing + +{- Stops unless a condition is met. -} +stopUnless :: Annex Bool -> Annex (Maybe a) -> Annex (Maybe a) +stopUnless c a = do + ok <- c + if ok then a else stop + +{- Prepares to run a command via the check and seek stages, returning a + - list of actions to perform to run the command. -} +prepCommand :: Command -> [String] -> Annex [CommandCleanup] +prepCommand Command { cmdseek = seek, cmdcheck = c } params = do + mapM_ runCheck c + map doCommand . concat <$> mapM (\s -> s params) seek + +{- Runs a command through the start, perform and cleanup stages -} +doCommand :: CommandStart -> CommandCleanup +doCommand = start + where + start = stage $ maybe skip perform + perform = stage $ maybe failure cleanup + cleanup = stage $ status + stage = (=<<) + skip = return True + failure = showEndFail >> return False + status r = showEndResult r >> return r + +{- Modifies an action to only act on files that are already annexed, + - and passes the key and backend on to it. -} +whenAnnexed :: (FilePath -> (Key, Backend Annex) -> Annex (Maybe a)) -> FilePath -> Annex (Maybe a) +whenAnnexed a file = ifAnnexed file (a file) (return Nothing) + +ifAnnexed :: FilePath -> ((Key, Backend Annex) -> Annex a) -> Annex a -> Annex a +ifAnnexed file yes no = maybe no yes =<< Backend.lookupFile file + +notBareRepo :: Annex a -> Annex a +notBareRepo a = do + whenM isBareRepo $ + error "You cannot run this subcommand in a bare repository." + a + +isBareRepo :: Annex Bool +isBareRepo = fromRepo Git.repoIsLocalBare + +{- Used for commands that have an auto mode that checks the number of known + - copies of a key. 
+ - + - In auto mode, first checks that the number of known + - copies of the key is > or < than the numcopies setting, before running + - the action. -} +autoCopies :: Key -> (Int -> Int -> Bool) -> Maybe Int -> CommandStart -> CommandStart +autoCopies key vs numcopiesattr a = Annex.getState Annex.auto >>= auto + where + auto False = a + auto True = do + needed <- getNumCopies numcopiesattr + (_, have) <- trustPartition UnTrusted =<< keyLocations key + if length have `vs` needed then a else stop diff --git a/Command/Add.hs b/Command/Add.hs new file mode 100644 index 0000000000..9410601b8b --- /dev/null +++ b/Command/Add.hs @@ -0,0 +1,93 @@ +{- git-annex command + - + - Copyright 2010 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Command.Add where + +import Common.Annex +import Annex.Exception +import Command +import qualified Annex +import qualified Annex.Queue +import qualified Backend +import Logs.Location +import Annex.Content +import Utility.Touch +import Backend + +def :: [Command] +def = [command "add" paramPaths seek "add files to annex"] + +{- Add acts on both files not checked into git yet, and unlocked files. -} +seek :: [CommandSeek] +seek = [withFilesNotInGit start, withFilesUnlocked start] + +{- The add subcommand annexes a file, storing it in a backend, and then + - moving it into the annex directory and setting up the symlink pointing + - to its content. 
-} +start :: BackendFile -> CommandStart +start p@(_, file) = notBareRepo $ ifAnnexed file fixup add + where + add = do + s <- liftIO $ getSymbolicLinkStatus file + if isSymbolicLink s || not (isRegularFile s) + then stop + else do + showStart "add" file + next $ perform p + fixup (key, _) = do + -- fixup from an interrupted add; the symlink + -- is present but not yet added to git + showStart "add" file + liftIO $ removeFile file + next $ next $ cleanup file key =<< inAnnex key + +perform :: BackendFile -> CommandPerform +perform (backend, file) = Backend.genKey file backend >>= go + where + go Nothing = stop + go (Just (key, _)) = do + handle (undo file key) $ moveAnnex key file + next $ cleanup file key True + +{- On error, put the file back so it doesn't seem to have vanished. + - This can be called before or after the symlink is in place. -} +undo :: FilePath -> Key -> IOException -> Annex a +undo file key e = do + unlessM (inAnnex key) rethrow -- no cleanup to do + liftIO $ whenM (doesFileExist file) $ removeFile file + handle tryharder $ fromAnnex key file + logStatus key InfoMissing + rethrow + where + rethrow = throw e + + -- fromAnnex could fail if the file ownership is weird + tryharder :: IOException -> Annex () + tryharder _ = do + src <- inRepo $ gitAnnexLocation key + liftIO $ moveFile src file + +cleanup :: FilePath -> Key -> Bool -> CommandCleanup +cleanup file key hascontent = do + handle (undo file key) $ do + link <- calcGitLink file key + liftIO $ createSymbolicLink link file + + when hascontent $ do + logStatus key InfoPresent + + -- touch the symlink to have the same mtime as the + -- file it points to + liftIO $ do + mtime <- modificationTime <$> getFileStatus file + touch file (TimeSpec mtime) False + + force <- Annex.getState Annex.force + if force + then Annex.Queue.add "add" [Param "-f", Param "--"] [file] + else Annex.Queue.add "add" [Param "--"] [file] + return True diff --git a/Command/AddUrl.hs b/Command/AddUrl.hs new file mode 
100644 index 0000000000..027c508bcb --- /dev/null +++ b/Command/AddUrl.hs @@ -0,0 +1,72 @@ +{- git-annex command + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Command.AddUrl where + +import Network.URI + +import Common.Annex +import Command +import qualified Backend +import qualified Utility.Url as Url +import qualified Command.Add +import qualified Annex +import qualified Backend.URL +import Annex.Content +import Logs.Web + +def :: [Command] +def = [command "addurl" (paramRepeating paramUrl) seek "add urls to annex"] + +seek :: [CommandSeek] +seek = [withStrings start] + +start :: String -> CommandStart +start s = notBareRepo $ go $ parseURI s + where + go Nothing = error $ "bad url " ++ s + go (Just url) = do + file <- liftIO $ url2file url + showStart "addurl" file + next $ perform s file + +perform :: String -> FilePath -> CommandPerform +perform url file = do + fast <- Annex.getState Annex.fast + if fast then nodownload url file else download url file + +download :: String -> FilePath -> CommandPerform +download url file = do + showAction $ "downloading " ++ url ++ " " + let dummykey = Backend.URL.fromUrl url + tmp <- fromRepo $ gitAnnexTmpLocation dummykey + liftIO $ createDirectoryIfMissing True (parentDir tmp) + stopUnless (liftIO $ Url.download url tmp) $ do + [(backend, _)] <- Backend.chooseBackends [file] + k <- Backend.genKey tmp backend + case k of + Nothing -> stop + Just (key, _) -> do + moveAnnex key tmp + setUrlPresent key url + next $ Command.Add.cleanup file key True + +nodownload :: String -> FilePath -> CommandPerform +nodownload url file = do + let key = Backend.URL.fromUrl url + setUrlPresent key url + next $ Command.Add.cleanup file key False + +url2file :: URI -> IO FilePath +url2file url = do + whenM (doesFileExist file) $ + error $ "already have this url in " ++ file + return file + where + file = escape $ uriRegName auth ++ uriPath url ++ uriQuery url + escape = replace "/" "_" . 
replace "?" "_" + auth = fromMaybe (error $ "bad url " ++ show url) $ uriAuthority url diff --git a/Command/ConfigList.hs b/Command/ConfigList.hs new file mode 100644 index 0000000000..dcf4d15093 --- /dev/null +++ b/Command/ConfigList.hs @@ -0,0 +1,25 @@ +{- git-annex command + - + - Copyright 2010 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Command.ConfigList where + +import Common.Annex +import Command +import Annex.UUID + +def :: [Command] +def = [command "configlist" paramNothing seek + "outputs relevant git configuration"] + +seek :: [CommandSeek] +seek = [withNothing start] + +start :: CommandStart +start = do + u <- getUUID + liftIO $ putStrLn $ "annex.uuid=" ++ fromUUID u + stop diff --git a/Command/Copy.hs b/Command/Copy.hs new file mode 100644 index 0000000000..16de423acb --- /dev/null +++ b/Command/Copy.hs @@ -0,0 +1,26 @@ +{- git-annex command + - + - Copyright 2010 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Command.Copy where + +import Common.Annex +import Command +import qualified Command.Move + +def :: [Command] +def = [dontCheck toOpt $ dontCheck fromOpt $ + command "copy" paramPaths seek + "copy content of files to/from another repository"] + +seek :: [CommandSeek] +seek = [withNumCopies $ \n -> whenAnnexed $ start n] + +-- A copy is just a move that does not delete the source file. +-- However, --auto mode avoids unnecessary copies. +start :: Maybe Int -> FilePath -> (Key, Backend Annex) -> CommandStart +start numcopies file (key, backend) = autoCopies key (<) numcopies $ + Command.Move.start False file (key, backend) diff --git a/Command/Dead.hs b/Command/Dead.hs new file mode 100644 index 0000000000..192551e207 --- /dev/null +++ b/Command/Dead.hs @@ -0,0 +1,32 @@ +{- git-annex command + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. 
+ -}
+
+module Command.Dead where
+
+import Common.Annex
+import Command
+import qualified Remote
+import Logs.Trust
+
+def :: [Command]
+def = [command "dead" (paramRepeating paramRemote) seek
+    "hide a lost repository"]
+
+seek :: [CommandSeek]
+seek = [withWords start]
+
+start :: [String] -> CommandStart
+start ws = do
+    let name = unwords ws -- all parameters together name one repository
+    showStart "dead" name -- bare command name, like every other showStart call
+    u <- Remote.nameToUUID name
+    next $ perform u
+
+perform :: UUID -> CommandPerform
+perform uuid = do
+    trustSet uuid DeadTrusted -- record the DeadTrusted level for this UUID
+    next $ return True -- nothing left to clean up
diff --git a/Command/Describe.hs b/Command/Describe.hs
new file mode 100644
index 0000000000..61297e77c7
--- /dev/null
+++ b/Command/Describe.hs
@@ -0,0 +1,32 @@
+{- git-annex command
+ -
+ - Copyright 2011 Joey Hess
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Command.Describe where
+
+import Common.Annex
+import Command
+import qualified Remote
+import Logs.UUID
+
+def :: [Command]
+def = [command "describe" (paramPair paramRemote paramDesc) seek
+    "change description of a repository"]
+
+seek :: [CommandSeek]
+seek = [withWords start]
+
+start :: [String] -> CommandStart
+start (name:description) = do -- first word names the repository
+    showStart "describe" name
+    u <- Remote.nameToUUID name
+    next $ perform u $ unwords description -- remaining words form the description
+start _ = error "Specify a repository and a description." -- only reached with no words at all
+
+perform :: UUID -> String -> CommandPerform
+perform u description = do
+    describeUUID u description
+    next $ return True -- nothing left to clean up
diff --git a/Command/Drop.hs b/Command/Drop.hs
new file mode 100644
index 0000000000..0a4c9dfd6f
--- /dev/null
+++ b/Command/Drop.hs
@@ -0,0 +1,132 @@
+{- git-annex command
+ -
+ - Copyright 2010 Joey Hess
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -} + +module Command.Drop where + +import Common.Annex +import Command +import qualified Remote +import qualified Annex +import Annex.UUID +import Logs.Location +import Logs.Trust +import Annex.Content +import Config + +def :: [Command] +def = [dontCheck fromOpt $ command "drop" paramPaths seek + "indicate content of files not currently wanted"] + +seek :: [CommandSeek] +seek = [withNumCopies $ \n -> whenAnnexed $ start n] + +start :: Maybe Int -> FilePath -> (Key, Backend Annex) -> CommandStart +start numcopies file (key, _) = autoCopies key (>) numcopies $ do + from <- Annex.getState Annex.fromremote + case from of + Nothing -> startLocal file numcopies key + Just name -> do + remote <- Remote.byName name + u <- getUUID + if Remote.uuid remote == u + then startLocal file numcopies key + else startRemote file numcopies key remote + +startLocal :: FilePath -> Maybe Int -> Key -> CommandStart +startLocal file numcopies key = stopUnless (inAnnex key) $ do + showStart "drop" file + next $ performLocal key numcopies + +startRemote :: FilePath -> Maybe Int -> Key -> Remote.Remote Annex -> CommandStart +startRemote file numcopies key remote = do + showStart "drop" file + next $ performRemote key numcopies remote + +performLocal :: Key -> Maybe Int -> CommandPerform +performLocal key numcopies = lockContent key $ do + (remotes, trusteduuids) <- Remote.keyPossibilitiesTrusted key + untrusteduuids <- trustGet UnTrusted + let tocheck = Remote.remotesWithoutUUID remotes (trusteduuids++untrusteduuids) + stopUnless (canDropKey key numcopies trusteduuids tocheck []) $ do + whenM (inAnnex key) $ removeAnnex key + next $ cleanupLocal key + +performRemote :: Key -> Maybe Int -> Remote.Remote Annex -> CommandPerform +performRemote key numcopies remote = lockContent key $ do + -- Filter the remote it's being dropped from out of the lists of + -- places assumed to have the key, and places to check. + -- When the local repo has the key, that's one additional copy. 
+ (remotes, trusteduuids) <- Remote.keyPossibilitiesTrusted key + present <- inAnnex key + u <- getUUID + let have = filter (/= uuid) $ + if present then u:trusteduuids else trusteduuids + untrusteduuids <- trustGet UnTrusted + let tocheck = filter (/= remote) $ + Remote.remotesWithoutUUID remotes (have++untrusteduuids) + stopUnless (canDropKey key numcopies have tocheck [uuid]) $ do + ok <- Remote.removeKey remote key + next $ cleanupRemote key remote ok + where + uuid = Remote.uuid remote + +cleanupLocal :: Key -> CommandCleanup +cleanupLocal key = do + logStatus key InfoMissing + return True + +cleanupRemote :: Key -> Remote.Remote Annex -> Bool -> CommandCleanup +cleanupRemote key remote ok = do + -- better safe than sorry: assume the remote dropped the key + -- even if it seemed to fail; the failure could have occurred + -- after it really dropped it + Remote.logStatus remote key False + return ok + +{- Checks specified remotes to verify that enough copies of a key exist to + - allow it to be safely removed (with no data loss). Can be provided with + - some locations where the key is known/assumed to be present. 
-} +canDropKey :: Key -> Maybe Int -> [UUID] -> [Remote.Remote Annex] -> [UUID] -> Annex Bool +canDropKey key numcopiesM have check skip = do + force <- Annex.getState Annex.force + if force || numcopiesM == Just 0 + then return True + else do + need <- getNumCopies numcopiesM + findCopies key need skip have check + +findCopies :: Key -> Int -> [UUID] -> [UUID] -> [Remote.Remote Annex] -> Annex Bool +findCopies key need skip = helper [] + where + helper bad have [] + | length have >= need = return True + | otherwise = notEnoughCopies key need have skip bad + helper bad have (r:rs) + | length have >= need = return True + | otherwise = do + let u = Remote.uuid r + let duplicate = u `elem` have + haskey <- Remote.hasKey r key + case (duplicate, haskey) of + (False, Right True) -> helper bad (u:have) rs + (False, Left _) -> helper (r:bad) have rs + _ -> helper bad have rs + +notEnoughCopies :: Key -> Int -> [UUID] -> [UUID] -> [Remote.Remote Annex] -> Annex Bool +notEnoughCopies key need have skip bad = do + unsafe + showLongNote $ + "Could only verify the existence of " ++ + show (length have) ++ " out of " ++ show need ++ + " necessary copies" + Remote.showTriedRemotes bad + Remote.showLocations key (have++skip) + hint + return False + where + unsafe = showNote "unsafe" + hint = showLongNote "(Use --force to override this check, or adjust annex.numcopies.)" diff --git a/Command/DropKey.hs b/Command/DropKey.hs new file mode 100644 index 0000000000..aaaa224661 --- /dev/null +++ b/Command/DropKey.hs @@ -0,0 +1,38 @@ +{- git-annex command + - + - Copyright 2010 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. 
+ -} + +module Command.DropKey where + +import Common.Annex +import Command +import qualified Annex +import Logs.Location +import Annex.Content + +def :: [Command] +def = [command "dropkey" (paramRepeating paramKey) seek + "drops annexed content for specified keys"] + +seek :: [CommandSeek] +seek = [withKeys start] + +start :: Key -> CommandStart +start key = stopUnless (inAnnex key) $ do + unlessM (Annex.getState Annex.force) $ + error "dropkey can cause data loss; use --force if you're sure you want to do this" + showStart "dropkey" (show key) + next $ perform key + +perform :: Key -> CommandPerform +perform key = lockContent key $ do + removeAnnex key + next $ cleanup key + +cleanup :: Key -> CommandCleanup +cleanup key = do + logStatus key InfoMissing + return True diff --git a/Command/DropUnused.hs b/Command/DropUnused.hs new file mode 100644 index 0000000000..244f378d97 --- /dev/null +++ b/Command/DropUnused.hs @@ -0,0 +1,78 @@ +{- git-annex command + - + - Copyright 2010 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Command.DropUnused where + +import qualified Data.Map as M + +import Common.Annex +import Command +import qualified Annex +import qualified Command.Drop +import qualified Remote +import qualified Git +import Types.Key + +type UnusedMap = M.Map String Key + +def :: [Command] +def = [dontCheck fromOpt $ command "dropunused" (paramRepeating paramNumber) + seek "drop unused file content"] + +seek :: [CommandSeek] +seek = [withUnusedMaps] + +{- Read unused logs once, and pass the maps to each start action. 
-}
+withUnusedMaps :: CommandSeek
+withUnusedMaps params = do
+    unused <- readUnusedLog ""
+    unusedbad <- readUnusedLog "bad"
+    unusedtmp <- readUnusedLog "tmp"
+    return $ map (start (unused, unusedbad, unusedtmp)) params -- one start action per parameter
+
+start :: (UnusedMap, UnusedMap, UnusedMap) -> FilePath -> CommandStart
+start (unused, unusedbad, unusedtmp) s = search
+    [ (unused, perform)
+    , (unusedbad, performOther gitAnnexBadLocation)
+    , (unusedtmp, performOther gitAnnexTmpLocation)
+    ]
+    where
+        search [] = stop -- s is not listed in any of the three maps
+        search ((m, a):rest) =
+            case M.lookup s m of
+                Nothing -> search rest
+                Just key -> do -- the first map containing s picks the action
+                    showStart "dropunused" s
+                    next $ a key
+
+perform :: Key -> CommandPerform
+perform key = maybe droplocal dropremote =<< Annex.getState Annex.fromremote
+    where
+        dropremote name = do
+            r <- Remote.byName name
+            showAction $ "from " ++ Remote.name r
+            ok <- Remote.removeKey r key
+            next $ Command.Drop.cleanupRemote key r ok
+        droplocal = Command.Drop.performLocal key (Just 0) -- force drop
+
+performOther :: (Key -> Git.Repo -> FilePath) -> Key -> CommandPerform
+performOther filespec key = do
+    f <- fromRepo $ filespec key
+    liftIO $ whenM (doesFileExist f) $ removeFile f -- an already-missing file is not an error
+    next $ return True
+
+readUnusedLog :: FilePath -> Annex UnusedMap
+readUnusedLog prefix = do
+    f <- fromRepo $ gitAnnexUnusedLog prefix
+    e <- liftIO $ doesFileExist f
+    if e
+        then M.fromList . map parse . lines <$> liftIO (readFile f)
+        else return M.empty -- no log file: nothing recorded
+    where
+        parse line = (num, fromMaybe (error $ "bad key in unused log line: " ++ line) $ readKey rest) -- fails with the offending line, only when that entry's key is used
+            where
+                (num, rest) = separate (== ' ') line -- presumably "<number> <key>", split at a space; verify against separate
diff --git a/Command/Find.hs b/Command/Find.hs
new file mode 100644
index 0000000000..1961e6b748
--- /dev/null
+++ b/Command/Find.hs
@@ -0,0 +1,48 @@
+{- git-annex command
+ -
+ - Copyright 2010 Joey Hess
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -} + +module Command.Find where + +import qualified Data.Map as M + +import Common.Annex +import Command +import Annex.Content +import Limit +import qualified Annex +import qualified Utility.Format +import Utility.DataUnits +import Types.Key + +def :: [Command] +def = [command "find" paramPaths seek "lists available files"] + +seek :: [CommandSeek] +seek = [withFilesInGit $ whenAnnexed start] + +start :: FilePath -> (Key, Backend Annex) -> CommandStart +start file (key, _) = do + -- only files inAnnex are shown, unless the user has requested + -- others via a limit + whenM (liftM2 (||) limited (inAnnex key)) $ + unlessM (showFullJSON vars) $ do + f <- Annex.getState Annex.format + case f of + Nothing -> liftIO $ putStrLn file + Just formatter -> liftIO $ putStr $ + Utility.Format.format formatter $ + M.fromList vars + stop + where + vars = + [ ("file", file) + , ("key", show key) + , ("backend", keyBackendName key) + , ("bytesize", size show) + , ("humansize", size $ roughSize storageUnits True) + ] + size c = maybe "unknown" c $ keySize key diff --git a/Command/Fix.hs b/Command/Fix.hs new file mode 100644 index 0000000000..f264106c3f --- /dev/null +++ b/Command/Fix.hs @@ -0,0 +1,40 @@ +{- git-annex command + - + - Copyright 2010 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Command.Fix where + +import Common.Annex +import Command +import qualified Annex.Queue +import Annex.Content + +def :: [Command] +def = [command "fix" paramPaths seek + "fix up symlinks to point to annexed content"] + +seek :: [CommandSeek] +seek = [withFilesInGit $ whenAnnexed start] + +{- Fixes the symlink to an annexed file. 
-} +start :: FilePath -> (Key, Backend Annex) -> CommandStart +start file (key, _) = do + link <- calcGitLink file key + stopUnless ((/=) link <$> liftIO (readSymbolicLink file)) $ do + showStart "fix" file + next $ perform file link + +perform :: FilePath -> FilePath -> CommandPerform +perform file link = do + liftIO $ createDirectoryIfMissing True (parentDir file) + liftIO $ removeFile file + liftIO $ createSymbolicLink link file + next $ cleanup file + +cleanup :: FilePath -> CommandCleanup +cleanup file = do + Annex.Queue.add "add" [Param "--force", Param "--"] [file] + return True diff --git a/Command/FromKey.hs b/Command/FromKey.hs new file mode 100644 index 0000000000..ec194e06e8 --- /dev/null +++ b/Command/FromKey.hs @@ -0,0 +1,43 @@ +{- git-annex command + - + - Copyright 2010 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Command.FromKey where + +import Common.Annex +import Command +import qualified Annex.Queue +import Annex.Content +import Types.Key + +def :: [Command] +def = [command "fromkey" (paramPair paramKey paramPath) seek + "adds a file using a specific key"] + +seek :: [CommandSeek] +seek = [withWords start] + +start :: [String] -> CommandStart +start (keyname:file:[]) = notBareRepo $ do + let key = fromMaybe (error "bad key") $ readKey keyname + inbackend <- inAnnex key + unless inbackend $ error $ + "key ("++ keyname ++") is not present in backend" + showStart "fromkey" file + next $ perform key file +start _ = error "specify a key and a dest file" + +perform :: Key -> FilePath -> CommandPerform +perform key file = do + link <- calcGitLink file key + liftIO $ createDirectoryIfMissing True (parentDir file) + liftIO $ createSymbolicLink link file + next $ cleanup file + +cleanup :: FilePath -> CommandCleanup +cleanup file = do + Annex.Queue.add "add" [Param "--"] [file] + return True diff --git a/Command/Fsck.hs b/Command/Fsck.hs new file mode 100644 index 0000000000..a803207e20 --- /dev/null +++ 
b/Command/Fsck.hs @@ -0,0 +1,168 @@ +{- git-annex command + - + - Copyright 2010-2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Command.Fsck where + +import Common.Annex +import Command +import qualified Remote +import qualified Types.Backend +import qualified Types.Key +import qualified Backend +import Annex.Content +import Logs.Location +import Logs.Trust +import Annex.UUID +import Utility.DataUnits +import Utility.FileMode +import Config + +def :: [Command] +def = [command "fsck" paramPaths seek "check for problems"] + +seek :: [CommandSeek] +seek = + [ withNumCopies $ \n -> whenAnnexed $ start n + , withBarePresentKeys startBare + ] + +start :: Maybe Int -> FilePath -> (Key, Backend Annex) -> CommandStart +start numcopies file (key, backend) = do + showStart "fsck" file + next $ perform key file backend numcopies + +perform :: Key -> FilePath -> Backend Annex -> Maybe Int -> CommandPerform +perform key file backend numcopies = check + -- order matters + [ verifyLocationLog key file + , checkKeySize key + , checkKeyNumCopies key file numcopies + , checkBackend backend key + ] + +{- To fsck a bare repository, fsck each key in the location log. -} +withBarePresentKeys :: (Key -> CommandStart) -> CommandSeek +withBarePresentKeys a params = isBareRepo >>= go + where + go False = return [] + go True = do + unless (null params) $ + error "fsck should be run without parameters in a bare repository" + prepStart a loggedKeys + +startBare :: Key -> CommandStart +startBare key = case Backend.maybeLookupBackendName (Types.Key.keyBackendName key) of + Nothing -> stop + Just backend -> do + showStart "fsck" (show key) + next $ performBare key backend + +{- Note that numcopies cannot be checked in a bare repository, because + - getting the numcopies value requires a working copy with .gitattributes + - files. 
-}
+performBare :: Key -> Backend Annex -> CommandPerform
+performBare key backend = check
+    [ verifyLocationLog key (show key) -- no file name here, so the shown key is the description
+    , checkKeySize key
+    , checkBackend backend key
+    ]
+
+check :: [Annex Bool] -> CommandPerform
+check = sequence >=> dispatch -- every check runs; the results are only inspected afterwards
+    where
+        dispatch vs
+            | and vs = next $ return True
+            | otherwise = stop
+
+{- Checks that the location log reflects the current status of the key,
+   in this repository only. -}
+verifyLocationLog :: Key -> String -> Annex Bool
+verifyLocationLog key desc = do
+    present <- inAnnex key
+
+    -- Since we're checking that a key's file is present, throw
+    -- in a permission fixup here too.
+    when present $ do
+        f <- inRepo $ gitAnnexLocation key
+        liftIO $ do
+            preventWrite f
+            preventWrite (parentDir f)
+
+    u <- getUUID
+    uuids <- keyLocations key
+
+    case (present, u `elem` uuids) of -- (content is here, log says it is here)
+        (True, False) -> do
+            fix u InfoPresent
+            -- There is no data loss, so do not fail.
+            return True
+        (False, True) -> do
+            fix u InfoMissing
+            warning $
+                "** Based on the location log, " ++ desc
+                ++ "\n** was expected to be present, " ++
+                "but its content is missing."
+            return False
+        _ -> return True -- log and reality agree
+
+    where
+        fix u s = do
+            showNote "fixing location log"
+            logChange key u s
+
+{- The size of the data for a key is checked against the size encoded in
+ - the key's metadata, if available.
-} +checkKeySize :: Key -> Annex Bool +checkKeySize key = do + file <- inRepo $ gitAnnexLocation key + present <- liftIO $ doesFileExist file + case (present, Types.Key.keySize key) of + (_, Nothing) -> return True + (False, _) -> return True + (True, Just size) -> do + stat <- liftIO $ getFileStatus file + let size' = fromIntegral (fileSize stat) + if size == size' + then return True + else do + dest <- moveBad key + warning $ "Bad file size (" ++ + compareSizes storageUnits True size size' ++ + "); moved to " ++ dest + return False + + +checkBackend :: Backend Annex -> Key -> Annex Bool +checkBackend = Types.Backend.fsckKey + +checkKeyNumCopies :: Key -> FilePath -> Maybe Int -> Annex Bool +checkKeyNumCopies key file numcopies = do + needed <- getNumCopies numcopies + (untrustedlocations, safelocations) <- trustPartition UnTrusted =<< keyLocations key + let present = length safelocations + if present < needed + then do + ppuuids <- Remote.prettyPrintUUIDs "untrusted" untrustedlocations + warning $ missingNote file present needed ppuuids + return False + else return True + +missingNote :: String -> Int -> Int -> String -> String +missingNote file 0 _ [] = + "** No known copies exist of " ++ file +missingNote file 0 _ untrusted = + "Only these untrusted locations may have copies of " ++ file ++ + "\n" ++ untrusted ++ + "Back it up to trusted locations with git-annex copy." +missingNote file present needed [] = + "Only " ++ show present ++ " of " ++ show needed ++ + " trustworthy copies exist of " ++ file ++ + "\nBack it up with git-annex copy." +missingNote file present needed untrusted = + missingNote file present needed [] ++ + "\nThe following untrusted locations may also have copies: " ++ + "\n" ++ untrusted diff --git a/Command/Get.hs b/Command/Get.hs new file mode 100644 index 0000000000..b7023e2de8 --- /dev/null +++ b/Command/Get.hs @@ -0,0 +1,80 @@ +{- git-annex command + - + - Copyright 2010 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. 
+ -} + +module Command.Get where + +import Common.Annex +import Command +import qualified Annex +import qualified Remote +import Annex.Content +import qualified Command.Move + +def :: [Command] +def = [dontCheck fromOpt $ command "get" paramPaths seek + "make content of annexed files available"] + +seek :: [CommandSeek] +seek = [withNumCopies $ \n -> whenAnnexed $ start n] + +start :: Maybe Int -> FilePath -> (Key, Backend Annex) -> CommandStart +start numcopies file (key, _) = stopUnless (not <$> inAnnex key) $ + autoCopies key (<) numcopies $ do + from <- Annex.getState Annex.fromremote + case from of + Nothing -> go $ perform key + Just name -> do + -- get --from = copy --from + src <- Remote.byName name + stopUnless (Command.Move.fromOk src key) $ + go $ Command.Move.fromPerform src False key + where + go a = do + showStart "get" file + next a + +perform :: Key -> CommandPerform +perform key = stopUnless (getViaTmp key $ getKeyFile key) $ do + next $ return True -- no cleanup needed + +{- Try to find a copy of the file in one of the remotes, + - and copy it to here. -} +getKeyFile :: Key -> FilePath -> Annex Bool +getKeyFile key file = do + remotes <- Remote.keyPossibilities key + if null remotes + then do + showNote "not available" + Remote.showLocations key [] + return False + else trycopy remotes remotes + where + trycopy full [] = do + Remote.showTriedRemotes full + Remote.showLocations key [] + return False + trycopy full (r:rs) = do + probablythere <- probablyPresent r + if probablythere + then docopy r (trycopy full rs) + else trycopy full rs + -- This check is to avoid an ugly message if a remote is a + -- drive that is not mounted. 
+ probablyPresent r = + if Remote.hasKeyCheap r + then do + res <- Remote.hasKey r key + case res of + Right b -> return b + Left _ -> return False + else return True + docopy r continue = do + showAction $ "from " ++ Remote.name r + copied <- Remote.retrieveKeyFile r key file + if copied + then return True + else continue diff --git a/Command/InAnnex.hs b/Command/InAnnex.hs new file mode 100644 index 0000000000..c41f9a92c1 --- /dev/null +++ b/Command/InAnnex.hs @@ -0,0 +1,27 @@ +{- git-annex command + - + - Copyright 2010 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Command.InAnnex where + +import Common.Annex +import Command +import Annex.Content + +def :: [Command] +def = [command "inannex" (paramRepeating paramKey) seek + "checks if keys are present in the annex"] + +seek :: [CommandSeek] +seek = [withKeys start] + +start :: Key -> CommandStart +start key = inAnnexSafe key >>= dispatch + where + dispatch (Just True) = stop + dispatch (Just False) = exit 1 + dispatch Nothing = exit 100 + exit n = liftIO $ exitWith $ ExitFailure n diff --git a/Command/Init.hs b/Command/Init.hs new file mode 100644 index 0000000000..bbabdc4c25 --- /dev/null +++ b/Command/Init.hs @@ -0,0 +1,31 @@ +{- git-annex command + - + - Copyright 2010 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. 
+ -}
+
+module Command.Init where
+
+import Common.Annex
+import Command
+import Init
+
+def :: [Command]
+def = [dontCheck repoExists $
+    command "init" paramDesc seek "initialize git-annex"]
+
+seek :: [CommandSeek]
+seek = [withWords start]
+
+start :: [String] -> CommandStart
+start ws = do
+    showStart "init" description
+    next $ perform description
+    where
+        description = unwords ws -- all parameters together form the description
+
+perform :: String -> CommandPerform
+perform description = do
+    initialize $ if null description then Nothing else Just description -- empty text means no description
+    next $ return True
diff --git a/Command/InitRemote.hs b/Command/InitRemote.hs
new file mode 100644
index 0000000000..1e6bc2ef17
--- /dev/null
+++ b/Command/InitRemote.hs
@@ -0,0 +1,95 @@
+{- git-annex command
+ -
+ - Copyright 2011 Joey Hess
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Command.InitRemote where
+
+import qualified Data.Map as M
+
+import Common.Annex
+import Command
+import qualified Remote
+import qualified Logs.Remote
+import qualified Types.Remote as R
+import Annex.UUID
+
+def :: [Command]
+def = [command "initremote"
+    (paramPair paramName $ paramOptional $ paramRepeating paramKeyValue)
+    seek "sets up a special (non-git) remote"]
+
+seek :: [CommandSeek]
+seek = [withWords start]
+
+start :: [String] -> CommandStart
+start [] = do
+    names <- remoteNames
+    error $ "Specify a name for the remote. " ++
+        if null names
+            then ""
+            else "Either a new name, or one of these existing special remotes: " ++ join " " names
+start (name:ws) = do
+    (u, c) <- findByName name
+    let fullconfig = config `M.union` c -- M.union is left-biased: command-line values win
+    t <- findType fullconfig
+
+    showStart "initremote" name
+    next $ perform t u fullconfig
+
+    where
+        config = Logs.Remote.keyValToConfig ws
+
+perform :: R.RemoteType Annex -> UUID -> R.RemoteConfig -> CommandPerform
+perform t u c = do
+    c' <- R.setup t u c -- the config returned by setup is the one stored
+    next $ cleanup u c'
+
+cleanup :: UUID -> R.RemoteConfig -> CommandCleanup
+cleanup u c = do
+    Logs.Remote.configSet u c
+    return True
+
+{- Look up existing remote's UUID and config by name, or generate a new one -}
+findByName :: String -> Annex (UUID, R.RemoteConfig)
+findByName name = do
+    m <- Logs.Remote.readRemoteLog
+    maybe generate return $ findByName' name m
+    where
+        generate = do
+            uuid <- liftIO genUUID
+            return (uuid, M.insert nameKey name M.empty) -- new config holds only the name
+
+{- Finds, in a remote log map, a remote whose config stores the given
+ - name under nameKey, together with that remote's UUID.
+ -
+ - The first match in M.toList order wins; Nothing when none match. -}
+findByName' :: String -> M.Map UUID R.RemoteConfig -> Maybe (UUID, R.RemoteConfig)
+findByName' n = headMaybe . filter (matching . snd) . M.toList
+    where
+        matching c = M.lookup nameKey c == Just n
+
+remoteNames :: Annex [String]
+remoteNames = do
+    m <- Logs.Remote.readRemoteLog
+    return $ mapMaybe (M.lookup nameKey . snd) $ M.toList m -- configs without a name are left out
+
+{- find the specified remote type -}
+findType :: R.RemoteConfig -> Annex (R.RemoteType Annex)
+findType config = maybe unspecified specified $ M.lookup typeKey config
+    where
+        unspecified = error "Specify the type of remote with type="
+        specified s = case filter (findtype s) Remote.remoteTypes of
+            [] -> error $ "Unknown remote type " ++ s
+            (t:_) -> return t
+        findtype s i = R.typename i == s
+
+{- The name of a configured remote is stored in its config using this key. -}
+nameKey :: String
+nameKey = "name"
+
+{- The type of a remote is stored in its config using this key.
-} +typeKey :: String +typeKey = "type" diff --git a/Command/Lock.hs b/Command/Lock.hs new file mode 100644 index 0000000000..329fd3eff7 --- /dev/null +++ b/Command/Lock.hs @@ -0,0 +1,34 @@ +{- git-annex command + - + - Copyright 2010 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Command.Lock where + +import Common.Annex +import Command +import qualified Annex.Queue +import Backend + +def :: [Command] +def = [command "lock" paramPaths seek "undo unlock command"] + +seek :: [CommandSeek] +seek = [withFilesUnlocked start, withFilesUnlockedToBeCommitted start] + +{- Undo unlock -} +start :: BackendFile -> CommandStart +start (_, file) = do + showStart "lock" file + next $ perform file + +perform :: FilePath -> CommandPerform +perform file = do + liftIO $ removeFile file + -- Checkout from HEAD to get rid of any changes that might be + -- staged in the index, and get back to the previous symlink to + -- the content. + Annex.Queue.add "checkout" [Param "HEAD", Param "--"] [file] + next $ return True -- no cleanup needed diff --git a/Command/Map.hs b/Command/Map.hs new file mode 100644 index 0000000000..0f32e1130d --- /dev/null +++ b/Command/Map.hs @@ -0,0 +1,238 @@ +{- git-annex command + - + - Copyright 2010 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. 
+ -} + +module Command.Map where + +import Control.Exception.Extensible +import qualified Data.Map as M + +import Common.Annex +import Command +import qualified Git +import qualified Git.Url +import qualified Git.Config +import qualified Git.Construct +import qualified Annex +import Annex.UUID +import Logs.UUID +import Logs.Trust +import Annex.Ssh +import qualified Utility.Dot as Dot + +-- a link from the first repository to the second (its remote) +data Link = Link Git.Repo Git.Repo + +def :: [Command] +def = [dontCheck repoExists $ + command "map" paramNothing seek "generate map of repositories"] + +seek :: [CommandSeek] +seek = [withNothing start] + +start :: CommandStart +start = do + rs <- spider =<< gitRepo + + umap <- uuidMap + trusted <- trustGet Trusted + + liftIO $ writeFile file (drawMap rs umap trusted) + next $ next $ do + fast <- Annex.getState Annex.fast + if fast + then return True + else do + showLongNote $ "running: dot -Tx11 " ++ file + showOutput + liftIO $ boolSystem "dot" [Param "-Tx11", File file] + where + file = "map.dot" + +{- Generates a graph for dot(1). Each repository, and any other uuids, are + - displayed as a node, and each of its remotes is represented as an edge + - pointing at the node for the remote. + - + - The order nodes are added to the graph matters, since dot will draw + - the first ones near to the top and left. So it looks better to put + - the repositories first, followed by uuids that were not matched + - to a repository. + -} +drawMap :: [Git.Repo] -> M.Map UUID String -> [UUID] -> String +drawMap rs umap ts = Dot.graph $ repos ++ trusted ++ others + where + repos = map (node umap rs) rs + ruuids = ts ++ map getUncachedUUID rs + others = map (unreachable . uuidnode) $ + filter (`notElem` ruuids) (M.keys umap) + trusted = map (trustworthy . 
{- The part of a repo's hostname that comes before the first '.'.
 -
 - This is total: an empty hostname yields "" instead of depending on
 - Prelude.head of a list that may be empty. -}
basehostname :: Git.Repo -> String
basehostname = takeWhile (/= '.') . hostname
{- Recursively searches out remotes starting with the specified repo. -}
spider :: Git.Repo -> Annex [Git.Repo]
spider r = spider' [r] []

{- Worker for spider. The first list holds the repos still to be visited,
 - the second the repos that have already been scanned. Once nothing is
 - left to visit, the scanned repos are returned. -}
spider' :: [Git.Repo] -> [Git.Repo] -> Annex [Git.Repo]
spider' [] known = return known
spider' (r:rs) known
    -- A repo that matches (per same) one already scanned is skipped.
    | any (same r) known = spider' rs known
    | otherwise = do
        r' <- scan r

        -- The remotes will be relative to r', and need to be
        -- made absolute for later use.
        remotes <- mapM (absRepo r') (Git.remotes r')
        let r'' = r' { Git.remotes = remotes }

        -- Newly found remotes are appended to the to-visit list;
        -- the scanned repo is put on the front of the result.
        spider' (rs ++ remotes) (r'':known)
{- Decides whether two repos are the same.
 -
 - Two ssh repos are the same when both their url authority and their
 - work tree agree. An ssh repo is never the same as a non-ssh one.
 - Two other url repos are compared by their shown form, and everything
 - else by work tree. -}
same :: Git.Repo -> Git.Repo -> Bool
same a b
    | sshA && sshB = agree Git.Url.authority && agree Git.workTree
    | sshA || sshB = False
    | Git.repoIsUrl a && Git.repoIsUrl b = agree show
    | otherwise = agree Git.workTree
  where
    sshA = Git.repoIsSsh a
    sshB = Git.repoIsSsh b
    agree f = f a == f b
{- Runs Annex.Branch.update; there is no cleanup work to do afterwards. -}
perform :: CommandPerform
perform = Annex.Branch.update >> next (return True)
{- Store the old backend's key in the new backend
 - The old backend's key is not dropped from it, because there may
 - be other files still pointing at that key.
 -
 - Use the same filename as the file for the temp file name, to support
 - backends that allow the filename to influence the keys they
 - generate.
 -}
perform :: FilePath -> Key -> Backend Annex -> CommandPerform
perform file oldkey newbackend = do
    src <- inRepo $ gitAnnexLocation oldkey
    tmp <- fromRepo gitAnnexTmpDir
    -- The temp file goes inside the annex tmp directory, under the
    -- file's own base name; the two must be joined as path components.
    let tmpfile = tmp </> takeFileName file
    cleantmp tmpfile
    liftIO $ createLink src tmpfile
    k <- Backend.genKey tmpfile $ Just newbackend
    cleantmp tmpfile
    case k of
        Nothing -> stop
        Just (newkey, _) -> stopUnless (link src newkey) $ do
            -- Update symlink to use the new key.
            liftIO $ removeFile file

            -- If the old key had some associated urls, record them
            -- for the new key as well.
            urls <- getUrls oldkey
            unless (null urls) $
                mapM_ (setUrlPresent newkey) urls

            next $ Command.Add.cleanup file newkey True
  where
    -- Removes a leftover temp file, if there is one.
    cleantmp t = liftIO $ whenM (doesFileExist t) $ removeFile t
    link src newkey = getViaTmpUnchecked newkey $ \t -> do
        -- Make a hard link to the old backend's
        -- cached key, to avoid wasting disk space.
        liftIO $ unlessM (doesFileExist t) $ createLink src t
        return True
{- Performs the transfer to the remote, wrapped in moveLock. Once the
 - remote is known to have the content, that is logged and, when moving,
 - the local copy is removed. -}
toPerform :: Remote.Remote Annex -> Bool -> Key -> CommandPerform
toPerform dest move key = moveLock move key $ do
    -- Checking the remote is expensive, so not done in the start step.
    -- In fast mode, location tracking is assumed to be correct,
    -- and an explicit check is not done, when copying. When moving,
    -- it has to be done, to avoid inadvertent data loss.
    fast <- Annex.getState Annex.fast
    let fastcheck = fast && not move && not (Remote.hasKeyCheap dest)
    isthere <- if fastcheck
        then do
            remotes <- Remote.keyPossibilities key
            return $ Right $ dest `elem` remotes
        else Remote.hasKey dest key
    case isthere of
        -- The presence check itself failed; show why and give up.
        Left err -> do
            showNote err
            stop
        Right False -> do
            showAction $ "to " ++ Remote.name dest
            ok <- Remote.storeKey dest key
            if ok
                then finish
                else do
                    when fastcheck $
                        warning "This could have failed because --fast is enabled."
                    stop
        -- Already on the remote: nothing to send, only bookkeeping.
        Right True -> finish
  where
    finish = do
        Remote.logStatus dest key True
        if move
            then do
                whenM (inAnnex key) $ removeAnnex key
                next $ Command.Drop.cleanupLocal key
            else next $ return True
{- Checks whether content can be retrieved from a remote: the remote must
 - not be the current repository, and must be among the remotes that
 - Remote.keyPossibilities returns for the key. -}
fromOk :: Remote.Remote Annex -> Key -> Annex Bool
fromOk src key = do
    u <- getUUID
    remotes <- Remote.keyPossibilities key
    return $ u /= Remote.uuid src && src `elem` remotes
{- Receives the content of one key using rsyncServerReceive, refusing if
 - the key is already in the annex. The process exits afterwards, with
 - an exit status that reflects whether the content was received. -}
start :: Key -> CommandStart
start key = do
    whenM (inAnnex key) $ error "key is already present in annex"
    received <- getViaTmp key (liftIO . rsyncServerReceive)
    if received
        then finish
        else liftIO exitFailure
  where
    -- forcibly quit after receiving one key,
    -- and shutdown cleanly so queued git commands run
    finish = shutdown >> liftIO exitSuccess
{- Expects exactly two words, a src file and a dest file. Nothing is done
 - when they are the same file, and an annexed src is refused. -}
start :: [FilePath] -> CommandStart
start [src, dest]
    | src == dest = stop
    | otherwise = ifAnnexed src refuse reinject
  where
    refuse = error $ "cannot used annexed file as src: " ++ src
    reinject = do
        showStart "reinject" dest
        next $ whenAnnexed (perform src) dest
start _ = error "specify a src file and a dest file"
+ -} + +module Command.Semitrust where + +import Common.Annex +import Command +import qualified Remote +import Logs.Trust + +def :: [Command] +def = [command "semitrust" (paramRepeating paramRemote) seek + "return repository to default trust level"] + +seek :: [CommandSeek] +seek = [withWords start] + +start :: [String] -> CommandStart +start ws = do + let name = unwords ws + showStart "semitrust" name + u <- Remote.nameToUUID name + next $ perform u + +perform :: UUID -> CommandPerform +perform uuid = do + trustSet uuid SemiTrusted + next $ return True diff --git a/Command/SendKey.hs b/Command/SendKey.hs new file mode 100644 index 0000000000..7b1cd3ecae --- /dev/null +++ b/Command/SendKey.hs @@ -0,0 +1,28 @@ +{- git-annex command + - + - Copyright 2010 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Command.SendKey where + +import Common.Annex +import Command +import Annex.Content +import Utility.RsyncFile + +def :: [Command] +def = [command "sendkey" paramKey seek + "runs rsync in server mode to send content"] + +seek :: [CommandSeek] +seek = [withKeys start] + +start :: Key -> CommandStart +start key = do + file <- inRepo $ gitAnnexLocation key + whenM (inAnnex key) $ + liftIO $ rsyncServerSend file -- does not return + warning "requested key is not present" + liftIO exitFailure diff --git a/Command/Status.hs b/Command/Status.hs new file mode 100644 index 0000000000..736d897ef3 --- /dev/null +++ b/Command/Status.hs @@ -0,0 +1,198 @@ +{- git-annex command + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. 
-- cached info that multiple Stats may need
-- Each field is Nothing until the corresponding set of keys has been
-- computed, and holds that set afterwards.
data StatInfo = StatInfo
    { keysPresentCache :: Maybe (Set Key)
    , keysReferencedCache :: Maybe (Set Key)
    }
{- Shows the fast stats, followed by the slow ones unless fast mode is
 - set. All of them are run against one shared, initially empty cache. -}
start :: CommandStart
start = do
    fast <- Annex.getState Annex.fast
    let stats = fast_stats ++ if fast then [] else slow_stats
    showCustom "status" $ do
        evalStateT (mapM_ showStat stats) emptycache
        return True
    stop
  where
    emptycache = StatInfo Nothing Nothing
{- A stat listing the repositories that have the given trust level.
 - The candidates are the uuids from uuidMap together with those from
 - remoteMap. Shows "0" when the pretty-printed list is empty, and
 - otherwise the count followed by the list. -}
remote_list :: TrustLevel -> String -> Stat
remote_list level desc = stat heading $ nojson $ lift $ do
    known <- uuidMap
    configured <- remoteMap
    let uuids = M.keys $ M.union known configured
    matching <- fst <$> trustPartition level uuids
    listing <- prettyPrintUUIDs heading matching
    return $ if null listing
        then "0"
        else show (length matching) ++ "\n" ++ beginning listing
  where
    heading = desc ++ " repositories"
{- Describes the total size of a set of keys. Only keys that have a known
 - size are summed; when some keys lack one, an aside saying how many is
 - appended. -}
keySizeSum :: Set Key -> String
keySizeSum keys = roughSize storageUnits False (sum sizes) ++ unknownnote
  where
    sizes = mapMaybe keySize (S.toList keys)
    unknown = S.size keys - genericLength sizes
    unknownnote =
        if unknown == 0
            then ""
            else aside ("+ " ++ show unknown ++ " keys of unknown size")
-- syncing involves several operations, any of which can independently fail
-- Each of commit, pull and push is therefore its own seek step.
seek :: [CommandSeek]
seek = map withNothing [commit, pull, push]
{- Joins all the words given into a single repository name, looks up its
 - UUID, and goes on to trust it. -}
start :: [String] -> CommandStart
start ws = do
    showStart "trust" name
    next . perform =<< Remote.nameToUUID name
  where
    name = unwords ws
{- Takes the file back out of the annex: deletes it from the working
 - tree, unstages it from git, and then puts the content in its place.
 - In fast mode that is a writable hard link to the annexed content;
 - otherwise fromAnnex is used and the key is logged as missing. -}
cleanup :: FilePath -> Key -> CommandCleanup
cleanup file key = do
    liftIO $ removeFile file
    -- git rm deletes empty directory without --cached
    inRepo $ Git.Command.run "rm" [Params "--cached --quiet --", File file]

    -- If the file was already committed, it is now staged for removal.
    -- Commit that removal now, to avoid later confusing the
    -- pre-commit hook if this file is later added back to
    -- git as a normal, non-annexed file.
    whenM (not . null <$> inRepo (LsFiles.staged [file])) $ do
        showOutput
        inRepo $ Git.Command.run "commit" [
            Param "-q",
            Params "-m", Param "content removed from git annex",
            Param "--", File file]

    fast <- Annex.getState Annex.fast
    if fast
        then do
            -- fast mode: hard link to content in annex
            src <- inRepo $ gitAnnexLocation key
            liftIO $ do
                createLink src file
                allowWrite file
        else do
            -- presumably moves the content out of the annex to file;
            -- TODO confirm against Annex.Content
            fromAnnex key file
            logStatus key InfoMissing

    return True
{- Copies the content of the key to a temp location, then swaps that copy
 - in for the file and makes it writable. Fails when the content is not
 - present or the copy does not succeed. -}
perform :: FilePath -> Key -> CommandPerform
perform dest key = do
    unlessM (inAnnex key) $ error "content not present"

    checkDiskSpace key

    src <- inRepo $ gitAnnexLocation key
    tmpdest <- fromRepo $ gitAnnexTmpLocation key
    liftIO $ createDirectoryIfMissing True $ parentDir tmpdest
    showAction "copying"
    copied <- liftIO $ copyFileExternal src tmpdest
    unless copied $ error "copy failed!"
    liftIO $ replace tmpdest
    next $ return True
  where
    replace copy = do
        removeFile dest
        moveFile copy dest
        allowWrite dest
diff --git a/Command/Untrust.hs b/Command/Untrust.hs new file mode 100644 index 0000000000..e16040e6bb --- /dev/null +++ b/Command/Untrust.hs @@ -0,0 +1,32 @@ +{- git-annex command + - + - Copyright 2010 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Command.Untrust where + +import Common.Annex +import Command +import qualified Remote +import Logs.Trust + +def :: [Command] +def = [command "untrust" (paramRepeating paramRemote) seek + "do not trust a repository"] + +seek :: [CommandSeek] +seek = [withWords start] + +start :: [String] -> CommandStart +start ws = do + let name = unwords ws + showStart "untrust" name + u <- Remote.nameToUUID name + next $ perform u + +perform :: UUID -> CommandPerform +perform uuid = do + trustSet uuid UnTrusted + next $ return True diff --git a/Command/Unused.hs b/Command/Unused.hs new file mode 100644 index 0000000000..ef398b01e1 --- /dev/null +++ b/Command/Unused.hs @@ -0,0 +1,235 @@ +{- git-annex command + - + - Copyright 2010-2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +{-# LANGUAGE BangPatterns #-} + +module Command.Unused where + +import qualified Data.Set as S +import qualified Data.ByteString.Lazy.Char8 as L + +import Common.Annex +import Command +import Annex.Content +import Utility.FileMode +import Utility.TempFile +import Logs.Location +import qualified Annex +import qualified Git +import qualified Git.Command +import qualified Git.Ref +import qualified Git.LsFiles as LsFiles +import qualified Git.LsTree as LsTree +import qualified Backend +import qualified Remote +import qualified Annex.Branch +import Annex.CatFile + +def :: [Command] +def = [dontCheck fromOpt $ command "unused" paramNothing seek + "look for unused file content"] + +seek :: [CommandSeek] +seek = [withNothing start] + +{- Finds unused content in the annex. 
-} +start :: CommandStart +start = do + from <- Annex.getState Annex.fromremote + let (name, action) = case from of + Nothing -> (".", checkUnused) + Just "." -> (".", checkUnused) + Just n -> (n, checkRemoteUnused n) + showStart "unused" name + next action + +checkUnused :: CommandPerform +checkUnused = do + (unused, stalebad, staletmp) <- unusedKeys + _ <- list "" unusedMsg unused 0 >>= + list "bad" staleBadMsg stalebad >>= + list "tmp" staleTmpMsg staletmp + next $ return True + where + list file msg l c = do + let unusedlist = number c l + unless (null l) $ showLongNote $ msg unusedlist + writeUnusedFile file unusedlist + return $ c + length l + +checkRemoteUnused :: String -> CommandPerform +checkRemoteUnused name = do + checkRemoteUnused' =<< Remote.byName name + next $ return True + +checkRemoteUnused' :: Remote.Remote Annex -> Annex () +checkRemoteUnused' r = do + showAction "checking for unused data" + remotehas <- loggedKeysFor (Remote.uuid r) + remoteunused <- excludeReferenced remotehas + let list = number 0 remoteunused + writeUnusedFile "" list + unless (null remoteunused) $ showLongNote $ remoteUnusedMsg r list + +writeUnusedFile :: FilePath -> [(Int, Key)] -> Annex () +writeUnusedFile prefix l = do + logfile <- fromRepo $ gitAnnexUnusedLog prefix + liftIO $ viaTmp writeFile logfile $ + unlines $ map (\(n, k) -> show n ++ " " ++ show k) l + +table :: [(Int, Key)] -> [String] +table l = " NUMBER KEY" : map cols l + where + cols (n,k) = " " ++ pad 6 (show n) ++ " " ++ show k + pad n s = s ++ replicate (n - length s) ' ' + +number :: Int -> [a] -> [(Int, a)] +number _ [] = [] +number n (x:xs) = (n+1, x) : number (n+1) xs + +staleTmpMsg :: [(Int, Key)] -> String +staleTmpMsg t = unlines $ + ["Some partially transferred data exists in temporary files:"] + ++ table t ++ [dropMsg Nothing] + +staleBadMsg :: [(Int, Key)] -> String +staleBadMsg t = unlines $ + ["Some corrupted files have been preserved by fsck, just in case:"] + ++ table t ++ [dropMsg Nothing] 
+ +unusedMsg :: [(Int, Key)] -> String +unusedMsg u = unusedMsg' u + ["Some annexed data is no longer used by any files:"] + [dropMsg Nothing] +unusedMsg' :: [(Int, Key)] -> [String] -> [String] -> String +unusedMsg' u header trailer = unlines $ + header ++ + table u ++ + ["(To see where data was previously used, try: git log --stat -S'KEY')"] ++ + trailer + +remoteUnusedMsg :: Remote.Remote Annex -> [(Int, Key)] -> String +remoteUnusedMsg r u = unusedMsg' u + ["Some annexed data on " ++ name ++ " is not used by any files:"] + [dropMsg $ Just r] + where + name = Remote.name r + +dropMsg :: Maybe (Remote.Remote Annex) -> String +dropMsg Nothing = dropMsg' "" +dropMsg (Just r) = dropMsg' $ " --from " ++ Remote.name r +dropMsg' :: String -> String +dropMsg' s = "\nTo remove unwanted data: git-annex dropunused" ++ s ++ " NUMBER\n" + +{- Finds keys whose content is present, but that do not seem to be used + - by any files in the git repo, or that are only present as bad or tmp + - files. -} +unusedKeys :: Annex ([Key], [Key], [Key]) +unusedKeys = do + fast <- Annex.getState Annex.fast + if fast + then do + showNote "fast mode enabled; only finding stale files" + tmp <- staleKeys gitAnnexTmpDir + bad <- staleKeys gitAnnexBadDir + return ([], bad, tmp) + else do + showAction "checking for unused data" + present <- getKeysPresent + unused <- excludeReferenced present + staletmp <- staleKeysPrune gitAnnexTmpDir present + stalebad <- staleKeysPrune gitAnnexBadDir present + return (unused, stalebad, staletmp) + +{- Finds keys in the list that are not referenced in the git repository. -} +excludeReferenced :: [Key] -> Annex [Key] +excludeReferenced [] = return [] -- optimisation +excludeReferenced l = do + c <- inRepo $ Git.Command.pipeRead [Param "show-ref"] + removewith (getKeysReferenced : map getKeysReferencedInGit (refs c)) + (S.fromList l) + where + -- Skip the git-annex branches, and get all other unique refs. + refs = map (Git.Ref . snd) . + nubBy uniqref . 
+ filter ourbranches . + map (separate (== ' ')) . lines . L.unpack + uniqref (a, _) (b, _) = a == b + ourbranchend = '/' : show Annex.Branch.name + ourbranches (_, b) = not $ ourbranchend `isSuffixOf` b + removewith [] s = return $ S.toList s + removewith (a:as) s + | s == S.empty = return [] -- optimisation + | otherwise = do + referenced <- a + let !s' = s `S.difference` S.fromList referenced + removewith as s' + +{- Finds items in the first, smaller list, that are not + - present in the second, larger list. + - + - Constructing a single set, of the list that tends to be + - smaller, appears more efficient in both memory and CPU + - than constructing and taking the S.difference of two sets. -} +exclude :: Ord a => [a] -> [a] -> [a] +exclude [] _ = [] -- optimisation +exclude smaller larger = S.toList $ remove larger $ S.fromList smaller + where + remove a b = foldl (flip S.delete) b a + +{- List of keys referenced by symlinks in the git repo. -} +getKeysReferenced :: Annex [Key] +getKeysReferenced = do + top <- fromRepo Git.workTree + files <- inRepo $ LsFiles.inRepo [top] + keypairs <- mapM Backend.lookupFile files + return $ map fst $ catMaybes keypairs + +{- List of keys referenced by symlinks in a git ref. -} +getKeysReferencedInGit :: Git.Ref -> Annex [Key] +getKeysReferencedInGit ref = do + showAction $ "checking " ++ Git.Ref.describe ref + findkeys [] =<< inRepo (LsTree.lsTree ref) + where + findkeys c [] = return c + findkeys c (l:ls) + | isSymLink (LsTree.mode l) = do + content <- catFile ref $ LsTree.file l + case fileKey (takeFileName $ L.unpack content) of + Nothing -> findkeys c ls + Just k -> findkeys (k:c) ls + | otherwise = findkeys c ls + +{- Looks in the specified directory for bad/tmp keys, and returns a list + - of those that might still have value, or might be stale and removable. + - + - When a list of presently available keys is provided, stale keys + - that no longer have value are deleted. 
+ -} +staleKeysPrune :: (Git.Repo -> FilePath) -> [Key] -> Annex [Key] +staleKeysPrune dirspec present = do + contents <- staleKeys dirspec + + let stale = contents `exclude` present + -- dups are the keys in contents that are also in present + let dups = contents `exclude` stale + + dir <- fromRepo dirspec + liftIO $ forM_ dups $ \t -> removeFile $ dir </> keyFile t + + return stale + +{- Lists the keys whose files are found in the directory selected by + - dirspec. Returns [] when that directory does not exist; entries that + - are not files, or whose names fileKey rejects, are skipped. -} +staleKeys :: (Git.Repo -> FilePath) -> Annex [Key] +staleKeys dirspec = do + dir <- fromRepo dirspec + exists <- liftIO $ doesDirectoryExist dir + if not exists + then return [] + else do + contents <- liftIO $ getDirectoryContents dir + files <- liftIO $ filterM doesFileExist $ + map (dir </>) contents + return $ mapMaybe (fileKey . takeFileName) files diff --git a/Command/Upgrade.hs b/Command/Upgrade.hs new file mode 100644 index 0000000000..b39fcd99c2 --- /dev/null +++ b/Command/Upgrade.hs @@ -0,0 +1,27 @@ +{- git-annex command + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Command.Upgrade where + +import Common.Annex +import Command +import Upgrade +import Annex.Version + +def :: [Command] +def = [dontCheck repoExists $ -- because an old version may not seem to exist + command "upgrade" paramNothing seek "upgrade repository layout"] + +seek :: [CommandSeek] +seek = [withNothing start] + +start :: CommandStart +start = do + showStart "upgrade" "." + r <- upgrade + setVersion + next $ next $ return r diff --git a/Command/Version.hs b/Command/Version.hs new file mode 100644 index 0000000000..9fb7fe5bdb --- /dev/null +++ b/Command/Version.hs @@ -0,0 +1,36 @@ +{- git-annex command + - + - Copyright 2010 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher.
+ -} + +module Command.Version where + +import Common.Annex +import Command +import qualified Build.SysConfig as SysConfig +import Annex.Version + +def :: [Command] +def = [noRepo showPackageVersion $ dontCheck repoExists $ + command "version" paramNothing seek "show version info"] + +seek :: [CommandSeek] +seek = [withNothing start] + +start :: CommandStart +start = do + v <- getVersion + liftIO $ do + showPackageVersion + putStrLn $ "local repository version: " ++ fromMaybe "unknown" v + putStrLn $ "default repository version: " ++ defaultVersion + putStrLn $ "supported repository versions: " ++ vs supportedVersions + putStrLn $ "upgrade supported from repository versions: " ++ vs upgradableVersions + stop + where + vs = join " " + +showPackageVersion :: IO () +showPackageVersion = putStrLn $ "git-annex version: " ++ SysConfig.packageversion diff --git a/Command/Whereis.hs b/Command/Whereis.hs new file mode 100644 index 0000000000..eb2ae3d4e7 --- /dev/null +++ b/Command/Whereis.hs @@ -0,0 +1,41 @@ +{- git-annex command + - + - Copyright 2010 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. 
+ -} + +module Command.Whereis where + +import Common.Annex +import Logs.Location +import Command +import Remote +import Logs.Trust + +def :: [Command] +def = [command "whereis" paramPaths seek + "lists repositories that have file content"] + +seek :: [CommandSeek] +seek = [withFilesInGit $ whenAnnexed start] + +start :: FilePath -> (Key, Backend Annex) -> CommandStart +start file (key, _) = do + showStart "whereis" file + next $ perform key + +perform :: Key -> CommandPerform +perform key = do + (untrustedlocations, safelocations) <- trustPartition UnTrusted =<< keyLocations key + let num = length safelocations + showNote $ show num ++ " " ++ copiesplural num + pp <- prettyPrintUUIDs "whereis" safelocations + unless (null safelocations) $ showLongNote pp + pp' <- prettyPrintUUIDs "untrusted" untrustedlocations + unless (null untrustedlocations) $ showLongNote $ untrustedheader ++ pp' + if null safelocations then stop else next $ return True + where + copiesplural 1 = "copy" + copiesplural _ = "copies" + untrustedheader = "The following untrusted locations may also have copies:\n" diff --git a/Common.hs b/Common.hs new file mode 100644 index 0000000000..90895f08e8 --- /dev/null +++ b/Common.hs @@ -0,0 +1,29 @@ +module Common (module X) where + +import Control.Monad as X hiding (join) +import Control.Applicative as X +import Control.Monad.State as X (liftIO) +import Control.Exception.Extensible as X (IOException) + +import Data.Maybe as X +import Data.List as X hiding (head, tail, init, last) +import Data.String.Utils as X + +import System.Path as X +import System.FilePath as X +import System.Directory as X +import System.Cmd.Utils as X hiding (safeSystem) +import System.IO as X hiding (FilePath) +import System.Posix.Files as X +import System.Posix.IO as X +import System.Posix.Process as X hiding (executeFile) +import System.Exit as X + +import Utility.Misc as X +import Utility.Conditional as X +import Utility.SafeCommand as X +import Utility.Path as X +import 
Utility.Directory as X +import Utility.Monad as X + +import Utility.PartialPrelude as X diff --git a/Common/Annex.hs b/Common/Annex.hs new file mode 100644 index 0000000000..e90825f0e9 --- /dev/null +++ b/Common/Annex.hs @@ -0,0 +1,8 @@ +module Common.Annex (module X) where + +import Common as X +import Types as X +import Types.UUID as X (toUUID, fromUUID) +import Annex as X (gitRepo, inRepo, fromRepo) +import Locations as X +import Messages as X diff --git a/Config.hs b/Config.hs new file mode 100644 index 0000000000..aa88858738 --- /dev/null +++ b/Config.hs @@ -0,0 +1,85 @@ +{- Git configuration + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Config where + +import Common.Annex +import qualified Git +import qualified Git.Config +import qualified Git.Command +import qualified Annex + +type ConfigKey = String + +{- Changes a git config setting in both internal state and .git/config -} +setConfig :: ConfigKey -> String -> Annex () +setConfig k value = do + inRepo $ Git.Command.run "config" [Param k, Param value] + -- re-read git config and update the repo's state + newg <- inRepo Git.Config.read + Annex.changeState $ \s -> s { Annex.repo = newg } + +{- Looks up a per-remote config setting in git config. + - Failing that, tries looking for a global config option. -} +getConfig :: Git.Repo -> ConfigKey -> String -> Annex String +getConfig r key def = do + def' <- fromRepo $ Git.Config.get ("annex." ++ key) def + fromRepo $ Git.Config.get (remoteConfig r key) def' + +{- Looks up a per-remote config setting in git config. -} +remoteConfig :: Git.Repo -> ConfigKey -> String +remoteConfig r key = "remote." ++ fromMaybe "" (Git.remoteName r) ++ ".annex-" ++ key + +{- Calculates cost for a remote. Either the default, or as configured + - by remote.NAME.annex-cost, or if remote.NAME.annex-cost-command + - is set and prints a number, that is used.
-} +remoteCost :: Git.Repo -> Int -> Annex Int +remoteCost r def = do + cmd <- getConfig r "cost-command" "" + (fromMaybe def . readMaybe) <$> + if not $ null cmd + then liftIO $ snd <$> pipeFrom "sh" ["-c", cmd] + else getConfig r "cost" "" + +cheapRemoteCost :: Int +cheapRemoteCost = 100 +semiCheapRemoteCost :: Int +semiCheapRemoteCost = 110 +expensiveRemoteCost :: Int +expensiveRemoteCost = 200 + +{- Adjusts a remote's cost to reflect it being encrypted. -} +encryptedRemoteCostAdj :: Int +encryptedRemoteCostAdj = 50 + +{- Make sure the remote cost numbers work out. -} +prop_cost_sane :: Bool +prop_cost_sane = False `notElem` + [ expensiveRemoteCost > 0 + , cheapRemoteCost < semiCheapRemoteCost + , semiCheapRemoteCost < expensiveRemoteCost + , cheapRemoteCost + encryptedRemoteCostAdj > semiCheapRemoteCost + , cheapRemoteCost + encryptedRemoteCostAdj < expensiveRemoteCost + , semiCheapRemoteCost + encryptedRemoteCostAdj < expensiveRemoteCost + ] + +{- Checks if a repo should be ignored, based either on annex-ignore + - setting, or on command-line options. Allows command-line to override + - annex-ignore. -} +repoNotIgnored :: Git.Repo -> Annex Bool +repoNotIgnored r = not . Git.configTrue <$> getConfig r "ignore" "false" + +{- If a value is specified, it is used; otherwise the default is looked up + - in git config. forcenumcopies overrides everything. -} +getNumCopies :: Maybe Int -> Annex Int +getNumCopies v = perhaps (use v) =<< Annex.getState Annex.forcenumcopies + where + use (Just n) = return n + use Nothing = perhaps (return 1) =<< + readMaybe <$> fromRepo (Git.Config.get config "1") + perhaps fallback = maybe fallback (return . id) + config = "annex.numcopies" diff --git a/Crypto.hs b/Crypto.hs new file mode 100644 index 0000000000..cb1ca40d14 --- /dev/null +++ b/Crypto.hs @@ -0,0 +1,185 @@ +{- git-annex crypto + - + - Currently using gpg; could later be modified to support different + - crypto backends if necessary.
+ - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Crypto ( + Cipher, + EncryptedCipher, + genCipher, + updateCipher, + describeCipher, + storeCipher, + extractCipher, + decryptCipher, + encryptKey, + withEncryptedHandle, + withDecryptedHandle, + withEncryptedContent, + withDecryptedContent, + + prop_hmacWithCipher_sane +) where + +import qualified Data.ByteString.Lazy.Char8 as L +import qualified Data.Map as M +import Data.ByteString.Lazy.UTF8 (fromString) +import Data.Digest.Pure.SHA +import Control.Applicative + +import Common.Annex +import qualified Utility.Gpg as Gpg +import Types.Key +import Types.Remote +import Utility.Base64 +import Types.Crypto + +{- The first half of a Cipher is used for HMAC; the remainder + - is used as the GPG symmetric encryption passphrase. + - + - HMAC SHA1 needs only 64 bytes. The remainder is for expansion, + - perhaps to HMAC SHA512, which needs 128 bytes (ideally). + - + - 256 is enough for gpg's symmetric cipher; unlike weaker public key + - crypto, the key does not need to be too large. + -} +cipherHalf :: Int +cipherHalf = 256 + +cipherSize :: Int +cipherSize = cipherHalf * 2 + +cipherPassphrase :: Cipher -> String +cipherPassphrase (Cipher c) = drop cipherHalf c + +cipherHmac :: Cipher -> String +cipherHmac (Cipher c) = take cipherHalf c + +{- Creates a new Cipher, encrypted as specified in the remote's configuration -} +genCipher :: RemoteConfig -> IO EncryptedCipher +genCipher c = do + ks <- configKeyIds c + random <- genrandom + encryptCipher (Cipher random) ks + where + genrandom = Gpg.readStrict + -- Armor the random data, to avoid newlines, + -- since gpg only reads ciphers up to the first + -- newline.
+ [ Params "--gen-random --armor" + , Param $ show randomquality + , Param $ show cipherSize + ] + -- 1 is /dev/urandom; 2 is /dev/random + randomquality = 1 :: Int + +{- Updates an existing Cipher, re-encrypting it to add KeyIds specified in + - the remote's configuration. -} +updateCipher :: RemoteConfig -> EncryptedCipher -> IO EncryptedCipher +updateCipher c encipher@(EncryptedCipher _ ks) = do + ks' <- configKeyIds c + cipher <- decryptCipher c encipher + encryptCipher cipher (merge ks ks') + where + merge (KeyIds a) (KeyIds b) = KeyIds $ a ++ b + +describeCipher :: EncryptedCipher -> String +describeCipher (EncryptedCipher _ (KeyIds ks)) = + "with gpg " ++ keys ks ++ " " ++ unwords ks + where + keys [_] = "key" + keys _ = "keys" + +{- Stores an EncryptedCipher in a remote's configuration. -} +storeCipher :: RemoteConfig -> EncryptedCipher -> RemoteConfig +storeCipher c (EncryptedCipher t ks) = + M.insert "cipher" (toB64 t) $ M.insert "cipherkeys" (showkeys ks) c + where + showkeys (KeyIds l) = join "," l + +{- Extracts an EncryptedCipher from a remote's configuration. -} +extractCipher :: RemoteConfig -> Maybe EncryptedCipher +extractCipher c = + case (M.lookup "cipher" c, M.lookup "cipherkeys" c) of + (Just t, Just ks) -> Just $ EncryptedCipher (fromB64 t) (readkeys ks) + _ -> Nothing + where + readkeys = KeyIds . split "," + +{- Encrypts a Cipher to the specified KeyIds. -} +encryptCipher :: Cipher -> KeyIds -> IO EncryptedCipher +encryptCipher (Cipher c) (KeyIds ks) = do + let ks' = nub $ sort ks -- gpg complains about duplicate recipient keyids + encipher <- Gpg.pipeStrict (encrypt++recipients ks') c + return $ EncryptedCipher encipher (KeyIds ks') + where + encrypt = [ Params "--encrypt" ] + recipients l = force_recipients : + concatMap (\k -> [Param "--recipient", Param k]) l + -- Force gpg to only encrypt to the specified + -- recipients, not configured defaults. 
+ force_recipients = Params "--no-encrypt-to --no-default-recipient" + +{- Decrypting an EncryptedCipher is expensive; the Cipher should be cached. -} +decryptCipher :: RemoteConfig -> EncryptedCipher -> IO Cipher +decryptCipher _ (EncryptedCipher encipher _) = + Cipher <$> Gpg.pipeStrict decrypt encipher + where + decrypt = [ Param "--decrypt" ] + +{- Generates an encrypted form of a Key. The encryption does not need to be + - reversible, nor does it need to be the same type of encryption used + - on content. It does need to be repeatable. -} +encryptKey :: Cipher -> Key -> Key +encryptKey c k = Key + { keyName = hmacWithCipher c (show k) + , keyBackendName = "GPGHMACSHA1" + , keySize = Nothing -- size and mtime omitted + , keyMtime = Nothing -- to avoid leaking data + } + +{- Runs an action, passing it a handle from which it can + - stream encrypted content. -} +withEncryptedHandle :: Cipher -> IO L.ByteString -> (Handle -> IO a) -> IO a +withEncryptedHandle = Gpg.passphraseHandle [Params "--symmetric --force-mdc"] . cipherPassphrase + +{- Runs an action, passing it a handle from which it can + - stream decrypted content. -} +withDecryptedHandle :: Cipher -> IO L.ByteString -> (Handle -> IO a) -> IO a +withDecryptedHandle = Gpg.passphraseHandle [Param "--decrypt"] . cipherPassphrase + +{- Streams encrypted content to an action. -} +withEncryptedContent :: Cipher -> IO L.ByteString -> (L.ByteString -> IO a) -> IO a +withEncryptedContent = pass withEncryptedHandle + +{- Streams decrypted content to an action.
-} +withDecryptedContent :: Cipher -> IO L.ByteString -> (L.ByteString -> IO a) -> IO a +withDecryptedContent = pass withDecryptedHandle + +pass :: (Cipher -> IO L.ByteString -> (Handle -> IO a) -> IO a) + -> Cipher -> IO L.ByteString -> (L.ByteString -> IO a) -> IO a +pass to c i a = to c i $ \h -> a =<< L.hGetContents h + +configKeyIds :: RemoteConfig -> IO KeyIds +configKeyIds c = Gpg.findPubKeys $ configGet c "encryption" + +configGet :: RemoteConfig -> String -> String +configGet c key = fromMaybe missing $ M.lookup key c + where + missing = error $ "missing " ++ key ++ " in remote config" + +hmacWithCipher :: Cipher -> String -> String +hmacWithCipher c = hmacWithCipher' (cipherHmac c) +hmacWithCipher' :: String -> String -> String +hmacWithCipher' c s = showDigest $ hmacSha1 (fromString c) (fromString s) + +{- Ensure that hmacWithCipher' returns the same thing forevermore. -} +prop_hmacWithCipher_sane :: Bool +prop_hmacWithCipher_sane = known_good == hmacWithCipher' "foo" "bar" + where + known_good = "46b4ec586117154dacd49d664e5d63fdc88efb51" diff --git a/GPL b/GPL new file mode 120000 index 0000000000..9539e93f72 --- /dev/null +++ b/GPL @@ -0,0 +1 @@ +doc/GPL \ No newline at end of file diff --git a/Git.hs b/Git.hs new file mode 100644 index 0000000000..9420810a67 --- /dev/null +++ b/Git.hs @@ -0,0 +1,116 @@ +{- git repository handling + - + - This is written to be completely independent of git-annex and should be + - suitable for other uses. + - + - Copyright 2010, 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Git ( + Repo(..), + Ref(..), + Branch, + Sha, + Tag, + repoIsUrl, + repoIsSsh, + repoIsHttp, + repoIsLocalBare, + repoDescribe, + repoLocation, + workTree, + gitDir, + configTrue, + attributes, + assertLocal, +) where + +import qualified Data.Map as M +import Data.Char +import Network.URI (uriPath, uriScheme) + +import Common +import Git.Types + +{- User-visible description of a git repo.
-} +repoDescribe :: Repo -> String +repoDescribe Repo { remoteName = Just name } = name +repoDescribe Repo { location = Url url } = show url +repoDescribe Repo { location = Dir dir } = dir +repoDescribe Repo { location = Unknown } = "UNKNOWN" + +{- Location of the repo, either as a path or url. -} +repoLocation :: Repo -> String +repoLocation Repo { location = Url url } = show url +repoLocation Repo { location = Dir dir } = dir +repoLocation Repo { location = Unknown } = undefined + +{- Some code needs to vary between URL and normal repos, + - or bare and non-bare, these functions help with that. -} +repoIsUrl :: Repo -> Bool +repoIsUrl Repo { location = Url _ } = True +repoIsUrl _ = False + +repoIsSsh :: Repo -> Bool +repoIsSsh Repo { location = Url url } + | scheme == "ssh:" = True + -- git treats these the same as ssh + | scheme == "git+ssh:" = True + | scheme == "ssh+git:" = True + | otherwise = False + where + scheme = uriScheme url +repoIsSsh _ = False + +repoIsHttp :: Repo -> Bool +repoIsHttp Repo { location = Url url } + | uriScheme url == "http:" = True + | uriScheme url == "https:" = True + | otherwise = False +repoIsHttp _ = False + +configAvail ::Repo -> Bool +configAvail Repo { config = c } = c /= M.empty + +repoIsLocalBare :: Repo -> Bool +repoIsLocalBare r@(Repo { location = Dir _ }) = configAvail r && configBare r +repoIsLocalBare _ = False + +assertLocal :: Repo -> a -> a +assertLocal repo action = + if not $ repoIsUrl repo + then action + else error $ "acting on non-local git repo " ++ repoDescribe repo ++ + " not supported" +configBare :: Repo -> Bool +configBare repo = maybe unknown configTrue $ M.lookup "core.bare" $ config repo + where + unknown = error $ "it is not known if git repo " ++ + repoDescribe repo ++ + " is a bare repository; config not read" + +{- Path to a repository's gitattributes file. 
-} +attributes :: Repo -> String +attributes repo + | configBare repo = workTree repo ++ "/info/.gitattributes" + | otherwise = workTree repo ++ "/.gitattributes" + +{- Path to a repository's .git directory. For a bare repository, this is + - the same as workTree. -} +gitDir :: Repo -> String +gitDir repo + | configBare repo = workTree repo + | otherwise = workTree repo </> ".git" + +{- Path to a repository's --work-tree, that is, its top. + - + - Note that for URL repositories, this is the path on the remote host. -} +workTree :: Repo -> FilePath +workTree Repo { location = Url u } = uriPath u +workTree Repo { location = Dir d } = d +workTree Repo { location = Unknown } = undefined + +{- Checks if a string from git config is a true value. -} +configTrue :: String -> Bool +configTrue s = map toLower s == "true" diff --git a/Git/Branch.hs b/Git/Branch.hs new file mode 100644 index 0000000000..cce56dcfa4 --- /dev/null +++ b/Git/Branch.hs @@ -0,0 +1,79 @@ +{- git branch stuff + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Git.Branch where + +import qualified Data.ByteString.Lazy.Char8 as L + +import Common +import Git +import Git.Sha +import Git.Command + +{- Checks if the second branch has any commits not present on the first + - branch. -} +changed :: Branch -> Branch -> Repo -> IO Bool +changed origbranch newbranch repo + | origbranch == newbranch = return False + | otherwise = not . L.null <$> diffs + where + diffs = pipeRead + [ Param "log" + , Param (show origbranch ++ ".." ++ show newbranch) + , Params "--oneline -n1" + ] repo + +{- Given a set of refs that are all known to have commits not + - on the branch, tries to update the branch by a fast-forward. + - + - In order for that to be possible, one of the refs must contain + - every commit present in all the other refs.
+ -} +fastForward :: Branch -> [Ref] -> Repo -> IO Bool +fastForward _ [] _ = return True +fastForward branch (first:rest) repo = do + -- First, check that the branch does not contain any + -- new commits that are not in the first ref. If it does, + -- cannot fast-forward. + diverged <- changed first branch repo + if diverged + then no_ff + else maybe no_ff do_ff =<< findbest first rest + where + no_ff = return False + do_ff to = do + run "update-ref" + [Param $ show branch, Param $ show to] repo + return True + findbest c [] = return $ Just c + findbest c (r:rs) + | c == r = findbest c rs + | otherwise = do + better <- changed c r repo + worse <- changed r c repo + case (better, worse) of + (True, True) -> return Nothing -- divergent fail + (True, False) -> findbest r rs -- better + (False, True) -> findbest c rs -- worse + (False, False) -> findbest c rs -- same + +{- Commits the index into the specified branch (or other ref), + - with the specified parent refs, and returns the committed sha -} +commit :: String -> Branch -> [Ref] -> Repo -> IO Sha +commit message branch parentrefs repo = do + tree <- getSha "write-tree" $ asString $ + pipeRead [Param "write-tree"] repo + sha <- getSha "commit-tree" $ asString $ + ignorehandle $ pipeWriteRead + (map Param $ ["commit-tree", show tree] ++ ps) + (L.pack message) repo + run "update-ref" [Param $ show branch, Param $ show sha] repo + return sha + where + ignorehandle a = snd <$> a + asString a = L.unpack <$> a + ps = concatMap (\r -> ["-p", show r]) parentrefs diff --git a/Git/CatFile.hs b/Git/CatFile.hs new file mode 100644 index 0000000000..16f0b11b95 --- /dev/null +++ b/Git/CatFile.hs @@ -0,0 +1,75 @@ +{- git cat-file interface + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. 
+ -} + +module Git.CatFile ( + CatFileHandle, + catFileStart, + catFileStop, + catFile, + catObject +) where + +import Control.Monad.State +import System.Cmd.Utils +import System.IO +import qualified Data.ByteString.Char8 as S +import qualified Data.ByteString.Lazy.Char8 as L + +import Common +import Git +import Git.Sha +import Git.Command + +type CatFileHandle = (PipeHandle, Handle, Handle) + +{- Starts git cat-file running in batch mode in a repo and returns a handle. -} +catFileStart :: Repo -> IO CatFileHandle +catFileStart repo = hPipeBoth "git" $ toCommand $ + gitCommandLine [Param "cat-file", Param "--batch"] repo + +{- Stops git cat-file. -} +catFileStop :: CatFileHandle -> IO () +catFileStop (pid, from, to) = do + hClose to + hClose from + forceSuccess pid + +{- Reads a file from a specified branch. -} +catFile :: CatFileHandle -> Branch -> FilePath -> IO L.ByteString +catFile h branch file = catObject h $ Ref $ show branch ++ ":" ++ file + +{- Uses a running git cat-file read the content of an object. + - Objects that do not exist will have "" returned. 
-} +catObject :: CatFileHandle -> Ref -> IO L.ByteString +catObject (_, from, to) object = do + hPutStrLn to $ show object + hFlush to + header <- hGetLine from + case words header of + [sha, objtype, size] + | length sha == shaSize && + validobjtype objtype -> handle size + | otherwise -> dne + _ + | header == show object ++ " missing" -> dne + | otherwise -> error $ "unknown response from git cat-file " ++ header + where + handle size = case reads size of + [(bytes, "")] -> readcontent bytes + _ -> dne + readcontent bytes = do + content <- S.hGet from bytes + c <- hGetChar from + when (c /= '\n') $ + error "missing newline from git cat-file" + return $ L.fromChunks [content] + dne = return L.empty + validobjtype t + | t == "blob" = True + | t == "commit" = True + | t == "tree" = True + | otherwise = False diff --git a/Git/CheckAttr.hs b/Git/CheckAttr.hs new file mode 100644 index 0000000000..eedaf66420 --- /dev/null +++ b/Git/CheckAttr.hs @@ -0,0 +1,66 @@ +{- git check-attr interface + - + - Copyright 2010, 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Git.CheckAttr where + +import System.Exit + +import Common +import Git +import Git.Command +import qualified Git.Filename +import qualified Git.Version + +{- Efficiently looks up a gitattributes value for each file in a list. -} +lookup :: String -> [FilePath] -> Repo -> IO [(FilePath, String)] +lookup attr files repo = do + cwd <- getCurrentDirectory + (_, fromh, toh) <- hPipeBoth "git" (toCommand params) + _ <- forkProcess $ do + hClose fromh + hPutStr toh $ join "\0" $ input cwd + hClose toh + exitSuccess + hClose toh + output cwd . lines <$> hGetContents fromh + where + params = gitCommandLine + [ Param "check-attr" + , Param attr + , Params "-z --stdin" + ] repo + + {- Before git 1.7.7, git check-attr worked best with + - absolute filenames; using them worked around some bugs + - with relative filenames. 
+ - + - With newer git, git check-attr chokes on some absolute + - filenames, and the bugs that necessitated them were fixed, + - so use relative filenames. -} + oldgit = Git.Version.older "1.7.7" + input cwd + | oldgit = map (absPathFrom cwd) files + | otherwise = map (relPathDirToFile cwd . absPathFrom cwd) files + output cwd + | oldgit = map (torel cwd . topair) + | otherwise = map topair + + topair l = (Git.Filename.decode file, value) + where + file = join sep $ beginning bits + value = end bits !! 0 + bits = split sep l + sep = ": " ++ attr ++ ": " + + torel cwd (file, value) = (relfile, value) + where + relfile + | startswith cwd' file = drop (length cwd') file + | otherwise = relPathDirToFile top' file + top = workTree repo + cwd' = cwd ++ "/" + top' = top ++ "/" diff --git a/Git/Command.hs b/Git/Command.hs new file mode 100644 index 0000000000..2350bb0ca3 --- /dev/null +++ b/Git/Command.hs @@ -0,0 +1,82 @@ +{- running git commands + - + - Copyright 2010, 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Git.Command where + +import qualified Data.ByteString.Lazy.Char8 as L + +import Common +import Git +import Git.Types + +{- Constructs a git command line operating on the specified repo. -} +gitCommandLine :: [CommandParam] -> Repo -> [CommandParam] +gitCommandLine params repo@(Repo { location = Dir _ } ) = + -- force use of specified repo via --git-dir and --work-tree + [ Param ("--git-dir=" ++ gitDir repo) + , Param ("--work-tree=" ++ workTree repo) + ] ++ params +gitCommandLine _ repo = assertLocal repo $ error "internal" + +{- Runs git in the specified repo. -} +runBool :: String -> [CommandParam] -> Repo -> IO Bool +runBool subcommand params repo = assertLocal repo $ + boolSystem "git" $ gitCommandLine (Param subcommand : params) repo + +{- Runs git in the specified repo, throwing an error if it fails. 
-} +run :: String -> [CommandParam] -> Repo -> IO () +run subcommand params repo = assertLocal repo $ + runBool subcommand params repo + >>! error $ "git " ++ subcommand ++ " " ++ show params ++ " failed" + +{- Runs a git subcommand and returns its output, lazily. + - + - Note that this leaves the git process running, and so zombies will + - result unless reap is called. + -} +pipeRead :: [CommandParam] -> Repo -> IO L.ByteString +pipeRead params repo = assertLocal repo $ do + (_, h) <- hPipeFrom "git" $ toCommand $ gitCommandLine params repo + hSetBinaryMode h True + L.hGetContents h + +{- Runs a git subcommand, feeding it input. + - You should call either getProcessStatus or forceSuccess on the PipeHandle. -} +pipeWrite :: [CommandParam] -> L.ByteString -> Repo -> IO PipeHandle +pipeWrite params s repo = assertLocal repo $ do + (p, h) <- hPipeTo "git" (toCommand $ gitCommandLine params repo) + L.hPut h s + hClose h + return p + +{- Runs a git subcommand, feeding it input, and returning its output. + - You should call either getProcessStatus or forceSuccess on the PipeHandle. -} +pipeWriteRead :: [CommandParam] -> L.ByteString -> Repo -> IO (PipeHandle, L.ByteString) +pipeWriteRead params s repo = assertLocal repo $ do + (p, from, to) <- hPipeBoth "git" (toCommand $ gitCommandLine params repo) + hSetBinaryMode from True + L.hPut to s + hClose to + c <- L.hGetContents from + return (p, c) + +{- Reads null terminated output of a git command (as enabled by the -z + - parameter), and splits it. -} +pipeNullSplit :: [CommandParam] -> Repo -> IO [String] +pipeNullSplit params repo = map L.unpack <$> pipeNullSplitB params repo + +{- For when Strings are not needed. Empty segments are dropped. -} +pipeNullSplitB :: [CommandParam] -> Repo -> IO [L.ByteString] +pipeNullSplitB params repo = filter (not . L.null) . L.split '\0' <$> + pipeRead params repo + +{- Reaps any zombie git processes. 
-} +reap :: IO () +reap = do + -- throws an exception when there are no child processes + r <- catchDefaultIO (getAnyProcessStatus False True) Nothing + maybe (return ()) (const reap) r diff --git a/Git/Config.hs b/Git/Config.hs new file mode 100644 index 0000000000..7b72eba5a3 --- /dev/null +++ b/Git/Config.hs @@ -0,0 +1,60 @@ +{- git repository configuration handling + - + - Copyright 2010,2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Git.Config where + +import System.Posix.Directory +import Control.Exception (bracket_) +import qualified Data.Map as M + +import Common +import Git +import Git.Types +import qualified Git.Construct + +{- Returns a single git config setting, or a default value if not set. -} +get :: String -> String -> Repo -> String +get key defaultValue repo = M.findWithDefault defaultValue key (config repo) + +{- Runs git config and populates a repo with its config. -} +read :: Repo -> IO Repo +read repo@(Repo { location = Dir d }) = do + {- Cannot use pipeRead because it relies on the config having + been already read. Instead, chdir to the repo. -} + cwd <- getCurrentDirectory + bracket_ (changeWorkingDirectory d) (changeWorkingDirectory cwd) $ + pOpen ReadFromPipe "git" ["config", "--null", "--list"] $ + hRead repo +read r = assertLocal r $ error "internal" + +{- Reads git config from a handle and populates a repo with it. -} +hRead :: Repo -> Handle -> IO Repo +hRead repo h = do + val <- hGetContentsStrict h + store val repo + +{- Stores a git config into a repo, returning the new version of the repo. + - The git config may be multiple lines, or a single line. Config settings + - can be updated incrementally: newly parsed settings take precedence over + - those already stored (M.union is left-biased), and the list of remotes + - is rebuilt from the merged config. -} +store :: String -> Repo -> IO Repo +store s repo = do + let repo' = repo { config = parse s `M.union` config repo } + rs <- Git.Construct.fromRemotes repo' + return $ repo' { remotes = rs } + +{- Parses git config --list or git config --null --list output into a + - config map. 
-} +parse :: String -> M.Map String String +parse [] = M.empty +parse s + -- --list output will have an = in the first line + | all ('=' `elem`) (take 1 ls) = sep '=' ls + -- --null --list output separates keys from values with newlines + | otherwise = sep '\n' $ split "\0" s + where + ls = lines s + sep c = M.fromList . map (separate (== c)) diff --git a/Git/Construct.hs b/Git/Construct.hs new file mode 100644 index 0000000000..a35a87cc77 --- /dev/null +++ b/Git/Construct.hs @@ -0,0 +1,215 @@ +{- Construction of Git Repo objects + - + - Copyright 2010,2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Git.Construct ( + fromCwd, + fromAbsPath, + fromUrl, + fromUnknown, + localToUrl, + remoteNamed, + remoteNamedFromKey, + fromRemotes, + fromRemoteLocation, + repoAbsPath, +) where + +import System.Posix.User +import qualified Data.Map as M hiding (map, split) +import Network.URI + +import Common +import Git.Types +import Git +import qualified Git.Url as Url + +{- Finds the current git repository, which may be in a parent directory. -} +fromCwd :: IO Repo +fromCwd = getCurrentDirectory >>= seekUp isRepoTop >>= maybe norepo makerepo + where + makerepo = return . newFrom . Dir + norepo = error "Not in a git repository." + +{- Local Repo constructor, requires an absolute path to the repo be + - specified. -} +fromAbsPath :: FilePath -> IO Repo +fromAbsPath dir + | "/" `isPrefixOf` dir = do + -- Git always looks for "dir.git" in preference + -- to "dir", even if dir ends in a "/". + let canondir = dropTrailingPathSeparator dir + let dir' = canondir ++ ".git" + e <- doesDirectoryExist dir' + if e + then ret dir' + else if "/.git" `isSuffixOf` canondir + then do + -- When dir == "foo/.git", git looks + -- for "foo/.git/.git", and failing + -- that, uses "foo" as the repository. 
+ e' <- doesDirectoryExist $ dir </> ".git" + if e' + then ret dir + else ret $ takeDirectory canondir + else ret dir + | otherwise = error $ "internal error, " ++ dir ++ " is not absolute" + where + ret = return . newFrom . Dir + +{- Remote Repo constructor. Throws exception on invalid url. + - A file:// url is turned into a local Repo via fromAbsPath. -} +fromUrl :: String -> IO Repo +fromUrl url + | startswith "file://" url = fromAbsPath $ uriPath u + | otherwise = return $ newFrom $ Url u + where + u = fromMaybe bad $ parseURI url + bad = error $ "bad url " ++ url + +{- Creates a repo that has an unknown location. -} +fromUnknown :: IO Repo +fromUnknown = return $ newFrom Unknown + +{- Converts a local Repo into a remote repo, using the reference repo + - which is assumed to be on the same host. -} +localToUrl :: Repo -> Repo -> Repo +localToUrl reference r + | not $ repoIsUrl reference = error "internal error; reference repo not url" + | repoIsUrl r = r + | otherwise = r { location = Url $ fromJust $ parseURI absurl } + where + absurl = + Url.scheme reference ++ "//" ++ + Url.authority reference ++ + workTree r + +{- Calculates a list of a repo's configured remotes, by parsing its config. -} +fromRemotes :: Repo -> IO [Repo] +fromRemotes repo = mapM construct remotepairs + where + filterconfig f = filter f $ M.toList $ config repo + filterkeys f = filterconfig (\(k,_) -> f k) + remotepairs = filterkeys isremote + isremote k = startswith "remote." k && endswith ".url" k + construct (k,v) = remoteNamedFromKey k $ fromRemoteLocation v repo + +{- Sets the name of a remote when constructing the Repo to represent it. -} +remoteNamed :: String -> IO Repo -> IO Repo +remoteNamed n constructor = do + r <- constructor + return $ r { remoteName = Just n } + +{- Sets the name of a remote based on the git config key, such as + "remote.foo.url". -} +remoteNamedFromKey :: String -> IO Repo -> IO Repo +remoteNamedFromKey k = remoteNamed basename + where + basename = join "." $ reverse $ drop 1 $ + reverse $ drop 1 $ split "." 
k + +{- Constructs a new Repo for one of a Repo's remotes using a given + - location (ie, an url). -} +fromRemoteLocation :: String -> Repo -> IO Repo +fromRemoteLocation s repo = gen $ calcloc s + where + filterconfig f = filter f $ M.toList $ config repo + gen v + | scpstyle v = fromUrl $ scptourl v + | isURI v = fromUrl v + | otherwise = fromRemotePath v repo + -- insteadof config can rewrite remote location + calcloc l + | null insteadofs = l + | otherwise = replacement ++ drop (length bestvalue) l + where + replacement = drop (length prefix) $ + take (length bestkey - length suffix) bestkey + (bestkey, bestvalue) = maximumBy longestvalue insteadofs + -- when several insteadof values match, the longest one wins + longestvalue (_, a) (_, b) = compare (length a) (length b) + insteadofs = filterconfig $ \(k, v) -> + startswith prefix k && + endswith suffix k && + startswith v l + (prefix, suffix) = ("url." , ".insteadof") + -- git remotes can be written scp style -- [user@]host:dir + scpstyle v = ":" `isInfixOf` v && not ("//" `isInfixOf` v) + scptourl v = "ssh://" ++ host ++ slash dir + where + (host, dir) = separate (== ':') v + slash d | d == "" = "/~/" ++ d + | "/" `isPrefixOf` d = d + | "~" `isPrefixOf` d = '/':d + | otherwise = "/~/" ++ d + +{- Constructs a Repo from the path specified in the git remotes of + - another Repo. The path is tilde-expanded and then joined onto that + - Repo's work tree. -} +fromRemotePath :: FilePath -> Repo -> IO Repo +fromRemotePath dir repo = do + dir' <- expandTilde dir + fromAbsPath $ workTree repo </> dir' + +{- Git remotes can have a directory that is specified relative + - to the user's home directory, or that contains tilde expansions. + - This converts such a directory to an absolute path. + - Note that it has to run on the system where the remote is. 
+ -} +repoAbsPath :: FilePath -> IO FilePath +repoAbsPath d = do + d' <- expandTilde d + h <- myHomeDir + return $ h </> d' + +{- Expands "~" and "~user" into the corresponding home directory. A tilde + - is only expanded at the start of the path or directly after a "/". -} +expandTilde :: FilePath -> IO FilePath +expandTilde = expandt True + where + expandt _ [] = return "" + expandt _ ('/':cs) = do + v <- expandt True cs + return ('/':v) + expandt True ('~':'/':cs) = do + h <- myHomeDir + return $ h </> cs + expandt True ('~':cs) = do + let (name, rest) = findname "" cs + u <- getUserEntryForName name + return $ homeDirectory u </> rest + expandt _ (c:cs) = do + v <- expandt False cs + return (c:v) + findname n [] = (n, "") + findname n (c:cs) + | c == '/' = (n, cs) + | otherwise = findname (n++[c]) cs + +{- Walks up from dir through its parent directories until want returns + - True; gives Nothing once parentDir returns "". -} +seekUp :: (FilePath -> IO Bool) -> FilePath -> IO (Maybe FilePath) +seekUp want dir = do + ok <- want dir + if ok + then return $ Just dir + else case parentDir dir of + "" -> return Nothing + d -> seekUp want d + +{- True when dir contains a .git directory and a .git/config file, or + - (bare repo) an objects directory and a config file. -} +isRepoTop :: FilePath -> IO Bool +isRepoTop dir = do + r <- isRepo + b <- isBareRepo + return (r || b) + where + isRepo = gitSignature ".git" ".git/config" + isBareRepo = gitSignature "objects" "config" + gitSignature subdir file = liftM2 (&&) + (doesDirectoryExist (dir ++ "/" ++ subdir)) + (doesFileExist (dir ++ "/" ++ file)) + +{- Creates a Repo at the given location with empty config, no remotes, + - and no remote name. -} +newFrom :: RepoLocation -> Repo +newFrom l = + Repo { + location = l, + config = M.empty, + remotes = [], + remoteName = Nothing + } diff --git a/Git/Filename.hs b/Git/Filename.hs new file mode 100644 index 0000000000..5e076d3b5a --- /dev/null +++ b/Git/Filename.hs @@ -0,0 +1,28 @@ +{- Some git commands output encoded filenames, in a rather annoyingly complex + - C-style encoding. + - + - Copyright 2010, 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. 
+ -} + +module Git.Filename where + +import Utility.Format (decode_c, encode_c) + +import Common + +{- Decodes a filename as output by git. Only strings wrapped in double + - quotes are decoded; anything else is returned unchanged. -} +decode :: String -> FilePath +decode [] = [] +decode f@(c:s) + -- encoded strings will be inside double quotes + | c == '"' && end s == ['"'] = decode_c $ beginning s + | otherwise = f + +{- Should not need to use this, except for testing decode. -} +encode :: FilePath -> String +encode s = "\"" ++ encode_c s ++ "\"" + +{- for quickcheck -} +prop_idempotent_deencode :: String -> Bool +prop_idempotent_deencode s = s == decode (encode s) diff --git a/Git/HashObject.hs b/Git/HashObject.hs new file mode 100644 index 0000000000..f5e6d50cdf --- /dev/null +++ b/Git/HashObject.hs @@ -0,0 +1,32 @@ +{- git hash-object interface + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Git.HashObject where + +import Common +import Git +import Git.Command + +{- Injects a set of files into git, returning the shas of the objects + - and an IO action to call once the shas have been used. -} +hashFiles :: [FilePath] -> Repo -> IO ([Sha], IO ()) +hashFiles paths repo = do + (pid, fromh, toh) <- hPipeBoth "git" $ toCommand $ git_hash_object repo + _ <- forkProcess (feeder toh) + hClose toh + shas <- map Ref . lines <$> hGetContentsStrict fromh + return (shas, ender fromh pid) + where + git_hash_object = gitCommandLine + [Param "hash-object", Param "-w", Param "--stdin-paths"] + feeder toh = do + hPutStr toh $ unlines paths + hClose toh + exitSuccess + ender fromh pid = do + hClose fromh + forceSuccess pid diff --git a/Git/Index.hs b/Git/Index.hs new file mode 100644 index 0000000000..aaf54e032e --- /dev/null +++ b/Git/Index.hs @@ -0,0 +1,24 @@ +{- git index file stuff + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Git.Index where + +import System.Posix.Env (setEnv, unsetEnv, getEnv) + +{- Forces git to use the specified index file. 
+ - + - Returns an action that will reset back to the default + - index file. -} +override :: FilePath -> IO (IO ()) +override index = do + res <- getEnv var + setEnv var index True + return $ reset res + where + var = "GIT_INDEX_FILE" + reset (Just v) = setEnv var v True + reset _ = unsetEnv var diff --git a/Git/LsFiles.hs b/Git/LsFiles.hs new file mode 100644 index 0000000000..0de86383d3 --- /dev/null +++ b/Git/LsFiles.hs @@ -0,0 +1,71 @@ +{- git ls-files interface + - + - Copyright 2010 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Git.LsFiles ( + inRepo, + notInRepo, + staged, + stagedNotDeleted, + changedUnstaged, + typeChanged, + typeChangedStaged, +) where + +import Common +import Git +import Git.Command + +{- Scans for files that are checked into git at the specified locations. -} +inRepo :: [FilePath] -> Repo -> IO [FilePath] +inRepo l = pipeNullSplit $ Params "ls-files --cached -z --" : map File l + +{- Scans for files at the specified locations that are not checked into git. -} +notInRepo :: Bool -> [FilePath] -> Repo -> IO [FilePath] +notInRepo include_ignored l repo = pipeNullSplit params repo + where + params = [Params "ls-files --others"] ++ exclude ++ + [Params "-z --"] ++ map File l + exclude + | include_ignored = [] + | otherwise = [Param "--exclude-standard"] + +{- Returns a list of all files that are staged for commit. -} +staged :: [FilePath] -> Repo -> IO [FilePath] +staged = staged' [] + +{- Returns a list of the files, staged for commit, that are being added, + - moved, or changed (but not deleted), from the specified locations. 
-} +stagedNotDeleted :: [FilePath] -> Repo -> IO [FilePath] +stagedNotDeleted = staged' [Param "--diff-filter=ACMRT"] + +staged' :: [CommandParam] -> [FilePath] -> Repo -> IO [FilePath] +staged' ps l = pipeNullSplit $ prefix ++ ps ++ suffix + where + prefix = [Params "diff --cached --name-only -z"] + suffix = Param "--" : map File l + +{- Returns a list of files that have unstaged changes. -} +changedUnstaged :: [FilePath] -> Repo -> IO [FilePath] +changedUnstaged l = pipeNullSplit params + where + params = Params "diff --name-only -z --" : map File l + +{- Returns a list of the files in the specified locations that are staged + - for commit, and whose type has changed. -} +typeChangedStaged :: [FilePath] -> Repo -> IO [FilePath] +typeChangedStaged = typeChanged' [Param "--cached"] + +{- Returns a list of the files in the specified locations whose type has + - changed. Files only staged for commit will not be included. -} +typeChanged :: [FilePath] -> Repo -> IO [FilePath] +typeChanged = typeChanged' [] + +typeChanged' :: [CommandParam] -> [FilePath] -> Repo -> IO [FilePath] +typeChanged' ps l = pipeNullSplit $ prefix ++ ps ++ suffix + where + prefix = [Params "diff --name-only --diff-filter=T -z"] + suffix = Param "--" : map File l diff --git a/Git/LsTree.hs b/Git/LsTree.hs new file mode 100644 index 0000000000..aae7f1263b --- /dev/null +++ b/Git/LsTree.hs @@ -0,0 +1,52 @@ +{- git ls-tree interface + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. 
+ -} + +module Git.LsTree ( + TreeItem(..), + lsTree, + parseLsTree +) where + +import Numeric +import Control.Applicative +import System.Posix.Types +import qualified Data.ByteString.Lazy.Char8 as L + +import Common +import Git +import Git.Command +import qualified Git.Filename + +data TreeItem = TreeItem + { mode :: FileMode + , typeobj :: String + , sha :: String + , file :: FilePath + } deriving Show + +{- Lists the contents of a Ref -} +lsTree :: Ref -> Repo -> IO [TreeItem] +lsTree t repo = map parseLsTree <$> + pipeNullSplitB [Params "ls-tree --full-tree -z -r --", File $ show t] repo + +{- Parses a line of ls-tree output. + - (The --long format is not currently supported.) -} +parseLsTree :: L.ByteString -> TreeItem +parseLsTree l = TreeItem + { mode = fst $ Prelude.head $ readOct $ L.unpack m + , typeobj = L.unpack t + , sha = L.unpack s + , file = Git.Filename.decode $ L.unpack f + } + where + -- l = mode SP type SP sha TAB file + -- All fields are fixed, so we can pull them out of + -- specific positions in the line. + (m, past_m) = L.splitAt 7 l + (t, past_t) = L.splitAt 4 past_m + (s, past_s) = L.splitAt 40 $ L.tail past_t + f = L.tail past_s diff --git a/Git/Queue.hs b/Git/Queue.hs new file mode 100644 index 0000000000..25c5b073c7 --- /dev/null +++ b/Git/Queue.hs @@ -0,0 +1,93 @@ +{- git repository command queue + - + - Copyright 2010 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Git.Queue ( + Queue, + new, + add, + size, + full, + flush +) where + +import qualified Data.Map as M +import System.IO +import System.Cmd.Utils +import Data.String.Utils +import Utility.SafeCommand + +import Common +import Git +import Git.Command + +{- An action to perform in a git repository. The file to act on + - is not included, and must be able to be appended after the params. 
-} +data Action = Action + { getSubcommand :: String + , getParams :: [CommandParam] + } deriving (Show, Eq, Ord) + +{- A queue of actions to perform (in any order) on a git repository, + - with lists of files to perform them on. This allows coalescing + - similar git commands. -} +data Queue = Queue Int (M.Map Action [FilePath]) + deriving (Show, Eq) + +{- A recommended maximum size for the queue, after which it should be + - run. + - + - 10240 is semi-arbitrary. If we assume git filenames are between 10 and + - 255 characters long, then the queue will build up between 100kb and + - 2550kb long commands. The max command line length on linux is somewhere + - above 20k, so this is a fairly good balance -- the queue will buffer + - only a few megabytes of stuff and a minimal number of commands will be + - run by xargs. -} +maxSize :: Int +maxSize = 10240 + +{- Constructor for empty queue. -} +new :: Queue +new = Queue 0 M.empty + +{- Adds an action to a queue. -} +add :: Queue -> String -> [CommandParam] -> [FilePath] -> Queue +add (Queue n m) subcommand params files = Queue (n + 1) m' + where + action = Action subcommand params + -- There are probably few items in the map, but there + -- can be a lot of files per item. So, optimise adding + -- files. + m' = M.insertWith' const action fs m + fs = files ++ M.findWithDefault [] action m + +{- Number of items in a queue. -} +size :: Queue -> Int +size (Queue n _) = n + +{- Is a queue large enough that it should be flushed? -} +full :: Queue -> Bool +full (Queue n _) = n > maxSize + +{- Runs a queue on a git repository. -} +flush :: Queue -> Repo -> IO Queue +flush (Queue _ m) repo = do + forM_ (M.toList m) $ uncurry $ runAction repo + return new + +{- Runs an Action on a list of files in a git repository. + - + - Complicated by commandline length limits. + - + - Intentionally runs the command even if the list of files is empty; + - this allows queueing commands that do not need a list of files. 
-} +runAction :: Repo -> Action -> [FilePath] -> IO () +runAction repo action files = + pOpen WriteToPipe "xargs" ("-0":"git":params) feedxargs + where + params = toCommand $ gitCommandLine + (Param (getSubcommand action):getParams action) repo + feedxargs h = hPutStr h $ join "\0" files diff --git a/Git/Ref.hs b/Git/Ref.hs new file mode 100644 index 0000000000..0197ae7893 --- /dev/null +++ b/Git/Ref.hs @@ -0,0 +1,48 @@ +{- git ref stuff + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Git.Ref where + +import qualified Data.ByteString.Lazy.Char8 as L + +import Common +import Git +import Git.Command + +{- Converts a fully qualified git ref into a user-visible version. -} +describe :: Ref -> String +describe = remove "refs/heads/" . remove "refs/remotes/" . show + where + remove prefix s + | prefix `isPrefixOf` s = drop (length prefix) s + | otherwise = s + +{- Checks if a ref exists. -} +exists :: Ref -> Repo -> IO Bool +exists ref = runBool "show-ref" + [Param "--verify", Param "-q", Param $ show ref] + +{- Get the sha of a fully qualified git ref, if it exists. -} +sha :: Branch -> Repo -> IO (Maybe Sha) +sha branch repo = process . L.unpack <$> showref repo + where + showref = pipeRead [Param "show-ref", + Param "--hash", -- get the hash + Param $ show branch] + process [] = Nothing + process s = Just $ Ref $ firstLine s + +{- List of (refs, branches) matching a given ref spec. + - Duplicate refs are filtered out. -} +matching :: Ref -> Repo -> IO [(Ref, Branch)] +matching ref repo = do + r <- pipeRead [Param "show-ref", Param $ show ref] repo + return $ nubBy uniqref $ map (gen . 
L.unpack) (L.lines r) + where + uniqref (a, _) (b, _) = a == b + gen l = let (r, b) = separate (== ' ') l in + (Ref r, Ref b) diff --git a/Git/Sha.hs b/Git/Sha.hs new file mode 100644 index 0000000000..cdf9853cfa --- /dev/null +++ b/Git/Sha.hs @@ -0,0 +1,39 @@ +{- git SHA stuff + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Git.Sha where + +import Common +import Git.Types + +{- Runs an action that causes a git subcommand to emit a Sha, and strips + any trailing newline, returning the sha. -} +getSha :: String -> IO String -> IO Sha +getSha subcommand a = maybe bad return =<< extractSha <$> a + where + bad = error $ "failed to read sha from git " ++ subcommand + +{- Extracts the Sha from a string. There can be a trailing newline after + - it, but nothing else. -} +extractSha :: String -> Maybe Sha +extractSha s + | len == shaSize = val s + | len == shaSize + 1 && length s' == shaSize = val s' + | otherwise = Nothing + where + len = length s + s' = firstLine s + val v + | isSha v = Just $ Ref v + | otherwise = Nothing + +isSha :: String -> Bool +isSha v = all (`elem` "1234567890ABCDEFabcdef") v && length v == shaSize + +{- Size of a git sha. -} +shaSize :: Int +shaSize = 40 diff --git a/Git/TweakFetch.hs b/Git/TweakFetch.hs new file mode 100644 index 0000000000..41cc0499bd --- /dev/null +++ b/Git/TweakFetch.hs @@ -0,0 +1,79 @@ +{- git tweak-fetch hook support + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Git.TweakFetch (runHook, FetchedRef(..)) where + +import Data.Either (rights) +import System.Posix.IO + +import Common +import Git +import Git.Sha + +data FetchedRef = FetchedRef + { sha :: Sha + , merge :: Bool + , remote :: Ref + , local :: Ref + } + deriving (Show) + +{- Each line fed to the tweak-fetch hook should represent a ref that is + - being updated. 
It's important that the hook always outputs every line + - that is fed into it (possibly modified), otherwise incoming refs will + - not be stored. So to avoid breaking if the format changes, unparsable + - lines are passed through unchanged. -} +type HookLine = Either String FetchedRef + +{- Runs the hook, allowing lines to be mutated, but never be discarded. + - Returns same FetchedRefs that are output by the hook, for further use. -} +runHook :: (FetchedRef -> IO FetchedRef) -> IO [FetchedRef] +runHook mutate = do + ls <- mapM go =<< input + output ls + + -- Nothing more should be output to stdout; only hook output + -- is accepted by git. Redirect stdout to stderr. + hFlush stdout + _ <- liftIO $ dupTo stdError stdOutput + + return $ rights ls + where + go u@(Left _) = return u + go (Right r) = Right <$> catchDefaultIO (mutate r) r + +input :: IO [HookLine] +input = map parseLine . lines <$> getContents + +output :: [HookLine] -> IO () +output = mapM_ $ putStrLn . genLine + +parseLine :: String -> HookLine +parseLine line = go $ words line + where + go [s, m, r, l] + | not $ isSha s = Left line + | m == "merge" = parsed True + | m == "not-for-merge" = parsed False + | otherwise = Left line + where + parsed v = Right $ FetchedRef + { sha = Ref s + , merge = v + , remote = Ref r + , local = Ref l + } + go _ = Left line + +genLine :: HookLine -> String +genLine (Left l) = l +genLine (Right r) = unwords + [ show $ sha r + , if merge r then "merge" else "not-for-merge" + , show $ remote r + , show $ local r + ] diff --git a/Git/Types.hs b/Git/Types.hs new file mode 100644 index 0000000000..250da5f5e5 --- /dev/null +++ b/Git/Types.hs @@ -0,0 +1,36 @@ +{- git data types + - + - Copyright 2010,2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Git.Types where + +import Network.URI +import qualified Data.Map as M + +{- There are two types of repositories; those on local disk and those + - accessed via an URL. 
-} +data RepoLocation = Dir FilePath | Url URI | Unknown + deriving (Show, Eq) + +data Repo = Repo { + location :: RepoLocation, + config :: M.Map String String, + remotes :: [Repo], + -- remoteName holds the name used for this repo in remotes + remoteName :: Maybe String +} deriving (Show, Eq) + +{- A git ref. Can be a sha1, or a branch or tag name. -} +newtype Ref = Ref String + deriving (Eq) + +instance Show Ref where + show (Ref v) = v + +{- Aliases for Ref. -} +type Branch = Ref +type Sha = Ref +type Tag = Ref diff --git a/Git/UnionMerge.hs b/Git/UnionMerge.hs new file mode 100644 index 0000000000..d5323af1d1 --- /dev/null +++ b/Git/UnionMerge.hs @@ -0,0 +1,141 @@ +{- git-union-merge library + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Git.UnionMerge ( + merge, + merge_index, + update_index, + stream_update_index, + update_index_line, + ls_tree +) where + +import System.Cmd.Utils +import qualified Data.ByteString.Lazy.Char8 as L +import qualified Data.Set as S + +import Common +import Git +import Git.Sha +import Git.CatFile +import Git.Command + +type Streamer = (String -> IO ()) -> IO () + +{- Performs a union merge between two branches, staging it in the index. + - Any previously staged changes in the index will be lost. + - + - Should be run with a temporary index file configured by useIndex. + -} +merge :: Ref -> Ref -> Repo -> IO () +merge x y repo = do + h <- catFileStart repo + stream_update_index repo + [ ls_tree x repo + , merge_trees x y h repo + ] + catFileStop h + +{- Merges a list of branches into the index. Previously staged changes in + - the index are preserved (and participate in the merge). -} +merge_index :: CatFileHandle -> Repo -> [Ref] -> IO () +merge_index h repo bs = + stream_update_index repo $ map (\b -> merge_tree_index b h repo) bs + +{- Feeds content into update-index. 
Later items in the list can override + - earlier ones, so the list can be generated from any combination of + - ls_tree, merge_trees, and merge_tree_index. -} +update_index :: Repo -> [String] -> IO () +update_index repo ls = stream_update_index repo [(`mapM_` ls)] + +{- Streams content into update-index. -} +stream_update_index :: Repo -> [Streamer] -> IO () +stream_update_index repo as = do + (p, h) <- hPipeTo "git" (toCommand $ gitCommandLine params repo) + forM_ as (stream h) + hClose h + forceSuccess p + where + params = map Param ["update-index", "-z", "--index-info"] + stream h a = a (streamer h) + streamer h s = do + hPutStr h s + hPutStr h "\0" + +{- Generates a line suitable to be fed into update-index, to add + - a given file with a given sha. -} +update_index_line :: Sha -> FilePath -> String +update_index_line sha file = "100644 blob " ++ show sha ++ "\t" ++ file + +{- Gets the current tree for a ref. -} +ls_tree :: Ref -> Repo -> Streamer +ls_tree (Ref x) repo streamer = mapM_ streamer =<< pipeNullSplit params repo + where + params = map Param ["ls-tree", "-z", "-r", "--full-tree", x] + +{- For merging two trees. -} +merge_trees :: Ref -> Ref -> CatFileHandle -> Repo -> Streamer +merge_trees (Ref x) (Ref y) h = calc_merge h $ "diff-tree":diff_opts ++ [x, y] + +{- For merging a single tree into the index. -} +merge_tree_index :: Ref -> CatFileHandle -> Repo -> Streamer +merge_tree_index (Ref x) h = calc_merge h $ "diff-index":diff_opts ++ ["--cached", x] + +diff_opts :: [String] +diff_opts = ["--raw", "-z", "-r", "--no-renames", "-l0"] + +{- Calculates how to perform a merge, using git to get a raw diff, + - and returning a list suitable for update_index. 
-} +calc_merge :: CatFileHandle -> [String] -> Repo -> Streamer +calc_merge ch differ repo streamer = gendiff >>= go + where + gendiff = pipeNullSplit (map Param differ) repo + go [] = return () + go (info:file:rest) = mergeFile info file ch repo >>= + maybe (go rest) (\l -> streamer l >> go rest) + go (_:[]) = error "calc_merge parse error" + +{- Given an info line from a git raw diff, and the filename, generates + - a line suitable for update_index that union merges the two sides of the + - diff. -} +mergeFile :: String -> FilePath -> CatFileHandle -> Repo -> IO (Maybe String) +mergeFile info file h repo = case filter (/= nullsha) [Ref asha, Ref bsha] of + [] -> return Nothing + (sha:[]) -> use sha + shas -> use =<< either return (hashObject repo . L.unlines) =<< + calcMerge . zip shas <$> mapM getcontents shas + where + [_colonmode, _bmode, asha, bsha, _status] = words info + nullsha = Ref $ replicate shaSize '0' + getcontents s = L.lines <$> catObject h s + use sha = return $ Just $ update_index_line sha file + +{- Injects some content into git, returning its Sha. -} +hashObject :: Repo -> L.ByteString -> IO Sha +hashObject repo content = getSha subcmd $ do + (h, s) <- pipeWriteRead (map Param params) content repo + L.length s `seq` do + forceSuccess h + reap -- XXX unsure why this is needed + return $ L.unpack s + where + subcmd = "hash-object" + params = [subcmd, "-w", "--stdin"] + +{- Calculates a union merge between a list of refs, with contents. + - + - When possible, reuses the content of an existing ref, rather than + - generating new content. + -} +calcMerge :: [(Ref, [L.ByteString])] -> Either Ref [L.ByteString] +calcMerge shacontents + | null reuseable = Right $ new + | otherwise = Left $ fst $ Prelude.head reuseable + where + reuseable = filter (\c -> sorteduniq (snd c) == new) shacontents + new = sorteduniq $ concat $ map snd shacontents + sorteduniq = S.toList . 
S.fromList diff --git a/Git/Url.hs b/Git/Url.hs new file mode 100644 index 0000000000..6a893d92fe --- /dev/null +++ b/Git/Url.hs @@ -0,0 +1,70 @@ +{- git repository urls + - + - Copyright 2010, 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Git.Url ( + scheme, + host, + port, + hostuser, + authority, +) where + +import Network.URI hiding (scheme, authority) + +import Common +import Git.Types +import Git + +{- Scheme of an URL repo. -} +scheme :: Repo -> String +scheme Repo { location = Url u } = uriScheme u +scheme repo = notUrl repo + +{- Work around a bug in the real uriRegName + - -} +uriRegName' :: URIAuth -> String +uriRegName' a = fixup $ uriRegName a + where + fixup x@('[':rest) + | rest !! len == ']' = take len rest + | otherwise = x + where + len = length rest - 1 + fixup x = x + +{- Hostname of an URL repo. -} +host :: Repo -> String +host = authpart uriRegName' + +{- Port of an URL repo, if it has a nonstandard one. -} +port :: Repo -> Maybe Integer +port r = + case authpart uriPort r of + ":" -> Nothing + (':':p) -> readMaybe p + _ -> Nothing + +{- Hostname of an URL repo, including any username (ie, "user@host") -} +hostuser :: Repo -> String +hostuser r = authpart uriUserInfo r ++ authpart uriRegName' r + +{- The full authority portion an URL repo. (ie, "user@host:port") -} +authority :: Repo -> String +authority = authpart assemble + where + assemble a = uriUserInfo a ++ uriRegName' a ++ uriPort a + +{- Applies a function to extract part of the uriAuthority of an URL repo. 
-} +authpart :: (URIAuth -> a) -> Repo -> a +authpart a Repo { location = Url u } = a auth + where + auth = fromMaybe (error $ "bad url " ++ show u) (uriAuthority u) +authpart _ repo = notUrl repo + +notUrl :: Repo -> a +notUrl repo = error $ + "acting on local git repo " ++ repoDescribe repo ++ " not supported" diff --git a/Git/Version.hs b/Git/Version.hs new file mode 100644 index 0000000000..c8bc121d66 --- /dev/null +++ b/Git/Version.hs @@ -0,0 +1,38 @@ +{- git version checking + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Git.Version where + +import Common +import qualified Build.SysConfig + +{- Using the version it was configured for avoids running git to check its + - version, at the cost that upgrading git won't be noticed. + - This is only acceptable because it's rare that git's version influences + - code's behavior. -} +version :: String +version = Build.SysConfig.gitversion + +older :: String -> Bool +older v = normalize version < normalize v + +{- To compare dotted versions like 1.7.7 and 1.8, they are normalized to + - a somewhat arbitrary integer representation. -} +normalize :: String -> Integer +normalize = sum . mult 1 . reverse . + extend precision . take precision . + map readi . split "." + where + extend n l = l ++ replicate (n - length l) 0 + mult _ [] = [] + mult n (x:xs) = (n*x) : mult (n*10^width) xs + readi :: String -> Integer + readi s = case reads s of + ((x,_):_) -> x + _ -> 0 + precision = 10 -- number of segments of the version to compare + width = length "yyyymmddhhmmss" -- maximum width of a segment diff --git a/GitAnnex.hs b/GitAnnex.hs new file mode 100644 index 0000000000..43daf7367b --- /dev/null +++ b/GitAnnex.hs @@ -0,0 +1,144 @@ +{- git-annex main program + - + - Copyright 2010 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. 
+ -} + +module GitAnnex where + +import System.Console.GetOpt + +import Common.Annex +import qualified Git.Config +import qualified Git.Construct +import CmdLine +import Command +import Types.TrustLevel +import qualified Annex +import qualified Remote +import qualified Limit +import qualified Utility.Format + +import qualified Command.Add +import qualified Command.Unannex +import qualified Command.Drop +import qualified Command.Move +import qualified Command.Copy +import qualified Command.Get +import qualified Command.FromKey +import qualified Command.DropKey +import qualified Command.Reinject +import qualified Command.Fix +import qualified Command.Init +import qualified Command.Describe +import qualified Command.InitRemote +import qualified Command.Fsck +import qualified Command.Unused +import qualified Command.DropUnused +import qualified Command.Unlock +import qualified Command.Lock +import qualified Command.PreCommit +import qualified Command.TweakFetch +import qualified Command.Find +import qualified Command.Whereis +import qualified Command.Merge +import qualified Command.Status +import qualified Command.Migrate +import qualified Command.Uninit +import qualified Command.Trust +import qualified Command.Untrust +import qualified Command.Semitrust +import qualified Command.Dead +import qualified Command.Sync +import qualified Command.AddUrl +import qualified Command.Map +import qualified Command.Upgrade +import qualified Command.Version + +cmds :: [Command] +cmds = concat + [ Command.Add.def + , Command.Get.def + , Command.Drop.def + , Command.Move.def + , Command.Copy.def + , Command.Unlock.def + , Command.Lock.def + , Command.Sync.def + , Command.AddUrl.def + , Command.Init.def + , Command.Describe.def + , Command.InitRemote.def + , Command.Reinject.def + , Command.Unannex.def + , Command.Uninit.def + , Command.PreCommit.def + , Command.TweakFetch.def + , Command.Trust.def + , Command.Untrust.def + , Command.Semitrust.def + , Command.Dead.def + , 
Command.FromKey.def + , Command.DropKey.def + , Command.Fix.def + , Command.Fsck.def + , Command.Unused.def + , Command.DropUnused.def + , Command.Find.def + , Command.Whereis.def + , Command.Merge.def + , Command.Status.def + , Command.Migrate.def + , Command.Map.def + , Command.Upgrade.def + , Command.Version.def + ] + +options :: [Option] +options = commonOptions ++ + [ Option ['t'] ["to"] (ReqArg setto paramRemote) + "specify to where to transfer content" + , Option ['f'] ["from"] (ReqArg setfrom paramRemote) + "specify from where to transfer content" + , Option ['N'] ["numcopies"] (ReqArg setnumcopies paramNumber) + "override default number of copies" + , Option [] ["trust"] (ReqArg (Remote.forceTrust Trusted) paramRemote) + "override trust setting" + , Option [] ["semitrust"] (ReqArg (Remote.forceTrust SemiTrusted) paramRemote) + "override trust setting back to default" + , Option [] ["untrust"] (ReqArg (Remote.forceTrust UnTrusted) paramRemote) + "override trust setting to untrusted" + , Option ['c'] ["config"] (ReqArg setgitconfig "NAME=VALUE") + "override git configuration setting" + , Option [] ["print0"] (NoArg setprint0) + "terminate output with null" + , Option [] ["format"] (ReqArg setformat paramFormat) + "control format of output" + , Option ['x'] ["exclude"] (ReqArg Limit.addExclude paramGlob) + "skip files matching the glob pattern" + , Option ['I'] ["include"] (ReqArg Limit.addInclude paramGlob) + "don't skip files matching the glob pattern" + , Option ['i'] ["in"] (ReqArg Limit.addIn paramRemote) + "skip files not present in a remote" + , Option ['C'] ["copies"] (ReqArg Limit.addCopies paramNumber) + "skip files with fewer copies" + , Option ['B'] ["inbackend"] (ReqArg Limit.addInBackend paramName) + "skip files not using a key-value backend" + ] ++ matcherOptions + where + setto v = Annex.changeState $ \s -> s { Annex.toremote = Just v } + setfrom v = Annex.changeState $ \s -> s { Annex.fromremote = Just v } + setnumcopies v = Annex.changeState 
$ \s -> s {Annex.forcenumcopies = readMaybe v } + setformat v = Annex.changeState $ \s -> s { Annex.format = Just $ Utility.Format.gen v } + setprint0 = setformat "${file}\0" + setgitconfig :: String -> Annex () + setgitconfig v = do + newg <- inRepo $ Git.Config.store v + Annex.changeState $ \s -> s { Annex.repo = newg } + +header :: String +header = "Usage: git-annex command [option ..]" + +run :: [String] -> IO () +run args = dispatch args cmds options header Git.Construct.fromCwd diff --git a/INSTALL b/INSTALL new file mode 120000 index 0000000000..67566818f0 --- /dev/null +++ b/INSTALL @@ -0,0 +1 @@ +doc/install.mdwn \ No newline at end of file diff --git a/Init.hs b/Init.hs new file mode 100644 index 0000000000..47ac9e3d35 --- /dev/null +++ b/Init.hs @@ -0,0 +1,83 @@ +{- git-annex repository initialization + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Init ( + ensureInitialized, + initialize, + uninitialize +) where + +import Common.Annex +import Utility.TempFile +import qualified Git +import qualified Annex.Branch +import Logs.UUID +import Annex.Version +import Annex.UUID + +initialize :: Maybe String -> Annex () +initialize mdescription = do + prepUUID + Annex.Branch.create + setVersion + gitHooksWrite + u <- getUUID + maybe (recordUUID u) (describeUUID u) mdescription + +uninitialize :: Annex () +uninitialize = gitHooksUnWrite + +{- Will automatically initialize if there is already a git-annex + branch from somewhere. Otherwise, require a manual init + to avoid git-annex accidentially being run in git + repos that did not intend to use it. 
-} +ensureInitialized :: Annex () +ensureInitialized = getVersion >>= maybe needsinit checkVersion + where + needsinit = do + annexed <- Annex.Branch.hasSibling + if annexed + then initialize Nothing + else error "First run: git-annex init" + +{- set up git hooks, if not already present -} +gitHooksWrite :: Annex () +gitHooksWrite = unlessBare $ forM_ hooks $ \(hook, content) -> do + file <- hookFile hook + exists <- liftIO $ doesFileExist file + if exists + then warning $ hook ++ " hook (" ++ file ++ ") already exists, not configuring" + else liftIO $ do + viaTmp writeFile file content + p <- getPermissions file + setPermissions file $ p {executable = True} + +gitHooksUnWrite :: Annex () +gitHooksUnWrite = unlessBare $ forM_ hooks $ \(hook, content) -> do + file <- hookFile hook + whenM (liftIO $ doesFileExist file) $ do + c <- liftIO $ readFile file + if c == content + then liftIO $ removeFile file + else warning $ hook ++ " hook (" ++ file ++ + ") contents modified; not deleting." ++ + " Edit it to remove call to git annex." + +unlessBare :: Annex () -> Annex () +unlessBare = unlessM $ fromRepo $ Git.repoIsLocalBare + +hookFile :: FilePath -> Annex FilePath +hookFile f = () <$> fromRepo Git.gitDir <*> pure ("hooks/" ++ f) + +hooks :: [(String, String)] +hooks = [ ("pre-commit", hookscript "git annex pre-commit .") + , ("tweak-fetch", hookscript "git annex tweak-fetch") + ] + where + hookscript s = "#!/bin/sh\n" ++ + "# automatically configured by git-annex\n" ++ + s ++ "\n"; diff --git a/Limit.hs b/Limit.hs new file mode 100644 index 0000000000..26e5d689c9 --- /dev/null +++ b/Limit.hs @@ -0,0 +1,103 @@ +{- user-specified limits on files to act on + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. 
+ -} + +module Limit where + +import Text.Regex.PCRE.Light.Char8 +import System.Path.WildMatch + +import Common.Annex +import qualified Annex +import qualified Utility.Matcher +import qualified Remote +import qualified Backend +import Logs.Location +import Annex.Content + +type Limit = Utility.Matcher.Token (FilePath -> Annex Bool) + +{- Checks if there are user-specified limits. -} +limited :: Annex Bool +limited = (not . Utility.Matcher.matchesAny) <$> getMatcher' + +{- Gets a matcher for the user-specified limits. The matcher is cached for + - speed; once it's obtained the user-specified limits can't change. -} +getMatcher :: Annex (FilePath -> Annex Bool) +getMatcher = Utility.Matcher.matchM <$> getMatcher' + +getMatcher' :: Annex (Utility.Matcher.Matcher (FilePath -> Annex Bool)) +getMatcher' = do + m <- Annex.getState Annex.limit + case m of + Right r -> return r + Left l -> do + let matcher = Utility.Matcher.generate (reverse l) + Annex.changeState $ \s -> s { Annex.limit = Right matcher } + return matcher + +{- Adds something to the limit list, which is built up reversed. -} +add :: Limit -> Annex () +add l = Annex.changeState $ \s -> s { Annex.limit = prepend $ Annex.limit s } + where + prepend (Left ls) = Left $ l:ls + prepend _ = error "internal" + +{- Adds a new token. -} +addToken :: String -> Annex () +addToken = add . Utility.Matcher.token + +{- Adds a new limit. -} +addLimit :: (FilePath -> Annex Bool) -> Annex () +addLimit = add . Utility.Matcher.Operation + +{- Add a limit to skip files that do not match the glob. -} +addInclude :: String -> Annex () +addInclude glob = addLimit $ return . matchglob glob + +{- Add a limit to skip files that match the glob. -} +addExclude :: String -> Annex () +addExclude glob = addLimit $ return . not . 
matchglob glob + +matchglob :: String -> FilePath -> Bool +matchglob glob f = isJust $ match cregex f [] + where + cregex = compile regex [] + regex = '^':wildToRegex glob + +{- Adds a limit to skip files not believed to be present + - in a specfied repository. -} +addIn :: String -> Annex () +addIn name = addLimit $ check $ if name == "." then inAnnex else inremote + where + check a = Backend.lookupFile >=> handle a + handle _ Nothing = return False + handle a (Just (key, _)) = a key + inremote key = do + u <- Remote.nameToUUID name + us <- keyLocations key + return $ u `elem` us + +{- Adds a limit to skip files not believed to have the specified number + - of copies. -} +addCopies :: String -> Annex () +addCopies num = + case readMaybe num :: Maybe Int of + Nothing -> error "bad number for --copies" + Just n -> addLimit $ check n + where + check n = Backend.lookupFile >=> handle n + handle _ Nothing = return False + handle n (Just (key, _)) = do + us <- keyLocations key + return $ length us >= n + +{- Adds a limit to skip files not using a specified key-value backend. -} +addInBackend :: String -> Annex () +addInBackend name = addLimit $ Backend.lookupFile >=> check + where + wanted = Backend.lookupBackendName name + check = return . maybe False ((==) wanted . snd) diff --git a/Locations.hs b/Locations.hs new file mode 100644 index 0000000000..73a2473b56 --- /dev/null +++ b/Locations.hs @@ -0,0 +1,228 @@ +{- git-annex file locations + - + - Copyright 2010-2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. 
-}

module Locations (
  keyFile,
  fileKey,
  keyPaths,
  gitAnnexLocation,
  annexLocations,
  gitAnnexDir,
  gitAnnexObjectDir,
  gitAnnexTmpDir,
  gitAnnexTmpLocation,
  gitAnnexBadDir,
  gitAnnexBadLocation,
  gitAnnexUnusedLog,
  gitAnnexJournalDir,
  gitAnnexJournalLock,
  gitAnnexIndex,
  gitAnnexIndexLock,
  isLinkToAnnex,
  annexHashes,
  hashDirMixed,
  hashDirLower,

  prop_idempotent_fileKey
) where

import Data.Bits
import Data.Word
import Data.Hash.MD5

import Common
import Types
import Types.Key
import qualified Git

{- Conventions:
 -
 - Functions ending in "Dir" should always return values ending with a
 - trailing path separator. Most code does not rely on that, but a few
 - things do.
 -
 - Everything else should not end in a trailing path separator.
 -
 - Only functions (with names starting with "git") that build a path
 - based on a git repository should return an absolute path.
 - Everything else should use relative paths.
 -}

{- The directory git annex uses for local state, relative to the .git
 - directory. Always ends with a trailing path separator. -}
annexDir :: FilePath
annexDir = addTrailingPathSeparator "annex"

{- The directory git annex uses for locally available object content,
 - relative to the .git directory. Built by joining "objects" onto
 - annexDir; always ends with a trailing path separator. -}
objectDir :: FilePath
objectDir = addTrailingPathSeparator $ annexDir </> "objects"

{- Annexed file's possible locations relative to the .git directory.
 - There are two different possibilities, using different hashes;
 - one location is produced per entry of annexHashes, in that order. -}
annexLocations :: Key -> [FilePath]
annexLocations key = map (annexLocation key) annexHashes

{- Location of a key's content relative to the .git directory, for one
 - particular directory hasher: objectDir joined with the hashed key path. -}
annexLocation :: Key -> Hasher -> FilePath
annexLocation key hasher = objectDir </> keyPath key hasher

{- Annexed file's absolute location in a repository.
 -
 - When there are multiple possible locations, returns the one where the
 - file is actually present.
 -
 - When the file is not present, returns the location where the file should
 - be stored.
-}
gitAnnexLocation :: Key -> Git.Repo -> IO FilePath
gitAnnexLocation key r
  | Git.repoIsLocalBare r =
      {- Bare repositories default to hashDirLower for new
       - content, as it's more portable. -}
      check (map inrepo $ annexLocations key)
  | otherwise =
      {- Non-bare repositories only use hashDirMixed, so
       - don't need to do any work to check if the file is
       - present. -}
      return $ inrepo ".git" </> annexLocation key hashDirMixed
  where
    -- Anchors a repository-relative path at the repository's work tree.
    inrepo d = Git.workTree r </> d
    -- Picks a candidate via firstM doesFileExist (presumably the first
    -- one that exists on disk); when that yields Nothing, falls back to
    -- the first candidate in the list.
    check locs@(l:_) = fromMaybe l <$> firstM doesFileExist locs
    check [] = error "internal"

{- The annex directory of a repository. For a bare repository it sits
 - directly under the repository path, otherwise under ".git". -}
gitAnnexDir :: Git.Repo -> FilePath
gitAnnexDir r
  | Git.repoIsLocalBare r = addTrailingPathSeparator $ Git.workTree r </> annexDir
  | otherwise = addTrailingPathSeparator $ Git.workTree r </> ".git" </> annexDir

{- The part of the annex directory where file contents are stored. -}
gitAnnexObjectDir :: Git.Repo -> FilePath
gitAnnexObjectDir r
  | Git.repoIsLocalBare r = addTrailingPathSeparator $ Git.workTree r </> objectDir
  | otherwise = addTrailingPathSeparator $ Git.workTree r </> ".git" </> objectDir

{- .git/annex/tmp/ is used for temp files -}
gitAnnexTmpDir :: Git.Repo -> FilePath
gitAnnexTmpDir r = addTrailingPathSeparator $ gitAnnexDir r </> "tmp"

{- The temp file to use for a given key. -}
gitAnnexTmpLocation :: Key -> Git.Repo -> FilePath
gitAnnexTmpLocation key r = gitAnnexTmpDir r </> keyFile key

{- .git/annex/bad/ is used for bad files found during fsck -}
gitAnnexBadDir :: Git.Repo -> FilePath
gitAnnexBadDir r = addTrailingPathSeparator $ gitAnnexDir r </> "bad"

{- The bad file to use for a given key.
-}
gitAnnexBadLocation :: Key -> Git.Repo -> FilePath
gitAnnexBadLocation key r = gitAnnexBadDir r </> keyFile key

{- .git/annex/*unused is used to number possibly unused keys.
 - The given prefix is prepended to the file name "unused". -}
gitAnnexUnusedLog :: FilePath -> Git.Repo -> FilePath
gitAnnexUnusedLog prefix r = gitAnnexDir r </> (prefix ++ "unused")

{- .git/annex/journal/ is used to journal changes made to the git-annex
 - branch -}
gitAnnexJournalDir :: Git.Repo -> FilePath
gitAnnexJournalDir r = addTrailingPathSeparator $ gitAnnexDir r </> "journal"

{- Lock file for the journal. -}
gitAnnexJournalLock :: Git.Repo -> FilePath
gitAnnexJournalLock r = gitAnnexDir r </> "journal.lck"

{- .git/annex/index is used to stage changes to the git-annex branch -}
gitAnnexIndex :: Git.Repo -> FilePath
gitAnnexIndex r = gitAnnexDir r </> "index"

{- Lock file for .git/annex/index. -}
gitAnnexIndexLock :: Git.Repo -> FilePath
gitAnnexIndexLock r = gitAnnexDir r </> "index.lck"

{- Checks a symlink target to see if it appears to point to annexed content.
 - This is a plain substring test for "/.git/" followed by objectDir. -}
isLinkToAnnex :: FilePath -> Bool
isLinkToAnnex s = ("/.git/" ++ objectDir) `isInfixOf` s

{- Converts a key into a filename fragment without any directory.
 -
 - Escape "/" in the key name, to keep a flat tree of files and avoid
 - issues with keys containing "/../" or ending with "/" etc.
 -
 - "/" is escaped to "%" because it's short and rarely used, and resembles
 -     a slash
 - "%" is escaped to "&s", and "&" to "&a"; this ensures that the mapping
 -     is one to one.
 - ":" is escaped to "&c", because despite it being 2011, people still care
 -     about FAT.
 -
 - The replacements are applied innermost first: "&", then "%", then ":",
 - and finally "/".
 -}
keyFile :: Key -> FilePath
keyFile key = replace "/" "%" $ replace ":" "&c" $
  replace "%" "&s" $ replace "&" "&a" $ show key

{- A location to store a key on the filesystem. A directory hash is used,
 - to protect against filesystems that dislike having many items in a
 - single directory.
-
 - The file is put in a directory with the same name, this allows
 - write-protecting the directory to avoid accidental deletion of the file.
 -}
keyPath :: Key -> Hasher -> FilePath
keyPath key hasher = hasher key </> f </> f
  where
    -- The escaped key name is used both as the directory name and as the
    -- file name inside it.
    f = keyFile key

{- All possible locations to store a key using different directory hashes. -}
keyPaths :: Key -> [FilePath]
keyPaths key = map (keyPath key) annexHashes

{- Reverses keyFile, converting a filename fragment (ie, the basename of
 - the symlink target) into a key. The escapes are undone in the opposite
 - order to the one keyFile applies them in. -}
fileKey :: FilePath -> Maybe Key
fileKey file = readKey $
  replace "&a" "&" $ replace "&s" "%" $
    replace "&c" ":" $ replace "%" "/" file

{- for quickcheck -}
prop_idempotent_fileKey :: String -> Bool
prop_idempotent_fileKey s = Just k == fileKey (keyFile k)
  where
    k = stubKey { keyName = s, keyBackendName = "test" }

{- Two different directory hashes may be used. The mixed case hash
 - came first, and is fine, except for the problem of case-strict
 - filesystems such as Linux VFAT (mounted with shortname=mixed),
 - which do not allow using a directory "XX" when "xx" already exists.
 - To support that, most repositories use the lower case hash for new data. -}
type Hasher = Key -> FilePath

annexHashes :: [Hasher]
annexHashes = [hashDirLower, hashDirMixed]

{- Mixed case directory hash: two nested directories of two characters
 - each, taken from the first four characters of the encoded MD5 of the
 - shown key. -}
hashDirMixed :: Hasher
hashDirMixed k = addTrailingPathSeparator $ take 2 dir </> drop 2 dir
  where
    dir = take 4 $ display_32bits_as_dir =<< [a,b,c,d]
    ABCD (a,b,c,d) = md5 $ Str $ show k

{- Lower case directory hash: two nested directories of three characters
 - each, taken from the first six characters of md5s applied to the shown
 - key. -}
hashDirLower :: Hasher
hashDirLower k = addTrailingPathSeparator $ take 3 dir </> drop 3 dir
  where
    dir = take 6 $ md5s $ Str $ show k

{- modified version of display_32bits_as_hex from Data.Hash.MD5
 - Copyright (C) 2001 Ian Lynagh
 - License: Either BSD or GPL
 -}
display_32bits_as_dir :: Word32 -> String
display_32bits_as_dir w = trim $ swap_pairs cs
  where
    -- Need 32 characters to use. To avoid inadvertently making
    -- a real word, use letters that appear less frequently.
+ chars = ['0'..'9'] ++ "zqjxkmvwgpfZQJXKMVWGPF" + cs = map (\x -> getc $ (shiftR w (6*x)) .&. 31) [0..7] + getc n = chars !! fromIntegral n + swap_pairs (x1:x2:xs) = x2:x1:swap_pairs xs + swap_pairs _ = [] + -- Last 2 will always be 00, so omit. + trim = take 6 diff --git a/Logs/Location.hs b/Logs/Location.hs new file mode 100644 index 0000000000..588962bc57 --- /dev/null +++ b/Logs/Location.hs @@ -0,0 +1,74 @@ +{-# LANGUAGE BangPatterns #-} + +{- git-annex location log + - + - git-annex keeps track of which repositories have the contents of annexed + - files. + - + - Repositories record their UUID and the date when they --get or --drop + - a value. + - + - Copyright 2010-2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Logs.Location ( + LogStatus(..), + logChange, + readLog, + keyLocations, + loggedKeys, + loggedKeysFor, + logFile, + logFileKey +) where + +import Common.Annex +import qualified Annex.Branch +import Logs.Presence +import Logs.Trust + +{- Log a change in the presence of a key's value in a repository. -} +logChange :: Key -> UUID -> LogStatus -> Annex () +logChange key (UUID u) s = addLog (logFile key) =<< logNow s u +logChange _ NoUUID _ = return () + +{- Returns a list of repository UUIDs that, according to the log, have + - the value of a key. + - + - Dead repositories are skipped. + -} +keyLocations :: Key -> Annex [UUID] +keyLocations key = do + l <- map toUUID <$> (currentLog . logFile) key + snd <$> trustPartition DeadTrusted l + +{- Finds all keys that have location log information. + - (There may be duplicate keys in the list.) -} +loggedKeys :: Annex [Key] +loggedKeys = mapMaybe (logFileKey . takeFileName) <$> Annex.Branch.files + +{- Finds all keys that have location log information indicating + - they are present for the specified repository. 
-} +loggedKeysFor :: UUID -> Annex [Key] +loggedKeysFor u = filterM isthere =<< loggedKeys + where + {- This should run strictly to avoid the filterM + - building many thunks containing keyLocations data. -} + isthere k = do + us <- keyLocations k + let !there = u `elem` us + return there + +{- The filename of the log file for a given key. -} +logFile :: Key -> String +logFile key = hashDirLower key ++ keyFile key ++ ".log" + +{- Converts a log filename into a key. -} +logFileKey :: FilePath -> Maybe Key +logFileKey file + | ext == ".log" = fileKey base + | otherwise = Nothing + where + (base, ext) = splitAt (length file - 4) file diff --git a/Logs/Presence.hs b/Logs/Presence.hs new file mode 100644 index 0000000000..f5e4f1ea94 --- /dev/null +++ b/Logs/Presence.hs @@ -0,0 +1,104 @@ +{- git-annex presence log + - + - This is used to store presence information in the git-annex branch in + - a way that can be union merged. + - + - A line of the log will look like: "date N INFO" + - Where N=1 when the INFO is present, and 0 otherwise. + - + - Copyright 2010-2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Logs.Presence ( + LogStatus(..), + addLog, + readLog, + parseLog, + showLog, + logNow, + compactLog, + currentLog, + LogLine +) where + +import Data.Time.Clock.POSIX +import Data.Time +import System.Locale +import qualified Data.Map as M + +import Common.Annex +import qualified Annex.Branch + +data LogLine = LogLine { + date :: POSIXTime, + status :: LogStatus, + info :: String +} deriving (Eq) + +data LogStatus = InfoPresent | InfoMissing + deriving (Eq) + +addLog :: FilePath -> LogLine -> Annex () +addLog file line = Annex.Branch.change file $ \s -> + showLog $ compactLog (line : parseLog s) + +{- Reads a log file. + - Note that the LogLines returned may be in any order. -} +readLog :: FilePath -> Annex [LogLine] +readLog file = parseLog <$> Annex.Branch.get file + +{- Parses a log file. Unparseable lines are ignored. 
-} +parseLog :: String -> [LogLine] +parseLog = mapMaybe (parseline . words) . lines + where + parseline (a:b:c:_) = do + d <- parseTime defaultTimeLocale "%s%Qs" a + s <- parsestatus b + Just $ LogLine (utcTimeToPOSIXSeconds d) s c + parseline _ = Nothing + parsestatus "1" = Just InfoPresent + parsestatus "0" = Just InfoMissing + parsestatus _ = Nothing + +{- Generates a log file. -} +showLog :: [LogLine] -> String +showLog = unlines . map genline + where + genline (LogLine d s i) = unwords [show d, genstatus s, i] + genstatus InfoPresent = "1" + genstatus InfoMissing = "0" + +{- Generates a new LogLine with the current date. -} +logNow :: LogStatus -> String -> Annex LogLine +logNow s i = do + now <- liftIO getPOSIXTime + return $ LogLine now s i + +{- Reads a log and returns only the info that is still in effect. -} +currentLog :: FilePath -> Annex [String] +currentLog file = map info . filterPresent <$> readLog file + +{- Returns the info from LogLines that are in effect. -} +filterPresent :: [LogLine] -> [LogLine] +filterPresent = filter (\l -> InfoPresent == status l) . compactLog + +{- Compacts a set of logs, returning a subset that contains the current + - status. -} +compactLog :: [LogLine] -> [LogLine] +compactLog = M.elems . foldr mapLog M.empty + +type LogMap = M.Map String LogLine + +{- Inserts a log into a map of logs, if the log has better (ie, newer) + - information than the other logs in the map -} +mapLog :: LogLine -> LogMap -> LogMap +mapLog l m = + if better + then M.insert i l m + else m + where + better = maybe True newer $ M.lookup i m + newer l' = date l' <= date l + i = info l diff --git a/Logs/Remote.hs b/Logs/Remote.hs new file mode 100644 index 0000000000..d9b41d8c47 --- /dev/null +++ b/Logs/Remote.hs @@ -0,0 +1,86 @@ +{- git-annex remote log + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. 
-}

module Logs.Remote (
  readRemoteLog,
  configSet,
  keyValToConfig,
  configToKeyVal,

  prop_idempotent_configEscape
) where

import qualified Data.Map as M
import Data.Time.Clock.POSIX
import Data.Char

import Common.Annex
import qualified Annex.Branch
import Types.Remote
import Logs.UUIDBased

{- Name of the file holding the remote log. -}
remoteLog :: FilePath
remoteLog = "remote.log"

{- Stores (or replaces) the config of the remote with the given uuid in
 - the log, stamping the entry with the current POSIX time. -}
configSet :: UUID -> RemoteConfig -> Annex ()
configSet u c = liftIO getPOSIXTime >>= \now ->
  Annex.Branch.change remoteLog (rewrite now)
  where
    -- Parse the old log content, apply the change, and render it back.
    rewrite now = showLog showConfig . changeLog now u c . parseLog parseConfig

{- Reads the remote log into a map from uuid to key/value config map. -}
readRemoteLog :: Annex (M.Map UUID RemoteConfig)
readRemoteLog = fmap (simpleMap . parseLog parseConfig) (Annex.Branch.get remoteLog)

{- Parses the config part of a log line: whitespace separated
 - "key=value" words. Never fails. -}
parseConfig :: String -> Maybe RemoteConfig
parseConfig ln = Just (keyValToConfig (words ln))

{- Renders a config as space separated "key=value" words. -}
showConfig :: RemoteConfig -> String
showConfig cfg = unwords (configToKeyVal cfg)

{- Given Strings like "key=value", generates a RemoteConfig.
-} +keyValToConfig :: [String] -> RemoteConfig +keyValToConfig ws = M.fromList $ map (/=/) ws + where + (/=/) s = (k, v) + where + k = takeWhile (/= '=') s + v = configUnEscape $ drop (1 + length k) s + +configToKeyVal :: M.Map String String -> [String] +configToKeyVal m = map toword $ sort $ M.toList m + where + toword (k, v) = k ++ "=" ++ configEscape v + +configEscape :: String -> String +configEscape = (>>= escape) + where + escape c + | isSpace c || c `elem` "&" = "&" ++ show (ord c) ++ ";" + | otherwise = [c] + +configUnEscape :: String -> String +configUnEscape = unescape + where + unescape [] = [] + unescape (c:rest) + | c == '&' = entity rest + | otherwise = c : unescape rest + entity s = if ok + then chr (Prelude.read num) : unescape rest + else '&' : unescape s + where + num = takeWhile isNumber s + r = drop (length num) s + rest = drop 1 r + ok = not (null num) && take 1 r == ";" + +{- for quickcheck -} +prop_idempotent_configEscape :: String -> Bool +prop_idempotent_configEscape s = s == (configUnEscape . configEscape) s diff --git a/Logs/Trust.hs b/Logs/Trust.hs new file mode 100644 index 0000000000..5d769bd247 --- /dev/null +++ b/Logs/Trust.hs @@ -0,0 +1,85 @@ +{- git-annex trust + - + - Copyright 2010 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Logs.Trust ( + TrustLevel(..), + trustGet, + trustSet, + trustPartition +) where + +import qualified Data.Map as M +import Data.Time.Clock.POSIX + +import Common.Annex +import Types.TrustLevel +import qualified Annex.Branch +import qualified Annex +import Logs.UUIDBased + +{- Filename of trust.log. -} +trustLog :: FilePath +trustLog = "trust.log" + +{- Returns a list of UUIDs that the trustLog indicates have the + - specified trust level. + - Note that the list can be incomplete for SemiTrusted, since that's + - the default. -} +trustGet :: TrustLevel -> Annex [UUID] +trustGet level = M.keys . 
M.filter (== level) <$> trustMap + +{- Partitions a list of UUIDs to those matching a TrustLevel and not. -} +trustPartition :: TrustLevel -> [UUID] -> Annex ([UUID], [UUID]) +trustPartition level ls + | level == SemiTrusted = do + t <- trustGet Trusted + u <- trustGet UnTrusted + d <- trustGet DeadTrusted + let uncandidates = t ++ u ++ d + return $ partition (`notElem` uncandidates) ls + | otherwise = do + candidates <- trustGet level + return $ partition (`elem` candidates) ls + +{- Read the trustLog into a map, overriding with any + - values from forcetrust. The map is cached for speed. -} +trustMap :: Annex TrustMap +trustMap = do + cached <- Annex.getState Annex.trustmap + case cached of + Just m -> return m + Nothing -> do + overrides <- M.fromList <$> Annex.getState Annex.forcetrust + m <- (M.union overrides . simpleMap . parseLog (Just . parseTrust)) <$> + Annex.Branch.get trustLog + Annex.changeState $ \s -> s { Annex.trustmap = Just m } + return m + +{- The trust.log used to only list trusted repos, without a field for the + - trust status, which is why this defaults to Trusted. -} +parseTrust :: String -> TrustLevel +parseTrust s = maybe Trusted parse $ headMaybe $ words s + where + parse "1" = Trusted + parse "0" = UnTrusted + parse "X" = DeadTrusted + parse _ = SemiTrusted + +showTrust :: TrustLevel -> String +showTrust Trusted = "1" +showTrust UnTrusted = "0" +showTrust DeadTrusted = "X" +showTrust SemiTrusted = "?" + +{- Changes the trust level for a uuid in the trustLog. -} +trustSet :: UUID -> TrustLevel -> Annex () +trustSet uuid@(UUID _) level = do + ts <- liftIO getPOSIXTime + Annex.Branch.change trustLog $ + showLog showTrust . changeLog ts uuid level . parseLog (Just . 
parseTrust) + Annex.changeState $ \s -> s { Annex.trustmap = Nothing } +trustSet NoUUID _ = error "unknown UUID; cannot modify trust level" diff --git a/Logs/UUID.hs b/Logs/UUID.hs new file mode 100644 index 0000000000..18cbee61e4 --- /dev/null +++ b/Logs/UUID.hs @@ -0,0 +1,89 @@ +{- git-annex uuids + - + - Each git repository used by git-annex has an annex.uuid setting that + - uniquely identifies that repository. + - + - UUIDs of remotes are cached in git config, using keys named + - remote.<name>.annex-uuid + - + - uuid.log stores a list of known uuids, and their descriptions. + - + - Copyright 2010-2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Logs.UUID ( + describeUUID, + recordUUID, + uuidMap +) where + +import qualified Data.Map as M +import Data.Time.Clock.POSIX + +import Common.Annex +import qualified Annex.Branch +import Logs.UUIDBased +import qualified Annex.UUID + +{- Filename of uuid.log. -} +logfile :: FilePath +logfile = "uuid.log" + +{- Records a description for a uuid in the log. -} +describeUUID :: UUID -> String -> Annex () +describeUUID uuid desc = do + ts <- liftIO getPOSIXTime + Annex.Branch.change logfile $ + showLog id . changeLog ts uuid desc . fixBadUUID . parseLog Just + +{- Temporarily here to fix badly formatted uuid logs generated by + - versions 3.20111105 and 3.20111025. + - + - Those logs contain entries with the UUID and description flipped. + - Due to parsing, if the description is multiword, only the first + - will be taken to be the UUID. So, if the UUID of an entry does + - not look like a UUID, and the last word of the description does, + - flip them back. + -} +fixBadUUID :: Log String -> Log String +fixBadUUID = M.fromList . map fixup . 
M.toList + where + fixup (k, v) + | isbad = (fixeduuid, LogEntry (Date $ newertime v) fixedvalue) + | otherwise = (k, v) + where + kuuid = fromUUID k + isbad = not (isuuid kuuid) && isuuid lastword + ws = words $ value v + lastword = Prelude.last ws + fixeduuid = toUUID lastword + fixedvalue = unwords $ kuuid: Prelude.init ws + -- For the fixed line to take precedence, it should be + -- slightly newer, but only slightly. + newertime (LogEntry (Date d) _) = d + minimumPOSIXTimeSlice + newertime (LogEntry Unknown _) = minimumPOSIXTimeSlice + minimumPOSIXTimeSlice = 0.000001 + isuuid s = length s == 36 && length (split "-" s) == 5 + +{- Records the uuid in the log, if it's not already there. -} +recordUUID :: UUID -> Annex () +recordUUID u = go . M.lookup u =<< uuidMap + where + go (Just "") = set + go Nothing = set + go _ = return () + set = describeUUID u "" + +{- Read the uuidLog into a simple Map. + - + - The UUID of the current repository is included explicitly, since + - it may not have been described and so otherwise would not appear. -} +uuidMap :: Annex (M.Map UUID String) +uuidMap = do + m <- (simpleMap . parseLog Just) <$> Annex.Branch.get logfile + u <- Annex.UUID.getUUID + return $ M.insertWith' preferold u "" m + where + preferold = flip const diff --git a/Logs/UUIDBased.hs b/Logs/UUIDBased.hs new file mode 100644 index 0000000000..b09d93f903 --- /dev/null +++ b/Logs/UUIDBased.hs @@ -0,0 +1,110 @@ +{- git-annex uuid-based logs + - + - This is used to store information about a UUID in a way that can + - be union merged. + - + - A line of the log will look like: "UUID[ INFO[ timestamp=foo]]" + - The timestamp is last for backwards compatibility reasons, + - and may not be present on old log lines. + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. 
+ -} + +module Logs.UUIDBased ( + Log, + LogEntry(..), + TimeStamp(..), + parseLog, + showLog, + changeLog, + addLog, + simpleMap, + + prop_TimeStamp_sane, + prop_addLog_sane, +) where + +import qualified Data.Map as M +import Data.Time.Clock.POSIX +import Data.Time +import System.Locale + +import Common +import Types.UUID + +data TimeStamp = Unknown | Date POSIXTime + deriving (Eq, Ord, Show) + +data LogEntry a = LogEntry + { changed :: TimeStamp + , value :: a + } deriving (Eq, Show) + +type Log a = M.Map UUID (LogEntry a) + +tskey :: String +tskey = "timestamp=" + +showLog :: (a -> String) -> Log a -> String +showLog shower = unlines . map showpair . M.toList + where + showpair (k, LogEntry (Date p) v) = + unwords [fromUUID k, shower v, tskey ++ show p] + showpair (k, LogEntry Unknown v) = + unwords [fromUUID k, shower v] + +parseLog :: (String -> Maybe a) -> String -> Log a +parseLog parser = M.fromListWith best . mapMaybe parse . lines + where + parse line + | null ws = Nothing + | otherwise = parser (unwords info) >>= makepair + where + makepair v = Just (toUUID u, LogEntry ts v) + ws = words line + u = Prelude.head ws + t = Prelude.last ws + ts + | tskey `isPrefixOf` t = + pdate $ drop 1 $ dropWhile (/= '=') t + | otherwise = Unknown + info + | ts == Unknown = drop 1 ws + | otherwise = drop 1 $ beginning ws + pdate s = case parseTime defaultTimeLocale "%s%Qs" s of + Nothing -> Unknown + Just d -> Date $ utcTimeToPOSIXSeconds d + +changeLog :: POSIXTime -> UUID -> a -> Log a -> Log a +changeLog t u v = M.insert u $ LogEntry (Date t) v + +{- Only add a LogEntry if it's newer than (or at least as new as) any + - existing LogEntry for a UUID. -} +addLog :: UUID -> LogEntry a -> Log a -> Log a +addLog = M.insertWith best + +{- Converts a Log into a simple Map without the timestamp information. + - This is a one-way trip, but useful for code that never needs to change + - the log. 
-} +simpleMap :: Log a -> M.Map UUID a +simpleMap = M.map value + +best :: LogEntry a -> LogEntry a -> LogEntry a +best new old + | changed old > changed new = old + | otherwise = new + +-- Unknown is oldest. +prop_TimeStamp_sane :: Bool +prop_TimeStamp_sane = Unknown < Date 1 + +prop_addLog_sane :: Bool +prop_addLog_sane = newWins && newestWins + where + newWins = addLog (UUID "foo") (LogEntry (Date 1) "new") l == l2 + newestWins = addLog (UUID "foo") (LogEntry (Date 1) "newest") l2 /= l2 + + l = M.fromList [(UUID "foo", LogEntry (Date 0) "old")] + l2 = M.fromList [(UUID "foo", LogEntry (Date 1) "new")] diff --git a/Logs/Web.hs b/Logs/Web.hs new file mode 100644 index 0000000000..62656b7ed8 --- /dev/null +++ b/Logs/Web.hs @@ -0,0 +1,52 @@ +{- Web url logs. + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Logs.Web ( + URLString, + webUUID, + setUrl, + setUrlPresent, + getUrls +) where + +import Common.Annex +import Logs.Presence +import Logs.Location + +type URLString = String + +-- Dummy uuid for the whole web. Do not alter. +webUUID :: UUID +webUUID = UUID "00000000-0000-0000-0000-000000000001" + +{- The urls for a key are stored in remote/web/hash/key.log + - in the git-annex branch. -} +urlLog :: Key -> FilePath +urlLog key = "remote/web" </> hashDirLower key </> keyFile key ++ ".log" +oldurlLog :: Key -> FilePath +{- A bug used to store the urls elsewhere. -} +oldurlLog key = "remote/web" </> hashDirLower key </> show key ++ ".log" + +{- Gets all urls that a key might be available from. -} +getUrls :: Key -> Annex [URLString] +getUrls key = do + us <- currentLog (urlLog key) + if null us + then currentLog (oldurlLog key) + else return us + +{- Records a change in an url for a key. 
-} +setUrl :: Key -> URLString -> LogStatus -> Annex () +setUrl key url status = do + addLog (urlLog key) =<< logNow status url + + -- update location log to indicate that the web has the key, or not + us <- getUrls key + logChange key webUUID (if null us then InfoMissing else InfoPresent) + +setUrlPresent :: Key -> URLString -> Annex () +setUrlPresent key url = setUrl key url InfoPresent diff --git a/Makefile b/Makefile new file mode 100644 index 0000000000..93586762a9 --- /dev/null +++ b/Makefile @@ -0,0 +1,115 @@ +PREFIX=/usr +IGNORE=-ignore-package monads-fd +GHCFLAGS=-O2 -Wall $(IGNORE) -fspec-constr-count=5 + +ifdef PROFILE +GHCFLAGS=-prof -auto-all -rtsopts -caf-all -fforce-recomp $(IGNORE) +endif + +GHCMAKE=ghc $(GHCFLAGS) --make + +bins=git-annex git-annex-shell git-union-merge +mans=git-annex.1 git-annex-shell.1 git-union-merge.1 +sources=Build/SysConfig.hs Utility/StatFS.hs Utility/Touch.hs Remote/S3.hs + +all=$(bins) $(mans) docs + +# Am I typing :make in vim? Do a fast build. +ifdef VIM +all=fast +endif + +all: $(all) + +# Disables optimisation. Not for production use. +fast: GHCFLAGS=-Wall $(IGNORE) +fast: $(bins) + +Build/SysConfig.hs: configure.hs Build/TestConfig.hs + $(GHCMAKE) configure + ./configure + +%.hs: %.hsc + hsc2hs $< + perl -i -pe 's/^{-# INCLUDE.*//' $@ + +Remote/S3.hs: + @ln -sf S3real.hs Remote/S3.hs + +Remote/S3.o: Remote/S3.hs + @if ! 
$(GHCMAKE) Remote/S3.hs; then \ + ln -sf S3stub.hs Remote/S3.hs; \ + echo "** building without S3 support"; \ + fi + +sources: $(sources) + +$(bins): sources Remote/S3.o + $(GHCMAKE) $@ + +git-annex.1: doc/git-annex.mdwn + ./mdwn2man git-annex 1 doc/git-annex.mdwn > git-annex.1 +git-annex-shell.1: doc/git-annex-shell.mdwn + ./mdwn2man git-annex-shell 1 doc/git-annex-shell.mdwn > git-annex-shell.1 +git-union-merge.1: doc/git-union-merge.mdwn + ./mdwn2man git-union-merge 1 doc/git-union-merge.mdwn > git-union-merge.1 + +install: all + install -d $(DESTDIR)$(PREFIX)/bin + install $(bins) $(DESTDIR)$(PREFIX)/bin + install -d $(DESTDIR)$(PREFIX)/share/man/man1 + install -m 0644 $(mans) $(DESTDIR)$(PREFIX)/share/man/man1 + install -d $(DESTDIR)$(PREFIX)/share/doc/git-annex + if [ -d html ]; then \ + rsync -a --delete html/ $(DESTDIR)$(PREFIX)/share/doc/git-annex/html/; \ + fi + +test: + @if ! $(GHCMAKE) -O0 test; then \ + echo "** not running test suite" >&2; \ + else \ + if ! ./test; then \ + echo "** test suite failed!" >&2; \ + exit 1; \ + fi; \ + fi + +testcoverage: + rm -f test.tix test + ghc -odir build/test -hidir build/test $(GHCFLAGS) --make -fhpc test + ./test + @echo "" + @hpc report test --exclude=Main --exclude=QC + @hpc markup test --exclude=Main --exclude=QC --destdir=.hpc >/dev/null + @echo "(See .hpc/ for test coverage details.)" + +# If ikiwiki is available, build static html docs suitable for being +# shipped in the software package. 
+ifeq ($(shell which ikiwiki),) +IKIWIKI=@echo "** ikiwiki not found, skipping building docs" >&2; true +else +IKIWIKI=ikiwiki +endif + +docs: $(mans) + $(IKIWIKI) doc html -v --wikiname git-annex --plugin=goodstuff \ + --no-usedirs --disable-plugin=openid --plugin=sidebar \ + --underlaydir=/dev/null --disable-plugin=shortcut \ + --disable-plugin=smiley \ + --plugin=comments --set comments_pagespec="*" \ + --exclude='news/.*' + +clean: + rm -rf build $(bins) $(mans) test configure *.tix .hpc $(sources) + rm -rf doc/.ikiwiki html dist + find . \( -name \*.o -or -name \*.hi \) -exec rm {} \; + +# Workaround for cabal sdist not running Setup hooks, so I cannot +# generate a file list there. +sdist: clean + @if [ ! -e git-annex.cabal.orig ]; then cp git-annex.cabal git-annex.cabal.orig; fi + @sed -e "s!\(Extra-Source-Files: \).*!\1$(shell find . -name .git -prune -or -not -name \\*.orig -not -type d -print | perl -ne 'print unless length >= 100')!i" < git-annex.cabal.orig > git-annex.cabal + @cabal sdist + @mv git-annex.cabal.orig git-annex.cabal + +.PHONY: $(bins) test install diff --git a/Messages.hs b/Messages.hs new file mode 100644 index 0000000000..1294e44f69 --- /dev/null +++ b/Messages.hs @@ -0,0 +1,146 @@ +{- git-annex output messages + - + - Copyright 2010-2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. 
+ -} + +module Messages ( + showStart, + showNote, + showAction, + showProgress, + showSideAction, + showOutput, + showLongNote, + showEndOk, + showEndFail, + showEndResult, + showErr, + warning, + indent, + maybeShowJSON, + showFullJSON, + showCustom, + showHeader, + showRaw, + + setupConsole +) where + +import Text.JSON + +import Common +import Types +import qualified Annex +import qualified Messages.JSON as JSON + +showStart :: String -> String -> Annex () +showStart command file = handle (JSON.start command $ Just file) $ + flushed $ putStr $ command ++ " " ++ file ++ " " + +showNote :: String -> Annex () +showNote s = handle (JSON.note s) $ + flushed $ putStr $ "(" ++ s ++ ") " + +showAction :: String -> Annex () +showAction s = showNote $ s ++ "..." + +showProgress :: Annex () +showProgress = handle q $ + flushed $ putStr "." + +showSideAction :: String -> Annex () +showSideAction s = handle q $ + putStrLn $ "(" ++ s ++ "...)" + +showOutput :: Annex () +showOutput = handle q $ + putStr "\n" + +showLongNote :: String -> Annex () +showLongNote s = handle (JSON.note s) $ + putStrLn $ '\n' : indent s + +showEndOk :: Annex () +showEndOk = showEndResult True + +showEndFail :: Annex () +showEndFail = showEndResult False + +showEndResult :: Bool -> Annex () +showEndResult ok = handle (JSON.end ok) $ putStrLn msg + where + msg + | ok = "ok" + | otherwise = "failed" + +showErr :: (Show a) => a -> Annex () +showErr e = warning' $ "git-annex: " ++ show e + +warning :: String -> Annex () +warning = warning' . indent + +warning' :: String -> Annex () +warning' w = do + handle q $ putStr "\n" + liftIO $ do + hFlush stdout + hPutStrLn stderr w + +indent :: String -> String +indent = join "\n" . map (\l -> " " ++ l) . lines + +{- Shows a JSON fragment only when in json mode. -} +maybeShowJSON :: JSON a => [(String, a)] -> Annex () +maybeShowJSON v = handle (JSON.add v) q + +{- Shows a complete JSON value, only when in json mode. 
-} +showFullJSON :: JSON a => [(String, a)] -> Annex Bool +showFullJSON v = Annex.getState Annex.output >>= liftIO . go + where + go Annex.JSONOutput = JSON.complete v >> return True + go _ = return False + +{- Performs an action that outputs nonstandard/customized output, and + - in JSON mode wraps its output in JSON.start and JSON.end, so it's + - a complete JSON document. + - This is only needed when showStart and showEndOk is not used. -} +showCustom :: String -> Annex Bool -> Annex () +showCustom command a = do + handle (JSON.start command Nothing) q + r <- a + handle (JSON.end r) q + +showHeader :: String -> Annex () +showHeader h = handle q $ + flushed $ putStr $ h ++ ": " + +showRaw :: String -> Annex () +showRaw s = handle q $ putStrLn s + +{- By default, haskell honors the user's locale in its output to stdout + - and stderr. While that's great for proper unicode support, for git-annex + - all that's really needed is the ability to display simple messages + - (currently untranslated), and importantly, to display filenames exactly + - as they are written on disk, no matter what their encoding. So, force + - raw mode. + - + - NB: Once git-annex gets localized, this will need a rethink. -} +setupConsole :: IO () +setupConsole = do + hSetBinaryMode stdout True + hSetBinaryMode stderr True + +handle :: IO () -> IO () -> Annex () +handle json normal = Annex.getState Annex.output >>= go + where + go Annex.NormalOutput = liftIO normal + go Annex.QuietOutput = q + go Annex.JSONOutput = liftIO $ flushed $ json + +q :: Monad m => m () +q = return () + +flushed :: IO () -> IO () +flushed a = a >> hFlush stdout diff --git a/Messages/JSON.hs b/Messages/JSON.hs new file mode 100644 index 0000000000..f7a031e381 --- /dev/null +++ b/Messages/JSON.hs @@ -0,0 +1,40 @@ +{- git-annex JSON output + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. 
+ -} + +module Messages.JSON ( + start, + end, + note, + add, + complete +) where + +import Text.JSON + +import qualified Utility.JSONStream as Stream + +start :: String -> Maybe String -> IO () +start command file = + putStr $ Stream.start $ ("command", command) : filepart file + where + filepart Nothing = [] + filepart (Just f) = [("file", f)] + +end :: Bool -> IO () +end b = putStr $ Stream.add [("success", b)] ++ Stream.end + +note :: String -> IO () +note s = add [("note", s)] + +add :: JSON a => [(String, a)] -> IO () +add v = putStr $ Stream.add v + +complete :: JSON a => [(String, a)] -> IO () +complete v = putStr $ concat + [ Stream.start v + , Stream.end + ] diff --git a/Options.hs b/Options.hs new file mode 100644 index 0000000000..cce750316e --- /dev/null +++ b/Options.hs @@ -0,0 +1,96 @@ +{- git-annex command-line options + - + - Copyright 2010-2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Options where + +import System.Console.GetOpt +import System.Log.Logger + +import Common.Annex +import qualified Annex +import Limit + +{- Each dashed command-line option results in generation of an action + - in the Annex monad that performs the necessary setting. 
+ -} +type Option = OptDescr (Annex ()) + +commonOptions :: [Option] +commonOptions = + [ Option [] ["force"] (NoArg (setforce True)) + "allow actions that may lose annexed data" + , Option ['F'] ["fast"] (NoArg (setfast True)) + "avoid slow operations" + , Option ['a'] ["auto"] (NoArg (setauto True)) + "automatic mode" + , Option ['q'] ["quiet"] (NoArg (setoutput Annex.QuietOutput)) + "avoid verbose output" + , Option ['v'] ["verbose"] (NoArg (setoutput Annex.NormalOutput)) + "allow verbose output (default)" + , Option ['j'] ["json"] (NoArg (setoutput Annex.JSONOutput)) + "enable JSON output" + , Option ['d'] ["debug"] (NoArg (setdebug)) + "show debug messages" + , Option ['b'] ["backend"] (ReqArg setforcebackend paramName) + "specify key-value backend to use" + ] + where + setforce v = Annex.changeState $ \s -> s { Annex.force = v } + setfast v = Annex.changeState $ \s -> s { Annex.fast = v } + setauto v = Annex.changeState $ \s -> s { Annex.auto = v } + setoutput v = Annex.changeState $ \s -> s { Annex.output = v } + setforcebackend v = Annex.changeState $ \s -> s { Annex.forcebackend = Just v } + setdebug = liftIO $ updateGlobalLogger rootLoggerName $ + setLevel DEBUG + +matcherOptions :: [Option] +matcherOptions = + [ longopt "not" "negate next option" + , longopt "and" "both previous and next option must match" + , longopt "or" "either previous or next option must match" + , shortopt "(" "open group of options" + , shortopt ")" "close group of options" + ] + where + longopt o = Option [] [o] $ NoArg $ addToken o + shortopt o = Option o [] $ NoArg $ addToken o + +{- Descriptions of params used in usage messages. 
-} +paramPaths :: String +paramPaths = paramOptional $ paramRepeating paramPath -- most often used +paramPath :: String +paramPath = "PATH" +paramKey :: String +paramKey = "KEY" +paramDesc :: String +paramDesc = "DESC" +paramUrl :: String +paramUrl = "URL" +paramNumber :: String +paramNumber = "NUMBER" +paramRemote :: String +paramRemote = "REMOTE" +paramGlob :: String +paramGlob = "GLOB" +paramName :: String +paramName = "NAME" +paramUUID :: String +paramUUID = "UUID" +paramType :: String +paramType = "TYPE" +paramFormat :: String +paramFormat = "FORMAT" +paramKeyValue :: String +paramKeyValue = "K=V" +paramNothing :: String +paramNothing = "" +paramRepeating :: String -> String +paramRepeating s = s ++ " ..." +paramOptional :: String -> String +paramOptional s = "[" ++ s ++ "]" +paramPair :: String -> String -> String +paramPair a b = a ++ " " ++ b diff --git a/README b/README new file mode 100644 index 0000000000..ce67d68166 --- /dev/null +++ b/README @@ -0,0 +1,6 @@ +git-annex allows managing files with git, without checking the file +contents into git. While that may seem paradoxical, it is useful when +dealing with files larger than git can currently easily handle, whether due +to limitations in memory, checksumming time, or disk space. + +For documentation, see doc/ or diff --git a/Remote.hs b/Remote.hs new file mode 100644 index 0000000000..10bf9d7694 --- /dev/null +++ b/Remote.hs @@ -0,0 +1,241 @@ +{- git-annex remotes + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. 
+ -} + +module Remote ( + Remote, + uuid, + name, + storeKey, + retrieveKeyFile, + removeKey, + hasKey, + hasKeyCheap, + + remoteTypes, + remoteMap, + byName, + prettyPrintUUIDs, + remotesWithUUID, + remotesWithoutUUID, + keyPossibilities, + keyPossibilitiesTrusted, + nameToUUID, + showTriedRemotes, + showLocations, + forceTrust, + logStatus +) where + +import qualified Data.Map as M +import Text.JSON +import Text.JSON.Generic + +import Common.Annex +import Types.Remote +import qualified Annex +import Config +import Annex.UUID +import Logs.UUID +import Logs.Trust +import Logs.Location +import Logs.Remote + +import qualified Remote.Git +import qualified Remote.S3 +import qualified Remote.Bup +import qualified Remote.Directory +import qualified Remote.Rsync +import qualified Remote.Web +import qualified Remote.Hook + +remoteTypes :: [RemoteType Annex] +remoteTypes = + [ Remote.Git.remote + , Remote.S3.remote + , Remote.Bup.remote + , Remote.Directory.remote + , Remote.Rsync.remote + , Remote.Web.remote + , Remote.Hook.remote + ] + +{- Builds a list of all available Remotes. + - Since doing so can be expensive, the list is cached. -} +genList :: Annex [Remote Annex] +genList = do + rs <- Annex.getState Annex.remotes + if null rs + then do + m <- readRemoteLog + l <- mapM (process m) remoteTypes + let rs' = concat l + Annex.changeState $ \s -> s { Annex.remotes = rs' } + return rs' + else return rs + where + process m t = + enumerate t >>= + mapM (gen m t) + gen m t r = do + u <- getRepoUUID r + generate t r u (M.lookup u m) + +{- Map of UUIDs of Remotes and their names. -} +remoteMap :: Annex (M.Map UUID String) +remoteMap = M.fromList . map (\r -> (uuid r, name r)) <$> genList + +{- Looks up a remote by name. (Or by UUID.) Only finds currently configured + - git remotes. 
-} +byName :: String -> Annex (Remote Annex) +byName n = do + res <- byName' n + case res of + Left e -> error e + Right r -> return r +byName' :: String -> Annex (Either String (Remote Annex)) +byName' "" = return $ Left "no remote specified" +byName' n = do + allremotes <- genList + let match = filter matching allremotes + if null match + then return $ Left $ "there is no git remote named \"" ++ n ++ "\"" + else return $ Right $ Prelude.head match + where + matching r = n == name r || toUUID n == uuid r + +{- Looks up a remote by name (or by UUID, or even by description), + - and returns its UUID. Finds even remotes that are not configured in + - .git/config. -} +nameToUUID :: String -> Annex UUID +nameToUUID "." = getUUID -- special case for current repo +nameToUUID "" = error "no remote specified" +nameToUUID n = byName' n >>= go + where + go (Right r) = return $ uuid r + go (Left e) = fromMaybe (error e) <$> bydescription + bydescription = do + m <- uuidMap + case M.lookup n $ transform swap m of + Just u -> return $ Just u + Nothing -> return $ byuuid m + byuuid m = M.lookup (toUUID n) $ transform double m + transform a = M.fromList . map a . M.toList + swap (a, b) = (b, a) + double (a, _) = (a, a) + +{- Pretty-prints a list of UUIDs of remotes, for human display. + - + - Shows descriptions from the uuid log, falling back to remote names, + - as some remotes may not be in the uuid log. + - + - When JSON is enabled, also generates a machine-readable description + - of the UUIDs. 
-} +prettyPrintUUIDs :: String -> [UUID] -> Annex String +prettyPrintUUIDs desc uuids = do + hereu <- getUUID + m <- M.unionWith addname <$> uuidMap <*> remoteMap + maybeShowJSON [(desc, map (jsonify m hereu) uuids)] + return $ unwords $ map (\u -> "\t" ++ prettify m hereu u ++ "\n") uuids + where + addname d n + | d == n = d + | null d = n + | otherwise = n ++ " (" ++ d ++ ")" + findlog m u = M.findWithDefault "" u m + prettify m hereu u + | not (null d) = fromUUID u ++ " -- " ++ d + | otherwise = fromUUID u + where + ishere = hereu == u + n = findlog m u + d + | null n && ishere = "here" + | ishere = addname n "here" + | otherwise = n + jsonify m hereu u = toJSObject + [ ("uuid", toJSON $ fromUUID u) + , ("description", toJSON $ findlog m u) + , ("here", toJSON $ hereu == u) + ] + +{- Filters a list of remotes to ones that have the listed uuids. -} +remotesWithUUID :: [Remote Annex] -> [UUID] -> [Remote Annex] +remotesWithUUID rs us = filter (\r -> uuid r `elem` us) rs + +{- Filters a list of remotes to ones that do not have the listed uuids. -} +remotesWithoutUUID :: [Remote Annex] -> [UUID] -> [Remote Annex] +remotesWithoutUUID rs us = filter (\r -> uuid r `notElem` us) rs + +{- Cost ordered lists of remotes that the Logs.Location indicate may have a key. + -} +keyPossibilities :: Key -> Annex [Remote Annex] +keyPossibilities key = fst <$> keyPossibilities' False key + +{- Cost ordered lists of remotes that the Logs.Location indicate may have a key. + - + - Also returns a list of UUIDs that are trusted to have the key + - (some may not have configured remotes). 
+ -} +keyPossibilitiesTrusted :: Key -> Annex ([Remote Annex], [UUID]) +keyPossibilitiesTrusted = keyPossibilities' True + +keyPossibilities' :: Bool -> Key -> Annex ([Remote Annex], [UUID]) +keyPossibilities' withtrusted key = do + u <- getUUID + trusted <- if withtrusted then trustGet Trusted else return [] + + -- get uuids of all remotes that are recorded to have the key + uuids <- keyLocations key + let validuuids = filter (/= u) uuids + + -- note that validuuids is assumed to not have dups + let validtrusteduuids = validuuids `intersect` trusted + + -- remotes that match uuids that have the key + allremotes <- filterM (repoNotIgnored . repo) =<< genList + let validremotes = remotesWithUUID allremotes validuuids + + return (sort validremotes, validtrusteduuids) + +{- Displays known locations of a key. -} +showLocations :: Key -> [UUID] -> Annex () +showLocations key exclude = do + u <- getUUID + uuids <- keyLocations key + untrusteduuids <- trustGet UnTrusted + let uuidswanted = filteruuids uuids (u:exclude++untrusteduuids) + let uuidsskipped = filteruuids uuids (u:exclude++uuidswanted) + ppuuidswanted <- Remote.prettyPrintUUIDs "wanted" uuidswanted + ppuuidsskipped <- Remote.prettyPrintUUIDs "skipped" uuidsskipped + showLongNote $ message ppuuidswanted ppuuidsskipped + where + filteruuids l x = filter (`notElem` x) l + message [] [] = "No other repository is known to contain the file." 
+ message rs [] = "Try making some of these repositories available:\n" ++ rs + message [] us = "Also these untrusted repositories may contain the file:\n" ++ us + message rs us = message rs [] ++ message [] us + +showTriedRemotes :: [Remote Annex] -> Annex () +showTriedRemotes [] = return () +showTriedRemotes remotes = + showLongNote $ "Unable to access these remotes: " ++ + (join ", " $ map name remotes) + +forceTrust :: TrustLevel -> String -> Annex () +forceTrust level remotename = do + r <- nameToUUID remotename + Annex.changeState $ \s -> + s { Annex.forcetrust = (r, level):Annex.forcetrust s } + +{- Used to log a change in a remote's having a key. The change is logged + - in the local repo, not on the remote. The process of transferring the + - key to the remote, or removing the key from it *may* log the change + - on the remote, but this cannot always be relied on. -} +logStatus :: Remote Annex -> Key -> Bool -> Annex () +logStatus remote key present = logChange key (uuid remote) status + where + status = if present then InfoPresent else InfoMissing diff --git a/Remote/Bup.hs b/Remote/Bup.hs new file mode 100644 index 0000000000..cbd5d584ac --- /dev/null +++ b/Remote/Bup.hs @@ -0,0 +1,229 @@ +{- Using bup as a remote. + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. 
+ -} + +module Remote.Bup (remote) where + +import qualified Data.ByteString.Lazy.Char8 as L +import System.IO.Error +import qualified Data.Map as M +import System.Process + +import Common.Annex +import Types.Remote +import qualified Git +import qualified Git.Command +import qualified Git.Config +import qualified Git.Construct +import Config +import Annex.Ssh +import Remote.Helper.Special +import Remote.Helper.Encryptable +import Crypto + +type BupRepo = String + +remote :: RemoteType Annex +remote = RemoteType { + typename = "bup", + enumerate = findSpecialRemotes "buprepo", + generate = gen, + setup = bupSetup +} + +gen :: Git.Repo -> UUID -> Maybe RemoteConfig -> Annex (Remote Annex) +gen r u c = do + buprepo <- getConfig r "buprepo" (error "missing buprepo") + cst <- remoteCost r (if bupLocal buprepo then semiCheapRemoteCost else expensiveRemoteCost) + bupr <- liftIO $ bup2GitRemote buprepo + (u', bupr') <- getBupUUID bupr u + + return $ encryptableRemote c + (storeEncrypted r buprepo) + (retrieveEncrypted buprepo) + Remote { + uuid = u', + cost = cst, + name = Git.repoDescribe r, + storeKey = store r buprepo, + retrieveKeyFile = retrieve buprepo, + removeKey = remove, + hasKey = checkPresent r bupr', + hasKeyCheap = bupLocal buprepo, + config = c, + repo = r + } + +bupSetup :: UUID -> RemoteConfig -> Annex RemoteConfig +bupSetup u c = do + -- verify configuration is sane + let buprepo = fromMaybe (error "Specify buprepo=") $ + M.lookup "buprepo" c + c' <- encryptionSetup c + + -- bup init will create the repository. + -- (If the repository already exists, bup init again appears safe.) + showAction "bup init" + bup "init" buprepo [] >>! error "bup init failed" + + storeBupUUID u buprepo + + -- The buprepo is stored in git config, as well as this repo's + -- persistent state, so it can vary between hosts. 
+ gitConfigSpecialRemote u c' "buprepo" buprepo + + return c' + +bupParams :: String -> BupRepo -> [CommandParam] -> [CommandParam] +bupParams command buprepo params = + Param command : [Param "-r", Param buprepo] ++ params + +bup :: String -> BupRepo -> [CommandParam] -> Annex Bool +bup command buprepo params = do + showOutput -- make way for bup output + liftIO $ boolSystem "bup" $ bupParams command buprepo params + +pipeBup :: [CommandParam] -> Maybe Handle -> Maybe Handle -> IO Bool +pipeBup params inh outh = do + p <- runProcess "bup" (toCommand params) + Nothing Nothing inh outh Nothing + ok <- waitForProcess p + case ok of + ExitSuccess -> return True + _ -> return False + +bupSplitParams :: Git.Repo -> BupRepo -> Key -> CommandParam -> Annex [CommandParam] +bupSplitParams r buprepo k src = do + o <- getConfig r "bup-split-options" "" + let os = map Param $ words o + showOutput -- make way for bup output + return $ bupParams "split" buprepo + (os ++ [Param "-n", Param (show k), src]) + +store :: Git.Repo -> BupRepo -> Key -> Annex Bool +store r buprepo k = do + src <- inRepo $ gitAnnexLocation k + params <- bupSplitParams r buprepo k (File src) + liftIO $ boolSystem "bup" params + +storeEncrypted :: Git.Repo -> BupRepo -> (Cipher, Key) -> Key -> Annex Bool +storeEncrypted r buprepo (cipher, enck) k = do + src <- inRepo $ gitAnnexLocation k + params <- bupSplitParams r buprepo enck (Param "-") + liftIO $ catchBoolIO $ + withEncryptedHandle cipher (L.readFile src) $ \h -> + pipeBup params (Just h) Nothing + +retrieve :: BupRepo -> Key -> FilePath -> Annex Bool +retrieve buprepo k f = do + let params = bupParams "join" buprepo [Param $ show k] + liftIO $ catchBoolIO $ do + tofile <- openFile f WriteMode + pipeBup params Nothing (Just tofile) + +retrieveEncrypted :: BupRepo -> (Cipher, Key) -> FilePath -> Annex Bool +retrieveEncrypted buprepo (cipher, enck) f = do + let params = bupParams "join" buprepo [Param $ show enck] + liftIO $ catchBoolIO $ do + (pid, h) 
<- hPipeFrom "bup" $ toCommand params + withDecryptedContent cipher (L.hGetContents h) $ L.writeFile f + forceSuccess pid + return True + +remove :: Key -> Annex Bool +remove _ = do + warning "content cannot be removed from bup remote" + return False + +{- Bup does not provide a way to tell if a given dataset is present + - in a bup repository. One way is to check if the git repository has + - a branch matching the name (as created by bup split -n). + -} +checkPresent :: Git.Repo -> Git.Repo -> Key -> Annex (Either String Bool) +checkPresent r bupr k + | Git.repoIsUrl bupr = do + showAction $ "checking " ++ Git.repoDescribe r + ok <- onBupRemote bupr boolSystem "git" params + return $ Right ok + | otherwise = liftIO $ catchMsgIO $ + boolSystem "git" $ Git.Command.gitCommandLine params bupr + where + params = + [ Params "show-ref --quiet --verify" + , Param $ "refs/heads/" ++ show k] + +{- Store UUID in the annex.uuid setting of the bup repository. -} +storeBupUUID :: UUID -> BupRepo -> Annex () +storeBupUUID u buprepo = do + r <- liftIO $ bup2GitRemote buprepo + if Git.repoIsUrl r + then do + showAction "storing uuid" + onBupRemote r boolSystem "git" + [Params $ "config annex.uuid " ++ v] + >>! error "ssh failed" + else liftIO $ do + r' <- Git.Config.read r + let olduuid = Git.Config.get "annex.uuid" "" r' + when (olduuid == "") $ + Git.Command.run "config" + [Param "annex.uuid", Param v] r' + where + v = fromUUID u + +onBupRemote :: Git.Repo -> (FilePath -> [CommandParam] -> IO a) -> FilePath -> [CommandParam] -> Annex a +onBupRemote r a command params = do + let dir = shellEscape (Git.workTree r) + sshparams <- sshToRepo r [Param $ + "cd " ++ dir ++ " && " ++ unwords (command : toCommand params)] + liftIO $ a "ssh" sshparams + +{- Allow for bup repositories on removable media by checking + - local bup repositories to see if they are available, and getting their + - uuid (which may be different from the stored uuid for the bup remote). 
+ -
+ - If a bup repository is not available, returns NoUUID.
+ - This will cause checkPresent to indicate nothing from the bup remote
+ - is known to be present.
+ -
+ - Also, returns a version of the repo with config read, if it is local.
+ -}
+getBupUUID :: Git.Repo -> UUID -> Annex (UUID, Git.Repo)
+getBupUUID r u
+    | Git.repoIsUrl r = return (u, r) -- url repos are not probed; the given uuid is trusted
+    | otherwise = liftIO $ do
+        ret <- try $ Git.Config.read r
+        case ret of
+            Right r' -> return (toUUID $ Git.Config.get "annex.uuid" "" r', r')
+            Left _ -> return (NoUUID, r) -- config unreadable: treat the repo as unavailable
+
+{- Converts a bup remote path spec into a Git.Repo. There are some
+ - differences in path representation between git and bup. -}
+bup2GitRemote :: BupRepo -> IO Git.Repo
+bup2GitRemote "" = do
+    -- bup -r "" operates on ~/.bup
+    h <- myHomeDir
+    Git.Construct.fromAbsPath $ h </> ".bup"
+bup2GitRemote r
+    | bupLocal r =
+        if "/" `isPrefixOf` r
+            then Git.Construct.fromAbsPath r
+            else error "please specify an absolute path"
+    | otherwise = Git.Construct.fromUrl $ "ssh://" ++ host ++ slash dir
+    where
+        bits = split ":" r
+        host = Prelude.head bits
+        dir = join ":" $ drop 1 bits
+        -- "host:~user/dir" is not supported specially by bup;
+        -- "host:dir" is relative to the home directory;
+        -- "host:" goes in ~/.bup
+        slash d
+            | null d = "/~/.bup"
+            | "/" `isPrefixOf` d = d
+            | otherwise = "/~/" ++ d
+
+bupLocal :: BupRepo -> Bool
+bupLocal = notElem ':' -- a spec without ':' has no "host:" part
diff --git a/Remote/Directory.hs b/Remote/Directory.hs
new file mode 100644
index 0000000000..7f78b2f493
--- /dev/null
+++ b/Remote/Directory.hs
@@ -0,0 +1,128 @@
+{- A "remote" that is just a filesystem directory.
+ -
+ - Copyright 2011 Joey Hess
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Remote.Directory (remote) where
+
+import qualified Data.ByteString.Lazy.Char8 as L
+import qualified Data.Map as M
+
+import Common.Annex
+import Utility.CopyFile
+import Types.Remote
+import qualified Git
+import Config
+import Utility.FileMode
+import Remote.Helper.Special
+import Remote.Helper.Encryptable
+import Crypto
+
+remote :: RemoteType Annex
+remote = RemoteType {
+    typename = "directory",
+    enumerate = findSpecialRemotes "directory",
+    generate = gen,
+    setup = directorySetup
+}
+
+gen :: Git.Repo -> UUID -> Maybe RemoteConfig -> Annex (Remote Annex)
+gen r u c = do
+    dir <- getConfig r "directory" (error "missing directory")
+    cst <- remoteCost r cheapRemoteCost
+    return $ encryptableRemote c
+        (storeEncrypted dir)
+        (retrieveEncrypted dir)
+        Remote {
+            uuid = u,
+            cost = cst,
+            name = Git.repoDescribe r,
+            storeKey = store dir,
+            retrieveKeyFile = retrieve dir,
+            removeKey = remove dir,
+            hasKey = checkPresent dir,
+            hasKeyCheap = True,
+            config = Nothing,
+            repo = r
+        }
+
+directorySetup :: UUID -> RemoteConfig -> Annex RemoteConfig
+directorySetup u c = do
+    -- verify configuration is sane
+    let dir = fromMaybe (error "Specify directory=") $
+            M.lookup "directory" c
+    liftIO $ doesDirectoryExist dir
+        >>! error $ "Directory does not exist: " ++ dir
+    c' <- encryptionSetup c
+
+    -- The directory is stored in git config, not in this remote's
+    -- persistent state, so it can vary between hosts.
+    gitConfigSpecialRemote u c' "directory" dir
+    return $ M.delete "directory" c'
+
+{- Locations to try to access a given Key in the Directory.
-}
+locations :: FilePath -> Key -> [FilePath]
+locations d k = map (d </>) (keyPaths k)
+
+withCheckedFile :: (FilePath -> IO Bool) -> FilePath -> Key -> (FilePath -> IO Bool) -> IO Bool
+withCheckedFile _ [] _ _ = return False
+withCheckedFile check d k a = go $ locations d k
+    where
+        go [] = return False
+        go (f:fs) = do
+            use <- check f
+            if use
+                then a f
+                else go fs
+
+withStoredFile :: FilePath -> Key -> (FilePath -> IO Bool) -> IO Bool
+withStoredFile = withCheckedFile doesFileExist
+
+store :: FilePath -> Key -> Annex Bool
+store d k = do
+    src <- inRepo $ gitAnnexLocation k
+    liftIO $ catchBoolIO $ storeHelper d k $ copyFileExternal src
+
+storeEncrypted :: FilePath -> (Cipher, Key) -> Key -> Annex Bool
+storeEncrypted d (cipher, enck) k = do
+    src <- inRepo $ gitAnnexLocation k
+    liftIO $ catchBoolIO $ storeHelper d enck $ encrypt src
+    where
+        encrypt src dest = do
+            withEncryptedContent cipher (L.readFile src) $ L.writeFile dest
+            return True
+
+storeHelper :: FilePath -> Key -> (FilePath -> IO Bool) -> IO Bool
+storeHelper d key a = do
+    let dest = Prelude.head $ locations d key -- always written to the first candidate location
+    let dir = parentDir dest
+    createDirectoryIfMissing True dir
+    allowWrite dir
+    ok <- a dest
+    when ok $ do
+        preventWrite dest
+        preventWrite dir
+    return ok
+
+retrieve :: FilePath -> Key -> FilePath -> Annex Bool
+retrieve d k f = liftIO $ withStoredFile d k $ \file -> copyFileExternal file f
+
+retrieveEncrypted :: FilePath -> (Cipher, Key) -> FilePath -> Annex Bool
+retrieveEncrypted d (cipher, enck) f =
+    liftIO $ withStoredFile d enck $ \file -> catchBoolIO $ do
+        withDecryptedContent cipher (L.readFile file) $ L.writeFile f
+        return True
+
+remove :: FilePath -> Key -> Annex Bool
+remove d k = liftIO $ withStoredFile d k $ \file -> catchBoolIO $ do
+    let dir = parentDir file
+    allowWrite dir
+    removeFile file
+    removeDirectory dir
+    return True
+
+checkPresent :: FilePath -> Key -> Annex (Either String Bool)
+checkPresent d k = liftIO $ catchMsgIO $ withStoredFile d k $
+    const $ return True -- withStoredFile checked that it exists
diff --git a/Remote/Git.hs b/Remote/Git.hs
new file mode 100644
index 0000000000..e527fa4fee
--- /dev/null
+++ b/Remote/Git.hs
@@ -0,0 +1,277 @@
+{- Standard git remotes.
+ -
+ - Copyright 2011 Joey Hess
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Remote.Git (remote) where
+
+import Control.Exception.Extensible
+import qualified Data.Map as M
+
+import Common.Annex
+import Utility.CopyFile
+import Utility.RsyncFile
+import Annex.Ssh
+import Types.Remote
+import qualified Git
+import qualified Git.Command
+import qualified Git.Config
+import qualified Git.Construct
+import qualified Annex
+import Annex.UUID
+import qualified Annex.Content
+import qualified Annex.BranchState
+import qualified Utility.Url as Url
+import Utility.TempFile
+import Config
+import Init
+
+remote :: RemoteType Annex
+remote = RemoteType {
+    typename = "git",
+    enumerate = list,
+    generate = gen,
+    setup = error "not supported"
+}
+
+list :: Annex [Git.Repo]
+list = do
+    c <- fromRepo Git.config
+    mapM (tweakurl c) =<< fromRepo Git.remotes
+    where
+        annexurl n = "remote." ++ n ++ ".annexurl"
+        tweakurl c r = do
+            let n = fromJust $ Git.remoteName r
+            case M.lookup (annexurl n) c of
+                Nothing -> return r
+                Just url -> inRepo $ \g ->
+                    Git.Construct.remoteNamed n $
+                        Git.Construct.fromRemoteLocation url g
+
+gen :: Git.Repo -> UUID -> Maybe RemoteConfig -> Annex (Remote Annex)
+gen r u _ = do
+    {- It's assumed to be cheap to read the config of non-URL remotes,
+     - so this is done each time git-annex is run. Conversely,
+     - the config of an URL remote is only read when there is no
+     - cached UUID value. -}
+    let cheap = not $ Git.repoIsUrl r
+    notignored <- repoNotIgnored r
+    r' <- case (cheap, notignored, u) of
+        (_, False, _) -> return r
+        (True, _, _) -> tryGitConfigRead r
+        (False, _, NoUUID) -> tryGitConfigRead r
+        _ -> return r
+
+    u' <- getRepoUUID r'
+
+    let defcst = if cheap then cheapRemoteCost else expensiveRemoteCost
+    cst <- remoteCost r' defcst
+
+    return Remote {
+        uuid = u',
+        cost = cst,
+        name = Git.repoDescribe r',
+        storeKey = copyToRemote r',
+        retrieveKeyFile = copyFromRemote r',
+        removeKey = dropKey r',
+        hasKey = inAnnex r',
+        hasKeyCheap = cheap,
+        config = Nothing,
+        repo = r'
+    }
+
+{- Tries to read the config for a specified remote, updates state, and
+ - returns the updated repo. -}
+tryGitConfigRead :: Git.Repo -> Annex Git.Repo
+tryGitConfigRead r
+    | not $ M.null $ Git.config r = return r -- already read
+    | Git.repoIsSsh r = store $ onRemote r (pipedconfig, r) "configlist" []
+    | Git.repoIsHttp r = store $ safely geturlconfig
+    | Git.repoIsUrl r = return r
+    | otherwise = store $ safely $ onLocal r $ do
+        ensureInitialized
+        Annex.getState Annex.repo
+    where
+        -- Reading config can fail due to IO error or
+        -- for other reasons; catch all possible exceptions.
+        safely a = do
+            result <- liftIO (try a :: IO (Either SomeException Git.Repo))
+            case result of
+                Left _ -> return r
+                Right r' -> return r'
+
+        pipedconfig cmd params = safely $
+            pOpen ReadFromPipe cmd (toCommand params) $
+                Git.Config.hRead r
+
+        geturlconfig = do
+            s <- Url.get (Git.repoLocation r ++ "/config")
+            withTempFile "git-annex.tmp" $ \tmpfile h -> do
+                hPutStr h s
+                hClose h
+                pOpen ReadFromPipe "git" ["config", "--null", "--list", "--file", tmpfile] $
+                    Git.Config.hRead r
+
+        store a = do
+            r' <- a
+            g <- gitRepo
+            let l = Git.remotes g
+            let g' = g { Git.remotes = exchange l r' }
+            Annex.changeState $ \s -> s { Annex.repo = g' }
+            return r'
+
+        exchange [] _ = []
+        exchange (old:ls) new =
+            if Git.remoteName old == Git.remoteName new
+                then new : exchange ls new
+                else old : exchange ls new
+
+{- Checks if a given remote has the content for a key inAnnex.
+ - If the remote cannot be accessed, or if it cannot determine
+ - whether it has the content, returns a Left error message.
+ -}
+inAnnex :: Git.Repo -> Key -> Annex (Either String Bool)
+inAnnex r key
+    | Git.repoIsHttp r = checkhttp
+    | Git.repoIsUrl r = checkremote
+    | otherwise = checklocal
+    where
+        checkhttp = liftIO $ go "no urls to check" $ keyUrls r key -- seed is only returned if keyUrls is empty
+            where
+                go e [] = return $ Left e -- every url failed: report the last error
+                go _ (u:us) = do
+                    res <- catchMsgIO $ Url.exists u
+                    case res of
+                        Left e -> go e us
+                        v -> return v
+        checkremote = do
+            showAction $ "checking " ++ Git.repoDescribe r
+            onRemote r (check, unknown) "inannex" [Param (show key)]
+            where
+                check c p = dispatch <$> safeSystem c p
+                dispatch ExitSuccess = Right True
+                dispatch (ExitFailure 1) = Right False
+                dispatch _ = unknown
+        checklocal = dispatch <$> check
+            where
+                check = liftIO $ catchMsgIO $ onLocal r $
+                    Annex.Content.inAnnexSafe key
+                dispatch (Left e) = Left e
+                dispatch (Right (Just b)) = Right b
+                dispatch (Right Nothing) = unknown
+        unknown = Left $ "unable to check " ++ Git.repoDescribe r
+
+{- Runs an action on a local repository inexpensively, by making an annex
+ - monad using that repository. -}
+onLocal :: Git.Repo -> Annex a -> IO a
+onLocal r a = do
+    -- Avoid re-reading the repository's configuration if it was
+    -- already read.
+    state <- if M.null $ Git.config r
+        then Annex.new r
+        else return $ Annex.newState r
+    Annex.eval state $ do
+        -- No need to update the branch; its data is not used
+        -- for anything onLocal is used to do.
+        Annex.BranchState.disableUpdate
+        ret <- a
+        liftIO Git.Command.reap
+        return ret
+
+keyUrls :: Git.Repo -> Key -> [String]
+keyUrls r key = map tourl (annexLocations key)
+    where
+        tourl l = Git.repoLocation r ++ "/" ++ l
+
+dropKey :: Git.Repo -> Key -> Annex Bool
+dropKey r key
+    | Git.repoIsHttp r = error "dropping from http repo not supported"
+    | otherwise = onRemote r (boolSystem, False) "dropkey"
+        [ Params "--quiet --force"
+        , Param $ show key
+        ]
+
+{- Tries to copy a key's content from a remote's annex to a file.
-}
+copyFromRemote :: Git.Repo -> Key -> FilePath -> Annex Bool
+copyFromRemote r key file
+    | not $ Git.repoIsUrl r = do
+        params <- rsyncParams r
+        loc <- liftIO $ gitAnnexLocation key r
+        rsyncOrCopyFile params loc file
+    | Git.repoIsSsh r = rsyncHelper =<< rsyncParamsRemote r True key file
+    | Git.repoIsHttp r = liftIO $ downloadurls $ keyUrls r key
+    | otherwise = error "copying from non-ssh, non-http repo not supported"
+    where
+        downloadurls us = untilTrue us $ \u -> Url.download u file
+
+{- Tries to copy a key's content to a remote's annex. -}
+copyToRemote :: Git.Repo -> Key -> Annex Bool
+copyToRemote r key
+    | not $ Git.repoIsUrl r = do
+        keysrc <- inRepo $ gitAnnexLocation key
+        params <- rsyncParams r
+        -- run copy from perspective of remote
+        liftIO $ onLocal r $ do
+            ensureInitialized
+            ok <- Annex.Content.getViaTmp key $
+                rsyncOrCopyFile params keysrc
+            Annex.Content.saveState
+            return ok
+    | Git.repoIsSsh r = do
+        keysrc <- inRepo $ gitAnnexLocation key
+        rsyncHelper =<< rsyncParamsRemote r False key keysrc
+    | otherwise = error "copying to non-ssh repo not supported"
+
+rsyncHelper :: [CommandParam] -> Annex Bool
+rsyncHelper p = do
+    showOutput -- make way for progress bar
+    res <- liftIO $ rsync p
+    if res
+        then return res
+        else do
+            showLongNote "rsync failed -- run git annex again to resume file transfer"
+            return res
+
+{- Copies a file with rsync unless both locations are on the same
+ - filesystem, in which case a plain cp could be faster. -}
+rsyncOrCopyFile :: [CommandParam] -> FilePath -> FilePath -> Annex Bool
+rsyncOrCopyFile rsyncparams src dest = do
+    ss <- liftIO $ getFileStatus $ parentDir src
+    ds <- liftIO $ getFileStatus $ parentDir dest
+    if deviceID ss == deviceID ds -- same device id on both parent directories
+        then liftIO $ copyFileExternal src dest
+        else rsyncHelper $ rsyncparams ++ [Param src, Param dest]
+
+{- Generates rsync parameters that ssh to the remote and asks it
+ - to either receive or send the key's content.
-}
+rsyncParamsRemote :: Git.Repo -> Bool -> Key -> FilePath -> Annex [CommandParam]
+rsyncParamsRemote r sending key file = do
+    Just (shellcmd, shellparams) <- git_annex_shell r
+        (if sending then "sendkey" else "recvkey")
+        [ Param $ show key
+        -- Command is terminated with "--", because
+        -- rsync will tack on its own options afterwards,
+        -- and they need to be ignored.
+        , Param "--"
+        ]
+    -- Convert the ssh command into rsync command line.
+    let eparam = rsyncShell (Param shellcmd:shellparams)
+    o <- rsyncParams r
+    if sending
+        then return $ o ++ eparam ++ [dummy, File file] -- "sendkey": dummy first, local file last
+        else return $ o ++ eparam ++ [File file, dummy] -- "recvkey": local file first, dummy last
+    where
+        -- the rsync shell parameter controls where rsync
+        -- goes, so the source/dest parameter can be a dummy value,
+        -- that just enables remote rsync mode.
+        dummy = Param ":"
+
+rsyncParams :: Git.Repo -> Annex [CommandParam]
+rsyncParams r = do
+    o <- getConfig r "rsync-options" ""
+    return $ options ++ map Param (words o)
+    where
+        -- --inplace to resume partial files
+        options = [Params "-p --progress --inplace"]
diff --git a/Remote/Helper/Encryptable.hs b/Remote/Helper/Encryptable.hs
new file mode 100644
index 0000000000..99f48fe7b0
--- /dev/null
+++ b/Remote/Helper/Encryptable.hs
@@ -0,0 +1,86 @@
+{- common functions for encryptable remotes
+ -
+ - Copyright 2011 Joey Hess
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Remote.Helper.Encryptable where
+
+import qualified Data.Map as M
+
+import Common.Annex
+import Types.Remote
+import Crypto
+import qualified Annex
+import Config
+
+{- Encryption setup for a remote. The user must specify whether to use
+ - an encryption key, or not encrypt. An encrypted cipher is created, or is
+ - updated to be accessible to an additional encryption key.
-}
+encryptionSetup :: RemoteConfig -> Annex RemoteConfig
+encryptionSetup c =
+    case (M.lookup "encryption" c, extractCipher c) of
+        (Nothing, Nothing) -> error "Specify encryption=key or encryption=none"
+        (Just "none", Nothing) -> return c
+        (Just "none", Just _) -> error "Cannot change encryption type of existing remote."
+        (Nothing, Just _) -> return c -- no encryption= given; config with its cipher is returned as is
+        (Just _, Nothing) -> use "encryption setup" $ genCipher c
+        (Just _, Just v) -> use "encryption updated" $ updateCipher c v
+    where
+        use m a = do
+            cipher <- liftIO a
+            showNote $ m ++ " " ++ describeCipher cipher
+            return $ M.delete "encryption" $ storeCipher c cipher -- the "encryption" key itself is dropped
+
+{- Modifies a Remote to support encryption.
+ -
+ - Two additional functions must be provided by the remote,
+ - to support storing and retrieving encrypted content. -}
+encryptableRemote
+    :: Maybe RemoteConfig
+    -> ((Cipher, Key) -> Key -> Annex Bool)
+    -> ((Cipher, Key) -> FilePath -> Annex Bool)
+    -> Remote Annex
+    -> Remote Annex
+encryptableRemote c storeKeyEncrypted retrieveKeyFileEncrypted r =
+    r {
+        storeKey = store,
+        retrieveKeyFile = retrieve,
+        removeKey = withkey $ removeKey r,
+        hasKey = withkey $ hasKey r,
+        cost = cost r + encryptedRemoteCostAdj
+    }
+    where
+        store k = cip k >>= maybe
+            (storeKey r k)
+            (`storeKeyEncrypted` k)
+        retrieve k f = cip k >>= maybe
+            (retrieveKeyFile r k f)
+            (`retrieveKeyFileEncrypted` f)
+        withkey a k = cip k >>= maybe (a k) (a . snd) -- with a cipher, act on the encrypted key
+        cip = cipherKey c
+
+{- Gets encryption Cipher. The decrypted Ciphers are cached in the Annex
+ - state.
-}
+remoteCipher :: RemoteConfig -> Annex (Maybe Cipher)
+remoteCipher c = go $ extractCipher c
+    where
+        go Nothing = return Nothing
+        go (Just encipher) = do
+            cache <- Annex.getState Annex.ciphers
+            case M.lookup encipher cache of
+                Just cipher -> return $ Just cipher -- cache hit
+                Nothing -> decrypt encipher cache
+        decrypt encipher cache = do
+            showNote "gpg"
+            cipher <- liftIO $ decryptCipher c encipher
+            Annex.changeState (\s -> s { Annex.ciphers = M.insert encipher cipher cache })
+            return $ Just cipher
+
+{- Gets encryption Cipher, and encrypted version of Key; Nothing without a config or cipher. -}
+cipherKey :: Maybe RemoteConfig -> Key -> Annex (Maybe (Cipher, Key))
+cipherKey Nothing _ = return Nothing
+cipherKey (Just c) k = maybe Nothing encrypt <$> remoteCipher c
+    where
+        encrypt ciphertext = Just (ciphertext, encryptKey ciphertext k)
diff --git a/Remote/Helper/Special.hs b/Remote/Helper/Special.hs
new file mode 100644
index 0000000000..3f6c9c155f
--- /dev/null
+++ b/Remote/Helper/Special.hs
@@ -0,0 +1,40 @@
+{- common functions for special remotes
+ -
+ - Copyright 2011 Joey Hess
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Remote.Helper.Special where
+
+import qualified Data.Map as M
+
+import Common.Annex
+import Types.Remote
+import qualified Git
+import qualified Git.Command
+import qualified Git.Construct
+
+{- Special remotes don't have a configured url, so Git.Repo does not
+ - automatically generate remotes for them. This looks for a different
+ - configuration key instead.
+ -}
+findSpecialRemotes :: String -> Annex [Git.Repo]
+findSpecialRemotes s = do
+    m <- fromRepo Git.config
+    liftIO $ mapM construct $ remotepairs m
+    where
+        remotepairs = M.toList . M.filterWithKey match
+        construct (k,_) = Git.Construct.remoteNamedFromKey k Git.Construct.fromUnknown
+        match k _ = startswith "remote." k && endswith (".annex-"++s) k -- keys shaped remote.*.annex-<s>
+
+{- Sets up configuration for a special remote in .git/config.
-}
+gitConfigSpecialRemote :: UUID -> RemoteConfig -> String -> String -> Annex ()
+gitConfigSpecialRemote u c k v = do
+    set ("annex-"++k) v
+    set ("annex-uuid") (fromUUID u)
+    where
+        set a b = inRepo $ Git.Command.run "config"
+            [Param (configsetting a), Param b]
+        remotename = fromJust (M.lookup "name" c)
+        configsetting s = "remote." ++ remotename ++ "." ++ s
diff --git a/Remote/Hook.hs b/Remote/Hook.hs
new file mode 100644
index 0000000000..5c761f43b0
--- /dev/null
+++ b/Remote/Hook.hs
@@ -0,0 +1,142 @@
+{- A remote that provides hooks to run shell commands.
+ -
+ - Copyright 2011 Joey Hess
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Remote.Hook (remote) where
+
+import qualified Data.ByteString.Lazy.Char8 as L
+import qualified Data.Map as M
+import System.Exit
+
+import Common.Annex
+import Types.Remote
+import qualified Git
+import Config
+import Annex.Content
+import Remote.Helper.Special
+import Remote.Helper.Encryptable
+import Crypto
+
+remote :: RemoteType Annex
+remote = RemoteType {
+    typename = "hook",
+    enumerate = findSpecialRemotes "hooktype",
+    generate = gen,
+    setup = hookSetup
+}
+
+gen :: Git.Repo -> UUID -> Maybe RemoteConfig -> Annex (Remote Annex)
+gen r u c = do
+    hooktype <- getConfig r "hooktype" (error "missing hooktype")
+    cst <- remoteCost r expensiveRemoteCost
+    return $ encryptableRemote c
+        (storeEncrypted hooktype)
+        (retrieveEncrypted hooktype)
+        Remote {
+            uuid = u,
+            cost = cst,
+            name = Git.repoDescribe r,
+            storeKey = store hooktype,
+            retrieveKeyFile = retrieve hooktype,
+            removeKey = remove hooktype,
+            hasKey = checkPresent r hooktype,
+            hasKeyCheap = False,
+            config = Nothing,
+            repo = r
+        }
+
+hookSetup :: UUID -> RemoteConfig -> Annex RemoteConfig
+hookSetup u c = do
+    let hooktype = fromMaybe (error "Specify hooktype=") $
+            M.lookup "hooktype" c
+    c' <- encryptionSetup c
+    gitConfigSpecialRemote u c' "hooktype" hooktype
+    return c'
+
+hookEnv :: Key -> Maybe FilePath -> Maybe [(String, String)]
+hookEnv k f = Just $ fileenv f ++ keyenv -- ANNEX_FILE (if given), ANNEX_KEY, ANNEX_HASH_1, ANNEX_HASH_2
+    where
+        env s v = ("ANNEX_" ++ s, v)
+        keyenv =
+            [ env "KEY" (show k)
+            , env "HASH_1" (hashbits !! 0)
+            , env "HASH_2" (hashbits !! 1)
+            ]
+        fileenv Nothing = []
+        fileenv (Just file) = [env "FILE" file]
+        hashbits = map takeDirectory $ splitPath $ hashDirMixed k
+
+lookupHook :: String -> String -> Annex (Maybe String)
+lookupHook hooktype hook = do
+    g <- gitRepo
+    command <- getConfig g hookname ""
+    if null command
+        then do
+            warning $ "missing configuration for " ++ hookname
+            return Nothing
+        else return $ Just command
+    where
+        hookname = hooktype ++ "-" ++ hook ++ "-hook"
+
+runHook :: String -> String -> Key -> Maybe FilePath -> Annex Bool -> Annex Bool
+runHook hooktype hook k f a = maybe (return False) run =<< lookupHook hooktype hook
+    where
+        run command = do
+            showOutput -- make way for hook output
+            res <- liftIO $ boolSystemEnv
+                "sh" [Param "-c", Param command] $ hookEnv k f
+            if res
+                then a -- follow-up action runs only after the hook succeeded
+                else do
+                    warning $ hook ++ " hook exited nonzero!"
+                    return res
+
+store :: String -> Key -> Annex Bool
+store h k = do
+    src <- inRepo $ gitAnnexLocation k
+    runHook h "store" k (Just src) $ return True
+
+storeEncrypted :: String -> (Cipher, Key) -> Key -> Annex Bool
+storeEncrypted h (cipher, enck) k = withTmp enck $ \tmp -> do
+    src <- inRepo $ gitAnnexLocation k
+    liftIO $ withEncryptedContent cipher (L.readFile src) $ L.writeFile tmp
+    runHook h "store" enck (Just tmp) $ return True
+
+retrieve :: String -> Key -> FilePath -> Annex Bool
+retrieve h k f = runHook h "retrieve" k (Just f) $ return True
+
+retrieveEncrypted :: String -> (Cipher, Key) -> FilePath -> Annex Bool
+retrieveEncrypted h (cipher, enck) f = withTmp enck $ \tmp ->
+    runHook h "retrieve" enck (Just tmp) $ liftIO $ catchBoolIO $ do
+        withDecryptedContent cipher (L.readFile tmp) $ L.writeFile f
+        return True
+
+remove :: String -> Key -> Annex Bool
+remove h k = runHook h "remove" k Nothing $ return True
+
+checkPresent :: Git.Repo -> String -> Key -> Annex (Either String Bool)
+checkPresent r h k = do
+    showAction $ "checking " ++ Git.repoDescribe r
+    v <- lookupHook h "checkpresent"
+    liftIO $ catchMsgIO $ check v
+    where
+        findkey s = show k `elem` lines s -- present iff the hook printed the key on a line of its own
+        env = hookEnv k Nothing
+        check Nothing = error "checkpresent hook misconfigured"
+        check (Just hook) = do
+            (frompipe, topipe) <- createPipe
+            pid <- forkProcess $ do
+                _ <- dupTo topipe stdOutput
+                closeFd frompipe
+                executeFile "sh" True ["-c", hook] env
+            closeFd topipe
+            fromh <- fdToHandle frompipe
+            reply <- hGetContentsStrict fromh
+            hClose fromh
+            s <- getProcessStatus True False pid
+            case s of
+                Just (Exited ExitSuccess) -> return $ findkey reply
+                _ -> error "checkpresent hook failed"
diff --git a/Remote/Rsync.hs b/Remote/Rsync.hs
new file mode 100644
index 0000000000..68566c52a5
--- /dev/null
+++ b/Remote/Rsync.hs
@@ -0,0 +1,207 @@
+{- A remote that is only accessible by rsync.
+ -
+ - Copyright 2011 Joey Hess
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Remote.Rsync (remote) where
+
+import qualified Data.ByteString.Lazy.Char8 as L
+import qualified Data.Map as M
+
+import Common.Annex
+import Types.Remote
+import qualified Git
+import Config
+import Annex.Content
+import Remote.Helper.Special
+import Remote.Helper.Encryptable
+import Crypto
+import Utility.RsyncFile
+
+type RsyncUrl = String
+
+data RsyncOpts = RsyncOpts {
+    rsyncUrl :: RsyncUrl,
+    rsyncOptions :: [CommandParam]
+}
+
+remote :: RemoteType Annex
+remote = RemoteType {
+    typename = "rsync",
+    enumerate = findSpecialRemotes "rsyncurl",
+    generate = gen,
+    setup = rsyncSetup
+}
+
+gen :: Git.Repo -> UUID -> Maybe RemoteConfig -> Annex (Remote Annex)
+gen r u c = do
+    o <- genRsyncOpts r
+    cst <- remoteCost r expensiveRemoteCost
+    return $ encryptableRemote c
+        (storeEncrypted o)
+        (retrieveEncrypted o)
+        Remote {
+            uuid = u,
+            cost = cst,
+            name = Git.repoDescribe r,
+            storeKey = store o,
+            retrieveKeyFile = retrieve o,
+            removeKey = remove o,
+            hasKey = checkPresent r o,
+            hasKeyCheap = False,
+            config = Nothing,
+            repo = r
+        }
+
+genRsyncOpts :: Git.Repo -> Annex RsyncOpts
+genRsyncOpts r = do
+    url <- getConfig r "rsyncurl" (error "missing rsyncurl")
+    opts <- getConfig r "rsync-options" ""
+    return $ RsyncOpts url $ map Param $ filter safe $ words opts
+    where
+        safe o
+            -- Don't allow user to pass --delete to rsync;
+            -- that could cause it to delete other keys
+            -- in the same hash bucket as a key it sends.
+            | o == "--delete" = False
+            | o == "--delete-excluded" = False
+            | otherwise = True
+
+rsyncSetup :: UUID -> RemoteConfig -> Annex RemoteConfig
+rsyncSetup u c = do
+    -- verify configuration is sane
+    let url = fromMaybe (error "Specify rsyncurl=") $
+            M.lookup "rsyncurl" c
+    c' <- encryptionSetup c
+
+    -- The rsyncurl is stored in git config, not only in this remote's
+    -- persistent state, so it can vary between hosts.
+    gitConfigSpecialRemote u c' "rsyncurl" url
+    return c'
+
+rsyncEscape :: RsyncOpts -> String -> String
+rsyncEscape o s
+    | rsyncUrlIsShell (rsyncUrl o) = shellEscape s
+    | otherwise = s
+
+rsyncUrls :: RsyncOpts -> Key -> [String]
+rsyncUrls o k = map use annexHashes -- one candidate url per hash directory scheme
+    where
+        use h = rsyncUrl o </> h k </> rsyncEscape o (f </> f)
+        f = keyFile k
+
+store :: RsyncOpts -> Key -> Annex Bool
+store o k = rsyncSend o k =<< inRepo (gitAnnexLocation k)
+
+storeEncrypted :: RsyncOpts -> (Cipher, Key) -> Key -> Annex Bool
+storeEncrypted o (cipher, enck) k = withTmp enck $ \tmp -> do
+    src <- inRepo $ gitAnnexLocation k
+    liftIO $ withEncryptedContent cipher (L.readFile src) $ L.writeFile tmp
+    rsyncSend o enck tmp
+
+retrieve :: RsyncOpts -> Key -> FilePath -> Annex Bool
+retrieve o k f = untilTrue (rsyncUrls o k) $ \u ->
+    rsyncRemote o
+        -- use inplace when retrieving to support resuming
+        [ Param "--inplace"
+        , Param u
+        , Param f
+        ]
+
+retrieveEncrypted :: RsyncOpts -> (Cipher, Key) -> FilePath -> Annex Bool
+retrieveEncrypted o (cipher, enck) f = withTmp enck $ \tmp -> do
+    res <- retrieve o enck tmp
+    if res
+        then liftIO $ catchBoolIO $ do
+            withDecryptedContent cipher (L.readFile tmp) $ L.writeFile f
+            return True
+        else return res
+
+remove :: RsyncOpts -> Key -> Annex Bool
+remove o k = withRsyncScratchDir $ \tmp -> liftIO $ do
+    {- Send an empty directory to rsync to make it delete. -}
+    let dummy = tmp </> keyFile k
+    createDirectoryIfMissing True dummy
+    rsync $ rsyncOptions o ++
+        map (\s -> Param $ "--include=" ++ s) includes ++
+        [ Param "--exclude=*" -- exclude everything else
+        , Params "--quiet --delete --recursive"
+        , partialParams
+        , Param $ addTrailingPathSeparator dummy
+        , Param $ rsyncUrl o
+        ]
+    where
+        {- Specify include rules to match the directories where the
+         - content could be. Note that the parent directories have
+         - to also be explicitly included, due to how rsync
+         - traverses directories. -}
+        includes = concatMap use annexHashes
+        use h = let dir = h k in
+            [ parentDir dir
+            , dir
+            -- match content directory and anything in it
+            , dir </> keyFile k </> "***"
+            ]
+
+checkPresent :: Git.Repo -> RsyncOpts -> Key -> Annex (Either String Bool)
+checkPresent r o k = do
+    showAction $ "checking " ++ Git.repoDescribe r
+    -- note: Does not currently differentiate between rsync failing
+    -- to connect, and the file not being present.
+    Right <$> check
+    where
+        check = untilTrue (rsyncUrls o k) $ \u ->
+            liftIO $ boolSystem "sh" [Param "-c", Param (cmd u)]
+        cmd u = "rsync --quiet " ++ shellEscape u ++ " 2>/dev/null"
+
+{- Rsync params to enable resumes of sending files safely,
+ - ensure that files are only moved into place once complete
+ -}
+partialParams :: CommandParam
+partialParams = Params "--no-inplace --partial --partial-dir=.rsync-partial"
+
+{- Runs an action in an empty scratch directory that can be used to build
+ - up trees for rsync. -}
+withRsyncScratchDir :: (FilePath -> Annex Bool) -> Annex Bool
+withRsyncScratchDir a = do
+    pid <- liftIO getProcessID
+    t <- fromRepo gitAnnexTmpDir
+    let tmp = t </> "rsynctmp" </> show pid -- per-process directory name
+    nuke tmp
+    liftIO $ createDirectoryIfMissing True tmp
+    res <- a tmp
+    nuke tmp
+    return res
+    where
+        nuke d = liftIO $
+            doesDirectoryExist d >>? removeDirectoryRecursive d
+
+rsyncRemote :: RsyncOpts -> [CommandParam] -> Annex Bool
+rsyncRemote o params = do
+    showOutput -- make way for progress bar
+    res <- liftIO $ rsync $ rsyncOptions o ++ defaultParams ++ params
+    if res
+        then return res
+        else do
+            showLongNote "rsync failed -- run git annex again to resume file transfer"
+            return res
+    where
+        defaultParams = [Params "--progress"]
+
+{- To send a single key is slightly tricky; need to build up a temporary
+   directory structure to pass to rsync so it can create the hash
+   directories.
-}
+rsyncSend :: RsyncOpts -> Key -> FilePath -> Annex Bool
+rsyncSend o k src = withRsyncScratchDir $ \tmp -> do
+    let dest = tmp </> Prelude.head (keyPaths k)
+    liftIO $ createDirectoryIfMissing True $ parentDir dest
+    liftIO $ createLink src dest
+    rsyncRemote o
+        [ Param "--recursive"
+        , partialParams
+        -- tmp/ to send contents of tmp dir
+        , Param $ addTrailingPathSeparator tmp
+        , Param $ rsyncUrl o
+        ]
diff --git a/Remote/S3real.hs b/Remote/S3real.hs
new file mode 100644
index 0000000000..b79939b902
--- /dev/null
+++ b/Remote/S3real.hs
@@ -0,0 +1,310 @@
+{- Amazon S3 remotes.
+ -
+ - Copyright 2011 Joey Hess
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Remote.S3 (remote) where
+
+import Network.AWS.AWSConnection
+import Network.AWS.S3Object
+import Network.AWS.S3Bucket hiding (size)
+import Network.AWS.AWSResult
+import qualified Data.ByteString.Lazy.Char8 as L
+import qualified Data.Map as M
+import Data.Char
+import System.Environment
+import System.Posix.Env (setEnv)
+
+import Common.Annex
+import Types.Remote
+import Types.Key
+import qualified Git
+import Config
+import Remote.Helper.Special
+import Remote.Helper.Encryptable
+import Crypto
+import Annex.Content
+import Utility.Base64
+
+remote :: RemoteType Annex
+remote = RemoteType {
+    typename = "S3",
+    enumerate = findSpecialRemotes "s3",
+    generate = gen,
+    setup = s3Setup
+}
+
+gen :: Git.Repo -> UUID -> Maybe RemoteConfig -> Annex (Remote Annex)
+gen r u c = do
+    cst <- remoteCost r expensiveRemoteCost
+    return $ gen' r u c cst
+gen' :: Git.Repo -> UUID -> Maybe RemoteConfig -> Int -> Remote Annex
+gen' r u c cst =
+    encryptableRemote c
+        (storeEncrypted this)
+        (retrieveEncrypted this)
+        this
+    where
+        this = Remote {
+            uuid = u,
+            cost = cst,
+            name = Git.repoDescribe r,
+            storeKey = store this,
+            retrieveKeyFile = retrieve this,
+            removeKey = remove this,
+            hasKey = checkPresent this,
+            hasKeyCheap = False,
+            config = c,
+            repo = r
+        }
+
+s3Setup :: UUID -> RemoteConfig -> Annex RemoteConfig
+s3Setup u c = handlehost $ M.lookup "host" c
+    where
+        remotename = fromJust (M.lookup "name" c)
+        defbucket = remotename ++ "-" ++ fromUUID u
+        defaults = M.fromList
+            [ ("datacenter", "US")
+            , ("storageclass", "STANDARD")
+            , ("host", defaultAmazonS3Host)
+            , ("port", show defaultAmazonS3Port)
+            , ("bucket", defbucket)
+            ]
+
+        handlehost Nothing = defaulthost
+        handlehost (Just h)
+            | ".archive.org" `isSuffixOf` map toLower h = archiveorg
+            | otherwise = defaulthost
+
+        use fullconfig = do
+            gitConfigSpecialRemote u fullconfig "s3" "true"
+            s3SetCreds fullconfig
+
+        defaulthost = do
+            c' <- encryptionSetup c
+            let fullconfig = c' `M.union` defaults
+            genBucket fullconfig
+            use fullconfig
+
+        archiveorg = do
+            showNote "Internet Archive mode"
+            maybe (error "specify bucket=") (const $ return ()) $
+                M.lookup "bucket" archiveconfig
+            use archiveconfig
+            where
+                archiveconfig =
+                    -- hS3 does not pass through
+                    -- x-archive-* headers
+                    M.mapKeys (replace "x-archive-" "x-amz-") $
+                    -- encryption does not make sense here
+                    M.insert "encryption" "none" $
+                    M.union c $
+                    -- special constraints on key names
+                    M.insert "mungekeys" "ia" $
+                    -- bucket created only when files
+                    -- are uploaded
+                    M.insert "x-amz-auto-make-bucket" "1" $
+                    -- no default bucket name; should
+                    -- be human-readable
+                    M.delete "bucket" defaults
+
+store :: Remote Annex -> Key -> Annex Bool
+store r k = s3Action r False $ \(conn, bucket) -> do
+    dest <- inRepo $ gitAnnexLocation k
+    res <- liftIO $ storeHelper (conn, bucket) r k dest
+    s3Bool res
+
+storeEncrypted :: Remote Annex -> (Cipher, Key) -> Key -> Annex Bool
+storeEncrypted r (cipher, enck) k = s3Action r False $ \(conn, bucket) ->
+    -- To get file size of the encrypted content, have to use a temp file.
+    -- (An alternative would be chunking to a constant size.)
+    withTmp enck $ \tmp -> do
+        f <- inRepo $ gitAnnexLocation k
+        liftIO $ withEncryptedContent cipher (L.readFile f) $ \s -> L.writeFile tmp s
+        res <- liftIO $ storeHelper (conn, bucket) r enck tmp
+        s3Bool res
+
+storeHelper :: (AWSConnection, String) -> Remote Annex -> Key -> FilePath -> IO (AWSResult ())
+storeHelper (conn, bucket) r k file = do
+    content <- liftIO $ L.readFile file
+    -- size is provided to S3 so the whole content does not need to be
+    -- buffered to calculate it
+    size <- maybe getsize (return . fromIntegral) $ keySize k
+    let object = setStorageClass storageclass $
+            S3Object bucket (bucketFile r k) ""
+                (("Content-Length", show size) : xheaders) content
+    sendObject conn object
+    where
+        storageclass =
+            case fromJust $ M.lookup "storageclass" $ fromJust $ config r of
+                "REDUCED_REDUNDANCY" -> REDUCED_REDUNDANCY
+                _ -> STANDARD
+        getsize = do
+            s <- liftIO $ getFileStatus file
+            return $ fileSize s
+
+        xheaders = filter isxheader $ M.assocs $ fromJust $ config r
+        isxheader (h, _) = "x-amz-" `isPrefixOf` h
+
+retrieve :: Remote Annex -> Key -> FilePath -> Annex Bool
+retrieve r k f = s3Action r False $ \(conn, bucket) -> do
+    res <- liftIO $ getObject conn $ bucketKey r bucket k
+    case res of
+        Right o -> do
+            liftIO $ L.writeFile f $ obj_data o
+            return True
+        Left e -> s3Warning e
+
+retrieveEncrypted :: Remote Annex -> (Cipher, Key) -> FilePath -> Annex Bool
+retrieveEncrypted r (cipher, enck) f = s3Action r False $ \(conn, bucket) -> do
+    res <- liftIO $ getObject conn $ bucketKey r bucket enck
+    case res of
+        Right o -> liftIO $
+            withDecryptedContent cipher (return $ obj_data o) $ \content -> do
+                L.writeFile f content
+                return True
+        Left e -> s3Warning e
+
+remove :: Remote Annex -> Key -> Annex Bool
+remove r k = s3Action r False $ \(conn, bucket) -> do
+    res <- liftIO $ deleteObject conn $ bucketKey r bucket k
+    s3Bool res
+
+checkPresent :: Remote Annex -> Key -> Annex (Either String Bool)
+checkPresent r k = s3Action r noconn $ \(conn, bucket) -> do
+    showAction $ "checking " ++ name r
+    res <- liftIO $ getObjectInfo conn $ bucketKey r bucket k
+    case res of
+        Right _ -> return $ Right True
+        Left (AWSError _ _) -> return $ Right False
+        Left e -> return $ Left (prettyReqError e) -- a real message, not an error thunk
+    where
+        noconn = Left "S3 not configured" -- returned by s3Action when bucket or connection is missing
+
+s3Warning :: ReqError -> Annex Bool
+s3Warning e = do
+    warning $ prettyReqError e
+    return False
+
+s3Error :: ReqError -> a
+s3Error e = error $ prettyReqError e
+
+s3Bool :: AWSResult () -> Annex Bool
+s3Bool (Right _) = return True
+s3Bool (Left e) = s3Warning e
+
+s3Action :: Remote Annex -> a -> ((AWSConnection, String) -> Annex a) -> Annex a
+s3Action r noconn action = do
+    when (isNothing $ config r) $
+        error $ "Missing configuration for special remote " ++ name r
+    let bucket = M.lookup "bucket" $ fromJust $ config r
+    conn <- s3Connection $ fromJust $ config r
+    case (bucket, conn) of
+        (Just b, Just c) -> action (c, b)
+        _ -> return noconn
+
+bucketFile :: Remote Annex -> Key -> FilePath
+bucketFile r = munge . show
+    where
+        munge s = case M.lookup "mungekeys" $ fromJust $ config r of
+            Just "ia" -> iaMunge s
+            _ -> s
+
+bucketKey :: Remote Annex -> String -> Key -> S3Object
+bucketKey r bucket k = S3Object bucket (bucketFile r k) "" [] L.empty
+
+{- Internet Archive limits filenames to a subset of ascii,
+ - with no whitespace. Other characters are xml entity
+ - encoded.
-} +iaMunge :: String -> String +iaMunge = (>>= munge) + where + munge c + | isAsciiUpper c || isAsciiLower c || isNumber c = [c] + | c `elem` "_-.\"" = [c] + | isSpace c = [] + | otherwise = "&" ++ show (ord c) ++ ";" + +genBucket :: RemoteConfig -> Annex () +genBucket c = do + conn <- s3ConnectionRequired c + showAction "checking bucket" + loc <- liftIO $ getBucketLocation conn bucket + case loc of + Right _ -> return () + Left err@(NetworkError _) -> s3Error err + Left (AWSError _ _) -> do + showAction $ "creating bucket in " ++ datacenter + res <- liftIO $ createBucketIn conn bucket datacenter + case res of + Right _ -> return () + Left err -> s3Error err + where + bucket = fromJust $ M.lookup "bucket" c + datacenter = fromJust $ M.lookup "datacenter" c + +s3ConnectionRequired :: RemoteConfig -> Annex AWSConnection +s3ConnectionRequired c = + maybe (error "Cannot connect to S3") return =<< s3Connection c + +s3Connection :: RemoteConfig -> Annex (Maybe AWSConnection) +s3Connection c = do + creds <- s3GetCreds c + case creds of + Just (ak, sk) -> return $ Just $ AWSConnection host port ak sk + _ -> do + warning $ "Set both " ++ s3AccessKey ++ " and " ++ s3SecretKey ++ " to use S3" + return Nothing + where + host = fromJust $ M.lookup "host" c + port = let s = fromJust $ M.lookup "port" c in + case reads s of + [(p, _)] -> p + _ -> error $ "bad S3 port value: " ++ s + +{- S3 creds come from the environment if set. + - Otherwise, might be stored encrypted in the remote's config; once decrypted, they are exported to the environment so later lookups find them there. Malformed stored creds yield Nothing. -} +s3GetCreds :: RemoteConfig -> Annex (Maybe (String, String)) +s3GetCreds c = do + ak <- getEnvKey s3AccessKey + sk <- getEnvKey s3SecretKey + if null ak || null sk + then do + mcipher <- remoteCipher c + case (M.lookup "s3creds" c, mcipher) of + (Just encrypted, Just cipher) -> do + s <- liftIO $ withDecryptedContent cipher + (return $ L.pack $ fromB64 encrypted) + (return . 
L.unpack) + let creds' = case lines s of { (ak':sk':_) -> Just (ak', sk'); _ -> Nothing } + liftIO $ do + maybe (return ()) (\(ak', _) -> setEnv s3AccessKey ak' True) creds' + maybe (return ()) (\(_, sk') -> setEnv s3SecretKey sk' True) creds' + return creds' + _ -> return Nothing + else return $ Just (ak, sk) + where + getEnvKey s = liftIO $ catchDefaultIO (getEnv s) "" + +{- Stores S3 creds encrypted in the remote's config if possible. -} +s3SetCreds :: RemoteConfig -> Annex RemoteConfig +s3SetCreds c = do + creds <- s3GetCreds c + case creds of + Just (ak, sk) -> do + mcipher <- remoteCipher c + case mcipher of + Just cipher -> do + s <- liftIO $ withEncryptedContent cipher + (return $ L.pack $ unlines [ak, sk]) + (return . L.unpack) + return $ M.insert "s3creds" (toB64 s) c + Nothing -> return c + _ -> return c + +s3AccessKey :: String +s3AccessKey = "AWS_ACCESS_KEY_ID" +s3SecretKey :: String +s3SecretKey = "AWS_SECRET_ACCESS_KEY" diff --git a/Remote/S3stub.hs b/Remote/S3stub.hs new file mode 100644 index 0000000000..d91a222e86 --- /dev/null +++ b/Remote/S3stub.hs @@ -0,0 +1,13 @@ +-- stub for when hS3 is not available +module Remote.S3 (remote) where + +import Types.Remote +import Types + +remote :: RemoteType Annex +remote = RemoteType { + typename = "S3", + enumerate = return [], + generate = error "S3 not enabled", + setup = error "S3 not enabled" +} diff --git a/Remote/Web.hs b/Remote/Web.hs new file mode 100644 index 0000000000..e31539f885 --- /dev/null +++ b/Remote/Web.hs @@ -0,0 +1,78 @@ +{- Web remotes. + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. 
+ -} + +module Remote.Web (remote) where + +import Common.Annex +import Types.Remote +import qualified Git +import qualified Git.Construct +import Config +import Logs.Web +import qualified Utility.Url as Url + +remote :: RemoteType Annex +remote = RemoteType { + typename = "web", + enumerate = list, + generate = gen, + setup = error "not supported" +} + +-- There is only one web remote, and it always exists. 
+-- (If the web should cease to exist, remove this module and redistribute +-- a new release to the survivors by carrier pigeon.) +list :: Annex [Git.Repo] +list = do + r <- liftIO $ Git.Construct.remoteNamed "web" Git.Construct.fromUnknown + return [r] + +gen :: Git.Repo -> UUID -> Maybe RemoteConfig -> Annex (Remote Annex) +gen r _ _ = + return Remote { + uuid = webUUID, + cost = expensiveRemoteCost, + name = Git.repoDescribe r, + storeKey = uploadKey, + retrieveKeyFile = downloadKey, + removeKey = dropKey, + hasKey = checkKey, + hasKeyCheap = False, + config = Nothing, + repo = r + } + +downloadKey :: Key -> FilePath -> Annex Bool +downloadKey key file = get =<< getUrls key + where + get [] = do + warning "no known url" + return False + get urls = do + showOutput -- make way for download progress bar + liftIO $ anyM (`Url.download` file) urls + +uploadKey :: Key -> Annex Bool +uploadKey _ = do + warning "upload to web not supported" + return False + +dropKey :: Key -> Annex Bool +dropKey _ = do + warning "removal from web not supported" + return False + +checkKey :: Key -> Annex (Either String Bool) +checkKey key = do + us <- getUrls key + if null us + then return $ Right False + else return . Right =<< checkKey' us +checkKey' :: [URLString] -> Annex Bool +checkKey' us = untilTrue us $ \u -> do + showAction $ "checking " ++ u + liftIO $ Url.exists u diff --git a/Seek.hs b/Seek.hs new file mode 100644 index 0000000000..28c6ffc00c --- /dev/null +++ b/Seek.hs @@ -0,0 +1,116 @@ +{- git-annex command seeking + - + - These functions find appropriate files or other things based on + - the values a user passes to a command, and prepare actions operating + - on them. + - + - Copyright 2010-2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. 
+ -} + +module Seek where + +import Common.Annex +import Types.Command +import Types.Key +import Backend +import qualified Annex +import qualified Git +import qualified Git.LsFiles as LsFiles +import qualified Git.CheckAttr +import qualified Limit + +seekHelper :: ([FilePath] -> Git.Repo -> IO [FilePath]) -> [FilePath] -> Annex [FilePath] +seekHelper a params = do + g <- gitRepo + liftIO $ runPreserveOrder (`a` g) params + +withFilesInGit :: (FilePath -> CommandStart) -> CommandSeek +withFilesInGit a params = prepFiltered a $ seekHelper LsFiles.inRepo params + +withAttrFilesInGit :: String -> ((FilePath, String) -> CommandStart) -> CommandSeek +withAttrFilesInGit attr a params = do + files <- seekHelper LsFiles.inRepo params + prepFilteredGen a fst $ inRepo $ Git.CheckAttr.lookup attr files + +withNumCopies :: (Maybe Int -> FilePath -> CommandStart) -> CommandSeek +withNumCopies a params = withAttrFilesInGit "annex.numcopies" go params + where + go (file, v) = a (readMaybe v) file + +withBackendFilesInGit :: (BackendFile -> CommandStart) -> CommandSeek +withBackendFilesInGit a params = do + files <- seekHelper LsFiles.inRepo params + prepBackendPairs a files + +withFilesMissing :: (String -> CommandStart) -> CommandSeek +withFilesMissing a params = prepFiltered a $ liftIO $ filterM missing params + where + missing = liftM not . 
doesFileExist + +withFilesNotInGit :: (BackendFile -> CommandStart) -> CommandSeek +withFilesNotInGit a params = do + force <- Annex.getState Annex.force + newfiles <- seekHelper (LsFiles.notInRepo force) params + prepBackendPairs a newfiles + +withWords :: ([String] -> CommandStart) -> CommandSeek +withWords a params = return [a params] + +withStrings :: (String -> CommandStart) -> CommandSeek +withStrings a params = return $ map a params + +withFilesToBeCommitted :: (String -> CommandStart) -> CommandSeek +withFilesToBeCommitted a params = prepFiltered a $ + seekHelper LsFiles.stagedNotDeleted params + +withFilesUnlocked :: (BackendFile -> CommandStart) -> CommandSeek +withFilesUnlocked = withFilesUnlocked' LsFiles.typeChanged + +withFilesUnlockedToBeCommitted :: (BackendFile -> CommandStart) -> CommandSeek +withFilesUnlockedToBeCommitted = withFilesUnlocked' LsFiles.typeChangedStaged + +withFilesUnlocked' :: ([FilePath] -> Git.Repo -> IO [FilePath]) -> (BackendFile -> CommandStart) -> CommandSeek +withFilesUnlocked' typechanged a params = do + -- unlocked files have changed type from a symlink to a regular file + top <- fromRepo Git.workTree + typechangedfiles <- seekHelper typechanged params + unlockedfiles <- liftIO $ filterM notSymlink $ + map (\f -> top ++ "/" ++ f) typechangedfiles + prepBackendPairs a unlockedfiles + +withKeys :: (Key -> CommandStart) -> CommandSeek +withKeys a params = return $ map (a . parse) params + where + parse p = fromMaybe (error "bad key") $ readKey p + +withNothing :: CommandStart -> CommandSeek +withNothing a [] = return [a] +withNothing _ _ = error "This command takes no parameters." 
+ + +prepFiltered :: (FilePath -> CommandStart) -> Annex [FilePath] -> Annex [CommandStart] +prepFiltered a = prepFilteredGen a id + +prepBackendPairs :: (BackendFile -> CommandStart) -> CommandSeek +prepBackendPairs a fs = prepFilteredGen a snd (chooseBackends fs) + +prepFilteredGen :: (b -> CommandStart) -> (b -> FilePath) -> Annex [b] -> Annex [CommandStart] +prepFilteredGen a d fs = do + matcher <- Limit.getMatcher + prepStart (proc matcher) fs + where + proc matcher v = do + let f = d v + ok <- matcher f + if ok then a v else return Nothing + +{- Generates a list of CommandStart actions that will be run to perform a + - command, using a list (ie of files) coming from an action. The list + - will be produced and consumed lazily. -} +prepStart :: (b -> CommandStart) -> Annex [b] -> Annex [CommandStart] +prepStart a = liftM (map a) + +notSymlink :: FilePath -> IO Bool +notSymlink f = liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f diff --git a/Setup.hs b/Setup.hs new file mode 100644 index 0000000000..547d6a156e --- /dev/null +++ b/Setup.hs @@ -0,0 +1,17 @@ +{- cabal setup file -} + +import Distribution.Simple +import System.Cmd + +main = defaultMainWithHooks simpleUserHooks { + preConf = makeSources, + postClean = makeClean +} + +makeSources _ _ = do + system "make sources" + return (Nothing, []) + +makeClean _ _ _ _ = do + system "make clean" + return () diff --git a/Types.hs b/Types.hs new file mode 100644 index 0000000000..fd77bfe575 --- /dev/null +++ b/Types.hs @@ -0,0 +1,18 @@ +{- git-annex abstract data types + - + - Copyright 2010 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Types ( + Annex, + Backend, + Key, + UUID(..) 
+) where + +import Annex +import Types.Backend +import Types.Key +import Types.UUID diff --git a/Types/Backend.hs b/Types/Backend.hs new file mode 100644 index 0000000000..4f82267045 --- /dev/null +++ b/Types/Backend.hs @@ -0,0 +1,27 @@ +{- git-annex key/value backend data type + - + - Most things should not need this, using Remotes instead + - + - Copyright 2010 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Types.Backend where + +import Types.Key + +data Backend a = Backend { + -- name of this backend + name :: String, + -- converts a filename to a key + getKey :: FilePath -> a (Maybe Key), + -- called during fsck to check a key + fsckKey :: Key -> a Bool +} + +instance Show (Backend a) where + show backend = "Backend { name =\"" ++ name backend ++ "\" }" + +instance Eq (Backend a) where + a == b = name a == name b diff --git a/Types/BranchState.hs b/Types/BranchState.hs new file mode 100644 index 0000000000..777edb32cb --- /dev/null +++ b/Types/BranchState.hs @@ -0,0 +1,19 @@ +{- git-annex BranchState data type + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Types.BranchState where + +data BranchState = BranchState { + branchUpdated :: Bool, -- has the branch been updated this run? + + -- the content of one file is cached + cachedFile :: Maybe FilePath, + cachedContent :: String +} + +startBranchState :: BranchState +startBranchState = BranchState False Nothing "" diff --git a/Types/Command.hs b/Types/Command.hs new file mode 100644 index 0000000000..5341a40545 --- /dev/null +++ b/Types/Command.hs @@ -0,0 +1,46 @@ +{- git-annex command data types + - + - Copyright 2010-2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Types.Command where + +import Types + +{- A command runs in these stages. + - + - a. The check stage runs checks, that error out if + - anything prevents the command from running. 
-} +data CommandCheck = CommandCheck { idCheck :: Int, runCheck :: Annex () } +{- b. The seek stage takes the parameters passed to the command, + - looks through the repo to find the ones that are relevant + - to that command (ie, new files to add), and generates + - a list of start stage actions. -} +type CommandSeek = [String] -> Annex [CommandStart] +{- c. The start stage is run before anything is printed about the + - command, is passed some input, and can early abort it + - if the input does not make sense. It should run quickly and + - should not modify Annex state. -} +type CommandStart = Annex (Maybe CommandPerform) +{- d. The perform stage is run after a message is printed about the command + - being run, and it should be where the bulk of the work happens. -} +type CommandPerform = Annex (Maybe CommandCleanup) +{- e. The cleanup stage is run only if the perform stage succeeds, and it + - returns the overall success/fail of the command. -} +type CommandCleanup = Annex Bool + +{- A command is defined by specifying these things. -} +data Command = Command { + cmdnorepo :: Maybe (IO ()), + cmdcheck :: [CommandCheck], + cmdname :: String, + cmdparams :: String, + cmdseek :: [CommandSeek], + cmddesc :: String +} + +{- CommandCheck functions can be compared using their unique id. -} +instance Eq CommandCheck where + a == b = idCheck a == idCheck b diff --git a/Types/Crypto.hs b/Types/Crypto.hs new file mode 100644 index 0000000000..686bf5c1a6 --- /dev/null +++ b/Types/Crypto.hs @@ -0,0 +1,20 @@ +{- git-annex crypto types + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. 
+ -} + +module Types.Crypto ( + Cipher(..), + EncryptedCipher(..), + KeyIds(..), +) where + +import Utility.Gpg (KeyIds(..)) + +-- XXX ideally, this would be a locked memory region +newtype Cipher = Cipher String + +data EncryptedCipher = EncryptedCipher String KeyIds + deriving (Ord, Eq) diff --git a/Types/Key.hs b/Types/Key.hs new file mode 100644 index 0000000000..165f814d4b --- /dev/null +++ b/Types/Key.hs @@ -0,0 +1,77 @@ +{- git-annex Key data type + - + - Most things should not need this, using Types instead + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Types.Key ( + Key(..), + stubKey, + readKey, + + prop_idempotent_key_read_show +) where + +import System.Posix.Types + +import Common + +{- A Key has a unique name, is associated with a key/value backend, + - and may contain other optional metadata. -} +data Key = Key { + keyName :: String, + keyBackendName :: String, + keySize :: Maybe Integer, + keyMtime :: Maybe EpochTime +} deriving (Eq, Ord) + +stubKey :: Key +stubKey = Key { + keyName = "", + keyBackendName = "", + keySize = Nothing, + keyMtime = Nothing +} + +fieldSep :: Char +fieldSep = '-' + +{- Keys show as strings that are suitable for use as filenames. + - The name field is always shown last, separated by doubled fieldSeps, + - and is the only field allowed to contain the fieldSep. 
-} +instance Show Key where + show Key { keyBackendName = b, keySize = s, keyMtime = m, keyName = n } = + b +++ ('s' ?: s) +++ ('m' ?: m) +++ (fieldSep : n) + where + "" +++ y = y + x +++ "" = x + x +++ y = x ++ fieldSep:y + c ?: (Just v) = c : show v + _ ?: _ = "" + +readKey :: String -> Maybe Key +readKey s = if key == Just stubKey then Nothing else key + where + key = startbackend stubKey s + + startbackend k v = sepfield k v addbackend + + sepfield k v a = case span (/= fieldSep) v of + (v', _:r) -> findfields r $ a k v' + _ -> Nothing + + findfields (c:v) (Just k) + | c == fieldSep = Just $ k { keyName = v } + | otherwise = sepfield k v $ addfield c + findfields _ v = v + + addbackend k v = Just k { keyBackendName = v } + addfield 's' k v = Just k { keySize = readMaybe v } + addfield 'm' k v = Just k { keyMtime = readMaybe v } + addfield _ _ _ = Nothing + +prop_idempotent_key_read_show :: Key -> Bool +prop_idempotent_key_read_show k = Just k == (readKey . show) k diff --git a/Types/Remote.hs b/Types/Remote.hs new file mode 100644 index 0000000000..ec9b7a7a70 --- /dev/null +++ b/Types/Remote.hs @@ -0,0 +1,68 @@ +{- git-annex remotes types + - + - Most things should not need this, using Remote instead + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Types.Remote where + +import Data.Map as M +import Data.Ord + +import qualified Git +import Types.Key +import Types.UUID + +type RemoteConfig = M.Map String String + +{- There are different types of remotes. -} +data RemoteType a = RemoteType { + -- human visible type name + typename :: String, + -- enumerates remotes of this type + enumerate :: a [Git.Repo], + -- generates a remote of this type + generate :: Git.Repo -> UUID -> Maybe RemoteConfig -> a (Remote a), + -- initializes or changes a remote + setup :: UUID -> RemoteConfig -> a RemoteConfig +} + +{- An individual remote. 
-} +data Remote a = Remote { + -- each Remote has a unique uuid + uuid :: UUID, + -- each Remote has a human visible name + name :: String, + -- Remotes have a use cost; higher is more expensive + cost :: Int, + -- Transfers a key to the remote. + storeKey :: Key -> a Bool, + -- retrieves a key's contents to a file + retrieveKeyFile :: Key -> FilePath -> a Bool, + -- removes a key's contents + removeKey :: Key -> a Bool, + -- Checks if a key is present in the remote; if the remote + -- cannot be accessed returns a Left error message. + hasKey :: Key -> a (Either String Bool), + -- Some remotes can check hasKey without an expensive network + -- operation. + hasKeyCheap :: Bool, + -- a Remote can have a persistent configuration store + config :: Maybe RemoteConfig, + -- git configuration for the remote + repo :: Git.Repo +} + +instance Show (Remote a) where + show remote = "Remote { name =\"" ++ name remote ++ "\" }" + +-- two remotes are the same if they have the same uuid +instance Eq (Remote a) where + x == y = uuid x == uuid y + +-- order remotes by cost +instance Ord (Remote a) where + compare = comparing cost diff --git a/Types/TrustLevel.hs b/Types/TrustLevel.hs new file mode 100644 index 0000000000..99d7497303 --- /dev/null +++ b/Types/TrustLevel.hs @@ -0,0 +1,20 @@ +{- git-annex trust levels + - + - Copyright 2010 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Types.TrustLevel ( + TrustLevel(..), + TrustMap +) where + +import qualified Data.Map as M + +import Types.UUID + +data TrustLevel = Trusted | SemiTrusted | UnTrusted | DeadTrusted + deriving Eq + +type TrustMap = M.Map UUID TrustLevel diff --git a/Types/UUID.hs b/Types/UUID.hs new file mode 100644 index 0000000000..767cd0dfe8 --- /dev/null +++ b/Types/UUID.hs @@ -0,0 +1,20 @@ +{- git-annex UUID type + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. 
+ -} + +module Types.UUID where + +-- A UUID is either an arbitrary opaque string, or UUID info may be missing. +data UUID = NoUUID | UUID String + deriving (Eq, Ord, Show) + +fromUUID :: UUID -> String +fromUUID (UUID u) = u +fromUUID NoUUID = "" + +toUUID :: String -> UUID +toUUID [] = NoUUID +toUUID s = UUID s diff --git a/Upgrade.hs b/Upgrade.hs new file mode 100644 index 0000000000..8b2e939dde --- /dev/null +++ b/Upgrade.hs @@ -0,0 +1,24 @@ +{- git-annex upgrade support + - + - Copyright 2010 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Upgrade where + +import Common.Annex +import Annex.Version +import qualified Upgrade.V0 +import qualified Upgrade.V1 +import qualified Upgrade.V2 + +{- Uses the annex.version git config setting to automate upgrades. -} +upgrade :: Annex Bool +upgrade = do + version <- getVersion + case version of + Just "0" -> Upgrade.V0.upgrade + Just "1" -> Upgrade.V1.upgrade + Just "2" -> Upgrade.V2.upgrade + _ -> return True diff --git a/Upgrade/V0.hs b/Upgrade/V0.hs new file mode 100644 index 0000000000..eae5c87ce4 --- /dev/null +++ b/Upgrade/V0.hs @@ -0,0 +1,54 @@ +{- git-annex v0 -> v1 upgrade support + - + - Copyright 2010 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. 
+ -} + +module Upgrade.V0 where + +import System.IO.Error (try) + +import Common.Annex +import Annex.Content +import qualified Upgrade.V1 + +upgrade :: Annex Bool +upgrade = do + showAction "v0 to v1" + + -- do the reorganisation of the key files (each is looked up directly under the old annex dir) + olddir <- fromRepo gitAnnexDir + keys <- getKeysPresent0 olddir + forM_ keys $ \k -> moveAnnex k $ olddir </> keyFile0 k + + -- update the symlinks to the key files + -- No longer needed here; V1.upgrade does the same thing + + -- Few people had v0 repos, so go the long way around from 0 -> 1 -> 2 + Upgrade.V1.upgrade + +-- these stayed unchanged between v0 and v1 +keyFile0 :: Key -> FilePath +keyFile0 = Upgrade.V1.keyFile1 +fileKey0 :: FilePath -> Key +fileKey0 = Upgrade.V1.fileKey1 +lookupFile0 :: FilePath -> Annex (Maybe (Key, Backend Annex)) +lookupFile0 = Upgrade.V1.lookupFile1 + +getKeysPresent0 :: FilePath -> Annex [Key] +getKeysPresent0 dir = do + exists <- liftIO $ doesDirectoryExist dir + if not exists + then return [] + else do + contents <- liftIO $ getDirectoryContents dir + files <- liftIO $ filterM present contents + return $ map fileKey0 files + where + present d = do + result <- try $ + getFileStatus $ dir ++ "/" ++ takeFileName d + case result of + Right s -> return $ isRegularFile s + Left _ -> return False diff --git a/Upgrade/V1.hs b/Upgrade/V1.hs new file mode 100644 index 0000000000..80554dc3bc --- /dev/null +++ b/Upgrade/V1.hs @@ -0,0 +1,238 @@ +{- git-annex v1 -> v2 upgrade support + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. 
+ -} + +module Upgrade.V1 where + +import System.IO.Error (try) +import System.Posix.Types +import Data.Char + +import Common.Annex +import Types.Key +import Annex.Content +import Logs.Presence +import qualified Annex.Queue +import qualified Git +import qualified Git.LsFiles as LsFiles +import Backend +import Annex.Version +import Utility.FileMode +import Utility.TempFile +import qualified Upgrade.V2 + +-- v2 adds hashing of filenames of content and location log files. +-- Key information is encoded in filenames differently, so +-- both content and location log files move around, and symlinks +-- to content need to be changed. +-- +-- When upgrading a v1 key to v2, file size metadata ought to be +-- added to the key (unless it is a WORM key, which encoded +-- mtime:size in v1). This can only be done when the file content +-- is present. Since upgrades need to happen consistently, +-- (so that two repos get changed the same way by the upgrade, and +-- will merge), that metadata cannot be added on upgrade. +-- +-- Note that file size metadata +-- will only be used for detecting situations where git-annex +-- would run out of disk space, so if some keys don't have it, +-- the impact is minor. At least initially. It could be used in the +-- future by smart auto-repo balancing code, etc. +-- +-- Anyway, since v2 plans ahead for other metadata being included +-- in keys, there should probably be a way to update a key. +-- Something similar to the migrate subcommand could be used, +-- and users could then run that at their leisure. 
+ +upgrade :: Annex Bool +upgrade = do + showAction "v1 to v2" + + bare <- fromRepo Git.repoIsLocalBare + if bare + then do + moveContent + setVersion + else do + moveContent + updateSymlinks + moveLocationLogs + + Annex.Queue.flush True + setVersion + + Upgrade.V2.upgrade + +moveContent :: Annex () +moveContent = do + showAction "moving content" + files <- getKeyFilesPresent1 + forM_ files move + where + move f = do + let k = fileKey1 (takeFileName f) + let d = parentDir f + liftIO $ allowWrite d + liftIO $ allowWrite f + moveAnnex k f + liftIO $ removeDirectory d + +updateSymlinks :: Annex () +updateSymlinks = do + showAction "updating symlinks" + top <- fromRepo Git.workTree + files <- inRepo $ LsFiles.inRepo [top] + forM_ files fixlink + where + fixlink f = do + r <- lookupFile1 f + case r of + Nothing -> return () + Just (k, _) -> do + link <- calcGitLink f k + liftIO $ removeFile f + liftIO $ createSymbolicLink link f + Annex.Queue.add "add" [Param "--"] [f] + +moveLocationLogs :: Annex () +moveLocationLogs = do + showAction "moving location logs" + logkeys <- oldlocationlogs + forM_ logkeys move + where + oldlocationlogs = do + dir <- fromRepo Upgrade.V2.gitStateDir + exists <- liftIO $ doesDirectoryExist dir + if exists + then do + contents <- liftIO $ getDirectoryContents dir + return $ mapMaybe oldlog2key contents + else return [] + move (l, k) = do + dest <- fromRepo $ logFile2 k + dir <- fromRepo Upgrade.V2.gitStateDir + let f = dir </> l + liftIO $ createDirectoryIfMissing True (parentDir dest) + -- could just git mv, but this way deals with + -- log files that are not checked into git, + -- as well as merging with already upgraded + -- logs that have been pulled from elsewhere + old <- liftIO $ readLog1 f + new <- liftIO $ readLog1 dest + liftIO $ writeLog1 dest (old++new) + Annex.Queue.add "add" [Param "--"] [dest] + Annex.Queue.add "add" [Param "--"] [f] + Annex.Queue.add "rm" [Param "--quiet", Param "-f", Param "--"] [f] + +oldlog2key :: FilePath -> 
Maybe (FilePath, Key) +oldlog2key l = + let len = length l - 4 in + if drop len l == ".log" + then let k = readKey1 (take len l) in + if null (keyName k) || null (keyBackendName k) + then Nothing + else Just (l, k) + else Nothing + +-- WORM backend keys: "WORM:mtime:size:filename" +-- all the rest: "backend:key" +-- +-- If the file looks like "WORM:XXX-...", then it was created by mixing +-- v2 and v1; that infelicity is worked around by treating the value +-- as the v2 key that it is. +readKey1 :: String -> Key +readKey1 v = + if mixup + then fromJust $ readKey $ join ":" $ Prelude.tail bits + else Key { keyName = n , keyBackendName = b, keySize = s, keyMtime = t } + where + bits = split ":" v + b = Prelude.head bits + n = join ":" $ drop (if wormy then 3 else 1) bits + t = if wormy + then Just (Prelude.read (bits !! 1) :: EpochTime) + else Nothing + s = if wormy + then Just (Prelude.read (bits !! 2) :: Integer) + else Nothing + wormy = Prelude.head bits == "WORM" + mixup = wormy && isUpper (Prelude.head $ bits !! 1) + +showKey1 :: Key -> String +showKey1 Key { keyName = n , keyBackendName = b, keySize = s, keyMtime = t } = + join ":" $ filter (not . 
null) [b, showifhere t, showifhere s, n] + where + showifhere Nothing = "" + showifhere (Just v) = show v + +keyFile1 :: Key -> FilePath +keyFile1 key = replace "/" "%" $ replace "%" "&s" $ replace "&" "&a" $ showKey1 key + +fileKey1 :: FilePath -> Key +fileKey1 file = readKey1 $ + replace "&a" "&" $ replace "&s" "%" $ replace "%" "/" file + +writeLog1 :: FilePath -> [LogLine] -> IO () +writeLog1 file ls = viaTmp writeFile file (showLog ls) + +readLog1 :: FilePath -> IO [LogLine] +readLog1 file = catchDefaultIO (parseLog <$> readFileStrict file) [] + +lookupFile1 :: FilePath -> Annex (Maybe (Key, Backend Annex)) +lookupFile1 file = do + tl <- liftIO $ try getsymlink + case tl of + Left _ -> return Nothing + Right l -> makekey l + where + getsymlink = takeFileName <$> readSymbolicLink file + makekey l = case maybeLookupBackendName bname of + Nothing -> do + unless (null kname || null bname || + not (isLinkToAnnex l)) $ + warning skip + return Nothing + Just backend -> return $ Just (k, backend) + where + k = fileKey1 l + bname = keyBackendName k + kname = keyName k + skip = "skipping " ++ file ++ + " (unknown backend " ++ bname ++ ")" + +getKeyFilesPresent1 :: Annex [FilePath] +getKeyFilesPresent1 = getKeyFilesPresent1' =<< fromRepo gitAnnexObjectDir +getKeyFilesPresent1' :: FilePath -> Annex [FilePath] +getKeyFilesPresent1' dir = do + exists <- liftIO $ doesDirectoryExist dir + if not exists + then return [] + else do + dirs <- liftIO $ getDirectoryContents dir + let files = map (\d -> dir ++ "/" ++ d ++ "/" ++ takeFileName d) dirs + liftIO $ filterM present files + where + present f = do + result <- try $ getFileStatus f + case result of + Right s -> return $ isRegularFile s + Left _ -> return False + +logFile1 :: Git.Repo -> Key -> String +logFile1 repo key = Upgrade.V2.gitStateDir repo ++ keyFile1 key ++ ".log" + +logFile2 :: Key -> Git.Repo -> String +logFile2 = logFile' hashDirLower + +logFile' :: (Key -> FilePath) -> Key -> Git.Repo -> String +logFile' hasher 
key repo = + gitStateDir repo ++ hasher key ++ keyFile key ++ ".log" + +stateDir :: FilePath +stateDir = addTrailingPathSeparator ".git-annex" + +gitStateDir :: Git.Repo -> FilePath +gitStateDir repo = addTrailingPathSeparator $ Git.workTree repo stateDir diff --git a/Upgrade/V2.hs b/Upgrade/V2.hs new file mode 100644 index 0000000000..ffc2f60022 --- /dev/null +++ b/Upgrade/V2.hs @@ -0,0 +1,137 @@ +{- git-annex v2 -> v3 upgrade support + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Upgrade.V2 where + +import Common.Annex +import qualified Git +import qualified Git.Command +import qualified Git.Ref +import qualified Annex.Branch +import Logs.Location +import Annex.Content +import Utility.TempFile + +olddir :: Git.Repo -> FilePath +olddir g + | Git.repoIsLocalBare g = "" + | otherwise = ".git-annex" + +{- .git-annex/ moved to a git-annex branch. + - + - Strategy: + - + - * Create the git-annex branch. + - * Find each location log file in .git-annex/, and inject its content + - into the git-annex branch, unioning with any content already in + - there. (in passing, this deals with the semi transition that left + - some location logs hashed two different ways; both are found and + - merged). + - * Also inject remote.log, trust.log, and uuid.log. + - * git rm -rf .git-annex + - * Remove stuff that used to be needed in .gitattributes. + - * Commit changes. 
+ -} +upgrade :: Annex Bool +upgrade = do + showAction "v2 to v3" + bare <- fromRepo Git.repoIsLocalBare + old <- fromRepo olddir + + Annex.Branch.create + showProgress + + e <- liftIO $ doesDirectoryExist old + when e $ do + mapM_ (\(k, f) -> inject f $ logFile k) =<< locationLogs + mapM_ (\f -> inject f f) =<< logFiles old + + saveState + showProgress + + when e $ do + inRepo $ Git.Command.run "rm" [Param "-r", Param "-f", Param "-q", File old] + unless bare $ inRepo gitAttributesUnWrite + showProgress + + unless bare push + + return True + +locationLogs :: Annex [(Key, FilePath)] +locationLogs = do + dir <- fromRepo gitStateDir + liftIO $ do + levela <- dirContents dir + levelb <- mapM tryDirContents levela + files <- mapM tryDirContents (concat levelb) + return $ mapMaybe islogfile (concat files) + where + tryDirContents d = catchDefaultIO (dirContents d) [] + islogfile f = maybe Nothing (\k -> Just (k, f)) $ + logFileKey $ takeFileName f + +inject :: FilePath -> FilePath -> Annex () +inject source dest = do + old <- fromRepo olddir + new <- liftIO (readFile $ old source) + Annex.Branch.change dest $ \prev -> + unlines $ nub $ lines prev ++ lines new + +logFiles :: FilePath -> Annex [FilePath] +logFiles dir = return . filter (".log" `isSuffixOf`) + =<< liftIO (getDirectoryContents dir) + +push :: Annex () +push = do + origin_master <- inRepo $ Git.Ref.exists $ Git.Ref "origin/master" + origin_gitannex <- Annex.Branch.hasOrigin + case (origin_master, origin_gitannex) of + (_, True) -> do + -- Merge in the origin's git-annex branch, + -- so that pushing the git-annex branch + -- will immediately work. Not pushed here, + -- because it's less obnoxious to let the user + -- push. 
+ Annex.Branch.update + (True, False) -> do + -- push git-annex to origin, so that + -- "git push" will from then on + -- automatically push it + Annex.Branch.update -- just in case + showAction "pushing new git-annex branch to origin" + showOutput + inRepo $ Git.Command.run "push" + [Param "origin", Param $ show Annex.Branch.name] + _ -> do + -- no origin exists, so just let the user + -- know about the new branch + Annex.Branch.update + showLongNote $ + "git-annex branch created\n" ++ + "Be sure to push this branch when pushing to remotes.\n" + +{- Old .gitattributes contents, not needed anymore. -} +attrLines :: [String] +attrLines = + [ stateDir "*.log merge=union" + , stateDir "*/*/*.log merge=union" + ] + +gitAttributesUnWrite :: Git.Repo -> IO () +gitAttributesUnWrite repo = do + let attributes = Git.attributes repo + whenM (doesFileExist attributes) $ do + c <- readFileStrict attributes + liftIO $ viaTmp writeFile attributes $ unlines $ + filter (`notElem` attrLines) $ lines c + Git.Command.run "add" [File attributes] repo + +stateDir :: FilePath +stateDir = addTrailingPathSeparator ".git-annex" +gitStateDir :: Git.Repo -> FilePath +gitStateDir repo = addTrailingPathSeparator $ Git.workTree repo stateDir diff --git a/Utility/Base64.hs b/Utility/Base64.hs new file mode 100644 index 0000000000..dd739fd4fb --- /dev/null +++ b/Utility/Base64.hs @@ -0,0 +1,18 @@ +{- Simple Base64 access + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Utility.Base64 (toB64, fromB64) where + +import Codec.Binary.Base64 +import Data.Bits.Utils + +toB64 :: String -> String +toB64 = encode . 
s2w8 + +fromB64 :: String -> String +fromB64 s = maybe bad w82s $ decode s + where bad = error "bad base64 encoded data" diff --git a/Utility/Conditional.hs b/Utility/Conditional.hs new file mode 100644 index 0000000000..85e39ec64c --- /dev/null +++ b/Utility/Conditional.hs @@ -0,0 +1,26 @@ +{- monadic conditional operators + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Utility.Conditional where + +import Control.Monad (when, unless) + +whenM :: Monad m => m Bool -> m () -> m () +whenM c a = c >>= flip when a + +unlessM :: Monad m => m Bool -> m () -> m () +unlessM c a = c >>= flip unless a + +(>>?) :: Monad m => m Bool -> m () -> m () +(>>?) = whenM + +(>>!) :: Monad m => m Bool -> m () -> m () +(>>!) = unlessM + +-- low fixity allows eg, foo bar >>! error $ "failed " ++ meep +infixr 0 >>? +infixr 0 >>! diff --git a/Utility/CopyFile.hs b/Utility/CopyFile.hs new file mode 100644 index 0000000000..5d6855bf01 --- /dev/null +++ b/Utility/CopyFile.hs @@ -0,0 +1,28 @@ +{- git-annex file copying + - + - Copyright 2010 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Utility.CopyFile (copyFileExternal) where + +import System.Directory (doesFileExist, removeFile) + +import Utility.Conditional +import Utility.SafeCommand +import qualified Build.SysConfig as SysConfig + +{- The cp command is used, because I hate reinventing the wheel, + - and because this allows easy access to features like cp --reflink. 
-} +copyFileExternal :: FilePath -> FilePath -> IO Bool +copyFileExternal src dest = do + whenM (doesFileExist dest) $ + removeFile dest + boolSystem "cp" [params, File src, File dest] + where + params + | SysConfig.cp_reflink_auto = Params "--reflink=auto" + | SysConfig.cp_a = Params "-a" + | SysConfig.cp_p = Params "-p" + | otherwise = Params "" diff --git a/Utility/DataUnits.hs b/Utility/DataUnits.hs new file mode 100644 index 0000000000..5d80a04b9c --- /dev/null +++ b/Utility/DataUnits.hs @@ -0,0 +1,160 @@ +{- data size display and parsing + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Utility.DataUnits ( + dataUnits, + storageUnits, + memoryUnits, + bandwidthUnits, + oldSchoolUnits, + + roughSize, + compareSizes, + readSize +) where + +import Data.List +import Data.Char + +type ByteSize = Integer +type Name = String +type Abbrev = String +data Unit = Unit ByteSize Abbrev Name + deriving (Ord, Show, Eq) + +{- And now a rant: + - + - In the beginning, we had powers of two, and they were good. + - + - Disk drive manufacturers noticed that some powers of two were + - sorta close to some powers of ten, and that rounding down to the nearest + - power of ten allowed them to advertise their drives were bigger. This + - was sorta annoying. + - + - Then drives got big. Really, really big. This was good. + - + - Except that the small rounding error perpetrated by the drive + - manufacturers suffered the fate of a small error, and became a large + - error. This was bad. + - + - So, a committee was formed. And it arrived at a committee-like decision, + - which satisfied no one, confused everyone, and made the world an uglier + - place. As with all committees, this was meh. + - + - And the drive manufacturers happily continued selling drives that are + - increasingly smaller than you'd expect, if you don't count on your + - fingers. But that are increasingly too big for anyone to much notice. 
+ - This caused me to need git-annex. + - + - Thus, I use units here that I loathe. Because if I didn't, people would + - be confused that their drives seem the wrong size, and other people would + - complain at me for not being standards compliant. And we call this + - progress? + -} + +dataUnits :: [Unit] +dataUnits = storageUnits ++ memoryUnits + +{- Storage units are (stupidly) powers of ten. -} +storageUnits :: [Unit] +storageUnits = + [ Unit (p 8) "YB" "yottabyte" + , Unit (p 7) "ZB" "zettabyte" + , Unit (p 6) "EB" "exabyte" + , Unit (p 5) "PB" "petabyte" + , Unit (p 4) "TB" "terabyte" + , Unit (p 3) "GB" "gigabyte" + , Unit (p 2) "MB" "megabyte" + , Unit (p 1) "kB" "kilobyte" -- weird capitalization thanks to committe + , Unit (p 0) "B" "byte" + ] + where + p :: Integer -> Integer + p n = 1000^n + +{- Memory units are (stupidly named) powers of 2. -} +memoryUnits :: [Unit] +memoryUnits = + [ Unit (p 8) "YiB" "yobibyte" + , Unit (p 7) "ZiB" "zebibyte" + , Unit (p 6) "EiB" "exbibyte" + , Unit (p 5) "PiB" "pebibyte" + , Unit (p 4) "TiB" "tebibyte" + , Unit (p 3) "GiB" "gibibyte" + , Unit (p 2) "MiB" "mebibyte" + , Unit (p 1) "KiB" "kibibyte" + , Unit (p 0) "B" "byte" + ] + where + p :: Integer -> Integer + p n = 2^(n*10) + +{- Bandwidth units are only measured in bits if you're some crazy telco. -} +bandwidthUnits :: [Unit] +bandwidthUnits = error "stop trying to rip people off" + +{- Do you yearn for the days when men were men and megabytes were megabytes? 
-}
oldSchoolUnits :: [Unit]
oldSchoolUnits = zipWith mingle storageUnits memoryUnits
  where
    -- Units are paired by position: keep the storage unit's abbreviation
    -- and name, but take the size from the memory unit.
    mingle (Unit _ a n) (Unit s' _ _) = Unit s' a n

{- Approximate display of a particular number of bytes.
 -
 - The largest unit that is no bigger than the value is chosen, and the
 - value is rounded to a whole number of that unit. When abbrev is True
 - the unit's abbreviation is shown, otherwise its name (pluralised unless
 - the rounded number is 1). Negative values get a leading '-'.
 -
 - Values smaller than every listed unit are shown in the smallest unit.
 - An empty unit list is displayed in plain bytes (it previously crashed
 - in `last`). -}
roughSize :: [Unit] -> Bool -> ByteSize -> String
roughSize units abbrev i
    | i < 0 = '-' : findUnit descending (negate i)
    | otherwise = findUnit descending i
  where
    descending = sortBy (flip compare) units -- largest first

    -- Used when the value is below every listed unit.
    smallest = case sort units of
        (u:_) -> u
        [] -> Unit 1 "B" "byte"

    findUnit (u@(Unit s _ _):us) i'
        | i' >= s = showUnit i' u
        | otherwise = findUnit us i'
    findUnit [] i' = showUnit i' smallest

    showUnit i' (Unit s a n) = show num ++ " " ++ suffix
      where
        num = chop i' s
        suffix
            | abbrev = a
            | otherwise = plural num n

    -- The division goes through Double and the result is rounded.
    chop :: Integer -> Integer -> Integer
    chop i' d = round $ (fromInteger i' :: Double) / fromInteger d

    plural n u
        | n == 1 = u
        | otherwise = u ++ "s"

{- Displays a comparison of two sizes: how much smaller or larger the new
 - size is than the old one, or "same" when they are equal. -}
compareSizes :: [Unit] -> Bool -> ByteSize -> ByteSize -> String
compareSizes units abbrev old new = case compare old new of
    GT -> roughSize units abbrev (old - new) ++ " smaller"
    LT -> roughSize units abbrev (new - old) ++ " larger"
    EQ -> "same"

{- Parses strings like "10 kilobytes" or "0.5tb".
-} +readSize :: [Unit] -> String -> Maybe ByteSize +readSize units input + | null parsednum || null parsedunit = Nothing + | otherwise = Just $ round $ number * fromIntegral multiplier + where + (number, rest) = head parsednum + multiplier = head parsedunit + unitname = takeWhile isAlpha $ dropWhile isSpace rest + + parsednum = reads input :: [(Double, String)] + parsedunit = lookupUnit units unitname + + lookupUnit _ [] = [1] -- no unit given, assume bytes + lookupUnit [] _ = [] + lookupUnit (Unit s a n:us) v + | a ~~ v || n ~~ v = [s] + | plural n ~~ v || a ~~ byteabbrev v = [s] + | otherwise = lookupUnit us v + + a ~~ b = map toLower a == map toLower b + + plural n = n ++ "s" + byteabbrev a = a ++ "b" diff --git a/Utility/Directory.hs b/Utility/Directory.hs new file mode 100644 index 0000000000..249ed69356 --- /dev/null +++ b/Utility/Directory.hs @@ -0,0 +1,50 @@ +{- directory manipulation + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Utility.Directory where + +import System.IO.Error +import System.Posix.Files +import System.Directory +import Control.Exception (throw) +import Control.Monad + +import Utility.SafeCommand +import Utility.Conditional +import Utility.TempFile + +{- Moves one filename to another. + - First tries a rename, but falls back to moving across devices if needed. -} +moveFile :: FilePath -> FilePath -> IO () +moveFile src dest = try (rename src dest) >>= onrename + where + onrename (Right _) = return () + onrename (Left e) + | isPermissionError e = rethrow + | isDoesNotExistError e = rethrow + | otherwise = do + -- copyFile is likely not as optimised as + -- the mv command, so we'll use the latter. + -- But, mv will move into a directory if + -- dest is one, which is not desired. 
+ whenM (isdir dest) rethrow + viaTmp mv dest undefined + where + rethrow = throw e + mv tmp _ = do + ok <- boolSystem "mv" [Param "-f", + Param src, Param tmp] + unless ok $ do + -- delete any partial + _ <- try $ + removeFile tmp + rethrow + isdir f = do + r <- try (getFileStatus f) + case r of + (Left _) -> return False + (Right s) -> return $ isDirectory s diff --git a/Utility/Dot.hs b/Utility/Dot.hs new file mode 100644 index 0000000000..83f52a3cc1 --- /dev/null +++ b/Utility/Dot.hs @@ -0,0 +1,63 @@ +{- a simple graphviz / dot(1) digraph description generator library + - + - Copyright 2010 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Utility.Dot where -- import qualified + +{- generates a graph description from a list of lines -} +graph :: [String] -> String +graph s = unlines $ [header] ++ map indent s ++ [footer] + where + header = "digraph map {" + footer= "}" + +{- a node in the graph -} +graphNode :: String -> String -> String +graphNode nodeid desc = label desc $ quote nodeid + +{- an edge between two nodes -} +graphEdge :: String -> String -> Maybe String -> String +graphEdge fromid toid desc = indent $ maybe edge (`label` edge) desc + where + edge = quote fromid ++ " -> " ++ quote toid + +{- adds a label to a node or edge -} +label :: String -> String -> String +label = attr "label" + +{- adds an attribute to a node or edge + - (can be called multiple times for multiple attributes) -} +attr :: String -> String -> String -> String +attr a v s = s ++ " [ " ++ a ++ "=" ++ quote v ++ " ]" + +{- fills a node with a color -} +fillColor :: String -> String -> String +fillColor color s = attr "fillcolor" color $ attr "style" "filled" s + +{- apply to graphNode to put the node in a labeled box -} +subGraph :: String -> String -> String -> String -> String +subGraph subid l color s = + "subgraph " ++ name ++ " {\n" ++ + ii setlabel ++ + ii setfilled ++ + ii setcolor ++ + ii s ++ + indent "}" + where + -- the "cluster_" makes 
dot draw a box + name = quote ("cluster_" ++ subid) + setlabel = "label=" ++ quote l + setfilled = "style=" ++ quote "filled" + setcolor = "fillcolor=" ++ quote color + ii x = indent (indent x) ++ "\n" + +indent ::String -> String +indent s = '\t' : s + +quote :: String -> String +quote s = "\"" ++ s' ++ "\"" + where + s' = filter (/= '"') s diff --git a/Utility/FileMode.hs b/Utility/FileMode.hs new file mode 100644 index 0000000000..6c1c06e82a --- /dev/null +++ b/Utility/FileMode.hs @@ -0,0 +1,36 @@ +{- File mode utilities. + - + - Copyright 2010 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Utility.FileMode where + +import System.Posix.Files +import System.Posix.Types +import Foreign (complement) + +{- Removes a FileMode from a file. + - For example, call with otherWriteMode to chmod o-w -} +unsetFileMode :: FilePath -> FileMode -> IO () +unsetFileMode f m = do + s <- getFileStatus f + setFileMode f $ fileMode s `intersectFileModes` complement m + +{- Removes the write bits from a file. -} +preventWrite :: FilePath -> IO () +preventWrite f = unsetFileMode f writebits + where + writebits = foldl unionFileModes ownerWriteMode + [groupWriteMode, otherWriteMode] + +{- Turns a file's write bit back on. -} +allowWrite :: FilePath -> IO () +allowWrite f = do + s <- getFileStatus f + setFileMode f $ fileMode s `unionFileModes` ownerWriteMode + +{- Checks if a file mode indicates it's a symlink. -} +isSymLink :: FileMode -> Bool +isSymLink mode = symbolicLinkMode `intersectFileModes` mode == symbolicLinkMode diff --git a/Utility/Format.hs b/Utility/Format.hs new file mode 100644 index 0000000000..2c2042cc22 --- /dev/null +++ b/Utility/Format.hs @@ -0,0 +1,173 @@ +{- Formatted string handling. + - + - Copyright 2010, 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. 
-}

module Utility.Format (
    Format,
    gen,
    format,
    decode_c,
    encode_c,
    prop_idempotent_deencode
) where

import Text.Printf (printf)
import Data.Char (isAlphaNum, isOctDigit, isSpace, chr, ord)
import Data.Maybe (fromMaybe)
import Data.Word (Word8)
import Data.List (isPrefixOf)
import qualified Codec.Binary.UTF8.String
import qualified Data.Map as M

import Utility.PartialPrelude

type FormatString = String

{- A format consists of a list of fragments. -}
type Format = [Frag]

{- A fragment is either a constant string,
 - or a variable, with a justification. -}
data Frag = Const String | Var String Justify
    deriving (Show)

data Justify = LeftJustified Int | RightJustified Int | UnJustified
    deriving (Show)

{- Expands a Format using some variables, generating a formatted string.
 - This can be repeatedly called, efficiently.
 -
 - A variable that is not in the map expands to the empty string.
 - A variable named "escaped_foo" expands to the value of "foo", passed
 - through encode_c_strict. Justified variables are padded with spaces
 - up to their width; longer values are not truncated. -}
format :: Format -> M.Map String String -> String
format f vars = concatMap expand f
  where
    expand (Const s) = s
    expand (Var name j)
        | "escaped_" `isPrefixOf` name =
            justify j $ encode_c_strict $
                getvar $ drop (length "escaped_") name
        | otherwise = justify j $ getvar name
    getvar name = fromMaybe "" $ M.lookup name vars
    justify UnJustified s = s
    justify (LeftJustified i) s = s ++ pad i s
    justify (RightJustified i) s = pad i s ++ s
    -- take of a non-positive count yields "", so no padding is added
    -- once the value already fills the width.
    pad i s = take (i - length s) spaces
    spaces = repeat ' '

{- Generates a Format that can be used to expand variables in a
 - format string, such as "${foo} ${bar;10} ${baz;-10}\n"
 -
 - (This is the same type of format string used by dpkg-query.)
 -
 - C-style escapes in the format string are decoded first (decode_c).
 - A positive width after ';' right-justifies, a negative width
 - left-justifies, and an unparseable or zero width means no
 - justification. An unterminated "${..." is kept as constant text.
 -}
gen :: FormatString -> Format
gen = filter (not . empty) . fuse [] . scan [] . decode_c
  where
    -- The Format is built up in reverse, for efficiency,
    -- and can have many adjacent Consts. Fusing it fixes both
    -- problems.
    fuse f [] = f
    fuse f (Const c1:Const c2:vs) = fuse f $ Const (c2++c1) : vs
    fuse f (v:vs) = fuse (v:f) vs

    -- Outside of a variable: look for the "${" that starts one.
    scan f (a:b:cs)
        | a == '$' && b == '{' = invar f [] cs
        | otherwise = scan (Const [a] : f ) (b:cs)
    scan f v = Const v : f

    -- Inside "${name": the name is accumulated in reverse.
    invar f var [] = Const (novar var) : f
    invar f var (c:cs)
        | c == '}' = foundvar f var UnJustified cs
        | isAlphaNum c || c == '_' = invar f (c:var) cs
        | c == ';' = inpad "" f var cs
        | otherwise = scan ((Const $ novar $ c:var):f) cs

    -- Inside "${name;width": the width is accumulated in reverse.
    inpad p f var (c:cs)
        | c == '}' = foundvar f var (readjustify $ reverse p) cs
        | otherwise = inpad (c:p) f var cs
    inpad p f var [] = Const (novar $ p++";"++var) : f
    readjustify = getjustify . fromMaybe 0 . readMaybe
    getjustify i
        | i == 0 = UnJustified
        | i < 0 = LeftJustified (-1 * i)
        | otherwise = RightJustified i
    -- Text that looked like a variable but was not one, put back as-is.
    novar v = "${" ++ reverse v
    foundvar f v p cs = scan (Var (reverse v) p : f) cs

{- True for a constant fragment that holds no text. -}
empty :: Frag -> Bool
empty (Const "") = True
empty _ = False

{- Decodes a C-style encoding, where \n is a newline, \NNN is an octal
 - encoded character, etc.
 -
 - A backslash followed by any other character yields that character.
 - A lone backslash at the very end of the input is kept literally.
 -}
decode_c :: FormatString -> FormatString
decode_c [] = []
decode_c s = unescape ("", s)
  where
    e = '\\'
    unescape (b, []) = b
    -- look for escapes starting with '\'
    unescape (b, v) = b ++ fst pair ++ unescape (handle $ snd pair)
      where
        pair = span (/= e) v
    isescape x = x == e
    -- \NNN is an octal encoded character
    handle (x:n1:n2:n3:rest)
        | isescape x && alloctal = (fromoctal, rest)
      where
        alloctal = isOctDigit n1 &&
            isOctDigit n2 &&
            isOctDigit n3
        fromoctal = [chr $ readoctal [n1, n2, n3]]
        readoctal o = Prelude.read $ "0o" ++ o :: Int
    -- \C is used for a few special characters
    handle (x:nc:rest)
        | isescape x = ([echar nc], rest)
      where
        echar 'a' = '\a'
        echar 'b' = '\b'
        echar 'f' = '\f'
        echar 'n' = '\n'
        echar 'r' = '\r'
        echar 't' = '\t'
        echar 'v' = '\v'
        echar a = a
    -- Only reached with "" or a single trailing backslash. The input
    -- must be consumed here: returning it unconsumed made unescape
    -- see the same backslash again and loop forever.
    handle n = (n, "")

{- Inverse of decode_c.
-} +encode_c :: FormatString -> FormatString +encode_c = encode_c' (const False) + +{- Encodes more strictly, including whitespace. -} +encode_c_strict :: FormatString -> FormatString +encode_c_strict = encode_c' isSpace + +encode_c' :: (Char -> Bool) -> FormatString -> FormatString +encode_c' p = concatMap echar + where + e c = '\\' : [c] + echar '\a' = e 'a' + echar '\b' = e 'b' + echar '\f' = e 'f' + echar '\n' = e 'n' + echar '\r' = e 'r' + echar '\t' = e 't' + echar '\v' = e 'v' + echar '\\' = e '\\' + echar '"' = e '"' + echar c + | ord c < 0x20 = e_asc c -- low ascii + | ord c >= 256 = e_utf c -- unicode + | ord c > 0x7E = e_asc c -- high ascii + | p c = e_asc c -- unprintable ascii + | otherwise = [c] -- printable ascii + -- unicode character is decomposed to individual Word8s, + -- and each is shown in octal + e_utf c = showoctal =<< (Codec.Binary.UTF8.String.encode [c] :: [Word8]) + e_asc c = showoctal $ ord c + showoctal i = '\\' : printf "%03o" i + +{- for quickcheck -} +prop_idempotent_deencode :: String -> Bool +prop_idempotent_deencode s = s == decode_c (encode_c s) diff --git a/Utility/Gpg.hs b/Utility/Gpg.hs new file mode 100644 index 0000000000..f3a1ac0bb5 --- /dev/null +++ b/Utility/Gpg.hs @@ -0,0 +1,194 @@ +{- gpg interface + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Utility.Gpg where + +import qualified Data.ByteString.Lazy.Char8 as L +import System.Posix.Types +import Control.Applicative +import Control.Concurrent +import Control.Exception (finally, bracket) +import System.Exit +import System.Posix.Env (setEnv, unsetEnv, getEnv) + +import Common + +newtype KeyIds = KeyIds [String] + deriving (Ord, Eq) + +stdParams :: [CommandParam] -> IO [String] +stdParams params = do + -- Enable batch mode if GPG_AGENT_INFO is set, to avoid extraneous + -- gpg output about password prompts. 
+ e <- getEnv "GPG_AGENT_INFO" + let batch = if isNothing e then [] else ["--batch"] + return $ batch ++ defaults ++ toCommand params + where + -- be quiet, even about checking the trustdb + defaults = ["--quiet", "--trust-model", "always"] + +{- Runs gpg with some params and returns its stdout, strictly. -} +readStrict :: [CommandParam] -> IO String +readStrict params = do + params' <- stdParams params + pOpen ReadFromPipe "gpg" params' hGetContentsStrict + +{- Runs gpg, piping an input value to it, and returning its stdout, + - strictly. -} +pipeStrict :: [CommandParam] -> String -> IO String +pipeStrict params input = do + params' <- stdParams params + (pid, fromh, toh) <- hPipeBoth "gpg" params' + _ <- forkIO $ finally (hPutStr toh input) (hClose toh) + output <- hGetContentsStrict fromh + forceSuccess pid + return output + +{- Runs gpg with some parameters, first feeding it a passphrase via + - --passphrase-fd, then feeding it an input, and passing a handle + - to its output to an action. + - + - Note that to avoid deadlock with the cleanup stage, + - the action must fully consume gpg's output before returning. -} +passphraseHandle :: [CommandParam] -> String -> IO L.ByteString -> (Handle -> IO a) -> IO a +passphraseHandle params passphrase a b = do + -- pipe the passphrase into gpg on a fd + (frompipe, topipe) <- createPipe + _ <- forkIO $ do + toh <- fdToHandle topipe + hPutStrLn toh passphrase + hClose toh + let Fd pfd = frompipe + let passphrasefd = [Param "--passphrase-fd", Param $ show pfd] + + params' <- stdParams $ passphrasefd ++ params + (pid, fromh, toh) <- hPipeBoth "gpg" params' + pid2 <- forkProcess $ do + L.hPut toh =<< a + hClose toh + exitSuccess + hClose toh + ret <- b fromh + + -- cleanup + forceSuccess pid + _ <- getProcessStatus True False pid2 + closeFd frompipe + return ret + +{- Finds gpg public keys matching some string. (Could be an email address, + - a key id, or a name.) 
-} +findPubKeys :: String -> IO KeyIds +findPubKeys for = KeyIds . parse <$> readStrict params + where + params = [Params "--with-colons --list-public-keys", Param for] + parse = map keyIdField . filter pubKey . lines + pubKey = isPrefixOf "pub:" + keyIdField s = split ":" s !! 4 + + + +{- A test key. This is provided pre-generated since generating a new gpg + - key is too much work (requires too much entropy) for a test suite to + - do. + - + - This key was generated with no exipiration date, and a small keysize. + - It has an empty passphrase. -} +testKeyId :: String +testKeyId = "129D6E0AC537B9C7" +testKey :: String +testKey = keyBlock True + [ "mI0ETvFAZgEEAKnqwWgZqznMhi1RQExem2H8t3OyKDxaNN3rBN8T6LWGGqAYV4wT" + , "r8In5tfsnz64bKpE1Qi68JURFwYmthgUL9N48tbODU8t3xzijdjLOSaTyqkH1ik6" + , "EyulfKN63xLne9i4F9XqNwpiZzukXYbNfHkDA2yb0M6g4UFKLY/fNzGXABEBAAG0" + , "W2luc2VjdXJlIHRlc3Qga2V5ICh0aGlzIGlzIGEgdGVzdCBrZXksIGRvIG5vdCB1" + , "c2UgZm9yIGFjdHVhbCBlbmNyeXB0aW9uKSA8dGVzdEBleGFtcGxlLmNvbT6IuAQT" + , "AQgAIgUCTvFAZgIbAwYLCQgHAwIGFQgCCQoLBBYCAwECHgECF4AACgkQEp1uCsU3" + , "uceQ9wP/YMd1f0+/eLLcwGXNBvGqyVhUOfAKknO1bMzGbqTsq9g60qegy/cldqee" + , "xVxNfy0VN//JeMfgdcb8+RgJYLoaMrTy9CcsUcFPxtwN9tcLmsM0V2/fNmmFBO9t" + , "v75iH+zeFbNg0/FbPkHiN6Mjw7P2gXYKQXgTvQZBWaphk8oQlBm4jQRO8UBmAQQA" + , "vdi50M/WRCkOLt2RsUve8V8brMWYTJBJTTWoHUeRr82v4NCdX7OE1BsoVK8cy/1Q" + , "Y+gLOH9PqinuGGNWRmPV2Ju/RYn5H7sdewXA8E80xWhc4phHRMJ8Jjhg/GVPamkJ" + , "8B5zeKF0jcLFl7cuVdOyQakhoeDWJd0CyfW837nmPtMAEQEAAYifBBgBCAAJBQJO" + , "8UBmAhsMAAoJEBKdbgrFN7nHclAEAKBShuP/toH03atDUQTbGE34CA4yEC9BVghi" + , "7kviOZlOz2s8xAfp/8AYsrECx1kgbXcA7JD902eNyp7NzXsdJX0zJwHqiuZW0XlD" + , "T8ZJu4qrYRYgl/790WPESZ+ValvHD/fqkR38RF4tfxvyoMhhp0roGmJY33GASIG/" + , "+gQkDF9/" + , "=1k11" + ] +testSecretKey :: String +testSecretKey = keyBlock False + [ "lQHYBE7xQGYBBACp6sFoGas5zIYtUUBMXpth/Ldzsig8WjTd6wTfE+i1hhqgGFeM" + , "E6/CJ+bX7J8+uGyqRNUIuvCVERcGJrYYFC/TePLWzg1PLd8c4o3Yyzkmk8qpB9Yp" + , 
"OhMrpXyjet8S53vYuBfV6jcKYmc7pF2GzXx5AwNsm9DOoOFBSi2P3zcxlwARAQAB" + , "AAP+PlRboxy7Z0XjuG70N6+CrzSddQbW5KCwgPFrxYsPk7sAPFcBkmRMVlv9vZpS" + , "phbP4bvDK+MrSntM51g+9uE802yhPhSWdmEbImiWfV2ucEhlLjD8gw7JDex9XZ0a" + , "EbTOV56wOsILuedX/jF/6i6IQzy5YmuMeo+ip1XQIsIN+80CAMyXepOBJgHw/gBD" + , "VdXh/l//vUkQQlhInQYwgkKbr0POCTdr8DM1qdKLcUD9Q1khgNRp0vZGGz+5xsrc" + , "KaODUlMCANSczLJcYWa8yPqB3S14yTe7qmtDiOS362+SeVUwQA7eQ06PcHLPsN+p" + , "NtWoHRfYazxrs+g0JvmoQOYdj4xSQy0CAMq7H/l6aeG1n8tpyMxqE7OvBOsvzdu5" + , "XS7I1AnwllVFgvTadVvqgf7b+hdYd91doeHDUGqSYO78UG1GgaBHJdylqrRbaW5z" + , "ZWN1cmUgdGVzdCBrZXkgKHRoaXMgaXMgYSB0ZXN0IGtleSwgZG8gbm90IHVzZSBm" + , "b3IgYWN0dWFsIGVuY3J5cHRpb24pIDx0ZXN0QGV4YW1wbGUuY29tPoi4BBMBCAAi" + , "BQJO8UBmAhsDBgsJCAcDAgYVCAIJCgsEFgIDAQIeAQIXgAAKCRASnW4KxTe5x5D3" + , "A/9gx3V/T794stzAZc0G8arJWFQ58AqSc7VszMZupOyr2DrSp6DL9yV2p57FXE1/" + , "LRU3/8l4x+B1xvz5GAlguhoytPL0JyxRwU/G3A321wuawzRXb982aYUE722/vmIf" + , "7N4Vs2DT8Vs+QeI3oyPDs/aBdgpBeBO9BkFZqmGTyhCUGZ0B2ARO8UBmAQQAvdi5" + , "0M/WRCkOLt2RsUve8V8brMWYTJBJTTWoHUeRr82v4NCdX7OE1BsoVK8cy/1QY+gL" + , "OH9PqinuGGNWRmPV2Ju/RYn5H7sdewXA8E80xWhc4phHRMJ8Jjhg/GVPamkJ8B5z" + , "eKF0jcLFl7cuVdOyQakhoeDWJd0CyfW837nmPtMAEQEAAQAD/RaVtFFTkF1udun7" + , "YOwzJvQXCO9OWHZvSdEeG4BUNdAwy4YWu0oZzKkBDBS6+lWILqqb/c28U4leUJ1l" + , "H+viz5svN9BWWyj/UpI00uwUo9JaIqalemwfLx6vsh69b54L1B4exLZHYGLvy/B3" + , "5T6bT0gpOE+53BRtKcJaOh/McQeJAgDTOCBU5weWOf6Bhqnw3Vr/gRfxntAz2okN" + , "gqz/h79mWbCc/lHKoYQSsrCdMiwziHSjXwvehUrdWE/AcomtW0vbAgDmGJqJ2fNr" + , "HvdsGx4Ld/BxyiZbCURJLUQ5CwzfHGIvBu9PMT8zM26NOSncaXRjxDna2Ggh8Uum" + , "ANEwbnhxFwZpAf9L9RLYIMTtAqwBjfXJg/lHcc2R+VP0hL5c8zFz+S+w7bRqINwL" + , "ff1JstKuHT2nJnu0ustK66by8YI3T0hDFFahnNCInwQYAQgACQUCTvFAZgIbDAAK" + , "CRASnW4KxTe5x3JQBACgUobj/7aB9N2rQ1EE2xhN+AgOMhAvQVYIYu5L4jmZTs9r" + , "PMQH6f/AGLKxAsdZIG13AOyQ/dNnjcqezc17HSV9MycB6ormVtF5Q0/GSbuKq2EW" + , "IJf+/dFjxEmflWpbxw/36pEd/EReLX8b8qDIYadK6BpiWN9xgEiBv/oEJAxffw==" + , "=LDsg" + ] +keyBlock :: Bool -> [String] -> String +keyBlock public ls = unlines 
+ [ "-----BEGIN PGP "++t++" KEY BLOCK-----" + , "Version: GnuPG v1.4.11 (GNU/Linux)" + , "" + , unlines ls + , "-----END PGP "++t++" KEY BLOCK-----" + ] + where + t + | public = "PUBLIC" + | otherwise = "PRIVATE" + +{- Runs an action using gpg in a test harness, in which gpg does + - not use ~/.gnupg/, but a directory with the test key set up to be used. -} +testHarness :: IO a -> IO a +testHarness a = do + orig <- getEnv var + bracket setup (cleanup orig) (const a) + where + var = "GNUPGHOME" + + setup = do + base <- getTemporaryDirectory + dir <- mktmpdir $ base "gpgtmpXXXXXX" + setEnv var dir True + _ <- pipeStrict [Params "--import -q"] $ unlines + [testSecretKey, testKey] + return dir + + cleanup orig tmpdir = removeDirectoryRecursive tmpdir >> reset orig + reset (Just v) = setEnv var v True + reset _ = unsetEnv var + +{- Tests the test harness. -} +testTestHarness :: IO Bool +testTestHarness = do + keys <- testHarness $ findPubKeys testKeyId + return $ KeyIds [testKeyId] == keys diff --git a/Utility/JSONStream.hs b/Utility/JSONStream.hs new file mode 100644 index 0000000000..7910c11941 --- /dev/null +++ b/Utility/JSONStream.hs @@ -0,0 +1,44 @@ +{- Streaming JSON output. + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Utility.JSONStream ( + start, + add, + end +) where + +import Text.JSON + +{- Text.JSON does not support building up a larger JSON document piece by + piece as a stream. To support streaming, a hack. The JSObject is converted + to a string with its final "}" left off, allowing it to be added to + later. 
-} +start :: JSON a => [(String, a)] -> String +start l + | last s == endchar = init s + | otherwise = bad s + where + s = encodeStrict $ toJSObject l + +add :: JSON a => [(String, a)] -> String +add l + | head s == startchar = ',' : drop 1 s + | otherwise = bad s + where + s = start l + +end :: String +end = [endchar, '\n'] + +startchar :: Char +startchar = '{' + +endchar :: Char +endchar = '}' + +bad :: String -> a +bad s = error $ "Text.JSON returned unexpected string: " ++ s diff --git a/Utility/Matcher.hs b/Utility/Matcher.hs new file mode 100644 index 0000000000..01500a2111 --- /dev/null +++ b/Utility/Matcher.hs @@ -0,0 +1,100 @@ +{- A generic matcher. + - + - Can be used to check if a user-supplied condition, + - like "foo and ( bar or not baz )" matches. The condition must already + - be tokenized, and can contain arbitrary operations. + - + - If operations are not separated by and/or, they are defaulted to being + - anded together, so "foo bar baz" all must match. + - + - Is forgiving about misplaced closing parens, so "foo and (bar or baz" + - will be handled, as will "foo and ( bar or baz ) )" + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Utility.Matcher ( + Token(..), + Matcher, + token, + generate, + match, + matchM, + matchesAny +) where + +import Control.Monad + +{- A Token can be an Operation of an arbitrary type, or one of a few + - predefined peices of syntax. -} +data Token op = Operation op | And | Or | Not | Open | Close + deriving (Show, Eq) + +data Matcher op = MAny + | MAnd (Matcher op) (Matcher op) + | MOr (Matcher op) (Matcher op) + | MNot (Matcher op) + | MOp op + deriving (Show, Eq) + +{- Converts a word of syntax into a token. Doesn't handle operations. -} +token :: String -> Token op +token "and" = And +token "or" = Or +token "not" = Not +token "(" = Open +token ")" = Close +token t = error $ "unknown token " ++ t + +{- Converts a list of Tokens into a Matcher. 
-} +generate :: [Token op] -> Matcher op +generate = go MAny + where + go m [] = m + go m ts = uncurry go $ consume m ts + +{- Consumes one or more Tokens, constructs a new Matcher, + - and returns unconsumed Tokens. -} +consume :: Matcher op -> [Token op] -> (Matcher op, [Token op]) +consume m [] = (m, []) +consume m (t:ts) = go t + where + go And = cont $ m `MAnd` next + go Or = cont $ m `MOr` next + go Not = cont $ m `MAnd` MNot next + go Open = let (n, r) = consume next rest in (m `MAnd` n, r) + go Close = (m, ts) + go (Operation o) = (m `MAnd` MOp o, ts) + + (next, rest) = consume MAny ts + cont v = (v, rest) + +{- Checks if a Matcher matches, using a supplied function to check + - the value of Operations. -} +match :: (op -> v -> Bool) -> Matcher op -> v -> Bool +match a m v = go m + where + go MAny = True + go (MAnd m1 m2) = go m1 && go m2 + go (MOr m1 m2) = go m1 || go m2 + go (MNot m1) = not (go m1) + go (MOp o) = a o v + +{- Runs a monadic Matcher, where Operations are actions in the monad. Both sides of an and/or are always run (liftM2 does not short-circuit). -} +matchM :: Monad m => Matcher (v -> m Bool) -> v -> m Bool +matchM m v = go m + where + go MAny = return True + go (MAnd m1 m2) = liftM2 (&&) (go m1) (go m2) + go (MOr m1 m2) = liftM2 (||) (go m1) (go m2) + go (MNot m1) = liftM not (go m1) + go (MOp o) = o v + +{- Checks if a matcher contains no limits, and so (presumably) matches + - anything. Note that this only checks the trivial case; it is possible + - to construct matchers that match anything but are more complicated. -} +matchesAny :: Matcher a -> Bool +matchesAny MAny = True +matchesAny _ = False diff --git a/Utility/Misc.hs b/Utility/Misc.hs new file mode 100644 index 0000000000..1d3c0e6763 --- /dev/null +++ b/Utility/Misc.hs @@ -0,0 +1,58 @@ +{- misc utility functions + - + - Copyright 2010-2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. 
+ -} + +module Utility.Misc where + +import System.IO +import System.IO.Error (try) +import Control.Monad +import Control.Applicative + +{- A version of hGetContents that is not lazy. Ensures file is + - all read before it gets closed. -} +hGetContentsStrict :: Handle -> IO String +hGetContentsStrict = hGetContents >=> \s -> length s `seq` return s + +{- A version of readFile that is not lazy. -} +readFileStrict :: FilePath -> IO String +readFileStrict = readFile >=> \s -> length s `seq` return s + +{- Like break, but the character matching the condition is not included + - in the second result list. + - + - separate (== ':') "foo:bar" = ("foo", "bar") + - separate (== ':') "foobar" = ("foobar", "") + -} +separate :: (a -> Bool) -> [a] -> ([a], [a]) +separate c l = unbreak $ break c l + where + unbreak r@(a, b) + | null b = r + | otherwise = (a, tail b) + +{- Breaks out the first line. -} +firstLine :: String-> String +firstLine = takeWhile (/= '\n') + +{- Catches IO errors and returns a Bool -} +catchBoolIO :: IO Bool -> IO Bool +catchBoolIO a = catchDefaultIO a False + +{- Catches IO errors and returns a Maybe -} +catchMaybeIO :: IO a -> IO (Maybe a) +catchMaybeIO a = catchDefaultIO (Just <$> a) Nothing + +{- Catches IO errors and returns a default value. -} +catchDefaultIO :: IO a -> a -> IO a +catchDefaultIO a def = catch a (const $ return def) + +{- Catches IO errors and returns the error message. -} +catchMsgIO :: IO a -> IO (Either String a) +catchMsgIO a = dispatch <$> try a + where + dispatch (Left e) = Left $ show e + dispatch (Right v) = Right v diff --git a/Utility/Monad.hs b/Utility/Monad.hs new file mode 100644 index 0000000000..0d1675fa49 --- /dev/null +++ b/Utility/Monad.hs @@ -0,0 +1,30 @@ +{- monadic stuff + - + - Copyright 2010-2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. 
+ -} + +module Utility.Monad where + +import Data.Maybe +import Control.Monad (liftM) + +{- Return the first value from a list, if any, satisfying the given + - predicate -} +firstM :: (Monad m) => (a -> m Bool) -> [a] -> m (Maybe a) +firstM _ [] = return Nothing +firstM p (x:xs) = do + q <- p x + if q + then return (Just x) + else firstM p xs + +{- Returns true if any value in the list satisfies the predicate, + - stopping once one is found. -} +anyM :: (Monad m) => (a -> m Bool) -> [a] -> m Bool +anyM p = liftM isJust . firstM p + +{- Runs an action on values from a list until it succeeds. -} +untilTrue :: (Monad m) => [a] -> (a -> m Bool) -> m Bool +untilTrue = flip anyM diff --git a/Utility/PartialPrelude.hs b/Utility/PartialPrelude.hs new file mode 100644 index 0000000000..ad857196d6 --- /dev/null +++ b/Utility/PartialPrelude.hs @@ -0,0 +1,64 @@ +{- Parts of the Prelude are partial functions, which are a common source of + - bugs. + - + - This exports functions that conflict with the prelude, which avoids + - them being accidentally used. + -} + +module Utility.PartialPrelude where + +{- read should be avoided, as it throws an error + - Instead, use: readMaybe -} +read :: Read a => String -> a +read = Prelude.read + +{- head is a partial function; head [] is an error + - Instead, use: take 1 or headMaybe -} +head :: [a] -> a +head = Prelude.head + +{- tail is also partial + - Instead, use: drop 1 -} +tail :: [a] -> [a] +tail = Prelude.tail + +{- init too + - Instead, use: beginning -} +init :: [a] -> [a] +init = Prelude.init + +{- last too + - Instead, use: end or lastMaybe -} +last :: [a] -> a +last = Prelude.last + +{- Attempts to read a value from a String. + - + - Ignores leading/trailing whitespace, and throws away any trailing + - text after the part that can be read. + -} +readMaybe :: (Read a) => String -> Maybe a +readMaybe s = case reads s of + ((x,_):_) -> Just x + _ -> Nothing + +{- Like head but Nothing on empty list. 
-} +headMaybe :: [a] -> Maybe a +headMaybe [] = Nothing +headMaybe v = Just $ Prelude.head v + +{- Like last but Nothing on empty list. -} +lastMaybe :: [a] -> Maybe a +lastMaybe [] = Nothing +lastMaybe v = Just $ Prelude.last v + +{- All but the last element of a list. + - (Like init, but no error on an empty list.) -} +beginning :: [a] -> [a] +beginning [] = [] +beginning l = Prelude.init l + +{- Like last, but no error on an empty list. -} +end :: [a] -> [a] +end [] = [] +end l = [Prelude.last l] diff --git a/Utility/Path.hs b/Utility/Path.hs new file mode 100644 index 0000000000..38e7bd05ca --- /dev/null +++ b/Utility/Path.hs @@ -0,0 +1,136 @@ +{- path manipulation + - + - Copyright 2010-2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Utility.Path where + +import Data.String.Utils +import System.Path +import System.FilePath +import System.Directory +import Data.List +import Data.Maybe +import Control.Applicative +import System.Posix.User + +import Utility.Monad + +{- Returns the parent directory of a path. Parent of / is "" -} +parentDir :: FilePath -> FilePath +parentDir dir + | not $ null dirs = slash ++ join s (init dirs) + | otherwise = "" + where + dirs = filter (not . null) $ split s dir + slash = if isAbsolute dir then s else "" + s = [pathSeparator] + +prop_parentDir_basics :: FilePath -> Bool +prop_parentDir_basics dir + | null dir = True + | dir == "/" = parentDir dir == "" + | otherwise = p /= dir + where + p = parentDir dir + +{- Checks if the first FilePath is, or could be said to contain the second. + - For example, "foo/" contains "foo/bar". Also, "foo", "./foo", "foo/" etc + - are all equivalent. + -} +dirContains :: FilePath -> FilePath -> Bool +dirContains a b = a == b || a' == b' || (a'++"/") `isPrefixOf` b' + where + norm p = fromMaybe "" $ absNormPath p "." + a' = norm a + b' = norm b + +{- Converts a filename into a normalized, absolute path. 
-} +absPath :: FilePath -> IO FilePath +absPath file = do + cwd <- getCurrentDirectory + return $ absPathFrom cwd file + +{- Converts a filename into a normalized, absolute path + - from the specified cwd. -} +absPathFrom :: FilePath -> FilePath -> FilePath +absPathFrom cwd file = fromMaybe bad $ absNormPath cwd file + where + bad = error $ "unable to normalize " ++ file + +{- Constructs a relative path from the CWD to a file. + - + - For example, assuming CWD is /tmp/foo/bar: + - relPathCwdToFile "/tmp/foo" == ".." + - relPathCwdToFile "/tmp/foo/bar" == "" + -} +relPathCwdToFile :: FilePath -> IO FilePath +relPathCwdToFile f = relPathDirToFile <$> getCurrentDirectory <*> absPath f + +{- Constructs a relative path from a directory to a file. + - + - Both must be absolute, and normalized (eg with absNormpath). + -} +relPathDirToFile :: FilePath -> FilePath -> FilePath +relPathDirToFile from to = join s $ dotdots ++ uncommon + where + s = [pathSeparator] + pfrom = split s from + pto = split s to + common = map fst $ filter same $ zip pfrom pto + same (c,d) = c == d + uncommon = drop numcommon pto + dotdots = replicate (length pfrom - numcommon) ".." + numcommon = length common + +prop_relPathDirToFile_basics :: FilePath -> FilePath -> Bool +prop_relPathDirToFile_basics from to + | from == to = null r + | otherwise = not (null r) + where + r = relPathDirToFile from to + +{- Given an original list of files, and an expanded list derived from it, + - ensures that the original list's ordering is preserved. + - + - The input list may contain a directory, like "dir" or "dir/". Any + - items in the expanded list that are contained in that directory will + - appear at the same position as it did in the input list. 
+ -} +preserveOrder :: [FilePath] -> [FilePath] -> [FilePath] +preserveOrder [] new = new +preserveOrder [_] new = new -- optimisation +preserveOrder (l:ls) new = found ++ preserveOrder ls rest + where + (found, rest)=partition (l `dirContains`) new + +{- Runs an action that takes a list of FilePaths, and ensures that + - its return list preserves order. + - + - This assumes that it's cheaper to call preserveOrder on the result, + - than it would be to run the action separately with each param. In the case + - of git file list commands, that assumption tends to hold. + -} +runPreserveOrder :: ([FilePath] -> IO [FilePath]) -> [FilePath] -> IO [FilePath] +runPreserveOrder a files = preserveOrder files <$> a files + +{- Lists the contents of a directory. + - Unlike getDirectoryContents, paths are not relative to the directory. -} +dirContents :: FilePath -> IO [FilePath] +dirContents d = map (d ) . filter notcruft <$> getDirectoryContents d + where + notcruft "." = False + notcruft ".." = False + notcruft _ = True + +{- Current user's home directory. -} +myHomeDir :: IO FilePath +myHomeDir = homeDirectory <$> (getUserEntryForID =<< getEffectiveUserID) + +{- Checks if a command is available in PATH. -} +inPath :: String -> IO Bool +inPath command = getSearchPath >>= anyM indir + where + indir d = doesFileExist $ d command diff --git a/Utility/RsyncFile.hs b/Utility/RsyncFile.hs new file mode 100644 index 0000000000..a691d0a0e6 --- /dev/null +++ b/Utility/RsyncFile.hs @@ -0,0 +1,66 @@ +{- file copying with rsync + - + - Copyright 2010 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Utility.RsyncFile where + +import Data.String.Utils +import Data.List + +import Utility.SafeCommand + +{- Generates parameters to make rsync use a specified command as its remote + - shell. 
-} +rsyncShell :: [CommandParam] -> [CommandParam] +rsyncShell command = [Param "-e", Param $ unwords $ map escape (toCommand command)] + where + {- rsync requires some weird, non-shell like quoting in + - here. A doubled single quote inside the single quoted + - string is a single quote. -} + escape s = "'" ++ join "''" (split "'" s) ++ "'" + +{- Runs rsync in server mode to send a file, and exits. -} +rsyncServerSend :: FilePath -> IO () +rsyncServerSend file = rsyncExec $ + rsyncServerParams ++ [Param "--sender", File file] + +{- Runs rsync in server mode to receive a file. -} +rsyncServerReceive :: FilePath -> IO Bool +rsyncServerReceive file = rsync $ rsyncServerParams ++ [File file] + +rsyncServerParams :: [CommandParam] +rsyncServerParams = + [ Param "--server" + -- preserve permissions + , Param "-p" + -- preserve timestamps + , Param "-t" + -- allow resuming of transfers of big files + , Param "--inplace" + -- other options rsync normally uses in server mode + , Params "-e.Lsf ." + ] + +rsync :: [CommandParam] -> IO Bool +rsync = boolSystem "rsync" + +rsyncExec :: [CommandParam] -> IO () +rsyncExec params = executeFile "rsync" True (toCommand params) Nothing + +{- Checks if an rsync url involves the remote shell (ssh or rsh). + - Use of such urls with rsync or rsyncExec requires additional shell + - escaping. -} +rsyncUrlIsShell :: String -> Bool +rsyncUrlIsShell s + | "rsync://" `isPrefixOf` s = False + | otherwise = go s + where + -- host::dir is rsync protocol, while host:dir is ssh/rsh + go [] = False + go (c:cs) + | c == '/' = False -- got to directory with no colon + | c == ':' = not $ ":" `isPrefixOf` cs + | otherwise = go cs diff --git a/Utility/SafeCommand.hs b/Utility/SafeCommand.hs new file mode 100644 index 0000000000..aedf271373 --- /dev/null +++ b/Utility/SafeCommand.hs @@ -0,0 +1,114 @@ +{- safely running shell commands + - + - Copyright 2010-2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. 
+ -} + +module Utility.SafeCommand where + +import System.Exit +import qualified System.Posix.Process +import System.Posix.Process hiding (executeFile) +import System.Posix.Signals +import Data.String.Utils +import System.Log.Logger +import Control.Applicative + +{- A type for parameters passed to a shell command. A command can + - be passed either some Params (multiple parameters can be included, + - whitespace-separated), or a single Param (for when parameters contain + - whitespace), or a File. + -} +data CommandParam = Params String | Param String | File FilePath + deriving (Eq, Show, Ord) + +{- Used to pass a list of CommandParams to a function that runs + - a command and expects Strings. -} +toCommand :: [CommandParam] -> [String] +toCommand = (>>= unwrap) + where + unwrap (Param s) = [s] + unwrap (Params s) = filter (not . null) (split " " s) + -- Files that start with a dash are modified to avoid + -- the command interpreting them as options. + unwrap (File s@('-':_)) = ["./" ++ s] + unwrap (File s) = [s] + +{- Runs a system command, and returns True or False + - if it succeeded or failed. + -} +boolSystem :: FilePath -> [CommandParam] -> IO Bool +boolSystem command params = boolSystemEnv command params Nothing + +boolSystemEnv :: FilePath -> [CommandParam] -> Maybe [(String, String)] -> IO Bool +boolSystemEnv command params env = dispatch <$> safeSystemEnv command params env + where + dispatch ExitSuccess = True + dispatch _ = False + +{- Runs a system command, returning the exit status. -} +safeSystem :: FilePath -> [CommandParam] -> IO ExitCode +safeSystem command params = safeSystemEnv command params Nothing + +{- SIGINT(ctrl-c) is allowed to propagate and will terminate the program. -} +safeSystemEnv :: FilePath -> [CommandParam] -> Maybe [(String, String)] -> IO ExitCode +safeSystemEnv command params env = do + -- Going low-level because all the high-level system functions + -- block SIGINT etc. 
We need to block SIGCHLD, but allow + -- SIGINT to do its default program termination. + let sigset = addSignal sigCHLD emptySignalSet + oldint <- installHandler sigINT Default Nothing + oldset <- getSignalMask + blockSignals sigset + childpid <- forkProcess $ childaction oldint oldset + mps <- getProcessStatus True False childpid + restoresignals oldint oldset + case mps of + Just (Exited code) -> return code + _ -> error $ "unknown error running " ++ command + where + restoresignals oldint oldset = do + _ <- installHandler sigINT oldint Nothing + setSignalMask oldset + childaction oldint oldset = do + restoresignals oldint oldset + executeFile command True (toCommand params) env + +{- executeFile with debug logging -} +executeFile :: FilePath -> Bool -> [String] -> Maybe [(String, String)] -> IO () +executeFile c path p e = do + debugM "Utility.SafeCommand.executeFile" $ + "Running: " ++ c ++ " " ++ show p ++ " " ++ maybe "" show e + System.Posix.Process.executeFile c path p e + +{- Escapes a filename or other parameter to be safely able to be exposed to + - the shell. -} +shellEscape :: String -> String +shellEscape f = "'" ++ escaped ++ "'" + where + -- replace ' with '"'"' + escaped = join "'\"'\"'" $ split "'" f + +{- Unescapes a set of shellEscaped words or filenames. -} +shellUnEscape :: String -> [String] +shellUnEscape [] = [] +shellUnEscape s = word : shellUnEscape rest + where + (word, rest) = findword "" s + findword w [] = (w, "") + findword w (c:cs) + | c == ' ' = (w, cs) + | c == '\'' = inquote c w cs + | c == '"' = inquote c w cs + | otherwise = findword (w++[c]) cs + inquote _ w [] = (w, "") + inquote q w (c:cs) + | c == q = findword w cs + | otherwise = inquote q (w++[c]) cs + +{- For quickcheck. -} +prop_idempotent_shellEscape :: String -> Bool +prop_idempotent_shellEscape s = [s] == (shellUnEscape . shellEscape) s +prop_idempotent_shellEscape_multiword :: [String] -> Bool +prop_idempotent_shellEscape_multiword s = s == (shellUnEscape . 
unwords . map shellEscape) s diff --git a/Utility/StatFS.hsc b/Utility/StatFS.hsc new file mode 100644 index 0000000000..d3e4a507e5 --- /dev/null +++ b/Utility/StatFS.hsc @@ -0,0 +1,125 @@ +----------------------------------------------------------------------------- +-- | +-- +-- (This code originally comes from xmobar) +-- +-- Module : StatFS +-- Copyright : (c) Jose A Ortega Ruiz +-- License : BSD-3-clause +-- +-- All rights reserved. +-- +-- Redistribution and use in source and binary forms, with or without +-- modification, are permitted provided that the following conditions +-- are met: +-- +-- 1. Redistributions of source code must retain the above copyright +-- notice, this list of conditions and the following disclaimer. +-- 2. Redistributions in binary form must reproduce the above copyright +-- notice, this list of conditions and the following disclaimer in the +-- documentation and/or other materials provided with the distribution. +-- 3. Neither the name of the author nor the names of his contributors +-- may be used to endorse or promote products derived from this software +-- without specific prior written permission. +-- +-- THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +-- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +-- IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +-- ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE +-- FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +-- DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +-- OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +-- HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +-- LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +-- OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +-- SUCH DAMAGE. 
+-- +-- Maintainer : Jose A Ortega Ruiz +-- Stability : unstable +-- Portability : unportable +-- +-- A binding to C's statvfs(2) +-- +----------------------------------------------------------------------------- + +{-# LANGUAGE CPP, ForeignFunctionInterface, EmptyDataDecls #-} + + +module Utility.StatFS ( FileSystemStats(..), getFileSystemStats ) where + +import Foreign +import Foreign.C.Types +import Foreign.C.String +import Data.ByteString (useAsCString) +import Data.ByteString.Char8 (pack) + +#if defined (__FreeBSD__) || defined (__FreeBSD_kernel__) || defined (__APPLE__) +# include +# include +#else +#if defined (__linux__) +#include +#else +#define UNKNOWN +#endif +#endif + +data FileSystemStats = FileSystemStats { + fsStatBlockSize :: Integer + -- ^ Optimal transfer block size. + , fsStatBlockCount :: Integer + -- ^ Total data blocks in file system. + , fsStatByteCount :: Integer + -- ^ Total bytes in file system. + , fsStatBytesFree :: Integer + -- ^ Free bytes in file system. + , fsStatBytesAvailable :: Integer + -- ^ Free bytes available to non-superusers. + , fsStatBytesUsed :: Integer + -- ^ Bytes used. 
+ } deriving (Show, Eq) + +data CStatfs + +#ifdef UNKNOWN +#warning free space checking code not available for this OS +#else +#if defined(__APPLE__) +foreign import ccall unsafe "sys/mount.h statfs64" +#else +#if defined(__FreeBSD__) || defined (__FreeBSD_kernel__) +foreign import ccall unsafe "sys/mount.h statfs" +#else +foreign import ccall unsafe "sys/vfs.h statfs64" +#endif +#endif + c_statfs :: CString -> Ptr CStatfs -> IO CInt +#endif + +toI :: CULong -> Integer +toI = toInteger + +getFileSystemStats :: String -> IO (Maybe FileSystemStats) +getFileSystemStats path = +#ifdef UNKNOWN + return Nothing +#else + allocaBytes (#size struct statfs) $ \vfs -> + useAsCString (pack path) $ \cpath -> do + res <- c_statfs cpath vfs + if res == -1 then return Nothing + else do + bsize <- (#peek struct statfs, f_bsize) vfs + bcount <- (#peek struct statfs, f_blocks) vfs + bfree <- (#peek struct statfs, f_bfree) vfs + bavail <- (#peek struct statfs, f_bavail) vfs + let bpb = toI bsize + return $ Just FileSystemStats + { fsStatBlockSize = bpb + , fsStatBlockCount = toI bcount + , fsStatByteCount = toI bcount * bpb + , fsStatBytesFree = toI bfree * bpb + , fsStatBytesAvailable = toI bavail * bpb + , fsStatBytesUsed = toI (bcount - bfree) * bpb + } +#endif diff --git a/Utility/TempFile.hs b/Utility/TempFile.hs new file mode 100644 index 0000000000..3887b422b6 --- /dev/null +++ b/Utility/TempFile.hs @@ -0,0 +1,39 @@ +{- temp file functions + - + - Copyright 2010-2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Utility.TempFile where + +import Control.Exception (bracket) +import System.IO +import System.Posix.Process hiding (executeFile) +import System.Directory + +import Utility.Misc +import Utility.Path + +{- Runs an action like writeFile, writing to a temp file first and + - then moving it into place. The temp file is stored in the same + - directory as the final file to avoid cross-device renames. 
-} +viaTmp :: (FilePath -> String -> IO ()) -> FilePath -> String -> IO () +viaTmp a file content = do + pid <- getProcessID + let tmpfile = file ++ ".tmp" ++ show pid + createDirectoryIfMissing True (parentDir file) + a tmpfile content + renameFile tmpfile file + +{- Runs an action with a temp file, then removes the file. -} +withTempFile :: String -> (FilePath -> Handle -> IO a) -> IO a +withTempFile template a = bracket create remove use + where + create = do + tmpdir <- catchDefaultIO getTemporaryDirectory "." + openTempFile tmpdir template + remove (name, handle) = do + hClose handle + catchBoolIO (removeFile name >> return True) + use (name, handle) = a name handle diff --git a/Utility/Touch.hsc b/Utility/Touch.hsc new file mode 100644 index 0000000000..fd3320cd1d --- /dev/null +++ b/Utility/Touch.hsc @@ -0,0 +1,119 @@ +{- More control over touching a file. + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +{-# LANGUAGE ForeignFunctionInterface #-} + +module Utility.Touch ( + TimeSpec(..), + touchBoth, + touch +) where + +import Foreign +import Foreign.C +import Control.Monad (when) + +newtype TimeSpec = TimeSpec CTime + +{- Changes the access and modification times of an existing file. + Can follow symlinks, or not. Throws IO error on failure. 
-} +touchBoth :: FilePath -> TimeSpec -> TimeSpec -> Bool -> IO () + +touch :: FilePath -> TimeSpec -> Bool -> IO () +touch file mtime = touchBoth file mtime mtime + +#include +#include +#include +#include + +#ifndef _BSD_SOURCE +#define _BSD_SOURCE +#endif + +#if (defined UTIME_OMIT && defined UTIME_NOW && defined AT_FDCWD && defined AT_SYMLINK_NOFOLLOW) + +at_fdcwd :: CInt +at_fdcwd = #const AT_FDCWD + +at_symlink_nofollow :: CInt +at_symlink_nofollow = #const AT_SYMLINK_NOFOLLOW + +instance Storable TimeSpec where + -- use the larger alignment of the two types in the struct + alignment _ = max sec_alignment nsec_alignment + where + sec_alignment = alignment (undefined::CTime) + nsec_alignment = alignment (undefined::CLong) + sizeOf _ = #{size struct timespec} + peek ptr = do + sec <- #{peek struct timespec, tv_sec} ptr + return $ TimeSpec sec + poke ptr (TimeSpec sec) = do + #{poke struct timespec, tv_sec} ptr sec + #{poke struct timespec, tv_nsec} ptr (0 :: CLong) + +{- While its interface is beastly, utimensat is in recent + POSIX standards, unlike lutimes. -} +foreign import ccall "utimensat" + c_utimensat :: CInt -> CString -> Ptr TimeSpec -> CInt -> IO CInt + +touchBoth file atime mtime follow = + allocaArray 2 $ \ptr -> + withCString file $ \f -> do + pokeArray ptr [atime, mtime] + r <- c_utimensat at_fdcwd f ptr flags + when (r /= 0) $ throwErrno "touchBoth" + where + flags = if follow + then 0 + else at_symlink_nofollow + +#else +#if 0 +{- Using lutimes is needed for BSD. + - + - TODO: test if lutimes is available. May have to do it in configure. + - TODO: TimeSpec uses a CTime, while tv_sec is a CLong. It is implementation + - dependent whether these are the same; need to find a cast that works. + - (Without the cast it works on linux i386, but + - maybe not elsewhere.) 
+ -} + +instance Storable TimeSpec where + alignment _ = alignment (undefined::CLong) + sizeOf _ = #{size struct timeval} + peek ptr = do + sec <- #{peek struct timeval, tv_sec} ptr + return $ TimeSpec sec + poke ptr (TimeSpec sec) = do + #{poke struct timeval, tv_sec} ptr sec + #{poke struct timeval, tv_usec} ptr (0 :: CLong) + +foreign import ccall "utimes" + c_utimes :: CString -> Ptr TimeSpec -> IO CInt +foreign import ccall "lutimes" + c_lutimes :: CString -> Ptr TimeSpec -> IO CInt + +touchBoth file atime mtime follow = + allocaArray 2 $ \ptr -> + withCString file $ \f -> do + pokeArray ptr [atime, mtime] + r <- syscall f ptr + if (r /= 0) + then throwErrno "touchBoth" + else return () + where + syscall = if follow + then c_lutimes + else c_utimes + +#else +#warning "utimensat and lutimes not available; building without symlink timestamp preservation support" +touchBoth _ _ _ _ = return () +#endif +#endif diff --git a/Utility/Url.hs b/Utility/Url.hs new file mode 100644 index 0000000000..f215a1ebdb --- /dev/null +++ b/Utility/Url.hs @@ -0,0 +1,83 @@ +{- Url downloading. + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Utility.Url ( + exists, + canDownload, + download, + get +) where + +import Control.Applicative +import qualified Network.Browser as Browser +import Network.HTTP +import Network.URI + +import Utility.SafeCommand +import Utility.Path + +type URLString = String + +{- Checks that an url exists and could be successfully downloaded. -} +exists :: URLString -> IO Bool +exists url = + case parseURI url of + Nothing -> return False + Just u -> do + r <- request u HEAD + case rspCode r of + (2,_,_) -> return True + _ -> return False + +canDownload :: IO Bool +canDownload = (||) <$> inPath "wget" <*> inPath "curl" + +{- Used to download large files, such as the contents of keys. + - + - Uses wget or curl program for its progress bar. (Wget has a better one, + - so is preferred.) 
Which program to use is determined at run time; it + - would not be appropriate to test at configure time and build support + - for only one in. + -} +download :: URLString -> FilePath -> IO Bool +download url file = do + e <- inPath "wget" + if e + then + boolSystem "wget" + [Params "-c -O", File file, File url] + else + -- Uses the -# progress display, because the normal + -- one is very confusing when resuming, showing + -- the remainder to download as the whole file, + -- and not indicating how much percent was + -- downloaded before the resume. + boolSystem "curl" + [Params "-L -C - -# -o", File file, File url] + +{- Downloads a small file. -} +get :: URLString -> IO String +get url = + case parseURI url of + Nothing -> error "url parse error" + Just u -> do + r <- request u GET + case rspCode r of + (2,_,_) -> return $ rspBody r + _ -> error $ rspReason r + +{- Makes a http request of an url. For example, HEAD can be used to + - check if the url exists, or GET used to get the url content (best for + - small urls). -} +request :: URI -> RequestMethod -> IO (Response String) +request url requesttype = Browser.browse $ do + Browser.setErrHandler ignore + Browser.setOutHandler ignore + Browser.setAllowRedirects True + snd <$> Browser.request (mkRequest requesttype url :: Request_String) + where + ignore = const $ return () diff --git a/configure.hs b/configure.hs new file mode 100644 index 0000000000..3b3626dd22 --- /dev/null +++ b/configure.hs @@ -0,0 +1,98 @@ +{- Checks system configuration and generates SysConfig.hs. 
-} + +import System.Directory +import Data.List +import System.Cmd.Utils + +import Build.TestConfig + +tests :: [TestCase] +tests = + [ TestCase "version" getVersion + , TestCase "git" $ requireCmd "git" "git --version >/dev/null" + , TestCase "git version" getGitVersion + , testCp "cp_a" "-a" + , testCp "cp_p" "-p" + , testCp "cp_reflink_auto" "--reflink=auto" + , TestCase "uuid generator" $ selectCmd "uuid" ["uuid", "uuidgen"] "" + , TestCase "xargs -0" $ requireCmd "xargs_0" "xargs -0 /dev/null" + , TestCase "curl" $ testCmd "curl" "curl --version >/dev/null" + , TestCase "wget" $ testCmd "wget" "wget --version >/dev/null" + , TestCase "bup" $ testCmd "bup" "bup --version >/dev/null" + , TestCase "gpg" $ testCmd "gpg" "gpg --version >/dev/null" + ] ++ shaTestCases [1, 256, 512, 224, 384] + +shaTestCases :: [Int] -> [TestCase] +shaTestCases l = map make l + where make n = + let + cmds = map (\x -> "sha" ++ show n ++ x) ["", "sum"] + key = "sha" ++ show n + in TestCase key $ maybeSelectCmd key cmds " String -> TestCase +testCp k option = TestCase cmd $ testCmd k run + where + cmd = "cp " ++ option + run = cmd ++ " " ++ testFile ++ " " ++ testFile ++ ".new" + +{- Pulls package version out of the changelog. -} +getVersion :: Test +getVersion = do + version <- getVersionString + return $ Config "packageversion" (StringConfig version) + +getVersionString :: IO String +getVersionString = do + changelog <- readFile "CHANGELOG" + let verline = head $ lines changelog + return $ middle (words verline !! 1) + where + middle = drop 1 . init + +getGitVersion :: Test +getGitVersion = do + (_, s) <- pipeFrom "git" ["--version"] + let version = last $ words $ head $ lines s + return $ Config "gitversion" (StringConfig version) + +{- Set up cabal file with version. 
-} +cabalSetup :: IO () +cabalSetup = do + version <- getVersionString + cabal <- readFile cabalfile + writeFile tmpcabalfile $ unlines $ + map (setfield "Version" version) $ + lines cabal + renameFile tmpcabalfile cabalfile + where + cabalfile = "git-annex.cabal" + tmpcabalfile = cabalfile++".tmp" + setfield field value s + | fullfield `isPrefixOf` s = fullfield ++ value + | otherwise = s + where + fullfield = field ++ ": " + +setup :: IO () +setup = do + createDirectoryIfMissing True tmpDir + writeFile testFile "test file contents" + +cleanup :: IO () +cleanup = removeDirectoryRecursive tmpDir + +main :: IO () +main = do + setup + config <- runTests tests + writeSysConfig config + cleanup + cabalSetup diff --git a/debian/NEWS b/debian/NEWS new file mode 100644 index 0000000000..f807d05255 --- /dev/null +++ b/debian/NEWS @@ -0,0 +1,31 @@ +git-annex (3.20110702) unstable; urgency=low + + The URL backend has been removed. Instead the new web remote can be used. + + -- Joey Hess Fri, 01 Jul 2011 15:40:51 -0400 + +git-annex (3.20110624) experimental; urgency=low + + There has been another change to the git-annex data store. + Use `git annex upgrade` to migrate your repositories to the new + layout. See or + /usr/share/doc/git-annex/html/upgrades.html + + The significant change this time is that the .git-annex/ directory + is gone; instead there is a git-annex branch that is automatically + maintained by git-annex, and encapsulates all its state nicely out + of your way. + + You should make sure you include the git-annex branch when + git pushing and pulling. + + -- Joey Hess Tue, 21 Jun 2011 20:18:00 -0400 + +git-annex (0.20110316) experimental; urgency=low + + This version reorganises the layout of git-annex's files in your repository. + There is an upgrade process to convert a repository from the old git-annex + to this version. 
See or + /usr/share/doc/git-annex/html/upgrades.html + + -- Joey Hess Wed, 16 Mar 2011 15:49:15 -0400 diff --git a/debian/changelog b/debian/changelog new file mode 100644 index 0000000000..33d196fa32 --- /dev/null +++ b/debian/changelog @@ -0,0 +1,877 @@ +git-annex (3.20111212) UNRELEASED; urgency=low + + * Union merge now finds the least expensive way to represent the merge. + * reinject: Add a sanity check for using an annexed file as the source file. + * Properly handle multiline git config values. + * Fix the hook special remote, which bitrotted a while ago. + * map: --fast disables use of dot to display map + * Test suite improvements. Current top-level test coverage: 75% + * Improve deletion of files from rsync special remotes. Closes: #652849 + * Add --include, which is the same as --not --exclude. + * Can now be built with older git versions (before 1.7.7); the resulting + binary should only be used with old git. + * Format strings can be specified using the new --format option, to control + what is output by git annex find. + * Support git annex find --json + + -- Joey Hess Mon, 12 Dec 2011 01:57:49 -0400 + +git-annex (3.20111211) unstable; urgency=medium + + * Fix bug in last version in getting contents from bare repositories. + * Ensure that git-annex branch changes are merged into git-annex's index, + which fixes a bug that could cause changes that were pushed to the + git-annex branch to get reverted. As a side effect, it's now safe + for users to check out and commit changes directly to the git-annex + branch. + * map: Fix a failure to detect a loop when both repositories are local + and refer to each other with relative paths. + * Prevent key names from containing newlines. + * add: If interrupted, add can leave files converted to symlinks but not + yet added to git. Running the add again will now clean up this situtation. + * Fix caching of decrypted ciphers, which failed when drop had to check + multiple different encrypted special remotes. 
+ * unannex: Can be run on files that have been added to the annex, but not + yet committed. + * sync: New command that synchronises the local repository and default + remote, by running git commit, pull, and push for you. + * Version monad-control dependency in cabal file. + + -- Joey Hess Sun, 11 Dec 2011 21:24:39 -0400 + +git-annex (3.20111203) unstable; urgency=low + + * The VFAT filesystem on recent versions of Linux, when mounted with + shortname=mixed, does not get along well with git-annex's mixed case + .git/annex/objects hash directories. To avoid this problem, new content + is now stored in all-lowercase hash directories. Except for non-bare + repositories which would be a pain to transition and cannot be put on FAT. + (Old mixed-case hash directories are still tried for backwards + compatibility.) + * Flush json output, avoiding a buffering problem that could result in + doubled output. + * Avoid needing haskell98 and other fixes for new ghc. Thanks, Mark Wright. + * Bugfix: dropunused did not drop keys with two spaces in their name. + * Support for storing .git/annex on a different device than the rest of the + git repository. + * --inbackend can be used to make git-annex only operate on files + whose content is stored using a specified key-value backend. + * dead: A command which says that a repository is gone for good + and you don't want git-annex to mention it again. + + -- Joey Hess Sat, 03 Dec 2011 21:01:45 -0400 + +git-annex (3.20111122) unstable; urgency=low + + * merge: Improve commit messages to mention what was merged. + * Avoid doing auto-merging in commands that don't need fully current + information from the git-annex branch. In particular, git annex add + no longer needs to auto-merge. + * init: When run in an already initialized repository, and without + a description specified, don't delete the old description. + * Optimised union merging; now only runs git cat-file once, and runs + in constant space.
+ * status: Now displays trusted, untrusted, and semitrusted repositories + separately. + * status: Include all special remotes in the list of repositories. + * status: Fix --json mode. + * status: --fast is back + * Fix support for insteadOf url remapping. Closes: #644278 + * When not run in a git repository, git-annex can still display a usage + message, and "git annex version" even works. + * migrate: Don't fall over a stale temp file. + * Avoid excessive escaping for rsync special remotes that are not accessed + over ssh. + * find: Support --print0 + + -- Joey Hess Tue, 22 Nov 2011 14:31:45 -0400 + +git-annex (3.20111111) unstable; urgency=low + + * Handle a case where an annexed file is moved into a gitignored directory, + by having fix --force add its change. + * Avoid cyclic drop problems. + * Optimized copy --from and get --from to avoid checking the location log + for files that are already present. + * Automatically fix up badly formatted uuid.log entries produced by + 3.20111105, whenever the uuid.log is changed (ie, by init or describe). + * map: Support remotes with /~/ and /~user/ + + -- Joey Hess Fri, 11 Nov 2011 13:44:18 -0400 + +git-annex (3.20111107) unstable; urgency=low + + * merge: Use fast-forward merges when possible. + Thanks Valentin Haenel for a test case showing how non-fast-forward + merges could result in an ongoing pull/merge/push cycle. + * Don't try to read config from repos with annex-ignore set. + * Bugfix: In the past two releases, git-annex init has written the uuid.log + in the wrong format, with the UUID and description flipped. + + -- Joey Hess Mon, 07 Nov 2011 12:47:44 -0400 + +git-annex (3.20111105) unstable; urgency=low + + * The default backend used when adding files to the annex is changed + from WORM to SHA256. + To get old behavior, add a .gitattributes containing: * annex.backend=WORM + * Sped up some operations on remotes that are on the same host. 
+ * copy --to: Fixed leak when copying many files to a remote on the same + host. + * uninit: Add guard against being run with the git-annex branch checked out. + * Fail if --from or --to is passed to commands that do not support them. + * drop --from is now supported to remove file content from a remote. + * status: Now always shows the current repository, even when it does not + appear in uuid.log. + * fsck: Now works in bare repositories. Checks location log information, + and file contents. Does not check that numcopies is satisfied, as + .gitattributes information about numcopies is not available in a bare + repository. + * unused, dropunused: Now work in bare repositories. + * Removed the setkey command, and added a reinject command with a more + useful interface. + * The fromkey command now takes the key as its first parameter. The --key + option is no longer used. + * Built without any filename containing .git being excluded. Closes: #647215 + * Record uuid when auto-initializing a remote so it shows in status. + * Bugfix: Fixed git-annex init crash in a bare repository when there was + already an existing git-annex branch. + * Pass -t to rsync to preserve timestamps. + + -- Joey Hess Sat, 05 Nov 2011 15:47:52 -0400 + +git-annex (3.20111025) unstable; urgency=low + + * A remote can have a annexUrl configured, that is used by git-annex + instead of its usual url. (Similar to pushUrl.) + * migrate: Copy url logs for keys when migrating. + * git-annex-shell: GIT_ANNEX_SHELL_READONLY and GIT_ANNEX_SHELL_LIMITED + environment variables can be set to limit what commands can be run. + This is used by gitolite's new git-annex support! + + -- Joey Hess Tue, 25 Oct 2011 13:03:08 -0700 + +git-annex (3.20111011) unstable; urgency=low + + * This version of git-annex only works with git 1.7.7 and newer. + The breakage with old versions is subtle, and affects the + annex.numcopies settings in .gitattributes, so be sure to upgrade git + to 1.7.7. 
(Debian package now depends on that version.) + * Don't pass absolute paths to git show-attr, as it started following + symlinks when that's done in 1.7.7. Instead, use relative paths, + which show-attr only handles 100% correctly in 1.7.7. Closes: #645046 + * Fix referring to remotes by uuid. + * New or changed repository descriptions in uuid.log now have a timestamp, + which is used to ensure the newest description is used when the uuid.log + has been merged. + * Note that older versions of git-annex will display the timestamp as part + of the repository description, which is ugly but otherwise harmless. + * Add timestamps to trust.log and remote.log too. + * git-annex-shell: Added the --uuid option. + * git-annex now asks git-annex-shell to verify that it's operating in + the expected repository. + * Note that this git-annex will not interoperate with remotes using + older versions of git-annex-shell. + * Now supports git's insteadOf configuration, to modify the url + used to access a remote. Note that pushInsteadOf is not used; + that and pushurl are reserved for actual git pushes. Closes: #644278 + * status: List all known repositories. + * When displaying a list of repositories, show git remote names + in addition to their descriptions. + * Add locking to avoid races when changing the git-annex branch. + * Various speed improvements gained by using ByteStrings. + * Contain the zombie hordes. + + -- Joey Hess Tue, 11 Oct 2011 23:00:02 -0400 + +git-annex (3.20110928) unstable; urgency=low + + * --in can be used to make git-annex only operate on files + believed to be present in a given repository. + * Arbitrarily complex expressions can be built to limit the files git-annex + operates on, by combining the options --not --and --or -( and -) + Example: git annex get --exclude '*.mp3' --and --not -( --in usbdrive --or --in archive -) + * --copies=N can be used to make git-annex only operate on files with + the specified number of copies. 
(And --not --copies=N for the inverse.) + * find: Rather than only showing files whose contents are present, + when used with --exclude --copies or --in, displays all files that + match the specified conditions. + * Note that this is a behavior change for git-annex find! Old behavior + can be gotten by using: git-annex find --in . + * status: Massively sped up; remove --fast mode. + * unused: File contents used by branches and tags are no longer + considered unused, even when not used by the current branch. This is + the final piece of the puzzle needed for git-annex to play nicely + with branches. + + -- Joey Hess Wed, 28 Sep 2011 18:14:02 -0400 + +git-annex (3.20110915) unstable; urgency=low + + * whereis: Show untrusted locations separately and do not include in + location count. + * Fix build without S3. + * addurl: Always use whole url as destination filename, rather than + only its file component. + * get, drop, copy: Added --auto option, which decides whether + to get/drop content as needed to work toward the configured numcopies. + * bugfix: drop and fsck did not honor --exclude + + -- Joey Hess Thu, 15 Sep 2011 22:25:46 -0400 + +git-annex (3.20110906) unstable; urgency=low + + * Improve display of newlines around error and warning messages. + * Fix Makefile to work with cabal again. + + -- Joey Hess Tue, 06 Sep 2011 13:45:16 -0400 + +git-annex (3.20110902) unstable; urgency=low + + * Set EMAIL when running test suite so that git does not need to be + configured first. Closes: #638998 + * The wget command will now be used in preference to curl, if available. + * init: Make description an optional parameter. + * unused, status: Sped up by avoiding unnecessary stats of annexed files. + * unused --remote: Reduced memory use to 1/4th what was used before. + * Add --json switch, to produce machine-consumable output.
+ + -- Joey Hess Fri, 02 Sep 2011 21:20:37 -0400 + +git-annex (3.20110819) unstable; urgency=low + + * Now "git annex init" only has to be run once, when a git repository + is first being created. Clones will automatically notice that git-annex + is in use and automatically perform a basic initialization. It's + still recommended to run "git annex init" in any clones, to describe them. + * Added annex-cost-command configuration, which can be used to vary the + cost of a remote based on the output of a shell command. + * Fix broken upgrade from V1 repository. Closes: #638584 + + -- Joey Hess Fri, 19 Aug 2011 20:34:09 -0400 + +git-annex (3.20110817) unstable; urgency=low + + * Fix shell escaping in rsync special remote. + * addurl: --fast can be used to avoid immediately downloading the url. + * Added support for getting content from git remotes using http (and https). + * Added curl to Debian package dependencies. + + -- Joey Hess Wed, 17 Aug 2011 01:29:02 -0400 + +git-annex (3.20110719) unstable; urgency=low + + * add: Be even more robust to avoid ever leaving the file seemingly deleted. + Closes: #634233 + * Bugfix: Make add ../ work. + * Support the standard git -c name=value + * unannex: Clean up use of git commit -a. + + -- Joey Hess Tue, 19 Jul 2011 23:39:53 -0400 + +git-annex (3.20110707) unstable; urgency=low + + * Fix sign bug in disk free space checking. + * Bugfix: Forgot to de-escape keys when upgrading. Could result in + bad location log data for keys that contain [&:%] in their names. + (A workaround for this problem is to run git annex fsck.) + * add: Avoid a failure mode that resulted in the file seemingly being + deleted (content put in the annex but no symlink present). + + -- Joey Hess Thu, 07 Jul 2011 19:29:39 -0400 + +git-annex (3.20110705) unstable; urgency=low + + * uninit: Delete the git-annex branch and .git/annex/ + * unannex: In --fast mode, file content is left in the annex, and a + hard link made to it.
+ * uninit: Use unannex in --fast mode, to support unannexing multiple + files that link to the same content. + * Drop the dependency on the haskell curl bindings, use regular haskell HTTP. + * Fix a pipeline stall when upgrading (caused by #624389). + + -- Joey Hess Tue, 05 Jul 2011 14:37:39 -0400 + +git-annex (3.20110702) unstable; urgency=low + + * Now the web can be used as a special remote. + This feature replaces the old URL backend. + * addurl: New command to download an url and store it in the annex. + * Sped back up fsck, copy --from, and other commands that often + have to read a lot of information from the git-annex branch. Such + commands are now faster than they were before introduction of the + git-annex branch. + * Always ensure git-annex branch exists. + * Modify location log parser to allow future expansion. + * --force will cause add, etc, to operate on ignored files. + * Avoid mangling encoding when storing the description of repository + and other content. + * cabal can now be used to build git-annex. This is substantially + slower than using make, does not build or install documentation, + does not run the test suite, and is not particularly recommended, + but could be useful to some. + + -- Joey Hess Sat, 02 Jul 2011 15:00:18 -0400 + +git-annex (3.20110624) experimental; urgency=low + + * New repository format, annex.version=3. Use `git annex upgrade` to migrate. + * git-annex now stores its logs in a git-annex branch. + * merge: New subcommand. Auto-merges the new git-annex branch. + * Improved handling of bare git repos with annexes. Many more commands will + work in them. + * git-annex is now more robust; it will never leave state files + uncommitted when some other git process comes along and locks the index + at an inconvenient time. + * rsync is now used when copying files from repos on other filesystems. 
+ cp is still used when copying file from repos on the same filesystem, + since --reflink=auto can make it significantly faster on filesystems + such as btrfs. + * Allow --trust etc to specify a repository by name, for temporarily + trusting repositories that are not configured remotes. + * unlock: Made atomic. + * git-union-merge: New git subcommand, that does a generic union merge + operation, and operates efficiently without touching the working tree. + + -- Joey Hess Fri, 24 Jun 2011 14:32:18 -0400 + +git-annex (0.20110610) unstable; urgency=low + + * Add --numcopies option. + * Add --trust, --untrust, and --semitrust options. + * get --from is the same as copy --from + * Bugfix: Fix fsck to not think all SHAnE keys are bad. + + -- Joey Hess Fri, 10 Jun 2011 11:48:40 -0400 + +git-annex (0.20110601) unstable; urgency=low + + * Minor bugfixes and error message improvements. + * Massively sped up `git annex lock` by avoiding use of the uber-slow + `git reset`, and only running `git checkout` once, even when many files + are being locked. + * Fix locking of files with staged changes. + * Somewhat sped up `git commit` of modifications to unlocked files. + * Build fix for older ghc. + + -- Joey Hess Wed, 01 Jun 2011 11:50:47 -0400 + +git-annex (0.20110522) unstable; urgency=low + + * Closer emulation of git's behavior when told to use "foo/.git" as a + git repository instead of just "foo". Closes: #627563 + * Fix bug in --exclude introduced in 0.20110516. + + -- Joey Hess Fri, 27 May 2011 20:20:41 -0400 + +git-annex (0.20110521) unstable; urgency=low + + * status: New subcommand to show info about an annex, including its size. + * --backend now overrides any backend configured in .gitattributes files. + * Add --debug option. Closes: #627499 + + -- Joey Hess Sat, 21 May 2011 11:52:53 -0400 + +git-annex (0.20110516) unstable; urgency=low + + * Add a few tweaks to make it easy to use the Internet Archive's variant + of S3. 
In particular, munge key filenames to comply with the IA's filename + limits, disable encryption, support their nonstandard way of creating + buckets, and allow x-archive-* headers to be specified in initremote to + set item metadata. + * Added filename extension preserving variant backends SHA1E, SHA256E, etc. + * migrate: Use current filename when generating new key, for backends + where the filename affects the key name. + * Work around a bug in Network.URI's handling of bracketed ipv6 addresses. + + -- Joey Hess Mon, 16 May 2011 14:16:52 -0400 + +git-annex (0.20110503) unstable; urgency=low + + * Fix hasKeyCheap setting for bup and rsync special remotes. + * Add hook special remotes. + * Avoid crashing when an existing key is readded to the annex. + * unused: Now also lists files fsck places in .git/annex/bad/ + * S3: When encryption is enabled, the Amazon S3 login credentials + are stored, encrypted, in .git-annex/remotes.log, so environment + variables need not be set after the remote is initialized. + + -- Joey Hess Tue, 03 May 2011 20:56:01 -0400 + +git-annex (0.20110427) unstable; urgency=low + + * Switch back to haskell SHA library, so git-annex remains buildable on + Debian stable. + * Added rsync special remotes. This could be used, for example, to + store annexed content on rsync.net (encrypted naturally). Or anywhere else. + * Bugfix: Avoid pipeline stall when running git annex drop or fsck on a + lot of files. Possibly only occurred with ghc 7. + + -- Joey Hess Wed, 27 Apr 2011 22:50:26 -0400 + +git-annex (0.20110425) unstable; urgency=low + + * Use haskell Crypto library instead of haskell SHA library. + * Remove testpack from build depends for non x86 architectures where it + is not available. The test suite will not be run if it cannot be compiled. + * Avoid using absolute paths when staging location log, as that can + confuse git when a remote's path contains a symlink.
Closes: #621386 + + -- Joey Hess Mon, 25 Apr 2011 15:47:00 -0400 + +git-annex (0.20110420) unstable; urgency=low + + * Update Debian build dependencies for ghc 7. + * Debian package is now built with S3 support. + Thanks Joachim Breitner for making this possible. + * Somewhat improved memory usage of S3, still work to do. + Thanks Greg Heartsfield for ongoing work to improve the hS3 library + for git-annex. + + -- Joey Hess Thu, 21 Apr 2011 15:00:48 -0400 + +git-annex (0.20110419) unstable; urgency=low + + * Don't run gpg in batch mode, so it can prompt for passphrase when + there is no agent. + * Add missing build dep on dataenc. + * S3: Fix stalls when transferring encrypted data. + * bup: Avoid memory leak when transferring encrypted data. + + -- Joey Hess Tue, 19 Apr 2011 21:26:51 -0400 + +git-annex (0.20110417) unstable; urgency=low + + * bup is now supported as a special type of remote. + * The data sent to special remotes (Amazon S3, bup, etc) can be encrypted + using GPG for privacy. + * Use lowercase hash directories for locationlog files, to avoid + some issues with git on OSX with the mixed-case directories. + No migration is needed; the old mixed case hash directories are still + read; new information is written to the new directories. + * Unused files on remotes, particularly special remotes, can now be + identified and dropped, by using "--from remote" with git annex unused + and git annex dropunused. + * Clear up short option confusion between --from and --force (-f is now + --from, and there is no short option for --force). + * Add build depend on perlmagick so docs are consistently built. + Closes: #621410 + * Add doc-base file. Closes: #621408 + * Periodically flush git command queue, to avoid bloating memory usage + too much. + * Support "sha1" and "sha512" commands on FreeBSD, and allow building + if any/all SHA commands are not available.
Thanks, Fraser Tweedale + + -- Joey Hess Sun, 17 Apr 2011 12:00:24 -0400 + +git-annex (0.20110401) experimental; urgency=low + + * Amazon S3 is now supported as a special type of remote. + Warning: Encrypting data before sending it to S3 is not yet supported. + * Note that Amazon S3 support is not built in by default on Debian yet, + as hS3 is not packaged. + * fsck: Ensure that files and directories in .git/annex/objects + have proper permissions. + * Added a special type of remote called a directory remote, which + simply stores files in an arbitrary local directory. + * Bugfix: copy --to --fast never really copied, fixed. + + -- Joey Hess Fri, 01 Apr 2011 21:27:22 -0400 + +git-annex (0.20110328) experimental; urgency=low + + * annex.diskreserve can be given in arbitrary units (ie "0.5 gigabytes") + * Generalized remotes handling, laying groundwork for remotes that are + not regular git remotes. (Think Amazon S3.) + * Provide a less expensive version of `git annex copy --to`, enabled + via --fast. This assumes that location tracking information is correct, + rather than contacting the remote for every file. + * Bugfix: Keys could be received into v1 annexes from v2 annexes, via + v1 git-annex-shell. This results in some oddly named keys in the v1 + annex. Recognise and fix those keys when upgrading, instead of crashing. + + -- Joey Hess Mon, 28 Mar 2011 10:47:29 -0400 + +git-annex (0.20110325) experimental; urgency=low + + * Free space checking is now done, for transfers of data for keys + that have free space metadata. (Notably, not for SHA* keys generated + with git-annex 0.2x or earlier.) The code is believed to work on + Linux, FreeBSD, and OSX; check compile-time messages to see if it + is not enabled for your OS. + * Add annex.diskreserve config setting, to control how much free space + to reserve for other purposes and avoid using (defaults to 1 mb). + * Add --fast flag, that can enable less expensive, but also less thorough + versions of some commands. 
+ * fsck: In fast mode, avoid checking checksums. + * unused: In fast mode, just show all existing temp files as unused, + and avoid expensive scan for other unused content. + * migrate: Support migrating v1 SHA keys to v2 SHA keys with + size information that can be used for free space checking. + * Fix space leak in fsck and drop commands. + * migrate: Bugfix for case when migrating a file results in a key that + is already present in .git/annex/objects. + * dropunused: Significantly sped up; only read unused log file once. + + -- Joey Hess Fri, 25 Mar 2011 00:47:37 -0400 + +git-annex (0.20110320) experimental; urgency=low + + * Fix dropping of files using the URL backend. + * Fix support for remotes with '.' in their names. + * Add version command to show git-annex version as well as repository + version information. + * No longer auto-upgrade to repository format 2, to avoid accidental + upgrades, etc. Use git-annex upgrade when you're ready to run this + version. + + -- Joey Hess Sun, 20 Mar 2011 16:36:33 -0400 + +git-annex (0.20110316) experimental; urgency=low + + * New repository format, annex.version=2. + * The first time git-annex is run in an old format repository, it + will automatically upgrade it to the new format, staging all + necessary changes to git. Also added a "git annex upgrade" command. + * Colons are now avoided in filenames, so bare clones of git repos + can be put on USB thumb drives formatted with vFAT or similar + filesystems. + * Added two levels of hashing to object directory and .git-annex logs, + to improve scalability with enormous numbers of annexed + objects. (With one hundred million annexed objects, each + directory would contain fewer than 1024 files.) + * The setkey, fromkey, and dropkey subcommands have changed how + the key is specified. --backend is no longer used with these. 
+ + -- Joey Hess Wed, 16 Mar 2011 16:20:23 -0400 + +git-annex (0.24) unstable; urgency=low + + Branched the 0.24 series, which will be maintained for a while to + support v1 git-annex repos, while main development moves to the 0.2011 + series, with v2 git-annex repos. + + * Add Suggests on graphviz. Closes: #618039 + * When adding files to the annex, the symlinks pointing at the annexed + content are made to have the same mtime as the original file. + While git does not preserve that information, this allows a tool + like metastore to be used with annexed files. + (Currently this is only done on systems supporting POSIX 200809.) + + -- Joey Hess Wed, 16 Mar 2011 18:35:13 -0400 + +git-annex (0.23) unstable; urgency=low + + * Support ssh remotes with a port specified. + * whereis: New subcommand to show where a file's content has gotten to. + * Rethink filename encoding handling for display. Since filename encoding + may or may not match locale settings, any attempt to decode filenames + will fail for some files. So instead, do all output in binary mode. + + -- Joey Hess Sat, 12 Mar 2011 15:02:49 -0400 + +git-annex (0.22) unstable; urgency=low + + * Git annexes can now be attached to bare git repositories. + (Both the local and remote host must have this version of git-annex + installed for it to work.) + * Support filenames that start with a dash; when such a file is passed + to a utility it will be escaped to avoid it being interpreted as an + option. (I went a little overboard and got the type checker involved + in this, so such files are rather comprehensively supported now.) + * New backends: SHA512 SHA384 SHA256 SHA224 + (Supported on systems where corresponding shaNsum commands are available.) + * describe: New subcommand that can set or change the description of + a repository. + * Fix test suite to reap zombies. + (Zombies can be particularly annoying on OSX; thanks to Jimmy Tang + for his help eliminating the infestation... for now.) 
+ * Make test suite not rely on a working cp -pr. + (The Unix wars are still ON!) + * Look for dir.git directories the same as git does. + * Support remote urls specified as relative paths. + * Support non-ssh remote paths that contain tilde expansions. + * fsck: Check for and repair location log damage. + * Bugfix: When fsck detected and moved away corrupt file content, it did + not update the location log. + + -- Joey Hess Fri, 04 Mar 2011 15:10:57 -0400 + +git-annex (0.21) unstable; urgency=low + + * test: Don't rely on chmod -R working. + * unannex: Fix recently introduced bug when attempting to unannex more + than one file at a time. + * test: Set git user name and email in case git can't guess values. + * Fix display of unicode filenames. + + -- Joey Hess Fri, 11 Feb 2011 23:21:08 -0400 + +git-annex (0.20) unstable; urgency=low + + * Preserve specified file ordering when instructed to act on multiple + files or directories. For example, "git annex get a b" will now always + get "a" before "b". Previously it could operate in either order. + * unannex: Commit staged changes at end, to avoid some confusing behavior + with the pre-commit hook, which would see some types of commits after + an unannex as checking in of an unlocked file. + * map: New subcommand that uses graphviz to display a nice map of + the git repository network. + * Deal with the mtl/monads-fd conflict. + * configure: Check for sha1sum. + + -- Joey Hess Tue, 08 Feb 2011 18:57:24 -0400 + +git-annex (0.19) unstable; urgency=low + + * configure: Support using the uuidgen command if the uuid command is + not available. + * Allow --exclude to be specified more than once. + * There are now three levels of repository trust. + * untrust: Now marks the current repository as untrusted. + * semitrust: Now restores the default trust level. (What untrust used to do.) + * fsck, drop: Take untrusted repositories into account. 
+ * Bugfix: Files were copied from trusted remotes first even if their + annex.cost was higher than other remotes. + * Improved temp file handling. Transfers of content can now be resumed + from temp files later; the resume does not have to be the immediate + next git-annex run. + * unused: Include partially transferred content in the list. + * Bugfix: Running a second git-annex while a first has a transfer in + progress no longer deletes the first process's temp file. + + -- Joey Hess Fri, 28 Jan 2011 14:31:37 -0400 + +git-annex (0.18) unstable; urgency=low + + * Bugfix: `copy --to` and `move --to` forgot to stage location log changes + after transferring the file to the remote repository. + (Did not affect ssh remotes.) + * fsck: Fix bug in moving of corrupted files to .git/annex/bad/ + * migrate: Fix support for --backend option. + * unlock: Fix behavior when file content is not present. + * Test suite improvements. Current top-level test coverage: 80% + + -- Joey Hess Fri, 14 Jan 2011 14:17:44 -0400 + +git-annex (0.17) unstable; urgency=low + + * unannex: Now skips files whose content is not present, rather than + it being an error. + * New migrate subcommand can be used to switch files to using a different + backend, safely and with no duplication of content. + * bugfix: Fix crash caused by empty key name. (Thanks Henrik for reporting.) + + -- Joey Hess Sun, 09 Jan 2011 10:04:11 -0400 + +git-annex (0.16) unstable; urgency=low + + * git-annex-shell: Avoid exposing any git repo config except for the + annex.uuid when doing configlist. + * bugfix: Running `move --to` with a remote whose UUID was not yet known + could result in git-annex not recording on the local side where the + file was moved to. This could not result in data loss, or even a + significant problem, since the remote *did* record that it had the file. + * Also, add a general guard to detect attempts to record information + about repositories with missing UUIDs.
+ * bugfix: Running `move --to` with a non-ssh remote failed. + * bugfix: Running `copy --to` with a non-ssh remote actually did a move. + * Many test suite improvements. Current top-level test coverage: 65% + + -- Joey Hess Fri, 07 Jan 2011 14:33:13 -0400 + +git-annex (0.15) unstable; urgency=low + + * Support scp-style urls for remotes (host:path). + * Support ssh urls containing "~". + * Add trust and untrust subcommands, to allow configuring repositories + that are trusted to retain files without explicit checking. + * Fix bug in numcopies handling when multiple remotes pointed to the + same repository. + * Introduce the git-annex-shell command. It's now possible to make + a user have it as a restricted login shell, similar to git-shell. + * Note that git-annex will always use git-annex-shell when accessing + a ssh remote, so all of your remotes need to be upgraded to this + version of git-annex at the same time. + * Now rsync is exclusively used for copying files to and from remotes. + scp is no longer supported. + + -- Joey Hess Fri, 31 Dec 2010 22:00:52 -0400 + +git-annex (0.14) unstable; urgency=low + + * Bugfix to git annex unused in a repository with nothing yet annexed. + * Support upgrading from a v0 annex with nothing in it. + * Avoid multiple calls to git ls-files when passed eg, "*". + + -- Joey Hess Fri, 24 Dec 2010 17:38:48 -0400 + +git-annex (0.13) unstable; urgency=low + + * Makefile: Install man page and html (when built). + * Makefile: Add GHCFLAGS variable. + * Fix upgrade from 0.03. + * Support remotes using git+ssh and ssh+git as protocol. + Closes: #607056 + + -- Joey Hess Tue, 14 Dec 2010 13:05:10 -0400 + +git-annex (0.12) unstable; urgency=low + + * Add --exclude option to exclude files from processing. + * mwdn2man: Fix a bug in newline suppression. Closes: #606578 + * Bugfix to git annex add of an unlocked file in a subdir. Closes: #606579 + * Makefile: Add PREFIX variable.
+ + -- Joey Hess Sat, 11 Dec 2010 17:32:00 -0400 + +git-annex (0.11) unstable; urgency=low + + * If available, rsync will be used for file transfers from remote + repositories. This allows resuming interrupted transfers. + * Added remote.annex-rsync-options. + * Avoid deleting temp files when rsync fails. + * Improve detection of version 0 repos. + * Add uninit subcommand. Closes: #605749 + + -- Joey Hess Sat, 04 Dec 2010 17:27:42 -0400 + +git-annex (0.10) unstable; urgency=low + + * In .gitattributes, the annex.numcopies attribute can be used + to control the number of copies to retain of different types of files. + * Bugfix: Always correctly handle gitattributes when in a subdirectory of + the repository. (Had worked ok for ones like "*.mp3", but failed for + ones like "dir/*".) + * fsck: Fix warning about not enough copies of a file, when locations + are known, but are not available in currently configured remotes. + * precommit: Optimise to avoid calling git-check-attr more than once. + * The git-annex-backend attribute has been renamed to annex.backend. + + -- Joey Hess Sun, 28 Nov 2010 19:28:05 -0400 + +git-annex (0.09) unstable; urgency=low + + * Add copy subcommand. + * Fix bug in setkey subcommand triggered by move --to. + + -- Joey Hess Sat, 27 Nov 2010 17:14:59 -0400 + +git-annex (0.08) unstable; urgency=low + + * Fix `git annex add ../foo` (when ran in a subdir of the repo). + * Add configure step to build process. + * Only use cp -a if it is supported, falling back to cp -p or plain cp + as needed for portability. + * cp --reflink=auto is used if supported, and will make git annex unlock + much faster on filesystems like btrfs that support copy on write. + + -- Joey Hess Sun, 21 Nov 2010 13:45:44 -0400 + +git-annex (0.07) unstable; urgency=low + + * find: New subcommand. + * unused: New subcommand, finds unused data. (Split out from fsck.) 
+ * dropunused: New subcommand, provides for easy dropping of unused keys + by number, as listed by the unused subcommand. + * fsck: Print warnings to stderr; --quiet can now be used to only see + problems. + + -- Joey Hess Mon, 15 Nov 2010 18:41:50 -0400 + +git-annex (0.06) unstable; urgency=low + + * fsck: Check if annex.numcopies is satisfied. + * fsck: Verify the sha1 of files when the SHA1 backend is used. + * fsck: Verify the size of files when the WORM backend is used. + * fsck: Allow specifying individual files if fscking everything + is not desired. + * fsck: Fix bug, introduced in 0.04, in detection of unused data. + + -- Joey Hess Sat, 13 Nov 2010 16:24:29 -0400 + +git-annex (0.05) unstable; urgency=low + + * Optimize both pre-commit and lock subcommands to not call git diff + on every file being committed/locked. + (This actually also works around a bug in ghc, that caused + git-annex 0.04 pre-commit to sometimes corrupt filename being read + from git ls-files and fail. + See + The excessive number of calls made by pre-commit exposed the ghc bug. + Thanks Josh Triplett for the debugging.) + * Build with -O2. + + -- Joey Hess Thu, 11 Nov 2010 18:31:09 -0400 + +git-annex (0.04) unstable; urgency=low + + * Add unlock subcommand, which replaces the symlink with a copy of + the file's content in preparation of changing it. The "edit" subcommand + is an alias for unlock. + * Add lock subcommand. + * Unlocked files will now automatically be added back into the annex when + committed (and the updated symlink committed), by some magic in the + pre-commit hook. + * The SHA1 backend is now fully usable. + * Add annex.version, which will be used to automate upgrades + between incompatible versions. + * Reorganised the layout of .git/annex/ + * The new layout will be automatically upgraded to the first time + git-annex is used in a repository with the old layout. 
+ * Note that git-annex 0.04 cannot transfer content from old repositories + that have not yet been upgraded. + * Annexed file contents are now made unwritable and put in unwritable + directories, to avoid them accidentally being removed or modified. + (Thanks Josh Triplett for the idea.) + * Add build dep on libghc6-testpack-dev. Closes: #603016 + * Avoid using runghc to run test suite as it is not available on all + architectures. Closes: #603006 + + -- Joey Hess Wed, 10 Nov 2010 14:23:23 -0400 + +git-annex (0.03) unstable; urgency=low + + * Fix support for file:// remotes. + * Add --verbose + * Fix SIGINT handling. + * Fix handling of files with unusual characters in their name. + * Fixed memory leak; git-annex no longer reads the whole file list + from git before starting, and will be much faster with large repos. + * Fix crash on unknown symlinks. + * Added remote.annex-scp-options and remote.annex-ssh-options. + * The backends to use when adding different sets of files can be configured + via gitattributes. + * In .gitattributes, the git-annex-backend attribute can be set to the + names of backends to use when adding different types of files. + * Add fsck subcommand. (For now it only finds unused key contents in the + annex.) + + -- Joey Hess Sun, 07 Nov 2010 18:26:04 -0400 + +git-annex (0.02) unstable; urgency=low + + * Can scp annexed files from remote hosts, and check remote hosts for + file content when dropping files. + * New move subcommand, that makes it easy to move file contents from + or to a remote. + * New fromkey subcommand, for registering urls, etc. + * git-annex init will now set up a pre-commit hook that fixes up symlinks + before they are committed, to ensure that moving symlinks around does not + break them. + * More intelligent and fast staging of modified files; git add coalescing. + * Add remote.annex-ignore git config setting to allow completely disabling + a given remote. 
+ * --from/--to can be used to control the remote repository that git-annex + uses. + * --quiet can be used to avoid verbose output + * New plumbing-level dropkey and addkey subcommands. + * Lots of bug fixes. + + -- Joey Hess Wed, 27 Oct 2010 16:39:29 -0400 + +git-annex (0.01) unstable; urgency=low + + * First prerelease. + + -- Joey Hess Wed, 20 Oct 2010 12:54:24 -0400 diff --git a/debian/compat b/debian/compat new file mode 100644 index 0000000000..7f8f011eb7 --- /dev/null +++ b/debian/compat @@ -0,0 +1 @@ +7 diff --git a/debian/control b/debian/control new file mode 100644 index 0000000000..6f59ada5b8 --- /dev/null +++ b/debian/control @@ -0,0 +1,50 @@ +Source: git-annex +Section: utils +Priority: optional +Build-Depends: + debhelper (>= 7.0.50), + ghc, + libghc-missingh-dev, + libghc-hslogger-dev, + libghc-pcre-light-dev, + libghc-sha-dev, + libghc-dataenc-dev, + libghc-http-dev, + libghc-utf8-string-dev, + libghc-hs3-dev (>= 0.5.6), + libghc-testpack-dev [any-i386 any-amd64], + libghc-monad-control-dev, + libghc-json-dev, + ikiwiki, + perlmagick, + git, + uuid, + rsync, +Maintainer: Joey Hess +Standards-Version: 3.9.2 +Vcs-Git: git://git.kitenet.net/git-annex +Homepage: http://git-annex.branchable.com/ + +Package: git-annex +Architecture: any +Section: utils +Depends: ${misc:Depends}, ${shlibs:Depends}, + git (>= 1:1.7.7), + uuid, + rsync, + wget | curl, + openssh-client +Suggests: graphviz, bup, gnupg +Description: manage files with git, without checking their contents into git + git-annex allows managing files with git, without checking the file + contents into git. While that may seem paradoxical, it is useful when + dealing with files larger than git can currently easily handle, whether due + to limitations in memory, checksumming time, or disk space. + . 
+ Even without file content tracking, being able to manage files with git, + move files around and delete files with versioned directory trees, and use + branches and distributed clones, are all very handy reasons to use git. And + annexed files can co-exist in the same git repository with regularly + versioned files, which is convenient for maintaining documents, Makefiles, + etc that are associated with annexed files but that benefit from full + revision control. diff --git a/debian/copyright b/debian/copyright new file mode 100644 index 0000000000..a8a38913e4 --- /dev/null +++ b/debian/copyright @@ -0,0 +1,39 @@ +Format: http://dep.debian.net/deps/dep5/ +Source: native package + +Files: * +Copyright: © 2010-2011 Joey Hess +License: GPL-3+ + The full text of version 3 of the GPL is distributed as doc/GPL in + this package's source, or in /usr/share/common-licenses/GPL-3 on + Debian systems. + +Files: Utility/StatFS.hsc +Copyright: Jose A Ortega Ruiz +License: BSD-3-clause + -- All rights reserved. + -- + -- Redistribution and use in source and binary forms, with or without + -- modification, are permitted provided that the following conditions + -- are met: + -- + -- 1. Redistributions of source code must retain the above copyright + -- notice, this list of conditions and the following disclaimer. + -- 2. Redistributions in binary form must reproduce the above copyright + -- notice, this list of conditions and the following disclaimer in the + -- documentation and/or other materials provided with the distribution. + -- 3. Neither the name of the author nor the names of his contributors + -- may be used to endorse or promote products derived from this software + -- without specific prior written permission. + -- + -- THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + -- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + -- IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + -- ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + -- FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + -- DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + -- OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + -- HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + -- LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + -- OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + -- SUCH DAMAGE. diff --git a/debian/doc-base b/debian/doc-base new file mode 100644 index 0000000000..f71a233333 --- /dev/null +++ b/debian/doc-base @@ -0,0 +1,9 @@ +Document: git-annex +Title: git-annex documentation +Author: Joey Hess +Abstract: All the documentation from git-annex's website. +Section: File Management + +Format: HTML +Index: /usr/share/doc/git-annex/html/index.html +Files: /usr/share/doc/git-annex/html/*.html diff --git a/debian/manpages b/debian/manpages new file mode 100644 index 0000000000..ca34203aa0 --- /dev/null +++ b/debian/manpages @@ -0,0 +1 @@ +git-annex.1 diff --git a/debian/rules b/debian/rules new file mode 100755 index 0000000000..9079120a80 --- /dev/null +++ b/debian/rules @@ -0,0 +1,7 @@ +#!/usr/bin/make -f +%: + dh $@ + +# Not intended for use by anyone except the author. +announcedir: + @echo ${HOME}/src/git-annex/doc/news diff --git a/doc/GPL b/doc/GPL new file mode 100644 index 0000000000..94a9ed024d --- /dev/null +++ b/doc/GPL @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. 
+ + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. 
For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. 
The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. 
+ + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. 
The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. 
+ + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. 
+ + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. 
This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. 
For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. 
Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. 
+ + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. 
Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. 
+ + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + Copyright (C) + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +. 
diff --git a/doc/backends.mdwn b/doc/backends.mdwn new file mode 100644 index 0000000000..2030d107a3 --- /dev/null +++ b/doc/backends.mdwn @@ -0,0 +1,39 @@ +When a file is annexed, a key is generated from its content and/or metadata. +The file checked into git symlinks to the key. This key can later be used +to retrieve the file's content (its value). + +Multiple pluggable key-value backends are supported, and a single repository +can use different ones for different files. + +* `SHA256` -- The default backend for new files. This allows + verifying that the file content is right, and can avoid duplicates of + files with the same content. Its need to generate checksums + can make it slower for large files. +* `WORM` ("Write Once, Read Many") This assumes that any file with + the same basename, size, and modification time has the same content. + This is the least expensive backend, recommended for really large + files or slow systems. +* `SHA512` -- Best currently available hash, for the very paranoid. +* `SHA1` -- Smaller hash than `SHA256` for those who want a checksum + but are not concerned about security. +* `SHA384`, `SHA224` -- Hashes for people who like unusual sizes. +* `SHA256E`, `SHA1E`, etc -- Variants that preserve filename extension as + part of the key. Useful for archival tasks where the filename extension + contains metadata that should be preserved. + +The `annex.backends` git-config setting can be used to list the backends +git-annex should use. The first one listed will be used by default when +new files are added. + +For finer control of what backend is used when adding different types of +files, the `.gitattributes` file can be used. The `annex.backend` +attribute can be set to the name of the backend to use for matching files.
+ +For example, to use the SHA256 backend for sound files, which tend to be +smallish and might be modified or copied over time, +while using the WORM backend for everything else, you could set +in `.gitattributes`: + + * annex.backend=WORM + *.mp3 annex.backend=SHA256 + *.ogg annex.backend=SHA256 diff --git a/doc/bare_repositories.mdwn b/doc/bare_repositories.mdwn new file mode 100644 index 0000000000..bf56d81446 --- /dev/null +++ b/doc/bare_repositories.mdwn @@ -0,0 +1,44 @@ +Due to popular demand, git-annex can now be used with bare repositories. + +So, for example, you can stash a file away in the origin: +`git annex move mybigfile --to origin` + +Of course, for that to work, the bare repository has to be on a system with +[[git-annex-shell]] installed. If "origin" is on GitWeb, you still can't +use git-annex to store stuff there. + +It took a while, but bare repositories are now supported exactly as well +as non-bare repositories. Except for these caveats: + +* `git annex fsck` works in a bare repository, but does not display + warnings about insufficient + [[copies]]. To get those warnings, just run it in one of the non-bare + checkouts. +* `git annex unused` in a bare repository only knows about keys used in + branches that have been pushed to the bare repository. So use it with care.. +* Commands that need a work tree, like `git annex add` won't work in a bare + repository, of course. + +*** + +Here is a quick example of how to set this up, using `origin` as the remote name, and assuming `~/annex` contains an annex: + +On the server: + + mkdir bare-annex + git init --bare + git annex init origin + +Now configure the remote and do the initial push: + + cd ~/annex + git remote add origin example.com:bare-annex + git push origin master git-annex + +Now `git annex status` should show the configured bare remote. 
If it does not, you may have to pull from the remote first (older versions of `git-annex`) + +If you wish to configure git such that you can push/pull without arguments, set the upstream branch: + + git branch master --set-upstream origin/master + + diff --git a/doc/bugs.mdwn b/doc/bugs.mdwn new file mode 100644 index 0000000000..2786e5bf74 --- /dev/null +++ b/doc/bugs.mdwn @@ -0,0 +1,4 @@ +This is git-annex's bug list. Link bugs to [[bugs/done]] when done. + +[[!inline pages="./bugs/* and !./bugs/done and !link(done) +and !*/Discussion" actions=yes postform=yes show=0 archive=yes]] diff --git a/doc/bugs/Build_error_on_Mac_OSX_10.6.mdwn b/doc/bugs/Build_error_on_Mac_OSX_10.6.mdwn new file mode 100644 index 0000000000..43fb0323c4 --- /dev/null +++ b/doc/bugs/Build_error_on_Mac_OSX_10.6.mdwn @@ -0,0 +1,11 @@ +While following the instructions given at the OSX build page , I get this error: + +$ make +ghc -O2 -Wall -ignore-package monads-fd -fspec-constr-count=5 --make git-annex + +Utility/JSONStream.hs:14:8: + Could not find module `Text.JSON': + Use -v to see a list of the files searched for. +make: *** [git-annex] Error 1 + +> Updated the instructions. [[done]] --[[Joey]] diff --git a/doc/bugs/Cabal_dependency_monadIO_missing.mdwn b/doc/bugs/Cabal_dependency_monadIO_missing.mdwn new file mode 100644 index 0000000000..13980dd292 --- /dev/null +++ b/doc/bugs/Cabal_dependency_monadIO_missing.mdwn @@ -0,0 +1,17 @@ +Just issuing the command `cabal install` results in the following error message. + + Command/Add.hs:54:3: + No instance for (Control.Monad.IO.Control.MonadControlIO + (Control.Monad.State.Lazy.StateT Annex.AnnexState IO)) + arising from a use of `handle' at Command/Add.hs:54:3-24 + +Adding the dependency for `monadIO` to `git-annex.cabal` should fix this? +-- Thomas + +> No, it's already satisfied by `monad-control` being listed as a +> dependency in the cabal file. 
Your system might be old/new/or broken, +> perhaps it's time to provide some details about the version of haskell +> and of `monad-control` you have installed? --[[Joey]] + +>> Closing as apparently user error or a broken system. +>> If you see this problem please do say. [[done]] --[[Joey]] diff --git a/doc/bugs/Cabal_dependency_monadIO_missing/comment_1_14be660aa57fadec0d81b32a8b52c66f._comment b/doc/bugs/Cabal_dependency_monadIO_missing/comment_1_14be660aa57fadec0d81b32a8b52c66f._comment new file mode 100644 index 0000000000..8e38205f00 --- /dev/null +++ b/doc/bugs/Cabal_dependency_monadIO_missing/comment_1_14be660aa57fadec0d81b32a8b52c66f._comment @@ -0,0 +1,75 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawmFgsNxmnGznb5bbmcoWhoQOoxZZ-io61s" + nickname="Thomas" + subject="comment 1" + date="2011-08-08T09:04:20Z" + content=""" +I use Debian Squeeze, I have the Debian package cabal-install 0.8.0-1 installed. + + $ git clone git://git-annex.branchable.com/ + $ cd git-annex.branchable.com + $ cabal update + $ cabal install cabal-install + +This installed: Cabal-1.10.2.0, zlib-0.5.3.1, cabal-install 0.10.2. +No version of monad-control or monadIO installed. + + $ ~/.cabal/bin/cabal install + Registering QuickCheck-2.4.1.1... + Registering Crypto-4.2.3... + Registering base-unicode-symbols-0.2.2.1... + Registering deepseq-1.1.0.2... + Registering hxt-charproperties-9.1.0... + Registering hxt-regex-xmlschema-9.0.0... + Registering hxt-unicode-9.0.1... + Registering hxt-9.1.2... + Registering stm-2.2.0.1... + Registering hS3-0.5.6... + Registering transformers-0.2.2.0... + Registering monad-control-0.2.0.1... + [1 of 1] Compiling Main ( Setup.hs, dist/setup/Main.o ) + Linking ./dist/setup/setup ... + ghc -O2 -Wall -ignore-package monads-fd -fspec-constr-count=5 --make configure + [1 of 2] Compiling TestConfig ( TestConfig.hs, TestConfig.o ) + [2 of 2] Compiling Main ( configure.hs, configure.o ) + Linking configure ... 
+ ./configure + checking version... 3.20110720 + checking cp -a... yes + checking cp -p... yes + checking cp --reflink=auto... yes + checking uuid generator... uuid + checking xargs -0... yes + checking rsync... yes + checking curl... yes + checking bup... yes + checking gpg... yes + checking sha1... sha1sum + checking sha256... sha256sum + checking sha512... sha512sum + checking sha224... sha224sum + checking sha384... sha384sum + + ... + + Command/Add.hs:54:3: + No instance for (Control.Monad.IO.Control.MonadControlIO + (Control.Monad.State.Lazy.StateT Annex.AnnexState IO)) + arising from a use of `handle' at Command/Add.hs:54:3-24 + Possible fix: + add an instance declaration for + (Control.Monad.IO.Control.MonadControlIO + (Control.Monad.State.Lazy.StateT Annex.AnnexState IO)) + In the first argument of `($)', namely `handle (undo file key)' + In a stmt of a 'do' expression: + handle (undo file key) $ moveAnnex key file + In the expression: + do { handle (undo file key) $ moveAnnex key file; + next $ cleanup file key } + cabal: Error: some packages failed to install: + git-annex-3.20110719 failed during the building phase. The exception was: + ExitFailure 1 + +After I added a dependency for monadIO to the git-annex.cabal file, it installed correctly. +-- Thomas +"""]] diff --git a/doc/bugs/Cabal_dependency_monadIO_missing/comment_2_4f4d8e1e00a2a4f7e8a8ab082e16adac._comment b/doc/bugs/Cabal_dependency_monadIO_missing/comment_2_4f4d8e1e00a2a4f7e8a8ab082e16adac._comment new file mode 100644 index 0000000000..adf7a34e66 --- /dev/null +++ b/doc/bugs/Cabal_dependency_monadIO_missing/comment_2_4f4d8e1e00a2a4f7e8a8ab082e16adac._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 2" + date="2011-08-17T04:56:30Z" + content=""" +Finally got a chance to try to reproduce this. I followed your recipe exactly in a clean squeeze chroot. monadIO was not installed, but git-annex built ok, using monad-control.
+"""]] diff --git a/doc/bugs/Can__39__t___34__git-annex_get__34___with_3.20111203.mdwn b/doc/bugs/Can__39__t___34__git-annex_get__34___with_3.20111203.mdwn new file mode 100644 index 0000000000..ea56c37320 --- /dev/null +++ b/doc/bugs/Can__39__t___34__git-annex_get__34___with_3.20111203.mdwn @@ -0,0 +1,27 @@ +Hi there, + +After updating to 3.20111203 (on Arch Linux) I noticed I was not able to use `git annex get` from a SSH remote (server running Arch Linux, same version of git-annex): "requested key is not present". Same behavior with current master (commit 6cf28585). I had no issue with the previous version (3.20111122). + +On this server, I was able to track down the issue using `git-annex-shell inannex` and `strace`: + + $ strace -f -o log git-annex-shell inannex ~/photos-annex.git WORM-s369360-m1321602916--2011-11-17.jpg + $ echo $? + 1 + $ tail -n20 log + [...] + 25623 chdir("/home/schnouki/git-annex") = 0 + 25623 stat("/home/schnouki/photos-annex.git/annex/objects/082/676/WORM-s369360-m1321602916--2011-11-17.jpg/WORM-s369360-m1321602916--2011-11-17.jpg", {st_mode=S_IFREG|0400, st_size=369360, ...}) = 0 + 25623 open("annex/objects/082/676/WORM-s369360-m1321602916--2011-11-17.jpg/WORM-s369360-m1321602916--2011-11-17.jpg", O_RDONLY) = -1 ENOENT (No such file or directory) + [...] + +Note there is a call to `stat()` with the full path to the requested file, and *then* a call to `open()` with a relative path -- which causes this call to fail, and git-annex-shell to return 1. With 3.20111122, there was no call to `stat()`, just a successful call to `open()` with a full absolute path. + +Using `git bisect` I was able to determine that this bug appeared in commit 64672c62 ("refactor"). Reverting it makes `git-annex-shell` work as expected, but I'm sure there are better ways to fix this. However I don't know enough Haskell to do it myself. + +Could you please try to fix this in a future version? + +> Thanks for a very good bug report.
+> +> I've fixed this stupid mistake introduced in the code refactoring. +> [[done]] +> --[[Joey]] diff --git a/doc/bugs/Displayed_copy_speed_is_wrong.mdwn b/doc/bugs/Displayed_copy_speed_is_wrong.mdwn new file mode 100644 index 0000000000..cf3b31cf48 --- /dev/null +++ b/doc/bugs/Displayed_copy_speed_is_wrong.mdwn @@ -0,0 +1,8 @@ +When copying data to my remote, I regularly see speeds in excess of 100 MB/s on my home DSL line. + + 2073939 100% 176.96MB/s 0:00:00 (xfer#1, to-check=0/1) + +This is definitely not correct. + +> Closing, as rsync does this to show you when it's making your life +> faster than it would be w/o rsync. [[done]] --[[Joey]] diff --git a/doc/bugs/Displayed_copy_speed_is_wrong/comment_1_74de3091e8bfd7acd6795e61f39f07c6._comment b/doc/bugs/Displayed_copy_speed_is_wrong/comment_1_74de3091e8bfd7acd6795e61f39f07c6._comment new file mode 100644 index 0000000000..62a595be77 --- /dev/null +++ b/doc/bugs/Displayed_copy_speed_is_wrong/comment_1_74de3091e8bfd7acd6795e61f39f07c6._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-04-03T01:37:29Z" + content=""" +That is displayed by rsync. It's not unheard of for rsync to resume a transfer and display extremely high speeds. +"""]] diff --git a/doc/bugs/Displayed_copy_speed_is_wrong/comment_2_8b240de1d5ae9229fa2d77d1cc15a552._comment b/doc/bugs/Displayed_copy_speed_is_wrong/comment_2_8b240de1d5ae9229fa2d77d1cc15a552._comment new file mode 100644 index 0000000000..28305d3ac8 --- /dev/null +++ b/doc/bugs/Displayed_copy_speed_is_wrong/comment_2_8b240de1d5ae9229fa2d77d1cc15a552._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 2" + date="2011-04-03T08:56:48Z" + content=""" +Pity. Mark as done/upstream (or similar) for house-keeping? 
+"""]] diff --git a/doc/bugs/Error_when_moving_annexed_file_to_a_.gitignored_location.mdwn b/doc/bugs/Error_when_moving_annexed_file_to_a_.gitignored_location.mdwn new file mode 100644 index 0000000000..34d05c0b19 --- /dev/null +++ b/doc/bugs/Error_when_moving_annexed_file_to_a_.gitignored_location.mdwn @@ -0,0 +1,21 @@ +I just noticed that if you move a git-annex symlink to a location ignored by git, it simply works. Upon committing that change, however, part of git-annex's `fix` function apparently tries to `git-add` the symlink. This fails because the new, ignored location requires a `git-add --force`. + +Considering that git proper doesn't fail or warn, I think git-annex shouldn't either. + +This is the error message: + + $ git mv annexed-file ignored-dir/ + $ git commit + fix ignored-dir/annexed-file ok + (Recording state in git...) + The following paths are ignored by one of your .gitignore files: + ignored-dir + Use -f if you really want to add them. + fatal: no files added + Command xargs ["-0","git","--git-dir=/home/[...]/repo/.git","--work-tree=/home/[...]/repo","add","--"] failed; exit code 123 + + git-annex: user error (Command xargs ["-0","git","--git-dir=/home/[...]/repo/.git","--work-tree=/home/[...]/repo","add","--"] failed; exit code 123) + failed + git-annex: 1 failed + +> Weird edge case.. ok, fixed. [[done]] --[[Joey]] diff --git a/doc/bugs/Error_while_adding_a_file___34__createSymbolicLink:_already_exists__34__.mdwn b/doc/bugs/Error_while_adding_a_file___34__createSymbolicLink:_already_exists__34__.mdwn new file mode 100644 index 0000000000..21293af547 --- /dev/null +++ b/doc/bugs/Error_while_adding_a_file___34__createSymbolicLink:_already_exists__34__.mdwn @@ -0,0 +1,46 @@ +I'm importing a directory where some files are hard links of each other. + +This is confusing git-annex. Here's a small test of that: + +
+paulproteus@pathi:/tmp$ mkdir annex-test
+paulproteus@pathi:/tmp$ cd annex-test
+paulproteus@pathi:/tmp/annex-test$ git init
+Initialized empty Git repository in /tmp/annex-test/.git/
+paulproteus@pathi:/tmp/annex-test$ git annex init testing
+init testing ok
+paulproteus@pathi:/tmp/annex-test$ echo '* annex.backend=SHA1' >> .gitattributes 
+paulproteus@pathi:/tmp/annex-test$ git commit .gitattributes -m 'Default to sha1'
+[master dd54b41] Default to sha1
+ 1 files changed, 1 insertions(+), 0 deletions(-)
+paulproteus@pathi:/tmp/annex-test$ echo "Look at me" > file1
+paulproteus@pathi:/tmp/annex-test$ cp -l file1 file2
+paulproteus@pathi:/tmp/annex-test$ git annex add file1
+add file1 (checksum...) ok
+(Recording state in git...)
+paulproteus@pathi:/tmp/annex-test$ git commit -m 'So far, so good'
+[master eb43084] So far, so good
+ 2 files changed, 2 insertions(+), 0 deletions(-)
+ create mode 100644 .git-annex/9a3/f1f/SHA1-s11--b9c599d64212934582d676c722cf3ec61f60e09c.log
+ create mode 120000 file1
+paulproteus@pathi:/tmp/annex-test$ git annex add file2
+add file2 (checksum...) 
+  git-annex: .git/annex/objects/PM/7p/SHA1-s11--b9c599d64212934582d676c722cf3ec61f60e09c/SHA1-s11--b9c599d64212934582d676c722cf3ec61f60e09c: createSymbolicLink: already exists (File exists)
+git-annex: 1 failed
+paulproteus@pathi:/tmp/annex-test$ 
+
+ +When trying to make a small test case for this bug, I noticed that if file1 and file2 have the same contents but are not hard links of each other, they both get annexed just fine. + +I think the right behavior here is to annex file2 just fine, as if they weren't hard links before. + + +-- Asheesh. + +> The same thing happens anytime the key for a file collides with a key +> already in the annex, AFAICS. (Including when the files have the same +> content but are not hard links... unless you're using WORM backend.) +> +> I've fixed this bug. The first file in wins. See commit for some +> interesting discussion about why it should not check for hash collisions +> in this situation. [[done]] --[[Joey]] diff --git a/doc/bugs/Makefile_is_missing_dependancies.mdwn b/doc/bugs/Makefile_is_missing_dependancies.mdwn new file mode 100644 index 0000000000..3e9d6e903c --- /dev/null +++ b/doc/bugs/Makefile_is_missing_dependancies.mdwn @@ -0,0 +1,47 @@ +
+From e45c73e66fc18d27bdf5797876fbeb07786a4af1 Mon Sep 17 00:00:00 2001
+From: Jimmy Tang 
+Date: Tue, 22 Mar 2011 22:24:07 +0000
+Subject: [PATCH] Touch up Makefile to depend on StatFS.hs
+
+---
+ Makefile |    2 +-
+ 1 files changed, 1 insertions(+), 1 deletions(-)
+
+diff --git a/Makefile b/Makefile
+index 08e2f59..4ae8392 100644
+--- a/Makefile
++++ b/Makefile
+@@ -15,7 +15,7 @@ SysConfig.hs: configure.hs TestConfig.hs
+        hsc2hs $<
+        perl -i -pe 's/^{-# INCLUDE.*//' $@
+ 
+-$(bins): SysConfig.hs Touch.hs
++$(bins): SysConfig.hs Touch.hs StatFS.hs
+        $(GHCMAKE) $@
+ 
+ git-annex.1: doc/git-annex.mdwn
+-- 
+1.7.4.1
+
+
+ + +StatFS.hs never gets depended on and compiled, the makefile was just missing something + +> Thanks, [[done]]! Interested to hear if StatFS.hs works on OSX (no warning) or +> is a no-op (with warning). --[[Joey]] + +>> +>> for now it gives a warning, it looks like it should be easy enough to add OSX +>> support, I guess it's a case of just digging around documentation to find the equivalent +>> calls/headers. I'll give it a go at making this feature work on OSX and get back to you. +>> + +
+jtang@exia:~/develop/git-annex $ make
+hsc2hs StatFS.hsc
+StatFS.hsc:85:2: warning: #warning free space checking code not available for this OS
+StatFS.hsc:85:2: warning: #warning free space checking code not available for this OS
+StatFS.hsc:85:2: warning: #warning free space checking code not available for this OS
+
diff --git a/doc/bugs/Makefile_is_missing_dependancies/comment_1_5a3da5f79c8563c7a450aa29728abe7c._comment b/doc/bugs/Makefile_is_missing_dependancies/comment_1_5a3da5f79c8563c7a450aa29728abe7c._comment new file mode 100644 index 0000000000..ab8493a7a8 --- /dev/null +++ b/doc/bugs/Makefile_is_missing_dependancies/comment_1_5a3da5f79c8563c7a450aa29728abe7c._comment @@ -0,0 +1,47 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 1" + date="2011-03-23T08:21:30Z" + content=""" +Just did some minor digging around and checking, this seems to satisfy the compilers etc... I have yet to confirm that it *really* is working as expected. Also it might be better to check for a darwin operating system instead of apple I think, though I don't know of any one really using a pure darwin OS. But for now it works (I think) + +
+From fbfe27c2e19906ac02e3673b91bffa920f6dae5d Mon Sep 17 00:00:00 2001
+From: Jimmy Tang 
+Date: Wed, 23 Mar 2011 08:15:39 +0000
+Subject: [PATCH] Define (__APPLE__) in StatFS
+
+At least on OSX 10.6.6 it appears to have the same defintions as
+FreeBSD. The build process doesn't complain and the code is enabled,
+this needs to be tested and checked more.
+---
+ StatFS.hsc |    4 ++--
+ 1 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/StatFS.hsc b/StatFS.hsc
+index 8b453dc..45fd7e4 100644
+--- a/StatFS.hsc
++++ b/StatFS.hsc
+@@ -53,7 +53,7 @@ import Foreign.C.String
+ import Data.ByteString (useAsCString)
+ import Data.ByteString.Char8 (pack)
+ 
+-#if defined (__FreeBSD__)
++#if defined (__FreeBSD__) || defined(__APPLE__)
+ # include 
+ # include 
+ #else
+@@ -84,7 +84,7 @@ data CStatfs
+ #ifdef UNKNOWN
+ #warning free space checking code not available for this OS
+ #else
+-#if defined(__FreeBSD__)
++#if defined(__FreeBSD__) || defined(__APPLE__)
+ foreign import ccall unsafe \"sys/mount.h statfs\"
+ #else
+ foreign import ccall unsafe \"sys/vfs.h statfs64\"
+-- 
+1.7.4.1
+
+"""]] diff --git a/doc/bugs/Makefile_is_missing_dependancies/comment_2_416f12dbd0c2b841fac8164645b81df5._comment b/doc/bugs/Makefile_is_missing_dependancies/comment_2_416f12dbd0c2b841fac8164645b81df5._comment new file mode 100644 index 0000000000..d355514a31 --- /dev/null +++ b/doc/bugs/Makefile_is_missing_dependancies/comment_2_416f12dbd0c2b841fac8164645b81df5._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 2" + date="2011-03-23T15:05:12Z" + content=""" +There's a simple test -- just configure annex.diskreserve to be say, 10 megabytes less than the total free space on your disk. Then try to git annex get a 11 mb file, and a 9 mb file. :) +"""]] diff --git a/doc/bugs/Makefile_is_missing_dependancies/comment_3_c38b6f4abc9b9ad413c3b83ca04386c3._comment b/doc/bugs/Makefile_is_missing_dependancies/comment_3_c38b6f4abc9b9ad413c3b83ca04386c3._comment new file mode 100644 index 0000000000..6b4cf5789c --- /dev/null +++ b/doc/bugs/Makefile_is_missing_dependancies/comment_3_c38b6f4abc9b9ad413c3b83ca04386c3._comment @@ -0,0 +1,25 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 3" + date="2011-03-23T15:13:33Z" + content=""" +Alternatively, you can just load it up in ghci and see if it reports numbers that make sense: + +
+joey@gnu:~/src/git-annex>make StatFS.hs
+hsc2hs StatFS.hsc
+perl -i -pe 's/^{-# INCLUDE.*//' StatFS.hs
+joey@gnu:~/src/git-annex>ghci StatFS.hs
+GHCi, version 6.12.1: http://www.haskell.org/ghc/  :? for help
+Loading package ghc-prim ... linking ... done.
+Loading package integer-gmp ... linking ... done.
+Loading package base ... linking ... done.
+[1 of 1] Compiling StatFS           ( StatFS.hs, interpreted )
+Ok, modules loaded: StatFS.
+*StatFS> s <- getFileSystemStats \".\"
+Loading package bytestring-0.9.1.5 ... linking ... done.
+*StatFS> s
+Just (FileSystemStats {fsStatBlockSize = 4096, fsStatBlockCount = 7427989, fsStatByteCount = 30425042944, fsStatBytesFree = 2528489472, fsStatBytesAvailable = 2219384832, fsStatBytesUsed = 27896553472})
+
+"""]] diff --git a/doc/bugs/Makefile_is_missing_dependancies/comment_4_cc13873175edf191047282700315beee._comment b/doc/bugs/Makefile_is_missing_dependancies/comment_4_cc13873175edf191047282700315beee._comment new file mode 100644 index 0000000000..c3ad2dafd6 --- /dev/null +++ b/doc/bugs/Makefile_is_missing_dependancies/comment_4_cc13873175edf191047282700315beee._comment @@ -0,0 +1,30 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 4" + date="2011-03-23T16:02:34Z" + content=""" +Ok, well it looks like it isn't doing anything useful at all. + +
+jtang@x00:~/develop/git-annex $ make StatFS.hs                                                                                                                                    
+hsc2hs StatFS.hsc
+perl -i -pe 's/^{-# INCLUDE.*//' StatFS.hs
+jtang@x00:~/develop/git-annex $ ghci StatFS.hs                                                                                                                                    
+GHCi, version 6.12.3: http://www.haskell.org/ghc/  :? for help
+Loading package ghc-prim ... linking ... done.
+Loading package integer-gmp ... linking ... done.
+Loading package base ... linking ... done.
+Loading package ffi-1.0 ... linking ... done.
+[1 of 1] Compiling StatFS           ( StatFS.hs, interpreted )
+Ok, modules loaded: StatFS.
+*StatFS> s <- getFileSystemStats \".\"
+Loading package bytestring-0.9.1.7 ... linking ... done.
+*StatFS> s
+Just (FileSystemStats {fsStatBlockSize = 0, fsStatBlockCount = 1048576, fsStatByteCount = 0, fsStatBytesFree = 0, fsStatBytesAvailable = 0, fsStatBytesUsed = 0})
+*StatFS> s <- getFileSystemStats \"/\"
+*StatFS> s
+Just (FileSystemStats {fsStatBlockSize = 0, fsStatBlockCount = 1048576, fsStatByteCount = 0, fsStatBytesFree = 0, fsStatBytesAvailable = 0, fsStatBytesUsed = 0})
+*StatFS> 
+
+"""]] diff --git a/doc/bugs/Makefile_is_missing_dependancies/comment_5_0a1c52e2c96d19b9c3eb7e99b8c2434f._comment b/doc/bugs/Makefile_is_missing_dependancies/comment_5_0a1c52e2c96d19b9c3eb7e99b8c2434f._comment new file mode 100644 index 0000000000..149aeeb75a --- /dev/null +++ b/doc/bugs/Makefile_is_missing_dependancies/comment_5_0a1c52e2c96d19b9c3eb7e99b8c2434f._comment @@ -0,0 +1,59 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 5" + date="2011-03-23T16:14:22Z" + content=""" +Actually I may have just been stupid and should have read the man page on statfs... + +
+jtang@x00:~/develop/git-annex $ git diff
+diff --git a/StatFS.hsc b/StatFS.hsc
+index 8b453dc..e10b2dd 100644
+--- a/StatFS.hsc
++++ b/StatFS.hsc
+@@ -53,7 +53,7 @@ import Foreign.C.String
+ import Data.ByteString (useAsCString)
+ import Data.ByteString.Char8 (pack)
+ 
+-#if defined (__FreeBSD__)
++#if defined (__FreeBSD__) || defined (__APPLE__)
+ # include <sys/param.h>
+ # include <sys/mount.h>
+ #else
+@@ -84,8 +84,8 @@ data CStatfs
+ #ifdef UNKNOWN
+ #warning free space checking code not available for this OS
+ #else
+-#if defined(__FreeBSD__)
+-foreign import ccall unsafe \"sys/mount.h statfs\"
++#if defined(__FreeBSD__) || defined (__APPLE__)
++foreign import ccall unsafe \"sys/mount.h statfs64\"
+ #else
+ foreign import ccall unsafe \"sys/vfs.h statfs64\"
+ #endif
+
+ +yields this... + +
+jtang@x00:~/develop/git-annex $ ghci StatFS.hs                                                                                                                                    
+GHCi, version 6.12.3: http://www.haskell.org/ghc/  :? for help
+Loading package ghc-prim ... linking ... done.
+Loading package integer-gmp ... linking ... done.
+Loading package base ... linking ... done.
+Loading package ffi-1.0 ... linking ... done.
+[1 of 1] Compiling StatFS           ( StatFS.hs, interpreted )
+Ok, modules loaded: StatFS.
+*StatFS> s <- getFileSystemStats \".\"
+Loading package bytestring-0.9.1.7 ... linking ... done.
+*StatFS> s
+Just (FileSystemStats {fsStatBlockSize = 4096, fsStatBlockCount = 244106668, fsStatByteCount = 999860912128, fsStatBytesFree = 423097798656, fsStatBytesAvailable = 422835654656, fsStatBytesUsed = 576763113472})
+*StatFS> 
+
+ + +we could just stick another if defined (__APPLE__) instead of what I previously had and it looks like it will do the right thing on OSX. + + +"""]] diff --git a/doc/bugs/Makefile_is_missing_dependancies/comment_6_24119fc5d5963ce9dd669f7dcf006859._comment b/doc/bugs/Makefile_is_missing_dependancies/comment_6_24119fc5d5963ce9dd669f7dcf006859._comment new file mode 100644 index 0000000000..714459fbe8 --- /dev/null +++ b/doc/bugs/Makefile_is_missing_dependancies/comment_6_24119fc5d5963ce9dd669f7dcf006859._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 6" + date="2011-03-23T16:23:56Z" + content=""" +I forgot to mention that the statfs64 stuff in OSX seems to be deprecated, see http://developer.apple.com/library/mac/#documentation/Darwin/Reference/ManPages/man2/statfs64.2.html + +on a slightly different note, is anonymous pushing to the \"wiki\" over git allowed? I'd prefer to be able to edit stuff inline for updating some of my own comments if I can :P +"""]] diff --git a/doc/bugs/Makefile_is_missing_dependancies/comment_7_96fd4725df4b54e670077a18d3ac4943._comment b/doc/bugs/Makefile_is_missing_dependancies/comment_7_96fd4725df4b54e670077a18d3ac4943._comment new file mode 100644 index 0000000000..8ba8e8d1f6 --- /dev/null +++ b/doc/bugs/Makefile_is_missing_dependancies/comment_7_96fd4725df4b54e670077a18d3ac4943._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 7" + date="2011-03-23T16:57:56Z" + content=""" +Try the changes I've pushed to use statfs64 on apple. + +There is actually a standardized statvfs that I'd rather use, but after the last time that I tried going with the POSIX option first only to find it was not broadly implemented, I was happy to find some already existing code that worked for some OSs. 
+ +(While ikiwiki supports anonymous git push, it's a feature we have not rolled out on Branchable.com yet, and anyway, ikiwiki disallows editing existing comments that way. I would, however, be happy to git pull changes from somewhere.) +"""]] diff --git a/doc/bugs/Makefile_is_missing_dependancies/comment_8_a3555e3286cdc2bfeb9cde0ff727ba74._comment b/doc/bugs/Makefile_is_missing_dependancies/comment_8_a3555e3286cdc2bfeb9cde0ff727ba74._comment new file mode 100644 index 0000000000..63d188bcce --- /dev/null +++ b/doc/bugs/Makefile_is_missing_dependancies/comment_8_a3555e3286cdc2bfeb9cde0ff727ba74._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 8" + date="2011-03-23T17:03:51Z" + content=""" +The latest change looks good, it seems to be returning sensible numbers for me. Just tried it out on a few different mount points and it appears to be working. +"""]] diff --git a/doc/bugs/Name_scheme_does_not_follow_git__39__s_rules.mdwn b/doc/bugs/Name_scheme_does_not_follow_git__39__s_rules.mdwn new file mode 100644 index 0000000000..722dac50b8 --- /dev/null +++ b/doc/bugs/Name_scheme_does_not_follow_git__39__s_rules.mdwn @@ -0,0 +1,31 @@ +I can create an annex remote named 'test:/test'. git itself does not allow colons in names, though. The name scheme for an annex should be the same as for git repos themselves. + +> What do you mean by "an annex remote"? git-annex uses the same +> remotes configuration as does git. If you put invalid +> stuff in .git/config it might handle it slightly different than +> git, I don't know. Examples needed. --[[Joey]] + +>> What I mean is this: + + % cd 1 + % git init + % git annex init "my:colon" + % [...] + % cd ../2 + % git init + % git annex init "second" + % git remote add "my:colon" ../1 + fatal: 'my:colon' is not a valid remote name + +>> -- RichiH + +>>> I see.. 
Git annex init does not specify a remote's name, it specifies +>>> an arbitrary human-readable description of the repository, which will +>>> be displayed when there is no configured remote corresponding to the +>>> repository. So this is not a bug unless some documentation of that is +>>> unclear. --[[Joey]] + +>>>> Nobody spoke up to say it's unclear, so closing as PEBKAC :) +>>>> [[done]] --[[Joey]] + +>>>>> I still think git-annex should follow the same rules as git in this regard, but if your design decision is different, I won't try to argue the point :) -- RichiH diff --git a/doc/bugs/No_easy_way_to_re-inject_a_file_into_an_annex.mdwn b/doc/bugs/No_easy_way_to_re-inject_a_file_into_an_annex.mdwn new file mode 100644 index 0000000000..8df3bde481 --- /dev/null +++ b/doc/bugs/No_easy_way_to_re-inject_a_file_into_an_annex.mdwn @@ -0,0 +1,12 @@ +My local git index got corrupted and I needed to clone and annex get all data from my main repo. + +Some files were never copied anywhere so I am stuck with symlinks to nowhere. + +I tried to copy over the symlink with a copy of the actual file, which did not work. Trying to unlock, copying over the symlink, and relock did not work, either. + +Then, I copied the annex object to the correct place in .git/annex/objects/..., set all modes, re-ran fsck and the file re-appeared. + + +Long story short, I think there should be a `git annex reinject $file` or similar which will take a file, either one replacing the symlink or with an arbitrary path, and put it into the correct place in the object store. Called normally, it should reject all reinjects where the checksum does not match. With --force, this should be overridden. For reasons of safety, WORM should always require --force. 
+ +> [[closing|done]], seems addressed --[[Joey]] diff --git a/doc/bugs/No_easy_way_to_re-inject_a_file_into_an_annex/comment_1_c871605e187f539f3bfe7478433e7fb5._comment b/doc/bugs/No_easy_way_to_re-inject_a_file_into_an_annex/comment_1_c871605e187f539f3bfe7478433e7fb5._comment new file mode 100644 index 0000000000..9688012a47 --- /dev/null +++ b/doc/bugs/No_easy_way_to_re-inject_a_file_into_an_annex/comment_1_c871605e187f539f3bfe7478433e7fb5._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-04-03T01:46:16Z" + content=""" +Have you seen [[walkthrough/recover_data_from_lost+found]]? The method described there will also work in this scenario. +"""]] diff --git a/doc/bugs/No_easy_way_to_re-inject_a_file_into_an_annex/comment_2_e6f1e9eee8b8dfb60ca10c8cfd807ac9._comment b/doc/bugs/No_easy_way_to_re-inject_a_file_into_an_annex/comment_2_e6f1e9eee8b8dfb60ca10c8cfd807ac9._comment new file mode 100644 index 0000000000..c9b74d98f0 --- /dev/null +++ b/doc/bugs/No_easy_way_to_re-inject_a_file_into_an_annex/comment_2_e6f1e9eee8b8dfb60ca10c8cfd807ac9._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 2" + date="2011-04-03T09:00:17Z" + content=""" +I did not. Thanks :) + +This still means that you can't re-inject a new version of a file unless you have the old one if you are using a SHA* backend, but that might be a corner case anyway. 
+"""]] diff --git a/doc/bugs/No_easy_way_to_re-inject_a_file_into_an_annex/comment_3_be62be5fe819acc0cb8b878802decd46._comment b/doc/bugs/No_easy_way_to_re-inject_a_file_into_an_annex/comment_3_be62be5fe819acc0cb8b878802decd46._comment new file mode 100644 index 0000000000..9c56452e53 --- /dev/null +++ b/doc/bugs/No_easy_way_to_re-inject_a_file_into_an_annex/comment_3_be62be5fe819acc0cb8b878802decd46._comment @@ -0,0 +1,14 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 3" + date="2011-05-14T16:28:36Z" + content=""" +To re-inject new content for a file, you really want to get a new key for the file. Otherwise, other repos that have the old file will never get the new content. So: + +
+git rm file
+mv ~/newcontent file
+git annex add file
+
+"""]] diff --git a/doc/bugs/No_easy_way_to_re-inject_a_file_into_an_annex/comment_4_480a4f72445a636eab1b1c0f816d365c._comment b/doc/bugs/No_easy_way_to_re-inject_a_file_into_an_annex/comment_4_480a4f72445a636eab1b1c0f816d365c._comment new file mode 100644 index 0000000000..fcca0561d4 --- /dev/null +++ b/doc/bugs/No_easy_way_to_re-inject_a_file_into_an_annex/comment_4_480a4f72445a636eab1b1c0f816d365c._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 4" + date="2011-05-14T16:29:35Z" + content=""" +Now available as `git-annex reinject`. +"""]] diff --git a/doc/bugs/No_version_information_from_cli.mdwn b/doc/bugs/No_version_information_from_cli.mdwn new file mode 100644 index 0000000000..a0d30db414 --- /dev/null +++ b/doc/bugs/No_version_information_from_cli.mdwn @@ -0,0 +1,18 @@ +git-annex does not listen to -v, --version or version. + +At the very least, it should return both the version of the binary and the version of the object store it supports. +If it supports several annex versions, they should be listed in a comma-separated fashion. +If git-annex is called from within an annex, it should print the version of the local object store. + +Sample: + + % git annex version + git-annex version : 0.24 + default object store version : 3 + supported object store versions : 2,3 + local object store version : 2 + % + +The above might look like overkill, but it's in a form that will, most likely, never need to be extended. + +> Great idea, [[done]] --[[Joey]] diff --git a/doc/bugs/Prevent_accidental_merges.mdwn b/doc/bugs/Prevent_accidental_merges.mdwn new file mode 100644 index 0000000000..3e30e02235 --- /dev/null +++ b/doc/bugs/Prevent_accidental_merges.mdwn @@ -0,0 +1,14 @@ +With the storage layout v3, pulling the git-annex branch into the master branch is... less than ideal. + +The fact that the two branches contain totally different data make an accidental merge worse, arguably. 
+ +Adding a tiny binary file called .gitnomerge to both branches would solve that without any noticeable overhead. + +Yes, there is an argument to be made that this is too much hand-holding, but I still think it's worth it. + +-- Richard + +> It should be as easy to undo such an accidental merge +> as it is to undo any other git commit, right? I quite like that git-annex +> no longer adds any clutter to the master branch, and would be reluctant +> to change that. --[[Joey]] diff --git a/doc/bugs/Prevent_accidental_merges/comment_1_4c46a193915eab8f308a04175cb2e40a._comment b/doc/bugs/Prevent_accidental_merges/comment_1_4c46a193915eab8f308a04175cb2e40a._comment new file mode 100644 index 0000000000..3e28a28cb0 --- /dev/null +++ b/doc/bugs/Prevent_accidental_merges/comment_1_4c46a193915eab8f308a04175cb2e40a._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 1" + date="2011-10-23T15:00:48Z" + content=""" +Having run into the same issue again, I still think git-annex should ensure no merges take place. The clutter introduced by a .gitnomerge is negligible, imo. +"""]] diff --git a/doc/bugs/Problems_running_make_on_osx.mdwn b/doc/bugs/Problems_running_make_on_osx.mdwn new file mode 100644 index 0000000000..83b75fb544 --- /dev/null +++ b/doc/bugs/Problems_running_make_on_osx.mdwn @@ -0,0 +1,49 @@ +Followed the instructions over here: http://git-annex.branchable.com/forum/git-annex_on_OSX/ + +and had to install the following extra packages to be able to get make to start: + +[realizes pcre-light is needed but pcre not installed on my mac] +sudo port install pcre +sudo cabal install pcre-light + +> Ah right, that is a new dependency. I've updated the forum page +> with this info. +> --[[Joey]] + +But then I got the following error: + +
+ghc -O2 -Wall --make git-annex  
+[ 7 of 52] Compiling BackendTypes     ( BackendTypes.hs, BackendTypes.o   
+
+BackendTypes.hs:71:17:  
+    No instance for (Arbitrary Char)  
+      arising from a use of `arbitrary' at BackendTypes.hs:71:17-25  
+    Possible fix: add an instance declaration for (Arbitrary Char)  
+    In a stmt of a 'do' expression: backendname <- arbitrary  
+    In the expression:  
+        do backendname <- arbitrary  
+           keyname <- arbitrary  
+             return $ Key (backendname, keyname)  
+    In the definition of `arbitrary':  
+        arbitrary = do backendname <- arbitrary  
+                       keyname <- arbitrary  
+                         return $ Key (backendname, keyname)  
+make: *** [git-annex] Error 1  
+
+ +My knowledge of Haskell (had to lookup the spelling...) is more than rudimentary so any help would be appreciated. + +> Hmm, it seems you may be missing part of the quickcheck haskell +> library, or have a different version than me. +> +> The easy fix is probably to just edit BackendTypes.hs and delete the +> entire end of the file from line 68, "for quickcheck" down. This code +> is only used by the test suite (so "make test" will fail), +> but it should get it to build. --[[Joey]] + +--- + +Closing this bug because the above problem now has a solution documented on +the install page, and the below test suite failure problems should all be +resolved on OSX. [[done]] --[[Joey]] diff --git a/doc/bugs/Problems_running_make_on_osx/comment_10_94e4ac430140042a2d0fb5a16d86b4e5._comment b/doc/bugs/Problems_running_make_on_osx/comment_10_94e4ac430140042a2d0fb5a16d86b4e5._comment new file mode 100644 index 0000000000..95a9773e2b --- /dev/null +++ b/doc/bugs/Problems_running_make_on_osx/comment_10_94e4ac430140042a2d0fb5a16d86b4e5._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 10" + date="2011-02-09T15:04:50Z" + content=""" +I don't know what these problems forking could be. Can you strace it? 
+"""]] diff --git a/doc/bugs/Problems_running_make_on_osx/comment_11_56f1143fa191361d63b441741699e17f._comment b/doc/bugs/Problems_running_make_on_osx/comment_11_56f1143fa191361d63b441741699e17f._comment new file mode 100644 index 0000000000..3fbe57ecd5 --- /dev/null +++ b/doc/bugs/Problems_running_make_on_osx/comment_11_56f1143fa191361d63b441741699e17f._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 11" + date="2011-02-09T19:35:47Z" + content=""" +I got dtruss to give me a trace, the output is quite big to post here (~560kb gzip'd), do you mind if I emailed it or posted it somewhere else for you? +"""]] diff --git a/doc/bugs/Problems_running_make_on_osx/comment_12_ec5131624d0d2285d3b6880e47033f97._comment b/doc/bugs/Problems_running_make_on_osx/comment_12_ec5131624d0d2285d3b6880e47033f97._comment new file mode 100644 index 0000000000..beba5dc42c --- /dev/null +++ b/doc/bugs/Problems_running_make_on_osx/comment_12_ec5131624d0d2285d3b6880e47033f97._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 12" + date="2011-02-09T19:47:30Z" + content=""" +joey@kitenet.net (hope I can make sense of dtruss output) +"""]] diff --git a/doc/bugs/Problems_running_make_on_osx/comment_13_88ed095a448096bf8a69015a04e64df1._comment b/doc/bugs/Problems_running_make_on_osx/comment_13_88ed095a448096bf8a69015a04e64df1._comment new file mode 100644 index 0000000000..dd25c3d0cb --- /dev/null +++ b/doc/bugs/Problems_running_make_on_osx/comment_13_88ed095a448096bf8a69015a04e64df1._comment @@ -0,0 +1,16 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 13" + date="2011-02-09T21:59:47Z" + content=""" +The dtrace puzzlingly does not have the same errors shown above, but a set of mostly new errors. I don't know what to make of that. 
+ +> git-annex: git-annex/.t/repo/.git/hooks/pre-commit: fileAccess: permission denied (Operation not permitted) + +This seems to be caused by it setting the execute bit on the file. I don't know why that would fail; it's just written the file and renamed it into place so clearly should be able to write to it. + +> was able to modify annexed file's sha1foo content + +This also suggests something breaking with permissions. +"""]] diff --git a/doc/bugs/Problems_running_make_on_osx/comment_14_89a960b6706ed703b390a81a8bc4e311._comment b/doc/bugs/Problems_running_make_on_osx/comment_14_89a960b6706ed703b390a81a8bc4e311._comment new file mode 100644 index 0000000000..724fe5505a --- /dev/null +++ b/doc/bugs/Problems_running_make_on_osx/comment_14_89a960b6706ed703b390a81a8bc4e311._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 14" + date="2011-02-12T21:19:24Z" + content=""" +I've been trying to dig around the trace and code, and used google to see if the forkProcess issue was a haskell thing or an OSX thing. It seems that someone may have run into a similar issue, though I am not sure if it's related. 
+"""]] diff --git a/doc/bugs/Problems_running_make_on_osx/comment_15_6b8867b8e48bf807c955779c9f8f0909._comment b/doc/bugs/Problems_running_make_on_osx/comment_15_6b8867b8e48bf807c955779c9f8f0909._comment new file mode 100644 index 0000000000..733ec997a6 --- /dev/null +++ b/doc/bugs/Problems_running_make_on_osx/comment_15_6b8867b8e48bf807c955779c9f8f0909._comment @@ -0,0 +1,71 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 15" + date="2011-02-13T02:45:51Z" + content=""" +It may be possible that OSX has some low resource limits, for user processes (266 per user I think) doing a + + sudo sysctl -w kern.maxproc=2048 + sudo sysctl -w kern.maxprocperuid=1024 + sudo echo \"limit maxfiles 1024 unlimited\" >> /etc/launchd.conf + sudo echo \"limit maxproc 1024 2048\" >> /etc/launchd.conf + +seems to change the behaviour of the tests a bit... + +
+Testing 1:blackbox:3:git-annex unannex:1:with content                         
+### Failure in: 1:blackbox:3:git-annex unannex:1:with content
+foo is not a symlink
+Testing 1:blackbox:4:git-annex drop:0:no remotes                              
+### Failure in: 1:blackbox:4:git-annex drop:0:no remotes
+drop wrongly succeeded with no known copy of file
+Testing 1:blackbox:4:git-annex drop:1:with remote                             
+Testing 1:blackbox:4:git-annex drop:2:untrusted remote                        
+Testing 1:blackbox:5:git-annex get                                            
+Testing 1:blackbox:6:git-annex move                                           
+Testing 1:blackbox:7:git-annex copy                                           
+Testing 1:blackbox:8:git-annex unlock/lock                                    
+Testing 1:blackbox:9:git-annex edit/commit:0                                  
+Cases: 30  Tried: 20  Errors: 0  Failures: 2add foo ok
+ok
+Testing 1:blackbox:9:git-annex edit/commit:1                                  
+Testing 1:blackbox:10:git-annex fix                                           
+Testing 1:blackbox:11:git-annex trust/untrust/semitrust                       
+Testing 1:blackbox:12:git-annex fsck:0                                        
+Cases: 30  Tried: 24  Errors: 0  Failures: 2  Only 1 of 2 trustworthy copies of foo exist.
+  Back it up with git-annex copy.
+  Only 1 of 2 trustworthy copies of sha1foo exist.
+  Back it up with git-annex copy.
+  Bad file size; moved to /Users/jtang/develop/git-annex/.t/tmprepo/.git/annex/bad/WORM:1297565141:20:foo
+  Bad file content; moved to /Users/jtang/develop/git-annex/.t/tmprepo/.git/annex/bad/SHA1:ee80d2cec57a3810db83b80e1b320df3a3721ffa
+Testing 1:blackbox:12:git-annex fsck:1                                        
+### Failure in: 1:blackbox:12:git-annex fsck:1
+fsck failed to fail with content only available in untrusted (current) repository
+Testing 1:blackbox:12:git-annex fsck:2                                        
+Cases: 30  Tried: 26  Errors: 0  Failures: 3  Only 1 of 2 trustworthy copies of foo exist.
+  Back it up with git-annex copy.
+  The following untrusted locations may also have copies: 
+  	58e831c2-371b-11e0-bc1f-47d738dc52ee  -- test repo
+  Only 1 of 2 trustworthy copies of sha1foo exist.
+  Back it up with git-annex copy.
+  The following untrusted locations may also have copies: 
+  	58e831c2-371b-11e0-bc1f-47d738dc52ee  -- test repo
+Testing 1:blackbox:13:git-annex migrate:0                                     
+Cases: 30  Tried: 27  Errors: 0  Failures: 3  git-annex: user error (Error in fork: forkProcess: resource exhausted (Resource temporarily unavailable))
+### Failure in: 1:blackbox:13:git-annex migrate:0
+migrate annexedfile failed
+Testing 1:blackbox:13:git-annex migrate:1                                     
+### Error in:   1:blackbox:13:git-annex migrate:1
+forkProcess: resource exhausted (Resource temporarily unavailable)
+Testing 1:blackbox:14:git-annex unused/dropunused                             
+### Error in:   1:blackbox:14:git-annex unused/dropunused
+forkProcess: resource exhausted (Resource temporarily unavailable)
+Cases: 30  Tried: 30  Errors: 2  Failures: 4
+test: failed
+
+ + +the number of failures vary as I change the values of the maxprocs, I think I have narrowed it down to OSX just being stupid with limits thus causing the tests to fail. + +"""]] diff --git a/doc/bugs/Problems_running_make_on_osx/comment_16_5c2dd6002aadaab30841b77a5f5aed34._comment b/doc/bugs/Problems_running_make_on_osx/comment_16_5c2dd6002aadaab30841b77a5f5aed34._comment new file mode 100644 index 0000000000..ca1b8e8cd5 --- /dev/null +++ b/doc/bugs/Problems_running_make_on_osx/comment_16_5c2dd6002aadaab30841b77a5f5aed34._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 16" + date="2011-02-13T04:52:26Z" + content=""" +I've fixed the test suite to not accumulate all those zombie processes. Now only 2 or 3 processes should run max. Am curious to see if that clears up all the problems. +"""]] diff --git a/doc/bugs/Problems_running_make_on_osx/comment_17_62fccb04b0e4b695312f7a3f32fb96ee._comment b/doc/bugs/Problems_running_make_on_osx/comment_17_62fccb04b0e4b695312f7a3f32fb96ee._comment new file mode 100644 index 0000000000..7c7200fb9e --- /dev/null +++ b/doc/bugs/Problems_running_make_on_osx/comment_17_62fccb04b0e4b695312f7a3f32fb96ee._comment @@ -0,0 +1,43 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 17" + date="2011-02-13T10:46:54Z" + content=""" +Yeap, that did the trick. I just tested a few separate OSX 10.6.6 systems and the tests are better behaved now, only 3 failures now. + +So the tests behave better (at least we don't get resource fork errors any more) + + * after the commit c319a3 without modifying the system limits (of 266 procs per user) + * without the commit c319a3 and when I increase the system process limits to as much as OSX allows + +On all the systems I tested on, I'm down to 3 failures now. + +
+### Failure in: 1:blackbox:3:git-annex unannex:1:with content
+foo is not a symlink
+### Failure in: 1:blackbox:4:git-annex drop:0:no remotes
+drop wrongly succeeded with no known copy of file
+Cases: 30  Tried: 20  Errors: 0  Failures: 2add foo ok
+ok
+Cases: 30  Tried: 24  Errors: 0  Failures: 2  Only 1 of 2 trustworthy copies of foo exist.
+  Back it up with git-annex copy.
+  Only 1 of 2 trustworthy copies of sha1foo exist.
+  Back it up with git-annex copy.
+  Bad file size; moved to /Users/jtang/develop/git-annex/.t/tmprepo/.git/annex/bad/WORM:1297594011:20:foo
+  Bad file content; moved to /Users/jtang/develop/git-annex/.t/tmprepo/.git/annex/bad/SHA1:ee80d2cec57a3810db83b80e1b320df3a3721ffa
+### Failure in: 1:blackbox:12:git-annex fsck:1
+fsck failed to fail with content only available in untrusted (current) repository
+Cases: 30  Tried: 26  Errors: 0  Failures: 3  Only 1 of 2 trustworthy copies of foo exist.
+  Back it up with git-annex copy.
+  The following untrusted locations may also have copies: 
+  	90d63906-375e-11e0-8867-abb8a6368269  -- test repo
+  Only 1 of 2 trustworthy copies of sha1foo exist.
+  Back it up with git-annex copy.
+  The following untrusted locations may also have copies: 
+  	90d63906-375e-11e0-8867-abb8a6368269  -- test repo
+Cases: 30  Tried: 30  Errors: 0  Failures: 3
+
+ +It's the same set of failures across all the OSX systems that I have tested on. Now I just need to figure out why there are still these three failures. +"""]] diff --git a/doc/bugs/Problems_running_make_on_osx/comment_18_64fab50d95de619eb2e8f08f90237de1._comment b/doc/bugs/Problems_running_make_on_osx/comment_18_64fab50d95de619eb2e8f08f90237de1._comment new file mode 100644 index 0000000000..df76bb3017 --- /dev/null +++ b/doc/bugs/Problems_running_make_on_osx/comment_18_64fab50d95de619eb2e8f08f90237de1._comment @@ -0,0 +1,24 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="maybe killed another osx bug in the test." + date="2011-02-13T15:12:10Z" + content=""" +I think I have figured out why + + ### Failure in: 1:blackbox:3:git-annex unannex:1:with content + foo is not a symlink + +It goes back to this piece of code (in test.hs) + + copyrepo :: FilePath -> FilePath -> IO FilePath + copyrepo old new = do + cleanup new + ensuretmpdir + Utility.boolSystem \"cp\" [\"-pr\", old, new] @? \"cp -pr failed\" + +It seems that on OSX it does not preserve the symbolic link information, basically cp is not gnu cp on OSX, doing a \"cp -a SOURCE DEST\" seems to do the right thing on OSX. I tried it out on my archlinux workstation by replacing *-pr* with just *-a* and all the tests passed on archlinux. + +I'm not sure what the implications would be with changing the test with changing the cp command. 
+ +"""]] diff --git a/doc/bugs/Problems_running_make_on_osx/comment_19_4253988ed178054c8b6400beeed68a29._comment b/doc/bugs/Problems_running_make_on_osx/comment_19_4253988ed178054c8b6400beeed68a29._comment new file mode 100644 index 0000000000..090c991c3a --- /dev/null +++ b/doc/bugs/Problems_running_make_on_osx/comment_19_4253988ed178054c8b6400beeed68a29._comment @@ -0,0 +1,11 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 19" + date="2011-02-13T15:55:47Z" + content=""" +On second thought and after some messing (trying most of the options and combinations of options on OSX for).... I tried replacing cp with gnu cp from coreutils on my OSX install, and all the tests passed. *sigh* cp -a is preserving some permissions and attributes but not all, it's not behaving in the same way as the gnu cp does... the closest thing that I have found on OSX that behaves in the same way as gnu \"cp -pr\" is to use \"ditto\". + +Just doing a \"ditto SOURCE DEST\" in the tests passes everything. I'm not sure if it's a good idea to use this even though it works. Though this is just the tests, does it affect CopyFile.hs where \"cp\" is called? + +"""]] diff --git a/doc/bugs/Problems_running_make_on_osx/comment_1_34120e82331ace01a6a4960862d38f2d._comment b/doc/bugs/Problems_running_make_on_osx/comment_1_34120e82331ace01a6a4960862d38f2d._comment new file mode 100644 index 0000000000..a33fef7d99 --- /dev/null +++ b/doc/bugs/Problems_running_make_on_osx/comment_1_34120e82331ace01a6a4960862d38f2d._comment @@ -0,0 +1,17 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawmd3qri1pXEYktlxYGwj37wCnrM4FMEJCc" + nickname="Antoine" + subject="Got it going!" + date="2011-02-06T06:02:57Z" + content=""" +Thanks to your feedback, I got it going. 
+ +Maybe those two should be added to the 'OSX how-to' in the forum + +[realizes pcre-light is needed but pcre not installed on my mac] +sudo port install pcre +sudo cabal install pcre-light + +[tests are failing, need haskell's quickcheck] +sudo cabal install quickcheck +"""]] diff --git a/doc/bugs/Problems_running_make_on_osx/comment_20_7db27d1a22666c831848bc6c06d66a84._comment b/doc/bugs/Problems_running_make_on_osx/comment_20_7db27d1a22666c831848bc6c06d66a84._comment new file mode 100644 index 0000000000..b617da9261 --- /dev/null +++ b/doc/bugs/Problems_running_make_on_osx/comment_20_7db27d1a22666c831848bc6c06d66a84._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 20" + date="2011-02-13T17:54:09Z" + content=""" +Outside the test suite, git-annex's actual use of cp puts fairly low demands on it. It tries to use cp -a or cp -p if available just to preserve whatever attributes it can preserve, but the worst case if that you have a symlink pointing to a file that doesn't have the original timestamp or whatever. And there's little expectation git preserves that stuff anyway. + +I will probably try to make the test suite entirely use git clone rather than cp. +"""]] diff --git a/doc/bugs/Problems_running_make_on_osx/comment_2_cc53d1681d576186dbc868dd9801d551._comment b/doc/bugs/Problems_running_make_on_osx/comment_2_cc53d1681d576186dbc868dd9801d551._comment new file mode 100644 index 0000000000..91d3e89f06 --- /dev/null +++ b/doc/bugs/Problems_running_make_on_osx/comment_2_cc53d1681d576186dbc868dd9801d551._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 2" + date="2011-02-06T17:39:52Z" + content=""" +Yes, I've moved it to [[install/OSX]] page where anyone can update it in this wiki, and added your improvements. 
+"""]] diff --git a/doc/bugs/Problems_running_make_on_osx/comment_3_68f0f8ae953589ae26d57310b40c878d._comment b/doc/bugs/Problems_running_make_on_osx/comment_3_68f0f8ae953589ae26d57310b40c878d._comment new file mode 100644 index 0000000000..39f32c244f --- /dev/null +++ b/doc/bugs/Problems_running_make_on_osx/comment_3_68f0f8ae953589ae26d57310b40c878d._comment @@ -0,0 +1,57 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="tests fail with more recent installs of haskell platform" + date="2011-02-07T12:43:43Z" + content=""" +I'm running ghc 6.12.3 with the corresponding haskell-platform package from the HP site which I installed in preference to the macports version of haskell-platform (it's quite old). it seems when you install quickcheck, the version that is installed is of version 2.4.0.1 and not 1.2.0 which git-annex depends on for its tests. + +
+jtang@x00:~ $ cabal install quickcheck --reinstall               
+Resolving dependencies...
+Configuring QuickCheck-2.4.0.1...
+Preprocessing library QuickCheck-2.4.0.1...
+
+..
+and so on..
+..
+
+
+ +it fails with this + +
+[54 of 54] Compiling Main             ( test.hs, test.o )
+
+test.hs:56:3:
+    No instance for (QuickCheck-1.2.0.1:Test.QuickCheck.Arbitrary Char)
+      arising from a use of `qctest' at test.hs:56:3-64
+    Possible fix:
+      add an instance declaration for
+      (QuickCheck-1.2.0.1:Test.QuickCheck.Arbitrary Char)
+    In the expression:
+        qctest \"prop_idempotent_deencode\" Git.prop_idempotent_deencode
+    In the first argument of `TestList', namely
+        `[qctest \"prop_idempotent_deencode\" Git.prop_idempotent_deencode,
+          qctest \"prop_idempotent_fileKey\" Locations.prop_idempotent_fileKey,
+          qctest
+            \"prop_idempotent_key_read_show\"
+            BackendTypes.prop_idempotent_key_read_show,
+          qctest
+            \"prop_idempotent_shellEscape\" Utility.prop_idempotent_shellEscape,
+          ....]'
+    In the second argument of `($)', namely
+        `TestList
+           [qctest \"prop_idempotent_deencode\" Git.prop_idempotent_deencode,
+            qctest \"prop_idempotent_fileKey\" Locations.prop_idempotent_fileKey,
+            qctest
+              \"prop_idempotent_key_read_show\"
+              BackendTypes.prop_idempotent_key_read_show,
+            qctest
+              \"prop_idempotent_shellEscape\" Utility.prop_idempotent_shellEscape,
+            ....]'
+
+ +I'd imagine if I could downgrade, it would compile and pass the tests (I hope) + +"""]] diff --git a/doc/bugs/Problems_running_make_on_osx/comment_4_c52be386f79f14c8570a8f1397c68581._comment b/doc/bugs/Problems_running_make_on_osx/comment_4_c52be386f79f14c8570a8f1397c68581._comment new file mode 100644 index 0000000000..e245e139fb --- /dev/null +++ b/doc/bugs/Problems_running_make_on_osx/comment_4_c52be386f79f14c8570a8f1397c68581._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 4" + date="2011-02-08T19:00:14Z" + content=""" +I doubt that git-annex can be used with QuickCheck 1.2.0. The QuickCheck I've tested it with is 2.1.0.3 actually. + +I suspect you have an old version of the TestPack haskell library on your system, that is linked against QuickCheck 1.2.0. Git-annex has been tested with TestPack 2.0.0, which uses QuickCheck 2.x. + +In any case, you don't have to run 'make test' to build git-annex, and my comments above should make the main program compile, I expect. +"""]] diff --git a/doc/bugs/Problems_running_make_on_osx/comment_5_7f1330a1e541b0f3e2192e596d7f7bee._comment b/doc/bugs/Problems_running_make_on_osx/comment_5_7f1330a1e541b0f3e2192e596d7f7bee._comment new file mode 100644 index 0000000000..9c83feb32f --- /dev/null +++ b/doc/bugs/Problems_running_make_on_osx/comment_5_7f1330a1e541b0f3e2192e596d7f7bee._comment @@ -0,0 +1,107 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 5" + date="2011-02-08T19:56:55Z" + content=""" +Ah, that gave me a good clue, my system just got pretty confused with a mixture of quickcheck and testpack installs. Would it be possible to put up a list of versions of the software you are using on your development environment? 
(at least the minimum tested version) + +I guess it shouldn't matter to most users who are going to rely on packagers to sort these dependency issues, but it's nice to know. + +Anyway, the tests build now, and they seem to fail on my (rather messy) install of haskell platform + ghc 6.12 on osx 10.6.6. + +
+< output that passed some tests >
+Testing 1:blackbox:0:git-annex init
+Testing 1:blackbox:1:git-annex add:0
+Testing 1:blackbox:1:git-annex add:1
+Cases: 30  Tried: 9  Errors: 0  Failures: 0test: sha1sum: executeFile: does not exist (No such file or directory)
+  git-annex: : hGetLine: end of file
+### Failure in: 1:blackbox:1:git-annex add:1
+add with SHA1 failed
+Testing 1:blackbox:2:git-annex setkey/fromkey
+Cases: 30  Tried: 10  Errors: 0  Failures: 1(checksum...) test: sha1sum: executeFile: does not exist (No such file or directory)
+### Error in:   1:blackbox:2:git-annex setkey/fromkey
+: hGetLine: end of file
+Testing 1:blackbox:3:git-annex unannex:0:no content
+Cases: 30  Tried: 11  Errors: 1  Failures: 1chmod: -R: No such file or directory
+chmod: -R: No such file or directory
+Testing 1:blackbox:3:git-annex unannex:1:with content
+### Failure in: 1:blackbox:3:git-annex unannex:1:with content
+foo is not a symlink
+Testing 1:blackbox:4:git-annex drop:0:no remotes
+Cases: 30  Tried: 13  Errors: 1  Failures: 2chmod: -R: No such file or directory
+### Error in:   1:blackbox:4:git-annex drop:0:no remotes
+.t/tmprepo/.git/annex/objects/WORM:1297194705:20:foo/WORM:1297194705:20:foo: removeLink: permission denied (Permission denied)
+Testing 1:blackbox:4:git-annex drop:1:with remote
+Cases: 30  Tried: 14  Errors: 2  Failures: 2chmod: -R: No such file or directory
+### Error in:   1:blackbox:4:git-annex drop:1:with remote
+.t/tmprepo/.git/annex/objects/WORM:1297194705:20:foo/WORM:1297194705:20:foo: removeLink: permission denied (Permission denied)
+Testing 1:blackbox:4:git-annex drop:2:untrusted remote
+Cases: 30  Tried: 15  Errors: 3  Failures: 2chmod: -R: No such file or directory
+### Error in:   1:blackbox:4:git-annex drop:2:untrusted remote
+.t/tmprepo/.git/annex/objects/WORM:1297194705:20:foo/WORM:1297194705:20:foo: removeLink: permission denied (Permission denied)
+Testing 1:blackbox:5:git-annex get
+Cases: 30  Tried: 16  Errors: 4  Failures: 2chmod: -R: No such file or directory
+### Error in:   1:blackbox:5:git-annex get
+.t/tmprepo/.git/annex/objects/WORM:1297194705:20:foo/WORM:1297194705:20:foo: removeLink: permission denied (Permission denied)
+Testing 1:blackbox:6:git-annex move
+Cases: 30  Tried: 17  Errors: 5  Failures: 2chmod: -R: No such file or directory
+### Error in:   1:blackbox:6:git-annex move
+.t/tmprepo/.git/annex/objects/WORM:1297194705:20:foo/WORM:1297194705:20:foo: removeLink: permission denied (Permission denied)
+Testing 1:blackbox:7:git-annex copy
+Cases: 30  Tried: 18  Errors: 6  Failures: 2chmod: -R: No such file or directory
+### Error in:   1:blackbox:7:git-annex copy
+.t/tmprepo/.git/annex/objects/WORM:1297194705:20:foo/WORM:1297194705:20:foo: removeLink: permission denied (Permission denied)
+Testing 1:blackbox:8:git-annex unlock/lock
+Cases: 30  Tried: 19  Errors: 7  Failures: 2chmod: -R: No such file or directory
+### Error in:   1:blackbox:8:git-annex unlock/lock
+.t/tmprepo/.git/annex/objects/WORM:1297194705:20:foo/WORM:1297194705:20:foo: removeLink: permission denied (Permission denied)
+Testing 1:blackbox:9:git-annex edit/commit:0
+Cases: 30  Tried: 20  Errors: 8  Failures: 2chmod: -R: No such file or directory
+### Error in:   1:blackbox:9:git-annex edit/commit:0
+.t/tmprepo/.git/annex/objects/WORM:1297194705:20:foo/WORM:1297194705:20:foo: removeLink: permission denied (Permission denied)
+Testing 1:blackbox:9:git-annex edit/commit:1
+Cases: 30  Tried: 21  Errors: 9  Failures: 2chmod: -R: No such file or directory
+### Error in:   1:blackbox:9:git-annex edit/commit:1
+.t/tmprepo/.git/annex/objects/WORM:1297194705:20:foo/WORM:1297194705:20:foo: removeLink: permission denied (Permission denied)
+Testing 1:blackbox:10:git-annex fix
+Cases: 30  Tried: 22  Errors: 10  Failures: 2chmod: -R: No such file or directory
+### Error in:   1:blackbox:10:git-annex fix
+.t/tmprepo/.git/annex/objects/WORM:1297194705:20:foo/WORM:1297194705:20:foo: removeLink: permission denied (Permission denied)
+Testing 1:blackbox:11:git-annex trust/untrust/semitrust
+Cases: 30  Tried: 23  Errors: 11  Failures: 2chmod: -R: No such file or directory
+### Error in:   1:blackbox:11:git-annex trust/untrust/semitrust
+.t/tmprepo/.git/annex/objects/WORM:1297194705:20:foo/WORM:1297194705:20:foo: removeLink: permission denied (Permission denied)
+Testing 1:blackbox:12:git-annex fsck:0
+Cases: 30  Tried: 24  Errors: 12  Failures: 2chmod: -R: No such file or directory
+### Error in:   1:blackbox:12:git-annex fsck:0
+.t/tmprepo/.git/annex/objects/WORM:1297194705:20:foo/WORM:1297194705:20:foo: removeLink: permission denied (Permission denied)
+Testing 1:blackbox:12:git-annex fsck:1
+Cases: 30  Tried: 25  Errors: 13  Failures: 2chmod: -R: No such file or directory
+### Error in:   1:blackbox:12:git-annex fsck:1
+.t/tmprepo/.git/annex/objects/WORM:1297194705:20:foo/WORM:1297194705:20:foo: removeLink: permission denied (Permission denied)
+Testing 1:blackbox:12:git-annex fsck:2
+Cases: 30  Tried: 26  Errors: 14  Failures: 2chmod: -R: No such file or directory
+### Error in:   1:blackbox:12:git-annex fsck:2
+.t/tmprepo/.git/annex/objects/WORM:1297194705:20:foo/WORM:1297194705:20:foo: removeLink: permission denied (Permission denied)
+Testing 1:blackbox:13:git-annex migrate:0
+Cases: 30  Tried: 27  Errors: 15  Failures: 2chmod: -R: No such file or directory
+### Error in:   1:blackbox:13:git-annex migrate:0
+.t/tmprepo/.git/annex/objects/WORM:1297194705:20:foo/WORM:1297194705:20:foo: removeLink: permission denied (Permission denied)
+Testing 1:blackbox:13:git-annex migrate:1
+Cases: 30  Tried: 28  Errors: 16  Failures: 2chmod: -R: No such file or directory
+### Error in:   1:blackbox:13:git-annex migrate:1
+.t/tmprepo/.git/annex/objects/WORM:1297194705:20:foo/WORM:1297194705:20:foo: removeLink: permission denied (Permission denied)
+Testing 1:blackbox:14:git-annex unused/dropunused
+Cases: 30  Tried: 29  Errors: 17  Failures: 2chmod: -R: No such file or directory
+### Error in:   1:blackbox:14:git-annex unused/dropunused
+.t/tmprepo/.git/annex/objects/WORM:1297194705:20:foo/WORM:1297194705:20:foo: removeLink: permission denied (Permission denied)
+Cases: 30  Tried: 30  Errors: 18  Failures: 2
+chmod: -R: No such file or directory
+test: .t/repo/.git/annex/objects/WORM:1297194705:20:foo/WORM:1297194705:20:foo: removeLink: permission denied (Permission denied)
+make: *** [test] Error 1
+
+ +I assumed that since the tests built, then running them shouldn't be a problem. It looks like some argument isn't being passed about for the location of the .t directory that gets created. I will check the dependencies on my system again. +"""]] diff --git a/doc/bugs/Problems_running_make_on_osx/comment_6_0c46f5165ceb5a7b9ea9689c33b3a4f8._comment b/doc/bugs/Problems_running_make_on_osx/comment_6_0c46f5165ceb5a7b9ea9689c33b3a4f8._comment new file mode 100644 index 0000000000..afc3088d4f --- /dev/null +++ b/doc/bugs/Problems_running_make_on_osx/comment_6_0c46f5165ceb5a7b9ea9689c33b3a4f8._comment @@ -0,0 +1,9 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 6" + date="2011-02-08T23:20:08Z" + content=""" +You're missing the sha1sum command, everything else is a follow-on error from that. Added a hint about this to [[install]], +and in the next version configure will check for sha1sum. +"""]] diff --git a/doc/bugs/Problems_running_make_on_osx/comment_7_237a137cce58a28abcc736cbf2c420b0._comment b/doc/bugs/Problems_running_make_on_osx/comment_7_237a137cce58a28abcc736cbf2c420b0._comment new file mode 100644 index 0000000000..8d8aefcb20 --- /dev/null +++ b/doc/bugs/Problems_running_make_on_osx/comment_7_237a137cce58a28abcc736cbf2c420b0._comment @@ -0,0 +1,22 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 7" + date="2011-02-09T00:45:31Z" + content=""" +That's odd, I have the md5sha1sum package installed and it still fails with pretty much the same error + +
+Testing 1:blackbox:0:git-annex init
+Cases: 30  Tried: 7  Errors: 0  Failures: 0chmod: -R: No such file or directory
+### Error in:   1:blackbox:0:git-annex init
+.t/repo/.git/annex/objects/SHA1:ee80d2cec57a3810db83b80e1b320df3a3721ffa/SHA1:ee80d2cec57a3810db83b80e1b320df3a3721ffa: removeLink: permission denied (Permission denied)
+Testing 1:blackbox:1:git-annex add:0
+### Error in:   1:blackbox:1:git-annex add:0
+foo: openFile: permission denied (Permission denied)
+
+< and so on >
+
+ +the configure script finds sha1sum, builds and starts to run. +"""]] diff --git a/doc/bugs/Problems_running_make_on_osx/comment_8_efafa203addf8fa79e33e21a87fb5a2b._comment b/doc/bugs/Problems_running_make_on_osx/comment_8_efafa203addf8fa79e33e21a87fb5a2b._comment new file mode 100644 index 0000000000..9401bd453e --- /dev/null +++ b/doc/bugs/Problems_running_make_on_osx/comment_8_efafa203addf8fa79e33e21a87fb5a2b._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 8" + date="2011-02-09T04:10:27Z" + content=""" +The chmod errors are because your chmod does not understand the -R argument. Only the test suite uses chmod -R. I've fixed it to modify modes manually. +"""]] diff --git a/doc/bugs/Problems_running_make_on_osx/comment_9_cc283b485b3c95ba7eebc8f0c96969b3._comment b/doc/bugs/Problems_running_make_on_osx/comment_9_cc283b485b3c95ba7eebc8f0c96969b3._comment new file mode 100644 index 0000000000..da6d7ca178 --- /dev/null +++ b/doc/bugs/Problems_running_make_on_osx/comment_9_cc283b485b3c95ba7eebc8f0c96969b3._comment @@ -0,0 +1,66 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 9" + date="2011-02-09T09:12:52Z" + content=""" +[a0826293][] fixed the last problem, there is coreutils available in macports, if they are installed you get the gnu equivalents but they are prefixed with a g (e.g. gchmod instead of chmod), I guess not everyone will have these install or prefer these on [[install/OSX]] + +Some more tests fail now... + +
+Testing 1:blackbox:3:git-annex unannex:1:with content
+### Failure in: 1:blackbox:3:git-annex unannex:1:with content
+foo is not a symlink
+Testing 1:blackbox:4:git-annex drop:0:no remotes
+### Failure in: 1:blackbox:4:git-annex drop:0:no remotes
+drop wrongly succeeded with no known copy of file
+Testing 1:blackbox:4:git-annex drop:1:with remote
+Testing 1:blackbox:4:git-annex drop:2:untrusted remote
+Testing 1:blackbox:5:git-annex get
+Testing 1:blackbox:6:git-annex move
+Testing 1:blackbox:7:git-annex copy
+### Failure in: 1:blackbox:7:git-annex copy
+move --to of file already there failed
+Testing 1:blackbox:8:git-annex unlock/lock
+### Error in:   1:blackbox:8:git-annex unlock/lock
+forkProcess: resource exhausted (Resource temporarily unavailable)
+Testing 1:blackbox:9:git-annex edit/commit:0
+### Error in:   1:blackbox:9:git-annex edit/commit:0
+forkProcess: resource exhausted (Resource temporarily unavailable)
+Testing 1:blackbox:9:git-annex edit/commit:1
+### Error in:   1:blackbox:9:git-annex edit/commit:1
+forkProcess: resource exhausted (Resource temporarily unavailable)
+Testing 1:blackbox:10:git-annex fix
+### Error in:   1:blackbox:10:git-annex fix
+forkProcess: resource exhausted (Resource temporarily unavailable)
+Testing 1:blackbox:11:git-annex trust/untrust/semitrust
+### Error in:   1:blackbox:11:git-annex trust/untrust/semitrust
+forkProcess: resource exhausted (Resource temporarily unavailable)
+Testing 1:blackbox:12:git-annex fsck:0
+### Error in:   1:blackbox:12:git-annex fsck:0
+forkProcess: resource exhausted (Resource temporarily unavailable)
+Testing 1:blackbox:12:git-annex fsck:1
+### Error in:   1:blackbox:12:git-annex fsck:1
+forkProcess: resource exhausted (Resource temporarily unavailable)
+Testing 1:blackbox:12:git-annex fsck:2
+### Error in:   1:blackbox:12:git-annex fsck:2
+forkProcess: resource exhausted (Resource temporarily unavailable)
+Testing 1:blackbox:13:git-annex migrate:0
+### Error in:   1:blackbox:13:git-annex migrate:0
+forkProcess: resource exhausted (Resource temporarily unavailable)
+Testing 1:blackbox:13:git-annex migrate:1
+### Error in:   1:blackbox:13:git-annex migrate:1
+forkProcess: resource exhausted (Resource temporarily unavailable)
+Testing 1:blackbox:14:git-annex unused/dropunused
+### Error in:   1:blackbox:14:git-annex unused/dropunused
+forkProcess: resource exhausted (Resource temporarily unavailable)
+Cases: 30  Tried: 30  Errors: 11  Failures: 3
+test: failed
+make: *** [test] Error 1
+
+ +On a side note, I think I found another bug in the testing. I had tested in a virtual machine in archlinux (a very recently updated version). Please see the report here [[tests fail when there is no global .gitconfig for the user]] + +[a0826293]: http://git.kitenet.net/?p=git-annex;a=commit;h=7a0826293e0ac6c0000f49a1618c1c613b909aa1 +"""]] diff --git a/doc/bugs/Remote_repo_and_set_operation_with_find.mdwn b/doc/bugs/Remote_repo_and_set_operation_with_find.mdwn new file mode 100644 index 0000000000..3e1acd4a81 --- /dev/null +++ b/doc/bugs/Remote_repo_and_set_operation_with_find.mdwn @@ -0,0 +1,6 @@ +Currently, git annex find lists files that are present in the current repository, possibly restricted to a subdirectory. But it does not seem easily possible to get this information about a remote repository. + +I would find it useful if this command understood flags that make it tell me what is present somewhere else (maybe "--on remote") and combinations of the flags ("--on remote1 --and --not-on remote2" or "--on disk1 --or --on disk2"). + +> Almost. You're looking for `--in remote`, which was added 2 months ago. +> [[done]] --[[Joey]] diff --git a/doc/bugs/S3_bucket_uses_the_same_key_for_encryption_and_hashing.mdwn b/doc/bugs/S3_bucket_uses_the_same_key_for_encryption_and_hashing.mdwn new file mode 100644 index 0000000000..2c0037c903 --- /dev/null +++ b/doc/bugs/S3_bucket_uses_the_same_key_for_encryption_and_hashing.mdwn @@ -0,0 +1,10 @@ +While using HMAC instead of "plain" hash functions is inherently more secure, it's still a bad idea to re-use keys for different purposes. + +Also, ttbomk, HMAC needs two keys, not one. Are you re-using the same key twice? + +Compatibility for old buckets and support for different ones can be maintained by introducing a new option and simply copying over the encryption key's identifier into this new option should it be missing.
+ +> Bug was filed prematurely, but was a good bit of paranoia, and gpg and +> hmac are given different secret keys [[done]] --[[Joey]] + +>> Thanks :) -- RIchiH diff --git a/doc/bugs/S3_bucket_uses_the_same_key_for_encryption_and_hashing/comment_1_dc5ae7af499203cfd903e866595b8fea._comment b/doc/bugs/S3_bucket_uses_the_same_key_for_encryption_and_hashing/comment_1_dc5ae7af499203cfd903e866595b8fea._comment new file mode 100644 index 0000000000..320fb5ef08 --- /dev/null +++ b/doc/bugs/S3_bucket_uses_the_same_key_for_encryption_and_hashing/comment_1_dc5ae7af499203cfd903e866595b8fea._comment @@ -0,0 +1,18 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-03-30T14:32:34Z" + content=""" +S3 doesn't support encryption at all, yet. + +It certainly makes sense to use a different portion of the encrypted secret key for HMAC than is used as the gpg symmetric encryption key. + +The two keys used in HMAC would be the secret key and the key/value key for the content being stored. + +There is a difficult problem with encrypting filenames in S3 buckets, and that is determining when some data in the bucket is unused for dropunused. I've considered two choices: + +1. gpg encrypt the filenames. This would allow dropunused to recover the original filenames, and is probably more robust encryption. But it would double the number of times gpg is run when moving content in/out, and to check for unused content, gpg would have to be run once for every item in the bucket, which just feels way excessive, even though it would not be prompting for a passphrase. Still, haven't ruled this out. + +2. HMAC or other hash. To determine what data was unused the same hash and secret key would have to be used to hash all filenames currently used, and then that set of hashes could be intersected with the set in the bucket.
But then git-annex could only say \"here are some opaque hashes of content that appears unused by anything in your current git repository, but there's no way, short of downloading it and examining it to tell what it is\". (This could be improved by keeping a local mapping between filenames and S3 keys, but maintaining and committing that would bring pain of its own.) +"""]] diff --git a/doc/bugs/S3_bucket_uses_the_same_key_for_encryption_and_hashing/comment_2_c62daf5b3bfcd2f684262c96ef6628c1._comment b/doc/bugs/S3_bucket_uses_the_same_key_for_encryption_and_hashing/comment_2_c62daf5b3bfcd2f684262c96ef6628c1._comment new file mode 100644 index 0000000000..dec06c89ff --- /dev/null +++ b/doc/bugs/S3_bucket_uses_the_same_key_for_encryption_and_hashing/comment_2_c62daf5b3bfcd2f684262c96ef6628c1._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 2" + date="2011-03-30T17:01:40Z" + content=""" +After mulling this over, I think actually encrypting the filenames is preferable. + +Did you consider encrypting the symmetric key with an asymmetric one? That's what TrueCrypt etc are using to allow different people access to a shared volume. This has the added benefit that you could, potentially, add new keys for data that new people should have access to while making access to old data impossible. Or keys per subdirectory, or, or, or. + +As an aside, could the same mechanism be extended to transparently encrypt data for a remote annex repo? A friend of mine is interested to host his data with me, but he wants to encrypt his data for obvious reasons. 
+"""]] diff --git a/doc/bugs/S3_bucket_uses_the_same_key_for_encryption_and_hashing/comment_3_e1f39c4af5bdb0daabf000da80858cd9._comment b/doc/bugs/S3_bucket_uses_the_same_key_for_encryption_and_hashing/comment_3_e1f39c4af5bdb0daabf000da80858cd9._comment new file mode 100644 index 0000000000..c5bb26f595 --- /dev/null +++ b/doc/bugs/S3_bucket_uses_the_same_key_for_encryption_and_hashing/comment_3_e1f39c4af5bdb0daabf000da80858cd9._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 3" + date="2011-03-30T18:15:18Z" + content=""" +Yes, encrypting the symmetric key with users' regular gpg keys is the plan. + +I don't think that encryption of content in a git annex remote makes much sense; the filenames obviously cannot be encrypted there. It's more likely that the same encryption would get used for a bup remote, or with the [[special_remotes/directory]] remote I threw in today. +"""]] diff --git a/doc/bugs/S3_bucket_uses_the_same_key_for_encryption_and_hashing/comment_4_bb6b814ab961818d514f6553455d2bf3._comment b/doc/bugs/S3_bucket_uses_the_same_key_for_encryption_and_hashing/comment_4_bb6b814ab961818d514f6553455d2bf3._comment new file mode 100644 index 0000000000..09b7a8b1ab --- /dev/null +++ b/doc/bugs/S3_bucket_uses_the_same_key_for_encryption_and_hashing/comment_4_bb6b814ab961818d514f6553455d2bf3._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 4" + date="2011-03-30T18:20:56Z" + content=""" +Picking up the automagic encryption idea for annex remotes, this would allow you to host a branchable-esque git-annex hosting service. (Nexenta with ZFS is a cheap and reliable option until btrfs becomes stable in a year or five). 
+"""]] diff --git a/doc/bugs/S3_bucket_uses_the_same_key_for_encryption_and_hashing/comment_5_5bb128f6d2ca4b5e4d881fae297fa1f8._comment b/doc/bugs/S3_bucket_uses_the_same_key_for_encryption_and_hashing/comment_5_5bb128f6d2ca4b5e4d881fae297fa1f8._comment new file mode 100644 index 0000000000..49d43ffc63 --- /dev/null +++ b/doc/bugs/S3_bucket_uses_the_same_key_for_encryption_and_hashing/comment_5_5bb128f6d2ca4b5e4d881fae297fa1f8._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 5" + date="2011-03-30T18:59:19Z" + content=""" +This is brain-storming only so the idea might be crap, but a branch could keep encrypted filenames while master keeps the real deal. This might fit into the whole scheme just nicely or break future stuff in a dozen places, I am not really sure yet. But at least I can't forget the idea, now. +"""]] diff --git a/doc/bugs/S3_bucket_uses_the_same_key_for_encryption_and_hashing/comment_6_63fb74da342751fc35e1850409c506f6._comment b/doc/bugs/S3_bucket_uses_the_same_key_for_encryption_and_hashing/comment_6_63fb74da342751fc35e1850409c506f6._comment new file mode 100644 index 0000000000..d994ca77f3 --- /dev/null +++ b/doc/bugs/S3_bucket_uses_the_same_key_for_encryption_and_hashing/comment_6_63fb74da342751fc35e1850409c506f6._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 6" + date="2011-03-30T19:02:20Z" + content=""" +OTOH, if encryption makes a bup backend more likely disregard the idea above ;) +"""]] diff --git a/doc/bugs/S3_memory_leaks.mdwn b/doc/bugs/S3_memory_leaks.mdwn new file mode 100644 index 0000000000..f612de3960 --- /dev/null +++ b/doc/bugs/S3_memory_leaks.mdwn @@ -0,0 +1,10 @@ +S3 has memory leaks + +Sending a file to S3 causes a slow memory increase toward the file size. 
+ +Copying the file back from S3 causes a slow memory increase toward the +file size. + +The author of hS3 is aware of the problem, and working on it. I think I +have identified the root cause of the buffering; it's done by hS3 so it can +resend the data if S3 sends it a 307 redirect. --[[Joey]] diff --git a/doc/bugs/Trouble_initializing_git_annex_on_NFS.mdwn b/doc/bugs/Trouble_initializing_git_annex_on_NFS.mdwn new file mode 100644 index 0000000000..8eb20baf97 --- /dev/null +++ b/doc/bugs/Trouble_initializing_git_annex_on_NFS.mdwn @@ -0,0 +1,16 @@ +The following occurs in a directory that is shared on an NFS server: + + /media/mybook/movies $ git init + Initialized empty Git repository in /media/mybook/movies/.git/ + /media/mybook/movies $ git annex init mybook-movies + init mybook-movies + git-annex: waitToSetLock: resource exhausted (No locks available) + failed + git-annex: init: 1 failed + /media/mybook/movies $ + +This happens reliably. Is there any way around it? I have shell +access on the NFS server, but it is a NAS, so I don't think it is +capable of running git-annex. + +[[done]] diff --git a/doc/bugs/Trouble_initializing_git_annex_on_NFS/comment_1_e26952373150d63b8a5d3643a2762de1._comment b/doc/bugs/Trouble_initializing_git_annex_on_NFS/comment_1_e26952373150d63b8a5d3643a2762de1._comment new file mode 100644 index 0000000000..8e951ab7c9 --- /dev/null +++ b/doc/bugs/Trouble_initializing_git_annex_on_NFS/comment_1_e26952373150d63b8a5d3643a2762de1._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-11-15T04:40:35Z" + content=""" +git-annex uses locking to avoid problems if multiple processes are run at the same time. + +I just tested on NFS, with Linux on the server and client, and it works ok. It seems your NFS client (or server) must not support fcntl locking. What OS is your NAS running?
+"""]] diff --git a/doc/bugs/Trouble_initializing_git_annex_on_NFS/comment_2_f80b10ed395738e50e345fc22c708ae5._comment b/doc/bugs/Trouble_initializing_git_annex_on_NFS/comment_2_f80b10ed395738e50e345fc22c708ae5._comment new file mode 100644 index 0000000000..bd302e6bef --- /dev/null +++ b/doc/bugs/Trouble_initializing_git_annex_on_NFS/comment_2_f80b10ed395738e50e345fc22c708ae5._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 2" + date="2011-11-15T04:46:13Z" + content=""" +You might try mounting your NAS with the mount option `local_lock=all` + +This will keep the lock files on your (I assume linux) client. If you do this make sure you don't have another client using git-annex in the same NFS directory. +"""]] diff --git a/doc/bugs/Trouble_initializing_git_annex_on_NFS/comment_3_f99e0f05950fc2fc80fdecd35e17012c._comment b/doc/bugs/Trouble_initializing_git_annex_on_NFS/comment_3_f99e0f05950fc2fc80fdecd35e17012c._comment new file mode 100644 index 0000000000..b95c795eab --- /dev/null +++ b/doc/bugs/Trouble_initializing_git_annex_on_NFS/comment_3_f99e0f05950fc2fc80fdecd35e17012c._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://cgray.myopenid.com/" + nickname="cgray" + subject="comment 3" + date="2011-11-15T05:14:03Z" + content=""" +I did a bit of research and my NAS had ancient NFS software on it. I upgraded that and things are now working as expected. Sorry for the noise. +"""]] diff --git a/doc/bugs/Unfortunate_interaction_with_Calibre.mdwn b/doc/bugs/Unfortunate_interaction_with_Calibre.mdwn new file mode 100644 index 0000000000..d00a6720cd --- /dev/null +++ b/doc/bugs/Unfortunate_interaction_with_Calibre.mdwn @@ -0,0 +1,21 @@ +# Calibre + +Calibre is a somewhat popular eBook management package that's also free software. + +Install via + # apt-get install calibre + +There is a somewhat unfortunate interaction between Calibre and git-annex... 
+ +* git-annex makes its files become read-only. By the way, that's not quite obvious from the documentation; I suggest making that more prominent. +* Calibre modifies files (not quite sure of semantics, how, or why) when doing various operations, notably such as when copying a book from one's library to one's portable reading device. + +These don't play well together, sadly. + +I'd expect most of the issue to sit on the Calibre side, and have reported it as a bug. +[Calibre bug #739045](https://bugs.launchpad.net/calibre/+bug/739045) +Preliminary indication is that they're treating it as a functionality change they'll decline to fix. Which isn't entirely unreasonable - I anticipated as much, and I don't want to treat that as a bad/wrong decision. + +However, I think it's: +* Unfortunate, as fitting Calibre together with git-annex seems like a neat idea. +* Useful to make sure that this kind of "doesn't play well together" condition is documented, even if only as a bug report. diff --git a/doc/bugs/Unfortunate_interaction_with_Calibre/comment_1_7cb5561f11dfc7726a537ddde2477489._comment b/doc/bugs/Unfortunate_interaction_with_Calibre/comment_1_7cb5561f11dfc7726a537ddde2477489._comment new file mode 100644 index 0000000000..35a2cdb3fe --- /dev/null +++ b/doc/bugs/Unfortunate_interaction_with_Calibre/comment_1_7cb5561f11dfc7726a537ddde2477489._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 1" + date="2011-03-21T13:15:03Z" + content=""" +Maybe I will run into issues myself somewhere down the road, but generally speaking, I really really like the fact that files are immutable by default. 
+"""]] diff --git a/doc/bugs/Unfortunate_interaction_with_Calibre/comment_2_b8ae4bc589c787dacc08ab2ee5491d6e._comment b/doc/bugs/Unfortunate_interaction_with_Calibre/comment_2_b8ae4bc589c787dacc08ab2ee5491d6e._comment new file mode 100644 index 0000000000..719451976b --- /dev/null +++ b/doc/bugs/Unfortunate_interaction_with_Calibre/comment_2_b8ae4bc589c787dacc08ab2ee5491d6e._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 2" + date="2011-03-31T19:32:25Z" + content=""" +One option would be to use the new [[news/sharebox_a_FUSE_filesystem_for_git-annex]], which would hide the immutable file details from Calibre, and proxy any changes it made through to git-annex as a series of `git annex unlock; modify; git-annex lock` +"""]] diff --git a/doc/bugs/WORM:_Handle_long_filenames_correctly.mdwn b/doc/bugs/WORM:_Handle_long_filenames_correctly.mdwn new file mode 100644 index 0000000000..3c9374100c --- /dev/null +++ b/doc/bugs/WORM:_Handle_long_filenames_correctly.mdwn @@ -0,0 +1,4 @@ +I have files with very long filenames on an xfs at home. On my laptop the annex should have been checked out on an encfs, but there filenames can't be as long as on the xfs. So perhaps it would be good to limit the keysize to a sane substring of the filename e.g. use only the first 120 characters. + +> Since there seems no strong argument for a WORM100, and better options +> exist, closing. 
[[done]] --[[Joey]] diff --git a/doc/bugs/WORM:_Handle_long_filenames_correctly/comment_1_77aa9cafbe20367a41377f3edccc9ddb._comment b/doc/bugs/WORM:_Handle_long_filenames_correctly/comment_1_77aa9cafbe20367a41377f3edccc9ddb._comment new file mode 100644 index 0000000000..41d3afb3eb --- /dev/null +++ b/doc/bugs/WORM:_Handle_long_filenames_correctly/comment_1_77aa9cafbe20367a41377f3edccc9ddb._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-04-08T17:14:25Z" + content=""" +Seems like you probably have files in git with nearly as long filenames as the key files. Course, you can rename those yourself. + +This couldn't be changed directly in WORM without some ugly transition, but it would be possible to implement it as a WORM100 or so. OTOH, if you're going to git annex migrate, you might as well use SHA1. +"""]] diff --git a/doc/bugs/WORM:_Handle_long_filenames_correctly/comment_2_fe735d728878d889ccd34ec12b3a7dea._comment b/doc/bugs/WORM:_Handle_long_filenames_correctly/comment_2_fe735d728878d889ccd34ec12b3a7dea._comment new file mode 100644 index 0000000000..d00191f9d8 --- /dev/null +++ b/doc/bugs/WORM:_Handle_long_filenames_correctly/comment_2_fe735d728878d889ccd34ec12b3a7dea._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 2" + date="2011-04-08T22:02:41Z" + content=""" +What if your files have the same prefix and it happens to be 100 chars long? This can not be solved within WORM, but as Joey pointed out, SHA* exists. 
+"""]] diff --git a/doc/bugs/WORM:_Handle_long_filenames_correctly/comment_3_2bf0f02d27190578e8f4a32ddb195a0a._comment b/doc/bugs/WORM:_Handle_long_filenames_correctly/comment_3_2bf0f02d27190578e8f4a32ddb195a0a._comment new file mode 100644 index 0000000000..d9c291b178 --- /dev/null +++ b/doc/bugs/WORM:_Handle_long_filenames_correctly/comment_3_2bf0f02d27190578e8f4a32ddb195a0a._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 3" + date="2011-04-09T20:11:59Z" + content=""" +I wouldn't say it's completly impossible for a WORM100 to work. It would just have the contract that the pair of mtime+100chars has to be unique for each unique piece of data. + +But, I have yet to be convinced there's any point, since SHA1 exists. +"""]] diff --git a/doc/bugs/WORM:_Handle_long_filenames_correctly/comment_4_8f7ba9372463863dda5aae13205861bf._comment b/doc/bugs/WORM:_Handle_long_filenames_correctly/comment_4_8f7ba9372463863dda5aae13205861bf._comment new file mode 100644 index 0000000000..5c08cad6e0 --- /dev/null +++ b/doc/bugs/WORM:_Handle_long_filenames_correctly/comment_4_8f7ba9372463863dda5aae13205861bf._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 4" + date="2011-04-09T23:45:28Z" + content=""" +mtime+100chars can still get collisions and a _lot_ easier than even SHA1. This introduces more problems that it solves, imo. 
+"""]] diff --git a/doc/bugs/__39__annex_add__39___fails_to___39__git_add__39___for_parent_relative_path.mdwn b/doc/bugs/__39__annex_add__39___fails_to___39__git_add__39___for_parent_relative_path.mdwn new file mode 100644 index 0000000000..f129abf623 --- /dev/null +++ b/doc/bugs/__39__annex_add__39___fails_to___39__git_add__39___for_parent_relative_path.mdwn @@ -0,0 +1,15 @@ +The following commands show the failure: + +$ mkdir d && touch d/f + +$ mkdir g && cd g && git annex add ../d/f + +add ... ok + +error: Invalid path '.git/annex/objects/Jx/... + +... + +Then it seems it is enough to 'git add ../d/f' to complete the operation. + +> Thanks for reporting, [[fixed|done]] --[[Joey]] diff --git a/doc/bugs/add_range_argument_to___34__git_annex_dropunused__34___.mdwn b/doc/bugs/add_range_argument_to___34__git_annex_dropunused__34___.mdwn new file mode 100644 index 0000000000..bbe6007a87 --- /dev/null +++ b/doc/bugs/add_range_argument_to___34__git_annex_dropunused__34___.mdwn @@ -0,0 +1,18 @@ +The command `git annex dropunused` currently takes a number, as referenced in output of last `git annex unused` command. + +When you want to drop all, or a range, this may be annoying, as you have to specify each number on the command line. + +A range argument, such as `1-1845`, possibly combined with other argument types (Cf. many print dialogues: `1,3,5-7,9`) would be great. + +I work around this lack as I want to drop all unused files anyway by something like this: + + git annex unused | grep -o -P "^ [0-9]+" | xargs git annex dropunused + +> It's designed to be used with `seq`. There's an example in the +> [[walkthrough|walkthrough/unused_data]], and of course multiple seq calls can be used to +> specifiy multiple ranges. So: + + git annex dropunused `seq 1 9` `seq 11 1845` + +> I don't see adding my own range operations to be an improvement worth +> making; it'd arguably only be a complication. 
--[[Joey]] [[done]] diff --git a/doc/bugs/add_script-friendly_output_options.mdwn b/doc/bugs/add_script-friendly_output_options.mdwn new file mode 100644 index 0000000000..7d7bdfc51a --- /dev/null +++ b/doc/bugs/add_script-friendly_output_options.mdwn @@ -0,0 +1,19 @@ +I have a need to use git-annex from a larger program. It'd be great if the information output by some of the commands that is descriptive (for example, whereis) could be sent to stdout in a machine-readable format like (preferably) JSON, or XML. That way I can simply read in the output of the command and use the data directly instead of having to parse it via regexes or other such string manipulation. + +This could perhaps be triggered by a --json or --xml flag to the relevant commands. + +> This is [[done]], --json is supported by all commands, more or less. +> +> Caveats: +> +> * the version, status, and find commands produce custom output and so +> no json. This could change for version and status; find needs to just +> be a simple list of files, I think +> * The "note" fields may repeat multiple times per object with different +> notes and are of course not machine readable, and subject to change. +> * Output of helper commands like rsync is not diverted away, and +> could clutter up the json output badly. Should only affect commands +> that transfer data. And AFAICS, wget and rsync both output their +> progress displays to stderr, so shouldn't be a problem. +> +> --[[Joey]] diff --git a/doc/bugs/annex_add_in_annex.mdwn b/doc/bugs/annex_add_in_annex.mdwn new file mode 100644 index 0000000000..e12826f00e --- /dev/null +++ b/doc/bugs/annex_add_in_annex.mdwn @@ -0,0 +1,6 @@ +I accidentally annexed some files in the .git-annex directory and it cause git-annex/git to be very unhappy when i pulled the repo to somewhere else. It might be worth teaching git-annex to disallow annex'ing of files inside the .git-annex/.git directories. 
+ +> There is a guard against `git annex add .git-annex/foo`, but it doesn't +> notice `cd .git-annex; git annex add foo`. --[[Joey]] + +> Now fixed, by removing the .git-annex directory. [[done]] --[[Joey]] diff --git a/doc/bugs/annex_unannex__47__uninit_should_handle_copies.mdwn b/doc/bugs/annex_unannex__47__uninit_should_handle_copies.mdwn new file mode 100644 index 0000000000..e830f11564 --- /dev/null +++ b/doc/bugs/annex_unannex__47__uninit_should_handle_copies.mdwn @@ -0,0 +1,20 @@ +Just starting using v3, even more awesome, thanks! + +With git-annex, I take the habit to do copies of files without restriction, as they end up into (cheap) symlink copies. +However, if 2 copies are unannexed, only one is restored, the other becomes a broken symlink, so I kind of lose some information +(my use case: I have a repo on which I recently started using annex, but most of the files, which i would want to be annexed, are only in git, +so my plan is to uninit this repo, delete the .git dir, and then annex everything, as I don't mind the history). + +Rafaël + +> The only way for git-annex to support this in its current state would be +> for the unannex command to copy the file content from the annex, rather +> than moving it out. Then multiple links to the same content could be +> unannexed. +> +> But, this would be slower, and would depend on a later `unused` and +> `dropunused` to actually remove the content. While doable, my use case +> for unannex is more to quickly undo a mistaken add, and it's unlikely there +> are multiple symlinks to the same content in this situation.
--[[Joey]] + +[[!tag done]] diff --git a/doc/bugs/annex_unannex__47__uninit_should_handle_copies/comment_1_c896ff6589f62178b60e606771e4f2bf._comment b/doc/bugs/annex_unannex__47__uninit_should_handle_copies/comment_1_c896ff6589f62178b60e606771e4f2bf._comment new file mode 100644 index 0000000000..839992477a --- /dev/null +++ b/doc/bugs/annex_unannex__47__uninit_should_handle_copies/comment_1_c896ff6589f62178b60e606771e4f2bf._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawnpdM9F8VbtQ_H5PaPMpGSxPe_d5L1eJ6w" + nickname="Rafaël" + subject="comment 1" + date="2011-07-04T16:57:25Z" + content=""" +You convince me for unannex, but isn't the goal of uninit to revert all annex operations? In the current state, a clean revert is not possible (because of the broken symlinks after uninit). Instead of copying, using hard links is out of question? + +For my needs, is the command \"git annex unlock .\" (from the root of the repo) a correct workaround? +"""]] diff --git a/doc/bugs/annex_unannex__47__uninit_should_handle_copies/comment_2_9249609f83f8e9c7521cd2f007c1a39e._comment b/doc/bugs/annex_unannex__47__uninit_should_handle_copies/comment_2_9249609f83f8e9c7521cd2f007c1a39e._comment new file mode 100644 index 0000000000..21c0c449b0 --- /dev/null +++ b/doc/bugs/annex_unannex__47__uninit_should_handle_copies/comment_2_9249609f83f8e9c7521cd2f007c1a39e._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawmJfIszzreLNvCqzqzvTayA9_9L6gb9RtY" + nickname="Joey" + subject="comment 2" + date="2011-07-04T20:25:38Z" + content=""" +Indeed, uninit needed to be improved. I've done so. Also, unannex --fast can be used to make hard links to content left in the annex. 
+"""]] diff --git a/doc/bugs/backend_version_upgrade_leaves_repo_unusable.mdwn b/doc/bugs/backend_version_upgrade_leaves_repo_unusable.mdwn new file mode 100644 index 0000000000..122224a8f3 --- /dev/null +++ b/doc/bugs/backend_version_upgrade_leaves_repo_unusable.mdwn @@ -0,0 +1,72 @@ +foo is a local repo, bar is a bare remote. + +I upgraded foo's git-annex to 0.20110325 and upgraded a local repo backend +to version 2. I then ran `git annex copy . --to bar` and checked the +remote. This created WORM:SHA512--123123 files in annex/objects. +Understandable but unwanted. So I upgraded git-annex on bar's machine, as +well. + + % git annex copy . --to bar + copy quux (checking bar) git-annex-shell: Repository version 1 is not supported. Upgrade this repository: git-annex upgrade (to bar) + git-annex-shell: Repository version 1 is not supported. Upgrade this repository: git-annex upgrade + rsync: connection unexpectedly closed (0 bytes received so far) [sender] + rsync error: error in rsync protocol data stream (code 12) at io.c(601) [sender=3.0.7] + + rsync failed -- run git annex again to resume file transfer + failed + +Running `git annex upgrade` on bar's machine I get: + + % git annex upgrade + upgrade (v1 to v2) (moving content...) git-annex: Prelude.read: no parse + +Again, bar is a bare repo. +Running the copy job again, I am still getting the same error as above (as expected). Partial contents of annex/objects on bar: + + [...] + SHA512:123 + WORM:SHA512--234 + [...] + + +-- RichiH + +> Upgrading bare repos to v2 generally works fine, so I actually need +> to see the full content of annex/, not a fragment, in order to debug this. +> (Filename contents I don't need to see.) Feel free to email me the details at +> joey@kitenet.net if you don't want to post them here. --[[Joey]] + +>> Sent. -- RichiH + +>>> Ok, I'm going to go work on my reading comprehension. I see now +>>> that you +>>> explained the problem pretty well. 
The problem is caused by these +>>> few weird v1 mixed with v2 keys in the annex. +>>> Ones like "annex/objects/WORM:SHA512--$sha512". +>>> +>>> That's a v1 key, but a corrupt form of the key; it's missing the +>>> size and mtime fields that all WORM keys have in v1. And +>>> the filename is itself a key, a v2 SHA512 key. These were +>>> created when you did the `git annex copy` to the v1 bare repo. +>>> In v2, git-annex-shell takes a full key object, while in v1, +>>> it takes a key name and a backend name. This incompatibility +>>> leads to the weird behavior seen. +>>> +>>> I had suggested you delete data.. don't. On second thought, +>>> you shouldn't delete anything. I'll simply make the v2 upgrade +>>> detect and work around this bug. +>>> --[[Joey]] + +>>>> This should be fixed in current git. The scrambled keys will be +>>>> fixed up on upgrade. Thanks for your patience! [[done]] --[[Joey]] + +>>>>> I should stop reading your answers via git; by the time I got to +>>>>> "second thoughts", I had already deleted the files & directories +>>>>> in question, upgraded the bare repo and was busy uploading from my +>>>>> local repo. I agree that taking care of this in the upgrade code +>>>>> is the cleanest approach, by the way. +>>>>> No need to thank me for my patience; thank you for your quickness! +>>>>> RichiH +>>>>> +>>>>> PS: If I get a handle on the mtime issue in the SHA backend, git +>>>>> annex will be pretty much perfect :) diff --git a/doc/bugs/bad_behaviour_with_file_names_with_newline_in_them.mdwn b/doc/bugs/bad_behaviour_with_file_names_with_newline_in_them.mdwn new file mode 100644 index 0000000000..530a8da5d5 --- /dev/null +++ b/doc/bugs/bad_behaviour_with_file_names_with_newline_in_them.mdwn @@ -0,0 +1,5 @@ +Found this out the hard way. See the comment in the below post for what happens.
+ +[[/forum/git_annex_add_crash_and_subsequent_recovery/]] + +> [[fixed|done]] --[[Joey]] diff --git a/doc/bugs/bad_behaviour_with_file_names_with_newline_in_them/comment_1_92dfe6e9089c79eb64e2177fb135ef55._comment b/doc/bugs/bad_behaviour_with_file_names_with_newline_in_them/comment_1_92dfe6e9089c79eb64e2177fb135ef55._comment new file mode 100644 index 0000000000..7ff8f8e3d9 --- /dev/null +++ b/doc/bugs/bad_behaviour_with_file_names_with_newline_in_them/comment_1_92dfe6e9089c79eb64e2177fb135ef55._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-12-06T16:49:32Z" + content=""" +This only happens with the WORM backend (or possibly with SHA1E if the file's extension has a newline). + +The problem is not the newline in the file, but the newline in the key generated for the file. It's probably best to just disallow such keys being created. +"""]] diff --git a/doc/bugs/bare_git_repos.mdwn b/doc/bugs/bare_git_repos.mdwn new file mode 100644 index 0000000000..5e9100acfe --- /dev/null +++ b/doc/bugs/bare_git_repos.mdwn @@ -0,0 +1,29 @@ +It would be nice if git-annex could be used in bare git repos. +However, that is not currently supported. Problems include: + +* git-annex often does not read a git repo's config before touching it, + so it doesn't know if the repo is bare or not + (reading the config when operating on ssh repos would be a pain and SLOW; + I had some of that code in as of 1aa19422ac8748eeff219ac4f46df166dae783c5, + but ripped it all out) +* .. which results in creating `.git/annex` in a bare repo, which mightily + confuses git (so it will complain that the bare repo is not + a git repo at all!) +* `.git-annex/` needs to have state recorded to it and committed, and that + is not possible with a bare repo. (If [[todo/branching]] were done, + that might be fixed.) 
(now fixed) + +---- + +Update: Now that git-annex-shell is used for accessing remote repos, +it would be possible to add smarts about bare repos there, and avoid +some of the above problems. Probably only the state recording problem +remains. + +A possible other approach to the state recording repo is to not +record state changes on the remote in that case. Git-annex already +records remote state changes locally whenever it modifies the state of a +remote. --[[Joey]] + +> And... [[done]]! See [[/bare_repositories]] for current status +> and gotchas. --[[Joey]] diff --git a/doc/bugs/build_issue_with_latest_release_0.20110522-1-gde817ba.mdwn b/doc/bugs/build_issue_with_latest_release_0.20110522-1-gde817ba.mdwn new file mode 100644 index 0000000000..a7bae50b8b --- /dev/null +++ b/doc/bugs/build_issue_with_latest_release_0.20110522-1-gde817ba.mdwn @@ -0,0 +1,14 @@ +A recent checkout of git-annex fails to build for me (I've installed the new dependencies as well) + +
+[70 of 81] Compiling Command.DropUnused ( Command/DropUnused.hs, Command/DropUnused.o )
+[71 of 81] Compiling Command.Status   ( Command/Status.hs, Command/Status.o )
+
+Command/Status.hs:133:37: Not in scope: `swap'
+make: *** [git-annex] Error 1
+
+ +it fails on OSX 10.6.x with ghc 6.12.3 and a corresponding haskell-platform install. I ran a bisect and found that commit 75a3f5027f74565d909fb940893636d081d9872a seems to have broken git-annex for me, reverting the commit allows me to build git-annex, I have not run the tests to verify everything is working correctly though. + +> Probably `swap` appeared only in a newer GHC. I've reverted to avoid a +> versioned build dependency. [[done]] --[[Joey]] diff --git a/doc/bugs/building_on_lenny.mdwn b/doc/bugs/building_on_lenny.mdwn new file mode 100644 index 0000000000..48386bde47 --- /dev/null +++ b/doc/bugs/building_on_lenny.mdwn @@ -0,0 +1,80 @@ +hi, + +I am trying to build git annex on lenny. + +I checked out the latest from git c88d4939453845efee04da811d64aa41046f9c11, +installed all the packages (some from backports) as required by dpkg-buildpackage + +Then I get this: + + ... + mkdir -p build + ghc -odir build -hidir build --make git-annex + [ 1 of 19] Compiling Utility ( Utility.hs, build/Utility.o ) + [ 2 of 19] Compiling GitRepo ( GitRepo.hs, build/GitRepo.o ) + [ 3 of 19] Compiling GitQueue ( GitQueue.hs, build/GitQueue.o ) + [ 4 of 19] Compiling TypeInternals ( TypeInternals.hs, build/TypeInternals.o ) + [ 5 of 19] Compiling Types ( Types.hs, build/Types.o ) + [ 6 of 19] Compiling Annex ( Annex.hs, build/Annex.o ) + [ 7 of 19] Compiling Locations ( Locations.hs, build/Locations.o ) + [ 8 of 19] Compiling UUID ( UUID.hs, build/UUID.o ) + [ 9 of 19] Compiling LocationLog ( LocationLog.hs, build/LocationLog.o ) + [10 of 19] Compiling Core ( Core.hs, build/Core.o ) + [11 of 19] Compiling Backend.URL ( Backend/URL.hs, build/Backend/URL.o ) + [12 of 19] Compiling Backend ( Backend.hs, build/Backend.o ) + + Backend.hs:114:50: + Not in scope: type constructor or class `SomeException' + make[1]: *** [git-annex] Error 1 + make[1]: Leaving directory `/home/cstamas/tmp/git-annex' + dh_auto_build: make -j1 returned exit code 2 + make: *** [build] Error 2 + 
dpkg-buildpackage: failure: debian/rules build gave error exit status 2 + +I will try to check the mentioned file for error, but I do not know how to program in haskell. + +Thanks for your help! --[[cstamas]] + +> Newer versions of ghc changed their exception handling types, and +> I coded git-annex to use the new style and not the old. gch6 6.12 will +> work. I do not think there is a backport available though. --[[Joey]] +> +> Ok, found and deployed a workaround. It is not tested. Let me know how it +> works for you. --[[Joey]] + +>> I did a git pull and now I get: + + mkdir -p build + ghc -cpp -odir build -hidir build --make git-annex + [ 1 of 20] Compiling Portability ( Portability.hs, build/Portability.o ) + + Portability.hs:13:21: + Not in scope: type constructor or class `Exception' + make[1]: *** [git-annex] Error 1 + make[1]: Leaving directory `/home/cstamas/tmp/git-annex' + dh_auto_build: make -j1 returned exit code 2 + make: *** [build] Error 2 + dpkg-buildpackage: failure: debian/rules build gave error exit status 2 + +>> --[[cstamas]] + +>>> Ok well, I'm not going to try to reimplement all of +>>> Control.Exception.Extensible so I've made it use it. You will have to +>>> figure out how to install that library yourself though, I don't know +>>> how to use cabal with such an old ghc. Library is here: +>>> +>>> and I asked how to get it on stable here: +>>> --[[Joey]] + +>>>> I made some effort with cabal on lenny. I can install (and I did it) cabal +>>>> from squeeze as dependencies are ok. Then I installed extensible +>>>> exceptions, but it places it in some local dir that git-annex's installer +>>>> (or ghc itself) does not know about. +>>>> +>>>> Later I realized that *only* for the compilation ghc6 and its friends are +>>>> needed. So I built the package on my other machine running squeeze. Then +>>>> resulting deb packages cleanly installs on lenny +>>>> +>>>> For me this is OK. Thanks! 
--[[cstamas]] + +[[done]] diff --git a/doc/bugs/case_sensitivity_on_FAT.mdwn b/doc/bugs/case_sensitivity_on_FAT.mdwn new file mode 100644 index 0000000000..682acc71d7 --- /dev/null +++ b/doc/bugs/case_sensitivity_on_FAT.mdwn @@ -0,0 +1,49 @@ +I was copying files to a directory remote with `git annex copy`. Out of 114 files, 9 of them failed with no message, just: + + copy data/foo.dat (to usbdrive...) failed + copy data/bar.dat (to usbdrive...) failed + +According to strace: + + 31338 mkdir("/media/annex/Zp/9v/SHA256-s1362999320--d650297c8cf8c2dc0575110a52d0c5cc0ff266f294a0599f85796a6b44b23492", 0777) = -1 ENOENT (No such file or directory) + 31338 mkdir("/media/annex/Zp/9v", 0777) = -1 ENOENT (No such file or directory) + 31338 mkdir("/media/annex/Zp", 0777) = -1 EEXIST (File exists) + 31338 stat("/media/annex/Zp", 0x7f8449f170d0) = -1 ENOENT (No such file or directory) + +The filesystem is FAT32 and has weird case semantics. This was mounted by udisks with its default options: + + /dev/sdb1 on /media/annex type vfat (rw,nosuid,nodev,uhelper=udisks,uid=1000,gid=1000,shortname=mixed,dmask=0077,utf8=1,showexec) + +I wonder if the directory remote should use hashDirLower instead of hashDirMixed? + +> git-annex intentionally uses the same layout for directory and rsync +> special remotes as it does for the .git/annex directory. As far +> as I know it works ok on (truly) case-insensitive filesystems. +> +> Based on your strace, if you `ls /media/annex/Zp`, you will see +> "No such file or directory", but if you `mkdir /media/annex/Zp` it will +> fail with "File exists". Doesn't make much sense to me. +> +> The (default) VFAT mount option shortname=mixed causes this behavior. +> With shortname=lower it works ok.
--[[Joey]] +> +>> So, the options for fixing this bug seem to be to fix Linux (which would +>> be a good idea IMHO but I don't really want to go there), or generally +>> convert git-annex to using lowercase for its hashing (which would be a +>> large amount of pain to rewrite all the symlinks in every git repo), +>> or some special hack around this problem. +>> +>> I've put in a workaround for the problem in the directory special +>> remote; it will use mixed case but fall-back to lowercase as necessary. +>> +>> That does leave the case of a bare git repository with annexed content +>> stored on VFAT. More special casing could fix it, but that is, I +>> think, an unusual configuration. Leaving the bug open for that case, +>> and for the even more unlikely configuration of a rsync special remote +>> stored on VFAT. --[[Joey]] + +>>> Bare repositories now use lowercase. rsync is the only remaining +>>> unsupported possibility. --[[Joey]] +>>>> Everything now uses lowercase, with the exception of non-bare +>>>> repos, which cannot be on FAT anyway due to using symlinks. [[done]] +>>>> --[[Joey]] diff --git a/doc/bugs/check_for_curl_in_configure.hs.mdwn b/doc/bugs/check_for_curl_in_configure.hs.mdwn new file mode 100644 index 0000000000..a880392bf7 --- /dev/null +++ b/doc/bugs/check_for_curl_in_configure.hs.mdwn @@ -0,0 +1,92 @@ +[[!meta title="arbitrary/configurable backends"]] + +(Retitling as this has drifted..) + +--- + +I thought this might be useful, since curl is being used for the URL backend, it might be worth checking for it's existence. + +
+diff --git a/configure.hs b/configure.hs
+index 772ba54..1a563e0 100644
+--- a/configure.hs
++++ b/configure.hs
+@@ -13,6 +13,7 @@ tests = [
+        , TestCase "uuid generator" $ selectCmd "uuid" ["uuid", "uuidgen"]
+        , TestCase "xargs -0" $ requireCmd "xargs_0" "xargs -0 /dev/null"
++       , TestCase "curl" $ requireCmd "curl" "curl --version >/dev/null"
+        , TestCase "unicode FilePath support" $ unicodeFilePath
+        ] ++ shaTestCases [1, 256, 512, 224, 384]
+
+ +> Well, curl is an optional extra, so requireCmd is too strong. Changed +> to testCmd and applied, thank you! +> +> I thought about actually *using* the resulting SysConfig.curl +> to disable the URL backend if False.. but probably it's better +> to just let it fail if curl is not available. Although, if we wanted +> to add a check for wget or something and use it when curl was not +> available, that might be worth doing. --[[Joey]] + +>> I was thinking that is it worth doing a generic "stat", "delete", "get" +>> and "put" options, I do like the idea of having the possibility of +>> being about to use completely arbitrary storage systems or arbitrary +>> transfer systems. If there was the capability of doing so it would be +>> interesting to see possibilities of using aria2 for using something +>> like bittorrent as backend, or using something like irods or some +>> grid storage system as the storage archive. It's just an idea as +>> I have seen it implemented quite well in irods. + +>>> I'm unsure about the idea of having a backend where that is +>>> parameterized. It would mean that one annex's GENERIC-foo key +>>> might be entirely different from another's key with the same backend +>>> and details. And a misconfiguration could get data the wrong +>>> way and get the wrong data, etc. +>>> +>>> I mostly look at the URL backend as an example that can be modified to +>>> make this kind of custom backend. You already probably know enough to +>>> make a TORRENT backend where keys are the urls to torrents to download +>>> with `aria2c --follow-torrent=mem`. +>>> +>>> I am also interested in doing backends that use eg, cloud storage. +>>> A S3 backend that could upload files to S3 in addition to downloading +>>> them, for example, would be handy. --[[Joey]] + +>>>> So, rather than use backends to do this, it instead made more sense +>>>> to make them [[special_remotes]]. 
The URL backend remains a bit +>>>> of a special case, and a bittorrent backend that downloaded a file +>>>> from a bittorrent url would still be a good use of backend, but for +>>>> storing files in external data stores like S3, making it a remote +>>>> makes better sense. I think I can close this bug now, [[done]] +>>>> --[[Joey]] + +also in Backend/URL.hs is it worth making a minor change to the way curl is called (I'm not sure if the following is correct or not) + +> It's correct, typewise, but I don't see any real reason to bother +> with the change. But I do appreciate patches, which have been rare +> so far, probably because of Haskell.. :) --[[Joey]] + +>> heh agreed + +
+diff --git a/Backend/URL.hs b/Backend/URL.hs
+index 29dc8fe..4afcf86 100644
+--- a/Backend/URL.hs
++++ b/Backend/URL.hs
+@@ -50,10 +50,13 @@ dummyFsck _ _ _ = return True
+ dummyOk :: Key -> Annex Bool
+ dummyOk _ = return True
+ 
++curl :: [CommandParam] -> IO Bool
++curl = boolSystem "curl"
++
+ downloadUrl :: Key -> FilePath -> Annex Bool
+ downloadUrl key file = do
+        showNote "downloading"
+        showProgress -- make way for curl progress bar
+-       liftIO $ boolSystem "curl" [Params "-# -o", File file, File url]
++       liftIO $ curl [Params "-# -o", File file, File url]
+        where
+                url = join ":" $ drop 1 $ split ":" $ show key 
+
diff --git a/doc/bugs/concurrent_git-annex_processes_can_lead_to_locking_issues.mdwn b/doc/bugs/concurrent_git-annex_processes_can_lead_to_locking_issues.mdwn new file mode 100644 index 0000000000..2485e7b19e --- /dev/null +++ b/doc/bugs/concurrent_git-annex_processes_can_lead_to_locking_issues.mdwn @@ -0,0 +1,53 @@ +When two git-annex processes are running and both modifying the git-annex +branch, it's possible one will fail due to git's locking. When this +happens, git-annex has already recorded its state in the journal (so no +data is lost), but git-annex does crash, which can be surprising. + +I feel that, in general, multiple git-annex processes should be able to run +concurrently. A big lock around all commands, or even all +repository-modifying commands is a bad idea. Also, it's probably best to +only worry about locking conflicts editing the git-annex branch. While `git +annex add` and a few other commands make changes to the main git repo, +and can have similar locking issues, so can any git commands that stage +changes (I think.. check). + +Probably should KISS. Just add a lock file that is taken before changes to +the git-annex branch, and if it's locked, wait. Changes to the git-annex +branch tend to happen quickly (unless it's committing an enormous set of +changes, and even that is relatively fast), so waiting seems ok. --[[Joey]] + +---- + +Commit 7981eb4cb512fbe3c49a3dd165c31be14ae4bc49 is more pessimistic, +describes some other potential issues. + +* The journal needs to be emptied (done) and kept locked (not done) during + a merge, since a merge operates at a level below the journal, and any + changes that are journaled during a merge can overwrite changes merged + in from another branch. + +* Two git-annex processes can be doing conflicting things and inconsistent + information be written to the journal. + + - One example would be concurrent get and drop of the same key. + But could this really race? 
If the key was already present, the get + would do nothing, so record no changes. If the key was not yet present, + the drop would do nothing, and record no changes. + + - Instead, consider two copies of a key to different locations. If the + slower copy starts first and ends last, it could cache the location + info, add the new location, and lose the other location it was copied to. + Tested it and the location is not cached during the whole copy (logChange + reads the current log immediately before writing), so this + race's window is very small -- but does exist. + +---- + +## Updated plan + +Make Branch.change transactional, so it takes a lock, reads a file, +applies a function to it, and writes the changed file. + +Make Branch.update hold the same lock. + +> [[Done]]. diff --git a/doc/bugs/configure_script_should_detect_uuidgen_instead_of_just_uuid.mdwn b/doc/bugs/configure_script_should_detect_uuidgen_instead_of_just_uuid.mdwn new file mode 100644 index 0000000000..2b9c773678 --- /dev/null +++ b/doc/bugs/configure_script_should_detect_uuidgen_instead_of_just_uuid.mdwn @@ -0,0 +1,6 @@ +On RHEL5 (and clones) systems uuidgen is available as an alternative to +uuid, the configure script fails, it should probably detect either uuid or +uuidgen, or let the user decide? - also uuidgen behaves differently from +uuid on debian. + +> uuidgen is now supported. --[[Joey]] [[done]] diff --git a/doc/bugs/conflicting_haskell_packages.mdwn b/doc/bugs/conflicting_haskell_packages.mdwn new file mode 100644 index 0000000000..5528fad824 --- /dev/null +++ b/doc/bugs/conflicting_haskell_packages.mdwn @@ -0,0 +1,17 @@ +The compilation command should state which packages are used and avoid the default mechanism that automatically searches for them. + +This can be done by the flags -hide-packages and then -package foo + +> My ghc does not have a `--hide-packages` option. +> +> Could you just show the build problem that you are suggesting I work +> around? 
--[[Joey]] + + +> Thanks npouillard, I see the problem now. +> +> +> I've added "-ignore-package monads-fd" to GHCFLAGS. I hope I don't +> really have to hide all packages and individually turn them back on; +> surely this monads-fd/mtl conflict is an exception, and Haskell's module +> system is not a mess of conflicting modules? --[[Joey]] [[done]] diff --git a/doc/bugs/conflicting_haskell_packages/comment_1_e552a6cc6d7d1882e14130edfc2d6b3b._comment b/doc/bugs/conflicting_haskell_packages/comment_1_e552a6cc6d7d1882e14130edfc2d6b3b._comment new file mode 100644 index 0000000000..42f44bf9cf --- /dev/null +++ b/doc/bugs/conflicting_haskell_packages/comment_1_e552a6cc6d7d1882e14130edfc2d6b3b._comment @@ -0,0 +1,24 @@ +[[!comment format=mdwn + username="http://ertai.myopenid.com/" + nickname="npouillard" + subject="how to reproduce the package conflict issue" + date="2011-02-07T14:12:43Z" + content=""" +If you install the monads-fd package (with cabal install for instance), then you can no longer build git-annex: + +
+./configure
+  checking cp -a... yes
+  checking cp -p... yes
+  checking cp --reflink=auto... yes
+  checking uuid generator... uuid
+  checking xargs -0... yes
+  checking rsync... yes
+ghc -O2 -Wall --make git-annex
+
+Annex.hs:22:7:
+    Ambiguous module name `Control.Monad.State':
+      it was found in multiple packages: monads-fd-0.2.0.0 mtl-2.0.1.0
+make: *** [git-annex] Error 1
+
+"""]] diff --git a/doc/bugs/copy_fast_confusing_with_broken_locationlog.mdwn b/doc/bugs/copy_fast_confusing_with_broken_locationlog.mdwn new file mode 100644 index 0000000000..69fbc816f0 --- /dev/null +++ b/doc/bugs/copy_fast_confusing_with_broken_locationlog.mdwn @@ -0,0 +1,6 @@ +Conversation moved from [[tips/recover_data_from_lost+found]] +to a proper bug. --[[Joey]] + +(Unfortunately that scrambled the comment creation times and thus order.) + +> Added a message [[done]] --[[Joey]] diff --git a/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_10_435f87d54052f264096a8f23e99eae06._comment b/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_10_435f87d54052f264096a8f23e99eae06._comment new file mode 100644 index 0000000000..ec24c478d8 --- /dev/null +++ b/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_10_435f87d54052f264096a8f23e99eae06._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 10" + date="2011-05-15T16:47:53Z" + content=""" +The key is the basename of the symlink target. +"""]] diff --git a/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_11_9be0aef403a002c1706d17deee45763c._comment b/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_11_9be0aef403a002c1706d17deee45763c._comment new file mode 100644 index 0000000000..7bc54573ed --- /dev/null +++ b/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_11_9be0aef403a002c1706d17deee45763c._comment @@ -0,0 +1,24 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 11" + date="2011-05-15T18:53:26Z" + content=""" +It seems the objects are in the remote after all, but the remote is unaware of this fact. No idea where/why the remote lost that info, but.. 
Anyway, with the SHA backends, wouldn't it make sense to simply return \"OK\" and update the annex logs accordingly, no? + +Local: + + % ls -l foo + lrwxrwxrwx 1 richih richih 312 Apr 3 01:18 foo -> .git/annex/objects/gG/VW/SHA512-s80781--cef3966a19c7435acceb8fbfbff1feebe6decab7c81a0c197f00932cf9ef0eac330784cc3f0d211bd4acf56a6d16daaebe9b598aa4dfd5bfec73f4e6df3f0491/SHA512-s80781--cef3966a19c7435acceb8fbfbff1feebe6decab7c81a0c197f00932cf9ef0eac330784cc3f0d211bd4acf56a6d16daaebe9b598aa4dfd5bfec73f4e6df3f0491 + % + +Remote: + + % git-annex-shell recvkey SHA512-s80781--cef3966a19c7435acceb8fbfbff1feebe6decab7c81a0c197f00932cf9ef0eac330784cc3f0d211bd4acf56a6d16daaebe9b598aa4dfd5bfec73f4e6df3f0491 + git-annex-shell: key is already present in annex + % strace git-annex-shell recvkey /base/git-annex/fun SHA512-s80781--cef3966a19c7435acceb8fbfbff1feebe6decab7c81a0c197f00932cf9ef0eac330784cc3f0d211bd4acf56a6d16daaebe9b598aa4dfd5bfec73f4e6df3f0491 2>&1 | grep SHA512-s80781--cef3966a19c7435acceb8fbfbff1feebe6decab7c81a0c197f00932cf9ef0eac330784cc3f0d211bd4acf56a6d16daaebe9b598aa4dfd5bfec73f4e6df3f0491 + stat64(\"/base/git-annex/fun/annex/objects/gG/VW/SHA512-s80781--cef3966a19c7435acceb8fbfbff1feebe6decab7c81a0c197f00932cf9ef0eac330784cc3f0d211bd4acf56a6d16daaebe9b598aa4dfd5bfec73f4e6df3f0491/SHA512-s80781--cef3966a19c7435acceb8fbfbff1feebe6decab7c81a0c197f00932cf9ef0eac330784cc3f0d211bd4acf56a6d16daaebe9b598aa4dfd5bfec73f4e6df3f0491\", {st_mode=S_IFREG|0444, st_size=80781, ...}) = 0 + % ls -l /base/git-annex/fun/annex/objects/gG/VW/SHA512-s80781--cef3966a19c7435acceb8fbfbff1feebe6decab7c81a0c197f00932cf9ef0eac330784cc3f0d211bd4acf56a6d16daaebe9b598aa4dfd5bfec73f4e6df3f0491/SHA512-s80781--cef3966a19c7435acceb8fbfbff1feebe6decab7c81a0c197f00932cf9ef0eac330784cc3f0d211bd4acf56a6d16daaebe9b598aa4dfd5bfec73f4e6df3f0491 + -r--r--r-- 1 richih richih 80781 2011-04-01 12:44 
/base/git-annex/fun/annex/objects/gG/VW/SHA512-s80781--cef3966a19c7435acceb8fbfbff1feebe6decab7c81a0c197f00932cf9ef0eac330784cc3f0d211bd4acf56a6d16daaebe9b598aa4dfd5bfec73f4e6df3f0491/SHA512-s80781--cef3966a19c7435acceb8fbfbff1feebe6decab7c81a0c197f00932cf9ef0eac330784cc3f0d211bd4acf56a6d16daaebe9b598aa4dfd5bfec73f4e6df3f0491 + % +"""]] diff --git a/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_12_26d60661196f63fd01ee4fbb6e2340e7._comment b/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_12_26d60661196f63fd01ee4fbb6e2340e7._comment new file mode 100644 index 0000000000..b458a37b69 --- /dev/null +++ b/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_12_26d60661196f63fd01ee4fbb6e2340e7._comment @@ -0,0 +1,11 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 12" + date="2011-05-15T19:40:47Z" + content=""" +So, it appears that you're using git annex copy --fast. As documented that assumes the location log is correct. So it avoids directly checking if the bare repo contains the file, and tries to upload it, and the bare repo is all like \"but I've already got this file!\". The only way to improve that behavior might be to let rsync go ahead and retransfer the file, which, with recovery, should require sending little data etc. But I can't say I like the idea much, as the repo already has the content, so unlocking it and letting rsync mess with it is an unnecessary risk. I think it's ok for --force to blow up +if its assumptions turn out to be wrong. + +If you use git annex copy without --fast in this situation, it will do the right thing. 
+"""]] diff --git a/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_13_ead55b915d3b92a62549b2957ad211c8._comment b/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_13_ead55b915d3b92a62549b2957ad211c8._comment new file mode 100644 index 0000000000..d92ecbba03 --- /dev/null +++ b/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_13_ead55b915d3b92a62549b2957ad211c8._comment @@ -0,0 +1,35 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 13" + date="2011-05-15T20:25:25Z" + content=""" +Yes, makes sense. I am so used to using --fast, I forgot a non-fast mode existed. I still think it would be a good idea to fall back to non-fast mode if --fast runs into an error from the remote, but as that is well without my abilities how about this patch? + + + From 4855510c7a84eb5d28fdada429580a8a42b7112a Mon Sep 17 00:00:00 2001 + From: Richard Hartmann + Date: Sun, 15 May 2011 22:20:42 +0200 + Subject: [PATCH] Make error in RecvKey.hs suggest possible solution + + --- + Command/RecvKey.hs | 2 +- + 1 files changed, 1 insertions(+), 1 deletions(-) + + diff --git a/Command/RecvKey.hs b/Command/RecvKey.hs + index 126608f..b917a1c 100644 + --- a/Command/RecvKey.hs + +++ b/Command/RecvKey.hs + @@ -27,7 +27,7 @@ start :: CommandStartKey + start key = do + present <- inAnnex key + when present $ + - error \"key is already present in annex\" + + error \"key is already present in annex. If you are running copy, try without '--fast'\" + + ok <- getViaTmp key (liftIO . 
rsyncServerReceive) + if ok + -- + 1.7.4.4 + +"""]] diff --git a/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_14_191de89d3988083d9cf001799818ff4a._comment b/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_14_191de89d3988083d9cf001799818ff4a._comment new file mode 100644 index 0000000000..f45bd70468 --- /dev/null +++ b/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_14_191de89d3988083d9cf001799818ff4a._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 14" + date="2011-05-15T20:50:26Z" + content=""" +Or, even better, wouldn't it make sense to have SHA backends always default to --fast and only use non-fast when any snags are hit, use non-fast mode for that file. + +Though if we continue here, we should probably move this to its own page. +"""]] diff --git a/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_15_b3e3b338ccfa0a32510c78ba1b1bb617._comment b/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_15_b3e3b338ccfa0a32510c78ba1b1bb617._comment new file mode 100644 index 0000000000..b4a00bd7e1 --- /dev/null +++ b/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_15_b3e3b338ccfa0a32510c78ba1b1bb617._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 15" + date="2011-05-15T21:38:47Z" + content=""" +PS: Just to make this clear, I am using a custom alias for all my copying needs and thus didn't even see that I used --fast. 
:p +"""]] diff --git a/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_16_04a9f4468c3246c8eff3dbe21dd90101._comment b/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_16_04a9f4468c3246c8eff3dbe21dd90101._comment new file mode 100644 index 0000000000..6d3dabb92b --- /dev/null +++ b/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_16_04a9f4468c3246c8eff3dbe21dd90101._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 16" + date="2011-05-16T20:01:28Z" + content=""" +Thanks. +"""]] diff --git a/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_1_6a41bf7e2db83db3a01722b516fb6886._comment b/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_1_6a41bf7e2db83db3a01722b516fb6886._comment new file mode 100644 index 0000000000..59c30de534 --- /dev/null +++ b/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_1_6a41bf7e2db83db3a01722b516fb6886._comment @@ -0,0 +1,18 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 1" + date="2011-05-12T00:07:29Z" + content=""" +I followed this to re-inject files which git annex fsck listed as missing. + +For every one of those files, I get + + git-annex-shell: key is already present in annex + rsync: connection unexpectedly closed (0 bytes received so far) [sender] + rsync error: error in rsync protocol data stream (code 12) at io.c(601) [sender=3.0.8] + +when trying to copy the files to the remote. 
+ +-- Richard +"""]] diff --git a/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_2_9f5f1dbffb2dd24f4fcf8c2027bf0384._comment b/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_2_9f5f1dbffb2dd24f4fcf8c2027bf0384._comment new file mode 100644 index 0000000000..44aab3baa0 --- /dev/null +++ b/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_2_9f5f1dbffb2dd24f4fcf8c2027bf0384._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 2" + date="2011-05-12T01:01:34Z" + content=""" +Sounds like you probably didn't commit after the fsck, or didn't push so the other repository did not know the first had the content again -- but I'm not 100% sure. +"""]] diff --git a/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_3_b596b5cfd3377e58dbbb5d509d026b90._comment b/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_3_b596b5cfd3377e58dbbb5d509d026b90._comment new file mode 100644 index 0000000000..4744db995c --- /dev/null +++ b/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_3_b596b5cfd3377e58dbbb5d509d026b90._comment @@ -0,0 +1,14 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 3" + date="2011-05-14T09:06:54Z" + content=""" +As my comment from work is stuck in moderation: + +I ran this twice: + + git pull && git annex add . && git annex copy . 
--to --fast --quiet && git commit -a -m \"$HOST $(date +%F--%H-%M-%S-%Z)\" && git push + +but nothing changed +"""]] diff --git a/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_4_d7112c315fb016a8a399e24e9b6461d8._comment b/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_4_d7112c315fb016a8a399e24e9b6461d8._comment new file mode 100644 index 0000000000..1fb19ab192 --- /dev/null +++ b/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_4_d7112c315fb016a8a399e24e9b6461d8._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 4" + date="2011-05-14T16:13:58Z" + content=""" +Hmm. Old versions may have forgotten to git add a .git-annex location log file when recovering content with fsck. That could be another reason things are out of sync. + +But I'm not clear on which repo is trying to copy files to which. + +(NB: If the files were recovered on a bare git repo, fsck cannot update the location log there, which could also explain this.) +"""]] diff --git a/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_5_4ea29a6f8152eddf806c536de33ef162._comment b/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_5_4ea29a6f8152eddf806c536de33ef162._comment new file mode 100644 index 0000000000..0a546bd88c --- /dev/null +++ b/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_5_4ea29a6f8152eddf806c536de33ef162._comment @@ -0,0 +1,14 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 5" + date="2011-05-14T19:03:43Z" + content=""" +Version: 0.20110503 + +My local non-bare repo is copying to a remote bare repo. + +I have been recovering in a non-bare repo. + +If there is anything I can send you to help... If I removed said files and went through http://git-annex.branchable.com/bugs/No_easy_way_to_re-inject_a_file_into_an_annex/ -- would that help? 
+"""]] diff --git a/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_6_0d85f114a103bd6532a3b3b24466012e._comment b/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_6_0d85f114a103bd6532a3b3b24466012e._comment new file mode 100644 index 0000000000..1e3f325319 --- /dev/null +++ b/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_6_0d85f114a103bd6532a3b3b24466012e._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 6" + date="2011-05-14T19:23:45Z" + content=""" +Well, focus on a specific file that exhibits the problem. What does `git annex whereis` say about it? Is the content actually present in annex/objects/ on the bare repository? Does that contradict whereis? +"""]] diff --git a/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_7_d38d5bee6d360b0ea852f39e3a7b1bc6._comment b/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_7_d38d5bee6d360b0ea852f39e3a7b1bc6._comment new file mode 100644 index 0000000000..f7dfad68ca --- /dev/null +++ b/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_7_d38d5bee6d360b0ea852f39e3a7b1bc6._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 7" + date="2011-05-14T23:13:15Z" + content=""" +It exists locally, whereis tells me it exists locally and locally, only. + +The object is _not_ in the bare repo. + +The file _might_ have gone missing before I upgraded my annex backend version to 2. Could this be a factor? 
+"""]] diff --git a/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_8_29c3de4bf5fbd990b230c443c0303cbe._comment b/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_8_29c3de4bf5fbd990b230c443c0303cbe._comment new file mode 100644 index 0000000000..01248914c3 --- /dev/null +++ b/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_8_29c3de4bf5fbd990b230c443c0303cbe._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 8" + date="2011-05-15T00:09:34Z" + content=""" +What you're describing should be impossible; the error message shown can only occur if the object is present in the annex where `git-annex-shell recvkey` is run. So something strange is going on. + +Try reproducing it by running on the remote system, `git-annex-shell recvkey /remote/repo.git $key` .. if you can reproduce it, I guess the next thing to do will be to strace the command and see why it's thinking the object is there. +"""]] diff --git a/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_9_2cee4f6bd6db7518fd61453c595162c6._comment b/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_9_2cee4f6bd6db7518fd61453c595162c6._comment new file mode 100644 index 0000000000..2755cf3317 --- /dev/null +++ b/doc/bugs/copy_fast_confusing_with_broken_locationlog/comment_9_2cee4f6bd6db7518fd61453c595162c6._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 9" + date="2011-05-15T09:16:49Z" + content=""" +Just to make sure: How do I get $key? What I did was look at the path in the object store of the local repo and see if that exact same path & file existed in the remote. 
+"""]] diff --git a/doc/bugs/cyclic_drop.mdwn b/doc/bugs/cyclic_drop.mdwn new file mode 100644 index 0000000000..296d61aac5 --- /dev/null +++ b/doc/bugs/cyclic_drop.mdwn @@ -0,0 +1,104 @@ +drop's verification that a remote still has content can fail +if the remote is also dropping the content at the same time. Each +repository checks that the other still has the content, and then both +drop it. Could also happen with larger cycles of repositories. + +> Confirmed fixed now. All cases tested. [[done]] + +--- + +Fixing this requires locking. (Well, there are other ways, like moving the +content to a holding area when checking if it's safe to drop, but they +seem complicated, and would be hard to implement for move --from.) + +Add per-content lock files. An exclusive lock is held on content when +it's in the process of being dropped, or moved. The lock is taken +nonblocking; if it cannot be obtained, something else is acting on the +content and git-annex should refuse to do anything. + +Then when checking inannex, try to take a shared lock. Note that to avoid +deadlock, this must be a nonblocking lock. (Actually, with fcntl locking, +can just check if there is a lock, without needing to take one.) +If it fails, the status of the content is unknown, so inannex should fail. +Note that this failure needs to be distinguishable from "not in annex". + +> Thinking about these lock files, this would be a lot more files, +> and would possibly break some assumptions that everything in +> `.git/annex/objects` is a key's content. (Or would need lots more +> directories to put the lock files elsewhere.) There would be more +> overhead to manage these and have them on disk. +> +> What if it just locked the actual content file? The obvious limitation +> is only content that was already inannex could be locked, but that +> happens to be exactly what's needed here; if content is not present, +> it's not going to get dropped or moved. 
+> +> Of course, if some consumer of a file locked it, then it could prevent it +> from being dropped or moved. This could be considered a bug, or a feature. :) +> +> However, this would mean that such a hypothetical consumer could also +> make inannex checks fail. +> +> The other downside is that, for fcntl exclusive locking, the file has to +> be opened for write. Normally the modes of content files are locked down +> to prevent modification. Dealt with, but oh so nasty. Almost makes flock +> locking seem worth using. + +--- + +drop --from could also cycle. Locking should fix. + +> Confirmed fixed now. + +--- + +move --to can also be included in the cycle, since it can drop data. + +Consider move to a remote that already has the content and +is at the same time doing a drop (or a move). The remote +verifies the content is present on the movee, and removes its copy. +The movee removes its copy. + +So move --to needs to take the content lock on start. Then the inannex +will fail. + +This is why it's important for inannex to fail in a way that is +distinguishable from "not in annex". Otherwise, move --to +would see the cycle as the remote not having content, and try to +redundantly send it, drop it locally, and still race. + +> Confirmed fixed now. + +--- + +move --from is similar. Consider a case where both the local and the remote +are doing a move --from. Both have the content, and confirm the other does, +via inannex checks. Then both run git-annex-shell dropkey, removing both +copies. + +So move --from needs to take the content lock on start, so the inannex will +fail. NB: If the content is not locally present, don't take the lock. + +> Confirmed fixed now. + +--- + +Another cycle might be running move --to and move --from on the same file, +locally. The exclusivity of the content lock solves this, as only one can +run at a time. + +Would it work with a shared lock? The --to would run git-annex-shell +inannex. 
The --from would also be running, and would run git-annex-shell +dropkey. So inannex and dropkey would end up running on the remote +at the same time. Dropkey takes the content lock, and inannex checks it, +but what if inannex runs first? Then it returns true, and then the content +is removed, so both the --to and --from see success and the --to proceeds +to remove the local content that the --from already caused to be removed +from the remote. So, no, the nasty exclusive lock is needed. + +> Confirmed fixed now. + +--- + +Another cycle might involve move --from and drop, both run on the same +file, locally. Again, the exclusive lock solves this. diff --git a/doc/bugs/directory_remote_and_case_sensitivity_on_FAT/comment_1_bcac9fd7b3f4a2ac28bee59bae674fa0._comment b/doc/bugs/directory_remote_and_case_sensitivity_on_FAT/comment_1_bcac9fd7b3f4a2ac28bee59bae674fa0._comment new file mode 100644 index 0000000000..be8b8b0a72 --- /dev/null +++ b/doc/bugs/directory_remote_and_case_sensitivity_on_FAT/comment_1_bcac9fd7b3f4a2ac28bee59bae674fa0._comment @@ -0,0 +1,79 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawnBJ6Dv1glxzzi4qIzGFNa6F-mfHIvv9Ck" + nickname="Jim" + subject="Case sensitivity" + date="2011-11-22T18:51:03Z" + content=""" +I agree, it's weird, but that's what I'm seeing: + + #!/bin/sh + + if [ $UID != 0 ] ; then echo \"need root\" ; exit 1 ; fi + + set -x + + # make image + cd /tmp + dd if=/dev/zero of=diskimage bs=1M count=40 + DEV=$(losetup --find --show diskimage) + + # make FAT32 fs + mkfs.vfat -F 32 $DEV + + # mount it + mkdir annex + mount -o shortname=mixed,utf8=1 $DEV annex + + # show bug + ( + cd annex + mkdir zP + mkdir Zp + ls Zp + ls + touch zP + touch Zp + ) + + # cleanup + umount annex + rm -r annex + losetup -d $DEV + rm diskimage + + # info + uname -a + +Output: + + + cd /tmp + + dd if=/dev/zero of=diskimage bs=1M count=40 + 40+0 records in + 40+0 records out + 41943040 bytes (42 MB) copied, 0.0847729 s, 495 
MB/s + ++ losetup --find --show diskimage + + DEV=/dev/loop0 + + mkfs.vfat -F 32 /dev/loop0 + mkfs.vfat 3.0.9 (31 Jan 2010) + Loop device does not match a floppy size, using default hd params + + mkdir annex + + mount -o shortname=mixed,utf8=1 /dev/loop0 annex + + cd annex + + mkdir zP + + mkdir Zp + mkdir: cannot create directory `Zp': File exists + + ls Zp + ls: cannot access Zp: No such file or directory + + ls + zP + + touch zP + + touch Zp + touch: cannot touch `Zp': File exists + + umount annex + + rm -r annex + + losetup -d /dev/loop0 + + rm diskimage + + uname -a + Linux pilot 3.0.3+ #1 SMP Mon Aug 29 15:21:18 EDT 2011 x86_64 GNU/Linux + +"""]] diff --git a/doc/bugs/directory_remote_and_case_sensitivity_on_FAT/comment_2_c9088060fb9133b66951f1a3075981e8._comment b/doc/bugs/directory_remote_and_case_sensitivity_on_FAT/comment_2_c9088060fb9133b66951f1a3075981e8._comment new file mode 100644 index 0000000000..5040b3120f --- /dev/null +++ b/doc/bugs/directory_remote_and_case_sensitivity_on_FAT/comment_2_c9088060fb9133b66951f1a3075981e8._comment @@ -0,0 +1,18 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 2" + date="2011-11-22T19:56:55Z" + content=""" +All right, I see the same thing with linux 3.1.0. It seems this behavior has changed since linux 3.0.0. Mounting with shortname=lower avoids the problem. + +I feel a good case could be made that this new behavior is a linux bug. Your example with touch particularly shows how weird it is. + +
+$ touch Foo
+$ echo hi > foo
+sh: cannot create foo: File exists
+$ rm foo
+rm: cannot remove `foo': No such file or directory
+
+"""]] diff --git a/doc/bugs/directory_remote_and_case_sensitivity_on_FAT/comment_3_5bf34466187cfc9b34bd3ca8c89a07c6._comment b/doc/bugs/directory_remote_and_case_sensitivity_on_FAT/comment_3_5bf34466187cfc9b34bd3ca8c89a07c6._comment new file mode 100644 index 0000000000..54d6ff50ab --- /dev/null +++ b/doc/bugs/directory_remote_and_case_sensitivity_on_FAT/comment_3_5bf34466187cfc9b34bd3ca8c89a07c6._comment @@ -0,0 +1,20 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawnBJ6Dv1glxzzi4qIzGFNa6F-mfHIvv9Ck" + nickname="Jim" + subject="comment 3" + date="2011-11-22T20:35:01Z" + content=""" +I see the same results (\"`touch: cannot touch 'Zp': File exists`\") on these Debian systems: + + Linux pilot 3.0.3+ #1 SMP Mon Aug 29 15:21:18 EDT 2011 x86_64 GNU/Linux + Linux neurosis 3.0.0-1-amd64 #1 SMP Sun Jul 24 02:24:44 UTC 2011 x86_64 GNU/Linux + Linux bucket 2.6.39-2-amd64 #1 SMP Tue Jul 5 02:51:22 UTC 2011 x86_64 GNU/Linux + Linux psychosis 2.6.37-trunk-amd64 #1 SMP Thu Jan 6 14:13:28 UTC 2011 x86_64 GNU/Linux + Linux bacon 2.6.32-5-amd64 #1 SMP Thu Aug 12 13:01:50 UTC 2010 x86_64 GNU/Linux + +It does NOT happen on this Ubuntu system: + + Linux esensor 3.0.0-12-generic #20-Ubuntu SMP Fri Oct 7 14:56:25 UTC 2011 x86_64 x86_64 x86_64 GNU/Linux + +So really it seems like only the Ubuntu kernel is the outlier here? Maybe it has something to do with charsets or something; I think FAT is a mess in that regard and even long versus short filenames can behave differently. 
+"""]] diff --git a/doc/bugs/directory_remote_and_case_sensitivity_on_FAT/comment_4_d6201f2d86d5b44051a7fd7a8c9de583._comment b/doc/bugs/directory_remote_and_case_sensitivity_on_FAT/comment_4_d6201f2d86d5b44051a7fd7a8c9de583._comment new file mode 100644 index 0000000000..406a6b18ee --- /dev/null +++ b/doc/bugs/directory_remote_and_case_sensitivity_on_FAT/comment_4_d6201f2d86d5b44051a7fd7a8c9de583._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 4" + date="2011-11-22T20:59:55Z" + content=""" +Your ubuntu system has 3.0.0 which as noted does not have the problem. +"""]] diff --git a/doc/bugs/directory_remote_and_case_sensitivity_on_FAT/comment_5_61c5f0889f30a68ac3b57c4ea564ee0e._comment b/doc/bugs/directory_remote_and_case_sensitivity_on_FAT/comment_5_61c5f0889f30a68ac3b57c4ea564ee0e._comment new file mode 100644 index 0000000000..1656ff2075 --- /dev/null +++ b/doc/bugs/directory_remote_and_case_sensitivity_on_FAT/comment_5_61c5f0889f30a68ac3b57c4ea564ee0e._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 5" + date="2011-11-22T21:01:14Z" + content=""" +I am surprised if it happens on 2.6.x though. Debian 3.0.0 seemed to not have the problem but perhaps my test was bad. 
+"""]] diff --git a/doc/bugs/done.mdwn b/doc/bugs/done.mdwn new file mode 100644 index 0000000000..a35d427198 --- /dev/null +++ b/doc/bugs/done.mdwn @@ -0,0 +1,4 @@ +recently fixed [[bugs]] + +[[!inline pages="./* and link(./done) and !*/Discussion" sort=mtime show=10 +archive=yes]] diff --git a/doc/bugs/dotdot_problem.mdwn b/doc/bugs/dotdot_problem.mdwn new file mode 100644 index 0000000000..cbefd5dae5 --- /dev/null +++ b/doc/bugs/dotdot_problem.mdwn @@ -0,0 +1,4 @@ +cannot "git annex ../foo" (GitRepo.relative is buggy and +git-ls-files also refuses w/o --full-name, which would need other changes) + +[[done]] diff --git a/doc/bugs/dropping_files_with_a_URL_backend_fails.mdwn b/doc/bugs/dropping_files_with_a_URL_backend_fails.mdwn new file mode 100644 index 0000000000..c6ef13f844 --- /dev/null +++ b/doc/bugs/dropping_files_with_a_URL_backend_fails.mdwn @@ -0,0 +1,13 @@ +I was trying out the example with the walkthrough using_the_URL_backend. I tried dropping files that I had after doing an "git annex get ." which have the URL backend associated with the files it fails with + + +
+[jtang@lenny gc]$ git annex drop -v curl-7.21.4.tar.gz
+drop curl-7.21.4.tar.gz
+failed
+git-annex: 1 failed
+
+ +At first I thought it was just my OSX machine not having the coreutils stuff load up before the BSD utils, but I then tried the same thing on my archlinux machine and it showed the same behaviour, that is I could not drop a file with the URL backend as shown in the walkthrough. + +> Whoops, got some logic backwards. [[fixed|done]]! --[[Joey]] diff --git a/doc/bugs/dropunused_doesn__39__t_handle_double_spaces_in_filename.mdwn b/doc/bugs/dropunused_doesn__39__t_handle_double_spaces_in_filename.mdwn new file mode 100644 index 0000000000..a6b44cd2a3 --- /dev/null +++ b/doc/bugs/dropunused_doesn__39__t_handle_double_spaces_in_filename.mdwn @@ -0,0 +1,87 @@ +Unused files with double spaces in their name are not removed by `dropunused`: + +Script: + + #!/bin/bash + + BASE=/tmp/unused-bug + + # setup + set -x + chmod -R +w $BASE + rm -rf $BASE + mkdir -p $BASE + cd $BASE + + # create annex + git init . + git annex init + + # make a file with two spaces + echo hello > 'foo bar' + + # add it + git annex add --backend WORM 'foo bar' + git commit -m 'add' + + # remove it + git rm 'foo bar' + git commit -m 'remove' + + # unused + git annex unused + git annex dropunused 1 + git annex unused + +Output: + + + chmod -R +w /tmp/unused-bug + + rm -rf /tmp/unused-bug + + mkdir -p /tmp/unused-bug + + cd /tmp/unused-bug + + git init . + Initialized empty Git repository in /tmp/unused-bug/.git/ + + git annex init + init ok + + echo hello + + git annex add --backend WORM 'foo bar' + add foo bar ok + (Recording state in git...) + + git commit -m add + [master (root-commit) 926f7f5] add + 1 files changed, 1 insertions(+), 0 deletions(-) + create mode 120000 foo bar + + git rm 'foo bar' + rm 'foo bar' + + git commit -m remove + [master d025e3f] remove + 1 files changed, 0 insertions(+), 1 deletions(-) + delete mode 120000 foo bar + + git annex unused + unused . (checking for unused data...) (checking master...) 
+ Some annexed data is no longer used by any files: + NUMBER KEY + 1 WORM-s6-m1322200438--foo bar + (To see where data was previously used, try: git log --stat -S'KEY') + + To remove unwanted data: git-annex dropunused NUMBER + + ok + + git annex dropunused 1 + dropunused 1 ok + + git annex unused + unused . (checking for unused data...) (checking master...) + Some annexed data is no longer used by any files: + NUMBER KEY + 1 WORM-s6-m1322200438--foo bar + (To see where data was previously used, try: git log --stat -S'KEY') + + To remove unwanted data: git-annex dropunused NUMBER + + ok + +Strange that `dropunused` still said "ok" when it didn't succeed at removing the file. + +> It was misparsing the unused file, so it thought you'd asked it to drop a +> key that didn't exist (which means already dropped) so no error. I've +> fixed the bug. [[done]] --[[Joey]] diff --git a/doc/bugs/encrypted_S3_stalls.mdwn b/doc/bugs/encrypted_S3_stalls.mdwn new file mode 100644 index 0000000000..109e6e793a --- /dev/null +++ b/doc/bugs/encrypted_S3_stalls.mdwn @@ -0,0 +1,9 @@ +Sending large-ish (few megabytes) files to encrypted S3 remotes stalls out. +It works for the tiny files I was using to test while developing it, on +dialup. + +There was a similar issue with bup, which I fixed by forking a process +rather than using a thread to do some IO. Probably need the same here. +--[[Joey]] + +[[done]] --[[Joey]] diff --git a/doc/bugs/error_propigation.mdwn b/doc/bugs/error_propigation.mdwn new file mode 100644 index 0000000000..25998907e8 --- /dev/null +++ b/doc/bugs/error_propigation.mdwn @@ -0,0 +1,3 @@ +If a subcommand fails w/o throwing an error, no error is propagated to the +git-annex exit code. With --quiet, this makes it look like the command +succeeded. 
[[done]] diff --git a/doc/bugs/error_with_file_names_starting_with_dash.mdwn b/doc/bugs/error_with_file_names_starting_with_dash.mdwn new file mode 100644 index 0000000000..84bf1cfa07 --- /dev/null +++ b/doc/bugs/error_with_file_names_starting_with_dash.mdwn @@ -0,0 +1,15 @@ +git annex add has problems if items start with dashes, example: + +-wut-a-directory-name-/file1 + +leads to + +[[!format bash """ +add -wut-a-directory-name-/file1 (checksum...) sha1sum: invalid option -- 'u' +„sha1sum --help“ gibt weitere Informationen. + + git-annex: : hGetLine: end of file +"""]] + +> This is fixed in git, at least I think I've found all cases where +> filenames are passed to programs and escaped them. --[[Joey]] [[done]] diff --git a/doc/bugs/extraneous_shell_escaping_for_rsync_remotes.mdwn b/doc/bugs/extraneous_shell_escaping_for_rsync_remotes.mdwn new file mode 100644 index 0000000000..c4ee8d5bda --- /dev/null +++ b/doc/bugs/extraneous_shell_escaping_for_rsync_remotes.mdwn @@ -0,0 +1,15 @@ +When using `git annex get foo` where foo is available in a rsync remote with encryption I got an error saying that rsync cannot +find the required file but extra ' are here. + +I attached a patch for this. + +> But you didn't, sadly. :( +> +> I don't seem to see the problem, set up a rsync over ssh with encryption +> and sent over a file "foo", and then got it back from rsync, without +> trouble. +> +> Ah, you're not using rsync over ssh, but just to a local directory, +> right? 
--[[Joey]] + +>> [[fixed|done]] --[[Joey]] diff --git a/doc/bugs/fails_to_handle_lot_of_files.mdwn b/doc/bugs/fails_to_handle_lot_of_files.mdwn new file mode 100644 index 0000000000..470a5180f0 --- /dev/null +++ b/doc/bugs/fails_to_handle_lot_of_files.mdwn @@ -0,0 +1,445 @@ + git-annex version: 3.20111011 + local repository version: 3 + default repository version: 3 + supported repository versions: 3 + upgrade supported from repository versions: 0 1 2 + +I just created a new remote on a USB drive and wanted to copy my files over. git-annex wasn't too happy about that ;) +I included a few OK transfers as there was an error before git-annex ran into a wall. As I could easily access that temp file after it aborted, I suspect something went wrong internally before git-annex started to throw those errors. + +Please note the "_n TIMES_" comments. It's how often I got the same error message... + + + + git annex copy . --to USB --fast + + copy redacted.JPG (to USB...) + redacted + 4035668 100% 77.91MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 4036374 bytes received 31 bytes 8072810.00 bytes/sec + total size is 4035668 speedup is 1.00 + ok + copy redacted.NEF (to USB...) + redacted + 18002094 100% 74.19MB/s 0:00:00 (xfer#1, to-check=0/1) + WARNING: redacted failed verification -- update retained (will try again). + redacted + 18002094 100% 19.60MB/s 0:00:00 (xfer#2, to-check=0/1) + rsync: open "copy_target/.git/annex/tmp/redacted_E13" failed: Permission denied (13) + + sent 36008841 bytes received 52 bytes 24005928.67 bytes/sec + total size is 18002094 speedup is 0.50 + rsync error: some files/attrs were not transferred (see previous errors) (code 23) at main.c(1070) [sender=3.0.8] + + rsync failed -- run git annex again to resume file transfer + failed + copy redacted.JPG (to USB...) 
+ redacted + 3687111 100% 39.16MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 3687773 bytes received 31 bytes 2458536.00 bytes/sec + total size is 3687111 speedup is 1.00 + ok + copy redacted.NEF (to USB...) + redacted + 17877177 100% 79.15MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 17879573 bytes received 31 bytes 11919736.00 bytes/sec + total size is 17877177 speedup is 1.00 + ok + copy redacted.JPG (to USB...) + redacted + 3694921 100% 40.14MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 3695583 bytes received 31 bytes 2463742.67 bytes/sec + total size is 3694921 speedup is 1.00 + ok + copy redacted.NEF (to USB...) + redacted + 17875448 100% 71.20MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 17877844 bytes received 31 bytes 11918583.33 bytes/sec + total size is 17875448 speedup is 1.00 + ok + copy redacted.JPG (to USB...) + redacted + 3833377 100% 62.49MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 3834055 bytes received 31 bytes 2556057.33 bytes/sec + total size is 3833377 speedup is 1.00 + ok + copy redacted.NEF (to USB...) + redacted + 17938200 100% 65.43MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 17940604 bytes received 31 bytes 11960423.33 bytes/sec + total size is 17938200 speedup is 1.00 + ok + copy redacted.JPG (to USB...) + redacted + 4512557 100% 83.77MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 4513319 bytes received 31 bytes 3008900.00 bytes/sec + total size is 4512557 speedup is 1.00 + ok + copy redacted.NEF (to USB...) + redacted + 18001641 100% 76.16MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 18004053 bytes received 31 bytes 12002722.67 bytes/sec + total size is 18001641 speedup is 1.00 + ok + copy redacted.JPG (to USB...) + redacted + 4394272 100% 50.11MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 4395022 bytes received 31 bytes 8790106.00 bytes/sec + total size is 4394272 speedup is 1.00 + ok + copy redacted.NEF (to USB...) 
+ redacted + 18095781 100% 73.30MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 18098205 bytes received 31 bytes 12065490.67 bytes/sec + total size is 18095781 speedup is 1.00 + ok + copy redacted.JPG (to USB...) + redacted + 4683795 100% 65.23MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 4684577 bytes received 31 bytes 9369216.00 bytes/sec + total size is 4683795 speedup is 1.00 + ok + copy redacted.NEF (to USB...) + redacted + 18172801 100% 74.25MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 18175233 bytes received 31 bytes 36350528.00 bytes/sec + total size is 18172801 speedup is 1.00 + ok + copy redacted.JPG (to USB...) + redacted + 4486231 100% 77.22MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 4486989 bytes received 31 bytes 8974040.00 bytes/sec + total size is 4486231 speedup is 1.00 + ok + copy redacted.NEF (to USB...) + redacted + 17860427 100% 68.56MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 17862823 bytes received 31 bytes 35725708.00 bytes/sec + total size is 17860427 speedup is 1.00 + ok + copy redacted.JPG (to USB...) + redacted + 4499768 100% 36.41MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 4500530 bytes received 31 bytes 9001122.00 bytes/sec + total size is 4499768 speedup is 1.00 + ok + copy redacted.NEF (to USB...) + redacted + 17840132 100% 74.48MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 17842524 bytes received 31 bytes 11895036.67 bytes/sec + total size is 17840132 speedup is 1.00 + ok + copy redacted.JPG (to USB...) + redacted + 4358032 100% 75.00MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 4358774 bytes received 31 bytes 8717610.00 bytes/sec + total size is 4358032 speedup is 1.00 + ok + copy redacted.NEF (to USB...) + redacted + 18084753 100% 61.48MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 18087173 bytes received 31 bytes 12058136.00 bytes/sec + total size is 18084753 speedup is 1.00 + ok + copy redacted.JPG (to USB...) 
+ redacted + 4270213 100% 68.49MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 4270947 bytes received 31 bytes 2847318.67 bytes/sec + total size is 4270213 speedup is 1.00 + ok + copy redacted.NEF (to USB...) + redacted + 17661246 100% 68.34MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 17663614 bytes received 31 bytes 11775763.33 bytes/sec + total size is 17661246 speedup is 1.00 + ok + copy redacted.JPG (to USB...) + redacted + 4538305 100% 63.19MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 4539071 bytes received 31 bytes 9078204.00 bytes/sec + total size is 4538305 speedup is 1.00 + ok + copy redacted.NEF (to USB...) + redacted + 18672466 100% 68.90MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 18674958 bytes received 31 bytes 12449992.67 bytes/sec + total size is 18672466 speedup is 1.00 + ok + copy redacted.JPG (to USB...) + redacted + 4453445 100% 73.96MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 4454199 bytes received 31 bytes 8908460.00 bytes/sec + total size is 4453445 speedup is 1.00 + ok + copy redacted.NEF (to USB...) + redacted + 18495494 100% 59.28MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 18497966 bytes received 31 bytes 12331998.00 bytes/sec + total size is 18495494 speedup is 1.00 + ok + copy redacted.JPG (to USB...) + redacted + 4255858 100% 70.66MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 4256588 bytes received 31 bytes 1702647.60 bytes/sec + total size is 4255858 speedup is 1.00 + ok + copy redacted.NEF (to USB...) + redacted + 18376531 100% 69.15MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 18378987 bytes received 31 bytes 36758036.00 bytes/sec + total size is 18376531 speedup is 1.00 + ok + copy redacted.JPG (to USB...) + redacted + 4013365 100% 48.67MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 4014067 bytes received 31 bytes 8028196.00 bytes/sec + total size is 4013365 speedup is 1.00 + ok + copy redacted.NEF (to USB...) 
+ redacted + 17606341 100% 51.73MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 17608705 bytes received 31 bytes 11739157.33 bytes/sec + total size is 17606341 speedup is 1.00 + ok + copy redacted.JPG (to USB...) + redacted + 4179869 100% 74.62MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 4180591 bytes received 31 bytes 8361244.00 bytes/sec + total size is 4179869 speedup is 1.00 + ok + copy redacted.NEF (to USB...) + redacted + 18382569 100% 67.05MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 18385025 bytes received 31 bytes 12256704.00 bytes/sec + total size is 18382569 speedup is 1.00 + ok + copy redacted.JPG (to USB...) + redacted + 4318363 100% 44.91MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 4319101 bytes received 31 bytes 8638264.00 bytes/sec + total size is 4318363 speedup is 1.00 + ok + copy redacted.NEF (to USB...) + redacted + 17715958 100% 72.69MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 17718334 bytes received 31 bytes 11812243.33 bytes/sec + total size is 17715958 speedup is 1.00 + ok + copy redacted.JPG (to USB...) + redacted + 4241893 100% 65.81MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 4242623 bytes received 31 bytes 8485308.00 bytes/sec + total size is 4241893 speedup is 1.00 + ok + copy redacted.NEF (to USB...) + redacted + 17717287 100% 71.77MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 17719663 bytes received 31 bytes 11813129.33 bytes/sec + total size is 17717287 speedup is 1.00 + ok + copy redacted.JPG (to USB...) + redacted + 4488380 100% 49.99MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 4489138 bytes received 31 bytes 2992779.33 bytes/sec + total size is 4488380 speedup is 1.00 + ok + copy redacted.NEF (to USB...) + redacted + 17770208 100% 38.80MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 17772592 bytes received 31 bytes 11848415.33 bytes/sec + total size is 17770208 speedup is 1.00 + ok + copy redacted.JPG (to USB...) 
+ redacted + 4603958 100% 76.48MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 4604732 bytes received 31 bytes 9209526.00 bytes/sec + total size is 4603958 speedup is 1.00 + ok + copy redacted.NEF (to USB...) + redacted + 18744380 100% 74.66MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 18746884 bytes received 31 bytes 12497943.33 bytes/sec + total size is 18744380 speedup is 1.00 + ok + copy redacted.JPG (to USB...) + redacted + 4592098 100% 79.06MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 4592872 bytes received 31 bytes 3061935.33 bytes/sec + total size is 4592098 speedup is 1.00 + ok + copy redacted.NEF (to USB...) + redacted + 18746205 100% 43.00MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 18748709 bytes received 31 bytes 12499160.00 bytes/sec + total size is 18746205 speedup is 1.00 + ok + copy redacted.JPG (to USB...) + redacted + 7493353 100% 80.85MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 7494479 bytes received 31 bytes 14989020.00 bytes/sec + total size is 7493353 speedup is 1.00 + ok + copy redacted.NEF (to USB...) + redacted + 19496768 100% 81.77MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 19499360 bytes received 31 bytes 12999594.00 bytes/sec + total size is 19496768 speedup is 1.00 + ok + copy redacted.JPG (to USB...) + redacted + 5462482 100% 82.19MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 5463360 bytes received 31 bytes 10926782.00 bytes/sec + total size is 5462482 speedup is 1.00 + ok + copy redacted.NEF (to USB...) + redacted + 19669815 100% 80.37MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 19672431 bytes received 31 bytes 13114974.67 bytes/sec + total size is 19669815 speedup is 1.00 + ok + copy redacted.JPG (to USB...) + redacted + 5449487 100% 57.40MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 5450365 bytes received 31 bytes 3633597.33 bytes/sec + total size is 5449487 speedup is 1.00 + ok + copy redacted.NEF (to USB...) 
+ redacted + 19633259 100% 74.18MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 19635871 bytes received 31 bytes 13090601.33 bytes/sec + total size is 19633259 speedup is 1.00 + ok + copy redacted.JPG (to USB...) + redacted + 5392184 100% 62.33MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 5393054 bytes received 31 bytes 3595390.00 bytes/sec + total size is 5392184 speedup is 1.00 + ok + copy redacted.NEF (to USB...) + redacted + 18912104 100% 65.00MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 18914628 bytes received 31 bytes 12609772.67 bytes/sec + total size is 18912104 speedup is 1.00 + ok + copy redacted.JPG (to USB...) + redacted + 4869300 100% 80.92MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 4870106 bytes received 31 bytes 9740274.00 bytes/sec + total size is 4869300 speedup is 1.00 + ok + copy redacted.NEF (to USB...) + redacted + 20178932 100% 68.13MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 20181608 bytes received 31 bytes 13454426.00 bytes/sec + total size is 20178932 speedup is 1.00 + ok + copy redacted.JPG (to USB...) + redacted + 4995425 100% 86.05MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 4996247 bytes received 31 bytes 9992556.00 bytes/sec + total size is 4995425 speedup is 1.00 + ok + copy redacted.NEF (to USB...) + redacted + 19970679 100% 76.36MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 19973331 bytes received 31 bytes 13315574.67 bytes/sec + total size is 19970679 speedup is 1.00 + ok + copy redacted.JPG (to USB...) + redacted + 7905795 100% 66.45MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 7906973 bytes received 31 bytes 15814008.00 bytes/sec + total size is 7905795 speedup is 1.00 + ok + copy redacted.NEF (to USB...) + redacted + 21234069 100% 78.07MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 21236877 bytes received 31 bytes 8494763.20 bytes/sec + total size is 21234069 speedup is 1.00 + ok + copy redacted.JPG (to USB...) 
+ redacted + 7963979 100% 62.51MB/s 0:00:00 (xfer#1, to-check=0/1) + + sent 7965165 bytes received 31 bytes 5310130.67 bytes/sec + total size is 7963979 speedup is 1.00 + git ["--git-dir=copy_target/.git","--work-tree=copy_target","update-index","-z","--index-info"]: Error in fork: forkProcess: resource exhausted (Resource temporarily unavailable) + + git-annex: user error (git ["--git-dir=copy_target/.git","--work-tree=copy_target","update-index","-z","--index-info"]: Error in fork: forkProcess: resource exhausted (Resource temporarily unavailable)) + failed + _506 TIMES_ (user error (Error in fork: forkProcess: resource exhausted (Resource temporarily unavailable))) failed + _11 TIMES_ copy foo (createPipe: resource exhausted (Too many open files)) failed + _2 TIMES_ (user error (Error in fork: forkProcess: resource exhausted (Resource temporarily unavailable))) failed + _8574 TIMES_: copy foo (createPipe: resource exhausted (Too many open files)) failed + git-annex: createPipe: resource exhausted (Too many open files) + failed + git-annex: 9101 failed + + % ls copy_target/.git/annex/tmp/redacted_E13 copy_target/.git/annex/tmp/SHA512E-redacted_E13 # works + % find source -type l | wc -l + 13554 + % find copy_target -type l | wc -l + 13554 + % find copy_target/.git/annex/objects -type f | wc -l + 4455 + % find source -type f | wc -l + 13554 + +> Fixed unreaped process leak. +> (This has nothing to do with NTFS). 
Ran test with 10k files +> [[done]] --[[Joey]] diff --git a/doc/bugs/fails_to_handle_lot_of_files/comment_1_09d8e4e66d8273fab611bd29e82dc7fc._comment b/doc/bugs/fails_to_handle_lot_of_files/comment_1_09d8e4e66d8273fab611bd29e82dc7fc._comment new file mode 100644 index 0000000000..587b1fd97c --- /dev/null +++ b/doc/bugs/fails_to_handle_lot_of_files/comment_1_09d8e4e66d8273fab611bd29e82dc7fc._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 1" + date="2011-10-26T17:16:52Z" + content=""" +After another run, i am at 8909 files in the remote, now. +"""]] diff --git a/doc/bugs/fails_to_handle_lot_of_files/comment_2_fd2ec05f4b5a7a6ae6bd9f5dbc3156de._comment b/doc/bugs/fails_to_handle_lot_of_files/comment_2_fd2ec05f4b5a7a6ae6bd9f5dbc3156de._comment new file mode 100644 index 0000000000..8e83fc19f4 --- /dev/null +++ b/doc/bugs/fails_to_handle_lot_of_files/comment_2_fd2ec05f4b5a7a6ae6bd9f5dbc3156de._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 2" + date="2011-10-26T18:22:34Z" + content=""" +In case this matters, I just realized that this disk has been formatted with NTFS instead of a sane FS. +"""]] diff --git a/doc/bugs/fat_support.mdwn b/doc/bugs/fat_support.mdwn new file mode 100644 index 0000000000..70ee3b369c --- /dev/null +++ b/doc/bugs/fat_support.mdwn @@ -0,0 +1,13 @@ +Klaus pointed out that there are two problems that keep +git-annex from being used on USB keys, that would typically +be VFAT formatted: + +- Use of symlinks, which VFAT does not support. Very hard to fix. + Instead, just use [[/bare_repositories]] on the key, + they're supported now. +- Use of ":" in filenames of object files, also not supported. + Could easily be fixed by reorganizing the object directory. 
+ +[[Done]]; in annex.version 2 repos, colons are entirely avoided in +filenames. So a bare git clone can be put on VFAT, and git-annex +used to move stuff --to and --from it, for sneakernet. diff --git a/doc/bugs/fat_support/comment_1_04bcc4795d431e8cb32293aab29bbfe2._comment b/doc/bugs/fat_support/comment_1_04bcc4795d431e8cb32293aab29bbfe2._comment new file mode 100644 index 0000000000..510e449842 --- /dev/null +++ b/doc/bugs/fat_support/comment_1_04bcc4795d431e8cb32293aab29bbfe2._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="fmarier" + ip="121.73.248.43" + subject="Exporting to a FAT filesystem?" + date="2011-04-04T07:40:41Z" + content=""" +I'm using git-annex to keep my music in sync between all of my different machines. What I'd love to be able to do is to also keep it in sync with my iRiver player. Unfortunately, the firmware, Rockbox, doesn't support ext3, so I'm stuck with a FAT filesystem. + +I can see how the design of git-annex makes it rather difficult to get rid of the symlinks, so how about taking a different approach: something like a \"git annex export DEST\" which would take a destination (not a git remote) and rsync the content over to there as regular files. + +Maybe \"git annex sync DEST\" or \"git annex rsync DEST\" would be better names if we want to convey the idea that the destination will be made to look like the source repo, including performing the necessary deletions. +"""]] diff --git a/doc/bugs/fat_support/comment_2_bb4a97ebadb5c53809fc78431eabd7c8._comment b/doc/bugs/fat_support/comment_2_bb4a97ebadb5c53809fc78431eabd7c8._comment new file mode 100644 index 0000000000..7618c9a7b6 --- /dev/null +++ b/doc/bugs/fat_support/comment_2_bb4a97ebadb5c53809fc78431eabd7c8._comment @@ -0,0 +1,14 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 2" + date="2011-04-04T18:20:45Z" + content=""" +Hey @fmarier. 
Well, this bug report is closed because you can already get rid of the symlinks. Just put a bare git repo on your fat filesystem, and use git-annex copy --to/--from there. + +Now, that puts all the files that are on the device in .git/annex/objects/xx/yy/blah.mp3 -- how well rockbox would support that I don't know. And if it tries to modify or delete those files, git annex also can't help you manage those changes. + +Another recent option is the [[special_remotes/directory]] special remote type, which again uses \"xx/yy/blah.mp3\" and can't track changes made to the files. This could perhaps be extended in the direction you suggest, although trying to fit this into the special remote infrastructure might not be a good fit really. + +The most likely way this has to get dealt with is really by using [[todo/smudge]] filters, which would eliminate the symlinks and allow copying a non-bare git repo onto vfat. +"""]] diff --git a/doc/bugs/fat_support/comment_3_df3b943bc1081a8f3f7434ae0c8e061e._comment b/doc/bugs/fat_support/comment_3_df3b943bc1081a8f3f7434ae0c8e061e._comment new file mode 100644 index 0000000000..f3db75c2f6 --- /dev/null +++ b/doc/bugs/fat_support/comment_3_df3b943bc1081a8f3f7434ae0c8e061e._comment @@ -0,0 +1,11 @@ +[[!comment format=mdwn + username="fmarier" + subject="comment 3" + date="2011-04-05T10:00:21Z" + content=""" +Thanks for the reply @joey. + +While it would certainly be possible for a bare repo to exist on my iRiver, the problem is that the music player uses the filesystem to organize files into directories like \"Artist/Album/Track.ogg\". So replacing that with \"..../xx/yy/Track.ogg\" would make it fairly difficult to browse my music collection and select the album/track I want to listen to :) + +So unless I have the files physically organized like the symlinks, then it's probably not going to work very well for that particular workflow. Smudge filters are interesting though. 
In the meantime, I'll look into rsyncing from another box which has the right filesystem layout onto my iRiver directly. +"""]] diff --git a/doc/bugs/fat_support/comment_4_90a8a15bedd94480945a374f9d706b86._comment b/doc/bugs/fat_support/comment_4_90a8a15bedd94480945a374f9d706b86._comment new file mode 100644 index 0000000000..722cbdd9e7 --- /dev/null +++ b/doc/bugs/fat_support/comment_4_90a8a15bedd94480945a374f9d706b86._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://ethan.betacantrips.com/" + nickname="ethan.glasser.camp" + subject="no symlinks" + date="2011-06-08T20:59:38Z" + content=""" +If you try to clone a git repo that has a symlink over to a VFAT filesystem, you get (in its place) a regular file that contains the name of the symlink target. So why can't git-annex use that? I could still do git annex get on this file, git annex would still \"know\" that it's a symlink, and could replace it with a copy of the real file (instead of putting it in .git/annex). + +I know if it were that simple, someone would have done it already, so what am I missing? I guess trying to get the file FROM the repository would fail because it wouldn't find the file in .git/annex? Couldn't you store a reverse mapping? You wouldn't be able to move the file around, but you already lose that once you give up symlinks. It would also be a little harder to tell which symlinks were \"dangling\"; I don't see an easy way to get around that. It would still be better than a bare repo.. 
+"""]] diff --git a/doc/bugs/fat_support/comment_5_64bbf89de0836673224b83fdefa0407b._comment b/doc/bugs/fat_support/comment_5_64bbf89de0836673224b83fdefa0407b._comment new file mode 100644 index 0000000000..1063b0f910 --- /dev/null +++ b/doc/bugs/fat_support/comment_5_64bbf89de0836673224b83fdefa0407b._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 5" + date="2011-06-10T16:41:43Z" + content=""" +@ethan the reason that wouldn't work is because git would then see a file that was checked in and had its one line symlinkish content replaced with a huge binary blob. And git commit would try to commit that etc. The potential for foot-shooting is too high. +"""]] diff --git a/doc/bugs/free_space_checking.mdwn b/doc/bugs/free_space_checking.mdwn new file mode 100644 index 0000000000..92e8be40d1 --- /dev/null +++ b/doc/bugs/free_space_checking.mdwn @@ -0,0 +1,21 @@ +Should check that there is enough free space before trying to copy a +file around. + +* Need a way to tell how much free space is available on the disk containing + a given repository. + +* And, need a way to tell the size of a file before copying it from + a remote, to check local disk space. + + As of annex.version 2, this metadata can be available for any type + of backend. Newly added files will always have file size metadata, + while files that used a SHA backend and were added before the upgrade + won't. + + So, need a migration process from eg SHA1 to SHA1+filesize. It will + find files that lack size info, and rename their keys to add the size + info. Users with old repos can run this on them, to get the missing + info recorded. + +> [[done]]; no migration process for old SHA1 keys from v1 repo though. 
+> --[[Joey]] diff --git a/doc/bugs/free_space_checking/comment_1_a868e805be43c5a7c19c41f1af8e41e6._comment b/doc/bugs/free_space_checking/comment_1_a868e805be43c5a7c19c41f1af8e41e6._comment new file mode 100644 index 0000000000..954433deb4 --- /dev/null +++ b/doc/bugs/free_space_checking/comment_1_a868e805be43c5a7c19c41f1af8e41e6._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 1" + date="2011-03-15T14:11:27Z" + content=""" +Keep in mind that lots of small files may have significant overhead, so a warning that it's not possible to make sure there's enough space would make sense for certain corner cases. Actually finding out the exact overhead is beyond git-annex' scope and, given transparent compression etc, ability, but a warning, optionally with a \"do you want to continue\" prompt can't hurt. + +-- RichiH +"""]] diff --git a/doc/bugs/free_space_checking/comment_2_8a65f6d3dcf5baa3f7f2dbe1346e2615._comment b/doc/bugs/free_space_checking/comment_2_8a65f6d3dcf5baa3f7f2dbe1346e2615._comment new file mode 100644 index 0000000000..9a43fe3f27 --- /dev/null +++ b/doc/bugs/free_space_checking/comment_2_8a65f6d3dcf5baa3f7f2dbe1346e2615._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 2" + date="2011-03-16T03:04:50Z" + content=""" +Right. You probably don't want git-annex to fill up your entire drive anyway, so if it tries to reserve 10 mb or 1% or whatever (probably configurable) for overhead, that should be good enough. 
+"""]] diff --git a/doc/bugs/free_space_checking/comment_3_0fc6ff79a357b1619d13018ccacc7c10._comment b/doc/bugs/free_space_checking/comment_3_0fc6ff79a357b1619d13018ccacc7c10._comment new file mode 100644 index 0000000000..ea4fb6c23e --- /dev/null +++ b/doc/bugs/free_space_checking/comment_3_0fc6ff79a357b1619d13018ccacc7c10._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 3" + date="2011-03-16T15:40:56Z" + content=""" +Sometimes, I might want to fill up the disk as much as possible. Thus, a warning is preferable to erroring out too early, imo -- Richard +"""]] diff --git a/doc/bugs/fsck__47__fix_should_check__47__fix_the_permissions_of_.git__47__annex.mdwn b/doc/bugs/fsck__47__fix_should_check__47__fix_the_permissions_of_.git__47__annex.mdwn new file mode 100644 index 0000000000..c649ff9f7c --- /dev/null +++ b/doc/bugs/fsck__47__fix_should_check__47__fix_the_permissions_of_.git__47__annex.mdwn @@ -0,0 +1,8 @@ +git annex carefully sets up restrictive permissions on .git/annex directories and files. + +The fsck command should check that they are still correct. +The fix command should fix them. + +PS: Thanks for this nice tool! + +> Good idea, [[done]] (actually, fsck just fixes them too)! --[[Joey]] diff --git a/doc/bugs/fsck_claims_failed_checksum_when_less_copies_than_required_are_found.mdwn b/doc/bugs/fsck_claims_failed_checksum_when_less_copies_than_required_are_found.mdwn new file mode 100644 index 0000000000..fe6536b6a7 --- /dev/null +++ b/doc/bugs/fsck_claims_failed_checksum_when_less_copies_than_required_are_found.mdwn @@ -0,0 +1,57 @@ + (checksum...) failed + fsck foo (fixing location log) + Only 1 of 2 trustworthy copies exist of foo + Back it up with git-annex copy. + +> You've given me severely partial output, and no test case, but until +> it says "fsck foo", the output is pertaining to some other file than foo. 
+> As far as I can see, there is no bug here. --[[Joey]] + +>> Sorry, I thought it would be obvious, but that's no excuse for not +>> providing additional explanation. The problem is that fsck tells me a +>> file's fsck has failed without printing extra details. In this case, the +>> checksum is OK while I don't have enough copies to satisfy the fsck. The +>> fact that I don't have enough copies is obviously relevant, but I would +>> still like to know if the checksums are OK. -- Richard + +>>> I think you're misreading the truncated output you posted. The actual, +>>> full output would make much more sense. --[[Joey]] + +>>>> No. I have a total of 14908 annex keys, 3333 of which are on a remote. The only message other than 'checksum OK' and the above is 'git-annex: 11577 failed'. +>>>> I checked several files manually, their checksums are OK so `git annex +>>>> fsck` is reporting those files as completely failed when they "only" miss copies. -- Richard + +>>>>> fsck considers not enough copies to be a failure condition; it prints +>>>>> error messages about it etc. That has nothing to do with checksums. +>>>>> --[[Joey]] + +>>>>>> I get that. Still, I think it would be _extremely_ useful to know what failures occurred, exactly. Not having enough copies is Not Good, yet not having enough copies and a locally correct file is _lot_ better than having not enough copies and a broken file. I.e. I would prefer: + + (checksum...) OK + Not enough copies: Only 1 of 2 trustworthy copies exist of foo + +>>>>>> or similar and at the end + + git-annex: 0 wrong checksums + git-annex: 11577 with too few copies + +>>>>>> In the end, it comes down to the distinction of different failure classes. -- Richard + +>>>>>>> For the third, and final time: +>>>>>>> # You are misreading the truncated output you posted +>>>>>>> The "checksum" line is regarding **different** file than the +>>>>>>> not enough copies message. fsck does not attempt to checksum a file +>>>>>>> that is not present. 
[[done]] --[[Joey]] + + +>>>>>>>> I realized early on that I pasted the wrong cross-passage, but as there is a ton of the same output, I didn't think it would matter. I wasn't aware that it does not try to checksum when there aren't enough copies. To be fair, you only just mentioned that. +>>>>>>>> Personally, I think that's a bug as it makes ensuring local correctness before copying a file to remotes impossible. +>>>>>>>> Either way, I really didn't know it actually _skipped_ checksumming; that part was missing. +>>>>>>>> For the benefit of anyone else who might read this, this is the correct order: + + fsck foo (fixing location log) + Only 1 of 2 trustworthy copies exist of foo + Back it up with git-annex copy. + (checksum...) failed + +>>>>>>>> If you would like to keep things this way, fine. I think it's less than ideal, but I don't want to argue, either. -- Richard diff --git a/doc/bugs/fsck_output.mdwn b/doc/bugs/fsck_output.mdwn new file mode 100644 index 0000000000..1b00dd7b37 --- /dev/null +++ b/doc/bugs/fsck_output.mdwn @@ -0,0 +1,46 @@ +When you check several files and the fsck fails, you get confusing output: + +
+O fsck test1 (checksum...) 
+E  Only 1 of 2 trustworthy copies of test1 exist.
+E  Back it up with git-annex copy.
+O
+O failed
+O fsck test2 (checksum...) 
+E  Only 1 of 2 trustworthy copies of test2 exist.
+E  Back it up with git-annex copy.
+O 
+O failed
+
+ +The newline is in the wrong place and confuses the user. It should be printed _after_ "failed". + +> This is a consequence of part of the output being printed to stderr, and +> part to stdout. I've marked the lines above with E and O. +> +> Normally a "failed" is preceded by a message output to stdout describing +> the problem; such a message will not be "\n" terminated, so a newline +> is always displayed before "failed". In this case, since the message +> is sent to stderr, it is newline terminated. +> +> Fixing this properly would involve storing state, or rethinking +> when git-annex displays newlines (and I rather like its behavior +> otherwise). +> +> A related problem occurs if an error message is unexpectedly printed. +> Dummying up an example: +> +> O get test1 (from foo...) E git-annex: failed to run ssh +> failed +> +> --[[Joey]] + +>> Well, I fixed this in all cases except a thrown non-IO error (last +>> example above), which output is printed by haskell's runtime. I'd +>> have to add a second error handler to handle those, and it's not +>> clear what it would do. Often an error will occur before anything +>> else is printed, and then the current behavior is right; if something +>> has been printed it would be nice to have a newline before the error, +>> but by the time the error is caught we'd be out of the annex monad +>> and not really have any way to know if something has been printed. 
+>> I think my fix is good enough [[done]] --[[Joey]] diff --git a/doc/bugs/git-annex-shell:_internal_error:_evacuate__40__static__41__:_strange_closure_type_30799.mdwn b/doc/bugs/git-annex-shell:_internal_error:_evacuate__40__static__41__:_strange_closure_type_30799.mdwn new file mode 100644 index 0000000000..f9a61a8590 --- /dev/null +++ b/doc/bugs/git-annex-shell:_internal_error:_evacuate__40__static__41__:_strange_closure_type_30799.mdwn @@ -0,0 +1,75 @@ +I ran git-annex (git version) on three machines with ghc-7.0.2 for about a month, but recently (no more than a week ago) I've started getting this error for every file on "git annex get": + + git-annex-shell: internal error: evacuate(static): strange closure type 30799 + (GHC version 7.0.2 for i386_unknown_linux) + Please report this as a GHC bug: http://www.haskell.org/ghc/reportabug + +There were no changes to ghc or it's modules, so I assume something has changed in git-annex itself. + +strace shows "git annnex get" (on "host1") performing following exec's: + + [pid 9481] execve("/usr/bin/rsync", ["rsync", "-p", "--progress", "--inplace", "-e", "'ssh' 'user@host2' 'git-annex-shell ''sendkey'' ''/remote/path'' ''SHA1-s6654080--abd8edec20648ade69351d68ae1c64c8074a6f0b'' ''--'''", ":", "/local/path/.git/annex/tmp/SHA1-s6654080--abd8edec20648ade69351d68ae1c64c8074a6f0b"], [/* 41 vars */]) = 0 + [pid 9482] execve("/usr/bin/ssh", ["ssh", "user@host2", "git-annex-shell 'sendkey' '/remote/path' 'SHA1-s6654080--abd8edec20648ade69351d68ae1c64c8074a6f0b' '--'", "", "rsync", "--server", "--sender", "-vpe.Lsf", "--inplace", ".", ""], [/* 41 vars */] + +I've tried running the second command directly from the shell and got the same error message from a remote GHC. 
+Adding strace before git-annex-shell to remote command yielded something like this in the end: + + stat64("/local/path.git", 0xb727d610) = -1 ENOENT (No such file or directory) + stat64("/local/path.git", 0xb727d6b0) = -1 ENOENT (No such file or directory) + waitpid(7525, [{WIFEXITED(s) && WEXITSTATUS(s) == 0}], 0) = 7525 + chdir("/home/user") = 0 + rt_sigprocmask(SIG_BLOCK, [INT], [], 8) = 0 + write(2, "git-annex-shell: internal error: ", 33git-annex-shell: internal error: ) = 33 + ... + +Note that "/local/path" here is not what's specified in rsync arguments at all, and git repo with files-to-be-fetched on "host2" is in "/remote/path", but "/local/path" is present in git remotes there since I mount it via nfs from "host1" (yes, to the same path as it's there): + + [remote "nfs"] + url = /local/path + fetch = +refs/heads/*:refs/remotes/nfs/* + push = refs/heads/*:refs/remotes/host2/* + annex-uuid = 0a4e14ba-5236-11e0-9004-7f24452c0f05 + +If I comment that remote out from "/remote/path/.git/config", "git annex get" works fine. +The only git-command git-annex-shell seem to exec there (on "host2") is "git config --list", so it's shouldn't be git trying to do something with it's remotes - it's git-annex itself, right? + +Anyways, looks like a simple path-joining error, if "/local/path.git" should be "/local/path/.git" there. + +I'm actually quite confused about what it's trying to do with that path. +Connect from "host1" to "host2" just to connect back to "host1"? +What for, when it should just fetch files from "host2"? + +> git-annex (and git-annex shell) always start up by learning what git +> remotes are locally configured, and this includes checking them to +> try to look up their annex.uuid setting. +> +> Since git will, given a remote like "url = /foo", first look in +> "/foo.git" for a bare git repository, so too does git-annex. +> I do not think this is a path joining error. That seems likely to +> be a red herring. 
--[[Joey]] + +Not sure if it's a bug or I'm doing something wrong, but if git-annex really need to check something in git remotes' paths, error message (the one at the top of this post) can be a more descriptive, I guess. +Something like "error: failed to do something with git remote X on a remote host" would've been a lot less confusing than that GHC thing. + +Thanks! + +> I've never seen anything like this error message. I don't know if the +> problem is caused by building with GHC 7, or what. You didn't say what +> OS you're using. Searching for the error message, it seems to involve +> Mac OS X. + +> For example: +>> The error "strange closure type" indicates some kind of memory corruption, which can have many different causes, from bugs in the GC to hardware failures. +> +> You said that you'd been using git-annex built with that version of GHC +> successfully before. Perhaps you could use `git bisect` to see if you can +> identify a point in git-annex's history where this started happening? +> Since you can reproduce the problem by just running git-annex-shell at +> the command line with the right parameters, it should be easy to bisect it. +> +> Probably your best bet will be changing to a different version or build of +> GHC.. 
--[[Joey]] + +--- + +forwarded to GHC upstream; closing [[done]] --[[Joey]] diff --git a/doc/bugs/git-annex-shell:_internal_error:_evacuate__40__static__41__:_strange_closure_type_30799/comment_1_1c19e716069911f17bbebd196d9e4b61._comment b/doc/bugs/git-annex-shell:_internal_error:_evacuate__40__static__41__:_strange_closure_type_30799/comment_1_1c19e716069911f17bbebd196d9e4b61._comment new file mode 100644 index 0000000000..98f0adc3db --- /dev/null +++ b/doc/bugs/git-annex-shell:_internal_error:_evacuate__40__static__41__:_strange_closure_type_30799/comment_1_1c19e716069911f17bbebd196d9e4b61._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://fraggod.pip.verisignlabs.com.pip.verisignlabs.com/" + subject="Bisect it is, then" + date="2011-04-03T04:45:49Z" + content=""" +Hm, if path's ok, guess there's no way around git-bisect indeed. Wonder if there's some kind of ccache for haskell... + +OS is linux, amd64 on \"host1\" and i386 on \"host2\" where git-annex-shell is crashing. +I'll try to come up with a commit, thanks for clarifications. 
+"""]] diff --git a/doc/bugs/git-annex-shell:_internal_error:_evacuate__40__static__41__:_strange_closure_type_30799/comment_2_a4d66f29d257044e548313e014ca3dc3._comment b/doc/bugs/git-annex-shell:_internal_error:_evacuate__40__static__41__:_strange_closure_type_30799/comment_2_a4d66f29d257044e548313e014ca3dc3._comment new file mode 100644 index 0000000000..fb36581912 --- /dev/null +++ b/doc/bugs/git-annex-shell:_internal_error:_evacuate__40__static__41__:_strange_closure_type_30799/comment_2_a4d66f29d257044e548313e014ca3dc3._comment @@ -0,0 +1,66 @@ +[[!comment format=mdwn + username="http://fraggod.pip.verisignlabs.com.pip.verisignlabs.com/" + subject="Bisect results" + date="2011-04-03T06:22:15Z" + content=""" +Completed git-bisect twice, getting roughly the same results: + + 828a84ba3341d4b7a84292d8b9002a8095dd2382 is the first bad commit + commit 828a84ba3341d4b7a84292d8b9002a8095dd2382 + Author: Joey Hess + Date: Sat Mar 19 14:33:24 2011 -0400 + + Add version command to show git-annex version as well as repository version information. 
+ + :040000 040000 ed849b7b6e9b177d6887ecebd6a0f146357824f3 1c98699dfd3fc3a3e2ce6b55150c4ef917de96e9 M Command + :100644 100644 b9c22bdfb403b0bdb1999411ccfd34e934f45f5c adf07e5b3e6260b296c982a01a73116b8a9a023c M GitAnnex.hs + :100644 100644 76dd156f83f3d757e1c20c80d689d24d0c533e16 d201cc73edb31f833b6d00edcbe4cf3f48eaecb0 M Upgrade.hs + :100644 100644 5f414e93b84589473af5b093381694090c278e50 d4a58d77a29a6a02daf13cec0df08b5aab74f65e M Version.hs + :100644 100644 f5c2956488a7afafd20374873d79579fb09b1677 f8cd577e992d38c7ec1438ce5c141eb0eb410243 M configure.hs + :040000 040000 f9b7295e997c0a5b1dda352f151417564458bd6e a30008475c1889f4fd8d60d4d9c982563380a692 M debian + :040000 040000 9d87a5d8b9b9fe7b722df303252ffd5760d66f75 08834f61a10d36651b3cdcc38389f45991acdf5e M doc + +contents of final refs/bisect: + + bad (828a84ba3341d4b7a84292d8b9002a8095dd2382) + good-33cb114be5135ce02671d8ce80440d40e97ca824 + good-942480c47f69e13cf053b8f50c98c2ce4eaa256e + good-ca48255495e1b8ef4bda5f7f019c482d2a59b431 + +\"roughly\" because second bisect gave two commits as a result, failing to build one of them (missing .o file on link, guess it's because of -j4 and bad deps in that version's build system): + + There are only 'skip'ped commits left to test. + The first bad commit could be any of: + 828a84ba3341d4b7a84292d8b9002a8095dd2382 + 5022a69e45a073046a2b14b6a4e798910c920ee9 + We cannot bisect more! + +Also noticed that \"git-annex-shell ...\" command succeeds if ran as root user, while failing from unprivileged one. +There are no permission/access errors in \"strace -f git-annex-shell ...\", so I guess it could be some bug in the GHC indeed. + +JIC, logged a whole second bisect operation. 
+Resulting log: [http://fraggod.net/static/share/git-annex-bisect.log](http://fraggod.net/static/share/git-annex-bisect.log) + +Bisect script I've used (git-annex-shell dies with error code 134 - SIGABRT on GHC error): + + res= + while true; do + if [[ -n \"$res\" ]]; then + cd /var/tmp/paludis/build/dev-scm-git-annex-scm.bak/work/git-annex-scm + echo \"---=== BISECT ($res) ===---\"; git bisect \"$res\" 2>&1; echo '---=== /BISECT ===---' + cd + rm -Rf /var/tmp/paludis/build/dev-scm-git-annex-scm + cp -a --reflink=auto /var/tmp/paludis/build/dev-scm-git-annex-scm{.bak,} + chown -R paludisbuild: /var/tmp/paludis/build/dev-scm-git-annex-scm + fi + res= + cave resolve -zx1 git-annex --skip-until-phase configure || res=skip + if [[ -z \"$res\" ]]; then + cd /remote/path + sudo -u user git-annex-shell 'sendkey' '/remote/path' 'SHA1-s6654080--abd8edec20648ade69351d68ae1c64c8074a6f0b' '--' rsync --server --sender -vpe.Lsf --inplace . '' + if [[ $? -eq 134 ]]; then res=bad; else res=good; fi + cd + fi + done 2>&1 | tee ~/git-annex-bisect.log + +"""]] diff --git a/doc/bugs/git-annex-shell:_internal_error:_evacuate__40__static__41__:_strange_closure_type_30799/comment_3_f5f1081eb18143383b2fb1f57d8640f5._comment b/doc/bugs/git-annex-shell:_internal_error:_evacuate__40__static__41__:_strange_closure_type_30799/comment_3_f5f1081eb18143383b2fb1f57d8640f5._comment new file mode 100644 index 0000000000..491b537862 --- /dev/null +++ b/doc/bugs/git-annex-shell:_internal_error:_evacuate__40__static__41__:_strange_closure_type_30799/comment_3_f5f1081eb18143383b2fb1f57d8640f5._comment @@ -0,0 +1,38 @@ +[[!comment format=mdwn + username="http://fraggod.pip.verisignlabs.com.pip.verisignlabs.com/" + subject="comment 3" + date="2011-04-03T06:57:02Z" + content=""" +Repeated bisect with -j1, just to be sure it's not a random error, and it gave me 828a84ba3341d4b7a84292d8b9002a8095dd2382 again. +Guess I'll look through the changes there a bit later and try to revert these until it works. 
+ +Not sure if it's repeatable by anyone but me (and hence worth fixing), but here's a bit more of info about the system: + + Exherbo linux + Linux sacrilege 2.6.38.2-fg.roam #4 SMP PREEMPT Mon Mar 28 21:08:47 YEKST 2011 i686 GNU/Linux + + dev-lang/ghc-7.0.2:7.0.2::installed + dev-haskell/HUnit-1.2.2.3:1.2.2.3::installed + dev-haskell/MissingH-1.1.0.3:1.1.0.3::installed + dev-haskell/QuickCheck-2.4.0.1:2.4.0.1::installed + dev-haskell/array-0.3.0.2:0.3.0.2::installed + dev-haskell/bytestring-0.9.1.7:0.9.1.7::installed + dev-haskell/containers-0.4.0.0:0.4.0.0::installed + dev-haskell/extensible-exceptions-0.1.1.2:0.1.1.2::installed + dev-haskell/filepath-1.2.0.0:1.2.0.0::installed + dev-haskell/hslogger-1.1.3:0::installed + dev-haskell/mtl-2.0.1.0:2.0.1.0::installed + dev-haskell/network-2.3.0.1:2.3.0.1::installed + dev-haskell/old-locale-1.0.0.2:1.0.0.2::installed + dev-haskell/parsec-3.1.0:3.1.0::installed + dev-haskell/pcre-light-0.4:0::installed + dev-haskell/regex-base-0.93.2:0.93.2::installed + dev-haskell/regex-compat-0.93.1:0.93.1::installed + dev-haskell/regex-posix-0.94.4:0.94.4::installed + dev-haskell/syb-0.3:0.3::installed + dev-haskell/transformers-0.2.2.0:0.2.2.0::installed + dev-haskell/utf8-string-0.3.6:0.3.6::installed + +(some stuff listed here as ::installed, but contains no files, since these packages detect whether ghc-7.0.2 already comes with the same/newer package version) + +"""]] diff --git a/doc/bugs/git-annex-shell:_internal_error:_evacuate__40__static__41__:_strange_closure_type_30799/comment_4_b1f818b85c3540591c48e7ba8560d070._comment b/doc/bugs/git-annex-shell:_internal_error:_evacuate__40__static__41__:_strange_closure_type_30799/comment_4_b1f818b85c3540591c48e7ba8560d070._comment new file mode 100644 index 0000000000..45d3d8bac4 --- /dev/null +++ b/doc/bugs/git-annex-shell:_internal_error:_evacuate__40__static__41__:_strange_closure_type_30799/comment_4_b1f818b85c3540591c48e7ba8560d070._comment @@ -0,0 +1,10 @@ +[[!comment 
format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 4" + date="2011-04-03T16:06:34Z" + content=""" +Nice work on the bisection. It's obviously a compiler bug. Having two test cases that differ in only as trivial and innocuous a commit as 828a84ba3341d4b7a84292d8b9002a8095dd2382 might help a GHC developer track it down. + +We should probably forward this as a GHC bug. I hope you can find a different version or build of GHC to build git-annex with. +"""]] diff --git a/doc/bugs/git-annex-shell:_internal_error:_evacuate__40__static__41__:_strange_closure_type_30799/comment_5_67406dd8d9bd4944202353508468c907._comment b/doc/bugs/git-annex-shell:_internal_error:_evacuate__40__static__41__:_strange_closure_type_30799/comment_5_67406dd8d9bd4944202353508468c907._comment new file mode 100644 index 0000000000..bffa9bb868 --- /dev/null +++ b/doc/bugs/git-annex-shell:_internal_error:_evacuate__40__static__41__:_strange_closure_type_30799/comment_5_67406dd8d9bd4944202353508468c907._comment @@ -0,0 +1,13 @@ +[[!comment format=mdwn + username="http://fraggod.pip.verisignlabs.com.pip.verisignlabs.com/" + subject="Reported the issue to GHC" + date="2011-04-07T13:44:36Z" + content=""" +Finally got around to [report the issue to GHC tracker](http://hackage.haskell.org/trac/ghc/ticket/5085#comment:7). + +Looks quite alike (at least to the haskell-illiterate person like me) to a highest-priority issue that's hanging right at the top of the list. +There are other similar reports, but they seem to be either related to PowerPC Macs, closed as invalid or due to needinfo inactivity. + +Guess any further discussion belongs there, unless ghc developers will bounce it back. +Thanks a lot for your help, Joey, and for sharing a great thing that git-annex is. 
+"""]] diff --git a/doc/bugs/git-annex_branch_corruption.mdwn b/doc/bugs/git-annex_branch_corruption.mdwn new file mode 100644 index 0000000000..9c864d85f0 --- /dev/null +++ b/doc/bugs/git-annex_branch_corruption.mdwn @@ -0,0 +1,95 @@ +Below is a test case which shows a way that the git-annex branch +can become corrupted and lose data, including location log records and +uuid.log lines. + +At the end, a commit on the git-annex branch removes one of the 2 lines +from the uuid.log; which should never happen. + +The actual problem occurs earlier, at the "push point". Here a repo is +cloned from the main one, initialized (adding the last uuid.log line), +and then pushed back to the main one. That push is a fast-forward, so is +allowed to directly update the git-annex branch in the main repo: + + b884fe5..c497739 git-annex -> git-annex + +Now the git-annex branch has a change that is not reflected in +`.git/annex/index`, so the next time a change is made, it's committed +using the out of date index, which causes a reversion of the changes +that were pushed to the branch. + +--- + +## Thoughts + +This is essentially the same reason why git blocks pushes to the checked-out +branch of a non-bare repository. + +This problem only affects workflows that involve pushing. Pulling workflows +do not directly update the local git-annex branch, so avoid the problem. + +And while bare repos are pushed to, they rarely have changes made directly +to their git-annex branches, so while I think the same problem could +happen with pushing to a bare repo, it's unlikely. + +None of which is to say this is not a bad bug that needs to be comprehensively +fixed. + +Probably git-annex needs to record which ref of the git-annex branch +corresponds to its index, and if the branch is at a different ref, +merge it into the index. + +> And now that's [[done]]. I managed to do it with very little slowdown. 
+> +> A side benefit is that users can now safely check out the git-annex +> branch and commit changes to it, and git-annex will notice them. +> Before, it was documented to ignore such changes. +> --[[Joey]] + +--- + +## Workaround + +Users who want to prevent this bug from occurring when pushing to their +non-bare repositories can install this script as `.git/hooks/update` + +
+#!/bin/sh
+if [ "$1" = refs/heads/git-annex ]; then
+	exit 1
+fi
+
+ +--[[Joey]] + +--- + +## Test Case +
+#!/bin/sh
+mkdir annextest
+cd annextest
+
+git init dir1
+cd dir1
+git annex init
+touch foo 
+echo hi > bar
+git annex add
+git commit -m add
+
+cd ..
+git clone dir1 dir2
+cd dir2
+git annex init otherdir
+git annex get
+# push point
+git push
+
+cd ..
+cd dir1
+echo "before"
+git show git-annex:uuid.log
+git annex drop foo --force
+echo "after"
+git show git-annex:uuid.log
+
diff --git a/doc/bugs/git-annex_branch_push_race.mdwn b/doc/bugs/git-annex_branch_push_race.mdwn new file mode 100644 index 0000000000..013ff70dd5 --- /dev/null +++ b/doc/bugs/git-annex_branch_push_race.mdwn @@ -0,0 +1,45 @@ +The fix for the [[git-annex_branch_corruption]] bug is subject to a race. +With that fix, git-annex does this when committing a change to the branch: + +1. lock the journal file (this avoids git-annex racing itself, FWIW) +2. check what the head of the branch points to, to see if a newer branch + has appeared +3. if so, updates the index file from the branch +4. stages changes in the index +5. commits to the branch using the index file + +If a push to the branch comes in during 2-5, then +[[git-annex_branch_corruption]] could still occur. + +--- + +## approach 1, using locking + +Add an update hook and a post-update hook. The update hook +will use locking to ensure that no git-annex is currently running +a commit, and block any git-annex's from starting one. It +will background itself, and remain running during the push. +The post-update hook will signal it to exit. + +I don't like this approach much, since it involves a daemon, two hooks, +and lots of things to go wrong. And it blocks using git-annex during a +push. This approach should be a last resort. + +## approach 2, lockless method + +After a commit is made to the branch, check to see if the parent of +the commit is the same ref that the index file was last updated to. If it's +not, then the race occurred. + +How to recover from the race? Well, just union merging the parent of the +commit into the index file and re-committing should work, I think. When +the race occurs, the commit reverts its parent's changes, and this will +redo them. + +(Of course, this re-commit will also be subject to the race, and +will need the same check for the race as the other commits. It won't loop +forever, I hope.) + +> [[done]] and tested. 
+ +--[[Joey]] diff --git a/doc/bugs/git-annex_directory_hashing_problems_on_osx.mdwn b/doc/bugs/git-annex_directory_hashing_problems_on_osx.mdwn new file mode 100644 index 0000000000..db6a35293c --- /dev/null +++ b/doc/bugs/git-annex_directory_hashing_problems_on_osx.mdwn @@ -0,0 +1,100 @@ +Currently the hashed directories in .git-annex allow for upper and lower case directory names... on linux (or any case sensitive filesystem) the directory names such as 'Gg' and 'GG' are different and unique. However on systems like OSX (and probably windows if it is ever supported) the directory names 'Gg' is the same as 'GG' + +In one of the annex'd repos that I have this has occured... + +
+$ git add -i                                                                                          
+           staged     unstaged path
+  1:    unchanged        +1/-1 .git-annex/GM/GV/WORM-s183630166-m1301072171--somefile.log
+  2:    unchanged        +1/-1 .git-annex/Gm/GV/WORM-s183630166-m1301072171--somefile.log
+
+ + +this has somewhat confused git when it tries to stage/merge files, I didn't notice this at first, but it is definitely a problem for someone using case insensitive filesystems like the default OSX HFS+ formats or vfat/fat32. + +> I feel a bit stupid to not have considered case-insensitive filesystems. +> They are just so far from where I have lived for 20 years that it's hard +> to keep them in mind. +> +> I guess that +> [[git-annex_has_issues_with_git_when_staging__47__commiting_logs]] is +> somehow a consequence (or cause?) of this, but I don't quite understand +> how this is causing git to fail to stage files, or stage the same file +> twice under different capitalizations. git-annex always will run git add +> on the path with the "correct" capitalization. So unless something else +> has added the path with the other capitalization (perhaps git add +> .git-annex manually?) I don't understand how you get to this state. +> --[[Joey]] + +>> I think I got myself into this situation when I copied some files over from a HFS+ partition to a GPFS network share (which is pretty posix compliant) over samba. It probably is related to the [[git-annex_has_issues_with_git_when_staging__47__commiting_logs]]. I thought they were unique enough to have two bug reports logged as one is a git behavioural thing and the other is git-annex specific. + +>>> If you copied `.git/` over, perhaps you got a git repo without +>>> core.ignorecase set right for the filesystem it landed on? + +>>>> I usually git clone or do a fresh repository and pull things in, I was also unaware of this ignorecase setting as well. + +>>> +>>> Something like this might reproduce it: + +
+# mkdir test; cd test; git init
+# git config core.ignorecase false
+# mkdir Foo
+# touch Foo/bar
+# git add Foo/bar
+# git add foo/bar
+# git add fOo/bar
+# git status
+# touch foo/other
+# git add fOo/other
+# git status
+
+ +>>>> And then either git commit or git clone would probably get confused +>>>> if it thought 3 distinct files had been committed. +>>>> --[[Joey]] + +>>>>> Doing the above test on a HFS+ partition yields this + +
+## with ignorecase=false
+commit bb024c6fd7482b2d10f60ae899cb7a949aca1ad8
+Author: Jimmy Tang 
+Date:   Sun Mar 27 18:40:24 2011 +0100
+
+    commit
+
+diff --git a/Foo/bar b/Foo/bar
+new file mode 100644
+index 0000000..e69de29
+diff --git a/fOo/bar b/fOo/bar
+new file mode 100644
+index 0000000..e69de29
+diff --git a/fOo/other b/fOo/other
+new file mode 100644
+index 0000000..e69de29
+diff --git a/foo/bar b/foo/bar
+new file mode 100644
+index 0000000..e69de29
+
+ +>>>>> and without changing ignorecase + +
+commit 909a089158ffb98f8e91f98905e2bfdc7234666f
+Author: Jimmy Tang 
+Date:   Sun Mar 27 18:46:57 2011 +0100
+
+    commit
+
+diff --git a/Foo/bar b/Foo/bar
+new file mode 100644
+index 0000000..e69de29
+diff --git a/Foo/other b/Foo/other
+new file mode 100644
+index 0000000..e69de29
+
+ +> Closing this bug, as it seems I have dealt with it adequately now. +> [[done]] +> --[[Joey]] diff --git a/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_10_f3594de3ba2ab17771a4b116031511bb._comment b/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_10_f3594de3ba2ab17771a4b116031511bb._comment new file mode 100644 index 0000000000..c3e6b5e598 --- /dev/null +++ b/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_10_f3594de3ba2ab17771a4b116031511bb._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 10" + date="2011-04-01T16:11:52Z" + content=""" +No, I don't need a copy of your repo now. +"""]] diff --git a/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_11_97de7252bf5d2a4f1381f4b2b4e24ef8._comment b/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_11_97de7252bf5d2a4f1381f4b2b4e24ef8._comment new file mode 100644 index 0000000000..db605f9650 --- /dev/null +++ b/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_11_97de7252bf5d2a4f1381f4b2b4e24ef8._comment @@ -0,0 +1,13 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 11" + date="2011-04-02T17:53:58Z" + content=""" +I have pushed out a preliminary fix. The old mixed-case directories will be left where they are, and still read from by git-annex. New data will be written to new, lower-case directories. I think that once git stops seeing changes being made +to mixed-case, colliding directories, the bugs you ran into won't manifest any more. + +You will need to find a way to get your git repository out of the state where it complains about uncommitted files (and won't let you commit them). I have not found a reliable way to do that; git reset --hard worked in one case but not in another. May need to clone a fresh git repository. + +Let me know how it works out. 
+"""]] diff --git a/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_12_f1c53c3058a587185e7a78d84987539d._comment b/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_12_f1c53c3058a587185e7a78d84987539d._comment new file mode 100644 index 0000000000..5f9a0ae275 --- /dev/null +++ b/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_12_f1c53c3058a587185e7a78d84987539d._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 12" + date="2011-04-02T17:58:24Z" + content=""" +Also, you can delete `.git-annex/??` if you want to, then running `git annex fsck --fast` in each of your clones would regenerate the data using only the lower-case hash directories. +"""]] diff --git a/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_13_4f56aea35effe5c10ef37d7ad7adb48c._comment b/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_13_4f56aea35effe5c10ef37d7ad7adb48c._comment new file mode 100644 index 0000000000..b4a5a72d01 --- /dev/null +++ b/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_13_4f56aea35effe5c10ef37d7ad7adb48c._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 13" + date="2011-04-03T07:43:37Z" + content=""" +Ok, thanks for the fix. It seems the fix isn't too reliable with my repos, I get different numbers of \"** No known copies of...\" in the various cloned repos that I have. After all the \"messing\" that I have done to my repos I think git-annex has gotten very confused. I will just leave things as they are and let git-annex slowly migrate over to the new format or re-clone from a linux source and see how things go. I will report back on this issue in abit after I use it more to see. 
+"""]] diff --git a/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_14_cc2a53c31332fe4b828ef1e72c2a4d49._comment b/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_14_cc2a53c31332fe4b828ef1e72c2a4d49._comment new file mode 100644 index 0000000000..b92c3ab4ab --- /dev/null +++ b/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_14_cc2a53c31332fe4b828ef1e72c2a4d49._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 14" + date="2011-04-03T08:24:17Z" + content=""" +I meant to say in it wasn't reliable when I was following the instructions for \"Comment 12\". I did find that just doing a \"git annex copy -t externalusb .\" then a \"git annex drop .\" from the root of my cloned and \"none trusted\" annexed repos to be more reliable, it just means I temporarily need a load of space to get myself out of my earlier mess. + +On testing this bug fix, I found a minor behavioural issue with [[git annex copy -f REMOTE . doesn't work as expected]] +"""]] diff --git a/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_15_37f1d669c1fa53ee371f781c7bb820ae._comment b/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_15_37f1d669c1fa53ee371f781c7bb820ae._comment new file mode 100644 index 0000000000..d722d546a3 --- /dev/null +++ b/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_15_37f1d669c1fa53ee371f781c7bb820ae._comment @@ -0,0 +1,17 @@ +[[!comment format=mdwn + username="gernot" + ip="213.168.117.192" + subject="comment 15" + date="2011-04-03T15:41:00Z" + content=""" +I also ran into problems on a case-insensitive HFS+ file system, it seems. I +tried following the instructions in comment 12: + + 1. Remove everything in .git-annex besides uuid.log and trust.log + 2. git annex fsck --fast + 3. Commit + +However, I still see upper and lower case directories in .git-annex. 
Did I +misunderstand that they should all be lower case now? + +"""]] diff --git a/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_16_8a4ab1af59098f4950726cf53636c2b3._comment b/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_16_8a4ab1af59098f4950726cf53636c2b3._comment new file mode 100644 index 0000000000..97eab78c91 --- /dev/null +++ b/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_16_8a4ab1af59098f4950726cf53636c2b3._comment @@ -0,0 +1,22 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 16" + date="2011-04-03T16:02:33Z" + content=""" +I think the correct steps should be, make a backup first :) then ... + +1. git pull # update your clone, and commit everything so you don't lose anything +2. git annex fsck --fast # check the repo first, just in case +3. rm -rf .git-annex/?? # remove the old metadata +4. git annex fsck --fast # get git annex to regenerate it all +5. push your changes out to your other repos, you will need to make sure git-annex is updated everywhere if there are remotes in your setup. + +I eventually migrated all of my own annex'd repos and I no longer have the old hashed directories but the new ones in the form + + .git/annex/aaa/bbb/foo.log + +I did lose some tracking information but not data (as far as I can see for now), but that was quickly fixed by pushing and pulling to my bare repo which tracks most of my data. + +I also found that it worked a bit more reliably for me on the copies of repos that were located on case sensitive filesystems, but I guess that was expected. 
+"""]] diff --git a/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_17_515d5c5fbf5bd0c188a4f1e936d913e2._comment b/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_17_515d5c5fbf5bd0c188a4f1e936d913e2._comment new file mode 100644 index 0000000000..f7feac67cf --- /dev/null +++ b/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_17_515d5c5fbf5bd0c188a4f1e936d913e2._comment @@ -0,0 +1,9 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 17" + date="2011-04-03T16:53:51Z" + content=""" +@gernot step 0 is to upgrade git-annex to current git, on all systems where you use it, in case that wasn't clear. + +"""]] diff --git a/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_18_db64c91dd1322a0ab168190686db494f._comment b/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_18_db64c91dd1322a0ab168190686db494f._comment new file mode 100644 index 0000000000..550558ec16 --- /dev/null +++ b/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_18_db64c91dd1322a0ab168190686db494f._comment @@ -0,0 +1,14 @@ +[[!comment format=mdwn + username="gernot" + ip="213.168.117.192" + subject="comment 18" + date="2011-04-03T19:46:16Z" + content=""" +Joey, sorry, I got it wrong. I thought upgrading git didn't help and you +adjusted things in git-annex instead. + +Anyway, can I get around upgrading on all hosts by reformatting the drive to +case-sensitive HFS+? Or will I have to upgrade git (currently version 1.7.2.5) +eventually anyway? 
+ +"""]] diff --git a/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_19_ff555c271637af065203ca99c9eeaf89._comment b/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_19_ff555c271637af065203ca99c9eeaf89._comment new file mode 100644 index 0000000000..2676b35897 --- /dev/null +++ b/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_19_ff555c271637af065203ca99c9eeaf89._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 19" + date="2011-04-03T19:53:44Z" + content=""" +Git does not need to be upgraded. Git-annex needs to be upgraded to git rev 616e6f8a840ef4d99632d12a2e7ea15c3cfb1805 or newer, on all machines. +"""]] diff --git a/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_1_9a7b09de132097100c1a68ea7b846727._comment b/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_1_9a7b09de132097100c1a68ea7b846727._comment new file mode 100644 index 0000000000..aa5e46ca2b --- /dev/null +++ b/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_1_9a7b09de132097100c1a68ea7b846727._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 1" + date="2011-03-28T07:23:41Z" + content=""" +One possible work around is to just create a loopback file system with a case sensitive filesystem. I think I might do that for anything that I really care about for now. 
+"""]] diff --git a/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_20_7e328b970169fffb8bce373d1522743b._comment b/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_20_7e328b970169fffb8bce373d1522743b._comment new file mode 100644 index 0000000000..8f0f5ef180 --- /dev/null +++ b/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_20_7e328b970169fffb8bce373d1522743b._comment @@ -0,0 +1,19 @@ +[[!comment format=mdwn + username="ssqq" + ip="208.70.196.4" + subject="Still a problem on 0.20110523" + date="2011-06-02T20:31:55Z" + content=""" +Hi, + +(I'm new to git and git annex, so please forgive any mistakes I make...) + +My repo is messed up right now. The fact that I copied the repo with rsync -a back and forth from a case insensitive filesystem to a case sensitive one, probably didn't help. + +I believe the annexed files in .git/annex/objects/ are still using a mixed case directory hashing scheme. That's the problem I'm having. The symlinks point to the wrong case and are now broken. I don't think the latest versions of git-annex changed that (it only changed the hashing under .git-annex, right?). + +Even if I clean up my repo, I think I'm still going to have a problem because I have one repo on an OS X case insensitive filesystem and my other repos on case sensitive Linux filesystems. Potentially the directory name under .git/annex/objects will have a different case. Then the symlink might have a different case than my Linux FS. Does git-annex track changes in git by the contents of the symlink? In which case the case difference would show up as a change even though there is no change? + +Is it possible to change the directory hashing scheme under .git/annex/objects to use lowercase names? 
+ +"""]] diff --git a/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_21_98f632652b0db9131b0173d3572f4d62._comment b/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_21_98f632652b0db9131b0173d3572f4d62._comment new file mode 100644 index 0000000000..453a8be11b --- /dev/null +++ b/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_21_98f632652b0db9131b0173d3572f4d62._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 21" + date="2011-06-10T16:46:03Z" + content=""" +@seqq git-annex always uses the same case when creating and accessing the files pointed to by the symlinks. So it will not matter if it's used on a case-insensative, or case-insensative but preserving system like OSX. + +You need to fix up the cases of the files in .git/annex/objects to what it expects. I'm not sure what would be the best way to do that. The method described in [[walkthrough/recover_data_from_lost+found]] might work well. +"""]] diff --git a/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_2_174952fc3e3be12912e5fcfe78f2dd13._comment b/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_2_174952fc3e3be12912e5fcfe78f2dd13._comment new file mode 100644 index 0000000000..6e6e5dc6be --- /dev/null +++ b/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_2_174952fc3e3be12912e5fcfe78f2dd13._comment @@ -0,0 +1,185 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 2" + date="2011-03-28T15:09:45Z" + content=""" +I think I know how I got myself into this mess... I was on my mac workstation and I had just pulled in a change set from another repo on a linux workstation after I had a made a bunch of moves. here's a bit of a log of what happened... + + +
+jtang@x00:~/sources $ git pull cports-devel master
+Warning: untrusted X11 forwarding setup failed: xauth key data not generated
+Warning: No xauth data; using fake authentication data for X11 forwarding.
+remote: Counting objects: 4195, done.
+remote: Compressing objects: 100% (1135/1135), done.
+remote: Total 2582 (delta 866), reused 2576 (delta 860)
+Receiving objects: 100% (2582/2582), 229.42 KiB | 111 KiB/s, done.
+Resolving deltas: 100% (866/866), completed with 9 local objects.
+From cports-devel:/home/people/jtang/sources
+ * branch            master     -> FETCH_HEAD
+Updating 319df99..ab0a98c
+error: Your local changes to the following files would be overwritten by merge:
+	.git-annex/09/5X/WORM-s361516678-m1301310614--l_fcompxe_intel64_2011.2.137.tgz.log
+	.git-annex/43/2g/WORM-s19509673-m1301310496--l_fcompxe_2011.2.137_redist.tgz.log
+	.git-annex/4J/qF/WORM-s18891115-m1301310934--w_flm_p_1.0.011_ia64.zip.log
+	.git-annex/87/w1/WORM-s12212473-m1301310909--w_flm_p_1.0.011_ia32.zip.log
+	.git-annex/99/Jq/WORM-s194345957-m1301310926--l_mkl_10.3.2.137_ia32.log
+	.git-annex/99/kf/WORM-s9784531-m1301311680--l_ccompxe_2011.2.137_redist.log
+	.git-annex/FF/f3/WORM-s93033394-m1301311706--l_gen_ipp_7.0.2.137.log
+	.git-annex/MF/xZ/WORM-s515140733-m1301310936--l_cprof_p_11.1.075.log
+	.git-annex/XW/X8/WORM-s355559731-m1301310797--l_mkl_10.3.2.137.log
+	.git-annex/fJ/mZ/WORM-s1372886477-m1301313368--l_cproc_p_11.1.075.log
+	.git-annex/j7/Q9/WORM-s44423202-m1301310622--l_cprof_p_11.1.075_redist.log
+	.git-annex/k4/K7/WORM-s239539070-m1301310760--l_mkl_10.3.2.137_intel64.log
+	.git-annex/kz/01/WORM-s279573314-m1301310783--l_cprof_p_11.1.075_ia32.log
+	.git-annex/p6/Kq/WORM-s31199343-m1301311829--l_cproc_p_11.1.075_redist.log
+	.git-annex/pz/J5/WORM-s626995277-m1301312301--l_ccompxe_ia32_2011.2.137.log
+	.git-annex/v3/kX/WORM-s339693045-m1301310851--l_cprof_p_11.1.075_intel64.log
+Please, commit your changes or stash them before you can merge.
+error: Your local changes to the following files would be overwritten by merge:
+	.git-annex/12/3W/WORM-s3058814-m1276699694--Botan-1.8.9.tgz.log
+	.git-annex/1G/qV/WORM-s9122-m1251558854--Array-Compare-2.01.tar.gz.log
+	.git-annex/3W/W5/WORM-s231523-m1270740744--DBD-Pg-2.17.1.tar.gz.log
+	.git-annex/3x/PX/WORM-s380310-m1293025187--HTSeq-0.4.7.tar.gz.log
+	.git-annex/45/gk/WORM-s67337-m1248732018--ExtUtils-Install-1.54.tar.gz.log
+	.git-annex/4J/7Q/WORM-s8608-m1224694862--Algorithm-Munkres-0.08.tar.gz.log
+	.git-annex/4g/XQ/WORM-s89208-m1278682033--HTML-Parser-3.66.tar.gz.log
+	.git-annex/54/jw/WORM-s300163-m1226422051--AcePerl-1.92.tar.gz.log
+	.git-annex/63/kj/WORM-s1213460-m1262942058--DBD-SQLite-1.29.tar.gz.log
+	.git-annex/6Z/42/WORM-s4074-m943766010--File-Sync-0.09.tar.gz.log
+	.git-annex/8F/M5/WORM-s6989-m1263161127--Digest-HMAC-1.02.tar.gz.log
+	.git-annex/G2/FK/WORM-s3309-m1163872981--Bundle-BioPerl-2.1.8.tar.gz.log
+	.git-annex/Gk/XF/WORM-s23572243-m1279546902--EMBOSS-6.3.1.tar.gz.log
+	.git-annex/Jk/X6/WORM-s566429-m1279309002--DBI-1.612.tar.gz.log
+	.git-annex/K6/fV/WORM-s1561451-m1240055295--Convert-Binary-C-0.74.tar.gz.log
+	.git-annex/KM/4q/WORM-s146959-m1268515086--Graph-0.94.tar.gz.log
+	.git-annex/MF/m2/WORM-s425766-m1212514609--Data-Stag-0.11.tar.gz.log
+	.git-annex/QJ/P6/WORM-s1045868-m1282215033--9base-6.tar.gz.log
+	.git-annex/Qm/WG/WORM-s39078-m1278163547--Digest-SHA1-2.13.tar.gz.log
+	.git-annex/Wq/Fj/WORM-s45680640-m1297862101--BclConverter-1.7.1.tar.log
+	.git-annex/Wq/Wm/WORM-s263536640-m1295025537--CASAVA_v1.7.0.tar.log
+	.git-annex/XW/qm/WORM-s36609-m1276050470--Bio-ASN1-EntrezGene-1.10-withoutworldwriteables.tar.gz.log
+	.git-annex/f7/g0/WORM-s40872-m1278273227--ExtUtils-ParseXS-2.2206.tar.gz.log
+	.git-annex/j3/JF/WORM-s11753-m1232427595--Clone-0.31.tar.gz.log
+	.git-annex/kX/9g/WORM-s84690-m1229117599--GraphViz-2.04.tar.gz.log
+	.git-annex/km/z5/WORM-s44634-m1275505134--Authen-SASL-2.15.tar.gz.log
+	.git-annex/kw/J3/WORM-s132396-m1278780649--DBD-mysql-4.016.tar.gz.log
+	.git-annex/p5/1P/WORM-s53736-m1278673485--Archive-Tar-1.64.tar.gz.log
+	.git-annex/wv/zG/WORM-s30584-m1268774021--ExtUtils-CBuilder-0.2703.tar.gz.log
+	.git-annex/x5/7v/WORM-s10462526-m1254242591--BioPerl-1.6.1.tar.gz.log
+Please, commit your changes or stash them before you can merge.
+error: The following untracked working tree files would be overwritten by merge:
+	.git-annex/1g/X3/WORM-s309910751-m1301311322--l_fcompxe_ia32_2011.2.137.tgz.log
+	.git-annex/3w/Xf/WORM-s805764902-m1301312756--l_cproc_p_11.1.075_intel64.log
+	.git-annex/9Q/Wz/WORM-s1234430253-m1301311891--l_ccompxe_2011.2.137.log
+	.git-annex/FQ/4z/WORM-s318168323-m1301310848--l_cprof_p_11.1.075_ia64.log
+	.git-annex/FV/0P/WORM-s710135470-m1301311835--l_ccompxe_intel64_2011.2.137.log
+	.git-annex/Jx/qM/WORM-s599386592-m1301310731--l_fcompxe_2011.2.137.tgz.log
+	.git-annex/KX/w1/WORM-s35976002-m1301312193--l_tbb_3.0.6.174.log
+	.git-annex/Vw/jK/WORM-s15795178-m1301310913--w_flm_p_1.0.011_intel64.zip.log
+	.git-annex/jK/zK/WORM-s374617670-m1301312705--l_ipp_7.0.2.137_intel64.log
+	.git-annex/vK/kv/WORM-s584342291-m1301312669--l_cproc_p_11.1.075_ia64.log
+	.git-annex/vw/v1/WORM-s736986678-m1301312794--l_cproc_p_11.1.075_ia32.log
+	.git-annex/zq/7X/WORM-s343075585-m1301312233--l_ipp_7.0.2.137_ia32.log
+Please move or remove them before you can merge.
+Aborting
+1|jtang@x00:~/sources $ git status
+# On branch master
+# Your branch is ahead of 'origin/master' by 2 commits.
+#
+# Changes to be committed:
+#   (use \"git reset HEAD ...\" to unstage)
+#
+#	modified:   .git-annex/09/5X/WORM-s361516678-m1301310614--l_fcompxe_intel64_2011.2.137.tgz.log
+#	modified:   .git-annex/43/2g/WORM-s19509673-m1301310496--l_fcompxe_2011.2.137_redist.tgz.log
+#	modified:   .git-annex/4J/qF/WORM-s18891115-m1301310934--w_flm_p_1.0.011_ia64.zip.log
+#	modified:   .git-annex/87/w1/WORM-s12212473-m1301310909--w_flm_p_1.0.011_ia32.zip.log
+#	modified:   .git-annex/99/Jq/WORM-s194345957-m1301310926--l_mkl_10.3.2.137_ia32.log
+#	modified:   .git-annex/99/kf/WORM-s9784531-m1301311680--l_ccompxe_2011.2.137_redist.log
+#	modified:   .git-annex/FF/f3/WORM-s93033394-m1301311706--l_gen_ipp_7.0.2.137.log
+#	modified:   .git-annex/MF/xZ/WORM-s515140733-m1301310936--l_cprof_p_11.1.075.log
+#	modified:   .git-annex/XW/X8/WORM-s355559731-m1301310797--l_mkl_10.3.2.137.log
+#	modified:   .git-annex/fJ/mZ/WORM-s1372886477-m1301313368--l_cproc_p_11.1.075.log
+#	modified:   .git-annex/j7/Q9/WORM-s44423202-m1301310622--l_cprof_p_11.1.075_redist.log
+#	modified:   .git-annex/k4/K7/WORM-s239539070-m1301310760--l_mkl_10.3.2.137_intel64.log
+#	modified:   .git-annex/kz/01/WORM-s279573314-m1301310783--l_cprof_p_11.1.075_ia32.log
+#	modified:   .git-annex/p6/Kq/WORM-s31199343-m1301311829--l_cproc_p_11.1.075_redist.log
+#	modified:   .git-annex/pz/J5/WORM-s626995277-m1301312301--l_ccompxe_ia32_2011.2.137.log
+#	modified:   .git-annex/v3/kX/WORM-s339693045-m1301310851--l_cprof_p_11.1.075_intel64.log
+#
+# Changes not staged for commit:
+#   (use \"git add ...\" to update what will be committed)
+#   (use \"git checkout -- ...\" to discard changes in working directory)
+#
+#	modified:   .git-annex/12/3W/WORM-s3058814-m1276699694--Botan-1.8.9.tgz.log
+#	modified:   .git-annex/1G/qV/WORM-s9122-m1251558854--Array-Compare-2.01.tar.gz.log
+#	modified:   .git-annex/3W/W5/WORM-s231523-m1270740744--DBD-Pg-2.17.1.tar.gz.log
+#	modified:   .git-annex/3x/PX/WORM-s380310-m1293025187--HTSeq-0.4.7.tar.gz.log
+#	modified:   .git-annex/45/gk/WORM-s67337-m1248732018--ExtUtils-Install-1.54.tar.gz.log
+#	modified:   .git-annex/4J/7Q/WORM-s8608-m1224694862--Algorithm-Munkres-0.08.tar.gz.log
+#	modified:   .git-annex/4g/XQ/WORM-s89208-m1278682033--HTML-Parser-3.66.tar.gz.log
+#	modified:   .git-annex/54/jw/WORM-s300163-m1226422051--AcePerl-1.92.tar.gz.log
+#	modified:   .git-annex/63/kj/WORM-s1213460-m1262942058--DBD-SQLite-1.29.tar.gz.log
+#	modified:   .git-annex/6Z/42/WORM-s4074-m943766010--File-Sync-0.09.tar.gz.log
+#	modified:   .git-annex/8F/M5/WORM-s6989-m1263161127--Digest-HMAC-1.02.tar.gz.log
+#	modified:   .git-annex/G2/FK/WORM-s3309-m1163872981--Bundle-BioPerl-2.1.8.tar.gz.log
+#	modified:   .git-annex/Gk/XF/WORM-s23572243-m1279546902--EMBOSS-6.3.1.tar.gz.log
+#	modified:   .git-annex/Jk/X6/WORM-s566429-m1279309002--DBI-1.612.tar.gz.log
+#	modified:   .git-annex/K6/fV/WORM-s1561451-m1240055295--Convert-Binary-C-0.74.tar.gz.log
+#	modified:   .git-annex/KM/4q/WORM-s146959-m1268515086--Graph-0.94.tar.gz.log
+#	modified:   .git-annex/MF/m2/WORM-s425766-m1212514609--Data-Stag-0.11.tar.gz.log
+#	modified:   .git-annex/QJ/P6/WORM-s1045868-m1282215033--9base-6.tar.gz.log
+#	modified:   .git-annex/Qm/WG/WORM-s39078-m1278163547--Digest-SHA1-2.13.tar.gz.log
+#	modified:   .git-annex/Wq/Fj/WORM-s45680640-m1297862101--BclConverter-1.7.1.tar.log
+#	modified:   .git-annex/Wq/Wm/WORM-s263536640-m1295025537--CASAVA_v1.7.0.tar.log
+#	modified:   .git-annex/XW/qm/WORM-s36609-m1276050470--Bio-ASN1-EntrezGene-1.10-withoutworldwriteables.tar.gz.log
+#	modified:   .git-annex/Zq/7X/WORM-s343075585-m1301312233--l_ipp_7.0.2.137_ia32.log
+#	modified:   .git-annex/f7/g0/WORM-s40872-m1278273227--ExtUtils-ParseXS-2.2206.tar.gz.log
+#	modified:   .git-annex/j3/JF/WORM-s11753-m1232427595--Clone-0.31.tar.gz.log
+#	modified:   .git-annex/kX/9g/WORM-s84690-m1229117599--GraphViz-2.04.tar.gz.log
+#	modified:   .git-annex/km/z5/WORM-s44634-m1275505134--Authen-SASL-2.15.tar.gz.log
+#	modified:   .git-annex/kw/J3/WORM-s132396-m1278780649--DBD-mysql-4.016.tar.gz.log
+#	modified:   .git-annex/p5/1P/WORM-s53736-m1278673485--Archive-Tar-1.64.tar.gz.log
+#	modified:   .git-annex/wv/zG/WORM-s30584-m1268774021--ExtUtils-CBuilder-0.2703.tar.gz.log
+#	modified:   .git-annex/x5/7v/WORM-s10462526-m1254242591--BioPerl-1.6.1.tar.gz.log
+#
+# Untracked files:
+#   (use \"git add ...\" to include in what will be committed)
+#
+#	.git-annex/1G/X3/
+#	.git-annex/3W/Xf/
+#	.git-annex/9q/Wz/
+#	.git-annex/Fq/4z/
+#	.git-annex/Jk/zK/
+#	.git-annex/Kx/w1/
+#	.git-annex/VK/kv/
+#	.git-annex/fv/0P/
+#	.git-annex/jX/qM/
+#	.git-annex/vW/jK/
+#	.git-annex/vW/v1/
+jtang@x00:~/sources $ git commit -a -m \"snap\"
+[master 45f254a] snap
+ 47 files changed, 64 insertions(+), 30 deletions(-)
+jtang@x00:~/sources $ git status
+# On branch master
+# Your branch is ahead of 'origin/master' by 3 commits.
+#
+# Untracked files:
+#   (use \"git add ...\" to include in what will be committed)
+#
+#	.git-annex/1G/X3/
+#	.git-annex/3W/Xf/
+#	.git-annex/9q/Wz/
+#	.git-annex/Fq/4z/
+#	.git-annex/Jk/zK/
+#	.git-annex/Kx/w1/
+#	.git-annex/VK/kv/
+#	.git-annex/fv/0P/
+#	.git-annex/jX/qM/
+#	.git-annex/vW/jK/
+#	.git-annex/vW/v1/
+nothing added to commit but untracked files present (use \"git add\" to track)
+jtang@x00:~/sources $ git pull
+
+"""]] diff --git a/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_3_a18ada7ac74c63be5753fdb2fe68dae5._comment b/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_3_a18ada7ac74c63be5753fdb2fe68dae5._comment new file mode 100644 index 0000000000..00988ab58c --- /dev/null +++ b/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_3_a18ada7ac74c63be5753fdb2fe68dae5._comment @@ -0,0 +1,18 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 3" + date="2011-03-28T15:25:18Z" + content=""" +So, there is evidence here of a circumstance caused by the [[other_bug|git-annex_has_issues_with_git_when_staging__47__commiting_logs]], as I suspected. + +I don't think that manual `git commit -a` caused the problem. I suspect it was a subsequent `git add` that caused git to follow the wrong case paths and add the files in the wrong place. Ie, when you run \"git add .git-annex\", it recurses into `.git-annex/Gm/`, and adds files using that case, that were previously added from `.git-annex/GM/`. + +For completeness, can you verify this repo's core.ignorecase setting? + +--- + +I hate that you are stuck using loop filesystems to work around this bug. If my guess is correct, you don't need to, as long as you avoid manually running \"git add .git-annex\". I take this bug seriously. While I'm currently very involved in adding Amazon S3 support to git-annex (which will take days more of solid work), I do plan to make a loop filesystem of my own, probably vfat, so I can try and reproduce this on a case-insensative filesystem. If you could confirm my above hypothesis, that would speed things up for me. + +It's possible I will have to tweak the hash directories. Hopefully if so, I will only tweak them for *new* keys; if I had to do a v3 backend just to fix this stupid thing, I'd be sad -- upgrading all my offline disks from v1 to v2 took me many days. 
+"""]] diff --git a/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_4_039e945617a6c1852c96974a402db29c._comment b/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_4_039e945617a6c1852c96974a402db29c._comment new file mode 100644 index 0000000000..d045f71205 --- /dev/null +++ b/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_4_039e945617a6c1852c96974a402db29c._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 4" + date="2011-03-28T15:41:56Z" + content=""" +In my \"sources\" repo on x00, the current setting is this \"ignorecase = true\" it was the first repo that I created before I clone it elsewhere and pull my changes back, it is on a HFS+ partition which is case insensitive and it is replicated on a portable hdd with a bare repo on a exfat partition. I wonder if my portable disk has a partially borked repo :P +"""]] diff --git a/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_5_eacd0b18475c05ab9feed8cf7290b79a._comment b/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_5_eacd0b18475c05ab9feed8cf7290b79a._comment new file mode 100644 index 0000000000..7127a6eef8 --- /dev/null +++ b/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_5_eacd0b18475c05ab9feed8cf7290b79a._comment @@ -0,0 +1,37 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 5" + date="2011-03-28T15:51:11Z" + content=""" +I also failed to mention, that in the case when i have stray log files after what has happened in comment 2, I get this left over after a commit when git is confused... + + +
+jtang@x00:~/sources $ git status
+# On branch master
+# Your branch is ahead of 'origin/master' by 1 commit.
+#
+# Changes not staged for commit:
+#   (use \"git add ...\" to update what will be committed)
+#   (use \"git checkout -- ...\" to discard changes in working directory)
+#
+#	modified:   .git-annex/1G/X3/WORM-s309910751-m1301311322--l_fcompxe_ia32_2011.2.137.tgz.log
+#	modified:   .git-annex/3W/Xf/WORM-s805764902-m1301312756--l_cproc_p_11.1.075_intel64.log
+#	modified:   .git-annex/9Q/Wz/WORM-s1234430253-m1301311891--l_ccompxe_2011.2.137.log
+#	modified:   .git-annex/FQ/4z/WORM-s318168323-m1301310848--l_cprof_p_11.1.075_ia64.log
+#	modified:   .git-annex/FV/0P/WORM-s710135470-m1301311835--l_ccompxe_intel64_2011.2.137.log
+#	modified:   .git-annex/Jk/zK/WORM-s374617670-m1301312705--l_ipp_7.0.2.137_intel64.log
+#	modified:   .git-annex/Jx/qM/WORM-s599386592-m1301310731--l_fcompxe_2011.2.137.tgz.log
+#	modified:   .git-annex/KX/w1/WORM-s35976002-m1301312193--l_tbb_3.0.6.174.log
+#	modified:   .git-annex/VK/kv/WORM-s584342291-m1301312669--l_cproc_p_11.1.075_ia64.log
+#	modified:   .git-annex/Vw/jK/WORM-s15795178-m1301310913--w_flm_p_1.0.011_intel64.zip.log
+#	modified:   .git-annex/Zq/7X/WORM-s343075585-m1301312233--l_ipp_7.0.2.137_ia32.log
+#	modified:   .git-annex/vW/v1/WORM-s736986678-m1301312794--l_cproc_p_11.1.075_ia32.log
+#
+no changes added to commit (use \"git add\" and/or \"git commit -a\")
+
+ + +Up until now I have just been updating the status of the staged files by hand and committing it on my mac x00, this probably isn't helping. I'd rather not lose the tracking information. +"""]] diff --git a/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_6_e55117cb628dc532e468519252571474._comment b/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_6_e55117cb628dc532e468519252571474._comment new file mode 100644 index 0000000000..aae020972c --- /dev/null +++ b/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_6_e55117cb628dc532e468519252571474._comment @@ -0,0 +1,14 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 6" + date="2011-03-31T18:02:42Z" + content=""" +Alright, I have created a case-insensitive HFS+ filesystem here on my linux laptop. + +I have not been able to trick git into staging the same file with 2 different capitalizations yet. + +It might be helpful if you can send me a copy of a git repository where 'git add -i' shows the same file staged with two capitalizations. Leaving out .git/annex of course. (joey@kitenet.net; a tarball would probably work) + +It seems that `git add` only started properly working on case insensitive filesystems quite recently. The commit in question is 5e738ae820ec53c45895b029baa3a1f63e654b1b, \"Support case folding for git add when core.ignorecase=true\", which was first released in git 1.7.4, January 30, 2011. If you don't yet have that version, that could explain the problem entirely. In about half an hour (dialup!) I will have downloaded an older git and will see if I can reproduce the problem with it. 
+"""]] diff --git a/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_7_0f4f471102e394ebb01da40e4d0fd9f6._comment b/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_7_0f4f471102e394ebb01da40e4d0fd9f6._comment new file mode 100644 index 0000000000..c3aee6c579 --- /dev/null +++ b/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_7_0f4f471102e394ebb01da40e4d0fd9f6._comment @@ -0,0 +1,68 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 7" + date="2011-03-31T19:08:01Z" + content=""" +git 1.7.4 does not make things better. With it, if I add first \"X/foo\" and then \"x/bar\", it commits \"X/bar\". + +That will *certainly* cause problems when interoperating with a repo clone on a case-sensitive filesystem, since +git-annex there will not see the location log that git committed to the wrong case directory. + +It's possible there is some interoperability problem when pulling from linux like you did, onto HFS+, too. I am not quite sure. Ah, I did find one.. if I clone the repo with \"X/foo\" in it to a case-sensitive filesystem, and add a \"x/foo\" there, +and pull that commit back to HFS+, git says: + +
+ * branch            master     -> FETCH_HEAD
+Updating 8754149..e3d4640
+Fast-forward
+ x/foo |    1 +
+ 1 files changed, 1 insertions(+), 0 deletions(-)
+ create mode 100644 x/foo
+joey@gnu:/mnt/r4>ls
+X/
+joey@gnu:/mnt/r4>git st
+# On branch master
+# Changes not staged for commit:
+#   (use \"git add ...\" to update what will be committed)
+#   (use \"git checkout -- ...\" to discard changes in working directory
+
+#	modified:   X/foo
+
+ +Aha -- that lets me reproduce your problem with the same file being staged twice with different capitalizations, too: + +
+joey@gnu:/mnt/r4>echo haaai >| x/foo
+joey@gnu:/mnt/r4>git st
+# On branch master
+# Changes not staged for commit:
+#   (use \"git add ...\" to update what will be committed)
+#   (use \"git checkout -- ...\" to discard changes in working directory)
+#
+#	modified:   X/bar
+#	modified:   X/foo
+#	modified:   x/foo
+#
+joey@gnu:/mnt/r4>git commit -a
+fatal: Will not add file alias 'X/Bar' ('x/Bar' already exists in index)
+
+ +And modified files that git refuses to commit, which entirely explains [[git-annex_has_issues_with_git_when_staging__47__commiting_logs]]. + +
+joey@gnu:/mnt/r4>git add X/foo
+joey@gnu:/mnt/r4>git commit X/foo
+# On branch master
+# Changes not staged for commit:
+#   (use \"git add ...\" to update what will be committed)
+#   (use \"git checkout -- ...\" to discard changes in working directory)
+#
+#	modified:   X/bar
+#	modified:   X/foo
+#
+no changes added to commit (use \"git add\" and/or \"git commit -a\")
+
+ +I think git is, frankly, buggy. It seems I will need to work around this by stopping using mixed case hashing for location logs. +"""]] diff --git a/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_8_68e2d6ccdb9622b879e4bc7005804623._comment b/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_8_68e2d6ccdb9622b879e4bc7005804623._comment new file mode 100644 index 0000000000..05fe4658df --- /dev/null +++ b/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_8_68e2d6ccdb9622b879e4bc7005804623._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 8" + date="2011-03-31T19:28:02Z" + content=""" +I've posted about this on the git mailing list. It's possible that these bugs, which can be shown to affect things other than just git-annex, will be fixed in git. + +I will wait a while to see. But I am considering making git-annex use all-lowercase hash dirs for the log files. Maybe it could first look for .git-annex/aaaa/bbbb/foo.log, but also look for, read, and merge in any info from +.git-annex/Aa/Bb/foo.log. And always write to the new style filenames. This would avoid confusing git with changes to +mixed-case files, and avoid another massive transition. +"""]] diff --git a/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_9_45b11ddd200261115b653c7a14d28aa9._comment b/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_9_45b11ddd200261115b653c7a14d28aa9._comment new file mode 100644 index 0000000000..8dfe746420 --- /dev/null +++ b/doc/bugs/git-annex_directory_hashing_problems_on_osx/comment_9_45b11ddd200261115b653c7a14d28aa9._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 9" + date="2011-03-31T21:32:10Z" + content=""" +I was running git 1.7.4.1 at the time when I came across it, I have just upgraded to 1.7.4.2. 
I've also just moved to using a loopback fs for the stuff i care about. Do you still want a repo that exhibits the problem (excluding the .git/annex data) ??? I'm also not sure if 1.7.4.2 has corrected the problem yet as I haven't done much with my repos since. I suspect just making all the .git-annex hashed directories seems to be lower case might be better in the long run. +"""]] diff --git a/doc/bugs/git-annex_has_issues_with_git_when_staging__47__commiting_logs.mdwn b/doc/bugs/git-annex_has_issues_with_git_when_staging__47__commiting_logs.mdwn new file mode 100644 index 0000000000..ed629c4240 --- /dev/null +++ b/doc/bugs/git-annex_has_issues_with_git_when_staging__47__commiting_logs.mdwn @@ -0,0 +1,34 @@ +After a series of pretty convoluted copying files around between annex'd repos and pulling changes around between repos. I noticed that occassionally when git-annex tries to stage files (the `.git-annex/*/*/*logs`) git some times gets wedged and doing a "git commit -a" doesn't seem to work or files might not get added thus leaving a bunch of untracked files or modified files that aren't staged for a commit. + +I tried running a *`git rm --cached -f -r *`* then *git add -u .git-annex/* or the usual *git add* then a commit fixes things for me. If I don't do that then my subsequent merges/pulls will fail and result in *no known copies of files* I suspect git-annex might have just touched some file modes and git picked up the changes but got confused since there was no content change. It might also just be a git on OSX thing and it doesn't affect linux/bsd users. + +For now it's just a bit of extra work for me when it does occur but it does not seem to occur often. + +> What do you mean when you say that git "got wedged"? It hung somehow? +> +> If git-annex runs concurrently with another git command that locks +> the repository, its git add of log files can fail. 
+> +> Update: Also, of course, if you are running a "git annex get" or +> similar, and ctrl-c it after it has gotten some files, it can +> end up with unstaged or in some cases un-added log files that git-annex +> wrote -- since git-annex only stages log files in git on shutdown, and +> ctrl-c bypasses that. +> --[[Joey]] + +>> It "got wedged" as in git doesn't let me commit anything, even though it tells me that there is stuff to be committed in the staging area. + +>>> I've never seen git refuse to commit staged files. There would have to +>>> be some error message? --[[Joey]] + +>>>> there were no error messages at all + +>>>>> Can I see a transcript? I'm having difficulty getting my head around +>>>>> what git is doing. Sounds like the files could just not be `git +>>>>> added` yet, but I get the impression from other things that you say +>>>>> that it's not so simple. --[[Joey]] + +This turns out to be a bug in git, and I have posted a bug report on the mailing list. +The git-annex behavior that causes this situation is being handled as +another bug, [[git-annex directory hashing problems on osx]]. +So, closing this bug report. [[done]] --[[Joey]] diff --git a/doc/bugs/git-annex_incorrectly_parses_bare_IPv6_addresses.mdwn b/doc/bugs/git-annex_incorrectly_parses_bare_IPv6_addresses.mdwn new file mode 100644 index 0000000000..c94952b490 --- /dev/null +++ b/doc/bugs/git-annex_incorrectly_parses_bare_IPv6_addresses.mdwn @@ -0,0 +1,59 @@ +I have a git remote in a git-annex-enabled repository. Here's what it looks like in .git/config: + +
+[remote "renaissance"]
+        url = ssh://[2001:0:53aa:64c:24ef:5ce4:2ef9:cdda]/home/paulproteus/Music/annex/
+        fetch = +refs/heads/*:refs/remotes/renaissance/*
+        annex-uuid = 2992752e-1a13-11e0-ba68-57d3c800da64
+
+ +I wanted to "git annex get" some data. git-annex appears to pass incorrectly-formatted IPv6 addresses to rsync: + +
+get primary/emusiq/Arab Strap/Monday At The Hug And Pint/01-The Shy Retirer.mp3 (copying from renaissance...) 
+ssh: Could not resolve hostname [2001:0:53aa:64c:24ef:5ce4:2ef9:cdda]: Name or service not known
+rsync: connection unexpectedly closed (0 bytes received so far) [Receiver]
+rsync error: unexplained error (code 255) at io.c(601) [Receiver=3.0.7]
+
+  rsync failed -- run git annex again to resume file transfer
+  Unable to access these remotes: renaissance
+  Try making some of these repositories available:
+  	2992752e-1a13-11e0-ba68-57d3c800da64
+failed
+
+ +In this case, the square brackets should not be there. + +I tried changing the .git/config syntax slightly, and got a different, also-incorrect behavior: + +
+[remote "renaissance"]
+        url = [2001:0:53aa:64c:24ef:5ce4:2ef9:cdda]:/home/paulproteus/Music/annex/
+        fetch = +refs/heads/*:refs/remotes/renaissance/*
+        annex-uuid = 2992752e-1a13-11e0-ba68-57d3c800da64
+
+ +
+paulproteus@pathi:~/Music/annex$ git annex get
+git-annex: bad url ssh://[2001/~/0:53aa:64c:24ef:5ce4:2ef9:cdda]:/home/paulproteus/Music/annex/
+
+ +(Note that both these .git/config entries work fine with "git fetch".) + +-- Asheesh. + +> Technically, this seems to be a bug in the haskell URI library; it honors +> the `[]` in parsing, but does not remove them when the URI is queried for +> the host part. + +
+Prelude Network.URI> let (Just u) = parseURI "http://foo@[2001:0:53aa:64c:24ef:5ce4:2ef9:cdda]/bar"
+Prelude Network.URI> let (Just a) = uriAuthority u
+Prelude Network.URI> uriRegName a
+"[2001:0:53aa:64c:24ef:5ce4:2ef9:cdda]"
+Prelude Network.URI> isIPv6address $ uriRegName a
+False
+
+ +> I have filed a [bug upstream](http://trac.haskell.org/network/ticket/40), and put a workaround in git-annex. [[done]] +> --[[Joey]] diff --git a/doc/bugs/git-annex_losing_rsync_remotes_with_encryption_enabled.mdwn b/doc/bugs/git-annex_losing_rsync_remotes_with_encryption_enabled.mdwn new file mode 100644 index 0000000000..8df3608dbe --- /dev/null +++ b/doc/bugs/git-annex_losing_rsync_remotes_with_encryption_enabled.mdwn @@ -0,0 +1,103 @@ +Somehow git-annex has again lost a complete rsync remote with encryption enabled... + +git-annex version was 3.20111111 + +> "once again" ? When did it do it before? + +>> It's the second time i uploaded all the files to an encrypted rsync remote and git-annex is not able to find it anymore. --[[gebi]] + +> "lost" ? How is the remote lost? + +>> git-annex is not able to find any files on the encrypted rsync remote anymore. +>> Copy does not copy the content again but drop doesn't find it, thus it's somehow "lost" and in a strange state. +>> I've also had the state where the content was already on the remote side but git-annex copy would copy it again, +>> ignoring all the data on the remote side. --[[gebi]] + +Both *remoteserver* and *localserver* are rsync remotes with enabled encryption. +All commands are executed on the git repository on my laptop. +Target of origin is a gitolite repository without annex support (thus the two rsync remotes). + +Is there a way in git-annex to verify that all files fulfill the numcopies, in my case +numcopies=2, and can be read from the remotes they are on? +I thought that *copy* would verify that, but seems not. + + % g a copy --to remoteserver tools + copy tools/md5_sha1_utility.exe (gpg) (checking remoteserver...) ok + copy tools/win32diskimager-RELEASE-0.2-r23-win32.zip (checking remoteserver...) ok + + % g a copy --to localserver tools + copy tools/md5_sha1_utility.exe (gpg) (checking localserver...) ok + copy tools/win32diskimager-RELEASE-0.2-r23-win32.zip (checking localserver...) 
ok + + % g a drop tools + drop tools/md5_sha1_utility.exe (gpg) (checking localserver...) (checking remoteserver...) (unsafe) + Could only verify the existence of 1 out of 2 necessary copies + + Try making some of these repositories available: + 718a9b5c-1b4a-11e1-8211-6f094f20e050 -- remoteserver (remote backupserver) + + (Use --force to override this check, or adjust annex.numcopies.) + failed + drop tools/win32diskimager-RELEASE-0.2-r23-win32.zip (checking localserver...) (checking remoteserver...) (unsafe) + Could only verify the existence of 1 out of 2 necessary copies + + Try making some of these repositories available: + 718a9b5c-1b4a-11e1-8211-6f094f20e050 -- remoteserver (remote backupserver) + + (Use --force to override this check, or adjust annex.numcopies.) + failed + git-annex: drop: 2 failed + + % g a fsck tools + fsck tools/md5_sha1_utility.exe (checksum...) ok + fsck tools/win32diskimager-RELEASE-0.2-r23-win32.zip (checksum...) ok + +> Copy does do an explicit check that the content is present on remoteserver, +> and based on the above, the content was found to be already there, +> which is why it did not copy it again. +> +> Drop does an identical check that the content is present, and +> since it failed to find it, I am left thinking something must have +> happened to the remote in between the copy and the drop to cause the +> content to go away. +> +> What happens if you copy the data to remoteserver again? --[[Joey]] + +The commands above are executed within a few seconds and completely repeatable. --[[gebi]] + +> In that case, why don't you run the commands with `-d` to see the actual +> rsync command it's running to check if the content is present. +> Then you can try repeatedly running the command by hand and see why it +> sometimes succeeds and sometimes fails. + +The commands fail and succeed consistently, not either or. +git annex copy succeeds consistently with not copying the content to remote because it checks and it's already there. 
+ +git annex drop fails consistently with error because content is missing on the exact same remote git annex copy checks +and thinks the content is there. --[[gebi]] + +> The command will be something like this: +> `rsync --quiet hostname:/dir/file 2>/dev/null` +> +> The exit status is what's used to see if content is present -- and +> currently any failure even a failure to connect is taken to mean it's not +> present. --[[Joey]] + +hm... that's interesting, git annex drop and git annex copy check for different hashes on the same file at the same remote... + +git annex drop -d tools/md5_sha1_utility.exe +> Running: sh ["-c","rsync --quiet 'REMOVED_HOST:annex/work/JF/z7/'\"'\"'GPGHMACSHA1--7ffb3840f0e37aee964352e98808403655e8473a/GPGHMACSHA1--7ffb3840f0e37aee964352e98808403655e8473a'\"'\"'' 2>/dev/null"] + +git annex copy --to remoteserver -d tools/md5_sha1_utility.exe +> Running: sh ["-c","rsync --quiet 'REMOVED_HOST:annex/work/1F/PQ/'\"'\"'GPGHMACSHA1--ff075e57f649300c5698e346be74fb6e22d70e35/GPGHMACSHA1--ff075e57f649300c5698e346be74fb6e22d70e35'\"'\"'' 2>/dev/null"] + +And yes, only the hash *annex copy* is checking for exists on the remote side. --[[gebi]] + +> Ok, this is due to too aggressive caching of the decrypted cipher +> for a remote. When dropping, it decrypts localserver's cipher, +> caches it, and then when checking remoteserver it says hey, +> here's an already decrypted cipher -- it must be the right one! +> +> Problem reproduced here, and fixed. [[done]] --[[Joey]] + +THX Joey! 
-- [[gebi]] diff --git a/doc/bugs/git_annex_add_eats_files_when_filename_is_too_long.mdwn b/doc/bugs/git_annex_add_eats_files_when_filename_is_too_long.mdwn new file mode 100644 index 0000000000..d17e569f17 --- /dev/null +++ b/doc/bugs/git_annex_add_eats_files_when_filename_is_too_long.mdwn @@ -0,0 +1,14 @@ +Recently I ran into the following situation under Ubuntu with an encrypted home directory (which shortens the length that filenames can be): + + $ git annex add 687474703a2f2f6d656469612e74756d626c722e636f6d2f74756d626c725f6c656673756557324c703171663879656b2e676966.gif + add 687474703a2f2f6d656469612e74756d626c722e636f6d2f74756d626c725f6c656673756557324c703171663879656b2e676966.gif failed + git-annex: /home/lhuhn/annex/.git/annex/tmp/155_518_WORM-s426663-m1310064100--687474703a2f2f6d656469612e74756d626c722e636f6d2f74756d626c725f6c656673756557324c703171663879656b2e676966.gif.log: openBinaryFile: invalid argument (File name too long) + git-annex: 1 failed + +The file seems to be completely gone. It no longer exists in the current directory, or under .git/annex. + +I don't mind horribly that git-annex failed due to the name length limit, but it shouldn't have deleted my file in the process (fortunately the file wasn't very important, or hard to recover). + +> [[done]], as noted it did not delete content and now it makes the symlink +> before trying to write to the location log, avoiding that gotcha. 
+> --[[Joey]] diff --git a/doc/bugs/git_annex_add_eats_files_when_filename_is_too_long/comment_1_9650284913bec2a00cf551b90ab5d8ff._comment b/doc/bugs/git_annex_add_eats_files_when_filename_is_too_long/comment_1_9650284913bec2a00cf551b90ab5d8ff._comment new file mode 100644 index 0000000000..1df159181d --- /dev/null +++ b/doc/bugs/git_annex_add_eats_files_when_filename_is_too_long/comment_1_9650284913bec2a00cf551b90ab5d8ff._comment @@ -0,0 +1,21 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-07-07T20:27:33Z" + content=""" +When I reproduce this, the file is not gone, it's been moved under .git/annex/objects. There is no way an add can delete a file, since all it does is rename it. It would be good for it to error unwind and move the file back though. + +
+joey@gnu:~/tmp/a>touch 663879656b2e676966687474703a2f2f6d656469612e74756d626c722e636f6d2f74756d626c725f6c656673756557324c703171663879656b2e676966687474703a2f2f6d656469612e74756d626c722e636f6d2f74756d626c725f6c656673756557324c703171663879656b2e676966.gif
+joey@gnu:~/tmp/a>git annex add *.gif
+add 663879656b2e676966687474703a2f2f6d656469612e74756d626c722e636f6d2f74756d626c725f6c656673756557324c703171663879656b2e676966687474703a2f2f6d656469612e74756d626c722e636f6d2f74756d626c725f6c656673756557324c703171663879656b2e676966.gif failed
+git-annex: /home/joey/tmp/a/.git/annex/tmp/8e2_6a4_WORM-s0-m1310069979--663879656b2e676966687474703a2f2f6d656469612e74756d626c722e636f6d2f74756d626c725f6c656673756557324c703171663879656b2e676966687474703a2f2f6d656469612e74756d626c722e636f6d2f74756d626c725f6c656673756557324c703171663879656b2e676966.gif.log: openBinaryFile: invalid argument (File name too long)
+joey@gnu:~/tmp/a>touch 663879656b2e676966687474703a2f2f6d656469612e74756d626c722e636f6d2f74756d626c725f6c656673756557324c703171663879656b2e676966687474703a2f2f6d656469612e74756d626c722e636f6d2f74756d626c725f6c656673756557324c703171663879656b2e676966.gif
+joey@gnu:~/tmp/a>git annex add *.gif
+add 663879656b2e676966687474703a2f2f6d656469612e74756d626c722e636f6d2f74756d626c725f6c656673756557324c703171663879656b2e676966687474703a2f2f6d656469612e74756d626c722e636f6d2f74756d626c725f6c656673756557324c703171663879656b2e676966.gif failed
+git-annex: /home/joey/tmp/a/.git/annex/tmp/8e2_6a4_WORM-s0-m1310069979--663879656b2e676966687474703a2f2f6d656469612e74756d626c722e636f6d2f74756d626c725f6c656673756557324c703171663879656b2e676966687474703a2f2f6d656469612e74756d626c722e636f6d2f74756d626c725f6c656673756557324c703171663879656b2e676966.gif.log: openBinaryFile: invalid argument (File name too long)
+joey@gnu:~/tmp/a>find .git/annex/objects -type f
+.git/annex/objects/Mk/92/WORM-s0-m1310069979--663879656b2e676966687474703a2f2f6d656469612e74756d626c722e636f6d2f74756d626c725f6c656673756557324c703171663879656b2e676966687474703a2f2f6d656469612e74756d626c722e636f6d2f74756d626c725f6c656673756557324c703171663879656b2e676966.gif/WORM-s0-m1310069979--663879656b2e676966687474703a2f2f6d656469612e74756d626c722e636f6d2f74756d626c725f6c656673756557324c703171663879656b2e676966687474703a2f2f6d656469612e74756d626c722e636f6d2f74756d626c725f6c656673756557324c703171663879656b2e676966.gif
+
+"""]] diff --git a/doc/bugs/git_annex_add_eats_files_when_filename_is_too_long/comment_2_c6c8d2a1f444d85c582bc5396b08e148._comment b/doc/bugs/git_annex_add_eats_files_when_filename_is_too_long/comment_2_c6c8d2a1f444d85c582bc5396b08e148._comment new file mode 100644 index 0000000000..bd53627bbc --- /dev/null +++ b/doc/bugs/git_annex_add_eats_files_when_filename_is_too_long/comment_2_c6c8d2a1f444d85c582bc5396b08e148._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawnpdM9F8VbtQ_H5PaPMpGSxPe_d5L1eJ6w" + nickname="Rafaël" + subject="this happens also when the user has not the permission to set the file mode" + date="2011-07-08T00:21:31Z" + content=""" +For example if the file is owned by root, I guess git-annex fails when it tries to remove write permissions (I retested with the last version of today (whose \"version\" subcommand still outputs 3.20110702)). By the way, it would be nice to have a log file created containing the list of all failures, to avoid having to scan manually all the output of a long git-annex operation. +"""]] diff --git a/doc/bugs/git_annex_add_eats_files_when_filename_is_too_long/comment_3_5776864d78d56849001dd12e3adb9cbe._comment b/doc/bugs/git_annex_add_eats_files_when_filename_is_too_long/comment_3_5776864d78d56849001dd12e3adb9cbe._comment new file mode 100644 index 0000000000..f9d1b5d682 --- /dev/null +++ b/doc/bugs/git_annex_add_eats_files_when_filename_is_too_long/comment_3_5776864d78d56849001dd12e3adb9cbe._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawnpdM9F8VbtQ_H5PaPMpGSxPe_d5L1eJ6w" + nickname="Rafaël" + subject="comment 3" + date="2011-07-08T00:45:30Z" + content=""" +comment on the output of 'git-annex version' (from my last comment): now I get the right version 3.20110707. 
But I checked in my console that the three commands \"git checkout 3.20110707\", \"make\" and \"./git-annex version\" gave me before 3.20110702, I don't know why... +"""]] diff --git a/doc/bugs/git_annex_add_eats_files_when_filename_is_too_long/comment_4_371ec7b4ae73280ede31edfe90b42a95._comment b/doc/bugs/git_annex_add_eats_files_when_filename_is_too_long/comment_4_371ec7b4ae73280ede31edfe90b42a95._comment new file mode 100644 index 0000000000..1ba57c1992 --- /dev/null +++ b/doc/bugs/git_annex_add_eats_files_when_filename_is_too_long/comment_4_371ec7b4ae73280ede31edfe90b42a95._comment @@ -0,0 +1,9 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 4" + date="2011-07-08T01:32:30Z" + content=""" +Indeed, I've made it even more robust now, handling the case where the file has weird permissions too, and undoing the failed add so the file is always back at the start state. Had to add a dependency on another haskell module to allow this, so it took some time to figure out how to do it.. + +"""]] diff --git a/doc/bugs/git_annex_copy_--fast_does_not_copy_files.mdwn b/doc/bugs/git_annex_copy_--fast_does_not_copy_files.mdwn new file mode 100644 index 0000000000..9b84c21fdb --- /dev/null +++ b/doc/bugs/git_annex_copy_--fast_does_not_copy_files.mdwn @@ -0,0 +1,22 @@ +Workflow: + + % git annex add + # list new files + % git commit -a -m "foo" + # commit summary + % git annex copy . --to remote --fast + # all files listed with "ok" + % git annex copy . --to remote + # again, lists all files, _but the new ones are actually copied, this time_. + +This happens no matter if I + + % git push + +before copy or not. + +PS: Arguably, a copy should push automagically. + +> Whups, not supposed to be that fast! [[Fixed|done]], and +> you should run `git annex fsck --fast` on the repo you ran the +> copy in. 
--[[Joey]] diff --git a/doc/bugs/git_annex_copy_-f_REMOTE_._doesn__39__t_work_as_expected.mdwn b/doc/bugs/git_annex_copy_-f_REMOTE_._doesn__39__t_work_as_expected.mdwn new file mode 100644 index 0000000000..3bda451499 --- /dev/null +++ b/doc/bugs/git_annex_copy_-f_REMOTE_._doesn__39__t_work_as_expected.mdwn @@ -0,0 +1,18 @@ +I was testing out the fix/workaround for [[git-annex directory hashing problems on osx]] and I tried using the short forms of some of the commands i.e. + + git annex copy -f externalusb . + +which gives me + + git-annex: user error (option `-f' is ambiguous; could be one of: + -f --force allow actions that may lose annexed data + -f REMOTE --from=REMOTE specify from where to transfer content + + +I would have expected that since *--to* is the same as *-t* and *--from* is the same as *-f* as the in program documentation suggests. But *-f* clashes with the force command, I would suggest that the short form of *--force* be changed to *-F* and possibly rename the *Fast* commands to *Quick* and use *-Q* as the short form of the *Quick* operations. I didn't try the *-f* option with the move command, but it probably suffers from the same issue. It's probably better to avoid clashing short forms of command options. + +I guess this issue is just a documentation issue and a minor interface change if needed and not a bug of git-annex, but a quirk. + +> Yeah, -f needs to be from; -F was already --fast. I have made --force not +> have any short option abbreviation, I think it's entirely reasonable to +> avoid fat-fingering an option that can lose data. 
[[done]] --[[Joey]] diff --git a/doc/bugs/git_annex_fsck_is_a_no-op_in_bare_repos.mdwn b/doc/bugs/git_annex_fsck_is_a_no-op_in_bare_repos.mdwn new file mode 100644 index 0000000000..9a044860ae --- /dev/null +++ b/doc/bugs/git_annex_fsck_is_a_no-op_in_bare_repos.mdwn @@ -0,0 +1,21 @@ +What is says on the tin: + +git annex fsck is a no-op in bare repos + +See http://lists.madduck.net/pipermail/vcs-home/2011-June/000433.html + +> Thinking about this some more, it would be difficult to do anything +> when bad content is found, since it also cannot update the location log. +> +> So this may be another thing blocked by [[todo/branching]], assuming +> that is fixed in a way that makes `.git-annex` available to bare repos. +> --[[Joey]] + +>> Even if there is nothing it can _do_, knowing that the data is intact, +>> or not, is valuable in and as of itself. -- RichiH + +>>> While storing the data is no longer an issue in bare repos, fsck would +>>> need a special mode that examines all the location logs, since it +>>> cannot run thru the checked out files. --[[Joey]] + +>>>> [[done]]! --[[Joey]] diff --git a/doc/bugs/git_annex_fsck_is_a_no-op_in_bare_repos/comment_1_fc59fbd1cdf8ca97b0a4471d9914aaa1._comment b/doc/bugs/git_annex_fsck_is_a_no-op_in_bare_repos/comment_1_fc59fbd1cdf8ca97b0a4471d9914aaa1._comment new file mode 100644 index 0000000000..d50938a784 --- /dev/null +++ b/doc/bugs/git_annex_fsck_is_a_no-op_in_bare_repos/comment_1_fc59fbd1cdf8ca97b0a4471d9914aaa1._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 1" + date="2011-06-13T16:58:52Z" + content=""" +And, maybe, a way to start a fsck from remote? At least when the other side is a ssh or git annex shell, this would work. 
+"""]] diff --git a/doc/bugs/git_annex_fsck_is_a_no-op_in_bare_repos/comment_2_273a45e6977d40d39e0d9ab924a83240._comment b/doc/bugs/git_annex_fsck_is_a_no-op_in_bare_repos/comment_2_273a45e6977d40d39e0d9ab924a83240._comment new file mode 100644 index 0000000000..b01590a7a7 --- /dev/null +++ b/doc/bugs/git_annex_fsck_is_a_no-op_in_bare_repos/comment_2_273a45e6977d40d39e0d9ab924a83240._comment @@ -0,0 +1,9 @@ +[[!comment format=mdwn + username="http://ertai.myopenid.com/" + nickname="npouillard" + subject="git annex fsck --from remote" + date="2011-06-25T16:20:44Z" + content=""" +Currently fsck silently ignores --to/--from. +It should at least complain if it is not supported. +"""]] diff --git a/doc/bugs/git_annex_get_choke_when_remote_is_an_ssh_url_with_a_port.mdwn b/doc/bugs/git_annex_get_choke_when_remote_is_an_ssh_url_with_a_port.mdwn new file mode 100644 index 0000000000..92cc9170f9 --- /dev/null +++ b/doc/bugs/git_annex_get_choke_when_remote_is_an_ssh_url_with_a_port.mdwn @@ -0,0 +1,13 @@ +when i want to + + git annex get file + +on repo ssh://host-without-port/annex, it works, but if i want to get a file from ssh://host:5122/annex, it tries to run command +ssh ["host:5122", "git-annex-shell 'configlist' '/annex/file'"] and fails. ssh needs the -p option to set the default port, it doesn't support host:port notation. +this is confusing because git can handle this url correctly, and will happily clone/push/pull to/from these url. + +temporary workaround is to use ssh://host/annex as url and define remote.name.annex-ssh-options to "-p 5122", but we need to use this workaround when doing annex get and undo the workaround when pushing/cloning. + +if i had more time, i would have learned haskell and provided a patch ;) + +> Fixed in git! 
--[[Joey]] [[done]] diff --git a/doc/bugs/git_annex_gets_confused_about_remotes_with_dots_in_their_names.mdwn b/doc/bugs/git_annex_gets_confused_about_remotes_with_dots_in_their_names.mdwn new file mode 100644 index 0000000000..d35282e750 --- /dev/null +++ b/doc/bugs/git_annex_gets_confused_about_remotes_with_dots_in_their_names.mdwn @@ -0,0 +1,34 @@ +For test.com//test, I get this: + + % git annex copy . --to test.com//test + (getting UUID for test...) git-annex: there is no git remote named "test.com//test" + +And my .git/config changes from + + [remote "test.com//test"] + url = richih@test.com:/test + fetch = +refs/heads/*:refs/remotes/test.com//test/* + +to + + [remote "test.com//test"] + url = richih@test.com:/test + fetch = +refs/heads/*:refs/remotes/test.com//test/* + annex-uuid = xyz + [remote "test"] + annex-uuid = xyz + + +Unless I am misunderstanding something, git annex gets confused about what the name of the remote is supposed to be, truncates at the dot for some operations and uses the full name for others. + +> I've fixed this bug. [[done]] +> +> However, using "/" in a remote name seems likely to me to confuse +> git's own remote branch handling. Although I've never tried it. +> --[[Joey]] + +>> From what I can see, git handles / just fine, but would get upset about : which is why it's not allowed in a remote's name. +>> My naming scheme is host//path/to/annex. It sorts nicely and gives all important information left to right with the most specific parts at the beginning and end. +>> If you have any other ideas or scheme, I am all ears :) +>> Either way, thanks for fixing this so quickly. 
+>> -- RichiH diff --git a/doc/bugs/git_annex_initremote_walks_.git-annex.mdwn b/doc/bugs/git_annex_initremote_walks_.git-annex.mdwn new file mode 100644 index 0000000000..acd369bded --- /dev/null +++ b/doc/bugs/git_annex_initremote_walks_.git-annex.mdwn @@ -0,0 +1,19 @@ +an issue: `git annex initremote` (in particular, adding +a key as described in [[encryption]] -- `git annex initremote my_remote +encryption=my_key`) seems to iterate over the `.git-annex/???/???/*.log` files +with lstat (tested using strace). + +in a 50k key git-annex on a slow disk, this takes quite a while, while not +seeming necessary (it's just re-encrypting the shared secret, is it?). + +could you verify the observed behavior? + +> This is due to `git commit` being called. `git commit` exposes git's +> rather inefficient handling of the index; in order to make a commit +> it has to write a new index file, and it does this by scanning every +> file in the repository. I think that git generally needs its index +> file handling overhauled, particularly to deal with repositories with +> large numbers of files. git-annex seems to already be running +> `git commit` in its most efficient mode, by specifying exactly what file +> to commit. [[done]] --[[Joey]] diff --git a/doc/bugs/git_annex_map_has_problems_with_urls_containing___126__.mdwn b/doc/bugs/git_annex_map_has_problems_with_urls_containing___126__.mdwn new file mode 100644 index 0000000000..24f04eeb54 --- /dev/null +++ b/doc/bugs/git_annex_map_has_problems_with_urls_containing___126__.mdwn @@ -0,0 +1,46 @@ +I discovered a problem with `git annex map` and relative urls containing `~`. +In this case i have a remote `noam` configured with the following urls: + + zsh» git remote show noam | head -3 + * remote noam + Fetch URL: noam:bare-annex + Push URL: noam:bare-annex + +If i try to run `git annex map` i get the following error: + + zsh» git annex map + map /home/esc/annex ok + map noam (sshing...) 
+ bash: line 0: cd: /~/bare-annex/: No such file or directory + Command ssh ["noam","cd '/~/bare-annex/' && git config --list"] failed; exit code 1 + (sshing...) + ok + + running: dot -Tx11 map.dot + + ok + +If i run the failing command manually, i get: + + zsh» ssh noam "cd ~/bare-annex && git config --list" + core.repositoryformatversion=0 + core.filemode=true + core.bare=true + annex.uuid=f267f55c-0732-11e1-a93b-93119f9aaf54 + annex.version=3 + +Also i can change the remote url to an absolute one, in which case `git annex +map` works too: + + zsh» git remote set-url noam noam:/home/esc/bare-annex + zsh» git annex map + map /home/esc/annex ok + map noam (sshing...) + ok + + running: dot -Tx11 map.dot + + ok + + +> [[fixed|done]] --[[Joey]] diff --git a/doc/bugs/git_annex_migrate_leaves_old_backend_versions_around.mdwn b/doc/bugs/git_annex_migrate_leaves_old_backend_versions_around.mdwn new file mode 100644 index 0000000000..263338d64f --- /dev/null +++ b/doc/bugs/git_annex_migrate_leaves_old_backend_versions_around.mdwn @@ -0,0 +1,19 @@ +`git annex migrate` leaves old, unlinked backend versions lying around. It +would be great if these were purged automatically somehow. + +> Yes, this is an issue mentioned in the +> [[tips/migrating_data_to_a_new_backend]]. +> +> Since multiple files can point to the same content, it could be that +> only one file has been migrated, and the content is still used. So +> the content either has to be retained, or an operation as expensive +> as `git annex unused` used to find if something else still uses it. +> +> Rather than adding such an +> expensive operation to each call to migrate, I focused on hard-linking +> the values for the old and new keys, so that the old keys don't actually +> use any additional resources (beyond an extra inode). +> +> This way a lot of migrations can be done, and only when you're done you +> can do the more expensive cleanup pass if you want to. 
--[[Joey]] +> [[done]] diff --git a/doc/bugs/git_annex_should_use___39__git_add_-f__39___internally.mdwn b/doc/bugs/git_annex_should_use___39__git_add_-f__39___internally.mdwn new file mode 100644 index 0000000000..a92f5871b5 --- /dev/null +++ b/doc/bugs/git_annex_should_use___39__git_add_-f__39___internally.mdwn @@ -0,0 +1,11 @@ +I have this line in the .gitignore file of one of my repos: +*log + +So the command 'git annex init name' fails to add the file ".git-annex/uuid.log", and the same problem happens when git-annex-add'ing files. + +> This is avoided on the v3 branch, which does not store these files in the +> same branch as your repository. + +Also, when a file is git-ignored, it should be possible to 'git annex add' it with a -f/--force option, the same way git does it. + +> Reasonable, [[done]] --[[Joey]] diff --git a/doc/bugs/git_annex_should_use___39__git_add_-f__39___internally/comment_1_7683bf02cf9e97830fb4690314501568._comment b/doc/bugs/git_annex_should_use___39__git_add_-f__39___internally/comment_1_7683bf02cf9e97830fb4690314501568._comment new file mode 100644 index 0000000000..c556fbd771 --- /dev/null +++ b/doc/bugs/git_annex_should_use___39__git_add_-f__39___internally/comment_1_7683bf02cf9e97830fb4690314501568._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawnpdM9F8VbtQ_H5PaPMpGSxPe_d5L1eJ6w" + nickname="Rafaël" + subject="comment 1" + date="2011-07-03T11:56:45Z" + content=""" +And what about emitting a warning, as git does, that some files were not annex-added (when not using --force)? 
+"""]] diff --git a/doc/bugs/git_annex_unlock_is_not_atomic.mdwn b/doc/bugs/git_annex_unlock_is_not_atomic.mdwn new file mode 100644 index 0000000000..6d324ff500 --- /dev/null +++ b/doc/bugs/git_annex_unlock_is_not_atomic.mdwn @@ -0,0 +1,7 @@ +Running a command like + +git annex unlock myfile + +is not atomic, that is if the execution is aborted you may end up with an incomplete version of myfile in the directory. If you don't notice this you may lock it again and then propagate this bad version of the file to your other repositories. A simple workaround is to simply name it something else while unlocking and then rename it to the correct filename once it's completely copied. I don't know Haskell yet so I can not fix this issue otherwise I would sure try. Apart from this, I love git annex. + +> [[fixed|done]] --[[Joey]] diff --git a/doc/bugs/git_annex_unused_failes_on_empty_repository.mdwn b/doc/bugs/git_annex_unused_failes_on_empty_repository.mdwn new file mode 100644 index 0000000000..05aa695727 --- /dev/null +++ b/doc/bugs/git_annex_unused_failes_on_empty_repository.mdwn @@ -0,0 +1,15 @@ +[[!meta title="`git annex unused` fails on empty repository"]] + +The ``git annex unused`` command fails on a git-annex repository, if there are no objects yet: + + $ git annex unused + unused (checking for unused data...) + git-annex: /tmp/annextest/other_annex/.git/annex/objects: getDirectoryContents: does not exist (No such file or directory) + git-annex: 1 failed + $ + +This can give a user (especially one that wants to try out simple commands with his newly created repo) the impression that something is wrong, while it is not. I'd expect the program either to show the same message ``git annex unused`` shows when everything is ok (since it is, or should be). + +This can be a bug in the ``unused`` subcommand (that fails to accept the absence of an objects directory) or in the ``init`` subcommand (that fails to create it). 
+ +> [[fixed|done]] --[[Joey]] diff --git a/doc/bugs/git_annex_unused_seems_to_check_for_current_path.mdwn b/doc/bugs/git_annex_unused_seems_to_check_for_current_path.mdwn new file mode 100644 index 0000000000..df0fb50cc1 --- /dev/null +++ b/doc/bugs/git_annex_unused_seems_to_check_for_current_path.mdwn @@ -0,0 +1,37 @@ +When I run `git annex unused` from my repository's root it shows everything ok: + + ~/annex$ git annex unused + unused (checking for unused data...) ok + +But... When I run it from a subdirectory, it shows a lot: + + ~/annex/Software$ git annex unused + unused (checking for unused data...) + Some annexed data is no longer pointed to by any files in the repository: + NUMBER KEY + 1 SHA1:######################################## + ... + 921 SHA1:######################################## + (To see where data was previously used, try: git log --stat -S'KEY') + (To remove unwanted data: git-annex dropunused NUMBER) + ok + +Is this a bug or by design? By removing these "unused" files with `dropunused` I've just lost the only copy of 160 files. + +I am using git-annex version 836e71297b8e3b5bd6f89f7eb1198f59af985b0b + +> I'm very sorry you lost data. +> +> But, git annex unused absolutely does not let the current directory +> influence what it does. It always scans the entire repo from the top. +> And I've tested it just now to make sure that in a subdirectory +> it does the same thing as at the top. +> +> There are only two ways this could happen that I can think of: +> +> 1. If "Software" were a separate git repository than "~/annex". +> 2. If gitignores or something made `git ls-files` +> not list the files when ran in the subdir. This seems *possible*, +> but I don't know how to construct such an ignore. 
+> +> --[[Joey]] diff --git a/doc/bugs/git_annex_upgrade_loses_track_of_files_with___34____38____34___character___40__and_probably_others__41__.mdwn b/doc/bugs/git_annex_upgrade_loses_track_of_files_with___34____38____34___character___40__and_probably_others__41__.mdwn new file mode 100644 index 0000000000..9daf8a0cb4 --- /dev/null +++ b/doc/bugs/git_annex_upgrade_loses_track_of_files_with___34____38____34___character___40__and_probably_others__41__.mdwn @@ -0,0 +1,33 @@ +"git annex upgrade" has lost track of some of my files. Most of them have "&" characters. The others contain "%" characters (I haven't tried the testcase below with "%" however). + +Testcase: + + # (With git annex v2) + mkdir ~/testannex1 + cd ~/testannex1 + git init + git annex init "testannex1" + touch '02 - Afternoons & Coffeespoons.mp3' + touch 'no ampersand.mp3' + git annex add '02 - Afternoons & Coffeespoons.mp3' + git annex add 'no ampersand.mp3' + git commit -m added + git annex whereis '02 - Afternoons & Coffeespoons.mp3' + git annex whereis 'no ampersand.mp3' + # (Upgrade git-annex binary to v3 and then...) 
+ git annex upgrade + git annex whereis '02 - Afternoons & Coffeespoons.mp3' + git annex whereis 'no ampersand.mp3' + +This produces: + + 12:38:40 ~/testannex1 (master)$ git annex whereis '02 - Afternoons & Coffeespoons.mp3' + whereis 02 - Afternoons & Coffeespoons.mp3 (0 copies) + failed + git-annex: 1 failed + 12:38:40 ~/testannex1 (master)$ git annex whereis 'no ampersand.mp3' + whereis no ampersand.mp3 (1 copy) + a7b680fc-a8d0-11e0-b0fe-4f94e86d1fb7 -- testannex1 <-- here + ok + +[[!tag done]] diff --git a/doc/bugs/git_annex_upgrade_loses_track_of_files_with___34____38____34___character___40__and_probably_others__41__/comment_1_861506e40e0d04d2be98bbfe9188be89._comment b/doc/bugs/git_annex_upgrade_loses_track_of_files_with___34____38____34___character___40__and_probably_others__41__/comment_1_861506e40e0d04d2be98bbfe9188be89._comment new file mode 100644 index 0000000000..194b36ac10 --- /dev/null +++ b/doc/bugs/git_annex_upgrade_loses_track_of_files_with___34____38____34___character___40__and_probably_others__41__/comment_1_861506e40e0d04d2be98bbfe9188be89._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-07-07T21:04:23Z" + content=""" +What an evil little bug. In retrospect, this probably bit my own test upgrades, but I ran `git annex fsck` everywhere and so avoided the location log breakage. + +I've fixed the bug, which also involved files with other punctuation in their names [&:%] when using the WORM backend. + +The only way I have to recover repos that have already been upgraded is to run `git annex fsck --fast` in each clone of such a repo, which will let it rebuild the location log information. I think that is the best way to recover; ie I can't think of a way to recover that doesn't need to do everything fsck does anyway. 
+"""]] diff --git a/doc/bugs/git_annex_upgrade_output_is_inconsistent_and_spammy.mdwn b/doc/bugs/git_annex_upgrade_output_is_inconsistent_and_spammy.mdwn new file mode 100644 index 0000000000..ec8a10915e --- /dev/null +++ b/doc/bugs/git_annex_upgrade_output_is_inconsistent_and_spammy.mdwn @@ -0,0 +1,15 @@ +Upgrading from v1 to v3: + + upgrade . (v1 to v2...) (moving content...) (updating symlinks...) (moving location logs...) (v2 to v3...) ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ git-annex branch created + Be sure to push this branch when pushing to remotes. + ok + +A whirly would be preferable, imo. + +> Erm, I'm pretty sure you were the one who asked for there to be some +> progress dots, Richard. +> +> I'm not particularly interested in implementing a whirley that would only +> be used in this one place, in code that very few users are going to run +> again. I could remove the dots.. [[done]] --[[Joey]] diff --git a/doc/bugs/git_annex_upgrade_output_is_inconsistent_and_spammy/comment_1_3a01c81efba321b0e46d1bc0426ad8d1._comment b/doc/bugs/git_annex_upgrade_output_is_inconsistent_and_spammy/comment_1_3a01c81efba321b0e46d1bc0426ad8d1._comment new file mode 100644 index 0000000000..4f9565517d --- /dev/null +++ b/doc/bugs/git_annex_upgrade_output_is_inconsistent_and_spammy/comment_1_3a01c81efba321b0e46d1bc0426ad8d1._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 1" + date="2011-10-29T17:03:26Z" + content=""" +I could dig it out, but I am sure I said dots are fine and a whirly better. + +Still, WONTFIX is fine. +"""]] diff --git a/doc/bugs/git_annex_version_should_without_being_in_a_repo_.mdwn b/doc/bugs/git_annex_version_should_without_being_in_a_repo_.mdwn new file mode 100644 index 0000000000..5c995852b1 --- /dev/null +++ b/doc/bugs/git_annex_version_should_without_being_in_a_repo_.mdwn @@ -0,0 +1,7 @@ +was checking the version of git-annex on a machine before cloning a repo... + + $ git annex version + git-annex: Not in a git repository. + +> made difficult by the Annex monad, but I made it work! 
--[[Joey]] +> [[done]] diff --git a/doc/bugs/git_annex_version_should_without_being_in_a_repo_/comment_1_e7b26eeb1a765fd83280ef907c0deef2._comment b/doc/bugs/git_annex_version_should_without_being_in_a_repo_/comment_1_e7b26eeb1a765fd83280ef907c0deef2._comment new file mode 100644 index 0000000000..ab30d8a452 --- /dev/null +++ b/doc/bugs/git_annex_version_should_without_being_in_a_repo_/comment_1_e7b26eeb1a765fd83280ef907c0deef2._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawmBUR4O9mofxVbpb8JV9mEbVfIYv670uJo" + nickname="Justin" + subject="comment 1" + date="2011-11-16T03:24:30Z" + content=""" +oh, and that probably goes for 'help' and other subcommands as well. +"""]] diff --git a/doc/bugs/git_command_line_constructed_by_unannex_command_has_tons_of_redundant_-a_paramters.mdwn b/doc/bugs/git_command_line_constructed_by_unannex_command_has_tons_of_redundant_-a_paramters.mdwn new file mode 100644 index 0000000000..181b02b5c1 --- /dev/null +++ b/doc/bugs/git_command_line_constructed_by_unannex_command_has_tons_of_redundant_-a_paramters.mdwn @@ -0,0 +1,15 @@ +This doesn't look right: + + simons 11148 0.0 0.0 15572 1268 pts/1 SN+ 04:00 0:00 | \_ git annex unannex stuff + simons 11150 0.5 0.0 130504 11212 pts/1 SN+ 04:00 3:40 | | \_ git-annex unannex stuff + simons 11152 0.0 0.1 39536 23932 pts/1 SN+ 04:00 0:00 | | \_ git --git-dir=/home/simons/annex/.git --work-tree=/home/simons/annex ls-files --cached -z -- stuff + simons 11288 0.0 0.0 0 0 pts/1 ZN+ 04:01 0:00 | | \_ [git] + simons 11339 0.0 0.0 0 0 pts/1 ZN+ 04:02 0:00 | | \_ [git-annex] + simons 11442 0.0 0.0 0 0 pts/1 ZN+ 04:06 0:00 | | \_ [git] + simons 11443 0.0 0.0 0 0 pts/1 ZN+ 04:06 0:05 | | \_ [git] + simons 16541 0.0 0.0 0 0 pts/1 ZN+ 04:14 0:00 | | \_ [git] + simons 16543 0.3 0.0 15644 1744 pts/1 SN+ 04:14 2:13 | | \_ git --git-dir=/home/simons/annex/.git --work-tree=/home/simons/annex cat-file --batch + simons 14224 0.0 0.0 100744 796 pts/1 SN+ 
14:10 0:00 | | \_ xargs -0 git --git-dir=/home/simons/annex/.git --work-tree=/home/simons/annex commit -a -m content removed from git annex + simons 14225 0.4 0.1 32684 18652 pts/1 DN+ 14:10 0:00 | | \_ git --git-dir=/home/simons/annex/.git --work-tree=/home/simons/annex commit -a -m content removed from git annex -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a 
-a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a 
-a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a -a + +> [[Fixed|done]] --[[Joey]] diff --git a/doc/bugs/git_rename_detection_on_file_move.mdwn b/doc/bugs/git_rename_detection_on_file_move.mdwn new file mode 100644 index 0000000000..76f1e098e5 --- /dev/null +++ b/doc/bugs/git_rename_detection_on_file_move.mdwn @@ -0,0 +1,13 @@ +It's unfortunate that git-annex sorta defeats git's rename detection. + +When an annexed file is moved to a different directory (specifically, a +directory that is shallower or deeper than the old directory), +the symlink often has to change. And so git log cannot --follow back +through the rename history, since all it has to go on is that symlink, +which it effectively sees as a one line file containing the symlink target. + +One way to fix this might be to do the `git annex fix` *after* the rename +is committed. This would mean that a commit would result in new staged +changes for another commit, which is perhaps startling behavior. + +The other way to fix it is to stop using symlinks, see [[todo/smudge]]. 
diff --git a/doc/bugs/git_rename_detection_on_file_move/comment_1_0531dcfa833b0321a7009526efe3df33._comment b/doc/bugs/git_rename_detection_on_file_move/comment_1_0531dcfa833b0321a7009526efe3df33._comment new file mode 100644 index 0000000000..8fec6bad72 --- /dev/null +++ b/doc/bugs/git_rename_detection_on_file_move/comment_1_0531dcfa833b0321a7009526efe3df33._comment @@ -0,0 +1,26 @@ +[[!comment format=mdwn + username="http://christian.amsuess.com/chrysn" + nickname="chrysn" + subject="use mini-branches" + date="2011-03-09T23:47:48Z" + content=""" +if you go for the two-commits version, small intermediate branches (or git-commit-tree) could be used to create a tree like this: + + + * commit 106eef2 + |\ Merge: 436e46f 9395665 + | | + | | the main commit + | | + | * commit 9395665 + |/ + | intermediate move + | + * commit 436e46f + | + | ... + +while the first commit (436e46f) has a \"`/subdir/foo → ../.git-annex/where_foo_is`\", the intermediate (9395665) has \"`/subdir/deeper/foo → ../.git-annex/where_foo_is`\", and the final commit (106eef2) has \"`/subdir/deeper/foo → ../../.git-annex/where_foo_is`\". + +`--follow` uses the intermediate commit to find the history, but the intermediate commit would neither show up in `git log --first-parent` nor affect `git diff HEAD^..` & co. (there could still be confusion over `git show`, though). +"""]] diff --git a/doc/bugs/git_rename_detection_on_file_move/comment_2_7101d07400ad5935f880dc00d89bf90e._comment b/doc/bugs/git_rename_detection_on_file_move/comment_2_7101d07400ad5935f880dc00d89bf90e._comment new file mode 100644 index 0000000000..7d50c58d1b --- /dev/null +++ b/doc/bugs/git_rename_detection_on_file_move/comment_2_7101d07400ad5935f880dc00d89bf90e._comment @@ -0,0 +1,27 @@ +[[!comment format=mdwn + username="praet" + ip="81.240.159.215" + subject="Use variable symlinks, relative to the repo's root ?" 
+ date="2011-03-10T16:50:28Z" + content=""" +It all boils down to the fact that the path to a relative symlink's target is determined relative to the symlink itself. + +Now, if we define the symlink's target relative to the git repo's root (eg. using the $GIT_DIR environment variable, which can be a relative or absolute path itself), this unfortunately results in an absolute symlink, which would -for obvious reasons- only be usable locally: + + user@host:~$ mkdir -p tmp/{.git/annex,somefolder} + user@host:~$ export GIT_DIR=~/tmp + user@host:~$ touch $GIT_DIR/.git/annex/realfile + user@host:~$ ln -s $GIT_DIR/.git/annex/realfile $GIT_DIR/somefolder/file + user@host:~$ ls -al $GIT_DIR/somefolder/ + total 12 + drwxr-x--- 2 user group 4096 2011-03-10 16:54 . + drwxr-x--- 4 user group 4096 2011-03-10 16:53 .. + lrwxrwxrwx 1 user group 33 2011-03-10 16:54 file -> /home/user/tmp/.git/annex/realfile + user@host:~$ + +So, what we need is the ability to record the actual variable name (instead of its value) in our symlinks. + +It *is* possible, using [variable/variant symlinks](http://en.wikipedia.org/wiki/Symbolic_link#Variable_symbolic_links), yet I'm unsure as to whether or not this is available on Linux systems, and even if it is, it would introduce compatibility issues in multi-OS environments. + +Thoughts on this? +"""]] diff --git a/doc/bugs/git_rename_detection_on_file_move/comment_3_57010bcaca42089b451ad8659a1e018e._comment b/doc/bugs/git_rename_detection_on_file_move/comment_3_57010bcaca42089b451ad8659a1e018e._comment new file mode 100644 index 0000000000..534723254a --- /dev/null +++ b/doc/bugs/git_rename_detection_on_file_move/comment_3_57010bcaca42089b451ad8659a1e018e._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 3" + date="2011-03-16T03:03:19Z" + content=""" +Interesting, I had not heard of variable symlinks before. AFAIK linux does not have them. 
+"""]] diff --git a/doc/bugs/git_rename_detection_on_file_move/comment_4_79d96599f757757f34d7b784e6c0e81c._comment b/doc/bugs/git_rename_detection_on_file_move/comment_4_79d96599f757757f34d7b784e6c0e81c._comment new file mode 100644 index 0000000000..c265b58995 --- /dev/null +++ b/doc/bugs/git_rename_detection_on_file_move/comment_4_79d96599f757757f34d7b784e6c0e81c._comment @@ -0,0 +1,34 @@ +[[!comment format=mdwn + username="praet" + ip="81.240.27.89" + subject="Brainfart" + date="2011-03-20T20:11:27Z" + content=""" +Haven't given these any serious thought (which will become apparent in a moment) but hoping they will give birth to some less retarded ideas: + +--- + +### Bait'n'switch + +- pre-commit: Replace all staged symlinks (when pointing to annexed files) with plaintext files containing the key of their respective annexed content, re-stage, and add their paths (relative to repo root) to .gitignore. +- post-commit: Replace the plaintext files with (git annex fix'ed) symlinks. + +In doing so, the blobs to be committed can remain unaltered, irrespective of their related files' depth in the directory hierarchy. + +To prevent git from reporting ALL annexed files as unstaged changes after running post-commit hook, their paths would need to be added to .gitignore. + +This wouldn't cause any issues when adding files, very little when modifying files (would need some alterations to \"git annex unlock\"), BUT would make git totally oblivious to removals... + +--- + +### Manifest-based (re)population +- Keep a manifest of all annexed files (key + relative path) +- DON'T track the symlinks (.gitignore) +- Populate/update the directory structure using a post-commit hook. + +... thus circumventing the issue entirely, yet diffstats (et al.) would be rather uninformative. 
+ +--- + +***Wide open to suggestions, criticism, mocking laughter and finger-pointing :)*** +"""]] diff --git a/doc/bugs/git_rename_detection_on_file_move/comment_5_d61f5693d947b9736b29fca1dbc7ad76._comment b/doc/bugs/git_rename_detection_on_file_move/comment_5_d61f5693d947b9736b29fca1dbc7ad76._comment new file mode 100644 index 0000000000..93db97e704 --- /dev/null +++ b/doc/bugs/git_rename_detection_on_file_move/comment_5_d61f5693d947b9736b29fca1dbc7ad76._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="praet" + ip="81.242.56.203" + subject="comment 5" + date="2011-03-21T19:58:34Z" + content=""" +In the meantime, would it be acceptable to split the pre-commit hook +into two discrete parts? + +This would allow to (if preferred) defer \"git annex fix\" until +post-commit while still keeping the safety net for unlocked files. +"""]] diff --git a/doc/bugs/git_rename_detection_on_file_move/comment_6_f63de6fe2f7189c8c2908cc41c4bc963._comment b/doc/bugs/git_rename_detection_on_file_move/comment_6_f63de6fe2f7189c8c2908cc41c4bc963._comment new file mode 100644 index 0000000000..7398ac5614 --- /dev/null +++ b/doc/bugs/git_rename_detection_on_file_move/comment_6_f63de6fe2f7189c8c2908cc41c4bc963._comment @@ -0,0 +1,19 @@ +[[!comment format=mdwn + username="http://adamspiers.myopenid.com/" + nickname="Adam" + subject="extra level of indirection" + date="2011-12-19T12:45:18Z" + content=""" +Surely this could be handled with an extra layer of indirection? + +git-annex would ensure that every directory containing annexed data contains a new symlink `.git-annex` which points to `$git_root/.git/annex`. Then every symlink to an annexed object uses a relative symlink via this: `.git_annex/objects/xx/yy/ZZZZZZZZZZ`. Even though this symlink is relative, moving it to a different directory would not break anything: if the move destination directory already contained other annexed data, it would also already contain `.git-annex` so git-annex wouldn't need to do anything. 
And if it didn't, git-annex would simply create a new `.git-annex` symlink there. + +These `.git-annex` symlinks could either be added to `.gitignore`, or manually/automatically checked in to the current branch - I'm not sure which would be best. There's also the option of using multiple levels of indirection: + + foo/bar/baz/.git-annex -> ../.git-annex + foo/bar/.git-annex -> ../.git-annex + foo/.git-annex -> ../.git-annex + .git-annex -> .git/annex + +I'm not sure whether this would bring any advantages. It might bring a performance hit due to the kernel having to traverse more symlinks, but without benchmarking it's difficult to say how much. I'd expect it only to be an issue with a large number of deep directory trees. +"""]] diff --git a/doc/bugs/git_rename_detection_on_file_move/comment_7_7f20d0b2f6ed1c34021a135438037306._comment b/doc/bugs/git_rename_detection_on_file_move/comment_7_7f20d0b2f6ed1c34021a135438037306._comment new file mode 100644 index 0000000000..0a045feb63 --- /dev/null +++ b/doc/bugs/git_rename_detection_on_file_move/comment_7_7f20d0b2f6ed1c34021a135438037306._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 7" + date="2011-12-19T18:22:25Z" + content=""" +That seems an excellent idea, also eliminating the need for git annex fix after moving. + +However, I think CVS and svn have taught us the pain associated with a version control system putting something in every subdirectory. Would this pain be worth avoiding the minor pain of needing git annex fix and sometimes being unable to follow renames? 
+"""]] diff --git a/doc/bugs/git_rename_detection_on_file_move/comment_8_6a00500b24ba53248c78e1ffc8d1a591._comment b/doc/bugs/git_rename_detection_on_file_move/comment_8_6a00500b24ba53248c78e1ffc8d1a591._comment new file mode 100644 index 0000000000..d53022302d --- /dev/null +++ b/doc/bugs/git_rename_detection_on_file_move/comment_8_6a00500b24ba53248c78e1ffc8d1a591._comment @@ -0,0 +1,21 @@ +[[!comment format=mdwn + username="http://adamspiers.myopenid.com/" + nickname="Adam" + subject="comment 8" + date="2011-12-20T12:00:11Z" + content=""" +Personally I'd rather have working rename detection but I agree it's not 100% ideal to be littering multiple directories like this, so perhaps you could make it optional, e.g. based on a git config setting? + +Here are a few more considerations, some in defence of the approach, some against it: + +* `.git-annex` is hidden; `CVS/` is not. +* Unlike `CVS/` and `.svn/`, it's only a symlink, not a directory containing other files. +* It doesn't contain any data specific to that directory and could easily be regenerated if deleted accidentally or otherwise. +* If a whole directory containing `.git-annex` was moved within the repository: + * git-annex would need to fix up these symlinks if and only if it's moved to a different depth within the tree. + * However, if the multi-level indirection approach is used, `.git-annex` in any subdirectory is *always* a symlink to `../.git-annex` so instead you would need to check that all of the new ancestors contain this symlink too, and optionally remove any no longer needed symlinks. + * In either case, git-annex already goes to the trouble of fixing symlinks, and if anything, I *think* this approach would reduce the number of symlinks which need checking (right?) +* find `$git_root/foo -follow`, `diff -r` etc. would traverse into `$git_root/.git/annex` + +This last point is the only downside to this approach I can think of which gives me any noticeable cause for concern. 
However, people are already used to working around this from CVS and svn days, e.g. `diff -r -x .svn` so I don't think it's anywhere near bad enough to rule it out. +"""]] diff --git a/doc/bugs/git_rename_detection_on_file_move/comment_9_75e0973f6d573df615e01005ebcea87d._comment b/doc/bugs/git_rename_detection_on_file_move/comment_9_75e0973f6d573df615e01005ebcea87d._comment new file mode 100644 index 0000000000..919455bdcc --- /dev/null +++ b/doc/bugs/git_rename_detection_on_file_move/comment_9_75e0973f6d573df615e01005ebcea87d._comment @@ -0,0 +1,9 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 9" + date="2011-12-20T14:56:12Z" + content=""" +Git can follow the rename fine if the file is committed before `git annex fix` (you can git commit -n to see this), so +making git-annex pre-commit generate a fixup commit before the staged commit would be one way. Or the other two ways I originally mentioned when writing down this minor issue. I like all those approaches better than .git-annex clutter. +"""]] diff --git a/doc/bugs/interrupting_migration_causes_problems.mdwn b/doc/bugs/interrupting_migration_causes_problems.mdwn new file mode 100644 index 0000000000..68426e54af --- /dev/null +++ b/doc/bugs/interrupting_migration_causes_problems.mdwn @@ -0,0 +1,52 @@ +Killing a migration from WORM to SHA256 with ^C breaks things; future attempts to do the migration fail: + + #!/bin/bash + + BASE=/tmp/migrate-bug + + set -x + + chmod -R +w $BASE + rm -rf $BASE + mkdir -p $BASE + cd $BASE + + # create annex + git init . + git annex init + + # make a big (sparse) file and add it + dd if=/dev/zero of=bigfile bs=1 count=0 seek=1G + git annex add --backend WORM bigfile + git commit -m 'added bigfile' + + # look at status + git annex status + + # now migrate it, but kill migration during checksum + # Simulate ^C by making a new process group and sending SIGINT + setsid git annex migrate --backend SHA256 bigfile & + PID=$! 
+ sleep 1 + kill -INT -$PID + wait + + # look at status + git annex status + + # this migration fails + git annex migrate --backend SHA256 bigfile + + # but fsck says everything's OK + git annex fsck + +The error: + + migrate bigfile + git-annex: /tmp/migrate-bug/.git/annex/objects/K9/V1/WORM-s1073741824-m1321566308--bigfile/WORM-s1073741824-m1321566308--bigfile: createLink: already exists (File exists) + failed + git-annex: migrate: 1 failed + +> Fixed it to delete the stale temp file. [[done]] +> +> Thanks for making such clear test cases, Jim! --[[Joey]] diff --git a/doc/bugs/making_annex-merge_try_a_fast-forward.mdwn b/doc/bugs/making_annex-merge_try_a_fast-forward.mdwn new file mode 100644 index 0000000000..41a5a2a581 --- /dev/null +++ b/doc/bugs/making_annex-merge_try_a_fast-forward.mdwn @@ -0,0 +1,35 @@ +While merging the git-annex branch, annex-merge does not end up in a fast-forward even when it would be possible. +But as sometimes annex-merge takes time, it would probably be worth it +(but maybe I miss something with my workflow...). + +> I don't think a fast-forward will make things much faster. +> +> git-annex needs its index file to be updated to reflect the merge. +> With the union merge it does now, this can be accomplished by using +> `git-diff-index` to efficiently get a list of files that have changed, +> and only merge those changes into the index with `git-update-index`. +> Then the index gets committed, generating the merge. +> +> To fast-forward, it would just reset the git-annex branch to the new +> head of the remote it's merging to. But then the index needs to be +> updated to reflect this new head too. To do that needs the same method +> described above, essentially (with the difference that it can replace +> files in the index with the version from the git-annex branch, rather +> than merging in the changes... 
but only if the index is known to be +> already committed and have no other changes, which would require both +> an attempt to commit it first, and +> locking). +> +> So will take basically the same amount of time, except +> it would not need to commit the index at the end of the merge. The +> most expensive work is the `git-diff-index` and `git-update-index`, +> which are not avoided. +> +> Although, perhaps fast-forward merge would use slightly +> less space. --[[Joey]] + +>> To avoid the ladder-merge between two repositories described at +>> , seems a fast-forward should be detected and +>> written to git, even if the index is still updated the current way. +>> [[done]] +>> --[[Joey]] diff --git a/doc/bugs/minor_bug:_errors_are_not_verbose_enough.mdwn b/doc/bugs/minor_bug:_errors_are_not_verbose_enough.mdwn new file mode 100644 index 0000000000..a6620f4255 --- /dev/null +++ b/doc/bugs/minor_bug:_errors_are_not_verbose_enough.mdwn @@ -0,0 +1,26 @@ +Current: + + % git annex status + git-annex: unknown command + +Better: + + % git annex status + git-annex: status: unknown command + +Current: + + % git annex fsck + [...] + git-annex: 18 failed + +Better: + + % git annex fsck + [...] + git-annex: fsck: 18 failed + + +etc pp. + +> [[done]] --[[Joey]] diff --git a/doc/bugs/not_possible_to_have_annex_on_a_separate_filesystem.mdwn b/doc/bugs/not_possible_to_have_annex_on_a_separate_filesystem.mdwn new file mode 100644 index 0000000000..7daf03284b --- /dev/null +++ b/doc/bugs/not_possible_to_have_annex_on_a_separate_filesystem.mdwn @@ -0,0 +1,32 @@ +I belive I have found a regression. + +Inspired by + +I tried to only have .git/annex/objects (also tested moving .git/annex) on NFS while having the rest on local SSD disk. + +But when trying to add files i get: + + > git annex add testfile + add testfile (checksum...) 
+ git-annex: testfile: rename: unsupported operation (Invalid cross-device link) + failed + git-annex: add: 1 failed + +I have tried both using bind-mount and with a sym-link. + +> Grepping for `renameFile` and `createLink` will find all the places +> in git-annex that assume one filesystem. These would have to be changed +> to catch errors and fall back to expensive copying. +> +> Putting a separate repository on the file server could work better +> depending on what you're trying to do. --[[Joey]] + +>> I've added support for putting `.git/annex` on a separate filesystem +>> from the rest of the git repository. +>> +>> Putting individual subdirectories like `.git/annex/objects` on separate +>> filesystems from other subdirectories is not fully supported; it may +>> work but it may be slow and a few things (like `git annex migrate`) are +>> known to fail due to using hard links. I don't think this is worth +>> supporting. [[done]] +>> --[[Joey]] diff --git a/doc/bugs/old_data_isn__39__t_unused_after_migration.mdwn b/doc/bugs/old_data_isn__39__t_unused_after_migration.mdwn new file mode 100644 index 0000000000..9d468bdc7d --- /dev/null +++ b/doc/bugs/old_data_isn__39__t_unused_after_migration.mdwn @@ -0,0 +1,66 @@ +Old data isn't listed as unused after migrating backends: + + #!/bin/bash + + BASE=/tmp/migrate-bug-2 + set -x + chmod -R +w $BASE + rm -rf $BASE + mkdir -p $BASE + cd $BASE + + # create annex + git init . 
+ git annex init + + # make a big (sparse) file and add it + dd if=/dev/zero of=bigfile bs=1 count=0 seek=1G + git annex add --backend WORM bigfile + git commit -m 'added bigfile' + + # migrate it + git annex migrate --backend SHA256 bigfile + + # status shows 2 keys taking up 2G + git annex status + + # but nothing is unused + git annex unused + +Output: + + ++ git annex status + supported backends: SHA256 SHA1 SHA512 SHA224 SHA384 SHA256E SHA1E SHA512E SHA224E SHA384E WORM URL + supported remote types: git S3 bup directory rsync web hook + known repositories: + ede95a82-1166-11e1-a475-475d55eb0f8f -- here + local annex keys: 2 + local annex size: 2 gigabytes + visible annex keys: 1 + visible annex size: 1 gigabyte + backend usage: + WORM: 1 + SHA256: 1 + ++ git annex unused + unused . (checking for unused data...) (checking master...) ok + +The two files are hardlinked, so it's not taking up extra space, but it would be nice to be able to remove the old keys. + +> `git annex unused` checks the content of all branches, and assumes that, +> when a branch contains a file that points to a key, that key is still +> used. In this case, the migration has staged a change to the file, +> but it is not yet committed, so when it checks the master branch, it +> still finds a file referring to the old key. +> +> So, slightly surprising, but not a bug. --[[Joey]] [[done]] + +>> Thanks for the explanation. In my real repository, it was a bit trickier: +>> the migration was commited to `master`, but other *remote* branches still +>> referenced those keys. I was just doing a `git pull` from a central repo, but +>> needed a `git remote update` to remove those references from `remotes/foo/master` too. +>> --Jim + +>>> I have considered making unused ignore remote tracking branches. +>>> On the one hand, it can be a little bit confusing, and those branches +>>> can be out of date. 
On the other hand, it can be useful to know you're +>>> not dropping anything that some remote might still refer to. --[[Joey]] diff --git a/doc/bugs/on--git-dir_and_--work-tree_options.mdwn b/doc/bugs/on--git-dir_and_--work-tree_options.mdwn new file mode 100644 index 0000000000..d76a42bfff --- /dev/null +++ b/doc/bugs/on--git-dir_and_--work-tree_options.mdwn @@ -0,0 +1,29 @@ +git-annex does not take into account the --git-dir and --work-tree command line options (while they can be useful when scripting). + + > mkdir /tmp/test + > cd /tmp/test + > git init + Initialized empty Git repository in /tmp/test/.git/ + > git annex init test + init test ok + > touch foo + > cd + > git --git-dir=/tmp/test/.git --work-tree=/tmp/test annex add foo + git-annex: Not in a git repository. + +regular git add works: + + > git --git-dir=/tmp/test/.git --work-tree=/tmp/test add foo + > git --git-dir=/tmp/test/.git --work-tree=/tmp/test status + # On branch master + # + # Initial commit + # + # Changes to be committed: + # (use "git rm --cached ..." to unstage) + # + # new file: foo + # + +git-annex version: 3.20110702 + diff --git a/doc/bugs/ordering.mdwn b/doc/bugs/ordering.mdwn new file mode 100644 index 0000000000..536bfce36a --- /dev/null +++ b/doc/bugs/ordering.mdwn @@ -0,0 +1,12 @@ +One would expect "git annex get foo bar" to first retrieve foo, and then +bar. Actually though, it will operate on them in alphabetical order +(probably). This is annoying when you wanted to 1st list the most important +files to get. Maybe you'll run out of time before all can be gotten. The +workaround of course is to run "git annex get" twice. + +This ordering comes from "git ls-files". git-annex passes it all the files +the user specified. This is a useful optimisation -- earlier it would +run "git ls-files" once per parameter, and so "git annex get *" could be +rather slow. But, it produces this ordering problem. 
+ +[[done]] diff --git a/doc/bugs/problem_commit_normal_links.mdwn b/doc/bugs/problem_commit_normal_links.mdwn new file mode 100644 index 0000000000..6dbd41fb4e --- /dev/null +++ b/doc/bugs/problem_commit_normal_links.mdwn @@ -0,0 +1,59 @@ +Dear All, + +thank you for this wonderful tool! + +I am having an issue when I try to commit a normal link + +diokletian*194-> mkdir test + +diokletian*195-> cd test + +diokletian*196-> git init + +Initialized empty Git repository in /home/henrus/test/.git/ + +diokletian*197-> git annex init new + +init new [master (root-commit) 49f5f91] git-annex setup + + 1 files changed, 1 insertions(+), 0 deletions(-) + + create mode 100644 .gitattributes + +[master 76496ff] git annex init + + 1 files changed, 1 insertions(+), 0 deletions(-) + + create mode 100644 .git-annex/uuid.log + +ok + +diokletian*198-> mkdir subdir + +diokletian*199-> ln -s subdir link + +diokletian*200-> git add link + +diokletian*201-> git commit -m "ok" + +[master f12f62d] ok + + 1 files changed, 1 insertions(+), 0 deletions(-) + + create mode 120000 link + +diokletian*202-> ln -s subdir/ link2 + +diokletian*203-> git add link2 + +diokletian*204-> git commit -m "not ok" + +git-annex: Prelude.head: empty list + +The trailing slash seems to make a difference! + +Best Regards, + +Henrik + +> Thanks for the bug report. This is fixed in 0.17. --[[Joey]] [[!tag done]] diff --git a/doc/bugs/problem_with_upgrade_v2_-__62___v3.mdwn b/doc/bugs/problem_with_upgrade_v2_-__62___v3.mdwn new file mode 100644 index 0000000000..7f37668ad1 --- /dev/null +++ b/doc/bugs/problem_with_upgrade_v2_-__62___v3.mdwn @@ -0,0 +1,3 @@ +On several of my repos, the upgrade to v3 seemed to take forever. A Ctrl-C followed by another "git annex upgrade" "solved" the problem in some cases. Sometimes, I had to also delete the .git/annex/journal dir to have the upgrade. I didn't notice anything special about the non-working repos to help diagnose the problem. 
+ +[[!tag done]] diff --git a/doc/bugs/problem_with_upgrade_v2_-__62___v3/comment_1_5f60006c9bb095167d817f234a14d20b._comment b/doc/bugs/problem_with_upgrade_v2_-__62___v3/comment_1_5f60006c9bb095167d817f234a14d20b._comment new file mode 100644 index 0000000000..0cf0ad4618 --- /dev/null +++ b/doc/bugs/problem_with_upgrade_v2_-__62___v3/comment_1_5f60006c9bb095167d817f234a14d20b._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-07-04T22:58:46Z" + content=""" +Well if it happens again why don't you use `ps` or `strace` to see what it's doing. +"""]] diff --git a/doc/bugs/problem_with_upgrade_v2_-__62___v3/comment_2_cd0123392b16d89db41b45464165c247._comment b/doc/bugs/problem_with_upgrade_v2_-__62___v3/comment_2_cd0123392b16d89db41b45464165c247._comment new file mode 100644 index 0000000000..4bef5f6454 --- /dev/null +++ b/doc/bugs/problem_with_upgrade_v2_-__62___v3/comment_2_cd0123392b16d89db41b45464165c247._comment @@ -0,0 +1,23 @@ +[[!comment format=mdwn + username="https://lithitux.org/openidserver/users/pavel" + nickname="pavel" + subject=""Me too"" + date="2011-07-05T15:54:19Z" + content=""" +I've also seen this apparent hang during upgrade to v3. A few more details: + +The annex in question has just under 18k files (and hence that many log files), which can slow down directory operations when they're all in the same place (like, for example, .git/annex/journal). 
+ +git-annex uses virtually no CPU time and disk IO when it's hanging like this; the first time it happened, 'ps' showed three defunct git processes, with two \"git-annex\" processes and three \"git\" procs: + + * git --git-dir=/mnt/annex/.git --work-tree=/mnt/annex cat-file --batch + * git --git-dir=/mnt/annex/.git --work-tree=/mnt/annex hash-object -w --stdin-paths + * git --git-dir=/mnt/annex/.git --work-tree=/mnt/annex update-index -z --index-info + +I Ctrl+C'd that and tried again, but it hung again -- this time without the defunct gits. + +An strace of the process and its children at the time of hang can be found at http://pastebin.com/4kNh4zEJ . It showed somewhat weird behaviour: When I attached with strace, it would scroll through a whole bunch of syscalls making up the open-fstat-read-close-write loop on .git/annex/journal files, but then would block on a write (sorry, don't have that in my scrollback any more so can't give more details) until I Ctrl+C'd strace; when attaching again, it would again scroll through the syscalls for a second or so and then hang with no output. + +Ultimately I detached/reattached with strace about two dozen times and that caused it (?) to finish the upgrade; not really sure how to explain it, but it seems like too much of a timing coincidence. + +"""]] diff --git a/doc/bugs/problem_with_upgrade_v2_-__62___v3/comment_3_86d9e7244ae492bcbe62720b8c4fc4a9._comment b/doc/bugs/problem_with_upgrade_v2_-__62___v3/comment_3_86d9e7244ae492bcbe62720b8c4fc4a9._comment new file mode 100644 index 0000000000..e314e73fa0 --- /dev/null +++ b/doc/bugs/problem_with_upgrade_v2_-__62___v3/comment_3_86d9e7244ae492bcbe62720b8c4fc4a9._comment @@ -0,0 +1,16 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 3" + date="2011-07-05T17:31:22Z" + content=""" +I've seen this kind of piping stall that is unblocked by strace before. 
It can vary with versions of GHC, so it would be good to know what version built git-annex (and on what OS version). I filed a bug report upstream before at . + +I really need a full strace -f from the top, or at least a complete `strace -o log` of git-annex from one hang through to another hang. The strace you pastebinned does not seem complete. If I can work out which specific git command is being written to when it hangs I can lift the writing out into a separate thread or process to fix it. + +@pavel, you mentioned three defunct git processes, and then showed ps output for 3 git processes. Were there 6 git processes in total? And then when you ran it again you said there were no defunct gits -- were the other 3 git processes running once again? + +As best I can make out from the (apparently) running git processes, it seems like the journal files for the upgrade had all been written, and the hang occurred when staging them all into the index in preparation for a commit. I have committed a change that lifts the code that does that write out into a new process, which, if I am guessing right on the limited info I have, will avoid the hang. + +However, since I can't reproduce it, even when I put 200 thousand files in the journal and have git-annex process them, I can't be sure. +"""]] diff --git a/doc/bugs/problem_with_upgrade_v2_-__62___v3/comment_4_91439d4dbbf1461e281b276eb0003691._comment b/doc/bugs/problem_with_upgrade_v2_-__62___v3/comment_4_91439d4dbbf1461e281b276eb0003691._comment new file mode 100644 index 0000000000..7bc32c259f --- /dev/null +++ b/doc/bugs/problem_with_upgrade_v2_-__62___v3/comment_4_91439d4dbbf1461e281b276eb0003691._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 4" + date="2011-07-05T18:37:21Z" + content=""" +I've managed to reproduce this and confirmed my fix works. 
+"""]] diff --git a/doc/bugs/problem_with_upgrade_v2_-__62___v3/comment_5_ca33a9ca0df33f7c1b58353d7ffb943d._comment b/doc/bugs/problem_with_upgrade_v2_-__62___v3/comment_5_ca33a9ca0df33f7c1b58353d7ffb943d._comment new file mode 100644 index 0000000000..8649dc77a8 --- /dev/null +++ b/doc/bugs/problem_with_upgrade_v2_-__62___v3/comment_5_ca33a9ca0df33f7c1b58353d7ffb943d._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 5" + date="2011-07-05T19:06:48Z" + content=""" +By the way, the original bug reporter mentioned deleting .git/annex/journal. This is not recommended, and doing it during an upgrade can result in git-annex losing location tracking information. You should probably run `git annex fsck` or reset to the old git tree (and `git config annex.version 2`) and upgrade again. +"""]] diff --git a/doc/bugs/problem_with_upgrade_v2_-__62___v3/comment_6_f360f0006bc9115bc5a3e2eb9fe58abd._comment b/doc/bugs/problem_with_upgrade_v2_-__62___v3/comment_6_f360f0006bc9115bc5a3e2eb9fe58abd._comment new file mode 100644 index 0000000000..0852db0795 --- /dev/null +++ b/doc/bugs/problem_with_upgrade_v2_-__62___v3/comment_6_f360f0006bc9115bc5a3e2eb9fe58abd._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="https://lithitux.org/openidserver/users/pavel" + nickname="pavel" + subject="comment 6" + date="2011-07-06T08:14:26Z" + content=""" +Ah, great, thanks very much for the quick fix! + +Yes, when I mentioned three defunct git processes, there were three processes shown as \"git [defunct]\", plus the three git processes I listed, plus two \"git-annex\" processes. 
Upon cancel/resume, there were no defunct git processes when I checked, but by the time I found the bug report on the forum and commented I'd already successfully upgraded by annex (by repeatedly attaching strace) and couldn't really easily get at either additional 'ps' info or a fuller strace than what I posted (that was just the log from one of the attach/detach cycles), so it's a relief you managed to pinpoint the problem. +"""]] diff --git a/doc/bugs/problems_with_utf8_names.mdwn b/doc/bugs/problems_with_utf8_names.mdwn new file mode 100644 index 0000000000..d6dc6ca3c3 --- /dev/null +++ b/doc/bugs/problems_with_utf8_names.mdwn @@ -0,0 +1,104 @@ +There are problems with displaying filenames in UTF8 encoding, as shown here: + + $ echo $LANG + en_GB.UTF-8 + $ git init + $ git annex init test + [...] + $ touch "Umlaut Ü.txt" + $ git annex add Uml* + add Umlaut Ã.txt ok + (Recording state in git...) + $ find -name U\* | hexdump -C + 00000000 2e 2f 55 6d 6c 61 75 74 20 c3 9c 2e 74 78 74 0a |./Umlaut ...txt.| + 00000010 + $ git annex find | hexdump -C + 00000000 55 6d 6c 61 75 74 20 c3 83 c2 9c 2e 74 78 74 0a |Umlaut .....txt.| + 00000010 + $ + +It looks like the common latin1-to-UTF8 encoding. Functionality other than otuput seems not to be affected. + +> Yes, I believe that git-annex is reading filename data from git +> as a stream of char8s, and not decoding unicode in it into logical +> characters. +> Haskell then I guess, tries to unicode encode it when it's output to +> the console. +> This only seems to matter WRT its output to the console; the data +> does not get mangled internally and so it accesses the right files +> under the hood. +> +> I am too new to haskell to really have a handle on how to handle +> unicode and other encodings issues with it. In general, there are three +> valid approaches: --[[Joey]] +> +> 1. Convert all input data to unicode and be unicode clean end-to-end +> internally. 
Problimatic here since filenames may not necessarily be +> encoded in utf-8 (an archive could have historical filenames using +> varying encodings), and you don't want which files are accessed to +> depend on locale settings. +> > I tried to do this by making parts of GitRepo call +> > Codec.Binary.UTF8.String.decodeString when reading filenames from +> > git. This seemed to break attempts to operate on the files, +> > weirdly encoded strings were seen in syscalls in strace. +> 1. Keep input and internal data un-decoded, but decode it when +> outputting a filename (assuming the filename is encoded using the +> user's configured encoding), and allow haskell's output encoding to then +> encode it according to the user's locale configuration. +> > This is now [[implemented|done]]. I'm not very happy that I have to watch +> > out for any place that a filename is output and call `filePathToString` +> > on it, but there are really not too many such places in git-annex. +> > +> > Note that this only affects filenames apparently. +> > (Names of files in the annex, and also some places where names +> > of keys are displayed.) Utf-8 in the uuid.map file etc seems +> > to be handled cleanly. +> 1. Avoid encodings entirely. Mostly what I'm doing now; probably +> could find a way to disable encoding of console output. Then the raw +> filename would be displayed, which should work ok. git-annex does +> not really need to pull apart filenames; they are almost entirely +> opaque blobs. I guess that the `--exclude` option is the exception +> to that, but it is currently not unicode safe anyway. (Update: tried +> `--exclude` again, seems it is unicode clean..) +> One other possible +> issue would be that this could cause problems if git-annex were +> translated. +> > On second thought, I switched to this. Any decoding of a filename +> > is going to make someone unhappy; the previous approach broke +> > non-utf8 filenames. + +---- + +Simpler test case: + +
+import Codec.Binary.UTF8.String
+import System.Environment
+
+main = do
+        args <- getArgs
+        let file = decodeString $ head args
+        putStrLn $ "file is: " ++ file
+        putStr =<< readFile file
+
+ +If I pass this a filename like 'ü', it will fail, and notice +the bad encoding of the filename in the error message: + +
+$ echo hi > ü; runghc foo.hs ü
+file is: ü
+foo.hs: �: openFile: does not exist (No such file or directory)
+
+ +On the other hand, if I remove the decodeString, it prints the filename +wrong, while accessing it right: + +
+$ runghc foo.hs ü
+file is: üa
+hi
+
+ +The only way that seems to consistently work is to delay decoding the +filename to places where it's output. But then it's easy to miss some. diff --git a/doc/bugs/problems_with_utf8_names/comment_1_3c7e3f021c2c94277eecf9c8af6cec5f._comment b/doc/bugs/problems_with_utf8_names/comment_1_3c7e3f021c2c94277eecf9c8af6cec5f._comment new file mode 100644 index 0000000000..692b5d5378 --- /dev/null +++ b/doc/bugs/problems_with_utf8_names/comment_1_3c7e3f021c2c94277eecf9c8af6cec5f._comment @@ -0,0 +1,17 @@ +[[!comment format=mdwn + username="http://adamspiers.myopenid.com/" + nickname="Adam" + subject="Any update on this?" + date="2011-12-24T01:05:07Z" + content=""" +I just noticed this issue, and was wondering what the current status is. + + % ls -l 04\ -\ Orixás.mp3 + -rw-r--r-- 1 adam users 8377816 Jul 12 2007 04 - Orixás.mp3 + % echo 04\ -\ Orixás.mp3 | od -c + 0000000 0 4 - O r i x 303 241 s . m p 3 + 0000020 \n + 0000021 + % git annex add 04\ -\ Orixás.mp3 + git-annex: /home/adam/music/RotC/transcribe/04 - Orixás.mp3: getSymbolicLinkStatus: does not exist (No such file or directory) +"""]] diff --git a/doc/bugs/problems_with_utf8_names/comment_2_bad4c4c5f54358d1bc0ab2adc713782a._comment b/doc/bugs/problems_with_utf8_names/comment_2_bad4c4c5f54358d1bc0ab2adc713782a._comment new file mode 100644 index 0000000000..a45706e4a3 --- /dev/null +++ b/doc/bugs/problems_with_utf8_names/comment_2_bad4c4c5f54358d1bc0ab2adc713782a._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://www.joachim-breitner.de/" + nickname="nomeata" + subject="comment 2" + date="2011-12-24T12:49:40Z" + content=""" +This (rather longish) thread discusses the current situation, the planned changes for 7.2 and the various issues: http://haskell.org/pipermail/glasgow-haskell-users/2011-November/021115.html + +The summary seems to be: From 7.2 on, getDirectoryContents _will_ return proper Strings, i.e. 
where a Char represents a Unicode code point, and not a Word8, which will fix the problem of outputting them. +"""]] diff --git a/doc/bugs/problems_with_utf8_names/comment_3_4f936a5d3f9c7df64c8a87e62b7fbfdc._comment b/doc/bugs/problems_with_utf8_names/comment_3_4f936a5d3f9c7df64c8a87e62b7fbfdc._comment new file mode 100644 index 0000000000..9fef2eb1f2 --- /dev/null +++ b/doc/bugs/problems_with_utf8_names/comment_3_4f936a5d3f9c7df64c8a87e62b7fbfdc._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://www.joachim-breitner.de/" + nickname="nomeata" + subject="comment 3" + date="2011-12-24T12:51:43Z" + content=""" +An alternative that is available from ghc 7.4 on is a pure ByteString based unix API: http://thread.gmane.org/gmane.comp.lang.haskell.libraries/16556 +"""]] diff --git a/doc/bugs/problems_with_utf8_names/comment_4_93bee35f5fa7744834994bc7a253a6f9._comment b/doc/bugs/problems_with_utf8_names/comment_4_93bee35f5fa7744834994bc7a253a6f9._comment new file mode 100644 index 0000000000..5e11af6abe --- /dev/null +++ b/doc/bugs/problems_with_utf8_names/comment_4_93bee35f5fa7744834994bc7a253a6f9._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 4" + date="2011-12-24T16:49:13Z" + content=""" +Adam, this bug was fixed a long time ago, first using option #2 above, but later switching to option #3 -- git-annex treats filenames as opaque binary blobs and never decodes them in any encoding; haskell's normal encoding support for stdio is disabled. + +And it never resulted in a failure like you show. I cannot reproduce your problem, but it is a different bug, please open a new bug report. 
+"""]] diff --git a/doc/bugs/rsync_special_remote_fails_to___96__get__96___files_which_have_names_containing_spaces.mdwn b/doc/bugs/rsync_special_remote_fails_to___96__get__96___files_which_have_names_containing_spaces.mdwn new file mode 100644 index 0000000000..040d86bb87 --- /dev/null +++ b/doc/bugs/rsync_special_remote_fails_to___96__get__96___files_which_have_names_containing_spaces.mdwn @@ -0,0 +1,50 @@ + ~$ mkdir test annex + ~$ cd test + ~$ git init + Initialized empty Git repository in /home/user/test/.git/ + ~$ git annex init test + init test ok + ~$ git annex initremote localrsync encryption=none type=rsync rsyncurl=localhost:annex/ + initremote localrsync ok + ~$ cp /home/user/Music/Charming\ Hostess/Eat/03\ Mi\ Nuera.ogg ./ + ~$ git annex add 03\ Mi\ Nuera.ogg + add 03 Mi Nuera.ogg ok + (Recording state in git...) + ~$ git commit -m "add ogg" + fatal: No HEAD commit to compare with (yet) + fatal: No HEAD commit to compare with (yet) + [master (root-commit) 12608af] add ogg + 1 files changed, 1 insertions(+), 0 deletions(-) + create mode 120000 03 Mi Nuera.ogg + ~$ git annex move 03\ Mi\ Nuera.ogg --to localrsync + move 03 Mi Nuera.ogg (checking localrsync...) (to localrsync...) + sending incremental file list + 1X/ + 1X/39/ + 1X/39/WORM-s6296772-m1311874383--03 Mi Nuera.ogg/ + 1X/39/WORM-s6296772-m1311874383--03 Mi Nuera.ogg/WORM-s6296772-m1311874383--03 Mi Nuera.ogg + 6296772 100% 42.98MB/s 0:00:00 (xfer#1, to-check=0/5) + + sent 6297754 bytes received 43 bytes 4198531.33 bytes/sec + total size is 6296772 speedup is 1.00 + ok + ~$ git annex get 03\ Mi\ Nuera.ogg + get 03 Mi Nuera.ogg (from localrsync...) 
+ rsync: link_stat "/home/user/annex/1X/39/WORM-s6296772-m1311874383--03" failed: No such file or directory (2) + rsync: link_stat "/home/user/Mi" failed: No such file or directory (2) + rsync: change_dir "/home/user/Nuera.ogg" failed: No such file or directory (2) + rsync: link_stat "/home/user/Mi" failed: No such file or directory (2) + rsync: link_stat "/home/user/Nuera.ogg" failed: No such file or directory (2) + + sent 8 bytes received 12 bytes 13.33 bytes/sec + total size is 0 speedup is 0.00 + rsync error: some files/attrs were not transferred (see previous errors) (code 23) at main.c(1526) [Receiver=3.0.7] + + rsync failed -- run git annex again to resume file transfer + Unable to access these remotes: localrsync + Try making some of these repositories available: + b8b1ea7a-b93f-11e0-b712-d7bffb6e61e6 -- localrsync + failed + git-annex: 1 failed + +> [[fixed|done]] --[[Joey]] diff --git a/doc/bugs/scp_interrupt_to_background.mdwn b/doc/bugs/scp_interrupt_to_background.mdwn new file mode 100644 index 0000000000..381f5cd736 --- /dev/null +++ b/doc/bugs/scp_interrupt_to_background.mdwn @@ -0,0 +1,2 @@ +When getting a file with scp, SIGINT is blocked, exposing the git +subcommand fork to background bug again. [[done]] diff --git a/doc/bugs/softlink_mtime.mdwn b/doc/bugs/softlink_mtime.mdwn new file mode 100644 index 0000000000..1427fc7147 --- /dev/null +++ b/doc/bugs/softlink_mtime.mdwn @@ -0,0 +1,54 @@ +When adding files to git annex, softlinks are created with current atime (and ctime, etc). Instead, the atime of the added file should be used and added to the meta-data, restoring it everywhere an annex is cloned to. -- RichiH + +Optionally, editing the meta-data should change the times in all annexes. + +> Thing is, git does not preserve file timestamps much at all. +> It's not uncommon for a `git checkout` to or `git update` to +> mess up timestamps. This is why things like metastore exist (and +> metastore should work ok with git annexed files too). 
Trying to +> make annexed file symlinks have better timestamp handling than regular +> files in git seems pointless. --[[Joey]] + +> > Improving an area where git is (not yet?) good at still makes sense, imo. Photos and the like need absolute timestamps more than source code which is fine with relative timestamps (local builds & updates). Maintaining global timestamps for source code could even cause a lot of unwanted effects. As it is, this issue is the only, but a major, blocker for me before I can start adapting git-annex. As I have three different use cases for it, this is a shame. Unfortunately, I don't speak any Haskell so scratching my own itch isn't do-able (without major effort and not soon, at least). Is there a realistic chance that you will tackle this nonetheless or is this WONTFIX? -- RichiH + +>>> Not quite WONTFIX. git-annex should at least, when adding new files, +>>> preserve their timestamp in the symlink it creates. +>>> +>>> Since it doesn't have anything to do with maintaining the symlinks +>>> during an update, or a clone, etc, maintaining the permissions of them +>>> is also out of scope, and it's best to just use metastore if you need +>>> it. Otherwise, git-annex would have to reimplement metastore, and is +>>> unlikely to do it better. + +>>>> OK, thanks for the clarification. Would it be acceptable for you to put the timestamps into the metastore with vanilla git? If such an option existed, everyone would be able to benefit and not just me. -- RichiH + +>>>>> I've now committed to git changes to make git-annex add make +>>>>> symlinks that reflect the original file's mtime. (It's not possible +>>>>> to set the ctime of a symlink; nor would you want to as messing with +>>>>> ctimes can break backup software ... and atime doesn't much matter.) +>>>>> +>>>>> So all you have to do is make the pre-commit hook call +>>>>> [metastore](http://david.hardeman.nu/software.php). 
The hook +>>>>> would look like this: ---[[Joey]] + + #!/bin/sh + git annex pre-commit . + metastore --save + git add .metadata + +>>>>>> Thanks a lot. Doing this in a new git-annex repo from the start should at least ensure local consistency and I assume I can simply add a post-pull hook to restore the mtimes on all other repositories? -- RichiH + +>>>>>>> This is even better: + + #!/bin/sh + if ! type metastore >/dev/null; then echo "$0: metastore is not installed; exiting"; exit 1; fi + git annex pre-commit . + metastore --save + git add .metadata + +>>>>>>> -- RichiH + +>>>>>>>> After getting to actually play with this from different machines with a bare git as central instance for several distributed repos, the metastore trick does not work. The .metadata is causing merge conflicts for every pull. I removed the "done" tag from this issue. -- RichiH + +>>>>>>>>> softbox sounds _really_ nice. File systems need to preserve mtimes. Obviously, it would be nice if git-annex exposed this to the upper layer instead of relying on this FUSE implementation, or the next, or the other totally cool thing around the corner to implement it again and again. +>>>>>>>>> I talked to the author of metastore; he is aware that the format is merge-unfriendly but never needed merges for himself. He is aware that this is not ideal for something like git. He does not have the time to implement a text storage instead of binary and I lack the skills to do it. If metastore is used, all it would need to do is introduce a new version of the store (it's versioned, apparently) and save metadata in text, one file per line. xattr would need to be ASCII-armoured, the rest could be plain text. I still think storing this directly in git-annex would make the most sense.
Introducing a metadata storage file per storage object in .git/annex and using the object file's name as index is impossible because several softlinks might point to one object so it would need to be done per-softlink :/ -- RichiH diff --git a/doc/bugs/support_bare_git_repo__44___with_the_annex_directory_exposed_to_http.mdwn b/doc/bugs/support_bare_git_repo__44___with_the_annex_directory_exposed_to_http.mdwn new file mode 100644 index 0000000000..ba7dcad300 --- /dev/null +++ b/doc/bugs/support_bare_git_repo__44___with_the_annex_directory_exposed_to_http.mdwn @@ -0,0 +1,20 @@ +Let's say that http://people.collabora.com/~alsuren/git/fate-suite.git/ is a bare git repo. It has been 'git update-server-info'd so that it can be served on a dumb http server. + +The repo is also a git annex remote, created using the following commands: + +* git remote add alsuren git+ssh://people.collabora.co.uk/user/alsuren/public_html/fate-suite.git +* git push alsuren --all +* git annex copy --to=alsuren + +so http://people.collabora.com/~alsuren/git/fate-suite.git/annex is a valid git annex (though listing dirs is forbidden, so you need to know the filenames ahead of time). + +I would like to be able to use the following commands to get a clone of the repo: + +* git clone http://people.collabora.com/~alsuren/git/fate-suite.git/ +* cd fate-suite +* git annex get + +This would allow contributors to quickly get a copy of our upstream repo and start contributing with minimal bandwidth/effort. + +> This is now supported.. I look forward to seeing your project using it! +> --[[Joey]] [[!tag done]] diff --git a/doc/bugs/test_suite_shouldn__39__t_fail_silently.mdwn b/doc/bugs/test_suite_shouldn__39__t_fail_silently.mdwn new file mode 100644 index 0000000000..2f486ad652 --- /dev/null +++ b/doc/bugs/test_suite_shouldn__39__t_fail_silently.mdwn @@ -0,0 +1,3 @@ +When the test suite cannot be compiled, the build just fails silenty. 
This means that in automated builds there is no easy way to ensure that the generated binaries have passed the test suite, because it may not even have been run! IMHO, "make test" should fail (i.e. return a non-zero exit code) when it can't succeed. + +> Ok, fixed. --[[Joey]] [[done]] diff --git a/doc/bugs/tests_fail_when_there_is_no_global_.gitconfig_for_the_user.mdwn b/doc/bugs/tests_fail_when_there_is_no_global_.gitconfig_for_the_user.mdwn new file mode 100644 index 0000000000..b90b501e31 --- /dev/null +++ b/doc/bugs/tests_fail_when_there_is_no_global_.gitconfig_for_the_user.mdwn @@ -0,0 +1,50 @@ +Make test fails when git doesn't know what identity to give to commits + +
+
+Testing 1:blackbox:0:git-annex init
+Cases: 30  Tried: 7  Errors: 0  Failures: 0
+*** Please tell me who you are.
+
+Run
+
+  git config --global user.email "you@example.com"
+  git config --global user.name "Your Name"
+
+to set your account's default identity.
+Omit --global to set the identity only in this repository.
+
+fatal: empty ident   not allowed
+### Failure in: 1:blackbox:0:git-annex init
+init failed
+Testing 1:blackbox:1:git-annex add:0
+Cases: 30  Tried: 8  Errors: 0  Failures: 1
+*** Please tell me who you are.
+
+ +I guess most users testing git-annex probably have a .gitconfig sitting in their home directories already so the above never cropped up. This failure was initially found in a clean and fresh install of a virtual machine with archlinux and repeated again on my archlinux laptop. + +Update: I pulled the master on my rhel5 test machine and moved my .gitconfig out of the way, the tests pass and continue but I still get a "warning message" from git. + +
+Testing 1:blackbox:3:git-annex unannex:1:with content                         
+Cases: 30  Tried: 12  Errors: 0  Failures: 0[master fce0cde] content removed from git annex
+ Committer: Jimmy Tang 
+Your name and email address were configured automatically based
+on your username and hostname. Please check that they are accurate.
+You can suppress this message by setting them explicitly:
+
+    git config --global user.name "Your Name"
+    git config --global user.email you@example.com
+
+After doing this, you may fix the identity used for this commit with:
+
+    git commit --amend --reset-author
+
+ 2 files changed, 1 insertions(+), 2 deletions(-)
+ delete mode 120000 foo
+
+ +I guess it also depends a bit on how git figures out who it is that is committing and how the machine in question is configured with hostnames and domain names. + +> Fixed that. [[done]] --[[Joey]] diff --git a/doc/bugs/tmp_file_handling.mdwn b/doc/bugs/tmp_file_handling.mdwn new file mode 100644 index 0000000000..9db932e571 --- /dev/null +++ b/doc/bugs/tmp_file_handling.mdwn @@ -0,0 +1,13 @@ +git-annex deletes all tmp files on shutdown, if everything succeeded. +This presents 2 problems: + +1. If git-annex is rsyncing something and another one is run, it will + delete the running instance's tmp files. +2. If a long-running rsync transfer is interrupted partway through, the + tmp file was expensive to obtain, and one needs to avoid running + git-annex to do anything else until that transfer can be resumed and + finished. + +--[[Joey]] + +[[done]] diff --git a/doc/bugs/touch.hsc_has_problems_on_non-linux_based_systems.mdwn b/doc/bugs/touch.hsc_has_problems_on_non-linux_based_systems.mdwn new file mode 100644 index 0000000000..118f6fbb7c --- /dev/null +++ b/doc/bugs/touch.hsc_has_problems_on_non-linux_based_systems.mdwn @@ -0,0 +1,19 @@ +It seems that commit bc5c54c987f548505a3877e8a0e460abe0b2a081 introduced some linux specific things... + +
+hsc2hs Touch.hsc
+Touch.hsc: In function ‘main’:
+Touch.hsc:46: error: ‘UTIME_OMIT’ undeclared (first use in this function)
+Touch.hsc:46: error: (Each undeclared identifier is reported only once
+Touch.hsc:46: error: for each function it appears in.)
+Touch.hsc:48: error: ‘UTIME_NOW’ undeclared (first use in this function)
+Touch.hsc:67: error: ‘AT_FDCWD’ undeclared (first use in this function)
+Touch.hsc:68: error: ‘AT_SYMLINK_NOFOLLOW’ undeclared (first use in this function)
+compiling Touch_hsc_make.c failed
+command was: /usr/bin/gcc -c -m32 -I/Library/Frameworks/GHC.framework/Versions/612/usr/lib/ghc-6.12.3/include/ Touch_hsc_make.c -o Touch_hsc_make.o
+make: *** [Touch.hs] Error 1
+
+ +I dug around the OSX documentation and fcntl.h header file and it seems that UTIME_OMIT, UTIME_NOW, AT_FDCWD and AT_SYMLINK_NOFOLLOW aren't defined (at least on OSX). I suspect the BSD's in general will have problems compiling git-annex. + +[[!meta title="annexed symlink mtime matching code is disabled on non-linux systems; needs testing"]] diff --git a/doc/bugs/touch.hsc_has_problems_on_non-linux_based_systems/comment_1_1d38283c9ea87174f3bbef9a58f5cb88._comment b/doc/bugs/touch.hsc_has_problems_on_non-linux_based_systems/comment_1_1d38283c9ea87174f3bbef9a58f5cb88._comment new file mode 100644 index 0000000000..f26239c3e9 --- /dev/null +++ b/doc/bugs/touch.hsc_has_problems_on_non-linux_based_systems/comment_1_1d38283c9ea87174f3bbef9a58f5cb88._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-03-16T16:07:26Z" + content=""" +Hmm.. is utimensat available at all? + +I've committed an update that may convince at least some compilers to expose this newer POSIX stuff. I don't know if it will help, please let me know. +"""]] diff --git a/doc/bugs/touch.hsc_has_problems_on_non-linux_based_systems/comment_2_bf112edd075fbebe4fc959a387946eb9._comment b/doc/bugs/touch.hsc_has_problems_on_non-linux_based_systems/comment_2_bf112edd075fbebe4fc959a387946eb9._comment new file mode 100644 index 0000000000..0222e645b9 --- /dev/null +++ b/doc/bugs/touch.hsc_has_problems_on_non-linux_based_systems/comment_2_bf112edd075fbebe4fc959a387946eb9._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 2" + date="2011-03-16T16:49:18Z" + content=""" +Just pulled the changes, it still fails to build. utimensat doesn't seem to exist on OSX 10.6.6. 
+"""]] diff --git a/doc/bugs/touch.hsc_has_problems_on_non-linux_based_systems/comment_3_a46080fbe82adf0986c5dc045e382501._comment b/doc/bugs/touch.hsc_has_problems_on_non-linux_based_systems/comment_3_a46080fbe82adf0986c5dc045e382501._comment new file mode 100644 index 0000000000..7e79dea881 --- /dev/null +++ b/doc/bugs/touch.hsc_has_problems_on_non-linux_based_systems/comment_3_a46080fbe82adf0986c5dc045e382501._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 3" + date="2011-03-16T17:46:40Z" + content=""" +Alright, I've added #ifdefs and the symlink timestamp mirroring feature will be unavailable on OSX until I get a version that works there. +"""]] diff --git a/doc/bugs/touch.hsc_has_problems_on_non-linux_based_systems/comment_4_760437bf3ba972a775bb190fb4b38202._comment b/doc/bugs/touch.hsc_has_problems_on_non-linux_based_systems/comment_4_760437bf3ba972a775bb190fb4b38202._comment new file mode 100644 index 0000000000..6b1e03b026 --- /dev/null +++ b/doc/bugs/touch.hsc_has_problems_on_non-linux_based_systems/comment_4_760437bf3ba972a775bb190fb4b38202._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 4" + date="2011-03-16T20:32:01Z" + content=""" +Just tried it out on my mac and it's working again. I guess this issue could be closed for now.
+"""]] diff --git a/doc/bugs/touch.hsc_has_problems_on_non-linux_based_systems/comment_5_060ba5ea88dcab2f4a0c199f13ef4f67._comment b/doc/bugs/touch.hsc_has_problems_on_non-linux_based_systems/comment_5_060ba5ea88dcab2f4a0c199f13ef4f67._comment new file mode 100644 index 0000000000..aeb576be37 --- /dev/null +++ b/doc/bugs/touch.hsc_has_problems_on_non-linux_based_systems/comment_5_060ba5ea88dcab2f4a0c199f13ef4f67._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 5" + date="2011-03-20T18:12:59Z" + content=""" +I'm leaving this bug open because this feature, however minor is not available on OSX and BSD. + +I have added a partial implementation using lutimes(3), which should be available on the BSDs. However, it's ifdefed out due to a casting problem: The TimeSpec uses a CTime, while lutimes uses a CLong. These data types may be internally the same on some or all platforms, so if you want this feature you can try changing the \"ifdef 0\" in Touch.hsc to 1 and try it, see if \"git annex add\" mirrors file modification time in created symlinks, and let me know. +"""]] diff --git a/doc/bugs/touch.hsc_has_problems_on_non-linux_based_systems/comment_6_548303d6ffb21a9370b6904f41ff49c1._comment b/doc/bugs/touch.hsc_has_problems_on_non-linux_based_systems/comment_6_548303d6ffb21a9370b6904f41ff49c1._comment new file mode 100644 index 0000000000..cd116c232d --- /dev/null +++ b/doc/bugs/touch.hsc_has_problems_on_non-linux_based_systems/comment_6_548303d6ffb21a9370b6904f41ff49c1._comment @@ -0,0 +1,42 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 6" + date="2011-03-20T20:48:41Z" + content=""" +ok, pulling the latest master and building on OSX now does this... + +
+ghc -O2 -Wall -ignore-package monads-fd --make git-annex
+[ 1 of 63] Compiling Touch            ( Touch.hs, Touch.o )
+
+Touch.hsc:24:0:
+    The type signature for `touchBoth' lacks an accompanying binding
+
+Touch.hsc:27:26: Not in scope: `touchBoth'
+make: *** [git-annex] Error 1
+
+ +changing the #if 0 to 1 gives this... + +
+ghc -O2 -Wall -ignore-package monads-fd --make git-annex
+[ 1 of 63] Compiling Touch            ( Touch.hs, Touch.o )
+
+Touch.hsc:95:43:
+    Couldn't match expected type `CLong' against inferred type `CTime'
+    In the second argument of `(\ hsc_ptr
+                                    -> pokeByteOff hsc_ptr 0)', namely
+        `(sec :: CLong)'
+    In a stmt of a 'do' expression:
+        (\ hsc_ptr -> pokeByteOff hsc_ptr 0) ptr (sec :: CLong)
+    In the expression:
+        do { (\ hsc_ptr -> pokeByteOff hsc_ptr 0) ptr (sec :: CLong);
+             (\ hsc_ptr -> pokeByteOff hsc_ptr 4) ptr (0 :: CLong) }
+make: *** [git-annex] Error 1
+
+ + +it seems that commit 6634b6a6b84a924f6f6059b5bea61f449d056eee has broken support for OSX. + +"""]] diff --git a/doc/bugs/touch.hsc_has_problems_on_non-linux_based_systems/comment_7_7ca00527ab5db058aadec4fe813e51fd._comment b/doc/bugs/touch.hsc_has_problems_on_non-linux_based_systems/comment_7_7ca00527ab5db058aadec4fe813e51fd._comment new file mode 100644 index 0000000000..e35dc8a827 --- /dev/null +++ b/doc/bugs/touch.hsc_has_problems_on_non-linux_based_systems/comment_7_7ca00527ab5db058aadec4fe813e51fd._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 7" + date="2011-03-20T22:06:25Z" + content=""" +Fixed that, and removed the impossible cast so it can be built with #if 1 +"""]] diff --git a/doc/bugs/touch.hsc_has_problems_on_non-linux_based_systems/comment_8_881aecb9ae671689453f6d5d780d844b._comment b/doc/bugs/touch.hsc_has_problems_on_non-linux_based_systems/comment_8_881aecb9ae671689453f6d5d780d844b._comment new file mode 100644 index 0000000000..56a7eb360e --- /dev/null +++ b/doc/bugs/touch.hsc_has_problems_on_non-linux_based_systems/comment_8_881aecb9ae671689453f6d5d780d844b._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 8" + date="2011-03-21T08:52:18Z" + content=""" +Just tried building both of the code paths, and they seem to build and somewhat function on OSX. I have yet to confirm the functionality is working correctly, but so far it's looking good. 
(I somewhat care less about the utimes/mtimes of my files since I care more about the content :) ) +"""]] diff --git a/doc/bugs/unannex_and_uninit_do_not_work_when_git_index_is_broken.mdwn b/doc/bugs/unannex_and_uninit_do_not_work_when_git_index_is_broken.mdwn new file mode 100644 index 0000000000..509e12aebb --- /dev/null +++ b/doc/bugs/unannex_and_uninit_do_not_work_when_git_index_is_broken.mdwn @@ -0,0 +1,14 @@ +git's index broke and I was unable to restore it. While this is not git-annex' problem, it should still be possible to get my data in an un-annexed state. + + % git status + fatal: index file smaller than expected + % git annex unannex foo + fatal: index file smaller than expected + % git annex uninit + fatal: index file smaller than expected + uninit + pre-commit hook (/path/to/git-annex/.git/hooks/pre-commit) contents modified; not deleting. Edit it to remove call to git annex. + ok + % + +Ttbomk, the softlinks and objects are enough to un-annex the files; side-stepping git's index if necessary. diff --git a/doc/bugs/unannex_and_uninit_do_not_work_when_git_index_is_broken/comment_1_1931e733f0698af5603a8b92267203d4._comment b/doc/bugs/unannex_and_uninit_do_not_work_when_git_index_is_broken/comment_1_1931e733f0698af5603a8b92267203d4._comment new file mode 100644 index 0000000000..84b68bb7ba --- /dev/null +++ b/doc/bugs/unannex_and_uninit_do_not_work_when_git_index_is_broken/comment_1_1931e733f0698af5603a8b92267203d4._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-04-03T01:40:50Z" + content=""" +They rely on git-ls-files to get a list of files that are checked into git, in order to tell what to unannex. 
+"""]] diff --git a/doc/bugs/unannex_and_uninit_do_not_work_when_git_index_is_broken/comment_2_40920b88537b7715395808d8aa94bf03._comment b/doc/bugs/unannex_and_uninit_do_not_work_when_git_index_is_broken/comment_2_40920b88537b7715395808d8aa94bf03._comment new file mode 100644 index 0000000000..215619043a --- /dev/null +++ b/doc/bugs/unannex_and_uninit_do_not_work_when_git_index_is_broken/comment_2_40920b88537b7715395808d8aa94bf03._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 2" + date="2011-04-03T08:55:18Z" + content=""" +Given that the softlinks contain all needed information (if the object exists, locally), an emergency way to get files \"out\" of git-annex would be nice. I am aware that one can script it, but a canonical way is always better, especially when things go south. +"""]] diff --git a/doc/bugs/unannex_command_doesn__39__t_all_files.mdwn b/doc/bugs/unannex_command_doesn__39__t_all_files.mdwn new file mode 100644 index 0000000000..8094ed9182 --- /dev/null +++ b/doc/bugs/unannex_command_doesn__39__t_all_files.mdwn @@ -0,0 +1,26 @@ + $ git init ; git annex init test ; dd if=/dev/urandom of=file1 count=128 ; cp file1 file2 ; git annex add --backend=SHA1 file? ; git commit -m init ; git annex unannex ; ls -l + Initialized empty Git repository in /tmp/annex/.git/ + init test ok + 128+0 records in + 128+0 records out + 65536 bytes (66 kB) copied, 0.007173 s, 9.1 MB/s + add file1 (checksum...) ok + add file2 (checksum...) ok + (Recording state in git...) + [master (root-commit) 2177b10] init + 2 files changed, 2 insertions(+), 0 deletions(-) + create mode 120000 file1 + create mode 120000 file2 + unannex file1 ok + (Recording state in git...) 
+ [master bef78b1] content removed from git annex + 1 files changed, 0 insertions(+), 1 deletions(-) + delete mode 120000 file1 + total 72 + -rw-r--r-- 1 simons users 65536 Jul 15 17:29 file1 + lrwxrwxrwx 1 simons users 132 Jul 15 17:29 file2 -> .git/annex/objects/jp/Fk/SHA1-s65536--795b58cc4e5190b02e7026fd9e94a10c98c6475f/SHA1-s65536--795b58cc4e5190b02e7026fd9e94a10c98c6475f + +> This was recently discussed in +> [[annex_unannex__47__uninit_should_handle_copies]] and `unannex --fast` +> added to leave contents behind in the annex, which allows handling +> copies. But needs manual cleanup later with dropunused. --[[Joey]] diff --git a/doc/bugs/unannex_vs_unlock_hook_confusion.mdwn b/doc/bugs/unannex_vs_unlock_hook_confusion.mdwn new file mode 100644 index 0000000000..c03990c203 --- /dev/null +++ b/doc/bugs/unannex_vs_unlock_hook_confusion.mdwn @@ -0,0 +1,15 @@ +See [[forum/unannex_alternatives]] for problem description. + +If an unannex is followed by a "git add; git commit", git-annex's hook thinks +that you have used git annex unlock on the file and are +now committing a changed version, and the right thing to do there is to add the +new content to the annex and update the symlink accordingly. + +Can we tell the difference between an unannexed file that has yet to be committed +and has been re-added as a normal file, vs an unlocked file? --[[Joey]] + +> Hmm, not really. An unannexed file's content will have been dropped from +> the backend, but that's about the only difference. Perhaps unannex should +> just commit the removal of the file itself? --[[Joey]] + +> [[done]], staged changes committed at end. diff --git a/doc/bugs/unhappy_without_UTF8_locale.mdwn b/doc/bugs/unhappy_without_UTF8_locale.mdwn new file mode 100644 index 0000000000..8d22b9ee44 --- /dev/null +++ b/doc/bugs/unhappy_without_UTF8_locale.mdwn @@ -0,0 +1,41 @@ +Try unsetting LANG and passing git-annex unicode filenames.
+ + joey@gnu:~/tmp/aa>git annex add ./Üa + add add add add git-annex: : commitAndReleaseBuffer: invalid + argument (Invalid or incomplete multibyte or wide character) + +> Interestingly, I can get the same crash in the de_DE.UTF-8 locale +> with certian input filenames, while in en_US.UTF-8, it's ok. +> The workaround below avoided the problem in de_DE.UTF-8. --[[Joey]] + +> Put in the utf-8 forcing workaround for now. [[done]] --[[Joey]] + +## underlying haskell problem and workaround + +The same problem can be seen with a simple haskell program: + + import System.Environment + import Codec.Binary.UTF8.String + main = do + args <- getArgs + putStrLn $ decodeString $ args !! 0 + + joey@gnu:~/src/git-annex>LANG= runghc ~/foo.hs Ü + foo.hs: : hPutChar: invalid argument (Invalid or incomplete multibyte or wide character) + +(The call to `decodeString` is necessary to make the input +unicode string be displayed properly in a utf8 locale, but +does not contribute to this problem.) + +I guess that haskell is setting the IO encoding to latin1, which +is [documented](http://haskell.org/ghc/docs/latest/html/libraries/base/System-IO.html#v:latin1) +to error out on characters > 255. + +So this program doesn't have the problem -- but may output garbage +on non-utf-8 capable terminals: + + import System.IO + main = do + hSetEncoding stdout utf8 + args <- getArgs + putStrLn $ decodeString $ args !! 0 diff --git a/doc/bugs/uninit_does_not_work_in_old_repos.mdwn b/doc/bugs/uninit_does_not_work_in_old_repos.mdwn new file mode 100644 index 0000000000..d3df061487 --- /dev/null +++ b/doc/bugs/uninit_does_not_work_in_old_repos.mdwn @@ -0,0 +1,20 @@ +As uninit does not need to actually write out any data, just remove it, it should be possible to uninit in old stores. + + % git annex uninit + git-annex: Repository version 2 is not supported. Upgrade this repository: git-annex upgrade + +If the repo happens to be broken, this essentially locks in data. 
+ +> No, because you can always check out the version of git-annex you need +> for that repository. +> +> uninit, as implemented, runs unannex on every file and then does some +> cleanup. The cleanup does not need to write state, but the unannex does. +> And it depends on the object directory layout, which has changed between +> versions. So supporting old versions in this code would complicate it +> quite a lot. I don't want to go there. --[[Joey]] + +>>Requiring a version upgrade for unannex is fine. Yet, I see a problem when a git repo is broken; you are stuck without being able to uninit. In this case an uninit that does nothing but undo the symlinking would be useful. -- Richard + +>>> As I said, version 2 of git-annex is still there for people who need +>>> it for whatever reason. [[done]] --[[Joey]] diff --git a/doc/bugs/uninit_does_not_work_in_old_repos/comment_1_bc0619c6e17139df74639448aa6a0f72._comment b/doc/bugs/uninit_does_not_work_in_old_repos/comment_1_bc0619c6e17139df74639448aa6a0f72._comment new file mode 100644 index 0000000000..7a1ea582b0 --- /dev/null +++ b/doc/bugs/uninit_does_not_work_in_old_repos/comment_1_bc0619c6e17139df74639448aa6a0f72._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 1" + date="2011-10-29T15:30:09Z" + content=""" +After upgrading the repo, I still have to commit the changes, else git-annex won't let me uninit. Arguably a Good Thing, but I wanted to document it here. 
+"""]] diff --git a/doc/bugs/uninit_should_not_run_when_branch_git-annex_is_checked_out.mdwn b/doc/bugs/uninit_should_not_run_when_branch_git-annex_is_checked_out.mdwn new file mode 100644 index 0000000000..e4e407ec87 --- /dev/null +++ b/doc/bugs/uninit_should_not_run_when_branch_git-annex_is_checked_out.mdwn @@ -0,0 +1,15 @@ +Running `git annex uninit` in a repo which has branch git-annex checked out will result in: + + error: Cannot delete the branch 'git-annex' which you are currently on. + git-annex: git [Param "-D",Param "git-annex"] failed + +and trying to checkout branch master afterwards results in: + + error: The following untracked working tree files would be overwritten by checkout: + +Both of which is logical. The best thing would be if git-annex refused to run uninit while in branch git-annex. + + +Richard + +> [[done]] --[[Joey]] diff --git a/doc/bugs/upgrade_left_untracked_.git-annex__47____42___directories.mdwn b/doc/bugs/upgrade_left_untracked_.git-annex__47____42___directories.mdwn new file mode 100644 index 0000000000..7fdbc3ca4e --- /dev/null +++ b/doc/bugs/upgrade_left_untracked_.git-annex__47____42___directories.mdwn @@ -0,0 +1,16 @@ +I upgraded another one of my git-annex clones. The upgrade worked fine (i.e. +according to the manual) on two other clones before, but this time something is +different. + +After 'git pull' and 'git annex upgrade', which took a long time and seemed to +have succeeded, there are no staged changes in git. Instead there are lots of +untracked directories in .git-annex. Aside from that, nothing seems to be +wrong. + +At the time I had git-annex version 0.20110329 and I've been using the SHA1 +backend since version 1. + +> Yes, I agree with Jimmy, it's the same bug. So I'll be closing this one. +> Please keep us informed how the workaround committed to git-annex +> yesterday for the case insensativity issue works out. 
[[dup|done]] +> --[[Joey]] diff --git a/doc/bugs/upgrade_left_untracked_.git-annex__47____42___directories/comment_1_9ca2da52f3c8add0276b72d6099516a6._comment b/doc/bugs/upgrade_left_untracked_.git-annex__47____42___directories/comment_1_9ca2da52f3c8add0276b72d6099516a6._comment new file mode 100644 index 0000000000..78309df872 --- /dev/null +++ b/doc/bugs/upgrade_left_untracked_.git-annex__47____42___directories/comment_1_9ca2da52f3c8add0276b72d6099516a6._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-04-03T02:26:20Z" + content=""" +I'm not sure how this happened, as far as I can see, and based on my testing, `git annex upgrade` does stage the location log files. OTOH, I vaguely rememeber needing to stage some of them when I was doing my own upgrades, but that was a while ago, and I don't remember the details. + +Your upgrade seems to have gone ok from the file lists you sent, so you can just: `git add .git-annex; git commit` +"""]] diff --git a/doc/bugs/upgrade_left_untracked_.git-annex__47____42___directories/comment_2_e14e84b770305893f2fc6e4938359f47._comment b/doc/bugs/upgrade_left_untracked_.git-annex__47____42___directories/comment_2_e14e84b770305893f2fc6e4938359f47._comment new file mode 100644 index 0000000000..4fc9647e81 --- /dev/null +++ b/doc/bugs/upgrade_left_untracked_.git-annex__47____42___directories/comment_2_e14e84b770305893f2fc6e4938359f47._comment @@ -0,0 +1,18 @@ +[[!comment format=mdwn + username="gernot" + ip="213.168.117.192" + subject="comment 2" + date="2011-04-03T15:35:52Z" + content=""" +'git add .git-annex' didn't do anything. That's when I noticed that this +repository is on a case-insensitive HFS+ file system. + +So, if I get this right it's not a new bug, but similar to this situation: +[[git-annex_directory_hashing_problems_on_osx]] + +Assuming that it was the file system's fault, I went ahead and upgraded yet +another clone. 
That one (on an ext3 file system) had neither staged changes +nor left-over untracked files. Everything seems to just have fallen right into +place. Is that possible or still weird? + +"""]] diff --git a/doc/bugs/upgrade_left_untracked_.git-annex__47____42___directories/comment_3_ec04e306c96fd20ab912aea54a8340aa._comment b/doc/bugs/upgrade_left_untracked_.git-annex__47____42___directories/comment_3_ec04e306c96fd20ab912aea54a8340aa._comment new file mode 100644 index 0000000000..99095c1569 --- /dev/null +++ b/doc/bugs/upgrade_left_untracked_.git-annex__47____42___directories/comment_3_ec04e306c96fd20ab912aea54a8340aa._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 3" + date="2011-04-03T16:05:39Z" + content=""" +Yes you seem to have come across the same bug that I had initially reported :P +"""]] diff --git a/doc/bugs/uuid.log_trust.log_and_remote.log_merge_wackiness.mdwn b/doc/bugs/uuid.log_trust.log_and_remote.log_merge_wackiness.mdwn new file mode 100644 index 0000000000..a84d8cb568 --- /dev/null +++ b/doc/bugs/uuid.log_trust.log_and_remote.log_merge_wackiness.mdwn @@ -0,0 +1,36 @@ +Since uuid.log, trust.log and remote.log are union merged, it's possible +for any given item in them to have multiple values after a merge. +This would happen, for example, if the value was changed in different ways +in two repos which were then merged. git-annex will use an arbitrary +one of the multiple values. + +A workaround if this should happen to you is to use `git annex describe` +or other commands to re-set the value you want. The process of setting +the value will remove the multiple lines. + +To fix this the file format needs to be changed to include a timestamp +as is done with the other log files, then git-annex can consistently +pick the newest value -- which is as close to the "right" value as can be +determined in this situation. 
+ +---- + +File format backwards-compatability is the issue. Ideally, old git-annex +would keep working, ignoring the timestamp. + +- uuid.log: "uuid description timestamp" would work; old git-annex + would just treat the timestamp as part of the description which would be + ok + > update: converted! --[[Joey]] +- trust.log: "uuid trustlevel timestamp" would work; old git-annex + ignores trailing words + > update: converted! --[[Joey]] +- remote.log: "uuid key=value ... timestamp" is on the edge but does work + (old git-annex will include the timestamp in the key/value map it builds, + but that should not break anything really) + > update: converted! --[[Joey]] + +Appending "timestamp=xxxxx" would be good for clarity, and make +it easier to parse the timestamp out from lines that have it. + +> [[done]] --[[Joey]] diff --git a/doc/bugs/weird_local_clone_confuses.mdwn b/doc/bugs/weird_local_clone_confuses.mdwn new file mode 100644 index 0000000000..aa838f1670 --- /dev/null +++ b/doc/bugs/weird_local_clone_confuses.mdwn @@ -0,0 +1,20 @@ +See + + +If a local repo is cloned with "git clone orig/.git new", then git-annex in +new cannot see origin. + +the .git/config has "url=/.../orig/.git". Apparently git is ok with that +weird construction; probably it treats it as a bare git repo. But git-annex +just sees a directory w/o a .git subdir, and gives up. + +--- + +Just tested, and the new support for bare repositories didn't solve this. +(Because config.bare is not set.) + +I think this is not something git-annex should go out of its way to +support. [[done]] +--[[Joey]] + +Later.. Fixed this after all. 
--[[Joey]] diff --git a/doc/bugs/wishlist:_allow_users_to_provide_UUID_when_running___96__git_annex_init__96__.mdwn b/doc/bugs/wishlist:_allow_users_to_provide_UUID_when_running___96__git_annex_init__96__.mdwn new file mode 100644 index 0000000000..0dc9ec08a2 --- /dev/null +++ b/doc/bugs/wishlist:_allow_users_to_provide_UUID_when_running___96__git_annex_init__96__.mdwn @@ -0,0 +1,5 @@ +As there's no way to permanently hide remotes and I have to recreate two repos now, I would love to be able to re-use the old UUIDs to remove clutter. + +> git-annex already provides a way to do this: Copy `.git/config` from the +> original repo (or use `git-config` to set `annex.uuid`) *before* running +> `git annex init`. [[done]] --[[Joey]] diff --git a/doc/bugs/wishlist:_more_descriptive_commit_messages_in_git-annex_branch.mdwn b/doc/bugs/wishlist:_more_descriptive_commit_messages_in_git-annex_branch.mdwn new file mode 100644 index 0000000000..3a891fc9b2 --- /dev/null +++ b/doc/bugs/wishlist:_more_descriptive_commit_messages_in_git-annex_branch.mdwn @@ -0,0 +1,39 @@ +as of git-annex version 3.20110719, all git-annex commits only contain the word "update" as a commit message. given that the contents of the commit are pretty non-descriptive (SHA1 hashes for file names, uuids for repository names), i suggest to have more descriptive commit messages, as shown here: + + /mnt/usb_disk/photos/2011$ git annex get + /mnt/usb_disk/photos/2011$ git show git-annex + [...] + usb-disk-photos: get 2011 + + * 10 files retrieved from 2 sources (9 from local-harddisk, 1 from my-server) + * 120 files were already present + * 2 files could not be retrieved + /mnt/usb_disk/photos/2011$ cd ~/photos/2011/07 + ~/photos/2011/07$ git copy --to my-server + ~/photos/2011/07$ git show git-annex + [...] 
+ local-harddisk: copy 2011/07 to my-server + + * 20 files pushed + ~/photos/2011/07$ + +in my opinion, the messages should at least contain + +* what command was used +* in which repository they were executed +* which files or directories they affected (not necessarily all files, but what was given on command line or implicitly from the working directory) + +--[[chrysn]] + +> The implementation of the git-annex branch precludes more descriptive +> commit messages, since a single commit can include changes that were +> previously staged to the branch's index file, or spooled to its journal +> by other git-annex commands (either concurrently running or +> interrupted commands, or even changes needed to automatically merge +> other git-annex branches). +> +> It would be possible to make it *less* verbose, with an empty commit +> message. :) --[[Joey]] + +>> Closing as this is literally impossible to do without making +>> git-annex worse. [[done]] --[[Joey]] diff --git a/doc/bugs/wishlist:_query_things_like_description__44___trust_level.mdwn b/doc/bugs/wishlist:_query_things_like_description__44___trust_level.mdwn new file mode 100644 index 0000000000..d158850cd6 --- /dev/null +++ b/doc/bugs/wishlist:_query_things_like_description__44___trust_level.mdwn @@ -0,0 +1,4 @@ +It would be helpful to have a way to query things like a repository's description and trust level, without having to poke in the git-annex branch. For example, "git annex describe ." currently clears the description but could print the current one instead. + +> `git annex status` now breaks down the repository list by type. 
[[done]] +> --[[Joey]] diff --git a/doc/bugs/wishlist:_query_things_like_description__44___trust_level/comment_1_14311384788312b96e550749ab7de9ea._comment b/doc/bugs/wishlist:_query_things_like_description__44___trust_level/comment_1_14311384788312b96e550749ab7de9ea._comment new file mode 100644 index 0000000000..3ac4ba2678 --- /dev/null +++ b/doc/bugs/wishlist:_query_things_like_description__44___trust_level/comment_1_14311384788312b96e550749ab7de9ea._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-10-27T17:09:33Z" + content=""" +`git annex describe` only sets the description to avoid complication. Imagine using it in a script for example. + +`git annex status` shows the description. It does not show the trust level because I have not thought of a visually pleasing and compact way to show it in the repository list there.. suggestions appreciated, since the same list is used by `whereis`, and showing trust levels there would be particularly useful. 
+"""]] diff --git a/doc/bugs/wishlist:_query_things_like_description__44___trust_level/comment_2_342d1ac07573c7ef4e27f003a692e261._comment b/doc/bugs/wishlist:_query_things_like_description__44___trust_level/comment_2_342d1ac07573c7ef4e27f003a692e261._comment new file mode 100644 index 0000000000..3bb92919f9 --- /dev/null +++ b/doc/bugs/wishlist:_query_things_like_description__44___trust_level/comment_2_342d1ac07573c7ef4e27f003a692e261._comment @@ -0,0 +1,32 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 2" + date="2011-10-29T18:28:13Z" + content=""" +Possible solutions: + +This: + + trusted repositories: + UUID -- foo + semi-trusted repositories: + UUID -- bar + untrusted repositories: + UUID -- baz + +or this: + + UUID -- trusted -- foo + UUID -- semi-trusted -- bar + UUID -- untrusted -- baz + +or this: + + known repositories (!/*/X): + UUID -- ! foo + UUID -- * bar + UUID -- X baz + +If you want to reformat this output, putting 'here', 'origin', etc into fixed formatting might make sense, as well. -- Richard +"""]] diff --git a/doc/bugs/wishlist:_support_drop__44___find_on_special_remotes.mdwn b/doc/bugs/wishlist:_support_drop__44___find_on_special_remotes.mdwn new file mode 100644 index 0000000000..24cacbf71c --- /dev/null +++ b/doc/bugs/wishlist:_support_drop__44___find_on_special_remotes.mdwn @@ -0,0 +1,18 @@ +Currently there is no way to drop files, or list what files are available, on a special remote. +It would be good if "git annex drop" and "git annex find" supported the --from argument. + +> I agree, drop should support --from. +>> [[done]] --[[Joey]] +> +> To find files *believed* to be present in a given remote, use +> `git annex find --in remote` +> Note that it might show out of date info, since it does not actually go +> check the current contents of the remote. 
The only reason to support +> `find --from` would be to always check, but I don't think that's needed. +> --[[Joey]] + +For commands that don't support the --from argument, it would also be nice to print an error. +Currently running "git annex drop --from usbdrive" doesn't behave as hoped and instead drops +all content from the local annex. + +> This is done now. --[[Joey]] diff --git a/doc/bugs/wishlist:_support_drop__44___find_on_special_remotes/comment_1_f11ed642a83d965076778a162f701e84._comment b/doc/bugs/wishlist:_support_drop__44___find_on_special_remotes/comment_1_f11ed642a83d965076778a162f701e84._comment new file mode 100644 index 0000000000..6028933b40 --- /dev/null +++ b/doc/bugs/wishlist:_support_drop__44___find_on_special_remotes/comment_1_f11ed642a83d965076778a162f701e84._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-10-27T17:13:43Z" + content=""" +Well, I don't think you mean \"special remotes\", but just any old remote (special or not). +"""]] diff --git a/doc/comments.mdwn b/doc/comments.mdwn new file mode 100644 index 0000000000..e19962b92a --- /dev/null +++ b/doc/comments.mdwn @@ -0,0 +1,9 @@ +[[!sidebar content=""" +[[!inline pages="comment_pending(*)" feedfile=pendingmoderation +description="comments pending moderation" show=-1]] +Comments in the [[!commentmoderation desc="moderation queue"]]: +[[!pagecount pages="comment_pending(*)"]] +"""]] + +Recent comments posted to this site: +[[!inline pages="comment(*)" template="comment"]] diff --git a/doc/contact.mdwn b/doc/contact.mdwn new file mode 100644 index 0000000000..22f072cb2b --- /dev/null +++ b/doc/contact.mdwn @@ -0,0 +1,10 @@ +Joey Hess is the author of git-annex. If you need to +talk about something privately, email me. + +The [[forum]] is the best place to discuss git-annex. 
+ +The [VCS-home mailing list](http://lists.madduck.net/listinfo/vcs-home) +is a good mailing list for users who want to use git-annex in the context +of managing their large personal files. + +For realtime chat, use the `#vcs-home` channel on irc.oftc.net. diff --git a/doc/copies.mdwn b/doc/copies.mdwn new file mode 100644 index 0000000000..93cbd8ea80 --- /dev/null +++ b/doc/copies.mdwn @@ -0,0 +1,38 @@ +Annexed data is stored inside your git repository's `.git/annex` directory. +Some [[special_remotes]] can store annexed data elsewhere. + +It's important that data not get lost by an ill-considered `git annex drop` +command. So, git-annex can be configured to try +to keep N copies of a file's content available across all repositories. +(Although [[untrusted_repositories|trust]] don't count toward this total.) + +By default, N is 1; it is configured by annex.numcopies. This default +can be overridden on a per-file-type basis by the annex.numcopies +setting in `.gitattributes` files. The --numcopies switch allows +temporarily using a different value. + +`git annex drop` attempts to check with other git remotes, to check that N +copies of the file exist. If enough repositories cannot be verified to have +it, it will retain the file content to avoid data loss. Note that +[[trusted_repositories|trust]] are not explicitly checked. + +For example, consider three repositories: Server, Laptop, and USB. Both Server +and USB have a copy of a file, and N=1. If on Laptop, you `git annex get +$file`, this will transfer it from either Server or USB (depending on which +is available), and there are now 3 copies of the file. + +Suppose you want to free up space on Laptop again, and you `git annex drop` the file +there. If USB is connected, or Server can be contacted, git-annex can check +that it still has a copy of the file, and the content is removed from +Laptop. 
But if USB is currently disconnected, and Server also cannot be +contacted, it can't verify that it is safe to drop the file, and will +refuse to do so. + +With N=2, in order to drop the file content from Laptop, it would need access +to both USB and Server. + +Note that different repositories can be configured with different values of +N. So just because Laptop has N=2, this does not prevent the number of +copies falling to 1, when USB and Server have N=1. To avoid this, +configure it in `.gitattributes`, which is shared between repositories +using git. diff --git a/doc/design.mdwn b/doc/design.mdwn new file mode 100644 index 0000000000..dc66d5c80a --- /dev/null +++ b/doc/design.mdwn @@ -0,0 +1,4 @@ +git-annex's high-level design is mostly inherent in the data that it +stores in git, and alongside git. See [[internals]] for details. + +See [[encryption]] for design of encryption elements. diff --git a/doc/design/encryption.mdwn b/doc/design/encryption.mdwn new file mode 100644 index 0000000000..647683bd9f --- /dev/null +++ b/doc/design/encryption.mdwn @@ -0,0 +1,117 @@ +This was the design doc for [[/encryption]] and is preserved for +the curious. For an example of using git-annex with an encrypted S3 remote, +see [[tips/using_Amazon_S3]]. + +[[!toc]] + +## encryption backends + +It makes sense to support multiple encryption backends. So, there +should be a way to tell what backend is responsible for a given filename +in an encrypted remote. (And since special remotes can also store files +unencrypted, differentiate from those as well.) + +The rest of this page will describe a single encryption backend using GPG. +Probably only one will be needed, but who knows? Maybe that backend will +turn out badly designed, or some other encryptor needed. Designing +with more than one encryption backend in mind helps future-proofing. 
+ +## encryption key management + +[[!template id=note text=""" +The basis of this scheme was originally developed by Lars Wirzenius et al +[for Obnam](http://braawi.org/obnam/encryption/). +"""]] + +Data is encrypted by gpg, using a symmetric cipher. +The cipher is itself checked into your git repository, encrypted using one or +more gpg public keys. This scheme allows new gpg private keys to be given +access to content that has already been stored in the remote. + +Different encrypted remotes need to be able to each use different ciphers. +Allowing multiple ciphers to be used within a single remote would add a lot +of complexity, so is not planned to be supported. +Instead, if you want a new cipher, create a new S3 bucket, or whatever. +There does not seem to be much benefit to using the same cipher for +two different encrypted remotes. + +So, the encrypted cipher could just be stored with the rest of a remote's +configuration in `remotes.log` (see [[internals]]). When `git +annex initremote` makes a remote, it can generate a random symmetric +cipher, and encrypt it with the specified gpg key. To allow another gpg +public key access, update the encrypted cipher to be encrypted to both gpg +keys. + +## filename enumeration + +If the names of files are encrypted or securely hashed, or whatever is +chosen, this makes it harder for git-annex (let alone untrusted third parties!) +to get a list of the files that are stored on a given encrypted remote. +But, does git-annex really ever need to do such an enumeration? + +Apparently not. `git annex unused --from remote` can now check for +unused data that is stored on a remote, and it does so based only on +location log data for the remote. This assumes that the location log is +kept accurately. + +What about `git annex fsck --from remote`? Such a command should be able to, +for each file in the repository, contact the encrypted remote to check +if it has the file. 
This can be done without enumeration, although it will +mean running gpg once per file fscked, to get the encrypted filename. + +So, the files stored in the remote should be encrypted. But, it needs +to be a repeatable encryption, so they cannot just be gpg encrypted, +that would yield a new name each time. Instead, HMAC is used. Any hash +could be used with HMAC; currently SHA1 is used. + +It was suggested that it might not be wise to use the same cipher for both +gpg and HMAC. Being paranoid, it's best not to tie the security of one +to the security of the other. So, the encrypted cipher described above is +actually split in two; half is used for HMAC, and half for gpg. + +---- + +Does the HMAC cipher need to be gpg encrypted? Imagine if it were +stored in plaintext in the git repository. Anyone who can access +the git repository already knows the actual filenames, and typically also +the content hashes of annexed content. Having access to the HMAC cipher +could perhaps be said to only let them verify that data they already +know. + +While this seems a pretty persuasive argument, I'm not 100% convinced, and +anyway, most times that the HMAC cipher is needed, the gpg cipher is also +needed. Keeping the HMAC cipher encrypted does slow down two things: +dropping content from encrypted remotes, and checking if encrypted remotes +really have content. If it's later determined to be safe to not encrypt the +HMAC cipher, the current design allows changing that, even for existing +remotes. + +## other use of the symmetric cipher + +The symmetric cipher can be used to encrypt other content than the content +sent to the remote. In particular, it may make sense to encrypt whatever +access keys are used by the special remote with the cipher, and store that +in remotes.log. This way anyone whose gpg key has been given access to +the cipher can get access to whatever other credentials are needed to +use the special remote. 
+ +## risks + +A risk of this scheme is that, once the symmetric cipher has been obtained, it +allows full access to all the encrypted content. This scheme does not allow +revoking a given gpg key access to the cipher, since anyone with such a key +could have already decrypted the cipher and stored a copy. + +If git-annex stores the decrypted symmetric cipher in memory, then there +is a risk that it could be intercepted from there by an attacker. Gpg +ameliorates these types of risks by using locked memory. For git-annex, note +that an attacker with local machine access can tell at least all the +filenames and metadata of files stored in the encrypted remote anyway, +and can access whatever content is stored locally. + +This design does not support obfuscating the size of files by chunking +them, as that would have added a lot of complexity, for dubious benefits. +If the untrusted party running the encrypted remote wants to know file sizes, +they could correlate chunks that are accessed together. Encrypting data +changes the original file size enough to avoid it being used as a direct +fingerprint at least. diff --git a/doc/design/encryption/comment_1_4715ffafb3c4a9915bc33f2b26aaa9c1._comment b/doc/design/encryption/comment_1_4715ffafb3c4a9915bc33f2b26aaa9c1._comment new file mode 100644 index 0000000000..f2ecc46d0a --- /dev/null +++ b/doc/design/encryption/comment_1_4715ffafb3c4a9915bc33f2b26aaa9c1._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 1" + date="2011-04-03T20:03:14Z" + content=""" +New encryption keys could be used for different directories/files/patterns/times/whatever. One could then encrypt this new key for the public keys of other people/machines and push them out along with the actual data. This would allow some level of access restriction or future revocation. 
git-annex would need to keep track of which files can be decrypted with which keys. I am undecided if that information needs to be encrypted or not. + +Encrypted object files should be checksummed in encrypted form so that it's possible to verify integrity without knowing any keys. Same goes for encrypted keys, etc. + +Chunking files in this context seems like needless overkill. This might make sense to store a DVD image on CDs or similar, at some point. But not for encryption, imo. Coming up with sane chunk sizes for all use cases is literally impossible and as you pointed out, correlation by the remote admin is trivial. +"""]] diff --git a/doc/design/encryption/comment_2_a610b3d056a059899178859a3a821ea5._comment b/doc/design/encryption/comment_2_a610b3d056a059899178859a3a821ea5._comment new file mode 100644 index 0000000000..d5461e23c0 --- /dev/null +++ b/doc/design/encryption/comment_2_a610b3d056a059899178859a3a821ea5._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 2" + date="2011-04-05T18:41:49Z" + content=""" +I see no use case for verifying encrypted object files w/o access to the encryption key. And possible use cases for not allowing anyone to verify your data. + +If there are to be multiple encryption keys usable within a single encrypted remote, then they would need to be given some kind of name (since a symmetric key is used, there is no pubkey to provide a name), and the name encoded in the files stored in the remote. While certainly doable I'm not sold that adding a layer of indirection is worthwhile. It only seems it would be worthwhile if setting up a new encrypted remote was expensive to do. Perhaps that could be the case for some type of remote other than S3 buckets. 
+"""]] diff --git a/doc/design/encryption/comment_3_cca186a9536cd3f6e86994631b14231c._comment b/doc/design/encryption/comment_3_cca186a9536cd3f6e86994631b14231c._comment new file mode 100644 index 0000000000..d3c483fdf3 --- /dev/null +++ b/doc/design/encryption/comment_3_cca186a9536cd3f6e86994631b14231c._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 3" + date="2011-04-05T23:24:17Z" + content=""" +Assuming you're storing your encrypted annex with me and I with you, our regular cron jobs to verify all data will catch corruption in each other's annexes. + +Checksums of the encrypted objects could be optional, mitigating any potential attack scenarios. + +It's not only about the cost of setting up new remotes. It would also be a way to keep data in one annex while making it accessible only in a subset of them. For example, I might need some private letters at work, but I don't want my work machine to be able to access them all. +"""]] diff --git a/doc/design/encryption/comment_4_8f3ba3e504b058791fc6e6f9c38154cf._comment b/doc/design/encryption/comment_4_8f3ba3e504b058791fc6e6f9c38154cf._comment new file mode 100644 index 0000000000..14eb1acac1 --- /dev/null +++ b/doc/design/encryption/comment_4_8f3ba3e504b058791fc6e6f9c38154cf._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 4" + date="2011-04-07T19:59:30Z" + content=""" +@Richard the easy way to deal with that scenario is to set up a remote that work can access, and only put in it files work should be able to see. Needing to specify which key a file should be encrypted to when putting it in a remote that supported multiple keys would add another level of complexity which that avoids. + +Of course, the right approach is probably to have a separate repository for work. 
If you don't trust it with seeing file contents, you probably also don't trust it with the contents of your git repository. +"""]] diff --git a/doc/distributed_version_control.mdwn b/doc/distributed_version_control.mdwn new file mode 100644 index 0000000000..e7c858c696 --- /dev/null +++ b/doc/distributed_version_control.mdwn @@ -0,0 +1,21 @@ +In git, there can be multiple clones of a repository, each clone can +be independently modified, and clones can push or pull changes to +one-another to get back in sync. + +git-annex preserves that fundamental distributed nature of git, while +dropping the requirement that, once in sync, each clone contains all the data +that was committed to each other clone. Instead of storing the content +of a file in the repository, git-annex stores a pointer to the content. + +Each git-annex repository is responsible for storing some of the content, +and can copy it to or from other repositories. [[Location_tracking]] +information is committed to git, to let repositories inform other +repositories what file contents they have available. + +-- + +The [[walkthrough]] shows how to create a distributed set of git-annex +repositories with no central repository. + +Prefer a central repository like GitHub? See the +[[tips/centralized_git_repository_tutorial]]. diff --git a/doc/download.mdwn b/doc/download.mdwn new file mode 100644 index 0000000000..e1257d2618 --- /dev/null +++ b/doc/download.mdwn @@ -0,0 +1,17 @@ +The main git repository for git-annex is `git://git-annex.branchable.com/` + +(You can push changes to this wiki from that anonymous git checkout.) + +Other mirrors of the git repository: + +* `git://git.kitenet.net/git-annex` [[gitweb](http://git.kitenet.net/?p=git-annex.git;a=summary)] +* [at github](https://github.com/joeyh/git-annex) + +To download a tarball of a particular release, use an url like + + +From time to time, releases of git-annex are uploaded +[to hackage](http://hackage.haskell.org/package/git-annex). 
+ +Some operating systems include git-annex in easily prepackaged form and +others need some manual work. See [[install]] for details. diff --git a/doc/download/comment_1_fbd8b6d39e9d3c71791551358c863966._comment b/doc/download/comment_1_fbd8b6d39e9d3c71791551358c863966._comment new file mode 100644 index 0000000000..488e005278 --- /dev/null +++ b/doc/download/comment_1_fbd8b6d39e9d3c71791551358c863966._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://peter-simons.myopenid.com/" + ip="84.189.2.244" + subject="Please provide stable tarballs or zipfiles" + date="2011-03-22T13:06:58Z" + content=""" +I'm trying to package git annex for ArchLinux and NixOS. That task would be a *lot* easier, if there were proper release archives available for download. The Gitweb site offers to create snapshot tarballs on the fly, but those tarballs have a different SHA hash every time they're generated, so they cannot be used for the purposes of a distribution. A simple solution for this problem would be to enable snapshots in zip format (because zip files look the same every time they're generated). +"""]] diff --git a/doc/download/comment_2_f85f72b33aedc3425f0c0c47867d02f3._comment b/doc/download/comment_2_f85f72b33aedc3425f0c0c47867d02f3._comment new file mode 100644 index 0000000000..5441c3e4ce --- /dev/null +++ b/doc/download/comment_2_f85f72b33aedc3425f0c0c47867d02f3._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 2" + date="2011-03-22T14:01:37Z" + content=""" +maybe snag tarballs from ? 
+"""]] diff --git a/doc/download/comment_3_cf6044ebe99f71158034e21197228abd._comment b/doc/download/comment_3_cf6044ebe99f71158034e21197228abd._comment new file mode 100644 index 0000000000..b72b848f80 --- /dev/null +++ b/doc/download/comment_3_cf6044ebe99f71158034e21197228abd._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 3" + date="2011-03-22T18:09:21Z" + content=""" +The tarballs produced by gitweb are actually stable. They are wrapped in a gz file with a varying timestamp however. It might be nice if gitweb passed --no-name to gzip to avoid that inconsistency. + +git-annex also has a [pristine-tar](http://kitenet.net/~joey/code/pristine-tar/) branch in git that can be used to recreate the tarballs I upload to Debian. +"""]] diff --git a/doc/download/comment_4_10fc013865c7542c2ed9d6c0963bb391._comment b/doc/download/comment_4_10fc013865c7542c2ed9d6c0963bb391._comment new file mode 100644 index 0000000000..9bb9aa8ae3 --- /dev/null +++ b/doc/download/comment_4_10fc013865c7542c2ed9d6c0963bb391._comment @@ -0,0 +1,9 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawnOvt3TwSSDOLnoVzDNbOP1qO9OmNH5s0s" + nickname="Fraser" + subject="gitweb supplies --no-name as of 1.7.5.1" + date="2011-05-19T08:19:02Z" + content=""" +git v1.7.5.1 fixes the gitweb gzip issue. 
If the git instance is updated we +can have stable distributions (and I can finally write a FreeBSD port ^_^) +"""]] diff --git a/doc/download/comment_5_c6b1bc40226fc2c8ba3e558150856992._comment b/doc/download/comment_5_c6b1bc40226fc2c8ba3e558150856992._comment new file mode 100644 index 0000000000..76ba75edc4 --- /dev/null +++ b/doc/download/comment_5_c6b1bc40226fc2c8ba3e558150856992._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 5" + date="2011-05-19T16:10:35Z" + content=""" +Hmm, I've upgraded to that version, but I see nothing in its changelog, commit log, code, or runtime behavior to indicate that it's producing stable gzip output. +"""]] diff --git a/doc/download/comment_6_3a52993d3553deb9a413debec9a5f92d._comment b/doc/download/comment_6_3a52993d3553deb9a413debec9a5f92d._comment new file mode 100644 index 0000000000..0dbd88b1e5 --- /dev/null +++ b/doc/download/comment_6_3a52993d3553deb9a413debec9a5f92d._comment @@ -0,0 +1,11 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawnOvt3TwSSDOLnoVzDNbOP1qO9OmNH5s0s" + nickname="Fraser" + subject="comment 6" + date="2011-05-22T23:02:39Z" + content=""" +Whups, the fix landed in git's `maint' branch just after 1.7.5 but 1.7.5.1 was +tagged on a different branch. + +Will look closer in future, and let you know when it's really released. +"""]] diff --git a/doc/download/comment_7_a5eebd214b135f34b18274a682211943._comment b/doc/download/comment_7_a5eebd214b135f34b18274a682211943._comment new file mode 100644 index 0000000000..9960e0ea85 --- /dev/null +++ b/doc/download/comment_7_a5eebd214b135f34b18274a682211943._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawnOvt3TwSSDOLnoVzDNbOP1qO9OmNH5s0s" + nickname="Fraser" + subject="comment 7" + date="2011-05-27T01:27:37Z" + content=""" +v1.7.5.3 has it. 
+"""]] diff --git a/doc/download/comment_8_59a976de6c7d333709b92f7cd5830850._comment b/doc/download/comment_8_59a976de6c7d333709b92f7cd5830850._comment new file mode 100644 index 0000000000..5aa4f8c94a --- /dev/null +++ b/doc/download/comment_8_59a976de6c7d333709b92f7cd5830850._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 8" + date="2011-05-28T16:04:51Z" + content=""" +And that is now installed on kitenet.net and verified to work. +"""]] diff --git a/doc/encryption.mdwn b/doc/encryption.mdwn new file mode 100644 index 0000000000..0f83bb7f90 --- /dev/null +++ b/doc/encryption.mdwn @@ -0,0 +1,35 @@ +git-annex mostly does not use encryption. Anyone with access to a git +repository can see all the filenames in it, its history, and can access +any annexed file contents. + +Encryption is needed when using [[special_remotes]] like Amazon S3, where +file content is sent to an untrusted party who does not have access to the +git repository. + +Such an encrypted remote uses strong GPG encryption on the contents of files, +as well as HMAC hashing of the filenames. The size of the encrypted files, +and access patterns of the data, should be the only clues to what is +stored in such a remote. + +You should decide whether to use encryption with a special remote before +any data is stored in it. So, `git annex initremote` requires you +to specify "encryption=none" when first setting up a remote in order +to disable encryption. + +If you want to use encryption, run `git annex initremote` with +"encryption=USERID". The value will be passed to `gpg` to find encryption keys. +Typically, you will say "encryption=2512E3C7" to use a specific gpg key. +Or, you might say "encryption=joey@kitenet.net" to search for matching keys. + +The [[encryption_design|design/encryption]] allows additional encryption keys +to be added on to a special remote later. 
Once a key is added, it is able +to access content that has already been stored in the special remote. +To add a new key, just run `git annex initremote` again, specifying the +new encryption key: + + git annex initremote myremote encryption=788A3F4C + +Note that once a key has been given access to a remote, it's not +possible to revoke that access, short of deleting the remote. See +[[encryption_design|design/encryption]] for other security risks +associated with encryption. diff --git a/doc/encryption/comment_1_1afca8d7182075d46db41f6ad3dd5911._comment b/doc/encryption/comment_1_1afca8d7182075d46db41f6ad3dd5911._comment new file mode 100644 index 0000000000..db93bf63f8 --- /dev/null +++ b/doc/encryption/comment_1_1afca8d7182075d46db41f6ad3dd5911._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="zooko" + ip="75.220.153.232" + subject="Tahoe-LAFS comes with encryption" + date="2011-05-18T04:32:14Z" + content=""" +The Tahoe-LAFS special remote automatically encrypts and adds cryptography integrity checks/digital signatures. For that special remote you should not use the git-annex encryption scheme. + +Tahoe-LAFS encryption generates a new independent key for each file. This means that you can share access to one of the files without thereby sharing access to all of them, and it means that individual files can be deduplicated among multiple users. +"""]] diff --git a/doc/feeds.mdwn b/doc/feeds.mdwn new file mode 100644 index 0000000000..25a3190497 --- /dev/null +++ b/doc/feeds.mdwn @@ -0,0 +1,3 @@ +Aggregating git-annex mentions from elsewhere on the net.. + +* [[!aggregate expirecount=25 name="identica" feedurl="http://identi.ca/api/statusnet/tags/timeline/gitannex.rss" url="http://identi.ca/tag/gitannex"]] diff --git a/doc/forum.mdwn b/doc/forum.mdwn new file mode 100644 index 0000000000..ce5787adcb --- /dev/null +++ b/doc/forum.mdwn @@ -0,0 +1,3 @@ +This is a place to discuss using git-annex. If you need help, advice, or anything, post about it here. 
+ +[[!inline pages="forum/* and !*/Discussion" archive=yes rootpage=forum postformtext="Add a new thread titled:"]] diff --git a/doc/forum/--print0_option_as_in___34__find__34__.mdwn b/doc/forum/--print0_option_as_in___34__find__34__.mdwn new file mode 100644 index 0000000000..7d9a2284dd --- /dev/null +++ b/doc/forum/--print0_option_as_in___34__find__34__.mdwn @@ -0,0 +1,5 @@ +It would be nice if git annex find supported a --print0 option as GNU +find does. That way, file names that are printed could be piped to +xargs even if they have spaces. + +> Done. --[[Joey]] diff --git a/doc/forum/A_really_stupid_question.mdwn b/doc/forum/A_really_stupid_question.mdwn new file mode 100644 index 0000000000..38c7bcb56a --- /dev/null +++ b/doc/forum/A_really_stupid_question.mdwn @@ -0,0 +1,3 @@ +Sorry, but all this wiki and the manpage seem to gloss over the most obvious question: + +What happens when you commit conflicting edits in different repositories? diff --git a/doc/forum/A_really_stupid_question/comment_1_40e02556de0b00b94f245a0196b5a89f._comment b/doc/forum/A_really_stupid_question/comment_1_40e02556de0b00b94f245a0196b5a89f._comment new file mode 100644 index 0000000000..2a400db3b0 --- /dev/null +++ b/doc/forum/A_really_stupid_question/comment_1_40e02556de0b00b94f245a0196b5a89f._comment @@ -0,0 +1,31 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="Good question!" + date="2011-12-20T23:07:25Z" + content=""" +You get a regular git merge conflict, which can be resolved in any of the regular ways, except that conflicting files are just symlinks. + +Example: + +
+$ git pull
+...
+Auto-merging myfile
+CONFLICT (add/add): Merge conflict in myfile
+Automatic merge failed; fix conflicts and then commit the result.
+$ git status
+# On branch master
+# Your branch and 'origin/master' have diverged,
+# and have 1 and 1 different commit(s) each, respectively.
+#
+# Unmerged paths:
+#   (use \"git add/rm ...\" as appropriate to mark resolution)
+#
+#	both added:         myfile
+#
+no changes added to commit (use \"git add\" and/or \"git commit -a\")
+$ git add myfile
+$ git commit -m \"took local version of the conflicting file\"
+
+"""]] diff --git a/doc/forum/Behaviour_of_fsck.mdwn b/doc/forum/Behaviour_of_fsck.mdwn new file mode 100644 index 0000000000..cd27d49f76 --- /dev/null +++ b/doc/forum/Behaviour_of_fsck.mdwn @@ -0,0 +1,13 @@ +The current behaviour of 'fsck' is a bit verbose. I have an annex'd directory of tarballs for my own build system for "science" applications, there's about ~600 or so blobs in my repo, I do occasionally like to run fsck across all my data to see what files don't meet the min num copies requirement that I have set. + +Would it be better for the default behaviour of fsck when it has not been given a path to only output errors and not bother to show that a file is ok for every single file in a repo. i.e. + + git annex fsck + +should show only 'errors' and maybe a simple indicator showing the status (show a spinner or dots?) and when + + git annex fsck PATH/FILE + +it should have the current behaviour? + +Right now the current fsck behaviour might get annoying for anyone who would want to run fsck with repos with lots of big files. diff --git a/doc/forum/Behaviour_of_fsck/comment_1_0e40f158b3f4ccdcaab1408d858b68b8._comment b/doc/forum/Behaviour_of_fsck/comment_1_0e40f158b3f4ccdcaab1408d858b68b8._comment new file mode 100644 index 0000000000..dc48e2f943 --- /dev/null +++ b/doc/forum/Behaviour_of_fsck/comment_1_0e40f158b3f4ccdcaab1408d858b68b8._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-03-24T17:45:08Z" + content=""" +I tend to agree that the default output of fsck is not quite right. I often use git annex fsck -q. A progress spinner display is a good idea. 
+"""]] diff --git a/doc/forum/Behaviour_of_fsck/comment_2_ead36a23c3e6efa1c41e4555f93e014e._comment b/doc/forum/Behaviour_of_fsck/comment_2_ead36a23c3e6efa1c41e4555f93e014e._comment new file mode 100644 index 0000000000..357b48a234 --- /dev/null +++ b/doc/forum/Behaviour_of_fsck/comment_2_ead36a23c3e6efa1c41e4555f93e014e._comment @@ -0,0 +1,19 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 2" + date="2011-03-26T10:57:41Z" + content=""" +After some thought, perhaps the default fsck output should be at least machine readable and copy and pasteable i.e. + +
+$ git annex fsck
+Files with errors
+
+    file1
+    file2
+
+
+ +so I can then copy the list of borked files and then just paste it into a for loop in my shell to recover the files. it's just an idea. +"""]] diff --git a/doc/forum/Behaviour_of_fsck/comment_3_97848f9a3db89c0427cfb671ba13300e._comment b/doc/forum/Behaviour_of_fsck/comment_3_97848f9a3db89c0427cfb671ba13300e._comment new file mode 100644 index 0000000000..be34473c0a --- /dev/null +++ b/doc/forum/Behaviour_of_fsck/comment_3_97848f9a3db89c0427cfb671ba13300e._comment @@ -0,0 +1,19 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 3" + date="2011-03-28T01:16:21Z" + content=""" +Another nice thing would be a summary of _what_ is wrong. I.e. + + % git fsck + [...] + git-annex: 100 total failed + 50 checksum failed + 50 not enough copies exit + +And the same/similar for all other failure modes. + + +-- RichiH +"""]] diff --git a/doc/forum/Behaviour_of_fsck/comment_4_e4911dc6793f98fb81151daacbe49968._comment b/doc/forum/Behaviour_of_fsck/comment_4_e4911dc6793f98fb81151daacbe49968._comment new file mode 100644 index 0000000000..e8c9837462 --- /dev/null +++ b/doc/forum/Behaviour_of_fsck/comment_4_e4911dc6793f98fb81151daacbe49968._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 2" + date="2011-03-25T11:23:04Z" + content=""" +FWIW, I wanted to suggest exactly the same thing. +"""]] diff --git a/doc/forum/Can_I_store_normal_files_in_the_git-annex_git_repository__63__.mdwn b/doc/forum/Can_I_store_normal_files_in_the_git-annex_git_repository__63__.mdwn new file mode 100644 index 0000000000..6d1083dd57 --- /dev/null +++ b/doc/forum/Can_I_store_normal_files_in_the_git-annex_git_repository__63__.mdwn @@ -0,0 +1,6 @@ +Is it possible to store ordinary files in the git repository, or is this going to confuse git-annex? 
In other words, can I safely run + + git add .gitattributes + git commit -m 'remember attributes' .gitattributes + +..., or do I have to use `git-annex add` all the time? diff --git a/doc/forum/Can_I_store_normal_files_in_the_git-annex_git_repository__63__/comment_1_c8f9923d8dc76b8bed25dce5ae09b520._comment b/doc/forum/Can_I_store_normal_files_in_the_git-annex_git_repository__63__/comment_1_c8f9923d8dc76b8bed25dce5ae09b520._comment new file mode 100644 index 0000000000..8873edcde8 --- /dev/null +++ b/doc/forum/Can_I_store_normal_files_in_the_git-annex_git_repository__63__/comment_1_c8f9923d8dc76b8bed25dce5ae09b520._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://peter-simons.myopenid.com/" + ip="77.12.196.1" + subject="Solved" + date="2011-07-13T16:21:25Z" + content=""" +I got my answer on #vcs-home: Yes, git-annex and git get along fine. +"""]] diff --git a/doc/forum/Error_while_adding_a_file___34__createSymbolicLink:_already_exists__34__.mdwn b/doc/forum/Error_while_adding_a_file___34__createSymbolicLink:_already_exists__34__.mdwn new file mode 100644 index 0000000000..38865f49aa --- /dev/null +++ b/doc/forum/Error_while_adding_a_file___34__createSymbolicLink:_already_exists__34__.mdwn @@ -0,0 +1 @@ +Moved to [[bugs|bugs/Error_while_adding_a_file___34__createSymbolicLink:_already_exists__34__]] --[[Joey]] diff --git a/doc/forum/Is_an_automagic_upgrade_of_the_object_directory_safe__63__.mdwn b/doc/forum/Is_an_automagic_upgrade_of_the_object_directory_safe__63__.mdwn new file mode 100644 index 0000000000..5643f6b7a0 --- /dev/null +++ b/doc/forum/Is_an_automagic_upgrade_of_the_object_directory_safe__63__.mdwn @@ -0,0 +1,9 @@ +Consider the following two use cases: + +* I have a git-annex repo on a portable medium and carry it around between several machines. I use it on a non-important system with the most current git-annex installed, automagic upgrade happens. I am now forced to upgrade git-annex on all other machines. 
Bonus points if this happens in the background and I don't even notice it until it's too late. + +* My system crashes and I use a rescue CD to access local data, including git-annex. The rescue CD includes a newer version of git-annex and once my system is restored, I am forced to upgrade git-annex locally. + +My suggestion would be not to upgrade automatically, but to either ask the user if this is OK or to error out and request that they run git annex update by hand. + +Optionally, this could be done via a local config variable which should default to error or ask, not upgrade. diff --git a/doc/forum/Is_an_automagic_upgrade_of_the_object_directory_safe__63__/comment_1_c25900b9d2d62cc0b8c77150bcfebadf._comment b/doc/forum/Is_an_automagic_upgrade_of_the_object_directory_safe__63__/comment_1_c25900b9d2d62cc0b8c77150bcfebadf._comment new file mode 100644 index 0000000000..8420d7bb3a --- /dev/null +++ b/doc/forum/Is_an_automagic_upgrade_of_the_object_directory_safe__63__/comment_1_c25900b9d2d62cc0b8c77150bcfebadf._comment @@ -0,0 +1,13 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-03-18T00:38:51Z" + content=""" +These are good examples; I think you've convinced me at least for upgrades going forward after v2. I'm not sure we have enough users and outdated git-annex installations to worry about it for v1. + +(Hoping such upgrades are rare anyway.. Part of the point of changes made in v2 was to allow lots of changes to be made later w/o needing a v3.) + +Update: Upgrades from v1 to v2 will no longer be handled automatically +now. 
+"""]] diff --git a/doc/forum/Need_new_build_instructions_for_Debian_stable.mdwn b/doc/forum/Need_new_build_instructions_for_Debian_stable.mdwn new file mode 100644 index 0000000000..7db19697c3 --- /dev/null +++ b/doc/forum/Need_new_build_instructions_for_Debian_stable.mdwn @@ -0,0 +1,5 @@ +The instructions for building git-annex on [[install/Debian]] stable don't seem to be valid anymore. + +1. `dpkg-checkbuilddeps` is looking for the wrong packages, e.g. libghc-missingh-dev instead of libghc6-missingh-dev. + +2. Not all dependencies are available in the Squeeze repositories anymore (at least not Crypto and hS3), if I am not mistaken. diff --git a/doc/forum/Need_new_build_instructions_for_Debian_stable/comment_1_8c1eea6dfec8b7e1c7a371b6e9c26118._comment b/doc/forum/Need_new_build_instructions_for_Debian_stable/comment_1_8c1eea6dfec8b7e1c7a371b6e9c26118._comment new file mode 100644 index 0000000000..e464c84da1 --- /dev/null +++ b/doc/forum/Need_new_build_instructions_for_Debian_stable/comment_1_8c1eea6dfec8b7e1c7a371b6e9c26118._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-04-26T15:27:49Z" + content=""" +I have updated the instructions. +"""]] diff --git a/doc/forum/Need_new_build_instructions_for_Debian_stable/comment_2_f6ff8306c946219dbe39bb8938a349ab._comment b/doc/forum/Need_new_build_instructions_for_Debian_stable/comment_2_f6ff8306c946219dbe39bb8938a349ab._comment new file mode 100644 index 0000000000..40f570610a --- /dev/null +++ b/doc/forum/Need_new_build_instructions_for_Debian_stable/comment_2_f6ff8306c946219dbe39bb8938a349ab._comment @@ -0,0 +1,21 @@ +[[!comment format=mdwn + username="gernot" + ip="213.196.216.21" + subject="comment 2" + date="2011-04-26T18:56:44Z" + content=""" +Thanks for the update, Joey. I think you forgot to change libghc-missingh-dev to libghc6-missingh-dev for the copy & paste instructions though. 
+ +Also, after having checked that I have everything installed I'm still getting this error: + + ... + [15 of 77] Compiling Annex ( Annex.hs, Annex.o ) + + Annex.hs:19:35: + Module `Control.Monad.State' does not export `state' + make[1]: *** [git-annex] Error 1 + make[1]: Leaving directory `/home/gernot/dev/git-annex' + dh_auto_build: make -j1 returned exit code 2 + make: *** [binary] Error 2 + +"""]] diff --git a/doc/forum/Need_new_build_instructions_for_Debian_stable/comment_3_bcda70cbfc7c1a14fa82da70f9f876e2._comment b/doc/forum/Need_new_build_instructions_for_Debian_stable/comment_3_bcda70cbfc7c1a14fa82da70f9f876e2._comment new file mode 100644 index 0000000000..8b41116431 --- /dev/null +++ b/doc/forum/Need_new_build_instructions_for_Debian_stable/comment_3_bcda70cbfc7c1a14fa82da70f9f876e2._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 3" + date="2011-04-26T23:40:33Z" + content=""" +Both problems fixed. +"""]] diff --git a/doc/forum/OSX__39__s_default_sshd_behaviour_has_limited_paths_set.mdwn b/doc/forum/OSX__39__s_default_sshd_behaviour_has_limited_paths_set.mdwn new file mode 100644 index 0000000000..5e417b14c9 --- /dev/null +++ b/doc/forum/OSX__39__s_default_sshd_behaviour_has_limited_paths_set.mdwn @@ -0,0 +1,12 @@ +This is a tip for users who wish to use remotes which are based on OSX systems and have used macports to install some of the required utilities for git-annex to work. + +The default behaviour of OSX's sshd is to have a "highly restricted" restricted environment. 
The defaults that it allows are + + jtang@x00:~ $ ssh x00 echo \$PATH + /usr/bin:/bin:/usr/sbin:/sbin + +One solution is to enable *PermitUserEnvironment yes* in `/etc/sshd_config` and then in your own `~/.ssh/environment` file you could add something like (the below is an example) + + PATH=/Users/jtang/bin:/opt/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:/usr/local/bin:/opt/X11/bin:/usr/X11/bin:/Users/jtang/.cabal/bin:/opt/local/libexec/gnubin + +If the above is not done, cloning from the OSX host will fail if git is not installed in /usr/bin (which it probably won't be). diff --git a/doc/forum/OSX__39__s_haskell-platform_statically_links_things.mdwn b/doc/forum/OSX__39__s_haskell-platform_statically_links_things.mdwn new file mode 100644 index 0000000000..537c85d019 --- /dev/null +++ b/doc/forum/OSX__39__s_haskell-platform_statically_links_things.mdwn @@ -0,0 +1,17 @@ +This isn't really a bug of git-annex, but a problem with haskell-platform/ghc6.12.x so this post might need to be moved to a better place (maybe tips). + +OSX's haskell-platform doesn't have the dynamic libraries available, as far as I know it just isn't supported therefore git-annex will always be statically built on OSX, so wrappers like tsocks or [[!google dsocks]] for preloading connect() calls won't work. + +
+jtang@x00:~/annex $ tsocks git annex get .
+dyld: could not load inserted library: /opt/local/lib/libtsocks.dylib
+
+error: git-annex died of signal 5
+
+ +A side effect of this is that users who are behind restrictive firewalls that allow only ssh via a socks proxy will need to configure ssh to use something like `connect` as a ProxyCommand. + +
+host remotemyhost
+        ProxyCommand connect -S proxy.mydomain:1080 -R local %h %p
+
diff --git a/doc/forum/Podcast_syncing_use-case.mdwn b/doc/forum/Podcast_syncing_use-case.mdwn new file mode 100644 index 0000000000..6b3c81cabc --- /dev/null +++ b/doc/forum/Podcast_syncing_use-case.mdwn @@ -0,0 +1,34 @@ +I've been trying to use git-annex with the following strategy. + +* Download podcasts into the annex `gpodder-downloads` +* Check the podcasts into the annex using `git annex add`. +* Copy the podcasts over to my mp3 player in the annex `usb-ariaz`. + This is a FAT-formatted mp3 player, so I have been using a bare + repository. +* Move the podcasts to a different annex called `gpodder-on-usbdisk` + to indicate that they have been successfully put on the mp3 player. +* `chmod` the files on the mp3 player to `0600` so that I can delete + them from the player when I am done listening to them. + +Then I go for a run or something and listen to a bunch of podcasts, +deleting them after I have listened to them. When I get back, I would +like to find the files that I have listened to and remove them from +the annexes that are not on the mp3 player. What I have been hoping +is that something like + + ~/gpodder-on-usbdisk $ git annex find --not --in usb-ariaz --print0 | xargs -0 git rm + ~/gpodder-on-usbdisk $ git annex unused + ~/gpodder-on-usbdisk $ git annex dropunused `seq X` + +would work. However, it appears that `git-annex find` does not +actually check to see that the file contents are present, but only +looks at the `git-annex` branch of the `usb-ariaz` repository. Since +I have not changed that with my sneaky deletions, it has no way of +knowing that the files have been deleted. + +Is there any way to do this properly? (And by properly, I don't mean +"don't delete the files". That is really the only way I have of +marking that I have listened to podcasts on this particular mp3 player.) + +I tried setting the `usb-ariaz` repository to be untrusted, but that +did not change the behavior of `git annex find`. 
diff --git a/doc/forum/Podcast_syncing_use-case/comment_1_ace6f9d3a950348a3ac0ff592b62e786._comment b/doc/forum/Podcast_syncing_use-case/comment_1_ace6f9d3a950348a3ac0ff592b62e786._comment new file mode 100644 index 0000000000..fe396c39f3 --- /dev/null +++ b/doc/forum/Podcast_syncing_use-case/comment_1_ace6f9d3a950348a3ac0ff592b62e786._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-11-27T17:56:31Z" + content=""" +Right, --in goes by git-annex's [[location_tracking]] information; actually checking if a remote still has the files would make --in too expensive in many cases. + +So you need to give `gpodder-on-usbdisk` current information. You can do that by going to `usb-ariaz` and doing a `git annex fsck`. That will find the deleted files and update the location information. Then, back on `gpodder-on-usbdisk`, `git pull usb-ariaz`, and then you can proceed with the commands you showed. +"""]] diff --git a/doc/forum/Podcast_syncing_use-case/comment_2_930a6620b4d516e69ed952f9da5371bb._comment b/doc/forum/Podcast_syncing_use-case/comment_2_930a6620b4d516e69ed952f9da5371bb._comment new file mode 100644 index 0000000000..97eb3c681c --- /dev/null +++ b/doc/forum/Podcast_syncing_use-case/comment_2_930a6620b4d516e69ed952f9da5371bb._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://cgray.myopenid.com/" + nickname="cgray" + subject="comment 2" + date="2011-11-27T22:10:44Z" + content=""" +Thanks, that works perfectly! +"""]] diff --git a/doc/forum/Problems_with_large_numbers_of_files.mdwn b/doc/forum/Problems_with_large_numbers_of_files.mdwn new file mode 100644 index 0000000000..1dbddd3e28 --- /dev/null +++ b/doc/forum/Problems_with_large_numbers_of_files.mdwn @@ -0,0 +1,8 @@ +I'm trying to use git-annex to archive scientific data. I'm often dealing with large numbers of files, sometimes 10k or more. 
When I try to git-annex add these files I get this error: + + + Stack space overflow: current size 8388608 bytes. + Use `+RTS -Ksize' to increase it. + + +This is with the latest version of git-annex and a current version of git on OS 10.6.7. After this error occurs, I am unable to un-annex the files and I'm forced to recover from a backup. diff --git a/doc/forum/Problems_with_large_numbers_of_files/comment_1_08791cb78b982087c2a07316fe3ed46c._comment b/doc/forum/Problems_with_large_numbers_of_files/comment_1_08791cb78b982087c2a07316fe3ed46c._comment new file mode 100644 index 0000000000..94043a7001 --- /dev/null +++ b/doc/forum/Problems_with_large_numbers_of_files/comment_1_08791cb78b982087c2a07316fe3ed46c._comment @@ -0,0 +1,22 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 1" + date="2011-04-05T07:27:46Z" + content=""" +Heh, cool, I was thinking throwing about 28million files at git-annex. Let me know how it goes, I suspect you have just run into a default limits OSX problem. + +You probably just need to up some system limits (you will need to read the error messages that first appear) then do something like + +
+# this is really for the run time, you can set these settings in /etc/sysctl.conf
+sudo sysctl -w kern.maxproc=2048
+sudo sysctl -w kern.maxprocperuid=1024
+
+# tell launchd about having higher limits
+echo \"limit maxfiles 1024 unlimited\" | sudo tee -a /etc/launchd.conf
+echo \"limit maxproc 1024 2048\" | sudo tee -a /etc/launchd.conf
+
+ +There are other system limits which you can check by doing a \"ulimit -a\", once you make the above changes, you will need to reboot to make the changes take effect. I am unsure if the above will help as it is an example of what I did on 10.6.6 a few months ago to fix some forking issues. From the error you got you will probably need to increase the stacksize to something bigger or even make it unlimited if you feel lucky, the default stacksize on OSX is 8192, try making it say 10times that size first and see what happens. +"""]] diff --git a/doc/forum/Problems_with_large_numbers_of_files/comment_2_0392a11219463e40c53bae73c8188b69._comment b/doc/forum/Problems_with_large_numbers_of_files/comment_2_0392a11219463e40c53bae73c8188b69._comment new file mode 100644 index 0000000000..8ea5531f43 --- /dev/null +++ b/doc/forum/Problems_with_large_numbers_of_files/comment_2_0392a11219463e40c53bae73c8188b69._comment @@ -0,0 +1,25 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 2" + date="2011-04-05T17:46:03Z" + content=""" +This message comes from ghc's runtime memory manager. Apparently your ghc defaults to limiting the stack to 8 mb. +Mine seems to limit it slightly higher -- I have seen haskell programs successfully grow as large as 350 mb, although generally not intentionally. :) + +Here's how to adjust the limit at runtime, obviously you'd want a larger number: + +
+# git-annex +RTS -K100 -RTS find
+Stack space overflow: current size 100 bytes.
+Use `+RTS -Ksize -RTS' to increase it.
+
+ +I've tried to avoid git-annex using quantities of memory that scale with the number of files in the repo, and I think in general successfully -- I run it on 32 mb and 128 mb machines, FWIW. There are some tricky cases, and haskell makes it easy to accidentally write code that uses much more memory than would be expected. + +One well known case is `git annex unused`, which *has* to build a structure of every annexed file. I have been considering using a bloom filter or something to avoid that. + +Another possible case is when running a command like `git annex add`, and passing it a lot of files/directories. Some code tries to preserve the order of your input after passing it through `git ls-files` (which destroys ordering), and to do so it needs to buffer both the input and the result in ram. + +It's possible to build git-annex with memory profiling and generate some quite helpful profiling data. Edit the Makefile and add this to GHCFLAGS: `-prof -auto-all -caf-all -fforce-recomp` then when running git-annex, add the parameters: `+RTS -p -RTS` , and look for the git-annex.prof file. +"""]] diff --git a/doc/forum/Problems_with_large_numbers_of_files/comment_3_537e9884c1488a7a4bcf131ea63b71f7._comment b/doc/forum/Problems_with_large_numbers_of_files/comment_3_537e9884c1488a7a4bcf131ea63b71f7._comment new file mode 100644 index 0000000000..8e4101e37e --- /dev/null +++ b/doc/forum/Problems_with_large_numbers_of_files/comment_3_537e9884c1488a7a4bcf131ea63b71f7._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 3" + date="2011-04-05T18:02:05Z" + content=""" +Oh, you'll need profiling builds of various haskell libraries to build with profiling support. If that's not easily accomplished, if you could show me the form of the command you're running, and also how git annex unannex fails, that would be helpful for investigating. 
+"""]] diff --git a/doc/forum/Problems_with_large_numbers_of_files/comment_4_7cb65d013e72bd2b7e90452079d42ac9._comment b/doc/forum/Problems_with_large_numbers_of_files/comment_4_7cb65d013e72bd2b7e90452079d42ac9._comment new file mode 100644 index 0000000000..bac9fd7cad --- /dev/null +++ b/doc/forum/Problems_with_large_numbers_of_files/comment_4_7cb65d013e72bd2b7e90452079d42ac9._comment @@ -0,0 +1,29 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkhdKAhe3l_UyGt5SdfRBPYVwe-9f8P2dM" + nickname="Justin" + subject="comment 4" + date="2011-04-05T21:14:12Z" + content=""" +@joey + +OK, I'll try increasing the stack size and see if that helps. + +For reference, I was running: + +git annex add . + +on a directory containing about 100k files spread over many nested subdirectories. I actually have more than a dozen projects like this that I plan to keep in git annex, possibly in separate repositories if necessary. I could probably tar the data and then archive that, but I like the idea of being able to see the structure of my data even though the contents of the files are on a different machine. + +After the crash, running: + +git annex unannex + +does nothing and returns instantly. What exactly is 'git annex add' doing? I know that it's moving files into the key-value store and adding symlinks, but I don't know what else it does. 
+ +--Justin + + + +If + +"""]] diff --git a/doc/forum/Problems_with_large_numbers_of_files/comment_5_86a42ee3173a5d38f803e64b79496ab3._comment b/doc/forum/Problems_with_large_numbers_of_files/comment_5_86a42ee3173a5d38f803e64b79496ab3._comment new file mode 100644 index 0000000000..7dcccef2e5 --- /dev/null +++ b/doc/forum/Problems_with_large_numbers_of_files/comment_5_86a42ee3173a5d38f803e64b79496ab3._comment @@ -0,0 +1,14 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 5" + date="2011-04-07T16:41:00Z" + content=""" +I think what is happening with \"git annex unannex\" is that \"git annex add\" crashes before it can \"git add\" the symlinks. unannex only looks at files that \"git ls-files\" shows, and so files that are not added to git are not seen. So, this can be recovered from by looking at git status and manually adding the symlinks to git, and then unannex. + +That also suggests that \"git annex add .\" has done something before crashing. That's consistent with you passing it < 2 parameters; it's not just running out of memory trying to expand and preserve order of its parameters (like it might if you ran \"git annex add experiment-1/ experiment-2/\") + +I'm pretty sure I know where the space leak is now. git-annex builds up a queue of git commands, so that it can run git a minimum number of times. Currently, this queue is only flushed at the end. I had been meaning to work on having it flush the queue periodically to avoid it growing without bounds, and I will prioritize doing that. + +(The only other thing that \"git annex add\" does is record location log information.) 
+"""]] diff --git a/doc/forum/Problems_with_large_numbers_of_files/comment_6_4551274288383c9cc27cbf85b122d307._comment b/doc/forum/Problems_with_large_numbers_of_files/comment_6_4551274288383c9cc27cbf85b122d307._comment new file mode 100644 index 0000000000..fff8f7cdde --- /dev/null +++ b/doc/forum/Problems_with_large_numbers_of_files/comment_6_4551274288383c9cc27cbf85b122d307._comment @@ -0,0 +1,11 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 6" + date="2011-04-07T18:09:13Z" + content=""" +I've committed the queue flush improvements, so it will buffer up to 10240 git actions, and then flush the queue. + +There may be other memory leaks at scale (besides the two I mentioned earlier), but this seems promising. I'm well into running `git annex add` on a half million files and it's using 18 mb ram and has flushed the queue several times. This run +will fail due to running out of inodes for the log files, not due to memory. :) +"""]] diff --git a/doc/forum/Problems_with_large_numbers_of_files/comment_7_d18cf944352f8303799c86f2c0354e8e._comment b/doc/forum/Problems_with_large_numbers_of_files/comment_7_d18cf944352f8303799c86f2c0354e8e._comment new file mode 100644 index 0000000000..7d2ad5eba7 --- /dev/null +++ b/doc/forum/Problems_with_large_numbers_of_files/comment_7_d18cf944352f8303799c86f2c0354e8e._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 7" + date="2011-04-08T21:55:36Z" + content=""" +http://xfs.org/index.php/XFS_FAQ#Q:_Performance:_mkfs.xfs_-n_size.3D64k_option +"""]] diff --git a/doc/forum/Recommended_number_of_repositories.mdwn b/doc/forum/Recommended_number_of_repositories.mdwn new file mode 100644 index 0000000000..9e9f2838d6 --- /dev/null +++ b/doc/forum/Recommended_number_of_repositories.mdwn @@ -0,0 +1,4 @@ +With git it is easy to create one repository per 
project, and it almost always makes sense to do so. When using git-annex, what is the recommended setup? + +Should one have a single annex containing all files, or is it recommended to create different repositories for things like 'photos', 'music', 'isos' ? + diff --git a/doc/forum/Recommended_number_of_repositories/comment_1_3ef256230756be8a9679b107cdbfd018._comment b/doc/forum/Recommended_number_of_repositories/comment_1_3ef256230756be8a9679b107cdbfd018._comment new file mode 100644 index 0000000000..46ce0e8d53 --- /dev/null +++ b/doc/forum/Recommended_number_of_repositories/comment_1_3ef256230756be8a9679b107cdbfd018._comment @@ -0,0 +1,15 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="depends ..." + date="2011-11-04T19:59:24Z" + content=""" +It makes sense to have separate repositories when you have well-defined uses for them. + +I have a separate repository just for music and podcasts, which I can put various places where I have no need of the overhead of a tree of other files. + +If you're using it for whatever arbitrary large files you accumulate, I find it's useful to have them in one repository. This way I can rearrange things as makes sense. It might make sense to have \"photos\" and \"isos\" as categories today, but next year you might prefer to move those under 2011/{photos,isos}. It would certainly make sense to have different repositories for home, work, etc. + +How to split repositories up for a home directory is a general problem that the [vcs-home](http://vcs-home.branchable.com) +project has surely considered at one time or another. 
+"""]] diff --git a/doc/forum/Will_git_annex_work_on_a_FAT32_formatted_key__63__.mdwn b/doc/forum/Will_git_annex_work_on_a_FAT32_formatted_key__63__.mdwn new file mode 100644 index 0000000000..f458ba72e0 --- /dev/null +++ b/doc/forum/Will_git_annex_work_on_a_FAT32_formatted_key__63__.mdwn @@ -0,0 +1,3 @@ +FAT32 does not support symlinks, so I wonder if there's going to be a problem with that. + +Generally speaking, I am wondering about portability of git annex on windows and on android... diff --git a/doc/forum/Will_git_annex_work_on_a_FAT32_formatted_key__63__/comment_1_426482e6eb3a27687a48f24f6ef2332f._comment b/doc/forum/Will_git_annex_work_on_a_FAT32_formatted_key__63__/comment_1_426482e6eb3a27687a48f24f6ef2332f._comment new file mode 100644 index 0000000000..119c9e535a --- /dev/null +++ b/doc/forum/Will_git_annex_work_on_a_FAT32_formatted_key__63__/comment_1_426482e6eb3a27687a48f24f6ef2332f._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-03-07T19:13:14Z" + content=""" +See [[bugs/fat_support]]. A bare git repo will have to be used to avoid symlink problems, at least for now. The other problem is that git-annex key files have colons in their filenames. +"""]] diff --git a/doc/forum/Will_git_annex_work_on_a_FAT32_formatted_key__63__/comment_2_af4f8b52526d8bea2904c95406fd2796._comment b/doc/forum/Will_git_annex_work_on_a_FAT32_formatted_key__63__/comment_2_af4f8b52526d8bea2904c95406fd2796._comment new file mode 100644 index 0000000000..ca599b2857 --- /dev/null +++ b/doc/forum/Will_git_annex_work_on_a_FAT32_formatted_key__63__/comment_2_af4f8b52526d8bea2904c95406fd2796._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 2" + date="2011-03-19T15:37:22Z" + content=""" +Now it's fully supported, so long as you put a bare git repo on your key. 
+"""]] diff --git a/doc/forum/Wishlist:_Is_it_possible_to___34__unlock__34___files_without_copying_the_file_data__63__.mdwn b/doc/forum/Wishlist:_Is_it_possible_to___34__unlock__34___files_without_copying_the_file_data__63__.mdwn new file mode 100644 index 0000000000..1a7930fec4 --- /dev/null +++ b/doc/forum/Wishlist:_Is_it_possible_to___34__unlock__34___files_without_copying_the_file_data__63__.mdwn @@ -0,0 +1,6 @@ +I have a DLink Boxee media player and it can not play content from symbolic links, it needs to access regular media files. Unfortunately unlocking/locking is quite slow for such a large amount of data due to the required data copying, but it should not even be needed since I do not need write access to any file to watch the movie or to play the song. + +Is it currently possible or would it be possible to add a commands like "unlock" which would not copy the file data but simply move files out from the data store into the tree while still keeping the files read only? A corresponding "lock" command would also be needed to restore the normal symbolic link tree structure. + +Update: +I tried the rsync special remote http://git-annex.branchable.com/special_remotes/rsync/ and it works but the file structure created reflects the data store not the view given by the symbolic links. 
diff --git a/doc/forum/Wishlist:_Is_it_possible_to___34__unlock__34___files_without_copying_the_file_data__63__/comment_1_1cf4ab29dfa2cff59b86305fc0018251._comment b/doc/forum/Wishlist:_Is_it_possible_to___34__unlock__34___files_without_copying_the_file_data__63__/comment_1_1cf4ab29dfa2cff59b86305fc0018251._comment new file mode 100644 index 0000000000..3ab518714d --- /dev/null +++ b/doc/forum/Wishlist:_Is_it_possible_to___34__unlock__34___files_without_copying_the_file_data__63__/comment_1_1cf4ab29dfa2cff59b86305fc0018251._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-07-07T15:27:28Z" + content=""" +The rsync or directory special remotes would work if the media player uses metadata in the files, rather than directory locations. + +Beyond that there is the [[todo/smudge]] idea, which is hoped to be supported sometime. +"""]] diff --git a/doc/forum/Wishlist:_Is_it_possible_to___34__unlock__34___files_without_copying_the_file_data__63__/comment_2_f5ebb7f43dcef861ecc13373fb1e263f._comment b/doc/forum/Wishlist:_Is_it_possible_to___34__unlock__34___files_without_copying_the_file_data__63__/comment_2_f5ebb7f43dcef861ecc13373fb1e263f._comment new file mode 100644 index 0000000000..9601003798 --- /dev/null +++ b/doc/forum/Wishlist:_Is_it_possible_to___34__unlock__34___files_without_copying_the_file_data__63__/comment_2_f5ebb7f43dcef861ecc13373fb1e263f._comment @@ -0,0 +1,15 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawmL8pteP2jbYJUn1M3CbeLDvz2SWAA1wtg" + nickname="Kristian" + subject="Solution" + date="2011-07-31T15:24:25Z" + content=""" +Yes, it can read id3-tags and guess titles from movie filenames but it sometimes gets confused by the filename metadata provided by the WORM-backend. + +I think I have a good enough solution to this problem. 
It's not efficient when it comes to renames but handles adding and deletion just fine + + rsync -vaL --delete source dest + +The -L flag looks at symbolic links and copies the actual data they are pointing to. Of course \"source\" must have all data locally for this to work. + +"""]] diff --git a/doc/forum/Wishlist:_Ways_of_selecting_files_based_on_meta-information.mdwn b/doc/forum/Wishlist:_Ways_of_selecting_files_based_on_meta-information.mdwn new file mode 100644 index 0000000000..1de06f7cda --- /dev/null +++ b/doc/forum/Wishlist:_Ways_of_selecting_files_based_on_meta-information.mdwn @@ -0,0 +1,15 @@ +It would be extremely useful to have some additional ways to select files (for git annex copy/move/get and maybe others) based on the meta-information available to git-annex, rather than just by file or directory name. + +An example of what I'd like to do is this: + + host1$ git annex copy --to usb-drive --missing-on host2 + +This would check location tracking information and copy each file from host1's annex which is not present on host2 onto the usb-drive annex -- i.e. it's what I want when I need to do a sneakernet synchronisation of host1 and host2 (for backup purposes, for example). Note that of course I could copy --to host2, assuming network connectivity, but that would take a long time. + +There's probably other selectors that we can imagine; an obvious one could be --present-on -- useful for judiciously dropping only those files that you have easily available in a local annex (as you may want to keep files that are hard to make available even if --numcopies would nominally be satisfied). + +Other similar ideas for file content selectors: + + * Files that have less than n, exactly n or more than n copies -- for when you need to satisfy your --numcopies policy over sneakernet. + * Files that are present (or not present) on some trusted annex -- for making sure you have trusted copies of everything. 
+ * Boolean combinations of these filters -- "git annex drop --present-on lanserver1 --or --present-on lanserver2" or similar syntax, although obviously doing this in full generality may be quite fiddly. diff --git a/doc/forum/Wishlist:_Ways_of_selecting_files_based_on_meta-information/comment_1_818f38aa988177d3a9415055e084f0fb._comment b/doc/forum/Wishlist:_Ways_of_selecting_files_based_on_meta-information/comment_1_818f38aa988177d3a9415055e084f0fb._comment new file mode 100644 index 0000000000..11b44b8094 --- /dev/null +++ b/doc/forum/Wishlist:_Ways_of_selecting_files_based_on_meta-information/comment_1_818f38aa988177d3a9415055e084f0fb._comment @@ -0,0 +1,15 @@ +[[!comment format=mdwn + username="http://christian.amsuess.com/chrysn" + nickname="chrysn" + subject="filtering based on git-commits" + date="2011-06-23T13:56:35Z" + content=""" +additional filter criteria could come from the git history: + +* `git annex get --touched-in HEAD~5..` to fetch what has recently been worked on +* `git annex get --touched-by chrysn --touched-in version-1.0..HEAD` to fetch what i've been working on recently (based on regexp or substring match in author; git experts could probably craft much more meaningful expressions) + +these options could also apply to `git annex find` -- actually, looking at the normal file system tools for such tasks, that might even be sufficient (think `git annex find --numcopies-gt 3 --present-on lanserver1 --drop` like `find -iname '*foo*' -delete`) + +(i was about to open a new forum discussion for commit-based getting, but this is close enough to be usefully joined in a discussion) +"""]] diff --git a/doc/forum/Wishlist:_Ways_of_selecting_files_based_on_meta-information/comment_2_97e2ed48bd552d02918c4f98f963e6e1._comment b/doc/forum/Wishlist:_Ways_of_selecting_files_based_on_meta-information/comment_2_97e2ed48bd552d02918c4f98f963e6e1._comment new file mode 100644 index 0000000000..787cf8f5d7 --- /dev/null +++ 
b/doc/forum/Wishlist:_Ways_of_selecting_files_based_on_meta-information/comment_2_97e2ed48bd552d02918c4f98f963e6e1._comment @@ -0,0 +1,9 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 2" + date="2011-09-19T18:46:35Z" + content=""" +This is now almost completely implemented. See [[walkthrough/powerful_file_matching]]. + +"""]] diff --git a/doc/forum/__34__git_annex_lock__34___very_slow_for_big_repo.mdwn b/doc/forum/__34__git_annex_lock__34___very_slow_for_big_repo.mdwn new file mode 100644 index 0000000000..9bacf28dc0 --- /dev/null +++ b/doc/forum/__34__git_annex_lock__34___very_slow_for_big_repo.mdwn @@ -0,0 +1,7 @@ +I found the command "git annex lock" very slow (much slower than the initial "git annex add" with SHA1), for a not so big directory, when run in a big repo. +It seems that each underlying git command is not fast, so I thought it would be better to run them once with all files as arguments. +I had to stop the lock command, and ran "git checkout ." (I did not change any file), is this a correct alternative? + +Thanks a LOT for this software, one that I missed since a long time (but wasn't able to write)! + +Rafaël diff --git a/doc/forum/__34__git_annex_lock__34___very_slow_for_big_repo/comment_1_044f1c5e5f7a939315c28087495a8ba8._comment b/doc/forum/__34__git_annex_lock__34___very_slow_for_big_repo/comment_1_044f1c5e5f7a939315c28087495a8ba8._comment new file mode 100644 index 0000000000..0e2773bda3 --- /dev/null +++ b/doc/forum/__34__git_annex_lock__34___very_slow_for_big_repo/comment_1_044f1c5e5f7a939315c28087495a8ba8._comment @@ -0,0 +1,16 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="fixed" + date="2011-05-31T18:51:13Z" + content=""" +Running `git checkout` by hand is fine, of course. + +Underlying problem is that git has some O(N) scalability of operations on the index with regards to the number of files in the repo. 
So a repo with a whole lot of files will have a big index, and any operation that changes the index, like the `git reset` this needs to do, has to read in the entire index, and write out a new, modified version. It seems that git could be much smarter about its index data structures here, but I confess I don't understand the index's data structures at all. I hope someone takes it on, as git's scalability to number of files in the repo is becoming a new pain point, now that scalability to large files is \"solved\". ;) + +Still, it is possible to speed this up at git-annex's level. Rather than doing a `git reset` followed by a git checkout, it can just `git checkout HEAD -- file`, and since that's one command, it can then be fed into the queueing machinery in git-annex (that exists mostly to work around this git malfeasance), and so only a single git command will need to be run to lock multiple files. + +I've just implemented the above. In my music repo, this changed a lock of a CD's worth of files from taking ctrl-c long to 1.75 seconds. Enjoy! + +(Hey, this even speeds up the one file case greatly, since `git reset -- file` is slooooow -- it seems to scan the *entire* repository tree. Yipes.) +"""]] diff --git a/doc/forum/__34__git_annex_lock__34___very_slow_for_big_repo/comment_2_e854b93415d5ab80eda8e3be3b145ec2._comment b/doc/forum/__34__git_annex_lock__34___very_slow_for_big_repo/comment_2_e854b93415d5ab80eda8e3be3b145ec2._comment new file mode 100644 index 0000000000..9e9e778ce9 --- /dev/null +++ b/doc/forum/__34__git_annex_lock__34___very_slow_for_big_repo/comment_2_e854b93415d5ab80eda8e3be3b145ec2._comment @@ -0,0 +1,13 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawnpdM9F8VbtQ_H5PaPMpGSxPe_d5L1eJ6w" + nickname="Rafaël" + subject="comment 2" + date="2011-05-31T21:43:22Z" + content=""" +Nice! 
+So if I understand correctly, 'git reset -- file' was there to discard staged (but not committed) changes made to 'file', before checking out, so that it is equivalent to directly 'git checkout HEAD -- file' ? +I'm curious about the \"queueing machinery in git-annex\": does it end up calling the one git command with multiple files as arguments? does it correspond to the message \"(Recording state in git...)\" ? +Thanks! + + +"""]] diff --git a/doc/forum/__34__git_annex_lock__34___very_slow_for_big_repo/comment_3_95c110500bc54013bc1969c1a9c8f842._comment b/doc/forum/__34__git_annex_lock__34___very_slow_for_big_repo/comment_3_95c110500bc54013bc1969c1a9c8f842._comment new file mode 100644 index 0000000000..87da0c396d --- /dev/null +++ b/doc/forum/__34__git_annex_lock__34___very_slow_for_big_repo/comment_3_95c110500bc54013bc1969c1a9c8f842._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 3" + date="2011-05-31T21:54:23Z" + content=""" +@Rafaël , you're correct on all counts. +"""]] diff --git a/doc/forum/advantages_of_SHA__42___over_WORM.mdwn b/doc/forum/advantages_of_SHA__42___over_WORM.mdwn new file mode 100644 index 0000000000..5b544593f5 --- /dev/null +++ b/doc/forum/advantages_of_SHA__42___over_WORM.mdwn @@ -0,0 +1,5 @@ +Thanks for creating git-annex. + +I am confused about the advantages of the SHA* backends over WORM. The "backends" page in this wiki says that with WORM, files "can be moved around, but should never be added to or changed". But I don't see any difference to SHA* files as long as the premise of WORM that "any file with the same basename, size, and modification time has the same content" is true. Using "git annex unlock", WORM files can be modified in the same way as SHA* files. + +If the storage I use is dependable (i.e. 
I don't need SHA checksums for detection of corruption), and I don't need to optimize for the case that the modification date of a file is changed but the contents stay the same, and if it is unlikely that several files will be identical, is there actually any advantage in using SHA*? diff --git a/doc/forum/advantages_of_SHA__42___over_WORM/comment_1_96c354cac4b5ce5cf6664943bc84db1d._comment b/doc/forum/advantages_of_SHA__42___over_WORM/comment_1_96c354cac4b5ce5cf6664943bc84db1d._comment new file mode 100644 index 0000000000..218027ca53 --- /dev/null +++ b/doc/forum/advantages_of_SHA__42___over_WORM/comment_1_96c354cac4b5ce5cf6664943bc84db1d._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-08-29T16:10:38Z" + content=""" +You're right -- as long as nothing changes a file without letting the modification time update, editing WORM files is safe. +"""]] diff --git a/doc/forum/bainstorming:_git_annex_push___38___pull.mdwn b/doc/forum/bainstorming:_git_annex_push___38___pull.mdwn new file mode 100644 index 0000000000..8a6c552b80 --- /dev/null +++ b/doc/forum/bainstorming:_git_annex_push___38___pull.mdwn @@ -0,0 +1,28 @@ +Wouldn't it make sense to offer + + git annex pull + +which would basically do + + git pull + git annex get + +and + + git annex push + +which would do + + git annex commit . + git annex put # (the proposed "send to default annex" command) + git commit -a -m "$HOST $(date +%F-%H-%M-%S)" # or similar + git push + +Resulting in commands that are totally analogous to git push & pull: Sync all data from/to a remote. + +> Update: + +This is useful: + + git config [--global] alias.annex-push '!git pull && git annex add . && git annex copy . 
--to $REMOTE --fast --quiet && git commit -a -m "$HOST $(date +%F--%H-%M-%S-%Z)" && git push' + diff --git a/doc/forum/bainstorming:_git_annex_push___38___pull/comment_1_3a0bf74b51586354b7a91f8b43472376._comment b/doc/forum/bainstorming:_git_annex_push___38___pull/comment_1_3a0bf74b51586354b7a91f8b43472376._comment new file mode 100644 index 0000000000..3d69e8f290 --- /dev/null +++ b/doc/forum/bainstorming:_git_annex_push___38___pull/comment_1_3a0bf74b51586354b7a91f8b43472376._comment @@ -0,0 +1,11 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-04-05T18:05:00Z" + content=""" +Maybe, otoh, part of the point of git-annex is that the data may be too large to pull down all of it. + +I find mr useful as a policy layer over top of git-annex, so \"mr update\" can pull down appropriate quantities of data from +appropriate locations. +"""]] diff --git a/doc/forum/bainstorming:_git_annex_push___38___pull/comment_2_b02ca09914e788393c01196686f95831._comment b/doc/forum/bainstorming:_git_annex_push___38___pull/comment_2_b02ca09914e788393c01196686f95831._comment new file mode 100644 index 0000000000..e0ecc1a819 --- /dev/null +++ b/doc/forum/bainstorming:_git_annex_push___38___pull/comment_2_b02ca09914e788393c01196686f95831._comment @@ -0,0 +1,14 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 2" + date="2011-04-05T20:52:52Z" + content=""" +No-so-subtle sarcasm taken and acknowledged :) + +Arguably, git-annex should know about any local limits and not have them implemented via mr from the outside. I guess my concern boils down to having git-annex do the right thing all by itself with minimal user interaction. And while I really do appreciate the flexibility of chaining commands, I am a firm believer in exposing the common use cases as easily as possible. 
+ +And yes, I am fully aware that not all annexes are created equal. Case in point, I would never use git annex pull on my laptop, but I would git annex push extensively. + + +"""]] diff --git a/doc/forum/batch_check_on_remote_when_using_copy.mdwn b/doc/forum/batch_check_on_remote_when_using_copy.mdwn new file mode 100644 index 0000000000..633b61d6f9 --- /dev/null +++ b/doc/forum/batch_check_on_remote_when_using_copy.mdwn @@ -0,0 +1,34 @@ +When I copy my local repository with SHA* to a remote repo with SHA*, every single file is checked by itself which seems rather inefficient. When my remote is accessed via ssh, git-annex opens a new connection for every check. If you are not using a ssh key or key agent, this gets tedious... + +For all locked files, either git's built-in mechanisms should be used or, if that's not possible, a few hundred checksums (assuming SHA* backend) should be transferred at once and then checked locally before deciding what to transfer. + +Once all checks are done, one single transfer session should be started. Creating new sessions and waiting for TCP's slowstart to get going is a lot less than efficient. + + +-- RichiH + +> (Use of SHA is irrelevant here, copy does not checksum anything.) +> +> I think what you're seeing is +> that `git annex copy --to remote` is slow, going to the remote repository +> every time to see if it has the file, while `git annex copy --from remote` +> is fast, since it looks at what files are locally present. +> +> That is something I mean to improve. At least `git annex copy --fast --to remote` +> could easily do a fast copy of all files that are known to be missing from +> the remote repository. When local and remote git repos are not 100% in sync, +> relying on that data could miss some files that the remote doesn't have anymore, +> but local doesn't know it dropped. That's why it's a candidate for `--fast`. +> +> I've just implemented that. 
+> +> While I do hope to improve ssh usage so that it sshs once, and feeds +> `git-annex-shell` a series of commands to run, that is a much longer-term +> thing. --[[Joey]] + +>> FYI, in a repo with 1228 files, all small, repos _completely in sync_. + + % git annex copy . --to foo # 1200 seconds + % git annex copy . --to foo --fast # 20 seconds + +>> RichiH diff --git a/doc/forum/can_git-annex_replace_ddm__63__.mdwn b/doc/forum/can_git-annex_replace_ddm__63__.mdwn new file mode 100644 index 0000000000..8d49652c3d --- /dev/null +++ b/doc/forum/can_git-annex_replace_ddm__63__.mdwn @@ -0,0 +1,13 @@ +Hi, +a few years ago I wrote a tool called 'ddm'. The code is overengineered and the script is more complicated than it should be, +but I think it demonstrates some good use cases, and I wonder how well git-annex can fulfill the requirements for those use cases - maybe I should remove ddm and start hacking with git-annex instead. + +To answer this question, you should read the section about the possible dataset types on http://dieter.plaetinck.be/ddm_a_distributed_data_manager.html, and the example at the bottom of that page. it demonstrates the idea behind the "selection" dataset to always try to keep a subset (the most appropriate, based on the output of some script) of files "checked out". +the introduction section on https://github.com/Dieterbe/ddm/raw/358f7cf92c0ba7b336dc97638351d4e324461afa/MANUAL should further clarify things, as well as give some more good use cases (as you can see it's a bit more about [semi-]automated workflows than purely tracking what's where) + +So I'm not sure, maybe the way to go for me is to make git-annex my "housekeeping about which data is where" backend and make ddm into a set of policies and tools on top of git-annex. + +Any input? 
+ +Thanks, +Dieter diff --git a/doc/forum/can_git-annex_replace_ddm__63__/comment_1_aa05008dfe800474ff76678a400099e1._comment b/doc/forum/can_git-annex_replace_ddm__63__/comment_1_aa05008dfe800474ff76678a400099e1._comment new file mode 100644 index 0000000000..eb824971f6 --- /dev/null +++ b/doc/forum/can_git-annex_replace_ddm__63__/comment_1_aa05008dfe800474ff76678a400099e1._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-02-14T22:08:54Z" + content=""" +Yes, there is value in layering something over git-annex to use a policy to choose what goes where. + +I use [mr](http://kitenet.net/~joey/code/mr/) to update and manage all my repositories, and since mr can be made to run arbitrary commands when doing eg, an update, I use its config file as such a policy layer. For example, my podcasts are pulled into my sound repository in a subdirectory; boxes that consume podcasts run \"git pull; git annex get podcasts --exclude=\"*/out/*\"; git annex drop podcasts/*/out\". I move podcasts to \"out\" directories once done with them (I have yet to teach mpd to do that for me..), and the next time I run \"mr update\" to update everything, it pulls down new ones and removes old ones. + +I don't see any obstacle to doing what you want. May be that you'd need better querying facilities in git-annex (so the policy layer can know what is available where), or finer control (--exclude is a good enough hammer for me, but maybe not for you). 
+"""]] diff --git a/doc/forum/can_git-annex_replace_ddm__63__/comment_2_008554306dd082d7f543baf283510e92._comment b/doc/forum/can_git-annex_replace_ddm__63__/comment_2_008554306dd082d7f543baf283510e92._comment new file mode 100644 index 0000000000..ab114bb1c8 --- /dev/null +++ b/doc/forum/can_git-annex_replace_ddm__63__/comment_2_008554306dd082d7f543baf283510e92._comment @@ -0,0 +1,19 @@ +[[!comment format=mdwn + username="http://dieter-be.myopenid.com/" + nickname="dieter" + subject="comment 2" + date="2011-02-16T21:32:04Z" + content=""" +thanks Joey, + +is it possible to run some git annex command that tells me, for a specific directory, which files are available in an other remote? (and which remote, and which filenames?) +I guess I could run that, do my own policy thingie, and run `git annex get` for the files I want. + +For your podcast use case (and some of my use cases) don't you think git [annex] might actually be overkill? For example your podcasts use case, what value does git annex give over a simple rsync/rm script? +such a script wouldn't even need a data store to store its state, unlike git. it seems simpler and cleaner to me. + +for the mpd thing, check http://alip.github.com/mpdcron/ (bad project name, it's a plugin based \"event handler\") +you should be able to write a simple plugin for mpdcron that does what you want (or even interface with mpd yourself from perl/python/.. 
to use its idle mode to get events) + +Dieter +"""]] diff --git a/doc/forum/can_git-annex_replace_ddm__63__/comment_3_4c69097fe2ee81359655e59a03a9bb8d._comment b/doc/forum/can_git-annex_replace_ddm__63__/comment_3_4c69097fe2ee81359655e59a03a9bb8d._comment new file mode 100644 index 0000000000..5cdd6aa0c6 --- /dev/null +++ b/doc/forum/can_git-annex_replace_ddm__63__/comment_3_4c69097fe2ee81359655e59a03a9bb8d._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 3" + date="2011-03-16T03:01:17Z" + content=""" +Whups, the comment above got stuck in moderation queue for 27 days. I will try to check that more frequently. + +In the meantime, I've implemented \"git annex whereis\" -- enjoy! + +I find keeping my podcasts in the annex useful because it allows me to download individual episodes or podcasts easily when low bandwidth is available (ie, dialup), or over sneakernet. And generally keeps everything organised. +"""]] diff --git a/doc/forum/confusion_with_remotes__44___map.mdwn b/doc/forum/confusion_with_remotes__44___map.mdwn new file mode 100644 index 0000000000..0ae75d4e99 --- /dev/null +++ b/doc/forum/confusion_with_remotes__44___map.mdwn @@ -0,0 +1,113 @@ +I'm starting out with git-annex and running into some confusion with setting up the remotes. 
+ +I have three systems I'm trying to set up (domains edited): + +* psychosis: ssh://psychosis.foo.com/vid +* bacon: ssh://bucket.foo.com/vid +* bucket: ssh://bucket.bar.org/vid + +And one bare repository so that I can have a single place to push/pull: + +* origin: https://git.foo.com/jim/vid.git + +On psychosis: + + psychosis$ git config --list | grep ^remote | sort + remote.bacon.annex-uuid=8f1f0898-f8c1-11e0-9bf2-b387af26ee63 + remote.bacon.fetch=+refs/heads/*:refs/remotes/bacon/* + remote.bacon.url=ssh://bucket.foo.com/vid + remote.bucket.annex-uuid=82814942-f8e0-11e0-b053-e70a61e98e19 + remote.bucket.fetch=+refs/heads/*:refs/remotes/bucket/* + remote.bucket.url=ssh://bucket.bar.org/vid + remote.origin.fetch=+refs/heads/*:refs/remotes/origin/* + remote.origin.url=https://git.foo.com/jim/vid.git + + psychosis$ git annex status + supported backends: WORM SHA1 SHA256 SHA512 SHA224 SHA384 SHA1E SHA256E SHA512E SHA224E SHA384E URL + supported remote types: git S3 bup directory rsync web hook + known repositories: + 09c0b436-f8de-11e0-842f-b7644539d57f -- here (psychosis) + 82814942-f8e0-11e0-b053-e70a61e98e19 -- bucket + local annex keys: 2256 + local annex size: 449 gigabytes + total annex keys: 2256 + total annex size: 449 gigabytes + backend usage: + WORM: 2256 + +**First point of confusion**: Why doesn't "bacon" show up in "git annex status"? I can "git annex copy --to bacon filename" and it will copy it there. Is there some step of setting it up that I missed? I basically just did "git remote add bacon ssh://bucket.foo.com/vid". 
+ +Now I've started setting up the remotes on each host: + +On bacon: + + bacon$ git config --list | grep ^remote | sort + remote.origin.fetch=+refs/heads/*:refs/remotes/origin/* + remote.origin.url=https://git.foo.com/jim/vid.git + remote.psychosis.annex-uuid=09c0b436-f8de-11e0-842f-b7644539d57f + remote.psychosis.fetch=+refs/heads/*:refs/remotes/psychosis/* + remote.psychosis.url=ssh://psychosis.foo.com/vid + + bacon$ git annex status + supported backends: WORM SHA1 SHA256 SHA512 SHA224 SHA384 SHA1E SHA256E SHA512E SHA224E SHA384E URL + supported remote types: git S3 bup directory rsync web hook + known repositories: + 09c0b436-f8de-11e0-842f-b7644539d57f -- psychosis + 8f1f0898-f8c1-11e0-9bf2-b387af26ee63 -- here (bacon) + temporary directory size: 366 megabytes (clean up with git-annex unused) + local annex keys: 1 + local annex size: 308 bytes + total annex keys: 2256 + total annex size: 449 gigabytes + backend usage: + WORM: 2256 + +On bucket: + + bucket$ git config --list | grep ^remote | sort + remote.origin.fetch=+refs/heads/*:refs/remotes/origin/* + remote.origin.url=https://git.foo.com/jim/vid.git + remote.psychosis.annex-uuid=09c0b436-f8de-11e0-842f-b7644539d57f + remote.psychosis.fetch=+refs/heads/*:refs/remotes/psychosis/* + remote.psychosis.url=ssh://psychosis.foo.com/vid + + bucket$ git annex status + supported backends: WORM SHA1 SHA256 SHA512 SHA224 SHA384 SHA1E SHA256E SHA512E SHA224E SHA384E URL + supported remote types: git S3 bup directory rsync web hook + known repositories: + 09c0b436-f8de-11e0-842f-b7644539d57f -- psychosis + 82814942-f8e0-11e0-b053-e70a61e98e19 -- here (bucket) + temporary directory size: 183 megabytes (clean up with git-annex unused) + local annex keys: 3 + local annex size: 550 megabytes + total annex keys: 2256 + total annex size: 449 gigabytes + backend usage: + WORM: 2256 + +But I'm getting weird results if I try to show the map from psychosis: + + psychosis$ git annex map + $ git annex map + map /vid/tv ok + map 
bacon (sshing...) + ok + map bucket (sshing...) + ok + map origin + failed + map psychosis (sshing...) + jim@psychosis.foo.com's password: + ok + map psychosis (sshing...) + jim@psychosis.foo.com's password: + ok + + running: dot -Tx11 map.dot + +**Second confusion**: it's as if psychosis was considered a new remote each time? +The generated map has psychosis listed with several redundant links: + +![Map](http://jim.sh/~jim/tmp/map.png) + +Is this some bug or do I just need to be hit with the clue bat? diff --git a/doc/forum/confusion_with_remotes__44___map/comment_1_a38ded23b7f288292a843abcb1a56f38._comment b/doc/forum/confusion_with_remotes__44___map/comment_1_a38ded23b7f288292a843abcb1a56f38._comment new file mode 100644 index 0000000000..97de93d9ec --- /dev/null +++ b/doc/forum/confusion_with_remotes__44___map/comment_1_a38ded23b7f288292a843abcb1a56f38._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-10-17T19:01:21Z" + content=""" +My guess is that psychosis has not pulled the git-annex branch since bacon was set up (or that bacon's git-annex branch has not been pushed to origin). git-annex status only shows remotes present in git-annex:uuid.log This may be a bug. + +The duplicate links in the map I don't quite understand. I only see duplicate links in my maps when I have the same repository configured as two different git remotes (for example, because the same repository can be accessed two different ways). You don't seem to have that in your config. 
+"""]] diff --git a/doc/forum/confusion_with_remotes__44___map/comment_2_cd1c98b1276444e859a22c3dbd6f2a79._comment b/doc/forum/confusion_with_remotes__44___map/comment_2_cd1c98b1276444e859a22c3dbd6f2a79._comment new file mode 100644 index 0000000000..a61b126c0c --- /dev/null +++ b/doc/forum/confusion_with_remotes__44___map/comment_2_cd1c98b1276444e859a22c3dbd6f2a79._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 2" + date="2011-10-17T19:02:50Z" + content=""" +Actually, there is a hint that, while you ran the git annex map on psychosis, it decided to ssh to itself two times. That seems to be where the duplicate links came from, I guess you must have some git remotes you did not show. +"""]] diff --git a/doc/forum/confusion_with_remotes__44___map/comment_3_18531754089c991b6caefc57a5c17fe9._comment b/doc/forum/confusion_with_remotes__44___map/comment_3_18531754089c991b6caefc57a5c17fe9._comment new file mode 100644 index 0000000000..4c77222619 --- /dev/null +++ b/doc/forum/confusion_with_remotes__44___map/comment_3_18531754089c991b6caefc57a5c17fe9._comment @@ -0,0 +1,24 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawnBJ6Dv1glxzzi4qIzGFNa6F-mfHIvv9Ck" + nickname="Jim" + subject="comment 3" + date="2011-10-17T19:50:06Z" + content=""" +No extra remotes (that I'm aware of); that output was only edited to change hostnames. + +On all three hosts, \"git push origin\" and \"git pull origin\" say everything is up to date. + +I'm using git-annex 3.20111011 on all hosts (although some were running 3.20110928 when I created the repositories). + +Regarding the multiple links, I've put a copy of the dot file [here](http://jim.sh/~jim/tmp/map.dot). +It shows psychosis in three separate subgraphs, that are just getting rendered together as one, +if that helps clarify anything. + +Wait, I just realized you said \"the git-annex branch\". My origin only has \"master\". 
+Do you mean the one specifically named \"git-annex\"? I thought that was something that +gets managed automatically, or is it something I need to manually check out and deal with? + +Any other info I could provide? + + +"""]] diff --git a/doc/forum/confusion_with_remotes__44___map/comment_4_3b89b6d1518267fcbc050c9de038b9ca._comment b/doc/forum/confusion_with_remotes__44___map/comment_4_3b89b6d1518267fcbc050c9de038b9ca._comment new file mode 100644 index 0000000000..f6e5993c8e --- /dev/null +++ b/doc/forum/confusion_with_remotes__44___map/comment_4_3b89b6d1518267fcbc050c9de038b9ca._comment @@ -0,0 +1,11 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawnBJ6Dv1glxzzi4qIzGFNa6F-mfHIvv9Ck" + nickname="Jim" + subject="comment 4" + date="2011-10-17T20:36:51Z" + content=""" +Ok, after pushing the \"git-annex\" branch to origin, then \"git annex status\" knows all repositories on all hosts, so that part makes sense now. Thanks for the tip. But the \"git annex map\" output hasn't changed. + + + +"""]] diff --git a/doc/forum/confusion_with_remotes__44___map/comment_5_27801584325d259fa490f67273f2ff71._comment b/doc/forum/confusion_with_remotes__44___map/comment_5_27801584325d259fa490f67273f2ff71._comment new file mode 100644 index 0000000000..77a2c4adbe --- /dev/null +++ b/doc/forum/confusion_with_remotes__44___map/comment_5_27801584325d259fa490f67273f2ff71._comment @@ -0,0 +1,16 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawnBJ6Dv1glxzzi4qIzGFNa6F-mfHIvv9Ck" + nickname="Jim" + subject="comment 5" + date="2011-10-18T04:59:13Z" + content=""" +I think: + +* The first extra edge is because bucket had \"ssh://psychosis.foo.com/vid/\", while +bacon had \"ssh://psychosis.foo.com/vid\" with no trailing slash. That got lost in the hostname/path editing I did, sorry. +Maybe those should be considered matching? 
+* The second extra edge is because, when running \"git annex map\" from psychosis, it doesn't recognize the remote's +remote URL as pointing back to itself. + +For the second case, after the \"spurious\" SSH, it could still recognize that the repositories are the same by the duplicated annex uuid, which currently shows up in `map.dot` twice. I wonder what it would take to avoid the spurious SSH -- maybe some config that lists \"alternate\" URLs that should be considered the same as the current repository? Or actually list URLs in uuid.log? Fortunately, I think this only affects the map, so it's not a big problem. +"""]] diff --git a/doc/forum/confusion_with_remotes__44___map/comment_6_496b0d9b86869bbac3a1356d53a3dda4._comment b/doc/forum/confusion_with_remotes__44___map/comment_6_496b0d9b86869bbac3a1356d53a3dda4._comment new file mode 100644 index 0000000000..412937f3fc --- /dev/null +++ b/doc/forum/confusion_with_remotes__44___map/comment_6_496b0d9b86869bbac3a1356d53a3dda4._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 6" + date="2011-10-22T01:18:27Z" + content=""" +Hmm, I don't see the spurious ssh edge in the dot file -- that is, I don't see any ssh:// uris in it? 
+"""]] diff --git a/doc/forum/confusion_with_remotes__44___map/comment_7_9a456f61f956a3d5e81e723d5a90794c._comment b/doc/forum/confusion_with_remotes__44___map/comment_7_9a456f61f956a3d5e81e723d5a90794c._comment new file mode 100644 index 0000000000..85ede3a89c --- /dev/null +++ b/doc/forum/confusion_with_remotes__44___map/comment_7_9a456f61f956a3d5e81e723d5a90794c._comment @@ -0,0 +1,27 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawnBJ6Dv1glxzzi4qIzGFNa6F-mfHIvv9Ck" + nickname="Jim" + subject="comment 7" + date="2011-10-22T05:25:47Z" + content=""" +I think that's because the SSH was successful (I entered the password and let it connect), so it got the UUID and put that in the .dot instead. The same UUID (for psychosis) then ended up in two different \"subgraph\" stanzas, and Graphviz just plotted them together as one node. + +

Maybe this will clarify: + +

On psychosis, run \"git annex map\" and press ^C at the ssh password prompt: [map-nossh.dot](http://jim.sh/~jim/tmp/map-nossh.dot) +![Map](http://jim.sh/~jim/tmp/map-nossh.png) + +

On psychosis, run \"git annex map\" and type the correct password: [map-goodssh.dot](http://jim.sh/~jim/tmp/map-goodssh.dot) +![Map](http://jim.sh/~jim/tmp/map-goodssh.png) + +As I see it: + +* psychosis (\"localhost\") connects to each of its remotes +* some of them point back to ssh://psychosis +* psychosis doesn't know that ssh://psychosis is itself, so it tries to connect +* if successful: + * psychosis gets put twice in the .dot as if it was two different hosts, one \"local\" and one \"ssh://psychosis\" + * graphviz recognizes it as the same node because the UUID is the same, but graphviz still draws the extra connecting lines +* if unsuccessful: + * ssh://psychosis is shown as an additional host that can't be reached +"""]] diff --git a/doc/forum/example_of_massively_disconnected_operation.mdwn b/doc/forum/example_of_massively_disconnected_operation.mdwn new file mode 100644 index 0000000000..00a5d8d6c5 --- /dev/null +++ b/doc/forum/example_of_massively_disconnected_operation.mdwn @@ -0,0 +1,33 @@ +I found this archival drive that had been offline since October 26th 2010. Since I released git-annex 0.02 on October 27th, this must have been made using the very first release of git-annex, ever. + +So, I synced it back up! :) --[[Joey]] + +

+commit 4151f4595fe6205d4aed653617ab23eb3335130a
+Author: Joey Hess 
+Date:   Tue Oct 26 02:18:03 2010 -0400
+
+joey> git pull
+remote: Counting objects: 428782, done.
+remote: Compressing objects: 100% (280714/280714), done.
+remote: Total 416692 (delta 150923), reused 389593 (delta 125143)
+Receiving objects: 100% (416692/416692), 44.71 MiB | 495 KiB/s, done.
+Resolving deltas: 100% (150923/150923), completed with 818 local objects.
+ * [new branch]      git-annex  -> origin/git-annex
+   1893f9c..9ebcc0e  master     -> origin/master
+Updating 1893f9c..9ebcc0e
+Checking out files: 100% (76884/76884), done.
+joey> git annex version
+git-annex version: 3.20110611
+local repository version: unknown
+default repository version: 3
+supported repository versions: 3
+upgrade supported from repository versions: 0 1 2
+joey> git config annex.version 0
+joey> git annex upgrade
+upgrade . (v0 to v1...) (v1 to v2) (moving content...) (updating symlinks...)  (moving location logs...) (v2 to v3) (merging origin/git-annex into git-annex...)
+
+  git-annex branch created
+  Be sure to push this branch when pushing to remotes.
+ok
+
diff --git a/doc/forum/getting_git_annex_to_do_a_force_copy_to_a_remote.mdwn b/doc/forum/getting_git_annex_to_do_a_force_copy_to_a_remote.mdwn new file mode 100644 index 0000000000..cc9091ae5b --- /dev/null +++ b/doc/forum/getting_git_annex_to_do_a_force_copy_to_a_remote.mdwn @@ -0,0 +1,11 @@ +I'm not sure if this is my stupidity or if it's a bug, but + + git annex copy --force --to REMOTE . + +just zip's through really quickly and doesn't actually force a copy to a +remote location. This is just following up on the +[[bugs/git-annex_directory_hashing_problems_on_osx]]. I want to just do a force copy of all my data to my portable disk to really make sure that the data is really there. I would similarly would want to make sure I can force a + + git annex copy --force --from REMOTE . + +to pull down files from a remote. diff --git a/doc/forum/getting_git_annex_to_do_a_force_copy_to_a_remote/comment_1_3deb2c31cad37a49896f00d600253ee3._comment b/doc/forum/getting_git_annex_to_do_a_force_copy_to_a_remote/comment_1_3deb2c31cad37a49896f00d600253ee3._comment new file mode 100644 index 0000000000..d2692f26f0 --- /dev/null +++ b/doc/forum/getting_git_annex_to_do_a_force_copy_to_a_remote/comment_1_3deb2c31cad37a49896f00d600253ee3._comment @@ -0,0 +1,14 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-04-03T16:49:01Z" + content=""" +How remote is REMOTE? If it's a directory on the same computer, then git-annex copy --to is actually quickly checking that each file is present on the remote, and when it is, skipping copying it again. + +If the remote is ssh, git-annex copy talks to the remote to see if it has the file. This makes copy --to slow, as Rich [[complained_before|forum/batch_check_on_remote_when_using_copy]]. 
:) + +So, copy --to does not trust location tracking information (unless --fast is specified), which means that it should be doing exactly what you want it to do in your situation -- transferring every file that is really not present in the destination repository already. + +Neither does copy --from, by the way. It always checks if each file is present in the current repository's annex before trying to download it. +"""]] diff --git a/doc/forum/getting_git_annex_to_do_a_force_copy_to_a_remote/comment_2_627f54d158d3ca4b72e45b4da70ff5cd._comment b/doc/forum/getting_git_annex_to_do_a_force_copy_to_a_remote/comment_2_627f54d158d3ca4b72e45b4da70ff5cd._comment new file mode 100644 index 0000000000..1079303197 --- /dev/null +++ b/doc/forum/getting_git_annex_to_do_a_force_copy_to_a_remote/comment_2_627f54d158d3ca4b72e45b4da70ff5cd._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 2" + date="2011-04-03T16:59:47Z" + content=""" +Remote as in \"another physical machine\". I assumed that + + git annex copy --force --to REMOTE . + +would have not trusted the contents in the current directory (or the remote that is being copied to) and then just go off and re-download/upload all the files and overwrite what is already there. I expected the combination of *--force* and copy *--to* that it would not bother to check if the files are there or not and just copy it regardless of the outcome. 
+"""]] diff --git a/doc/forum/getting_git_annex_to_do_a_force_copy_to_a_remote/comment_3_3f49dab11aae5df0c4eb5e4b8d741379._comment b/doc/forum/getting_git_annex_to_do_a_force_copy_to_a_remote/comment_3_3f49dab11aae5df0c4eb5e4b8d741379._comment new file mode 100644 index 0000000000..c3df214988 --- /dev/null +++ b/doc/forum/getting_git_annex_to_do_a_force_copy_to_a_remote/comment_3_3f49dab11aae5df0c4eb5e4b8d741379._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 3" + date="2011-04-03T17:12:35Z" + content=""" +On second thought maybe the current behaviour is better than what I am suggesting that the force command should do. I guess it's better to be safe than sorry. +"""]] diff --git a/doc/forum/git-annex_communication_channels.mdwn b/doc/forum/git-annex_communication_channels.mdwn new file mode 100644 index 0000000000..8c56ac36a2 --- /dev/null +++ b/doc/forum/git-annex_communication_channels.mdwn @@ -0,0 +1,10 @@ +Thought I'd ask how y'all are finding the current communication by this forum/website/git repo only. + +Would there be a benefit to having an irc channel for git-annex? + +Maybe a mailing list? (Any persuasive reason why it would be better than this forum?) + +Are the existing RSS feeds on this site, for eg, new [[comments]] and posts to this forum, sufficient to keep up with +things? 
+ +--[[Joey]] diff --git a/doc/forum/git-annex_communication_channels/comment_1_198325d2e9337c90f026396de89eec0e._comment b/doc/forum/git-annex_communication_channels/comment_1_198325d2e9337c90f026396de89eec0e._comment new file mode 100644 index 0000000000..163aae02cc --- /dev/null +++ b/doc/forum/git-annex_communication_channels/comment_1_198325d2e9337c90f026396de89eec0e._comment @@ -0,0 +1,17 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 1" + date="2011-03-28T15:48:08Z" + content=""" +No matter what you end up doing, I would appreciate a git-annex-announce@ list. + +I really like the persistence of ikiwiki, but it's not ideal for quick communication. I would be fine with IRC and/or ML. The advantage of a ML over ikiwiki is that it doesn't seem to be as \"wasteful\" to mix normal chat with actual problem-solving. But maybe that's merely my own perception. + +Speaking of RSS: I thought I had added a wishlist item to ikiwiki about providing per-subsite RSS feeds. For example there is no (obvious) way to subscribe to changes in http://git-annex.branchable.com/forum/git-annex_communication_channels/ . + +FWIW, I resorted to tagging my local clone of git-annex to keep track of what I've read, already. 
+ + +-- RichiH +"""]] diff --git a/doc/forum/git-annex_communication_channels/comment_2_c7aeefa6ef9a2e75d8667b479ade1b7f._comment b/doc/forum/git-annex_communication_channels/comment_2_c7aeefa6ef9a2e75d8667b479ade1b7f._comment new file mode 100644 index 0000000000..09a2b8c1a5 --- /dev/null +++ b/doc/forum/git-annex_communication_channels/comment_2_c7aeefa6ef9a2e75d8667b479ade1b7f._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 2" + date="2011-03-28T18:35:50Z" + content=""" +I think the forums/website currently is sufficient, I do at times wish there was a mailing list or anonymous git push to the wiki as I find editing posts through the web browser is some times tedious (the lack of !fmt or alt-q bugs me at times ;) ). The main advantage of keeping stuff on the site/forum is that everything gets saved and passed on to anyone who checks out the git repo of the code base. +"""]] diff --git a/doc/forum/git-annex_communication_channels/comment_3_1ff08a3e0e63fa0e560cbc9602245caa._comment b/doc/forum/git-annex_communication_channels/comment_3_1ff08a3e0e63fa0e560cbc9602245caa._comment new file mode 100644 index 0000000000..72a48445ed --- /dev/null +++ b/doc/forum/git-annex_communication_channels/comment_3_1ff08a3e0e63fa0e560cbc9602245caa._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 3" + date="2011-03-28T20:47:23Z" + content=""" +Push access to the non-code bits of git-annex' ikiwiki would be very welcome indeed. Given the choice, I would rather edit everything in Vim than in a browser. 
-- RichiH +"""]] diff --git a/doc/forum/git-annex_communication_channels/comment_4_1ba6ddf54843c17c7d19a9996f2ab712._comment b/doc/forum/git-annex_communication_channels/comment_4_1ba6ddf54843c17c7d19a9996f2ab712._comment new file mode 100644 index 0000000000..d6bba93651 --- /dev/null +++ b/doc/forum/git-annex_communication_channels/comment_4_1ba6ddf54843c17c7d19a9996f2ab712._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawnx8kHW66N3BqmkVpgtXDlYMvr8TJ5VvfY" + nickname="Yaroslav" + subject="comment 4" + date="2011-04-13T17:53:26Z" + content=""" +.1 cents: Having IRC would be really nice for seeking quick help. E.g. like I was trying to do now, google lead me to this page. +"""]] diff --git a/doc/forum/git-annex_communication_channels/comment_5_404b723a681eb93fee015cea8024b6bc._comment b/doc/forum/git-annex_communication_channels/comment_5_404b723a681eb93fee015cea8024b6bc._comment new file mode 100644 index 0000000000..042dcc1f38 --- /dev/null +++ b/doc/forum/git-annex_communication_channels/comment_5_404b723a681eb93fee015cea8024b6bc._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkptNW1PzrVjYlJWP_9e499uH0mjnBV6GQ" + nickname="Christian" + subject="comment 5" + date="2011-04-14T11:24:59Z" + content=""" +I would also like an git-annex channel. Would be #git-annex@OFTC ok? 
+"""]] diff --git a/doc/forum/git-annex_communication_channels/comment_6_0d87d0e26461494b1d7f8a701a924729._comment b/doc/forum/git-annex_communication_channels/comment_6_0d87d0e26461494b1d7f8a701a924729._comment new file mode 100644 index 0000000000..8dfd0f8203 --- /dev/null +++ b/doc/forum/git-annex_communication_channels/comment_6_0d87d0e26461494b1d7f8a701a924729._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 6" + date="2011-04-15T19:32:08Z" + content=""" +We seem to be using #vcs-home @ OFTC for now. madduck is fine with it and joeyh pokes his head in there, as well. I just added a CIA bot to #vcs-home and this comment is a test if pushing works. -- RichiH +"""]] diff --git a/doc/forum/git-annex_communication_channels/comment_7_2c87c7a0648fe87c2bf6b4391f1cc468._comment b/doc/forum/git-annex_communication_channels/comment_7_2c87c7a0648fe87c2bf6b4391f1cc468._comment new file mode 100644 index 0000000000..830d678ca1 --- /dev/null +++ b/doc/forum/git-annex_communication_channels/comment_7_2c87c7a0648fe87c2bf6b4391f1cc468._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="anonymous git push" + date="2011-05-19T19:21:51Z" + content=""" +@Jimmy mentioned anonymous git push -- that is now enabled for this wiki. Enjoy! + +I may try to spend more time on #vcs-home -- or I can be summoned there from my other lurking places on irc, I guess. +"""]] diff --git a/doc/forum/git-annex_on_OSX.mdwn b/doc/forum/git-annex_on_OSX.mdwn new file mode 100644 index 0000000000..a00548366a --- /dev/null +++ b/doc/forum/git-annex_on_OSX.mdwn @@ -0,0 +1 @@ +See [[install/OSX]]. 
diff --git a/doc/forum/git_annex_add_crash_and_subsequent_recovery.mdwn b/doc/forum/git_annex_add_crash_and_subsequent_recovery.mdwn new file mode 100644 index 0000000000..3f3b943a0b --- /dev/null +++ b/doc/forum/git_annex_add_crash_and_subsequent_recovery.mdwn @@ -0,0 +1,25 @@ +Perhaps stupidly I added some very large bare git repos into a git-annex. + +This took a very long time, used lot's of memory, and then crashed. I didn't catch the error (which is annoying) - sorry about that. IIRC it is the same error if one Ctrl-c's the addition. + +I ran `git annex add .` a second time and eventually killed it (I perhaps should have waited - I now think it was working). + +A `git annex unannex` fixed up some files but somehow I managed to end up with tonnes of files all sym-linked into the git annex object directory but not somehow recognised as annexed files. I'm assuming that they somehow didn't make it into git annex's meta-data layer (or equivalent). + +Commands such as `git annex {fsck,whereis,unannex} weirdfile` immediately returned without error. + +I've now spent a lot of manual time copying the files back. Doing the following, not the cleverest but I was a little panicky about my data... + + find . -type l -exec mv \{} \{}.link \; #Move link names out of the way + find . -type l -exec cp \{} \{}.cp \; #Copy follows links so we can copy target back to link location + find . -type f -name "*.link.cp" | xargs -n 1 rename 's/\.link\.cp//' #Change to original name + find . -type l -exec rm \{} \; #Ditch the links + git annex unused + git annex dropunused `seq 9228` + +9228 files were found to be unused, this gives an idea of the scale of the number of "lost" files for want of a better term. + +A pretty poor bug report as these things go. Anyone any idea what might have happened (it didn't seem space or memory related)? Or how I might have fixed it a little more cleverly? + +For reference I am using stable Debian, git annex version 3.20111011. 
+ diff --git a/doc/forum/git_annex_add_crash_and_subsequent_recovery/comment_1_062d0153a379c1ba1df8585b90220d3d._comment b/doc/forum/git_annex_add_crash_and_subsequent_recovery/comment_1_062d0153a379c1ba1df8585b90220d3d._comment new file mode 100644 index 0000000000..e879441ff8 --- /dev/null +++ b/doc/forum/git_annex_add_crash_and_subsequent_recovery/comment_1_062d0153a379c1ba1df8585b90220d3d._comment @@ -0,0 +1,18 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawnXybLxkPMYpP3yw4b_I6IdC3cKTD-xEdU" + nickname="Matt" + subject="comment 1" + date="2011-12-06T12:50:27Z" + content=""" +Ah HA! Looks like I found the cause of this. + + [matt@rss01:~/files/matt_ford]0> git annex add mhs + add mhs/Accessing_Web_Manager_V10.pdf ok + .... + add mhs/MAHSC Costing Request Form Dual + Organisations - FINAL v20 Oct 2010.xls git-annex: unknown response from git cat-file refs/heads/git-annex:8d5/ed4/WORM-s568832-m1323164214--MAHSC Costing Request Form Dual missing + +Spot the file name with a newline character in it! This causes the error message above. It seems that the files proceeding this badly named file are sym-linked but not registered. + +Perhaps a bug? +"""]] diff --git a/doc/forum/git_annex_add_crash_and_subsequent_recovery/comment_2_6fc6be43c488c468a4811cd0a1360225._comment b/doc/forum/git_annex_add_crash_and_subsequent_recovery/comment_2_6fc6be43c488c468a4811cd0a1360225._comment new file mode 100644 index 0000000000..38f2434f49 --- /dev/null +++ b/doc/forum/git_annex_add_crash_and_subsequent_recovery/comment_2_6fc6be43c488c468a4811cd0a1360225._comment @@ -0,0 +1,19 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 2" + date="2011-12-06T17:08:37Z" + content=""" +The bug with newlines is now fixed. + +Thought I'd mention how to clean up from interrupting `git annex add`. 
+When you do that, it doesn't get a chance to `git add` the files it's +added (this is normally done at the end, or sometimes at points in the middle when you're adding a *lot* of files). +Which is also why fsck, whereis, and unannex wouldn't operate on them, since they only deal with files in git. + +So the first step is to manually use `git add` on any symlinks. + +Then, `git commit` as usual. + +At that point, `git annex unannex` would get you back to your starting state. +"""]] diff --git a/doc/forum/git_annex_add_crash_and_subsequent_recovery/comment_3_45efaaf27d9b580c4c75cbcdc4f65b64._comment b/doc/forum/git_annex_add_crash_and_subsequent_recovery/comment_3_45efaaf27d9b580c4c75cbcdc4f65b64._comment new file mode 100644 index 0000000000..b58f81c5b7 --- /dev/null +++ b/doc/forum/git_annex_add_crash_and_subsequent_recovery/comment_3_45efaaf27d9b580c4c75cbcdc4f65b64._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawnXybLxkPMYpP3yw4b_I6IdC3cKTD-xEdU" + nickname="Matt" + subject="comment 3" + date="2011-12-07T07:39:15Z" + content=""" +Ah - very good to know that recovery is easier than the method I used. + +I wonder if it could be made a feature to automatically and safely recover/resume from an interrupted `git add`? +"""]] diff --git a/doc/forum/git_annex_add_crash_and_subsequent_recovery/comment_4_c560eae40867512b0af2cbef161fc8ac._comment b/doc/forum/git_annex_add_crash_and_subsequent_recovery/comment_4_c560eae40867512b0af2cbef161fc8ac._comment new file mode 100644 index 0000000000..8fca16cada --- /dev/null +++ b/doc/forum/git_annex_add_crash_and_subsequent_recovery/comment_4_c560eae40867512b0af2cbef161fc8ac._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 4" + date="2011-12-07T20:54:51Z" + content=""" +Good idea! I've made `git annex add` recover when ran a second time. 
+"""]] diff --git a/doc/forum/git_annex_ls___47___metadata_in_git_annex_whereis.mdwn b/doc/forum/git_annex_ls___47___metadata_in_git_annex_whereis.mdwn new file mode 100644 index 0000000000..f1aa5cc06d --- /dev/null +++ b/doc/forum/git_annex_ls___47___metadata_in_git_annex_whereis.mdwn @@ -0,0 +1 @@ +I just started experimenting with git annex, and I found that I would like to have a way to figure out metadata (well, size. Maybe modification date) of a non-local file. I first checked if there is "git annex ls" (which could list known files in an ls-like way) and found "git annex whereis" as somewhat a replacement, but it does not give metadata information. diff --git a/doc/forum/git_annex_ls___47___metadata_in_git_annex_whereis/comment_1_7fba10b85f4d9289c7782eccef46949e._comment b/doc/forum/git_annex_ls___47___metadata_in_git_annex_whereis/comment_1_7fba10b85f4d9289c7782eccef46949e._comment new file mode 100644 index 0000000000..379b9f976b --- /dev/null +++ b/doc/forum/git_annex_ls___47___metadata_in_git_annex_whereis/comment_1_7fba10b85f4d9289c7782eccef46949e._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-11-14T22:46:35Z" + content=""" +When I want that, I ls -l the file and look at the symlink to the key. Ie, in SHA1-s10481423--efc7eec0d711212842cd6bb8f957e1628146d6ed the size is 10481423 bytes. 
+"""]] diff --git a/doc/forum/git_annex_ls___47___metadata_in_git_annex_whereis/comment_2_7dcec124ea7d0291ed40d80e2ffd5c7e._comment b/doc/forum/git_annex_ls___47___metadata_in_git_annex_whereis/comment_2_7dcec124ea7d0291ed40d80e2ffd5c7e._comment new file mode 100644 index 0000000000..3dd14bf010 --- /dev/null +++ b/doc/forum/git_annex_ls___47___metadata_in_git_annex_whereis/comment_2_7dcec124ea7d0291ed40d80e2ffd5c7e._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 2" + date="2011-11-14T22:48:03Z" + content=""" +It might make sense to put this functionality in git annex find. Perhaps a format string with a %s for example. +"""]] diff --git a/doc/forum/git_pull_remote_git-annex.mdwn b/doc/forum/git_pull_remote_git-annex.mdwn new file mode 100644 index 0000000000..349610693b --- /dev/null +++ b/doc/forum/git_pull_remote_git-annex.mdwn @@ -0,0 +1,11 @@ +I thought I'd followed the walk through when initially setting up my repos. + +However I find that I have to do the following to sync my annex's. + + git pull remote master + git checkout git-annex + git pull remote git-annex + git checkout master + git annex get . + +Has something gone wrong? I see no mention of syncing git-annex repos in the walk-through... diff --git a/doc/forum/git_pull_remote_git-annex/comment_1_9c245db3518d8b889ecdf5115ad9e053._comment b/doc/forum/git_pull_remote_git-annex/comment_1_9c245db3518d8b889ecdf5115ad9e053._comment new file mode 100644 index 0000000000..989ab9bcd8 --- /dev/null +++ b/doc/forum/git_pull_remote_git-annex/comment_1_9c245db3518d8b889ecdf5115ad9e053._comment @@ -0,0 +1,36 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-12-06T16:43:29Z" + content=""" +You're taking a very long and strange way to a place that you can reach as follows: + +
+git pull remote
+git annex get .
+
+ +Which is just as shown in [[walkthrough/getting_file_content]]. + +In particular, \"git pull remote\" first fetches all branches from the remote, including the git-annex branch. +When you say \"git pull remote master\", you're preventing it from fetching the git-annex branch. +If for some reason you want the slightly longer way around, it is: + +
+git pull remote master
+git fetch remote git-annex
+git annex get .
+
+ +Or, equivalently but with fewer network connections: + +
+git fetch remote
+git merge remote/master
+git annex get .
+
+ +BTW, notice that this is all bog-standard git branch pulling stuff, not specific to git-annex in the least. +Consult your extensive and friendly git documentation for details. :) +"""]] diff --git a/doc/forum/git_pull_remote_git-annex/comment_2_0f7f4a311b0ec1d89613e80847e69b42._comment b/doc/forum/git_pull_remote_git-annex/comment_2_0f7f4a311b0ec1d89613e80847e69b42._comment new file mode 100644 index 0000000000..198f95cee8 --- /dev/null +++ b/doc/forum/git_pull_remote_git-annex/comment_2_0f7f4a311b0ec1d89613e80847e69b42._comment @@ -0,0 +1,14 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawnXybLxkPMYpP3yw4b_I6IdC3cKTD-xEdU" + nickname="Matt" + subject="comment 2" + date="2011-12-06T23:23:29Z" + content=""" +Doh! Total brain melt on my part. Thanks for the additional info. Not taking my time and reading things properly - kept assuming that the full remote pull failed due to the warning: + + You asked to pull from the remote 'rss', but did not specify + a branch. Because this is not the default configured remote + for your current branch, you must specify a branch on the command line. + +Rookie mistake indeed. +"""]] diff --git a/doc/forum/git_pull_remote_git-annex/comment_3_1aa89725b5196e40a16edeeb5ccfa371._comment b/doc/forum/git_pull_remote_git-annex/comment_3_1aa89725b5196e40a16edeeb5ccfa371._comment new file mode 100644 index 0000000000..0ead32dad2 --- /dev/null +++ b/doc/forum/git_pull_remote_git-annex/comment_3_1aa89725b5196e40a16edeeb5ccfa371._comment @@ -0,0 +1,14 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawnXybLxkPMYpP3yw4b_I6IdC3cKTD-xEdU" + nickname="Matt" + subject="comment 3" + date="2011-12-21T16:06:25Z" + content=""" +hmmmm - I'm still not sure I get this. 
+ +If I'm using a whole bunch of distributed annexs with no central repo, then I can not do a `git pull remote` without either specifying the branch to use or changing default tracked remote via `git branch --set-upstream`. The former like you note doesn't pull the git-annex branch down the latter only works one-at-a-time. + +The docs read to me as though I ought to be able to do a `git pull remote ; git annex get .` using anyone of my distributed annexs. + +Am I doing something wrong? Or is the above correct? +"""]] diff --git a/doc/forum/git_pull_remote_git-annex/comment_4_646f2077edcabc000a7d9cb75a93cf55._comment b/doc/forum/git_pull_remote_git-annex/comment_4_646f2077edcabc000a7d9cb75a93cf55._comment new file mode 100644 index 0000000000..6ba1796939 --- /dev/null +++ b/doc/forum/git_pull_remote_git-annex/comment_4_646f2077edcabc000a7d9cb75a93cf55._comment @@ -0,0 +1,37 @@ +[[!comment format=mdwn + username="http://adamspiers.myopenid.com/" + nickname="Adam" + subject="I think Matt is right." + date="2011-12-23T14:04:44Z" + content=""" +I got bitten by this too. It seems that the user is expected to fetch +remote git-annex branches themselves, but this is not documented +anywhere. + +The man page says of \"git annex merge\": + + Automatically merges any changes from remotes into the git-annex + branch. + +I am not a git newbie, but even so I had incorrectly assumed that git +annex merge would take care of pulling the git-annex branch from the +remote prior to merging, thereby ensuring all versions of the +git-annex branch would be merged, and that the location tracking data +would be synced across all peer repositories. + +My master branches do not track any specific upstream branch, because +I am operating in a decentralized fashion. Therefore the error +message caused by `git pull $remote` succeeded in encouraging me to +instead use `git pull $remote master`, and this excludes the git-annex +branch from the fetch. 
Even worse, a git newbie might realise this +and be tempted to do `git pull $remote git-annex`. + +Therefore I think it needs to be explicitly documented that + + git fetch $remote + git merge $remote/master + +is required when the local branch doesn't track an upstream branch. +Or maybe a `--fetch` option could be added to `git annex merge` to +perform the fetch from all remotes before running the merge(s). +"""]] diff --git a/doc/forum/git_pull_remote_git-annex/comment_5_4f2a05ef6551806dd0ec65372f183ca4._comment b/doc/forum/git_pull_remote_git-annex/comment_5_4f2a05ef6551806dd0ec65372f183ca4._comment new file mode 100644 index 0000000000..c01f241202 --- /dev/null +++ b/doc/forum/git_pull_remote_git-annex/comment_5_4f2a05ef6551806dd0ec65372f183ca4._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 5" + date="2011-12-23T16:50:26Z" + content=""" +My goal for `git-annex merge` is that users should not need to know about it, so it should not be doing expensive pulls. + +I hope that `git annex sync` will grow some useful features to support fully distributed git usage, as being discussed in [[pure_git-annex_only_workflow]]. I still use centralized git to avoid these problems myself. +"""]] diff --git a/doc/forum/git_pull_remote_git-annex/comment_6_3925d1aa56bce9380f712e238d63080f._comment b/doc/forum/git_pull_remote_git-annex/comment_6_3925d1aa56bce9380f712e238d63080f._comment new file mode 100644 index 0000000000..f4b5ebec20 --- /dev/null +++ b/doc/forum/git_pull_remote_git-annex/comment_6_3925d1aa56bce9380f712e238d63080f._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://adamspiers.myopenid.com/" + nickname="Adam" + subject="comment 6" + date="2011-12-23T17:14:03Z" + content=""" +Extending `git annex sync` would be nice, although auto-commit does not suit every use case, so it would be better not to couple one to the other. 
+"""]] diff --git a/doc/forum/git_pull_remote_git-annex/comment_7_24c45ee981b18bc78325c768242e635d._comment b/doc/forum/git_pull_remote_git-annex/comment_7_24c45ee981b18bc78325c768242e635d._comment new file mode 100644 index 0000000000..dad2c0af21 --- /dev/null +++ b/doc/forum/git_pull_remote_git-annex/comment_7_24c45ee981b18bc78325c768242e635d._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://adamspiers.myopenid.com/" + nickname="Adam" + subject="comment 7" + date="2011-12-23T17:24:58Z" + content=""" +P.S. I see you already [fixed the docs](http://source.git-annex.branchable.com/?p=source.git;a=commitdiff;h=a0227e81f9c82afc12ac1bd1cecd63cc0894d751) - thanks! :) +"""]] diff --git a/doc/forum/git_tag_missing_for_3.20111011.mdwn b/doc/forum/git_tag_missing_for_3.20111011.mdwn new file mode 100644 index 0000000000..781d0c91a0 --- /dev/null +++ b/doc/forum/git_tag_missing_for_3.20111011.mdwn @@ -0,0 +1 @@ +Well, the subject pretty much says it all :) diff --git a/doc/forum/git_tag_missing_for_3.20111011/comment_1_7a53bf273f3078ab3351369ef2b5f2a6._comment b/doc/forum/git_tag_missing_for_3.20111011/comment_1_7a53bf273f3078ab3351369ef2b5f2a6._comment new file mode 100644 index 0000000000..87cda998bf --- /dev/null +++ b/doc/forum/git_tag_missing_for_3.20111011/comment_1_7a53bf273f3078ab3351369ef2b5f2a6._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="fixed that" + date="2011-10-13T15:36:59Z" + content=""" +:) +"""]] diff --git a/doc/forum/hashing_objects_directories.mdwn b/doc/forum/hashing_objects_directories.mdwn new file mode 100644 index 0000000000..5b7708fb58 --- /dev/null +++ b/doc/forum/hashing_objects_directories.mdwn @@ -0,0 +1,27 @@ +I'm wondering how easy the addition of hashing to the directories of the objects would be. + +Currently a tree directory structure becomes a flat two level tree under the .git/annex/objects directory ([[internals]]). 
This, through the 555 mode on the directory prevents the accidental destruction of content, which is _good_. However file and directory numbers soon add up in there and as such any file-systems with sub directory limitations will quickly realize the limit (certainly quicker than maybe expected). + +Suggestion is therefore to change from + + `.git/annex/objects/SHA1:123456789abcdef0123456789abcdef012345678/SHA1:123456789abcdef0123456789abcdef012345678` + +to + + `.git/annex/objects/SHA1:1/2/3456789abcdef0123456789abcdef012345678/SHA1:123456789abcdef0123456789abcdef012345678` + +or anything in between to a paranoid + + `.git/annex/objects/SHA1:123/456/789/abc/def/012/345/678/9ab/cde/f01/234/5678/SHA1:123456789abcdef0123456789abcdef012345678` + +Also the use of a colon specifically breaks FAT32 ([[bugs/fat_support]]), must it be a colon or could an extra directory be used? i.e. `.git/annex/objects/SHA1/*/...` + +`git annex init` could also create all but the last level directory on initialization. I'm thinking `SHA1/1/1, SHA1/1/2, ..., SHA256/f/f, ..., URL/f/f, ..., WORM/f/f` + +> This is done now with a 2-level hash. It also hashes .git-annex/ log +> files which were the worse problem really. Scales to hundreds of millions +> of files with each dir having 1024 or fewer contents. 
Example: +> +> `me -> .git/annex/objects/71/9t/WORM-s3-m1300247299--me/WORM-s3-m1300247299--me` +> +> --[[Joey]] diff --git a/doc/forum/hashing_objects_directories/comment_1_c55c56076be4f54251b0b7f79f28a607._comment b/doc/forum/hashing_objects_directories/comment_1_c55c56076be4f54251b0b7f79f28a607._comment new file mode 100644 index 0000000000..3a19310b63 --- /dev/null +++ b/doc/forum/hashing_objects_directories/comment_1_c55c56076be4f54251b0b7f79f28a607._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-03-14T16:12:49Z" + content=""" +My experience is that modern filesystems are not going to have many issues with tens to hundreds of thousands of items in the directory. However, if a transition does happen for FAT support I will consider adding hashing. Although getting a good balanced hash in general without, say, checksumming the filename and taking part of the checksum, is difficult. + +I prefer to keep all the metadata in the filename, as this eases recovery if the files end up in lost+found. So while \"SHA/\" is a nice workaround for the FAT colon problem, I'll be doing something else. (What I'm not sure yet.) + +There is no point in creating unused hash directories on initialization. If anything, with a bad filesystem that just guarantees worst performance from the beginning.. 
+"""]] diff --git a/doc/forum/hashing_objects_directories/comment_2_504c96959c779176f991f4125ea22009._comment b/doc/forum/hashing_objects_directories/comment_2_504c96959c779176f991f4125ea22009._comment new file mode 100644 index 0000000000..64f1e16b50 --- /dev/null +++ b/doc/forum/hashing_objects_directories/comment_2_504c96959c779176f991f4125ea22009._comment @@ -0,0 +1,14 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 2" + date="2011-03-15T13:52:16Z" + content=""" +Can't you just use an underscore instead of a colon? + +Would it be feasible to split directories dynamically? I.e. start with SHA1_123456789abcdef0123456789abcdef012345678/SHA1_123456789abcdef0123456789abcdef012345678 and, at a certain cut-off point, switch to shorter directory names? This could even be done per subdirectory and based purely on a locally-configured number. Different annexes on different file systems or with different file subsets might even have different thresholds. This would ensure scale while not forcing you to segment from the start. Also, while segmenting with longer directory names means a flatter tree, segments longer than four characters might not make too much sense. Segmenting too often could lead to some directories becoming too populated, bringing us back to the dynamic segmentation. + +All of the above would make merging annexes by hand a _lot_ harder, but I don't know if this is a valid use case. And if all else fails, one could merge everything with the unsegemented directory names and start again from there. 
+ +-- RichiH +"""]] diff --git a/doc/forum/hashing_objects_directories/comment_3_9134bde0a13aac0b6a4e5ebabd7f22e8._comment b/doc/forum/hashing_objects_directories/comment_3_9134bde0a13aac0b6a4e5ebabd7f22e8._comment new file mode 100644 index 0000000000..51deb2f959 --- /dev/null +++ b/doc/forum/hashing_objects_directories/comment_3_9134bde0a13aac0b6a4e5ebabd7f22e8._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 3" + date="2011-03-16T03:13:39Z" + content=""" +It is unfortunately not possible to do system-dependent hashing, so long as git-annex stores symlinks to the content in git. + +It might be possible to start without hashing, and add hashing for new files after a cutoff point. It would add complexity. + +I'm currently looking at a 2 character hash directory segment, based on an md5sum of the key, which splits it into 1024 buckets. git uses just 256 buckets for its object directory, but then its objects tend to get packed away. I sorta hope that one level is enough, but guess I could go to 2 levels (objects/ab/cd/key), which would provide 1048576 buckets, probably plenty, as if you are storing more than a million files, you are probably using a modern enough system to have a filesystem that doesn't need hashing. +"""]] diff --git a/doc/forum/hashing_objects_directories/comment_4_0de9170e429cbfea66f5afa8980d45ac._comment b/doc/forum/hashing_objects_directories/comment_4_0de9170e429cbfea66f5afa8980d45ac._comment new file mode 100644 index 0000000000..b29eea1b2b --- /dev/null +++ b/doc/forum/hashing_objects_directories/comment_4_0de9170e429cbfea66f5afa8980d45ac._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 4" + date="2011-03-16T04:06:19Z" + content=""" +The .git-annex/ directory is what really needs hashing. + +Consider that when git looks for changes in there, it has to scan every file in the directory.
With hashing, it should be able to more quickly identify just the subdirectories that contained changed files, by the directory mtimes. + +And the real kicker is that when committing there, git has to create a tree object containing every single file, even if only 1 file changed. That will be a lot of extra work; with hashed subdirs it will instead create just 2 or 3 small tree objects leading down to the changed file. (Probably these trees both pack down to similar size pack files, not sure.) +"""]] diff --git a/doc/forum/hashing_objects_directories/comment_5_ef6cfd49d24c180c2d0a062e5bd3a0be._comment b/doc/forum/hashing_objects_directories/comment_5_ef6cfd49d24c180c2d0a062e5bd3a0be._comment new file mode 100644 index 0000000000..c558ee65ee --- /dev/null +++ b/doc/forum/hashing_objects_directories/comment_5_ef6cfd49d24c180c2d0a062e5bd3a0be._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 5" + date="2011-03-16T15:47:17Z" + content=""" +If you can't segment the names retroactively, it's better to start with segmenting, imo. + +As subdirectories are cheap, going with ab/cd/rest or even ab/cd/ef/rest by default wouldn't hurt. + +Your point about git not needing to create as many tree objects is a kicker indeed. If I were you, I would default to segmentation. +"""]] diff --git a/doc/forum/incompatible_versions__63__.mdwn b/doc/forum/incompatible_versions__63__.mdwn new file mode 100644 index 0000000000..13eb181491 --- /dev/null +++ b/doc/forum/incompatible_versions__63__.mdwn @@ -0,0 +1 @@ +Are versions 0.14 and 0.20110522 incompatible? I can't seem to copy files from a system running 0.14 to one running 20110522. 
diff --git a/doc/forum/incompatible_versions__63__/comment_1_629f28258746d413e452cbd42a1a43f4._comment b/doc/forum/incompatible_versions__63__/comment_1_629f28258746d413e452cbd42a1a43f4._comment new file mode 100644 index 0000000000..3702fde6ea --- /dev/null +++ b/doc/forum/incompatible_versions__63__/comment_1_629f28258746d413e452cbd42a1a43f4._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-06-08T00:40:54Z" + content=""" +They are not. See [[upgrades]] +"""]] diff --git a/doc/forum/location_tracking_cleanup.mdwn b/doc/forum/location_tracking_cleanup.mdwn new file mode 100644 index 0000000000..7e2e230af7 --- /dev/null +++ b/doc/forum/location_tracking_cleanup.mdwn @@ -0,0 +1,24 @@ +I recently started experimenting with git annex, adding files that I've had +floating across several computers to repositories. During the testing I had +a few occasions where I wrecked a repository somehow, and decided to wipe it +and start anew (at this point there was no important files in them so I thought +this is the easiest way). Well, as it turns out this interacts badly with location +tracking, since now `git annex whereis` shows files residing in all those destroyed +repositories, all having same names as some existing repositories. This makes it hard +to follow whether a repo actually has a file, or was the file only seen in some dead +repo with the same name. + +I planned on cleaning this up by looking up the UUIDs of the now stable, existing +repos and untrusting all the dead copies (they should effectively disappear from +git annex´s output then, right?), but I didn't find an easy way to look up the UUID +of the current repository (maybe this could be included in `git annex status`?) 
+I also noticed that untrust cannot remove the trust based on the UUID -- if I try +it I simply get "there is no git remote named "11908472-...", so I guess untrust +works with git remote names, which I find a bit confusing, since trust.log logs the +trust levels based on the UUID. I could just write into trust.log manually, but I'm +unsure how the changes would get propagated. + +What should I do? As a related wishlist item I would ask for some additional mechanisms +for purging known-dead repositories from the location tracking database. And the ability +to look up the UUID of the current repo, and to use the UUID to specify repositories when +applicable (untrust and describe maybe). diff --git a/doc/forum/location_tracking_cleanup/comment_1_7d6319e8c94dfe998af9cfcbf170efb2._comment b/doc/forum/location_tracking_cleanup/comment_1_7d6319e8c94dfe998af9cfcbf170efb2._comment new file mode 100644 index 0000000000..8915ea3518 --- /dev/null +++ b/doc/forum/location_tracking_cleanup/comment_1_7d6319e8c94dfe998af9cfcbf170efb2._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-09-30T06:55:34Z" + content=""" +Specifying the UUID was supposed to work, I think I broke it a while ago. Fixed now in git. + +I'm not sure why you need to look up the UUID of the current repository. You can always refer to the current repository as \".\". Anyway, the UUID of the current repository is in `.git/config`, or use `git config annex.uuid`. 
+"""]] diff --git a/doc/forum/location_tracking_cleanup/comment_2_e7395cb6e01f42da72adf71ea3ebcde4._comment b/doc/forum/location_tracking_cleanup/comment_2_e7395cb6e01f42da72adf71ea3ebcde4._comment new file mode 100644 index 0000000000..f612e53a4d --- /dev/null +++ b/doc/forum/location_tracking_cleanup/comment_2_e7395cb6e01f42da72adf71ea3ebcde4._comment @@ -0,0 +1,15 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawncBlzaDI248OZGjKQMXrLVQIx4XrZrzFo" + nickname="Perttu" + subject="comment 2" + date="2011-09-30T11:55:35Z" + content=""" +Thanks for the quick reply :) + +I wanted to look up the UUID of the current repo so that I can find out which repo is alive from the collection of repos with the same name. +I could have looked for it in .git/config though, since it's pretty obvious. I just looked into the git-annex branch and didn't find it there. +Thanks for the tip about using \".\". By the way, could there be some kind of warning about using non-unique names for repos? That would make this +scenario less likely. Or maybe that is a bad idea given the decentralized nature of git. + +By the way, do the trust settings propagate to other repos? If I mark some UUID as untrusted on one computer does it become globally untrusted? +"""]] diff --git a/doc/forum/location_tracking_cleanup/comment_3_c15428cec90e969284a5e690fb4b2fde._comment b/doc/forum/location_tracking_cleanup/comment_3_c15428cec90e969284a5e690fb4b2fde._comment new file mode 100644 index 0000000000..c676b9b615 --- /dev/null +++ b/doc/forum/location_tracking_cleanup/comment_3_c15428cec90e969284a5e690fb4b2fde._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 3" + date="2011-09-30T16:47:27Z" + content=""" +`git annex status` now includes a list of all known repositories. + +Yes, trust setting propigate on git push/pull like any other git-annex information. 
+"""]] diff --git a/doc/forum/migrate_existing_git_repository_to_git-annex.mdwn b/doc/forum/migrate_existing_git_repository_to_git-annex.mdwn new file mode 100644 index 0000000000..f673de765b --- /dev/null +++ b/doc/forum/migrate_existing_git_repository_to_git-annex.mdwn @@ -0,0 +1,66 @@ +I have a large git repository with binary files scattered over different branches. I want to switch to git-annex mainly for performance reasons, but I don't want to loose my history. + +I tried to rewrite the (cloned) repository with git-filter-branch but failed miserably for several reasons: + +* --tree-filter performs its operations in a temporary directory (.git-rewrite/t/) so the symlinks point to the wrong destination (../../.git/annex/). +* annex log files are stored in .git-annex/ instead of .git-rewrite/t/.git-annex/ so the filter operation misses them + +Any suggestions how to proceed? + +EDIT 3/2/2010 +I finally got it working for my purposes. Hardest part was preserving the branches while injecting the new `git annex setup` base commit. 
+ +#### Clone repository + git clone original migrate + cd migrate + git checkout mybranch + git checkout master + git remote rm origin + +#### Inject `git annex setup` base commit and repair branches + git symbolic-ref HEAD refs/heads/newroot + git rm --cached * + git clean -f -d + git annex init master + echo \*.rpm annex.backend=SHA1 >> .gitattributes + git commit -m "store rpms in git annex" .gitattributes + git cherry-pick $(git rev-list --reverse master | head -1) + git rebase --onto newroot newroot master + git rebase --onto master mybranch~1 mybranch + git branch -d newroot + +#### Migrate repository + mkdir .temp + cp .git-annex/* .temp/ + MYWORKDIR=$(pwd) git filter-branch \ + --tag-name-filter cat \ + --tree-filter ' + mkdir -p .git-annex; + cp ${MYWORKDIR}/.temp/* .git-annex/; + for rpm in $(git ls-files | grep "\.rpm$"); do + echo; + git annex add $rpm; + annexdest=$(readlink $rpm); + if [ -e .git-annex/$(basename $annexdest).log ]; then + echo "FOUND $(basename $annexdest).log"; + else + echo "COPY $(basename $annexdest).log"; + cp ${MYWORKDIR}/.git-annex/$(basename $annexdest).log .git-annex/; + cp ${MYWORKDIR}/.git-annex/$(basename $annexdest).log ${MYWORKDIR}/.temp/; + fi; + ln -sf ${annexdest#../../} $rpm; + done; + git reset HEAD .git-rewrite; + : + ' -- $(git branch | cut -c 3-) + rm -rf .temp + git reset --hard + + +TODO: + +* Find a way to repair branches automatically (detect branch points and run appropriate `git rebase` commands) + +I'll be happy to try any suggestions to improve this migration script. + +P.S. Is there a way to edit comments? 
diff --git a/doc/forum/migrate_existing_git_repository_to_git-annex/comment_1_4181bf34c71e2e8845e6e5fb55d53381._comment b/doc/forum/migrate_existing_git_repository_to_git-annex/comment_1_4181bf34c71e2e8845e6e5fb55d53381._comment new file mode 100644 index 0000000000..e88794d621 --- /dev/null +++ b/doc/forum/migrate_existing_git_repository_to_git-annex/comment_1_4181bf34c71e2e8845e6e5fb55d53381._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-02-25T05:16:48Z" + content=""" +I don't know how to approach this yet, but I support the idea -- it would be great if there was a tool that could punch files out of git history and put them in the annex. (Of course with typical git history rewriting caveats.) + +Sounds like it might be enough to add a switch to git-annex that overrides where it considers the top of the git repository to be? +"""]] diff --git a/doc/forum/migrate_existing_git_repository_to_git-annex/comment_2_5f08da5e21c0b3b5a8d1e4408c0d6405._comment b/doc/forum/migrate_existing_git_repository_to_git-annex/comment_2_5f08da5e21c0b3b5a8d1e4408c0d6405._comment new file mode 100644 index 0000000000..71a40ad8cb --- /dev/null +++ b/doc/forum/migrate_existing_git_repository_to_git-annex/comment_2_5f08da5e21c0b3b5a8d1e4408c0d6405._comment @@ -0,0 +1,60 @@ +[[!comment format=mdwn + username="tyger" + ip="80.66.20.180" + subject="comment 2" + date="2011-03-01T14:07:50Z" + content=""" +My current workflow looks like this (I'm still experimenting): + +### Create backup clone for migration + + git clone original migrate + cd migrate + for branch in $(git branch -a | grep remotes/origin | grep -v HEAD); do git checkout --track $branch; done + +### Inject git annex initialization at repository base + + git symbolic-ref HEAD refs/heads/newroot + git rm --cached *.rpm + git clean -f -d + git annex init master + git cherry-pick $(git rev-list --reverse master | head -1) + git rebase --onto 
newroot newroot master + git rebase master mybranch # how to automate this for all branches? + git branch -d newroot + +### Start migration with tree filter + + echo \*.rpm annex.backend=SHA1 > .git/info/attributes + MYWORKDIR=$(pwd) git filter-branch --tree-filter ' \ + if [ ! -d .git-annex ]; then \ + mkdir .git-annex; \ + cp ${MYWORKDIR}/.git-annex/uuid.log .git-annex/; \ + cp ${MYWORKDIR}/.gitattributes .; \ + fi + for rpm in $(git ls-files | grep \"\.rpm$\"); do \ + echo; \ + git annex add $rpm; \ + annexdest=$(readlink $rpm); \ + if [ -e .git-annex/$(basename $annexdest).log ]; then \ + echo \"FOUND $(basename $annexdest).log\"; \ + else \ + echo \"COPY $(basename $annexdest).log\"; \ + cp ${MYWORKDIR}/.git-annex/$(basename $annexdest).log .git-annex/; \ + fi; \ + ln -sf ${annexdest#../../} $rpm; \ + done; \ + git reset HEAD .git-rewrite; \ + : \ + ' -- $(git branch | cut -c 3-) + rm -rf .temp + git reset --hard + + +There are still some drawbacks: + +* git history shows that git annex log files are modified with each checkin +* branches have to be rebased manually before starting migration + + +"""]] diff --git a/doc/forum/migrate_existing_git_repository_to_git-annex/comment_3_f483038c006cf7dcccf1014fa771744f._comment b/doc/forum/migrate_existing_git_repository_to_git-annex/comment_3_f483038c006cf7dcccf1014fa771744f._comment new file mode 100644 index 0000000000..90bf23b6cf --- /dev/null +++ b/doc/forum/migrate_existing_git_repository_to_git-annex/comment_3_f483038c006cf7dcccf1014fa771744f._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="tyger" + ip="80.66.20.180" + subject="comment 3" + date="2011-03-02T08:15:37Z" + content=""" +> Sounds like it might be enough to add a switch to git-annex that overrides where it considers the top of the git repository to be? + +It should sufficient to honor GIT_DIR/GIT_WORK_TREE/GIT_INDEX_FILE environment variables. 
git filter-branch sets GIT_WORK_TREE to ., but this can be mitigated by starting the filter script with 'GIT_WORK_TREE=$(pwd $GIT_WORK_TREE)'. E.g. GIT_DIR=/home/tyger/repo/.git, GIT_WORK_TREE=/home/tyger/repo/.git-rewrite/t, then git annex should be able to compute the correct relative path or maybe use absolute paths in symlinks. + +Another problem I observed is that git annex add automatically commits the symlink; this behaviour doesn't work well with filter-tree. git annex commits the wrong path (.git-rewrite/t/LINK instead of LINK). Also filter-tree doesn't expect that the filter script commits anything; new files in the temporary work tree will be committed by filter-tree on each iteration of the filter script (missing files will be removed). +"""]] diff --git a/doc/forum/migration_to_git-annex_and_rsync.mdwn b/doc/forum/migration_to_git-annex_and_rsync.mdwn new file mode 100644 index 0000000000..d99dab8728 --- /dev/null +++ b/doc/forum/migration_to_git-annex_and_rsync.mdwn @@ -0,0 +1,33 @@ +When migrating large file repositories to git-annex that are backed up in a way that uses an rsync-style mechanism (e.g. [dirvish](http://www.dirvish.org/)) and thus keeps incremental backups small by using hardlinks, space can be saved by manually reflecting the migration on the backup. So, instead of making a last pre-git-annex backup, migrating, and duplicating all backed-up data with the next backup, I used the attached migrate.py file below, and it saved me roughly a day of backing up. + +A note on terminology: "migrating" here means migrating from not using git-annex at all to using it, not to the ``git annex migrate`` command, for which a similar but different solution may be created. + +**WARNING**: This is a quickly hacked-together script. It worked for me, but is untested apart from that. It's just a dozen lines of code, so have a look at it and make sure you understand what it does, and what migrate.sh looks like.
Take special care as this tampers with your backups, and if something goes wrong, well... + +First, have an up-to-date backup; then, git annex init / add etc as described in the [[walkthrough]]. In the directory in which you use git-annex, run: + + $ python migrate.py > migrate.sh + +Then copy the resulting migrate.sh to the equivalent location inside your backups and run it there. It will move all files that are now symlinked on the master to their new positions according to the symlinks (inside .git/annex/objects), but not create the symlinks (you will do a backup later anyway). + +After that, do a backup as usual. As rsync sees the moved files at their new locations, it will accept them and not duplicate the data. + +**migrate.py**: + + #!/usr/bin/env python + + import os + from pipes import quote + + print "#!/bin/sh" + print "set -e" + print "" + + for (dirpath, dirnames, filenames) in os.walk("."): + for f in filenames: + fn = os.path.join(dirpath, f) + if os.path.islink(fn): + link = os.path.normpath(os.path.join(dirpath, os.readlink(fn))) + assert link.startswith(".git/annex/objects/") + print "mkdir -p %s"%quote(os.path.dirname(link)) + print "mv %s %s"%(quote(fn), quote(link)) diff --git a/doc/forum/new_microfeatures.mdwn b/doc/forum/new_microfeatures.mdwn new file mode 100644 index 0000000000..bfe44272a7 --- /dev/null +++ b/doc/forum/new_microfeatures.mdwn @@ -0,0 +1,59 @@ +I'm soliciting ideas for new small features that let git-annex do things that currently have to be done manually or whatever. + +Here are a few I've been considering: + +--- + +* --numcopies would be a useful command line switch. + > Update: Added. Also allows for things like `git annex drop --numcopies=2` when in a repo that normally needs 3 copies, if you need + > to urgently free up space. +* A way to make `drop` and other commands temporarily trust a given remote, or possibly all remotes. 
+ +Combined, this would allow `git annex drop --numcopies=2 --trust=repoa --trust=repob` to remove files that have been replicated out to the other 2 repositories, which could be offline. (Slightly unsafe, but in this case the files are podcasts so not really.) + +> Update: done --[[Joey]] + +--- + +[[wishlist:_git-annex_replicate]] suggests some way for git-annex to have the smarts to copy content around on its own to ensure numcopies is satisfied. I'd be satisfied with a `git annex copy --to foo --if-needed-by-numcopies` + + > Contrary to the "basic" solution, I would love to have a git annex distribute which is smart enough to simply distribute all data according to certain rules. My ideal, personal use case during the next holidays where I will have two external disks, several SD cards with 32 GB each and a local disk with 20 GB (yes....) would be: + + cd ~/photos.annex # this repository does not have any objects! + git annex inject --bare /path/to/SD/card # this adds softlinks, but does **not** add anything to the index. it would calculate checksums (if enabled) and have to add a temporary location list, though + git annex distribute # this checks the config. it would see that my two external disks have a low cost whereas the two remotes have a higher cost. + # check numcopies. it's 3 + # copy to external disk one (cost x) + # copy to external disk two (cost x) + # copy to remote one (cost x * 2) + # remove file from temporary tracking list + git annex fsck # everything ok. yay! + +Come to think of it, the inject --bare thing is probably not a microfeature. Should I add a new wishlist item for that? -- RichiH + +> I've thought about such things before; does not seem really micro and I'm unsure how well it would work, but it would be worth a [[todo]]. --[[Joey]] + +>> Update: Done as --auto. 
--[[Joey]] + +--- + +Along similar lines, it might be nice to have a mode where git-annex tries to fill up a disk up to the `annex.diskreserve` with files, preferring files that have relatively few copies. Then as storage prices continue to fall, new large drives could just be plopped in and git-annex used to fill it up in a way that improves the overall redundancy without needing to manually pick and choose. + +> Update: git annex get --auto basically does this; you can tune +> --numcopies on the fly to make it get more files than needed by the +> current numcopies setting. --[[Joey]] + +--- + +If a remote could send on received files to another remote, I could use my own local bandwith efficiently while still having my git-annex repos replicate data. -- RichiH + +--- + +Really micro: + + % grep annex-push .git/config + annex-push = !git pull && git annex add . && git annex copy . --to origin --fast --quiet && git commit -a -m "$HOST $(date +%F--%H-%M-%S-%Z)" && git push + % + +-- RichiH +--[[Joey]] diff --git a/doc/forum/new_microfeatures/comment_1_058bd517c6fffaf3446b1f5d5be63623._comment b/doc/forum/new_microfeatures/comment_1_058bd517c6fffaf3446b1f5d5be63623._comment new file mode 100644 index 0000000000..84fdd325dc --- /dev/null +++ b/doc/forum/new_microfeatures/comment_1_058bd517c6fffaf3446b1f5d5be63623._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 1" + date="2011-06-01T17:36:50Z" + content=""" +I've been longing for an automated way of removing references to a remote assuming I know the exact uuid that I want to remove. i.e. I have lost a portable HDD due to a destructive process, I now want to delete all references to copies of data that was on that disk. Unless this feature exists, I would love to see it implemented. 
+"""]] diff --git a/doc/forum/new_microfeatures/comment_2_41ad904c68e89c85e1fc49c9e9106969._comment b/doc/forum/new_microfeatures/comment_2_41ad904c68e89c85e1fc49c9e9106969._comment new file mode 100644 index 0000000000..4451e20baf --- /dev/null +++ b/doc/forum/new_microfeatures/comment_2_41ad904c68e89c85e1fc49c9e9106969._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 2" + date="2011-06-01T20:24:33Z" + content=""" +@jimmy [[walkthrough/what_to_do_when_you_lose_a_repository]].. I have not seen a convincing argument that removing the location tracking data entirely serves any purpose +"""]] diff --git a/doc/forum/new_microfeatures/comment_3_a1a9347b5bc517f2a89a8b292c3f8517._comment b/doc/forum/new_microfeatures/comment_3_a1a9347b5bc517f2a89a8b292c3f8517._comment new file mode 100644 index 0000000000..4bb3aa684f --- /dev/null +++ b/doc/forum/new_microfeatures/comment_3_a1a9347b5bc517f2a89a8b292c3f8517._comment @@ -0,0 +1,15 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawnpdM9F8VbtQ_H5PaPMpGSxPe_d5L1eJ6w" + nickname="Rafaël" + subject="git annex unlock --readonly" + date="2011-06-02T11:34:42Z" + content=""" +This was already asked [here](http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=606577), but I have a use case where I need to unlock with the files being hardlinked instead of copied (my fs does not support CoW), even though 'git annex lock' is now much faster ;-) . The idea is that 1) I want the external world see my repo \"as if\" it wasn't annexed (because of its own limitation to deal with soft links), and 2) I know what I do, and am sure that files won't be written to but only read. + +My case is: the repo contains a snapshot A1 of a certain remote directory. Later I want to rsync this dir into a new snapshot A2. Of course, I want to transfer only new or changed files, with the --copy-dest=A1 (or --compare-dest) rsync's options. 
Unfortunately, rsync won't recognize soft-links from git-annex, and will re-transfer everything. + + +Maybe I'm overusing git-annex ;-) but still, I find it is a legitimate use case, and even though there are workarounds (I don't even remember what I had to do), it would be much more straightforward to have 'git annex unlock --readonly' (or '--readonly-unsafe'?), ... or have rsync take soft-links into account, but I did not see the author ask for microfeatures ideas :) (it was discussed, and only some convoluted workarounds were proposed). Thanks. + + +"""]] diff --git a/doc/forum/new_microfeatures/comment_4_5a6786dc52382fff5cc42fdb05770196._comment b/doc/forum/new_microfeatures/comment_4_5a6786dc52382fff5cc42fdb05770196._comment new file mode 100644 index 0000000000..cc98109e6b --- /dev/null +++ b/doc/forum/new_microfeatures/comment_4_5a6786dc52382fff5cc42fdb05770196._comment @@ -0,0 +1,18 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawnpdM9F8VbtQ_H5PaPMpGSxPe_d5L1eJ6w" + nickname="Rafaël" + subject="git annex unused" + date="2011-06-02T11:55:58Z" + content=""" +Before dropping unused items, sometimes I want to check the content of the files manually. +But currently, from e.g. a sha1 key, I don't know how to find the corresponding file, except with +'find .git/annex/objects -type f -name 'SHA1-s1678--70....', which is too slow (I'm in the case where \"git log --stat -S'KEY'\" +won't work, either because it is too slow or it was never committed). By the way, +is it documented somewhere how to determine the 2 (nested) sub-directories in which a given +(by name) object is located? + +So I would like 'git-annex unused' be able to give me the list of *paths* to the unused items. +Also, I would really appreciate a command like 'git annex unused --log NUMBER [NUMBER2...]' which would do for me the suggested command +\"git log --stat -S'KEY'\", where NUMBER is from the 'git annex unused' output. +Thanks.
+"""]] diff --git a/doc/forum/new_microfeatures/comment_5_3c627d275586ff499d928a8f8136babf._comment b/doc/forum/new_microfeatures/comment_5_3c627d275586ff499d928a8f8136babf._comment new file mode 100644 index 0000000000..f7361f5d1c --- /dev/null +++ b/doc/forum/new_microfeatures/comment_5_3c627d275586ff499d928a8f8136babf._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawnpdM9F8VbtQ_H5PaPMpGSxPe_d5L1eJ6w" + nickname="Rafaël" + subject="git annex unused" + date="2011-06-02T19:51:49Z" + content=""" +ps: concerning the command 'find .git/annex/objects -type f -name 'SHA1-s1678--70....' from my previous comment, it is \"significantly\" faster to search for the containing directory which have the same name: 'find .git/annex/objects -maxdepth 2 -mindepth 2 -type d -name 'SHA1-s1678--70....'. I am just curious: what is the need to have each file object in its own directory, itself nested under two more sub-directories? +"""]] diff --git a/doc/forum/new_microfeatures/comment_6_31ea08c008500560c0b96c6601bc6362._comment b/doc/forum/new_microfeatures/comment_6_31ea08c008500560c0b96c6601bc6362._comment new file mode 100644 index 0000000000..868e9677c8 --- /dev/null +++ b/doc/forum/new_microfeatures/comment_6_31ea08c008500560c0b96c6601bc6362._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawnpdM9F8VbtQ_H5PaPMpGSxPe_d5L1eJ6w" + nickname="Rafaël" + subject="git annex fetch" + date="2011-07-03T14:39:41Z" + content=""" +I'm not sure it is worth adding a command for such a small feature, but I would certainly use it: having something like \"git annex fetch remote\" do \"git fetch remote && git annex copy --from=remote\", and \"git annex push remote\" do \"git push remote && git annex copy --to=remote\". And maybe the same for a pull operation? 
+"""]] diff --git a/doc/forum/new_microfeatures/comment_7_94045b9078b1fff877933b012d1b49e2._comment b/doc/forum/new_microfeatures/comment_7_94045b9078b1fff877933b012d1b49e2._comment new file mode 100644 index 0000000000..e39e162322 --- /dev/null +++ b/doc/forum/new_microfeatures/comment_7_94045b9078b1fff877933b012d1b49e2._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawnpdM9F8VbtQ_H5PaPMpGSxPe_d5L1eJ6w" + nickname="Rafaël" + subject="git annex fetch" + date="2011-07-03T17:57:00Z" + content=""" +My last comment is a bit confused. The \"git fetch\" command allows to get all the information from a remote, and it is then possible to merge while being offline (without access to the remote). I would like a \"git annex fetch remote\" command to be able to get all annexed files from remote, so that if I later merge with remote, all annexed files are already here. And \"git annex fetch\" could (optionally) call \"git fetch\" before getting the files. + +It seems also that in my last post, I should have written \"git annex get --from=remote\" instead of \"git annex copy --from=remote\", because \"annex copy --from\" copies all files, even if the local repo already have them (is this the case? if yes, when is it useful?) +"""]] diff --git a/doc/forum/performance_improvement:_git_on_ssd__44___annex_on_spindle_disk.mdwn b/doc/forum/performance_improvement:_git_on_ssd__44___annex_on_spindle_disk.mdwn new file mode 100644 index 0000000000..f70c127025 --- /dev/null +++ b/doc/forum/performance_improvement:_git_on_ssd__44___annex_on_spindle_disk.mdwn @@ -0,0 +1,12 @@ +This works with bind-mount, I might try with softlinks as well. + +Going through git's data on push/pull can take ages on a spindle disk even +if the repo is rather small in size. This is especially true if you are +used to ssd speeds, but ssd storage is expensive. 
Storing the annex objects +on a cheap spindle disk and everything else on a ssd makes things a _lot_ +faster. + +> Update: git-annex supports `.git/annex/` being moved to a different disk +> than the rest of the repository, but does *not* support individual +> subdirectories, like `.git/annex/objects/` being on a different disk +> than the main `.git/annex/` directory. --[[Joey]] diff --git a/doc/forum/performance_improvement:_git_on_ssd__44___annex_on_spindle_disk/comment_1_b3f22f9be02bc4f2d5a121db3d753ff5._comment b/doc/forum/performance_improvement:_git_on_ssd__44___annex_on_spindle_disk/comment_1_b3f22f9be02bc4f2d5a121db3d753ff5._comment new file mode 100644 index 0000000000..124993bcf1 --- /dev/null +++ b/doc/forum/performance_improvement:_git_on_ssd__44___annex_on_spindle_disk/comment_1_b3f22f9be02bc4f2d5a121db3d753ff5._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-04-02T17:48:29Z" + content=""" +Either option should work fine, but git gc --aggressive will probably avoid most of git's seeking. 
+"""]] diff --git a/doc/forum/performance_improvement:_git_on_ssd__44___annex_on_spindle_disk/comment_2_f94abce32ef818176b42a3cc860691ae._comment b/doc/forum/performance_improvement:_git_on_ssd__44___annex_on_spindle_disk/comment_2_f94abce32ef818176b42a3cc860691ae._comment new file mode 100644 index 0000000000..eddc8c6315 --- /dev/null +++ b/doc/forum/performance_improvement:_git_on_ssd__44___annex_on_spindle_disk/comment_2_f94abce32ef818176b42a3cc860691ae._comment @@ -0,0 +1,20 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 2" + date="2011-04-02T21:34:24Z" + content=""" +I'll give it a try as soon as I get rid of this: + + % git annex fsck +fatal: index file smaller than expected +fatal: index file smaller than expected + % git status +fatal: index file smaller than expected + % + +And no, I am not sure where that is coming from all of a sudden... (it might have to do with a hard lockup of the whole system due to a faulty hdd I tested, but I didn't do anything to it for ages before that lock-up. So meh. Also, this is prolly off topic in here) + + +Richard +"""]] diff --git a/doc/forum/performance_improvement:_git_on_ssd__44___annex_on_spindle_disk/comment_3_0c8e77fe248e00bd990d568623e5a5c9._comment b/doc/forum/performance_improvement:_git_on_ssd__44___annex_on_spindle_disk/comment_3_0c8e77fe248e00bd990d568623e5a5c9._comment new file mode 100644 index 0000000000..fc29236c6d --- /dev/null +++ b/doc/forum/performance_improvement:_git_on_ssd__44___annex_on_spindle_disk/comment_3_0c8e77fe248e00bd990d568623e5a5c9._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 3" + date="2011-04-03T01:48:57Z" + content=""" +For future reference, git can recover from a corrupted index file with `rm .git/index; git reset --mixed`. 
+ +Of course, you lose any staged changes that were in the old index file, and may need to re-stage some files. +"""]] diff --git a/doc/forum/performance_improvement:_git_on_ssd__44___annex_on_spindle_disk/comment_4_4b7e8f9521d61900d9ad418e74808ffb._comment b/doc/forum/performance_improvement:_git_on_ssd__44___annex_on_spindle_disk/comment_4_4b7e8f9521d61900d9ad418e74808ffb._comment new file mode 100644 index 0000000000..ec0f88d13c --- /dev/null +++ b/doc/forum/performance_improvement:_git_on_ssd__44___annex_on_spindle_disk/comment_4_4b7e8f9521d61900d9ad418e74808ffb._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 4" + date="2011-04-03T09:03:22Z" + content=""" +Thanks a lot. I tried various howtos around the net, but none of them worked; yours did. (I tried it in one of the copies of the broken repo which I keep around for obvious reasons). +"""]] diff --git a/doc/forum/pure_git-annex_only_workflow.mdwn b/doc/forum/pure_git-annex_only_workflow.mdwn new file mode 100644 index 0000000000..36648a9058 --- /dev/null +++ b/doc/forum/pure_git-annex_only_workflow.mdwn @@ -0,0 +1,46 @@ +I’m using git annex to manage my movie collection on various devices – my laptop, a NSLU tucked away somewhere with lots of space, some external hard drives. For this use case, I do not need the full power of git as a version control system, so having to run "git commit" and coming up with commit messages is annoying. Also, this makes sense for a version control system, but not for my media collection: + + $ git annex add Hot\ Fuzz\ -\ English.mkv + add Hot Fuzz - English.mkv (checksum...) ok + (Recording state in git...) 
+ $ git commit -m 'another movie added' + [master 851dc8a] another movie added + 1 files changed, 1 insertions(+), 0 deletions(-) + create mode 120000 00 Noch nicht gesehen/Hot Fuzz - English.mkv + $ git push jeff + Counting objects: 38, done. + Delta compression using up to 2 threads. + Compressing objects: 100% (20/20), done. + Writing objects: 100% (26/26), 2.00 KiB, done. + Total 26 (delta 11), reused 0 (delta 0) + remote: error: refusing to update checked out branch: refs/heads/master + remote: error: By default, updating the current branch in a non-bare repository + remote: error: is denied, because it will make the index and work tree inconsistent + remote: error: with what you pushed, and will require 'git reset --hard' to match + remote: error: the work tree to HEAD. + remote: error: + remote: error: You can set 'receive.denyCurrentBranch' configuration variable to + remote: error: 'ignore' or 'warn' in the remote repository to allow pushing into + remote: error: its current branch; however, this is not recommended unless you + remote: error: arranged to update its work tree to match what you pushed in some + remote: error: other way. + remote: error: + remote: error: To squelch this message and still keep the default behaviour, set + remote: error: 'receive.denyCurrentBranch' configuration variable to 'refuse'. + To jeff:/mnt/media/Movies + ! [rejected] git-annex -> git-annex (non-fast-forward) + ! [remote rejected] master -> master (branch is currently checked out) + error: failed to push some refs to 'jeff:/mnt/media/Movies' + To prevent you from losing history, non-fast-forward updates were rejected + Merge the remote changes (e.g. 'git pull') before pushing again. See the + 'Note about fast-forwards' section of 'git push --help' for details. + +It seems that to successfully make the new files known to the other side, I have to log into jeff and pull _from_ my current machine. 
+ +What I would like to have is that + +* git annex add does not require a commit afterwards. +* Changes to the files are automatically picked up with the next git-annex call (similar to how etckeeper works). +* Commands "git annex push" and "git annex pull" that will sync the metadata (i.e. the list of files) in both directions without further manual intervention, at least not until the two repositories have diverged in a way that is not possible to merge sensible. + +Summay: git-annex is great. git is not always. Please make it possible to use git annex without having to use git. diff --git a/doc/forum/pure_git-annex_only_workflow/comment_1_a32f7efd18d174845099a4ed59e6feae._comment b/doc/forum/pure_git-annex_only_workflow/comment_1_a32f7efd18d174845099a4ed59e6feae._comment new file mode 100644 index 0000000000..def1794a3e --- /dev/null +++ b/doc/forum/pure_git-annex_only_workflow/comment_1_a32f7efd18d174845099a4ed59e6feae._comment @@ -0,0 +1,32 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-12-09T22:56:11Z" + content=""" +First, you need a bare git repository that you can push to, and pull from. This simplifies most git workflow. + +Secondly, I use [mr](http://kitenet.net/~joey/code/mr/), with this in `.mrconfig`: + +
+[DEFAULT]
+lib =
+        annexupdate() {
+                git commit -a -m update || true
+                git pull \"$@\"
+                git annex merge
+                git push || true
+        }
+
+[lib/sound]
+update = annexupdate
+[lib/big]
+update = annexupdate
+
+ +Which makes \"mr update\" in repositories where I rarely care about git details take care of syncing my changes. + +I also make \"mr update\" do a \"git annex get\" of some files in some repositories that I want to always populate. git-annex and mr go well together. :) + +Perhaps my annexupdate above should be available as \"git annex sync\"? +"""]] diff --git a/doc/forum/pure_git-annex_only_workflow/comment_2_66dc9b65523a9912411db03c039ba848._comment b/doc/forum/pure_git-annex_only_workflow/comment_2_66dc9b65523a9912411db03c039ba848._comment new file mode 100644 index 0000000000..473a0287d0 --- /dev/null +++ b/doc/forum/pure_git-annex_only_workflow/comment_2_66dc9b65523a9912411db03c039ba848._comment @@ -0,0 +1,15 @@ +[[!comment format=mdwn + username="http://www.joachim-breitner.de/" + nickname="nomeata" + subject="comment 2" + date="2011-12-10T16:28:29Z" + content=""" +Thanks for the tips so far. I guess a bare-only repo helps, but as well is something that I don’t _need_ (for my use case), any only have to do because git works like this. + +Also, if I have a mobile device that I want to push to, then I’d have to have two repositories on the device, as I might not be able to reach my main bare repository when traveling, but I cannot push to the „real“ repo on the mobile device from my computer. I guess I am spoiled by darcs, which will happily push to a checked out +remote repository, updating the checkout if possible without conflict. + +If I introduce a central bare repository to push to and from; I’d still have to have the other non-bare repos as remotes, so that git-annex will know about them and their files, right? + +I’d appreciate a \"git annex sync\" that does what you described (commit all, pull, merge, push). Especially if it comes in a \"git annex sync --all\" variant that syncs all reachable repositories. 
+"""]] diff --git a/doc/forum/pure_git-annex_only_workflow/comment_3_9b7d89da52f7ebb7801f9ec8545c3aba._comment b/doc/forum/pure_git-annex_only_workflow/comment_3_9b7d89da52f7ebb7801f9ec8545c3aba._comment new file mode 100644 index 0000000000..9b6e6d7c4d --- /dev/null +++ b/doc/forum/pure_git-annex_only_workflow/comment_3_9b7d89da52f7ebb7801f9ec8545c3aba._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 3" + date="2011-12-10T19:43:04Z" + content=""" +Git can actually push into a non-bare repository, so long as the branch you change there is not a checked out one. Pushing into `remotes/$foo/master` and `remotes/$foo/git-annex` would work, however determining the value that the repository expects for `$foo` is something git cannot do on its own. And of course you'd still have to `git merge remotes/$foo/master` to get the changes. + +Yes, you still keep the non-bare repos as remotes when adding a bare repository, so git-annex knows how to get to them. + +I've made `git annex sync` run the simple script above. Perhaps it can later be improved to sync all repositories. +"""]] diff --git a/doc/forum/pure_git-annex_only_workflow/comment_4_dc8a3f75533906ad3756fcc47f7e96bb._comment b/doc/forum/pure_git-annex_only_workflow/comment_4_dc8a3f75533906ad3756fcc47f7e96bb._comment new file mode 100644 index 0000000000..1ac9e798a8 --- /dev/null +++ b/doc/forum/pure_git-annex_only_workflow/comment_4_dc8a3f75533906ad3756fcc47f7e96bb._comment @@ -0,0 +1,20 @@ +[[!comment format=mdwn + username="http://www.joachim-breitner.de/" + nickname="nomeata" + subject="comment 4" + date="2011-12-13T18:16:08Z" + content=""" +I thought about this some more, and I think I have a pretty decent solution that avoids a central bare repository. Instead of pushing to master (which git does not like) or trying to guess the remote branch name on the other side, there is a well-known branch name, say git-annex-master. 
Then a sync command would do something like this (untested): + + git commit -a -m 'git annex sync' # ideally with a description derived from the diff + git merge git-annex-master + git pull someremote git-annex-master # for all reachable remotes. Or better to use fetch and then merge everything in one command? + git branch -f git-annex-master # (or checkout git-annex-master, merge master, checkout master, but since we merged before this should have the same effect + git annex merge + git push someremote git-annex-master # for all reachable remotes + +The nice things are: One can push to any remote repository, and thus avoid the issue of pushing to a portable device; the merging happens on the master branch, so if it fails to merge automatically, regular git foo can resolve it, and all changes eventually reach every repository. + +What do you think? + +"""]] diff --git a/doc/forum/pure_git-annex_only_workflow/comment_5_afe5035a6b35ed2c7e193fb69cc182e2._comment b/doc/forum/pure_git-annex_only_workflow/comment_5_afe5035a6b35ed2c7e193fb69cc182e2._comment new file mode 100644 index 0000000000..0847daae9d --- /dev/null +++ b/doc/forum/pure_git-annex_only_workflow/comment_5_afe5035a6b35ed2c7e193fb69cc182e2._comment @@ -0,0 +1,24 @@ +[[!comment format=mdwn + username="http://www.joachim-breitner.de/" + nickname="nomeata" + subject="comment 5" + date="2011-12-13T18:47:18Z" + content=""" +After some experimentation, this seems to work better: + + git commit -a -m 'git annex sync' + git merge git-annex-master + for remote in $(git remote) + do + git fetch $remote + git merge $remote git-annex-master + done + git branch -f git-annex-master + git annex merge + for remote in $(git remote) + do + git push $remote git-annex git-annex-master + done + +Maybe this approach can be enhanced to skip stuff gracefully if there is no git-annex-master branch and then be added to what \"git annex sync\" does, this way those who want to use the feature can do so by running \"git branch 
git-annex-master\" once. Or, if you like this and want to make it default, just make git-annex-init create the git-annex-master branch :-) +"""]] diff --git a/doc/forum/pure_git-annex_only_workflow/comment_6_3660d45c5656f68924acbd23790024ee._comment b/doc/forum/pure_git-annex_only_workflow/comment_6_3660d45c5656f68924acbd23790024ee._comment new file mode 100644 index 0000000000..fc66fbb8e1 --- /dev/null +++ b/doc/forum/pure_git-annex_only_workflow/comment_6_3660d45c5656f68924acbd23790024ee._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 6" + date="2011-12-13T20:53:23Z" + content=""" +It would be clearer to call \"git-annex-master\" \"synced/master\" (or really \"synced/$current_branch\"). That does highlight that this method of syncing is not particularly specific to git-annex. + +I think this would be annoying to those who do use a central bare repository, because of the unnecessary pushing and pulling to other repos, which could be expensive to do, especially if you have a lot of interconnected repos. So having a way to enable/disable it seems best. + +Maybe you should work up a patch to Command/Sync.hs, since I know you know haskell :) +"""]] diff --git a/doc/forum/pure_git-annex_only_workflow/comment_7_33db51096f568c65b22b4be0b5538c0d._comment b/doc/forum/pure_git-annex_only_workflow/comment_7_33db51096f568c65b22b4be0b5538c0d._comment new file mode 100644 index 0000000000..753a2af169 --- /dev/null +++ b/doc/forum/pure_git-annex_only_workflow/comment_7_33db51096f568c65b22b4be0b5538c0d._comment @@ -0,0 +1,15 @@ +[[!comment format=mdwn + username="http://www.joachim-breitner.de/" + nickname="nomeata" + subject="comment 7" + date="2011-12-18T12:08:51Z" + content=""" +I agree on the naming suggestions, and that it does not suit everybody. Maybe I’ll think some more about it. 
The point is: I’m trying to make life easy for those who do not want to manually create some complicated setup, so if it needs configuration, it is already off that track. But turning the current behavior into something people have to configure is also not well received by the users. + +Given that \"git annex sync\" is a new command, maybe it is fine to have this as a default behavior, and offer an easy way out. The easy way out could be one of two flags that can be set for a repo (or a remote): + +* \"central\", which makes git annex sync only push and pull to and from that repo (unless a different remote is given on the command line) +* \"unsynced\", which makes git annex sync skip the repo. + +Maybe central is enough. +"""]] diff --git a/doc/forum/pure_git-annex_only_workflow/comment_8_6e5b42fdb7801daadc0b3046cbc3d51e._comment b/doc/forum/pure_git-annex_only_workflow/comment_8_6e5b42fdb7801daadc0b3046cbc3d51e._comment new file mode 100644 index 0000000000..d33a296ca1 --- /dev/null +++ b/doc/forum/pure_git-annex_only_workflow/comment_8_6e5b42fdb7801daadc0b3046cbc3d51e._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 8" + date="2011-12-19T18:29:01Z" + content=""" +I don't mind changing the behavior of git-annex sync, certainly.. + +Looking thru git's documentation, I found some existing configuration that could be reused following your idea. +There is a remote.name.skipDefaultUpdate and a remote.name.skipFetchAll. Though both have to do with fetches, not pushes. +Another approach might be to use git's remote group stuff. 
+"""]] diff --git a/doc/forum/pure_git-annex_only_workflow/comment_9_ace319652f9c7546883b5152ddc82591._comment b/doc/forum/pure_git-annex_only_workflow/comment_9_ace319652f9c7546883b5152ddc82591._comment new file mode 100644 index 0000000000..de656d6629 --- /dev/null +++ b/doc/forum/pure_git-annex_only_workflow/comment_9_ace319652f9c7546883b5152ddc82591._comment @@ -0,0 +1,14 @@ +[[!comment format=mdwn + username="http://www.joachim-breitner.de/" + nickname="nomeata" + subject="comment 9" + date="2011-12-19T22:56:26Z" + content=""" +Another option that would please the naive user without hindering the more advanced user: \"git annex init\", by default, creates a synced/master branch. \"git annex sync\" will pull from every /sync/master branch it finds, and also push to any /sync/master branch it finds, but will not create any. So by default (at least for new users), this provides simple one-step syncing. + +Advanced users can disable this per-repo by just deleting the synced/master branch. Presumably the logic will be: Every repo that should not be pushed to, because it has access to some central repo, should not have a synced/master branch. Every other repo, including the (or one of the few) central repos, will have the branch. + +This is not the most expressive solution, as it does not allow configuring syncing between arbitrary pairs of repos, but it feels like a good compromise between that and simplicity and transparency. + +I think it's about time that I provide less talk and more code. 
I’ll see when I find the time :-) +"""]] diff --git a/doc/forum/relying_on_git_for_numcopies.mdwn b/doc/forum/relying_on_git_for_numcopies.mdwn new file mode 100644 index 0000000000..37b46cf4e7 --- /dev/null +++ b/doc/forum/relying_on_git_for_numcopies.mdwn @@ -0,0 +1,47 @@ +**<out-of-date-warning>**The main problems this is supposed to solve are addressed in a different way with [[todo/hidden files]] and the `--fast` option introduced in [[batch check on remote when using copy]], so while this is not technically obsolete, the main reasons for it are gone. --[[chrysn]]**</out-of-date-warning>** + +This is a rough sketch of a modification of git-annex to rely more on git commit semantics. It might be flawed due to my lack of understanding of git-annex internals. --[[chrysn]] + +Summary +========= + +Currently, [[location tracking]] is only used for informational purposes unless a repository is [[trust]]ed, in which case there is no checking at all. It is proposed to use the location tracking information as a commitment to keep track of a file until another repository takes over responsibility. + +git's semantics for atomic commits are proposed to be used, which makes sure that before files are actually deleted, another repository has accepted the deletion. + +Modified git-annex-drop behavior +========================== + +The most important (if not only) git-annex command that is affected by this is `git annex drop`. Currently, for dropping a large number of files, every file is checked with another (or multiple, if so configured) host if it's safe to delete. + +The new behavior would be to + +* decrement the location tracking counter for all files to be dropped, +* commit that change, +* try to push it to at least as many repositories that the numcopies constraints are met, +* revert if that fails, +* otherwise really drop the files from the backend. 
+ +Unlike explicit checking, this never looks at the remote backend if the file is really present -- otoh, git-annex already relies on the files in the backend to not be touched by anyone but git-annex itself, and git-annex would only drop them if they were derefed and committed, in which case git would not accept the push. (git by itself would accept a merged push, but even if the reverting step failed due to a power outage or similar, git-annex would, before really deleting files from the backend, check again if the numcopies restraint is still met, and revert its own delete commit as the files are still present anyway.) + +Implications for trust +============== + +The proposed change also changes the semantics of trust. Trust can now be controlled in a finer-grained way between untrusted and semi-trusted, as best illustrated by a use case: + +> Alice takes her netbook with her on a trip through Spain, and will fill most of its disk up with pictures she takes. As she expects to meet some old friends during the first days, she wants to take older pictures with her, which are safely backed up at home, so they can be deleted on demand. +> +> She tells her netbook's repository to dereference the old images (but not other parts of the repository she has not copied anywhere yet) and pushes to the server before leaving. When she adds pictures from her camera to the repository, git-annex can now free up space as needed. + +Dereferencing could be implemented as `git annex drop --no-rm` (or `move --no-rm`), freeing space is similar to `dropunused`. + +A trusted repository with the new semantics would mean that the repository would not accept dropping anything, just as before. + +Advantages / Disadvantages +===================== + +The advantage of this proposal is that the round trips required for dropping something could be greatly reduced. 
+ +There should also be simplifications in the `git annex drop` command as it doesn't need to take care of locking any more (git should already do that between checking if HEAD is a parent of the pushed commit and replacing HEAD). + +Besides being a major change in git-annex (with the requirement to track hosts' git-annex versions for migration, as the new trust system is incompatible with the old one), no disadvantages of that strategy are known to the author (hoping for discussion below). diff --git a/doc/forum/relying_on_git_for_numcopies/comment_1_8ad3cccd7f66f6423341d71241ba89fc._comment b/doc/forum/relying_on_git_for_numcopies/comment_1_8ad3cccd7f66f6423341d71241ba89fc._comment new file mode 100644 index 0000000000..83a908da8c --- /dev/null +++ b/doc/forum/relying_on_git_for_numcopies/comment_1_8ad3cccd7f66f6423341d71241ba89fc._comment @@ -0,0 +1,36 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-02-22T18:44:28Z" + content=""" +I see the following problems with this scheme: + +- Disallows removal of files when disconnected. It's currently safe to force that, as long as + git-annex tells you enough other repos are believed to have the file. Just as long as you + only force on one machine (say your laptop). With your scheme, if you drop a file while + disconnected, any other host could see that the counter is still at N, because your + laptop had the file last time it was online, and can decide to drop the file, and lose the last +version. + +- pushing a changed counter commit to other repos is tricky, because they're not bare, and + the network topology to get the commit pulled into the other repo could vary. + +- Merging counter files issues. If the counter file doesn't automerge, two repos dropping the same file will conflict. But, if it does automerge, it breaks the counter conflict detection. + +- Needing to revert commits is going to be annoying. 
An actual git revert + could probably not reliably be done. It's need to construct a revert + and commit it as a new commit. And then try to push that to remotes, and + what if *that* push conflicts? + +- I do like the pre-removal dropping somewhat as an alternative to + trust checking. I think that can be done with current git-annex though, + just remove the files from the location log, but keep them in-annex. + Dropping a file only looks at repos that the location log says have a + file; so other repos can have retained a copy of a file secretly like + this, and can safely remove it at any time. I'd need to look into this a bit more to be 100% sure it's safe, but have started [[todo/hidden_files]]. + +- I don't see any reduced round trips. It still has to contact N other + repos on drop. Now, rather than checking that they have a file, it needs + to push a change to them. +"""]] diff --git a/doc/forum/relying_on_git_for_numcopies/comment_2_be6acbc26008a9cb54e7b8f498f2c2a2._comment b/doc/forum/relying_on_git_for_numcopies/comment_2_be6acbc26008a9cb54e7b8f498f2c2a2._comment new file mode 100644 index 0000000000..d9ce8b50e0 --- /dev/null +++ b/doc/forum/relying_on_git_for_numcopies/comment_2_be6acbc26008a9cb54e7b8f498f2c2a2._comment @@ -0,0 +1,18 @@ +[[!comment format=mdwn + username="http://christian.amsuess.com/chrysn" + nickname="chrysn" + subject="comment 2" + date="2011-02-23T16:43:59Z" + content=""" +i'll comment on each of the points separately, well aware that even a single little leftover issue can show that my plan is faulty: + +* force removal: well, yes -- but the file that is currently force-removed on the laptop could just as well be the last of its kind itself. i see the problem, but am not sure if it's fatal (after all, if we rely on out-of-band knowledge when forcing something, we could just as well ask a little more) +* non-bare repos: pushing is tricky with non-bare repos now just as well; a post-commit hook could auto-accept counter changes. 
(but pushing causes problems with counters anyway, doesn't it?) +* merging: i'd have them auto-merge. git-annex will have to check the validity of the current state anyway, and a situation in which a counter-decrementing commit is not a fast-forward one would be reverted in the next step (or upon discovery, in case the next step never took place). +* reverting: my wording was bad as \"revert\" is already taken in git-lingo. the correct term for what i was thinking of is \"reset\". (as the commit could not be pushed, it would be rolled back completely). + * we might have to resort to reverting, though, if the commit has already been pushed to a first server of many. +* [[todo/hidden files]]: yes, this solves pre-removal dropping :-) +* round trips: it's not the number of servers, it's the number of files (up to 30k in my case). it seems to me that an individual request was made for every single file i wanted to drop (that would be N*M roundtrips for N affected servers and M files, and N roundtrips with git managed numcopies) + +all together, it seems to be a bit more complicated than i imagined, although not completely impossible. a combination of [[todo/hidden files]] and maybe a simpler reduction of the number of requests might though achieve the important goals as well. +"""]] diff --git a/doc/forum/relying_on_git_for_numcopies/comment_3_43d8e1513eb9947f8a503f094c03f307._comment b/doc/forum/relying_on_git_for_numcopies/comment_3_43d8e1513eb9947f8a503f094c03f307._comment new file mode 100644 index 0000000000..27076a877f --- /dev/null +++ b/doc/forum/relying_on_git_for_numcopies/comment_3_43d8e1513eb9947f8a503f094c03f307._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://christian.amsuess.com/chrysn" + nickname="chrysn" + subject="relation to [[todo/branching]]" + date="2011-02-23T21:48:14Z" + content=""" +the non-bare repository issue would go away if this was combined with the \"alternate\" approach to [[todo/branching]]. 
(with the \"fleshed out proposal\" of branching, this would not work at all for lack of shared commits.) +"""]] diff --git a/doc/forum/rsync_over_ssh__63__.mdwn b/doc/forum/rsync_over_ssh__63__.mdwn new file mode 100644 index 0000000000..9c0c9add63 --- /dev/null +++ b/doc/forum/rsync_over_ssh__63__.mdwn @@ -0,0 +1,2 @@ +[Walkthrough](http://git-annex.branchable.com/walkthrough/using_ssh_remotes/) says that when using ssh remotes rsync is used for transfering files. Is rsync used via ssh or unsecure? +-- Michael K. diff --git a/doc/forum/rsync_over_ssh__63__/comment_1_ee21f32e90303e20339e0a568321bbbe._comment b/doc/forum/rsync_over_ssh__63__/comment_1_ee21f32e90303e20339e0a568321bbbe._comment new file mode 100644 index 0000000000..2b9fc9552d --- /dev/null +++ b/doc/forum/rsync_over_ssh__63__/comment_1_ee21f32e90303e20339e0a568321bbbe._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-03-06T15:59:37Z" + content=""" +Everything is done over ssh unless both repos are on the same system (or unless you NFS mount a repo) +"""]] diff --git a/doc/forum/rsync_over_ssh__63__/comment_2_aa690da6ecfb2b30fc5080ad76dc77b1._comment b/doc/forum/rsync_over_ssh__63__/comment_2_aa690da6ecfb2b30fc5080ad76dc77b1._comment new file mode 100644 index 0000000000..49003937b6 --- /dev/null +++ b/doc/forum/rsync_over_ssh__63__/comment_2_aa690da6ecfb2b30fc5080ad76dc77b1._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://m-f-k.myopenid.com/" + ip="92.194.43.135" + subject="comment 2" + date="2011-03-06T16:33:19Z" + content=""" +Great! This was the only thing about git-annex which could have kept me from using it. 
--Michael +"""]] diff --git a/doc/forum/seems_to_build_fine_on_haskell_platform_2011.mdwn b/doc/forum/seems_to_build_fine_on_haskell_platform_2011.mdwn new file mode 100644 index 0000000000..60014a7f53 --- /dev/null +++ b/doc/forum/seems_to_build_fine_on_haskell_platform_2011.mdwn @@ -0,0 +1 @@ +This is just a comment on git-annex building on haskell platform 2011.2.0.0 on archlinux. It just works. diff --git a/doc/forum/sparse_git_checkouts_with_annex.mdwn b/doc/forum/sparse_git_checkouts_with_annex.mdwn new file mode 100644 index 0000000000..97d2f445d3 --- /dev/null +++ b/doc/forum/sparse_git_checkouts_with_annex.mdwn @@ -0,0 +1,31 @@ +I checked in my music collection into git annex (about 25000 files) and i'm really impressed by the performance of git annex (after i've done an git-repack). Now i'm also moving my movies into the same git-annex, but i have the following layout of my disk drives: + +* small raid-1 for important stuff (music, documents), which is also backupped (aka: raid) +* big bulk data store (aka: media) + +In the git-annex the following layout of files is used: + +* documents/ <- on raid +* music/ <- on raid +* videos/ <- on media + +Now i didn't simply clone the raid-annex to media, but did an sparse-checkout (possible since version 1.7.0) + +* raid: .git-annex/, documents/ and music +* media: .git-annex/, videos/ + +As you can see i have to checkout the .git-annex directory with the file-logs twice which slows down git operations. Everything else works fine until now. git-annex doesn't have any problem, that only a part of the symlinks are present, which is really great. Is there a possibility to sparse checkout the .git-annex directory also? Perhaps splitting the log files in .git-annex/ into N subfolders, corresponding to the toplevel subfolders, like this? + +Before: + + $ ls .git-annex + 00 01 02.... + +After: + + $ ls .git-annex + documents/ music/ videos/ + $ ls .git-annex/documents + 00 01 02.... 
+ +This would make it possible to checkout only the part of the log files which i'm interested in. diff --git a/doc/forum/sparse_git_checkouts_with_annex/comment_1_c7dc199c5740a0e7ba606dfb5e3e579a._comment b/doc/forum/sparse_git_checkouts_with_annex/comment_1_c7dc199c5740a0e7ba606dfb5e3e579a._comment new file mode 100644 index 0000000000..7adf4fc4d6 --- /dev/null +++ b/doc/forum/sparse_git_checkouts_with_annex/comment_1_c7dc199c5740a0e7ba606dfb5e3e579a._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-04-07T16:32:04Z" + content=""" +That's awesome, I had not heard of git sparse checkouts before. + +It does not make sense to tie the log files to the directory of the corresponding files, as then the logs would have to move when the files are moved, which would be a PITA and likely make merging log file changes very complex. Also, of course, multiple files in different locations can point at the same content, which has the same log file. And, to cap it off, git-annex can need to access the log file for a given key without having the slightest idea what file in the repository might point to it, and it would be very expensive to scan the whole repository to find out what that file is in order to lookup the filename of the log file. + +The most likely change in git-annex that will make this better is in [[this_todo_item|todo/branching]] -- but it's unknown how to do it yet. 
+"""]] diff --git a/doc/forum/sparse_git_checkouts_with_annex/comment_2_e357db3ccc4079f07a291843975535eb._comment b/doc/forum/sparse_git_checkouts_with_annex/comment_2_e357db3ccc4079f07a291843975535eb._comment new file mode 100644 index 0000000000..d8088a2d82 --- /dev/null +++ b/doc/forum/sparse_git_checkouts_with_annex/comment_2_e357db3ccc4079f07a291843975535eb._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 2" + date="2011-04-07T16:33:30Z" + content=""" +BTW, git-annex unused *will* have a problem that not all the symlinks are present. It will suggest dropping content belonging to the excluded symlinks. +"""]] diff --git a/doc/forum/sparse_git_checkouts_with_annex/comment_3_fcfafca994194d57dccf5319c7c9e646._comment b/doc/forum/sparse_git_checkouts_with_annex/comment_3_fcfafca994194d57dccf5319c7c9e646._comment new file mode 100644 index 0000000000..1b849ef891 --- /dev/null +++ b/doc/forum/sparse_git_checkouts_with_annex/comment_3_fcfafca994194d57dccf5319c7c9e646._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkptNW1PzrVjYlJWP_9e499uH0mjnBV6GQ" + nickname="Christian" + subject="comment 3" + date="2011-04-08T07:31:03Z" + content=""" +So perhaps checking if git-status (or similar) complains about missing files is a possible solution for this? 
+"""]] diff --git a/doc/forum/sparse_git_checkouts_with_annex/comment_4_04dc14880f31eee2b6d767d4d4258c5a._comment b/doc/forum/sparse_git_checkouts_with_annex/comment_4_04dc14880f31eee2b6d767d4d4258c5a._comment new file mode 100644 index 0000000000..9280fc51da --- /dev/null +++ b/doc/forum/sparse_git_checkouts_with_annex/comment_4_04dc14880f31eee2b6d767d4d4258c5a._comment @@ -0,0 +1,20 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkptNW1PzrVjYlJWP_9e499uH0mjnBV6GQ" + nickname="Christian" + subject="comment 4" + date="2011-04-08T07:54:37Z" + content=""" +And something else i've done is, that i symlinked the video/ directory from the media annex to the normal raid annex + + ln -s ~/media/annex/video ~/annex + +And it's working out great. + + ~annex $ git annex whereis video/series/episode1.avi + whereis video/series/episode1.avi(1 copy) + f210b45a-60d3-11e0-b593-3318d96f2520 -- Trantor - Media + ok + +I really like this, perhaps it is a good idea to store all log files in every repo, but maybe there is a possibilitiy to to pack multiple log files into one single file, where not only the time, the present bit and the annex-repository is stored, but also the file key. I don't know if this format would also be merged correctly by the union merge driver. + +"""]] diff --git a/doc/forum/syncing_non-git_trees_with_git-annex.mdwn b/doc/forum/syncing_non-git_trees_with_git-annex.mdwn new file mode 100644 index 0000000000..9973782610 --- /dev/null +++ b/doc/forum/syncing_non-git_trees_with_git-annex.mdwn @@ -0,0 +1,46 @@ +I have a bunch of directory trees with large data files scattered over various computers and disk drives - they contain photos, videos, music, and so on. In many cases I initially copied one of these trees from one machine to another just as a cheap and dirty backup, and then made small modifications to both trees in ways I no longer remember. 
For example, I returned from a trip with a bunch of new photos, and then might have rotated some of them 90 degrees on one machine, and edited or renamed them on another. + +What I want to do now is use git-annex as a way of initially synchronising the trees, and then fully managing them on an ongoing basis. Note that the trees are *not* yet git repositories. In order to be able to detect straight-forward file renames, I believe that [[the SHA1 backend|tips/using_the_SHA1_backend]] probably makes the most sense. + +I've been playing around and arrived at the following setup procedure. For the sake of discussion, I assume that we have two trees `a` and `b` which live in the same directory referred to by `$td`, and that all large files end with the `.avi` suffix. + + # Setup git in 'a'. + cd $td/a + git init + + # Setup git-annex in 'a'. + echo '* annex.backend=SHA1' > .gitattributes + git add .gitattributes + git commit -m'use SHA1 backend' + git annex init + + # Annex all large files. + find -name \*.avi | xargs git annex add + git add . + git commit -m'Initial import' + + # Setup git in 'b'. + cd $td/b + git clone -n $td/a new + mv new/.git . + rmdir new + git reset # reset git index to b's wd - hangover from cloning from 'a' + + # Setup git-annex in 'b'. + # This merges a's (origin's) git-annex branch into the local git-annex branch. + git annex init + + # Annex all large files - because we're using SHA1 backend, some + # should hash to the same keys as in 'a'. + find -name \*.avi | xargs git annex add + git add . + git commit -m'Changes in b tree' + + git remote add a $td/a + + # Now pull changes in 'b' back to 'a'. + cd $td/a + git remote add b $td/b + git pull b master + +This seems to work, but have I missed anything? 
diff --git a/doc/forum/syncing_non-git_trees_with_git-annex/comment_1_7f9593bdfd95e4a8814e6cc5c44619e6._comment b/doc/forum/syncing_non-git_trees_with_git-annex/comment_1_7f9593bdfd95e4a8814e6cc5c44619e6._comment new file mode 100644 index 0000000000..bdec508792 --- /dev/null +++ b/doc/forum/syncing_non-git_trees_with_git-annex/comment_1_7f9593bdfd95e4a8814e6cc5c44619e6._comment @@ -0,0 +1,24 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-12-14T17:31:31Z" + content=""" +This is an entirely reasonable way to go about it. + +However, doing it this way causes files in B to always \"win\" -- If the same filename is in both repositories, with differing content, the version added in B will superscede the version from A. If A has a file that is not in B, a git commit -a in B will commit a deletion of that file. + +I might do it your way and look at the changes in B before (or even after) committing them to see if files from A were deleted or changed. + +Or, I might just instead keep B in a separate subdirectory in the repository, set up like so: + +
+mv b old_b
+git clone a b
+cd b
+mv ../old_b .
+git annex add old_b --not --exclude '*.avi'
+
+ +Or, a third way would be to commit A to a branch like branchA and B to a separate branchB, and not merge the branches at all. +"""]] diff --git a/doc/forum/syncing_non-git_trees_with_git-annex/comment_2_49f15478781a0ad5e46e75319070335c._comment b/doc/forum/syncing_non-git_trees_with_git-annex/comment_2_49f15478781a0ad5e46e75319070335c._comment new file mode 100644 index 0000000000..94b5c2ec11 --- /dev/null +++ b/doc/forum/syncing_non-git_trees_with_git-annex/comment_2_49f15478781a0ad5e46e75319070335c._comment @@ -0,0 +1,16 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawmKPMUX0YHBjE93eBsEnacwZsddSDue3PY" + nickname="Oliver" + subject="comment 2" + date="2011-12-23T22:04:08Z" + content=""" +As joey points out the problem is B overwrites A, so that any files in A that aren't in B will be removed. But the suggestion to keep B in a separate subdirectory in the repository means I'll end up with duplicates of files in both A and B. What I want is to have the merged superset of all files from both A and B with only one copy of identical files. + +The problem is that unique symlinks in A/master are deleted when B/master is merged in. To add back the deleted files after the merge you can do this: + + git checkout master~1 deleted_file_name #checkout a single deleted file called deleted_file_name + git diff master~1 master --numstat --name-only --diff-filter=D #get the names of all files deleted between master and master~1 + git diff master~1 master --numstat --name-only --diff-filter=D | xargs git checkout master~1 #checkout all deleted files between master and master~1 + +Once the first merge has been done after set up, you can continue to make changes to A and B and future merges won't require accounting for deleted files in this way. 
+"""]] diff --git a/doc/forum/tips:_special__95__remotes__47__hook_with_tahoe-lafs.mdwn b/doc/forum/tips:_special__95__remotes__47__hook_with_tahoe-lafs.mdwn new file mode 100644 index 0000000000..8981200d88 --- /dev/null +++ b/doc/forum/tips:_special__95__remotes__47__hook_with_tahoe-lafs.mdwn @@ -0,0 +1,22 @@ +This is work in progress, since there is now a [[special_remotes/hook]] for users to plug in whatever they want as a remote, here's my recipe for using tahoe-lafs as a remote, this is a copy and paste the relavent section from my .git/config file + + tahoe-store-hook = tahoe put $ANNEX_FILE tahoe:$ANNEX_KEY + tahoe-retrieve-hook = tahoe get tahoe:$ANNEX_KEY $ANNEX_FILE + tahoe-remove-hook = tahoe rm tahoe:$ANNEX_KEY + tahoe-checkpresent-hook = tahoe ls tahoe:$ANNEX_KEY 2>&1 || echo FAIL + +Where `tahoe:` is a tahoe-lafs alias, ideally you should create a new alias (DIR-CAP or whatever the terminolgy is) to store your files, I just used the default `tahoe:` alias for testing. + +The only quirk I've noticed is this... + +
+$ git annex whereis .
+whereis frink.jar (2 copies) 
+  	084603a8-7243-11e0-b1f5-83102bcd7953  -- here (testtest)
+   	1d1bc312-7243-11e0-a9ce-5f10c0ce9b0a
+ok
+
+ +1d1bc312-7243-11e0-a9ce-5f10c0ce9b0a is my [[!google tahoe-lafs]] remote, but there is no label/description on it. The checkpresent-hook was a little confusing when I was setting it up, I'm currently unsure if I am doing the right thing or not with my hook. My get and put commands are a little verbose for now, i might redirect it to /dev/null once I am happier with the overall performance/behaviour my setup. + +Other than the quirks above, I am able to put and get files from my tahoe-lafs remote. The only thing that I have not figured out is how to "remove a file" on the remote to free up space on the remote. diff --git a/doc/forum/tips:_special__95__remotes__47__hook_with_tahoe-lafs/comment_1_76bb33ce45ce6a91b86454147463193b._comment b/doc/forum/tips:_special__95__remotes__47__hook_with_tahoe-lafs/comment_1_76bb33ce45ce6a91b86454147463193b._comment new file mode 100644 index 0000000000..388641f69e --- /dev/null +++ b/doc/forum/tips:_special__95__remotes__47__hook_with_tahoe-lafs/comment_1_76bb33ce45ce6a91b86454147463193b._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawmBUR4O9mofxVbpb8JV9mEbVfIYv670uJo" + nickname="Justin" + subject="whereis labels" + date="2011-04-29T13:08:35Z" + content=""" +You should be able to fix the missing label by editing .git-annex/uuid.log and adding + + 1d1bc312-7243-11e0-a9ce-5f10c0ce9b0a tahoe +"""]] diff --git a/doc/forum/tips:_special__95__remotes__47__hook_with_tahoe-lafs/comment_2_4d9b9d47d01d606a475678f630797bf9._comment b/doc/forum/tips:_special__95__remotes__47__hook_with_tahoe-lafs/comment_2_4d9b9d47d01d606a475678f630797bf9._comment new file mode 100644 index 0000000000..e7c3d619dd --- /dev/null +++ b/doc/forum/tips:_special__95__remotes__47__hook_with_tahoe-lafs/comment_2_4d9b9d47d01d606a475678f630797bf9._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 2" + date="2011-04-29T15:24:56Z" + 
content=""" +If `tahoe ls` outputs only the key, on its own line, and exits nonzero if it's not present, then I think you did the right thing. + +To remove a file, use `git annex move file --from tahoe` and then you can drop it locally. +"""]] diff --git a/doc/forum/tips:_special__95__remotes__47__hook_with_tahoe-lafs/comment_3_8a812b11fcc2dc3b6fcf01cdbbb8459d._comment b/doc/forum/tips:_special__95__remotes__47__hook_with_tahoe-lafs/comment_3_8a812b11fcc2dc3b6fcf01cdbbb8459d._comment new file mode 100644 index 0000000000..16ad9e9886 --- /dev/null +++ b/doc/forum/tips:_special__95__remotes__47__hook_with_tahoe-lafs/comment_3_8a812b11fcc2dc3b6fcf01cdbbb8459d._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 3" + date="2011-04-29T15:33:24Z" + content=""" +@justin, I discovered that \"git annex describe\" did what I wanted + +@joey, yep that is the behaviour of \"tahoe ls\", thanks for the tip on removing the file from the remote. + +It seems to be working okay for now, the only concern is that on the remote everything is dumped into the same directory, but I can live with that, since I want to track biggish blobs and not lots of small little files. 
+"""]] diff --git a/doc/forum/tips:_special__95__remotes__47__hook_with_tahoe-lafs/comment_4_fc98c819bc5eb4d7c9e74d87fb4f6f3b._comment b/doc/forum/tips:_special__95__remotes__47__hook_with_tahoe-lafs/comment_4_fc98c819bc5eb4d7c9e74d87fb4f6f3b._comment new file mode 100644 index 0000000000..5d271c6f3c --- /dev/null +++ b/doc/forum/tips:_special__95__remotes__47__hook_with_tahoe-lafs/comment_4_fc98c819bc5eb4d7c9e74d87fb4f6f3b._comment @@ -0,0 +1,39 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 4" + date="2011-04-29T16:17:11Z" + content=""" +I've just tried to use the ANNEX_HASH_ variables, example of my configuration + +
+    git config annex.tahoe-store-hook 'tahoe mkdir $ANNEX_HASH_1 && tahoe put $ANNEX_FILE tahoe:$ANNEX_HASH_1/$ANNEX_KEY'
+    git config annex.tahoe-retrieve-hook 'tahoe get tahoe:$ANNEX_HASH_1/$ANNEX_KEY $ANNEX_FILE'
+    git config annex.tahoe-remove-hook 'tahoe rm tahoe:$ANNEX_HASH_1/$ANNEX_KEY'
+    git config annex.tahoe-checkpresent-hook 'tahoe ls tahoe:$ANNEX_HASH_1/$ANNEX_KEY 2>&1 || echo FAIL'
+    git annex initremote library type=hook hooktype=tahoe encryption=none
+    git annex describe 1d1bc312-7243-11e0-a9ce-5f10c0ce9b0a library
+
+ +It seems to work quite well for me now, I did run across this when I tried to drop a file locally, leaving the file on my remote + +
+jtang@x00:/tmp/annex3 $ git annex drop .
+drop frink.sh (checking library...) (unsafe) 
+  Could only verify the existence of 0 out of 1 necessary copies
+  Try making some of these repositories available:
+  	1d1bc312-7243-11e0-a9ce-5f10c0ce9b0a  -- library
+  (Use --force to override this check, or adjust annex.numcopies.)
+failed
+drop t/frink.jar (checking library...) (unsafe) 
+  Could only verify the existence of 0 out of 1 necessary copies
+  Try making some of these repositories available:
+  	1d1bc312-7243-11e0-a9ce-5f10c0ce9b0a  -- library
+  (Use --force to override this check, or adjust annex.numcopies.)
+failed
+git-annex: 2 failed
+1|jtang@x00:/tmp/annex3 $ 
+
+ +I do know that the files exist in my library as I have just inserted them, it seemed to work when I didnt have the hashing, it appears that the checkpresent doesn't seem to pass the ANNEX_HASH_* variables (from the limited debugging I did) +"""]] diff --git a/doc/forum/tips:_special__95__remotes__47__hook_with_tahoe-lafs/comment_5_c459fb479fe7b13eaea2377cfc1923a6._comment b/doc/forum/tips:_special__95__remotes__47__hook_with_tahoe-lafs/comment_5_c459fb479fe7b13eaea2377cfc1923a6._comment new file mode 100644 index 0000000000..9127cdeeaa --- /dev/null +++ b/doc/forum/tips:_special__95__remotes__47__hook_with_tahoe-lafs/comment_5_c459fb479fe7b13eaea2377cfc1923a6._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 5" + date="2011-04-29T18:01:04Z" + content=""" +I've corrected the missing `ANNEX_HASH_*` oversight. (It also affected removal, btw.) +"""]] diff --git a/doc/forum/tips:_special__95__remotes__47__hook_with_tahoe-lafs/comment_6_2e9da5a919bbbc27b32de3b243867d4f._comment b/doc/forum/tips:_special__95__remotes__47__hook_with_tahoe-lafs/comment_6_2e9da5a919bbbc27b32de3b243867d4f._comment new file mode 100644 index 0000000000..80874db31d --- /dev/null +++ b/doc/forum/tips:_special__95__remotes__47__hook_with_tahoe-lafs/comment_6_2e9da5a919bbbc27b32de3b243867d4f._comment @@ -0,0 +1,23 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 6" + date="2011-04-29T20:11:08Z" + content=""" +Cool, that seems to make things work as expected, here's an updated recipe + + +
+git config annex.tahoe-store-hook 'tahoe mkdir tahoe:$ANNEX_HASH_1/$ANNEX_HASH_2 && tahoe put $ANNEX_FILE tahoe:$ANNEX_HASH_1/$ANNEX_HASH_2/$ANNEX_KEY'
+git config annex.tahoe-retrieve-hook 'tahoe get tahoe:$ANNEX_HASH_1/$ANNEX_HASH_2/$ANNEX_KEY $ANNEX_FILE'
+git config annex.tahoe-remove-hook 'tahoe rm tahoe:$ANNEX_HASH_1/$ANNEX_HASH_2/$ANNEX_KEY'
+git config annex.tahoe-checkpresent-hook 'tahoe ls tahoe:$ANNEX_HASH_1/$ANNEX_HASH_2/$ANNEX_KEY 2>&1 || echo FAIL'
+git annex initremote library type=hook hooktype=tahoe encryption=none
+git annex describe 1d1bc312-7243-11e0-a9ce-5f10c0ce9b0a library
+
+ + +It just needs some of the output redirected to /dev/null. + +(I updated this comment to fix a bug. --[[Joey]]) +"""]] diff --git a/doc/forum/tips:_special__95__remotes__47__hook_with_tahoe-lafs/comment_7_d636c868524b2055ee85832527437f90._comment b/doc/forum/tips:_special__95__remotes__47__hook_with_tahoe-lafs/comment_7_d636c868524b2055ee85832527437f90._comment new file mode 100644 index 0000000000..1d75fb9631 --- /dev/null +++ b/doc/forum/tips:_special__95__remotes__47__hook_with_tahoe-lafs/comment_7_d636c868524b2055ee85832527437f90._comment @@ -0,0 +1,20 @@ +[[!comment format=mdwn + username="zooko" + ip="97.118.97.117" + subject="request for information, plus some ideas" + date="2011-05-14T05:07:17Z" + content=""" +Hey Jimmy: how's this working for you now? I would expect it to go slower and slower since Tahoe-LAFS has an O(N) algorithm for reading or updating directories. + +Of course, if it is still fast enough for your uses then that's okay. :-) + +(We're working on optimizations of this for future releases of Tahoe-LAFS.) + +I'd like to understand the desired behavior of store-hook and retrieve-hook better, in order to see if there is a more efficient way to use Tahoe-LAFS for this. + +Off to look for docs. 
+ +Regards, + +Zooko +"""]] diff --git a/doc/forum/tips:_special__95__remotes__47__hook_with_tahoe-lafs/comment_8_39dc449cc60a787c3bfbfaaac6f9be0c._comment b/doc/forum/tips:_special__95__remotes__47__hook_with_tahoe-lafs/comment_8_39dc449cc60a787c3bfbfaaac6f9be0c._comment new file mode 100644 index 0000000000..dc97128bd1 --- /dev/null +++ b/doc/forum/tips:_special__95__remotes__47__hook_with_tahoe-lafs/comment_8_39dc449cc60a787c3bfbfaaac6f9be0c._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 8" + date="2011-05-14T10:02:26Z" + content=""" +@joey thanks for the update in the previous comment, I had forgotten about updating it. + +@zooko it's working okay for me right now, since I'm only putting fairly big blobs of stuff on to it and only things that I *really* care about. On the performance side, if it ran faster then it would be nicer :) +"""]] diff --git a/doc/forum/unannex_alternatives.mdwn b/doc/forum/unannex_alternatives.mdwn new file mode 100644 index 0000000000..efd05838e6 --- /dev/null +++ b/doc/forum/unannex_alternatives.mdwn @@ -0,0 +1,9 @@ +what is the work flow to get a file that is in git-annex out of there and into git? (current situation: `git-annex add`ed a bunch of pictures, later found make files in there which i'd rather have in git for proper source code control) + +the most intuitive thing to do is `git annex unannex`, which at first seemed to do the right thing, but when committing there came the hook and everything was back to where it was before. + +i could disable the hook as a workaround, but that doesn't smell like a good work flow. 
+ +the [[man page|git-annex]] does warn that `unannex` is only supposed to be used against unintentional `git annex add`s (probably meaning that it should be used before something is committed), but the alternatives it suggests (`git rm` and `git annex drop`) don't do what i want to do. 
+ +am i missing something or is there really no work flow for this? --[[chrysn]] diff --git a/doc/forum/unannex_alternatives/comment_1_dcd4cd41280b41512bbdffafaf307993._comment b/doc/forum/unannex_alternatives/comment_1_dcd4cd41280b41512bbdffafaf307993._comment new file mode 100644 index 0000000000..7f278d2bc9 --- /dev/null +++ b/doc/forum/unannex_alternatives/comment_1_dcd4cd41280b41512bbdffafaf307993._comment @@ -0,0 +1,46 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-02-02T00:39:10Z" + content=""" +Git-annex's commit hook does not prevent unannex being used. The file you unannex will not be checked into git anymore and will be a regular file again, not a git-annex symlink. + +For example, here's a transcript: + +
+joey@gnu:~/tmp>mkdir demo
+joey@gnu:~/tmp>cd demo
+joey@gnu:~/tmp/demo>git init
+Initialized empty Git repository in /home/joey/tmp/demo/.git/
+joey@gnu:~/tmp/demo>git annex init demo
+init demo ok
+joey@gnu:~/tmp/demo>echo hi > file
+joey@gnu:~/tmp/demo>git annex add file 
+add file ok
+(Recording state in git...)
+joey@gnu:~/tmp/demo>git commit -m add
+[master 64cf267] add
+ 2 files changed, 2 insertions(+), 0 deletions(-)
+ create mode 100644 .git-annex/WORM:1296607093:3:file.log
+ create mode 120000 file
+joey@gnu:~/tmp/demo>git annex unannex file
+unannex file ok
+(Recording state in git...)
+joey@gnu:~/tmp/demo>ls -l file
+-rw-r--r-- 1 joey joey 3 Feb  1 20:38 file
+joey@gnu:~/tmp/demo>git commit
+[master 78a09cc] unannex
+ 2 files changed, 1 insertions(+), 2 deletions(-)
+ delete mode 120000 file
+joey@gnu:~/tmp/demo>ls -l file
+-rw-r--r-- 1 joey joey 3 Feb  1 20:38 file
+joey@gnu:~/tmp/demo>git status
+# On branch master
+# Untracked files:
+#   (use \"git add &lt;file&gt;...\" to include in what will be committed)
+#
+#	file
+nothing added to commit but untracked files present (use \"git add\" to track)
+
+"""]] diff --git a/doc/forum/unannex_alternatives/comment_2_58a72a9fe0f58c7af0b4d7927a2dd21d._comment b/doc/forum/unannex_alternatives/comment_2_58a72a9fe0f58c7af0b4d7927a2dd21d._comment new file mode 100644 index 0000000000..91ddadf8c6 --- /dev/null +++ b/doc/forum/unannex_alternatives/comment_2_58a72a9fe0f58c7af0b4d7927a2dd21d._comment @@ -0,0 +1,36 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 2" + date="2011-02-02T00:41:24Z" + content=""" +And following on to my transcript, you can then add the file to git in the regular git way, and it works fine: + +
+joey@gnu:~/tmp/demo>git add file
+joey@gnu:~/tmp/demo>git commit
+[master 225ffc0] added as regular git file, not in annex
+ 1 files changed, 1 insertions(+), 0 deletions(-)
+ create mode 100644 file
+joey@gnu:~/tmp/demo>ls -l file
+-rw-r--r-- 1 joey joey 3 Feb  1 20:38 file
+joey@gnu:~/tmp/demo>git log file
+commit 225ffc048f5af7c0466b3b1fe549a6d5e9a9e9fe
+Author: Joey Hess 
+Date:   Tue Feb 1 20:43:13 2011 -0400
+
+    added as regular git file, not in annex
+
+commit 78a09cc791b875c3b859ca9401e5b6472bf19d08
+Author: Joey Hess 
+Date:   Tue Feb 1 20:38:30 2011 -0400
+
+    unannex
+
+commit 64cf267734adae05c020d9fd4d5a7ff7c64390db
+Author: Joey Hess 
+Date:   Tue Feb 1 20:38:18 2011 -0400
+
+    add
+
+"""]] diff --git a/doc/forum/unannex_alternatives/comment_3_b1687fc8f9e7744327bbeb6f0635d1cd._comment b/doc/forum/unannex_alternatives/comment_3_b1687fc8f9e7744327bbeb6f0635d1cd._comment new file mode 100644 index 0000000000..9f3223578b --- /dev/null +++ b/doc/forum/unannex_alternatives/comment_3_b1687fc8f9e7744327bbeb6f0635d1cd._comment @@ -0,0 +1,16 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 3" + date="2011-02-02T00:46:00Z" + content=""" +Sorry for all the followups, but I see now that if you unannex, then add the file to git normally, and commit, the hook *does* misbehave. + +This seems to be a bug. git-annex's hook thinks that you have used git annex unlock (or \"git annex edit\") on the file and are now committing a changed version, and the right thing to do there is to add the new content to the annex and update the symlink accordingly. I'll track this bug over at [[bugs/unannex_vs_unlock_hook_confusion]]. + +So, committing after unannex, and before checking the file into git in the +usual way, is a workaround. But only if you do a "git commit" to commit +staged changes. + +Anyway, this confusing point is fixed in git now! +"""]] diff --git a/doc/forum/using_git_annex_to_merge_and_synchronize_2_directories___40__like_unison__41__.mdwn b/doc/forum/using_git_annex_to_merge_and_synchronize_2_directories___40__like_unison__41__.mdwn new file mode 100644 index 0000000000..86e317da87 --- /dev/null +++ b/doc/forum/using_git_annex_to_merge_and_synchronize_2_directories___40__like_unison__41__.mdwn @@ -0,0 +1,7 @@ +I would like to use git-annex to synchronize 2 directories in the same manner as unison. + +I'm starting with 2 directories. There is an overlap of the same set of files in each directory, but each directory also has additional files as well. + +I create a git annex in each directory but when I do a git pull it merges and produces conflicts on those files that are the same. 
+ +What is the correct workflow for this type of scenario? diff --git a/doc/forum/using_git_annex_to_merge_and_synchronize_2_directories___40__like_unison__41__/comment_1_5c3ee8a8aaa6d0918c0cc9683ce177ae._comment b/doc/forum/using_git_annex_to_merge_and_synchronize_2_directories___40__like_unison__41__/comment_1_5c3ee8a8aaa6d0918c0cc9683ce177ae._comment new file mode 100644 index 0000000000..4682ea64f7 --- /dev/null +++ b/doc/forum/using_git_annex_to_merge_and_synchronize_2_directories___40__like_unison__41__/comment_1_5c3ee8a8aaa6d0918c0cc9683ce177ae._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://www.joachim-breitner.de/" + nickname="nomeata" + subject="comment 1" + date="2011-12-18T13:57:33Z" + content=""" +Are the files identical or different? I today did something like that with similar, but not identical directories containing media files, and git happily merged them. but there, same files had same content. + +Also, make sure you use the same backend. In my case, one of the machines runs Debian stable, so I use the WORM backend, not the SHA backend. +"""]] diff --git a/doc/forum/using_git_annex_to_merge_and_synchronize_2_directories___40__like_unison__41__/comment_2_648946353c6d90c57351cce4010f1301._comment b/doc/forum/using_git_annex_to_merge_and_synchronize_2_directories___40__like_unison__41__/comment_2_648946353c6d90c57351cce4010f1301._comment new file mode 100644 index 0000000000..bdd4b25e43 --- /dev/null +++ b/doc/forum/using_git_annex_to_merge_and_synchronize_2_directories___40__like_unison__41__/comment_2_648946353c6d90c57351cce4010f1301._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 2" + date="2011-12-19T18:24:59Z" + content=""" +I'd recommend using the SHA backend for this, the WORM backend would produce conflicts if the files' modification times changed. + +[[syncing_non-git_trees_with_git-annex]] describes one way to do it. 
+"""]] diff --git a/doc/forum/version_3_upgrade.mdwn b/doc/forum/version_3_upgrade.mdwn new file mode 100644 index 0000000000..7fdbcbc805 --- /dev/null +++ b/doc/forum/version_3_upgrade.mdwn @@ -0,0 +1,9 @@ +after upgrading to git-annex 3, i'm stuck with diverging git-annex branches -- i didn't manage to follow this line in the directions: + +> After this upgrade, you should make sure you include the git-annex branch when git pushing and pulling. + +could you explain how to do that in a littel more detail? git pull seems to only merge master, although i have these ``.git/config`` settings: + + [branch "git-annex"] + remote = origin + merge = git-annex diff --git a/doc/forum/version_3_upgrade/comment_1_05fc9c9cad26c520bebb98c852c71e35._comment b/doc/forum/version_3_upgrade/comment_1_05fc9c9cad26c520bebb98c852c71e35._comment new file mode 100644 index 0000000000..18746225e9 --- /dev/null +++ b/doc/forum/version_3_upgrade/comment_1_05fc9c9cad26c520bebb98c852c71e35._comment @@ -0,0 +1,13 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-08-17T01:33:08Z" + content=""" +It's ok that `git pull` does not merge the git-annex branch. You can merge it with `git annex merge`, or it will be done +automatically when you use other git-annex commands. + +If you use `git pull` and `git push` without any options, the defaults will make git pull and push the git-annex branch automatically. + +But if you're in the habit of doing `git push origin master`, that won't cause the git-annex branch to be pushed (use `git push origin git-annex` to manually push it then). Similarly, `git pull origin master` won't pull it. And also, the `remote.origin.fetch` setting in `.git/config` can be modified in ways that make `git pull` not automatically pull the git-annex branch. So those are the things to avoid after upgrade to v3, basically. 
+"""]] diff --git a/doc/forum/vlc_and_git-annex.mdwn b/doc/forum/vlc_and_git-annex.mdwn new file mode 100644 index 0000000000..cb07f8183c --- /dev/null +++ b/doc/forum/vlc_and_git-annex.mdwn @@ -0,0 +1,11 @@ +I used to save movies with the srt subtitle files next to them. + +Usually vlc finds it because it's on the same directory than the movie file, however with git annex the link is located on another folder. +So after adding movies to git, the subtitles doesn't load anymore. + +couldn't find a quick fix. I'm thinking a bash script, but wanted to discuss it here with all annex users. + +I know It's out of annex scope, but I think a movie archive is a great scenario for git-annex. +most of my HD is filled up with movies from the camcorder, screencast, etc... +And we usually don't modify those files + diff --git a/doc/forum/vlc_and_git-annex/comment_1_9c9ab8ce463cf74418aa2f385955f165._comment b/doc/forum/vlc_and_git-annex/comment_1_9c9ab8ce463cf74418aa2f385955f165._comment new file mode 100644 index 0000000000..700b3808de --- /dev/null +++ b/doc/forum/vlc_and_git-annex/comment_1_9c9ab8ce463cf74418aa2f385955f165._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-12-23T16:16:19Z" + content=""" +From what you say, it seems that vlc is following the symlink to the movie content, and then looking for subtitles next to the file the symlink points to. It would have to explicitly realpath the symlink to have this behavior, and this sounds like a misfeature.. perhaps you could point out to the vlc people the mistake in doing so? + +There's a simple use-case where this behavior is obviously wrong, without involving git-annex. Suppose I have a movie, and one version of subtitles for it, in directory `foo`. I want to modify the subtitles, so I make a new directory `bar`, symlink the large movie file from `foo` to save space, and copy over and edit the subtitles from `foo`. 
Now I run vlc in `bar` to test my new subtitles. If it ignores the locally present subtitles and goes off looking for the ones in `foo`, I say this is broken behavior. +"""]] diff --git a/doc/forum/vlc_and_git-annex/comment_2_037f94c1deeac873dbdb36cd4c927e45._comment b/doc/forum/vlc_and_git-annex/comment_2_037f94c1deeac873dbdb36cd4c927e45._comment new file mode 100644 index 0000000000..3c69f5fe46 --- /dev/null +++ b/doc/forum/vlc_and_git-annex/comment_2_037f94c1deeac873dbdb36cd4c927e45._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 2" + date="2011-12-23T18:43:05Z" + content=""" +Since subtitle files are typically pretty small, a workaround is to simply check them into git directly, and only use git-annex for the movies. (Or `git annex unannex` the ones you've already annexed.) +"""]] diff --git a/doc/forum/wishlist:_command_options_changes.mdwn b/doc/forum/wishlist:_command_options_changes.mdwn new file mode 100644 index 0000000000..4509f815d7 --- /dev/null +++ b/doc/forum/wishlist:_command_options_changes.mdwn @@ -0,0 +1,16 @@ +Some suggestions for changes to command options: + + * --verbose: + * add alternate: -v + + * --from: + * replace with: -s $SOURCE || --source=$SOURCE + + * --to: + * replace with: -d $DESTINATION || --destination=$DESTINATION + + * --force: + * add alternate: -F + * "-f" was removed in v0.20110417 + * since it forces unsafe operations, should be capitalized to reduce chance of accidental usage.
+ diff --git a/doc/forum/wishlist:_command_options_changes/comment_1_bfba72a696789bf21b2435dea15f967a._comment b/doc/forum/wishlist:_command_options_changes/comment_1_bfba72a696789bf21b2435dea15f967a._comment new file mode 100644 index 0000000000..0ab113211e --- /dev/null +++ b/doc/forum/wishlist:_command_options_changes/comment_1_bfba72a696789bf21b2435dea15f967a._comment @@ -0,0 +1,17 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 1" + date="2011-04-17T23:46:37Z" + content=""" +--to and --from seem to have different semantics than --source and --destination. Subtle, but still different. + +That being said, I am not sure --from and --to are needed at all. Calling the local repo . and all remotes by their name, they are arguably redundant and removing them would make the syntax a lot prettier; mv and cp don't need them, either. + +I am not sure changing syntax at this point is considered good style though personally, I wouldn't mind adapting and would actually prefer it over using --to and --from. + +-v and -q would be nice. + + +Richard +"""]] diff --git a/doc/forum/wishlist:_command_options_changes/comment_2_f6a637c78c989382e3c22d41b7fb4cc2._comment b/doc/forum/wishlist:_command_options_changes/comment_2_f6a637c78c989382e3c22d41b7fb4cc2._comment new file mode 100644 index 0000000000..0072ae1d71 --- /dev/null +++ b/doc/forum/wishlist:_command_options_changes/comment_2_f6a637c78c989382e3c22d41b7fb4cc2._comment @@ -0,0 +1,19 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 2" + date="2011-04-19T20:13:10Z" + content=""" +Let's see.. + +* -v is already an alias for --verbose + +* I don't find --source and --destination as easy to type or as clear as --from or --to. + +* -F is fast, so it cannot be used for --force. 
And I have no desire to make it easy to mistype a short option and enable --force; it can lose data. + +@richard while it would be possible to support some syntax like \"git annex copy . remote\"; what is it supposed to do if there are local files named foo and bar, and remotes named foo and bar? Does \"git annex copy foo bar\" copy file foo to remote bar, or file bar from remote foo? I chose to use --from/--to to specify remotes independent of files to avoid such +ambiguity, which plain old `cp` doesn't have since it's operating entirely on filesystem objects, not both filesystem objects and abstract remotes. + +Seems like nothing to do here. [[done]] --[[Joey]] +"""]] diff --git a/doc/forum/wishlist:_command_options_changes/comment_3_bf1114533d2895804e531e76eb6b8095._comment b/doc/forum/wishlist:_command_options_changes/comment_3_bf1114533d2895804e531e76eb6b8095._comment new file mode 100644 index 0000000000..9fcbae6d20 --- /dev/null +++ b/doc/forum/wishlist:_command_options_changes/comment_3_bf1114533d2895804e531e76eb6b8095._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 3" + date="2011-04-20T21:28:06Z" + content=""" +Good point. scp fixes this by using a colon, but as colons aren't needed in git-annex remotes' names... -- RichiH +"""]] diff --git a/doc/forum/wishlist:_define_remotes_that_must_have_all_files.mdwn b/doc/forum/wishlist:_define_remotes_that_must_have_all_files.mdwn new file mode 100644 index 0000000000..156cfb0090 --- /dev/null +++ b/doc/forum/wishlist:_define_remotes_that_must_have_all_files.mdwn @@ -0,0 +1,18 @@ +I would like to be able to name a few remotes that must retain *all* annexed +files. `git-annex fsck` should warn me if any files are missing from those +remotes, even if `annex.numcopies` has been satisfied by other remotes.
+ +I imagine this could also be useful for bup remotes, but I haven't actually +looked at those yet. + +Based on existing output, this is what a warning message could look like: + + fsck FILE + 3 of 3 trustworthy copies of FILE exist. + FILE is, however, still missing from these required remotes: + UUID -- Backup Drive 1 + UUID -- Backup Drive 2 + Back it up with git-annex copy. + Warning + +What do you think? diff --git a/doc/forum/wishlist:_define_remotes_that_must_have_all_files/comment_1_cceccc1a1730ac688d712b81a44e31c3._comment b/doc/forum/wishlist:_define_remotes_that_must_have_all_files/comment_1_cceccc1a1730ac688d712b81a44e31c3._comment new file mode 100644 index 0000000000..1f65fd982f --- /dev/null +++ b/doc/forum/wishlist:_define_remotes_that_must_have_all_files/comment_1_cceccc1a1730ac688d712b81a44e31c3._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-04-23T16:27:13Z" + content=""" +Seems to have a scalability problem, what happens when such a repository becomes full? + +Another way to accomplish I think the same thing is to pick the repositories that you would include in such a set, and make all other repositories untrusted. And set numcopies as desired. Then git-annex will never remove files from the set of non-untrusted repositories, and fsck will warn if a file is present on only an untrusted repository. 
+"""]] diff --git a/doc/forum/wishlist:_define_remotes_that_must_have_all_files/comment_2_eec848fcf3979c03cbff2b7407c75a7a._comment b/doc/forum/wishlist:_define_remotes_that_must_have_all_files/comment_2_eec848fcf3979c03cbff2b7407c75a7a._comment new file mode 100644 index 0000000000..1855cdda01 --- /dev/null +++ b/doc/forum/wishlist:_define_remotes_that_must_have_all_files/comment_2_eec848fcf3979c03cbff2b7407c75a7a._comment @@ -0,0 +1,16 @@ +[[!comment format=mdwn + username="gernot" + ip="87.79.209.169" + subject="comment 2" + date="2011-04-24T11:20:05Z" + content=""" +Right, I have thought about untrusting all but a few remotes to achieve +something similar before and I'm sure it would kind of work. It would be more +of an ugly workaround, however, because I would have to untrust remotes that +are, in reality, at least semi-trusted. That's why an extra option/attribute +for that kind of purpose/remote would be nice. + +Obviously I didn't see the scalability problem though. Good Point. Maybe I can +achieve the same thing by writing a log parsing script for myself? + +"""]] diff --git a/doc/forum/wishlist:_do_round_robin_downloading_of_data.mdwn b/doc/forum/wishlist:_do_round_robin_downloading_of_data.mdwn new file mode 100644 index 0000000000..6299899e4f --- /dev/null +++ b/doc/forum/wishlist:_do_round_robin_downloading_of_data.mdwn @@ -0,0 +1,5 @@ +Given that git/config will have information on remotes and maybe costs, it might be a good idea to do a simple round robin selection of remotes to download files where the costs are the same. + +This of course assumes that we like the idea of "parallel" launching and running of curl/rsync processes... + +This wish item is probably only useful for the paranoid people who store more than 1 copy of their data. 
diff --git a/doc/forum/wishlist:_do_round_robin_downloading_of_data/comment_1_460335b0e59ad03871c524f1fe812357._comment b/doc/forum/wishlist:_do_round_robin_downloading_of_data/comment_1_460335b0e59ad03871c524f1fe812357._comment new file mode 100644 index 0000000000..6a5fd3d530 --- /dev/null +++ b/doc/forum/wishlist:_do_round_robin_downloading_of_data/comment_1_460335b0e59ad03871c524f1fe812357._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-04-03T16:39:35Z" + content=""" +I dunno about parrallel downloads -- eek! -- but there is at least room for improvement of what \"git annex get\" does when there are multiple remotes that have a file, and the one it decides to use is not available, or very slow, or whatever. +"""]] diff --git a/doc/forum/wishlist:_git-annex_replicate.mdwn b/doc/forum/wishlist:_git-annex_replicate.mdwn new file mode 100644 index 0000000000..0d926b3375 --- /dev/null +++ b/doc/forum/wishlist:_git-annex_replicate.mdwn @@ -0,0 +1,12 @@ +I'd like to be able to do something like the following: + + * Create encrypted git-annex remotes on a couple of semi-trusted machines - ones that have good connectivity, but non-redundant hardware + * set numcopies=3 + * run `git-annex replicate` and have git-annex run the appropriate copy commands to make sure every file is on at least 3 machines + +There would also likely be a `git annex rebalance` command which could be used if remotes were added or removed. If possible, it should copy files between servers directly, rather than proxy through a potentially slow client. + +There might be the need to have a 'replication_priority' option for each remote that configures which machines would be preferred. That way you could set your local server to a high priority to ensure that it is always 1 of the 3 machines used and files are distributed across 2 of the remaining remotes. 
Other than priority, other options that might help: + + * maxspace - A self imposed quota per remote machine. git-annex replicate should try to replicate files first to machines with more free space. maxspace would change the free space calculation to be `min(actual_free_space, maxspace - space_used_by_git_annex)` + * bandwidth - when replicating files, copies should be done between machines with the highest available bandwidth. ( I think this option could be useful for git-annex get in general) diff --git a/doc/forum/wishlist:_git-annex_replicate/comment_1_9926132ec6052760cdf28518a24e2358._comment b/doc/forum/wishlist:_git-annex_replicate/comment_1_9926132ec6052760cdf28518a24e2358._comment new file mode 100644 index 0000000000..cec971ee3b --- /dev/null +++ b/doc/forum/wishlist:_git-annex_replicate/comment_1_9926132ec6052760cdf28518a24e2358._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 1" + date="2011-04-22T18:27:00Z" + content=""" +While having remotes redistribute introduces some obvious security concerns, I might use it. + +As remotes support a cost factor already, you can basically implement bandwidth through that. +"""]] diff --git a/doc/forum/wishlist:_git-annex_replicate/comment_2_c43932f4194aba8fb2470b18e0817599._comment b/doc/forum/wishlist:_git-annex_replicate/comment_2_c43932f4194aba8fb2470b18e0817599._comment new file mode 100644 index 0000000000..9d50d15310 --- /dev/null +++ b/doc/forum/wishlist:_git-annex_replicate/comment_2_c43932f4194aba8fb2470b18e0817599._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 2" + date="2011-04-23T16:22:07Z" + content=""" +Besides the cost values, annex.diskreserve was recently added. (But is not available for special remotes.)
+ +I have held off on adding high-level management stuff like this to git-annex, as it's hard to make it generic enough to cover use cases. + +A low-level way to accomplish this would be to have a way for `git annex get` and/or `copy` to skip files when `numcopies` is already satisfied. Then cron jobs could be used. +"""]] diff --git a/doc/forum/wishlist:_git-annex_replicate/comment_3_c13f4f9c3d5884fc6255fd04feadc2b1._comment b/doc/forum/wishlist:_git-annex_replicate/comment_3_c13f4f9c3d5884fc6255fd04feadc2b1._comment new file mode 100644 index 0000000000..e7eb06b3b1 --- /dev/null +++ b/doc/forum/wishlist:_git-annex_replicate/comment_3_c13f4f9c3d5884fc6255fd04feadc2b1._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawmBUR4O9mofxVbpb8JV9mEbVfIYv670uJo" + nickname="Justin" + subject="comment 3" + date="2011-04-23T17:54:42Z" + content=""" +Hmm, so it seems there is almost a way to do this already. + +I think the one thing that isn't currently possible is to have 'plain' ssh remotes.. basically something just like the directory remote, but able to take a ssh user@host/path url. something like sshfs could be used to fake this, but for things like fsck you would want to do the sha1 calculations on the remote host. +"""]] diff --git a/doc/forum/wishlist:_git-annex_replicate/comment_4_63f24abf086d644dced8b01e1a9948c9._comment b/doc/forum/wishlist:_git-annex_replicate/comment_4_63f24abf086d644dced8b01e1a9948c9._comment new file mode 100644 index 0000000000..3805464a69 --- /dev/null +++ b/doc/forum/wishlist:_git-annex_replicate/comment_4_63f24abf086d644dced8b01e1a9948c9._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 4" + date="2011-09-19T18:54:46Z" + content=""" +git annex get/copy/drop all now support a --auto flag, which makes them only act on files that have not enough or too many copies. 
This allows for some crude replication; it doesn't take into account which repositories should be filled up more (beyond honoring annex.diskreserve), nor does it try to optimally use bandwidth (beyond honoring configured annex-cost). You have to run it in every repository that you want to participate in the replication, too. But it's probably a Good Enough solution. See [[walkthrough/automatically_managing_content]]. +"""]] diff --git a/doc/forum/wishlist:_git_annex_put_--_same_as_get__44___but_for_defaults.mdwn b/doc/forum/wishlist:_git_annex_put_--_same_as_get__44___but_for_defaults.mdwn new file mode 100644 index 0000000000..9cd56749e8 --- /dev/null +++ b/doc/forum/wishlist:_git_annex_put_--_same_as_get__44___but_for_defaults.mdwn @@ -0,0 +1,17 @@ +I am running centralized git-annex exclusively. + +Similar to + + git annex get + +I'd like to have a + + git annex put + +which would put all files on the default remote(s). + +My main reason for not wanting to use copy --to is that I need to specify the remote's name in this case which makes writing a wrapper unnecessarily hard. Also, this would allow + + mr push + +to do the right thing all by itself. diff --git a/doc/forum/wishlist:_git_annex_put_--_same_as_get__44___but_for_defaults/comment_1_d5413c8acce308505e4e2bec82fb1261._comment b/doc/forum/wishlist:_git_annex_put_--_same_as_get__44___but_for_defaults/comment_1_d5413c8acce308505e4e2bec82fb1261._comment new file mode 100644 index 0000000000..fe1d5520f4 --- /dev/null +++ b/doc/forum/wishlist:_git_annex_put_--_same_as_get__44___but_for_defaults/comment_1_d5413c8acce308505e4e2bec82fb1261._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-04-04T18:13:46Z" + content=""" +This begs the question: What is the default remote? It's probably *not* the same repository that git's master branch is tracking (ie, origin/master). 
It seems there would have to be an annex.defaultremote setting. + +BTW, mr can easily be configured on a per-repo basis so that \"mr push\" copies to somewhere: `push = git push; git annex push wherever` +"""]] diff --git a/doc/forum/wishlist:_git_annex_put_--_same_as_get__44___but_for_defaults/comment_2_0aa227c85d34dfff4e94febca44abea8._comment b/doc/forum/wishlist:_git_annex_put_--_same_as_get__44___but_for_defaults/comment_2_0aa227c85d34dfff4e94febca44abea8._comment new file mode 100644 index 0000000000..3090b575b7 --- /dev/null +++ b/doc/forum/wishlist:_git_annex_put_--_same_as_get__44___but_for_defaults/comment_2_0aa227c85d34dfff4e94febca44abea8._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 2" + date="2011-04-04T20:45:30Z" + content=""" +In my case, the remotes are the same, but adding a new option could make sense. + +And while I can tell mr what to do explicitly, I would prefer if it did the right thing all by itself. Having to change configs in two separate places is less than ideal. + +I am not sure what you mean by `git annex push` as that does not exist. Did you mean copy? 
+"""]] diff --git a/doc/forum/wishlist:_git_annex_put_--_same_as_get__44___but_for_defaults/comment_3_2082f4d708a584a1403cc1d4d005fb56._comment b/doc/forum/wishlist:_git_annex_put_--_same_as_get__44___but_for_defaults/comment_3_2082f4d708a584a1403cc1d4d005fb56._comment new file mode 100644 index 0000000000..01dc7813ff --- /dev/null +++ b/doc/forum/wishlist:_git_annex_put_--_same_as_get__44___but_for_defaults/comment_3_2082f4d708a584a1403cc1d4d005fb56._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 1" + date="2011-04-04T10:28:01Z" + content=""" +Going one step further, a --min-copy could put all files so that numcopies is satisfied. --all could push to all available ones. + +To take everything another step further, if it was possible to group remotes, one could act on the groups. \"all\" would be an obvious choice for a group that always exists, everything else would be set up by the user. +"""]] diff --git a/doc/forum/wishlist:_git_annex_status.mdwn b/doc/forum/wishlist:_git_annex_status.mdwn new file mode 100644 index 0000000000..add865410e --- /dev/null +++ b/doc/forum/wishlist:_git_annex_status.mdwn @@ -0,0 +1,19 @@ +Ideally, it would look similar to this. 
And yes, I put "put" in there ;) + + non-annex % git annex status + git annex status: error: not a git annex repository + annex % git annex status + annex object storage version: A + annex backend engine: {WORM,SHA512,...} + Estimated local annex size: B MiB + Estimated total annex size: C MiB + Files without file size information in local annex: D + Files without file size information in total annex: E + Last fsck: datetime + Last git pull: datetime - $annex_name + Last git push: datetime - $annex_name + Last git annex get: datetime - $annex_name + Last git annex put: datetime - $annex_name + annex % + +Datetime could be ISO's YYYY-MM-DDThh:mm:ss or, personal preference, YYYY-MM-DD--hh-mm-ss. I prefer the latter as it's DNS-, tag- and filename-safe which is why I am using it for everything. In a perfect world, ISO would standardize YYYY-MM-DD-T-hh-mm-ss-Z[-SSSSSSSS][--$timezone], but meh. diff --git a/doc/forum/wishlist:_git_annex_status/comment_1_994bfd12c5d82e08040d6116915c5090._comment b/doc/forum/wishlist:_git_annex_status/comment_1_994bfd12c5d82e08040d6116915c5090._comment new file mode 100644 index 0000000000..7b5e7bd449 --- /dev/null +++ b/doc/forum/wishlist:_git_annex_status/comment_1_994bfd12c5d82e08040d6116915c5090._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 1" + date="2011-04-08T07:23:08Z" + content=""" ++1 for this feature, I've been longing for something like this other than rolling my own perl/shell scripts to parse the outputs of \"git annex whereis .\" to see how many files are on my machine or not. 
+"""]] diff --git a/doc/forum/wishlist:_git_annex_status/comment_2_c2b0ce025805b774dc77ce264a222824._comment b/doc/forum/wishlist:_git_annex_status/comment_2_c2b0ce025805b774dc77ce264a222824._comment new file mode 100644 index 0000000000..21f9d713cf --- /dev/null +++ b/doc/forum/wishlist:_git_annex_status/comment_2_c2b0ce025805b774dc77ce264a222824._comment @@ -0,0 +1,13 @@ +[[!comment format=mdwn + username="http://christian.amsuess.com/chrysn" + nickname="chrysn" + subject="format, respect working directory" + date="2011-04-26T12:31:02Z" + content=""" +we could include the information about the current directory as well, if the command is not issued in the local git root directory. to avoid large numbers of similar lines, that could look like this: + + Estimated annex size: B MiB (of C MiB; [B/C]%) + Estimated annex size in $PWD: B' MiB (of C' MiB; [B'/C']%) + +with the percentages being replaced with \"complete\" if really all files are present (and not just many enough for the value to be rounded to 100%). +"""]] diff --git a/doc/forum/wishlist:_git_annex_status/comment_3_d1fd70c67243971c96d59e1ffb7ef6e7._comment b/doc/forum/wishlist:_git_annex_status/comment_3_d1fd70c67243971c96d59e1ffb7ef6e7._comment new file mode 100644 index 0000000000..39986144be --- /dev/null +++ b/doc/forum/wishlist:_git_annex_status/comment_3_d1fd70c67243971c96d59e1ffb7ef6e7._comment @@ -0,0 +1,23 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 3" + date="2011-05-17T01:15:10Z" + content=""" +What a good idea! + +150 lines of haskell later, I have this: + +
+# git annex status
+supported backends: WORM SHA1 SHA256 SHA512 SHA224 SHA384 SHA1E SHA256E SHA512E SHA224E SHA384E URL
+supported remote types: git S3 bup directory rsync hook
+local annex keys: 32
+local annex size: 58 megabytes
+total annex keys: 38158
+total annex size: 6 terabytes (but 1632 keys have unknown size)
+backend usage: 
+	SHA1: 1789
+	WORM: 36369
+
+"""]] diff --git a/doc/forum/wishlist:_git_annex_status/comment_4_9aeeb83d202dc8fb33ff364b0705ad94._comment b/doc/forum/wishlist:_git_annex_status/comment_4_9aeeb83d202dc8fb33ff364b0705ad94._comment new file mode 100644 index 0000000000..f006f88a0a --- /dev/null +++ b/doc/forum/wishlist:_git_annex_status/comment_4_9aeeb83d202dc8fb33ff364b0705ad94._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://christian.amsuess.com/chrysn" + nickname="chrysn" + subject="status of other remotes?" + date="2011-06-15T08:39:24Z" + content=""" +using the location tracking information, it should be possible to show the status of other remotes as well. what about supporting `--from=...` or `--all`? (thus, among other things, one could determine if a remote has a complete checkout.) +"""]] diff --git a/doc/forum/wishlist:_git_backend_for_git-annex.mdwn b/doc/forum/wishlist:_git_backend_for_git-annex.mdwn new file mode 100644 index 0000000000..63ae83097e --- /dev/null +++ b/doc/forum/wishlist:_git_backend_for_git-annex.mdwn @@ -0,0 +1,7 @@ +Preamble: Obviously, the core feature of git-annex is the ability to keep a subset of files in a local repo. The main trade-off is that you don't get version tracking. + +Use case: On my laptop, I might not have enough disk space to store everything. Not so for my main box nor my backup server. And I would _really_ like to have proper version tracking for many of my files. Thus... + +Wish: ...why not use git as a version backend? That way, I could just push all my stuff to the central instance(s) and have the best of both worlds. Depending on what backend is used in the local repos, it might make sense to define a list of supported client backends with pre-computed keys. 
+ +-- RichiH diff --git a/doc/forum/wishlist:_git_backend_for_git-annex/comment_1_04319051fedc583e6c326bb21fcce5a5._comment b/doc/forum/wishlist:_git_backend_for_git-annex/comment_1_04319051fedc583e6c326bb21fcce5a5._comment new file mode 100644 index 0000000000..a691393b1a --- /dev/null +++ b/doc/forum/wishlist:_git_backend_for_git-annex/comment_1_04319051fedc583e6c326bb21fcce5a5._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-03-28T16:01:30Z" + content=""" +Indeed, see [[todo/add_a_git_backend]], where you and I have already discussed this idea. :) + +With the new support for special remotes, which will be used by S3, it would be possible to make such a git repo, using bup, be a special remote. I think it would be pretty easy to implement now. Not a priority for me though. +"""]] diff --git a/doc/forum/wishlist:_git_backend_for_git-annex/comment_2_7f529f19a47e10b571f65ab382e97fd5._comment b/doc/forum/wishlist:_git_backend_for_git-annex/comment_2_7f529f19a47e10b571f65ab382e97fd5._comment new file mode 100644 index 0000000000..14798e7a71 --- /dev/null +++ b/doc/forum/wishlist:_git_backend_for_git-annex/comment_2_7f529f19a47e10b571f65ab382e97fd5._comment @@ -0,0 +1,14 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 2" + date="2011-03-28T17:47:38Z" + content=""" +On the plus side, the past me wanted exactly what I had in mind. + +On the meh side, I really forgot about this conversation :/ + +When you say this todo is not a priority, does that mean there's no ETA at all and that it will most likely sleep for a long time? Or the almost usual \"what the heck, I will just wizard it up in two lines of haskell\"? 
+ +-- RichiH +"""]] diff --git a/doc/forum/wishlist:_git_backend_for_git-annex/comment_3_a077bbad3e4b07cce019eb55a45330e7._comment b/doc/forum/wishlist:_git_backend_for_git-annex/comment_3_a077bbad3e4b07cce019eb55a45330e7._comment new file mode 100644 index 0000000000..8c3286d27b --- /dev/null +++ b/doc/forum/wishlist:_git_backend_for_git-annex/comment_3_a077bbad3e4b07cce019eb55a45330e7._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 3" + date="2011-03-28T20:05:13Z" + content=""" +Probably more like 150 lines of haskell. Maybe just 50 lines if the bup repository is required to be on the same computer as the git-annex repository. + +Since I do have some repositories where I'd appreciate this level of assurance that data not be lost, it's mostly a matter of me finding a free day. +"""]] diff --git a/doc/forum/wishlist:_git_backend_for_git-annex/comment_4_ecca429e12d734b509c671166a676c9d._comment b/doc/forum/wishlist:_git_backend_for_git-annex/comment_4_ecca429e12d734b509c671166a676c9d._comment new file mode 100644 index 0000000000..cf649a8a25 --- /dev/null +++ b/doc/forum/wishlist:_git_backend_for_git-annex/comment_4_ecca429e12d734b509c671166a676c9d._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 4" + date="2011-03-28T20:45:35Z" + content=""" +Personally, I would not mind a requirement to keep a local bup repo. I wouldn't want my data to to unncessarily complex setups, anyway. 
-- RichiH +"""]] diff --git a/doc/forum/wishlist:_git_backend_for_git-annex/comment_5_3459f0b41d818c23c8fb33edb89df634._comment b/doc/forum/wishlist:_git_backend_for_git-annex/comment_5_3459f0b41d818c23c8fb33edb89df634._comment new file mode 100644 index 0000000000..a1300f2e64 --- /dev/null +++ b/doc/forum/wishlist:_git_backend_for_git-annex/comment_5_3459f0b41d818c23c8fb33edb89df634._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 5" + date="2011-04-08T20:59:37Z" + content=""" +My estimates were pretty close -- the new bup special remote type took 133 lines of code, and 2 hours to write. A testament to the flexibility of the special remote infrastructure. :) +"""]] diff --git a/doc/forum/wishlist:_push_to_cia.vc_from_the_website__39__s_repo__44___not_your_personal_one.mdwn b/doc/forum/wishlist:_push_to_cia.vc_from_the_website__39__s_repo__44___not_your_personal_one.mdwn new file mode 100644 index 0000000000..6926e3cca2 --- /dev/null +++ b/doc/forum/wishlist:_push_to_cia.vc_from_the_website__39__s_repo__44___not_your_personal_one.mdwn @@ -0,0 +1 @@ +I just added a CIA bot to #vcs-home and tracking commits immediately would be nice. -- RichiH diff --git a/doc/forum/wishlist:_push_to_cia.vc_from_the_website__39__s_repo__44___not_your_personal_one/comment_1_3480b0ec629ef29a151408d869186bf8._comment b/doc/forum/wishlist:_push_to_cia.vc_from_the_website__39__s_repo__44___not_your_personal_one/comment_1_3480b0ec629ef29a151408d869186bf8._comment new file mode 100644 index 0000000000..5d0edce2ea --- /dev/null +++ b/doc/forum/wishlist:_push_to_cia.vc_from_the_website__39__s_repo__44___not_your_personal_one/comment_1_3480b0ec629ef29a151408d869186bf8._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-09-19T18:57:52Z" + content=""" +JFTR, pushing now happens automatically from branchable. 
+"""]] diff --git a/doc/forum/wishlist:_special_remote_for_sftp_or_rsync.mdwn b/doc/forum/wishlist:_special_remote_for_sftp_or_rsync.mdwn new file mode 100644 index 0000000000..7fd31efbcf --- /dev/null +++ b/doc/forum/wishlist:_special_remote_for_sftp_or_rsync.mdwn @@ -0,0 +1,26 @@ +i think it would be useful to have a fourth kind of [[special_remotes]] +that connects to a dumb storage using sftp or rsync. this can be emulated +by using sshfs, but that means lots of round-trips through the system and +is limited to platforms where sshfs is available. + +typical use cases are backups to storate shared between a group of people +where each user only has limited access (sftp or rsync), when using +[[special_remotes/bup]] is not an option. + +an alternative to implementing yet another special remote would be to have +some kind of plugin system by which external programs can provide an +interface to key-value stores (i'd implement the sftp backend myself, but +haven't learned haskell yet). + +> Ask and ye [[shall receive|special_remotes/rsync]]. +> +> Sometimes I almost think that a generic configurable special remote that +> just uses configured shell commands would be useful.. But there's really +> no comparison with sitting down and writing code tuned to work with +> a given transport like rsync, when it comes to reliability and taking +> advantage of its abilities (like resuming). --[[Joey]] + +>> big thanks, and bonus points for identical formats, so converting from +>> directory to rsync is just a matter of changing ``type`` from ``directory`` +>> to ``rsync`` in ``.git-annex/remote.log`` and replacing the directory info +>> with ``annex-rsyncurl = :`` in ``.git/config``. 
--[[chrysn]] diff --git a/doc/forum/wishlist:_special_remote_for_sftp_or_rsync/comment_1_6f07d9cc92cf8b4927b3a7d1820c9140._comment b/doc/forum/wishlist:_special_remote_for_sftp_or_rsync/comment_1_6f07d9cc92cf8b4927b3a7d1820c9140._comment new file mode 100644 index 0000000000..c513ed4008 --- /dev/null +++ b/doc/forum/wishlist:_special_remote_for_sftp_or_rsync/comment_1_6f07d9cc92cf8b4927b3a7d1820c9140._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 1" + date="2011-04-28T07:47:38Z" + content=""" ++1 for a generic user configurable backend that a user can put shell commands in, which has a disclaimer such that if a user hangs themselves with misconfiguration then it's their own fault :P + +I would love to be able to quickly plug in an irods/sector set of put/get/delete/stat(get info) commands into git-annex to access my private clouds which aren't s3 compatible. 
+"""]] diff --git a/doc/forum/wishlist:_special_remote_for_sftp_or_rsync/comment_2_84e4414c88ae91c048564a2cdc2d3250._comment b/doc/forum/wishlist:_special_remote_for_sftp_or_rsync/comment_2_84e4414c88ae91c048564a2cdc2d3250._comment new file mode 100644 index 0000000000..6243708f94 --- /dev/null +++ b/doc/forum/wishlist:_special_remote_for_sftp_or_rsync/comment_2_84e4414c88ae91c048564a2cdc2d3250._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 2" + date="2011-04-28T21:22:03Z" + content=""" +Ask and ye shalle receive with an Abbot on top: [[special_remotes/hook]] +"""]] diff --git a/doc/forum/wishlist:_special_remote_for_sftp_or_rsync/comment_3_79de7ac44e3c0f0f5691a56d3fb88897._comment b/doc/forum/wishlist:_special_remote_for_sftp_or_rsync/comment_3_79de7ac44e3c0f0f5691a56d3fb88897._comment new file mode 100644 index 0000000000..dc21ec4885 --- /dev/null +++ b/doc/forum/wishlist:_special_remote_for_sftp_or_rsync/comment_3_79de7ac44e3c0f0f5691a56d3fb88897._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 3" + date="2011-04-29T10:43:31Z" + content=""" +Cool!, I just tried adding tahoe-lafs as a remote, and it wasn't too hard. +"""]] diff --git a/doc/forum/wishlist:_traffic_accounting_for_git-annex.mdwn b/doc/forum/wishlist:_traffic_accounting_for_git-annex.mdwn new file mode 100644 index 0000000000..4b661101d7 --- /dev/null +++ b/doc/forum/wishlist:_traffic_accounting_for_git-annex.mdwn @@ -0,0 +1,3 @@ +As git annex keeps logs about file transfers anyway, it should be relatively easy to add traffic accounting to a repo. That would allow me to monitor how much traffic a given repo generates. As I might end up hosting git-annex repos for a few personal friends, I need/want a way to track the heavy hitters. 
-- RichiH + +PS: If you ever plan to host git-annex similar to branchable, this would probably be of interest to you, as well :) diff --git a/doc/forum/wishlist:alias_system.mdwn b/doc/forum/wishlist:alias_system.mdwn new file mode 100644 index 0000000000..1f5012966e --- /dev/null +++ b/doc/forum/wishlist:alias_system.mdwn @@ -0,0 +1 @@ +To implement things like my custom `git annex-push` without the dash, i.e. `git annex push`, an alias system for git-annex would be nice. diff --git a/doc/forum/working_without_git-annex_commits.mdwn b/doc/forum/working_without_git-annex_commits.mdwn new file mode 100644 index 0000000000..00a61f3f0f --- /dev/null +++ b/doc/forum/working_without_git-annex_commits.mdwn @@ -0,0 +1,20 @@ +Is it possible to use git-annex without having [[location tracking]] commits in the style of "got a video I want to rewatch on the plane" or "freed up space" in the main tree? + +I consider these changes to be volatile, and irrelevant to the archive history. While they are unproblematic when it comes to merging, they make the commit tree rather complicated, especially with multiple users (as opposed to a single user managing his files on an external disk, a server and his laptop). Some users might even want to contribute to a shared repository without reporting on what they checked out. + +As a minimal solution, I configured a repository to ``.gitignore`` ``.git-annex/*:*.log``, but even when using modes that do not require that information (``git annex copy --from`` instead of ``git annex get``), that fails when git-annex tries to git-add ignored files. + +A more elaborate solution might be to keep location tracking information in a branch on its own (as suggested in [[todo/branching]]), keeping the main tree clean of such commits. A stealth user could then configure that branch to never be pushed. 
(Alternatively, if git-annex respects .gitignore and doesn't try to check in changes on ignored files, he could locally ``.gitignore`` ``.git-annex/*:*.log``.) + +> A stealth user can simply avoid pushing, and so keep their repository +> in a forked state, that can still pull changes from origin. +> +> Beyond that, [[todo/branching]] is the best solution. +> +> I don't think that gitignoring the log files is a good plan, because +> if the files are left modified and uncommitted, git will not be able to +> merge other changes it pulls. The automerging of log files only works +> if any local changes to them have been committed. +> +> It would be possible to add a knob that +> simply blocks all local modifications to the log files. --[[Joey]] diff --git a/doc/future_proofing.mdwn b/doc/future_proofing.mdwn new file mode 100644 index 0000000000..a7bcce37c9 --- /dev/null +++ b/doc/future_proofing.mdwn @@ -0,0 +1,37 @@ +Imagine putting a git-annex drive in a time capsule. In 20, or 50, or 100 +years, you'd like its contents to be as accessible as possible to whoever +digs it up. + +This is a hard problem. git-annex cannot completely solve it, but it does +its best to not contribute to the problem. Here are some aspects of the +problem: + +* How are files accessed? Git-annex carefully adds minimal complexity + to access files in a repository. Nothing needs to be done to extract + files from the repository; they are there on disk in the usual way, + with just some symlinks pointing at the annexed file contents. + Neither git-annex nor git is needed to get at the file contents. + + (Also, git-annex provides an "uninit" command that moves everything out + of the annex, if you should ever want to stop using it.) + +* What file formats are used? Will they still be readable? To deal with + this, it's best to stick to plain text files, and the most common + image, sound, etc formats. Consider storing the same content in multiple + formats. 
+ +* What filesystem is used on the drive? Will that filesystem still be + available? + +* What is the hardware interface of the drive? Will hardware still exist + to talk to it? + +* What if some of the data is damaged? git-annex facilitates storing a + configurable number of [[copies]] of the file contents. The metadata + about your files is stored in git, and so every clone of the repository + means another copy of that is stored. Also, git-annex uses filenames + for the data that encode everything needed to match it back to the + metadata. So if a filesystem is badly corrupted and all your annexed + files end up in `lost+found`, they can easily be lifted back out into + another clone of the repository. Even if the filenames are lost, + it's possible to [[tips/recover_data_from_lost+found]]. diff --git a/doc/git-annex-shell.mdwn b/doc/git-annex-shell.mdwn new file mode 100644 index 0000000000..7a65f10775 --- /dev/null +++ b/doc/git-annex-shell.mdwn @@ -0,0 +1,81 @@ +# NAME + +git-annex-shell - Restricted login shell for git-annex only SSH access + +# SYNOPSIS + +git-annex-shell [-c] command [params ...] + +# DESCRIPTION + +git-annex-shell is a restricted shell, similar to git-shell, which +can be used as a login shell for SSH accounts. + +Since its syntax is identical to git-shell's, it can be used as a drop-in +replacement anywhere git-shell is used. For example it can be used as a +user's restricted login shell. + +# COMMANDS + +Any command not listed below is passed through to git-shell. + +Note that the directory parameter should be an absolute path, otherwise +it is assumed to be relative to the user's home directory. Also the +first "/~/" or "/~user/" is expanded to the specified home directory. + +* configlist directory + + This outputs a subset of the git configuration, in the same form as + `git config --list` + +* inannex directory [key ...] + + This checks if all specified keys are present in the annex, + and exits zero if so. 
+ +* dropkey directory [key ...] + + This drops the annexed data for the specified keys. + +* recvkey directory key + + This runs rsync in server mode to receive the content of a key, + and stores the content in the annex. + +* sendkey directory key + + This runs rsync in server mode to transfer out the content of a key. + +# OPTIONS + +Most options are the same as in git-annex. The ones specific +to git-annex-shell are: + +* --uuid=UUID + + git-annex uses this to specify the UUID of the repository it was expecting + git-annex-shell to access, as a sanity check. + +# ENVIRONMENT + +* GIT_ANNEX_SHELL_READONLY + + If set, disallows any command that could modify the repository. + +* GIT_ANNEX_SHELL_LIMITED + + If set, disallows running git-shell to handle unknown commands. + +# SEE ALSO + +[[git-annex]](1) + +git-shell(1) + +# AUTHOR + +Joey Hess + + + +Warning: Automatically converted into a man page by mdwn2man. Edit with care diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn new file mode 100644 index 0000000000..e0f0d78703 --- /dev/null +++ b/doc/git-annex.mdwn @@ -0,0 +1,668 @@ +# NAME + +git-annex - manage files with git, without checking their contents in + +# SYNOPSIS + +git annex command [params ...] + +# DESCRIPTION + +git-annex allows managing files with git, without checking the file +contents into git. While that may seem paradoxical, it is useful when +dealing with files larger than git can currently easily handle, whether due +to limitations in memory, checksumming time, or disk space. + +Even without file content tracking, being able to manage files with git, +move files around and delete files with versioned directory trees, and use +branches and distributed clones, are all very handy reasons to use git. And +annexed files can co-exist in the same git repository with regularly +versioned files, which is convenient for maintaining documents, Makefiles, +etc that are associated with annexed files but that benefit from full +revision control. 
+ +When a file is annexed, its content is moved into a key-value store, and +a symlink is made that points to the content. These symlinks are checked into +git and versioned like regular files. You can move them around, delete +them, and so on. Pushing to another git repository will make git-annex +there aware of the annexed file, and it can be used to retrieve its +content from the key-value store. + +# EXAMPLES + + # git annex get video/hackity_hack_and_kaxxt.mov + get video/_why_hackity_hack_and_kaxxt.mov (not available) + I was unable to access these remotes: server + Try making some of these repositories available: + 5863d8c0-d9a9-11df-adb2-af51e6559a49 -- my home file server + 58d84e8a-d9ae-11df-a1aa-ab9aa8c00826 -- portable USB drive + ca20064c-dbb5-11df-b2fe-002170d25c55 -- backup SATA drive + failed + # sudo mount /media/usb + # git remote add usbdrive /media/usb + # git annex get video/hackity_hack_and_kaxxt.mov + get video/hackity_hack_and_kaxxt.mov (from usbdrive...) ok + + # git annex add iso + add iso/Debian_5.0.iso ok + + # git annex drop iso/Debian_4.0.iso + drop iso/Debian_4.0.iso ok + + # git annex move iso --to=usbdrive + move iso/Debian_5.0.iso (moving to usbdrive...) ok + +# COMMONLY USED COMMANDS + +Like many git commands, git-annex can be passed a path that +is either a file or a directory. In the latter case it acts on all relevant +files in the directory. When no path is specified, most git-annex commands +default to acting on all relevant files in the current directory (and +subdirectories). + +* add [path ...] + + Adds files in the path to the annex. Files that are already checked into + git, or that git has been configured to ignore will be silently skipped. + (Use --force to add ignored files.) + +* get [path ...] + + Makes the content of annexed files available in this repository. This + will involve copying them from another repository, or downloading them, + or transferring them from some kind of key-value store. 
+ + Normally git-annex will choose which repository to copy the content from, + but you can override this using the --from option. + +* drop [path ...] + + Drops the content of annexed files from this repository. + + git-annex will refuse to drop content if it cannot verify it is + safe to do so. This can be overridden with the --force switch. + + To drop content from a remote, specify --from. + +* move [path ...] + + When used with the --from option, moves the content of annexed files + from the specified repository to the current one. + + When used with the --to option, moves the content of annexed files from + the current repository to the specified one. + +* copy [path ...] + + When used with the --from option, copies the content of annexed files + from the specified repository to the current one. + + When used with the --to option, copies the content of annexed files from + the current repository to the specified one. + + To avoid contacting the remote to check if it has every file, specify --fast + +* unlock [path ...] + + Normally, the content of annexed files is protected from being changed. + Unlocking an annexed file allows it to be modified. This replaces the + symlink for each specified file with a copy of the file's content. + You can then modify it and `git annex add` (or `git commit`) to inject + it back into the annex. + +* edit [path ...] + + This is an alias for the unlock command. May be easier to remember, + if you think of this as allowing you to edit an annexed file. + +* lock [path ...] + + Use this to undo an unlock command if you don't want to modify + the files, or have made modifications you want to discard. + +* sync + + Use this command when you want to synchronize the local repository + with its default remote (typically "origin"). The sync process involves + first committing all local changes, then pulling and merging any changes + from the remote, and finally pushing the repository's state to the remote. 
+ You can use standard git commands to do each of those steps by hand, + or if you don't want to worry about the details, you can use sync. + + Note that sync does not transfer any file contents from or to the remote. + +* addurl [url ...] + + Downloads each url to a file, which is added to the annex. + + To avoid immediately downloading the url, specify --fast + +# REPOSITORY SETUP COMMANDS + +* init [description] + + Until a repository (or one of its remotes) has been initialized, + git-annex will refuse to operate on it, to avoid accidentally + using it in a repository that was not intended to have an annex. + + It's useful, but not mandatory, to initialize each new clone + of a repository with its own description. + +* describe repository description + + Changes the description of a repository. + + The repository to describe can be specified by git remote name or + by uuid. To change the description of the current repository, use + "." + +* initremote name [param=value ...] + + Sets up a special remote. The remote's + configuration is specified by the parameters. If a remote + with the specified name has already been configured, its configuration + is modified by any values specified. In either case, the remote will be + added to `.git/config`. + + Example Amazon S3 remote: + + initremote mys3 type=S3 encryption=none datacenter=EU + +* trust [repository ...] + + Records that a repository is trusted to not unexpectedly lose + content. Use with care. + + To trust the current repository, use "." + +* untrust [repository ...] + + Records that a repository is not trusted and could lose content + at any time. + +* semitrust [repository ...] + + Returns a repository to the default semi trusted state. + +* dead [repository ...] + + Indicates that the repository has been irretrievably lost. + (To undo, use semitrust.) + +# REPOSITORY MAINTENANCE COMMANDS + +* fsck [path ...] 
+ + With no parameters, this command checks the whole annex for consistency, + and warns about or fixes any problems found. + + With parameters, only the specified files are checked. + + To avoid expensive checksum calculations, specify --fast + +* unused + + Checks the annex for data that does not correspond to any files present + in any tag or branch, and prints a numbered list of the data. + + To only show unused temp and bad files, specify --fast. + + To check for annexed data on a remote, specify --from. + +* dropunused [number ...] + + Drops the data corresponding to the numbers, as listed by the last + `git annex unused` + + To drop the data from a remote, specify --from. + +* merge + + Automatically merges remote tracking branches */git-annex into + the git-annex branch. While git-annex mostly handles keeping the + git-annex branch merged automatically, via the tweak-fetch hook and other + means, if you find you are unable to push the git-annex branch due to + non-fast-forward, this will fix it. + +* fix [path ...] + + Fixes up symlinks that have become broken to again point to annexed content. + This is useful to run if you have been moving the symlinks around, + but is done automatically when committing a change with git too. + +* upgrade + + Upgrades the repository to current layout. + +# QUERY COMMANDS + +* version + + Shows the version of git-annex, as well as repository version information. + +* find [path ...] + + Outputs a list of annexed files in the specified path. With no path, + finds files in the current directory and its subdirectories. + + By default, only lists annexed files whose content is currently present. + This can be changed by specifying file matching options. To list all + annexed files, present or not, specify --include "*". To list all + annexed files whose content is not present, specify --not --in="." + + To output filenames terminated with nulls, for use with xargs -0, + specify --print0. 
Or, a custom output formatting can be specified using + --format. The default output format is the same as --format='${file}\\n' + + These variables are available for use in formats: file, key, backend, + bytesize, humansize + +* whereis [path ...] + + Displays a list of repositories known to contain the content of the + specified file or files. + +* status + + Displays some statistics and other information, including how much data + is in the annex and a list of all known repositories. + + To only show the data that can be gathered quickly, use --fast. + +* map + + Helps you keep track of your repositories, and the connections between them, + by going out and looking at all the ones it can get to, and generating a + Graphviz file displaying it all. If the `dot` command is available, it is + used to display the file to your screen (using x11 backend). (To disable + this display, specify --fast) + + This command only connects to hosts that the host it's run on can + directly connect to. It does not try to tunnel through intermediate hosts. + So it might not show all connections between the repositories in the network. + + Also, if connecting to a host requires a password, you might have to enter + it several times as the map is being built. + + Note that this subcommand can be used to graph any git repository; it + is not limited to git-annex repositories. + +# UTILITY COMMANDS + +* migrate [path ...] + + Changes the specified annexed files to use the default key-value backend + (or the one specified with --backend). Only files whose content + is currently available are migrated. + + Note that the content is also still available using the old key after + migration. Use `git annex unused` to find and remove the old key. + + Normally, nothing will be done to files already using the new backend. + However, if a backend changes the information it uses to construct a key, + this can also be used to migrate files to use the new key format. 
+ +* reinject src dest + + Moves the src file into the annex as the content of the dest file. + This can be useful if you have obtained the content of a file from + elsewhere and want to put it in the local annex. + + Automatically runs fsck on dest to check that the expected content was + provided. + + Example: + + git annex reinject /tmp/foo.iso foo.iso + +* unannex [path ...] + + Use this to undo an accidental `git annex add` command. You can use + `git annex unannex` to move content out of the annex at any point, + even if you've already committed it. + + This is not the command you should use if you intentionally annexed a + file and don't want its contents any more. In that case you should use + `git annex drop` instead, and you can also `git rm` the file. + + In --fast mode, this command leaves content in the annex, simply making + a hard link to it. + +* uninit + + Use this to stop using git annex. It will unannex every file in the + repository, and remove all of git-annex's other data, leaving you with a + git repository plus the previously annexed files. + +# PLUMBING COMMANDS + +* pre-commit [path ...] + + Fixes up symlinks that are staged as part of a commit, to ensure they + point to annexed content. Also handles injecting changes to unlocked + files into the annex. + + This is meant to be called from git's pre-commit hook. `git annex init` + automatically creates a pre-commit hook using this. + +* tweak-fetch + + This is meant to be called from git's tweak-fetch hook. `git annex init` + automatically creates a tweak-fetch hook using this. + +* fromkey key file + + This plumbing-level command can be used to manually set up a file + in the git repository to link to a specified key. + +* dropkey [key ...] + + This plumbing-level command drops the annexed data for the specified + keys from this repository. + + This can be used to drop content for arbitrary keys, which do not need + to have a file in the git repository pointing at them. 
+ + Example: + + git annex dropkey SHA1-s10-7da006579dd64330eb2456001fd01948430572f2 + +# OPTIONS + +* --force + + Force unsafe actions, such as dropping a file's content when no other + source of it can be verified to still exist, or adding ignored files. + Use with care. + +* --fast + + Enables less expensive, but also less thorough versions of some commands. + What is avoided depends on the command. + +* --auto + + Enables automatic mode. Commands that get, drop, or move file contents + will only do so when needed to help satisfy the setting of annex.numcopies. + +* --quiet + + Avoid the default verbose display of what is done; only show errors + and progress displays. + +* --verbose + + Enable verbose display. + +* --json + + Rather than the normal output, generate JSON. This is intended to be + parsed by programs that use git-annex. Each line of output is a JSON + object. + +* --debug + + Show debug messages. + +* --from=repository + + Specifies a repository that content will be retrieved from, or that + should otherwise be acted on. + + It should be specified using the name of a configured remote. + +* --to=repository + + Specifies a repository that content will be sent to. + + It should be specified using the name of a configured remote. + +* --numcopies=n + + Overrides the `annex.numcopies` setting, forcing git-annex to ensure the + specified number of copies exist. + +* --trust=repository +* --semitrust=repository +* --untrust=repository + + Overrides trust settings for a repository. May be specified more than once. + + The repository should be specified using the name of a configured remote, + or the UUID or description of a repository. + +* --backend=name + + Specifies which key-value backend to use. This can be used when + adding a file to the annex, or migrating a file. Once files + are in the annex, their backend is known and this option is not + necessary. + +* --format=value + + Specifies a custom output format. 
The value is a format string, + in which '${var}' is expanded to the value of a variable. To right-justify + a variable with whitespace, use '${var;width}' ; to left-justify + a variable, use '${var;-width}'; to escape unusual characters in a variable, + use '${escaped_var}' + + Also, '\\n' is a newline, '\\000' is a NULL, etc. + +* -c name=value + + Used to override git configuration settings. May be specified multiple times. + +# FILE MATCHING OPTIONS + +These options can all be specified multiple times, and can be combined to +limit which files git-annex acts on. + +Arbitrarily complicated expressions can be built using these options. +For example: + + --exclude '*.mp3' --and --not -( --in=usbdrive --or --in=archive -) + +The above example prevents git-annex from working on mp3 files whose +file contents are present at either of two repositories. + +* --exclude=glob + + Skips files matching the glob pattern. The glob is matched relative to + the current directory. For example: + + --exclude='*.mp3' --exclude='subdir/*' + +* --include=glob + + Skips files not matching the glob pattern. (Same as --not --exclude.) + For example, to include only mp3 and ogg files: + + --include='*.mp3' --or --include='*.ogg' + +* --in=repository + + Matches only files that git-annex believes have their contents present + in a repository. Note that it does not check the repository to verify + that it still has the content. + + The repository should be specified using the name of a configured remote, + or the UUID or description of a repository. For the current repository, + use "--in=." + +* --copies=number + + Matches only files that git-annex believes to have the specified number + of copies, or more. Note that it does not check remotes to verify that + the copies still exist. + +* --inbackend=name + + Matches only files whose content is stored using the specified key-value + backend. + +* --not + + Inverts the next file matching option. 
For example, to only act on + mp3s, use: --not --exclude='*.mp3' + +* --and + + Requires that both the previous and the next file matching option matches. + The default. + +* --or + + Requires that either the previous, or the next file matching option matches. + +* -( + + Opens a group of file matching options. + +* -) + + Closes a group of file matching options. + +# CONFIGURATION + +Like other git commands, git-annex is configured via `.git/config`. +Here are all the supported configuration settings. + +* `annex.uuid` + + A unique UUID for this repository (automatically set). + +* `annex.numcopies` + + Number of copies of files to keep across all repositories. (default: 1) + +* `annex.backends` + + Space-separated list of names of the key-value backends to use. + The first listed is used to store new files by default. + +* `annex.diskreserve` + + Amount of disk space to reserve. Disk space is checked when transferring + content to avoid running out, and additional free space can be reserved + via this option, to make space for more important content (such as git + commit logs). Can be specified with any commonly used units, for example, + "0.5 gb" or "100 KiloBytes" + + The default reserve is 1 megabyte. + +* `annex.version` + + Automatically maintained, and used to automate upgrades between versions. + +* `remote..annex-cost` + + When determining which repository to + transfer annexed files from or to, ones with lower costs are preferred. + The default cost is 100 for local repositories, and 200 for remote + repositories. + +* `remote..annex-cost-command` + + If set, the command is run, and the number it outputs is used as the cost. + This allows varying the cost based on eg, the current network. The + cost-command can be any shell command line. + +* `remote..annex-ignore` + + If set to `true`, prevents git-annex + from using this remote by default. (You can still request it be used + by the --from and --to options.) 
+ + This is, for example, useful if the remote is located somewhere + without git-annex-shell. (For example, if it's on GitHub). + Or, it could be used if the network connection between two + repositories is too slow to be used normally. + +* `remote..annexUrl` + + Can be used to specify a different url than the regular `remote..url` + for git-annex to use when talking with the remote. Similar to the `pushUrl` + used by git-push. + +* `remote..annex-uuid` + + git-annex caches UUIDs of remote repositories here. + +* `remote..annex-ssh-options` + + Options to use when using ssh to talk to this remote. + +* `remote..annex-rsync-options` + + Options to use when using rsync + to or from this remote. For example, to force ipv6, and limit + the bandwidth to 100Kbyte/s, set it to "-6 --bwlimit 100" + +* `remote..annex-bup-split-options` + + Options to pass to bup split when storing content in this remote. + For example, to limit the bandwidth to 100Kbyte/s, set it to "--bwlimit 100k" + (There is no corresponding option for bup join.) + +* `annex.ssh-options`, `annex.rsync-options`, `annex.bup-split-options` + + Default ssh, rsync, and bup options to use if a remote does not have + specific options. + +* `remote..buprepo` + + Used by bup special remotes, this configures + the location of the bup repository to use. Normally this is automatically + set up by `git annex initremote`, but you can change it if needed. + +* `remote..directory` + + Used by directory special remotes, this configures + the location of the directory where annexed files are stored for this + remote. Normally this is automatically set up by `git annex initremote`, + but you can change it if needed. + +* `remote..s3` + + Used to identify Amazon S3 special remotes. + Normally this is automatically set up by `git annex initremote`. + +# CONFIGURATION VIA .gitattributes + +The key-value backend used when adding a new file to the annex can be +configured on a per-file-type basis via `.gitattributes` files. 
In the file, +the `annex.backend` attribute can be set to the name of the backend to +use. For example, here's how to use the WORM backend by default, +but the SHA1 backend for ogg files: + + * annex.backend=WORM + *.ogg annex.backend=SHA1 + +The numcopies setting can also be configured on a per-file-type basis via +the `annex.numcopies` attribute in `.gitattributes` files. +For example, this makes two copies be needed for wav files: + + *.wav annex.numcopies=2 + +# FILES + +These files are used by git-annex, in your git repository: + +`.git/annex/objects/` contains the annexed file contents that are currently +available. Annexed files in your git repository symlink to that content. + +# SEE ALSO + +Most of git-annex's documentation is available on its web site, + + +If git-annex is installed from a package, a copy of its documentation +should be included, in, for example, `/usr/share/doc/git-annex/` + +# AUTHOR + +Joey Hess + + + +Warning: Automatically converted into a man page by mdwn2man. Edit with care diff --git a/doc/git-union-merge.mdwn b/doc/git-union-merge.mdwn new file mode 100644 index 0000000000..8e3c34f8f1 --- /dev/null +++ b/doc/git-union-merge.mdwn @@ -0,0 +1,38 @@ +# NAME + +git-union-merge - Join branches together using a union merge + +# SYNOPSIS + +git union-merge ref ref newref + +# DESCRIPTION + +Does a union merge between two refs, storing the result in the +specified newref. + +The union merge will always succeed, but assumes that files can be merged +simply by concatenating together lines from all the oldrefs, in any order. +So, this is useful only for branches containing log-type data. + +Note that this does not touch the checked out working copy. It operates +entirely on git refs and branches. + +# EXAMPLE + + git union-merge git-annex origin/git-annex refs/heads/git-annex + +Merges the current git-annex branch, and a version from origin, +storing the result in the git-annex branch. + +# BUGS + +File modes are not currently merged. 
+ +# AUTHOR + +Joey Hess + + + +Warning: Automatically converted into a man page by mdwn2man. Edit with care diff --git a/doc/index.mdwn b/doc/index.mdwn new file mode 100644 index 0000000000..5bd42074f5 --- /dev/null +++ b/doc/index.mdwn @@ -0,0 +1,60 @@ +[[!inline raw=yes pages="summary"]] + +To get a feel for it, see the [[walkthrough]]. + +[[!sidebar content=""" +[[!img logo_small.png link=no]] + +* **[[download]]** +* [[install]] +* [[tips]] +* [[bugs]] +* [[todo]] +* [[forum]] +* [[comments]] +* [[contact]] +* Flattr this + +[[News]]: + + +[[!inline pages="news/* and !*/discussion" archive=yes show=3 feeds=no]] + + +[[Feeds]]: + + +[[!inline pages="internal(feeds/*)" archive=yes show=5 feeds=no]] + +"""]] + + + + + + +
[[!inline feeds=no template=bare pages=use_case/bob]][[!inline feeds=no template=bare pages=use_case/alice]]
+ +If that describes you, or if you're some from column A and some from column +B, then git-annex may be the tool you've been looking for to expand from +keeping all your small important files in git, to managing your large +files with git. + +## documentation + +* [[git-annex man page|git-annex]] +* [[key-value backends|backends]] for data storage +* [[special_remotes]] (including [[special_remotes/S3]] and [[special_remotes/bup]]) +* [[encryption]] +* [[bare_repositories]] +* [[internals]] +* [[design]] +* [[what git annex is not|not]] +* git-annex is Free Software, licensed under the [[GPL]]. + +
+ +---- + +git-annex's wiki is powered by [Ikiwiki](http://ikiwiki.info/) and +hosted by [Branchable](http://branchable.com/). diff --git a/doc/install.mdwn b/doc/install.mdwn new file mode 100644 index 0000000000..cc26ee91d5 --- /dev/null +++ b/doc/install.mdwn @@ -0,0 +1,45 @@ +## OS-specific instructions + +* [[OSX]] +* [[Debian]] +* [[Ubuntu]] +* [[Fedora]] +* [[FreeBSD]] +* [[openSUSE]] + +## Using cabal + +As a Haskell package, git-annex can be installed using cabal. For example: + + cabal install git-annex --bindir=$HOME/bin + +## Installation by hand + +To build and use git-annex, you will need: + +* Haskell stuff + * [The Haskell Platform](http://haskell.org/platform/) + * [MissingH](http://github.com/jgoerzen/missingh/wiki) + * [pcre-light](http://hackage.haskell.org/package/pcre-light) + * [utf8-string](http://hackage.haskell.org/package/utf8-string) + * [SHA](http://hackage.haskell.org/package/SHA) + * [dataenc](http://hackage.haskell.org/package/dataenc) + * [monad-control](http://hackage.haskell.org/package/monad-control) + * [TestPack](http://hackage.haskell.org/cgi-bin/hackage-scripts/package/testpack) + * [QuickCheck 2](http://hackage.haskell.org/package/QuickCheck) + * [HTTP](http://hackage.haskell.org/package/HTTP) + * [hS3](http://hackage.haskell.org/package/hS3) (optional, but recommended) + * [json](http://hackage.haskell.org/package/json) +* Shell commands + * [git](http://git-scm.com/) + * [uuid](http://www.ossp.org/pkg/lib/uuid/) + (or `uuidgen` from util-linux) + * [xargs](http://savannah.gnu.org/projects/findutils/) + * [rsync](http://rsync.samba.org/) + * [wget](http://www.gnu.org/software/wget/) or [curl](http://curl.haxx.se/) (optional, but recommended) + * [sha1sum](ftp://ftp.gnu.org/gnu/coreutils/) (optional, but recommended; + a sha1 command will also do) + * [gpg](http://gnupg.org/) (optional; needed for encryption) + * [ikiwiki](http://ikiwiki.info) (optional; used to build the docs) + +Then just [[download]] git-annex and 
run: `make; make install` diff --git a/doc/install/Debian.mdwn b/doc/install/Debian.mdwn new file mode 100644 index 0000000000..db1329b45b --- /dev/null +++ b/doc/install/Debian.mdwn @@ -0,0 +1,8 @@ +If using Debian testing or unstable: + +* `sudo apt-get install git-annex` + +If using Debian 6.0 stable: + +* Follow the instructions to [enable backports](http://backports.debian.org/Instructions/). +* `sudo apt-get -t squeeze-backports install git-annex` diff --git a/doc/install/Debian/comment_1_029486088d098c2d4f1099f2f0e701a9._comment b/doc/install/Debian/comment_1_029486088d098c2d4f1099f2f0e701a9._comment new file mode 100644 index 0000000000..9a4ed7c31d --- /dev/null +++ b/doc/install/Debian/comment_1_029486088d098c2d4f1099f2f0e701a9._comment @@ -0,0 +1,9 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawla7u6eLKNYZ09Z7xwBffqLaXquMQC07fU" + nickname="Matthias" + subject="squeeze-backports update?" + date="2011-08-17T12:34:46Z" + content=""" +Is there going to be an update of git-annex in debian squeeze-backports to a version that supports repository version 3? +Thx +"""]] diff --git a/doc/install/Debian/comment_2_648e3467e260cdf233acdb0b53313ce0._comment b/doc/install/Debian/comment_2_648e3467e260cdf233acdb0b53313ce0._comment new file mode 100644 index 0000000000..b8b3d68f33 --- /dev/null +++ b/doc/install/Debian/comment_2_648e3467e260cdf233acdb0b53313ce0._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="Re: squeeze-backports update?" + date="2011-08-17T15:34:29Z" + content=""" +Yes, I uploaded it last night. +"""]] diff --git a/doc/install/Fedora.mdwn b/doc/install/Fedora.mdwn new file mode 100644 index 0000000000..7e983597b2 --- /dev/null +++ b/doc/install/Fedora.mdwn @@ -0,0 +1,7 @@ +Installation recipe for Fedora 14. + +
+sudo yum install ghc cabal-install
+sudo cabal update
+cabal install git-annex --bindir=$HOME/bin
+
diff --git a/doc/install/FreeBSD.mdwn b/doc/install/FreeBSD.mdwn new file mode 100644 index 0000000000..72b402c380 --- /dev/null +++ b/doc/install/FreeBSD.mdwn @@ -0,0 +1,2 @@ +git-annex is in FreeBSD ports in +[devel/git-annex](http://www.freshports.org/devel/hs-git-annex/) diff --git a/doc/install/OSX.mdwn b/doc/install/OSX.mdwn new file mode 100644 index 0000000000..f65e0bb4fa --- /dev/null +++ b/doc/install/OSX.mdwn @@ -0,0 +1,20 @@ +Install Haskell Platform from [[http://hackage.haskell.org/platform/mac.html]]. The version provided by Macports is too old to work with current versions of git-annex. Then execute + +
+sudo port install git-core ossp-uuid md5sha1sum coreutils pcre
+
+sudo ln -s /opt/local/include/pcre.h  /usr/include/pcre.h # This is a hack that allows pcre-light to find pcre
+
+# optional: this will enable the gnu tools, (to give sha224sum etc..., it does not override the BSD userland)
+export PATH=$PATH:/opt/local/libexec/gnubin
+
+sudo cabal update
+cabal install git-annex --bindir=$HOME/bin
+
+ +Originally posted by Jon at --[[Joey]], modified by [[kristianrumberg]] + +See also: + +* [[forum/OSX__39__s_haskell-platform_statically_links_things]] +* [[forum/OSX__39__s_default_sshd_behaviour_has_limited_paths_set]] diff --git a/doc/install/OSX/comment_1_0a1760bf0db1f1ba89bdb4c62032f631._comment b/doc/install/OSX/comment_1_0a1760bf0db1f1ba89bdb4c62032f631._comment new file mode 100644 index 0000000000..1148a87cab --- /dev/null +++ b/doc/install/OSX/comment_1_0a1760bf0db1f1ba89bdb4c62032f631._comment @@ -0,0 +1,13 @@ +[[!comment format=mdwn + username="http://www.schleptet.net/~cfm/" + ip="64.30.148.100" + subject="comment 1" + date="2011-08-30T14:31:36Z" + content=""" +You can also use Homebrew instead of MacPorts. Homebrew's `haskell-platform` is up-to-date, too: + + brew install haskell-platform git ossp-uuid md5sha1sum coreutils pcre + ln -s /usr/local/include/pcre.h /usr/include/pcre.h + +As of this writing, however, Homebrew's `md5sha1sum` has a broken mirror. I wound up getting that from MacPorts anyway. +"""]] diff --git a/doc/install/Ubuntu.mdwn b/doc/install/Ubuntu.mdwn new file mode 100644 index 0000000000..8d5341e19a --- /dev/null +++ b/doc/install/Ubuntu.mdwn @@ -0,0 +1,11 @@ +If using Ubuntu Oneiric or newer: + + sudo apt-get install git-annex + +Otherwise, see [[manual_installation_instructions|install]]. + +--- + +Warning: The version of git-annex shipped in Ubuntu Oneiric +has [a bug that prevents upgrades from v1 git-annex repositories](https://bugs.launchpad.net/ubuntu/+source/git-annex/+bug/875958). +If you need to upgrade such a repository, get a newer version of git-annex. 
diff --git a/doc/install/comment_3_cff163ea3e7cad926f4ed9e78b896598._comment b/doc/install/comment_3_cff163ea3e7cad926f4ed9e78b896598._comment new file mode 100644 index 0000000000..6b47ed0e3e --- /dev/null +++ b/doc/install/comment_3_cff163ea3e7cad926f4ed9e78b896598._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawmByD9tmR48HuYgS4qWEGDDaoVTTC3m4kc" + nickname="Jonas" + subject="Any chance to get git-annex going on windows?" + date="2011-06-10T18:08:36Z" + content=""" +Would be great! :-) + +Jonas +"""]] diff --git a/doc/install/comment_4_82a17eee4a076c6c79fddeda347e0c9a._comment b/doc/install/comment_4_82a17eee4a076c6c79fddeda347e0c9a._comment new file mode 100644 index 0000000000..678847ecae --- /dev/null +++ b/doc/install/comment_4_82a17eee4a076c6c79fddeda347e0c9a._comment @@ -0,0 +1,69 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="short answer: no" + date="2011-06-10T19:55:38Z" + content=""" +Long answer, quoting from a mail to someone else: + +Well, I can tell you that it assumes a POSIX system, both in available +utilities and system calls, So you'd need to use cygwin or something +like that. (Perhaps you already are for git, I think git also assumes a +POSIX system.) So you need a Haskell that can target that. What this +page refers to as \"GHC-Cygwin\": + +I don't know where to get one. Did find this: + + +(There are probably also still some places where it assumes / as a path +separator, although I fixed some.) + +FWIW, git-annex works fine on OS X and other fine proprietary unixen. ;P + +---- + +Alternatively, windows versions of these functions could be found, +which are all the ones that need POSIX, I think. A fair amount of this, +the stuff to do with signals and users, could be empty stubs in windows. +The file manipulation, particularly symlinks, would probably be the main +challenge. + +
+addSignal
+blockSignals
+changeWorkingDirectory
+createLink
+createSymbolicLink
+emptySignalSet
+executeFile
+fileMode
+fileSize
+forkProcess
+getAnyProcessStatus
+getEffectiveUserID
+getEnvDefault
+getFileStatus
+getProcessID
+getProcessStatus
+getSignalMask
+getSymbolicLinkStatus
+getUserEntryForID
+getUserEntryForName
+groupWriteMode
+homeDirectory
+installHandler
+intersectFileModes
+isRegularFile
+isSymbolicLink
+modificationTime
+otherWriteMode
+ownerWriteMode
+readSymbolicLink
+setEnv
+setFileMode
+setSignalMask
+sigCHLD
+sigINT
+unionFileModes
+
+"""]] diff --git a/doc/install/openSUSE.mdwn b/doc/install/openSUSE.mdwn new file mode 100644 index 0000000000..0383cbbf2a --- /dev/null +++ b/doc/install/openSUSE.mdwn @@ -0,0 +1,4 @@ +Unfortunately there is currently no git-annex rpm available for openSUSE; however it is possible to build it via cabal or from source as described on the [[install]] page. Fulfilling the dependencies listed on that page should not be a problem, except for obtaining a suitable version of the Haskell library. + +The last [official release of Haskell for openSUSE](https://build.opensuse.org/project/show?project=devel:languages:haskell) is quite old, and may not satisfy the dependencies needed by git-annex. Fortunately [searching the openSUSE build service](http://software.opensuse.org/search?q=cabal&baseproject=openSUSE%3A11.4&lang=en&include_home=true&exclude_debug=true) reveals that Peter Trommler has built a [newer Haskell suite](https://build.opensuse.org/project/show?project=home%3Aptrommler%3Adevel%3Alanguages%3Ahaskell) based on ghc 7.2. +To install this, simply click on the relevant "1-Click Install" link in the openSUSE build service search results. diff --git a/doc/internals.mdwn b/doc/internals.mdwn new file mode 100644 index 0000000000..b2fd1e5545 --- /dev/null +++ b/doc/internals.mdwn @@ -0,0 +1,87 @@ +In the world of git, we're not scared about internal implementation +details, and sometimes we like to dive in and tweak things by hand. Here's +some documentation to that end. + +## `.git/annex/objects/aa/bb/*/*` + +This is where locally available file contents are actually stored. +Files added to the annex get a symlink checked into git that points +to the file content. + +First there are two levels of directories used for hashing, to prevent +too many things ending up in any one directory. + +Each subdirectory has the name of a key in one of the +[[key-value_backends|backends]]. The file inside also has the name of the key. 
+This two-level structure is used because it allows the write bit to be removed +from the subdirectories as well as from the files. That prevents accidentally +deleting or changing the file contents. + +## The git-annex branch + +This branch is managed by git-annex, with the contents listed below. + +The file `.git/annex/index` is a separate git index file it uses +to accumulate changes for the git-annex branch. +Also, `.git/annex/journal/` is used to record changes before they +are added to git. + +### `uuid.log` + +Records the UUIDs of known repositories, and associates them with a +description of the repository. This allows git-annex to display something +more useful than a UUID when it refers to a repository that does not have +a configured git remote pointing at it. + +The file format is simply one line per repository, with the uuid followed by a +space and then the description, followed by a timestamp. Example: + + e605dca6-446a-11e0-8b2a-002170d25c55 laptop timestamp=1317929189.157237s + 26339d22-446b-11e0-9101-002170d25c55 usb disk timestamp=1317929330.769997s + +## `remotes.log` + +Holds persistent configuration settings for [[special_remotes]] such as +Amazon S3. + +The file format is one line per remote, starting with the uuid of the +remote, followed by a space, and then a series of key=value pairs, +each separated by whitespace, and finally a timestamp. + +## `trust.log` + +Records the [[trust]] information for repositories. Does not exist unless +[[trust]] values are configured. + +The file format is one line per repository, with the uuid followed by a +space, and then either `1` (trusted), `0` (untrusted), `?` (semi-trusted), +or `X` (dead), and finally a timestamp. + +Example: + + e605dca6-446a-11e0-8b2a-002170d25c55 1 timestamp=1317929189.157237s + 26339d22-446b-11e0-9101-002170d25c55 ? timestamp=1317929330.769997s + +Repositories not listed are semi-trusted. 
+ +## `aaa/bbb/*.log` + +These log files record [[location_tracking]] information +for file contents. Again these are placed in two levels of subdirectories +for hashing. The name of the key is the filename, and the content +consists of a timestamp, either 1 (present) or 0 (not present), and +the UUID of the repository that has or lacks the file content. + +Example: + + 1287290776.765152s 1 e605dca6-446a-11e0-8b2a-002170d25c55 + 1287290767.478634s 0 26339d22-446b-11e0-9101-002170d25c55 + +These files are designed to be auto-merged using git's [[union merge driver|git-union-merge]]. +The timestamps allow the most recent information to be identified. + +## `remote/web/aaa/bbb/*.log` + +These log files record urls used by the +[[web_special_remote|special_remotes/web]]. Their format is similar +to the location tracking files, but with urls rather than UUIDs. diff --git a/doc/location_tracking.mdwn b/doc/location_tracking.mdwn new file mode 100644 index 0000000000..d40a7206fd --- /dev/null +++ b/doc/location_tracking.mdwn @@ -0,0 +1,30 @@ +git-annex keeps track of in which repositories it last saw a file's content. +This location tracking information is stored in the git-annex branch. +Repositories record their UUID and the date when they get or drop +a file's content. + +This location tracking information is useful if you have multiple +repositories, and not all are always accessible. For example, perhaps one +is on a home file server, and you are away from home. Then git-annex can +tell you what git remote it needs access to in order to get a file: + + # git annex get myfile + get myfile (not available) + I was unable to access these remotes: home + +Another way the location tracking comes in handy is if you put repositories +on removable USB drives, that might be archived away offline in a safe +place. In this sort of case, you probably don't have a git remote +configured for every USB drive. So git-annex may have to resort to talking +about repository UUIDs. 
If you have previously used "git annex init" +to attach descriptions to those repositories, it will include their +descriptions to help you with finding them: + + # git annex get myfile + get myfile (not available) + Try making some of these repositories available: + c0a28e06-d7ef-11df-885c-775af44f8882 -- USB archive drive 1 + e1938fee-d95b-11df-96cc-002170d25c55 + +In certain cases you may want to configure git-annex to [[trust]] +that location tracking information is always correct for a repository. diff --git a/doc/logo.png b/doc/logo.png new file mode 100644 index 0000000000..38d335a451 Binary files /dev/null and b/doc/logo.png differ diff --git a/doc/logo_small.png b/doc/logo_small.png new file mode 100644 index 0000000000..867fecf86c Binary files /dev/null and b/doc/logo_small.png differ diff --git a/doc/meta.mdwn b/doc/meta.mdwn new file mode 100644 index 0000000000..5ee36f8c0b --- /dev/null +++ b/doc/meta.mdwn @@ -0,0 +1,5 @@ +This wiki contains [[!pagecount pages="*"]] pages. + +Broken links: + +[[!brokenlinks ]] diff --git a/doc/news.mdwn b/doc/news.mdwn new file mode 100644 index 0000000000..3d9ece3bbb --- /dev/null +++ b/doc/news.mdwn @@ -0,0 +1,11 @@ +[[!if test="news/*" then=""" +This is where announcements of new releases, features, and other news is +posted. git-annex users are recommended to subscribe to this page's RSS +feed. + +[[!inline pages="./news/* and !*/Discussion" rootpage="news" show="30"]] + +""" +else=""" +(Please see the changelog.) +"""]] diff --git a/doc/news/LWN_article.mdwn b/doc/news/LWN_article.mdwn new file mode 100644 index 0000000000..c1c0c40472 --- /dev/null +++ b/doc/news/LWN_article.mdwn @@ -0,0 +1,2 @@ +[Linux Weekly News](http://lwn.net/) has a nice +[article on git-annex](http://lwn.net/Articles/418337/) in it this week. 
diff --git a/doc/news/sharebox_a_FUSE_filesystem_for_git-annex.mdwn b/doc/news/sharebox_a_FUSE_filesystem_for_git-annex.mdwn new file mode 100644 index 0000000000..7386841b2b --- /dev/null +++ b/doc/news/sharebox_a_FUSE_filesystem_for_git-annex.mdwn @@ -0,0 +1,19 @@ +[[!meta title="sharebox: a FUSE filesystem for git-annex"]] + +Christophe-Marie Duquesne has just announced +[Sharebox](https://github.com/chmduquesne/sharebox), a FUSE filesystem +relying on git-annex: + +
+
+What are your goals?  
+Seamless synchronization "à la dropbox".  
+Ability to use with big binary files such as mp3/movies.  
+Entirely decentralized.  
+Don't use unnecessary space  
+Keep it simple: avoid special VCS commands and keep a filesystem  
+interface as much as possible.
+
+
+ +While still alpha, this is promising. --[[Joey]] diff --git a/doc/news/version_3.20111107.mdwn b/doc/news/version_3.20111107.mdwn new file mode 100644 index 0000000000..17431bf219 --- /dev/null +++ b/doc/news/version_3.20111107.mdwn @@ -0,0 +1,8 @@ +git-annex 3.20111107 released with [[!toggle text="these changes"]] +[[!toggleable text=""" + * merge: Use fast-forward merges when possible. + Thanks Valentin Haenel for a test case showing how non-fast-forward + merges could result in an ongoing pull/merge/push cycle. + * Don't try to read config from repos with annex-ignore set. + * Bugfix: In the past two releases, git-annex init has written the uuid.log + in the wrong format, with the UUID and description flipped."""]] \ No newline at end of file diff --git a/doc/news/version_3.20111111.mdwn b/doc/news/version_3.20111111.mdwn new file mode 100644 index 0000000000..2173400154 --- /dev/null +++ b/doc/news/version_3.20111111.mdwn @@ -0,0 +1,10 @@ +git-annex 3.20111111 released with [[!toggle text="these changes"]] +[[!toggleable text=""" + * Handle a case where an annexed file is moved into a gitignored directory, + by having fix --force add its change. + * Avoid cyclic drop problems. + * Optimized copy --from and get --from to avoid checking the location log + for files that are already present. + * Automatically fix up badly formatted uuid.log entries produced by + 3.20111105, whenever the uuid.log is changed (ie, by init or describe). + * map: Support remotes with /~/ and /~user/"""]] \ No newline at end of file diff --git a/doc/news/version_3.20111122.mdwn b/doc/news/version_3.20111122.mdwn new file mode 100644 index 0000000000..193394de98 --- /dev/null +++ b/doc/news/version_3.20111122.mdwn @@ -0,0 +1,22 @@ +git-annex 3.20111122 released with [[!toggle text="these changes"]] +[[!toggleable text=""" + * merge: Improve commit messages to mention what was merged. 
+ * Avoid doing auto-merging in commands that don't need fully current + information from the git-annex branch. In particular, git annex add + no longer needs to auto-merge. + * init: When run in an already initalized repository, and without + a description specified, don't delete the old description. + * Optimised union merging; now only runs git cat-file once, and runs + in constant space. + * status: Now displays trusted, untrusted, and semitrusted repositories + separately. + * status: Include all special remotes in the list of repositories. + * status: Fix --json mode. + * status: --fast is back + * Fix support for insteadOf url remapping. Closes: #[644278](http://bugs.debian.org/644278) + * When not run in a git repository, git-annex can still display a usage + message, and "git annex version" even works. + * migrate: Don't fall over a stale temp file. + * Avoid excessive escaping for rsync special remotes that are not accessed + over ssh. + * find: Support --print0"""]] \ No newline at end of file diff --git a/doc/news/version_3.20111203.mdwn b/doc/news/version_3.20111203.mdwn new file mode 100644 index 0000000000..5be6e21424 --- /dev/null +++ b/doc/news/version_3.20111203.mdwn @@ -0,0 +1,19 @@ +git-annex 3.20111203 released with [[!toggle text="these changes"]] +[[!toggleable text=""" + * The VFAT filesystem on recent versions of Linux, when mounted with + shortname=mixed, does not get along well with git-annex's mixed case + .git/annex/objects hash directories. To avoid this problem, new content + is now stored in all-lowercase hash directories. Except for non-bare + repositories which would be a pain to transition and cannot be put on FAT. + (Old mixed-case hash directories are still tried for backwards + compatibility.) + * Flush json output, avoiding a buffering problem that could result in + doubled output. + * Avoid needing haskell98 and other fixes for new ghc. Thanks, Mark Wright. 
+ * Bugfix: dropunused did not drop keys with two spaces in their name. + * Support for storing .git/annex on a different device than the rest of the + git repository. + * --inbackend can be used to make git-annex only operate on files + whose content is stored using a specified key-value backend. + * dead: A command which says that a repository is gone for good + and you don't want git-annex to mention it again."""]] \ No newline at end of file diff --git a/doc/news/version_3.20111211.mdwn b/doc/news/version_3.20111211.mdwn new file mode 100644 index 0000000000..5d2c57e455 --- /dev/null +++ b/doc/news/version_3.20111211.mdwn @@ -0,0 +1,20 @@ +git-annex 3.20111211 released with [[!toggle text="these changes"]] +[[!toggleable text=""" + * Fix bug in last version in getting contents from bare repositories. + * Ensure that git-annex branch changes are merged into git-annex's index, + which fixes a bug that could cause changes that were pushed to the + git-annex branch to get reverted. As a side effect, it's now safe + for users to check out and commit changes directly to the git-annex + branch. + * map: Fix a failure to detect a loop when both repositories are local + and refer to each other with relative paths. + * Prevent key names from containing newlines. + * add: If interrupted, add can leave files converted to symlinks but not + yet added to git. Running the add again will now clean up this situtation. + * Fix caching of decrypted ciphers, which failed when drop had to check + multiple different encrypted special remotes. + * unannex: Can be run on files that have been added to the annex, but not + yet committed. + * sync: New command that synchronises the local repository and default + remote, by running git commit, pull, and push for you. 
+ * Version monad-control dependency in cabal file."""]] \ No newline at end of file diff --git a/doc/not.mdwn b/doc/not.mdwn new file mode 100644 index 0000000000..ad278da0dd --- /dev/null +++ b/doc/not.mdwn @@ -0,0 +1,50 @@ +[[!meta title="what git-annex is not"]] + +* git-annex is not a backup system. It may be a useful component of an + [[archival|use_case/bob]] system, or a way to deliver files to a backup + system. For a backup system that uses git and that git-annex supports + storing data in, see [[special_remotes/bup]]. + +* git-annex is not a filesystem or DropBox clone. But there + is a FUSE filesystem built on top of git-annex, called + [ShareBox](https://github.com/chmduquesne/sharebox), and there is + interest in making it easy to use and covering some of the use + cases supported by DropBox. + +* git-annex is not unison, but if you're finding unison's checksumming + too slow, or its strict mirroring of everything to both places too + limiting, then git-annex could be a useful alternative. + +* git-annex is more than just a workaround for git limitations that might + eventually be fixed by efforts like + [git-bigfiles](http://caca.zoy.org/wiki/git-bigfiles). + +* git-annex is not some flaky script that was quickly thrown together. + I wrote it in Haskell because I wanted it to be solid and to compile + down to a binary. And it has a fairly extensive test suite. (Don't be + fooled by "make test" only showing a few dozen test cases; each test + involves checking dozens to hundreds of assertions.) + +* git-annex is not [git-media](https://github.com/schacon/git-media), + although they both approach the same problem from a similar direction. + I only learned of git-media after writing git-annex, but I probably + would have still written git-annex instead of using it. Currently, + git-media has the advantage of using git smudge filters rather than + git-annex's pile of symlinks, and it may be a tighter fit for certain + situations. 
It lacks git-annex's support for widely distributed storage, + using only a single backend data store. It also does not support + partial checkouts of file contents, like git-annex does. + +* git-annex is also not [boar](http://code.google.com/p/boar/), + although it shares many of its goals and characteristics. Boar implements + its own version control system, rather than simply embracing and + extending git. And while boar supports distributed clones of a repository, + it does not support keeping different files in different clones of the + same repository, which git-annex does, and is an important feature for + large-scale archiving. + +* git-annex is not the [Mercurial largefiles extension](http://mercurial.selenic.com/wiki/LargefilesExtension). + Although mercurial and git have some of the same problems around large + files, and both try to solve them in similar ways (standin files using + mostly hashes of the real content). + diff --git a/doc/repomap.png b/doc/repomap.png new file mode 100644 index 0000000000..dcd777e125 Binary files /dev/null and b/doc/repomap.png differ diff --git a/doc/special_remotes.mdwn b/doc/special_remotes.mdwn new file mode 100644 index 0000000000..ddb2fd125d --- /dev/null +++ b/doc/special_remotes.mdwn @@ -0,0 +1,33 @@ +Most [[backends]] can transfer data to and from configured git remotes. +Normally those remotes are normal git repositories (bare and non-bare; +local and remote), that store the file contents in their own git annex +directory. + +But, git-annex also extends git's concept of remotes, with these special +types of remotes. These can be used just like any normal remote by git-annex. +They cannot be used by other git commands though. 
+ +* [[S3]] (Amazon S3, and other compatible services) +* [[bup]] +* [[directory]] +* [[rsync]] +* [[web]] +* [[hook]] +* [[tahoe-lafs|forum/tips:_special__95__remotes__47__hook_with_tahoe-lafs]] - limited testing + +## Unused content on special remotes + +Over time, special remotes can accumulate file content that is no longer +referred to by files in git. Normally, unused content in the current +repository is found by running `git annex unused`. To detect unused content +on special remotes, instead use `git annex unused --from`. Example: + + $ git annex unused --from mys3 + unused mys3 (checking for unused data...) + Some annexed data on mys3 is not used by any files in this repository. + NUMBER KEY + 1 WORM-s3-m1301674316--foo + (To see where data was previously used, try: git log --stat -S'KEY') + (To remove unwanted data: git-annex dropunused --from mys3 NUMBER) + $ git annex dropunused --from mys3 1 + dropunused 12948 (from mys3...) ok diff --git a/doc/special_remotes/S3.mdwn b/doc/special_remotes/S3.mdwn new file mode 100644 index 0000000000..d4d3d02388 --- /dev/null +++ b/doc/special_remotes/S3.mdwn @@ -0,0 +1,40 @@ +This special remote type stores file contents in a bucket in Amazon S3 +or a similar service. + +See [[tips/using_Amazon_S3]] and +[[tips/Internet_Archive_via_S3]] for usage examples. + +## configuration + +The standard environment variables `ANNEX_S3_ACCESS_KEY_ID` and +`ANNEX_S3_SECRET_ACCESS_KEY` are used to supply login credentials +for Amazon. When encryption is enabled, they are stored in encrypted form +by `git annex initremote`, so you do not need to keep the environment +variables set after the initial initialization of the remote. + +A number of parameters can be passed to `git annex initremote` to configure +the S3 remote. + +* `encryption` - Required. Either "none" to disable encryption + (not recommended), + or a value that can be looked up (using gpg -k) to find a gpg encryption + key that will be given access to the remote. 
Note that additional gpg + keys can be given access to a remote by rerunning initremote with + the new key id. See [[encryption]]. + +* `datacenter` - Defaults to "US". Other values include "EU", + "us-west-1", and "ap-southeast-1". + +* `storageclass` - Default is "STANDARD". If you have configured git-annex + to preserve multiple [[copies]], consider setting this to "REDUCED_REDUNDANCY" + to save money. + +* `host` and `port` - Specify in order to use a different, S3 compatible + service. + +* `bucket` - S3 requires that buckets have a globally unique name, + so by default, a bucket name is chosen based on the remote name + and UUID. This can be specified to pick a bucket name. + +* `x-amz-*` are passed through as http headers when storing keys + in S3. diff --git a/doc/special_remotes/bup.mdwn b/doc/special_remotes/bup.mdwn new file mode 100644 index 0000000000..e59ff240de --- /dev/null +++ b/doc/special_remotes/bup.mdwn @@ -0,0 +1,41 @@ +This special remote type stores file contents in a +[bup](http://github.com/apenwarr/bup) repository. By using git-annex +in the front-end, and bup as a remote, you get an easy git-style +interface to large files, and easy backups of the file contents using git. + +This is particularly well suited to collaboration on projects involving +large files, since both the git-annex and bup repositories can be +accessed like any other git repository. + +See [[walkthrough/using_bup]] for usage examples. + +Each individual key is stored in a bup remote using `bup split`, with +a git branch named the same as the key name. Content is retrieved from +bup using `bup join`. All other bup operations are up to you -- consider +running `bup fsck --generate` in a cron job to generate recovery blocks, +for example; or clone bup's git repository to further back it up. + +## configuration + +These parameters can be passed to `git annex initremote` to configure bup: + +* `encryption` - Required. 
Either "none" to disable encryption of content + stored in bup (ssh will still be used to transport it securely), + or a value that can be looked up (using gpg -k) to find a gpg encryption + key that will be given access to the remote. Note that additional gpg + keys can be given access to a remote by rerunning initremote with + the new key id. See [[encryption]]. + +* `buprepo` - Required. This is passed to `bup` as the `--remote` + to use to store data. To create the repository, `bup init` will be run. + Example: "buprepo=example.com:/big/mybup" or "buprepo=/big/mybup" + (To use the default `~/.bup` repository on the local host, specify "buprepo=") + +Options to pass to `bup split` when sending content to bup can also +be specified, by using `git config annex.bup-split-options`. This +can be used to, for example, limit its bandwidth. + +## notes + +[[git-annex-shell]] does not support bup, due to the wacky way that bup +starts its server. So, to use bup, you need full shell access to the server. diff --git a/doc/special_remotes/directory.mdwn b/doc/special_remotes/directory.mdwn new file mode 100644 index 0000000000..0a38c763cc --- /dev/null +++ b/doc/special_remotes/directory.mdwn @@ -0,0 +1,11 @@ +This special remote type stores file contents in a directory. + +One use case for this would be if you have a removable drive that +you want to use to sneakernet files between systems (possibly with +[[encrypted|encryption]] contents). Just set up both systems to use +the drive's mountpoint as a directory remote. + +Setup example: + + # git annex initremote usbdrive type=directory directory=/media/usbdrive/ encryption=none + # git annex describe usbdrive "usb drive on /media/usbdrive/" diff --git a/doc/special_remotes/hook.mdwn b/doc/special_remotes/hook.mdwn new file mode 100644 index 0000000000..9a7dbf7a19 --- /dev/null +++ b/doc/special_remotes/hook.mdwn @@ -0,0 +1,68 @@ +This special remote type lets you store content in a remote of your own +devising. 
+ +It's not recommended to use this remote type when another like [[rsync]] +or [[directory]] will do. If your hooks are not carefully written, data +could be lost. + +## example + +Here's a simple example that stores content on clay tablets. If you +implement this example in the real world, I'd appreciate a tour +next Apert! :) --[[Joey]] + + # git config annex.cuneiform-store-hook 'tocuneiform < "$ANNEX_FILE" | tablet-writer --implement=stylus --title="$ANNEX_KEY" | tablet-proofreader | librarian --shelve --floor=$ANNEX_HASH_1 --shelf=$ANNEX_HASH_2' + # git config annex.cuneiform-retrieve-hook 'librarian --get --floor=$ANNEX_HASH_1 --shelf=$ANNEX_HASH_2 --title="$ANNEX_KEY" | tablet-reader --implement=coffee --implement=glasses --force-monastic-dedication | fromcuneiform > "$ANNEX_FILE"' + # git config annex.cuneiform-remove-hook 'librarian --get --floor=$ANNEX_HASH_1 --shelf=$ANNEX_HASH_2 --title="$ANNEX_KEY" | goon --hit-with-hammer' + # git config annex.cuneiform-checkpresent-hook 'librarian --find --force-distrust-catalog --floor=$ANNEX_HASH_1 --shelf=$ANNEX_HASH_2 --title="$ANNEX_KEY" --shout-title' + # git annex initremote library type=hook hooktype=cuneiform encryption=none + # git annex describe library "the reborn Library of Alexandria (upgrade to bronze plates pending)" + +Can you spot the potential data loss bugs in the above simple example? +(Hint: What happens when the `tablet-proofreader` exits nonzero?) + +## configuration + +These parameters can be passed to `git annex initremote`: + +* `encryption` - Required. Either "none" to disable encryption of content, + or a value that can be looked up (using gpg -k) to find a gpg encryption + key that will be given access to the remote. Note that additional gpg + keys can be given access to a remote by rerunning initremote with + the new key id. See [[encryption]]. + +* `hooktype` - Required. This specifies a collection of hooks to use for + this remote. 
+ +## hooks + +Each type of hook remote is specified by a collection of hook commands. +Each hook command is run as a shell command line, and should return nonzero +on failure, and zero on success. + +These environment variables are used to communicate with the hook commands: + +* `ANNEX_KEY` - name of a key to store, retrieve, remove, or check. +* `ANNEX_FILE` - a file containing the key's content +* `ANNEX_HASH_1` - short stable value, based on the key, can be used for hashing + into 1024 buckets. +* `ANNEX_HASH_2` - another hash value, can be used for a second level of hashing + +The settings to use in git config for the hook commands are as follows: + +* `annex.$hooktype-store-hook` - Command run to store a key in the special remote. + `ANNEX_FILE` contains the content to be stored. + +* `annex.$hooktype-retrieve-hook` - Command run to retrieve a key from the special remote. + `ANNEX_FILE` is a file that the retrieved content should be written to. + The file may already exist with a partial + copy of the content (or possibly just garbage), to allow for resuming + of partial transfers. + +* `annex.$hooktype-remove-hook` - Command to remove a key from the special remote. + +* `annex.$hooktype-checkpresent-hook` - Command to check if a key is present + in the special remote. Should output the key name to stdout, on its own line, + if and only if the key has been actively verified to be present in the + special remote (caching presence information is a very bad idea); + all other output to stdout will be ignored. diff --git a/doc/special_remotes/rsync.mdwn b/doc/special_remotes/rsync.mdwn new file mode 100644 index 0000000000..90d544a1e1 --- /dev/null +++ b/doc/special_remotes/rsync.mdwn @@ -0,0 +1,28 @@ +This special remote type rsyncs file contents to somewhere else. 
+ +Setup example: + + # git annex initremote myrsync type=rsync rsyncurl=rsync://rsync.example.com/myrsync encryption=joey@kitenet.net + # git annex describe myrsync "rsync server" + +Or for using rsync over SSH + + # git annex initremote myrsync type=rsync rsyncurl=ssh.example.com:/myrsync encryption=joey@kitenet.net + # git annex describe myrsync "rsync server" + +## configuration + +These parameters can be passed to `git annex initremote` to configure rsync: + +* `encryption` - Required. Either "none" to disable encryption of content + stored in rsync, + or a value that can be looked up (using gpg -k) to find a gpg encryption + key that will be given access to the remote. Note that additional gpg + keys can be given access to a remote by rerunning initremote with + the new key id. See [[encryption]]. + +* `rsyncurl` - Required. This is the url or `hostname:/directory` to + pass to rsync to tell it where to store content. + +The `annex-rsync-options` git configuration setting can be used to pass +parameters to rsync. diff --git a/doc/special_remotes/web.mdwn b/doc/special_remotes/web.mdwn new file mode 100644 index 0000000000..cd20a93bb1 --- /dev/null +++ b/doc/special_remotes/web.mdwn @@ -0,0 +1,11 @@ +git-annex can use the WWW as a special remote, downloading urls to files. +See [[tips/using_the_web_as_a_special_remote]] for usage examples. + +## notes + +Currently git-annex only supports downloading content from the web; +it cannot upload to it or remove content. + +This special remote uses arbitrary urls on the web as the source for content. +git-annex can also download content from a normal git remote, accessible by +http. diff --git a/doc/summary.mdwn b/doc/summary.mdwn new file mode 100644 index 0000000000..458eaab56d --- /dev/null +++ b/doc/summary.mdwn @@ -0,0 +1,12 @@ +git-annex allows managing files with git, without checking the file +contents into git. 
While that may seem paradoxical, it is useful when +dealing with files larger than git can currently easily handle, whether due +to limitations in memory, checksumming time, or disk space. + +Even without file content tracking, being able to manage files with git, +move files around and delete files with versioned directory trees, and use +branches and distributed clones, are all very handy reasons to use git. And +annexed files can co-exist in the same git repository with regularly +versioned files, which is convenient for maintaining documents, Makefiles, +etc that are associated with annexed files but that benefit from full +revision control. diff --git a/doc/templates/bare.tmpl b/doc/templates/bare.tmpl new file mode 100644 index 0000000000..2d476b716f --- /dev/null +++ b/doc/templates/bare.tmpl @@ -0,0 +1 @@ + diff --git a/doc/templates/walkthrough.tmpl b/doc/templates/walkthrough.tmpl new file mode 100644 index 0000000000..a500a5a865 --- /dev/null +++ b/doc/templates/walkthrough.tmpl @@ -0,0 +1,2 @@ +

+ diff --git a/doc/tips.mdwn b/doc/tips.mdwn new file mode 100644 index 0000000000..eda84c8672 --- /dev/null +++ b/doc/tips.mdwn @@ -0,0 +1,4 @@ +This page is a place to document tips and techniques for using git-annex. + +[[!inline pages="tips/* and !tips/*/*" archive="yes" +rootpage="tips" postformtext="Add a new tip about:" show=0]] diff --git a/doc/tips/Internet_Archive_via_S3.mdwn b/doc/tips/Internet_Archive_via_S3.mdwn new file mode 100644 index 0000000000..8c0f2dde74 --- /dev/null +++ b/doc/tips/Internet_Archive_via_S3.mdwn @@ -0,0 +1,49 @@ +[The Internet Archive](http://www.archive.org/) allows members to upload +collections using an Amazon S3 +[compatible API](http://www.archive.org/help/abouts3.txt), and this can +be used with git-annex's [[special_remotes/S3]] support. + +So, you can locally archive things with git-annex, define remotes that +correspond to "items" at the Internet Archive, and use git-annex to upload +your files to there. Of course, your use of the Internet Archive must +comply with their [terms of service](http://www.archive.org/about/terms.php). + +Sign up for an account, and get your access keys here: + + + # export AWS_ACCESS_KEY_ID=blahblah + # export AWS_SECRET_ACCESS_KEY=xxxxxxx + +Specify `host=s3.us.archive.org` when doing `initremote` to set up +a remote at the Archive. This will enable a special Internet Archive mode: +Encryption is not allowed; you are required to specify a bucket name +rather than having git-annex pick a random one; and you can optionally +specify `x-archive-meta*` headers to add metadata as explained in their +[documentation](http://www.archive.org/help/abouts3.txt). + +[[!template id=note text=""" +/!\ There seems to be a bug in either hS3 or the archive that breaks +authentication when the bucket name contains spaces or upper-case letters.. +use all lowercase and no spaces when making the bucket with `initremote`. 
+"""]] + + # git annex initremote archive-panama type=S3 \ + host=s3.us.archive.org bucket=panama-canal-lock-blueprints \ + x-archive-meta-mediatype=texts x-archive-meta-language=eng \ + x-archive-meta-title="original Panama Canal lock design blueprints" + initremote archive-panama (Internet Archive mode) ok + # git annex describe archive-panama "a man, a plan, a canal: panama" + describe archive-panama ok + +Then you can annex files and copy them to the remote as usual: + + # git annex add photo1.jpeg --backend=SHA1E + add photo1.jpeg (checksum...) ok + # git annex copy photo1.jpeg --fast --to archive-panama + copy (to archive-panama...) ok + +Note the use of the SHA1E [[backend|backends]]. It makes most sense +to use the WORM or SHA1E backend for files that will be stored in +the Internet Archive, since the key name will be exposed as the filename +there, and since the Archive does special processing of files based on +their extension. diff --git a/doc/tips/automatically_getting_files_on_checkout.mdwn b/doc/tips/automatically_getting_files_on_checkout.mdwn new file mode 100644 index 0000000000..bbb3b302eb --- /dev/null +++ b/doc/tips/automatically_getting_files_on_checkout.mdwn @@ -0,0 +1,15 @@ +Normally git-annex does not retrieve file contents when checking out a +tree. In some use cases, it makes sense to always have the contents of +files available after a `git checkout` or `git update`. This can be +accomplished by installing the following as `.git/hooks/post-checkout` + + #!/bin/sh + # Uses git-annex to get all files in the specified directories + # (relative to the top of the repository) on checkout. + dirs=. + top="$(git rev-parse --show-toplevel)" + for dir in "$dirs"; do git annex get "$top/$dir"; done + +By default, all files in the whole repository will be made available. The +`dirs` setting can be configured if you only want to get files in certain +directories. 
diff --git a/doc/tips/centralised_repository:_starting_from_nothing.mdwn b/doc/tips/centralised_repository:_starting_from_nothing.mdwn new file mode 100644 index 0000000000..899068485f --- /dev/null +++ b/doc/tips/centralised_repository:_starting_from_nothing.mdwn @@ -0,0 +1,67 @@ +If you are starting from nothing (no existing `git` or `git-annex` repository) and want to use a server as a centralised repository, try the following steps. + +On the server where you'll hold the "master" repository: + + server$ cd /one/git + server$ mkdir m + server$ cd m + server$ git init --bare + Initialized empty Git repository in /one/git/m/ + server$ git annex init origin + init origin ok + server$ + +Clone that to the laptop: + + laptop$ cd /other + laptop$ git clone ssh://server//one/git/m + Cloning into 'm'... + Warning: No xauth data; using fake authentication data for X11 forwarding. + remote: Counting objects: 5, done. + remote: Compressing objects: 100% (3/3), done. + remote: Total 5 (delta 0), reused 0 (delta 0) + Receiving objects: 100% (5/5), done. + warning: remote HEAD refers to nonexistent ref, unable to checkout. + + laptop$ cd m + laptop$ git annex init laptop + init laptop ok + laptop$ + +Add some content: + + laptop$ git annex addurl http://kitenet.net/~joey/screencasts/git-annex_coding_in_haskell.ogg + "kitenet.net_~joey_screencasts_git-annex_coding_in_haskell.ogg" + addurl kitenet.net_~joey_screencasts_git-annex_coding_in_haskell.ogg (downloading http://kitenet.net/~joey/screencasts/git-annex_coding_in_haskell.ogg ...) --2011-12-15 08:13:10-- http://kitenet.net/~joey/screencasts/git-annex_coding_in_haskell.ogg + Resolving kitenet.net (kitenet.net)... 2001:41c8:125:49::10, 80.68.85.49 + Connecting to kitenet.net (kitenet.net)|2001:41c8:125:49::10|:80... connected. + HTTP request sent, awaiting response... 
200 OK + Length: 39362757 (38M) [audio/ogg] + Saving to: `/other/m/.git/annex/tmp/URL--http&c%%kitenet.net%~joey%screencasts%git-annex_coding_in_haskell.ogg' + + 100%[======================================>] 39,362,757 2.31M/s in 17s + + 2011-12-15 08:13:27 (2.21 MB/s) - `/other/m/.git/annex/tmp/URL--http&c%%kitenet.net%~joey%screencasts%git-annex_coding_in_haskell.ogg' saved [39362757/39362757] + + (checksum...) ok + (Recording state in git...) + laptop$ git commit -m 'See Joey play.' + [master (root-commit) 106e923] See Joey play. + 1 files changed, 1 insertions(+), 0 deletions(-) + create mode 120000 kitenet.net_~joey_screencasts_git-annex_coding_in_haskell.ogg + laptop$ + +All fine, now push it back to the centralised master: + + laptop$ git push + Counting objects: 20, done. + Delta compression using up to 4 threads. + Compressing objects: 100% (11/11), done. + Writing objects: 100% (18/18), 1.50 KiB, done. + Total 18 (delta 1), reused 1 (delta 0) + To ssh://server//one/git/m + 3ba1386..ad3bc9e git-annex -> git-annex + laptop$ + +You can add more "client" repositories by following the `laptop` +sequence of operations. 
diff --git a/doc/tips/centralised_repository:_starting_from_nothing/comment_1_b0d22822017646775869ce1292e676f4._comment b/doc/tips/centralised_repository:_starting_from_nothing/comment_1_b0d22822017646775869ce1292e676f4._comment new file mode 100644 index 0000000000..22857af3e8 --- /dev/null +++ b/doc/tips/centralised_repository:_starting_from_nothing/comment_1_b0d22822017646775869ce1292e676f4._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-12-23T19:19:53Z" + content=""" +See also: [[centralized_git_repository_tutorial]] +"""]] diff --git a/doc/tips/centralized_git_repository_tutorial.mdwn b/doc/tips/centralized_git_repository_tutorial.mdwn new file mode 100644 index 0000000000..00283829fd --- /dev/null +++ b/doc/tips/centralized_git_repository_tutorial.mdwn @@ -0,0 +1,140 @@ +The [[walkthrough]] builds up a decentralized git repository setup, but +git-annex can also be used with a centralized bare repository, just like +git can. This tutorial shows how to set up a centralized repository hosted on +GitHub. + +## set up the repository, and make a checkout + +I've created a repository for technical talk videos, which you can +[fork on Github](https://github.com/joeyh/techtalks). +Or make your own repository on GitHub (or elsewhere) now. + +On your laptop, [[install]] git-annex, and clone the repository: + + # git clone git@github.com:joeyh/techtalks.git + # cd techtalks + +Tell git-annex to use the repository, and describe where this clone is +located: + + # git annex init 'my laptop' + init my laptop ok + +Let's tell git-annex that GitHub doesn't support running git-annex-shell there. +This means you can't store annexed file *contents* on GitHub; it would +really be better to host the bare repository on your own server, which +would not have this limitation. (If you want to do that, check out +[[using_gitolite_with_git-annex]].) 
+ + # git config remote.origin.annex-ignore true + +## add files to the repository + +Add some files, obtained however. + + # youtube-dl -t 'http://www.youtube.com/watch?v=b9FagOVqxmI' + # git annex add *.mp4 + add Haskell_Amuse_Bouche-b9FagOVqxmI.mp4 (checksum) ok + (Recording state in git...) + # git commit -m "added a video. I have not watched it yet but it sounds interesting" + +This file is available directly from the web; so git-annex can download it: + + # git annex addurl http://kitenet.net/~joey/screencasts/git-annex_coding_in_haskell.ogg + addurl kitenet.net_~joey_screencasts_git-annex_coding_in_haskell.ogg + (downloading http://kitenet.net/~joey/screencasts/git-annex_coding_in_haskell.ogg ...) + (checksum...) ok + (Recording state in git...) + # git commit -a -m 'added a screencast I made' + +Feel free to rename the files, etc, using normal git commands: + + # git mv Haskell_Amuse_Bouche-b9FagOVqxmI.mp4 Haskell_Amuse_Bouche.mp4 + # git mv kitenet.net_~joey_screencasts_git-annex_coding_in_haskell.ogg git-annex_coding_in_haskell.ogg + # git commit -m 'better filenames' + +Now push your changes back to the central repository. This first time, +remember to push the git-annex branch, which is used to track the file +contents. + + # git push origin master git-annex + To git@github.com:joeyh/techtalks.git + * [new branch] master -> master + * [new branch] git-annex -> git-annex + +That push went fast, because it didn't upload large videos to GitHub. +To check this, you can ask git-annex where the contents of the videos are: + + # git annex whereis + whereis Haskell_Amuse_Bouche.mp4 (1 copy) + 767e8558-0955-11e1-be83-cbbeaab7fff8 -- here + ok + whereis git-annex_coding_in_haskell.ogg (2 copies) + 00000000-0000-0000-0000-000000000001 -- web + 767e8558-0955-11e1-be83-cbbeaab7fff8 -- here + ok + +## make more checkouts + +So far you have a central repository, and a checkout on a laptop. +Let's make another checkout that's used as a backup. 
You can put it anywhere +you like, just make it be somewhere your laptop can access. A few options: + +* Put it on a USB drive that you can plug into the laptop. +* Put it on a desktop. +* Put it on some server in the local network. +* Put it on a remote VPS. + +I'll use the VPS option, but these instructions should work for +any of the above. + + # ssh server + server# sudo apt-get install git-annex + +Clone the central repository as before. (If the clone fails, you need +to add your server's ssh public key to github -- see +[this page](http://help.github.com/ssh-issues/).) + + server# git clone git@github.com:joeyh/techtalks.git + server# cd techtalks + server# git config remote.origin.annex-ignore true + server# git annex init 'backup' + init backup (merging origin/git-annex into git-annex...) ok + +Notice that the server does not have the contents of any of the files yet. +If you run `ls`, you'll see broken symlinks. We want to populate this +backup with the file contents, by copying them from your laptop. + +Back on your laptop, you need to configure a git remote for the backup. +Adjust the ssh url as needed to point to wherever the backup is. (If it +was on a local USB drive, you'd use the path to the repository instead.) + + # git remote add backup ssh://server/~/techtalks + +Now git-annex on your laptop knows how to reach the backup repository, +and can do things like copy files to it: + + # git annex copy --to backup git-annex_coding_in_haskell.ogg + copy git-annex_coding_in_haskell.ogg (checking backup...) + 12877824 2% 255.11kB/s 00:00 + ok + +You can also `git annex move` files to it, to free up space on your laptop. +And then you can `git annex get` files back to your laptop later on, as +desired. + +After you use git-annex to move files around, remember to push, +which will broadcast its updated location information. + + # git push + +## take it farther + +Of course you can create as many checkouts as you desire. 
If you have a +desktop machine too, you can make a checkout there, and use `git remote +add` to also let your desktop access the backup repository. + +You can add remotes for each direct connection between machines you find you +need -- so make the laptop have the desktop as a remote, and the desktop +have the laptop as a remote, and then on either machine git-annex can +access files stored on the other. diff --git a/doc/tips/finding_duplicate_files.mdwn b/doc/tips/finding_duplicate_files.mdwn new file mode 100644 index 0000000000..94fc85400e --- /dev/null +++ b/doc/tips/finding_duplicate_files.mdwn @@ -0,0 +1,21 @@ +Maybe you had a lot of files scattered around on different drives, and you +added them all into a single git-annex repository. Some of the files are +surely duplicates of others. + +While git-annex stores the file contents efficiently, it would still +help in cleaning up this mess if you could find, and perhaps remove +the duplicate files. + +Here's a command line that will show duplicate sets of files grouped together: + + git annex find --include '*' --format='${file} ${escaped_key}\n' | \ + sort -k2 | uniq --all-repeated=separate -f1 | \ + sed 's/ [^ ]*$//' + +Here's a command line that will remove one of each duplicate set of files: + + git annex find --include '*' --format='${file} ${escaped_key}\n' | \ + sort -k2 | uniq --repeated -f1 | sed 's/ [^ ]*$//' | \ + xargs -d '\n' git rm + +--[[Joey]] diff --git a/doc/tips/finding_duplicate_files/comment_1_ddb477ca242ffeb21e0df394d8fdf5d2._comment b/doc/tips/finding_duplicate_files/comment_1_ddb477ca242ffeb21e0df394d8fdf5d2._comment new file mode 100644 index 0000000000..d1bd4475e5 --- /dev/null +++ b/doc/tips/finding_duplicate_files/comment_1_ddb477ca242ffeb21e0df394d8fdf5d2._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://adamspiers.myopenid.com/" + nickname="Adam" + subject="Cool" + date="2011-12-23T19:16:50Z" + content=""" +Very nice :) Just for reference, here's [my Perl 
implementation](https://github.com/aspiers/git-config/blob/master/bin/git-annex-finddups). As per [this discussion](http://git-annex.branchable.com/todo/wishlist:_Provide_a___34__git_annex__34___command_that_will_skip_duplicates/#comment-fb15d5829a52cd05bcbd5dc53edaffb2) it would be interesting to benchmark these two approaches and see if one is substantially more efficient than the other w.r.t. CPU and memory usage. +"""]] diff --git a/doc/tips/migrating_data_to_a_new_backend.mdwn b/doc/tips/migrating_data_to_a_new_backend.mdwn new file mode 100644 index 0000000000..b9acb8bd15 --- /dev/null +++ b/doc/tips/migrating_data_to_a_new_backend.mdwn @@ -0,0 +1,16 @@ +Maybe you started out using the WORM backend, and have now configured +git-annex to use SHA1. But files you added to the annex before still +use the WORM backend. There is a simple command that can migrate that +data: + + # git annex migrate my_cool_big_file + migrate my_cool_big_file (checksum...) ok + +You can only migrate files whose content is currently available. Other +files will be skipped. + +After migrating a file to a new backend, the old content in the old backend +will still be present. That is necessary because multiple files +can point to the same content. The `git annex unused` subcommand can be +used to clear up that detritus later. Note that hard links are used, +to avoid wasting disk space. diff --git a/doc/tips/powerful_file_matching.mdwn b/doc/tips/powerful_file_matching.mdwn new file mode 100644 index 0000000000..d5d29377c4 --- /dev/null +++ b/doc/tips/powerful_file_matching.mdwn @@ -0,0 +1,36 @@ +git-annex has a powerful syntax for making it act on only certain files. + +The simplest thing is to exclude some files, using wild cards: + + git annex get --exclude '*.mp3' --exclude '*.ogg' + +But you can also exclude files that git-annex's [[location_tracking]] +information indicates are present in a given repository. 
For example, +if you want to populate newarchive with files, but not those already +on oldarchive, you could do it like this: + + git annex copy --not --in oldarchive --to newarchive + +Without the --not, --in makes it act on files that *are* in the specified +repository. So, to remove files that are on oldarchive: + + git annex drop --in oldarchive + +Or maybe you're curious which files have a lot of copies, and then +also want to know which files have only one copy: + + git annex find --copies 7 + git annex find --not --copies 2 + +The above are the simple examples of specifying what files git-annex +should act on. But you can specify anything you can dream up by combining +the things above, with --and --or -( and -). Those last two strange-looking +options are parentheses, for grouping other options. You will probably +have to escape them from your shell. + +Here are the mp3 files that are in either of two repositories, but have +less than 3 copies: + + git annex find --not --exclude '*.mp3' --and \ + -\( --in usbdrive --or --in archive -\) --and \ + --not --copies 3 diff --git a/doc/tips/recover_data_from_lost+found.mdwn b/doc/tips/recover_data_from_lost+found.mdwn new file mode 100644 index 0000000000..48ef2a1d73 --- /dev/null +++ b/doc/tips/recover_data_from_lost+found.mdwn @@ -0,0 +1,19 @@ +Suppose something goes wrong, and fsck puts all the files in lost+found. +It's actually very easy to recover from this disaster. + +First, check out the git repository again. Then, in the new checkout: + + $ mkdir recovered-content + $ sudo mv ../lost+found/* recovered-content + $ sudo chown you:you recovered-content + $ chmod -R u+w recovered-content + $ git annex add recovered-content + $ git rm recovered-content + $ git commit -m "recovered some content" + $ git annex fsck + +The way that works is that when git-annex adds the same content that was in +the repository before, all the old links to that content start working +again. 
This works particularly well if the SHA* backends are used, but even +with the default backend it will work pretty well, as long as fsck +preserved the modification time of the files. diff --git a/doc/tips/untrusted_repositories.mdwn b/doc/tips/untrusted_repositories.mdwn new file mode 100644 index 0000000000..cdb5da7c3d --- /dev/null +++ b/doc/tips/untrusted_repositories.mdwn @@ -0,0 +1,28 @@ +Suppose you have a USB thumb drive and are using it as a git annex +repository. You don't trust the drive, because you could lose it, or +accidentally run it through the laundry. Or, maybe you have a drive that +you know is dying, and you'd like to be warned if there are any files +on it not backed up somewhere else. Maybe the drive has already died +or been lost. + +You can let git-annex know that you don't trust a repository, and it will +adjust its behavior to avoid relying on that repository's continued +availability. + + # git annex untrust usbdrive + untrust usbdrive ok + +Now when you do a fsck, you'll be warned appropriately: + + # git annex fsck . + fsck my_big_file + Only these untrusted locations may have copies of this file! + 05e296c4-2989-11e0-bf40-bad1535567fe -- portable USB drive + Back it up to trusted locations with git-annex copy. + failed + +Also, git-annex will refuse to drop a file from elsewhere just because +it can see a copy on the untrusted repository. + +It's also possible to tell git-annex that you have an unusually high +level of trust for a repository. See [[trust]] for details. diff --git a/doc/tips/using_Amazon_S3.mdwn b/doc/tips/using_Amazon_S3.mdwn new file mode 100644 index 0000000000..b59ca9b4f8 --- /dev/null +++ b/doc/tips/using_Amazon_S3.mdwn @@ -0,0 +1,37 @@ +git-annex extends git's usual remotes with some [[special_remotes]], that +are not git repositories. This way you can set up a remote using say, +Amazon S3, and use git-annex to transfer files into the cloud. 
+ +First, export your S3 credentials: + + # export ANNEX_S3_ACCESS_KEY_ID="08TJMT99S3511WOZEP91" + # export ANNEX_S3_SECRET_ACCESS_KEY="s3kr1t" + +Now, create a gpg key, if you don't already have one. This will be used +to encrypt everything stored in S3, for your privacy. Once you have +a gpg key, run `gpg --list-secret-keys` to look up its key id, something +like "2512E3C7" + +Next, create the S3 remote, and describe it. + + # git annex initremote cloud type=S3 encryption=2512E3C7 + initremote cloud (encryption setup with gpg key C910D9222512E3C7) (checking bucket) (creating bucket in US) (gpg) ok + # git annex describe cloud "at Amazon's US datacenter" + describe cloud ok + +The configuration for the S3 remote is stored in git. So to make another +repository use the same S3 remote is easy: + + # cd /media/usb/annex + # git pull laptop + # git annex initremote cloud + initremote cloud (gpg) (checking bucket) ok + +Now the remote can be used like any other remote. + + # git annex copy my_cool_big_file --to cloud + copy my_cool_big_file (gpg) (checking cloud...) (to cloud...) ok + # git annex move video/hackity_hack_and_kaxxt.mov --to cloud + move video/hackity_hack_and_kaxxt.mov (checking cloud...) (to cloud...) ok + +See [[special_remotes/S3]] for details. diff --git a/doc/tips/using_git_annex_with_no_fixed_hostname_and_optimising_ssh.mdwn b/doc/tips/using_git_annex_with_no_fixed_hostname_and_optimising_ssh.mdwn new file mode 100644 index 0000000000..8fb2bf9db1 --- /dev/null +++ b/doc/tips/using_git_annex_with_no_fixed_hostname_and_optimising_ssh.mdwn @@ -0,0 +1,72 @@ +## Intro + +This tip is based on my (Matt Ford) experience of using `git annex` with my out-and-about netbook which hits many different wifi networks and has no fixed home or address. + +I'm not using a bare repository that allows pushing (an alternative solution) nor do I fancy allowing `git push` to run against my desktop checked out repository (perhaps I worry over nothing?) 
+ +None of this is really `git annex` specific but I think it is useful to know... + +## Dealing with no fixed hostname + +Essentially set up two repos as per the [[walkthrough]]. + +Desktop as follows: + + cd ~/annex + git init + git annex init "desktop" + +And the laptop like this + + git clone ssh://desktop/annex + git init + git annex init "laptop" + +Now we want to add the repos as remotes of each other. + +For the laptop it is easy: + + git remote add desktop ssh://desktop/~/annex + +However for the desktop to add an ever-changing laptop's hostname, it's a little tricky. We make use of remote SSH tunnels to do this. Essentially we have the laptop (which always knows its own name and address and knows the address of the desktop) create a tunnel starting on an arbitrary port at the desktop and heading back to the laptop on its own SSH server port (22). + +To do this make part of your laptop's SSH config look like this: + + Host desktop + User matt + HostName desktop.example.org + RemoteForward 2222 localhost:22 + +Now on the desktop to connect over the tunnel to the laptop's SSH port you need this: + + Host laptop + User matt + HostName localhost + port 2222 + +So to add the desktop's remote: + +a) From the laptop ensure the tunnel is up + + ssh desktop + +b) From the desktop add the remote + + git remote add laptop ssh://laptop/~/annex + +So now you can work on the train, pop on the wifi at work upon arrival, and sync up with a `git pull && git annex get`. + +An alternative solution may be to use direct tunnels over Openvpn. + +## Optimising SSH + +Running a `git annex get .`, at least in the version I have, creates a new SSH connection for every file transfer (maybe this should be a feature request?) + +Lots of new small files in an _annex_ cause lots of connections to be made quickly: this is a relatively expensive overhead and is enough for connection limiting to start in my case. 
The process can be made much faster by using SSH's connection sharing capabilities. An SSH config like this should do it: + + # Global Settings + ControlMaster auto + ControlPersist 30 + ControlPath ~/.ssh/master-%r@%h:%p + +This will create a master connection for sharing if one isn't present, maintain it for 30 seconds after closing down the connection (just-in-cases') and automatically use the master connection for subsequent connections. Wins all round! diff --git a/doc/tips/using_git_annex_with_no_fixed_hostname_and_optimising_ssh/comment_1_c0b7682a2b6f3078457b85683c825baf._comment b/doc/tips/using_git_annex_with_no_fixed_hostname_and_optimising_ssh/comment_1_c0b7682a2b6f3078457b85683c825baf._comment new file mode 100644 index 0000000000..e627ead47c --- /dev/null +++ b/doc/tips/using_git_annex_with_no_fixed_hostname_and_optimising_ssh/comment_1_c0b7682a2b6f3078457b85683c825baf._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://adamspiers.myopenid.com/" + nickname="Adam" + subject="comment 1" + date="2011-12-23T13:31:33Z" + content=""" +ControlPersist is awesome - thanks! + +Here's [an alternative, git-specific approach](http://thread.gmane.org/gmane.comp.version-control.home-dir/502). +"""]] diff --git a/doc/tips/using_gitolite_with_git-annex.mdwn b/doc/tips/using_gitolite_with_git-annex.mdwn new file mode 100644 index 0000000000..0d89a255be --- /dev/null +++ b/doc/tips/using_gitolite_with_git-annex.mdwn @@ -0,0 +1,89 @@ +[Gitolite](https://github.com/sitaramc/gitolite) is a git repository +manager. Here's how to add git-annex support to gitolite, so you can +`git annex copy` files to a gitolite repository, and `git annex get` +files from it. + +A nice feature of using gitolite with git-annex is that users can be given +read-only access to a repository, and this allows them to `git annex get` +file contents, but not change anything. 
+ +First, you need new enough versions: + +* gitolite 2.2 is needed -- this version contains a git-annex-shell ADC + and supports "ua" ADCs. +* git-annex 3.20111016 or newer needs to be installed on the gitolite + server. Don't install an older version, it wouldn't be secure! + +And here's how to set it up. The examples are for gitolite as installed +on Debian with apt-get, but the changes described can be made to any +gitolite installation, just with different paths. + +Set `$GL_ADC_PATH` in `.gitolite.rc`, if you have not already done so. + +
+echo '$GL_ADC_PATH = "/usr/local/lib/gitolite/adc/";' >>~gitolite/.gitolite.rc
+
+ +Make the ADC directory, and a "ua" subdirectory. + +
   
+mkdir -p /usr/local/lib/gitolite/adc/ua
+
+ +Install the git-annex-shell ADC into the "ua" subdirectory and make it +executable. + +
   
+cd /usr/local/lib/gitolite/adc/ua/
+wget https://raw.github.com/sitaramc/gitolite/pu/contrib/adc/git-annex-shell
+chmod +x git-annex-shell
+
+ +Now all gitolite repositories can be used with git-annex just as any +ssh remote normally would be used. For example: + +
+# git clone gitolite@localhost:testing
+Cloning into testing...
+Receiving objects: 100% (18/18), done.
+# cd testing
+# git annex init
+init  ok
+# cp /etc/passwd my-cool-big-file
+# git annex add my-cool-big-file
+add my-cool-big-file ok
+(Recording state in git...)
+# git commit -m added
+[master d36c8b4] added
+ 1 files changed, 1 insertions(+), 0 deletions(-)
+ create mode 120000 my-cool-big-file
+# git push --all
+Counting objects: 17, done.
+Delta compression using up to 2 threads.
+Compressing objects: 100% (12/12), done.
+Writing objects: 100% (14/14), 1.39 KiB, done.
+Total 14 (delta 0), reused 1 (delta 0)
+To gitolite@localhost:testing
+   c552a38..db4653e  git-annex -> git-annex
+   29cd204..d36c8b4  master -> master
+# git annex copy --to origin
+copy my-cool-big-file (checking origin...) (to origin...) 
+WORM-s2502-m1318875140--my-cool-big-file
+        2502 100%    0.00kB/s    0:00:00 (xfer#1, to-check=0/1)
+
+sent 2606 bytes  received 31 bytes  1758.00 bytes/sec
+total size is 2502  speedup is 0.95
+ok
+
+ + +### Troubleshooting + +I got an error like this when setting up gitolite *after* setting up a local git repo and git annex: + +
+git-annex-shell: First run: git-annex init
+Command ssh ["git@git.example.com","git-annex-shell 'configlist' '/~/myrepo.git'"] failed; exit code 1
+
+ +because I forgot to "git push --all" after adding the new gitolite remote. diff --git a/doc/tips/using_gitolite_with_git-annex/comment_1_9a2a2a8eac9af97e0c984ad105763a73._comment b/doc/tips/using_gitolite_with_git-annex/comment_1_9a2a2a8eac9af97e0c984ad105763a73._comment new file mode 100644 index 0000000000..807180660b --- /dev/null +++ b/doc/tips/using_gitolite_with_git-annex/comment_1_9a2a2a8eac9af97e0c984ad105763a73._comment @@ -0,0 +1,15 @@ +[[!comment format=mdwn + username="http://www.openid.albertlash.com/openid/" + ip="71.178.29.218" + subject="comment 1" + date="2011-12-24T06:08:45Z" + content=""" +Looks like you are missing a closing double quote on the line: + + +echo '$GL_ADC_PATH = \"/usr/local/lib/gitolite/adc/;' >>~gitolite/.gitolite.rc + +right after /; + +I got this working by the way - great stuff. +"""]] diff --git a/doc/tips/using_gitolite_with_git-annex/comment_2_d8efea4ab9576555fadbb47666ecefa9._comment b/doc/tips/using_gitolite_with_git-annex/comment_2_d8efea4ab9576555fadbb47666ecefa9._comment new file mode 100644 index 0000000000..007a009ea1 --- /dev/null +++ b/doc/tips/using_gitolite_with_git-annex/comment_2_d8efea4ab9576555fadbb47666ecefa9._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 2" + date="2011-12-24T16:54:31Z" + content=""" +I've fixed the typo (anyone can edit pages in this wiki FWIW.) +"""]] diff --git a/doc/tips/using_the_SHA1_backend.mdwn b/doc/tips/using_the_SHA1_backend.mdwn new file mode 100644 index 0000000000..70dc2ef759 --- /dev/null +++ b/doc/tips/using_the_SHA1_backend.mdwn @@ -0,0 +1,11 @@ +A handy alternative to the default [[backend|backends]] is the +SHA1 backend. This backend provides more git-style assurance that your data +has not been damaged. And the checksum means that when you add the same +content to the annex twice, only one copy need be stored in the backend. 
+ +The only reason it's not the default is that it needs to checksum +files when they're added to the annex, and this can slow things down +significantly for really big files. To make SHA1 the default, just +add something like this to `.gitattributes`: + + * annex.backend=SHA1 diff --git a/doc/tips/using_the_web_as_a_special_remote.mdwn b/doc/tips/using_the_web_as_a_special_remote.mdwn new file mode 100644 index 0000000000..8009927a49 --- /dev/null +++ b/doc/tips/using_the_web_as_a_special_remote.mdwn @@ -0,0 +1,32 @@ +The web can be used as a [[special_remote|special_remotes]] too. + + # git annex addurl http://example.com/video.mpeg + addurl example.com_video.mpeg (downloading http://example.com/video.mpeg) + ########################################################## 100.0% + ok + +Now the file is downloaded, and has been added to the annex like any other +file. So it can be renamed, copied to other repositories, and so on. + +Note that git-annex assumes that, if the web site does not 404, the file is +still present on the web, and this counts as one [[copy|copies]] of the +file. So it will let you remove your last copy, trusting it can be +downloaded again: + + # git annex drop example.com_video.mpeg + drop example.com_video.mpeg (checking http://example.com/video.mpeg) ok + +If you don't [[trust]] the web to this degree, just let git-annex know: + + # git annex untrust web + untrust web ok + +With the result that it will hang onto files: + + # git annex drop example.com_video.mpeg + drop example.com_video.mpeg (unsafe) + Could only verify the existence of 0 out of 1 necessary copies + Also these untrusted repositories may contain the file: + 00000000-0000-0000-0000-000000000001 -- web + (Use --force to override this check, or adjust annex.numcopies.) 
+ failed diff --git a/doc/tips/what_to_do_when_a_repository_is_corrupted.mdwn b/doc/tips/what_to_do_when_a_repository_is_corrupted.mdwn new file mode 100644 index 0000000000..80cb046d90 --- /dev/null +++ b/doc/tips/what_to_do_when_a_repository_is_corrupted.mdwn @@ -0,0 +1,22 @@ +A git-annex repository on a removable USB drive is great, until the cable +falls out at the wrong time and git's repository gets trashed. The way +git checksums everything and the poor quality of USB media make this +perhaps more likely than you would expect. If this happens to you, +here's a way to recover that makes the most of whatever data is left +on the drive. + +* First, run `git fsck`. If it does not report any problems, your data + is fine, and you don't need to proceed further. +* So `git fsck` says the git repository is corrupted. But probably the data + git-annex stored is fine. Your first step is to clone another copy + of the git repository from somewhere else. Let's call this clone + "$good", and the corrupted repository "$bad". +* Preserve your git configuration changes, and the `annex.uuid` setting: + `mv $bad/.git/config $good/.git/config` +* Move annexed data into the new repository: `mkdir $good/.git/annex; mv + $bad/.git/annex/objects $good/.git/annex/objects` +* Reinitialize git-annex: `cd $good; git annex init` +* Check for any problems with the annexed data: `cd $good; git annex fsck` +* Now you can remove the corrupted repository, the new one is ready to use. + +--[[Joey]] diff --git a/doc/tips/what_to_do_when_you_lose_a_repository.mdwn b/doc/tips/what_to_do_when_you_lose_a_repository.mdwn new file mode 100644 index 0000000000..3be13b8abd --- /dev/null +++ b/doc/tips/what_to_do_when_you_lose_a_repository.mdwn @@ -0,0 +1,19 @@ +So you lost a thumb drive containing a git-annex repository. Or a hard +drive died or some other misfortune has befallen your data. + +Unless you configured backups, git-annex can't get your data back. But it +can help you deal with the loss. 
+ +Go somewhere that knows about the lost repository, and mark it as +dead: + + git annex dead usbdrive + +This retains the [[location_tracking]] information for the repository, +but avoids trying to access it, or list it as a location where files +are present. + +If you later found the drive, you could let git-annex know it's found +like so: + + git annex semitrusted usbdrive diff --git a/doc/todo.mdwn b/doc/todo.mdwn new file mode 100644 index 0000000000..79552298b6 --- /dev/null +++ b/doc/todo.mdwn @@ -0,0 +1,4 @@ +This is git-annex's todo list. Link items to [[todo/done]] when done. + +[[!inline pages="./todo/* and !./todo/done and !link(done) +and !*/Discussion" actions=yes postform=yes show=0 archive=yes]] diff --git a/doc/todo/Please_abort_build_if___34__make_test__34___fails.mdwn b/doc/todo/Please_abort_build_if___34__make_test__34___fails.mdwn new file mode 100644 index 0000000000..592b5e0773 --- /dev/null +++ b/doc/todo/Please_abort_build_if___34__make_test__34___fails.mdwn @@ -0,0 +1,7 @@ +A failure during "make test" should be signalled to the caller by means of +a non-zero exit code. Without that signal, it's very hard to run the +regression test suite in an automated fashion. + +> git-annex used to have a Makefile that ignored make test exit status, +> but that was fixed in commit dab5bddc64ab4ad479a1104748c15d194e138847, +> in October 6th. [[done]] --[[Joey]] diff --git a/doc/todo/Please_add_support_for_monad-control_0.3.x.mdwn b/doc/todo/Please_add_support_for_monad-control_0.3.x.mdwn new file mode 100644 index 0000000000..ca68c2c913 --- /dev/null +++ b/doc/todo/Please_add_support_for_monad-control_0.3.x.mdwn @@ -0,0 +1,7 @@ +Git-annex doesn't compile with the latest version of monad-control. Would it be hard to support that new version? + +> I have been waiting for it to land in Debian before trying to +> deal with its changes. +> +> There is now a branch in git called `new-monad-control` that will build +> with the new monad-control. 
--[[Joey]] diff --git a/doc/todo/S3.mdwn b/doc/todo/S3.mdwn new file mode 100644 index 0000000000..7e417336f0 --- /dev/null +++ b/doc/todo/S3.mdwn @@ -0,0 +1,24 @@ +Support Amazon S3 as a file storage backend. + +There's a haskell library that looks good. Not yet in Debian. + +Multiple ways of using S3 are possible. Currently implemented as +a special type of git remote. + +Before this can be closed, I need to fix: + +## encryption + +TODO + +## unused checking + +One problem is `git annex unused`. Currently it only looks at the local +repository, not remotes. But if something is dropped from the local repo, +and you forget to drop it from S3, cruft can build up there. + +This could be fixed by adding a hook to list all keys present in a remote. +Then unused could scan remotes for keys, and if they were not used locally, +offer the possibility to drop them from the remote. + +[[done]] diff --git a/doc/todo/add_--exclude_option_to_git_annex_find.mdwn b/doc/todo/add_--exclude_option_to_git_annex_find.mdwn new file mode 100644 index 0000000000..a797e97f58 --- /dev/null +++ b/doc/todo/add_--exclude_option_to_git_annex_find.mdwn @@ -0,0 +1,4 @@ +Seems pretty self-explanatory. + +> This was already implemented, the --exclude option can be used +> for find as well as most any other subcommand. --[[Joey]] [[done]] diff --git a/doc/todo/add_-all_option.mdwn b/doc/todo/add_-all_option.mdwn new file mode 100644 index 0000000000..e6fa0b339d --- /dev/null +++ b/doc/todo/add_-all_option.mdwn @@ -0,0 +1,17 @@ +`--all` would make git-annex operate on either every key with content +present (or in some cases like `get` and `copy --from` on +every key with content not present). + +This would be useful when a repository has a history with deleted files +whose content you want to keep (so you're not using `dropunused`). +Or when you have a lot of branches and just want to be able to fsck +every file referenced in any branch. 
It could also be useful (or even a +good default) in a bare repository. + +A problem with the idea is that `.gitattributes` values for keys not +currently in the tree would not be available (without horrific amounts of +grubbing thru history to find where/when the key used to exist). So +`numcopies` set via `.gitattributes` would not work. This would be a +particular problem for `drop` and for `--auto`. + +--[[Joey]] diff --git a/doc/todo/add_a_git_backend.mdwn b/doc/todo/add_a_git_backend.mdwn new file mode 100644 index 0000000000..2b224710ed --- /dev/null +++ b/doc/todo/add_a_git_backend.mdwn @@ -0,0 +1,18 @@ +There should be a backend where the file content is stored.. in a git +repository! + +This way, you know your annexed content is safe & versioned, but you only +have to deal with the pain of git with large files in one place, and can +use all of git-annex's features everywhere else. + +> Speaking as a future user, do very, very much want. -- RichiH + +>> Might also be interesting to use `bup` in the git backend, to work +>> around git's big file issues there. So git-annex would pull data out +>> of the git backend using bup. --[[Joey]] + +>>> Very much so. Generally speaking, having one or more versioned storage back-ends with current data in the local annexes sounds incredibly useful. Still being able to get at old data via the back-end and/or making offline backups of the full history are excellent use cases. -- RichiH + +[[done]], the bup special remote type is written! --[[Joey]] + +> Yay! -- RichiH diff --git a/doc/todo/auto_remotes.mdwn b/doc/todo/auto_remotes.mdwn new file mode 100644 index 0000000000..715dea7207 --- /dev/null +++ b/doc/todo/auto_remotes.mdwn @@ -0,0 +1,29 @@ +It should be possible for clones to learn about how to contact +each other without remotes needing to always be explicitly set +up. 
Say that `.git-annex/remote.log` is maintained by git-annex +to contain: + + UUID hostname URI + +The URI comes from configured remotes and maybe from +`file://$(pwd)`, or even `ssh://$(hostname -f)` +for the current repo. This format will merge without +conflicts or data loss. + +Then when content is believed to be in a UUID, and no +configured remote has it, the remote.log can be consulted and +URIs that look likely tried. (file:// ones if the hostname +is the same (or maybe always -- a removable drive might tend +to be mounted at the same location on different hosts), +otherwise ssh:// ones.) + +Question: When should git-annex update the remote.log? +(If not just on init.) Whenever it reads in a repo's remotes? + +> This sounds useful and the log should be updated every time any remote is being accessed. A counter or timestamp (yes, distributed times may be wrong/different) could be used to auto-prune old entries via a global and per-remote config setting. -- RichiH + +--- + +I no longer think I'd use this myself, I find that my repositories quickly +grow the paths I actually use, somewhat organically. Unofficial paths +across university quads come to mind. [[done]] --[[Joey]] diff --git a/doc/todo/auto_remotes/discussion.mdwn b/doc/todo/auto_remotes/discussion.mdwn new file mode 100644 index 0000000000..b9e1522a8f --- /dev/null +++ b/doc/todo/auto_remotes/discussion.mdwn @@ -0,0 +1,7 @@ +Remotes log should probably be stored in ".git/annex/remote.log" +instead of ".git-annex/remote.log" to prevent leaking credentials. + +> The idea is to distribute the info between repositories, which is +> why it'd go in `.git-annex`. Of course that does mean that repository +> location information would be included, and if that's not desirable +> this feature would need to be turned off. 
--[[Joey]] diff --git a/doc/todo/avoid_unnecessary_union_merges.mdwn b/doc/todo/avoid_unnecessary_union_merges.mdwn new file mode 100644 index 0000000000..5cd4b64373 --- /dev/null +++ b/doc/todo/avoid_unnecessary_union_merges.mdwn @@ -0,0 +1,20 @@ +Some commands cause a union merge unnecessarily. For example, `git annex add` +modifies the location log, which first requires reading the current log (if +any), which triggers a merge. + +Would be good to avoid these unnecessary union merges. First because it's +faster and second because it avoids a possible delay when a user might +ctrl-c and leave the repo in an inconsistent state. In the case of an add, +the file will be in the annex, but no location log will exist for it (fsck +fixes that). + +It may be that all that's needed is to modify Annex.Branch.change +to read the current value, without merging. Then commands like `get`, that +query the branch, will still cause merges, and commands like `add` that +only modify it, will not. Note that for a command like `get`, the merge +occurs before it has done anything, so ctrl-c should not be a problem +there. + +This is a delicate change, I need to take care.. --[[Joey]] + +> [[done]] (assuming I didn't miss any cases where this is not safe!) --[[Joey]] diff --git a/doc/todo/backendSHA1.mdwn b/doc/todo/backendSHA1.mdwn new file mode 100644 index 0000000000..8c16b75ad0 --- /dev/null +++ b/doc/todo/backendSHA1.mdwn @@ -0,0 +1,7 @@ +This backend is not finished. + +In particular, while files can be added using it, git-annex will not notice +when their content changes, and will not create a new key for the new sha1 +of the net content. + +[[done]]; use unlock subcommand and commit changes with git diff --git a/doc/todo/branching.mdwn b/doc/todo/branching.mdwn new file mode 100644 index 0000000000..ad7ece6f10 --- /dev/null +++ b/doc/todo/branching.mdwn @@ -0,0 +1,159 @@ +[[done]] !!! 
+ +The use of `.git-annex` to store logs means that if a repo has branches +and the user switched between them, git-annex will see different logs in +the different branches, and so may miss info about what remotes have which +files (though it can re-learn). + +An alternative would be to store the log data directly in the git repo +as `pristine-tar` does. Problem with that approach is that git won't merge +conflicting changes to log files if they are not in the currently checked +out branch. + +It would be possible to use a branch with a tree like this, to avoid +conflicts: + +key/uuid/time/status + +As long as new files are only added, and old timestamped files deleted, +there would be no conflicts. + +A related problem though is the size of the tree objects git needs to +commit. Having the logs in a separate branch doesn't help with that. +As more keys are added, the tree object size will increase, and git will +take longer and longer to commit, and use more space. One way to deal with +this is simply by splitting the logs among subdirectories. Git then can +reuse trees for most directories. (Check: Does it still have to build +dup trees in memory?) + +Another approach would be to have git-annex *delete* old logs. Keep logs +for the currently available files, or something like that. If other log +info is needed, look back through history to find the first occurrence of a +log. Maybe even look at other branches -- so if the logs were on master, +a new empty branch could be made and git-annex would still know where to +get keys in that branch. + +Would have to be careful about conflicts when deleting and bringing back +files with the same name. And would need to avoid expensive searching thru +all history to try to find an old log file. + +## fleshed out proposal + +Let's use one branch per uuid, named git-annex/$UUID. + +- I came to realize this would be a good idea when thinking about how + to upgrade. 
Each individual annex will be upgraded independently, + so each will want to make a branch, and if the branches aren't distinct, + they will merge conflict for sure. +- TODO: What will need to be done to git to make it push/pull these new + branches? +- A given repo only ever writes to its UUID branch. So no conflicts. + - **problem**: git annex move needs to update log info for other repos! + (possibly solvable by having git-annex-shell update the log info + when content is moved using it) +- (BTW, UUIDs probably don't compress well, and this reduces the bloat of having + them repeated lots of times in the tree.) +- Per UUID branches mean that if it wants to find a file's location + among configured remotes, it can examine only their branches, if + desired. +- It's important that the per-repo branches propagate beyond immediate + remotes. If there is a central bare repo, that means push --all. Without + one, it means that when repo B pulls from A, and then C pulls from B, + C needs to get A's branch -- which means that B should have a tracking + branch for A's branch. + +In the branch, only one file is needed. Call it locationlog. git-annex +can cache location log changes and write them all to locationlog in +a single git operation on shutdown. + +- TODO: what if it's ctrl-c'd with changes pending? Perhaps it should + collect them to .git/annex/locationlog, and inject that file on shutdown? +- This will be less overhead than the current staging of all the log files. + +The log is not appended to, so in git we have a series of commits each of +which replaces the log's entire contents. + +To find locations of a key, all (or all relevant) branches need to be +examined, looking backward through the history of each until a log +with an indication of the presence/absence of the key is found. + +- This will be less expensive for files that have recently been added + or transferred. +- It could get pretty slow when digging deeper. 
+- Only 3 places in git-annex will be affected by any slowdown: move --from, + get and drop. (Update: Now also unused, whereis, fsck) + +## alternate + +As above, but use a single git-annex branch, and keep the per-UUID +info in their own log files. Hope that git can auto-merge as long as +each observing repo only writes to its own files. (Well, it can, but for +non-fast-forward merges, the git-annex branch would need to be checked out, +which is problematic.) + +Use filenames like: + + / + +That allows one repo to record another's state when doing a +`move`. + +## outside the box approach + +If the problem is limited to only that the `.git-annex/` files make +branching difficult (and not to the related problem that commits to them +and having them in the tree are sorta annoying), then a simple approach +would be to have git-annex look in other branches for location log info +too. + +The problem would then be that any locationlog lookup would need to look in +all other branches (any branch could have more current info after all), +which could get expensive. + +## way outside the box approach + +Another approach I have been mulling over is keeping the log file +branch checked out in .git/annex/logs/ -- this would be a checkout of a git +repository inside a git repository, using "git fake bare" techniques. This +would solve the merge problem, since git auto merge could be used. It would +still mean all the log files are on-disk, which annoys some. It would +require some tighter integration with git, so that after a pull, the log +repo is updated with the data pulled. --[[Joey]] + +> Seems I can't use git fake bare exactly. Instead, the best option +> seems to be `git clone --shared` to make a clone that uses +> `.git/annex/logs/.git` to hold its index etc, but (mostly) uses +> objects from the main repo. There would be some bloat, +> as commits to the logs made in there would not be shared with the main +> repo. 
Using `GIT_OBJECT_DIRECTORY` might be a way to avoid that bloat. + +## notes + +Another approach could be to use git-notes. It supports merging branches +of notes, with union merge strategy (a hook would have to do this after +a pull, it's not done automatically). + +Problem: Notes are usually attached to git +objects, and there are no git objects corresponding to git-annex keys. + +Problem: Notes are not normally copied when cloning. + +------ + +## eliminating the merge problem + +Most of the above options are complicated by the problem of how to merge +changes from remotes. It should be possible to deal with the merge +problem generically. Something like this: + +* We have a local branch `B`. +* For remotes, there are also `origin/B`, `otherremote/B`, etc. +* To merge two branches `B` and `foo/B`, construct a merge commit that + makes each file have all lines that were in either version of the file, + with duplicates removed (probably). Do this without checking out a tree. + -- now implemented as git-union-merge +* As a `post-merge` hook, merge `*/B` into `B`. This will ensure `B` + is always up-to-date after a pull from a remote. +* When pushing to a remote, nothing needs to be done, except ensure + `B` is either successfully pushed, or the push fails (and a pull needs to + be done to get the remote's changes merged into `B`). diff --git a/doc/todo/cache_key_info.mdwn b/doc/todo/cache_key_info.mdwn new file mode 100644 index 0000000000..d4352ccf7f --- /dev/null +++ b/doc/todo/cache_key_info.mdwn @@ -0,0 +1,37 @@ +Most of git-annex is designed to be fast no matter how many other files are +in the annex. Things like add/get/drop/move/fsck have good locality; +they will only operate on as many files as you need them to. + +(git commit can get a little slow with a great deal of files, +but that's out of scope -- and recent git-annex versions use queuing +to save git add from piling up too much in the index.) 
+ +But currently two git-annex commands are quite slow when annexes become large +in quantity of files. These are unused and status. +(Both have --fast versions that don't do as much). +> (Update: status has become acceptably fast; most of its slowdown was due to using a bad data structure; scanning the tree is not particularly slow and it no longer looks at the git-annex branch.) + +unused is slow because it needs two pieces of information that are not +quick to look up, and require examining the whole repo, very seekily: + +1. The keys present in the annex. Found by looking thru .git/annex/objects +2. The keys referenced by files in git. Found by finding every file + in git, and looking at its symlink. + +Of these, the first is less expensive (typically, an annex does not have every +key in it). It could be optimized fairly simply, by adding a database +of keys present in the annex that is optimised to list them all. The +database would be updated by the few functions that move content in and +out. + +The second is harder to optimise, because the user can delete, revert, +copy, add, etc files in git at will, and git-annex does not have a good way +to watch that and maintain a database of what keys are being referenced. + +It could use a post-commit hook and examine files changed by commits, etc. +But then staged files would be left out. It might be sufficient to +make --fast trust the database... except unused will suggest *deleting* +data if nothing references it. Or maybe it could be required to have a +clean tree with nothing staged before running git-annex unused. + +Anyway, this is a semi-longterm item for me. 
--[[Joey]] diff --git a/doc/todo/cache_key_info/comment_1_578df1b3b2cbfdc4aa1805378f35dc48._comment b/doc/todo/cache_key_info/comment_1_578df1b3b2cbfdc4aa1805378f35dc48._comment new file mode 100644 index 0000000000..086e7f3e84 --- /dev/null +++ b/doc/todo/cache_key_info/comment_1_578df1b3b2cbfdc4aa1805378f35dc48._comment @@ -0,0 +1,11 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 1" + date="2011-05-17T07:27:02Z" + content=""" +Sounds like a good idea. + +* git annex fsck (or similar) should check/rebuild the caches +* I would simply require a clean tree with a verbose error. 80/20 rule and defaulting to save actions. +"""]] diff --git a/doc/todo/checkout.mdwn b/doc/todo/checkout.mdwn new file mode 100644 index 0000000000..50da2d62e1 --- /dev/null +++ b/doc/todo/checkout.mdwn @@ -0,0 +1,23 @@ +The checkout subcommand replaces the symlink that normally points at a +file's content, with a copy of the file. Once you've checked a file out, +you can edit it, and `git commit` it. On commit, git-annex will detect +if the file has been changed, and if it has, `add` its content to the +annex. + +> Internally, this will need to store the original symlink to the file, in +> `.git/annex/checkedout/$filename`. +> +> * git-annex uncheckout moves that back +> * git-annex pre-commit hook checks each file being committed to see if +> it has a symlink there, and if so, removes the symlink and adds the new +> content to the annex. +> +> And it seems the file content should be copied, not moved or hard linked: +> +> * Makes sure other annexes can find it if transferring it from +> this annex. +> * Ensures it's always available for uncheckout. +> * Avoids the last copy of a file's content being lost when +> the checked out file is modified. 
+ +[[done]] diff --git a/doc/todo/done.mdwn b/doc/todo/done.mdwn new file mode 100644 index 0000000000..e7c98081b7 --- /dev/null +++ b/doc/todo/done.mdwn @@ -0,0 +1,4 @@ +recently fixed [[todo]] items. + +[[!inline pages="./* and link(./done) and !*/Discussion" sort=mtime show=10 +archive=yes]] diff --git a/doc/todo/exclude_files_on_a_given_remote.mdwn b/doc/todo/exclude_files_on_a_given_remote.mdwn new file mode 100644 index 0000000000..e8bb357d31 --- /dev/null +++ b/doc/todo/exclude_files_on_a_given_remote.mdwn @@ -0,0 +1,18 @@ +Say I have some files on remote A. But I'm away from it, and transferring +files from B to C. I'd like to avoid transferring any files I already have +on A. + +Something like: + + git annex copy --to C --exclude-on A + +This would not contact A, just use its cached location log info. + +I suppose I might also sometime want to only act on files that are +thought/known to be on A. + + git annex drop --only-on A + +--[[Joey]] + +[[done]] diff --git a/doc/todo/file_copy_progress_bar.mdwn b/doc/todo/file_copy_progress_bar.mdwn new file mode 100644 index 0000000000..847c1d1eb6 --- /dev/null +++ b/doc/todo/file_copy_progress_bar.mdwn @@ -0,0 +1,5 @@ +Find a way to copy a file with a progress bar, while still preserving +stat. Easiest way might be to use pv and fix up the permissions etc +after? + +[[done]] diff --git a/doc/todo/fsck.mdwn b/doc/todo/fsck.mdwn new file mode 100644 index 0000000000..1dcaad9a51 --- /dev/null +++ b/doc/todo/fsck.mdwn @@ -0,0 +1,11 @@ +add a git annex fsck that finds keys that have no referring file + +(done) + +* Need per-backend fsck support. sha1 can checksum all files in the annex. + WORM can check filesize. + +* Both can check that annex.numcopies is satisfied. Probably only + querying the locationlog, not doing an online verification. 
+ +[[done]] diff --git a/doc/todo/git-annex-shell.mdwn b/doc/todo/git-annex-shell.mdwn new file mode 100644 index 0000000000..a9e3b43ede --- /dev/null +++ b/doc/todo/git-annex-shell.mdwn @@ -0,0 +1,15 @@ +[[done]] + +I've been considering adding a `git-annex-shell` command. This would +be similar to `git-shell` (and in fact would pass unknown commands off to +`git-shell`). + +## Reasons + +* Allows locking down an account to only be able to use git-annex (and + git). +* Avoids needing to construct complex shell commands to run on the remote + system. (Mostly already avoided by the plumbing level commands.) +* Could possibly allow multiple things to be done with one ssh connection + in future. +* Allows expanding `~` and `~user` in repopath on the remote system. diff --git a/doc/todo/git-annex_unused_eats_memory.mdwn b/doc/todo/git-annex_unused_eats_memory.mdwn new file mode 100644 index 0000000000..3e9942e98e --- /dev/null +++ b/doc/todo/git-annex_unused_eats_memory.mdwn @@ -0,0 +1,21 @@ +`git-annex unused` has to compare large sets of data +(all keys with content present in the repository, +with all keys used by files in the repository), and so +uses more memory than git-annex typically needs; around +50 mb when run in a repository with 80 thousand files. + +(Used to be 80 mb, but implementation improved.) + +I would like to reduce this. One idea is to use a bloom filter. +For example, construct a bloom filter of all keys used by files in +the repository. Then for each key with content present, check if it's +in the bloom filter. Since there can be false positives, this might +miss finding some unused keys. The probability/size of filter +could be tunable. + +Another way might be to scan the git log for files that got removed +or changed what key they pointed to. Correlate with keys with content +currently present in the repository (possibly using a bloom filter again), +and that would yield a shortlist of keys that are probably not used. 
+Then scan thru all files in the repo to make sure that none point to keys +on the shortlist. diff --git a/doc/todo/git_annex_init_:_include_repo_description_and__47__or_UUID_in_commit_message.mdwn b/doc/todo/git_annex_init_:_include_repo_description_and__47__or_UUID_in_commit_message.mdwn new file mode 100644 index 0000000000..be7e2dacc8 --- /dev/null +++ b/doc/todo/git_annex_init_:_include_repo_description_and__47__or_UUID_in_commit_message.mdwn @@ -0,0 +1,13 @@ +Would help alot when having to add large(ish) amounts of remotes. + +Maybe detect this kind of commit message and ask user whether to automatically add them? See [[auto_remotes]]: +> Question: When should git-annex update the remote.log? (If not just on init.) Whenever it reads in a repo's remotes? + +---- + +I'm not sure that the above suggestion is going down a path that really +makes sense. If you want a list of repository UUIDs and descriptions, +it's there in machine-usable form in `.git-annex/uuid.log`, there is no +need to try to pull this info out of git commit messages. --[[Joey]] + +[[done]] diff --git a/doc/todo/gitolite_and_gitosis_support.mdwn b/doc/todo/gitolite_and_gitosis_support.mdwn new file mode 100644 index 0000000000..2fca839863 --- /dev/null +++ b/doc/todo/gitolite_and_gitosis_support.mdwn @@ -0,0 +1,39 @@ +gitosis and gitolite should support git-annex being used to send/receive +files from the repositories they manage. Users with read-only access +could only get files, while users with write access could also put and drop +files. + +Doing this right requires modifying both programs, to add [[git-annex-shell]] +to the list of things they can run, and only allow through appropriate +git-annex-shell subcommands to read-only users. + +I have posted an RFC for modifying gitolite to the +[gitolite mailing list](http://groups.google.com/group/gitolite?lnk=srg). 
+ +> I have not developed a patch yet, but all that git-annex needs is a way +> to ssh to the server and run the git-annex-shell command there. +> git-annex-shell is very similar to git-shell. So, one way to enable +> it is simply to set GL_ADC_PATH to a directory containing git-annex-shell. +> +> But, that's not optimal, since git-annex-shell will send off receive-pack +> commands to git, which would bypass gitolite's permissions checking. +> Also, it makes sense to limit readonly users to only download, not +> upload/delete files from git-annex. Instead, I suggest adding something +> like this to gitolite's config: + + # If set, users with W access can write file contents into the git-annex, + # and users with R access can read file contents from the git-annex. + $GL_GIT_ANNEX = 0; + +> If this makes sense, I'm sure I can put a patch together for your +> review. It would involve modifying gl-auth-command so it knows how +> to run git-annex-shell, and how to parse out the "verb" from a +> git-annex-shell command line, and modifying R_COMMANDS and W_COMMANDS. + +As I don't write python, someone else is needed to work on gitosis. +--[[Joey]] + +> [[done]]; support for gitolite is in its `pu` branch, and some changes +> made to git-annex. +> Word is gitosis is not being maintained so I won't +> worry about trying to support it. --[[Joey]] diff --git a/doc/todo/gitrm.mdwn b/doc/todo/gitrm.mdwn new file mode 100644 index 0000000000..e41c334623 --- /dev/null +++ b/doc/todo/gitrm.mdwn @@ -0,0 +1,5 @@ +how to handle git rm file? (should try to drop keys that have no +referring file, if it seems safe..) + +[[done]] -- I think that git annex unused and dropunused are the best +solution to this.
diff --git a/doc/todo/hidden_files.mdwn b/doc/todo/hidden_files.mdwn new file mode 100644 index 0000000000..191e9c3286 --- /dev/null +++ b/doc/todo/hidden_files.mdwn @@ -0,0 +1,30 @@ +Add a `git annex hide $file` that behaves like drop, checking counter info +and updating location log to say the current repo no longer has a file -- +but does not actually remove the content. + +Then `git annex unused` can be used to clean it up later. And in the +meantime, it's still locally accessible. This can be useful if you're +planning to need to free up space later, but want to hold onto the content +for a while. Possibly you'll be disconnected later, so it's easier to push +out that intent now. + +-- + +TODO: + +* Make 100% sure this is safe. Drop, etc should never check content files + are present on other repos if the location log doesn't say the repo + has the content. + +* What will `git annex get` do if it's asked to get a file that has been + hidden? + +> Unless I am missing something: Make sure the data is correct (for SHA1 or other tracking) and restore locally. If that's not the case, delete and restore from remote. -- RichiH + +---- + +Is 'unused' a good name? 'clean' and 'autoclean' would make more sense, imo. 'clean' deletes everything, whereas an optional 'autoclean' could try to be smart based on disk usage and/or SHA1, etc. -- RichiH + +> Nah, `git annex unused/dropunused` already exist. --[[Joey]] + +>> OK, in that case forget what I said. No idea about your internal policy, but feel free to delete this part of the page, then. -- RichiH diff --git a/doc/todo/immutable_annexed_files.mdwn b/doc/todo/immutable_annexed_files.mdwn new file mode 100644 index 0000000000..b26838e95e --- /dev/null +++ b/doc/todo/immutable_annexed_files.mdwn @@ -0,0 +1,8 @@ +> josh: Do you do anything in git-annex to try to make the files immutable? +> For instance, removing write permission, or even chattr? 
+> joey: I don't, but that's a very good idea +> josh: Oh, I just thought of another slightly crazy but handy idea. +> josh: I'd hate to run into a program which somehow followed the symlink and then did an unlink to replace the file. +> josh: To break that, you could create a new directory under annex's internal directory for each file, and make the directory have no write permission. + +[[done]] and done --[[Joey]] diff --git a/doc/todo/link_file_to_remote_repo_feature.mdwn b/doc/todo/link_file_to_remote_repo_feature.mdwn new file mode 100644 index 0000000000..d6b41e8059 --- /dev/null +++ b/doc/todo/link_file_to_remote_repo_feature.mdwn @@ -0,0 +1,52 @@ +I have two repos, using SHA1 backend and both using git. +The first one is a laptop, the second one is a usb drive. + +When I drop a file on the laptop repo, the file is not available on that repo until I run *git annex get* +But when the usb drive is plugged in the file is actually available. + +How about adding a feature to link some/all files to the remote repo? + +e.g. 
+We have *railscasts/196-nested-model-form-part-1.mp4* file added to git, and only available on the usb drive: + + $ git annex whereis 196-nested-model-form-part-1.mp4 + whereis 196-nested-model-form-part-1.mp4 (1 copy) + a7b7d7a4-2a8a-11e1-aebc-d3c589296e81 -- origin (Portable usb drive) + +I can see the link with: + + $ cd railscasts + $ ls -ls 196* + 8 lrwxr-xr-x 1 framallo staff 193 Dec 20 05:49 196-nested-model-form-part-1.mp4 -> ../.git/annex/objects/Wz/6P/SHA256-s16898930--43679c67cd968243f58f8f7fb30690b5f3f067574e318d609a01613a2a14351e/SHA256-s16898930--43679c67cd968243f58f8f7fb30690b5f3f067574e318d609a01613a2a14351e + +I save this in a variable just to make the example more clear: + + ID=".git/annex/objects/Wz/6P/SHA256-s16898930--43679c67cd968243f58f8f7fb30690b5f3f067574e318d609a01613a2a14351e/SHA256-s16898930--43679c67cd968243f58f8f7fb30690b5f3f067574e318d609a01613a2a14351e" + +The file doesn't exist on the local repo: + + $ ls ../$ID + ls: ../$ID: No such file or directory + +however I can create a link to access that file on the remote repo. +First I create a needed dir: + + $ mkdir ../.git/annex/objects/Wz/6P/SHA256-s16898930--43679c67cd968243f58f8f7fb30690b5f3f067574e318d609a01613a2a14351e/ + +Then I link to the remote file: + + $ ln -s /mnt/usb_drive/repo_folder/$ID ../$ID + +now I can open the file in the laptop repo. + + +I think it could be easy to implement. Maybe It's a naive approach, but looks apealing. +Checking if it's a real file or a link shouldn't impact on performance. +The limitation is that it would work only with remote repos on local dirs + +Also allows you to have one directory structure like AFS or other distributed FS. If the file is not local I go to the remote server. +Which is great for apps like Picasa, Itunes, and friends that depends on the file location. + +> This is a duplicate of [[union_mounting]]. So closing it: [[done]]. 
+> +> It's a good idea, but making sure git-annex correctly handles these links in all cases is a subtle problem that has not yet been tackled. --[[Joey]] diff --git a/doc/todo/network_remotes.mdwn b/doc/todo/network_remotes.mdwn new file mode 100644 index 0000000000..42efa832f5 --- /dev/null +++ b/doc/todo/network_remotes.mdwn @@ -0,0 +1,5 @@ +Support for remote git repositories (ssh:// specifically can be made to +work, although the other end probably needs to have git-annex +installed..) + +[[done]], at least get and put work.. diff --git a/doc/todo/object_dir_reorg_v2.mdwn b/doc/todo/object_dir_reorg_v2.mdwn new file mode 100644 index 0000000000..49666ddc79 --- /dev/null +++ b/doc/todo/object_dir_reorg_v2.mdwn @@ -0,0 +1,25 @@ +Several things suggest now would be a good time to reorganize the object +directory. This would be annex.version=2. It will be slightly painful for +all users, so this should be the *last* reorg in the foreseeable future. + +1. Remove colons from filenames, for [[bugs/fat_support]] + +2. Add hashing, since some filesystems do suck (like er, fat at least :) + [[forum/hashing_objects_directories]] + (Also, may as well hash .git-annex/* while at it -- that's what + really gets big.) + +3. Add filesize metadata for [[bugs/free_space_checking]]. (Currently only + present in WORM, and in an ad-hoc way.) + +4. Perhaps use a generic format that will allow further metadata to be + added later. For example, + "bSHA1,s101111,kf3101c30bb23467deaec5d78c6daa71d395d1879" + + (Probably everything after ",k" should be part of the key, even if it + contains the "," separator character. Otherwise an escaping mechanism + would be needed.) + +[[done]] now!
+ +Although [[bugs/free_space_checking]] is not quite there --[[Joey]] diff --git a/doc/todo/object_dir_reorg_v2/comment_1_ba03333dc76ff49eccaba375e68cb525._comment b/doc/todo/object_dir_reorg_v2/comment_1_ba03333dc76ff49eccaba375e68cb525._comment new file mode 100644 index 0000000000..261c2a51f3 --- /dev/null +++ b/doc/todo/object_dir_reorg_v2/comment_1_ba03333dc76ff49eccaba375e68cb525._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 1" + date="2011-03-16T01:16:48Z" + content=""" +If you support generic meta-data, keep in mind that you will need to do conflict resolution. Timestamps may not be synched across all systems, so keeping a log of old metadata could be used, sorting by history and using the latest. Which leaves the situation of two incompatible changes. This would probably mean manual conflict resolution. You will probably have thought of this already, but I still wanted to make sure this is recorded. -- RichiH +"""]] diff --git a/doc/todo/object_dir_reorg_v2/comment_2_81276ac309959dc741bc90101c213ab7._comment b/doc/todo/object_dir_reorg_v2/comment_2_81276ac309959dc741bc90101c213ab7._comment new file mode 100644 index 0000000000..9785f1989e --- /dev/null +++ b/doc/todo/object_dir_reorg_v2/comment_2_81276ac309959dc741bc90101c213ab7._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 2" + date="2011-03-16T01:19:25Z" + content=""" +Hmm, I added quite a few comments at work, but they are stuck in moderation. Maybe I forgot to log in before adding them. I am surprised this one appeared immediately. 
-- RichiH +"""]] diff --git a/doc/todo/object_dir_reorg_v2/comment_3_79bdf9c51dec9f52372ce95b53233bb2._comment b/doc/todo/object_dir_reorg_v2/comment_3_79bdf9c51dec9f52372ce95b53233bb2._comment new file mode 100644 index 0000000000..886941be72 --- /dev/null +++ b/doc/todo/object_dir_reorg_v2/comment_3_79bdf9c51dec9f52372ce95b53233bb2._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 1" + date="2011-03-15T14:08:41Z" + content=""" +What is the potential time-frame for this change? As I am not using git-annex for production yet, I can see myself waiting to avoid any potential hassle. + +Supporting generic metadata seems like a great idea. Though if you are going this path, wouldn't it make sense to avoid metastore for mtime etc and support this natively without outside dependencies? + +-- RichiH +"""]] diff --git a/doc/todo/object_dir_reorg_v2/comment_4_93aada9b1680fed56cc6f0f7c3aca5e5._comment b/doc/todo/object_dir_reorg_v2/comment_4_93aada9b1680fed56cc6f0f7c3aca5e5._comment new file mode 100644 index 0000000000..475359abbf --- /dev/null +++ b/doc/todo/object_dir_reorg_v2/comment_4_93aada9b1680fed56cc6f0f7c3aca5e5._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 4" + date="2011-03-16T03:22:45Z" + content=""" +Well, I spent a few hours playing this evening in the 'reorg' branch in git. It seems to be shaping up pretty well; type-based refactoring in haskell makes these kind of big systematic changes a matter of editing until it compiles. And it compiles and test suite passes. But, so far I've only covered 1. 3. and 4. on the list, and have yet to deal with upgrades. + +I'd recommend you not wait before using git-annex. I am committed to provide upgradability between annexes created with all versions of git-annex, going forward. 
This is important because we can have offline archival drives that sit unused for years. Git-annex will upgrade a repository to current standard the first time it sees it, and I hope the upgrade will be pretty smooth. It was not bad for the annex.version 0 to 1 upgrade earlier. The only annoyance with upgrades is that it will result in some big commits to git, as every symlink in the repo gets changed, and log files get moved to new names. + +(The metadata being stored with keys is data that a particular backend can use, and is static to a given key, so there are no merge issues (and it won't be used to preserve mtimes, etc).) +"""]] diff --git a/doc/todo/object_dir_reorg_v2/comment_5_821c382987f105da72a50e0a5ce61fdc._comment b/doc/todo/object_dir_reorg_v2/comment_5_821c382987f105da72a50e0a5ce61fdc._comment new file mode 100644 index 0000000000..2032bce3c0 --- /dev/null +++ b/doc/todo/object_dir_reorg_v2/comment_5_821c382987f105da72a50e0a5ce61fdc._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 5" + date="2011-03-16T15:51:30Z" + content=""" +Hashing & segmenting seems to be around the corner, which is nice :) + +Is there a chance that you will optionally add mtime to your native metadata store? If yes, I'd rather wait for v2 to start with the native system from the start. If not, I will probably set it up tonight. + +PS: While posting from work, my comments are held for moderation once again. I am somewhat confused as to why this happens when I can just submit directly from home. And yes, I am using the same auth provider and user in both cases. 
+"""]] diff --git a/doc/todo/object_dir_reorg_v2/comment_6_8834c3a3f1258c4349d23aff8549bf35._comment b/doc/todo/object_dir_reorg_v2/comment_6_8834c3a3f1258c4349d23aff8549bf35._comment new file mode 100644 index 0000000000..ff86e3970b --- /dev/null +++ b/doc/todo/object_dir_reorg_v2/comment_6_8834c3a3f1258c4349d23aff8549bf35._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 6" + date="2011-03-16T16:32:52Z" + content=""" +The mtime cannot be stored for all keys. Consider a SHA1 key. The mtime is irrelevant; 2 files with different mtimes, when added to the SHA1 backend, should get the same key. + +Probably our spam filter doesn't like your work IP. +"""]] diff --git a/doc/todo/object_dir_reorg_v2/comment_7_42501404c82ca07147e2cce0cff59474._comment b/doc/todo/object_dir_reorg_v2/comment_7_42501404c82ca07147e2cce0cff59474._comment new file mode 100644 index 0000000000..fc866c57a6 --- /dev/null +++ b/doc/todo/object_dir_reorg_v2/comment_7_42501404c82ca07147e2cce0cff59474._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 7" + date="2011-03-16T21:05:38Z" + content=""" +Ah, OK. I assumed the metadata would be attached to a key, not part of the key. This seems to make upgrades/extensions down the line harder than they need to be, but you are right that this way, merges are not, and never will be, an issue. + +Though with the SHA1 backend, changing files can be tracked. This means that tracking changes in mtime or other is possible. It also means that there are potential merge issues. But I won't argue the point endlessly. I can accept design decisions :) + +The prefix at work is from a university netblock so yes, it might be on a few hundred proxy lists etc. 
+"""]] diff --git a/doc/todo/optimise_git-annex_merge.mdwn b/doc/todo/optimise_git-annex_merge.mdwn new file mode 100644 index 0000000000..8b9999eb88 --- /dev/null +++ b/doc/todo/optimise_git-annex_merge.mdwn @@ -0,0 +1,33 @@ +Typically `git-annex merge` is fast, but it could still be sped up. + +`git-annex merge` runs `git-hash-object` once per file that needs to be +merged. Elsewhere in git-annex, `git-hash-object` is used in a faster mode, +reading files from disk via `--stdin-paths`. But here, the data is not +in raw files on disk, and I doubt writing them is the best approach. +Instead, I'd like a way to stream multiple objects into git using stdin. +Sometime, should look at either extending git-hash-object to support that, +or possibly look at using git-fast-import instead. + +--- + +`git-annex merge` also runs `git show` once per file that needs to be +merged. This could be reduced to a single call to `git-cat-file --batch`. +There is already a Git.CatFile library that can do this easily. --[[Joey]] + +> This is now done, part above remains todo. --[[Joey]] + +--- + +Merging used to use memory proportional to the size of the diff. It now +streams data, running in constant space. This probably sped it up a lot, +as there's much less allocation and GC action. --[[Joey]] + +---- + +Another option is to stop doing the automatic merging. Once the git +tweak-fetch hook is widely available, the automatic merging won't be +needed when pulling from remotes. The only remaining use would be if +a sibling git-annex branch appeared in some other way, and git annex merge +could be manually run in such an unlikely case. Making this change +would require a hard dependency on an appropriate version of git, and a +version number bump to ensure the tweak-fetch hooks are set up.
--[[Joey]] diff --git a/doc/todo/parallel_possibilities.mdwn b/doc/todo/parallel_possibilities.mdwn new file mode 100644 index 0000000000..9c0e69e294 --- /dev/null +++ b/doc/todo/parallel_possibilities.mdwn @@ -0,0 +1,13 @@ +One of my reasons for using haskell was that it provides the possibility of +some parallel processing. Although since git-annex hits the filesystem +heavily and mostly runs other git commands, maybe not a whole lot. + +Anyway, each git-annex command is broken down into a series of independent +actions, which has some potential for parallelism. + +Each action has 3 distinct phases, basically "check", "perform", and +"cleanup". The perform actions are probably parallelizable; the cleanup may be +(but not if it has to run git commands to stage state; it can queue +commands though); the check should be easily parallelizable, although they +may access the disk or run minor git query commands, so would probably not +want to run too many of them at once. diff --git a/doc/todo/parallel_possibilities/comment_1_d8e34fc2bc4e5cf761574608f970d496._comment b/doc/todo/parallel_possibilities/comment_1_d8e34fc2bc4e5cf761574608f970d496._comment new file mode 100644 index 0000000000..4aceb3abd3 --- /dev/null +++ b/doc/todo/parallel_possibilities/comment_1_d8e34fc2bc4e5cf761574608f970d496._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkptNW1PzrVjYlJWP_9e499uH0mjnBV6GQ" + nickname="Christian" + subject="comment 1" + date="2011-04-08T12:41:43Z" + content=""" +I also think, that fetching keys via rsync can be done by one rsync process, when the keys are fetched from one host. This would avoid establishing a new TCP connection for every file.
+"""]] diff --git a/doc/todo/parallel_possibilities/comment_2_adb76f06a7997abe4559d3169a3181c3._comment b/doc/todo/parallel_possibilities/comment_2_adb76f06a7997abe4559d3169a3181c3._comment new file mode 100644 index 0000000000..6ecce52c42 --- /dev/null +++ b/doc/todo/parallel_possibilities/comment_2_adb76f06a7997abe4559d3169a3181c3._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="http://ertai.myopenid.com/" + nickname="npouillard" + subject="comment 2" + date="2011-05-20T20:14:15Z" + content=""" +I agree with Christian. + +One should first make a better use of connections to remotes before exploring parallel possibilities. One should pipeline the requests and answers. + +Of course this could be implemented using parallel&concurrency features of Haskell to do this. +"""]] diff --git a/doc/todo/pushpull.mdwn b/doc/todo/pushpull.mdwn new file mode 100644 index 0000000000..6828b35b2f --- /dev/null +++ b/doc/todo/pushpull.mdwn @@ -0,0 +1,4 @@ +--push/--pull should take a reponame and files, and push those files + to that repo; dropping them from the current repo + +[[done]] (move --from/--to) diff --git a/doc/todo/rsync.mdwn b/doc/todo/rsync.mdwn new file mode 100644 index 0000000000..3353f19c43 --- /dev/null +++ b/doc/todo/rsync.mdwn @@ -0,0 +1,4 @@ +Transferring a file from a ssh:// remote should use rsync to allow resuming +of a prior transfer. + +[[done]] diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn new file mode 100644 index 0000000000..6103ffa61e --- /dev/null +++ b/doc/todo/smudge.mdwn @@ -0,0 +1,162 @@ +git-annex should use smudge/clean filters. + +---- + +Update: Currently, this does not look likely to work. In particular, +the clean filter needs to consume all stdin from git, which consists of the +entire content of the file. It cannot optimise by directly accessing +the file in the repository, because git may be cleaning a different +version of the file during a merge. 
+ +So every `git status` would need to read the entire content of all +available files, and checksum them, which is too expensive. + +> Update from GitTogether: Peff thinks a new interface could be added to +> git to handle this sort of case in an efficient way.. just needs someone +> to do the work. --[[Joey]] + +---- + +The clean filter is run when files are staged for commit. So a user could copy +any file into the annex, git add it, and git-annex's clean filter causes +the file's key to be staged, while its value is added to the annex. + +The smudge filter is run when files are checked out. Since git annex +repos have partial content, this would not git annex get the file content. +Instead, if the content is not currently available, it would need to do +something like return empty file content. (Sadly, it cannot create a +symlink, as git still wants to write the file afterwards.) + +So the nice current behavior of unavailable files being clearly missing due +to dangling symlinks, would be lost when using smudge/clean filters. +(Contact git developers to get an interface to do this?) + +Instead, we get the nice behavior of not having to remeber to `git annex +add` files, and just being able to use `git add` or `git commit -a`, +and have it use git-annex when .gitattributes says to. Also, annexed +files can be directly modified without having to `git annex unlock`. + +### design + +In .gitattributes, the user would put something like "* filter=git-annex". +This way they could control which files are annexed vs added normally. + +(git-annex could have further controls to allow eg, passing small files +through to regular processing. At least .gitattributes is a special case, +it should never be annexed...) + +For files not configured this way, git-annex could continue to use +its symlink method -- this would preserve backwards compatability, +and even allow mixing the two methods in a repo as desired. 
+ +To find files in the repository that are annexed, git-annex would do +`ls-files` as now, but would check if found files have the appropriate +filter, rather than the current symlink checks. To determine the key +of a file, rather than reading its symlink, git-annex would need to +look up the git blob associated with the file -- this can be done +efficiently using the existing code in `Branch.catFile`. + +The clean filter would inject the file's content into the annex, and hard +link from the annex to the file. Avoiding duplication of data. + +The smudge filter can't do that, so to avoid duplication of data, it +might always create an empty file. To get the content, `git annex get` +could be used (which would hard link it). A `post-checkout` hook might +be used to set up hard links for all currently available content. + +#### clean + +The trick is doing it efficiently. Since git a2b665d, v1.7.4.1, +something like this works to provide a filename to the clean script: + + git config --global filter.huge.clean huge-clean %f + +This could avoid it needing to read all the current file content from stdin +when doing eg, a git status or git commit. Instead it is passed the +filename that git is operating on, in the working directory. +(Update: No, doesn't work; git may be cleaning a different file content +than is currently on disk, and git requires all stdin be consumed too.) + +So, WORM could just look at that file and easily tell if it is one +it already knows (same mtime and size). If so, it can short-circuit and +do nothing, file content is already cached. + +SHA1 has a harder job. Would not want to re-sha1 the file every time, +probably. So it'd need a local cache of file stat info, mapped to known +objects. + +But: Even with %f, git actually passes the full file content to the clean +filter, and if it fails to consume it all, it will crash (may only happen +if the file is larger than some chunk size; tried with 500 mb file and +saw a SIGPIPE.) 
This means unnecessary works needs to be done, +and it slows down *everything*, from `git status` to `git commit`. +**showstopper** I have sent a patch to the git mailing list to address +this. (Update: apparently +can't be fixed.) + +#### smudge + +The smudge script can also be provided a filename with %f, but it +cannot directly write to the file or git gets unhappy. + +### dealing with partial content availability + +The smudge filter cannot be allowed to fail, that leaves the tree and +index in a weird state. So if a file's content is requested by calling +the smudge filter, the trick is to instead provide dummy content, +indicating it is not available (and perhaps saying to run "git-annex get"). + +Then, in the clean filter, it has to detect that it's cleaning a file +with that dummy content, and make sure to provide the same identifier as +it would if the file content was there. + +I've a demo implementation of this technique in the scripts below. + +---- + +### test files + +huge-smudge: + +
+#!/bin/sh
+read f
+file="$1"
+echo "smudging $f" >&2
+if [ -e ~/$f ]; then
+	cat ~/$f # possibly expensive copy here
+else
+	echo "$f not available"
+fi
+
+ +huge-clean: + +
+#!/bin/sh
+file="$1"
+cat >/tmp/file
+# in real life, this should be done more efficiently, not trying to read
+# the whole file content!
+if grep -q 'not available' /tmp/file; then
+	awk '{print $1}' /tmp/file # provide what we would if the content were avail!
+	exit 0
+fi
+echo "cleaning $file" >&2
+# XXX store file content here
+echo $file
+
+ +.gitattributes: + +
+*.huge filter=huge
+
+ +in .git/config: + +
+[filter "huge"]
+        clean = huge-clean %f
+        smudge = huge-smudge %f
+
diff --git a/doc/todo/smudge/comment_1_4ea616bcdbc9e9a6fae9f2e2795c31c9._comment b/doc/todo/smudge/comment_1_4ea616bcdbc9e9a6fae9f2e2795c31c9._comment
new file mode 100644
index 0000000000..a4eb3cf235
--- /dev/null
+++ b/doc/todo/smudge/comment_1_4ea616bcdbc9e9a6fae9f2e2795c31c9._comment
@@ -0,0 +1,8 @@
+[[!comment format=mdwn
+ username="http://christian.amsuess.com/chrysn"
+ nickname="chrysn"
+ subject="git-add instead of git-annex-add"
+ date="2011-02-26T21:43:21Z"
+ content="""
+With these modifications in place, would there still be a way to *really* git-add a file? (My main repository contains both normal git and git-annex files.)
+"""]]
diff --git a/doc/todo/smudge/comment_2_e04b32caa0d2b4c577cdaf382a3ff7f6._comment b/doc/todo/smudge/comment_2_e04b32caa0d2b4c577cdaf382a3ff7f6._comment
new file mode 100644
index 0000000000..3a223e1c7b
--- /dev/null
+++ b/doc/todo/smudge/comment_2_e04b32caa0d2b4c577cdaf382a3ff7f6._comment
@@ -0,0 +1,12 @@
+[[!comment format=mdwn
+ username="http://dieter-be.myopenid.com/"
+ nickname="dieter"
+ subject="symlinks"
+ date="2011-04-03T20:30:21Z"
+ content="""
+> (Sadly, it cannot create a symlink, as git still wants to write the file afterwards.
+> So the nice current behavior of unavailable files being clearly missing due to dangling symlinks, would be lost when using smudge/clean filters. (Contact git developers to get an interface to do this?)
+
+Have you checked what the smudge filter sees when the input is a symlink? Because git supports tracking symlinks, so it should also support pushing symlinks through a smudge filter, right?
+Either way: yes, contact the git devs, one can only ask and hope.  And if you can demonstrate the awesomeness of git-annex they might get more interested :)
+"""]]
diff --git a/doc/todo/speed_up_fsck.mdwn b/doc/todo/speed_up_fsck.mdwn
new file mode 100644
index 0000000000..5d5e867f80
--- /dev/null
+++ b/doc/todo/speed_up_fsck.mdwn
@@ -0,0 +1,40 @@
+moving to the git-annex branch has slowed down fsck worse than most
+commands. Actually, some commands have sped up, while others like get
+are slightly slower but are swamped by the normal runtime. 
+
+For fsck though, it has to pull each file's location log info out of git.
+And, it's typically run on the entire tree.
+
+Another slow one is `git annex copy --from`.
+
+It would be possible to run a single `git cat-file --batch` and pass it
+sha1s of location logs for each file that is going to be fscked (gotten via
+`read-tree`). Then just read its output until the next requested sha1 to
+chunk it, and pass this in to fsck in a closure.
+
+The difficulty, besides writing that, is that everything that works with
+location logs now reads them out of git; we would need to find a way to
+provide the info on a side channel of some sort.
+
+If this is implemented, the same infrastructure could be used for other
+commands like whereis and add. --[[Joey]]
+
+> Updated plan:
+> 
+> Run `git cat-file --batch`, and cache its stdin and stdout handles in Branch
+> state.
+> 
+> To see a git-annex branch file, send it something like
+> "git-annex:uuid.log", and read the content from its stdout handle.
+> 
+> To detect the end of content, send "TOKEN\n", and look for 
+> "TOKEN missing" in its output. A good choice for TOKEN is anything
+> that will never exist in the repo; 40 0's would be a fairly good choice,
+> but even better seems to be something completely invalid and impossible
+> to have as a sha1 or filename or ref: "".
+> 
+> Hmm, except that's actually an error message sent to stderr. Unless
+> stderr is connected to stdout, it might be better to look for a known,
+> empty object. Could just add a git-annex:empty file to that end.
+
+[[done]] --[[Joey]] 
diff --git a/doc/todo/support-non-utf8-locales.mdwn b/doc/todo/support-non-utf8-locales.mdwn
new file mode 100644
index 0000000000..da40118d52
--- /dev/null
+++ b/doc/todo/support-non-utf8-locales.mdwn
@@ -0,0 +1,26 @@
+Currently, git-annex forces output, particularly of filenames, in a utf-8
+locale.
+
+Note that this does not mean it cannot be used with filenames in other
+encodings. git-annex is entirely encoding agnostic when it comes to 
+manipulating filenames. It just *displays* their names always converted to
+utf-8, which may not look right when you have a non-utf8 locale.
+
+This had to be done to work around some bugs with haskell's handling
+of filename encodings. In particular,
+
+* [[bugs/unhappy_without_UTF8_locale]]: haskell crashes when told to output 
+  a string with characters > 255 in a non-utf8 locale.
+* [[bugs/problems_with_utf8_names]]: On many OSs, haskell expects
+  non-decoded raw char8 in FilePaths. In order to display a filename,
+  though, it needs to first be decoded, and git-annex currently assumes
+  it was encoded as utf8.
+
+git-annex's behavior is unlikely to improve much until haskell's
+support for utf8 filenames improves. --[[Joey]]
+
+> [[done]] -- I just turned off all encoding handling on stdout and stderr,
+> which avoids these problems nicely. Git-annex now displays just what it
+> input, at least on platforms where haskell does not decode unicode in
+> FilePaths. This will later be a problem when it gets localized, but for
+> now works great. --[[Joey]]
diff --git a/doc/todo/support_S3_multipart_uploads.mdwn b/doc/todo/support_S3_multipart_uploads.mdwn
new file mode 100644
index 0000000000..711ac41b2a
--- /dev/null
+++ b/doc/todo/support_S3_multipart_uploads.mdwn
@@ -0,0 +1,14 @@
+Did not know of this when I wrote S3 support. Ability to resume large
+uploads would be good.
+
+
+
+Also allows supporting files > 5 gb, a S3 limit I was not aware of.
+
+NB: It would work just as well to split the object and upload the N parts
+to S3, but not bother with S3's paperwork to rejoin them into one object. 
+Only reasons not to do that are a) backwards compatibility with
+the existing S3 remote and b) this would not allow accessing the content
+in S3 w/o using git-annex, which could be useful in some scenarios.
+
+--[[Joey]]
diff --git a/doc/todo/support_fsck_in_bare_repos.mdwn b/doc/todo/support_fsck_in_bare_repos.mdwn
new file mode 100644
index 0000000000..32ced467e0
--- /dev/null
+++ b/doc/todo/support_fsck_in_bare_repos.mdwn
@@ -0,0 +1,17 @@
+What it says on the tin:
+
+    22:56:54 < RichiH> joeyh_: by the way, i have been thinking about fsck on bare repos
+    22:57:37 < RichiH> joeyh_: the best i could come with is to have a bare and a non-bare access the same repo store
+    22:58:00 < RichiH> joeyh_: alternatively, with the SHA* backend, you have all the information to verify that the local data is correct
+    22:58:41 < RichiH> and verifying that would already be a plus. if there  really _is_ a problem, having the SHA is enough to track issues down
+    23:09:50 < joeyh_> oh, I think I have code that fsck could use on bare repos already.. just a matter of wiring it up
+    23:10:42 < joeyh_> feel free to reopen a bug or whatever so I remember.. the unused command's branch content enumeration could be used in a bare repo
+    23:14:51 < joeyh_> unused/dropunused could work in bare repos too btw
+
+> Also `status`'s total annex keys/size could be handled for bare repos. --[[Joey]] 
+
+>> Fsck is done. Rest not done yet. --[[Joey]]
+
+>>> all [[done]]! --[[Joey]] 
+
+[[!meta title="support unused, dropunused in bare repos"]]
diff --git a/doc/todo/symlink_farming_commit_hook.mdwn b/doc/todo/symlink_farming_commit_hook.mdwn
new file mode 100644
index 0000000000..3e93cb34b8
--- /dev/null
+++ b/doc/todo/symlink_farming_commit_hook.mdwn
@@ -0,0 +1,14 @@
+TODO: implement below
+
+git-annex does use a lot of symlinks. Specifically, relative symlinks,
+that are checked into git. To allow you to move those around without
+annoyance, git-annex can run as a post-commit hook. This way, you can `git mv`
+a symlink to an annexed file, and as soon as you commit, it will be fixed
+up.
+
+`git annex init` tries to set up a post-commit hook that is itself a symlink
+back to git-annex. If you want to have your own shell script in the post-commit
+hook, just make it call `git annex` with no parameters. git-annex will detect
+when it's run from a git hook and do the necessary fixups.
+
+[[done]]
diff --git a/doc/todo/tahoe_lfs_for_reals.mdwn b/doc/todo/tahoe_lfs_for_reals.mdwn
new file mode 100644
index 0000000000..9019767eb9
--- /dev/null
+++ b/doc/todo/tahoe_lfs_for_reals.mdwn
@@ -0,0 +1,21 @@
+[[forum/tips:_special__95__remotes__47__hook_with_tahoe-lafs]] is a good
+start, but Zooko points out that using Tahoe's directory translation layer
+incurs O(N^2) overhead as the number of objects grows. Also, making
+hash subdirectories in Tahoe is expensive. Instead it would be good to use
+it as a key/value store directly. The catch is that doing so involves
+sending the content to Tahoe, and getting back a key identifier.
+
+This would be fairly easy to do as a [[backend|backends]], which can assign its
+own key names (although typically done before data is stored in it),
+but a tahoe-lafs special remote would be more flexible.
+
+To support a special remote, a mapping is needed from git-annex keys to
+Tahoe keys.
+
+The best place to store this mapping is perhaps as a new field in the
+location log:
+
+	date present repo-uuid newfields
+
+This way, each remote can store its own key-specific data in the same place
+as other key-specific data, with minimal overhead.
diff --git a/doc/todo/tahoe_lfs_for_reals/comment_1_0a4793ce6a867638f6e510e71dd4bb44._comment b/doc/todo/tahoe_lfs_for_reals/comment_1_0a4793ce6a867638f6e510e71dd4bb44._comment
new file mode 100644
index 0000000000..16ef882a42
--- /dev/null
+++ b/doc/todo/tahoe_lfs_for_reals/comment_1_0a4793ce6a867638f6e510e71dd4bb44._comment
@@ -0,0 +1,10 @@
+[[!comment format=mdwn
+ username="zooko"
+ ip="97.118.97.117"
+ subject="performance"
+ date="2011-05-17T19:20:39Z"
+ content="""
+Hm... O(N^2)? I think it just takes O(N). To read an entry out of a directory you have to download the entire directory (and store it in RAM and parse it). The constants are basically \"too big to be good but not big enough to be prohibitive\", I think. jctang has reported that his special remote hook performs well enough to use, but it would be nice if it were faster.
+
+The Tahoe-LAFS folks are working on speeding up mutable files, by the way, after which we would be able to speed up directories.
+"""]]
diff --git a/doc/todo/tahoe_lfs_for_reals/comment_2_80b9e848edfdc7be21baab7d0cef0e3a._comment b/doc/todo/tahoe_lfs_for_reals/comment_2_80b9e848edfdc7be21baab7d0cef0e3a._comment
new file mode 100644
index 0000000000..6dba86c47c
--- /dev/null
+++ b/doc/todo/tahoe_lfs_for_reals/comment_2_80b9e848edfdc7be21baab7d0cef0e3a._comment
@@ -0,0 +1,13 @@
+[[!comment format=mdwn
+ username="http://joey.kitenet.net/"
+ nickname="joey"
+ subject="comment 2"
+ date="2011-05-17T19:57:33Z"
+ content="""
+Whoops! You'd only told me O(N) twice before..
+
+So this is not too high priority. I think I would like to get the per-remote storage sorted out anyway, since probably it will be the thing needed to convert the URL backend into a special remote, which would then allow ripping out the otherwise unused pluggable backend infrastructure.
+
+Update: Per-remote storage is now sorted out, so this could be implemented
+if it actually made sense to do so.
+"""]]
diff --git a/doc/todo/union_mounting.mdwn b/doc/todo/union_mounting.mdwn
new file mode 100644
index 0000000000..c42a055021
--- /dev/null
+++ b/doc/todo/union_mounting.mdwn
@@ -0,0 +1,10 @@
+It should be possible to union mount annexes. So if multiple drives have
+content, an annex mounting them both would have available all the 
+content from all the drives.
+
+This could be done by just making .git/annex/KEY link to the actual content
+on the mounted annex.
+
+(Need to make sure the [[copy_tracking|copies]] code does not get
+confused and think the symlink is a copy of the content. Also need to make
+sure that code that writes to .git/annex does not follow symlinks.)
diff --git a/doc/todo/use_cp_reflink.mdwn b/doc/todo/use_cp_reflink.mdwn
new file mode 100644
index 0000000000..39518abf18
--- /dev/null
+++ b/doc/todo/use_cp_reflink.mdwn
@@ -0,0 +1,7 @@
+The unlock command needs to copy a file, and it would be great to use this:
+	cp --reflink=auto src dst
+
+O(1) overhead on BTRFS. Needs coreutils 7.6; and remember that git-annex
+may be used on systems without coreutils..
+
+[[done]]
diff --git a/doc/todo/using_url_backend.mdwn b/doc/todo/using_url_backend.mdwn
new file mode 100644
index 0000000000..1f3cd56281
--- /dev/null
+++ b/doc/todo/using_url_backend.mdwn
@@ -0,0 +1,11 @@
+There is no way to `git annex add` a file using the URL [[backend|backends]].
+
+For now, we have to manually make the symlink. Something like this:
+
+	ln -s .git/annex/URL:http:%%www.example.com%foo.tar.gz
+
+Note the escaping of slashes.
+
+A `git annex register ` command could do this..
+
+[[done]]
diff --git a/doc/todo/wishlist:_Prevent_repeated_password_prompts_for_one_command.mdwn b/doc/todo/wishlist:_Prevent_repeated_password_prompts_for_one_command.mdwn
new file mode 100644
index 0000000000..6d1552fe4e
--- /dev/null
+++ b/doc/todo/wishlist:_Prevent_repeated_password_prompts_for_one_command.mdwn
@@ -0,0 +1,8 @@
+Simple, when performing various git annex commands over ssh, in particular a multi-file get, and using password authentication, git annex will prompt more than once for a user password.  This makes batch updates very inconvenient.
+
+> I'd suggest using ssh-agent, or a passwordless ssh key. Possibly in
+> combination with [[git-annex-shell]] if you want to lock down a
+> particular ssh key to only being able to use git-annex and git-daemon.
+> 
+> Combining multiple operations into a single ssh is on the todo list, but
+> very far down it. --[[Joey]]
diff --git a/doc/todo/wishlist:_Prevent_repeated_password_prompts_for_one_command/comment_1_3f9c0d08932c2ede61c802a91261a1f7._comment b/doc/todo/wishlist:_Prevent_repeated_password_prompts_for_one_command/comment_1_3f9c0d08932c2ede61c802a91261a1f7._comment
new file mode 100644
index 0000000000..2801d8e68f
--- /dev/null
+++ b/doc/todo/wishlist:_Prevent_repeated_password_prompts_for_one_command/comment_1_3f9c0d08932c2ede61c802a91261a1f7._comment
@@ -0,0 +1,14 @@
+[[!comment format=mdwn
+ username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U"
+ nickname="Richard"
+ subject="comment 1"
+ date="2011-05-06T18:30:02Z"
+ content="""
+Unless you are forced to use a password, you should really be using a ssh key.
+
+    ssh-keygen
+    #put local .ssh/id_?sa.pub into remote .ssh/authorized_keys (which needs to be chmod 600)
+    ssh-add
+    git annex whatever
+
+"""]]
diff --git a/doc/todo/wishlist:_Provide_a___34__git_annex__34___command_that_will_skip_duplicates.mdwn b/doc/todo/wishlist:_Provide_a___34__git_annex__34___command_that_will_skip_duplicates.mdwn
new file mode 100644
index 0000000000..9336535788
--- /dev/null
+++ b/doc/todo/wishlist:_Provide_a___34__git_annex__34___command_that_will_skip_duplicates.mdwn
@@ -0,0 +1,28 @@
+(Hi, this is paulproteus@debian, AKA Asheesh).
+
+I've been enjoying using git-annex to archive my data.
+
+It's great that, by using git-annex and the SHA1 backend, I get a space-saving kind of deduplication through the symbolic links.
+
+I'm looking for the ability to filter files, before they get added to the annex, so that I don't add new files whose content is already in the annex. That would help me in terms of personal file organization.
+
+It seems there is not, so this is a wishlist bug filed so that maybe such a thing might exist. What I would really like to do is:
+
+* $ git annex add --no-add-if-already-present .
+* $ git commit -m "Slurping in some photos I found on my old laptop hard drive"
+
+And then I'd do something like:
+
+* $ git clean -f
+
+to remove the files that didn't get annexed in this run. That way, only one filename would ever point to a particular SHA1.
+
+I want this because I have copies of various files of mine (photos, in particular) scattered across various hard disks. If this feature existed, I could comfortably toss them all into one git annex that grew, bit by bit, to store all of these files exactly once.
+
+(I would be even happier for "git annex add --unlink-duplicates .")
+
+(Another way to do this would be to "git annex add" them all, and then use a "git annex remove-duplicates" that could prompt me about which files are duplicates of each other, and then I could pipe that command's output into xargs git rm.)
+
+(As I write this, I realize it's possible to parse the destination of the symlink in a way that does this..)
+
+> [[done]]; see [[tips/finding_duplicate_files]] --[[Joey]]
diff --git a/doc/todo/wishlist:_Provide_a___34__git_annex__34___command_that_will_skip_duplicates/comment_10_d78d79fb2f3713aa69f45d2691cf8dfe._comment b/doc/todo/wishlist:_Provide_a___34__git_annex__34___command_that_will_skip_duplicates/comment_10_d78d79fb2f3713aa69f45d2691cf8dfe._comment
new file mode 100644
index 0000000000..5dbb66cf66
--- /dev/null
+++ b/doc/todo/wishlist:_Provide_a___34__git_annex__34___command_that_will_skip_duplicates/comment_10_d78d79fb2f3713aa69f45d2691cf8dfe._comment
@@ -0,0 +1,68 @@
+[[!comment format=mdwn
+ username="http://adamspiers.myopenid.com/"
+ nickname="Adam"
+ subject="comment 10"
+ date="2011-12-23T17:22:11Z"
+ content="""
+> Your perl script is not O(n). Inserting into perl hash tables has
+> overhead of minimum O(n log n).
+
+What's your source for this assertion?  I would expect an amortized
+average of `O(1)` per insertion, i.e. `O(n)` for full population.
+
+> Not counting the overhead of resizing hash tables,
+> the grevious slowdown if the bucket size is overcome by data (it
+> probably falls back to a linked list or something then), and the
+> overhead of traversing the hash tables to get data out.
+
+None of which necessarily change the algorithmic complexity.  However
+real benchmarks are far more useful here than complexity analysis, and
+[the dangers of premature optimization](http://c2.com/cgi/wiki?PrematureOptimization) 
+should not be forgotten.
+
+> Your memory size calculations ignore the overhead of a hash table or
+> other data structure to store the data in, which will tend to be
+> more than the actual data size it's storing. I estimate your 50
+> million number is off by at least one order of magnitude, and more
+> likely two;
+
+Sure, I was aware of that, but my point still stands.  Even 500k keys
+per 1GB of RAM does not sound expensive to me.
+
+> in any case I don't want git-annex to use 1 gb of ram.
+
+Why not?  What's the maximum it should use?  512MB?  256MB?
+32MB?  I don't see the sense in the author of a program 
+dictating thresholds which are entirely dependent on the context
+in which the program is *run*, not the context in which it's *written*.
+That's why systems have files such as `/etc/security/limits.conf`.
+
+You said you want git-annex to scale to enormous repositories.  If you
+impose an arbitrary memory restriction such as the above, that means
+avoiding implementing *any* kind of functionality which requires `O(n)`
+memory or worse.  Isn't it reasonable to assume that many users use
+git-annex on repositories which are *not* enormous?  Even when they do
+work with enormous repositories, just like with any other program,
+they would naturally expect certain operations to take longer or
+become impractical without sufficient RAM.  That's why I say that this
+restriction amounts to throwing out the baby with the bathwater.
+It just means that those who need the functionality would have to
+reimplement it themselves, assuming they are able, which is likely
+to result in more wheel reinventions.  I've already shared
+[my implementation](https://github.com/aspiers/git-config/blob/master/bin/git-annex-finddups)
+but how many people are likely to find it, let alone get it working?
+
+> Little known fact: sort(1) will use a temp file as a buffer if too
+> much memory is needed to hold the data to sort.
+
+Interesting.  Presumably you are referring to some undocumented
+behaviour, rather than `--batch-size` which only applies when merging
+multiple files, and not when only sorting STDIN.
+
+> It's also written in the most efficient language possible and has
+> been ruthlessly optimised for 30 years, so I would be very surprised
+> if it was not the best choice.
+
+It's the best choice for sorting.  But sorting purely to detect
+duplicates is a dismally bad choice.
+"""]]
diff --git a/doc/todo/wishlist:_Provide_a___34__git_annex__34___command_that_will_skip_duplicates/comment_11_4316d9d74312112dc4c823077af7febe._comment b/doc/todo/wishlist:_Provide_a___34__git_annex__34___command_that_will_skip_duplicates/comment_11_4316d9d74312112dc4c823077af7febe._comment
new file mode 100644
index 0000000000..286487eee5
--- /dev/null
+++ b/doc/todo/wishlist:_Provide_a___34__git_annex__34___command_that_will_skip_duplicates/comment_11_4316d9d74312112dc4c823077af7febe._comment
@@ -0,0 +1,8 @@
+[[!comment format=mdwn
+ username="http://joey.kitenet.net/"
+ nickname="joey"
+ subject="comment 11"
+ date="2011-12-23T17:52:21Z"
+ content="""
+I don't think that [[tips/finding_duplicate_files]] is hard to find, and the multiple different ways it shows to deal with the duplicate files demonstrate the flexibility of the unix pipeline approach.
+"""]]
diff --git a/doc/todo/wishlist:_Provide_a___34__git_annex__34___command_that_will_skip_duplicates/comment_12_ed6d07f16a11c6eee7e3d5005e8e6fa3._comment b/doc/todo/wishlist:_Provide_a___34__git_annex__34___command_that_will_skip_duplicates/comment_12_ed6d07f16a11c6eee7e3d5005e8e6fa3._comment
new file mode 100644
index 0000000000..909beed837
--- /dev/null
+++ b/doc/todo/wishlist:_Provide_a___34__git_annex__34___command_that_will_skip_duplicates/comment_12_ed6d07f16a11c6eee7e3d5005e8e6fa3._comment
@@ -0,0 +1,8 @@
+[[!comment format=mdwn
+ username="http://joey.kitenet.net/"
+ nickname="joey"
+ subject="comment 12"
+ date="2011-12-23T18:02:24Z"
+ content="""
+BTW, sort -S '90%' benchmarks consistently 2x as fast as perl's hashes all the way up to 1 million files. Of course the pipeline approach allows you to swap in perl or whatever else is best for you at scale.
+"""]]
diff --git a/doc/todo/wishlist:_Provide_a___34__git_annex__34___command_that_will_skip_duplicates/comment_1_fd213310ee548d8726791d2b02237fde._comment b/doc/todo/wishlist:_Provide_a___34__git_annex__34___command_that_will_skip_duplicates/comment_1_fd213310ee548d8726791d2b02237fde._comment
new file mode 100644
index 0000000000..094e4526eb
--- /dev/null
+++ b/doc/todo/wishlist:_Provide_a___34__git_annex__34___command_that_will_skip_duplicates/comment_1_fd213310ee548d8726791d2b02237fde._comment
@@ -0,0 +1,29 @@
+[[!comment format=mdwn
+ username="http://joey.kitenet.net/"
+ nickname="joey"
+ subject="comment 1"
+ date="2011-01-27T18:29:44Z"
+ content="""
+Hey Asheesh, I'm happy you're finding git-annex useful.
+
+So, there are two forms of duplication going on here. There's duplication of the content, and duplication of the filenames 
+pointing at that content.
+
+Duplication of the filenames is probably not a concern, although it's what I thought you were talking about at first. It's probably info worth recording that backup-2010/some_dir/foo and backup-2009/other_dir/foo are two names you've used for the same content in the past. If you really wanted to remove backup-2009/foo, you could do it by writing a script that looks at the basenames of the symlink targets and removes files that point to the same content as other files.
+
+Using SHA1 ensures that the same key is used for identical files, so generally avoids duplication of content. But if you have 2 disks with an identical file on each, and make them both into annexes, then git-annex will happily retain both
+copies of the content, one per disk. It generally considers keeping copies of content a good thing. :)
+
+So, what if you want to remove the unnecessary copies? Well, there's a really simple way:
+
+
+cd /media/usb-1
+git remote add other-disk /media/usb-0
+git annex add
+git annex drop
+
+ +This asks git-annex to add everything to the annex, but then remove any file contents that it can safely remove. What can it safely remove? Well, anything that it can verify is on another repository such as \"other-disk\"! So, this will happily drop any duplicated file contents, while leaving all the rest alone. + +In practice, you might not want to have all your old backup disks mounted at the same time and configured as remotes. Look into configuring [[trust]] to avoid needing do to that. If usb-0 is already a trusted disk, all you need is a simple \"git annex drop\" on usb-1. +"""]] diff --git a/doc/todo/wishlist:_Provide_a___34__git_annex__34___command_that_will_skip_duplicates/comment_2_4394bde1c6fd44acae649baffe802775._comment b/doc/todo/wishlist:_Provide_a___34__git_annex__34___command_that_will_skip_duplicates/comment_2_4394bde1c6fd44acae649baffe802775._comment new file mode 100644 index 0000000000..04d58a4598 --- /dev/null +++ b/doc/todo/wishlist:_Provide_a___34__git_annex__34___command_that_will_skip_duplicates/comment_2_4394bde1c6fd44acae649baffe802775._comment @@ -0,0 +1,18 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkjvjLHW9Omza7x1VEzIFQ8Z5honhRB90I" + nickname="Asheesh" + subject="I actually *do* want to avoid duplication of filenames" + date="2011-01-28T07:30:05Z" + content=""" +I really do want just one filename per file, at least for some cases. + +For my photos, there's no benefit to having a few filenames point to the same file. As I'm putting them all into the git-annex, that is a good time to remove the pure duplicates so that I don't e.g. see them twice when browsing the directory as a gallery. Also, I am uploading my photos to the web, and I want to avoid uploading the same photo (by content) twice. + +I hope that makes things clearer! 
+ +For now I'm just doing this: + +* paulproteus@renaissance:/mnt/backups-terabyte/paulproteus/sd-card-from-2011-01-06/sd-cards/DCIM/100CANON $ for file in *; do hash=$(sha1sum \"$file\"); if ls /home/paulproteus/Photos/in-flickr/.git-annex | grep -q \"$hash\"; then echo already annexed ; else flickr_upload \"$file\" && mv \"$file\" \"/home/paulproteus/Photos/in-flickr/2011-01-28/from-some-nested-sd-card-bk\" && (cd /home/paulproteus/Photos/in-flickr/2011-01-28/from-some-nested-sd-card-bk && git annex add . && git commit -m ...) ; fi; done + +(Yeah, Flickr for my photos for now. I feel sad about betraying the principle of autonomo.us-ness.) +"""]] diff --git a/doc/todo/wishlist:_Provide_a___34__git_annex__34___command_that_will_skip_duplicates/comment_3_076cb22057583957d5179d8ba9004605._comment b/doc/todo/wishlist:_Provide_a___34__git_annex__34___command_that_will_skip_duplicates/comment_3_076cb22057583957d5179d8ba9004605._comment new file mode 100644 index 0000000000..d11119bc3d --- /dev/null +++ b/doc/todo/wishlist:_Provide_a___34__git_annex__34___command_that_will_skip_duplicates/comment_3_076cb22057583957d5179d8ba9004605._comment @@ -0,0 +1,18 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkjvjLHW9Omza7x1VEzIFQ8Z5honhRB90I" + nickname="Asheesh" + subject="Duplication of the filenames is what I am concerned about" + date="2011-04-29T11:48:22Z" + content=""" +For what it's worth, yes, I want to actually forget I ever had the same file in the filesystem with a duplicated name. I'm not just aiming to clean up the disk's space usage; I'm also aiming to clean things up so that navigating the filesystem is easier. + +I can write my own script to do that based on the symlinks' target (and I wrote something along those lines), but I still think it'd be nicer if git-annex supported this use case. + +Perhaps: + +
git annex drop --by-contents
+ +could let me remove a file from git-annex if the contents are available through a different name. (Right now, \"git annex drop\" requires the name *and* contents match.) + +-- Asheesh. +"""]] diff --git a/doc/todo/wishlist:_Provide_a___34__git_annex__34___command_that_will_skip_duplicates/comment_4_f120d1e83c1a447f2ecce302fc69cf74._comment b/doc/todo/wishlist:_Provide_a___34__git_annex__34___command_that_will_skip_duplicates/comment_4_f120d1e83c1a447f2ecce302fc69cf74._comment new file mode 100644 index 0000000000..a218ee3d51 --- /dev/null +++ b/doc/todo/wishlist:_Provide_a___34__git_annex__34___command_that_will_skip_duplicates/comment_4_f120d1e83c1a447f2ecce302fc69cf74._comment @@ -0,0 +1,35 @@ +[[!comment format=mdwn + username="http://adamspiers.myopenid.com/" + nickname="Adam" + subject="List the duplicate filenames, then let the user decide what to do" + date="2011-12-22T12:31:29Z" + content=""" +I have the same use case as Asheesh but I want to be able to see which filenames point to the same objects and then decide which of the duplicates to drop myself. I think + + git annex drop --by-contents + +would be the wrong approach because how does git-annex know which ones to drop? There's too much potential for error. + +Instead it would be great to have something like + + git annex finddups + +While it's easy enough to knock up a bit of shell or Perl to achieve this, that relies on knowledge of the annex symlink structure, so I think really it belongs inside git-annex. + +If this command gave output similar to the excellent `fastdup` utility: + + Scanning for files... 672 files in 10.439 seconds + Comparing 2 sets of files... 
+ + 2 files (70.71 MB/ea) + /home/adam/media/flat/tour/flat-tour.3gp + /home/adam/videos/tour.3gp + + Found 1 duplicate of 1 file (70.71 MB wasted) + Scanned 672 files (1.96 GB) in 11.415 seconds + +then you could do stuff like + + git annex finddups | grep /home/adam/media/flat | xargs rm + +"""]] diff --git a/doc/todo/wishlist:_Provide_a___34__git_annex__34___command_that_will_skip_duplicates/comment_5_5c30294b3c59fdebb1eef0ae5da4cd4f._comment b/doc/todo/wishlist:_Provide_a___34__git_annex__34___command_that_will_skip_duplicates/comment_5_5c30294b3c59fdebb1eef0ae5da4cd4f._comment new file mode 100644 index 0000000000..e48a4a9b38 --- /dev/null +++ b/doc/todo/wishlist:_Provide_a___34__git_annex__34___command_that_will_skip_duplicates/comment_5_5c30294b3c59fdebb1eef0ae5da4cd4f._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://adamspiers.myopenid.com/" + nickname="Adam" + subject="Here's a Perl version" + date="2011-12-22T15:43:51Z" + content=""" +https://github.com/aspiers/git-config/blob/master/bin/git-annex-finddups + +but it would be better in git-annex itself ... 
+"""]] diff --git a/doc/todo/wishlist:_Provide_a___34__git_annex__34___command_that_will_skip_duplicates/comment_6_f24541ada1c86d755acba7e9fa7cff24._comment b/doc/todo/wishlist:_Provide_a___34__git_annex__34___command_that_will_skip_duplicates/comment_6_f24541ada1c86d755acba7e9fa7cff24._comment new file mode 100644 index 0000000000..5d8ac8e61b --- /dev/null +++ b/doc/todo/wishlist:_Provide_a___34__git_annex__34___command_that_will_skip_duplicates/comment_6_f24541ada1c86d755acba7e9fa7cff24._comment @@ -0,0 +1,16 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 6" + date="2011-12-22T16:39:24Z" + content=""" +My main concern with putting this in git-annex is that finding duplicates necessarily involves storing a list of every key and file in the repository, and git-annex is very carefully built to avoid things that require non-constant memory use, so that it can scale to very big repositories. (The only exception is the `unused` command, and reducing its memory usage is a continuing goal.) + +So I would rather come at this from a different angle.. like providing a way to output a list of files and their associated keys, which the user can then use in their own shell pipelines to find duplicate keys: + + git annex find --include '*' --format='${file} ${key}\n' | sort --key 2 | uniq --all-repeated --skip-fields=1 + +Which is implemented now! + +(Making that pipeline properly handle filenames with spaces is left as an exercise for the reader..) 
+"""]] diff --git a/doc/todo/wishlist:_Provide_a___34__git_annex__34___command_that_will_skip_duplicates/comment_7_c39f1bb7c61a89b238c61bee1c049767._comment b/doc/todo/wishlist:_Provide_a___34__git_annex__34___command_that_will_skip_duplicates/comment_7_c39f1bb7c61a89b238c61bee1c049767._comment new file mode 100644 index 0000000000..a337002804 --- /dev/null +++ b/doc/todo/wishlist:_Provide_a___34__git_annex__34___command_that_will_skip_duplicates/comment_7_c39f1bb7c61a89b238c61bee1c049767._comment @@ -0,0 +1,54 @@ +[[!comment format=mdwn + username="http://adamspiers.myopenid.com/" + nickname="Adam" + subject="comment 7" + date="2011-12-22T20:04:14Z" + content=""" +> My main concern with putting this in git-annex is that finding +> duplicates necessarily involves storing a list of every key and file +> in the repository + +Only if you want to search the *whole* repository for duplicates, and if +you do, then you're necessarily going to have to chew up memory in +some process anyway, so what difference whether it's git-annex or +(say) a Perl wrapper? + +> and git-annex is very carefully built to avoid things that require +> non-constant memory use, so that it can scale to very big +> repositories. + +That's a worthy goal, but if everything could be implemented with an +O(1) memory footprint then we'd be in much more pleasant world :-) +Even O(n) isn't that bad ... + +That aside, I like your `--format=\"%f %k\n\"` idea a lot. That opens +up the \"black box\" of `.git/annex/objects` and makes nice things +possible, as your pipeline already demonstrates. However, I'm not +sure why you think `git annex find | sort | uniq` would be more +efficient. Not only does the sort require the very thing you were +trying to avoid (i.e. the whole list in memory), but it's also +O(n log n) which is significantly slower than my O(n) Perl script +linked above. + +More considerations about this pipeline: + +* Doesn't it only include locally available files? 
Ideally it should + spot duplicates even when the backing blob is not available locally. +* What's the point of `--include '*'` ? Doesn't `git annex find` + with no arguments already include all files, modulo the requirement + above that they're locally available? +* Any user using this `git annex find | ...` approach is likely to + run up against its limitations sooner rather than later, because + they're already used to the plethora of options `find(1)` provides. + Rather than reinventing the wheel, is there some way `git annex find` + could harness the power of `find(1)` ? + +Those considerations aside, a combined approach would be to implement + + git annex find --format=... + +and then alter my Perl wrapper to `popen(2)` from that rather than using +`File::Find`. But I doubt you would want to ship Perl wrappers in the +distribution, so if you don't provide a Haskell equivalent then users +who can't code are left high and dry. +"""]] diff --git a/doc/todo/wishlist:_Provide_a___34__git_annex__34___command_that_will_skip_duplicates/comment_8_221ed2e53420278072a6d879c6f251d1._comment b/doc/todo/wishlist:_Provide_a___34__git_annex__34___command_that_will_skip_duplicates/comment_8_221ed2e53420278072a6d879c6f251d1._comment new file mode 100644 index 0000000000..5ac292afeb --- /dev/null +++ b/doc/todo/wishlist:_Provide_a___34__git_annex__34___command_that_will_skip_duplicates/comment_8_221ed2e53420278072a6d879c6f251d1._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://adamspiers.myopenid.com/" + nickname="Adam" + subject="How much memory would it actually use anyway?" + date="2011-12-22T20:15:22Z" + content=""" +Another thought - an SHA1 digest is 20 bytes. That means you can fit over 50 million keys into 1GB of RAM. Granted you also need memory to store the values (pathnames) which in many cases will be longer, and some users may also choose more expensive backends than SHA1 ... 
but even so, it seems to me that you are at risk of throwing the baby out with the bath water. +"""]] diff --git a/doc/todo/wishlist:_Provide_a___34__git_annex__34___command_that_will_skip_duplicates/comment_9_aecfa896c97b9448f235bce18a40621d._comment b/doc/todo/wishlist:_Provide_a___34__git_annex__34___command_that_will_skip_duplicates/comment_9_aecfa896c97b9448f235bce18a40621d._comment new file mode 100644 index 0000000000..82c6921ebb --- /dev/null +++ b/doc/todo/wishlist:_Provide_a___34__git_annex__34___command_that_will_skip_duplicates/comment_9_aecfa896c97b9448f235bce18a40621d._comment @@ -0,0 +1,14 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 9" + date="2011-12-23T16:07:39Z" + content=""" +Adam, to answer a lot of points briefly.. + +* --include='*' makes find list files whether their contents are present or not +* Your perl script is not O(n). Inserting into perl hash tables has overhead of minimum O(n log n). Not counting the overhead of resizing hash tables, the grievous slowdown if the bucket size is overcome by data (it probably falls back to a linked list or something then), and the overhead of traversing the hash tables to get data out. +* I think that git-annex's set of file matching options is coming along nicely, and new ones can easily be added, so see no need to pull in unix find(1). +* Your memory size calculations ignore the overhead of a hash table or other data structure to store the data in, which will tend to be more than the actual data size it's storing. I estimate your 50 million number is off by at least one order of magnitude, and more likely two; in any case I don't want git-annex to use 1 gb of ram. +* Little known fact: sort(1) will use a temp file as a buffer if too much memory is needed to hold the data to sort. It's also written in the most efficient language possible and has been ruthlessly optimised for 30 years, so I would be very surprised if it was not the best choice. 
+"""]] diff --git a/doc/todo/wishlist:___34__git_annex_add__34___multiple_processes.mdwn b/doc/todo/wishlist:___34__git_annex_add__34___multiple_processes.mdwn new file mode 100644 index 0000000000..a04af05b42 --- /dev/null +++ b/doc/todo/wishlist:___34__git_annex_add__34___multiple_processes.mdwn @@ -0,0 +1,10 @@ +Hello, + +i'm in the process of managing my music collection with git-annex. The initial "git annex add" using the sha1 banckend is quite long an i was wondering that it could be nice to launch multiple "sha1sum" processes in parallel to speed up things. + +Anyway, thanks for this wonderful piece of software. + +Jean-Baptiste + +> closing as dup of [[parallel possibilities]] (also see comments below) +> [[done]] --[[Joey]] diff --git a/doc/todo/wishlist:___34__git_annex_add__34___multiple_processes/comment_1_85b14478411a33e6186a64bd41f0910d._comment b/doc/todo/wishlist:___34__git_annex_add__34___multiple_processes/comment_1_85b14478411a33e6186a64bd41f0910d._comment new file mode 100644 index 0000000000..2364b7fb83 --- /dev/null +++ b/doc/todo/wishlist:___34__git_annex_add__34___multiple_processes/comment_1_85b14478411a33e6186a64bd41f0910d._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2011-02-25T19:12:42Z" + content=""" +I'd expect the checksumming to be disk bound, not CPU bound, on most systems. + +I suggest you start off on the WORM backend, and then you can run a job later to [[migrate|walkthrough#index14h2]] to the SHA1 backend. 
+"""]] diff --git a/doc/todo/wishlist:___34__git_annex_add__34___multiple_processes/comment_2_82e857f463cfdf73c70f6c0a9f9a31d6._comment b/doc/todo/wishlist:___34__git_annex_add__34___multiple_processes/comment_2_82e857f463cfdf73c70f6c0a9f9a31d6._comment new file mode 100644 index 0000000000..9b8240658b --- /dev/null +++ b/doc/todo/wishlist:___34__git_annex_add__34___multiple_processes/comment_2_82e857f463cfdf73c70f6c0a9f9a31d6._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 2" + date="2011-02-25T19:54:28Z" + content=""" +But, see [[todo/parallel_possibilities]] +"""]] diff --git a/doc/todo/wishlist:___34__git_annex_add__34___multiple_processes/comment_3_8af85eba7472d9025c6fae4f03e3ad75._comment b/doc/todo/wishlist:___34__git_annex_add__34___multiple_processes/comment_3_8af85eba7472d9025c6fae4f03e3ad75._comment new file mode 100644 index 0000000000..ee769f0ddd --- /dev/null +++ b/doc/todo/wishlist:___34__git_annex_add__34___multiple_processes/comment_3_8af85eba7472d9025c6fae4f03e3ad75._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="jbd" + ip="89.158.228.148" + subject="comment 3" + date="2011-02-26T10:26:12Z" + content=""" +Thank your for your answer and the link ! +"""]] diff --git a/doc/todo/wishlist:_support_for_more_ssh_urls_.mdwn b/doc/todo/wishlist:_support_for_more_ssh_urls_.mdwn new file mode 100644 index 0000000000..55b8120a75 --- /dev/null +++ b/doc/todo/wishlist:_support_for_more_ssh_urls_.mdwn @@ -0,0 +1,22 @@ +git-annex does not seem to support all kinds of urls that git does. + +Specifically, if I have ~/bar set up on host foo: + + [remote "foo"] + ## this one is not recognized as ssh url at all + # url = foo:bar + ## this one makes git-annex try to access '/~/bar' literally + # url = ssh://foo/~/bar + ## this one works + url = ssh://foo/home/tv/bar + +> scp-style is now supported. 
+ +> `~` expansions (for the user's home, or other users) +> are somewhat tricky to support as they require running +> code on the remote to lookup homedirs. If git-annex grows a +> `git annex shell` that is run on the remote side +> (something I am [[considering|todo/git-annex-shell]] for other reasons), it +> could handle the expansions there. --[[Joey]] + +> Update: Now `~` expansions are supported. [[done]] diff --git a/doc/todo/wishlist:_swift_backend.mdwn b/doc/todo/wishlist:_swift_backend.mdwn new file mode 100644 index 0000000000..28bd265faf --- /dev/null +++ b/doc/todo/wishlist:_swift_backend.mdwn @@ -0,0 +1,5 @@ +[swift](http://swift.openstack.org/) is the object storage of Openstack. Think S3, but fully open source. As it's backed by rackspace.com, NASA, Dell and several other major players, adoption rates will explode. + +I can provide a test account soonish if need be, else rackspace.com if offering swift storage. Their API gateway lives at https://auth.api.rackspacecloud.com/v1.0 + +Richard diff --git a/doc/todo/wishlist:_swift_backend/comment_1_e6efbb35f61ee521b473a92674036788._comment b/doc/todo/wishlist:_swift_backend/comment_1_e6efbb35f61ee521b473a92674036788._comment new file mode 100644 index 0000000000..98a998c1cf --- /dev/null +++ b/doc/todo/wishlist:_swift_backend/comment_1_e6efbb35f61ee521b473a92674036788._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 1" + date="2011-05-14T10:04:36Z" + content=""" +I don't suppose this SWIFT api is compatible with the eucalytpus walrus api ? 
+"""]] diff --git a/doc/todo/wishlist:_swift_backend/comment_2_5d8c83b0485112e98367b7abaab3f4e3._comment b/doc/todo/wishlist:_swift_backend/comment_2_5d8c83b0485112e98367b7abaab3f4e3._comment new file mode 100644 index 0000000000..97863b095f --- /dev/null +++ b/doc/todo/wishlist:_swift_backend/comment_2_5d8c83b0485112e98367b7abaab3f4e3._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 2" + date="2011-05-14T15:00:51Z" + content=""" +It does offer a S3 compatibility layer, but that is de facto non-functioning as of right now. +"""]] diff --git a/doc/transferring_data.mdwn b/doc/transferring_data.mdwn new file mode 100644 index 0000000000..57873f6f0e --- /dev/null +++ b/doc/transferring_data.mdwn @@ -0,0 +1,19 @@ +git-annex can transfer data to or from any of a repository's git remotes. +Depending on where the remote is, the data transfer is done using rsync +(over ssh or locally), or plain cp (with copy-on-write +optimisations on supported filesystems), or using curl (for repositories +on the web). Some [[special_remotes]] are also supported that are not +traditional git remotes. + +If a data transfer is interrupted, git-annex retains the partial transfer +to allow it to be automatically resumed later. + +It's equally easy to transfer a single file to or from a repository, +or to launch a retrieval of a massive pile of files from whatever +repositories they are scattered among. + +git-annex automatically uses whatever remotes are currently accessible, +preferring ones that are less expensive to talk to. 
+ +[[!img repomap.png caption="A real-world repository interconnection map +(generated by git-annex map)"]] diff --git a/doc/trust.mdwn b/doc/trust.mdwn new file mode 100644 index 0000000000..1fd47fd1d3 --- /dev/null +++ b/doc/trust.mdwn @@ -0,0 +1,59 @@ +Git-annex supports several levels of trust of a repository: + +* semitrusted (default) +* untrusted +* trusted +* dead + +## semitrusted + +Normally, git-annex does not fully trust its stored [[location_tracking]] +information. When removing content, it will directly check +that other repositories have enough [[copies]]. + +Generally that explicit checking is a good idea. Consider that the current +[[location_tracking]] information for a remote may not yet have propagated +out. Or, a remote may have suffered a catastrophic loss of data, or itself +been lost. + +There is still some trust involved here. A semitrusted repository is +depended on to retain a copy of the file content; possibly the only +[[copy|copies]]. + +(Being semitrusted is the default. The `git annex semitrust` command +restores a repository to this default, when it has been overridden. +The `--semitrust` option can temporarily restore a repository to this +default.) + +## untrusted + +An untrusted repository is not trusted to retain data at all. Git-annex +will retain sufficient [[copies]] of data elsewhere. + +This is a good choice for eg, portable drives that could get lost. Or, +if a disk is known to be dying, you can set it to untrusted and let +`git annex fsck` warn about data that needs to be copied off it. + +To configure a repository as untrusted, use the `git annex untrust` +command. + +## trusted + +Sometimes, you may have reasons to fully trust the location tracking +information for a repository. For example, it may be an offline +archival drive, from which you rarely or never remove content. Deciding +when it makes sense to trust the tracking info is up to you. 
+ +One way to handle this is just to use `--force` when a command cannot +access a remote you trust. Or to use `--trust` to specify a repository to +trust temporarily. + +To configure a repository as fully and permanently trusted, +use the `git annex trust` command. + +## dead + +This is used to indicate that you have no trust that the repository +exists at all. It's appropriate to use when a drive has been lost, +or a directory irretrievably deleted. It will make git-annex avoid +even showing the repository as a place where data might still reside. diff --git a/doc/upgrades.mdwn b/doc/upgrades.mdwn new file mode 100644 index 0000000000..c4aac53482 --- /dev/null +++ b/doc/upgrades.mdwn @@ -0,0 +1,93 @@ +Occasionally improvements are made to how git-annex stores its data, +that require an upgrade process to convert repositories made with an older +version to be used by a newer version. It's annoying, it should happen +rarely, but sometimes, it's worth it. + +There's a commitment that git-annex will always support upgrades from all +past versions. After all, you may have offline drives from an earlier +git-annex, and might want to use them with a newer git-annex. + +git-annex will notice if it is run in a repository that +needs an upgrade, and refuse to do anything. To upgrade, +use the "git annex upgrade" command. + +The upgrade process is guaranteed to be conflict-free. Unless you +already have git conflicts in your repository or between repositories. +Upgrading a repository with conflicts is not recommended; resolve the +conflicts first before upgrading git-annex. + +## Upgrade events, so far + +### v2 -> v3 (git-annex version 3.x) + +Involved moving the .git-annex/ directory into a separate git-annex branch. + +After this upgrade, you should make sure you include the git-annex branch +when git pushing and pulling. + +### tips for this upgrade + +This upgrade is easier (and faster!) than the previous upgrades. 
+You don't need to upgrade every repository at once; it's sufficient +to upgrade each repository only when you next use it. + +Example upgrade process: + + cd localrepo + git pull + git annex upgrade + git commit -m "upgrade v2 to v3" + git gc + +### v1 -> v2 (git-annex version 0.20110316) + +Involved adding hashing to .git/annex/ and changing the names of all keys. +Symlinks changed. + +Also, hashing was added to location log files in .git-annex/. +And .gitattributes needed to have another line added to it. + +Previously, files added to the SHA [[backends]] did not have their file +size tracked, while files added to the WORM backend did. Files added to +the SHA backends after the conversion will have their file size tracked, +and that information will be used by git-annex for disk free space checking. +To ensure that information is available for all your annexed files, see +[[upgrades/SHA_size]]. + +### tips for this upgrade + +This upgrade can tend to take a while, if you have a lot of files. + +Each clone of a repository should be individually upgraded. +Until a repository's remotes have been upgraded, git-annex +will refuse to communicate with them. + +Start by upgrading one repository, and then you can commit +the changes git-annex staged during upgrade, and push them out to other +repositories. And then upgrade those other repositories. Doing it this +way avoids git-annex doing some duplicate work during the upgrade. + +Example upgrade process: + + cd localrepo + git pull + git annex upgrade + git commit -m "upgrade v1 to v2" + git push + + ssh remote + cd remoterepo + git pull + git annex upgrade + ... + +### v0 -> v1 (git-annex version 0.04) + +Involved a reorganisation of the layout of .git/annex/. Symlinks changed. + +Handled more or less transparently, although git-annex was just 2 weeks +old at the time, and had few users other than Joey. 
+ +Before doing this upgrade, set annex.version: + + git config annex.version 0 diff --git a/doc/upgrades/SHA_size.mdwn b/doc/upgrades/SHA_size.mdwn new file mode 100644 index 0000000000..97603ba913 --- /dev/null +++ b/doc/upgrades/SHA_size.mdwn @@ -0,0 +1,20 @@ +Before version 2 of the git-annex repository, files added to the SHA +[[backends]] did not have their file size tracked, while files added to the +WORM backend did. The file size information is used for disk free space +checking. + +Files added to the SHA backends after the conversion will have their file +size tracked automatically. This disk free space checking is an optional +feature and since you're more likely to be using more recently added files, +you're unlikely to see any bad effect if you do nothing. + +That said, if you have old files added to SHA backends that lack file size +tracking info, here's how you can add that info. After [[upgrading|upgrades]] +to repository version 2, in each repository run: + + git annex migrate + git commit -m 'migrated keys for v2' + +The usual caveats about [[tips/migrating_data_to_a_new_backend]] +apply; you will end up with unused keys that you can later clean up with +`git annex unused`. diff --git a/doc/use_case/Alice.mdwn b/doc/use_case/Alice.mdwn new file mode 100644 index 0000000000..cdd3ea546d --- /dev/null +++ b/doc/use_case/Alice.mdwn @@ -0,0 +1,24 @@ +### use case: The Nomad + +Alice is always on the move, often with her trusty netbook and a small +handheld terabyte USB drive, or a smaller USB keydrive. She has a server +out there on the net. She stores data, encrypted in the Cloud. + +All these things can have different files on them, but Alice no longer +has to deal with the tedious process of keeping them manually in sync, +or remembering where she put a file. git-annex manages all these data +sources as if they were git remotes. 
+[[more about special remotes|special_remotes]] + +When she has 1 bar on her cell, Alice queues up interesting files on her +server for later. At a coffee shop, she has git-annex download them to her +USB drive. High in the sky or in a remote cabin, she catches up on +podcasts, videos, and games, first letting git-annex copy them from +her USB drive to the netbook (this saves battery power). +[[more about transferring data|transferring_data]] + +When she's done, she tells git-annex which to keep and which to remove. +They're all removed from her netbook to save space, and Alice knows +that next time she syncs up to the net, her changes will be synced back +to her server. +[[more about distributed version control|distributed_version_control]] diff --git a/doc/use_case/Bob.mdwn b/doc/use_case/Bob.mdwn new file mode 100644 index 0000000000..42d10ea975 --- /dev/null +++ b/doc/use_case/Bob.mdwn @@ -0,0 +1,25 @@ +### use case: The Archivist + +Bob has many drives to archive his data, most of them kept offline, in a +safe place. + +With git-annex, Bob has a single directory tree that includes all +his files, even if their content is being stored offline. He can +reorganize his files using that tree, committing new versions to git, +without worry about accidentally deleting anything. + +When Bob needs access to some files, git-annex can tell him which drive(s) +they're on, and easily make them available. Indeed, every drive knows what +is on every other drive. +[[more about location tracking|location_tracking]] + +Bob thinks long-term, and so he appreciates that git-annex uses a simple +repository format. He knows his files will be accessible in the future +even if the world has forgotten about git-annex and git. +[[more about future-proofing|future_proofing]] + +Run in a cron job, git-annex adds new files to archival drives at night. 
It +also helps Bob keep track of intentional and unintentional copies of +files, and logs information he can use to decide when it's time to duplicate +the content of old drives. +[[more about backup copies|copies]] diff --git a/doc/users.mdwn b/doc/users.mdwn new file mode 100644 index 0000000000..b9bab48ecf --- /dev/null +++ b/doc/users.mdwn @@ -0,0 +1,9 @@ +Users of this wiki, feel free to create a subpage of this one and talk +about yourself on it, within reason. You can link to it to sign your +comments. + +List of users +============= +[[!inline pages="users/* and !users/*/* and !*/Discussion" +feeds=no archive=yes sort=title template=titlepage +rootpage="users" postformtext="Add yourself as a git-annex user:"]] diff --git a/doc/users/chrysn.mdwn b/doc/users/chrysn.mdwn new file mode 100644 index 0000000000..f5c07b88b3 --- /dev/null +++ b/doc/users/chrysn.mdwn @@ -0,0 +1,5 @@ +* **name**: chrysn +* **website**: +* **uses git-annex for**: managing the family's photos (and possibly videos and music in the future) +* **likes git-annex because**: it adds a layer of commit semantics over a regular file system without keeping everything in duplicate locally +* **would like git-annex to**: not be required any more as git itself learns to use cow filesystems to avoid abundant disk usage and gets better with sparser checkouts (git-annex might then still be a simpler tool that watches over what can be safely dropped for a sparser checkout) diff --git a/doc/users/fmarier.mdwn b/doc/users/fmarier.mdwn new file mode 100644 index 0000000000..ecf3426978 --- /dev/null +++ b/doc/users/fmarier.mdwn @@ -0,0 +1,6 @@ +# François Marier + +Free Software and Debian Developer. 
Lead developer of [Libravatar](http://www.libravatar.org) + +* [Blog](http://feeding.cloud.geek.nz) +* [Identica](http://identi.ca/fmarier) / [Twitter](http://twitter.com/fmarier) diff --git a/doc/users/gebi.mdwn b/doc/users/gebi.mdwn new file mode 100644 index 0000000000..121bedbdd7 --- /dev/null +++ b/doc/users/gebi.mdwn @@ -0,0 +1 @@ +Michael Gebetsroither diff --git a/doc/users/joey.mdwn b/doc/users/joey.mdwn new file mode 100644 index 0000000000..306e1cc768 --- /dev/null +++ b/doc/users/joey.mdwn @@ -0,0 +1,2 @@ +Joey Hess + diff --git a/doc/walkthrough.mdwn b/doc/walkthrough.mdwn new file mode 100644 index 0000000000..68f94a6f27 --- /dev/null +++ b/doc/walkthrough.mdwn @@ -0,0 +1,23 @@ +A walkthrough of the basic features of git-annex. + +[[!toc]] + +[[!inline feeds=no show=0 template=walkthrough pagenames=""" + creating_a_repository + adding_a_remote + adding_files + renaming_files + getting_file_content + transferring_files:_When_things_go_wrong + removing_files + removing_files:_When_things_go_wrong + modifying_annexed_files + using_ssh_remotes + moving_file_content_between_repositories + unused_data + fsck:_verifying_your_data + fsck:_when_things_go_wrong + backups + automatically_managing_content + more +"""]] diff --git a/doc/walkthrough/adding_a_remote.mdwn b/doc/walkthrough/adding_a_remote.mdwn new file mode 100644 index 0000000000..97690dfcdf --- /dev/null +++ b/doc/walkthrough/adding_a_remote.mdwn @@ -0,0 +1,19 @@ +Like any other git repository, git-annex repositories have remotes. +Let's start by adding a USB drive as a remote. + + # sudo mount /media/usb + # cd /media/usb + # git clone ~/annex + # cd annex + # git annex init "portable USB drive" + # git remote add laptop ~/annex + # cd ~/annex + # git remote add usbdrive /media/usb/annex + +This is all standard ad-hoc distributed git repository setup. +The only git-annex specific part is telling it the name +of the new repository created on the USB drive. 
+ +Notice that both repos are set up as remotes of one another. This lets +either get annexed files from the other. You'll want to do that even +if you are using git in a more centralized fashion. diff --git a/doc/walkthrough/adding_a_remote/comment_1_0a59355bd33a796aec97173607e6adc9._comment b/doc/walkthrough/adding_a_remote/comment_1_0a59355bd33a796aec97173607e6adc9._comment new file mode 100644 index 0000000000..4b0b9c0fd2 --- /dev/null +++ b/doc/walkthrough/adding_a_remote/comment_1_0a59355bd33a796aec97173607e6adc9._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 1" + date="2011-03-19T01:18:49Z" + content=""" +After doing the above with two required copy per file, `git annex fsck` complained that I had only one copy per file even though I had created my clone, already. Once I `git pull`ed from the second repo, not getting any changes for obvious reasons, `git annex fsck` was happy. So I am not sure how my addition was incorrect. -- RichiH +"""]] diff --git a/doc/walkthrough/adding_a_remote/comment_2_f8cd79ef1593a8181a7f1086a87713e8._comment b/doc/walkthrough/adding_a_remote/comment_2_f8cd79ef1593a8181a7f1086a87713e8._comment new file mode 100644 index 0000000000..015417a4f7 --- /dev/null +++ b/doc/walkthrough/adding_a_remote/comment_2_f8cd79ef1593a8181a7f1086a87713e8._comment @@ -0,0 +1,9 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 2" + date="2011-03-19T15:35:38Z" + content=""" +Yes, you have to pull down location tracking information in order for fsck to be satisfied in that situation. But since this is a walkthrough, and neither fsck or numcopies settings are mentioned until later, it's ok for this pull to be described a few steps along in [[getting file content]]. 
+ +"""]] diff --git a/doc/walkthrough/adding_a_remote/comment_3_60691af4400521b5a8c8d75efe3b44cb._comment b/doc/walkthrough/adding_a_remote/comment_3_60691af4400521b5a8c8d75efe3b44cb._comment new file mode 100644 index 0000000000..9280f2dccf --- /dev/null +++ b/doc/walkthrough/adding_a_remote/comment_3_60691af4400521b5a8c8d75efe3b44cb._comment @@ -0,0 +1,9 @@ +[[!comment format=mdwn + username="http://dieter-be.myopenid.com/" + nickname="dieter" + subject="comment 3" + date="2011-04-02T20:24:33Z" + content=""" + * why the `git remote add laptop ~/annex` ? this remote already exists under the name origin. + * doesn't the last command need to be `git remote add usbdrive /media/usb/annex`? because the actual repo would be in /media/usb/annex, not /media/usb? +"""]] diff --git a/doc/walkthrough/adding_a_remote/comment_4_6f7cf5c330272c96b3abeb6612075c9d._comment b/doc/walkthrough/adding_a_remote/comment_4_6f7cf5c330272c96b3abeb6612075c9d._comment new file mode 100644 index 0000000000..b4dcb6422a --- /dev/null +++ b/doc/walkthrough/adding_a_remote/comment_4_6f7cf5c330272c96b3abeb6612075c9d._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 4" + date="2011-04-03T02:32:17Z" + content=""" +Good spotting on the last line, fixed. + +The laptop remote is indeed redundant, but it leads to clearer views of what is going on later in the walkthrough (\"git pull laptop master\", \"(copying from laptop...)\"). And if the original clone is made from a central bare repo, this reinforces that you'll want to set up remotes for other repos on the computer. +"""]] diff --git a/doc/walkthrough/adding_files.mdwn b/doc/walkthrough/adding_files.mdwn new file mode 100644 index 0000000000..d1b5a04f77 --- /dev/null +++ b/doc/walkthrough/adding_files.mdwn @@ -0,0 +1,11 @@ + # cd ~/annex + # cp /tmp/big_file . + # cp /tmp/debian.iso . + # git annex add . + add big_file (checksum...) ok + add debian.iso (checksum...) 
ok + # git commit -a -m added + +When you add a file to the annex and commit it, only a symlink to +the annexed content is committed. The content itself is stored in +git-annex's backend. diff --git a/doc/walkthrough/automatically_managing_content.mdwn b/doc/walkthrough/automatically_managing_content.mdwn new file mode 100644 index 0000000000..ef883efef3 --- /dev/null +++ b/doc/walkthrough/automatically_managing_content.mdwn @@ -0,0 +1,40 @@ +Once you have multiple repositories, and have perhaps configured numcopies, +any given file can have many more copies than is needed, or perhaps fewer +than you would like. How to manage this? + +The whereis subcommand can be used to see how many copies of a file are known, +but then you have to decide what to get or drop. In this example, there +are perhaps not enough copies of the first file, and too many of the second +file. + + # cd /media/usbdrive + # git annex whereis + whereis my_cool_big_file (1 copy) + 0c443de8-e644-11df-acbf-f7cd7ca6210d -- laptop + whereis other_file (3 copies) + 0c443de8-e644-11df-acbf-f7cd7ca6210d -- laptop + 62b39bbe-4149-11e0-af01-bb89245a1e61 -- here (usb drive) + 7570b02e-15e9-11e0-adf0-9f3f94cb2eaa -- backup drive + +What would be handy is some automated versions of get and drop, that only +gets a file if there are not yet enough copies of it, or only drops a file +if there are too many copies. Well, these exist, just use the --auto option. + + # git annex get --auto --numcopies=2 + get my_cool_big_file (from laptop...) ok + # git annex drop --auto --numcopies=2 + drop other_file ok + +With two quick commands, git-annex was able to decide for you how to +work toward having two copies of your files. 
+ + # git annex whereis + whereis my_cool_big_file (2 copies) + 0c443de8-e644-11df-acbf-f7cd7ca6210d -- laptop + 62b39bbe-4149-11e0-af01-bb89245a1e61 -- here (usb drive) + whereis other_file (2 copies) + 0c443de8-e644-11df-acbf-f7cd7ca6210d -- laptop + 7570b02e-15e9-11e0-adf0-9f3f94cb2eaa -- backup drive + +The --auto option can also be used with the copy command, +again this lets git-annex decide whether to actually copy content. diff --git a/doc/walkthrough/backups.mdwn b/doc/walkthrough/backups.mdwn new file mode 100644 index 0000000000..9723022b4c --- /dev/null +++ b/doc/walkthrough/backups.mdwn @@ -0,0 +1,25 @@ +git-annex can be configured to require more than one copy of a file exists, +as a simple backup for your data. This is controlled by the "annex.numcopies" +setting, which defaults to 1 copy. Let's change that to require 2 copies, +and send a copy of every file to a USB drive. + + # echo "* annex.numcopies=2" >> .gitattributes + # git annex copy . --to usbdrive + +Now when we try to `git annex drop` a file, it will verify that it +knows of 2 other repositories that have a copy before removing its +content from the current repository. + +You can also vary the number of copies needed, depending on the file name. +So, if you want 3 copies of all your flac files, but only 1 copy of oggs: + + # echo "*.ogg annex.numcopies=1" >> .gitattributes + # echo "*.flac annex.numcopies=3" >> .gitattributes + +Or, you might want to make a directory for important stuff, and configure +it so anything put in there is backed up more thoroughly: + + # mkdir important_stuff + # echo "* annex.numcopies=3" > important_stuff/.gitattributes + +For more details about the numcopies setting, see [[copies]]. diff --git a/doc/walkthrough/creating_a_repository.mdwn b/doc/walkthrough/creating_a_repository.mdwn new file mode 100644 index 0000000000..51ff1c72b3 --- /dev/null +++ b/doc/walkthrough/creating_a_repository.mdwn @@ -0,0 +1,6 @@ +This is very straightforward. 
Just tell it a description of the repository. + + # mkdir ~/annex + # cd ~/annex + # git init + # git annex init "my laptop" diff --git a/doc/walkthrough/fsck:_verifying_your_data.mdwn b/doc/walkthrough/fsck:_verifying_your_data.mdwn new file mode 100644 index 0000000000..d036332fb3 --- /dev/null +++ b/doc/walkthrough/fsck:_verifying_your_data.mdwn @@ -0,0 +1,16 @@ +You can use the fsck subcommand to check for problems in your data. What +can be checked depends on the key-value [[backend|backends]] you've used +for the data. For example, when you use the SHA1 backend, fsck will verify +that the checksums of your files are good. Fsck also checks that the +annex.numcopies setting is satisfied for all files. + + # git annex fsck + fsck some_file (checksum...) ok + fsck my_cool_big_file (checksum...) ok + ... + +You can also specify the files to check. This is particularly useful if +you're using sha1 and don't want to spend a long time checksumming everything. + + # git annex fsck my_cool_big_file + fsck my_cool_big_file (checksum...) ok diff --git a/doc/walkthrough/fsck:_when_things_go_wrong.mdwn b/doc/walkthrough/fsck:_when_things_go_wrong.mdwn new file mode 100644 index 0000000000..85d9f20fe0 --- /dev/null +++ b/doc/walkthrough/fsck:_when_things_go_wrong.mdwn @@ -0,0 +1,13 @@ +Fsck never deletes possibly bad data; instead it will be moved to +`.git/annex/bad/` for you to recover. Here is a sample of what fsck +might say about a badly messed up annex: + + # git annex fsck + fsck my_cool_big_file (checksum...) + git-annex: Bad file content; moved to .git/annex/bad/SHA1:7da006579dd64330eb2456001fd01948430572f2 + git-annex: ** No known copies exist of my_cool_big_file + failed + fsck important_file + git-annex: Only 1 of 2 copies exist. Run git annex get somewhere else to back it up. 
+ failed + git-annex: 2 failed diff --git a/doc/walkthrough/getting_file_content.mdwn b/doc/walkthrough/getting_file_content.mdwn new file mode 100644 index 0000000000..cdb4a72e0a --- /dev/null +++ b/doc/walkthrough/getting_file_content.mdwn @@ -0,0 +1,22 @@ +A repository does not always have all annexed file contents available. +When you need the content of a file, you can use "git annex get" to +make it available. + +We can use this to copy everything in the laptop's annex to the +USB drive. + + # cd /media/usb/annex + # git fetch laptop; git merge laptop/master + # git annex get . + get my_cool_big_file (from laptop...) ok + get iso/debian.iso (from laptop...) ok + +Notice that you had to git fetch and merge from laptop first, this lets +git-annex know what has changed in laptop, and so it knows about the files +present there and can get them. + +The alternate approach is to set up a +[[central bare repository|tips/centralized_git_repository_tutorial]], and +always push changes to it after committing them, then in the above, +you can just pull from the central repository to get synced up to +all repositories. diff --git a/doc/walkthrough/modifying_annexed_files.mdwn b/doc/walkthrough/modifying_annexed_files.mdwn new file mode 100644 index 0000000000..1f7a7efb77 --- /dev/null +++ b/doc/walkthrough/modifying_annexed_files.mdwn @@ -0,0 +1,42 @@ +Normally, the content of files in the annex is prevented from being modified. +That's a good thing, because it might be the only copy, you wouldn't +want to lose it in a fumblefingered mistake. + + # echo oops > my_cool_big_file + bash: my_cool_big_file: Permission denied + +In order to modify a file, it should first be unlocked. + + # git annex unlock my_cool_big_file + unlock my_cool_big_file (copying...) ok + +That replaces the symlink that normally points at its content with a copy +of the content. You can then modify the file like any regular file. Because +it is a regular file. 
+ +(If you decide you don't need to modify the file after all, or want to discard +modifications, just use `git annex lock`.) + +When you `git commit`, git-annex's pre-commit hook will automatically +notice that you are committing an unlocked file, and add its new content +to the annex. The file will be replaced with a symlink to the new content, +and this symlink is what gets committed to git in the end. + + # echo "now smaller, but even cooler" > my_cool_big_file + # git commit my_cool_big_file -m "changed an annexed file" + add my_cool_big_file ok + [master 64cda67] changed an annexed file + 1 files changed, 1 insertions(+), 1 deletions(-) + +There is one problem with using `git commit` like this: Git wants to first +stage the entire contents of the file in its index. That can be slow for +big files (sorta why git-annex exists in the first place). So, the +automatic handling on commit is a nice safety feature, since it prevents +the file content being accidentally committed into git. But when working with +big files, it's faster to explicitly add them to the annex yourself +before committing. + + # echo "now smaller, but even cooler yet" > my_cool_big_file + # git annex add my_cool_big_file + add my_cool_big_file ok + # git commit my_cool_big_file -m "changed an annexed file" diff --git a/doc/walkthrough/more.mdwn b/doc/walkthrough/more.mdwn new file mode 100644 index 0000000000..0a4a5b94e8 --- /dev/null +++ b/doc/walkthrough/more.mdwn @@ -0,0 +1,3 @@ +So ends the walkthrough. By now you should be able to use git-annex. + +Want more? See [[tips]] for lots more features and advice. diff --git a/doc/walkthrough/moving_file_content_between_repositories.mdwn b/doc/walkthrough/moving_file_content_between_repositories.mdwn new file mode 100644 index 0000000000..3ffcc11750 --- /dev/null +++ b/doc/walkthrough/moving_file_content_between_repositories.mdwn @@ -0,0 +1,13 @@ +Often you will want to move some file contents from a repository to some +other one. 
For example, your laptop's disk is getting full; time to move +some files to an external disk before moving another file from a file +server to your laptop. Doing that by hand (by using `git annex get` and +`git annex drop`) is possible, but a bit of a pain. `git annex move` +makes it very easy. + + # git annex move my_cool_big_file --to usbdrive + move my_cool_big_file (to usbdrive...) ok + # git annex move video/hackity_hack_and_kaxxt.mov --from fileserver + move video/hackity_hack_and_kaxxt.mov (from fileserver...) + SHA256-s86050597--6ae2688bc533437766a48aa19f2c06be14d1bab9c70b468af445d4f07b65f41e 100% 82MB 199.1KB/s 07:02 + ok diff --git a/doc/walkthrough/moving_file_content_between_repositories/comment_1_4c30ade91fc7113a95960aa3bd1d5427._comment b/doc/walkthrough/moving_file_content_between_repositories/comment_1_4c30ade91fc7113a95960aa3bd1d5427._comment new file mode 100644 index 0000000000..b3dc8fe7a2 --- /dev/null +++ b/doc/walkthrough/moving_file_content_between_repositories/comment_1_4c30ade91fc7113a95960aa3bd1d5427._comment @@ -0,0 +1,19 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 1" + date="2011-03-22T23:41:51Z" + content=""" +I may be missing something obvious, but when I copy to a remote repository, the object files are created, but no softlinks are created. When I pull everything from the remote, it pulls only files the local repo knows about already. + + A + / \ + B C + +Moving from B to A creates no symlinks in A but the object files are moved to A. Copying back from A to B restores the object files in B and keeps them in A. + +Copying from A to an empty C does not create any object files nor symlinks. Copying from C to A creates no symlinks in A but the object files are copied to A. 
+ +-- RichiH + +"""]] diff --git a/doc/walkthrough/moving_file_content_between_repositories/comment_2_7d90e1e150e7524ba31687108fcc38d6._comment b/doc/walkthrough/moving_file_content_between_repositories/comment_2_7d90e1e150e7524ba31687108fcc38d6._comment new file mode 100644 index 0000000000..a6f8e9cf97 --- /dev/null +++ b/doc/walkthrough/moving_file_content_between_repositories/comment_2_7d90e1e150e7524ba31687108fcc38d6._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 2" + date="2011-03-23T00:38:10Z" + content=""" +`git annex move` only moves content. All symlink management is handled by git, so you have to keep repositories in sync using git as you would any other repo. When you `git pull B` in A, it will get whatever symlinks were added to B. + +(It can be useful to use a central bare repo and avoid needing to git pull from one repo to another, then you can just always push commits to the central repo, and pull down all changes from other repos.) +"""]] diff --git a/doc/walkthrough/moving_file_content_between_repositories/comment_3_558d80384434207b9cfc033763863de3._comment b/doc/walkthrough/moving_file_content_between_repositories/comment_3_558d80384434207b9cfc033763863de3._comment new file mode 100644 index 0000000000..9a128f1ed6 --- /dev/null +++ b/doc/walkthrough/moving_file_content_between_repositories/comment_3_558d80384434207b9cfc033763863de3._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U" + nickname="Richard" + subject="comment 3" + date="2011-03-23T02:07:49Z" + content=""" +Ah yes, I feel kinda stupid in hindsight. + +As the central server is most likely a common use case, would you object if I added that to the walkthrough? If you have any best practices on how to automate a push with every copy to a bare remote? 
AFAIK, git does not store information about bare/non-bare remotes, but this could easily be put into .git/config by git annex. + +-- RichiH +"""]] diff --git a/doc/walkthrough/moving_file_content_between_repositories/comment_4_a2f343eceed9e9fba1670f21e0fc6af4._comment b/doc/walkthrough/moving_file_content_between_repositories/comment_4_a2f343eceed9e9fba1670f21e0fc6af4._comment new file mode 100644 index 0000000000..8b4d9a0538 --- /dev/null +++ b/doc/walkthrough/moving_file_content_between_repositories/comment_4_a2f343eceed9e9fba1670f21e0fc6af4._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 4" + date="2011-03-23T15:28:00Z" + content=""" +I would not mind if the walkthrough documented the central git repo case. But I don't want to complicate it unduly (it's long enough), and it's important that the fully distributed case be shown to work, and I assume that people already have basic git knowledge, so documenting the details of set up of a bare git repo is sorta out of scope. (There are also a lot of ways to do it, using github, or gitosis, or raw git, etc.) +"""]] diff --git a/doc/walkthrough/removing_files.mdwn b/doc/walkthrough/removing_files.mdwn new file mode 100644 index 0000000000..0df6e82a1f --- /dev/null +++ b/doc/walkthrough/removing_files.mdwn @@ -0,0 +1,5 @@ +You can always drop files safely. Git-annex checks that some other annex +has the file before removing it. + + # git annex drop iso/debian.iso + drop iso/debian.iso ok diff --git a/doc/walkthrough/removing_files/comment_1_cb65e7c510b75be1c51f655b058667c6._comment b/doc/walkthrough/removing_files/comment_1_cb65e7c510b75be1c51f655b058667c6._comment new file mode 100644 index 0000000000..1c8719cecd --- /dev/null +++ b/doc/walkthrough/removing_files/comment_1_cb65e7c510b75be1c51f655b058667c6._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="DavidEdmondson" + subject="Is it necessary to commit after the 'drop'?"
+ date="2011-09-05T15:43:25Z" + content=""" +In fact is it possible? Nothing changed as far as git is concerned. + +"""]] diff --git a/doc/walkthrough/removing_files/comment_2_64709ea4558915edd5c8ca4486965b07._comment b/doc/walkthrough/removing_files/comment_2_64709ea4558915edd5c8ca4486965b07._comment new file mode 100644 index 0000000000..f5fb8dc7f5 --- /dev/null +++ b/doc/walkthrough/removing_files/comment_2_64709ea4558915edd5c8ca4486965b07._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 2" + date="2011-09-05T15:59:27Z" + content=""" +Good catch. It used to be necessary before there was a git-annex branch, but not now. +"""]] diff --git a/doc/walkthrough/removing_files:_When_things_go_wrong.mdwn b/doc/walkthrough/removing_files:_When_things_go_wrong.mdwn new file mode 100644 index 0000000000..2d3c0cde08 --- /dev/null +++ b/doc/walkthrough/removing_files:_When_things_go_wrong.mdwn @@ -0,0 +1,24 @@ +Before dropping a file, git-annex wants to be able to look at other +remotes, and verify that they still have a file. After all, it could +have been dropped from them too. If the remotes are not mounted/available, +you'll see something like this. + + # git annex drop important_file other.iso + drop important_file (unsafe) + Could only verify the existence of 0 out of 1 necessary copies + Unable to access these remotes: usbdrive + Try making some of these repositories available: + 58d84e8a-d9ae-11df-a1aa-ab9aa8c00826 -- portable USB drive + ca20064c-dbb5-11df-b2fe-002170d25c55 -- backup SATA drive + (Use --force to override this check, or adjust annex.numcopies.) + failed + drop other.iso (unsafe) + Could only verify the existence of 0 out of 1 necessary copies + No other repository is known to contain the file. + (Use --force to override this check, or adjust annex.numcopies.) + failed + +Here you might --force it to drop `important_file` if you [[trust]] your backup. 
+But `other.iso` looks to have never been copied to anywhere else, so if +it's something you want to hold onto, you'd need to transfer it to +some other repository before dropping it. diff --git a/doc/walkthrough/renaming_files.mdwn b/doc/walkthrough/renaming_files.mdwn new file mode 100644 index 0000000000..85964d1ea5 --- /dev/null +++ b/doc/walkthrough/renaming_files.mdwn @@ -0,0 +1,13 @@ + # cd ~/annex + # git mv big_file my_cool_big_file + # mkdir iso + # git mv debian.iso iso/ + # git commit -m moved + +You can use any normal git operations to move files around, or even +make copies or delete them. + +Notice that, since annexed files are represented by symlinks, +the symlink will break when the file is moved into a subdirectory. +But, git-annex will fix this up for you when you commit -- +it has a pre-commit hook that watches for and corrects broken symlinks. diff --git a/doc/walkthrough/transferring_files:_When_things_go_wrong.mdwn b/doc/walkthrough/transferring_files:_When_things_go_wrong.mdwn new file mode 100644 index 0000000000..cfb70aaf9a --- /dev/null +++ b/doc/walkthrough/transferring_files:_When_things_go_wrong.mdwn @@ -0,0 +1,17 @@ +After a while, you'll have several annexes, with different file contents. +You don't have to try to keep all that straight; git-annex does +[[location_tracking]] for you. 
If you ask it to get a file and the drive +or file server is not accessible, it will let you know what it needs to get +it: + + # git annex get video/hackity_hack_and_kaxxt.mov + get video/hackity_hack_and_kaxxt.mov (not available) + Unable to access these remotes: usbdrive, server + Try making some of these repositories available: + 5863d8c0-d9a9-11df-adb2-af51e6559a49 -- my home file server + 58d84e8a-d9ae-11df-a1aa-ab9aa8c00826 -- portable USB drive + ca20064c-dbb5-11df-b2fe-002170d25c55 -- backup SATA drive + failed + # sudo mount /media/usb + # git annex get video/hackity_hack_and_kaxxt.mov + get video/hackity_hack_and_kaxxt.mov (from usbdrive...) ok diff --git a/doc/walkthrough/unused_data.mdwn b/doc/walkthrough/unused_data.mdwn new file mode 100644 index 0000000000..518550ac02 --- /dev/null +++ b/doc/walkthrough/unused_data.mdwn @@ -0,0 +1,30 @@ +It's possible for data to accumulate in the annex that no files in any +branch point to anymore. One way it can happen is if you `git rm` a file +without first calling `git annex drop`. And, when you modify an annexed +file, the old content of the file remains in the annex. Another way is when +migrating between key-value [[backends]]. + +This might be historical data you want to preserve, so git-annex defaults to +preserving it. So from time to time, you may want to check for such data and +eliminate it to save space. + + # git annex unused + unused . (checking for unused data...) + Some annexed data is no longer used by any files in the repository.
+ NUMBER KEY + 1 SHA256-s86050597--6ae2688bc533437766a48aa19f2c06be14d1bab9c70b468af445d4f07b65f41e + 2 SHA1-s14--f1358ec1873d57350e3dc62054dc232bc93c2bd1 + (To see where data was previously used, try: git log --stat -S'KEY') + (To remove unwanted data: git-annex dropunused NUMBER) + ok + +After running `git annex unused`, you can follow the instructions to examine +the history of files that used the data, and if you decide you don't need that +data anymore, you can easily remove it: + + # git annex dropunused 1 + dropunused 1 ok + +Hint: To drop a lot of unused data, use a command like this: + + # git annex dropunused `seq 1 1000` diff --git a/doc/walkthrough/using_bup.mdwn b/doc/walkthrough/using_bup.mdwn new file mode 100644 index 0000000000..3a6a8776aa --- /dev/null +++ b/doc/walkthrough/using_bup.mdwn @@ -0,0 +1,37 @@ +Another [[special_remote|special_remotes]] that git-annex can use is +a [[special_remotes/bup]] repository. Bup stores large file contents +in a git repository of its own, with deduplication. Combined with +git-annex, you can have git on both the frontend and the backend. + +Here's how to create a bup remote, and describe it. + +[[!template id=note text=""" +Instead of specifying a remote system, you could choose to make a bup +remote that is only accessible on the current system, by passing +"buprepo=/big/mybup". +"""]] + + # git annex initremote mybup type=bup encryption=none buprepo=example.com:/big/mybup + initremote bup (bup init) + Initialized empty Git repository in /big/mybup/ + ok + # git annex describe mybup "my bup repository at example.com" + describe mybup ok + +Now the remote can be used like any other remote. + + # git annex move my_cool_big_file --to mybup + move my_cool_big_file (to mybup...) + Receiving index from server: 1100/1100, done. + ok + +Note that, unlike other remotes, bup does not really support removing +content from its git repositories. This is a feature. 
:) + + # git annex move my_cool_big_file --from mybup + move my_cool_big_file (from mybup...) + content cannot be removed from bup remote + failed + git-annex: 1 failed + +See [[special_remotes/bup]] for details. diff --git a/doc/walkthrough/using_ssh_remotes.mdwn b/doc/walkthrough/using_ssh_remotes.mdwn new file mode 100644 index 0000000000..60011a200b --- /dev/null +++ b/doc/walkthrough/using_ssh_remotes.mdwn @@ -0,0 +1,33 @@ +So far in this walkthrough, git-annex has been used with a remote +repository on a USB drive. But it can also be used with a git remote +that is truly remote, a host accessed by ssh. + +Say you have a desktop on the same network as your laptop and want +to clone the laptop's annex to it: + + # git clone ssh://mylaptop/home/me/annex ~/annex + # cd ~/annex + # git annex init "my desktop" + +Now you can get files and they will be transferred (using `rsync` via `ssh`): + + # git annex get my_cool_big_file + get my_cool_big_file (getting UUID for origin...) (from origin...) + SHA256-s86050597--6ae2688bc533437766a48aa19f2c06be14d1bab9c70b468af445d4f07b65f41e 100% 2159 2.1KB/s 00:00 + ok + +When you drop files, git-annex will ssh over to the remote and make +sure the file's content is still there before removing it locally: + + # git annex drop my_cool_big_file + drop my_cool_big_file (checking origin..) ok + +Note that normally git-annex prefers to use non-ssh remotes, like +a USB drive, before ssh remotes. They are assumed to be faster/cheaper to +access, if available. There is an annex-cost setting you can configure in +`.git/config` to adjust which repositories it prefers. See +[[the_man_page|git-annex]] for details. + +Also, note that you need full shell access for this to work -- +git-annex needs to be able to ssh in and run commands. Or at least, +your shell needs to be able to run the [[git-annex-shell]] command.
diff --git a/git-annex-shell.hs b/git-annex-shell.hs new file mode 100644 index 0000000000..872dabc58b --- /dev/null +++ b/git-annex-shell.hs @@ -0,0 +1,112 @@ +{- git-annex-shell main program + - + - Copyright 2010 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +import System.Environment +import System.Console.GetOpt + +import Common.Annex +import qualified Git.Construct +import CmdLine +import Command +import Annex.UUID + +import qualified Command.ConfigList +import qualified Command.InAnnex +import qualified Command.DropKey +import qualified Command.RecvKey +import qualified Command.SendKey + +cmds_readonly :: [Command] +cmds_readonly = concat + [ Command.ConfigList.def + , Command.InAnnex.def + , Command.SendKey.def + ] + +cmds_notreadonly :: [Command] +cmds_notreadonly = concat + [ Command.RecvKey.def + , Command.DropKey.def + ] + +cmds :: [Command] +cmds = map adddirparam $ cmds_readonly ++ cmds_notreadonly + where + adddirparam c = c { cmdparams = "DIRECTORY " ++ cmdparams c } + +options :: [OptDescr (Annex ())] +options = commonOptions ++ + [ Option [] ["uuid"] (ReqArg checkuuid paramUUID) "repository uuid" + ] + where + checkuuid expected = getUUID >>= check + where + check u | u == toUUID expected = return () + check NoUUID = unexpected "uninitialized repository" + check u = unexpected $ "UUID " ++ fromUUID u + unexpected s = error $ + "expected repository UUID " ++ + expected ++ " but found " ++ s + +header :: String +header = "Usage: git-annex-shell [-c] command [parameters ...] [option ..]" + +main :: IO () +main = main' =<< getArgs + +main' :: [String] -> IO () +main' [] = failure +-- skip leading -c options, passed by eg, ssh +main' ("-c":p) = main' p +-- a command can be either a builtin or something to pass to git-shell +main' c@(cmd:dir:params) + | cmd `elem` builtins = builtin cmd dir params + | otherwise = external c +main' c@(cmd:_) + -- Handle the case of being the user's login shell. 
It will be passed + -- a single string containing all the real parameters. + | "git-annex-shell " `isPrefixOf` cmd = main' $ drop 1 $ shellUnEscape cmd + | cmd `elem` builtins = failure + | otherwise = external c + +builtins :: [String] +builtins = map cmdname cmds + +builtin :: String -> String -> [String] -> IO () +builtin cmd dir params = do + checkNotReadOnly cmd + dispatch (cmd : filterparams params) cmds options header $ + Git.Construct.repoAbsPath dir >>= Git.Construct.fromAbsPath + +external :: [String] -> IO () +external params = do + checkNotLimited + unlessM (boolSystem "git-shell" $ map Param $ "-c":filterparams params) $ + error "git-shell failed" + +-- Drop all args after "--". +-- These tend to be passed by rsync and not useful. +filterparams :: [String] -> [String] +filterparams [] = [] +filterparams ("--":_) = [] +filterparams (a:as) = a:filterparams as + +failure :: IO () +failure = error $ "bad parameters\n\n" ++ usage header cmds options + +checkNotLimited :: IO () +checkNotLimited = checkEnv "GIT_ANNEX_SHELL_LIMITED" + +checkNotReadOnly :: String -> IO () +checkNotReadOnly cmd + | cmd `elem` map cmdname cmds_readonly = return () + | otherwise = checkEnv "GIT_ANNEX_SHELL_READONLY" + +checkEnv :: String -> IO () +checkEnv var = + whenM (not . 
null <$> catchDefaultIO (getEnv var) "") $ + error $ "Action blocked by " ++ var diff --git a/git-annex.cabal b/git-annex.cabal new file mode 100644 index 0000000000..ae6a129b3b --- /dev/null +++ b/git-annex.cabal @@ -0,0 +1,44 @@ +Name: git-annex +Version: 3.20111212 +Cabal-Version: >= 1.6 +License: GPL +Maintainer: Joey Hess +Author: Joey Hess +Stability: Stable +Copyright: 2010-2011 Joey Hess +License-File: GPL +Extra-Source-Files: use-make-sdist-instead +Homepage: http://git-annex.branchable.com/ +Build-type: Custom +Category: Utility +Synopsis: manage files with git, without checking their contents into git +Description: + git-annex allows managing files with git, without checking the file + contents into git. While that may seem paradoxical, it is useful when + dealing with files larger than git can currently easily handle, whether due + to limitations in memory, checksumming time, or disk space. + . + Even without file content tracking, being able to manage files with git, + move files around and delete files with versioned directory trees, and use + branches and distributed clones, are all very handy reasons to use git. And + annexed files can co-exist in the same git repository with regularly + versioned files, which is convenient for maintaining documents, Makefiles, + etc that are associated with annexed files but that benefit from full + revision control. 
+ +Executable git-annex + Main-Is: git-annex.hs + Build-Depends: MissingH, hslogger, directory, filepath, + unix, containers, utf8-string, network, mtl, bytestring, old-locale, time, + pcre-light, extensible-exceptions, dataenc, SHA, process, hS3, HTTP, + base < 5, monad-control < 0.3, json + +Executable git-annex-shell + Main-Is: git-annex-shell.hs + +Executable git-union-merge + Main-Is: git-union-merge.hs + +source-repository head + type: git + location: git://git-annex.branchable.com/ diff --git a/git-annex.hs b/git-annex.hs new file mode 100644 index 0000000000..a53697cdbb --- /dev/null +++ b/git-annex.hs @@ -0,0 +1,13 @@ +{- git-annex main program stub + - + - Copyright 2010 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +import System.Environment + +import GitAnnex + +main :: IO () +main = run =<< getArgs diff --git a/git-union-merge.hs b/git-union-merge.hs new file mode 100644 index 0000000000..6fd19c8dae --- /dev/null +++ b/git-union-merge.hs @@ -0,0 +1,50 @@ +{- git-union-merge program + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. 
+ -} + +import System.Environment + +import Common +import qualified Git.UnionMerge +import qualified Git.Config +import qualified Git.Construct +import qualified Git.Branch +import qualified Git.Index +import qualified Git + +header :: String +header = "Usage: git-union-merge ref ref newref" + +usage :: IO a +usage = error $ "bad parameters\n\n" ++ header + +tmpIndex :: Git.Repo -> FilePath +tmpIndex g = Git.gitDir g </> "index.git-union-merge" + +setup :: Git.Repo -> IO () +setup = cleanup -- idempotency + +cleanup :: Git.Repo -> IO () +cleanup g = do + e' <- doesFileExist (tmpIndex g) + when e' $ removeFile (tmpIndex g) + +parseArgs :: IO [String] +parseArgs = do + args <- getArgs + if length args /= 3 + then usage + else return args + +main :: IO () +main = do + [aref, bref, newref] <- map Git.Ref <$> parseArgs + g <- Git.Config.read =<< Git.Construct.fromCwd + _ <- Git.Index.override (tmpIndex g) + setup g + Git.UnionMerge.merge aref bref g + _ <- Git.Branch.commit "union merge" newref [aref, bref] g + cleanup g diff --git a/mdwn2man b/mdwn2man new file mode 100755 index 0000000000..ad6d3c6026 --- /dev/null +++ b/mdwn2man @@ -0,0 +1,43 @@ +#!/usr/bin/perl +# Warning: hack + +my $prog=shift; +my $section=shift; + +print ".TH $prog $section\n"; + +while (<>) { + s{(\\?)\[\[([^\s\|\]]+)(\|[^\s\]]+)?\]\]}{$1 ?
"[[$2]]" : $2}eg; + s/\`//g; + s/^\s*\./\\&./g; + if (/^#\s/) { + s/^#\s/.SH /; + <>; # blank; + } + s/^[ \n]+//; + s/^\t/ /; + s/-/\\-/g; + s/^Warning:.*//g; + s/^$/.PP\n/; + s/^\*\s+(.*)/.IP "$1"/; + next if $_ eq ".PP\n" && $skippara; + if (/^.IP /) { + $inlist=1; + $skippara=0; + } + elsif (/.SH/) { + $skippara=0; + $inlist=0; + } + elsif (/^\./) { + $skippara=1; + } + else { + $skippara=0; + } + if ($inlist && $_ eq ".PP\n") { + $_=".IP\n"; + } + + print $_; +} diff --git a/test.hs b/test.hs new file mode 100644 index 0000000000..a2fa98e4df --- /dev/null +++ b/test.hs @@ -0,0 +1,949 @@ +{- git-annex test suite + - + - Copyright 2010,2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +import Test.HUnit +import Test.HUnit.Tools +import Test.QuickCheck + +import System.Posix.Directory (changeWorkingDirectory) +import System.Posix.Files +import Control.Exception (bracket_, bracket, throw) +import System.IO.Error +import System.Posix.Env +import qualified Control.Exception.Extensible as E +import qualified Data.Map as M +import System.IO.HVFS (SystemFS(..)) +import Text.JSON + +import Common + +import qualified Utility.SafeCommand +import qualified Annex +import qualified Annex.UUID +import qualified Backend +import qualified Git.Config +import qualified Git.Construct +import qualified Git.Filename +import qualified Locations +import qualified Types.Backend +import qualified Types +import qualified GitAnnex +import qualified Logs.Location +import qualified Logs.UUIDBased +import qualified Logs.Trust +import qualified Logs.Remote +import qualified Remote +import qualified Command.DropUnused +import qualified Types.Key +import qualified Config +import qualified Crypto +import qualified Utility.Path +import qualified Utility.FileMode +import qualified Utility.Gpg +import qualified Build.SysConfig +import qualified Utility.Format + +-- for quickcheck +instance Arbitrary Types.Key.Key where + arbitrary = do + n <- arbitrary + b <- elements
['A'..'Z'] + return Types.Key.Key { + Types.Key.keyName = n, + Types.Key.keyBackendName = [b], + Types.Key.keySize = Nothing, + Types.Key.keyMtime = Nothing + } + +main :: IO () +main = do + prepare + r <- runVerboseTests $ TestList [quickcheck, blackbox] + cleanup tmpdir + propigate r + +propigate :: (Counts, Int) -> IO () +propigate (Counts { errors = e , failures = f }, _) + | e+f > 0 = error "failed" + | otherwise = return () + +quickcheck :: Test +quickcheck = TestLabel "quickcheck" $ TestList + [ qctest "prop_idempotent_deencode_git" Git.Filename.prop_idempotent_deencode + , qctest "prop_idempotent_deencode" Utility.Format.prop_idempotent_deencode + , qctest "prop_idempotent_fileKey" Locations.prop_idempotent_fileKey + , qctest "prop_idempotent_key_read_show" Types.Key.prop_idempotent_key_read_show + , qctest "prop_idempotent_shellEscape" Utility.SafeCommand.prop_idempotent_shellEscape + , qctest "prop_idempotent_shellEscape_multiword" Utility.SafeCommand.prop_idempotent_shellEscape_multiword + , qctest "prop_idempotent_configEscape" Logs.Remote.prop_idempotent_configEscape + , qctest "prop_parentDir_basics" Utility.Path.prop_parentDir_basics + + , qctest "prop_relPathDirToFile_basics" Utility.Path.prop_relPathDirToFile_basics + , qctest "prop_cost_sane" Config.prop_cost_sane + , qctest "prop_hmacWithCipher_sane" Crypto.prop_hmacWithCipher_sane + , qctest "prop_TimeStamp_sane" Logs.UUIDBased.prop_TimeStamp_sane + , qctest "prop_addLog_sane" Logs.UUIDBased.prop_addLog_sane + ] + +blackbox :: Test +blackbox = TestLabel "blackbox" $ TestList + -- test order matters, later tests may rely on state from earlier + [ test_init + , test_add + , test_reinject + , test_unannex + , test_drop + , test_get + , test_move + , test_copy + , test_lock + , test_edit + , test_fix + , test_trust + , test_fsck + , test_migrate + , test_unused + , test_addurl + , test_describe + , test_find + , test_merge + , test_status + , test_version + , test_sync + , test_map + , test_uninit + 
, test_upgrade + , test_whereis + , test_hook_remote + , test_directory_remote + , test_rsync_remote + , test_bup_remote + , test_crypto + ] + +test_init :: Test +test_init = "git-annex init" ~: TestCase $ innewrepo $ do + git_annex "init" [reponame] @? "init failed" + where + reponame = "test repo" + +test_add :: Test +test_add = "git-annex add" ~: TestList [basic, sha1dup, subdirs] + where + -- this test case runs in the main repo, to set up a basic + -- annexed file that later tests will use + basic = TestCase $ inmainrepo $ do + writeFile annexedfile $ content annexedfile + git_annex "add" [annexedfile] @? "add failed" + annexed_present annexedfile + writeFile sha1annexedfile $ content sha1annexedfile + git_annex "add" [sha1annexedfile, "--backend=SHA1"] @? "add with SHA1 failed" + annexed_present sha1annexedfile + checkbackend sha1annexedfile backendSHA1 + writeFile wormannexedfile $ content wormannexedfile + git_annex "add" [wormannexedfile, "--backend=WORM"] @? "add with WORM failed" + annexed_present wormannexedfile + checkbackend wormannexedfile backendWORM + boolSystem "git" [Params "rm --force -q", File wormannexedfile] @? "git rm failed" + writeFile ingitfile $ content ingitfile + boolSystem "git" [Param "add", File ingitfile] @? "git add failed" + boolSystem "git" [Params "commit -q -a -m commit"] @? "git commit failed" + git_annex "add" [ingitfile] @? "add ingitfile should be no-op" + unannexed ingitfile + sha1dup = TestCase $ intmpclonerepo $ do + writeFile sha1annexedfiledup $ content sha1annexedfiledup + git_annex "add" [sha1annexedfiledup, "--backend=SHA1"] @? "add of second file with same SHA1 failed" + annexed_present sha1annexedfiledup + annexed_present sha1annexedfile + subdirs = TestCase $ intmpclonerepo $ do + createDirectory "dir" + writeFile "dir/foo" $ content annexedfile + git_annex "add" ["dir"] @? 
"add of subdir failed" + createDirectory "dir2" + writeFile "dir2/foo" $ content annexedfile + changeWorkingDirectory "dir" + git_annex "add" ["../dir2"] @? "add of ../subdir failed" + +test_reinject :: Test +test_reinject = "git-annex reinject/fromkey" ~: TestCase $ intmpclonerepo $ do + git_annex "drop" ["--force", sha1annexedfile] @? "drop failed" + writeFile tmp $ content sha1annexedfile + r <- annexeval $ Types.Backend.getKey backendSHA1 tmp + let key = show $ fromJust r + git_annex "reinject" [tmp, sha1annexedfile] @? "reinject failed" + git_annex "fromkey" [key, sha1annexedfiledup] @? "fromkey failed" + annexed_present sha1annexedfiledup + where + tmp = "tmpfile" + +test_unannex :: Test +test_unannex = "git-annex unannex" ~: TestList [nocopy, withcopy] + where + nocopy = "no content" ~: intmpclonerepo $ do + annexed_notpresent annexedfile + git_annex "unannex" [annexedfile] @? "unannex failed with no copy" + annexed_notpresent annexedfile + withcopy = "with content" ~: intmpclonerepo $ do + git_annex "get" [annexedfile] @? "get failed" + annexed_present annexedfile + git_annex "unannex" [annexedfile, sha1annexedfile] @? "unannex failed" + unannexed annexedfile + git_annex "unannex" [annexedfile] @? "unannex failed on non-annexed file" + unannexed annexedfile + git_annex "unannex" [ingitfile] @? "unannex ingitfile should be no-op" + unannexed ingitfile + +test_drop :: Test +test_drop = "git-annex drop" ~: TestList [noremote, withremote, untrustedremote] + where + noremote = "no remotes" ~: TestCase $ intmpclonerepo $ do + git_annex "get" [annexedfile] @? "get failed" + boolSystem "git" [Params "remote rm origin"] + @? "git remote rm origin failed" + not <$> git_annex "drop" [annexedfile] @? "drop wrongly succeeded with no known copy of file" + annexed_present annexedfile + git_annex "drop" ["--force", annexedfile] @? "drop --force failed" + annexed_notpresent annexedfile + git_annex "drop" [annexedfile] @? 
"drop of dropped file failed" + git_annex "drop" [ingitfile] @? "drop ingitfile should be no-op" + unannexed ingitfile + withremote = "with remote" ~: TestCase $ intmpclonerepo $ do + git_annex "get" [annexedfile] @? "get failed" + annexed_present annexedfile + git_annex "drop" [annexedfile] @? "drop failed though origin has copy" + annexed_notpresent annexedfile + inmainrepo $ annexed_present annexedfile + untrustedremote = "untrusted remote" ~: TestCase $ intmpclonerepo $ do + git_annex "untrust" ["origin"] @? "untrust of origin failed" + git_annex "get" [annexedfile] @? "get failed" + annexed_present annexedfile + not <$> git_annex "drop" [annexedfile] @? "drop wrongly suceeded with only an untrusted copy of the file" + annexed_present annexedfile + inmainrepo $ annexed_present annexedfile + +test_get :: Test +test_get = "git-annex get" ~: TestCase $ intmpclonerepo $ do + inmainrepo $ annexed_present annexedfile + annexed_notpresent annexedfile + git_annex "get" [annexedfile] @? "get of file failed" + inmainrepo $ annexed_present annexedfile + annexed_present annexedfile + git_annex "get" [annexedfile] @? "get of file already here failed" + inmainrepo $ annexed_present annexedfile + annexed_present annexedfile + inmainrepo $ unannexed ingitfile + unannexed ingitfile + git_annex "get" [ingitfile] @? "get ingitfile should be no-op" + inmainrepo $ unannexed ingitfile + unannexed ingitfile + +test_move :: Test +test_move = "git-annex move" ~: TestCase $ intmpclonerepo $ do + annexed_notpresent annexedfile + inmainrepo $ annexed_present annexedfile + git_annex "move" ["--from", "origin", annexedfile] @? "move --from of file failed" + annexed_present annexedfile + inmainrepo $ annexed_notpresent annexedfile + git_annex "move" ["--from", "origin", annexedfile] @? "move --from of file already here failed" + annexed_present annexedfile + inmainrepo $ annexed_notpresent annexedfile + git_annex "move" ["--to", "origin", annexedfile] @? 
"move --to of file failed" + inmainrepo $ annexed_present annexedfile + annexed_notpresent annexedfile + git_annex "move" ["--to", "origin", annexedfile] @? "move --to of file already there failed" + inmainrepo $ annexed_present annexedfile + annexed_notpresent annexedfile + unannexed ingitfile + inmainrepo $ unannexed ingitfile + git_annex "move" ["--to", "origin", ingitfile] @? "move of ingitfile should be no-op" + unannexed ingitfile + inmainrepo $ unannexed ingitfile + git_annex "move" ["--from", "origin", ingitfile] @? "move of ingitfile should be no-op" + unannexed ingitfile + inmainrepo $ unannexed ingitfile + +test_copy :: Test +test_copy = "git-annex copy" ~: TestCase $ intmpclonerepo $ do + annexed_notpresent annexedfile + inmainrepo $ annexed_present annexedfile + git_annex "copy" ["--from", "origin", annexedfile] @? "copy --from of file failed" + annexed_present annexedfile + inmainrepo $ annexed_present annexedfile + git_annex "copy" ["--from", "origin", annexedfile] @? "copy --from of file already here failed" + annexed_present annexedfile + inmainrepo $ annexed_present annexedfile + git_annex "copy" ["--to", "origin", annexedfile] @? "copy --to of file already there failed" + annexed_present annexedfile + inmainrepo $ annexed_present annexedfile + git_annex "move" ["--to", "origin", annexedfile] @? "move --to of file already there failed" + annexed_notpresent annexedfile + inmainrepo $ annexed_present annexedfile + unannexed ingitfile + inmainrepo $ unannexed ingitfile + git_annex "copy" ["--to", "origin", ingitfile] @? "copy of ingitfile should be no-op" + unannexed ingitfile + inmainrepo $ unannexed ingitfile + git_annex "copy" ["--from", "origin", ingitfile] @? 
"copy of ingitfile should be no-op" + checkregularfile ingitfile + checkcontent ingitfile + +test_lock :: Test +test_lock = "git-annex unlock/lock" ~: intmpclonerepo $ do + -- regression test: unlock of not present file should skip it + annexed_notpresent annexedfile + not <$> git_annex "unlock" [annexedfile] @? "unlock failed to fail with not present file" + annexed_notpresent annexedfile + + git_annex "get" [annexedfile] @? "get of file failed" + annexed_present annexedfile + git_annex "unlock" [annexedfile] @? "unlock failed" + unannexed annexedfile + -- write different content, to verify that lock + -- throws it away + changecontent annexedfile + writeFile annexedfile $ content annexedfile ++ "foo" + git_annex "lock" [annexedfile] @? "lock failed" + annexed_present annexedfile + git_annex "unlock" [annexedfile] @? "unlock failed" + unannexed annexedfile + changecontent annexedfile + git_annex "add" [annexedfile] @? "add of modified file failed" + runchecks [checklink, checkunwritable] annexedfile + c <- readFile annexedfile + assertEqual "content of modified file" c (changedcontent annexedfile) + r' <- git_annex "drop" [annexedfile] + not r' @? "drop wrongly succeeded with no known copy of modified file" + +test_edit :: Test +test_edit = "git-annex edit/commit" ~: TestList [t False, t True] + where t precommit = TestCase $ intmpclonerepo $ do + git_annex "get" [annexedfile] @? "get of file failed" + annexed_present annexedfile + git_annex "edit" [annexedfile] @? "edit failed" + unannexed annexedfile + changecontent annexedfile + if precommit + then do + -- pre-commit depends on the file being + -- staged, normally git commit does this + boolSystem "git" [Param "add", File annexedfile] + @? "git add of edited file failed" + git_annex "pre-commit" [] + @? "pre-commit failed" + else do + boolSystem "git" [Params "commit -q -a -m contentchanged"] + @? 
"git commit of edited file failed" + runchecks [checklink, checkunwritable] annexedfile + c <- readFile annexedfile + assertEqual "content of modified file" c (changedcontent annexedfile) + not <$> git_annex "drop" [annexedfile] @? "drop wrongly succeeded with no known copy of modified file" + +test_fix :: Test +test_fix = "git-annex fix" ~: intmpclonerepo $ do + annexed_notpresent annexedfile + git_annex "fix" [annexedfile] @? "fix of not present failed" + annexed_notpresent annexedfile + git_annex "get" [annexedfile] @? "get of file failed" + annexed_present annexedfile + git_annex "fix" [annexedfile] @? "fix of present file failed" + annexed_present annexedfile + createDirectory subdir + boolSystem "git" [Param "mv", File annexedfile, File subdir] + @? "git mv failed" + git_annex "fix" [newfile] @? "fix of moved file failed" + runchecks [checklink, checkunwritable] newfile + c <- readFile newfile + assertEqual "content of moved file" c (content annexedfile) + where + subdir = "s" + newfile = subdir ++ "/" ++ annexedfile + +test_trust :: Test +test_trust = "git-annex trust/untrust/semitrust/dead" ~: intmpclonerepo $ do + git_annex "trust" [repo] @? "trust failed" + trustcheck Logs.Trust.Trusted "trusted 1" + git_annex "trust" [repo] @? "trust of trusted failed" + trustcheck Logs.Trust.Trusted "trusted 2" + git_annex "untrust" [repo] @? "untrust failed" + trustcheck Logs.Trust.UnTrusted "untrusted 1" + git_annex "untrust" [repo] @? "untrust of untrusted failed" + trustcheck Logs.Trust.UnTrusted "untrusted 2" + git_annex "dead" [repo] @? "dead failed" + trustcheck Logs.Trust.DeadTrusted "deadtrusted 1" + git_annex "dead" [repo] @? "dead of dead failed" + trustcheck Logs.Trust.DeadTrusted "deadtrusted 2" + git_annex "semitrust" [repo] @? "semitrust failed" + trustcheck Logs.Trust.SemiTrusted "semitrusted 1" + git_annex "semitrust" [repo] @? 
"semitrust of semitrusted failed" + trustcheck Logs.Trust.SemiTrusted "semitrusted 2" + where + repo = "origin" + trustcheck expected msg = do + present <- annexeval $ do + l <- Logs.Trust.trustGet expected + u <- Remote.nameToUUID repo + return $ u `elem` l + assertBool msg present + +test_fsck :: Test +test_fsck = "git-annex fsck" ~: TestList [basicfsck, barefsck, withlocaluntrusted, withremoteuntrusted] + where + basicfsck = TestCase $ intmpclonerepo $ do + git_annex "fsck" [] @? "fsck failed" + boolSystem "git" [Params "config annex.numcopies 2"] @? "git config failed" + fsck_should_fail "numcopies unsatisfied" + boolSystem "git" [Params "config annex.numcopies 1"] @? "git config failed" + corrupt annexedfile + corrupt sha1annexedfile + barefsck = TestCase $ intmpbareclonerepo $ do + git_annex "fsck" [] @? "fsck failed" + withlocaluntrusted = TestCase $ intmpclonerepo $ do + git_annex "get" [annexedfile] @? "get failed" + git_annex "untrust" ["origin"] @? "untrust of origin repo failed" + git_annex "untrust" ["."] @? "untrust of current repo failed" + fsck_should_fail "content only available in untrusted (current) repository" + git_annex "trust" ["."] @? "trust of current repo failed" + git_annex "fsck" [annexedfile] @? "fsck failed on file present in trusted repo" + withremoteuntrusted = TestCase $ intmpclonerepo $ do + boolSystem "git" [Params "config annex.numcopies 2"] @? "git config failed" + git_annex "get" [annexedfile] @? "get failed" + git_annex "get" [sha1annexedfile] @? "get failed" + git_annex "fsck" [] @? "fsck failed with numcopies=2 and 2 copies" + git_annex "untrust" ["origin"] @? "untrust of origin failed" + fsck_should_fail "content not replicated to enough non-untrusted repositories" + + corrupt f = do + git_annex "get" [f] @? "get of file failed" + Utility.FileMode.allowWrite f + writeFile f (changedcontent f) + not <$> git_annex "fsck" [] @? "fsck failed to fail with corrupted file content" + git_annex "fsck" [] @? 
"fsck unexpectedly failed again; previous one did not fix problem with " ++ f + fsck_should_fail m = do + not <$> git_annex "fsck" [] @? "fsck failed to fail with " ++ m + +test_migrate :: Test +test_migrate = "git-annex migrate" ~: TestList [t False, t True] + where t usegitattributes = TestCase $ intmpclonerepo $ do + annexed_notpresent annexedfile + annexed_notpresent sha1annexedfile + git_annex "migrate" [annexedfile] @? "migrate of not present failed" + git_annex "migrate" [sha1annexedfile] @? "migrate of not present failed" + git_annex "get" [annexedfile] @? "get of file failed" + git_annex "get" [sha1annexedfile] @? "get of file failed" + annexed_present annexedfile + annexed_present sha1annexedfile + if usegitattributes + then do + writeFile ".gitattributes" $ "* annex.backend=SHA1" + git_annex "migrate" [sha1annexedfile] + @? "migrate sha1annexedfile failed" + git_annex "migrate" [annexedfile] + @? "migrate annexedfile failed" + else do + git_annex "migrate" [sha1annexedfile, "--backend", "SHA1"] + @? "migrate sha1annexedfile failed" + git_annex "migrate" [annexedfile, "--backend", "SHA1"] + @? "migrate annexedfile failed" + annexed_present annexedfile + annexed_present sha1annexedfile + checkbackend annexedfile backendSHA1 + checkbackend sha1annexedfile backendSHA1 + + -- check that reversing a migration works + writeFile ".gitattributes" $ "* annex.backend=SHA256" + git_annex "migrate" [sha1annexedfile] + @? "migrate sha1annexedfile failed" + git_annex "migrate" [annexedfile] + @? "migrate annexedfile failed" + annexed_present annexedfile + annexed_present sha1annexedfile + checkbackend annexedfile backendSHA256 + checkbackend sha1annexedfile backendSHA256 + +test_unused :: Test +test_unused = "git-annex unused/dropunused" ~: intmpclonerepo $ do + -- keys have to be looked up before files are removed + annexedfilekey <- annexeval $ findkey annexedfile + sha1annexedfilekey <- annexeval $ findkey sha1annexedfile + git_annex "get" [annexedfile] @? 
"get of file failed" + git_annex "get" [sha1annexedfile] @? "get of file failed" + checkunused [] + boolSystem "git" [Params "rm -q", File annexedfile] @? "git rm failed" + checkunused [] + boolSystem "git" [Params "commit -q -m foo"] @? "git commit failed" + checkunused [] + -- unused checks origin/master; once it's gone it is really unused + boolSystem "git" [Params "remote rm origin"] @? "git remote rm origin failed" + checkunused [annexedfilekey] + boolSystem "git" [Params "rm -q", File sha1annexedfile] @? "git rm failed" + boolSystem "git" [Params "commit -q -m foo"] @? "git commit failed" + checkunused [annexedfilekey, sha1annexedfilekey] + + -- good opportunity to test dropkey also + git_annex "dropkey" ["--force", show annexedfilekey] + @? "dropkey failed" + checkunused [sha1annexedfilekey] + + git_annex "dropunused" ["1", "2"] @? "dropunused failed" + checkunused [] + git_annex "dropunused" ["10", "501"] @? "dropunused failed on bogus numbers" + + where + checkunused expectedkeys = do + git_annex "unused" [] @? "unused failed" + unusedmap <- annexeval $ Command.DropUnused.readUnusedLog "" + let unusedkeys = M.elems unusedmap + assertEqual "unused keys differ" + (sort expectedkeys) (sort unusedkeys) + findkey f = do + r <- Backend.lookupFile f + return $ fst $ fromJust r + +test_addurl :: Test +test_addurl = "git-annex addurl" ~: intmpclonerepo $ do + annexed_notpresent annexedfile + -- can't check download; test suite should not access network, + -- and starting up a web server seems excessive + git_annex "addurl" ["--fast", "http://example.com/nosuchfile"] @? "addurl failed" + +test_describe :: Test +test_describe = "git-annex describe" ~: intmpclonerepo $ do + git_annex "describe" [".", "this repo"] @? "describe 1 failed" + git_annex "describe" ["origin", "origin repo"] @? 
"describe 2 failed" + +test_find :: Test +test_find = "git-annex find" ~: intmpclonerepo $ do + annexed_notpresent annexedfile + git_annex_expectoutput "find" [] [] + git_annex "get" [annexedfile] @? "get failed" + annexed_present annexedfile + annexed_notpresent sha1annexedfile + git_annex_expectoutput "find" [] [annexedfile] + git_annex_expectoutput "find" ["--exclude", annexedfile, "--and", "--exclude", sha1annexedfile] [] + git_annex_expectoutput "find" ["--include", annexedfile] [annexedfile] + git_annex_expectoutput "find" ["--not", "--in", "origin"] [] + git_annex_expectoutput "find" ["--copies", "1", "--and", "--not", "--copies", "2"] [sha1annexedfile] + git_annex_expectoutput "find" ["--inbackend", "SHA1"] [sha1annexedfile] + git_annex_expectoutput "find" ["--inbackend", "WORM"] [] + +test_merge :: Test +test_merge = "git-annex merge" ~: intmpclonerepo $ do + git_annex "merge" [] @? "merge failed" + +test_status :: Test +test_status = "git-annex status" ~: intmpclonerepo $ do + json <- git_annex_output "status" ["--json"] + case Text.JSON.decodeStrict json :: Text.JSON.Result (JSObject JSValue) of + Ok _ -> return () + Error e -> assertFailure e + +test_version :: Test +test_version = "git-annex version" ~: intmpclonerepo $ do + git_annex "version" [] @? "version failed" + +test_sync :: Test +test_sync = "git-annex sync" ~: intmpclonerepo $ do + git_annex "sync" [] @? "sync failed" + +test_map :: Test +test_map = "git-annex map" ~: intmpclonerepo $ do + -- set descriptions, that will be looked for in the map + git_annex "describe" [".", "this repo"] @? "describe 1 failed" + git_annex "describe" ["origin", "origin repo"] @? "describe 2 failed" + -- --fast avoids it running graphviz, not a build dependency + git_annex "map" ["--fast"] @? "map failed" + doesFileExist "map.dot" @? "map.dot not generated" + c <- readFile "map.dot" + ("this repo" `isInfixOf` c && "origin repo" `isInfixOf` c) @? 
("map.dot bad content: " ++ c) + +test_uninit :: Test +test_uninit = "git-annex uninit" ~: intmpclonerepo $ do + git_annex "get" [] @? "get failed" + annexed_present annexedfile + boolSystem "git" [Params "checkout git-annex"] @? "git checkout git-annex" + not <$> git_annex "uninit" [] @? "uninit failed to fail when git-annex branch was checked out" + boolSystem "git" [Params "checkout master"] @? "git checkout master" + _ <- git_annex "uninit" [] -- exit status not checked; does abnormal exit + checkregularfile annexedfile + doesDirectoryExist ".git" @? ".git vanished in uninit" + not <$> doesDirectoryExist ".git/annex" @? ".git/annex still present after uninit" + +test_upgrade :: Test +test_upgrade = "git-annex upgrade" ~: intmpclonerepo $ do + git_annex "upgrade" [] @? "upgrade from same version failed" + +test_whereis :: Test +test_whereis = "git-annex whereis" ~: intmpclonerepo $ do + annexed_notpresent annexedfile + git_annex "whereis" [annexedfile] @? "whereis on non-present file failed" + git_annex "untrust" ["origin"] @? "untrust failed" + not <$> git_annex "whereis" [annexedfile] @? "whereis on non-present file only present in untrusted repo failed to fail" + git_annex "get" [annexedfile] @? "get failed" + annexed_present annexedfile + git_annex "whereis" [annexedfile] @? "whereis on present file failed" + +test_hook_remote :: Test +test_hook_remote = "git-annex hook remote" ~: intmpclonerepo $ do + git_annex "initremote" (words "foo type=hook encryption=none hooktype=foo") @? "initremote failed" + createDirectory dir + git_config "annex.foo-store-hook" $ + "cp $ANNEX_FILE " ++ loc + git_config "annex.foo-retrieve-hook" $ + "cp " ++ loc ++ " $ANNEX_FILE" + git_config "annex.foo-remove-hook" $ + "rm -f " ++ loc + git_config "annex.foo-checkpresent-hook" $ + "if [ -e " ++ loc ++ " ]; then echo $ANNEX_KEY; fi" + git_annex "get" [annexedfile] @? "get of file failed" + annexed_present annexedfile + git_annex "copy" [annexedfile, "--to", "foo"] @? 
"copy --to hook remote failed" + annexed_present annexedfile + git_annex "drop" [annexedfile, "--numcopies=2"] @? "drop failed" + annexed_notpresent annexedfile + git_annex "move" [annexedfile, "--from", "foo"] @? "move --from hook remote failed" + annexed_present annexedfile + not <$> git_annex "drop" [annexedfile, "--numcopies=2"] @? "drop failed to fail" + annexed_present annexedfile + where + dir = "dir" + loc = dir ++ "/$ANNEX_KEY" + git_config k v = boolSystem "git" [Param "config", Param k, Param v] + @? "git config failed" + +test_directory_remote :: Test +test_directory_remote = "git-annex directory remote" ~: intmpclonerepo $ do + createDirectory "dir" + git_annex "initremote" (words $ "foo type=directory encryption=none directory=dir") @? "initremote failed" + git_annex "get" [annexedfile] @? "get of file failed" + annexed_present annexedfile + git_annex "copy" [annexedfile, "--to", "foo"] @? "copy --to directory remote failed" + annexed_present annexedfile + git_annex "drop" [annexedfile, "--numcopies=2"] @? "drop failed" + annexed_notpresent annexedfile + git_annex "move" [annexedfile, "--from", "foo"] @? "move --from directory remote failed" + annexed_present annexedfile + not <$> git_annex "drop" [annexedfile, "--numcopies=2"] @? "drop failed to fail" + annexed_present annexedfile + +test_rsync_remote :: Test +test_rsync_remote = "git-annex rsync remote" ~: intmpclonerepo $ do + createDirectory "dir" + git_annex "initremote" (words $ "foo type=rsync encryption=none rsyncurl=dir") @? "initremote failed" + git_annex "get" [annexedfile] @? "get of file failed" + annexed_present annexedfile + git_annex "copy" [annexedfile, "--to", "foo"] @? "copy --to rsync remote failed" + annexed_present annexedfile + git_annex "drop" [annexedfile, "--numcopies=2"] @? "drop failed" + annexed_notpresent annexedfile + git_annex "move" [annexedfile, "--from", "foo"] @? 
"move --from rsync remote failed" + annexed_present annexedfile + not <$> git_annex "drop" [annexedfile, "--numcopies=2"] @? "drop failed to fail" + annexed_present annexedfile + +test_bup_remote :: Test +test_bup_remote = "git-annex bup remote" ~: intmpclonerepo $ when Build.SysConfig.bup $ do + dir <- absPath "dir" -- bup special remote needs an absolute path + createDirectory dir + git_annex "initremote" (words $ "foo type=bup encryption=none buprepo="++dir) @? "initremote failed" + git_annex "get" [annexedfile] @? "get of file failed" + annexed_present annexedfile + git_annex "copy" [annexedfile, "--to", "foo"] @? "copy --to bup remote failed" + annexed_present annexedfile + git_annex "drop" [annexedfile, "--numcopies=2"] @? "drop failed" + annexed_notpresent annexedfile + git_annex "copy" [annexedfile, "--from", "foo"] @? "copy --from bup remote failed" + annexed_present annexedfile + not <$> git_annex "move" [annexedfile, "--from", "foo"] @? "move --from bup remote failed to fail" + annexed_present annexedfile + +-- gpg is not a build dependency, so only test when it's available +test_crypto :: Test +test_crypto = "git-annex crypto" ~: intmpclonerepo $ when Build.SysConfig.gpg $ do + Utility.Gpg.testTestHarness @? "test harness self-test failed" + Utility.Gpg.testHarness $ do + createDirectory "dir" + let initremote = git_annex "initremote" + [ "foo" + , "type=directory" + , "encryption=" ++ Utility.Gpg.testKeyId + , "directory=dir" + ] + initremote @? "initremote failed" + initremote @? "initremote failed when run twice in a row" + git_annex "get" [annexedfile] @? "get of file failed" + annexed_present annexedfile + git_annex "copy" [annexedfile, "--to", "foo"] @? "copy --to encrypted remote failed" + annexed_present annexedfile + git_annex "drop" [annexedfile, "--numcopies=2"] @? "drop failed" + annexed_notpresent annexedfile + git_annex "move" [annexedfile, "--from", "foo"] @? 
"move --from encrypted remote failed" + annexed_present annexedfile + not <$> git_annex "drop" [annexedfile, "--numcopies=2"] @? "drop failed to fail" + annexed_present annexedfile + +-- This is equivilant to running git-annex, but it's all run in-process +-- so test coverage collection works. +git_annex :: String -> [String] -> IO Bool +git_annex command params = do + -- catch all errors, including normally fatal errors + r <- E.try (run)::IO (Either E.SomeException ()) + case r of + Right _ -> return True + Left _ -> return False + where + run = GitAnnex.run (command:"-q":params) + +{- Runs git-annex and returns its output. -} +git_annex_output :: String -> [String] -> IO String +git_annex_output command params = do + (frompipe, topipe) <- createPipe + pid <- forkProcess $ do + _ <- dupTo topipe stdOutput + closeFd frompipe + _ <- git_annex command params + exitSuccess + -- XXX since the above is a separate process, code coverage stats are + -- not gathered for things run in it. + closeFd topipe + fromh <- fdToHandle frompipe + got <- hGetContentsStrict fromh + hClose fromh + _ <- getProcessStatus True False pid + -- XXX hack Run same command again, to get code coverage. + _ <- git_annex command params + return got + +git_annex_expectoutput :: String -> [String] -> [String] -> IO () +git_annex_expectoutput command params expected = do + got <- lines <$> git_annex_output command params + got == expected @? ("unexpected value running " ++ command ++ " " ++ show params ++ " -- got: " ++ show got ++ " expected: " ++ show expected) + +-- Runs an action in the current annex. Note that shutdown actions +-- are not run; this should only be used for actions that query state. 
annexeval :: Types.Annex a -> IO a
annexeval a = do
    g <- Git.Construct.fromCwd
    g' <- Git.Config.read g
    s <- Annex.new g'
    -- evaluate with quiet output so the action does not clutter test output
    Annex.eval s { Annex.output = Annex.QuietOutput } a

-- Runs an assertion inside the freshly set up main repository.
innewrepo :: Assertion -> Assertion
innewrepo a = withgitrepo $ \r -> indir r a

-- Runs an assertion inside the (already set up) main repository.
inmainrepo :: Assertion -> Assertion
inmainrepo = indir mainrepodir

-- Runs an assertion inside a temporary non-bare clone of the main
-- repository; the clone is removed afterwards.
intmpclonerepo :: Assertion -> Assertion
intmpclonerepo a = withtmpclonerepo False $ \r -> indir r a

-- Like intmpclonerepo, but the temporary clone is a bare repository.
intmpbareclonerepo :: Assertion -> Assertion
intmpbareclonerepo a = withtmpclonerepo True $ \r -> indir r a

-- Clones the main repository into tmprepodir (bare when the flag is
-- True), passes the clone's path to the action, and cleans the clone up
-- when the action finishes.
withtmpclonerepo :: Bool -> (FilePath -> Assertion) -> Assertion
withtmpclonerepo bare = bracket (clonerepo mainrepodir tmprepodir bare) cleanup

-- Sets up the main repository and passes its path to the action.
-- The repository is deliberately left in place afterwards (the release
-- action is just `return`).
withgitrepo :: (FilePath -> Assertion) -> Assertion
withgitrepo = bracket (setuprepo mainrepodir) return

-- Runs an assertion with the working directory changed to dir (taken
-- relative to TOPDIR, see changeToTmpDir), restoring the previous working
-- directory afterwards.
indir :: FilePath -> Assertion -> Assertion
indir dir a = do
    cwd <- getCurrentDirectory
    -- Assertion failures throw non-IO errors; catch
    -- any type of error and change back to cwd before
    -- rethrowing.
    r <- bracket_ (changeToTmpDir dir) (changeWorkingDirectory cwd)
        (E.try a :: IO (Either E.SomeException ()))
    either throw return r

-- Creates a fresh git repository in dir (removing any previous one) and
-- configures a test user name and email in it. Returns dir.
setuprepo :: FilePath -> IO FilePath
setuprepo dir = do
    cleanup dir
    ensuretmpdir
    boolSystem "git" [Params "init -q", File dir] @? "git init failed"
    indir dir $ do
        boolSystem "git" [Params "config user.name", Param "Test User"] @? "git config failed"
        boolSystem "git" [Params "config user.email test@example.com"] @? "git config failed"
    return dir

-- clones are always done as local clones; we cannot test ssh clones
--
-- Clones old into new (removing any previous new), optionally as a bare
-- repository, and runs "git annex init" in the clone. Returns new.
clonerepo :: FilePath -> FilePath -> Bool -> IO FilePath
clonerepo old new bare = do
    cleanup new
    ensuretmpdir
    let b = if bare then " --bare" else ""
    boolSystem "git" [Params ("clone -q" ++ b), File old, File new] @? "git clone failed"
    indir new $ git_annex "init" ["-q", new] @? "git annex init failed"
    return new

-- Creates tmpdir unless it already exists.
ensuretmpdir :: IO ()
ensuretmpdir = do
    e <- doesDirectoryExist tmpdir
    unless e $
        createDirectory tmpdir

-- Recursively removes dir, if it exists.
cleanup :: FilePath -> IO ()
cleanup dir = do
    e <- doesDirectoryExist dir
    when e $ do
        -- git-annex prevents annexed file content from being
        -- removed via directory permissions; undo
        subdirs <- filterM doesDirectoryExist =<< recurseDir SystemFS dir
        mapM_ Utility.FileMode.allowWrite subdirs
        removeDirectoryRecursive dir

-- Asserts that f is a symbolic link (the link itself is inspected, it is
-- not followed).
checklink :: FilePath -> Assertion
checklink f = do
    s <- getSymbolicLinkStatus f
    isSymbolicLink s @? f ++ " is not a symlink"

-- Asserts that f is a regular file and not, for instance, a symlink.
checkregularfile :: FilePath -> Assertion
checkregularfile f = do
    s <- getSymbolicLinkStatus f
    isRegularFile s @? f ++ " is not a normal file"

-- Asserts that f holds exactly the fixture content defined for it by
-- `content`.
checkcontent :: FilePath -> Assertion
checkcontent f = do
    c <- readFile f
    -- assertEqual takes the expected value first and the actual value
    -- second; the fixture is what is expected, the file is what we got.
    assertEqual ("checkcontent " ++ f) (content f) c

-- Asserts that the owner write bit of f is not set.
checkunwritable :: FilePath -> Assertion
checkunwritable f = do
    -- Look at permissions bits rather than trying to write or using
    -- fileAccess because if run as root, any file can be modified
    -- despite permissions.
    s <- getFileStatus f
    let mode = fileMode s
    -- adding the owner write bit changes nothing only if it was already set
    when (mode == mode `unionFileModes` ownerWriteMode) $
        assertFailure $ "able to modify annexed file's " ++ f ++ " content"

-- Asserts that f can be written, by rewriting it with its fixture content.
checkwritable :: FilePath -> Assertion
checkwritable f = do
    r <- try $ writeFile f $ content f
    case r of
        Left _ -> assertFailure $ "unable to modify " ++ f
        Right _ -> return ()

-- Asserts that f cannot be opened for reading, as is the case for a
-- dangling symlink.
checkdangling :: FilePath -> Assertion
checkdangling f = do
    r <- try $ readFile f
    case r of
        Left _ -> return () -- expected; dangling link
        Right _ -> assertFailure $ f ++ " was not a dangling link as expected"

-- Asserts that the current repository's UUID is (expected == True) or is
-- not (expected == False) among the locations recorded for the key of f.
checklocationlog :: FilePath -> Bool -> Assertion
checklocationlog f expected = do
    thisuuid <- annexeval Annex.UUID.getUUID
    r <- annexeval $ Backend.lookupFile f
    case r of
        Just (k, _) -> do
            uuids <- annexeval $ Logs.Location.keyLocations k
            assertEqual ("bad content in location log for " ++ f ++ " key " ++ (show k) ++ " uuid " ++ show thisuuid)
                expected (thisuuid `elem` uuids)
        _ -> assertFailure $ f ++ " failed to look up key"

-- Asserts that file is annexed using the expected backend. A file whose
-- key cannot be looked up is reported as an assertion failure.
checkbackend :: FilePath -> Types.Backend Types.Annex -> Assertion
checkbackend file expected = do
    r <- annexeval $ Backend.lookupFile file
    case r of
        Just (_, b) -> assertEqual ("backend for " ++ file) expected b
        Nothing -> assertFailure $ file ++ " failed to look up key"

-- Asserts that the current repository is recorded as a location of f.
inlocationlog :: FilePath -> Assertion
inlocationlog f = checklocationlog f True

-- Asserts that the current repository is not recorded as a location of f.
notinlocationlog :: FilePath -> Assertion
notinlocationlog f = checklocationlog f False

-- Runs each check against the file, in list order.
runchecks :: [FilePath -> Assertion] -> FilePath -> Assertion
runchecks checks f = mapM_ ($ f) checks

-- An annexed file whose content is not present here: a dangling symlink
-- that the location log does not list for this repository.
annexed_notpresent :: FilePath -> Assertion
annexed_notpresent = runchecks
    [checklink, checkdangling, notinlocationlog]

-- An annexed file whose content is present here: a symlink to unwritable
-- content matching the fixture, listed in the location log.
annexed_present :: FilePath -> Assertion
annexed_present = runchecks
    [checklink, checkcontent, checkunwritable, inlocationlog]

-- A file that is not annexed: a regular, writable file with fixture content.
unannexed :: FilePath -> Assertion
unannexed = runchecks [checkregularfile, checkcontent, checkwritable]

-- Sets up the process environment for the test run.
prepare :: IO ()
prepare = do
    -- While PATH is mostly avoided, the commit hook does run it. Make
    -- sure that the just-built git annex is used.
    cwd <- getCurrentDirectory
    p <- getEnvDefault "PATH" ""
    setEnv "PATH" (cwd ++ ":" ++ p) True
    -- TOPDIR is read back by changeToTmpDir
    setEnv "TOPDIR" cwd True
    -- Avoid git complaining if it cannot determine the user's email
    -- address.
    setEnv "EMAIL" "git-annex test " True

-- Changes the working directory to t, taken relative to the TOPDIR
-- environment variable set by prepare.
changeToTmpDir :: FilePath -> IO ()
changeToTmpDir t = do
    -- Hack alert. Threading state to here was too much bother.
    topdir <- getEnvDefault "TOPDIR" ""
    changeWorkingDirectory $ topdir ++ "/" ++ t

-- Directory holding all repositories created by the test suite.
tmpdir :: String
tmpdir = ".t"

-- The main repository, which the clone helpers clone from.
mainrepodir :: String
mainrepodir = tmpdir ++ "/repo"

-- Location of the temporary clone made by withtmpclonerepo.
tmprepodir :: String
tmprepodir = tmpdir ++ "/tmprepo"

-- Names of the fixture files used by the tests.
annexedfile :: String
annexedfile = "foo"

wormannexedfile :: String
wormannexedfile = "apple"

sha1annexedfile :: String
sha1annexedfile = "sha1foo"

sha1annexedfiledup :: String
sha1annexedfiledup = "sha1foodup"

ingitfile :: String
ingitfile = "bar"

-- The fixture content for a given fixture file name. sha1annexedfiledup
-- intentionally shares the content of sha1annexedfile.
content :: FilePath -> String
content f
    | f == annexedfile = "annexed file content"
    | f == ingitfile = "normal file content"
    | f == sha1annexedfile = "sha1 annexed file content"
    | f == sha1annexedfiledup = content sha1annexedfile
    | f == wormannexedfile = "worm annexed file content"
    | otherwise = "unknown file " ++ f

-- Overwrites f with the modified variant of its fixture content.
changecontent :: FilePath -> IO ()
changecontent f = writeFile f $ changedcontent f

-- The fixture content of f with a " (modified)" suffix.
changedcontent :: FilePath -> String
changedcontent f = content f ++ " (modified)"

backendSHA1 :: Types.Backend Types.Annex
backendSHA1 = backend_ "SHA1"

backendSHA256 :: Types.Backend Types.Annex
backendSHA256 = backend_ "SHA256"

backendWORM :: Types.Backend Types.Annex
backendWORM = backend_ "WORM"

-- Looks up a backend by name via Backend.lookupBackendName.
backend_ :: String -> Types.Backend Types.Annex
backend_ = Backend.lookupBackendName