Merge branch 'master' into concurrentprogress

This commit is contained in:
Joey Hess 2015-04-07 13:32:52 -04:00
commit 683c596a0e
46 changed files with 959 additions and 244 deletions

View file

@ -11,6 +11,7 @@ import Git.Types
import Git.Config
import Types.GitConfig
import qualified Git.Construct as Construct
import qualified Git.BuildVersion
import Utility.Path
import Utility.SafeCommand
import Utility.Directory
@ -36,8 +37,10 @@ fixupRepo r c = do
{- Disable git's built-in wildcard expansion, which is not wanted
- when using it as plumbing by git-annex. -}
disableWildcardExpansion :: Repo -> Repo
disableWildcardExpansion r = r
{ gitGlobalOpts = gitGlobalOpts r ++ [Param "--literal-pathspecs"] }
disableWildcardExpansion r
| Git.BuildVersion.older "1.8.1" = r
| otherwise = r
{ gitGlobalOpts = gitGlobalOpts r ++ [Param "--literal-pathspecs"] }
{- Direct mode repos have core.bare=true, but are not really bare.
- Fix up the Repo to be a non-bare repo, and arrange for git commands

View file

@ -162,13 +162,17 @@ probeFifoSupport = do
#else
tmp <- fromRepo gitAnnexTmpMiscDir
let f = tmp </> "gaprobe"
let f2 = tmp </> "gaprobe2"
createAnnexDirectory tmp
liftIO $ do
nukeFile f
nukeFile f2
ms <- tryIO $ do
createNamedPipe f ownerReadMode
createLink f f2
getFileStatus f
nukeFile f
nukeFile f2
return $ either (const False) isNamedPipe ms
#endif

View file

@ -99,7 +99,7 @@ makeinfos updated version = do
void $ inRepo $ runBool
[ Param "commit"
, Param "-a"
, Param "-S89C809CB" -- git-annex distribution signing key
, Param ("-S" ++ signingKey)
, Param "-m"
, Param $ "publishing git-annex " ++ version
]
@ -124,8 +124,8 @@ makeinfos updated version = do
signFile f
void $ inRepo $ runBool
[ Param "commit"
, Param ("-S" ++ signingKey)
, Param "-m"
, Param "-S89C809CB" -- git-annex distribution signing key
, Param $ "updated info files for git-annex " ++ version
]
void $ inRepo $ runBool

View file

@ -45,6 +45,7 @@ import qualified Command.Describe
import qualified Command.InitRemote
import qualified Command.EnableRemote
import qualified Command.Fsck
import qualified Command.Expire
import qualified Command.Repair
import qualified Command.Unused
import qualified Command.DropUnused
@ -169,6 +170,7 @@ cmds = concat
, Command.VCycle.cmd
, Command.Fix.cmd
, Command.Fsck.cmd
, Command.Expire.cmd
, Command.Repair.cmd
, Command.Unused.cmd
, Command.DropUnused.cmd

106
Command/Expire.hs Normal file
View file

@ -0,0 +1,106 @@
{- git-annex command
-
- Copyright 2015 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Command.Expire where
import Common.Annex
import Command
import Logs.Activity
import Logs.UUID
import Logs.MapLog
import Logs.Trust
import Annex.UUID
import qualified Remote
import Utility.HumanTime
import Data.Time.Clock.POSIX
import qualified Data.Map as M
{- The expire command. It accepts the --activity and --no-act options
 - and is listed in the maintenance section. -}
cmd :: [Command]
cmd =
  [ withOptions [activityOption, noActOption] $
      command "expire" paramExpire seek SectionMaintenance
        "expire inactive repositories"
  ]
{- Usage description of the command's parameters: any number of
 - optional-remote:time pairs. -}
paramExpire :: String
paramExpire = paramRepeating (paramOptional paramRemote ++ ":" ++ paramTime)
{- The --activity option, whose value names the activity to consider. -}
activityOption :: Option
activityOption =
  fieldOption [] "activity" "Name"
    "specify activity"
{- The --no-act flag. When set, trust levels are not changed. -}
noActOption :: Option
noActOption =
  flagOption [] "no-act"
    "don't really do anything"
{- Queues an expire check for every repository in the uuid map except
 - the local one.
 -
 - The command line supplies the expiry times, --activity optionally
 - restricts which kind of activity is considered, and --no-act makes
 - the run informational only. -}
seek :: CommandSeek
seek ps = do
  expire <- parseExpire ps
  wantact <- getOptionField activityOption (pure . parseActivity)
  noact <- getOptionFlag noActOption
  actlog <- lastActivities wantact
  u <- getUUID
  -- Read the uuid map only once; it provides both the set of known
  -- repositories and their descriptions.
  descs <- uuidMap
  let us = filter (/= u) (M.keys descs)
  seekActions $ pure $ map (start expire noact actlog descs) us
{- Handles a single repository, based on its last logged activity.
 -
 - A repository whose last activity is not older than its expiry time
 - is unexpired (set to semitrusted), but only if it is currently dead.
 - Any other repository, including one with no logged activity, is
 - expired (set to dead), unless it is dead already.
 -
 - When noact is set, the action is still displayed, but the trust
 - level is not changed. -}
start :: Expire -> Bool -> Log Activity -> M.Map UUID String -> UUID -> CommandStart
start (Expire expire) noact actlog descs u
  | maybe False notexpired lastact =
      checktrust (== DeadTrusted) $ announce "unexpire" SemiTrusted
  | otherwise =
      checktrust (/= DeadTrusted) $ announce "expire" DeadTrusted
  where
    -- Display the action, and apply the new trust level unless
    -- running in no-act mode.
    announce what newlevel = do
      showStart what desc
      showNote =<< whenactive
      unless noact $
        trustSet u newlevel

    -- Time of the most recent logged activity of this repository.
    lastact = changed <$> M.lookup u actlog

    whenactive = case lastact of
      Just (Date t) ->
        ago <$> liftIO (durationSince (posixSecondsToUTCTime t))
      _ -> return "no activity"
    ago d = "last active: " ++ fromDuration d ++ " ago"

    desc = fromUUID u ++ " " ++ fromMaybe "" (M.lookup u descs)

    -- An activity at an unknown time counts as expired. When no expiry
    -- time applies to the repository, or it is set to never expire,
    -- the activity counts as not expired.
    notexpired Unknown = False
    notexpired (Date t) = case lookupexpire of
      Just (Just expiretime) -> t >= expiretime
      _ -> True

    -- The repository's own entry takes precedence over the entry that
    -- was given without a repository name.
    lookupexpire = headMaybe $ catMaybes $
      map (`M.lookup` expire) [Just u, Nothing]

    -- Only run the action when the current trust level satisfies the
    -- predicate; otherwise there is nothing to do.
    checktrust want a = do
      current <- lookupTrust u
      if want current
        then do
          void a
          next $ next $ return True
        else stop
{- Expiry cutoff times, keyed by repository.
 -
 - The Nothing key holds the time that was specified without a
 - repository name; it is consulted for a repository that has no entry
 - of its own. A Nothing value means "never": activity is never treated
 - as expired. -}
data Expire = Expire (M.Map (Maybe UUID) (Maybe POSIXTime))
{- Parses the command line parameters into an Expire.
 -
 - Each parameter is either "time", or "remote:time". The time is
 - either "never" or a duration, which is converted to a cutoff time
 - by subtracting it from the current time.
 -
 - Throws an error when no parameters are given, or when a time cannot
 - be parsed. -}
parseExpire :: [String] -> Annex Expire
parseExpire [] = error "Specify an expire time."
parseExpire ps = do
  now <- liftIO getPOSIXTime
  let parse p = case separate (== ':') p of
        (t, []) -> return (Nothing, cutoff now t)
        (n, t) -> do
          r <- Remote.nameToUUID n
          return (Just r, cutoff now t)
  entries <- mapM parse ps
  return $ Expire $ M.fromList entries
  where
    cutoff now s
      | s == "never" = Nothing
      | otherwise = maybe
          (error $ "bad expire time: " ++ s)
          (\d -> Just (now - durationToPOSIXTime d))
          (parseDuration s)
{- Parses the value of the --activity option, if one was given.
 -
 - Throws an error listing the valid activities when the value is not
 - the name of an Activity. -}
parseActivity :: Maybe String -> Maybe Activity
parseActivity = maybe Nothing parse
  where
    parse s = maybe (unknown ()) Just (readish s)
    unknown () = error $ "Unknown activity. Choose from: " ++
      unwords (map show [minBound..maxBound :: Activity])

View file

@ -41,10 +41,10 @@ start _ [] = do
start _ _ = error "specify a key and a dest file"
massAdd :: CommandPerform
massAdd = go True =<< map words . lines <$> liftIO getContents
massAdd = go True =<< map (separate (== ' ')) . lines <$> liftIO getContents
where
go status [] = next $ return status
go status ([keyname,f]:rest) = do
go status ((keyname,f):rest) | not (null keyname) && not (null f) = do
let key = fromMaybe (error $ "bad key " ++ keyname) $ file2key keyname
ok <- perform' key f
let !status' = status && ok

View file

@ -22,8 +22,8 @@ import Annex.Direct
import Annex.Perms
import Annex.Link
import Logs.Location
import Logs.Presence
import Logs.Trust
import Logs.Activity
import Config.NumCopies
import Annex.UUID
import Utility.DataUnits
@ -39,7 +39,6 @@ import Data.Time.Clock.POSIX
import Data.Time
import System.Posix.Types (EpochTime)
import System.Locale
import qualified Data.Map as M
cmd :: [Command]
cmd = [withOptions fsckOptions $ command "fsck" paramPaths seek
@ -58,22 +57,12 @@ incrementalScheduleOption :: Option
incrementalScheduleOption = fieldOption [] "incremental-schedule" paramTime
"schedule incremental fscking"
distributedOption :: Option
distributedOption = flagOption [] "distributed" "distributed fsck mode"
expireOption :: Option
expireOption = fieldOption [] "expire"
(paramRepeating $ paramOptional paramRemote ++ ":" ++ paramTime)
"distributed expire mode"
fsckOptions :: [Option]
fsckOptions =
[ fsckFromOption
, startIncrementalOption
, moreIncrementalOption
, incrementalScheduleOption
, distributedOption
, expireOption
] ++ keyOptions ++ annexedMatchingOptions
seek :: CommandSeek
@ -81,28 +70,28 @@ seek ps = do
from <- getOptionField fsckFromOption Remote.byNameWithUUID
u <- maybe getUUID (pure . Remote.uuid) from
i <- getIncremental u
d <- getDistributed
withKeyOptions False
(\k -> startKey i d k =<< getNumCopies)
(withFilesInGit $ whenAnnexed $ start from i d)
(\k -> startKey i k =<< getNumCopies)
(withFilesInGit $ whenAnnexed $ start from i)
ps
withFsckDb i FsckDb.closeDb
recordActivity Fsck u
start :: Maybe Remote -> Incremental -> Distributed -> FilePath -> Key -> CommandStart
start from inc dist file key = do
start :: Maybe Remote -> Incremental -> FilePath -> Key -> CommandStart
start from inc file key = do
v <- Backend.getBackend file key
case v of
Nothing -> stop
Just backend -> do
numcopies <- getFileNumCopies file
case from of
Nothing -> go $ perform dist key file backend numcopies
Just r -> go $ performRemote dist key file backend numcopies r
Nothing -> go $ perform key file backend numcopies
Just r -> go $ performRemote key file backend numcopies r
where
go = runFsck inc file key
perform :: Distributed -> Key -> FilePath -> Backend -> NumCopies -> Annex Bool
perform dist key file backend numcopies = check
perform :: Key -> FilePath -> Backend -> NumCopies -> Annex Bool
perform key file backend numcopies = check
-- order matters
[ fixLink key file
, verifyLocationLog key file
@ -110,14 +99,13 @@ perform dist key file backend numcopies = check
, verifyDirectMode key file
, checkKeySize key
, checkBackend backend key (Just file)
, checkDistributed dist key Nothing
, checkKeyNumCopies key file numcopies
]
{- To fsck a remote, the content is retrieved to a tmp file,
- and checked locally. -}
performRemote :: Distributed -> Key -> FilePath -> Backend -> NumCopies -> Remote -> Annex Bool
performRemote dist key file backend numcopies remote =
performRemote :: Key -> FilePath -> Backend -> NumCopies -> Remote -> Annex Bool
performRemote key file backend numcopies remote =
dispatch =<< Remote.hasKey remote key
where
dispatch (Left err) = do
@ -136,7 +124,6 @@ performRemote dist key file backend numcopies remote =
[ verifyLocationLogRemote key file remote present
, checkKeySizeRemote key remote localcopy
, checkBackendRemote backend key remote localcopy
, checkDistributed dist key (Just $ Remote.uuid remote)
, checkKeyNumCopies key file numcopies
]
withtmp a = do
@ -157,19 +144,18 @@ performRemote dist key file backend numcopies remote =
)
dummymeter _ = noop
startKey :: Incremental -> Distributed -> Key -> NumCopies -> CommandStart
startKey inc dist key numcopies =
startKey :: Incremental -> Key -> NumCopies -> CommandStart
startKey inc key numcopies =
case Backend.maybeLookupBackendName (Types.Key.keyBackendName key) of
Nothing -> stop
Just backend -> runFsck inc (key2file key) key $
performKey dist key backend numcopies
performKey key backend numcopies
performKey :: Distributed -> Key -> Backend -> NumCopies -> Annex Bool
performKey dist key backend numcopies = check
performKey :: Key -> Backend -> NumCopies -> Annex Bool
performKey key backend numcopies = check
[ verifyLocationLog key (key2file key)
, checkKeySize key
, checkBackend backend key Nothing
, checkDistributed dist key Nothing
, checkKeyNumCopies key (key2file key) numcopies
]
@ -513,69 +499,3 @@ getIncremental u = do
when (now - realToFrac started >= durationToPOSIXTime delta) $
resetStartTime u
return True
data Distributed
= NonDistributed
| Distributed POSIXTime
| DistributedExpire POSIXTime (M.Map (Maybe UUID) (Maybe POSIXTime))
deriving (Show)
getDistributed :: Annex Distributed
getDistributed = go =<< getOptionField expireOption parseexpire
where
go (Just m) = DistributedExpire <$> liftIO getPOSIXTime <*> pure m
go Nothing = ifM (getOptionFlag distributedOption)
( Distributed <$> liftIO getPOSIXTime
, return NonDistributed
)
parseexpire Nothing = return Nothing
parseexpire (Just s) = do
now <- liftIO getPOSIXTime
Just . M.fromList <$> mapM (parseexpire' now) (words s)
parseexpire' now s = case separate (== ':') s of
(t, []) -> return (Nothing, parsetime now t)
(n, t) -> do
r <- Remote.nameToUUID n
return (Just r, parsetime now t)
parsetime _ "never" = Nothing
parsetime now s = case parseDuration s of
Nothing -> error $ "bad expire time: " ++ s
Just d -> Just (now - durationToPOSIXTime d)
checkDistributed :: Distributed -> Key -> Maybe UUID -> Annex Bool
checkDistributed d k mu = do
go d
return True
where
go NonDistributed = noop
-- This is called after fsck has checked the key's content, so
-- if the key is present in the annex now, we just need to update
-- the location log with the timestamp of the start of the fsck.
--
-- Note that reusing this timestamp means that the same log line
-- is generated for each key, which keeps the size increase
-- of the git-annex branch down.
go (Distributed ts) = whenM (inAnnex k) $ do
u <- maybe getUUID return mu
logChange' (logThen ts) k u InfoPresent
-- Get the location log for the key, and expire all entries
-- that are older than their uuid's listed expiration date.
-- (Except for the local repository.)
go (DistributedExpire ts m) = do
ls <- locationLog k
hereu <- getUUID
forM_ ls $ \l -> do
let u = toUUID (info l)
unless (u == hereu) $
case lookupexpire u of
Just (Just expiretime)
| date l < expiretime ->
logChange' (logThen ts) k u InfoMissing
_ -> noop
where
lookupexpire u = headMaybe $ catMaybes $
map (`M.lookup` m) [Just u, Nothing]

View file

@ -198,6 +198,7 @@ remote_fast_stats r = map (\s -> s r)
[ remote_name
, remote_description
, remote_uuid
, remote_trust
, remote_cost
, remote_type
]
@ -266,6 +267,10 @@ remote_uuid :: Remote -> Stat
remote_uuid r = simpleStat "uuid" $ pure $
fromUUID $ Remote.uuid r
{- Stat showing the trust level looked up for the remote's uuid. -}
remote_trust :: Remote -> Stat
remote_trust r = simpleStat "trust" (lift trustlevel)
  where
    trustlevel = showTrustLevel <$> lookupTrust (Remote.uuid r)
remote_cost :: Remote -> Stat
remote_cost r = simpleStat "cost" $ pure $
show $ Remote.cost r

View file

@ -34,10 +34,10 @@ start [] = do
start _ = error "specify a key and an url"
massAdd :: CommandPerform
massAdd = go True =<< map words . lines <$> liftIO getContents
massAdd = go True =<< map (separate (== ' ')) . lines <$> liftIO getContents
where
go status [] = next $ return status
go status ([keyname,u]:rest) = do
go status ((keyname,u):rest) | not (null keyname) && not (null u) = do
let key = fromMaybe (error $ "bad key " ++ keyname) $ file2key keyname
ok <- perform' key u
let !status' = status && ok

View file

@ -1,6 +1,6 @@
{- git-annex log file names
-
- Copyright 2013-2014 Joey Hess <id@joeyh.name>
- Copyright 2013-2015 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@ -40,6 +40,7 @@ topLevelUUIDBasedLogs =
, preferredContentLog
, requiredContentLog
, scheduleLog
, activityLog
, differenceLog
]
@ -84,9 +85,13 @@ groupPreferredContentLog = "group-preferred-content.log"
scheduleLog :: FilePath
scheduleLog = "schedule.log"
activityLog :: FilePath
activityLog = "activity.log"
differenceLog :: FilePath
differenceLog = "difference.log"
{- The pathname of the location log file for a given key. -}
locationLogFile :: GitConfig -> Key -> String
locationLogFile config key = branchHashDir config key </> keyFile key ++ ".log"

37
Logs/Activity.hs Normal file
View file

@ -0,0 +1,37 @@
{- git-annex activity log
-
- Copyright 2015 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Logs.Activity (
Log,
Activity(..),
recordActivity,
lastActivities,
) where
import Data.Time.Clock.POSIX
import Common.Annex
import qualified Annex.Branch
import Logs
import Logs.UUIDBased
{- The kinds of activity that can be recorded in the activity log. -}
data Activity
  = Fsck
  deriving (Eq, Read, Show, Enum, Bounded)
{- Records in the activity log that the repository with the given UUID
 - performed an activity at the current time.
 -
 - Values are written to the log unquoted (showLog id), so the existing
 - log has to be parsed the same way. Parsing it with readish at type
 - String would only accept quoted Haskell string literals; no line
 - written here would parse, and so every other repository's entry
 - would be dropped each time the log was updated. -}
recordActivity :: Activity -> UUID -> Annex ()
recordActivity act uuid = do
  ts <- liftIO getPOSIXTime
  Annex.Branch.change activityLog $
    showLog id . changeLog ts uuid (show act) . parseLog Just
{- Reads the activity log. When an Activity is specified, only entries
 - for that activity are included; otherwise every entry that parses as
 - an Activity is included. -}
lastActivities :: Maybe Activity -> Annex (Log Activity)
lastActivities wantact = do
  content <- Annex.Branch.get activityLog
  return $ parseLog keep content
  where
    keep s = do
      a <- readish s
      if matches a
        then Just a
        else Nothing
    matches a = case wantact of
      Nothing -> True
      Just w -> a == w

View file

@ -17,7 +17,6 @@ module Logs.Location (
LogStatus(..),
logStatus,
logChange,
logChange',
loggedLocations,
loggedLocationsHistorical,
locationLog,

View file

@ -16,7 +16,6 @@ module Logs.Presence (
addLog,
readLog,
logNow,
logThen,
currentLog,
currentLogInfo,
historicalLogInfo,
@ -44,9 +43,6 @@ logNow s i = do
now <- liftIO getPOSIXTime
return $ LogLine now s i
logThen :: POSIXTime -> LogStatus -> String -> Annex LogLine
logThen t s i = return $ LogLine t s i
{- Reads a log and returns only the info that is still in effect. -}
currentLogInfo :: FilePath -> Annex [String]
currentLogInfo file = map info <$> currentLog file

View file

@ -164,19 +164,15 @@ type ProgressParser = String -> (Maybe BytesProcessed, String)
- to update a meter.
-}
commandMeter :: ProgressParser -> OutputHandler -> MeterUpdate -> FilePath -> [CommandParam] -> IO Bool
commandMeter progressparser oh meterupdate cmd params = catchBoolIO $
withOEHandles createProcessSuccess p $ \(outh, errh) -> do
ep <- async $ handlestderr errh
op <- async $ feedprogress zeroBytesProcessed [] outh
wait ep
wait op
commandMeter progressparser oh meterupdate cmd params =
outputFilter cmd params Nothing
(feedprogress zeroBytesProcessed [])
handlestderr
where
p = proc cmd (toCommand params)
feedprogress prev buf h = do
b <- S.hGetSome h 80
if S.null b
then return True
then return ()
else do
unless (quietMode oh) $ do
S.hPut stdout b
@ -203,18 +199,13 @@ demeterCommand :: OutputHandler -> FilePath -> [CommandParam] -> IO Bool
demeterCommand oh cmd params = demeterCommandEnv oh cmd params Nothing
demeterCommandEnv :: OutputHandler -> FilePath -> [CommandParam] -> Maybe [(String, String)] -> IO Bool
demeterCommandEnv oh cmd params environ = catchBoolIO $
withOEHandles createProcessSuccess p $ \(outh, errh) -> do
ep <- async $ avoidProgress True errh $ stderrHandler oh
op <- async $ avoidProgress True outh $ \l ->
unless (quietMode oh) $
putStrLn l
wait ep
wait op
return True
demeterCommandEnv oh cmd params environ = outputFilter cmd params environ
(\outh -> avoidProgress True outh stdouthandler)
(\errh -> avoidProgress True errh $ stderrHandler oh)
where
p = (proc cmd (toCommand params))
{ env = environ }
stdouthandler l =
unless (quietMode oh) $
putStrLn l
{- To suppress progress output, while displaying other messages,
- filter out lines that contain \r (typically used to reset to the
@ -226,3 +217,23 @@ avoidProgress doavoid h emitter = unlessM (hIsEOF h) $ do
unless (doavoid && '\r' `elem` s) $
emitter s
avoidProgress doavoid h emitter
{- Runs a command with its stdout and stderr connected to pipes, and
 - hands each pipe to a filter action running in its own thread.
 -
 - Returns True when the command exits successfully. An exception
 - thrown while starting or waiting on the command results in False.
 - An IO exception thrown by a filter is ignored; its handle is closed
 - afterwards either way.
 -
 - NOTE(review): the filter threads are not waited on, so this can
 - return while a filter is still processing output -- confirm that
 - callers are ok with that. -}
outputFilter
  :: FilePath
  -> [CommandParam]
  -> Maybe [(String, String)]
  -> (Handle -> IO ())
  -> (Handle -> IO ())
  -> IO Bool
outputFilter cmd params environ outfilter errfilter = catchBoolIO $ do
  (_, Just outh, Just errh, pid) <- createProcess spec
  spawn outfilter outh
  spawn errfilter errh
  checkSuccessProcess pid
  where
    spec = (proc cmd (toCommand params))
      { env = environ
      , std_out = CreatePipe
      , std_err = CreatePipe
      }
    spawn handler h = void $ async $ do
      _ <- tryIO (handler h)
      hClose h

30
debian/changelog vendored
View file

@ -1,4 +1,22 @@
git-annex (5.20150328) UNRELEASED; urgency=medium
git-annex (5.20150406.2) UNRELEASED; urgency=medium
* --quiet now suppresses progress displays from eg, rsync.
(Second time's the charm..)
* fromkey, registerurl: When reading from stdin, allow the
filename and url, respectively, to contain whitespace.
-- Joey Hess <id@joeyh.name> Mon, 06 Apr 2015 20:14:20 -0400
git-annex (5.20150406.1) unstable; urgency=medium
* Fixes a bug in the last release that caused rsync and possibly
other commands to hang at the end of a file transfer.
(--quiet is back to not blocking progress displays until
that code can be fixed properly.)
-- Joey Hess <id@joeyh.name> Mon, 06 Apr 2015 17:13:13 -0400
git-annex (5.20150406) unstable; urgency=medium
* Prevent git-ls-files from double-expanding wildcards when an
unexpanded wildcard is passed to a git-annex command like add or find.
@ -10,7 +28,7 @@ git-annex (5.20150328) UNRELEASED; urgency=medium
guid has been downloaded before, even when the url has changed.
* importfeed: Always store itemid in metadata; before this was only
done when annex.genmetadata was set.
* Relax debian package dependencies to git >= 1:1.7.7.6 rather
* Relax debian package dependencies to git >= 1:1.8.1 rather
than needing >= 1:2.0.
* test: Fix --list-tests
* addurl --file: When used with a special remote that claims
@ -20,17 +38,19 @@ git-annex (5.20150328) UNRELEASED; urgency=medium
multiple files.
* import: --deduplicate and --cleanduplicates now output the keys
corresponding to duplicated files they process.
* fsck: Added --distributed and --expire options,
for distributed fsck.
* expire: New command, for expiring inactive repositories.
* fsck: Record fsck activity for use by expire command.
* Fix truncation of parameters that could occur when using xargs git-annex.
* Significantly sped up processing of large numbers of directories
passed to a single git-annex command.
* version: Add --raw
* init: Improve fifo test to detect NFS systems that support fifos
but not well enough for sshcaching.
* --quiet now suppresses progress displays from eg, rsync.
(The option already suppressed git-annex's own built-in progress
displays.)
-- Joey Hess <id@joeyh.name> Fri, 27 Mar 2015 16:04:43 -0400
-- Joey Hess <id@joeyh.name> Mon, 06 Apr 2015 12:48:48 -0400
git-annex (5.20150327) unstable; urgency=medium

4
debian/control vendored
View file

@ -75,7 +75,7 @@ Build-Depends:
lsof [!kfreebsd-i386 !kfreebsd-amd64 !hurd-any],
ikiwiki,
perlmagick,
git (>= 1:1.7.7.6),
git (>= 1:1.8.1),
rsync,
wget,
curl,
@ -92,7 +92,7 @@ Package: git-annex
Architecture: any
Section: utils
Depends: ${misc:Depends}, ${shlibs:Depends},
git (>= 1:1.7.7.6),
git (>= 1:1.8.1),
rsync,
wget,
curl,

View file

@ -1,6 +0,0 @@
### Feature request
It is not possible to put encrypted content in place on remotes with just a public GPG key. You always need the private key, even for encryption. I guess this is because how the cipher HMAC is used for replacing file names with their hashes. However, if that requirement (having secret file names) was dropped, I assume a pubkey-only mode could be implemented?
My specific use case is backup archiving. I have my backups packed in archive files and want to use git-annex to copy the archives to offsite remotes (S3). In that case, I don't care much about hiding file names, but would appreciate the increased security of not having the secret key on the backup server. It would only be needed if I wanted to verify or restore backups.
> [[closed|done]] per my comment --[[Joey]]

View file

@ -0,0 +1,119 @@
### Please describe the problem.
[[git-annex-shell]] seems to be designed to be put as some users' shells so that git-annex can be safely used from `sshd`.
### What steps will reproduce the problem?
<pre>
root@cs:/srv/gitannex-test# grep gitannex /etc/passwd
gitannex:x:999:999:Git annex sandbox:/var/lib/gitannex:/usr/local/bin/git-annex-shell
</pre>
`/usr/local/bin/git-annex-shell` is a symlink to the standalone git-annex installed in `/opt`.
<pre>
anarcat@desktop008:isuma-annex-test$ git remote -v
origin gitannex@example.com:/srv/gitannex-test (fetch)
anarcat@desktop008:isuma-annex-test$ git annex sync
muxserver_listen: link mux listener .git/annex/ssh/gitannex@example.com.bFOddoa2pVKZGHQ2 => .git/annex/ssh/gitannex@example.com: Operation not permitted
Remote origin does not have git-annex installed; setting annex-ignore
This could be a problem with the git-annex installation on the remote. Please make sure that git-annex-shell is available in PATH when you ssh into the remote. Once you have fixed the git-annex installation, run: git config remote.origin.annex-ignore false
commit ok
pull origin
git-annex: unknown command gitannex@cs.isuma.tv
Usage: git-annex command [option ...]
Commonly used commands:
add [PATH ...] add files to annex
[...]
Testing commands:
fuzztest generates fuzz test files
test run built-in test suite
testremote REMOTE test transfers to/from a remote
fatal: Could not read from remote repository.
Please make sure you have the correct access rights
and the repository exists.
failed
push origin
git-annex: unknown command gitannex@cs.isuma.tv
Usage: git-annex command [option ...]
Commonly used commands:
add [PATH ...] add files to annex
[...]
Testing commands:
fuzztest generates fuzz test files
test run built-in test suite
testremote REMOTE test transfers to/from a remote
fatal: Could not read from remote repository.
Please make sure you have the correct access rights
and the repository exists.
Pushing to origin failed.
(non-fast-forward problems can be solved by setting receive.denyNonFastforwards to false in the remote's git config)
failed
git-annex: sync: 2 failed
</pre>
Note that moving the repository out of NFS doesn't fix the problem, i still get `git-annex: unknown command gitannex@cs.isuma.tv`.
How am i supposed to use `git-annex-shell`?
### What version of git-annex are you using? On what operating system?
client side: 5.20141125
server side: 5.20150406-g2a9fbec
### Please provide any additional information below.
[[!format sh """
# If you can, paste a complete transcript of the problem occurring here.
# If the problem is with the git-annex assistant, paste in .git/annex/daemon.log
# End of transcript or log.
"""]]
Update: well that's confusing. it turns out the `unknown command` bit still happens when i use `/bin/sh` as a shell for the gitannex user. i really don't understand what's going on here... :/
After running with `--debug`, i noticed this was happening on the client with any git-annex remote, including one running the same version (`5.20141125`), and it happens after `git-annex` calls `git push` or `git fetch`:
<pre>
anarcat@desktop008:isuma-annex-test$ git annex --debug sync test3
[2015-04-06 16:52:36 EDT] read: git ["--git-dir=/srv/scratch/anarcat/isuma-annex-test/.git","--work-tree=/srv/scratch/anarcat/isuma-annex-test","show-ref","git-annex"]
[2015-04-06 16:52:36 EDT] read: git ["--git-dir=/srv/scratch/anarcat/isuma-annex-test/.git","--work-tree=/srv/scratch/anarcat/isuma-annex-test","show-ref","--hash","refs/heads/git-annex"]
[2015-04-06 16:52:36 EDT] read: git ["--git-dir=/srv/scratch/anarcat/isuma-annex-test/.git","--work-tree=/srv/scratch/anarcat/isuma-annex-test","log","refs/heads/git-annex..41069ddc0e22abc7ef0dca2aa31b20af9cee6116","-n1","--pretty=%H"]
[2015-04-06 16:52:36 EDT] read: git ["--git-dir=/srv/scratch/anarcat/isuma-annex-test/.git","--work-tree=/srv/scratch/anarcat/isuma-annex-test","log","refs/heads/git-annex..0ad850f59bdbe2448fa75e415ebfa5cf19cbebcd","-n1","--pretty=%H"]
[2015-04-06 16:52:36 EDT] read: git ["--git-dir=/srv/scratch/anarcat/isuma-annex-test/.git","--work-tree=/srv/scratch/anarcat/isuma-annex-test","log","refs/heads/git-annex..3476b0db2960fa9c9b00350e692e23dd30cd18c7","-n1","--pretty=%H"]
[2015-04-06 16:52:36 EDT] read: git ["--git-dir=/srv/scratch/anarcat/isuma-annex-test/.git","--work-tree=/srv/scratch/anarcat/isuma-annex-test","log","refs/heads/git-annex..d5cd95f472e00c51a2d35dedabf85f47cf3ce7fa","-n1","--pretty=%H"]
[2015-04-06 16:52:36 EDT] read: git ["--git-dir=/srv/scratch/anarcat/isuma-annex-test/.git","--work-tree=/srv/scratch/anarcat/isuma-annex-test","log","refs/heads/git-annex..778ba43445db7deb1bc6543e07145c13d3c3e5e2","-n1","--pretty=%H"]
[2015-04-06 16:52:36 EDT] chat: git ["--git-dir=/srv/scratch/anarcat/isuma-annex-test/.git","--work-tree=/srv/scratch/anarcat/isuma-annex-test","cat-file","--batch"]
[2015-04-06 16:52:36 EDT] read: git ["config","--null","--list"]
commit [2015-04-06 16:52:36 EDT] read: git ["--git-dir=/srv/scratch/anarcat/isuma-annex-test/.git","--work-tree=/srv/scratch/anarcat/isuma-annex-test","commit","-a","-m","git-annex automatic sync"]
ok
[2015-04-06 16:52:36 EDT] read: git ["--git-dir=/srv/scratch/anarcat/isuma-annex-test/.git","--work-tree=/srv/scratch/anarcat/isuma-annex-test","symbolic-ref","HEAD"]
[2015-04-06 16:52:36 EDT] read: git ["--git-dir=/srv/scratch/anarcat/isuma-annex-test/.git","--work-tree=/srv/scratch/anarcat/isuma-annex-test","show-ref","refs/heads/master"]
[2015-04-06 16:52:36 EDT] call: git ["--git-dir=/srv/scratch/anarcat/isuma-annex-test/.git","--work-tree=/srv/scratch/anarcat/isuma-annex-test","show-ref","--verify","-q","refs/heads/synced/master"]
[2015-04-06 16:52:36 EDT] read: git ["--git-dir=/srv/scratch/anarcat/isuma-annex-test/.git","--work-tree=/srv/scratch/anarcat/isuma-annex-test","log","refs/heads/master..refs/heads/synced/master","-n1","--pretty=%H"]
pull test3
[2015-04-06 16:52:36 EDT] read: ssh ["-O","stop","-S","anarc.at","-o","ControlMaster=auto","-o","ControlPersist=yes","localhost"]
[2015-04-06 16:52:36 EDT] call: git ["--git-dir=/srv/scratch/anarcat/isuma-annex-test/.git","--work-tree=/srv/scratch/anarcat/isuma-annex-test","fetch","test3"]
git-annex: unknown command anarc.at
</pre>
Turning off `sshcaching` seems to work around the issue. Note that this happens even if the git repo is moved to a non-NFS filesystem, so I have the feeling it's not directly related to [this bugfix](http://source.git-annex.branchable.com/?p=source.git;a=commit;h=bd110516c09d318b298804efc4ee888270f3d601).

View file

@ -0,0 +1,26 @@
[[!comment format=mdwn
username="joey"
subject="""comment 1"""
date="2015-04-06T21:15:46Z"
content="""
This has nothing to do with git-annex-shell or anything server-side. We can
see this in the error messages; the error comes from client-side git-annex
and not from git-annex-shell at all.
For annex.sshcaching to work, git-annex has to set GIT_SSH=git-annex, and
then git calls it with the name of the hostname to ssh to.
So, your client-side git-annex is new enough to do sshcaching on sync, but
then when git runs $GIT_SSH, the git-annex program it then runs seems
to be an older version of git-annex. Which does not appreciate being called
as if it is ssh.
Fix this version confusion and your problem will be solved. Maybe you
have an older version of git-annex somewhere in PATH where git finds it.
Or, maybe you have a ~/.config/git-annex/program file that points to some
older installation of git-annex.
(It might also help to upgrade to a current version, ideally before filing
a bug report; it's quite possible some change has been made that will
make whatever your setup is work.)
"""]]

View file

@ -0,0 +1,13 @@
[[!comment format=mdwn
username="https://id.koumbit.net/anarcat"
subject="comment 2"
date="2015-04-06T22:16:57Z"
content="""
thanks for the quick feedback. it can certainly be the case that i have two installs of git-annex out there... i'll take a look when i'm back in the office.
it wasn't obvious to me that the error was from the client-side of things, how could I tell?
wouldn't it be better if GIT_SSH would be set to the full path of the binary?
thanks!
"""]]

View file

@ -0,0 +1,28 @@
Hi,
I've a script which generates .rss files which reference local files with the file:// scheme. I can import the file:// urls with git annex addurl, but it fails with git annex importfeed:
`$ git annex importfeed --fast file:///path/to/local/rss/file.rss`
`(checking known urls...)`
`importfeed file:///path/to/local/rss/file.rss`
`git-annex: /tmp/feed6757: openFile: resource busy (file is locked)`
If I try to import it with `$ git annex importfeed --fast /path/to/local/rss/file.rss` I get
`importfeed /path/to/local/rss/file.rss`
` warning: bad feed content`
`ok`
But the directory stays empty.
Is it possible to use local files in rss format with items which reference local files using the file:// scheme as input for importfeed?
Cheers,
Marco

View file

@ -0,0 +1,18 @@
[[!comment format=mdwn
username="https://www.google.com/accounts/o8/id?id=AItOawlZF5AC-FSxwkiay5ZgEYZwUzN69Wa6PTE"
nickname="Sunke"
subject="filename from metadata?"
date="2015-04-06T18:00:25Z"
content="""
Hi everybody,
is it possible to use a metadata field for the filename in a
metadata driven view?
I am thinking of the following use case:
git annex metadata --set artist=Led\ Zeppelin --set album=Led\ Zeppelin\ IV --set title=04\ Stairway\ to\ heaven some/weird/filename.mp3
git annex view --filename-from title artist=* album=*
result:
Led Zeppelin/Led Zeppelin IV/04 Stairway to heaven.mp3
"""]]

View file

@ -23,3 +23,5 @@ there's still commit and tree update overhead.
Probably doesn't make sense to run distributed fscks too often for that and
other reasons. If the git-annex branch does get too large, there's always
`git annex forget` ...
**(Update: This was later rethought and works much more efficiently now..)**

View file

@ -0,0 +1,32 @@
I've started work on [[todo/parallel_get]].
Today, laid the groundwork in two areas:
1. Evaluated the ascii-progress haskell library. It can display
multiple progress bars in the terminal, portably, and its author
Pedro Tacla Yamada has kindly offered to improve it to meet
git-annex's needs.
I ended up filing [10 issues](https://github.com/yamadapc/haskell-ascii-progress/issues)
on it today, around 3 of them are blockers for git-annex using it.
2. Worked on making --quiet more quiet. Commands like rsync and wget
need to have their progress output disabled when run in parallel.
Didn't quite finish this yet.
---
Yesterday I made some improvements to how git-annex behaves when it's
passed a massive number of directories or files on the command line.
Eg, when driven by xargs. There turned out to be some bugs in that
scenario.
One problem I kind of had to paper over. While git-annex get
normally is careful to get the files in the same order they were listed on
the command line, it becomes very expensive to expand directories using
git-ls-files, and reorder its output to preserve order, when a large number
of files are passed on the command line. There was an O(N*M) time blowup.
I worked around it by making it only preserve the order of the first 100
files. Assumption being that if you're specifying so many files on the
command line, you probably have less of an attachment to their ordering. :)

View file

@ -0,0 +1,13 @@
Rethought distributed fsck. It's not really a fsck, but an expiration of
inactive repositories, where fscking is one kind of activity. That insight
let me reimplement it much more efficiently. Rather than updating all
the location logs to prove it was active, `git annex fsck` can simply and
inexpensively update an activity log. It's so cheap it'll do it by default.
The `git annex expire` command then reads the activity log and expires
(or unexpires) repositories that have not been active in the desired time
period. Expiring a repository simply marks it as dead.
Yesterday, finished making --quiet really be quiet. That sounds easy,
but it took several hours. On the `concurrentprogress` branch, I have
ascii-progress hooked up and working, but it's not quite ready for prime
time.

View file

@ -0,0 +1,27 @@
[[!comment format=mdwn
username="https://openid.stackexchange.com/user/e65e6d0e-58ba-41de-84cc-1f2ba54cf574"
nickname="Mica"
subject="Yes and no..."
date="2015-04-04T04:13:48Z"
content="""
> I am interested in using git-annex to manage git repositories, and I am wondering if it is possible and if anyone has experience with it?
That is not what git-annex is for. git-annex adds large file support to git.
> I have done some searching, and I know that many people have asked for support for a Dropbox-like workflow, where Git repositories are mirrored everywhere.
The git-annex assistant will automatically sync your files for you. It is similar to dropbox but not entirely the same.
> I also know that no such support seems forthcoming, however this is not my goal.
I don't know what would give you that idea, the git-annex assistant has already been released.
> Rather, I would like to use git-annex to track the location of many repositories. I keep a lot of repositories and would like to offload them onto other storage devices and keep track of where each repository is stored.
git-annex is not the right tool for that. Maybe you want something like myrepos, which is also written by Joey.
> Perhaps entire Git repositories can be added as a single unit for tracking in git-annex?
Seems unlikely as it is out of scope for git-annex.
"""]]

View file

@ -0,0 +1,10 @@
[[!comment format=mdwn
username="joey"
subject="""comment 2"""
date="2015-04-06T17:01:32Z"
content="""
This is not what git-annex was designed to do, so I don't think it's the
right tool for the job.
I suggest <http://myrepos.branchable.com/>
"""]]

View file

@ -0,0 +1,7 @@
[[!comment format=mdwn
username="joey"
subject="""comment 2"""
date="2015-04-06T16:59:07Z"
content="""
"""]]

View file

@ -0,0 +1,12 @@
[[!comment format=mdwn
username="joey"
subject="""comment 3"""
date="2015-04-06T16:59:10Z"
content="""
The map shows one line for each git remote. So if you have two git remotes
configured that are both pointing at the laptop, that'll be the result.
For example, you might have both "origin" and "laptop" remotes pointing to
it, possibly via two different urls.
There's nothing bad about this.
"""]]

66
doc/git-annex-expire.mdwn Normal file
View file

@ -0,0 +1,66 @@
# NAME
git-annex expire - expire inactive repositories
# SYNOPSIS
git annex expire `[repository:]time ...`
# DESCRIPTION
This command expires repositories that have not performed some activity
within a specified time period. A repository is expired by marking it as
dead. De-expiration is also done; if a dead repository performed some
activity recently, it is marked as semitrusted again.
This can be useful when it's not possible to keep track of the state
of repositories manually. For example, a distributed network of
repositories where nobody can directly access all the repositories to
check their status.
The repository can be specified using the name of a remote,
or the description or uuid of the repository.
The time is in the form "60d" or "1y". A time of "never" will disable
expiration.
If a time is specified without a repository, it is used as the default
value for all repositories. Note that the current repository is never
expired.
# OPTIONS
* `--no-act`
Print out what would be done, but do not actually expire or unexpire
any repositories.
* `--activity=Name`
Specify the activity that a repository must have performed to avoid being
expired. The default is any activity.
Currently, the only activity that can be performed to avoid expiration
is `git annex fsck`. Note that fscking a remote updates the
expiration of the remote repository, not the local repository.
The first version of git-annex that recorded fsck activity was
5.20150405.
# SEE ALSO
[[git-annex]](1)
[[git-annex-fsck]](1)
[[git-annex-schedule]](1)
[[git-annex-dead]](1)
[[git-annex-semitrust]](1)
# AUTHOR
Joey Hess <id@joeyh.name>
Warning: Automatically converted into a man page by mdwn2man. Edit with care.

View file

@ -13,7 +13,7 @@ in the git repository to link to a specified key.
If the key and file are not specified on the command line, they are
instead read from stdin. Any number of lines can be provided in this
mode, each containing a key and filename, sepearated by whitespace.
mode, each containing a key and filename, separated by a single space.
# OPTIONS

View file

@ -53,44 +53,6 @@ With parameters, only the specified files are checked.
git annex fsck --incremental-schedule 30d --time-limit 5h
* `--distributed`
Normally, fsck only fixes the git-annex location logs when an inconsistecy
is detected. In distributed mode, each file that is checked will result
in a location log update noting the time that it was present.
This is useful in situations where repositories cannot be trusted to
continue to exist. By running a periodic distributed fsck, those
repositories can verify that they still exist and that the information
about their contents is still accurate.
This is not the default mode, because each distributed fsck increases
the size of the git-annex branch. While it takes care to log identical
location tracking lines for all keys, which will delta-compress well,
there is still overhead in committing the changes. If this causes
the git-annex branch to grow too big, it can be pruned using
[[git-annex-forget]](1)
* `--expire="[repository:]time`..."
This option makes the fsck check for location logs of the specified
repository that have not been updated by a distributed fsck within the
specified time period. Such stale location logs are then thrown out, so
git-annex will no longer think that a repository contains data, if it is
not participating in distributed fscking.
The repository can be specified using the name of a remote,
or the description or uuid of the repository. If a time is specified
without a repository, it is used as the default value for all
repositories. Note that location logs for the current repository are
never expired, since they can be verified directly.
The time is in the form "60d" or "1y". A time of "never" will disable
expiration.
Note that a remote can always run `fsck` later on to re-update the
location log if it was expired in error.
* `--numcopies=N`
Override the normally configured number of copies.

View file

@ -15,7 +15,7 @@ No verification is performed of the url's contents.
If the key and url are not specified on the command line, they are
instead read from stdin. Any number of lines can be provided in this
mode, each containing a key and url, separated by whitespace.
mode, each containing a key and url, separated by a single space.
# SEE ALSO

View file

@ -302,6 +302,11 @@ subdirectories).
See [[git-annex-fsck]](1) for details.
* `expire [repository:]time ...`
Expires repositories that have not recently performed an activity
(such as a fsck).
* `unused`
Checks the annex for data that does not correspond to any files present

View file

@ -247,6 +247,14 @@ Example:
42bf2035-0636-461d-a367-49e9dfd361dd fsck self 30m every day at any time; fsck 4b3ebc86-0faf-4892-83c5-ce00cbe30f0a 1h every year at any time timestamp=1385646997.053162s
## `activity.log`
Used to record the times of activities, such as fscks.
Example:
42bf2035-0636-461d-a367-49e9dfd361dd Fsck timestamp=1422387398.30395s
## `transitions.log`
Used to record transitions, eg by `git annex forget`

View file

@ -1,25 +0,0 @@
git-annex 5.20150113 released with [[!toggle text="these changes"]]
[[!toggleable text="""
* unlock: Don't allow unlocking files that have never been committed to git
before, to avoid an intractable problem that prevents the pre-commit
hook from telling if such a file is intended to be an annexed file or not.
* Avoid re-checksumming when migrating from hash to hashE backend.
Closes: #[774494](http://bugs.debian.org/774494)
* Fix build with process 1.2.1.0.
* Android: Provide a version built with -fPIE -pie to support Android 5.0.
* sync: Fix an edge case where syncing in a bare repository would try to
merge and so fail.
* Check git version at runtime, rather than assuming it will be the same
as the git version used at build time when running git-checkattr and
git-branch remove.
* Switch to using relative paths to the git repository.
- This allows the git repository to be moved while git-annex is running in
it, with fewer problems.
- On Windows, this avoids some of the problems with the absurdly small
MAX\_PATH of 260 bytes. In particular, git-annex repositories should
work in deeper/longer directory structures than before.
* Generate shorter keys for WORM and URL, avoiding keys that are longer
than used for SHA256, so as to not break on systems like Windows that
have very small maximum path length limits.
* Bugfix: A file named HEAD in the work tree could confuse some git commands
run by git-annex."""]]

View file

@ -1,34 +0,0 @@
git-annex 5.20150205 released with [[!toggle text="these changes"]]
[[!toggleable text="""
* info: Can now display info about a given uuid.
* Added to remote/uuid info: Count of the number of keys present
on the remote, and their size. This is rather expensive to calculate,
so comes last and --fast will disable it.
* info remote: Include the date of the last sync with the remote.
* sync: Added --message/-m option like git commit.
* remotedaemon: Fix problem that could prevent ssh connections being
made after two LOSTNET messages were received in a row (perhaps due to
two different network interfaces being brought down).
* Fix build failure when wget is not installed.
* Fix wording of message displayed when unable to get a file that
is available in untrusted repositories.
* addurl: When a Content-Disposition header suggests a filename to use,
addurl will consider using it, if it's reasonable and doesn't conflict
with an existing file. (--file overrides this)
* Fix default repository description created by git annex init,
which got broken by the relative path changes in the last release.
* init: Repository tuning parameters can now be passed when initializing a
repository for the first time. For details, see
http://git-annex.branchable.com/tuning/
* merge: Refuse to merge changes from a git-annex branch of a repo
that has been tuned in incompatible ways.
* Support annex.tune.objecthash1, annex.tune.objecthashlower, and
annex.tune.branchhash1.
* Remove support for building without cryptohash.
* Added MD5 and MD5E backends.
* assistant: Fix local pairing when ssh pubkey comment contains spaces.
* Avoid using fileSize which maxes out at just 2 gb on Windows.
Instead, use hFileSize, which doesn't have a bounded size.
Fixes support for files &gt; 2 gb on Windows.
* Windows: Fix running of the pre-commit-annex hook.
* Windows: Fix S3 special remote; need to call withSocketsDo. Thanks, Trent."""]]

View file

@ -0,0 +1,6 @@
git-annex 5.20150406.1 released with [[!toggle text="these changes"]]
[[!toggleable text="""
* Fixes a bug in the last release that caused rsync and possibly
other commands to hang at the end of a file transfer.
(--quiet is back to not blocking progress displays until
that code can be fixed properly.)"""]]

View file

@ -0,0 +1,33 @@
git-annex 5.20150406 released with [[!toggle text="these changes"]]
[[!toggleable text="""
* Prevent git-ls-files from double-expanding wildcards when an
unexpanded wildcard is passed to a git-annex command like add or find.
* Fix make build target. Thanks, Justin Geibel.
* Fix GETURLS in external special remote protocol to strip
downloader prefix from logged url info before checking for the
specified prefix.
* importfeed: Avoid downloading a redundant item from a feed whose
guid has been downloaded before, even when the url has changed.
* importfeed: Always store itemid in metadata; before this was only
done when annex.genmetadata was set.
* Relax debian package dependencies to git &gt;= 1:1.8.1 rather
than needing &gt;= 1:2.0.
* test: Fix --list-tests
* addurl --file: When used with a special remote that claims
urls and checks their contents, don't override the user's provided
filename with filenames that the special remote suggests. Also,
don't allow adding the url if the special remote says it contains
multiple files.
* import: --deduplicate and --cleanduplicates now output the keys
corresponding to duplicated files they process.
* expire: New command, for expiring inactive repositories.
* fsck: Record fsck activity for use by expire command.
* Fix truncation of parameters that could occur when using xargs git-annex.
* Significantly sped up processing of large numbers of directories
passed to a single git-annex command.
* version: Add --raw
* init: Improve fifo test to detect NFS systems that support fifos
but not well enough for sshcaching.
* --quiet now suppresses progress displays from eg, rsync.
(The option already suppressed git-annex's own built-in progress
displays.)"""]]

View file

@ -0,0 +1,38 @@
Global fsck updates all location log entries for a repo. This wastes disk
space.
I realized now that it can be implemented w/o such waste. Probably cheaply
enough to be the default!
What we need is a new log file, call it fscktimes.log.
This records the time of the last fsck of each repo.
`git annex fsck --expire` no longer needs to look at the location log at
all. It can just check the repo's fscktimes.log entry. If the entry is
recent enough, we know that the repo has fscked recently, and its location
log is good, and nothing needs to be done. Otherwise, we know that the repo
has stopped fscking, and we simply expire *all* its location logs.
Note that fscktimes.log is only used by fsck; it does not impact git-annex
generally or make it slower. And, it's very low overhead to update the one
file. Repos could do a fsck --fast on a daily basis and not grow the
git-annex branch much. Maybe on an hourly basis even.
(BTW, there is some overlap with the fsck.log file that is currently used to
hold the timestamp of the last local fsck. May be able to eliminate that
file too.)
----
It might be worth making the fsck.log record --fast and full fscks
separately so we know the last of each for each repo. This would let
--expire require periodic full fscks and more frequent fast fscks.
----
Hmm, --expire updates all the location logs when it thinks a repo has gone
missing. Why not just mark it dead? Again, this would save a lot of space!
It would complicate recovery if a repo had been offline and came back; it
would need to mark itself as not dead any longer.
> [[done]] --[[Joey]]

View file

@ -0,0 +1,14 @@
### Feature request
It is not possible to put encrypted content in place on remotes with just a
public GPG key. You always need the private key, even for encryption. I
guess this is because of how the cipher HMAC is used for replacing file names
with their hashes. However, if that requirement (having secret file names)
was dropped, I assume a pubkey-only mode could be implemented?
My specific use case is backup archiving. I have my backups packed in
archive files and want to use git-annex to copy the archives to offsite
remotes (S3). In that case, I don't care much about hiding file names, but
would appreciate the increased security of not having the secret key on the
backup server. It would only be needed if I wanted to verify or restore
backups.

View file

@ -0,0 +1,24 @@
[[!comment format=mdwn
username="https://www.google.com/accounts/o8/id?id=AItOawl6rte43qSRK1o2zn7Ww4Z8pgBmJm8gDrc"
nickname="Rickard"
subject="comment 2"
date="2015-04-04T07:34:58Z"
content="""
> The contents of files are also encrypted using your gpg private key
I assume you meant to say gpg *public* key here?
You're correct in that I can publish the symmetric HMAC key unencrypted with no bad effects for me. I've searched the documents but haven't found a way to tell git-annex to use a specific, unencrypted, symmetric key for HMAC, though. Is there a way?
> So, I see no benefit to the suggested mode.
I don't understand the reasoning that made you come to this conclusion.
Let me restate my use case:
With only the public part of a gpg key id available to a user, I would like that user to be able to add files to a git-annex repository. The user should then be able to copy the files encrypted to remotes that support encryption (S3 etc). The user should not be able to fetch or verify files from the encrypted remotes (since she lacks the private gpg key). The remote would be write-only for the user, basically. However, a friend of the user, possessing the private key (and having access to the remote), should be able to use the remote just like a normal git-annex remote.
This is the normal way of using gpg for asymmetric encryption of files. I would find it useful to be able to use git-annex in a similar way. As far as I can understand, only the encrypted HMAC key is stopping me from using git-annex in this way.
However, there might be other things in git-annex' design that would make it difficult or even impossible to implement this functionality. It could also be the case that there's no benefit to adding this functionality to git-annex because there is some other (simpler) way to achieve the same thing. Both these cases are perfectly acceptable, but I would then be interested in knowing a bit more details.
"""]]

View file

@ -0,0 +1,24 @@
[[!comment format=mdwn
username="joey"
subject="""comment 3"""
date="2015-04-06T17:04:34Z"
content="""
I somehow completely misread what you wanted! Thanks, it makes sense now.
I anticipate there would be one problem with this mode; `git annex fsck --from remote`
would fail because it would be unable to decrypt the encrypted content
when run on the client that is only able to encrypt to the public key, but
lacks the necessary private key to decrypt.
(So would `git annex move --to remote; git annex get --from remote`, but
presumably that failure is the point of the mode..)
It would be fairly easy to add this, I think. There is already support
for configuring the MAC algorithm to use to encrypt filenames. Your mode seems
to just need a "clear" mode that doesn't encrypt filenames at all.
It does add complication to crypto paths, and potential for user
foot-shooting though.
I'm going to move this feature request from bugs/ to todo/
"""]]

View file

@ -1,3 +1,188 @@
coming from [[bugs/weird_entry_in_process_list]] - are there plans to make an init.d / systemd .service file for git-annex?
my use case is that i have dedicated machines that will sync a common directory. they will run only one assistant - would patches to make a `git-annex` user, and the associated startup scripts, in the debian package be welcome? --[[anarcat]]
Here's a sample startup script:
<pre>
#!/bin/sh
### BEGIN INIT INFO
# Provides: gitannex
# Required-Start: $local_fs $network $remote_fs $syslog
# Required-Stop: $local_fs $network $remote_fs $syslog
# Default-Start: 2 3 4 5
# Default-Stop: 0 1 6
# Short-Description: start the git-annex assistant
# Description: start the git-annex assistant in the given directory
### END INIT INFO
# Author: Antoine Beaupré <anarcat@koumbit.org>
# Do NOT "set -e"
# PATH should only include /usr/* if it runs after the mountnfs.sh script
PATH=/sbin:/usr/sbin:/bin:/usr/bin
DESC="gitannex"
NAME=gitannex
# Account the assistant runs as; it must already exist.
USER=$NAME
# Must be an absolute path: it is checked with "test -x" below and handed to
# start-stop-daemon --exec, neither of which searches $PATH. A bare
# "git-annex" would be looked up relative to the current directory and the
# script would silently exit 0 without ever starting the assistant.
# Override in /etc/default/$NAME if git-annex is installed elsewhere.
DAEMON=/usr/bin/git-annex
DAEMON_ARGS="assistant"
PIDFILE=/var/run/$NAME.pid
SCRIPTNAME=/etc/init.d/$NAME
# Either "auto" (start every repository listed in the autostart file) or the
# path of a single repository to run the assistant in.
ANNEX=auto
# Extra start-stop-daemon options; start empty so that nothing is inherited
# from the calling environment.
EXTRA_OPTS=""
# Read configuration variable file if it is present
[ -r /etc/default/$NAME ] && . /etc/default/$NAME
# Exit if the package is not installed
[ -x "$DAEMON" ] || exit 0
# Load the VERBOSE setting and other rcS variables
. /lib/init/vars.sh
# Define LSB log_* functions.
# Depend on lsb-base (>= 3.2-14) to ensure that this file is present
# and status_of_proc is working.
. /lib/lsb/init-functions
if [ "$ANNEX" = "auto" ]; then
    DAEMON_ARGS="$DAEMON_ARGS --autostart"
else
    # Single-repository mode: run inside the repository and use the pid file
    # located under its .git/annex directory.
    cd "$ANNEX"
    PIDFILE="$ANNEX/.git/annex/daemon.pid"
    # NOTE(review): EXTRA_OPTS is expanded unquoted by do_start, so a
    # repository path containing whitespace will not work here.
    EXTRA_OPTS="--chdir $ANNEX"
fi
#
# Function that starts the daemon/service
#
do_start()
{
    # Start the assistant as $USER.
    # Exit status:
    #   0 - the daemon has been started
    #   1 - the daemon was already running
    #   2 - the daemon could not be started

    # Dry run (--test): fails when start-stop-daemon would not start
    # anything, which is reported to the caller as "already running".
    if ! start-stop-daemon --start --quiet --user $USER --pidfile $PIDFILE --exec $DAEMON --test > /dev/null
    then
        return 1
    fi

    # Real start; everything after "--" is passed to the daemon itself.
    # $EXTRA_OPTS and $DAEMON_ARGS are deliberately left unquoted so that
    # they split into separate arguments.
    if ! start-stop-daemon --start --quiet --user $USER --chuid $USER $EXTRA_OPTS --pidfile $PIDFILE --exec $DAEMON -- \
        $DAEMON_ARGS
    then
        return 2
    fi

    # --exec matching does not work for interpreted scripts; in that case
    # match on "--startas $DAEMON --name $NAME" instead
    # (Ref: #643337, start-stop-daemon(8)).
    #
    # If services started later depend on this one being ready to handle
    # requests, add the code that waits for readiness here. As a last
    # resort, sleep for some time.
}
#
# Function that stops the daemon/service
#
do_stop()
{
    # Stop the assistant.
    # Exit status:
    #   0 - the daemon has been stopped (or was not running)
    #   2 - the daemon could not be stopped

    # Preferred path: ask git-annex, running as $USER, to shut its own
    # daemon down. $DAEMON_ARGS is expanded inside the command string so
    # that the same arguments used for starting are passed along.
    su "$USER" -c "$DAEMON $DAEMON_ARGS --stop" && return 0

    # Fallback: the clean shutdown failed, so have start-stop-daemon
    # signal any remaining $DAEMON processes owned by $USER, escalating to
    # KILL per the --retry schedule. --oknodo makes "nothing to stop"
    # count as success.
    start-stop-daemon --stop --quiet --oknodo --retry=0/30/KILL/5 --user "$USER" --exec "$DAEMON"
    [ "$?" = 2 ] && return 2

    # Many daemons don't delete their pidfiles when they exit.
    rm -f "$PIDFILE"

    # Report success explicitly. No RETVAL variable is ever assigned in
    # this script, so returning "$RETVAL" would be an invalid empty return
    # value and would turn a successful stop into an error.
    return 0
}
#
# Function that sends a SIGHUP to the daemon/service
#
do_reload() {
    # Ask the running daemon to reload its configuration without a
    # restart: deliver signal 1 (SIGHUP) to the process selected by
    # $PIDFILE and $NAME. Only meaningful if the daemon actually supports
    # reloading on that signal.
    start-stop-daemon --stop --signal 1 --quiet \
        --pidfile $PIDFILE --name $NAME
    # The outcome of the signal delivery is intentionally not propagated.
    return 0
}
# Dispatch on the action given as the first command-line argument.
case "$1" in
start)
# Progress messages are suppressed when VERBOSE is "no".
[ "$VERBOSE" != no ] && log_daemon_msg "Starting $DESC" "$NAME"
do_start
# do_start status 0 or 1 is reported as success, 2 as failure.
case "$?" in
0|1) [ "$VERBOSE" != no ] && log_end_msg 0 ;;
2) [ "$VERBOSE" != no ] && log_end_msg 1 ;;
esac
;;
stop)
[ "$VERBOSE" != no ] && log_daemon_msg "Stopping $DESC" "$NAME"
do_stop
# do_stop status 0 or 1 is reported as success, 2 as failure.
case "$?" in
0|1) [ "$VERBOSE" != no ] && log_end_msg 0 ;;
2) [ "$VERBOSE" != no ] && log_end_msg 1 ;;
esac
;;
status)
# Exit with whatever status status_of_proc reports.
status_of_proc -p "$PIDFILE" "$DAEMON" "$NAME" && exit 0 || exit $?
;;
#reload|force-reload)
#
# If do_reload() is not implemented then leave this commented out
# and leave 'force-reload' as an alias for 'restart'.
#
#log_daemon_msg "Reloading $DESC" "$NAME"
#do_reload
#log_end_msg $?
#;;
restart|force-reload)
#
# If the "reload" option is implemented then remove the
# 'force-reload' alias
#
# Unlike start and stop, restart logs regardless of VERBOSE.
log_daemon_msg "Restarting $DESC" "$NAME"
do_stop
case "$?" in
0|1)
# The stop succeeded (status 0 or 1), so start again.
do_start
case "$?" in
0) log_end_msg 0 ;;
1) log_end_msg 1 ;; # Old process is still running
*) log_end_msg 1 ;; # Failed to start
esac
;;
*)
# Failed to stop
log_end_msg 1
;;
esac
;;
*)
# Unknown or missing action: print usage on stderr and exit with status 3.
#echo "Usage: $SCRIPTNAME {start|stop|restart|reload|force-reload}" >&2
echo "Usage: $SCRIPTNAME {start|stop|status|restart|force-reload}" >&2
exit 3
;;
esac
# No-op so that the script's final exit status is 0 when an action falls
# through to here without calling exit.
:
</pre>
Now this is not without problems:
1. it assumes a gitannex user is created outside of the script
2. it assumes a gitannex repository is created outside of the script and specified in the `/etc/default/gitannex` file (or added to the autostart file)
3. it is Debian-specific (a proper init script would be POSIX only and/or a `.service` file)
Maybe using [metainit](https://wiki.debian.org/MetaInit) would be a good idea here? --[[anarcat]]

View file

@ -1,5 +1,5 @@
Name: git-annex
Version: 5.20150327
Version: 5.20150406.1
Cabal-Version: >= 1.8
License: GPL-3
Maintainer: Joey Hess <id@joeyh.name>