Merge branch 'master' of git://git-annex.branchable.com

Richard Hartmann 2014-02-25 12:38:25 +01:00
commit 77648cead5
82 changed files with 1185 additions and 383 deletions

View file

@ -87,8 +87,7 @@ catKey' modeguaranteed ref mode
| modeguaranteed = catObject ref
| otherwise = L.take 8192 <$> catObject ref
{- Looks up the file mode corresponding to the Ref using the running
- cat-file.
{- Looks up the key corresponding to the Ref using the running cat-file.
-
- Currently this always has to look in HEAD, because cat-file --batch
- does not offer a way to specify that we want to look up a tree object

View file

@ -514,10 +514,8 @@ saveState nocommit = doSideAction $ do
downloadUrl :: [Url.URLString] -> FilePath -> Annex Bool
downloadUrl urls file = go =<< annexWebDownloadCommand <$> Annex.getGitConfig
where
go Nothing = do
opts <- map Param . annexWebOptions <$> Annex.getGitConfig
headers <- getHttpHeaders
anyM (\u -> Url.withUserAgent $ Url.download u headers opts file) urls
go Nothing = Url.withUrlOptions $ \uo ->
anyM (\u -> Url.download u file uo) urls
go (Just basecmd) = liftIO $ anyM (downloadcmd basecmd) urls
downloadcmd basecmd url =
boolSystem "sh" [Param "-c", Param $ gencmd url basecmd]

Annex/MetaData.hs Normal file (61 lines)
View file

@ -0,0 +1,61 @@
{- git-annex metadata
-
- Copyright 2014 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Annex.MetaData where
import Common.Annex
import qualified Annex
import Types.MetaData
import Logs.MetaData
import Annex.CatFile
import qualified Data.Set as S
import qualified Data.Map as M
import Data.Time.Calendar
import Data.Time.Clock
import Data.Time.Clock.POSIX
tagMetaField :: MetaField
tagMetaField = MetaField "tag"
yearMetaField :: MetaField
yearMetaField = MetaField "year"
monthMetaField :: MetaField
monthMetaField = MetaField "month"
{- Adds metadata for a file that has just been ingested into the
- annex, but has not yet been committed to git.
-
- When the file has been modified, the metadata is copied over
- from the old key to the new key. Note that it looks at the old key as
- committed to HEAD -- the new key may or may not have already been staged
- in the annex.
-
- Also, can generate new metadata, if configured to do so.
-}
genMetaData :: Key -> FilePath -> FileStatus -> Annex ()
genMetaData key file status = do
maybe noop (flip copyMetaData key) =<< catKeyFileHEAD file
whenM (annexGenMetaData <$> Annex.getGitConfig) $ do
metadata <- getCurrentMetaData key
let metadata' = genMetaData' status metadata
unless (metadata' == emptyMetaData) $
addMetaData key metadata'
{- Generates metadata from the FileStatus.
- Does not overwrite any existing metadata values. -}
genMetaData' :: FileStatus -> MetaData -> MetaData
genMetaData' status old = MetaData $ M.fromList $ filter isnew
[ (yearMetaField, S.singleton $ toMetaValue $ show y)
, (monthMetaField, S.singleton $ toMetaValue $ show m)
]
where
isnew (f, _) = S.null (currentMetaDataValues f old)
(y, m, _d) = toGregorian $ utctDay $
posixSecondsToUTCTime $ realToFrac $
modificationTime status
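
As a worked illustration of the year/month derivation above, a self-contained sketch using only the time library (the sample epoch value is arbitrary, chosen near this commit's date; it is not from the commit):

import Data.Time.Calendar (toGregorian)
import Data.Time.Clock (utctDay)
import Data.Time.Clock.POSIX (POSIXTime, posixSecondsToUTCTime)

-- Mirrors the (y, m, _d) extraction performed by genMetaData'.
yearMonth :: POSIXTime -> (Integer, Int)
yearMonth t = (y, m)
  where
    (y, m, _d) = toGregorian $ utctDay $ posixSecondsToUTCTime t

main :: IO ()
main = print (yearMonth 1393328305)  -- prints (2014,2)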

View file

@ -1,13 +1,15 @@
{- Url downloading, with git-annex user agent.
{- Url downloading, with git-annex user agent and configured http
- headers and wget/curl options.
-
- Copyright 2013 Joey Hess <joey@kitenet.net>
- Copyright 2013-2014 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Annex.Url (
module U,
withUserAgent,
withUrlOptions,
getUrlOptions,
getUserAgent,
) where
@ -23,5 +25,18 @@ getUserAgent :: Annex (Maybe U.UserAgent)
getUserAgent = Annex.getState $
Just . fromMaybe defaultUserAgent . Annex.useragent
withUserAgent :: (Maybe U.UserAgent -> IO a) -> Annex a
withUserAgent a = liftIO . a =<< getUserAgent
getUrlOptions :: Annex U.UrlOptions
getUrlOptions = U.UrlOptions
<$> getUserAgent
<*> headers
<*> options
where
headers = do
v <- annexHttpHeadersCommand <$> Annex.getGitConfig
case v of
Just cmd -> lines <$> liftIO (readProcess "sh" ["-c", cmd])
Nothing -> annexHttpHeaders <$> Annex.getGitConfig
options = map Param . annexWebOptions <$> Annex.getGitConfig
withUrlOptions :: (U.UrlOptions -> IO a) -> Annex a
withUrlOptions a = liftIO . a =<< getUrlOptions
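
The calling convention this sets up, as used by the other hunks in this commit (downloadUrl above, checkhttp below): build the options once in the Annex monad, then hand them to each IO url action. A sketch, assuming git-annex's anyM helper:

downloadAny :: [U.URLString] -> FilePath -> Annex Bool
downloadAny urls file = withUrlOptions $ \uo ->
    anyM (\u -> U.download u file uo) urls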

View file

@ -5,11 +5,10 @@
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Annex.View where
import Common.Annex
import Annex.View.ViewedFile
import Types.View
import Types.MetaData
import qualified Git
@ -28,22 +27,16 @@ import Annex.Link
import Annex.CatFile
import Logs.MetaData
import Logs.View
import Utility.Glob
import Utility.FileMode
import Types.Command
import Config
import CmdLine.Action
import qualified Data.Set as S
import System.Path.WildMatch
import qualified Data.Map as M
import "mtl" Control.Monad.Writer
#ifdef WITH_TDFA
import Text.Regex.TDFA
import Text.Regex.TDFA.String
#else
import Text.Regex
#endif
{- Each visible ViewFilter in a view results in another level of
- subdirectory nesting. When a file matches multiple ways, it will appear
- in multiple subdirectories. This means there is a bit of an exponential
@ -127,42 +120,13 @@ combineViewFilter old@(FilterValues olds) (FilterValues news)
combineViewFilter (FilterValues _) newglob@(FilterGlob _) =
(newglob, Widening)
combineViewFilter (FilterGlob oldglob) new@(FilterValues s)
| all (matchGlob (compileGlob oldglob) . fromMetaValue) (S.toList s) = (new, Narrowing)
| all (matchGlob (compileGlob oldglob CaseInsensative) . fromMetaValue) (S.toList s) = (new, Narrowing)
| otherwise = (new, Widening)
combineViewFilter (FilterGlob old) newglob@(FilterGlob new)
| old == new = (newglob, Unchanged)
| matchGlob (compileGlob old) new = (newglob, Narrowing)
| matchGlob (compileGlob old CaseInsensative) new = (newglob, Narrowing)
| otherwise = (newglob, Widening)
{- Converts a filepath used in a reference branch to the
- filename that will be used in the view.
-
- No two filepaths from the same branch should yield the same result,
- so all directory structure needs to be included in the output file
- in some way. However, the branch's directory structure is not relevant
- in the view.
-
- So, from dir/subdir/file.foo, generate file_{dir;subdir}.foo
-
- (To avoid collisions with a filename that already contains {foo},
- that is doubled to {{foo}}.)
-}
fileViewFromReference :: MkFileView
fileViewFromReference f = concat
[ double base
, if null dirs then "" else "_{" ++ double (intercalate ";" dirs) ++ "}"
, double $ concat extensions
]
where
(path, basefile) = splitFileName f
dirs = filter (/= ".") $ map dropTrailingPathSeparator (splitPath path)
(base, extensions) = splitShortExtensions basefile
double = replace "{" "{{" . replace "}" "}}"
fileViewReuse :: MkFileView
fileViewReuse = takeFileName
{- Generates views for a file from a branch, based on its metadata
- and the filename used in the branch.
-
@ -176,10 +140,10 @@ fileViewReuse = takeFileName
- evaluate this function with the view parameter and reuse
- the result. The globs in the view will then be compiled and memoized.
-}
fileViews :: View -> MkFileView -> FilePath -> MetaData -> [FileView]
fileViews view =
viewedFiles :: View -> MkViewedFile -> FilePath -> MetaData -> [ViewedFile]
viewedFiles view =
let matchers = map viewComponentMatcher (viewComponents view)
in \mkfileview file metadata ->
in \mkviewedfile file metadata ->
let matches = map (\m -> m metadata) matchers
in if any isNothing matches
then []
@ -187,8 +151,8 @@ fileViews view =
let paths = pathProduct $
map (map toViewPath) (visible matches)
in if null paths
then [mkfileview file]
else map (</> mkfileview file) paths
then [mkviewedfile file]
else map (</> mkviewedfile file) paths
where
visible = map (fromJust . snd) .
filter (viewVisible . fst) .
@ -205,31 +169,9 @@ viewComponentMatcher viewcomponent = \metadata ->
matcher = case viewFilter viewcomponent of
FilterValues s -> \values -> S.intersection s values
FilterGlob glob ->
let regex = compileGlob glob
let cglob = compileGlob glob CaseInsensative
in \values ->
S.filter (matchGlob regex . fromMetaValue) values
compileGlob :: String -> Regex
compileGlob glob =
#ifdef WITH_TDFA
case compile (defaultCompOpt {caseSensitive = False}) defaultExecOpt regex of
Right r -> r
Left _ -> error $ "failed to compile regex: " ++ regex
#else
mkRegexWithOpts regex False True
#endif
where
regex = '^':wildToRegex glob
matchGlob :: Regex -> String -> Bool
matchGlob regex val =
#ifdef WITH_TDFA
case execute regex val of
Right (Just _) -> True
_ -> False
#else
isJust $ matchRegex regex val
#endif
S.filter (matchGlob cglob . fromMetaValue) values
toViewPath :: MetaValue -> FilePath
toViewPath = concatMap escapeslash . fromMetaValue
@ -268,23 +210,28 @@ pathProduct (l:ls) = foldl combinel l ls
where
combinel xs ys = [combine x y | x <- xs, y <- ys]
{- Extracts the metadata from a fileview, based on the view that was used
- to construct it. -}
fromView :: View -> FileView -> MetaData
fromView view f = foldr (uncurry updateMetaData) newMetaData (zip fields values)
{- Extracts the metadata from a ViewedFile, based on the view that was used
- to construct it.
-
- Derived metadata is excluded.
-}
fromView :: View -> ViewedFile -> MetaData
fromView view f = MetaData $
M.fromList (zip fields values) `M.difference` derived
where
visible = filter viewVisible (viewComponents view)
fields = map viewField visible
paths = splitDirectories $ dropFileName f
values = map fromViewPath paths
paths = splitDirectories (dropFileName f)
values = map (S.singleton . fromViewPath) paths
MetaData derived = getViewedFileMetaData f
{- Constructing a view that will match arbitrary metadata, and applying
- it to a file yields a set of FileViews which all contain the same
- it to a file yields a set of ViewedFile which all contain the same
- MetaFields that were present in the input metadata
- (excluding fields that are not visible). -}
prop_view_roundtrips :: FilePath -> MetaData -> Bool -> Bool
prop_view_roundtrips f metadata visible = null f || viewTooLarge view ||
all hasfields (fileViews view fileViewFromReference f metadata)
all hasfields (viewedFiles view viewedFileFromReference f metadata)
where
view = View (Git.Ref "master") $
map (\(mf, mv) -> ViewComponent mf (FilterValues $ S.filter (not . null . fromMetaValue) mv) visible)
@ -292,11 +239,32 @@ prop_view_roundtrips f metadata visible = null f || viewTooLarge view ||
visiblefields = sort (map viewField $ filter viewVisible (viewComponents view))
hasfields fv = sort (map fst (fromMetaData (fromView view fv))) == visiblefields
{- A directory foo/bar/baz/ is turned into metadata fields
- /=foo, foo/=bar, foo/bar/=baz.
-
- Note that this may generate MetaFields that legalField rejects.
- This is necessary to have a 1:1 mapping between directory names and
- fields. So this MetaData cannot safely be serialized. -}
getDirMetaData :: FilePath -> MetaData
getDirMetaData d = MetaData $ M.fromList $ zip fields values
where
dirs = splitDirectories d
fields = map (MetaField . addTrailingPathSeparator . joinPath)
(inits dirs)
values = map (S.singleton . toMetaValue . fromMaybe "" . headMaybe)
(tails dirs)
getWorkTreeMetaData :: FilePath -> MetaData
getWorkTreeMetaData = getDirMetaData . dropFileName
getViewedFileMetaData :: FilePath -> MetaData
getViewedFileMetaData = getDirMetaData . dirFromViewedFile . takeFileName
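
A worked example of the scheme described above, restated as a standalone sketch that uses plain pairs in place of git-annex's MetaData types (an assumption made for brevity):

import Data.List (inits, tails)
import Data.Maybe (fromMaybe, listToMaybe)
import System.FilePath (addTrailingPathSeparator, joinPath, splitDirectories)

-- dirPairs "foo/bar/baz" ==
--   [("/","foo"),("foo/","bar"),("foo/bar/","baz"),("foo/bar/baz/","")]
dirPairs :: FilePath -> [(String, String)]
dirPairs d = zip fields values
  where
    dirs = splitDirectories d
    fields = map (addTrailingPathSeparator . joinPath) (inits dirs)
    values = map (fromMaybe "" . listToMaybe) (tails dirs)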
{- Applies a view to the currently checked out branch, generating a new
- branch for the view.
-}
applyView :: View -> Annex Git.Branch
applyView view = applyView' fileViewFromReference view
applyView view = applyView' viewedFileFromReference getWorkTreeMetaData view
{- Generates a new branch for a View, which must be a more narrow
- version of the View originally used to generate the currently
@ -304,18 +272,18 @@ applyView view = applyView' fileViewFromReference view
- in view, not any others.
-}
narrowView :: View -> Annex Git.Branch
narrowView = applyView' fileViewReuse
narrowView = applyView' viewedFileReuse getViewedFileMetaData
{- Go through each file in the currently checked out branch.
- If the file is not annexed, skip it, unless it's a dotfile in the top.
- Look up the metadata of annexed files, and generate any FileViews,
- Look up the metadata of annexed files, and generate any ViewedFiles,
- and stage them.
-
- Currently only works in indirect mode. Must be run from top of
- repository.
-}
applyView' :: MkFileView -> View -> Annex Git.Branch
applyView' mkfileview view = do
applyView' :: MkViewedFile -> (FilePath -> MetaData) -> View -> Annex Git.Branch
applyView' mkviewedfile getfilemetadata view = do
top <- fromRepo Git.repoPath
(l, clean) <- inRepo $ Git.LsFiles.inRepo [top]
liftIO . nukeFile =<< fromRepo gitAnnexViewIndex
@ -329,10 +297,11 @@ applyView' mkfileview view = do
void $ stopUpdateIndex uh
void clean
where
genfileviews = fileViews view mkfileview -- enables memoization
genviewedfiles = viewedFiles view mkviewedfile -- enables memoization
go uh hasher f (Just (k, _)) = do
metadata <- getCurrentMetaData k
forM_ (genfileviews f metadata) $ \fv -> do
let metadata' = getfilemetadata f `unionMetaData` metadata
forM_ (genviewedfiles f metadata') $ \fv -> do
stagesymlink uh hasher fv =<< inRepo (gitAnnexLink fv k)
go uh hasher f Nothing
| "." `isPrefixOf` f = do
@ -381,7 +350,7 @@ updateView view ref oldref = genViewBranch view $ do
- Note that removes must be handled before adds. This is so
- that moving a file from x/foo/ to x/bar/ adds back the metadata for x.
-}
withViewChanges :: (FileView -> Key -> CommandStart) -> (FileView -> Key -> CommandStart) -> Annex ()
withViewChanges :: (ViewedFile -> Key -> CommandStart) -> (ViewedFile -> Key -> CommandStart) -> Annex ()
withViewChanges addmeta removemeta = do
makeabs <- flip fromTopFilePath <$> gitRepo
(diffs, cleanup) <- inRepo $ DiffTree.diffIndex Git.Ref.headRef

Annex/View/ViewedFile.hs Normal file (75 lines)
View file

@ -0,0 +1,75 @@
{- filenames (not paths) used in views
-
- Copyright 2014 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Annex.View.ViewedFile (
ViewedFile,
MkViewedFile,
viewedFileFromReference,
viewedFileReuse,
dirFromViewedFile,
prop_viewedFile_roundtrips,
) where
import Common.Annex
type FileName = String
type ViewedFile = FileName
type MkViewedFile = FilePath -> ViewedFile
{- Converts a filepath used in a reference branch to the
- filename that will be used in the view.
-
- No two filepaths from the same branch should yield the same result,
- so all directory structure needs to be included in the output filename
- in some way.
-
- So, from dir/subdir/file.foo, generate file_%dir%subdir%.foo
-}
viewedFileFromReference :: MkViewedFile
viewedFileFromReference f = concat
[ escape base
, if null dirs then "" else "_%" ++ intercalate "%" (map escape dirs) ++ "%"
, escape $ concat extensions
]
where
(path, basefile) = splitFileName f
dirs = filter (/= ".") $ map dropTrailingPathSeparator (splitPath path)
(base, extensions) = splitShortExtensions basefile
{- To avoid collisions with filenames or directories that contain
- '%', and to allow the original directories to be extracted
- from the ViewedFile, '%' is escaped to '\%' (and '\' to '\\').
-}
escape :: String -> String
escape = replace "%" "\\%" . replace "\\" "\\\\"
{- For use when operating already within a view, so whatever filepath
- is present in the work tree is already a ViewedFile. -}
viewedFileReuse :: MkViewedFile
viewedFileReuse = takeFileName
{- Extracts from a ViewedFile the directory where the file is located
- in the reference branch. -}
dirFromViewedFile :: ViewedFile -> FilePath
dirFromViewedFile = joinPath . drop 1 . sep [] ""
where
sep l _ [] = reverse l
sep l curr (c:cs)
| c == '%' = sep (reverse curr:l) "" cs
| c == '\\' = case cs of
(c':cs') -> sep l (c':curr) cs'
[] -> sep l curr cs
| otherwise = sep l (c:curr) cs
prop_viewedFile_roundtrips :: FilePath -> Bool
prop_viewedFile_roundtrips f
| isAbsolute f = True -- Only relative paths are encoded.
| any (isPathSeparator) (end f) = True -- Filenames wanted, not directories.
| otherwise = dir == dirFromViewedFile (viewedFileFromReference f)
where
dir = joinPath $ beginning $ splitDirectories f
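
A round trip worked by hand from the definitions above (a GHCi session, not part of the commit):

ghci> viewedFileFromReference "dir/subdir/file.foo"
"file_%dir%subdir%.foo"
ghci> dirFromViewedFile "file_%dir%subdir%.foo"
"dir/subdir"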

View file

@ -30,6 +30,7 @@ import System.Posix (signalProcess, sigTERM)
#else
import Utility.WinProcess
#endif
import Data.Default
{- Before the assistant can be restarted, have to remove our
- gitAnnexUrlFile and our gitAnnexPidFile. Pausing the watcher is also
@ -81,7 +82,7 @@ newAssistantUrl repo = do
( return url
, delayed $ waiturl urlfile
)
listening url = catchBoolIO $ fst <$> exists url [] Nothing
listening url = catchBoolIO $ fst <$> exists url def
delayed a = do
threadDelay 100000 -- 1/10th of a second
a

View file

@ -89,10 +89,10 @@ canUpgrade urgency urlrenderer d = ifM autoUpgradeEnabled
getDistributionInfo :: Assistant (Maybe GitAnnexDistribution)
getDistributionInfo = do
ua <- liftAnnex Url.getUserAgent
uo <- liftAnnex Url.getUrlOptions
liftIO $ withTmpFile "git-annex.tmp" $ \tmpfile h -> do
hClose h
ifM (Url.downloadQuiet distributionInfoUrl [] [] tmpfile ua)
ifM (Url.downloadQuiet distributionInfoUrl tmpfile uo)
( readish <$> readFileStrict tmpfile
, return Nothing
)

View file

@ -190,8 +190,8 @@ escapeHeader = escapeURIString (\c -> isUnescapedInURI c && c /= ' ')
getRepoInfo :: RemoteConfig -> Widget
getRepoInfo c = do
ua <- liftAnnex Url.getUserAgent
exists <- liftIO $ catchDefaultIO False $ fst <$> Url.exists url [] ua
uo <- liftAnnex Url.getUrlOptions
exists <- liftIO $ catchDefaultIO False $ fst <$> Url.exists url uo
[whamlet|
<a href="#{url}">
Internet Archive item

View file

@ -19,6 +19,7 @@ import Annex.Content
import Annex.Content.Direct
import Annex.Perms
import Annex.Link
import Annex.MetaData
import qualified Annex
import qualified Annex.Queue
#ifdef WITH_CLIBS
@ -145,26 +146,32 @@ ingest Nothing = return (Nothing, Nothing)
ingest (Just source) = do
backend <- chooseBackend $ keyFilename source
k <- genKey source backend
cache <- liftIO $ genInodeCache $ contentLocation source
case (cache, inodeCache source) of
(_, Nothing) -> go k cache
(Just newc, Just c) | compareStrong c newc -> go k cache
ms <- liftIO $ catchMaybeIO $ getFileStatus $ contentLocation source
let mcache = toInodeCache =<< ms
case (mcache, inodeCache source) of
(_, Nothing) -> go k mcache ms
(Just newc, Just c) | compareStrong c newc -> go k mcache ms
_ -> failure "changed while it was being added"
where
go k cache = ifM isDirect ( godirect k cache , goindirect k cache )
go k mcache ms = ifM isDirect
( godirect k mcache ms
, goindirect k mcache ms
)
goindirect (Just (key, _)) mcache = do
goindirect (Just (key, _)) mcache ms = do
catchAnnex (moveAnnex key $ contentLocation source)
(undo (keyFilename source) key)
maybe noop (genMetaData key (keyFilename source)) ms
liftIO $ nukeFile $ keyFilename source
return $ (Just key, mcache)
goindirect Nothing _ = failure "failed to generate a key"
goindirect _ _ _ = failure "failed to generate a key"
godirect (Just (key, _)) (Just cache) = do
godirect (Just (key, _)) (Just cache) ms = do
addInodeCache key cache
maybe noop (genMetaData key (keyFilename source)) ms
finishIngestDirect key source
return $ (Just key, Just cache)
godirect _ _ = failure "failed to generate a key"
godirect _ _ _ = failure "failed to generate a key"
failure msg = do
warning $ keyFilename source ++ " " ++ msg

View file

@ -134,8 +134,7 @@ perform relaxed url file = ifAnnexed file addurl geturl
setUrlPresent key url
next $ return True
| otherwise = do
headers <- getHttpHeaders
(exists, samesize) <- Url.withUserAgent $ Url.check url headers $ keySize key
(exists, samesize) <- Url.withUrlOptions $ Url.check url (keySize key)
if exists && samesize
then do
setUrlPresent key url
@ -192,8 +191,7 @@ download url file = do
-}
addSizeUrlKey :: URLString -> Key -> Annex Key
addSizeUrlKey url key = do
headers <- getHttpHeaders
size <- snd <$> Url.withUserAgent (Url.exists url headers)
size <- snd <$> Url.withUrlOptions (Url.exists url)
return $ key { keySize = size }
cleanup :: URLString -> FilePath -> Key -> Maybe FilePath -> Annex Bool
@ -212,10 +210,9 @@ cleanup url file key mtmp = do
nodownload :: Bool -> URLString -> FilePath -> Annex Bool
nodownload relaxed url file = do
headers <- getHttpHeaders
(exists, size) <- if relaxed
then pure (True, Nothing)
else Url.withUserAgent $ Url.exists url headers
else Url.withUrlOptions (Url.exists url)
if exists
then do
key <- Backend.URL.fromUrl url size

View file

@ -121,10 +121,10 @@ findDownloads u = go =<< downloadFeed u
downloadFeed :: URLString -> Annex (Maybe Feed)
downloadFeed url = do
showOutput
ua <- Url.getUserAgent
uo <- Url.getUrlOptions
liftIO $ withTmpFile "feed" $ \f h -> do
fileEncoding h
ifM (Url.download url [] [] f ua)
ifM (Url.download url f uo)
( parseFeedString <$> hGetContentsStrict h
, return Nothing
)

View file

@ -10,6 +10,7 @@ module Command.MetaData where
import Common.Annex
import qualified Annex
import Command
import Annex.MetaData
import Logs.MetaData
import Types.MetaData
@ -17,7 +18,7 @@ import qualified Data.Set as S
import Data.Time.Clock.POSIX
def :: [Command]
def = [withOptions [setOption, tagOption, untagOption] $
def = [withOptions [setOption, tagOption, untagOption, jsonOption] $
command "metadata" paramPaths seek
SectionMetaData "sets metadata of a file"]
@ -55,14 +56,16 @@ perform :: POSIXTime -> [ModMeta] -> Key -> CommandPerform
perform _ [] k = next $ cleanup k
perform now ms k = do
oldm <- getCurrentMetaData k
let m = foldl' unionMetaData newMetaData $ map (modMeta oldm) ms
let m = foldl' unionMetaData emptyMetaData $ map (modMeta oldm) ms
addMetaData' k m now
next $ cleanup k
cleanup :: Key -> CommandCleanup
cleanup k = do
m <- getCurrentMetaData k
showLongNote $ unlines $ concatMap showmeta $ fromMetaData $ currentMetaData m
l <- map unwrapmeta . fromMetaData <$> getCurrentMetaData k
maybeShowJSON l
showLongNote $ unlines $ concatMap showmeta l
return True
where
showmeta (f, vs) = map (\v -> fromMetaField f ++ "=" ++ fromMetaValue v) $ S.toList vs
unwrapmeta (f, v) = (fromMetaField f, map fromMetaValue (S.toList v))
showmeta (f, vs) = map ((f ++ "=") ++) vs

View file

@ -14,6 +14,7 @@ import qualified Command.Add
import qualified Command.Fix
import Annex.Direct
import Annex.View
import Annex.View.ViewedFile
import Logs.View
import Logs.MetaData
import Types.View
@ -52,12 +53,12 @@ startIndirect f = next $ do
startDirect :: [String] -> CommandStart
startDirect _ = next $ next $ preCommitDirect
addViewMetaData :: View -> FileView -> Key -> CommandStart
addViewMetaData :: View -> ViewedFile -> Key -> CommandStart
addViewMetaData v f k = do
showStart "metadata" f
next $ next $ changeMetaData k $ fromView v f
removeViewMetaData :: View -> FileView -> Key -> CommandStart
removeViewMetaData :: View -> ViewedFile -> Key -> CommandStart
removeViewMetaData v f k = do
showStart "metadata" f
next $ next $ changeMetaData k $ unsetMetaData $ fromView v f

View file

@ -10,11 +10,11 @@ module Command.VAdd where
import Common.Annex
import Command
import Annex.View
import Command.View (paramView, parseViewParam, checkoutViewBranch)
import Command.View (parseViewParam, checkoutViewBranch)
def :: [Command]
def = [notBareRepo $ notDirect $
command "vadd" paramView seek SectionMetaData "add subdirs to current view"]
def = [notBareRepo $ notDirect $ command "vadd" (paramRepeating "FIELD=GLOB")
seek SectionMetaData "add subdirs to current view"]
seek :: CommandSeek
seek = withWords start

View file

@ -14,6 +14,7 @@ import qualified Git.Command
import qualified Git.Ref
import qualified Git.Branch
import Types.MetaData
import Annex.MetaData
import Types.View
import Annex.View
import Logs.View
@ -43,12 +44,19 @@ perform view = do
next $ checkoutViewBranch view applyView
paramView :: String
paramView = paramPair (paramRepeating "FIELD=VALUE") (paramRepeating "TAG")
paramView = paramPair (paramRepeating "TAG") (paramRepeating "FIELD=VALUE")
{- Parse field=value
-
- Note that the field may not be a legal metadata field name,
- but it's let through anyway (using MetaField rather than mkMetaField).
- This is useful when matching on directory names with spaces,
- which are not legal MetaFields.
-}
parseViewParam :: String -> (MetaField, String)
parseViewParam s = case separate (== '=') s of
(tag, []) -> (tagMetaField, tag)
(field, wanted) -> either error (\f -> (f, wanted)) (mkMetaField field)
(field, wanted) -> (MetaField field, wanted)
mkView :: [String] -> Annex View
mkView params = do

View file

@ -79,11 +79,3 @@ setCrippledFileSystem :: Bool -> Annex ()
setCrippledFileSystem b = do
setConfig (annexConfig "crippledfilesystem") (Git.Config.boolConfig b)
Annex.changeGitConfig $ \c -> c { annexCrippledFileSystem = b }
{- Gets the http headers to use. -}
getHttpHeaders :: Annex [String]
getHttpHeaders = do
v <- annexHttpHeadersCommand <$> Annex.getGitConfig
case v of
Just cmd -> lines <$> liftIO (readProcess "sh" ["-c", cmd])
Nothing -> annexHttpHeaders <$> Annex.getGitConfig

View file

@ -75,24 +75,35 @@ removeLoose r s = do
return True
else return False
{- Explodes all pack files, and deletes them.
-
- First moves all pack files to a temp dir, before unpacking them each in
- turn.
-
- This is because unpack-objects will not unpack a pack file if it's in the
- git repo.
-
- Also, this prevents unpack-objects from possibly looking at corrupt
- pack files to see if they contain an object, while unpacking a
- non-corrupt pack file.
-}
explodePacks :: Repo -> IO Bool
explodePacks r = do
packs <- listPackFiles r
if null packs
then return False
else do
putStrLn "Unpacking all pack files."
mapM_ go packs
return True
explodePacks r = go =<< listPackFiles r
where
go packfile = withTmpFileIn (localGitDir r) "pack" $ \tmp _ -> do
moveFile packfile tmp
go [] = return False
go packs = withTmpDir "packs" $ \tmpdir -> do
putStrLn "Unpacking all pack files."
forM_ packs $ \packfile -> do
moveFile packfile (tmpdir </> takeFileName packfile)
nukeFile $ packIdxFile packfile
forM_ packs $ \packfile -> do
let tmp = tmpdir </> takeFileName packfile
allowRead tmp
-- May fail, if pack file is corrupt.
void $ tryIO $
pipeWrite [Param "unpack-objects", Param "-r"] r $ \h ->
L.hPut h =<< L.readFile tmp
return True
{- Try to retrieve a set of missing objects, from the remotes of a
- repository. Returns any that could not be retrieved.

View file

@ -5,8 +5,6 @@
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Limit where
import Common.Annex
@ -29,18 +27,13 @@ import Logs.Group
import Logs.Unused
import Logs.Location
import Git.Types (RefDate(..))
import Utility.Glob
import Utility.HumanTime
import Utility.DataUnits
import Data.Time.Clock.POSIX
import qualified Data.Set as S
import qualified Data.Map as M
import System.Path.WildMatch
#ifdef WITH_TDFA
import Text.Regex.TDFA
import Text.Regex.TDFA.String
#endif
{- Checks if there are user-specified limits. -}
limited :: Annex Bool
@ -82,33 +75,21 @@ addInclude :: String -> Annex ()
addInclude = addLimit . limitInclude
limitInclude :: MkLimit
limitInclude glob = Right $ const $ return . matchglob glob
limitInclude glob = Right $ const $ return . matchGlobFile glob
{- Add a limit to skip files that match the glob. -}
addExclude :: String -> Annex ()
addExclude = addLimit . limitExclude
limitExclude :: MkLimit
limitExclude glob = Right $ const $ return . not . matchglob glob
limitExclude glob = Right $ const $ return . not . matchGlobFile glob
{- Could just use wildCheckCase, but this way the regex is only compiled
- once. Also, we use regex-TDFA when available, because it's less buggy
- in its support of non-unicode characters. -}
matchglob :: String -> MatchInfo -> Bool
matchglob glob (MatchingFile fi) =
#ifdef WITH_TDFA
case cregex of
Right r -> case execute r (matchFile fi) of
Right (Just _) -> True
_ -> False
Left _ -> error $ "failed to compile regex: " ++ regex
matchGlobFile :: String -> (MatchInfo -> Bool)
matchGlobFile glob = go
where
cregex = compile defaultCompOpt defaultExecOpt regex
regex = '^':wildToRegex glob
#else
wildCheckCase glob (matchFile fi)
#endif
matchglob _ (MatchingKey _) = False
cglob = compileGlob glob CaseSensative -- memoized
go (MatchingKey _) = False
go (MatchingFile fi) = matchGlob cglob (matchFile fi)
{- Adds a limit to skip files not believed to be present
- in a specified repository. Optionally on a prior date. -}
@ -270,9 +251,13 @@ addMetaData = addLimit . limitMetaData
limitMetaData :: MkLimit
limitMetaData s = case parseMetaData s of
Left e -> Left e
Right (f, v) -> Right $ const $ checkKey (check f v)
Right (f, v) ->
let cglob = compileGlob (fromMetaValue v) CaseInsensative
in Right $ const $ checkKey (check f cglob)
where
check f v k = S.member v . metaDataValues f <$> getCurrentMetaData k
check f cglob k = not . S.null
. S.filter (matchGlob cglob . fromMetaValue)
. metaDataValues f <$> getCurrentMetaData k
addTimeLimit :: String -> Annex ()
addTimeLimit s = do

View file

@ -28,10 +28,10 @@
module Logs.MetaData (
getCurrentMetaData,
getMetaData,
addMetaData,
addMetaData',
currentMetaData,
copyMetaData,
) where
import Common.Annex
@ -55,7 +55,7 @@ getMetaData = readLog . metaDataLogFile
getCurrentMetaData :: Key -> Annex MetaData
getCurrentMetaData = currentMetaData . collect <$$> getMetaData
where
collect = foldl' unionMetaData newMetaData . map value . S.toAscList
collect = foldl' unionMetaData emptyMetaData . map value . S.toAscList
{- Adds in some metadata, which can override existing values, or unset
- them, but otherwise leaves any existing metadata as-is. -}
@ -129,9 +129,26 @@ simplifyLog s = case sl of
go c _ [] = c
go c newer (l:ls)
| unique == newMetaData = go c newer ls
| unique == emptyMetaData = go c newer ls
| otherwise = go (l { value = unique } : c)
(unionMetaData unique newer) ls
where
older = value l
unique = older `differenceMetaData` newer
{- Copies the metadata from the old key to the new key.
-
- The exact content of the metadata file is copied, so that the timestamps
- remain the same, and because this is more space-efficient in the git
- repository.
-
- Any metadata already attached to the new key is not preserved.
-}
copyMetaData :: Key -> Key -> Annex ()
copyMetaData oldkey newkey
| oldkey == newkey = noop
| otherwise = do
l <- getMetaData oldkey
unless (S.null l) $
Annex.Branch.change (metaDataLogFile newkey) $
const $ showLog l

View file

@ -158,9 +158,7 @@ tryGitConfigRead r
| haveconfig r' -> return r'
| otherwise -> configlist_failed
Left _ -> configlist_failed
| Git.repoIsHttp r = do
headers <- getHttpHeaders
store $ geturlconfig headers
| Git.repoIsHttp r = store geturlconfig
| Git.GCrypt.isEncrypted r = handlegcrypt =<< getConfigMaybe (remoteConfig r "uuid")
| Git.repoIsUrl r = return r
| otherwise = store $ safely $ onLocal r $ do
@ -185,11 +183,11 @@ tryGitConfigRead r
return $ Right r'
Left l -> return $ Left l
geturlconfig headers = do
ua <- Url.getUserAgent
geturlconfig = do
uo <- Url.getUrlOptions
v <- liftIO $ withTmpFile "git-annex.tmp" $ \tmpfile h -> do
hClose h
ifM (Url.downloadQuiet (Git.repoLocation r ++ "/config") headers [] tmpfile ua)
ifM (Url.downloadQuiet (Git.repoLocation r ++ "/config") tmpfile uo)
( pipedconfig "git" [Param "config", Param "--null", Param "--list", Param "--file", File tmpfile]
, return $ Left undefined
)
@ -255,14 +253,14 @@ tryGitConfigRead r
-}
inAnnex :: Remote -> Key -> Annex (Either String Bool)
inAnnex rmt key
| Git.repoIsHttp r = checkhttp =<< getHttpHeaders
| Git.repoIsHttp r = checkhttp
| Git.repoIsUrl r = checkremote
| otherwise = checklocal
where
r = repo rmt
checkhttp headers = do
checkhttp = do
showChecking r
ifM (anyM (\u -> Url.withUserAgent $ Url.checkBoth u headers (keySize key)) (keyUrls rmt key))
ifM (Url.withUrlOptions $ \uo -> anyM (\u -> Url.checkBoth u (keySize key) uo) (keyUrls rmt key))
( return $ Right True
, return $ Left "not found"
)

View file

@ -73,16 +73,16 @@ gen r u c gc = new <$> remoteCost gc veryExpensiveRemoteCost
glacierSetup :: Maybe UUID -> Maybe CredPair -> RemoteConfig -> Annex (RemoteConfig, UUID)
glacierSetup mu mcreds c = do
u <- maybe (liftIO genUUID) return mu
glacierSetup' (isJust mu) u mcreds c
glacierSetup' :: Bool -> UUID -> Maybe CredPair -> RemoteConfig -> Annex (RemoteConfig, UUID)
glacierSetup' enabling u mcreds c = do
c' <- setRemoteCredPair c (AWS.creds u) mcreds
glacierSetup' (isJust mu) u c'
glacierSetup' :: Bool -> UUID -> RemoteConfig -> Annex (RemoteConfig, UUID)
glacierSetup' enabling u c = do
c' <- encryptionSetup c
let fullconfig = c' `M.union` defaults
unless enabling $
genVault fullconfig u
gitConfigSpecialRemote u fullconfig "glacier" "true"
c'' <- setRemoteCredPair fullconfig (AWS.creds u) mcreds
return (c'', u)
return (c', u)
where
remotename = fromJust (M.lookup "name" c)
defvault = remotename ++ "-" ++ fromUUID u

View file

@ -76,9 +76,10 @@ gen r u c gc = new <$> remoteCost gc expensiveRemoteCost
s3Setup :: Maybe UUID -> Maybe CredPair -> RemoteConfig -> Annex (RemoteConfig, UUID)
s3Setup mu mcreds c = do
u <- maybe (liftIO genUUID) return mu
s3Setup' u mcreds c
s3Setup' :: UUID -> Maybe CredPair -> RemoteConfig -> Annex (RemoteConfig, UUID)
s3Setup' u mcreds c = if isIA c then archiveorg else defaulthost
c' <- setRemoteCredPair c (AWS.creds u) mcreds
s3Setup' u c'
s3Setup' :: UUID -> RemoteConfig -> Annex (RemoteConfig, UUID)
s3Setup' u c = if isIA c then archiveorg else defaulthost
where
remotename = fromJust (M.lookup "name" c)
defbucket = remotename ++ "-" ++ fromUUID u
@ -92,8 +93,7 @@ s3Setup' u mcreds c = if isIA c then archiveorg else defaulthost
use fullconfig = do
gitConfigSpecialRemote u fullconfig "s3" "true"
c' <- setRemoteCredPair fullconfig (AWS.creds u) mcreds
return (c', u)
return (fullconfig, u)
defaulthost = do
c' <- encryptionSetup c

View file

@ -14,7 +14,6 @@ import Types.Remote
import qualified Git
import qualified Git.Construct
import Annex.Content
import Config
import Config.Cost
import Logs.Web
import Types.Key
@ -117,9 +116,8 @@ checkKey' key us = firsthit us (Right False) $ \u -> do
return $ Left "quvi support needed for this url"
#endif
DefaultDownloader -> do
headers <- getHttpHeaders
Url.withUserAgent $ catchMsgIO .
Url.checkBoth u' headers (keySize key)
Url.withUrlOptions $ catchMsgIO .
Url.checkBoth u' (keySize key)
where
firsthit [] miss _ = return miss
firsthit (u:rest) _ a = do

View file

@ -16,7 +16,12 @@ import qualified Data.ByteString.Lazy.UTF8 as L8
import qualified Data.ByteString.Lazy as L
import Network.URI (normalizePathSegments)
import qualified Control.Exception as E
import qualified Control.Exception.Lifted as EL
#if MIN_VERSION_DAV(0,6,0)
import Network.HTTP.Client (HttpException(..))
#else
import Network.HTTP.Conduit (HttpException(..))
#endif
import Network.HTTP.Types
import System.IO.Error
@ -82,10 +87,10 @@ webdavSetup mu mcreds c = do
let url = fromMaybe (error "Specify url=") $
M.lookup "url" c
c' <- encryptionSetup c
creds <- getCreds c' u
creds <- maybe (getCreds c' u) (return . Just) mcreds
testDav url creds
gitConfigSpecialRemote u c' "webdav" "true"
c'' <- setRemoteCredPair c' (davCreds u) mcreds
c'' <- setRemoteCredPair c' (davCreds u) creds
return (c'', u)
store :: Remote -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool
@ -105,7 +110,7 @@ storeEncrypted r (cipher, enck) k p = metered (Just p) k $ \meterupdate ->
storeHelper :: Remote -> Key -> DavUrl -> DavUser -> DavPass -> L.ByteString -> IO Bool
storeHelper r k baseurl user pass b = catchBoolIO $ do
davMkdir tmpurl user pass
mkdirRecursiveDAV tmpurl user pass
storeChunks k tmpurl keyurl chunksize storer recorder finalizer
where
tmpurl = tmpLocation baseurl k
@ -114,11 +119,10 @@ storeHelper r k baseurl user pass b = catchBoolIO $ do
storer urls = storeChunked chunksize urls storehttp b
recorder url s = storehttp url (L8.fromString s)
finalizer srcurl desturl = do
void $ catchMaybeHttp (deleteContent desturl user pass)
davMkdir (urlParent desturl) user pass
moveContent srcurl (B8.fromString desturl) user pass
storehttp url v = putContent url user pass
(contentType, v)
void $ tryNonAsync (deleteDAV desturl user pass)
mkdirRecursiveDAV (urlParent desturl) user pass
moveDAV srcurl desturl user pass
storehttp url = putDAV url user pass
retrieveCheap :: Remote -> Key -> FilePath -> Annex Bool
retrieveCheap _ _ _ = return False
@ -128,7 +132,7 @@ retrieve r k _f d p = metered (Just p) k $ \meterupdate ->
davAction r False $ \(baseurl, user, pass) -> liftIO $ catchBoolIO $
withStoredFiles r k baseurl user pass onerr $ \urls -> do
meteredWriteFileChunks meterupdate d urls $ \url -> do
mb <- davGetUrlContent url user pass
mb <- getDAV url user pass
case mb of
Nothing -> throwIO "download failed"
Just b -> return b
@ -148,7 +152,7 @@ retrieveEncrypted r (cipher, enck) k d p = metered (Just p) k $ \meterupdate ->
feeder _ _ [] _ = noop
feeder user pass (url:urls) h = do
mb <- davGetUrlContent url user pass
mb <- getDAV url user pass
case mb of
Nothing -> throwIO "download failed"
Just b -> do
@ -160,7 +164,7 @@ remove r k = davAction r False $ \(baseurl, user, pass) -> liftIO $ do
-- Delete the key's whole directory, including any chunked
-- files, etc, in a single action.
let url = davLocation baseurl k
isJust <$> catchMaybeHttp (deleteContent url user pass)
isJust . eitherToMaybe <$> tryNonAsync (deleteDAV url user pass)
checkPresent :: Remote -> Key -> Annex (Either String Bool)
checkPresent r k = davAction r noconn go
@ -173,7 +177,7 @@ checkPresent r k = davAction r noconn go
where
check [] = return $ Right True
check (url:urls) = do
v <- davUrlExists url user pass
v <- existsDAV url user pass
if v == Right True
then check urls
else return v
@ -182,7 +186,7 @@ checkPresent r k = davAction r noconn go
- or if there's a problem accessing it,
- or perhaps this was an intermittent error. -}
onerr url = do
v <- davUrlExists url user pass
v <- existsDAV url user pass
return $ if v == Right True
then Left $ "failed to read " ++ url
else v
@ -199,11 +203,11 @@ withStoredFiles
withStoredFiles r k baseurl user pass onerr a
| isJust $ chunkSize $ config r = do
let chunkcount = keyurl ++ chunkCount
v <- davGetUrlContent chunkcount user pass
v <- getDAV chunkcount user pass
case v of
Just s -> a $ listChunks keyurl $ L8.toString s
Nothing -> do
chunks <- probeChunks keyurl $ \u -> (== Right True) <$> davUrlExists u user pass
chunks <- probeChunks keyurl $ \u -> (== Right True) <$> existsDAV u user pass
if null chunks
then onerr chunkcount
else a chunks
@ -244,33 +248,12 @@ tmpLocation baseurl k = addTrailingPathSeparator $
davUrl :: DavUrl -> FilePath -> DavUrl
davUrl baseurl file = baseurl </> file
davUrlExists :: DavUrl -> DavUser -> DavPass -> IO (Either String Bool)
davUrlExists url user pass = decode <$> catchHttp get
where
decode (Right _) = Right True
#if ! MIN_VERSION_http_conduit(1,9,0)
decode (Left (Left (StatusCodeException status _)))
#else
decode (Left (Left (StatusCodeException status _ _)))
#endif
| statusCode status == statusCode notFound404 = Right False
decode (Left e) = Left $ showEitherException e
#if ! MIN_VERSION_DAV(0,4,0)
get = getProps url user pass
#else
get = getProps url user pass Nothing
#endif
davGetUrlContent :: DavUrl -> DavUser -> DavPass -> IO (Maybe L.ByteString)
davGetUrlContent url user pass = fmap (snd . snd) <$>
catchMaybeHttp (getPropsAndContent url user pass)
{- Creates a directory in WebDAV, if not already present; also creating
- any missing parent directories. -}
davMkdir :: DavUrl -> DavUser -> DavPass -> IO ()
davMkdir url user pass = go url
mkdirRecursiveDAV :: DavUrl -> DavUser -> DavPass -> IO ()
mkdirRecursiveDAV url user pass = go url
where
make u = makeCollection u user pass
make u = mkdirDAV u user pass
go u = do
r <- E.try (make u) :: IO (Either E.SomeException Bool)
@ -287,35 +270,6 @@ davMkdir url user pass = go url
- to use this directory will fail. -}
Left _ -> return ()
{- Catches HTTP and IO exceptions. -}
catchMaybeHttp :: IO a -> IO (Maybe a)
catchMaybeHttp a = (Just <$> a) `E.catches`
[ E.Handler $ \(_e :: HttpException) -> return Nothing
, E.Handler $ \(_e :: E.IOException) -> return Nothing
]
{- Catches HTTP and IO exceptions -}
catchHttp :: IO a -> IO (Either EitherException a)
catchHttp a = (Right <$> a) `E.catches`
[ E.Handler $ \(e :: HttpException) -> return $ Left $ Left e
, E.Handler $ \(e :: E.IOException) -> return $ Left $ Right e
]
type EitherException = Either HttpException E.IOException
showEitherException :: EitherException -> String
#if ! MIN_VERSION_http_conduit(1,9,0)
showEitherException (Left (StatusCodeException status _)) =
#else
showEitherException (Left (StatusCodeException status _ _)) =
#endif
show $ statusMessage status
showEitherException (Left httpexception) = show httpexception
showEitherException (Right ioexception) = show ioexception
throwIO :: String -> IO a
throwIO msg = ioError $ mkIOError userErrorType msg Nothing Nothing
urlParent :: DavUrl -> DavUrl
urlParent url = dropTrailingPathSeparator $
normalizePathSegments (dropTrailingPathSeparator url ++ "/..")
@ -326,25 +280,20 @@ urlParent url = dropTrailingPathSeparator $
testDav :: String -> Maybe CredPair -> Annex ()
testDav baseurl (Just (u, p)) = do
showSideAction "testing WebDAV server"
test "make directory" $ davMkdir baseurl user pass
test "write file" $ putContent testurl user pass
(contentType, L.empty)
test "delete file" $ deleteContent testurl user pass
test "make directory" $ mkdirRecursiveDAV baseurl user pass
test "write file" $ putDAV testurl user pass L.empty
test "delete file" $ deleteDAV testurl user pass
where
test desc a = liftIO $
either (\e -> throwIO $ "WebDAV failed to " ++ desc ++ ": " ++ showEitherException e)
either (\e -> throwIO $ "WebDAV failed to " ++ desc ++ ": " ++ show e)
(const noop)
=<< catchHttp a
=<< tryNonAsync a
user = toDavUser u
pass = toDavPass p
testurl = davUrl baseurl "git-annex-test"
testDav _ Nothing = error "Need to configure webdav username and password."
{- Content-Type to use for files uploaded to WebDAV. -}
contentType :: Maybe B8.ByteString
contentType = Just $ B8.fromString "application/octet-stream"
getCreds :: RemoteConfig -> UUID -> Annex (Maybe CredPair)
getCreds c u = getRemoteCredPairFor "webdav" c (davCreds u)
@ -354,3 +303,103 @@ davCreds u = CredPairStorage
, credPairEnvironment = ("WEBDAV_USERNAME", "WEBDAV_PASSWORD")
, credPairRemoteKey = Just "davcreds"
}
{- Content-Type to use for files uploaded to WebDAV. -}
contentType :: Maybe B8.ByteString
contentType = Just $ B8.fromString "application/octet-stream"
throwIO :: String -> IO a
throwIO msg = ioError $ mkIOError userErrorType msg Nothing Nothing
{---------------------------------------------------------------------
- Low-level DAV operations, using the new DAV monad when available.
---------------------------------------------------------------------}
putDAV :: DavUrl -> DavUser -> DavPass -> L.ByteString -> IO ()
putDAV url user pass b =
#if MIN_VERSION_DAV(0,6,0)
goDAV url user pass $ putContentM (contentType, b)
#else
putContent url user pass (contentType, b)
#endif
getDAV :: DavUrl -> DavUser -> DavPass -> IO (Maybe L.ByteString)
getDAV url user pass = eitherToMaybe <$> tryNonAsync go
where
#if MIN_VERSION_DAV(0,6,0)
go = goDAV url user pass $ snd <$> getContentM
#else
go = snd . snd <$> getPropsAndContent url user pass
#endif
deleteDAV :: DavUrl -> DavUser -> DavPass -> IO ()
deleteDAV url user pass =
#if MIN_VERSION_DAV(0,6,0)
goDAV url user pass delContentM
#else
deleteContent url user pass
#endif
moveDAV :: DavUrl -> DavUrl -> DavUser -> DavPass -> IO ()
moveDAV url newurl user pass =
#if MIN_VERSION_DAV(0,6,0)
goDAV url user pass $ moveContentM newurl'
#else
moveContent url newurl' user pass
#endif
where
newurl' = B8.fromString newurl
mkdirDAV :: DavUrl -> DavUser -> DavPass -> IO Bool
mkdirDAV url user pass =
#if MIN_VERSION_DAV(0,6,0)
goDAV url user pass mkCol
#else
makeCollection url user pass
#endif
existsDAV :: DavUrl -> DavUser -> DavPass -> IO (Either String Bool)
existsDAV url user pass = either (Left . show) id <$> tryNonAsync check
where
ispresent = return . Right
#if MIN_VERSION_DAV(0,6,0)
check = goDAV url user pass $ do
setDepth Nothing
EL.catchJust
(matchStatusCodeException notFound404)
(getPropsM >> ispresent True)
(const $ ispresent False)
#else
check = E.catchJust
(matchStatusCodeException notFound404)
#if ! MIN_VERSION_DAV(0,4,0)
(getProps url user pass >> ispresent True)
#else
(getProps url user pass Nothing >> ispresent True)
#endif
(const $ ispresent False)
#endif
matchStatusCodeException :: Status -> HttpException -> Maybe ()
#if MIN_VERSION_DAV(0,6,0)
matchStatusCodeException want (StatusCodeException s _ _)
#else
matchStatusCodeException want (StatusCodeException s _)
#endif
| s == want = Just ()
| otherwise = Nothing
matchStatusCodeException _ _ = Nothing
#if MIN_VERSION_DAV(0,6,0)
goDAV :: DavUrl -> DavUser -> DavPass -> DAVT IO a -> IO a
goDAV url user pass a = choke $ evalDAVT url $ do
setCreds user pass
a
where
choke :: IO (Either String a) -> IO a
choke f = do
x <- f
case x of
Left e -> error e
Right r -> return r
#endif

View file

@ -55,6 +55,7 @@ import qualified Crypto
import qualified Annex.Init
import qualified Annex.CatFile
import qualified Annex.View
import qualified Annex.View.ViewedFile
import qualified Logs.View
import qualified Utility.Path
import qualified Utility.FileMode
@ -151,6 +152,7 @@ properties = localOption (QuickCheckTests 1000) $ testGroup "QuickCheck"
, testProperty "prop_metadata_serialize" Types.MetaData.prop_metadata_serialize
, testProperty "prop_branchView_legal" Logs.View.prop_branchView_legal
, testProperty "prop_view_roundtrips" Annex.View.prop_view_roundtrips
, testProperty "prop_viewedFile_rountrips" Annex.View.ViewedFile.prop_viewedFile_roundtrips
]
{- These tests set up the test environment, but also test some basic parts

View file

@ -49,6 +49,7 @@ data GitConfig = GitConfig
, annexAutoUpgrade :: AutoUpgrade
, annexExpireUnused :: Maybe (Maybe Duration)
, annexSecureEraseCommand :: Maybe String
, annexGenMetaData :: Bool
, coreSymlinks :: Bool
, gcryptId :: Maybe String
}
@ -81,6 +82,7 @@ extractGitConfig r = GitConfig
, annexExpireUnused = maybe Nothing Just . parseDuration
<$> getmaybe (annex "expireunused")
, annexSecureEraseCommand = getmaybe (annex "secure-erase-command")
, annexGenMetaData = getbool (annex "genmetadata") False
, coreSymlinks = getbool "core.symlinks" True
, gcryptId = getmaybe "core.gcrypt-id"
}

View file

@ -17,7 +17,6 @@ module Types.MetaData (
MetaSerializable,
toMetaField,
mkMetaField,
tagMetaField,
fromMetaField,
toMetaValue,
mkMetaValue,
@ -25,7 +24,7 @@ module Types.MetaData (
unsetMetaData,
fromMetaValue,
fromMetaData,
newMetaData,
emptyMetaData,
updateMetaData,
unionMetaData,
differenceMetaData,
@ -81,7 +80,7 @@ instance MetaSerializable MetaData where
serialize (MetaData m) = unwords $ concatMap go $ M.toList m
where
go (f, vs) = serialize f : map serialize (S.toList vs)
deserialize = Just . getfield newMetaData . words
deserialize = Just . getfield emptyMetaData . words
where
getfield m [] = m
getfield m (w:ws) = maybe m (getvalues m ws) (deserialize w)
@ -116,19 +115,29 @@ instance MetaSerializable CurrentlySet where
deserialize "-" = Just (CurrentlySet False)
deserialize _ = Nothing
{- Fields cannot be empty, contain whitespace, or start with "+-" as
- that would break the serialization. -}
toMetaField :: String -> Maybe MetaField
toMetaField f
| legalField f = Just $ MetaField f
| otherwise = Nothing
{- Fields cannot be empty, contain whitespace, or start with "+-" as
- that would break the serialization.
-
- Additionally, fields should not contain any form of path separator, as
- that would break views.
-
- So, require they have an alphanumeric first letter, with the remainder
- being either alphanumeric or a small set of whitelisted common punctuation.
-}
legalField :: String -> Bool
legalField f
| null f = False
| any isSpace f = False
| any (`isPrefixOf` f) ["+", "-"] = False
| otherwise = True
legalField [] = False
legalField (c1:cs)
| not (isAlphaNum c1) = False
| otherwise = all legalchars cs
where
legalchars c
| isAlphaNum c = True
| otherwise = c `elem` "_-."
toMetaValue :: String -> MetaValue
toMetaValue = MetaValue (CurrentlySet True)
@ -152,8 +161,8 @@ fromMetaValue (MetaValue _ f) = f
fromMetaData :: MetaData -> [(MetaField, S.Set MetaValue)]
fromMetaData (MetaData m) = M.toList m
newMetaData :: MetaData
newMetaData = MetaData M.empty
emptyMetaData :: MetaData
emptyMetaData = MetaData M.empty
{- Can be used to set a value, or to unset it, depending on whether
- the MetaValue has CurrentlySet or not. -}
@ -202,10 +211,10 @@ data ModMeta
- Note that the new MetaData does not include all the
- values set in the input metadata. It only contains changed values. -}
modMeta :: MetaData -> ModMeta -> MetaData
modMeta _ (AddMeta f v) = updateMetaData f v newMetaData
modMeta _ (DelMeta f oldv) = updateMetaData f (unsetMetaValue oldv) newMetaData
modMeta _ (AddMeta f v) = updateMetaData f v emptyMetaData
modMeta _ (DelMeta f oldv) = updateMetaData f (unsetMetaValue oldv) emptyMetaData
modMeta m (SetMeta f v) = updateMetaData f v $
foldr (updateMetaData f) newMetaData $
foldr (updateMetaData f) emptyMetaData $
map unsetMetaValue $ S.toList $ currentMetaDataValues f m
{- Parses field=value, field+=value, field-=value -}
@ -233,9 +242,6 @@ mkMetaField f = maybe (Left $ badField f) Right (toMetaField f)
badField :: String -> String
badField f = "Illegal metadata field name, \"" ++ f ++ "\""
tagMetaField :: MetaField
tagMetaField = MetaField "tag"
{- Avoid putting too many fields in the map; extremely large maps make
- the serialization test slow due to the sheer amount of data.
- It's unlikely that more than 100 fields of metadata will be used. -}
@ -254,7 +260,7 @@ prop_metadata_sane :: MetaData -> MetaField -> MetaValue -> Bool
prop_metadata_sane m f v = and
[ S.member v $ metaDataValues f m'
, not (isSet v) || S.member v (currentMetaDataValues f m')
, differenceMetaData m' newMetaData == m'
, differenceMetaData m' emptyMetaData == m'
]
where
m' = updateMetaData f v m

View file

@ -35,10 +35,6 @@ data ViewComponent = ViewComponent
instance Arbitrary ViewComponent where
arbitrary = ViewComponent <$> arbitrary <*> arbitrary <*> arbitrary
{- Only files with metadata matching the view are displayed. -}
type FileView = FilePath
type MkFileView = FilePath -> FileView
data ViewFilter
= FilterValues (S.Set MetaValue)
| FilterGlob String

Utility/Glob.hs Normal file (57 lines)
View file

@ -0,0 +1,57 @@
{- file globbing
-
- This uses TDFA when available, with a fallback to regex-compat.
- TDFA is less buggy in its support for non-unicode characters.
-
- Copyright 2014 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Utility.Glob (
Glob,
GlobCase(..),
compileGlob,
matchGlob
) where
import System.Path.WildMatch
#ifdef WITH_TDFA
import Text.Regex.TDFA
import Text.Regex.TDFA.String
#else
import Text.Regex
#endif
newtype Glob = Glob Regex
data GlobCase = CaseSensative | CaseInsensative
{- Compiles a glob to a regex, that can be repeatedly used. -}
compileGlob :: String -> GlobCase -> Glob
compileGlob glob globcase = Glob $
#ifdef WITH_TDFA
case compile (defaultCompOpt {caseSensitive = casesentitive}) defaultExecOpt regex of
Right r -> r
Left _ -> error $ "failed to compile regex: " ++ regex
#else
mkRegexWithOpts regex casesentitive True
#endif
where
regex = '^':wildToRegex glob
casesentitive = case globcase of
CaseSensative -> True
CaseInsensative -> False
matchGlob :: Glob -> String -> Bool
matchGlob (Glob regex) val =
#ifdef WITH_TDFA
case execute regex val of
Right (Just _) -> True
_ -> False
#else
isJust $ matchRegex regex val
#endif
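
A hedged usage sketch of the new module (a GHCi session; glob-to-regex semantics per System.Path.WildMatch):

ghci> let g = compileGlob "*.mp3" CaseInsensative
ghci> matchGlob g "Podcast.MP3"
True
ghci> matchGlob g "notes.txt"
False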

View file

@ -10,6 +10,7 @@
module Utility.Url (
URLString,
UserAgent,
UrlOptions(..),
check,
checkBoth,
exists,
@ -23,6 +24,7 @@ import Network.URI
import qualified Network.Browser as Browser
import Network.HTTP
import Data.Either
import Data.Default
import qualified Build.SysConfig
@ -32,14 +34,24 @@ type Headers = [String]
type UserAgent = String
data UrlOptions = UrlOptions
{ userAgent :: Maybe UserAgent
, reqHeaders :: Headers
, reqParams :: [CommandParam]
}
instance Default UrlOptions
where
def = UrlOptions Nothing [] []
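
With the Default instance, call sites that need no special settings can pass def (as newAssistantUrl's `exists url def` above does), while others override individual fields. A minimal sketch:

plain :: UrlOptions
plain = def

withAgent :: UrlOptions
withAgent = def { userAgent = Just "git-annex" }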
{- Checks that an url exists and could be successfully downloaded,
- also checking that its size, if available, matches a specified size. -}
checkBoth :: URLString -> Headers -> Maybe Integer -> Maybe UserAgent -> IO Bool
checkBoth url headers expected_size ua = do
v <- check url headers expected_size ua
checkBoth :: URLString -> Maybe Integer -> UrlOptions -> IO Bool
checkBoth url expected_size uo = do
v <- check url expected_size uo
return (fst v && snd v)
check :: URLString -> Headers -> Maybe Integer -> Maybe UserAgent -> IO (Bool, Bool)
check url headers expected_size = handle <$$> exists url headers
check :: URLString -> Maybe Integer -> UrlOptions -> IO (Bool, Bool)
check url expected_size = handle <$$> exists url
where
handle (False, _) = (False, False)
handle (True, Nothing) = (True, True)
@ -55,8 +67,8 @@ check url headers expected_size = handle <$$> exists url headers
- Uses curl otherwise, when available, since curl handles https better
- than does Haskell's Network.Browser.
-}
exists :: URLString -> Headers -> Maybe UserAgent -> IO (Bool, Maybe Integer)
exists url headers ua = case parseURIRelaxed url of
exists :: URLString -> UrlOptions -> IO (Bool, Maybe Integer)
exists url uo = case parseURIRelaxed url of
Just u
| uriScheme u == "file:" -> do
s <- catchMaybeIO $ getFileStatus (unEscapeString $ uriPath u)
@ -70,7 +82,7 @@ exists url headers ua = case parseURIRelaxed url of
Just ('2':_:_) -> return (True, extractsize output)
_ -> dne
else do
r <- request u headers HEAD ua
r <- request u HEAD uo
case rspCode r of
(2,_,_) -> return (True, size r)
_ -> return (False, Nothing)
@ -78,12 +90,12 @@ exists url headers ua = case parseURIRelaxed url of
where
dne = return (False, Nothing)
curlparams = addUserAgent ua $
curlparams = addUserAgent uo $
[ Param "-s"
, Param "--head"
, Param "-L", Param url
, Param "-w", Param "%{http_code}"
] ++ concatMap (\h -> [Param "-H", Param h]) headers
] ++ concatMap (\h -> [Param "-H", Param h]) (reqHeaders uo) ++ (reqParams uo)
extractsize s = case lastMaybe $ filter ("Content-Length:" `isPrefixOf`) (lines s) of
Just l -> case lastMaybe $ words l of
@ -94,9 +106,10 @@ exists url headers ua = case parseURIRelaxed url of
size = liftM Prelude.read . lookupHeader HdrContentLength . rspHeaders
-- works for both wget and curl commands
addUserAgent :: Maybe UserAgent -> [CommandParam] -> [CommandParam]
addUserAgent Nothing ps = ps
addUserAgent (Just ua) ps = ps ++ [Param "--user-agent", Param ua]
addUserAgent :: UrlOptions -> [CommandParam] -> [CommandParam]
addUserAgent uo ps = case userAgent uo of
Nothing -> ps
Just ua -> ps ++ [Param "--user-agent", Param ua]
{- Used to download large files, such as the contents of keys.
-
@ -105,15 +118,15 @@ addUserAgent (Just ua) ps = ps ++ [Param "--user-agent", Param ua]
- would not be appropriate to test at configure time and build support
- for only one in.
-}
download :: URLString -> Headers -> [CommandParam] -> FilePath -> Maybe UserAgent -> IO Bool
download :: URLString -> FilePath -> UrlOptions -> IO Bool
download = download' False
{- No output, even on error. -}
downloadQuiet :: URLString -> Headers -> [CommandParam] -> FilePath -> Maybe UserAgent -> IO Bool
downloadQuiet :: URLString -> FilePath -> UrlOptions -> IO Bool
downloadQuiet = download' True
download' :: Bool -> URLString -> Headers -> [CommandParam] -> FilePath -> Maybe UserAgent -> IO Bool
download' quiet url headers options file ua =
download' :: Bool -> URLString -> FilePath -> UrlOptions -> IO Bool
download' quiet url file uo =
case parseURIRelaxed url of
Just u
| uriScheme u == "file:" -> do
@ -124,7 +137,7 @@ download' quiet url headers options file ua =
| otherwise -> ifM (inPath "wget") (wget , curl)
_ -> return False
where
headerparams = map (\h -> Param $ "--header=" ++ h) headers
headerparams = map (\h -> Param $ "--header=" ++ h) (reqHeaders uo)
wget = go "wget" $ headerparams ++ quietopt "-q" ++ wgetparams
{- Regular wget needs --clobber to continue downloading an existing
- file. On Android, busybox wget is used, which does not
@ -142,7 +155,7 @@ download' quiet url headers options file ua =
curl = go "curl" $ headerparams ++ quietopt "-s" ++
[Params "-f -L -C - -# -o"]
go cmd opts = boolSystem cmd $
addUserAgent ua $ options++opts++[File file, File url]
addUserAgent uo $ reqParams uo++opts++[File file, File url]
quietopt s
| quiet = [Param s]
| otherwise = []
@ -157,14 +170,14 @@ download' quiet url headers options file ua =
- Unfortunately, does not handle https, so should only be used
- when curl is not available.
-}
request :: URI -> Headers -> RequestMethod -> Maybe UserAgent -> IO (Response String)
request url headers requesttype ua = go 5 url
request :: URI -> RequestMethod -> UrlOptions -> IO (Response String)
request url requesttype uo = go 5 url
where
go :: Int -> URI -> IO (Response String)
go 0 _ = error "Too many redirects "
go n u = do
rsp <- Browser.browse $ do
maybe noop Browser.setUserAgent ua
maybe noop Browser.setUserAgent (userAgent uo)
Browser.setErrHandler ignore
Browser.setOutHandler ignore
Browser.setAllowRedirects False
@ -174,7 +187,7 @@ request url headers requesttype ua = go 5 url
(3,0,x) | x /= 5 -> redir (n - 1) u rsp
_ -> return rsp
addheaders req = setHeaders req (rqHeaders req ++ userheaders)
userheaders = rights $ map parseHeader headers
userheaders = rights $ map parseHeader (reqHeaders uo)
ignore = const noop
redir n u rsp = case retrieveHeaders HdrLocation rsp of
[] -> return rsp
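Since the headers, extra options, and user agent now travel together in a single UrlOptions value, settings such as annex.web-options reach every wget and curl invocation made above. For example (option value illustrative only):

    git config annex.web-options "--no-check-certificate"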

21
debian/changelog vendored
View file

@ -2,6 +2,27 @@ git-annex (5.20140222) UNRELEASED; urgency=medium
* Fix handling of rsync remote urls containing a username,
including rsync.net.
* --metadata field=value can now use globs to match, and matches
case insensitively, the same as git annex view field=value does.
* When constructing views, metadata is available about the location of the
file in the view's reference branch. Allows incorporating parts of the
directory hierarchy in a view.
For example `git annex view tag=* podcasts/=*` makes a view in the form
tag/showname.
* annex.genmetadata can be set to make git-annex automatically set
metadata (year and month) when adding files.
* Preserve metadata when staging a new version of an annexed file.
* metadata: Field names limited to alphanumerics and a few whitelisted
punctuation characters to avoid issues with views, etc.
* metadata: Support --json
* webapp: Fix creation of box.com and Amazon S3 and Glacier
repositories, broken in 5.20140221.
* webdav: When built with DAV 0.6.0, use the new DAV monad to avoid
locking files, which is not needed by git-annex's use of webdav, and
does not work on Box.com.
* repair: Optimise unpacking of pack files, and avoid repeated error
messages about corrupt pack files.
* Make annex.web-options be used in several places that call curl.
-- Joey Hess <joeyh@debian.org> Fri, 21 Feb 2014 13:03:04 -0400

1
debian/control vendored
View file

@ -6,6 +6,7 @@ Build-Depends:
ghc (>= 7.4),
libghc-mtl-dev (>= 2.1.1),
libghc-missingh-dev,
libghc-data-default-dev,
libghc-hslogger-dev,
libghc-pcre-light-dev,
libghc-sha-dev,

View file

@ -0,0 +1,19 @@
### Please describe the problem.
The assistant regularly ends up trying to perform a repair (I don't know why; it happens fairly often, once a week or so). When it does so, it ends up creating a huge (2.4G) .git/objects directory, and a git prune-packed process uses so much I/O that the machine really slows down.
### What steps will reproduce the problem?
I don't have any reliable way to reproduce it. A repair of the repository ends up being attempted around once a week. This week the repair (and the slowdown) also happened on a second computer.
### What version of git-annex are you using? On what operating system?
git-annex version: 5.20140221-gbdfc8e1 (using the standalone 64bit builds)
This is on an up-to-date Arch Linux. It also happened on Fedora 20.
### Please provide any additional information below.
The daemon.log is fairly long, but not particularly interesting: [[https://ssl.zerodogg.org/~zerodogg/private/tmp/daemon.log-2014-02-25.1]]
The «resource vanished (Broken pipe)» at the end is the result of me killing the prune-packed in order to be able to use the machine again.

View file

@ -18,3 +18,5 @@ show these then running,
git annex dropunused 1-3 --force
reports ok for each drop operation, but rerunning git annex unused --from cloud still shows these three files as unused. I am using git-annex on Mac OS X (current dmg) with a direct mode repo. I have similar problems dropping files on the current repo: even though I drop unused files, they still show up as unused.
> [[fixed|done]] --[[Joey]]

View file

@ -35,3 +35,7 @@ ubuntu 13.10 (saucy), i686
> Seems that [DAV-0.6 is badly broken](http://bugs.debian.org/737902).
> I have adjusted the cabal file to refuse to build with that broken
> version.
>
>> Update: Had to work around additional breakage in DAV-0.6. It's
>> fully tested and working now, although not yet uploaded to Debian
>> unstable. [[done]] --[[Joey]]

View file

@ -0,0 +1,13 @@
[[!comment format=mdwn
username="https://www.google.com/accounts/o8/id?id=AItOawk9nck8WX8-ADF3Fdh5vFo4Qrw1I_bJcR8"
nickname="Jon Ander"
subject="comment 7"
date="2014-02-24T13:20:27Z"
content="""
This is what I get in the log in version 5.20140221 in Debian Sid:
100% 46.5KB/s 0s
InternalIOException <socket: 28>: hPutBuf: illegal operation (handle is closed)
InternalIOException <socket: 25>: hPutBuf: illegal operation (handle is closed)
It seems that the file is being uploaded (folders are being created in box.com), but it crashes when reaching 100%.
"""]]

View file

@ -0,0 +1,8 @@
[[!comment format=mdwn
username="https://www.google.com/accounts/o8/id?id=AItOawmZgZuUhZlHpd_AbbcixY0QQiutb2I7GWY"
nickname="Jimmy"
subject="comment 3"
date="2014-02-21T22:05:06Z"
content="""
And yep, it's fixed in 5.20140221-g1a47f5f. Thanks guys!
"""]]

View file

@ -0,0 +1,9 @@
[[!comment format=mdwn
username="stp"
ip="84.56.21.11"
subject="Ídea"
date="2014-02-23T14:25:22Z"
content="""
I thought about the implementation need for git annex sync --content --all. It would be needed if preferred content expressions are to work; everything else could be done via a split usage.
Run \"git annex sync --content\" to satisfy the preferred content expressions and numcopies on the working tree, and then loop through all backup/archive repositories with \"git annex get --auto\". This should at least prevent archives from getting objects whose numcopies is already satisfied, and sync the objects not yet satisfied, right?
"""]]

View file

@ -0,0 +1,30 @@
### Please describe the problem.
A repair that runs for ages. In the log file, pages and pages and pages of:
error: packfile /Volumes/BandZbackup2/annex/.git/objects/pack/pack-f0ae2f5cc83f11eab406518b9f06a344acf9c93c.pack does not match index
warning: packfile /Volumes/BandZbackup2/annex/.git/objects/pack/pack-f0ae2f5cc83f11eab406518b9f06a344acf9c93c.pack cannot be accessed
error: packfile /Volumes/BandZbackup2/annex/.git/objects/pack/pack-f0ae2f5cc83f11eab406518b9f06a344acf9c93c.pack does not match index
warning: packfile /Volumes/BandZbackup2/annex/.git/objects/pack/pack-f0ae2f5cc83f11eab406518b9f06a344acf9c93c.pack cannot be accessed
error: packfile /Volumes/BandZbackup2/annex/.git/objects/pack/pack-f0ae2f5cc83f11eab406518b9f06a344acf9c93c.pack does not match index
warning: packfile /Volumes/BandZbackup2/annex/.git/objects/pack/pack-f0ae2f5cc83f11eab406518b9f06a344acf9c93c.pack cannot be accessed
error: packfile /Volumes/BandZbackup2/annex/.git/objects/pack/pack-f0ae2f5cc83f11eab406518b9f06a344acf9c93c.pack does not match index
warning: packfile /Volumes/BandZbackup2/annex/.git/objects/pack/pack-f0ae2f5cc83f11eab406518b9f06a344acf9c93c.pack cannot be accessed
### What steps will reproduce the problem?
Running git-annex, plugging in my external drive
### What version of git-annex are you using? On what operating system?
Auto-updated latest, I thought, but the about page says: Version: 5.20131230-g9a495e6
### Please provide any additional information below.
[[!format sh """
# If you can, paste a complete transcript of the problem occurring here.
# If the problem is with the git-annex assistant, paste in .git/annex/daemon.log
# End of transcript or log.
"""]]

View file

@ -0,0 +1,10 @@
[[!comment format=mdwn
username="http://joeyh.name/"
ip="209.250.56.172"
subject="comment 1"
date="2014-02-24T18:32:31Z"
content="""
Well, you seem to have a corrupt git repository on your removable drive. git-annex seems to be in the process of repairing it, which can take some time.
I don't see a bug here, from what you've described so far.
"""]]

View file

@ -0,0 +1,8 @@
[[!comment format=mdwn
username="https://www.google.com/accounts/o8/id?id=AItOawkQafKy7hNSEolLs6TvbgUnkklTctUY9LI"
nickname="Zellyn"
subject="sounds good"
date="2014-02-24T19:39:12Z"
content="""
Is it normal for the same error to repeat thousands of times like that in the log?
"""]]

View file

@ -0,0 +1,39 @@
[[!comment format=mdwn
username="http://joeyh.name/"
ip="209.250.56.172"
subject="comment 3"
date="2014-02-24T23:39:46Z"
content="""
Well, if there's a bug here, it might be that this particular problem has caused the repair process to loop repeatedly trying to unpack a pack file.
I don't see how that could happen, looking at the code it will try to unpack each pack file only once.
If you run `git annex repair --debug`, you can see the git commands it runs, and so see if it's somehow looping. When I do this with some corrupt pack files (actually, I swapped one pack file for another one), I see, for example:
<pre>
[2014-02-24 19:11:42 JEST] feed: git [\"--git-dir=/home/joey/tmp/git/.git\",\"--work-tree=/home/joey/tmp/git\",\"unpack-objects\",\"-r\"]
error: packfile /home/joey/tmp/git/.git/objects/pack/pack-857c07e35d98e8f063fdae6846d1f6f7453e1312.pack claims to have 862 objects while index indicates 1431 objects
warning: packfile /home/joey/tmp/git/.git/objects/pack/pack-857c07e35d98e8f063fdae6846d1f6f7453e1312.pack cannot be accessed
error: packfile /home/joey/tmp/git/.git/objects/pack/pack-857c07e35d98e8f063fdae6846d1f6f7453e1312.pack claims to have 862 objects while index indicates 1431 objects
warning: packfile /home/joey/tmp/git/.git/objects/pack/pack-857c07e35d98e8f063fdae6846d1f6f7453e1312.pack cannot be accessed
error: packfile /home/joey/tmp/git/.git/objects/pack/pack-857c07e35d98e8f063fdae6846d1f6f7453e1312.pack claims to have 862 objects while index indicates 1431 objects
...
</pre>
Which shows that git-annex only ran `git unpack-objects -r` once, and yet it printed out the same error repeatedly.
One possibility is a problem using `-r`, which makes it keep going on errors. Which seemed like a good idea at the time to unpack as much as possible from a damaged file. It might be that `git unpack-objects` is itself getting stuck in some kind of loop with the -r.
In my case, it did not get stuck; it eventually quit and it moved on to the next pack file, after 900-some repetitions of the error message:
<pre>
[2014-02-24 19:16:47 JEST] feed: git [\"--git-dir=/home/joey/tmp/git/.git\",\"--work-tree=/home/joey/tmp/git\",\"unpack-objects\",\"-r\"]
error: packfile /home/joey/tmp/git/.git/objects/pack/pack-857c07e35d98e8f063fdae6846d1f6f7453e1312.pack claims to have 862 objects while index indicates 1431 objects
warning: packfile /home/joey/tmp/git/.git/objects/pack/pack-857c07e35d98e8f063fdae6846d1f6f7453e1312.pack cannot be accessed
</pre>
Interesting that it's again complaining about the same pack file, despite having moved on from one pack file to the next. I think what's going on here is that while unpacking pack files A..Y (which may all be fine), it's checking pack file Z, which is corrupt, to see if the objects exist in it, and complaining each time.
So, I can improve this a lot by moving *all* the pack files out of the way before trying to unpack any of them. In my test case, that completely eliminated the errors, and probably also sped it up a bit.
If I were you, I'd either try stopping your running git-annex and run `git annex repair --debug` and analyze the log like I did above, or get the next daily build, which has that change, and see if it helps in your case.
"""]]

View file

@ -29,7 +29,7 @@ directories nest.
relevant metadata from the files.
TODO: It's not clear that
removing a file should nuke all the metadata used to filter it into the
branch (especially if it's derived metadata like the year).
branch
Currently, only metadata used for visible subdirs is added and removed
this way.
Also, this is not usable in direct mode because deleting the
@ -56,21 +56,9 @@ For example, by examining MP3 metadata.
Also auto add metadata when adding files to view branches. See below.
## derived metadata
## directory hierarchy metadata
This is probably not stored anywhere. It's computed on demand by a pure
function from the other metadata.
(Should be a general mechanism for this. (It probably generalizes to
sql queries if we want to go that far.))
### data metadata
TODO From the ctime, some additional
metadata is derived, at least year=yyyy and probably also month, etc.
### directory hierarchy metadata
TODO From the original filename used in the master branch, when
From the original filename used in the master branch, when
constructing a view, generate fields. For example foo/bar/baz.mp3
would get /=foo, foo/=bar, foo/bar/=baz, and .=mp3.
@ -82,11 +70,10 @@ This allows using whatever directory hierarchy exists to inform the view,
without locking the view into using it.
Complication: When refining a view, it only looks at the filenames in
the view, so it would need to map from
the view, so it has to map from
those filenames to derive the same metadata, unless there is persistent
storage. Luckily, the filenames used in the views currently include the
subdirs (although not quite in a parseable format, would need some small
changes).
subdirs.
# other uses for metadata
@ -185,14 +172,15 @@ So, possible approaches:
* Git has a complex set of rules for what is legal in a ref name.
View branch names will need to filter out any illegal stuff. **done**
* Metadata should be copied to the new key when adding a modified version
of a file. **done**
* Filesystems that are not case sensitive (including case-preserving OSX)
will cause problems if view branches try to use different cases for
2 directories representing the value of some metadata. But, users
probably want at least case-preserving metadata values.
2 directories representing a metadata field.
Solution might be to compare metadata case-insensitively, and
pick one representation consistently, so if, for example an author
field uses mixed case, it will be used in the view branch.
Solution might be to compare fields names case-insensitively, and
pick one representation consistently.
Alternatively, it could escape `A` to `_A` when such a filesystem
is detected and avoid collisions that way (double `_` to escape it).
@ -207,3 +195,7 @@ So, possible approaches:
* What happens if git annex add or the assistant add a new file while on a
view? If the file is not also added to the master branch, it will be lost
when exiting the view. TODO
* The filename mangling can result in a filename in a view
that is too long for its containing filesystem. Should detect and do
something reasonable to avoid. TODO

View file

@ -0,0 +1,22 @@
[[!comment format=mdwn
username="https://www.google.com/accounts/o8/id?id=AItOawm3vKzS4eOWYpKMoYXqMIjNsIg_nYF-loU"
nickname="Konubinix"
subject="Already existing metadata implementation "
date="2014-02-22T21:45:25Z"
content="""
Hi,
I love the idea behind storing metadata.
I suggest exchanging ideas (and maybe code) with projects already implementing metadata systems.
I have tried several implementations and particularly noticed tmsu (http://tmsu.org/). This tool stores tags into a sqlite database and uses also a SHA-256 fingerprint of the file to be aware of file moves. It provides a fuse view of the tags with the ability to change tags by moving files (like in the git annex metadata view).
Paul Ruane is particularly responsive on the mailing list and he already supports git annexed files (with SHA-256 fingerprint) (see the end of the thread https://groups.google.com/forum/#!topic/tmsu/A5EGpnCcJ2w).
Even if you cannot reuse the project, there are interesting ideas that might be worth looking at, like the implications of tags: a file tagged \"film\" being automatically tagged \"video\".
Tagsistant (http://www.tagsistant.net/) may also be a good source of inspiration. I just don't like the fact that it uses a backstore of tagged files.
Thanks for reading.
"""]]

View file

@ -0,0 +1,14 @@
[[!comment format=mdwn
username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus"
nickname="Jimmy"
subject="comment 2"
date="2014-02-25T09:51:17Z"
content="""
Some additional ideas for metadata...
Instead of having a simplistic scheme like 'field=value', it might be advantageous to consider a scheme like 'attribute=XXX, value=YYY, unit=ZZZ'. That way you could do interesting things with the metadata, like adding counters to things, and allow for interesting queries, like \"give me all things tagged with a unit of audio_file\" (this assumes one had trawled through an entire annex and then tagged all files based on type with the unix file tool or something like that).
The above idea is already in use in irods, and it's a really nice and powerful way to let users add metadata and build up more interesting use cases and tools.
btw, I plan on taking a look at seeing if I can map some of the metadata that we have at work into this new git-annex feature, to see how well/badly it works. Either way this feature looks cool! +1!!!
"""]]

View file

@ -0,0 +1,12 @@
[[!comment format=mdwn
username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus"
nickname="Jimmy"
subject="comment 3"
date="2014-02-25T09:57:09Z"
content="""
Actually, in your mp3 example you could have ...
ATTRIBUTE=sample_rate, VALUE=22100, UNIT=Hertz
Another example use case is to always be consistent with the AVU order; then you could stick in ntriples from RDF to do other cool things by looking up various linked data sources -- see http://www.w3.org/2001/sw/RDFCore/ntriples/ and http://www.freebase.com/. Actually, this would be quite cool: if git-annex examined the mp3's id3 tag and created an ntriple-styled entry, it could be automatically parsed with the web-based annex gui to automatically pull in additional metadata from the likes of freebase. I guess the list of ideas can only get bigger with this potential metadata capability.
"""]]

View file

@ -6,10 +6,10 @@ Now in the
* Month 1 [[!traillink assistant/encrypted_git_remotes]]
* Month 2 [[!traillink assistant/disaster_recovery]]
* Month 3 user-driven features and polishing [[todo/direct_mode_guard]] [[assistant/upgrading]]
* Month 4 [[Windows_webapp|assistant/Windows]], Linux arm, [[todo/support_for_writing_external_special_remotes]]
* Month 3 user-driven features and polishing [[!traillink todo/direct_mode_guard]] [[!traillink assistant/upgrading]]
* Month 4 [[!traillink assistant/windows text="Windows webapp"]], Linux arm, [[!traillink todo/support_for_writing_external_special_remotes]]
* Month 5 user-driven features and polishing
* **Month 6 get Windows out of beta, [[metadata and views|design/metadata]]**
* **Month 6 get Windows out of beta, [[!traillink design/metadata text="metadata and views"]]**
* Month 7 user-driven features and polishing
* Month 8 [[!traillink assistant/telehash]]
* Month 9 [[!traillink assistant/gpgkeys]] [[!traillink assistant/sshpassword]]

View file

@ -0,0 +1,12 @@
[[!comment format=mdwn
username="stp"
ip="84.56.21.11"
subject="New findings"
date="2014-02-24T12:28:03Z"
content="""
Another thing I found, which was annoying, is that I seem to have objects in my annex that are not tracked anywhere.
\"git annex fsck --all\" complains about not having access to the object. \"git log --stat -S '$key'\" doesn't have any record. \"git annex fsck\" has no issues and \"git annex unused\" comes up empty too.
I'm not sure where these objects still reside or how to remove this annoying failure.
So not only should \"git annex forget $key\" remove references from within all branches, but should also clean up the aforementioned loose objects, which are neither unused, nor available, nor referenced.
"""]]

View file

@ -0,0 +1,17 @@
When generating a view, there's now a way to reuse part of the directory
hierarchy of the parent branch. For example, `git annex view tag=* podcasts/=*`
makes a view where the first level is the tags, and the second level is
whatever `podcasts/*` directories the files were in.
Also, year and month metadata can be automatically recorded when
adding files to the annex. I made this only be done when annex.genmetadata
is turned on, to avoid polluting repositories that don't want to use metadata.
It would be nice if there was a way to add a hook script that's run
when files are added, to collect their metadata. I am not sure yet if
I am going to add that to git-annex though. It's already possible to do via
the regular git `post-commit` hook. Just make it look at the commit to see
what files were added, and then run `git annex metadata` to set their
metadata appropriately. It would be good to at least have an example of
such a script to eg, extract EXIF or ID3 metadata. Perhaps someone can
contribute one?
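As a starting point, here is a rough, untested sketch of such a post-commit hook. It assumes the `id3v2` tool is installed; the tag parsing and the `artist` field name are illustrative only:

    #!/bin/sh
    # List the files added by the commit that was just made.
    git diff-tree --no-commit-id --name-only --diff-filter=A -r HEAD |
    grep '\.mp3$' |
    while read -r f; do
        # id3v2 -l prints lines like "TPE1 (Lead performer(s)/...): Artist"
        artist=$(id3v2 -l "$f" | sed -n 's/^TPE1[^:]*: //p')
        [ -n "$artist" ] && git annex metadata --set "artist=$artist" "$f"
    done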

View file

@ -0,0 +1,23 @@
Turns out that in the last release I broke making box.com, Amazon S3 and
Glacier remotes from the webapp. Fixed that.
Also, dealt with changes in the haskell DAV library that broke support for
box.com, and worked around an exception handling bug in the library.
I think I should try to enhance the test suite so it can run live tests
on special remotes, which would at least have caught the some of these
recent problems...
----
Since metadata is tied to a particular key, editing an annexed file,
which causes the key to change, made the metadata seem to get lost.
I've now fixed this; it copies the metadata from the old version to the new
one. (Taking care to copy the log file identically, so git can reuse its
blob.)
That meant that `git annex add` has to check every file it adds to see if
there's an old version. Happily, that check is fairly fast; I benchmarked my
laptop running 2500 such checks a second. So it's not going to slow things
down appreciably.

View file

@ -0,0 +1 @@
Is it possible to convert a regular git annex repo (git clone, then git annex init in the folder) to an rsync remote? I have an annex with a lot of remotes, which makes the sync operation take a really long time. I would like to convert some of those remotes to rsync. This particular repo has a TB of data, so I would like to avoid dropping content from the remote and then re-downloading everything.

View file

@ -0,0 +1,12 @@
[[!comment format=mdwn
username="http://joeyh.name/"
ip="209.250.56.172"
subject="comment 1"
date="2014-02-23T18:57:42Z"
content="""
This is doable. It works best if the remote repo is a bare git repository, because then the filenames line up 100% with the filenames used in a rsync special remote. If the git repo is not bare, the rsync special remote will first try the paths it expects, and only then fall back to the right paths, so a little extra work is done. (If this became a big problem, it would not be infeasible to move the files around with a script.)
Anyway, if it's a bare repo, then repo.git/annex/objects is where you want to point the rsync special remote. With a non-bare repo, repo/.git/annex/objects/ is the location. I'd recommend moving the objects directory out to a new location, and pointing the rsyncurl at that. This way, there's no possibility of git-annex thinking one file accessed 2 ways is 2 copies.
Of course, you can't use encryption for the rsync special remote.
"""]]

View file

@ -0,0 +1,10 @@
[[!comment format=mdwn
username="http://joeyh.name/"
ip="209.250.56.172"
subject="comment 2"
date="2014-02-23T19:07:59Z"
content="""
However, if the only problem is that pushing and pulling with a git repository makes `git annex sync` take too long, another option is setting `git config remote.$foo.annex-sync false`. You can still then use git-annex commands to get and push data to the remote, and can even `git annex sync $foo` from time to time, but it won't slow down the normal `git annex sync`.
However, this also prevents the assistant from uploading new files to the remote automatically.
"""]]

View file

@ -0,0 +1,16 @@
[[!comment format=mdwn
username="https://me.yahoo.com/a/FHnTlSBo1eCGJRwueeKeB6.RCaPbGMPr5jxx8A--#ce0d8"
nickname="Hamza"
subject="comment 3"
date="2014-02-23T19:39:28Z"
content="""
Thanks for the reply; just to make sure I got you right:
It is indeed a non-bare git repo, so I will move the folder repo/.git/annex/objects/ to repo/
then run,
git annex initremote myrsync type=rsync rsyncurl=ssh.example.com:~/repo
and enable the remote on other annexes (disks are connected to an ssh server; there is no encryption setup right now, so I do not mind not having it). And everything should be set up correctly.
"""]]

View file

@ -0,0 +1,8 @@
[[!comment format=mdwn
username="https://me.yahoo.com/a/FHnTlSBo1eCGJRwueeKeB6.RCaPbGMPr5jxx8A--#ce0d8"
nickname="Hamza"
subject="comment 4"
date="2014-02-23T19:45:09Z"
content="""
And as a follow-up: do I have to rename the repos, or can I reuse the same names?
"""]]

View file

@ -0,0 +1,12 @@
[[!comment format=mdwn
username="http://joeyh.name/"
ip="209.250.56.172"
subject="comment 5"
date="2014-02-24T19:07:33Z"
content="""
That looks all right, although initremote will ask you what encryption to use, and you'll need to specify `encryption=none`.
One thing I forgot to mention is that the UUID of the new rsync repository won't be the same, so git-annex won't know about the files in there. This can be fixed by `git annex fsck --fast --from myrsync`, which doesn't re-download all the files, but you may still want to run it on a repository close to or on the server, for speed.
You can re-use the name you're currently using for the git remote for the new rsync special remote if you like.
"""]]

View file

@ -0,0 +1,8 @@
[[!comment format=mdwn
username="https://me.yahoo.com/a/FHnTlSBo1eCGJRwueeKeB6.RCaPbGMPr5jxx8A--#ce0d8"
nickname="Hamza"
subject="comment 6"
date="2014-02-25T09:34:16Z"
content="""
Assuming the remote I am converting is called some-repo: should I mark it as dead before converting and reinitting it as the rsync remote some-repo again?
"""]]

View file

@ -0,0 +1,5 @@
Is there any way to find all files that do not have a certain field assigned in metadata? E.g. I want to find all files that do not have an author field set, and
git-annex find --not --metadata "author=*"
doesn't give any results.

View file

@ -0,0 +1,10 @@
[[!comment format=mdwn
username="http://joeyh.name/"
ip="209.250.56.172"
subject="comment 1"
date="2014-02-21T22:36:51Z"
content="""
--metadata does not support globs, so your example is asking for all files that don't have an author field with a literal \"*\" value. When I try that command, it lists all files ... as expected.
It seems that adding glob support to it would get to the result you want, and makes sense to parallel git annex view. Change made in git!
"""]]

View file

@ -0,0 +1,20 @@
Hi,
My Webapp isn't working:
$ git-annex webapp
error: refs/gcrypt/gitception+ does not point to a valid object!
error: refs/remotes/Beta/git-annex does not point to a valid object!
error: refs/remotes/Beta/master does not point to a valid object!
fatal: unable to read tree 656e7db5be172f01c0b6994d01f1a08d1273af12
So I tried to repair it:
$ git-annex repair
Running git fsck ...
Stack space overflow: current size 8388608 bytes. Use `+RTS -Ksize -RTS' to increase it.
So I tried to follow your advice here and increase the stack:
$ git-annex +RTS -K35000000 -RTS fsck
git-annex: Most RTS options are disabled. Link with -rtsopts to enable them.
I wasn't sure what to do next, so any help would be appreciated.

View file

@ -0,0 +1,10 @@
[[!comment format=mdwn
username="http://joeyh.name/"
ip="209.250.56.172"
subject="comment 1"
date="2014-02-23T18:51:45Z"
content="""
I suspect that git fsck is outputting so many lines about problems that it's taking more memory than it's limited to using to hold them all.
Can you paste the output of: git fsck --no-dangling --no-reflogs
"""]]

View file

@ -0,0 +1,10 @@
[[!comment format=mdwn
username="http://joeyh.name/"
ip="209.250.56.172"
subject="comment 2"
date="2014-02-23T19:09:30Z"
content="""
Erm, that output is liable to be big, I only care how many lines and characters of output there are!
git fsck --no-dangling --no-reflogs |wc
"""]]

View file

@ -0,0 +1,10 @@
[[!comment format=mdwn
username="http://joeyh.name/"
ip="209.250.56.172"
subject="comment 3"
date="2014-02-23T19:12:10Z"
content="""
Also, you can build git-annex from source with the RTS options enabled by running `cabal install git-annex --ghc-options=-rtsopts`
(or just build git-repair which has the repository repair parts of git-annex)
"""]]

View file

@ -0,0 +1,14 @@
[[!comment format=mdwn
username="https://www.google.com/accounts/o8/id?id=AItOawnNqLKszWk9EoD4CDCqNXJRIklKFBCN1Ao"
nickname="maurizio"
subject="the startup check is not a small issue"
date="2014-02-25T11:37:15Z"
content="""
I would like to add that this startup check has probably been a blocker for my use case for a long, long time. I tried to use git-annex to synchronize a huge number of files, most of them never changing. My plan was to have a few tens of GB of data which more or less never change in an archive directory, and then add new data to the annex from time to time (in batches of a few hundred files, each of them not necessarily very large). Once this new data had been processed or otherwise become less immediately useful, it would be shifted to the archive. It would have been very useful to have such a setup, because the amount of data is too large to be replicated everywhere, especially on a laptop.

After finding this post I finally understand that the seemingly never-ending \"performing startup scan\" that I observed is probably not due to the assistant somehow hanging, contrary to what I thought. It seems it is just normal operation. The problem is that this normal operation makes it unusable for the use case I was considering, since it does not make much sense to have git-annex scanning about 10^6 files or links on every boot of a laptop. On my workstation this \"startup scan\" has now been running for close to one hour and is not finished yet; this is not thinkable on laptop boot.
Maybe an analysis of how well git-annex operation scales with number of files should be part of the documentation, since \"large files\" is not the only issue when trying to sync different computers. One finds references to \"very large number of files\" about annex.queuesize, but \"very large\" has no clear meaning. One also finds a reference to \"1 million files\" being a bit of a git limitation on comments of a bug report <https://git-annex.branchable.com/bugs/Stress_test/>.
Orders of magnitude of the number of files that git-annex is supposed to be able to handle would be very useful.
"""]]

View file

@ -715,20 +715,29 @@ subdirectories).
git annex metadata annexscreencast.ogv -t video -t screencast -s author+=Alice
* `view [field=value ...] [tag ...]`
* `view [tag ...] [field=value ...] [location/=value]`
Uses metadata to build a view branch of the files in the current branch,
and checks out the view branch. Only files in the current branch whose
metadata matches all the specified field values and tags will be
shown in the view.
Once within a view, you can make additional directories, and
copy or move files into them. When you commit, the metadata will
be updated to correspond to your changes.
Multiple values for a metadata field can be specified, either by using
a glob (`field="*"`) or by listing each wanted value. The resulting view
will put files in subdirectories according to the value of their fields.
Once within a view, you can make additional directories, and
copy or move files into them. When you commit, the metadata will
be updated to correspond to your changes.
There are fields corresponding to the path to the file. So a file
"foo/bar/baz/file" has fields "/=foo", "foo/=bar", and "foo/bar/=baz".
These location fields can be used the same as other metadata to construct
the view.
For example, `/=podcasts` will only include files from the podcasts
directory in the view, while `podcasts/=*` will preserve the
subdirectories of the podcasts directory in the view.
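For example:

    git annex view tag=* podcasts/=*

makes a view where the first level is the tags, and the second level is whatever podcasts/* directories the files were in.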
* `vpop [N]`
@ -737,12 +746,12 @@ subdirectories).
The optional number tells how many views to pop.
* `vfilter [field=value ...] [tag ...]`
* `vfilter [tag ...] [field=value ...] [location/=value]`
Filters the current view to only the files that have the
specified values and tags.
specified field values, tags, and locations.
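For example, to narrow the current view down to one tag (tag name illustrative):

    git annex vfilter tag=urgent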
* `vadd [field=glob ...]`
* `vadd [field=glob ...] [location/=glob]`
Changes the current view, adding an additional level of directories
to categorize the files.
@ -942,7 +951,7 @@ subdirectories).
Rather than the normal output, generate JSON. This is intended to be
parsed by programs that use git-annex. Each line of output is a JSON
object. Note that JSON output is only usable with some git-annex commands,
like info, find, and whereis.
like info, find, whereis, and metadata.
* `--debug`
@ -1133,10 +1142,11 @@ file contents are present at either of two repositories.
The size can be specified with any commonly used units, for example,
"0.5 gb" or "100 KiloBytes"
* `--metadata field=value`
* `--metadata field=glob`
Matches only files that have a metadata field attached with the specified
value.
Matches only files that have a metadata field attached with a value that
matches the glob. The values of metadata fields are matched case
insensitively.
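For example, to find files whose author field starts with "a" (field name and glob illustrative):

    git annex find --metadata "author=a*"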
* `--want-get`
@ -1269,6 +1279,12 @@ Here are all the supported configuration settings.
Note that setting numcopies to 0 is very unsafe.
* `annex.genmetadata`
Set this to `true` to make git-annex automatically generate some metadata
when adding files to the repository. In particular, it stores
year and month metadata, from the file's modification date.
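For example:

    git config annex.genmetadata true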
* `annex.queuesize`
git-annex builds a queue of git commands, in order to combine similar

View file

@ -5,6 +5,7 @@ quite a lot.
* [The Haskell Platform](http://haskell.org/platform/) (GHC 7.4 or newer)
* [mtl](http://hackage.haskell.org/package/mtl) (2.1.1 or newer)
* [MissingH](http://github.com/jgoerzen/missingh/wiki)
* [data-default](http://hackage.haskell.org/package/data-default)
* [utf8-string](http://hackage.haskell.org/package/utf8-string)
* [SHA](http://hackage.haskell.org/package/SHA)
* [cryptohash](http://hackage.haskell.org/package/cryptohash) (optional but recommended)

41
doc/metadata.mdwn Normal file
View file

@ -0,0 +1,41 @@
git-annex allows you to store arbitrary metadata about files stored in the
git-annex repository. The metadata is stored in the `git-annex` branch, and
so is automatically kept in sync with the rest of git-annex's state, such
as [[location_tracking]] information.
Some of the things you can do with metadata include:
* Using `git annex metadata file` to show all
the metadata associated with a file.
* [[tips/metadata_driven_views]]
* Limiting the files git-annex commands act on to those with
or without particular metadata.
For example `git annex find --metadata tag=foo --or --metadata tag=bar`
* Using it in [[preferred_content]] expressions.
For example "tag=important or not author=me"
Each file (actually the underlying key) can have any number of metadata
fields, which each can have any number of values. For example, to tag
files, the `tag` field is typically used, with values set to each tag that
applies to the file.
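For example, this tags a file and records its author (file name illustrative):

    git annex metadata --tag todo --set author=Alice somefile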
The field names are limited to alphanumerics (and `[_-.]`). The metadata
values can contain absolutely anything you like -- but you're recommended
to keep it simple and reasonably short.
Here are some recommended metadata fields to use:
* `tag` - With each tag being a different value.
* `year`, `month` - When this particular version of the file came into
being.
To make git-annex automatically set the year and month when adding files,
run `git config annex.genmetadata true`
git-annex's metadata can be updated in a distributed fashion. For example,
two users, each with their own clone of a repository, can set and unset
metadata at the same time, even for the same field of the same file.
When they push their changes, `git annex merge` will combine their
metadata changes in a consistent and (probably) intuitive way.
See [[the metadata design page|design/metadata]] for more details.

View file

@ -1,5 +1,5 @@
git-annex now has support for storing
[[arbitrary metadata|design/metadata]] about annexed files. For example, this can be
[[arbitrary metadata|metadata]] about annexed files. For example, this can be
used to tag files, to record the author of a file, etc. The metadata is
synced around between repositories with the other information git-annex
keeps track of.
@ -14,6 +14,12 @@ refine or reorder a view.
Let's get started by setting some tags on files. No views yet, just some
metadata:
[[!template id=note text="""
To avoid needing to manually tag files with the year (and month),
run `git config annex.genmetadata true`, and git-annex will do it for you
when adding files.
"""]]
# git annex metadata --tag todo work/2014/*
# git annex metadata --untag todo work/2014/done/*
# git annex metadata --tag urgent work/2014/presentation_for_tomorrow.odt
@ -24,8 +30,8 @@ metadata:
# git annex metadata --tag done videos/old
# git annex metadata --tag new videos/lotsofcats.ogv
# git annex metadata --tag sound podcasts
# git annex metadata --tag done podcasts/old
# git annex metadata --tag new podcasts/recent
# git annex metadata --tag done podcasts/*/old
# git annex metadata --tag new podcasts/*/recent
So, you had a bunch of different kinds of files sorted into a directory
structure. But that didn't really reflect how you approach the files.
@ -39,6 +45,12 @@ Ok, metadata is in place, but how to use it? Time to change views!
Switched to branch 'views/_'
ok
[[!template id=note text="""
Notice that a single file may appear in multiple directories
depending on its tags. For example, `lotsofcats.ogv` is in
both `new/` and `video/`.
"""]]
This searched for all files with any tag, and created a new git branch
that sorts the files according to their tags.
@ -51,10 +63,6 @@ that sorts the files according to their tags.
video
sound
Notice that a single file may appear in multiple directories
depending on its tags. For example, `lotsofcats.ogv` is in
both `new/` and `video/`.
Ah, but you're at work now, and don't want to be distracted by cat videos.
Time to filter the view:
@ -81,9 +89,11 @@ all the way out of all views, you'll be back on the regular git branch you
originally started from. You can also use `git checkout` to switch between
views and other branches.
Beyond simple tags, you can add whatever kinds of metadata you like, and
use that metadata in more elaborate views. For example, let's add a year
field.
## fields
Beyond simple tags and directories, you can add whatever kinds of metadata
you like, and use that metadata in more elaborate views. For example, let's
add a year field.
# git checkout master
# git annex metadata --set year=2014 work/2014
@ -118,4 +128,25 @@ Oh, did you want it the other way around? Easy!
|-- 2014
`-- 2013
## location fields
Let's switch to a view containing only new podcasts. And since the
podcasts are organized into one subdirectory per show, let's
include those subdirectories in the view.
# git checkout master
# git annex view tag=new podcasts/=*
# tree -d
This_Developers_Life
Escape_Pod
GitMinutes
The_Haskell_Cast
StarShipSofa
That's an example of using part of the directory layout of the original
branch to inform the view. Every file gets fields automatically set up
corresponding to the directory it's in. So a file "foo/bar/baz/file" has
fields "/=foo", "foo/=bar", and "foo/bar/=baz". These location fields
can be used the same as other metadata to construct the view.
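Location fields work with the other view commands too; for instance, a sketch of narrowing an existing view down to the podcasts directory:

    # git annex vfilter /=podcasts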
This has probably only scratched the surface of what you can do with views.

13
doc/todo/Views_Demo.mdwn Normal file
View file

@ -0,0 +1,13 @@
Joey,
I've been thinking about leveraging git-annex for a workgroup document repository and I have just watched your views demo. The timing of the demo is great because I need to deploy a document repository with per-document metadata and your views concept seems like a great mechanism for associating metadata to documents and for displaying that metadata.
While I don't expect to use your views concept for my workgroup repository, a later iteration might do.
The metadata in my use case begins with all the weird metadata seen on a book's copyright page. In addition, there is per-document provenance, like how one found the document and (if we're lucky) a URL where the latest version of the document may be found. Metadata values may be simple strings or may be markdown text.
So, are you considering a metadata syntax that can support complex metadata? One example is multiple authors. Another issue is complex metadata values, like key=abstract and value="markdown text...".
FWIW,
Bob

View file

@ -0,0 +1,8 @@
[[!comment format=mdwn
username="http://joeyh.name/"
ip="209.250.56.172"
subject="comment 1"
date="2014-02-24T18:17:04Z"
content="""
All that should work fine. All metadata fields are multivalued, and the value can be any arbitrary data.
"""]]

View file

@ -0,0 +1,5 @@
Sometimes I start off a large file transfer to a new remote (a la "git-annex copy . --to glacier").
I believe all of the special remotes transfer the files one at a time, which is good, and provides a sensible place to interrupt a copy/move operation.
Wish: When I press ctrl+c in the terminal, git-annex will catch that and finish its current transfer and then exit cleanly (ie: no odd backtraces in the special remote code). For the case where the file currently being transferred also needs to be killed (ie: it's a big .iso), subsequent ctrl+c's can do that.

View file

@ -0,0 +1,8 @@
[[!comment format=mdwn
username="http://joeyh.name/"
ip="209.250.56.172"
subject="comment 1"
date="2014-02-21T21:36:14Z"
content="""
This really depends on the remote; some can resume where they were interrupted, such as rsync, and some cannot, such as glacier (and, er, encrypted rsync).
"""]]

View file

@ -0,0 +1,8 @@
[[!comment format=mdwn
username="http://grossmeier.net/"
nickname="greg"
subject="very remote specific"
date="2014-02-21T22:11:16Z"
content="""
Yeah, this is very remote specific and probably means adding the functionality there as well (eg: in the glacier.py code, not only in git-annex haskell). Maybe I should file bugs there accordingly :)
"""]]

View file

@ -0,0 +1,8 @@
[[!comment format=mdwn
username="http://joeyh.name/"
ip="209.250.56.172"
subject="comment 3"
date="2014-02-21T22:34:14Z"
content="""
Hmm, I forget if it's possible for git-annex to mask SIGINT when it runs glacier or rsync, so that the child process does not receive it, but the parent git-annex does.
"""]]

View file

@ -93,7 +93,8 @@ Executable git-annex
extensible-exceptions, dataenc, SHA, process, json,
base (>= 4.5 && < 4.9), monad-control, MonadCatchIO-transformers,
IfElse, text, QuickCheck >= 2.1, bloomfilter, edit-distance, process,
SafeSemaphore, uuid, random, dlist, unix-compat, async, stm (>= 2.3)
SafeSemaphore, uuid, random, dlist, unix-compat, async, stm (>= 2.3),
data-default
CC-Options: -Wall
GHC-Options: -Wall
Extensions: PackageImports
@ -133,7 +134,7 @@ Executable git-annex
if flag(WebDAV)
Build-Depends: DAV ((>= 0.3 && < 0.6) || > 0.6),
http-conduit, xml-conduit, http-types
http-client, http-conduit, http-types, lifted-base
CPP-Options: -DWITH_WEBDAV
if flag(Assistant) && ! os(solaris)