From 141fa3c94d9b5d9404b8e875b6806f27340f2cf3 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 6 Jun 2012 00:01:17 -0400 Subject: [PATCH 01/29] update --- doc/design/assistant/inotify.mdwn | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/design/assistant/inotify.mdwn b/doc/design/assistant/inotify.mdwn index 5d903a9b04..e7c61c68b1 100644 --- a/doc/design/assistant/inotify.mdwn +++ b/doc/design/assistant/inotify.mdwn @@ -69,7 +69,8 @@ Many races need to be dealt with by this code. Here are some of them. Possible fixes: Somehow track or detect if a file is open for write by any processes. Or, when possible, making a copy on write copy - before adding the file would avoid this. + before adding the file would avoid this. Or, as a last resort, make + an expensive copy of the file and add that. * File is added and then replaced with another file before the annex add makes its symlink. From f1bd72ea546be705334ba8f6d01d9dcfb0c33cf9 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 6 Jun 2012 00:03:08 -0400 Subject: [PATCH 02/29] factor out generic update-index code from unionmerge code --- Annex/Branch.hs | 9 +++++---- Git/UnionMerge.hs | 41 ++------------------------------------ Git/UpdateIndex.hs | 49 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+), 43 deletions(-) create mode 100644 Git/UpdateIndex.hs diff --git a/Annex/Branch.hs b/Annex/Branch.hs index 706522f3b3..c8d0719b0b 100644 --- a/Annex/Branch.hs +++ b/Annex/Branch.hs @@ -33,6 +33,7 @@ import qualified Git.Command import qualified Git.Ref import qualified Git.Branch import qualified Git.UnionMerge +import qualified Git.UpdateIndex import Git.HashObject import qualified Git.Index import Annex.CatFile @@ -258,8 +259,8 @@ files = withIndexUpdate $ do - in changes from other branches. 
-} genIndex :: Git.Repo -> IO () -genIndex g = Git.UnionMerge.stream_update_index g - [Git.UnionMerge.ls_tree fullname g] +genIndex g = Git.UpdateIndex.stream_update_index g + [Git.UpdateIndex.ls_tree fullname g] {- Merges the specified refs into the index. - Any changes staged in the index will be preserved. -} @@ -335,13 +336,13 @@ stageJournal = do g <- gitRepo withIndex $ liftIO $ do h <- hashObjectStart g - Git.UnionMerge.stream_update_index g + Git.UpdateIndex.stream_update_index g [genstream (gitAnnexJournalDir g) h fs] hashObjectStop h where genstream dir h fs streamer = forM_ fs $ \file -> do let path = dir file sha <- hashFile h path - _ <- streamer $ Git.UnionMerge.update_index_line + _ <- streamer $ Git.UpdateIndex.update_index_line sha (fileJournal file) removeFile path diff --git a/Git/UnionMerge.hs b/Git/UnionMerge.hs index d68bb61ab1..9ff820dc91 100644 --- a/Git/UnionMerge.hs +++ b/Git/UnionMerge.hs @@ -7,11 +7,7 @@ module Git.UnionMerge ( merge, - merge_index, - update_index, - stream_update_index, - update_index_line, - ls_tree + merge_index ) where import System.Cmd.Utils @@ -24,8 +20,7 @@ import Git import Git.Sha import Git.CatFile import Git.Command - -type Streamer = (String -> IO ()) -> IO () +import Git.UpdateIndex {- Performs a union merge between two branches, staging it in the index. - Any previously staged changes in the index will be lost. @@ -47,38 +42,6 @@ merge_index :: CatFileHandle -> Repo -> [Ref] -> IO () merge_index h repo bs = stream_update_index repo $ map (\b -> merge_tree_index b h repo) bs -{- Feeds content into update-index. Later items in the list can override - - earlier ones, so the list can be generated from any combination of - - ls_tree, merge_trees, and merge_tree_index. -} -update_index :: Repo -> [String] -> IO () -update_index repo ls = stream_update_index repo [(`mapM_` ls)] - -{- Streams content into update-index. 
-} -stream_update_index :: Repo -> [Streamer] -> IO () -stream_update_index repo as = do - (p, h) <- hPipeTo "git" (toCommand $ gitCommandLine params repo) - fileEncoding h - forM_ as (stream h) - hClose h - forceSuccess p - where - params = map Param ["update-index", "-z", "--index-info"] - stream h a = a (streamer h) - streamer h s = do - hPutStr h s - hPutStr h "\0" - -{- Generates a line suitable to be fed into update-index, to add - - a given file with a given sha. -} -update_index_line :: Sha -> FilePath -> String -update_index_line sha file = "100644 blob " ++ show sha ++ "\t" ++ file - -{- Gets the current tree for a ref. -} -ls_tree :: Ref -> Repo -> Streamer -ls_tree (Ref x) repo streamer = mapM_ streamer =<< pipeNullSplit params repo - where - params = map Param ["ls-tree", "-z", "-r", "--full-tree", x] - {- For merging two trees. -} merge_trees :: Ref -> Ref -> CatFileHandle -> Repo -> Streamer merge_trees (Ref x) (Ref y) h = calc_merge h $ "diff-tree":diff_opts ++ [x, y] diff --git a/Git/UpdateIndex.hs b/Git/UpdateIndex.hs new file mode 100644 index 0000000000..04bc4da5ba --- /dev/null +++ b/Git/UpdateIndex.hs @@ -0,0 +1,49 @@ +{- git-update-index library + - + - Copyright 2011, 2012 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Git.UpdateIndex ( + Streamer, + stream_update_index, + update_index_line, + ls_tree +) where + +import System.Cmd.Utils + +import Common +import Git +import Git.Command + +{- Streamers are passed a callback and should feed it lines in the form + - read by update-index, and generated by ls-tree. -} +type Streamer = (String -> IO ()) -> IO () + +{- Streams content into update-index from a list of Streamers. 
-} +stream_update_index :: Repo -> [Streamer] -> IO () +stream_update_index repo as = do + (p, h) <- hPipeTo "git" (toCommand $ gitCommandLine params repo) + fileEncoding h + forM_ as (stream h) + hClose h + forceSuccess p + where + params = map Param ["update-index", "-z", "--index-info"] + stream h a = a (streamer h) + streamer h s = do + hPutStr h s + hPutStr h "\0" + +{- Generates a line suitable to be fed into update-index, to add + - a given file with a given sha. -} +update_index_line :: Sha -> FilePath -> String +update_index_line sha file = "100644 blob " ++ show sha ++ "\t" ++ file + +{- Gets the current tree for a ref. -} +ls_tree :: Ref -> Repo -> Streamer +ls_tree (Ref x) repo streamer = mapM_ streamer =<< pipeNullSplit params repo + where + params = map Param ["ls-tree", "-z", "-r", "--full-tree", x] From f596084a599fb363dcbb425dce7c4ca46bb56ca0 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 6 Jun 2012 02:31:31 -0400 Subject: [PATCH 03/29] move hashObject to HashObject library and generalize it to support all git object types --- Git/CatFile.hs | 8 ++------ Git/HashObject.hs | 18 ++++++++++++++++-- Git/Types.hs | 15 +++++++++++++++ Git/UnionMerge.hs | 17 +++-------------- 4 files changed, 36 insertions(+), 22 deletions(-) diff --git a/Git/CatFile.hs b/Git/CatFile.hs index c598d7aa4b..d5b367945f 100644 --- a/Git/CatFile.hs +++ b/Git/CatFile.hs @@ -21,6 +21,7 @@ import Common import Git import Git.Sha import Git.Command +import Git.Types import qualified Utility.CoProcess as CoProcess type CatFileHandle = CoProcess.CoProcessHandle @@ -52,7 +53,7 @@ catObject h object = CoProcess.query h send receive case words header of [sha, objtype, size] | length sha == shaSize && - validobjtype objtype -> + isJust (readObjectType objtype) -> case reads size of [(bytes, "")] -> readcontent bytes from _ -> dne @@ -67,8 +68,3 @@ catObject h object = CoProcess.query h send receive error "missing newline from git cat-file" return $ L.fromChunks [content] dne = 
return L.empty - validobjtype t - | t == "blob" = True - | t == "commit" = True - | t == "tree" = True - | otherwise = False diff --git a/Git/HashObject.hs b/Git/HashObject.hs index 617e5ac28f..b052413fdb 100644 --- a/Git/HashObject.hs +++ b/Git/HashObject.hs @@ -9,7 +9,9 @@ module Git.HashObject where import Common import Git +import Git.Sha import Git.Command +import Git.Types import qualified Utility.CoProcess as CoProcess type HashObjectHandle = CoProcess.CoProcessHandle @@ -24,11 +26,23 @@ hashObjectStart = CoProcess.start "git" . toCommand . gitCommandLine hashObjectStop :: HashObjectHandle -> IO () hashObjectStop = CoProcess.stop -{- Injects a file into git, returning the shas of the objects. -} +{- Injects a file into git, returning the Sha of the object. -} hashFile :: HashObjectHandle -> FilePath -> IO Sha hashFile h file = CoProcess.query h send receive where send to = do fileEncoding to hPutStrLn to file - receive from = Ref <$> hGetLine from + receive from = getSha "hash-object" $ hGetLine from + +{- Injects some content into git, returning its Sha. -} +hashObject :: Repo -> ObjectType -> String -> IO Sha +hashObject repo objtype content = getSha subcmd $ do + (h, s) <- pipeWriteRead (map Param params) content repo + length s `seq` do + forceSuccess h + reap -- XXX unsure why this is needed + return s + where + subcmd = "hash-object" + params = [subcmd, "-t", show objtype, "-w", "--stdin"] diff --git a/Git/Types.hs b/Git/Types.hs index deb14ebd48..64d418a041 100644 --- a/Git/Types.hs +++ b/Git/Types.hs @@ -48,3 +48,18 @@ instance Show Ref where type Branch = Ref type Sha = Ref type Tag = Ref + +{- Types of objects that can be stored in git. 
-} +data ObjectType = BlobObject | CommitObject | TreeObject + +instance Show ObjectType where + show BlobObject = "blob" + show CommitObject = "commit" + show TreeObject = "tree" + +readObjectType :: String -> Maybe ObjectType +readObjectType "blob" = Just BlobObject +readObjectType "commit" = Just CommitObject +readObjectType "tree" = Just TreeObject +readObjectType _ = Nothing + diff --git a/Git/UnionMerge.hs b/Git/UnionMerge.hs index 9ff820dc91..822e6abbf0 100644 --- a/Git/UnionMerge.hs +++ b/Git/UnionMerge.hs @@ -10,7 +10,6 @@ module Git.UnionMerge ( merge_index ) where -import System.Cmd.Utils import qualified Data.Text.Lazy as L import qualified Data.Text.Lazy.Encoding as L import qualified Data.Set as S @@ -21,6 +20,8 @@ import Git.Sha import Git.CatFile import Git.Command import Git.UpdateIndex +import Git.HashObject +import Git.Types {- Performs a union merge between two branches, staging it in the index. - Any previously staged changes in the index will be lost. @@ -72,7 +73,7 @@ mergeFile :: String -> FilePath -> CatFileHandle -> Repo -> IO (Maybe String) mergeFile info file h repo = case filter (/= nullSha) [Ref asha, Ref bsha] of [] -> return Nothing (sha:[]) -> use sha - shas -> use =<< either return (hashObject repo . unlines) =<< + shas -> use =<< either return (hashObject repo BlobObject . unlines) =<< calcMerge . zip shas <$> mapM getcontents shas where [_colonmode, _bmode, asha, bsha, _status] = words info @@ -80,18 +81,6 @@ mergeFile info file h repo = case filter (/= nullSha) [Ref asha, Ref bsha] of L.decodeUtf8 <$> catObject h s use sha = return $ Just $ update_index_line sha file -{- Injects some content into git, returning its Sha. 
-} -hashObject :: Repo -> String -> IO Sha -hashObject repo content = getSha subcmd $ do - (h, s) <- pipeWriteRead (map Param params) content repo - length s `seq` do - forceSuccess h - reap -- XXX unsure why this is needed - return s - where - subcmd = "hash-object" - params = [subcmd, "-w", "--stdin"] - {- Calculates a union merge between a list of refs, with contents. - - When possible, reuses the content of an existing ref, rather than From 455fca65bfb9ca4270fa7f89986d09ee62188d43 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 6 Jun 2012 11:58:08 -0400 Subject: [PATCH 04/29] layout --- Types/Backend.hs | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/Types/Backend.hs b/Types/Backend.hs index 5abb0896dc..97f7cef907 100644 --- a/Types/Backend.hs +++ b/Types/Backend.hs @@ -18,14 +18,11 @@ data KeySource = KeySource , contentLocation :: FilePath } -data BackendA a = Backend { - -- name of this backend - name :: String, - -- gets the key to use for a given content - getKey :: KeySource -> a (Maybe Key), - -- called during fsck to check a key, if the backend has its own checks - fsckKey :: Maybe (Key -> FilePath -> a Bool) -} +data BackendA a = Backend + { name :: String + , getKey :: KeySource -> a (Maybe Key) + , fsckKey :: Maybe (Key -> FilePath -> a Bool) + } instance Show (BackendA a) where show backend = "Backend { name =\"" ++ name backend ++ "\" }" From 899334223f4c46307af6c34ae103971b5f37ce8f Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 6 Jun 2012 14:33:12 -0400 Subject: [PATCH 05/29] fixed 2 races! Only 1 serious race to go! --- doc/design/assistant/inotify.mdwn | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/doc/design/assistant/inotify.mdwn b/doc/design/assistant/inotify.mdwn index e7c61c68b1..079941f597 100644 --- a/doc/design/assistant/inotify.mdwn +++ b/doc/design/assistant/inotify.mdwn @@ -82,16 +82,14 @@ Many races need to be dealt with by this code. Here are some of them. 
* File is added and then replaced with another file before the annex add stages the symlink in git. - **Currently unfixed**; `git add` will be run on the new file, which is - not at all good when it's big. Could be dealt with by using `git - update-index` to manually put the symlink into the index without git + Now fixed; `git annex watch` avoids running `git add` because of this + race. Instead, it stages symlinks directly into the index, without looking at what's currently on disk. * Link is moved, fixed link is written by fix event, but then that is removed by the user and replaced with a file before the event finishes. - **Currently unfixed**: `git add` will be run on the file. Basically same - effect as previous race above. + Now fixed; same fix as previous race above. * File is removed and then re-added before the removal event starts. From 81d4991fa3b9b9a303d37eef407cc7dcdc997a94 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 6 Jun 2012 15:13:18 -0400 Subject: [PATCH 06/29] thoughts --- doc/design/assistant/inotify.mdwn | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/doc/design/assistant/inotify.mdwn b/doc/design/assistant/inotify.mdwn index 079941f597..28b3130e33 100644 --- a/doc/design/assistant/inotify.mdwn +++ b/doc/design/assistant/inotify.mdwn @@ -67,10 +67,18 @@ Many races need to be dealt with by this code. Here are some of them. **Currently unfixed**; This changes content in the annex, and fsck will later catch the inconsistency. - Possible fixes: Somehow track or detect if a file is open for write - by any processes. Or, when possible, making a copy on write copy - before adding the file would avoid this. Or, as a last resort, make - an expensive copy of the file and add that. + Possible fixes: + + * Somehow track or detect if a file is open for write by any processes. + * Or, when possible, making a copy on write copy before adding the file + would avoid this. 
+ * Or, as a last resort, make an expensive copy of the file and add that. + * Tracking file opens and closes with inotify could tell if any other + processes have the file open. But there are problems.. It doesn't + seem to differentiate between files opened for read and for write. + And there would still be a race after the last close and before it's + injected into the annex, where it could be opened for write again. + Would need to detect that and undo the annex injection or something. * File is added and then replaced with another file before the annex add makes its symlink. From e17bc40c312ee04019bdd65cf992e0caa1819df1 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 6 Jun 2012 15:24:51 -0400 Subject: [PATCH 07/29] update --- doc/design/assistant/inotify.mdwn | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/doc/design/assistant/inotify.mdwn b/doc/design/assistant/inotify.mdwn index 28b3130e33..b963597c83 100644 --- a/doc/design/assistant/inotify.mdwn +++ b/doc/design/assistant/inotify.mdwn @@ -22,9 +22,7 @@ really useful, it needs to: - periodically auto-commit staged changes (avoid autocommitting when lots of changes are coming in) - tunable delays before adding new files, etc -- Coleasce related add/rm events. See commit - cbdaccd44aa8f0ca30afba23fc06dd244c242075 for some details of the problems - with doing this. 
+- coalesce related add/rm events for speed and less disk IO - don't annex `.gitignore` and `.gitattributes` files, but do auto-stage changes to them - configurable option to only annex files meeting certian size or From 8aa194bf806c4620f0794ff7a13084e328061cc8 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 6 Jun 2012 15:51:34 -0400 Subject: [PATCH 08/29] update --- doc/design/assistant/inotify.mdwn | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/design/assistant/inotify.mdwn b/doc/design/assistant/inotify.mdwn index b963597c83..0f4e8d28db 100644 --- a/doc/design/assistant/inotify.mdwn +++ b/doc/design/assistant/inotify.mdwn @@ -23,10 +23,10 @@ really useful, it needs to: lots of changes are coming in) - tunable delays before adding new files, etc - coalesce related add/rm events for speed and less disk IO -- don't annex `.gitignore` and `.gitattributes` files, but do auto-stage - changes to them +- don't annex `.gitignore` and `.gitattributes` files **done** - configurable option to only annex files meeting certian size or filename criteria +- option to check files not meeting annex criteria into git directly - honor .gitignore, not adding files it excludes (difficult, probably needs my own .gitignore parser to avoid excessive running of git commands to check for ignored files) From a79aebbe2a15a05f7475b08d98279bb88ef07305 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 6 Jun 2012 16:54:39 -0400 Subject: [PATCH 09/29] update --- doc/design/assistant/inotify.mdwn | 5 +++-- doc/design/assistant/webapp.mdwn | 2 ++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/doc/design/assistant/inotify.mdwn b/doc/design/assistant/inotify.mdwn index 0f4e8d28db..ab88210b2b 100644 --- a/doc/design/assistant/inotify.mdwn +++ b/doc/design/assistant/inotify.mdwn @@ -19,6 +19,9 @@ really useful, it needs to: - notice deleted files and stage the deletion (tricky; there's a race with add since it replaces the file with a symlink..)
**done** +- Gracefully handle when the default limit of 8192 inotified directories + is exceeded. This can be tuned by root, so help the user fix it. + **done** - periodically auto-commit staged changes (avoid autocommitting when lots of changes are coming in) - tunable delays before adding new files, etc @@ -32,8 +35,6 @@ really useful, it needs to: to check for ignored files) - Possibly, when a directory is moved out of the annex location, unannex its contents. -- Gracefully handle when the default limit of 8192 inotified directories - is exceeded. This can be tuned by root, so help the user fix it. - Support OSes other than Linux; it only uses inotify currently. OSX and FreeBSD use the same mechanism, and there is a Haskell interface for it, diff --git a/doc/design/assistant/webapp.mdwn b/doc/design/assistant/webapp.mdwn index abf7b38c94..598c1ff3a4 100644 --- a/doc/design/assistant/webapp.mdwn +++ b/doc/design/assistant/webapp.mdwn @@ -23,6 +23,8 @@ The webapp is a web server that displays a shiny interface. * there could be a UI to export a file, which would make it be served up over http by the web app +* Display any relevant warning messages. One is the `inotify max_user_watches` + exceeded message. ## implementation From baf9c7102ec58b3b31ef9d6a9424cf5e749bcc04 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 6 Jun 2012 16:56:48 -0400 Subject: [PATCH 10/29] blog for the day --- .../assistant/blog/day_3__more_races.mdwn | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 doc/design/assistant/blog/day_3__more_races.mdwn diff --git a/doc/design/assistant/blog/day_3__more_races.mdwn b/doc/design/assistant/blog/day_3__more_races.mdwn new file mode 100644 index 0000000000..9c11828420 --- /dev/null +++ b/doc/design/assistant/blog/day_3__more_races.mdwn @@ -0,0 +1,26 @@ +Today I worked on the race conditions, and fixed two of them. Both +were fixed by avoiding using `git add`, which looks at the files currently +on disk. 
Instead, `git annex watch` injects symlinks directly into git's +index, using `git update-index`. + +There is one bad race condition remaining. If multiple processes have a +file open for write, one can close it, and it will be added to the annex. +But then the other can still write to it. + +---- + +Getting away from race conditions for a while, I made `git annex watch` +not annex `.gitignore` and `.gitattributes` files. + +And, I made it handle running out of inotify descriptors. By default, +`/proc/sys/fs/inotify/max_user_watches` is 8192, and that's how many +directories inotify can watch. Now when it needs more, it will print +a nice message showing how to increase it with `sysctl`. + +FWIW, DropBox also uses inotify and has the same limit. It seems to not +tell the user how to fix it when it goes over. Here's what `git annex +watch` will say: + + Too many directories to watch! (Not watching ./dir4299) + Increase the limit by running: + echo fs.inotify.max_user_watches=81920 | sudo tee -a /etc/sysctl.conf; sudo sysctl -p From 9ee59f62d56ae8bdbe6a4600a6274108d89b0187 Mon Sep 17 00:00:00 2001 From: "https://www.google.com/accounts/o8/id?id=AItOawkmtR6oVColYKoU0SjBORLDGrwR10G-mKo" Date: Wed, 6 Jun 2012 22:03:29 +0000 Subject: [PATCH 11/29] Added a comment: Dropbox Inotify --- .../comment_1_d6015338f602b574a3805de5481fc45e._comment | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 doc/design/assistant/blog/day_3__more_races/comment_1_d6015338f602b574a3805de5481fc45e._comment diff --git a/doc/design/assistant/blog/day_3__more_races/comment_1_d6015338f602b574a3805de5481fc45e._comment b/doc/design/assistant/blog/day_3__more_races/comment_1_d6015338f602b574a3805de5481fc45e._comment new file mode 100644 index 0000000000..2d330f3327 --- /dev/null +++ b/doc/design/assistant/blog/day_3__more_races/comment_1_d6015338f602b574a3805de5481fc45e._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + 
username="https://www.google.com/accounts/o8/id?id=AItOawkmtR6oVColYKoU0SjBORLDGrwR10G-mKo" + nickname="Jo-Herman" + subject="Dropbox Inotify" + date="2012-06-06T22:03:29Z" + content=""" +Actually, Dropbox gives you a warning via libnotify inotify. It tends to go away too quickly to properly read though, much less actually copy down the command... +"""]] From 54f5462ad4a6f3e95a76d008f5eabbff988e7f9f Mon Sep 17 00:00:00 2001 From: "http://joeyh.name/" Date: Wed, 6 Jun 2012 23:25:57 +0000 Subject: [PATCH 12/29] Added a comment --- .../comment_2_4d6b23fc6442e0ee0303523cb69d0fba._comment | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 doc/design/assistant/blog/day_3__more_races/comment_2_4d6b23fc6442e0ee0303523cb69d0fba._comment diff --git a/doc/design/assistant/blog/day_3__more_races/comment_2_4d6b23fc6442e0ee0303523cb69d0fba._comment b/doc/design/assistant/blog/day_3__more_races/comment_2_4d6b23fc6442e0ee0303523cb69d0fba._comment new file mode 100644 index 0000000000..523e6d85ff --- /dev/null +++ b/doc/design/assistant/blog/day_3__more_races/comment_2_4d6b23fc6442e0ee0303523cb69d0fba._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joeyh.name/" + ip="4.252.8.36" + subject="comment 2" + date="2012-06-06T23:25:57Z" + content=""" +When I work on the [[webapp]], I'm planning to make it display this warning, and any other similar warning messages that might come up.
+"""]] From c7efb2888cb612ea7635d8840d22ab1b38a8cb80 Mon Sep 17 00:00:00 2001 From: "https://www.google.com/accounts/o8/id?id=AItOawnBJ6Dv1glxzzi4qIzGFNa6F-mfHIvv9Ck" Date: Thu, 7 Jun 2012 03:43:19 +0000 Subject: [PATCH 13/29] Added a comment: Wording --- ...ent_3_03f5b2344c2a47dea60086f217d60f9b._comment | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 doc/design/assistant/blog/day_3__more_races/comment_3_03f5b2344c2a47dea60086f217d60f9b._comment diff --git a/doc/design/assistant/blog/day_3__more_races/comment_3_03f5b2344c2a47dea60086f217d60f9b._comment b/doc/design/assistant/blog/day_3__more_races/comment_3_03f5b2344c2a47dea60086f217d60f9b._comment new file mode 100644 index 0000000000..92f5dcbd62 --- /dev/null +++ b/doc/design/assistant/blog/day_3__more_races/comment_3_03f5b2344c2a47dea60086f217d60f9b._comment @@ -0,0 +1,14 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawnBJ6Dv1glxzzi4qIzGFNa6F-mfHIvv9Ck" + nickname="Jim" + subject="Wording" + date="2012-06-07T03:43:19Z" + content=""" +For the unfamiliar, it's hard to tell if a command like that would persist. 
I'd suggest being as clear as possible, e.g.: + + Increase the limit for now by running: + sudo sysctl fs.inotify.max_user_watches=81920 + Increase the limit now and automatically at every boot by running: + echo fs.inotify.max_user_watches=81920 | sudo tee -a /etc/sysctl.conf; sudo sysctl -p + +"""]] From 2478aca3c531d6d60fed87d9aa5eb42f6dbe6517 Mon Sep 17 00:00:00 2001 From: "http://joeyh.name/" Date: Thu, 7 Jun 2012 04:48:15 +0000 Subject: [PATCH 14/29] Added a comment --- .../comment_4_860e90e989ec022100001c65e353a91e._comment | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 doc/design/assistant/blog/day_3__more_races/comment_4_860e90e989ec022100001c65e353a91e._comment diff --git a/doc/design/assistant/blog/day_3__more_races/comment_4_860e90e989ec022100001c65e353a91e._comment b/doc/design/assistant/blog/day_3__more_races/comment_4_860e90e989ec022100001c65e353a91e._comment new file mode 100644 index 0000000000..05b601eafe --- /dev/null +++ b/doc/design/assistant/blog/day_3__more_races/comment_4_860e90e989ec022100001c65e353a91e._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joeyh.name/" + ip="4.252.8.36" + subject="comment 4" + date="2012-06-07T04:48:15Z" + content=""" +Good thought Jim. I've done something like that. 
+"""]] From 8408a91082f440e139884117b2698bb8e0bd3fe9 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 7 Jun 2012 10:37:06 -0400 Subject: [PATCH 15/29] fixie --- doc/tips/using_box.com_as_a_special_remote.mdwn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/tips/using_box.com_as_a_special_remote.mdwn b/doc/tips/using_box.com_as_a_special_remote.mdwn index 917c7a93bd..cafbc033c4 100644 --- a/doc/tips/using_box.com_as_a_special_remote.mdwn +++ b/doc/tips/using_box.com_as_a_special_remote.mdwn @@ -24,7 +24,7 @@ With a little setup, git-annex can use Box as a * Create `~/.davfs2/davfs2.conf` with some important settings: mkdir ~/.davfs2/ - echo use_locks 0 >> ~/.davfs2/davfs2.conf + echo use_locks 0 > ~/.davfs2/davfs2.conf echo cache_size 1 >> ~/.davfs2/davfs2.conf echo delay_upload 0 >> ~/.davfs2/davfs2.conf From 4d1c114e4d27ae339f6fb408d398945fa68c2435 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 7 Jun 2012 11:16:48 -0400 Subject: [PATCH 16/29] initremote: Automatically describe a remote when creating it. This ensures that all special remotes show up in git annex status. Before, a special remote that was not manually described, and was not a current git remote, did not show up there, although initremote did list it. 
--- Command/InitRemote.hs | 2 ++ Remote.hs | 7 ++----- debian/changelog | 1 + 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Command/InitRemote.hs b/Command/InitRemote.hs index 698d604552..a78505a197 100644 --- a/Command/InitRemote.hs +++ b/Command/InitRemote.hs @@ -15,6 +15,7 @@ import qualified Remote import qualified Logs.Remote import qualified Types.Remote as R import Annex.UUID +import Logs.UUID def :: [Command] def = [command "initremote" @@ -60,6 +61,7 @@ findByName name = do where generate = do uuid <- liftIO genUUID + describeUUID uuid name return (uuid, M.insert nameKey name M.empty) findByName' :: String -> M.Map UUID R.RemoteConfig -> Maybe (UUID, R.RemoteConfig) diff --git a/Remote.hs b/Remote.hs index e9e66990c5..839c6ddb09 100644 --- a/Remote.hs +++ b/Remote.hs @@ -54,9 +54,9 @@ remoteMap :: (Remote -> a) -> Annex (M.Map UUID a) remoteMap c = M.fromList . map (\r -> (uuid r, c r)) . filter (\r -> uuid r /= NoUUID) <$> remoteList -{- Map of UUIDs and their descriptions. +{- Map of UUIDs of remotes and their descriptions. - The names of Remotes are added to suppliment any description that has - - been set for a repository. -} + - been set for a repository. -} uuidDescriptions :: Annex (M.Map UUID String) uuidDescriptions = M.unionWith addName <$> uuidMap <*> remoteMap name @@ -101,9 +101,6 @@ nameToUUID n = byName' n >>= go double (a, _) = (a, a) {- Pretty-prints a list of UUIDs of remotes, for human display. - - - - Shows descriptions from the uuid log, falling back to remote names, - - as some remotes may not be in the uuid log. - - When JSON is enabled, also generates a machine-readable description - of the UUIDs. -} diff --git a/debian/changelog b/debian/changelog index 9a010327df..8a734e0aa7 100644 --- a/debian/changelog +++ b/debian/changelog @@ -2,6 +2,7 @@ git-annex (3.20120606) UNRELEASED; urgency=low * add: Prevent (most) modifications from being made to a file while it is being added to the annex. 
+ * initremote: Automatically describe a remote when creating it. -- Joey Hess Tue, 05 Jun 2012 20:25:51 -0400 From 021325ce7107be71e067f964e4a57864eac59ea6 Mon Sep 17 00:00:00 2001 From: "https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" Date: Thu, 7 Jun 2012 20:22:58 +0000 Subject: [PATCH 17/29] Added a comment --- .../comment_3_05223be50c889b2ed6bc4abf74116450._comment | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 doc/design/assistant/comment_3_05223be50c889b2ed6bc4abf74116450._comment diff --git a/doc/design/assistant/comment_3_05223be50c889b2ed6bc4abf74116450._comment b/doc/design/assistant/comment_3_05223be50c889b2ed6bc4abf74116450._comment new file mode 100644 index 0000000000..a78fa33439 --- /dev/null +++ b/doc/design/assistant/comment_3_05223be50c889b2ed6bc4abf74116450._comment @@ -0,0 +1,9 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 3" + date="2012-06-07T20:22:55Z" + content=""" +I'd agree getting it into the main distros is the way to go, if you need OSX binaries, I could volunteer to setup an autobuilder to generate binaries for OSX users, however it would rely on users to have macports with the correct ports installed to use it (things like coreutils etc...) 
+ +"""]] From b778b9b345dc9686a2721fa946d46313bfd66876 Mon Sep 17 00:00:00 2001 From: "https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" Date: Thu, 7 Jun 2012 20:23:09 +0000 Subject: [PATCH 18/29] Added a comment --- .../comment_4_5643ebf22a0f3764a88f5d2e66562e59._comment | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 doc/design/assistant/comment_4_5643ebf22a0f3764a88f5d2e66562e59._comment diff --git a/doc/design/assistant/comment_4_5643ebf22a0f3764a88f5d2e66562e59._comment b/doc/design/assistant/comment_4_5643ebf22a0f3764a88f5d2e66562e59._comment new file mode 100644 index 0000000000..73b01947c0 --- /dev/null +++ b/doc/design/assistant/comment_4_5643ebf22a0f3764a88f5d2e66562e59._comment @@ -0,0 +1,9 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 4" + date="2012-06-07T20:23:08Z" + content=""" +I'd agree getting it into the main distros is the way to go, if you need OSX binaries, I could volunteer to setup an autobuilder to generate binaries for OSX users, however it would rely on users to have macports with the correct ports installed to use it (things like coreutils etc...) 
+ +"""]] From 3f03e58dc681a82fef156d97316ac8035304e306 Mon Sep 17 00:00:00 2001 From: "https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" Date: Thu, 7 Jun 2012 20:25:14 +0000 Subject: [PATCH 19/29] removed --- .../comment_4_5643ebf22a0f3764a88f5d2e66562e59._comment | 9 --------- 1 file changed, 9 deletions(-) delete mode 100644 doc/design/assistant/comment_4_5643ebf22a0f3764a88f5d2e66562e59._comment diff --git a/doc/design/assistant/comment_4_5643ebf22a0f3764a88f5d2e66562e59._comment b/doc/design/assistant/comment_4_5643ebf22a0f3764a88f5d2e66562e59._comment deleted file mode 100644 index 73b01947c0..0000000000 --- a/doc/design/assistant/comment_4_5643ebf22a0f3764a88f5d2e66562e59._comment +++ /dev/null @@ -1,9 +0,0 @@ -[[!comment format=mdwn - username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" - nickname="Jimmy" - subject="comment 4" - date="2012-06-07T20:23:08Z" - content=""" -I'd agree getting it into the main distros is the way to go, if you need OSX binaries, I could volunteer to setup an autobuilder to generate binaries for OSX users, however it would rely on users to have macports with the correct ports installed to use it (things like coreutils etc...) - -"""]] From 109bd9c08b2f7d26720ebb26ed7e165d38accb11 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 7 Jun 2012 21:37:59 -0400 Subject: [PATCH 20/29] blog for the day --- doc/design/assistant/blog/day_4__speed.mdwn | 46 +++++++++++++++++++++ doc/design/assistant/inotify.mdwn | 3 ++ 2 files changed, 49 insertions(+) create mode 100644 doc/design/assistant/blog/day_4__speed.mdwn diff --git a/doc/design/assistant/blog/day_4__speed.mdwn b/doc/design/assistant/blog/day_4__speed.mdwn new file mode 100644 index 0000000000..151f4af2a9 --- /dev/null +++ b/doc/design/assistant/blog/day_4__speed.mdwn @@ -0,0 +1,46 @@ +Only had a few hours to work today, but my current focus is speed, and I +have indeed sped up parts of `git annex watch`. 
+ +One thing folks don't realize about git is that despite a rep for being +fast, it can be rather slow in one area: Writing the index. You don't +notice it until you have a lot of files, and the index gets big. So I've +put a lot of effort into git-annex in the past to avoid writing the index +repeatedly, and queue up big index changes that can happen all at once. The +new `git annex watch` was not able to use that queue. Today I reworked the +queue machinery to support the types of direct index writes it needs, and +now repeated index writes are eliminated. + +... Eliminated too far, it turns out, since it doesn't yet *ever* flush +that queue until shutdown! So the next step here will be to have a worker +thread that wakes up periodically, flushes the queue, and autocommits. +There's lots of room here for smart behavior. Like, if a lot of changes are +being made close together, wait for them to die down before committing. Or, +if it's been idle and a single file appears, commit it immediatly, since +this is probably something the user wants synced out right away. I'll start +with something stupid and then add the smarts. + +(BTW, in all my years of programming, I have avoided threads like the nasty +bug-prone plague they are. Here I already have three threads, and am going to +add probably 4 or 5 more before I'm done with the git annex assistant. So +far, it's working well -- I give credit to Haskell for making it easy to +manage state in ways that make it possible to reason about how the threads +will interact.) + +What about the races I've been stressing over? Well, I have an ulterior +motive in speeding up `git annex watch`, and that's to also be able to +**slow it down**. Running in slow-mo makes it easy to try things that might +cause a race and watch how it reacts. I'll be using this technique when +I circle back around to dealing with the races. + +Another tricky speed problem came up today that I also need to fix. 
On +startup, `git annex watch` scans the whole tree to find files that have +been added or moved etc while it was not running, and take care of them. +Currently, this scan involves re-staging every symlink in the tree. That's +slow! I need to find a way to avoid re-staging symlinks; I may use `git +cat-file` to check if the currently staged symlink is correct, or I may +come up with some better and faster solution. Sleeping on this problem. + +---- + +Oh yeah, I also found one more race bug today. It only happens at startup +and could only make it miss staging file deletions. diff --git a/doc/design/assistant/inotify.mdwn b/doc/design/assistant/inotify.mdwn index ab88210b2b..7cdde33ac6 100644 --- a/doc/design/assistant/inotify.mdwn +++ b/doc/design/assistant/inotify.mdwn @@ -108,3 +108,6 @@ Many races need to be dealt with by this code. Here are some of them. Not a problem; The removal event removes the old file from the index, and the add event adds the new one. +* At startup, `git add --update` is run, to notice deleted files. + Then inotify starts up. Files deleted in between won't have their + removals staged. From 4f6b522d8cf25993bc5827b6bbce9b14a1ffdf2d Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 7 Jun 2012 21:40:31 -0400 Subject: [PATCH 21/29] update --- doc/design/assistant/blog/day_4__speed.mdwn | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/design/assistant/blog/day_4__speed.mdwn b/doc/design/assistant/blog/day_4__speed.mdwn index 151f4af2a9..badc6b7b18 100644 --- a/doc/design/assistant/blog/day_4__speed.mdwn +++ b/doc/design/assistant/blog/day_4__speed.mdwn @@ -13,6 +13,7 @@ now repeated index writes are eliminated. ... Eliminated too far, it turns out, since it doesn't yet *ever* flush that queue until shutdown! So the next step here will be to have a worker thread that wakes up periodically, flushes the queue, and autocommits. +(This will, in fact, be the start of the [[syncing]] phase of my roadmap!) 
There's lots of room here for smart behavior. Like, if a lot of changes are being made close together, wait for them to die down before committing. Or, if it's been idle and a single file appears, commit it immediatly, since From 63290903aa5c92013dd7383464fe3c6061aa835d Mon Sep 17 00:00:00 2001 From: "http://joeyh.name/" Date: Fri, 8 Jun 2012 01:56:52 +0000 Subject: [PATCH 22/29] Added a comment --- .../comment_4_fbbd93b55803ae21e6ba4b6568c2fafd._comment | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 doc/design/assistant/comment_4_fbbd93b55803ae21e6ba4b6568c2fafd._comment diff --git a/doc/design/assistant/comment_4_fbbd93b55803ae21e6ba4b6568c2fafd._comment b/doc/design/assistant/comment_4_fbbd93b55803ae21e6ba4b6568c2fafd._comment new file mode 100644 index 0000000000..cd3b5aaef7 --- /dev/null +++ b/doc/design/assistant/comment_4_fbbd93b55803ae21e6ba4b6568c2fafd._comment @@ -0,0 +1,9 @@ +[[!comment format=mdwn + username="http://joeyh.name/" + subject="comment 4" + date="2012-06-08T01:56:52Z" + content=""" +I always appreciate your OSX work Jimmy... + +Could it be put into macports? 
+"""]] From 12afa8fb560ae877dcf8b78d352806e50e1f215e Mon Sep 17 00:00:00 2001 From: "https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" Date: Fri, 8 Jun 2012 07:22:34 +0000 Subject: [PATCH 23/29] Added a comment --- ...mment_5_f4e9af3fed6c27e8ff39badb9794064d._comment | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 doc/design/assistant/comment_5_f4e9af3fed6c27e8ff39badb9794064d._comment diff --git a/doc/design/assistant/comment_5_f4e9af3fed6c27e8ff39badb9794064d._comment b/doc/design/assistant/comment_5_f4e9af3fed6c27e8ff39badb9794064d._comment new file mode 100644 index 0000000000..bf8d9709e8 --- /dev/null +++ b/doc/design/assistant/comment_5_f4e9af3fed6c27e8ff39badb9794064d._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 5" + date="2012-06-08T07:22:34Z" + content=""" +In relation to macports, I often found that haskell in macports are often behind other distros, and I'm not willing to put much effort into maintaining or updating those ports. I found that to build git-annex, installing macports manually and then installing haskell-platform from the upstream to be the best way to get the most up to date dependencies for git-annex. + +fyi in macports ghc is at version 6.10.4 and haskell platform is at version 2009.2, so there are a significant number of ports to update. + +I was thinking about this a bit more and I reckon it might be easier to try and build a self contained .pkg package and have all the needed binaries in a .app styled package, that would work well when the webapp comes along.
I will take a look at it in a week or two (currently moving house so I don't have much time) +"""]] From c388e90dc9f0f09ea5ef639cb4c9aabb9568ae0e Mon Sep 17 00:00:00 2001 From: "https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" Date: Fri, 8 Jun 2012 15:21:21 +0000 Subject: [PATCH 24/29] Added a comment --- ...comment_6_c7ad07cade1f44f9a8b61f92225bb9c5._comment | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 doc/design/assistant/comment_6_c7ad07cade1f44f9a8b61f92225bb9c5._comment diff --git a/doc/design/assistant/comment_6_c7ad07cade1f44f9a8b61f92225bb9c5._comment b/doc/design/assistant/comment_6_c7ad07cade1f44f9a8b61f92225bb9c5._comment new file mode 100644 index 0000000000..9fa66d6d31 --- /dev/null +++ b/doc/design/assistant/comment_6_c7ad07cade1f44f9a8b61f92225bb9c5._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus" + nickname="Jimmy" + subject="comment 6" + date="2012-06-08T15:21:18Z" + content=""" +It's not much for now... but see I'm ignoring the debian-stable and pristine-tar branches for now, as I am just building and testing on osx 10.7. + +Hope the autobuilder will help you develop the OSX side of things without having direct access to an osx machine! I will try and get gitbuilder to spit out appropriately named tarballs of the compiled binaries in a few days when I have more time.
+"""]] From 3141c36f2e5cefb67eb200f604ac5b38a04b3e3d Mon Sep 17 00:00:00 2001 From: "http://joeyh.name/" Date: Sat, 9 Jun 2012 18:07:51 +0000 Subject: [PATCH 25/29] Added a comment --- .../comment_7_609d38e993267195a80fecd84c93d1e2._comment | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 doc/design/assistant/comment_7_609d38e993267195a80fecd84c93d1e2._comment diff --git a/doc/design/assistant/comment_7_609d38e993267195a80fecd84c93d1e2._comment b/doc/design/assistant/comment_7_609d38e993267195a80fecd84c93d1e2._comment new file mode 100644 index 0000000000..6685c6548e --- /dev/null +++ b/doc/design/assistant/comment_7_609d38e993267195a80fecd84c93d1e2._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joeyh.name/" + ip="4.153.8.126" + subject="comment 7" + date="2012-06-09T18:07:51Z" + content=""" +Thanks, that's already been useful to me. You might as well skip the debian-specific \"bpo\" tags too. +"""]] From 6c8f76ca28859b1e025189e4f669508e2c516a9b Mon Sep 17 00:00:00 2001 From: "http://rmunn.myopenid.com/" Date: Sat, 9 Jun 2012 18:54:14 +0000 Subject: [PATCH 26/29] New idea: using youtube-dl to download video URLs --- ...cial-case_handling_of_Youtube_URLs_in_Web_special_remote.mdwn | 1 + 1 file changed, 1 insertion(+) create mode 100644 doc/todo/wishlist:_special-case_handling_of_Youtube_URLs_in_Web_special_remote.mdwn diff --git a/doc/todo/wishlist:_special-case_handling_of_Youtube_URLs_in_Web_special_remote.mdwn b/doc/todo/wishlist:_special-case_handling_of_Youtube_URLs_in_Web_special_remote.mdwn new file mode 100644 index 0000000000..a368c98a01 --- /dev/null +++ b/doc/todo/wishlist:_special-case_handling_of_Youtube_URLs_in_Web_special_remote.mdwn @@ -0,0 +1 @@ +The [Web special remote](http://git-annex.branchable.com/special_remotes/web/) could possibly be improved by detecting when URLs reference a Youtube video page and using [youtube-dl](http://rg3.github.com/youtube-dl/) instead of wget to download the page. 
Youtube-dl can also handle several other video sites such as vimeo.com and blip.tv, so if this idea were to be implemented, it might make sense to borrow the regular expressions that youtube-dl uses to identify video URLs. A quick grep through the youtube-dl source for the identifier _VALID_URL should find those regexes (in Python's regex format). From affd52be0e3e2dae1ffa633551cff919a44c624a Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sat, 9 Jun 2012 17:07:40 -0400 Subject: [PATCH 27/29] response --- ...andling_of_Youtube_URLs_in_Web_special_remote.mdwn | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/doc/todo/wishlist:_special-case_handling_of_Youtube_URLs_in_Web_special_remote.mdwn b/doc/todo/wishlist:_special-case_handling_of_Youtube_URLs_in_Web_special_remote.mdwn index a368c98a01..e11989e521 100644 --- a/doc/todo/wishlist:_special-case_handling_of_Youtube_URLs_in_Web_special_remote.mdwn +++ b/doc/todo/wishlist:_special-case_handling_of_Youtube_URLs_in_Web_special_remote.mdwn @@ -1 +1,12 @@ The [Web special remote](http://git-annex.branchable.com/special_remotes/web/) could possibly be improved by detecting when URLs reference a Youtube video page and using [youtube-dl](http://rg3.github.com/youtube-dl/) instead of wget to download the page. Youtube-dl can also handle several other video sites such as vimeo.com and blip.tv, so if this idea were to be implemented, it might make sense to borrow the regular expressions that youtube-dl uses to identify video URLs. A quick grep through the youtube-dl source for the identifier _VALID_URL should find those regexes (in Python's regex format). + +> This is something I've thought about doing for a while.. +> Two things I have not figured out: +> +> * Seems that this should really be user-configurable or a plugin system, +> to handle more than just this one case. +> * Youtube-dl breaks from time to time, I really trust these urls a lot +> less than regular urls. 
Perhaps per-url trust levels are called for by +> this. +> +> --[[Joey]] From 3a213ced1e6167f3b595fb1327954d147fd5a34d Mon Sep 17 00:00:00 2001 From: "https://www.google.com/accounts/o8/id?id=AItOawldKnauegZulM7X6JoHJs7Gd5PnDjcgx-E" Date: Sat, 9 Jun 2012 22:34:31 +0000 Subject: [PATCH 28/29] Added a comment: open source? --- .../comment_1_bf3c9c33cc0dea5eaeb6f2af110b924b._comment | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 doc/design/assistant/blog/day_4__speed/comment_1_bf3c9c33cc0dea5eaeb6f2af110b924b._comment diff --git a/doc/design/assistant/blog/day_4__speed/comment_1_bf3c9c33cc0dea5eaeb6f2af110b924b._comment b/doc/design/assistant/blog/day_4__speed/comment_1_bf3c9c33cc0dea5eaeb6f2af110b924b._comment new file mode 100644 index 0000000000..fb5b95490f --- /dev/null +++ b/doc/design/assistant/blog/day_4__speed/comment_1_bf3c9c33cc0dea5eaeb6f2af110b924b._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawldKnauegZulM7X6JoHJs7Gd5PnDjcgx-E" + nickname="Matt" + subject="open source?" + date="2012-06-09T22:34:30Z" + content=""" +Are you publishing the source code for git-annex assistant somewhere? 
+"""]] From 6a71a9729fbe02aa6c179bb6c617278257edf71c Mon Sep 17 00:00:00 2001 From: "http://joeyh.name/" Date: Sat, 9 Jun 2012 23:01:29 +0000 Subject: [PATCH 29/29] Added a comment --- .../comment_2_33aba4c9abaa3e6a05a2c87ab7df9d0e._comment | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 doc/design/assistant/blog/day_4__speed/comment_2_33aba4c9abaa3e6a05a2c87ab7df9d0e._comment diff --git a/doc/design/assistant/blog/day_4__speed/comment_2_33aba4c9abaa3e6a05a2c87ab7df9d0e._comment b/doc/design/assistant/blog/day_4__speed/comment_2_33aba4c9abaa3e6a05a2c87ab7df9d0e._comment new file mode 100644 index 0000000000..1fcc197ab7 --- /dev/null +++ b/doc/design/assistant/blog/day_4__speed/comment_2_33aba4c9abaa3e6a05a2c87ab7df9d0e._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joeyh.name/" + ip="4.153.8.126" + subject="comment 2" + date="2012-06-09T23:01:29Z" + content=""" +Yes, it's in [[git|download]] with the rest of git-annex. Currently in the `watch` branch. +"""]]