autocorrection

git-annex (but not git-annex-shell) supports the git help.autocorrect
configuration setting, doing fuzzy matching using the restricted
Damerau-Levenshtein edit distance, just as git does. This adds a build
dependency on the haskell edit-distance library.
This commit is contained in:
Joey Hess 2012-04-12 15:34:41 -04:00
parent fa45175210
commit 52a158a7c6
8 changed files with 115 additions and 21 deletions

View file

@ -1,6 +1,6 @@
{- git-annex command line parsing and dispatch {- git-annex command line parsing and dispatch
- -
- Copyright 2010 Joey Hess <joey@kitenet.net> - Copyright 2010-2012 Joey Hess <joey@kitenet.net>
- -
- Licensed under the GNU GPL version 3 or higher. - Licensed under the GNU GPL version 3 or higher.
-} -}
@ -21,6 +21,7 @@ import qualified Annex
import qualified Annex.Queue import qualified Annex.Queue
import qualified Git import qualified Git
import qualified Git.Command import qualified Git.Command
import qualified Git.AutoCorrect
import Annex.Content import Annex.Content
import Annex.Ssh import Annex.Ssh
import Command import Command
@ -29,8 +30,8 @@ type Params = [String]
type Flags = [Annex ()] type Flags = [Annex ()]
{- Runs the passed command line. -} {- Runs the passed command line. -}
dispatch :: Params -> [Command] -> [Option] -> String -> IO Git.Repo -> IO () dispatch :: Bool -> Params -> [Command] -> [Option] -> String -> IO Git.Repo -> IO ()
dispatch args cmds commonoptions header getgitrepo = do dispatch fuzzyok allargs allcmds commonoptions header getgitrepo = do
setupConsole setupConsole
r <- E.try getgitrepo :: IO (Either E.SomeException Git.Repo) r <- E.try getgitrepo :: IO (Either E.SomeException Git.Repo)
case r of case r of
@ -38,30 +39,46 @@ dispatch args cmds commonoptions header getgitrepo = do
Right g -> do Right g -> do
state <- Annex.new g state <- Annex.new g
(actions, state') <- Annex.run state $ do (actions, state') <- Annex.run state $ do
checkfuzzy
sequence_ flags sequence_ flags
prepCommand cmd params prepCommand cmd params
tryRun state' cmd $ [startup] ++ actions ++ [shutdown $ cmdoneshot cmd] tryRun state' cmd $ [startup] ++ actions ++ [shutdown $ cmdoneshot cmd]
where where
(flags, cmd, params) = parseCmd args cmds commonoptions header err msg = msg ++ "\n\n" ++ usage header allcmds commonoptions
cmd = Prelude.head cmds
(cmds, name, args) = findCmd fuzzyok allargs allcmds err
(flags, params) = getOptCmd args cmd commonoptions err
checkfuzzy = when (length cmds > 1) $
inRepo $ Git.AutoCorrect.prepare name cmdname cmds
{- Parses command line, and returns actions to run to configure flags, {- Parses command line params far enough to find the Command to run, and
- the Command being run, and the remaining parameters for the command. -} - returns the remaining params.
parseCmd :: Params -> [Command] -> [Option] -> String -> (Flags, Command, Params) - Does fuzzy matching if necessary, which may result in multiple Commands. -}
parseCmd argv cmds commonoptions header findCmd :: Bool -> Params -> [Command] -> (String -> String) -> ([Command], String, Params)
| isNothing name = err "missing command" findCmd fuzzyok argv cmds err
| null matches = err $ "unknown command " ++ fromJust name | isNothing name = error $ err "missing command"
| otherwise = check $ getOpt Permute (commonoptions ++ cmdoptions cmd) args | not (null exactcmds) = (exactcmds, fromJust name, args)
| fuzzyok && not (null inexactcmds) = (inexactcmds, fromJust name, args)
| otherwise = error $ err $ "unknown command " ++ fromJust name
where where
(name, args) = findname argv [] (name, args) = findname argv []
findname [] c = (Nothing, reverse c) findname [] c = (Nothing, reverse c)
findname (a:as) c findname (a:as) c
| "-" `isPrefixOf` a = findname as (a:c) | "-" `isPrefixOf` a = findname as (a:c)
| otherwise = (Just a, reverse c ++ as) | otherwise = (Just a, reverse c ++ as)
matches = filter (\c -> name == Just (cmdname c)) cmds exactcmds = filter (\c -> name == Just (cmdname c)) cmds
cmd = Prelude.head matches inexactcmds = case name of
check (flags, rest, []) = (flags, cmd, rest) Nothing -> []
check (_, _, errs) = err $ concat errs Just n -> Git.AutoCorrect.fuzzymatches n cmdname cmds
err msg = error $ msg ++ "\n\n" ++ usage header cmds commonoptions
{- Parses command line options, and returns actions to run to configure flags
- and the remaining parameters for the command. -}
getOptCmd :: Params -> Command -> [Option] -> (String -> String) -> (Flags, Params)
getOptCmd argv cmd commonoptions err = check $
getOpt Permute (commonoptions ++ cmdoptions cmd) argv
where
check (flags, rest, []) = (flags, rest)
check (_, _, errs) = error $ err $ concat errs
{- Runs a list of Annex actions. Catches IO errors and continues {- Runs a list of Annex actions. Catches IO errors and continues
- (but explicitly thrown errors terminate the whole command). - (but explicitly thrown errors terminate the whole command).

71
Git/AutoCorrect.hs Normal file
View file

@ -0,0 +1,71 @@
{- git autocorrection using Damerau-Levenshtein edit distance
-
- Copyright 2012 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Git.AutoCorrect where
import Common
import Git.Types
import qualified Git.Config
import Text.EditDistance
import Control.Concurrent
{- These are the same cost values as used in git. -}
gitEditCosts :: EditCosts
gitEditCosts = EditCosts
{ deletionCosts = ConstantCost 4
, insertionCosts = ConstantCost 1
, substitutionCosts = ConstantCost 2
, transpositionCosts = ConstantCost 0
}
{- Git's source calls this "an empirically derived magic number" -}
similarityFloor :: Int
similarityFloor = 7
{- Finds inexact matches for the input amoung the choices.
- Returns an ordered list of good enough matches, or an empty list if
- nothing matches well. -}
fuzzymatches :: String -> (c -> String) -> [c] -> [c]
fuzzymatches input showchoice choices = fst $ unzip $
sortBy comparecost $ filter similarEnough $ zip choices costs
where
distance v = restrictedDamerauLevenshteinDistance gitEditCosts v input
costs = map (distance . showchoice) choices
comparecost a b = compare (snd a) (snd b)
similarEnough (_, cst) = cst < similarityFloor
{- Takes action based on git's autocorrect configuration, in preparation for
- an autocorrected command being run. -}
prepare :: String -> (c -> String) -> [c] -> Repo -> IO ()
prepare input showmatch matches r =
case readish $ Git.Config.get "help.autocorrect" "0" r of
Just n
| n == 0 -> list
| n < 0 -> warn
| otherwise -> sleep n
Nothing -> list
where
list = error $ unlines $
[ "Unknown command '" ++ input ++ "'"
, ""
, "Did you mean one of these?"
] ++ map (\m -> "\t" ++ showmatch m) matches
warn =
hPutStr stderr $ unlines
[ "WARNING: You called a command named '" ++
input ++ "', which does not exist."
, "Continuing under the assumption that you meant '" ++
showmatch (Prelude.head matches) ++ "'"
]
sleep n = do
warn
hPutStrLn stderr $ unwords
[ "in"
, show (fromIntegral n / 10 :: Float)
, "seconds automatically..."]
threadDelay (n * 100000) -- deciseconds to microseconds

View file

@ -131,4 +131,4 @@ header :: String
header = "Usage: git-annex command [option ..]" header = "Usage: git-annex command [option ..]"
run :: [String] -> IO () run :: [String] -> IO ()
run args = dispatch args cmds options header Git.Construct.fromCurrent run args = dispatch True args cmds options header Git.Construct.fromCurrent

View file

@ -83,7 +83,7 @@ builtins = map cmdname cmds
builtin :: String -> String -> [String] -> IO () builtin :: String -> String -> [String] -> IO ()
builtin cmd dir params = do builtin cmd dir params = do
checkNotReadOnly cmd checkNotReadOnly cmd
dispatch (cmd : filterparams params) cmds options header $ dispatch False (cmd : filterparams params) cmds options header $
Git.Construct.repoAbsPath dir >>= Git.Construct.fromAbsPath Git.Construct.repoAbsPath dir >>= Git.Construct.fromAbsPath
external :: [String] -> IO () external :: [String] -> IO ()

4
debian/changelog vendored
View file

@ -3,6 +3,10 @@ git-annex (3.20120407) UNRELEASED; urgency=low
* bugfix: Adding a dotfile also caused all non-dotfiles to be added. * bugfix: Adding a dotfile also caused all non-dotfiles to be added.
* bup: Properly handle key names with spaces or other things that are * bup: Properly handle key names with spaces or other things that are
not legal git refs. not legal git refs.
* git-annex (but not git-annex-shell) supports the git help.autocorrect
configuration setting, doing fuzzy matching using the restricted
Damerau-Levenshtein edit distance, just as git does. This adds a build
dependency on the haskell edit-distance library.
-- Joey Hess <joeyh@debian.org> Sun, 08 Apr 2012 12:23:42 -0400 -- Joey Hess <joeyh@debian.org> Sun, 08 Apr 2012 12:23:42 -0400

1
debian/control vendored
View file

@ -19,6 +19,7 @@ Build-Depends:
libghc-json-dev, libghc-json-dev,
libghc-ifelse-dev, libghc-ifelse-dev,
libghc-bloomfilter-dev, libghc-bloomfilter-dev,
libghc-edit-distance-dev,
ikiwiki, ikiwiki,
perlmagick, perlmagick,
git, git,

View file

@ -36,6 +36,7 @@ To build and use git-annex, you will need:
* [json](http://hackage.haskell.org/package/json) * [json](http://hackage.haskell.org/package/json)
* [IfElse](http://hackage.haskell.org/package/IfElse) * [IfElse](http://hackage.haskell.org/package/IfElse)
* [bloomfilter](http://hackage.haskell.org/package/bloomfilter) * [bloomfilter](http://hackage.haskell.org/package/bloomfilter)
* [edit-distance](http://hackage.haskell.org/package/edit-distance)
* Shell commands * Shell commands
* [git](http://git-scm.com/) * [git](http://git-scm.com/)
* [uuid](http://www.ossp.org/pkg/lib/uuid/) * [uuid](http://www.ossp.org/pkg/lib/uuid/)

View file

@ -1,5 +1,5 @@
Name: git-annex Name: git-annex
Version: 3.20120406 Version: 3.20120407
Cabal-Version: >= 1.8 Cabal-Version: >= 1.8
License: GPL License: GPL
Maintainer: Joey Hess <joey@kitenet.net> Maintainer: Joey Hess <joey@kitenet.net>
@ -32,7 +32,7 @@ Executable git-annex
unix, containers, utf8-string, network, mtl, bytestring, old-locale, time, unix, containers, utf8-string, network, mtl, bytestring, old-locale, time,
pcre-light, extensible-exceptions, dataenc, SHA, process, hS3, json, HTTP, pcre-light, extensible-exceptions, dataenc, SHA, process, hS3, json, HTTP,
base >= 4.5, base < 5, monad-control, transformers-base, lifted-base, base >= 4.5, base < 5, monad-control, transformers-base, lifted-base,
IfElse, text, QuickCheck >= 2.1, bloomfilter IfElse, text, QuickCheck >= 2.1, bloomfilter, edit-distance
Other-Modules: Utility.Touch Other-Modules: Utility.Touch
C-Sources: Utility/diskfree.c C-Sources: Utility/diskfree.c