autocorrection

git-annex (but not git-annex-shell) supports the git help.autocorrect
configuration setting, doing fuzzy matching using the restricted
Damerau-Levenshtein edit distance, just as git does. This adds a build
dependency on the haskell edit-distance library.
This commit is contained in:
Joey Hess 2012-04-12 15:34:41 -04:00
parent fa45175210
commit 52a158a7c6
8 changed files with 115 additions and 21 deletions

View file

@ -1,6 +1,6 @@
{- git-annex command line parsing and dispatch
-
- Copyright 2010 Joey Hess <joey@kitenet.net>
- Copyright 2010-2012 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@ -21,6 +21,7 @@ import qualified Annex
import qualified Annex.Queue
import qualified Git
import qualified Git.Command
import qualified Git.AutoCorrect
import Annex.Content
import Annex.Ssh
import Command
@ -29,8 +30,8 @@ type Params = [String]
type Flags = [Annex ()]
{- Runs the passed command line. -}
dispatch :: Params -> [Command] -> [Option] -> String -> IO Git.Repo -> IO ()
dispatch args cmds commonoptions header getgitrepo = do
dispatch :: Bool -> Params -> [Command] -> [Option] -> String -> IO Git.Repo -> IO ()
dispatch fuzzyok allargs allcmds commonoptions header getgitrepo = do
setupConsole
r <- E.try getgitrepo :: IO (Either E.SomeException Git.Repo)
case r of
@ -38,30 +39,46 @@ dispatch args cmds commonoptions header getgitrepo = do
Right g -> do
state <- Annex.new g
(actions, state') <- Annex.run state $ do
checkfuzzy
sequence_ flags
prepCommand cmd params
tryRun state' cmd $ [startup] ++ actions ++ [shutdown $ cmdoneshot cmd]
tryRun state' cmd $ [startup] ++ actions ++ [shutdown $ cmdoneshot cmd]
where
(flags, cmd, params) = parseCmd args cmds commonoptions header
err msg = msg ++ "\n\n" ++ usage header allcmds commonoptions
cmd = Prelude.head cmds
(cmds, name, args) = findCmd fuzzyok allargs allcmds err
(flags, params) = getOptCmd args cmd commonoptions err
checkfuzzy = when (length cmds > 1) $
inRepo $ Git.AutoCorrect.prepare name cmdname cmds
{- Parses command line, and returns actions to run to configure flags,
- the Command being run, and the remaining parameters for the command. -}
parseCmd :: Params -> [Command] -> [Option] -> String -> (Flags, Command, Params)
parseCmd argv cmds commonoptions header
| isNothing name = err "missing command"
| null matches = err $ "unknown command " ++ fromJust name
| otherwise = check $ getOpt Permute (commonoptions ++ cmdoptions cmd) args
{- Parses command line params far enough to find the Command to run, and
- returns the remaining params.
- Does fuzzy matching if necessary, which may result in multiple Commands. -}
findCmd :: Bool -> Params -> [Command] -> (String -> String) -> ([Command], String, Params)
findCmd fuzzyok argv cmds err
| isNothing name = error $ err "missing command"
| not (null exactcmds) = (exactcmds, fromJust name, args)
| fuzzyok && not (null inexactcmds) = (inexactcmds, fromJust name, args)
| otherwise = error $ err $ "unknown command " ++ fromJust name
where
(name, args) = findname argv []
findname [] c = (Nothing, reverse c)
findname (a:as) c
| "-" `isPrefixOf` a = findname as (a:c)
| otherwise = (Just a, reverse c ++ as)
matches = filter (\c -> name == Just (cmdname c)) cmds
cmd = Prelude.head matches
check (flags, rest, []) = (flags, cmd, rest)
check (_, _, errs) = err $ concat errs
err msg = error $ msg ++ "\n\n" ++ usage header cmds commonoptions
exactcmds = filter (\c -> name == Just (cmdname c)) cmds
inexactcmds = case name of
Nothing -> []
Just n -> Git.AutoCorrect.fuzzymatches n cmdname cmds
{- Parses command line options, and returns actions to run to configure flags
- and the remaining parameters for the command. -}
getOptCmd :: Params -> Command -> [Option] -> (String -> String) -> (Flags, Params)
getOptCmd argv cmd commonoptions err = check $
getOpt Permute (commonoptions ++ cmdoptions cmd) argv
where
check (flags, rest, []) = (flags, rest)
check (_, _, errs) = error $ err $ concat errs
{- Runs a list of Annex actions. Catches IO errors and continues
- (but explicitly thrown errors terminate the whole command).

71
Git/AutoCorrect.hs Normal file
View file

@ -0,0 +1,71 @@
{- git autocorrection using Damerau-Levenshtein edit distance
-
- Copyright 2012 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Git.AutoCorrect where
import Common
import Git.Types
import qualified Git.Config
import Text.EditDistance
import Control.Concurrent
{- These are the same cost values as used in git. -}
gitEditCosts :: EditCosts
gitEditCosts = EditCosts
{ deletionCosts = ConstantCost 4
, insertionCosts = ConstantCost 1
, substitutionCosts = ConstantCost 2
, transpositionCosts = ConstantCost 0
}
{- Git's source calls this "an empirically derived magic number" -}
similarityFloor :: Int
similarityFloor = 7
{- Finds inexact matches for the input amoung the choices.
- Returns an ordered list of good enough matches, or an empty list if
- nothing matches well. -}
fuzzymatches :: String -> (c -> String) -> [c] -> [c]
fuzzymatches input showchoice choices = fst $ unzip $
sortBy comparecost $ filter similarEnough $ zip choices costs
where
distance v = restrictedDamerauLevenshteinDistance gitEditCosts v input
costs = map (distance . showchoice) choices
comparecost a b = compare (snd a) (snd b)
similarEnough (_, cst) = cst < similarityFloor
{- Takes action based on git's autocorrect configuration, in preparation for
- an autocorrected command being run. -}
prepare :: String -> (c -> String) -> [c] -> Repo -> IO ()
prepare input showmatch matches r =
case readish $ Git.Config.get "help.autocorrect" "0" r of
Just n
| n == 0 -> list
| n < 0 -> warn
| otherwise -> sleep n
Nothing -> list
where
list = error $ unlines $
[ "Unknown command '" ++ input ++ "'"
, ""
, "Did you mean one of these?"
] ++ map (\m -> "\t" ++ showmatch m) matches
warn =
hPutStr stderr $ unlines
[ "WARNING: You called a command named '" ++
input ++ "', which does not exist."
, "Continuing under the assumption that you meant '" ++
showmatch (Prelude.head matches) ++ "'"
]
sleep n = do
warn
hPutStrLn stderr $ unwords
[ "in"
, show (fromIntegral n / 10 :: Float)
, "seconds automatically..."]
threadDelay (n * 100000) -- deciseconds to microseconds

View file

@ -131,4 +131,4 @@ header :: String
header = "Usage: git-annex command [option ..]"
run :: [String] -> IO ()
run args = dispatch args cmds options header Git.Construct.fromCurrent
run args = dispatch True args cmds options header Git.Construct.fromCurrent

View file

@ -83,7 +83,7 @@ builtins = map cmdname cmds
builtin :: String -> String -> [String] -> IO ()
builtin cmd dir params = do
checkNotReadOnly cmd
dispatch (cmd : filterparams params) cmds options header $
dispatch False (cmd : filterparams params) cmds options header $
Git.Construct.repoAbsPath dir >>= Git.Construct.fromAbsPath
external :: [String] -> IO ()

4
debian/changelog vendored
View file

@ -3,6 +3,10 @@ git-annex (3.20120407) UNRELEASED; urgency=low
* bugfix: Adding a dotfile also caused all non-dotfiles to be added.
* bup: Properly handle key names with spaces or other things that are
not legal git refs.
* git-annex (but not git-annex-shell) supports the git help.autocorrect
configuration setting, doing fuzzy matching using the restricted
Damerau-Levenshtein edit distance, just as git does. This adds a build
dependency on the haskell edit-distance library.
-- Joey Hess <joeyh@debian.org> Sun, 08 Apr 2012 12:23:42 -0400

1
debian/control vendored
View file

@ -19,6 +19,7 @@ Build-Depends:
libghc-json-dev,
libghc-ifelse-dev,
libghc-bloomfilter-dev,
libghc-edit-distance-dev,
ikiwiki,
perlmagick,
git,

View file

@ -36,6 +36,7 @@ To build and use git-annex, you will need:
* [json](http://hackage.haskell.org/package/json)
* [IfElse](http://hackage.haskell.org/package/IfElse)
* [bloomfilter](http://hackage.haskell.org/package/bloomfilter)
* [edit-distance](http://hackage.haskell.org/package/edit-distance)
* Shell commands
* [git](http://git-scm.com/)
* [uuid](http://www.ossp.org/pkg/lib/uuid/)

View file

@ -1,5 +1,5 @@
Name: git-annex
Version: 3.20120406
Version: 3.20120407
Cabal-Version: >= 1.8
License: GPL
Maintainer: Joey Hess <joey@kitenet.net>
@ -32,7 +32,7 @@ Executable git-annex
unix, containers, utf8-string, network, mtl, bytestring, old-locale, time,
pcre-light, extensible-exceptions, dataenc, SHA, process, hS3, json, HTTP,
base >= 4.5, base < 5, monad-control, transformers-base, lifted-base,
IfElse, text, QuickCheck >= 2.1, bloomfilter
IfElse, text, QuickCheck >= 2.1, bloomfilter, edit-distance
Other-Modules: Utility.Touch
C-Sources: Utility/diskfree.c