From a2d94bd62727508ce688da5686a7c2dcddc8a125 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 8 Mar 2013 15:29:01 -0400 Subject: [PATCH] Switch from using regex-compat to regex-tdfa, as the C regex library is rather buggy. --- Limit.hs | 14 ++++++++++---- Makefile | 2 +- debian/changelog | 2 ++ debian/control | 1 + ...ist_files_containing_ISO8859-15_characters.mdwn | 13 +++++++++++++ doc/install/fromscratch.mdwn | 2 +- git-annex.cabal | 2 +- 7 files changed, 29 insertions(+), 7 deletions(-) diff --git a/Limit.hs b/Limit.hs index 1d02decbe2..e43b640da5 100644 --- a/Limit.hs +++ b/Limit.hs @@ -13,7 +13,8 @@ import Data.Time.Clock.POSIX import qualified Data.Set as S import qualified Data.Map as M import System.Path.WildMatch -import Text.Regex +import Text.Regex.TDFA +import Text.Regex.TDFA.String import Common.Annex import qualified Annex @@ -83,12 +84,17 @@ limitExclude :: MkLimit limitExclude glob = Right $ const $ return . not . matchglob glob {- Could just use wildCheckCase, but this way the regex is only compiled - - once. -} + - once. Also, we use regex-TDFA because it's less buggy in its support + - of non-unicode characters. 
-} matchglob :: String -> Annex.FileInfo -> Bool matchglob glob (Annex.FileInfo { Annex.matchFile = f }) = - isJust $ matchRegex cregex f + case cregex of + Right r -> case execute r f of + Right (Just _) -> True + _ -> False + Left _ -> error $ "failed to compile regex: " ++ regex where - cregex = mkRegex regex + cregex = compile defaultCompOpt defaultExecOpt regex regex = '^':wildToRegex glob {- Adds a limit to skip files not believed to be present diff --git a/Makefile b/Makefile index 1ddaeaa8bb..3419a0a34e 100644 --- a/Makefile +++ b/Makefile @@ -19,7 +19,7 @@ fast: dist/caballog @$$(grep 'ghc --make' dist/caballog | head -n 1) @ln -sf dist/build/git-annex/git-annex git-annex -dist/caballog: +dist/caballog: git-annex.cabal $(CABAL) configure -f"-Production" -O0 $(CABAL) build -v2 | tee $@ diff --git a/debian/changelog b/debian/changelog index 6ca99c62ee..3f2907af97 100644 --- a/debian/changelog +++ b/debian/changelog @@ -34,6 +34,8 @@ git-annex (4.20130228) UNRELEASED; urgency=low should add files in the current directory, but not act on unlocked files elsewhere in the tree. * assistant: Sync with all git remotes on startup. + * Switch from using regex-compat to regex-tdfa, as the C regex library + is rather buggy. 
-- Joey Hess Wed, 27 Feb 2013 23:20:40 -0400 diff --git a/debian/control b/debian/control index 3cc796c573..c8a15ed80c 100644 --- a/debian/control +++ b/debian/control @@ -9,6 +9,7 @@ Build-Depends: libghc-hslogger-dev, libghc-pcre-light-dev, libghc-sha-dev, + libghc-regex-tdfa-dev, libghc-dataenc-dev, libghc-utf8-string-dev, libghc-hs3-dev (>= 0.5.6), diff --git a/doc/bugs/git-annex_doesn__39__t_list_files_containing_ISO8859-15_characters.mdwn b/doc/bugs/git-annex_doesn__39__t_list_files_containing_ISO8859-15_characters.mdwn index 4ec042840d..382ca9a0cd 100644 --- a/doc/bugs/git-annex_doesn__39__t_list_files_containing_ISO8859-15_characters.mdwn +++ b/doc/bugs/git-annex_doesn__39__t_list_files_containing_ISO8859-15_characters.mdwn @@ -33,3 +33,16 @@ git-annex 4.20130227, on Debian GNU/Linux (sid, i386). LC_ALL= +> Tracked this back to a bug in either the C library or the haskell +> regex-posix wrapper around it. I'm not sure which, but I emailed the +> maintainer of the haskell library. It just doesn't think these +> things are characters; even `.` fails to match them! Everything should +> match that... +> +> There are apparently quite a lot of bugs on POSIX regex libraries +> as implemented on different systems: +> +> +> It seemed best to jettison this dependency entirely; I've switched it to +> haskell's pure regex-tdfa library, which works nicely. [[done]] +> --[[Joey]] diff --git a/doc/install/fromscratch.mdwn b/doc/install/fromscratch.mdwn index 0adadc637c..4c48d7502c 100644 --- a/doc/install/fromscratch.mdwn +++ b/doc/install/fromscratch.mdwn @@ -19,7 +19,7 @@ quite a lot. 
* [DAV](http://hackage.haskell.org/package/DAV) (optional) * [SafeSemaphore](http://hackage.haskell.org/package/SafeSemaphore) * [UUID](http://hackage.haskell.org/package/uuid) - * [Glob](http://hackage.haskell.org/package/Glob) + * [regex-tdfa](http://hackage.haskell.org/package/regex-tdfa) * Optional haskell stuff, used by the [[assistant]] and its webapp (edit Makefile to disable) * [stm](http://hackage.haskell.org/package/stm) (version 2.3 or newer) diff --git a/git-annex.cabal b/git-annex.cabal index 938b456389..e113164fb5 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -70,7 +70,7 @@ Executable git-annex extensible-exceptions, dataenc, SHA, process, json, base (>= 4.5 && < 4.8), monad-control, transformers-base, lifted-base, IfElse, text, QuickCheck >= 2.1, bloomfilter, edit-distance, process, - SafeSemaphore, uuid, random, regex-compat + SafeSemaphore, uuid, random, regex-tdfa -- Need to list these because they're generated from .hsc files. Other-Modules: Utility.Touch Utility.Mounts Include-Dirs: Utility