improve importfeed --force; try to match existing files to avoid unnecessary duplication

This commit is contained in:
Joey Hess 2013-08-01 11:57:05 -04:00
parent 12e269482f
commit 03c76b5a30
2 changed files with 49 additions and 40 deletions

View file

@ -104,41 +104,63 @@ downloadFeed url = do
downloadEnclosure :: Bool -> Cache -> ToDownload -> Annex () downloadEnclosure :: Bool -> Cache -> ToDownload -> Annex ()
downloadEnclosure relaxed cache enclosure downloadEnclosure relaxed cache enclosure
| S.member url (knownurls cache) = | S.member url (knownurls cache) =
whenM (Annex.getState Annex.force) go whenM forced go
| otherwise = go | otherwise = go
where where
forced = Annex.getState Annex.force
url = location enclosure url = location enclosure
go = do go = do
dest <- liftIO $ feedFile (template cache) enclosure dest <- makeunique (1 :: Integer) $ feedFile (template cache) enclosure
showStart "addurl" dest case dest of
ifM (addUrlFile relaxed url dest) Nothing -> noop
( showEndOk Just f -> do
, showEndFail showStart "addurl" f
ifM (addUrlFile relaxed url f)
( showEndOk
, showEndFail
)
{- Find a unique filename to save the url to.
- If the file exists, prefixes it with a number.
- When forced, the file may already exist and have the same
- url, in which case Nothing is returned as it does not need
- to be re-downloaded. -}
makeunique n file = ifM alreadyexists
( ifM forced
( ifAnnexed f checksameurl tryanother
, tryanother
)
, return $ Just f
)
where
f = if n < 2
then file
else
let (d, base) = splitFileName file
in d </> show n ++ "_" ++ base
tryanother = makeunique (n + 1) file
alreadyexists = liftIO $ isJust <$> catchMaybeIO (getSymbolicLinkStatus f)
checksameurl (k, _) = ifM (elem url <$> getUrls k)
( return Nothing
, tryanother
) )
defaultTemplate :: String defaultTemplate :: String
defaultTemplate = "${feedtitle}/${itemtitle}${extension}" defaultTemplate = "${feedtitle}/${itemtitle}${extension}"
{- Generate a unique filename for the feed item by filling {- Generates a filename to use for a feed item by filling out the template.
- out the template. - The filename may not be unique. -}
- feedFile :: Utility.Format.Format -> ToDownload -> FilePath
- Since each feed url is only downloaded once, feedFile tmpl i = Utility.Format.format tmpl $ M.fromList
- if the file already exists, two items with different urls [ field "feedtitle" $ getFeedTitle $ feed i
- are conflicting. A number is added to disambiguate. , fieldMaybe "itemtitle" $ getItemTitle $ item i
-} , fieldMaybe "feedauthor" $ getFeedAuthor $ feed i
feedFile :: Utility.Format.Format -> ToDownload -> IO FilePath , fieldMaybe "itemauthor" $ getItemAuthor $ item i
feedFile tmpl i = makeUnique 1 $ , fieldMaybe "itemsummary" $ getItemSummary $ item i
Utility.Format.format tmpl $ M.fromList , fieldMaybe "itemdescription" $ getItemDescription $ item i
[ field "feedtitle" $ getFeedTitle $ feed i , fieldMaybe "itemrights" $ getItemRights $ item i
, fieldMaybe "itemtitle" $ getItemTitle $ item i , fieldMaybe "itemid" $ snd <$> getItemId (item i)
, fieldMaybe "feedauthor" $ getFeedAuthor $ feed i , ("extension", map sanitize $ takeExtension $ location i)
, fieldMaybe "itemauthor" $ getItemAuthor $ item i ]
, fieldMaybe "itemsummary" $ getItemSummary $ item i
, fieldMaybe "itemdescription" $ getItemDescription $ item i
, fieldMaybe "itemrights" $ getItemRights $ item i
, fieldMaybe "itemid" $ snd <$> getItemId (item i)
, ("extension", map sanitize $ takeExtension $ location i)
]
where where
field k v = field k v =
let s = map sanitize v in let s = map sanitize v in
@ -149,16 +171,3 @@ feedFile tmpl i = makeUnique 1 $
sanitize c sanitize c
| isSpace c || isPunctuation c || c == '/' = '_' | isSpace c || isPunctuation c || c == '/' = '_'
| otherwise = c | otherwise = c
makeUnique :: Integer -> FilePath -> IO FilePath
makeUnique n file =
ifM (isJust <$> catchMaybeIO (getSymbolicLinkStatus f))
( makeUnique (n + 1) file
, return f
)
where
f = if n < 2
then file
else
let (d, base) = splitFileName file
in d </> show n ++ "_" ++ base

View file

@ -195,7 +195,7 @@ subdirectories).
Imports the contents of podcast feeds. Only downloads files whose Imports the contents of podcast feeds. Only downloads files whose
urls have not already been added to the repository before, so you can urls have not already been added to the repository before, so you can
delete, rename, etc the resulting files and repeated runs won't duplicate delete, rename, etc the resulting files and repeated runs won't duplicate
them. (Use --force to force downloading files it's seen before.) them. (Use --force to force downloading urls it's seen before.)
Use --template to control where the files are stored. Use --template to control where the files are stored.
The default template is '${feedtitle}/${itemtitle}${extension}' The default template is '${feedtitle}/${itemtitle}${extension}'