Software Tools in Haskell: paginate
format lines with page numbers and headers
This page is part of a series on Software Tools in Haskell.
This post is literate Haskell; you can load the source into GHCi and play along.
As usual, we start with some imports.
{-# LANGUAGE TypeSynonymInstances, FlexibleInstances, ConstrainedClassMethods #-}
-- paginate: format lines with page numbers and headers
module Main where
import System.Console.GetOpt
import System.Environment (getArgs, getProgName)
import System.Exit (exitSuccess, exitFailure)
import System.IO (hPutStrLn, stderr)
import Data.List (unfoldr, inits, isPrefixOf, intercalate)
import Data.Foldable (foldl')Our virtual line printer pslineprint is nice enough, but extremely simple; it does nothing at all to prettify the documents it prints. Our first attempt at this is paginate. This program will split a sequence of lines into “pages”, giving each page a header and page number. It will also be able to print more than one file sequentially, making each file start on its own page and ensuring that page numbers are correct across files. If any file name happens to be -, we read lines from stdin. Finally, it will optionally print a table of contents page in case we are printing a large number of long files.
I think this is the ugliest tool in our kit so far, and that part of the reason for this is that paginate depends on several arbitrary choices; frequently if there is a single “natural” or “obvious” choice, the resulting code is simple, but if we are making an arbitrary choice among several options our code feels complicated.
First lets look at the main program. Like pslineprint, there are enough options to make it worth our while to use GetOpt to process them. For now lets suppose we have functions paginateLines and tableOfContents that handle all the heavy lifting; the main program logic is mostly straightforward.
main :: IO ()
main = do
args <- getArgs
let
argErr = reportErrorMsgs [usageInfo "options" options]
corrErr = reportErrorMsgs ["corrupt asacc input"]
-- read options
(flags, filenames) <- case getOpt Permute options args of
(opts, rest, []) -> case foldl (>>=) (Just defaultFlags) opts of
Nothing -> argErr >> exitFailure
Just fs -> return (fs, rest)
otherwise -> argErr >> exitFailure
-- process options
let
pageOpts = PO
{ linesPerPage = fLinesPerPage flags
, lineLength = fLineLength flags
}
-- paginate files
case fMode flags of
Lines -> do
let
readLines name = case name of
"-" -> do
lns <- fmap getLines getContents
return ("-", lns)
otherwise -> do
lns <- fmap getLines $ readFile name
return (name, lns)
docs <- sequence $ map readLines filenames
if fPrintTOC flags == False
then return ()
else sequence_ $ map putStrLn $ tableOfContents pageOpts docs
sequence_ $ map putStrLn $ paginateLines pageOpts docs
ASACC -> do
let
readLines name = case name of
"-" -> do
lns <- fmap readCCLines getContents
case lns of
Nothing -> corrErr >> exitFailure
Just xs -> return ("-", xs)
otherwise -> do
lns <- fmap readCCLines $ readFile name
case lns of
Nothing -> corrErr >> exitFailure
Just xs -> return (name, xs)
docs <- sequence $ map readLines filenames
if fPrintTOC flags == False
then return ()
else sequence_ $ map putStrLn $ map renderCCLine $ tableOfContents pageOpts docs
sequence_ $ map putStrLn $ map renderCCLine $ paginateCCLines pageOpts docs
exitSuccess
data Mode = Lines | ASACC
data Flags = Flags
{ fLinesPerPage :: Int
, fLineLength :: Int
, fPrintTOC :: Bool
, fMode :: Mode
}
defaultFlags :: Flags
defaultFlags = Flags
{ fLinesPerPage = 52
, fLineLength = 75
, fPrintTOC = False
, fMode = Lines
}
options :: [OptDescr (Flags -> Maybe Flags)]
options =
[ Option [] ["lines-per-page"]
(ReqArg readLinesPerPage "INT")
"number of lines per page (including header)"
, Option [] ["line-length"]
(ReqArg readLineLength "INT")
"length of header lines"
, Option [] ["toc"]
(NoArg (\opts -> Just $ opts { fPrintTOC = True }))
"print table of contents page"
, Option [] ["asacc"]
(NoArg (\opts -> Just $ opts { fMode = ASACC }))
"interpret basic ASA carriage control codes"
]
where
readLinesPerPage str opts = do
k <- readDecimalNat str
return $ opts { fLinesPerPage = k }
readLineLength str opts = do
k <- readDecimalNat str
return $ opts { fLineLength = k }Now for the actual pagination. Generally speaking, paginate takes a list of lines and inserts new lines – the headers – as well as some blank lines in appropriate places, so that the lines can then be taken in chunks of \(n\) at a time (called “pages”). But exactly what a “line” is is already ambiguous; of course the usual “text separated by newlines” consists of lines, but so also does a file formatted using ASA carriage control codes. Both kinds of “line” are handled properly by pslineprint, and we already have at least one program, overstrike, which produces carriage control formatted text. So it seems worth our while to make paginate handle carriage controls as well.
An initial version of this program handled both kinds of line separately, which led to lots of duplicated code. To avoid this, we introduce an abstract Line type class.
class Line t where
fromString :: String -> t
blankLine :: t
blankLine = fromString ""
instance Line String where
fromString x = x
instance Line CCLine where
fromString x = CCLine [x]Also, there are a few tweakable parameters we’d like to be able to adjust: the number of “lines” to appear on each page, and the width (in characters) of the header lines. We wrap these into a type, PaginateOpts, that can be more easily (and meaningfully) be passed around.
data PaginateOpts = PO
{ linesPerPage :: Int
, lineLength :: Int
} deriving (Show)
pageCount :: (Line t) => PaginateOpts -> [t] -> Int
pageCount opts xs = if r == 0 then q else q+1
where
slpp = (linesPerPage opts) - 2
(q,r) = ((count xs) `div` slpp, (count xs) `rem` slpp)
startPages :: (Line t) => PaginateOpts -> [[t]] -> [Int]
startPages opts lnss
= map (\ks -> 1 + sum ks)
$ inits
$ map (pageCount opts) lnss
totalPages :: (Line t) => PaginateOpts -> [[t]] -> Int
totalPages opts lnss = sum $ map (pageCount opts) lnssNote that from a PaginateOpts and a list of (abstract) documents we can compute the total number of pages used and the starting page numbers of each document. These will be used later.
Next we define an abstract page header. Our headers will include three pieces of information: the name of the file being paginated, the current page number, and the total number of pages. We also need a way to convert an abstract header to a list of lines; this is done with renderHeader. We define this function as part of a type class so that we can have different implementations for each kind of line.
data Header = Header
{ title :: String
, pageNumber :: Int
, pageTotal :: Int
} deriving (Show)
class RenderHeader t where
renderHeader :: (Line t) => PaginateOpts -> Header -> [t]
instance RenderHeader String where
renderHeader opts h = [fn ++ (replicate (ll - nfn - npg) ' ') ++ pg, ""]
where
pg = show (pageNumber h) ++ "/" ++ show (pageTotal h)
npg = count pg
ll = lineLength opts
fn = if (count $ title h) + npg + 1 > ll
then abbr
else title h
abbr = "..." ++ (reverse $ take (ll - npg - 4) $ reverse $ title h)
nfn = count fn
instance RenderHeader CCLine where
renderHeader opts h = map (\x -> CCLine [x]) $ renderHeader opts hThe actual pagination is handled by a few different functions:
splitPagesdivides a document into abstract pages, without proper page numbers.numberPagesFromOffixes the page numbers of a list of abstract pages, with parameters allowing us to specify where to begin counting from and the total number of pages.renderPageconverts an abstract page to a list of lines.paginateOfFromcombinessplitPages,numberPagesOfFrom, andrenderPageto paginate a single document.paginateDocspaginates a list of named documents.
splitPages :: (Line t) => PaginateOpts -> String -> [t] -> [(Header, [t])]
splitPages opts name = unfoldr firstPage
where
slpp = (linesPerPage opts) - 2
firstPage :: [a] -> Maybe ((Header,[a]),[a])
firstPage [] = Nothing
firstPage ys = do
let
(zs,rest) = splitAt slpp ys
hdr = Header
{ title = name
, pageNumber = 0
, pageTotal = 0
}
return ((hdr, zs), rest)
numberPagesFromOf :: (Line t)
=> Int -> Int -> [(Header, [t])] -> [(Header, [t])]
numberPagesFromOf m n xs = zipWith fix xs [m..]
where
fix (h,y) k = (h {pageNumber = k, pageTotal = n}, y)
renderPage :: (Line t, RenderHeader t)
=> PaginateOpts -> (Header, [t]) -> [t]
renderPage opts (hdr,lns)
= take k ((renderHeader opts hdr) ++ lns ++ repeat blankLine)
where k = linesPerPage opts
paginateOfFrom :: (Line t, RenderHeader t)
=> PaginateOpts -> Int -> Int -> (String, [t]) -> [t]
paginateOfFrom opts n m (name, lns)
= concatMap (renderPage opts)
$ numberPagesFromOf m n
$ splitPages opts name lns
paginateDocs :: (Line t, RenderHeader t)
=> PaginateOpts -> [(String, [t])] -> [t]
paginateDocs opts docs
= concat $ zipWith (paginateOfFrom opts tot) starts docs
where
starts = startPages opts $ map snd docs
tot = totalPages opts $ map snd docsThe actual functions we expose from this module are paginateLines and paginateCCLines, which are just monomorphic synonyms of paginateDocs for ordinary lines and carriage control formatted lines, and the constructor for PaginateOpts. As far as consumers of this module are concerned, these two black-boxes are implemented separately. Since (as of this writing) the Line class has only two instances there is no reason to expose the guts of pagination. But by writing our code against an abstract Line class, it will be easier to extend in the future if needed.
paginateLines :: PaginateOpts -> [(String, [String])] -> [String]
paginateLines = paginateDocs
paginateCCLines :: PaginateOpts -> [(String, [CCLine])] -> [CCLine]
paginateCCLines = paginateDocsAll that remains is to provide a function for building the table of contents. This part is kind of gross.
tableOfContents :: (Line t, RenderHeader t)
=> PaginateOpts -> [(String, [t])] -> [t]
tableOfContents opts docs = concat $ pad $
(fromString "Contents") : blankLine : tocLines
where
ks = startPages opts $ map snd docs
tocLines = zipWith tocLine (map fst docs) ks
tocLine name pg = fromString $ padNum pg ++ " " ++ abbr name
ll = lineLength opts
abbr str = if (count str) + 5 > ll
then "..." ++ (reverse $ take (ll - 9) $ reverse $ str)
else str
padNum k = reverse $ take 5 $ (reverse $ show k) ++ repeat ' '
pad = unfoldr padFirst
padFirst [] = Nothing
padFirst xs = Just (take (linesPerPage opts) (ys ++ repeat blankLine), rest)
where (ys, rest) = splitAt (linesPerPage opts) xsA few comments about the default options. I expect that the main use of paginate will be to prepare documents for pslineprint, and the default settings of that program produce pages with 52 lines per page and about 75 characters per line. Using these as the defaults for paginate means we can say things like
paginate foo.txt | pslineprint
and get reasonable results.
Old Stuff
count :: (Num t) => [a] -> t
count = foldl' inc 0
where inc n _ = n+1
-- split on \n
getLines :: String -> [String]
getLines = unfoldr firstLine
where
firstLine :: String -> Maybe (String, String)
firstLine xs = case break (== '\n') xs of
("","") -> Nothing
(as,"") -> Just (as,"")
(as,b:bs) -> Just (as,bs)
unfoldrMaybe :: (b -> Maybe (Maybe (a,b))) -> b -> Maybe [a]
unfoldrMaybe f x = case f x of
Nothing -> Nothing
Just Nothing -> Just []
Just (Just (a,b)) -> do
as <- unfoldrMaybe f b
Just (a:as)
-- write list of messages to stderr
reportErrorMsgs :: [String] -> IO ()
reportErrorMsgs errs = do
name <- getProgName
sequence_ $ map (hPutStrLn stderr) $ ((name ++ " error"):errs)
-- parse a natural number base 10
readDecimalNat :: String -> Maybe Int
readDecimalNat xs = do
ys <- sequence $ map decToInt $ reverse xs
return $ sum $ zipWith (*) ys [10^t | t <- [0..]]
where
decToInt :: Char -> Maybe Int
decToInt x = lookup x
[ ('0',0), ('1',1), ('2',2), ('3',3), ('4',4)
, ('5',5), ('6',6), ('7',7), ('8',8), ('9',9)
]
data CCLine
= CCLine [String]
deriving (Show)
fromCCLine :: CCLine -> [String]
fromCCLine (CCLine xs) = xs
renderCCLine :: CCLine -> String
renderCCLine (CCLine xs)
= intercalate "\n" $ zipWith (:) (' ' : (repeat '+')) xs
readCCLines :: String -> Maybe [CCLine]
readCCLines = unfoldrMaybe readFirstCCLine . getLines
where
readFirstCCLine :: [String] -> Maybe (Maybe (CCLine, [String]))
readFirstCCLine [] = Just Nothing
readFirstCCLine ((' ':cs):ds) = do
let
(us,vs) = span (isPrefixOf "+") ds
stripPlus xs = case xs of
'+':ys -> Just ys
otherwise -> Nothing
case sequence $ map stripPlus us of
Just ws -> Just (Just (CCLine $ cs:ws, vs))
Nothing -> Nothing
readFirstCCLine _ = Nothing