{-----------------------------------------------------------------------------
A LIBRARY OF MONADIC PARSER COMBINATORS
29th July 1996
Graham Hutton Erik Meijer
University of Nottingham University of Utrecht
This Haskell 1.3 script defines a library of parser combinators, and is taken
from sections 1-6 of our article "Monadic Parser Combinators". Some changes
to the library have been made in the move from Gofer to Haskell:
* Do notation is used in place of monad comprehension notation;
* The parser datatype is defined using "newtype", to avoid the overhead
of tagging and untagging parsers with the P constructor.
-----------------------------------------------------------------------------}
module ParseLib
(Parser(..), item, papply, (+++), {-sat,-} tok, many, many1, sepby, sepby1, chainl,
chainl1, chainr, chainr1, ops, bracket, elserror, cut {-, char, digit, lower, upper,
letter, alphanum, string, ident, nat, int, spaces, comment, junk,
parse, token, natural, integer, symbol, identifier-}) where
import Char
import HandLex (Token(..), TokenT, Posn)
import Monad
infixr 5 +++
#if defined (__HASKELL98__)
#define MPLUS `mplus`
#else
#define fmap map
#define mzero zero
#define MPLUS ++
#endif
--- The parser monad ---------------------------------------------------------
newtype Parser a = P ([Token] -> [(a,[Token])])
instance Functor Parser where
-- fmap :: (a -> b) -> (Parser a -> Parser b)
fmap f (P p) = P (\inp -> [(f v, out) | (v,out) <- p inp])
instance Monad Parser where
-- return :: a -> Parser a
return v = P (\inp -> [(v,inp)])
-- >>= :: Parser a -> (a -> Parser b) -> Parser b
(P p) >>= f = P (\inp -> concat [papply (f v) out | (v,out) <- p inp])
#if defined(__HASKELL98__)
fail s = P (\inp -> [])
#endif
#if defined(__HASKELL98__)
instance MonadPlus Parser where
#else
instance MonadZero Parser where
#endif
-- mzero :: Parser a
mzero = P (\inp -> [])
#if !defined(__HASKELL98__)
instance MonadPlus Parser where
#endif
-- mplus :: Parser a -> Parser a -> Parser a
(P p) MPLUS (P q) = P (\inp -> (p inp ++ q inp))
--- Other primitive parser combinators ---------------------------------------
--item :: Parser Char
--item = P (\inp -> case inp of
-- [] -> []
-- (x:xs) -> [(x,xs)])
item :: Parser Token
item = P (\inp -> case inp of
[] -> []
(x:xs) -> [(x,xs)])
force :: Parser a -> Parser a
force (P p) = P (\inp -> let x = p inp in
(fst (head x), snd (head x)) : tail x)
first :: Parser a -> Parser a
first (P p) = P (\inp -> case p inp of
[] -> []
(x:xs) -> [x])
papply :: Parser a -> [Token] -> [(a,[Token])]
papply (P p) inp = p inp
cut :: Parser a -> Parser b -> Parser b
(P p) `cut` q = P (\inp -> case p inp of
[] -> []
((x,ss):_) -> papply q ss)
--- Derived combinators ------------------------------------------------------
(+++) :: Parser a -> Parser a -> Parser a
p +++ q = first (p MPLUS q)
--sat :: (Char -> Bool) -> Parser Char
--sat p = do {x <- item; if p x then return x else mzero}
tok :: TokenT -> Parser TokenT
tok t = do {x <- item; if t==snd x then return t else mzero}
many :: Parser a -> Parser [a]
many p = many1 p +++ return []
--many p = force (many1 p +++ return [])
many1 :: Parser a -> Parser [a]
many1 p = do {x <- p; xs <- many p; return (x:xs)}
sepby :: Parser a -> Parser b -> Parser [a]
p `sepby` sep = (p `sepby1` sep) +++ return []
sepby1 :: Parser a -> Parser b -> Parser [a]
p `sepby1` sep = do {x <- p; xs <- many (do {sep; p}); return (x:xs)}
chainl :: Parser a -> Parser (a -> a -> a) -> a -> Parser a
chainl p op v = (p `chainl1` op) +++ return v
chainl1 :: Parser a -> Parser (a -> a -> a) -> Parser a
p `chainl1` op = do {x <- p; rest x}
where
rest x = do {f <- op; y <- p; rest (f x y)}
+++ return x
chainr :: Parser a -> Parser (a -> a -> a) -> a -> Parser a
chainr p op v = (p `chainr1` op) +++ return v
chainr1 :: Parser a -> Parser (a -> a -> a) -> Parser a
p `chainr1` op = do {x <- p; rest x}
where
rest x = do {f <- op; y <- p `chainr1` op; return (f x y)}
+++ return x
ops :: [(Parser a, b)] -> Parser b
ops xs = foldr1 (+++) [do {p; return op} | (p,op) <- xs]
bracket :: Parser a -> Parser b -> Parser c -> Parser b
bracket open p close = do {open;
x <- p;
close;
return x}
elserror :: Parser a -> String -> Parser a
p `elserror` s = p +++
(P (\inp->
case inp of
[] -> error "Parse error: unexpected EOF\n"
((p,t):_) ->
error ("Parse error at "++show p++": "++s++"\n"++
" next token: "++show t)))
{-- Useful parsers -----------------------------------------------------------
char :: Char -> Parser Char
char x = sat (\y -> x == y)
digit :: Parser Char
digit = sat isDigit
lower :: Parser Char
lower = sat isLower
upper :: Parser Char
upper = sat isUpper
letter :: Parser Char
letter = sat isAlpha
alphanum :: Parser Char
alphanum = sat isAlphaNum
string :: String -> Parser String
string "" = return ""
string (x:xs) = do {char x; string xs; return (x:xs)}
ident :: Parser String
ident = do {x <- lower; xs <- many alphanum; return (x:xs)}
nat :: Parser Int
nat = do {x <- digit; return (fromEnum x - fromEnum '0')} `chainl1` return op
where
m `op` n = 10*m + n
int :: Parser Int
int = do {char '-'; n <- nat; return (-n)} +++ nat
--- Lexical combinators ------------------------------------------------------
spaces :: Parser ()
spaces = do {many1 (sat isSpace); return ()}
comment :: Parser ()
--comment = do {string "--"; many (sat (\x -> x /= '\n')); return ()}
comment = do
_ <- string "--"
_ <- many (sat (\x -> x /= '\n'))
return ()
junk :: Parser ()
junk = do {many (spaces +++ comment); return ()}
parse :: Parser a -> Parser a
parse p = do {junk; p}
token :: Parser a -> Parser a
token p = do {v <- p; junk; return v}
--- Token parsers ------------------------------------------------------------
natural :: Parser Int
natural = token nat
integer :: Parser Int
integer = token int
symbol :: String -> Parser String
symbol xs = token (string xs)
identifier :: [String] -> Parser String
identifier ks = token (do {x <- ident;
if not (elem x ks) then return x
else return mzero})
-----------------------------------------------------------------------------}