module HandLex
( gcLex -- :: String -> [Token]
, Posn(..)
, TokenT(..)
, Token
) where
import Char
#if !defined(__HASKELL98__)
#define isAlphaNum isAlphanum
#endif
type Token = (Posn, TokenT)
data Posn = Pn Int Int -- line and column
deriving Eq
instance Show Posn where
showsPrec p (Pn l c) = showString "line " . shows l .
showString " col " . shows c
data TokenT = TokFun
| TokCall
| TokResult
| TokFail
| TokConst
| TokDis
| TokPrefix
| TokOpen
| TokClose
| TokCurOpen
| TokCurClose
| TokSqOpen
| TokSqClose
| TokAngOpen
| TokAngClose
| TokAng2Open
| TokAng2Close
| TokSlash
| TokComma
| TokEqual
| TokArrow
| TokCoCo
| TokDeclare
| TokIn
| TokName String
| TokDisName String
| TokBaseTy String
| TokHaskell String
| TokC String
| TokCExp String
| TokCCode [String]
deriving (Eq,Show)
trim, revtrim :: String -> String
trim = f . f where f = reverse . dropWhile isSpace
revtrim = f.reverse.f where f = dropWhile isSpace
emit :: TokenT -> Posn -> Token
emit tok p = (p,tok)
lexerror :: Posn -> String -> a
lexerror p s = error ("Lexical error: "++s++" at "++show p++"\n")
addcol :: Int -> Posn -> Posn
addcol n (Pn r c) = Pn r (c+n)
newline, tab :: Posn -> Posn
newline (Pn r c) = Pn (r+1) 0
tab (Pn r c) = Pn r (((c`div`8)+1)*8)
----
gcLex :: String -> [Token]
gcLex = lextop gcAny (Pn 1 0) . lines
lextop :: (Posn->String->[String]->[Token]) -> Posn -> [String] -> [Token]
lextop k p [] = []
lextop k p (('%':s):ss) = gcStart k (addcol 1 p) s ss
lextop k p (s:ss) = emit (TokHaskell s) p :
lextop k (newline p) ss
gcStart :: (Posn->String->[String]->[Token]) ->
Posn -> String -> [String] -> [Token]
gcAny, gcConst :: Posn -> String -> [String] -> [Token]
gcUser :: String -> Posn -> String -> [String] -> [Token]
gcLine :: ([String]->TokenT) -> Posn -> String -> [String] -> [Token]
gcStart k p s ss
| take 1 s == "-" = skip 1 p s ss gcC
| take 1 s == "C" = skip 1 p s ss (blank gcC)
| take 3 s == "fun" = emit TokFun p: skip 3 p s ss gcAny
| take 3 s == "dis" = emit TokDis p: skip 3 p s ss gcAny
| take 4 s == "call" = emit TokCall p: skip 4 p s ss gcAny
| take 4 s == "fail" = emit TokFail p: skip 4 p s ss gcAny
| take 4 s == "code" = skip 4 p s ss (gcLine TokCCode)
| take 5 s == "const" = emit TokConst p: skip 5 p s ss gcConst
| take 6 s == "result" = emit TokResult p: skip 6 p s ss gcAny
| take 6 s == "prefix" = emit TokPrefix p: skip 6 p s ss gcAny
| otherwise = k p s ss
skip :: Int -> Posn -> String -> [String] ->
(Posn->String->[String]->[Token]) -> [Token]
skip n p s ss k = k (addcol n p) (drop n s) ss
blank :: (Posn->String->[String]->[Token]) ->
Posn -> String -> [String] -> [Token]
blank k p [] ss = lextop (blank k) (newline p) ss
blank k p ['\^M'] ss = lextop (blank k) (newline p) ss
blank k p (' ': s) ss = blank k (addcol 1 p) s ss
blank k p ('\t':s) ss = blank k (tab p) s ss
blank k p s ss = k p s ss
gcC p s ss = emit (TokC s) p : lextop gcAny (newline p) ss
gcAny = blank gcAny'
where
gcAny' p ('"':s) ss = gcCExp "" (addcol 1 p) s ss
gcAny' p ('{':s) ss = emit TokCurOpen p: gcAny (addcol 1 p) s ss
gcAny' p ('}':s) ss = emit TokCurClose p: gcAny (addcol 1 p) s ss
gcAny' p ('(': s) ss = emit TokOpen p: gcAny (addcol 1 p) s ss
gcAny' p (')': s) ss = emit TokClose p: gcAny (addcol 1 p) s ss
gcAny' p ('[': s) ss = emit TokSqOpen p: gcAny (addcol 1 p) s ss
gcAny' p (']': s) ss = emit TokSqClose p: gcAny (addcol 1 p) s ss
gcAny' p ('<': s) ss
| take 1 s == "<" = emit TokAng2Open p: skip 1 p s ss (gcUser [])
| otherwise = emit TokAngOpen p: gcUser [] (addcol 1 p) s ss
gcAny' p ('>': s) ss = emit TokAngClose p: gcAny (addcol 1 p) s ss
gcAny' p (',': s) ss = emit TokComma p: gcAny (addcol 1 p) s ss
gcAny' p ('=': s) ss = emit TokEqual p: gcAny (addcol 1 p) s ss
gcAny' p s ss
| take 2 s == "->" = emit TokArrow p: skip 2 p s ss gcAny
| take 2 s == "::" = emit TokCoCo p: skip 2 p s ss gcAny
| take 3 s == "in " = emit TokIn p: skip 3 p s ss gcAny
| take 7 s == "declare" = emit TokDeclare p: skip 7 p s ss gcAny
| take 2 s == "%%" = ident TokBaseTy (addcol 2 p) (drop 2 s) ss gcAny
| ('A'<=h && h<='Z') ||
('0'<=h && h<='9') ||
'_'==h || h=='\'' ||
'`'==h = ident TokName p s ss gcAny
| ('a'<=h && h<='z') = ident TokDisName p s ss gcAny
| otherwise = lexerror p "unrecognised input"
where h = head s
gcCExp acc = blank (lit acc)
where lit acc p ('"':s) ss = emit (TokCExp (reverse acc)) p:
gcAny (addcol 1 p) s ss
lit acc p ('%':'"':s) ss = lit ('"':acc) (addcol 2 p) s ss
lit acc p (h:s) ss = lit (h:acc) (addcol 1 p) s ss
lit acc p [] ss = lexerror p "missing \""
--lit acc p [] ss = lextop (lit ('\n':acc)) p ss
gcLine tok p s ss = multiline tok (p,[trim s]) (newline p) ss
gcConst = blank gcConst1
where
gcConst1 p s ss
| ('A'<=h && h<='Z') ||
('0'<=h && h<='9') ||
'_'==h || h=='\'' ||
'`'==h = ident TokName p s ss gcConst2
| ('a'<=h && h<='z') = ident TokDisName p s ss gcConst2
| otherwise = lexerror p "%const not followed by type or DISname"
where h = head s
gcConst2 = blank gcConst3
gcConst3 p ('[':s) ss = emit TokSqOpen p: gcAny (addcol 1 p) s ss
gcConst3 p s ss = lexerror p "%const type/DISname not followed by ["
gcUser acc = blank (gcUser' acc)
where gcUser' acc p ('/':s) ss = emit (TokName (revtrim acc)) p:
emit TokSlash p: gcUser [] (addcol 1 p) s ss
gcUser' acc p ('>':'>':s) ss
= emit (TokName (revtrim acc)) p:
emit TokAng2Close p: gcAny (addcol 1 p) s ss
gcUser' acc p ('>':s) ss = emit (TokName (revtrim acc)) p:
emit TokAngClose p: gcAny (addcol 1 p) s ss
gcUser' acc p ('-':'>':s) ss = gcUser' ('>':'-':acc) (addcol 2 p) s ss
gcUser' acc p (h:s) ss = gcUser' (h:acc) (addcol 1 p) s ss
gcUser' acc p [] ss = lextop (gcUser acc) p ss
ident :: (String->TokenT) ->
Posn -> String -> [String] ->
(Posn->String->[String]->[Token]) -> [Token]
ident tok p s ss k =
let (name,s0) = span (\c-> isAlphaNum c || c `elem` "`-_#.'/\\") s
in emit (tok name) p: skip (length name) p s ss k
multiline :: ([String]->TokenT) ->
(Posn,[String]) -> Posn -> [String] -> [Token]
multiline tok (p0,s0) p (('%':h:s):ss)
| isSpace h = multiline tok (p0, ({-trim-} s):s0) (newline p) ss
| otherwise = emit (tok (reverse s0)) p0: gcStart gcAny (addcol 1 p) (h:s) ss
multiline tok (p0,s0) p ss =
emit (tok (reverse s0)) p0: lextop gcAny p ss
|