module Main where import Text.ParserCombinators.ReadP import Control.Monad import Control.Applicative import Text.Printf import Debug.Trace type HeaderLevel = Int newtype URL = URL {getUrl :: String} newtype ImgPath = ImgPath {getPath :: String} parseMany :: ReadP a -> ReadP [a] parseMany = Text.ParserCombinators.ReadP.many data MdToken = Header HeaderLevel MdToken | Para [MdToken] | Linebreak | HorizontalRule | Blockquote MdToken | UnordList [MdToken] | OrdList [MdToken] | Code String | Codeblock String | Link MdToken URL | Image MdToken ImgPath | Bold MdToken | Italic MdToken | Strikethrough MdToken | Unit String -- Deriving Show for MdToken instance Show MdToken where show (Header level token) = "" ++ show token ++ "" show (Para tokens) = concat(map show tokens) show Linebreak = "\n" show HorizontalRule = "---------" show (Blockquote token) = "BLOCK" ++ show token show (UnordList tokens) = "UNORD" ++ concat(map show tokens) show (OrdList tokens) = "ORD" ++ concat(map show tokens) show (Code code) = show code show (Codeblock code) = show code show (Link txt url) = "" ++ show txt ++ "" show (Image txt path) = "" ++ show txt ++ "" show (Bold token) = "" ++ show token ++ "" show (Italic token) = "" ++ show token ++ "" show (Strikethrough token) = "" ++ show token ++ "" show (Unit unit) = printf "%s" unit -- --------------- -- Helpers mustBeHash :: ReadP Char mustBeHash = satisfy (\x -> x == '#') -- --------------- parseHeader :: ReadP MdToken parseHeader = do headers <- many1 mustBeHash when ((length headers) > 6) pfail _ <- string " " text <- munch1 (\x -> x /= '\n') -- Parse until EOL -- traceM text let allParsedText = readP_to_S parsePara text traceM (show allParsedText) let parsedText = fst . last $ allParsedText return (Header (length headers) parsedText) parseBold :: ReadP MdToken parseBold = do text <- choice[ (between (string "__") (string "__") (munch1 (/= '_'))), (between (string "**") (string "**") (munch1 (/= '*'))) ] -- text <- munch1 (\x -> x /= '_' && x /= '*') -- Parse until first asterisk/underscore -- traceM text -- _ <- char '_' <|> char '*' -- Throw away the second asterisk/underscore let parsedText = fst . last $ readP_to_S parsePara text return (Bold parsedText) parseItalic :: ReadP MdToken parseItalic = do text <- choice[ (between (string "_") (string "_") (munch1 (/= '_'))), (between (string "*") (string "*") (munch1 (/= '*'))) ] let parsedText = fst . last $ readP_to_S parsePara text return (Italic parsedText) parseString :: ReadP MdToken parseString = do firstChar <- get -- Must parse at least one character here text <- munch (\x -> not (elem x "#*_[")) return (Unit (firstChar:text)) --return (Unit text) parseLine :: ReadP MdToken parseLine = choice [parseHeader, parseBold, parseItalic, parseString] parsePara :: ReadP MdToken parsePara = ------ parsePara :: ReadP MdToken ------ parsePara = do ------ parsed <- parseMany parseLine ------ -- traceM $ show parsed ------ return (Para parsed) main :: IO () main = do let res = readP_to_S parsePara "## Hello __world_*" putStrLn (show res)