module MdToHTML where import Control.Applicative import Control.Monad import Data.List import Debug.Trace import Text.ParserCombinators.ReadP import Text.Printf type HeaderLevel = Int newtype URL = URL {getUrl :: String} newtype ImgPath = ImgPath {getPath :: String} parseMany :: ReadP a -> ReadP [a] parseMany = Text.ParserCombinators.ReadP.many data MdToken = Document [MdToken] | Header HeaderLevel MdToken | Para MdToken | Line [MdToken] | SingleNewline -- A single newline is rendered as a space. | Linebreak | HorizontalRule | Blockquote MdToken | UnordList [MdToken] | OrdList [MdToken] | Code String | Codeblock String | Link MdToken URL | Image MdToken ImgPath | Bold MdToken | Italic MdToken | Strikethrough MdToken | Unit String -- Deriving Show for MdToken instance Show MdToken where show (Document tokens) = concat (map show tokens) show (Header level token) = "" ++ show token ++ "" show (Para token) = "

" ++ show token ++ "

" show (Line tokens) = concat (map show tokens) show Linebreak = "
" show SingleNewline = " " show HorizontalRule = "---------" show (Blockquote token) = "BLOCK" ++ show token show (UnordList tokens) = "UNORD" ++ concat (map show tokens) show (OrdList tokens) = "ORD" ++ concat (map show tokens) show (Code code) = show code show (Codeblock code) = show code show (Link txt url) = "" ++ show txt ++ "" show (Image txt imgPath) = "" ++ show txt ++ "" show (Bold token) = "" ++ show token ++ "" show (Italic token) = "" ++ show token ++ "" show (Strikethrough token) = "" ++ show token ++ "" show (Unit unit) = printf "%s" unit -- --------------- -- Helpers leftmostLongest :: (Foldable t) => [(a, t b)] -> (a, t b) leftmostLongest xs = let lastElem = (last xs) filteredLst = (filter (\val -> (length $ snd val) == (length $ snd lastElem)) xs) in head filteredLst -- Get the first parse returned by readP_to_S that consumed the most input leftmostLongestParse :: ReadP a -> String -> (a, String) leftmostLongestParse parser input = leftmostLongest $ readP_to_S parser input -- Parse if the string that's left matches the string comparator function lookaheadParse :: (String -> Bool) -> ReadP Char lookaheadParse stringCmp = do lookahead <- look case stringCmp lookahead of True -> get False -> pfail lineToList :: MdToken -> [MdToken] lineToList (Line tokens) = tokens -- --------------- -- Parse a markdown header, denoted by 1-6 #'s followed by some text, followed by EOL. parseHeader :: ReadP MdToken parseHeader = do skipSpaces headers <- munch1 (== '#') when ((length headers) > 6) pfail skipSpaces text <- munch1 (/= '\n') Text.ParserCombinators.ReadP.optional (char '\n') let parsedText = fst $ leftmostLongestParse parseLine text return (Header (length headers) parsedText) -- Parse bold text parseBold :: ReadP MdToken parseBold = do text <- choice [ between (string "__") (string "__") (many1 (lookaheadParse (/= "__"))), between (string "**") (string "**") (many1 (lookaheadParse (/= "**"))) ] let parsedText = fst $ leftmostLongestParse parseLine text return (Bold parsedText) -- Parse italic text parseItalic :: ReadP MdToken parseItalic = do text <- choice [ between (string "_") (string "_") (munch1 (/= '_')), between (string "*") (string "*") (munch1 (/= '*')) ] let parsedText = fst $ leftmostLongestParse parseLine text return (Italic parsedText) -- Parse a linebreak character parseLinebreak :: ReadP MdToken parseLinebreak = do char ' ' many1 (char ' ') char '\n' return Linebreak parseSingleNewline :: ReadP MdToken parseSingleNewline = do char '\n' return SingleNewline -- Parse a regular string as a Unit. parseString :: ReadP MdToken parseString = do firstChar <- satisfy (/= '\n') -- Must parse at least one non-newline character here text <- munch (\x -> not (elem x "#*_[\n ")) return (Unit (firstChar : text)) lineParsers :: [ReadP MdToken] lineParsers = [parseLinebreak, parseSingleNewline, parseBold, parseItalic, parseString] -- A 'line' doesn't include a 'header' --lineParsers = [parseSingleNewline, parseString] -- List of all parsers allParsers :: [ReadP MdToken] allParsers = parseHeader : lineParsers -- Parse any of the above tokens. parseLineToken :: ReadP MdToken parseLineToken = choice lineParsers -- Parse a line, consisting of one or more tokens. parseLine :: ReadP MdToken parseLine = do skipSpaces -- Fail if we have reached the end of the document. remaining <- look when (null remaining) pfail parsed <- parseMany parseLineToken return (Line parsed) -- Parse a paragraph, which is a 'Line' (can span multiple actual lines), separated by double-newlines. -- As a weird special case, a 'Paragraph' can also be a 'Header'. parsePara :: ReadP MdToken parsePara = do parseMany (char '\n') -- text <- many1 (lookaheadParse (\x -> ((length x) < 2) || (take 2 x) /= "\n\n")) -- Parse until a double-newline. -- string "\n\n" <|> (eof >> return "") -- Consume the next double-newline or EOF. text <- (manyTill get ((string "\n\n") <|> (eof >> return ""))) when (null text) pfail let parsedText = fst $ leftmostLongestParse (parseHeader <|> parseLine) text -- Parse either a line or a header. -- If the paragraph is a header, return a Header token. Otheriwse return a Para token. case parsedText of Header level token -> return (Header level token) _ -> return (Para parsedText) -- Parse a document, which is multiple paragraphs. parseDocument :: ReadP MdToken parseDocument = (many1 parsePara) >>= (\res -> return (Document (res)))