Huge rewrite - use megaparsec instead of readP

usingMegaparsec
Aadhavan Srinivasan 4 days ago
parent 1915628a2b
commit 05e5548aa9

@ -4,24 +4,26 @@
module MdToHTML where module MdToHTML where
import Control.Applicative import Control.Applicative hiding (many, some)
import Control.Monad import Control.Monad
import Data.Char import Data.Char
import Data.List import Data.List
import Data.Ord (comparing) import Data.Ord (comparing)
import qualified Data.Text as T
import Data.Void
import Debug.Trace import Debug.Trace
import Text.ParserCombinators.ReadP import Text.Megaparsec
import Text.Megaparsec.Char
import Text.Printf import Text.Printf
type Parser = Parsec Void T.Text
type HeaderLevel = Int type HeaderLevel = Int
newtype URL = URL {getUrl :: String} deriving (Eq) newtype URL = URL {getUrl :: String} deriving (Eq)
newtype ImgPath = ImgPath {getPath :: String} deriving (Eq) newtype ImgPath = ImgPath {getPath :: String} deriving (Eq)
parseMany :: ReadP a -> ReadP [a]
parseMany = Text.ParserCombinators.ReadP.many
data MdToken data MdToken
= Document [MdToken] = Document [MdToken]
| Header HeaderLevel MdToken | Header HeaderLevel MdToken
@ -36,8 +38,8 @@ data MdToken
| Code MdToken | Code MdToken
| Codeblock String | Codeblock String
| Link MdToken URL | Link MdToken URL
| Image MdToken ImgPath | Image MdToken URL
| Figure MdToken ImgPath | Figure MdToken URL
| Bold MdToken | Bold MdToken
| Italic MdToken | Italic MdToken
| Strikethrough MdToken | Strikethrough MdToken
@ -48,7 +50,7 @@ data MdToken
instance Show MdToken where instance Show MdToken where
show (Document tokens) = concatMap show tokens show (Document tokens) = concatMap show tokens
show (Header level token) = "<h" ++ show level ++ ">" ++ show token ++ "</h" ++ show level ++ ">" show (Header level token) = "<h" ++ show level ++ ">" ++ show token ++ "</h" ++ show level ++ ">"
show (Para token) = "<p>" ++ show token ++ "</p>" show (Para token) = "<p>" ++ show token ++ "</p>\n"
show (Line tokens) = concatMap show tokens show (Line tokens) = concatMap show tokens
show Linebreak = "<br>" show Linebreak = "<br>"
show SingleNewline = " " show SingleNewline = " "
@ -59,8 +61,8 @@ instance Show MdToken where
show (Code code) = "<code>" ++ show code ++ "</code>" show (Code code) = "<code>" ++ show code ++ "</code>"
show (Codeblock code) = show code show (Codeblock code) = show code
show (Link txt url) = "<a href=\"" ++ getUrl url ++ "\">" ++ show txt ++ "</a>" show (Link txt url) = "<a href=\"" ++ getUrl url ++ "\">" ++ show txt ++ "</a>"
show (Image txt imgPath) = "<img src=\"" ++ getPath imgPath ++ "\"" ++ " alt=\"" ++ show txt ++ "\" />" show (Image txt url) = "<img src=\"" ++ getUrl url ++ "\"" ++ " alt=\"" ++ show txt ++ "\" />"
show (Figure txt imgPath) = "<figure><img src=\"" ++ getPath imgPath ++ "\" alt=\"" ++ show txt ++ "\"/><figcaption aria-hidden=\"true\">" ++ show txt ++ "</figcaption></figure>" show (Figure txt url) = "<figure><img src=\"" ++ getUrl url ++ "\" alt=\"" ++ show txt ++ "\"/><figcaption aria-hidden=\"true\">" ++ show txt ++ "</figcaption></figure>"
show (Bold token) = "<b>" ++ show token ++ "</b>" show (Bold token) = "<b>" ++ show token ++ "</b>"
show (Italic token) = "<i>" ++ show token ++ "</i>" show (Italic token) = "<i>" ++ show token ++ "</i>"
show (Strikethrough token) = "<s>" ++ show token ++ "</s>" show (Strikethrough token) = "<s>" ++ show token ++ "</s>"
@ -83,27 +85,26 @@ leftmostLongest xs =
(x : xs) -> Just x (x : xs) -> Just x
-- Get the first parse returned by readP_to_S that consumed the most input -- Get the first parse returned by readP_to_S that consumed the most input
leftmostLongestParse :: (Monoid a) => ReadP a -> String -> (a, String) leftmostLongestParse :: (Monoid a) => Parser a -> String -> a
leftmostLongestParse parser input = leftmostLongestParse parser input =
let res = leftmostLongest $ readP_to_S parser input case runParser parser "input" (T.pack input) of
in case res of (Left a) -> mempty
Nothing -> (mempty, mempty) (Right a) -> a
Just x -> x
specialChars = "\n\\`*_{}[]()<>#+|" specialChars = ">\n\\`*_{}[]#+|"
escapableChars = "-~!." ++ specialChars escapableChars = "-~!.$()" ++ specialChars
-- Makes a parser greedy. Instead of returning all possible parses, only the longest one is returned. -- Makes a parser greedy. Instead of returning all possible parses, only the longest one is returned.
greedyParse :: ReadP a -> ReadP [a] greedyParse :: Parser a -> Parser [a]
greedyParse parser = do greedyParse parser = do
greedyParse1 parser <++ return [] greedyParse1 parser <|> return []
-- Like greedyParse, but the parser must succeed atleast once. -- Like greedyParse, but the parser must succeed atleast once.
greedyParse1 :: ReadP a -> ReadP [a] greedyParse1 :: Parser a -> Parser [a]
greedyParse1 parser = do greedyParse1 parser = do
parsed1 <- parser parsed1 <- parser
parsed2 <- greedyParse1 parser <++ return [] parsed2 <- greedyParse1 parser <|> return []
return (parsed1 : parsed2) return (parsed1 : parsed2)
prepend :: [a] -> [a] -> [a] prepend :: [a] -> [a] -> [a]
@ -113,122 +114,130 @@ append :: [a] -> [a] -> [a]
append x1 x2 = x2 ++ x1 append x1 x2 = x2 ++ x1
-- Parse until EOL or EOF -- Parse until EOL or EOF
parseTillEol :: ReadP String parseTillEol :: Parser String
parseTillEol = manyTill get (void (char '\n') <++ eof) parseTillEol = manyTill anySingle (void (char '\n') <|> eof)
-- Takes a list of parsers. Returns a parser that will try them in -- Takes a list of parsers. Returns a parser that will try them in
-- order, moving to the next one only if the current one fails. -- order, moving to the next one only if the current one fails.
fallthroughParser :: [ReadP a] -> ReadP a fallthroughParser :: [Parser a] -> Parser a
fallthroughParser [x] = x fallthroughParser [x] = x
fallthroughParser (x : xs) = x <++ fallthroughParser xs fallthroughParser (x : xs) = try x <|> fallthroughParser xs
escapeChar :: Char -> String
escapeChar '>' = "&gt;"
escapeChar '<' = "&lt;"
escapeChar '&' = "&amp;"
escapeChar x = [x]
htmlEscapeChars :: T.Text -> T.Text
htmlEscapeChars = T.concatMap (T.pack . escapeChar)
-- --------------- -- ---------------
-- Parse a markdown header, denoted by 1-6 #'s followed by some text, followed by EOL. -- Parse a markdown header, denoted by 1-6 #'s followed by some text, followed by EOL.
parseHeader :: ReadP MdToken parseHeader :: Parser MdToken
parseHeader = do parseHeader = do
skipSpaces space
headers <- munch1 (== '#') headers <- greedyParse1 (char '#')
when when
(length headers > 6) (length headers > 6)
pfail empty
skipSpaces space
parsedText <- manyTill parseLineToken (void (char '\n') <++ eof) parsedText <- manyTill parseLineToken (void (char '\n') <|> eof)
greedyParse (char '\n') greedyParse (char '\n')
return (Header (length headers) (Line parsedText)) return (Header (length headers) (Line parsedText))
asteriskBold = T.pack "**"
underscoreBold = T.pack "__"
-- Parse bold text -- Parse bold text
parseBold :: ReadP MdToken parseBold :: Parser MdToken
parseBold = parseBoldWith "**" <|> parseBoldWith "__" parseBold = parseBoldWith asteriskBold <|> parseBoldWith underscoreBold
where where
parseBoldWith delim = do parseBoldWith delim = do
string delim string delim
inside <- greedyParse1 parseLineToken inside <- someTill parseLineToken $ string delim
string delim
return (Bold (Line inside)) return (Bold (Line inside))
-- Parse italic text -- Parse italic text
parseItalic :: ReadP MdToken parseItalic :: Parser MdToken
parseItalic = parseItalicWith "*" <|> parseItalicWith "_" parseItalic = parseItalicWith '*' <|> parseItalicWith '_'
where where
parseItalicWith delim = do parseItalicWith delim = do
string delim char delim
inside <- greedyParse1 parseLineToken inside <- someTill parseLineToken (char delim)
string delim
return (Italic (Line inside)) return (Italic (Line inside))
-- Parse strikethrough text -- Parse strikethrough text
parseStrikethrough :: ReadP MdToken parseStrikethrough :: Parser MdToken
parseStrikethrough = do parseStrikethrough = do
string "~~" string (T.pack "~~")
inside <- many1 parseLineToken inside <- someTill parseLineToken $ string (T.pack "~~")
string "~~"
return (Strikethrough (Line inside)) return (Strikethrough (Line inside))
-- Parse code -- Parse code
parseCode :: ReadP MdToken parseCode :: Parser MdToken
parseCode = do parseCode = do
string "`" char '`'
inside <- many1 get inside <- manyTill (satisfy (/= '\n')) (char '`')
string "`" return (Code (Unit (concatMap escapeChar inside)))
return (Code (Unit inside))
-- Parse a link -- Parse a link
parseLink :: ReadP MdToken parseLink :: Parser MdToken
parseLink = do parseLink = do
linkText <- between (string "[") (string "]") (many1 get) char '['
linkURL <- between (string "(") (string ")") (many1 get) linkText <- someTill parseLineToken (char ']')
let parsedLinkText = fst $ leftmostLongestParse parseLine linkText char '('
return $ Link parsedLinkText (URL linkURL) linkURL <- manyTill anySingle (char ')')
return $ Link (Line linkText) (URL linkURL)
-- Parse a linebreak character -- Parse a linebreak character
parseLinebreak :: ReadP MdToken parseLinebreak :: Parser MdToken
parseLinebreak = do parseLinebreak = do
char ' ' char ' '
many1 (char ' ') some (char ' ')
char '\n' char '\n'
return Linebreak return Linebreak
parseSingleNewline :: ReadP MdToken parseSingleNewline :: Parser MdToken
parseSingleNewline = do parseSingleNewline = do
char '\n' char '\n'
remaining <- look remaining <- getInput
case remaining of case T.unpack remaining of
[] -> return $ Unit "" [] -> return $ Unit ""
_ -> return SingleNewline _ -> return SingleNewline
parseImage :: ReadP MdToken parseImage :: Parser MdToken
parseImage = do parseImage = do
char '!' char '!'
char '[' link <- parseLink
altText <- many1 (parseEscapedChar <++ parseUnit) case link of
char ']' Link text path -> return $ Image text path
char '(' _ -> empty -- This should never be reached
path <- many1 get
char ')'
return $ Image (Line altText) (ImgPath path)
parseFigure = do parseFigure = do
img <- parseImage img <- parseImage
void (string "\n\n") <++ eof void (string doubleNewlineText) <|> eof
case img of case img of
Image text path -> return $ Figure text path Image text path -> return $ Figure text path
_ -> return img _ -> return img
-- Parse an escaped character -- Parse an escaped character
parseEscapedChar :: ReadP MdToken parseEscapedChar :: Parser MdToken
parseEscapedChar = do parseEscapedChar = do
char '\\' char '\\'
escapedChar <- choice (map char escapableChars) -- Parse any of the special chars. escapedChar <- choice (map char escapableChars) -- Parse any of the special chars.
return (Unit [escapedChar]) return (Unit [escapedChar])
-- Parse a character as a Unit. -- Parse a character as a Unit.
parseUnit :: ReadP MdToken parseUnit :: Parser MdToken
parseUnit = do parseUnit = do
text <- satisfy (`notElem` specialChars) -- text <- satisfy (`notElem` specialChars)
text <- anySingle
return (Unit [text]) return (Unit [text])
lineParsers :: [ReadP MdToken] lineParsers :: [Parser MdToken]
lineParsers = lineParsers =
[ parseLinebreak, [ parseLinebreak,
parseSingleNewline, parseSingleNewline,
@ -242,7 +251,7 @@ lineParsers =
parseUnit parseUnit
] -- A 'line' doesn't include a 'header' ] -- A 'line' doesn't include a 'header'
listLineParsers :: [ReadP MdToken] listLineParsers :: [Parser MdToken]
listLineParsers = listLineParsers =
[ parseLinebreak, [ parseLinebreak,
parseEscapedChar, parseEscapedChar,
@ -256,84 +265,85 @@ listLineParsers =
] -- A list line cannot contain newlines. ] -- A list line cannot contain newlines.
-- List of all parsers -- List of all parsers
allParsers :: [ReadP MdToken] allParsers :: [Parser MdToken]
allParsers = parseHeader : lineParsers allParsers = parseHeader : lineParsers
-- Parse any of the line tokens. -- Parse any of the line tokens.
parseLineToken :: ReadP MdToken parseLineToken :: Parser MdToken
parseLineToken = fallthroughParser lineParsers parseLineToken = fallthroughParser lineParsers
-- Parse any of the list line tokens. -- Parse any of the list line tokens.
parseListLineToken :: ReadP MdToken parseListLineToken :: Parser MdToken
parseListLineToken = fallthroughParser listLineParsers parseListLineToken = fallthroughParser listLineParsers
-- Parse a line, consisting of one or more tokens. -- Parse a line, consisting of one or more tokens.
parseLine :: ReadP MdToken parseLine :: Parser MdToken
parseLine = do parseLine = do
skipSpaces space
-- Fail if we have reached the end of the document. -- Fail if we have reached the end of the document.
parsed <- manyTill parseLineToken eof parsed <- manyTill parseLineToken eof
return (Line parsed) return (Line parsed)
-- Parse a paragraph, which is a 'Line' (can span multiple actual lines), separated by double-newlines. -- Parse a paragraph, which is a 'Line' (can span multiple actual lines), separated by double-newlines.
parsePara :: ReadP MdToken parsePara :: Parser MdToken
parsePara = do parsePara = do
parseMany (char '\n') space
-- text <- many1 (lookaheadParse (\x -> ((length x) < 2) || (take 2 x) /= "\n\n")) -- Parse until a double-newline. -- text <- many1 (lookaheadParse (\x -> ((length x) < 2) || (take 2 x) /= "\n\n")) -- Parse until a double-newline.
-- string "\n\n" <|> (eof >> return "") -- Consume the next double-newline or EOF. -- string "\n\n" <|> (eof >> return "") -- Consume the next double-newline or EOF.
text <- manyTill get (string "\n\n" <|> (eof >> return "")) parsedText <- someTill parseLineToken (try paraEnding)
when (null text) pfail many (char '\n')
let parsedText = fst $ leftmostLongestParse parseLine text -- Parse a line return (Para (Line parsedText))
parseMany (char '\n') where
return (Para parsedText) paraEnding = void (char '\n' *> (char '\n' <|> lookAhead (char '>'))) <|> eof
-- Parse a line starting with '>', return the line except for the '>'. -- Parse a line starting with '>', return the line except for the '>'.
parseQuotedLine :: ReadP String parseQuotedLine :: Parser String
parseQuotedLine = do parseQuotedLine = do
char '>' char '>'
greedyParse (char ' ' +++ char '\t') many (char ' ' <|> char '\t')
restOfLine <- munch (/= '\n') restOfLine <- many (satisfy (/= '\n'))
Text.ParserCombinators.ReadP.optional (char '\n') >> return "" void (char '\n') <|> eof
return restOfLine return restOfLine
-- Parse many 'quoted lines' until I see a non-quoted line. -- Parse many 'quoted lines' until I see a non-quoted line.
parseQuotedLines :: ReadP [String] parseQuotedLines :: Parser [String]
parseQuotedLines = parseQuotedLines = some parseQuotedLine
greedyParse1 $ do
look >>= \line -> -- some $ do
case line of -- getInput >>= \line ->
('>' : _) -> parseQuotedLine -- case T.unpack line of
_ -> pfail -- ('>' : _) -> parseQuotedLine
-- _ -> empty
-- Parse a blockquote, which is a greater-than sign followed by a paragraph. -- Parse a blockquote, which is a greater-than sign followed by a paragraph.
parseBlockquote :: ReadP MdToken parseBlockquote :: Parser MdToken
parseBlockquote = do parseBlockquote = do
quotedLines <- parseQuotedLines quotedLines <- parseQuotedLines
-- remaining <- look -- remaining <- look
-- let quotedLines = fst $ leftmostLongestParse parseQuotedLines remaining -- let quotedLines = fst $ leftmostLongestParse parseQuotedLines remaining
-- string (init $ unlines quotedLines) -- string (init $ unlines quotedLines)
let parsedQuotedLines = fst $ leftmostLongestParse (many1 (parseBlockquote <++ parsePara)) (init $ unlines quotedLines) -- unlines joins the lines together with a newline, and adds a trailing newline. init removes the trailing newline. let parsedQuotedLines = leftmostLongestParse (some (parseBlockquote <|> parsePara)) (init $ unlines quotedLines) -- unlines joins the lines together with a newline, and adds a trailing newline. init removes the trailing newline.
return (Blockquote parsedQuotedLines) return (Blockquote parsedQuotedLines)
-- Parse a nested list item. -- Parse a nested list item.
parseListNested :: ReadP MdToken parseListNested :: Parser MdToken
parseListNested = do parseListNested = do
let firstCharParser = string " " <++ string "\t" let firstCharParser = string (T.pack " ") <|> string (T.pack "\t")
let restOfLineParser = manyTill get (void (char '\n') <++ eof) let restOfLineParser = manyTill anySingle (void (char '\n') <|> eof)
lines <- greedyParse1 (firstCharParser *> restOfLineParser) lines <- greedyParse1 (firstCharParser *> restOfLineParser)
let linesParsed = fst $ leftmostLongestParse (parseUnorderedList <++ parseOrderedList) (init $ unlines lines) let linesParsed = leftmostLongestParse (parseUnorderedList <|> parseOrderedList) (init $ unlines lines)
when (null (show linesParsed)) pfail when (null (show linesParsed)) empty
return linesParsed return linesParsed
-- Parse an unordered list line item. -- Parse an unordered list line item.
parseUListLineItem :: ReadP MdToken parseUListLineItem :: Parser MdToken
parseUListLineItem = do parseUListLineItem = do
firstChar <- choice (map char ['*', '+', '-']) firstChar <- choice (map char ['*', '+', '-'])
char ' ' -- At least one space between list indicator and list text. char ' ' -- At least one space between list indicator and list text.
parseListLineItemCommon parseListLineItemCommon
-- Parse an ordered list line item. -- Parse an ordered list line item.
parseOListLineItem :: ReadP MdToken parseOListLineItem :: Parser MdToken
parseOListLineItem = do parseOListLineItem = do
num <- greedyParse1 (satisfy isDigit) num <- greedyParse1 (satisfy isDigit)
char '.' char '.'
@ -341,23 +351,22 @@ parseOListLineItem = do
parseListLineItemCommon parseListLineItemCommon
-- Common code for parsing list line items -- Common code for parsing list line items
parseListLineItemCommon :: ReadP MdToken parseListLineItemCommon :: Parser MdToken
parseListLineItemCommon = do parseListLineItemCommon = do
skipSpaces space
restOfLine <- many1 parseListLineToken restOfLine <- manyTill parseListLineToken (void (char '\n') <|> eof)
void (char '\n') <++ eof nestedList <- parseListNested <|> return (Unit "")
nestedList <- parseListNested <++ return (Unit "")
return $ Line [Line restOfLine, nestedList] return $ Line [Line restOfLine, nestedList]
-- Parse an unordered list paragraph item. -- Parse an unordered list paragraph item.
parseUListParaItem :: ReadP MdToken parseUListParaItem :: Parser MdToken
parseUListParaItem = do parseUListParaItem = do
firstLine <- parseUListLineItem firstLine <- parseUListLineItem
res <- parseListParaItemCommon res <- parseListParaItemCommon
return $ Document (Para firstLine : res) -- I only wrap this in a document because I want some way of converting [MdToken] to MdToken, without any overhead. There is no other reason to wrap it in a Document. return $ Document (Para firstLine : res) -- I only wrap this in a document because I want some way of converting [MdToken] to MdToken, without any overhead. There is no other reason to wrap it in a Document.
-- Parse an unordered list paragraph item. -- Parse an unordered list paragraph item.
parseOListParaItem :: ReadP MdToken parseOListParaItem :: Parser MdToken
parseOListParaItem = do parseOListParaItem = do
firstLine <- parseOListLineItem firstLine <- parseOListLineItem
res <- parseListParaItemCommon res <- parseListParaItemCommon
@ -367,48 +376,54 @@ parseOListParaItem = do
-- A list paragraph item is defined as a line item, followed by an empty line, followed by one or more -- A list paragraph item is defined as a line item, followed by an empty line, followed by one or more
-- lines indented by a space or tab. -- lines indented by a space or tab.
-- A list paragraph item can also be a blockquote. -- A list paragraph item can also be a blockquote.
parseListParaItemCommon :: ReadP [MdToken] parseListParaItemCommon :: Parser [MdToken]
parseListParaItemCommon = do parseListParaItemCommon = do
char '\n' char '\n'
lines <- greedyParse1 ((string " " <|> string "\t") *> parseTillEol) lines <- greedyParse1 ((string (T.pack " ") <|> string (T.pack "\t")) *> parseTillEol)
let res = fst $ leftmostLongestParse (greedyParse1 parseBlockquote <++ greedyParse1 parsePara) (init $ unlines lines) let res = leftmostLongestParse (greedyParse1 parseBlockquote <|> greedyParse1 parsePara) (init $ unlines lines)
char '\n' char '\n'
return res -- I only wrap this in a document because I want some way of converting [MdToken] to MdToken, without any overhead. There is no other reason to wrap it in a Document. return res -- I only wrap this in a document because I want some way of converting [MdToken] to MdToken, without any overhead. There is no other reason to wrap it in a Document.
-- Parse an unordered list item, which can be a line item or another list. -- Parse an unordered list item, which can be a line item or another list.
parseUListItem :: ReadP MdToken parseUListItem :: Parser MdToken
parseUListItem = parseUListParaItem <++ parseUListLineItem parseUListItem = try parseUListParaItem <|> parseUListLineItem
-- Parse an unordered list. -- Parse an unordered list.
parseUnorderedList :: ReadP MdToken parseUnorderedList :: Parser MdToken
parseUnorderedList = do parseUnorderedList = do
lineItems <- greedyParse1 parseUListItem lineItems <- some parseUListItem
void (char '\n') <++ eof -- A list must end in an extra newline or eof void (char '\n') <|> eof -- A list must end in an extra newline or eof
return $ UnordList lineItems return $ UnordList lineItems
-- -------- -- --------
parseOListItem :: ReadP MdToken parseOListItem :: Parser MdToken
parseOListItem = parseOListParaItem <++ parseOListLineItem parseOListItem = try parseOListParaItem <|> parseOListLineItem
-- Parses the first element of an ordered list, which must start with '1.' -- Parses the first element of an ordered list, which must start with '1.'
parseFirstOListItem :: ReadP MdToken parseFirstOListItem :: Parser MdToken
parseFirstOListItem = do parseFirstOListItem = do
remaining <- look remaining <- getInput
when (take 2 remaining /= "1.") pfail when (take 2 (T.unpack remaining) /= "1.") empty
parseOListLineItem parseOListLineItem
parseOrderedList :: ReadP MdToken parseOrderedList :: Parser MdToken
parseOrderedList = do parseOrderedList = do
firstLine <- parseFirstOListItem firstLine <- parseFirstOListItem
lineItems <- greedyParse1 parseOListItem lineItems <- some parseOListItem
void (char '\n') <++ eof void (char '\n') <|> eof
return $ OrdList (firstLine : lineItems) return $ OrdList (firstLine : lineItems)
parseHorizontalRule :: ReadP MdToken horizontalRuleText :: T.Text
parseHorizontalRule = string "---" *> (void (string "\n\n") <++ eof) *> return HorizontalRule horizontalRuleText = T.pack "---"
doubleNewlineText :: T.Text
doubleNewlineText = T.pack "\n\n"
parseHorizontalRule :: Parser MdToken
parseHorizontalRule = string horizontalRuleText *> (void (string doubleNewlineText) <|> eof) *> return HorizontalRule
documentParsers :: [ReadP MdToken] documentParsers :: [Parser MdToken]
documentParsers = documentParsers =
[ parseHorizontalRule, [ parseHorizontalRule,
parseHeader, parseHeader,
@ -420,7 +435,7 @@ documentParsers =
] ]
-- Parse a document, which is multiple paragraphs. -- Parse a document, which is multiple paragraphs.
parseDocument :: ReadP MdToken parseDocument :: Parser MdToken
parseDocument = do parseDocument = do
res <- manyTill (fallthroughParser documentParsers) eof res <- manyTill (fallthroughParser documentParsers) eof
return (Document res) return (Document res)

@ -7,7 +7,7 @@ check_equal :: String -> String -> String -> Test
check_equal desc expected actual = TestCase (assertEqual desc expected actual) check_equal desc expected actual = TestCase (assertEqual desc expected actual)
convert :: String -> String convert :: String -> String
convert md = show . fst $ leftmostLongestParse parseDocument md convert md = show $ leftmostLongestParse parseDocument md
headerTests = headerTests =
TestList TestList
@ -24,8 +24,8 @@ boldTests =
[ check_equal "Should convert bold" "<p><b>Hello</b></p>" (convert "__Hello__"), [ check_equal "Should convert bold" "<p><b>Hello</b></p>" (convert "__Hello__"),
check_equal "Should convert italic" "<p><i>Hello</i></p>" (convert "_Hello_"), check_equal "Should convert italic" "<p><i>Hello</i></p>" (convert "_Hello_"),
check_equal "Should convert bold and italic in a sentence" "<p>It <i>is</i> a <b>wonderful</b> day</p>" (convert "It _is_ a __wonderful__ day"), check_equal "Should convert bold and italic in a sentence" "<p>It <i>is</i> a <b>wonderful</b> day</p>" (convert "It _is_ a __wonderful__ day"),
check_equal "Should convert nested bold and italic" "<p><b>Bold then <i>Italic</i></b></p>" (convert "**Bold then *Italic***"), check_equal "Should convert nested bold and italic" "<p><b>Bold then <i>Italic</i></b></p>" (convert "**Bold then _Italic_**"),
check_equal "Should convert nested bold and italic" "<p><i>Italic then <b>Bold</b></i></p>" (convert "*Italic then **Bold***") check_equal "Should convert nested bold and italic" "<p><i>Italic then <b>Bold</b></i></p>" (convert "*Italic then __Bold__*")
] ]
strikethroughTests = strikethroughTests =
@ -93,11 +93,15 @@ orderedListTests =
check_equal "Unordered list in ordered list" "<ol><li>Item 1</li><li>Item 2<ul><li>Item 1</li><li>Item 2</li></ul></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n - Item 1\n * Item 2\n4. Item 3") check_equal "Unordered list in ordered list" "<ol><li>Item 1</li><li>Item 2<ul><li>Item 1</li><li>Item 2</li></ul></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n - Item 1\n * Item 2\n4. Item 3")
] ]
htmlTests =
TestList
[check_equal "Convert HTML element" "<p><center>a</center></p>" (convert "<center>a</center>")]
codeTests = codeTests =
TestList TestList
[ check_equal "Code by itself" "<p><code>Hello world!</code></p>" (convert "`Hello world!`"), [ check_equal "Code by itself" "<p><code>Hello world!</code></p>" (convert "`Hello world!`"),
check_equal "Code in a paragraph" "<p>The following <code>text</code> is code</p>" (convert "The following `text` is code"), check_equal "Code in a paragraph" "<p>The following <code>text</code> is code</p>" (convert "The following `text` is code"),
check_equal "Code across paragraphs (shouldn't work" "<p></p><p></p>" (convert "`Incomplete\n\nCode`") -- At the moment, this is just treated as a syntax error, so nothing is rendered. check_equal "Code across paragraphs (shouldn't work)" "<p>`Incomplete</p><p>Code`</p>" (convert "`Incomplete\n\nCode`") -- At the moment, this is just treated as a syntax error, so nothing is rendered.
] ]
imageTests = imageTests =
@ -149,6 +153,7 @@ tests =
unorderedListTests, unorderedListTests,
orderedListTests, orderedListTests,
imageTests, imageTests,
htmlTests,
figureTests, figureTests,
codeTests, codeTests,
horizontalRuleTests, horizontalRuleTests,

Loading…
Cancel
Save