Compare commits

..

No commits in common. '2893fa25e6f197b57454af63c369774e4085a221' and '1d9ac86a2addc6ebd22fa13bf803966bbdb6cdcb' have entirely different histories.

@ -23,5 +23,5 @@ main = do
fileContents <- case args of fileContents <- case args of
[] -> getContents [] -> getContents
x : _ -> readFile x x : _ -> readFile x
let res = leftmostLongestParse parseDocument fileContents let res = fst $ leftmostLongestParse parseDocument fileContents
print res print res

@ -58,9 +58,7 @@ library
exposed-modules: MdToHTML exposed-modules: MdToHTML
other-modules: MdToHtmlTest other-modules: MdToHtmlTest
build-depends: base ^>=4.19.1.0, build-depends: base ^>=4.19.1.0,
HUnit, HUnit
megaparsec,
text
executable md-to-html-runner executable md-to-html-runner
-- Import common warning flags. -- Import common warning flags.

@ -4,26 +4,24 @@
module MdToHTML where module MdToHTML where
import Control.Applicative hiding (many, some) import Control.Applicative
import Control.Monad import Control.Monad
import Data.Char import Data.Char
import Data.List import Data.List
import Data.Ord (comparing) import Data.Ord (comparing)
import qualified Data.Text as T
import Data.Void
import Debug.Trace import Debug.Trace
import Text.Megaparsec import Text.ParserCombinators.ReadP
import Text.Megaparsec.Char
import Text.Printf import Text.Printf
type Parser = Parsec Void T.Text
type HeaderLevel = Int type HeaderLevel = Int
newtype URL = URL {getUrl :: String} deriving (Eq) newtype URL = URL {getUrl :: String} deriving (Eq)
newtype ImgPath = ImgPath {getPath :: String} deriving (Eq) newtype ImgPath = ImgPath {getPath :: String} deriving (Eq)
parseMany :: ReadP a -> ReadP [a]
parseMany = Text.ParserCombinators.ReadP.many
data MdToken data MdToken
= Document [MdToken] = Document [MdToken]
| Header HeaderLevel MdToken | Header HeaderLevel MdToken
@ -38,8 +36,8 @@ data MdToken
| Code MdToken | Code MdToken
| Codeblock String | Codeblock String
| Link MdToken URL | Link MdToken URL
| Image MdToken URL | Image MdToken ImgPath
| Figure MdToken URL | Figure MdToken ImgPath
| Bold MdToken | Bold MdToken
| Italic MdToken | Italic MdToken
| Strikethrough MdToken | Strikethrough MdToken
@ -50,7 +48,7 @@ data MdToken
instance Show MdToken where instance Show MdToken where
show (Document tokens) = concatMap show tokens show (Document tokens) = concatMap show tokens
show (Header level token) = "<h" ++ show level ++ ">" ++ show token ++ "</h" ++ show level ++ ">" show (Header level token) = "<h" ++ show level ++ ">" ++ show token ++ "</h" ++ show level ++ ">"
show (Para token) = "<p>" ++ show token ++ "</p>\n" show (Para token) = "<p>" ++ show token ++ "</p>"
show (Line tokens) = concatMap show tokens show (Line tokens) = concatMap show tokens
show Linebreak = "<br>" show Linebreak = "<br>"
show SingleNewline = " " show SingleNewline = " "
@ -61,8 +59,8 @@ instance Show MdToken where
show (Code code) = "<code>" ++ show code ++ "</code>" show (Code code) = "<code>" ++ show code ++ "</code>"
show (Codeblock code) = show code show (Codeblock code) = show code
show (Link txt url) = "<a href=\"" ++ getUrl url ++ "\">" ++ show txt ++ "</a>" show (Link txt url) = "<a href=\"" ++ getUrl url ++ "\">" ++ show txt ++ "</a>"
show (Image txt url) = "<img src=\"" ++ getUrl url ++ "\"" ++ " alt=\"" ++ show txt ++ "\" />" show (Image txt imgPath) = "<img src=\"" ++ getPath imgPath ++ "\"" ++ " alt=\"" ++ show txt ++ "\" />"
show (Figure txt url) = "<figure><img src=\"" ++ getUrl url ++ "\" alt=\"" ++ show txt ++ "\"/><figcaption aria-hidden=\"true\">" ++ show txt ++ "</figcaption></figure>" show (Figure txt imgPath) = "<figure><img src=\"" ++ getPath imgPath ++ "\" alt=\"" ++ show txt ++ "\"/><figcaption aria-hidden=\"true\">" ++ show txt ++ "</figcaption></figure>"
show (Bold token) = "<b>" ++ show token ++ "</b>" show (Bold token) = "<b>" ++ show token ++ "</b>"
show (Italic token) = "<i>" ++ show token ++ "</i>" show (Italic token) = "<i>" ++ show token ++ "</i>"
show (Strikethrough token) = "<s>" ++ show token ++ "</s>" show (Strikethrough token) = "<s>" ++ show token ++ "</s>"
@ -85,26 +83,27 @@ leftmostLongest xs =
(x : xs) -> Just x (x : xs) -> Just x
-- Get the first parse returned by readP_to_S that consumed the most input -- Get the first parse returned by readP_to_S that consumed the most input
leftmostLongestParse :: (Monoid a) => Parser a -> String -> a leftmostLongestParse :: (Monoid a) => ReadP a -> String -> (a, String)
leftmostLongestParse parser input = leftmostLongestParse parser input =
case runParser parser "input" (T.pack input) of let res = leftmostLongest $ readP_to_S parser input
(Left a) -> mempty in case res of
(Right a) -> a Nothing -> (mempty, mempty)
Just x -> x
specialChars = ">\n\\`*_{}[]#+|" specialChars = "\n\\`*_{}[]()<>#+|"
escapableChars = "-~!.$()" ++ specialChars escapableChars = "-~!." ++ specialChars
-- Makes a parser greedy. Instead of returning all possible parses, only the longest one is returned. -- Makes a parser greedy. Instead of returning all possible parses, only the longest one is returned.
greedyParse :: Parser a -> Parser [a] greedyParse :: ReadP a -> ReadP [a]
greedyParse parser = do greedyParse parser = do
greedyParse1 parser <|> return [] greedyParse1 parser <++ return []
-- Like greedyParse, but the parser must succeed atleast once. -- Like greedyParse, but the parser must succeed atleast once.
greedyParse1 :: Parser a -> Parser [a] greedyParse1 :: ReadP a -> ReadP [a]
greedyParse1 parser = do greedyParse1 parser = do
parsed1 <- parser parsed1 <- parser
parsed2 <- greedyParse1 parser <|> return [] parsed2 <- greedyParse1 parser <++ return []
return (parsed1 : parsed2) return (parsed1 : parsed2)
prepend :: [a] -> [a] -> [a] prepend :: [a] -> [a] -> [a]
@ -114,130 +113,124 @@ append :: [a] -> [a] -> [a]
append x1 x2 = x2 ++ x1 append x1 x2 = x2 ++ x1
-- Parse until EOL or EOF -- Parse until EOL or EOF
parseTillEol :: Parser String parseTillEol :: ReadP String
parseTillEol = manyTill anySingle (void (char '\n') <|> eof) parseTillEol = manyTill get (void (char '\n') <++ eof)
-- Takes a list of parsers. Returns a parser that will try them in -- Takes a list of parsers. Returns a parser that will try them in
-- order, moving to the next one only if the current one fails. -- order, moving to the next one only if the current one fails.
fallthroughParser :: [Parser a] -> Parser a fallthroughParser :: [ReadP a] -> ReadP a
fallthroughParser [x] = x fallthroughParser [x] = x
fallthroughParser (x : xs) = try x <|> fallthroughParser xs fallthroughParser (x : xs) = x <++ fallthroughParser xs
escapeChar :: Char -> String
escapeChar '>' = "&gt;"
escapeChar '<' = "&lt;"
escapeChar '&' = "&amp;"
escapeChar x = [x]
htmlEscapeChars :: T.Text -> T.Text
htmlEscapeChars = T.concatMap (T.pack . escapeChar)
-- --------------- -- ---------------
-- Parse a markdown header, denoted by 1-6 #'s followed by some text, followed by EOL. -- Parse a markdown header, denoted by 1-6 #'s followed by some text, followed by EOL.
parseHeader :: Parser MdToken parseHeader :: ReadP MdToken
parseHeader = do parseHeader = do
space skipSpaces
headers <- greedyParse1 (char '#') headers <- munch1 (== '#')
when when
(length headers > 6) (length headers > 6)
empty pfail
space skipSpaces
parsedText <- manyTill parseLineToken (void (char '\n') <|> eof) text <- munch1 (/= '\n')
greedyParse (char '\n') -- Text.ParserCombinators.ReadP.optional (char '\n')
return (Header (length headers) (Line parsedText)) skipSpaces
let parsedText = fst $ leftmostLongestParse parseLine text
asteriskBold = T.pack "**" return (Header (length headers) parsedText)
underscoreBold = T.pack "__"
-- Parse bold text -- Parse bold text
parseBold :: Parser MdToken parseBold :: ReadP MdToken
parseBold = parseBoldWith asteriskBold <|> parseBoldWith underscoreBold parseBold = parseBoldWith "**" <|> parseBoldWith "__"
where where
parseBoldWith delim = do parseBoldWith delim = do
string delim string delim
inside <- someTill parseLineToken $ string delim inside <- greedyParse1 parseLineToken
string delim
return (Bold (Line inside)) return (Bold (Line inside))
-- Parse italic text -- Parse italic text
parseItalic :: Parser MdToken parseItalic :: ReadP MdToken
parseItalic = parseItalicWith '*' <|> parseItalicWith '_' parseItalic = parseItalicWith "*" <|> parseItalicWith "_"
where where
parseItalicWith delim = do parseItalicWith delim = do
char delim string delim
inside <- someTill parseLineToken (char delim) inside <- greedyParse1 parseLineToken
string delim
return (Italic (Line inside)) return (Italic (Line inside))
-- Parse strikethrough text -- Parse strikethrough text
parseStrikethrough :: Parser MdToken parseStrikethrough :: ReadP MdToken
parseStrikethrough = do parseStrikethrough = do
string (T.pack "~~") string "~~"
inside <- someTill parseLineToken $ string (T.pack "~~") inside <- many1 parseLineToken
string "~~"
return (Strikethrough (Line inside)) return (Strikethrough (Line inside))
-- Parse code -- Parse code
parseCode :: Parser MdToken parseCode :: ReadP MdToken
parseCode = do parseCode = do
char '`' string "`"
inside <- manyTill (satisfy (/= '\n')) (char '`') inside <- many1 get
return (Code (Unit (concatMap escapeChar inside))) string "`"
return (Code (Unit inside))
-- Parse a link -- Parse a link
parseLink :: Parser MdToken parseLink :: ReadP MdToken
parseLink = do parseLink = do
char '[' linkText <- between (string "[") (string "]") (many1 get)
linkText <- someTill parseLineToken (char ']') linkURL <- between (string "(") (string ")") (many1 get)
char '(' let parsedLinkText = fst $ leftmostLongestParse parseLine linkText
linkURL <- manyTill anySingle (char ')') return $ Link parsedLinkText (URL linkURL)
return $ Link (Line linkText) (URL linkURL)
-- Parse a linebreak character -- Parse a linebreak character
parseLinebreak :: Parser MdToken parseLinebreak :: ReadP MdToken
parseLinebreak = do parseLinebreak = do
char ' ' char ' '
some (char ' ') many1 (char ' ')
char '\n' char '\n'
return Linebreak return Linebreak
parseSingleNewline :: Parser MdToken parseSingleNewline :: ReadP MdToken
parseSingleNewline = do parseSingleNewline = do
char '\n' char '\n'
remaining <- getInput remaining <- look
case T.unpack remaining of case remaining of
[] -> return $ Unit "" [] -> return $ Unit ""
_ -> return SingleNewline _ -> return SingleNewline
parseImage :: Parser MdToken parseImage :: ReadP MdToken
parseImage = do parseImage = do
char '!' char '!'
link <- parseLink char '['
case link of altText <- many1 (parseEscapedChar <++ parseUnit)
Link text path -> return $ Image text path char ']'
_ -> empty -- This should never be reached char '('
path <- many1 get
char ')'
return $ Image (Line altText) (ImgPath path)
parseFigure = do parseFigure = do
img <- parseImage img <- parseImage
void (string doubleNewlineText) <|> eof void (string "\n\n") <++ eof
case img of case img of
Image text path -> return $ Figure text path Image text path -> return $ Figure text path
_ -> return img _ -> return img
-- Parse an escaped character -- Parse an escaped character
parseEscapedChar :: Parser MdToken parseEscapedChar :: ReadP MdToken
parseEscapedChar = do parseEscapedChar = do
char '\\' char '\\'
escapedChar <- choice (map char escapableChars) -- Parse any of the special chars. escapedChar <- choice (map char escapableChars) -- Parse any of the special chars.
return (Unit [escapedChar]) return (Unit [escapedChar])
-- Parse a character as a Unit. -- Parse a character as a Unit.
parseUnit :: Parser MdToken parseUnit :: ReadP MdToken
parseUnit = do parseUnit = do
-- text <- satisfy (`notElem` specialChars) text <- satisfy (`notElem` specialChars)
text <- anySingle
return (Unit [text]) return (Unit [text])
lineParsers :: [Parser MdToken] lineParsers :: [ReadP MdToken]
lineParsers = lineParsers =
[ parseLinebreak, [ parseLinebreak,
parseSingleNewline, parseSingleNewline,
@ -251,7 +244,7 @@ lineParsers =
parseUnit parseUnit
] -- A 'line' doesn't include a 'header' ] -- A 'line' doesn't include a 'header'
listLineParsers :: [Parser MdToken] listLineParsers :: [ReadP MdToken]
listLineParsers = listLineParsers =
[ parseLinebreak, [ parseLinebreak,
parseEscapedChar, parseEscapedChar,
@ -265,85 +258,84 @@ listLineParsers =
] -- A list line cannot contain newlines. ] -- A list line cannot contain newlines.
-- List of all parsers -- List of all parsers
allParsers :: [Parser MdToken] allParsers :: [ReadP MdToken]
allParsers = parseHeader : lineParsers allParsers = parseHeader : lineParsers
-- Parse any of the line tokens. -- Parse any of the line tokens.
parseLineToken :: Parser MdToken parseLineToken :: ReadP MdToken
parseLineToken = fallthroughParser lineParsers parseLineToken = fallthroughParser lineParsers
-- Parse any of the list line tokens. -- Parse any of the list line tokens.
parseListLineToken :: Parser MdToken parseListLineToken :: ReadP MdToken
parseListLineToken = fallthroughParser listLineParsers parseListLineToken = fallthroughParser listLineParsers
-- Parse a line, consisting of one or more tokens. -- Parse a line, consisting of one or more tokens.
parseLine :: Parser MdToken parseLine :: ReadP MdToken
parseLine = do parseLine = do
space skipSpaces
-- Fail if we have reached the end of the document. -- Fail if we have reached the end of the document.
parsed <- manyTill parseLineToken eof parsed <- manyTill parseLineToken eof
return (Line parsed) return (Line parsed)
-- Parse a paragraph, which is a 'Line' (can span multiple actual lines), separated by double-newlines. -- Parse a paragraph, which is a 'Line' (can span multiple actual lines), separated by double-newlines.
parsePara :: Parser MdToken parsePara :: ReadP MdToken
parsePara = do parsePara = do
space parseMany (char '\n')
-- text <- many1 (lookaheadParse (\x -> ((length x) < 2) || (take 2 x) /= "\n\n")) -- Parse until a double-newline. -- text <- many1 (lookaheadParse (\x -> ((length x) < 2) || (take 2 x) /= "\n\n")) -- Parse until a double-newline.
-- string "\n\n" <|> (eof >> return "") -- Consume the next double-newline or EOF. -- string "\n\n" <|> (eof >> return "") -- Consume the next double-newline or EOF.
parsedText <- someTill parseLineToken (try paraEnding) text <- manyTill get (string "\n\n" <|> (eof >> return ""))
many (char '\n') when (null text) pfail
return (Para (Line parsedText)) let parsedText = fst $ leftmostLongestParse parseLine text -- Parse a line
where parseMany (char '\n')
paraEnding = void (char '\n' *> (char '\n' <|> lookAhead (char '>'))) <|> eof return (Para parsedText)
-- Parse a line starting with '>', return the line except for the '>'. -- Parse a line starting with '>', return the line except for the '>'.
parseQuotedLine :: Parser String parseQuotedLine :: ReadP String
parseQuotedLine = do parseQuotedLine = do
char '>' char '>'
many (char ' ' <|> char '\t') greedyParse (char ' ' +++ char '\t')
restOfLine <- many (satisfy (/= '\n')) restOfLine <- munch (/= '\n')
void (char '\n') <|> eof Text.ParserCombinators.ReadP.optional (char '\n') >> return ""
return restOfLine return restOfLine
-- Parse many 'quoted lines' until I see a non-quoted line. -- Parse many 'quoted lines' until I see a non-quoted line.
parseQuotedLines :: Parser [String] parseQuotedLines :: ReadP [String]
parseQuotedLines = some parseQuotedLine parseQuotedLines =
greedyParse1 $ do
-- some $ do look >>= \line ->
-- getInput >>= \line -> case line of
-- case T.unpack line of ('>' : _) -> parseQuotedLine
-- ('>' : _) -> parseQuotedLine _ -> pfail
-- _ -> empty
-- Parse a blockquote, which is a greater-than sign followed by a paragraph. -- Parse a blockquote, which is a greater-than sign followed by a paragraph.
parseBlockquote :: Parser MdToken parseBlockquote :: ReadP MdToken
parseBlockquote = do parseBlockquote = do
quotedLines <- parseQuotedLines quotedLines <- parseQuotedLines
-- remaining <- look -- remaining <- look
-- let quotedLines = fst $ leftmostLongestParse parseQuotedLines remaining -- let quotedLines = fst $ leftmostLongestParse parseQuotedLines remaining
-- string (init $ unlines quotedLines) -- string (init $ unlines quotedLines)
let parsedQuotedLines = leftmostLongestParse (some (parseBlockquote <|> parsePara)) (init $ unlines quotedLines) -- unlines joins the lines together with a newline, and adds a trailing newline. init removes the trailing newline. let parsedQuotedLines = fst $ leftmostLongestParse (many1 (parseBlockquote <++ parsePara)) (init $ unlines quotedLines) -- unlines joins the lines together with a newline, and adds a trailing newline. init removes the trailing newline.
return (Blockquote parsedQuotedLines) return (Blockquote parsedQuotedLines)
-- Parse a nested list item. -- Parse a nested list item.
parseListNested :: Parser MdToken parseListNested :: ReadP MdToken
parseListNested = do parseListNested = do
let firstCharParser = string (T.pack " ") <|> string (T.pack "\t") let firstCharParser = string " " <++ string "\t"
let restOfLineParser = manyTill anySingle (void (char '\n') <|> eof) let restOfLineParser = manyTill get (void (char '\n') <++ eof)
lines <- greedyParse1 (firstCharParser *> restOfLineParser) lines <- greedyParse1 (firstCharParser *> restOfLineParser)
let linesParsed = leftmostLongestParse (parseUnorderedList <|> parseOrderedList) (init $ unlines lines) let linesParsed = fst $ leftmostLongestParse (parseUnorderedList <++ parseOrderedList) (init $ unlines lines)
when (null (show linesParsed)) empty when (null (show linesParsed)) pfail
return linesParsed return linesParsed
-- Parse an unordered list line item. -- Parse an unordered list line item.
parseUListLineItem :: Parser MdToken parseUListLineItem :: ReadP MdToken
parseUListLineItem = do parseUListLineItem = do
firstChar <- choice (map char ['*', '+', '-']) firstChar <- choice (map char ['*', '+', '-'])
char ' ' -- At least one space between list indicator and list text. char ' ' -- At least one space between list indicator and list text.
parseListLineItemCommon parseListLineItemCommon
-- Parse an ordered list line item. -- Parse an ordered list line item.
parseOListLineItem :: Parser MdToken parseOListLineItem :: ReadP MdToken
parseOListLineItem = do parseOListLineItem = do
num <- greedyParse1 (satisfy isDigit) num <- greedyParse1 (satisfy isDigit)
char '.' char '.'
@ -351,22 +343,23 @@ parseOListLineItem = do
parseListLineItemCommon parseListLineItemCommon
-- Common code for parsing list line items -- Common code for parsing list line items
parseListLineItemCommon :: Parser MdToken parseListLineItemCommon :: ReadP MdToken
parseListLineItemCommon = do parseListLineItemCommon = do
space skipSpaces
restOfLine <- manyTill parseListLineToken (void (char '\n') <|> eof) restOfLine <- many1 parseListLineToken
nestedList <- parseListNested <|> return (Unit "") void (char '\n') <++ eof
nestedList <- parseListNested <++ return (Unit "")
return $ Line [Line restOfLine, nestedList] return $ Line [Line restOfLine, nestedList]
-- Parse an unordered list paragraph item. -- Parse an unordered list paragraph item.
parseUListParaItem :: Parser MdToken parseUListParaItem :: ReadP MdToken
parseUListParaItem = do parseUListParaItem = do
firstLine <- parseUListLineItem firstLine <- parseUListLineItem
res <- parseListParaItemCommon res <- parseListParaItemCommon
return $ Document (Para firstLine : res) -- I only wrap this in a document because I want some way of converting [MdToken] to MdToken, without any overhead. There is no other reason to wrap it in a Document. return $ Document (Para firstLine : res) -- I only wrap this in a document because I want some way of converting [MdToken] to MdToken, without any overhead. There is no other reason to wrap it in a Document.
-- Parse an unordered list paragraph item. -- Parse an unordered list paragraph item.
parseOListParaItem :: Parser MdToken parseOListParaItem :: ReadP MdToken
parseOListParaItem = do parseOListParaItem = do
firstLine <- parseOListLineItem firstLine <- parseOListLineItem
res <- parseListParaItemCommon res <- parseListParaItemCommon
@ -376,54 +369,48 @@ parseOListParaItem = do
-- A list paragraph item is defined as a line item, followed by an empty line, followed by one or more -- A list paragraph item is defined as a line item, followed by an empty line, followed by one or more
-- lines indented by a space or tab. -- lines indented by a space or tab.
-- A list paragraph item can also be a blockquote. -- A list paragraph item can also be a blockquote.
parseListParaItemCommon :: Parser [MdToken] parseListParaItemCommon :: ReadP [MdToken]
parseListParaItemCommon = do parseListParaItemCommon = do
char '\n' char '\n'
lines <- greedyParse1 ((string (T.pack " ") <|> string (T.pack "\t")) *> parseTillEol) lines <- greedyParse1 ((string " " <|> string "\t") *> parseTillEol)
let res = leftmostLongestParse (greedyParse1 parseBlockquote <|> greedyParse1 parsePara) (init $ unlines lines) let res = fst $ leftmostLongestParse (greedyParse1 parseBlockquote <++ greedyParse1 parsePara) (init $ unlines lines)
char '\n' char '\n'
return res -- I only wrap this in a document because I want some way of converting [MdToken] to MdToken, without any overhead. There is no other reason to wrap it in a Document. return res -- I only wrap this in a document because I want some way of converting [MdToken] to MdToken, without any overhead. There is no other reason to wrap it in a Document.
-- Parse an unordered list item, which can be a line item or another list. -- Parse an unordered list item, which can be a line item or another list.
parseUListItem :: Parser MdToken parseUListItem :: ReadP MdToken
parseUListItem = try parseUListParaItem <|> parseUListLineItem parseUListItem = parseUListParaItem <++ parseUListLineItem
-- Parse an unordered list. -- Parse an unordered list.
parseUnorderedList :: Parser MdToken parseUnorderedList :: ReadP MdToken
parseUnorderedList = do parseUnorderedList = do
lineItems <- some parseUListItem lineItems <- greedyParse1 parseUListItem
void (char '\n') <|> eof -- A list must end in an extra newline or eof void (char '\n') <++ eof -- A list must end in an extra newline or eof
return $ UnordList lineItems return $ UnordList lineItems
-- -------- -- --------
parseOListItem :: Parser MdToken parseOListItem :: ReadP MdToken
parseOListItem = try parseOListParaItem <|> parseOListLineItem parseOListItem = parseOListParaItem <++ parseOListLineItem
-- Parses the first element of an ordered list, which must start with '1.' -- Parses the first element of an ordered list, which must start with '1.'
parseFirstOListItem :: Parser MdToken parseFirstOListItem :: ReadP MdToken
parseFirstOListItem = do parseFirstOListItem = do
remaining <- getInput remaining <- look
when (take 2 (T.unpack remaining) /= "1.") empty when (take 2 remaining /= "1.") pfail
parseOListLineItem parseOListLineItem
parseOrderedList :: Parser MdToken parseOrderedList :: ReadP MdToken
parseOrderedList = do parseOrderedList = do
firstLine <- parseFirstOListItem firstLine <- parseFirstOListItem
lineItems <- some parseOListItem lineItems <- greedyParse1 parseOListItem
void (char '\n') <|> eof void (char '\n') <++ eof
return $ OrdList (firstLine : lineItems) return $ OrdList (firstLine : lineItems)
horizontalRuleText :: T.Text parseHorizontalRule :: ReadP MdToken
horizontalRuleText = T.pack "---" parseHorizontalRule = string "---" *> (void (string "\n\n") <++ eof) *> return HorizontalRule
doubleNewlineText :: T.Text
doubleNewlineText = T.pack "\n\n"
parseHorizontalRule :: Parser MdToken
parseHorizontalRule = string horizontalRuleText *> (void (string doubleNewlineText) <|> eof) *> return HorizontalRule
documentParsers :: [Parser MdToken] documentParsers :: [ReadP MdToken]
documentParsers = documentParsers =
[ parseHorizontalRule, [ parseHorizontalRule,
parseHeader, parseHeader,
@ -435,7 +422,7 @@ documentParsers =
] ]
-- Parse a document, which is multiple paragraphs. -- Parse a document, which is multiple paragraphs.
parseDocument :: Parser MdToken parseDocument :: ReadP MdToken
parseDocument = do parseDocument = do
res <- manyTill (fallthroughParser documentParsers) eof res <- manyTill (fallthroughParser documentParsers) eof
return (Document res) return (Document res)

@ -7,7 +7,7 @@ check_equal :: String -> String -> String -> Test
check_equal desc expected actual = TestCase (assertEqual desc expected actual) check_equal desc expected actual = TestCase (assertEqual desc expected actual)
convert :: String -> String convert :: String -> String
convert md = show $ leftmostLongestParse parseDocument md convert md = show . fst $ leftmostLongestParse parseDocument md
headerTests = headerTests =
TestList TestList
@ -24,8 +24,8 @@ boldTests =
[ check_equal "Should convert bold" "<p><b>Hello</b></p>" (convert "__Hello__"), [ check_equal "Should convert bold" "<p><b>Hello</b></p>" (convert "__Hello__"),
check_equal "Should convert italic" "<p><i>Hello</i></p>" (convert "_Hello_"), check_equal "Should convert italic" "<p><i>Hello</i></p>" (convert "_Hello_"),
check_equal "Should convert bold and italic in a sentence" "<p>It <i>is</i> a <b>wonderful</b> day</p>" (convert "It _is_ a __wonderful__ day"), check_equal "Should convert bold and italic in a sentence" "<p>It <i>is</i> a <b>wonderful</b> day</p>" (convert "It _is_ a __wonderful__ day"),
check_equal "Should convert nested bold and italic" "<p><b>Bold then <i>Italic</i></b></p>" (convert "**Bold then _Italic_**"), check_equal "Should convert nested bold and italic" "<p><b>Bold then <i>Italic</i></b></p>" (convert "**Bold then *Italic***"),
check_equal "Should convert nested bold and italic" "<p><i>Italic then <b>Bold</b></i></p>" (convert "*Italic then __Bold__*") check_equal "Should convert nested bold and italic" "<p><i>Italic then <b>Bold</b></i></p>" (convert "*Italic then **Bold***")
] ]
strikethroughTests = strikethroughTests =
@ -93,15 +93,11 @@ orderedListTests =
check_equal "Unordered list in ordered list" "<ol><li>Item 1</li><li>Item 2<ul><li>Item 1</li><li>Item 2</li></ul></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n - Item 1\n * Item 2\n4. Item 3") check_equal "Unordered list in ordered list" "<ol><li>Item 1</li><li>Item 2<ul><li>Item 1</li><li>Item 2</li></ul></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n - Item 1\n * Item 2\n4. Item 3")
] ]
htmlTests =
TestList
[check_equal "Convert HTML element" "<p><center>a</center></p>" (convert "<center>a</center>")]
codeTests = codeTests =
TestList TestList
[ check_equal "Code by itself" "<p><code>Hello world!</code></p>" (convert "`Hello world!`"), [ check_equal "Code by itself" "<p><code>Hello world!</code></p>" (convert "`Hello world!`"),
check_equal "Code in a paragraph" "<p>The following <code>text</code> is code</p>" (convert "The following `text` is code"), check_equal "Code in a paragraph" "<p>The following <code>text</code> is code</p>" (convert "The following `text` is code"),
check_equal "Code across paragraphs (shouldn't work)" "<p>`Incomplete</p><p>Code`</p>" (convert "`Incomplete\n\nCode`") -- At the moment, this is just treated as a syntax error, so nothing is rendered. check_equal "Code across paragraphs (shouldn't work" "<p></p><p></p>" (convert "`Incomplete\n\nCode`") -- At the moment, this is just treated as a syntax error, so nothing is rendered.
] ]
imageTests = imageTests =
@ -153,7 +149,6 @@ tests =
unorderedListTests, unorderedListTests,
orderedListTests, orderedListTests,
imageTests, imageTests,
htmlTests,
figureTests, figureTests,
codeTests, codeTests,
horizontalRuleTests, horizontalRuleTests,

Loading…
Cancel
Save