Huge rewrite - use megaparsec instead of readP
This commit is contained in:
279
src/MdToHTML.hs
279
src/MdToHTML.hs
@@ -4,24 +4,26 @@
|
|||||||
|
|
||||||
module MdToHTML where
|
module MdToHTML where
|
||||||
|
|
||||||
import Control.Applicative
|
import Control.Applicative hiding (many, some)
|
||||||
import Control.Monad
|
import Control.Monad
|
||||||
import Data.Char
|
import Data.Char
|
||||||
import Data.List
|
import Data.List
|
||||||
import Data.Ord (comparing)
|
import Data.Ord (comparing)
|
||||||
|
import qualified Data.Text as T
|
||||||
|
import Data.Void
|
||||||
import Debug.Trace
|
import Debug.Trace
|
||||||
import Text.ParserCombinators.ReadP
|
import Text.Megaparsec
|
||||||
|
import Text.Megaparsec.Char
|
||||||
import Text.Printf
|
import Text.Printf
|
||||||
|
|
||||||
|
type Parser = Parsec Void T.Text
|
||||||
|
|
||||||
type HeaderLevel = Int
|
type HeaderLevel = Int
|
||||||
|
|
||||||
newtype URL = URL {getUrl :: String} deriving (Eq)
|
newtype URL = URL {getUrl :: String} deriving (Eq)
|
||||||
|
|
||||||
newtype ImgPath = ImgPath {getPath :: String} deriving (Eq)
|
newtype ImgPath = ImgPath {getPath :: String} deriving (Eq)
|
||||||
|
|
||||||
parseMany :: ReadP a -> ReadP [a]
|
|
||||||
parseMany = Text.ParserCombinators.ReadP.many
|
|
||||||
|
|
||||||
data MdToken
|
data MdToken
|
||||||
= Document [MdToken]
|
= Document [MdToken]
|
||||||
| Header HeaderLevel MdToken
|
| Header HeaderLevel MdToken
|
||||||
@@ -36,8 +38,8 @@ data MdToken
|
|||||||
| Code MdToken
|
| Code MdToken
|
||||||
| Codeblock String
|
| Codeblock String
|
||||||
| Link MdToken URL
|
| Link MdToken URL
|
||||||
| Image MdToken ImgPath
|
| Image MdToken URL
|
||||||
| Figure MdToken ImgPath
|
| Figure MdToken URL
|
||||||
| Bold MdToken
|
| Bold MdToken
|
||||||
| Italic MdToken
|
| Italic MdToken
|
||||||
| Strikethrough MdToken
|
| Strikethrough MdToken
|
||||||
@@ -48,7 +50,7 @@ data MdToken
|
|||||||
instance Show MdToken where
|
instance Show MdToken where
|
||||||
show (Document tokens) = concatMap show tokens
|
show (Document tokens) = concatMap show tokens
|
||||||
show (Header level token) = "<h" ++ show level ++ ">" ++ show token ++ "</h" ++ show level ++ ">"
|
show (Header level token) = "<h" ++ show level ++ ">" ++ show token ++ "</h" ++ show level ++ ">"
|
||||||
show (Para token) = "<p>" ++ show token ++ "</p>"
|
show (Para token) = "<p>" ++ show token ++ "</p>\n"
|
||||||
show (Line tokens) = concatMap show tokens
|
show (Line tokens) = concatMap show tokens
|
||||||
show Linebreak = "<br>"
|
show Linebreak = "<br>"
|
||||||
show SingleNewline = " "
|
show SingleNewline = " "
|
||||||
@@ -59,8 +61,8 @@ instance Show MdToken where
|
|||||||
show (Code code) = "<code>" ++ show code ++ "</code>"
|
show (Code code) = "<code>" ++ show code ++ "</code>"
|
||||||
show (Codeblock code) = show code
|
show (Codeblock code) = show code
|
||||||
show (Link txt url) = "<a href=\"" ++ getUrl url ++ "\">" ++ show txt ++ "</a>"
|
show (Link txt url) = "<a href=\"" ++ getUrl url ++ "\">" ++ show txt ++ "</a>"
|
||||||
show (Image txt imgPath) = "<img src=\"" ++ getPath imgPath ++ "\"" ++ " alt=\"" ++ show txt ++ "\" />"
|
show (Image txt url) = "<img src=\"" ++ getUrl url ++ "\"" ++ " alt=\"" ++ show txt ++ "\" />"
|
||||||
show (Figure txt imgPath) = "<figure><img src=\"" ++ getPath imgPath ++ "\" alt=\"" ++ show txt ++ "\"/><figcaption aria-hidden=\"true\">" ++ show txt ++ "</figcaption></figure>"
|
show (Figure txt url) = "<figure><img src=\"" ++ getUrl url ++ "\" alt=\"" ++ show txt ++ "\"/><figcaption aria-hidden=\"true\">" ++ show txt ++ "</figcaption></figure>"
|
||||||
show (Bold token) = "<b>" ++ show token ++ "</b>"
|
show (Bold token) = "<b>" ++ show token ++ "</b>"
|
||||||
show (Italic token) = "<i>" ++ show token ++ "</i>"
|
show (Italic token) = "<i>" ++ show token ++ "</i>"
|
||||||
show (Strikethrough token) = "<s>" ++ show token ++ "</s>"
|
show (Strikethrough token) = "<s>" ++ show token ++ "</s>"
|
||||||
@@ -83,27 +85,26 @@ leftmostLongest xs =
|
|||||||
(x : xs) -> Just x
|
(x : xs) -> Just x
|
||||||
|
|
||||||
-- Get the first parse returned by readP_to_S that consumed the most input
|
-- Get the first parse returned by readP_to_S that consumed the most input
|
||||||
leftmostLongestParse :: (Monoid a) => ReadP a -> String -> (a, String)
|
leftmostLongestParse :: (Monoid a) => Parser a -> String -> a
|
||||||
leftmostLongestParse parser input =
|
leftmostLongestParse parser input =
|
||||||
let res = leftmostLongest $ readP_to_S parser input
|
case runParser parser "input" (T.pack input) of
|
||||||
in case res of
|
(Left a) -> mempty
|
||||||
Nothing -> (mempty, mempty)
|
(Right a) -> a
|
||||||
Just x -> x
|
|
||||||
|
|
||||||
specialChars = "\n\\`*_{}[]()<>#+|"
|
specialChars = ">\n\\`*_{}[]#+|"
|
||||||
|
|
||||||
escapableChars = "-~!." ++ specialChars
|
escapableChars = "-~!.$()" ++ specialChars
|
||||||
|
|
||||||
-- Makes a parser greedy. Instead of returning all possible parses, only the longest one is returned.
|
-- Makes a parser greedy. Instead of returning all possible parses, only the longest one is returned.
|
||||||
greedyParse :: ReadP a -> ReadP [a]
|
greedyParse :: Parser a -> Parser [a]
|
||||||
greedyParse parser = do
|
greedyParse parser = do
|
||||||
greedyParse1 parser <++ return []
|
greedyParse1 parser <|> return []
|
||||||
|
|
||||||
-- Like greedyParse, but the parser must succeed atleast once.
|
-- Like greedyParse, but the parser must succeed atleast once.
|
||||||
greedyParse1 :: ReadP a -> ReadP [a]
|
greedyParse1 :: Parser a -> Parser [a]
|
||||||
greedyParse1 parser = do
|
greedyParse1 parser = do
|
||||||
parsed1 <- parser
|
parsed1 <- parser
|
||||||
parsed2 <- greedyParse1 parser <++ return []
|
parsed2 <- greedyParse1 parser <|> return []
|
||||||
return (parsed1 : parsed2)
|
return (parsed1 : parsed2)
|
||||||
|
|
||||||
prepend :: [a] -> [a] -> [a]
|
prepend :: [a] -> [a] -> [a]
|
||||||
@@ -113,122 +114,130 @@ append :: [a] -> [a] -> [a]
|
|||||||
append x1 x2 = x2 ++ x1
|
append x1 x2 = x2 ++ x1
|
||||||
|
|
||||||
-- Parse until EOL or EOF
|
-- Parse until EOL or EOF
|
||||||
parseTillEol :: ReadP String
|
parseTillEol :: Parser String
|
||||||
parseTillEol = manyTill get (void (char '\n') <++ eof)
|
parseTillEol = manyTill anySingle (void (char '\n') <|> eof)
|
||||||
|
|
||||||
-- Takes a list of parsers. Returns a parser that will try them in
|
-- Takes a list of parsers. Returns a parser that will try them in
|
||||||
-- order, moving to the next one only if the current one fails.
|
-- order, moving to the next one only if the current one fails.
|
||||||
fallthroughParser :: [ReadP a] -> ReadP a
|
fallthroughParser :: [Parser a] -> Parser a
|
||||||
fallthroughParser [x] = x
|
fallthroughParser [x] = x
|
||||||
fallthroughParser (x : xs) = x <++ fallthroughParser xs
|
fallthroughParser (x : xs) = try x <|> fallthroughParser xs
|
||||||
|
|
||||||
|
escapeChar :: Char -> String
|
||||||
|
escapeChar '>' = ">"
|
||||||
|
escapeChar '<' = "<"
|
||||||
|
escapeChar '&' = "&"
|
||||||
|
escapeChar x = [x]
|
||||||
|
|
||||||
|
htmlEscapeChars :: T.Text -> T.Text
|
||||||
|
htmlEscapeChars = T.concatMap (T.pack . escapeChar)
|
||||||
|
|
||||||
-- ---------------
|
-- ---------------
|
||||||
|
|
||||||
-- Parse a markdown header, denoted by 1-6 #'s followed by some text, followed by EOL.
|
-- Parse a markdown header, denoted by 1-6 #'s followed by some text, followed by EOL.
|
||||||
parseHeader :: ReadP MdToken
|
parseHeader :: Parser MdToken
|
||||||
parseHeader = do
|
parseHeader = do
|
||||||
skipSpaces
|
space
|
||||||
headers <- munch1 (== '#')
|
headers <- greedyParse1 (char '#')
|
||||||
when
|
when
|
||||||
(length headers > 6)
|
(length headers > 6)
|
||||||
pfail
|
empty
|
||||||
skipSpaces
|
space
|
||||||
parsedText <- manyTill parseLineToken (void (char '\n') <++ eof)
|
parsedText <- manyTill parseLineToken (void (char '\n') <|> eof)
|
||||||
greedyParse (char '\n')
|
greedyParse (char '\n')
|
||||||
return (Header (length headers) (Line parsedText))
|
return (Header (length headers) (Line parsedText))
|
||||||
|
|
||||||
|
asteriskBold = T.pack "**"
|
||||||
|
|
||||||
|
underscoreBold = T.pack "__"
|
||||||
|
|
||||||
-- Parse bold text
|
-- Parse bold text
|
||||||
parseBold :: ReadP MdToken
|
parseBold :: Parser MdToken
|
||||||
parseBold = parseBoldWith "**" <|> parseBoldWith "__"
|
parseBold = parseBoldWith asteriskBold <|> parseBoldWith underscoreBold
|
||||||
where
|
where
|
||||||
parseBoldWith delim = do
|
parseBoldWith delim = do
|
||||||
string delim
|
string delim
|
||||||
inside <- greedyParse1 parseLineToken
|
inside <- someTill parseLineToken $ string delim
|
||||||
string delim
|
|
||||||
return (Bold (Line inside))
|
return (Bold (Line inside))
|
||||||
|
|
||||||
-- Parse italic text
|
-- Parse italic text
|
||||||
parseItalic :: ReadP MdToken
|
parseItalic :: Parser MdToken
|
||||||
parseItalic = parseItalicWith "*" <|> parseItalicWith "_"
|
parseItalic = parseItalicWith '*' <|> parseItalicWith '_'
|
||||||
where
|
where
|
||||||
parseItalicWith delim = do
|
parseItalicWith delim = do
|
||||||
string delim
|
char delim
|
||||||
inside <- greedyParse1 parseLineToken
|
inside <- someTill parseLineToken (char delim)
|
||||||
string delim
|
|
||||||
return (Italic (Line inside))
|
return (Italic (Line inside))
|
||||||
|
|
||||||
-- Parse strikethrough text
|
-- Parse strikethrough text
|
||||||
parseStrikethrough :: ReadP MdToken
|
parseStrikethrough :: Parser MdToken
|
||||||
parseStrikethrough = do
|
parseStrikethrough = do
|
||||||
string "~~"
|
string (T.pack "~~")
|
||||||
inside <- many1 parseLineToken
|
inside <- someTill parseLineToken $ string (T.pack "~~")
|
||||||
string "~~"
|
|
||||||
return (Strikethrough (Line inside))
|
return (Strikethrough (Line inside))
|
||||||
|
|
||||||
-- Parse code
|
-- Parse code
|
||||||
parseCode :: ReadP MdToken
|
parseCode :: Parser MdToken
|
||||||
parseCode = do
|
parseCode = do
|
||||||
string "`"
|
char '`'
|
||||||
inside <- many1 get
|
inside <- manyTill (satisfy (/= '\n')) (char '`')
|
||||||
string "`"
|
return (Code (Unit (concatMap escapeChar inside)))
|
||||||
return (Code (Unit inside))
|
|
||||||
|
|
||||||
-- Parse a link
|
-- Parse a link
|
||||||
parseLink :: ReadP MdToken
|
parseLink :: Parser MdToken
|
||||||
parseLink = do
|
parseLink = do
|
||||||
linkText <- between (string "[") (string "]") (many1 get)
|
char '['
|
||||||
linkURL <- between (string "(") (string ")") (many1 get)
|
linkText <- someTill parseLineToken (char ']')
|
||||||
let parsedLinkText = fst $ leftmostLongestParse parseLine linkText
|
char '('
|
||||||
return $ Link parsedLinkText (URL linkURL)
|
linkURL <- manyTill anySingle (char ')')
|
||||||
|
return $ Link (Line linkText) (URL linkURL)
|
||||||
|
|
||||||
-- Parse a linebreak character
|
-- Parse a linebreak character
|
||||||
parseLinebreak :: ReadP MdToken
|
parseLinebreak :: Parser MdToken
|
||||||
parseLinebreak = do
|
parseLinebreak = do
|
||||||
char ' '
|
char ' '
|
||||||
many1 (char ' ')
|
some (char ' ')
|
||||||
char '\n'
|
char '\n'
|
||||||
return Linebreak
|
return Linebreak
|
||||||
|
|
||||||
parseSingleNewline :: ReadP MdToken
|
parseSingleNewline :: Parser MdToken
|
||||||
parseSingleNewline = do
|
parseSingleNewline = do
|
||||||
char '\n'
|
char '\n'
|
||||||
remaining <- look
|
remaining <- getInput
|
||||||
case remaining of
|
case T.unpack remaining of
|
||||||
[] -> return $ Unit ""
|
[] -> return $ Unit ""
|
||||||
_ -> return SingleNewline
|
_ -> return SingleNewline
|
||||||
|
|
||||||
parseImage :: ReadP MdToken
|
parseImage :: Parser MdToken
|
||||||
parseImage = do
|
parseImage = do
|
||||||
char '!'
|
char '!'
|
||||||
char '['
|
link <- parseLink
|
||||||
altText <- many1 (parseEscapedChar <++ parseUnit)
|
case link of
|
||||||
char ']'
|
Link text path -> return $ Image text path
|
||||||
char '('
|
_ -> empty -- This should never be reached
|
||||||
path <- many1 get
|
|
||||||
char ')'
|
|
||||||
return $ Image (Line altText) (ImgPath path)
|
|
||||||
|
|
||||||
parseFigure = do
|
parseFigure = do
|
||||||
img <- parseImage
|
img <- parseImage
|
||||||
void (string "\n\n") <++ eof
|
void (string doubleNewlineText) <|> eof
|
||||||
case img of
|
case img of
|
||||||
Image text path -> return $ Figure text path
|
Image text path -> return $ Figure text path
|
||||||
_ -> return img
|
_ -> return img
|
||||||
|
|
||||||
-- Parse an escaped character
|
-- Parse an escaped character
|
||||||
parseEscapedChar :: ReadP MdToken
|
parseEscapedChar :: Parser MdToken
|
||||||
parseEscapedChar = do
|
parseEscapedChar = do
|
||||||
char '\\'
|
char '\\'
|
||||||
escapedChar <- choice (map char escapableChars) -- Parse any of the special chars.
|
escapedChar <- choice (map char escapableChars) -- Parse any of the special chars.
|
||||||
return (Unit [escapedChar])
|
return (Unit [escapedChar])
|
||||||
|
|
||||||
-- Parse a character as a Unit.
|
-- Parse a character as a Unit.
|
||||||
parseUnit :: ReadP MdToken
|
parseUnit :: Parser MdToken
|
||||||
parseUnit = do
|
parseUnit = do
|
||||||
text <- satisfy (`notElem` specialChars)
|
-- text <- satisfy (`notElem` specialChars)
|
||||||
|
text <- anySingle
|
||||||
return (Unit [text])
|
return (Unit [text])
|
||||||
|
|
||||||
lineParsers :: [ReadP MdToken]
|
lineParsers :: [Parser MdToken]
|
||||||
lineParsers =
|
lineParsers =
|
||||||
[ parseLinebreak,
|
[ parseLinebreak,
|
||||||
parseSingleNewline,
|
parseSingleNewline,
|
||||||
@@ -242,7 +251,7 @@ lineParsers =
|
|||||||
parseUnit
|
parseUnit
|
||||||
] -- A 'line' doesn't include a 'header'
|
] -- A 'line' doesn't include a 'header'
|
||||||
|
|
||||||
listLineParsers :: [ReadP MdToken]
|
listLineParsers :: [Parser MdToken]
|
||||||
listLineParsers =
|
listLineParsers =
|
||||||
[ parseLinebreak,
|
[ parseLinebreak,
|
||||||
parseEscapedChar,
|
parseEscapedChar,
|
||||||
@@ -256,84 +265,85 @@ listLineParsers =
|
|||||||
] -- A list line cannot contain newlines.
|
] -- A list line cannot contain newlines.
|
||||||
|
|
||||||
-- List of all parsers
|
-- List of all parsers
|
||||||
allParsers :: [ReadP MdToken]
|
allParsers :: [Parser MdToken]
|
||||||
allParsers = parseHeader : lineParsers
|
allParsers = parseHeader : lineParsers
|
||||||
|
|
||||||
-- Parse any of the line tokens.
|
-- Parse any of the line tokens.
|
||||||
parseLineToken :: ReadP MdToken
|
parseLineToken :: Parser MdToken
|
||||||
parseLineToken = fallthroughParser lineParsers
|
parseLineToken = fallthroughParser lineParsers
|
||||||
|
|
||||||
-- Parse any of the list line tokens.
|
-- Parse any of the list line tokens.
|
||||||
parseListLineToken :: ReadP MdToken
|
parseListLineToken :: Parser MdToken
|
||||||
parseListLineToken = fallthroughParser listLineParsers
|
parseListLineToken = fallthroughParser listLineParsers
|
||||||
|
|
||||||
-- Parse a line, consisting of one or more tokens.
|
-- Parse a line, consisting of one or more tokens.
|
||||||
parseLine :: ReadP MdToken
|
parseLine :: Parser MdToken
|
||||||
parseLine = do
|
parseLine = do
|
||||||
skipSpaces
|
space
|
||||||
-- Fail if we have reached the end of the document.
|
-- Fail if we have reached the end of the document.
|
||||||
parsed <- manyTill parseLineToken eof
|
parsed <- manyTill parseLineToken eof
|
||||||
return (Line parsed)
|
return (Line parsed)
|
||||||
|
|
||||||
-- Parse a paragraph, which is a 'Line' (can span multiple actual lines), separated by double-newlines.
|
-- Parse a paragraph, which is a 'Line' (can span multiple actual lines), separated by double-newlines.
|
||||||
parsePara :: ReadP MdToken
|
parsePara :: Parser MdToken
|
||||||
parsePara = do
|
parsePara = do
|
||||||
parseMany (char '\n')
|
space
|
||||||
-- text <- many1 (lookaheadParse (\x -> ((length x) < 2) || (take 2 x) /= "\n\n")) -- Parse until a double-newline.
|
-- text <- many1 (lookaheadParse (\x -> ((length x) < 2) || (take 2 x) /= "\n\n")) -- Parse until a double-newline.
|
||||||
-- string "\n\n" <|> (eof >> return "") -- Consume the next double-newline or EOF.
|
-- string "\n\n" <|> (eof >> return "") -- Consume the next double-newline or EOF.
|
||||||
text <- manyTill get (string "\n\n" <|> (eof >> return ""))
|
parsedText <- someTill parseLineToken (try paraEnding)
|
||||||
when (null text) pfail
|
many (char '\n')
|
||||||
let parsedText = fst $ leftmostLongestParse parseLine text -- Parse a line
|
return (Para (Line parsedText))
|
||||||
parseMany (char '\n')
|
where
|
||||||
return (Para parsedText)
|
paraEnding = void (char '\n' *> (char '\n' <|> lookAhead (char '>'))) <|> eof
|
||||||
|
|
||||||
-- Parse a line starting with '>', return the line except for the '>'.
|
-- Parse a line starting with '>', return the line except for the '>'.
|
||||||
parseQuotedLine :: ReadP String
|
parseQuotedLine :: Parser String
|
||||||
parseQuotedLine = do
|
parseQuotedLine = do
|
||||||
char '>'
|
char '>'
|
||||||
greedyParse (char ' ' +++ char '\t')
|
many (char ' ' <|> char '\t')
|
||||||
restOfLine <- munch (/= '\n')
|
restOfLine <- many (satisfy (/= '\n'))
|
||||||
Text.ParserCombinators.ReadP.optional (char '\n') >> return ""
|
void (char '\n') <|> eof
|
||||||
return restOfLine
|
return restOfLine
|
||||||
|
|
||||||
-- Parse many 'quoted lines' until I see a non-quoted line.
|
-- Parse many 'quoted lines' until I see a non-quoted line.
|
||||||
parseQuotedLines :: ReadP [String]
|
parseQuotedLines :: Parser [String]
|
||||||
parseQuotedLines =
|
parseQuotedLines = some parseQuotedLine
|
||||||
greedyParse1 $ do
|
|
||||||
look >>= \line ->
|
-- some $ do
|
||||||
case line of
|
-- getInput >>= \line ->
|
||||||
('>' : _) -> parseQuotedLine
|
-- case T.unpack line of
|
||||||
_ -> pfail
|
-- ('>' : _) -> parseQuotedLine
|
||||||
|
-- _ -> empty
|
||||||
|
|
||||||
-- Parse a blockquote, which is a greater-than sign followed by a paragraph.
|
-- Parse a blockquote, which is a greater-than sign followed by a paragraph.
|
||||||
parseBlockquote :: ReadP MdToken
|
parseBlockquote :: Parser MdToken
|
||||||
parseBlockquote = do
|
parseBlockquote = do
|
||||||
quotedLines <- parseQuotedLines
|
quotedLines <- parseQuotedLines
|
||||||
-- remaining <- look
|
-- remaining <- look
|
||||||
-- let quotedLines = fst $ leftmostLongestParse parseQuotedLines remaining
|
-- let quotedLines = fst $ leftmostLongestParse parseQuotedLines remaining
|
||||||
-- string (init $ unlines quotedLines)
|
-- string (init $ unlines quotedLines)
|
||||||
let parsedQuotedLines = fst $ leftmostLongestParse (many1 (parseBlockquote <++ parsePara)) (init $ unlines quotedLines) -- unlines joins the lines together with a newline, and adds a trailing newline. init removes the trailing newline.
|
let parsedQuotedLines = leftmostLongestParse (some (parseBlockquote <|> parsePara)) (init $ unlines quotedLines) -- unlines joins the lines together with a newline, and adds a trailing newline. init removes the trailing newline.
|
||||||
return (Blockquote parsedQuotedLines)
|
return (Blockquote parsedQuotedLines)
|
||||||
|
|
||||||
-- Parse a nested list item.
|
-- Parse a nested list item.
|
||||||
parseListNested :: ReadP MdToken
|
parseListNested :: Parser MdToken
|
||||||
parseListNested = do
|
parseListNested = do
|
||||||
let firstCharParser = string " " <++ string "\t"
|
let firstCharParser = string (T.pack " ") <|> string (T.pack "\t")
|
||||||
let restOfLineParser = manyTill get (void (char '\n') <++ eof)
|
let restOfLineParser = manyTill anySingle (void (char '\n') <|> eof)
|
||||||
lines <- greedyParse1 (firstCharParser *> restOfLineParser)
|
lines <- greedyParse1 (firstCharParser *> restOfLineParser)
|
||||||
let linesParsed = fst $ leftmostLongestParse (parseUnorderedList <++ parseOrderedList) (init $ unlines lines)
|
let linesParsed = leftmostLongestParse (parseUnorderedList <|> parseOrderedList) (init $ unlines lines)
|
||||||
when (null (show linesParsed)) pfail
|
when (null (show linesParsed)) empty
|
||||||
return linesParsed
|
return linesParsed
|
||||||
|
|
||||||
-- Parse an unordered list line item.
|
-- Parse an unordered list line item.
|
||||||
parseUListLineItem :: ReadP MdToken
|
parseUListLineItem :: Parser MdToken
|
||||||
parseUListLineItem = do
|
parseUListLineItem = do
|
||||||
firstChar <- choice (map char ['*', '+', '-'])
|
firstChar <- choice (map char ['*', '+', '-'])
|
||||||
char ' ' -- At least one space between list indicator and list text.
|
char ' ' -- At least one space between list indicator and list text.
|
||||||
parseListLineItemCommon
|
parseListLineItemCommon
|
||||||
|
|
||||||
-- Parse an ordered list line item.
|
-- Parse an ordered list line item.
|
||||||
parseOListLineItem :: ReadP MdToken
|
parseOListLineItem :: Parser MdToken
|
||||||
parseOListLineItem = do
|
parseOListLineItem = do
|
||||||
num <- greedyParse1 (satisfy isDigit)
|
num <- greedyParse1 (satisfy isDigit)
|
||||||
char '.'
|
char '.'
|
||||||
@@ -341,23 +351,22 @@ parseOListLineItem = do
|
|||||||
parseListLineItemCommon
|
parseListLineItemCommon
|
||||||
|
|
||||||
-- Common code for parsing list line items
|
-- Common code for parsing list line items
|
||||||
parseListLineItemCommon :: ReadP MdToken
|
parseListLineItemCommon :: Parser MdToken
|
||||||
parseListLineItemCommon = do
|
parseListLineItemCommon = do
|
||||||
skipSpaces
|
space
|
||||||
restOfLine <- many1 parseListLineToken
|
restOfLine <- manyTill parseListLineToken (void (char '\n') <|> eof)
|
||||||
void (char '\n') <++ eof
|
nestedList <- parseListNested <|> return (Unit "")
|
||||||
nestedList <- parseListNested <++ return (Unit "")
|
|
||||||
return $ Line [Line restOfLine, nestedList]
|
return $ Line [Line restOfLine, nestedList]
|
||||||
|
|
||||||
-- Parse an unordered list paragraph item.
|
-- Parse an unordered list paragraph item.
|
||||||
parseUListParaItem :: ReadP MdToken
|
parseUListParaItem :: Parser MdToken
|
||||||
parseUListParaItem = do
|
parseUListParaItem = do
|
||||||
firstLine <- parseUListLineItem
|
firstLine <- parseUListLineItem
|
||||||
res <- parseListParaItemCommon
|
res <- parseListParaItemCommon
|
||||||
return $ Document (Para firstLine : res) -- I only wrap this in a document because I want some way of converting [MdToken] to MdToken, without any overhead. There is no other reason to wrap it in a Document.
|
return $ Document (Para firstLine : res) -- I only wrap this in a document because I want some way of converting [MdToken] to MdToken, without any overhead. There is no other reason to wrap it in a Document.
|
||||||
|
|
||||||
-- Parse an unordered list paragraph item.
|
-- Parse an unordered list paragraph item.
|
||||||
parseOListParaItem :: ReadP MdToken
|
parseOListParaItem :: Parser MdToken
|
||||||
parseOListParaItem = do
|
parseOListParaItem = do
|
||||||
firstLine <- parseOListLineItem
|
firstLine <- parseOListLineItem
|
||||||
res <- parseListParaItemCommon
|
res <- parseListParaItemCommon
|
||||||
@@ -367,48 +376,54 @@ parseOListParaItem = do
|
|||||||
-- A list paragraph item is defined as a line item, followed by an empty line, followed by one or more
|
-- A list paragraph item is defined as a line item, followed by an empty line, followed by one or more
|
||||||
-- lines indented by a space or tab.
|
-- lines indented by a space or tab.
|
||||||
-- A list paragraph item can also be a blockquote.
|
-- A list paragraph item can also be a blockquote.
|
||||||
parseListParaItemCommon :: ReadP [MdToken]
|
parseListParaItemCommon :: Parser [MdToken]
|
||||||
parseListParaItemCommon = do
|
parseListParaItemCommon = do
|
||||||
char '\n'
|
char '\n'
|
||||||
lines <- greedyParse1 ((string " " <|> string "\t") *> parseTillEol)
|
lines <- greedyParse1 ((string (T.pack " ") <|> string (T.pack "\t")) *> parseTillEol)
|
||||||
let res = fst $ leftmostLongestParse (greedyParse1 parseBlockquote <++ greedyParse1 parsePara) (init $ unlines lines)
|
let res = leftmostLongestParse (greedyParse1 parseBlockquote <|> greedyParse1 parsePara) (init $ unlines lines)
|
||||||
char '\n'
|
char '\n'
|
||||||
return res -- I only wrap this in a document because I want some way of converting [MdToken] to MdToken, without any overhead. There is no other reason to wrap it in a Document.
|
return res -- I only wrap this in a document because I want some way of converting [MdToken] to MdToken, without any overhead. There is no other reason to wrap it in a Document.
|
||||||
|
|
||||||
-- Parse an unordered list item, which can be a line item or another list.
|
-- Parse an unordered list item, which can be a line item or another list.
|
||||||
parseUListItem :: ReadP MdToken
|
parseUListItem :: Parser MdToken
|
||||||
parseUListItem = parseUListParaItem <++ parseUListLineItem
|
parseUListItem = try parseUListParaItem <|> parseUListLineItem
|
||||||
|
|
||||||
-- Parse an unordered list.
|
-- Parse an unordered list.
|
||||||
parseUnorderedList :: ReadP MdToken
|
parseUnorderedList :: Parser MdToken
|
||||||
parseUnorderedList = do
|
parseUnorderedList = do
|
||||||
lineItems <- greedyParse1 parseUListItem
|
lineItems <- some parseUListItem
|
||||||
void (char '\n') <++ eof -- A list must end in an extra newline or eof
|
void (char '\n') <|> eof -- A list must end in an extra newline or eof
|
||||||
return $ UnordList lineItems
|
return $ UnordList lineItems
|
||||||
|
|
||||||
-- --------
|
-- --------
|
||||||
|
|
||||||
parseOListItem :: ReadP MdToken
|
parseOListItem :: Parser MdToken
|
||||||
parseOListItem = parseOListParaItem <++ parseOListLineItem
|
parseOListItem = try parseOListParaItem <|> parseOListLineItem
|
||||||
|
|
||||||
-- Parses the first element of an ordered list, which must start with '1.'
|
-- Parses the first element of an ordered list, which must start with '1.'
|
||||||
parseFirstOListItem :: ReadP MdToken
|
parseFirstOListItem :: Parser MdToken
|
||||||
parseFirstOListItem = do
|
parseFirstOListItem = do
|
||||||
remaining <- look
|
remaining <- getInput
|
||||||
when (take 2 remaining /= "1.") pfail
|
when (take 2 (T.unpack remaining) /= "1.") empty
|
||||||
parseOListLineItem
|
parseOListLineItem
|
||||||
|
|
||||||
parseOrderedList :: ReadP MdToken
|
parseOrderedList :: Parser MdToken
|
||||||
parseOrderedList = do
|
parseOrderedList = do
|
||||||
firstLine <- parseFirstOListItem
|
firstLine <- parseFirstOListItem
|
||||||
lineItems <- greedyParse1 parseOListItem
|
lineItems <- some parseOListItem
|
||||||
void (char '\n') <++ eof
|
void (char '\n') <|> eof
|
||||||
return $ OrdList (firstLine : lineItems)
|
return $ OrdList (firstLine : lineItems)
|
||||||
|
|
||||||
parseHorizontalRule :: ReadP MdToken
|
horizontalRuleText :: T.Text
|
||||||
parseHorizontalRule = string "---" *> (void (string "\n\n") <++ eof) *> return HorizontalRule
|
horizontalRuleText = T.pack "---"
|
||||||
|
|
||||||
documentParsers :: [ReadP MdToken]
|
doubleNewlineText :: T.Text
|
||||||
|
doubleNewlineText = T.pack "\n\n"
|
||||||
|
|
||||||
|
parseHorizontalRule :: Parser MdToken
|
||||||
|
parseHorizontalRule = string horizontalRuleText *> (void (string doubleNewlineText) <|> eof) *> return HorizontalRule
|
||||||
|
|
||||||
|
documentParsers :: [Parser MdToken]
|
||||||
documentParsers =
|
documentParsers =
|
||||||
[ parseHorizontalRule,
|
[ parseHorizontalRule,
|
||||||
parseHeader,
|
parseHeader,
|
||||||
@@ -420,7 +435,7 @@ documentParsers =
|
|||||||
]
|
]
|
||||||
|
|
||||||
-- Parse a document, which is multiple paragraphs.
|
-- Parse a document, which is multiple paragraphs.
|
||||||
parseDocument :: ReadP MdToken
|
parseDocument :: Parser MdToken
|
||||||
parseDocument = do
|
parseDocument = do
|
||||||
res <- manyTill (fallthroughParser documentParsers) eof
|
res <- manyTill (fallthroughParser documentParsers) eof
|
||||||
return (Document res)
|
return (Document res)
|
||||||
|
@@ -7,7 +7,7 @@ check_equal :: String -> String -> String -> Test
|
|||||||
check_equal desc expected actual = TestCase (assertEqual desc expected actual)
|
check_equal desc expected actual = TestCase (assertEqual desc expected actual)
|
||||||
|
|
||||||
convert :: String -> String
|
convert :: String -> String
|
||||||
convert md = show . fst $ leftmostLongestParse parseDocument md
|
convert md = show $ leftmostLongestParse parseDocument md
|
||||||
|
|
||||||
headerTests =
|
headerTests =
|
||||||
TestList
|
TestList
|
||||||
@@ -24,8 +24,8 @@ boldTests =
|
|||||||
[ check_equal "Should convert bold" "<p><b>Hello</b></p>" (convert "__Hello__"),
|
[ check_equal "Should convert bold" "<p><b>Hello</b></p>" (convert "__Hello__"),
|
||||||
check_equal "Should convert italic" "<p><i>Hello</i></p>" (convert "_Hello_"),
|
check_equal "Should convert italic" "<p><i>Hello</i></p>" (convert "_Hello_"),
|
||||||
check_equal "Should convert bold and italic in a sentence" "<p>It <i>is</i> a <b>wonderful</b> day</p>" (convert "It _is_ a __wonderful__ day"),
|
check_equal "Should convert bold and italic in a sentence" "<p>It <i>is</i> a <b>wonderful</b> day</p>" (convert "It _is_ a __wonderful__ day"),
|
||||||
check_equal "Should convert nested bold and italic" "<p><b>Bold then <i>Italic</i></b></p>" (convert "**Bold then *Italic***"),
|
check_equal "Should convert nested bold and italic" "<p><b>Bold then <i>Italic</i></b></p>" (convert "**Bold then _Italic_**"),
|
||||||
check_equal "Should convert nested bold and italic" "<p><i>Italic then <b>Bold</b></i></p>" (convert "*Italic then **Bold***")
|
check_equal "Should convert nested bold and italic" "<p><i>Italic then <b>Bold</b></i></p>" (convert "*Italic then __Bold__*")
|
||||||
]
|
]
|
||||||
|
|
||||||
strikethroughTests =
|
strikethroughTests =
|
||||||
@@ -93,11 +93,15 @@ orderedListTests =
|
|||||||
check_equal "Unordered list in ordered list" "<ol><li>Item 1</li><li>Item 2<ul><li>Item 1</li><li>Item 2</li></ul></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n - Item 1\n * Item 2\n4. Item 3")
|
check_equal "Unordered list in ordered list" "<ol><li>Item 1</li><li>Item 2<ul><li>Item 1</li><li>Item 2</li></ul></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n - Item 1\n * Item 2\n4. Item 3")
|
||||||
]
|
]
|
||||||
|
|
||||||
|
htmlTests =
|
||||||
|
TestList
|
||||||
|
[check_equal "Convert HTML element" "<p><center>a</center></p>" (convert "<center>a</center>")]
|
||||||
|
|
||||||
codeTests =
|
codeTests =
|
||||||
TestList
|
TestList
|
||||||
[ check_equal "Code by itself" "<p><code>Hello world!</code></p>" (convert "`Hello world!`"),
|
[ check_equal "Code by itself" "<p><code>Hello world!</code></p>" (convert "`Hello world!`"),
|
||||||
check_equal "Code in a paragraph" "<p>The following <code>text</code> is code</p>" (convert "The following `text` is code"),
|
check_equal "Code in a paragraph" "<p>The following <code>text</code> is code</p>" (convert "The following `text` is code"),
|
||||||
check_equal "Code across paragraphs (shouldn't work" "<p></p><p></p>" (convert "`Incomplete\n\nCode`") -- At the moment, this is just treated as a syntax error, so nothing is rendered.
|
check_equal "Code across paragraphs (shouldn't work)" "<p>`Incomplete</p><p>Code`</p>" (convert "`Incomplete\n\nCode`") -- At the moment, this is just treated as a syntax error, so nothing is rendered.
|
||||||
]
|
]
|
||||||
|
|
||||||
imageTests =
|
imageTests =
|
||||||
@@ -149,6 +153,7 @@ tests =
|
|||||||
unorderedListTests,
|
unorderedListTests,
|
||||||
orderedListTests,
|
orderedListTests,
|
||||||
imageTests,
|
imageTests,
|
||||||
|
htmlTests,
|
||||||
figureTests,
|
figureTests,
|
||||||
codeTests,
|
codeTests,
|
||||||
horizontalRuleTests,
|
horizontalRuleTests,
|
||||||
|
Reference in New Issue
Block a user