Compare commits

...

31 Commits

Author SHA1 Message Date
Aadhavan Srinivasan 7d45b1123f Renamed md-to-html-runner to mdtoh 2 weeks ago
Aadhavan Srinivasan 9627abcd12 Updated test 3 weeks ago
Aadhavan Srinivasan 82277e9ea8 Only add newlines for linebreak when pretty printing 3 weeks ago
Aadhavan Srinivasan d074b0131c Parse linebreaks as a backslash before a newline 3 weeks ago
Aadhavan Srinivasan 57cb3e68fa Import module for word wrapping; add package to cabal file 3 weeks ago
Aadhavan Srinivasan 4e9f84c2bb Add function to pretty print; commented out my word wrap and use a
built-in one instead
3 weeks ago
Aadhavan Srinivasan e025614324 Print extra newline if output text doesn't include a newline 3 weeks ago
Aadhavan Srinivasan e711444066 Add more packages to cabal file 3 weeks ago
Aadhavan Srinivasan 6b99a1835d Created a separate parser list for all parsers (except the unit parser
is replaced with the non-newline unit parser); use that parser when
parsing list lines
3 weeks ago
Aadhavan Srinivasan 04167e0f96 Parse CSS classes in image and figure 3 weeks ago
Aadhavan Srinivasan 0528e813c5 Parser for CSS classes 3 weeks ago
Aadhavan Srinivasan b1b99189c9 Link tect can be empty; inline code cannot be empty and can have nested
backticks; created a unit parser for all characters except newline
3 weeks ago
Aadhavan Srinivasan ade3768e29 Try and backtrack 3 weeks ago
Aadhavan Srinivasan fd6d39ecd6 Parse space at beginning of list 3 weeks ago
Aadhavan Srinivasan 0f04342867 More trying and backtracking; parse and discard extraneous spaces at
beginning of list
3 weeks ago
Aadhavan Srinivasan 80ef93bbc9 Try parsing an ordered list item, backtrack if not possible 3 weeks ago
Aadhavan Srinivasan b73d4131b6 Added support for tables and codeblocks
Defined the types, defined 'show', created the parsers, added them to
parser list
3 weeks ago
Aadhavan Srinivasan c48b8c5ae8 Images and figures now support CSS classes 3 weeks ago
Aadhavan Srinivasan cf4282b26e More imports 3 weeks ago
Aadhavan Srinivasan 7b40d6fe7c Imports 3 weeks ago
Aadhavan Srinivasan c4255d4578 Added a test for a list with just one item 3 weeks ago
Aadhavan Srinivasan dcbbff13cb Spacing change 3 weeks ago
Aadhavan Srinivasan 592fad2b46 Added tests for tables 3 weeks ago
Aadhavan Srinivasan b8ba27f240 Strip newlines when comparing in test 4 weeks ago
Aadhavan Srinivasan bb08b40512 Replaced nested bold with asterisks, with asterisks and underscores 4 weeks ago
Aadhavan Srinivasan 93548a4533 Never mind, doesn't seem to work well 4 weeks ago
Aadhavan Srinivasan 160cb0edeb Trying to get nested bold and italic to work 4 weeks ago
Aadhavan Srinivasan 2893fa25e6 Include new packages 4 weeks ago
Aadhavan Srinivasan 324e5da82d Use new definition for lefmostLongestParse 4 weeks ago
Aadhavan Srinivasan 05e5548aa9 Huge rewrite - use megaparsec instead of readP 4 weeks ago
Aadhavan Srinivasan 1915628a2b Used 'in-order' parsing for headers, instead of leftmostLongestParse 1 month ago

@ -23,5 +23,8 @@ main = do
fileContents <- case args of
[] -> getContents
x : _ -> readFile x
let res = fst $ leftmostLongestParse parseDocument fileContents
print res
let res = leftmostLongestParse parseDocument fileContents
let toPrint = prettyPrint res
case reverse toPrint of
'\n' : _ -> putStr toPrint
_ -> putStrLn toPrint

@ -58,9 +58,14 @@ library
exposed-modules: MdToHTML
other-modules: MdToHtmlTest
build-depends: base ^>=4.19.1.0,
HUnit
executable md-to-html-runner
HUnit,
megaparsec,
parser-combinators,
text,
MissingH,
word-wrap
executable mdtoh
-- Import common warning flags.
import: warnings

@ -4,24 +4,31 @@
module MdToHTML where
import Control.Applicative
import Control.Applicative hiding (many, some)
import Control.Monad
import Control.Monad.Combinators (count)
import Data.Char
import Data.List
import Data.Ord (comparing)
import Data.String.Utils
import qualified Data.Text as T
import Data.Void
import Debug.Trace
import Text.ParserCombinators.ReadP
import Text.Megaparsec
import Text.Megaparsec.Char
import Text.Printf
import Text.Wrap
type Parser = Parsec Void T.Text
type HeaderLevel = Int
type CssClass = String
newtype URL = URL {getUrl :: String} deriving (Eq)
newtype ImgPath = ImgPath {getPath :: String} deriving (Eq)
parseMany :: ReadP a -> ReadP [a]
parseMany = Text.ParserCombinators.ReadP.many
data MdToken
= Document [MdToken]
| Header HeaderLevel MdToken
@ -34,10 +41,11 @@ data MdToken
| UnordList [MdToken]
| OrdList [MdToken]
| Code MdToken
| Codeblock String
| Table [[MdToken]]
| Codeblock MdToken
| Link MdToken URL
| Image MdToken ImgPath
| Figure MdToken ImgPath
| Image MdToken URL (Maybe [CssClass])
| Figure MdToken URL (Maybe [CssClass])
| Bold MdToken
| Italic MdToken
| Strikethrough MdToken
@ -50,22 +58,34 @@ instance Show MdToken where
show (Header level token) = "<h" ++ show level ++ ">" ++ show token ++ "</h" ++ show level ++ ">"
show (Para token) = "<p>" ++ show token ++ "</p>"
show (Line tokens) = concatMap show tokens
show Linebreak = "<br>"
show Linebreak = "<br />"
show SingleNewline = " "
show HorizontalRule = "<hr>"
show (Blockquote tokens) = "<blockquote>" ++ concatMap show tokens ++ "</blockquote>"
show (UnordList tokens) = "<ul>" ++ concatMap (prepend "<li>" . append "</li>" . show) tokens ++ "</ul>"
show (OrdList tokens) = "<ol>" ++ concatMap (prepend "<li>" . append "</li>" . show) tokens ++ "</ol>"
show (Code code) = "<code>" ++ show code ++ "</code>"
show (Codeblock code) = show code
show (Code code) = "<code>" ++ strip (show code) ++ "</code>"
show (Table (thead : tokenGrid)) = "<table><thead><tr>" ++ concatMap (\x -> "<th>" ++ rstrip (show x) ++ "</th>") thead ++ "</tr></thead>" ++ "<tbody>" ++ concatMap (\x -> "<tr>" ++ concatMap (\y -> "<td>" ++ rstrip (show y) ++ "</td>") x ++ "</tr>") tokenGrid ++ "</tbody></table>"
show (Codeblock code) = "<pre><code>" ++ show code ++ "</code></pre>"
show (Link txt url) = "<a href=\"" ++ getUrl url ++ "\">" ++ show txt ++ "</a>"
show (Image txt imgPath) = "<img src=\"" ++ getPath imgPath ++ "\"" ++ " alt=\"" ++ show txt ++ "\" />"
show (Figure txt imgPath) = "<figure><img src=\"" ++ getPath imgPath ++ "\" alt=\"" ++ show txt ++ "\"/><figcaption aria-hidden=\"true\">" ++ show txt ++ "</figcaption></figure>"
show (Image txt url cssClasses) = "<img src=\"" ++ getUrl url ++ "\"" ++ " alt=\"" ++ show txt ++ "\"" ++ maybe "" (\classes -> " class=\"" ++ unwords classes ++ "\"") cssClasses ++ "/>"
show (Figure txt url cssClasses) = "<figure><img src=\"" ++ getUrl url ++ "\" alt=\"" ++ show txt ++ "\"" ++ maybe "" (\classes -> " class=\"" ++ unwords classes ++ "\"") cssClasses ++ "/><figcaption aria-hidden=\"true\">" ++ show txt ++ "</figcaption></figure>"
show (Bold token) = "<b>" ++ show token ++ "</b>"
show (Italic token) = "<i>" ++ show token ++ "</i>"
show (Strikethrough token) = "<s>" ++ show token ++ "</s>"
show (Unit unit) = printf "%s" unit
-- | Render a token as human-readable HTML.
--
-- Most tokens are rendered exactly as 'show' renders them. The exceptions are:
--
-- * paragraphs, whose contents are word-wrapped at 70 columns and followed by a newline;
-- * tables, where every row, header cell and data cell is placed on its own line;
-- * linebreaks, which get a trailing newline;
-- * lines and documents, whose children are pretty-printed recursively.
prettyPrint :: MdToken -> String
prettyPrint mdToken = case mdToken of
  Para inner ->
    "<p>" ++ T.unpack (wrapText defaultWrapSettings 70 (T.pack $ prettyPrint inner)) ++ "</p>\n"
  Table (headerRow : bodyRows) ->
    concat
      [ "<table>\n<thead>\n<tr>\n",
        concatMap headerCell headerRow,
        "</tr>\n</thead>\n",
        "<tbody>\n",
        concatMap bodyRow bodyRows,
        "</tbody>\n</table>\n"
      ]
  Linebreak -> "<br />\n"
  Line children -> concatMap prettyPrint children
  Document children -> concatMap prettyPrint children
  -- Everything else (including an empty table) is delegated to 'show'.
  other -> show other
  where
    -- Trailing whitespace inside a cell is dropped before the closing tag.
    headerCell cell = "<th>" ++ rstrip (prettyPrint cell) ++ "</th>\n"
    dataCell cell = "<td>" ++ rstrip (prettyPrint cell) ++ "</td>\n"
    bodyRow cells = "<tr>\n" ++ concatMap dataCell cells ++ "</tr>\n"
instance Semigroup MdToken where
a <> b = Document [a, b]
@ -83,27 +103,26 @@ leftmostLongest xs =
(x : xs) -> Just x
-- Run the parser on the given input and return its result, or 'mempty' if the parse fails.
leftmostLongestParse :: (Monoid a) => ReadP a -> String -> (a, String)
leftmostLongestParse :: (Monoid a) => Parser a -> String -> a
leftmostLongestParse parser input =
let res = leftmostLongest $ readP_to_S parser input
in case res of
Nothing -> (mempty, mempty)
Just x -> x
case runParser parser "input" (T.pack input) of
(Left a) -> mempty
(Right a) -> a
specialChars = "\n\\`*_{}[]()<>#+|"
specialChars = ">\n\\`*_{}[]#+|"
escapableChars = "-~!." ++ specialChars
escapableChars = "-~!.$()" ++ specialChars
-- Makes a parser greedy. Instead of returning all possible parses, only the longest one is returned.
greedyParse :: ReadP a -> ReadP [a]
greedyParse :: Parser a -> Parser [a]
greedyParse parser = do
greedyParse1 parser <++ return []
greedyParse1 parser <|> return []
-- Like greedyParse, but the parser must succeed at least once.
greedyParse1 :: ReadP a -> ReadP [a]
greedyParse1 :: Parser a -> Parser [a]
greedyParse1 parser = do
parsed1 <- parser
parsed2 <- greedyParse1 parser <++ return []
parsed2 <- greedyParse1 parser <|> return []
return (parsed1 : parsed2)
prepend :: [a] -> [a] -> [a]
@ -113,124 +132,190 @@ append :: [a] -> [a] -> [a]
append x1 x2 = x2 ++ x1
-- Parse until EOL or EOF
parseTillEol :: ReadP String
parseTillEol = manyTill get (void (char '\n') <++ eof)
parseTillEol :: Parser String
parseTillEol = manyTill anySingle (void (char '\n') <|> eof)
-- Takes a list of parsers. Returns a parser that will try them in
-- order, moving to the next one only if the current one fails.
fallthroughParser :: [ReadP a] -> ReadP a
fallthroughParser :: [Parser a] -> Parser a
fallthroughParser [x] = x
fallthroughParser (x : xs) = x <++ fallthroughParser xs
fallthroughParser (x : xs) = try x <|> fallthroughParser xs
-- | Convert a single character to its HTML-safe form.
-- The characters @>@, @<@ and @&@ become their named entities;
-- every other character is returned unchanged as a one-character string.
escapeChar :: Char -> String
escapeChar c = case c of
  '>' -> "&gt;"
  '<' -> "&lt;"
  '&' -> "&amp;"
  _ -> [c]
-- | HTML-escape every character of a 'T.Text' value using 'escapeChar'.
htmlEscapeChars :: T.Text -> T.Text
htmlEscapeChars txt = T.pack (concatMap escapeChar (T.unpack txt))
-- -- Wraps a list of words after (at most) the given number of characters, trying to prevent word-breaks
-- wordwrap :: Int -> String -> String
-- wordwrap wraplength str = if (length str) < wraplength
-- then str
-- else
-- let spaceIndex = lastgtSpaceIndex 0 (takeRev (length str) - wraplength str)
--
-- where
-- takeRev n = (reverse . take n . reverse)
-- lastSpaceIndex counter str = case str of
-- [] -> counter
-- x:xs -> if (isSpace x) counter else lastSpaceIndex counter+1 xs
-- ---------------
-- Parse a markdown header, denoted by 1-6 #'s followed by some text, followed by EOL.
parseHeader :: ReadP MdToken
parseHeader :: Parser MdToken
parseHeader = do
skipSpaces
headers <- munch1 (== '#')
space
headers <- greedyParse1 (char '#')
when
(length headers > 6)
pfail
skipSpaces
text <- munch1 (/= '\n')
-- Text.ParserCombinators.ReadP.optional (char '\n')
skipSpaces
let parsedText = fst $ leftmostLongestParse parseLine text
return (Header (length headers) parsedText)
empty
space
parsedText <- manyTill parseLineToken (void (char '\n') <|> eof)
greedyParse (char '\n')
return (Header (length headers) (Line parsedText))
-- | Delimiter that opens and closes bold text written with asterisks.
asteriskBold :: T.Text
asteriskBold = T.pack "**"

-- | Delimiter that opens and closes bold text written with underscores.
underscoreBold :: T.Text
underscoreBold = T.pack "__"
-- Parse bold text
parseBold :: ReadP MdToken
parseBold = parseBoldWith "**" <|> parseBoldWith "__"
parseBold :: Parser MdToken
parseBold = parseBoldWith asteriskBold <|> parseBoldWith underscoreBold
where
parseBoldWith delim = do
string delim
inside <- greedyParse1 parseLineToken
string delim
inside <- someTill parseLineToken $ string delim
return (Bold (Line inside))
-- Parse italic text
parseItalic :: ReadP MdToken
parseItalic = parseItalicWith "*" <|> parseItalicWith "_"
parseItalic :: Parser MdToken
parseItalic = parseItalicWith '*' <|> parseItalicWith '_'
where
parseItalicWith delim = do
string delim
inside <- greedyParse1 parseLineToken
string delim
char delim
inside <- someTill parseLineToken (char delim)
return (Italic (Line inside))
-- Parse strikethrough text
parseStrikethrough :: ReadP MdToken
parseStrikethrough :: Parser MdToken
parseStrikethrough = do
string "~~"
inside <- many1 parseLineToken
string "~~"
string (T.pack "~~")
inside <- someTill parseLineToken $ string (T.pack "~~")
return (Strikethrough (Line inside))
-- Parse code
parseCode :: ReadP MdToken
parseCode :: Parser MdToken
parseCode = do
string "`"
inside <- many1 get
string "`"
return (Code (Unit inside))
opening <- some $ char '`'
inside <- someTill (satisfy (/= '\n')) (char '`')
closing <- count (length opening - 1) (char '`')
return (Code (Unit (concatMap escapeChar inside)))
-- Parse a link
parseLink :: ReadP MdToken
parseLink :: Parser MdToken
parseLink = do
linkText <- between (string "[") (string "]") (many1 get)
linkURL <- between (string "(") (string ")") (many1 get)
let parsedLinkText = fst $ leftmostLongestParse parseLine linkText
return $ Link parsedLinkText (URL linkURL)
char '['
linkText <- manyTill parseLineToken (char ']')
char '('
linkURL <- manyTill anySingle (char ')')
return $ Link (Line linkText) (URL linkURL)
-- Parse a linebreak character
parseLinebreak :: ReadP MdToken
parseLinebreak = do
char ' '
many1 (char ' ')
parseLinebreak :: Parser MdToken
parseLinebreak = parseLinebreakSpace <|> parseLinebreakBackslash
where
parseLinebreakSpace = do
char ' '
some (char ' ')
char '\n'
return Linebreak
parseLinebreakBackslash = try $ do
char '\\'
char '\n'
return Linebreak
-- | Parse a single table row.
-- A row is a leading @|@ followed by one or more cells. For each cell, any
-- leading spaces and tabs are skipped, then one or more list-line tokens are
-- read up to (and including) the closing @|@. Every cell is returned as a 'Line'.
parseTableRow :: Parser [MdToken]
parseTableRow = map Line <$> (char '|' *> some tableCell)
  where
    isPadding ch = ch == ' ' || ch == '\t'
    tableCell = many (satisfy isPadding) *> someTill parseListLineToken (char '|')
parseTable :: Parser MdToken
parseTable = do
tableHead <- parseTableRow
char '\n'
return Linebreak
char '|'
sepEndBy1 (some (char '-')) (char '|') *> char '\n'
tableBody <- sepEndBy parseTableRow (char '\n')
many (char '\n') -- Parse trailing newlines, if any
return $ Table (tableHead : tableBody)
parseSingleNewline :: ReadP MdToken
parseSingleNewline :: Parser MdToken
parseSingleNewline = do
char '\n'
remaining <- look
case remaining of
remaining <- getInput
case T.unpack remaining of
[] -> return $ Unit ""
_ -> return SingleNewline
parseImage :: ReadP MdToken
-- | Parse a brace-delimited list of CSS classes, e.g. @{.wide .rounded}@.
-- At least one class is required. Each class is written as a dot followed by
-- its name; the dot is not part of the returned name.
parseCssClasses :: Parser [CssClass]
parseCssClasses = char '{' *> some cssClass <* char '}'
  where
    -- A class name, followed by any amount of whitespace (which is discarded).
    cssClass :: Parser CssClass
    cssClass = char '.' *> ((:) <$> leadingChar <*> many trailingChar) <* space

    -- The first character of a name: underscore, hyphen or a letter.
    leadingChar :: Parser Char
    leadingChar = char '_' <|> char '-' <|> label "letter" (satisfy isAlpha)

    -- Later characters may additionally be digits.
    trailingChar :: Parser Char
    trailingChar = leadingChar <|> label "digit" (satisfy isDigit)
parseImage :: Parser MdToken
parseImage = do
char '!'
char '['
altText <- many1 (parseEscapedChar <++ parseUnit)
char ']'
char '('
path <- many1 get
char ')'
return $ Image (Line altText) (ImgPath path)
link <- parseLink
cssClasses <- optional $ try parseCssClasses
case link of
Link text path -> return $ Image text path cssClasses
_ -> empty -- This should never be reached
parseFigure = do
img <- parseImage
void (string "\n\n") <++ eof
void (string doubleNewlineText) <|> eof
case img of
Image text path -> return $ Figure text path
Image text path cssClasses -> return $ Figure text path cssClasses
_ -> return img
-- Parse an escaped character
parseEscapedChar :: ReadP MdToken
parseEscapedChar :: Parser MdToken
parseEscapedChar = do
char '\\'
escapedChar <- choice (map char escapableChars) -- Parse any of the special chars.
return (Unit [escapedChar])
-- Parse a character as a Unit.
parseUnit :: ReadP MdToken
parseUnit :: Parser MdToken
parseUnit = do
text <- satisfy (`notElem` specialChars)
-- text <- satisfy (`notElem` specialChars)
text <- anySingle
return (Unit [text])
-- | Consume exactly one character that is not a newline and wrap it in a 'Unit'.
parseUnitExceptNewline :: Parser MdToken
parseUnitExceptNewline = toUnit <$> satisfy (/= '\n')
  where
    toUnit ch = Unit [ch]
lineParsers :: [ReadP MdToken]
lineParsers :: [Parser MdToken]
lineParsers =
[ parseLinebreak,
parseSingleNewline,
@ -244,98 +329,98 @@ lineParsers =
parseUnit
] -- A 'line' doesn't include a 'header'
listLineParsers :: [ReadP MdToken]
listLineParsers =
[ parseLinebreak,
parseEscapedChar,
lineParsersWithoutNewline :: [Parser MdToken]
lineParsersWithoutNewline =
[ parseEscapedChar,
parseCode,
parseImage,
parseBold,
parseItalic,
parseStrikethrough,
parseLink,
parseUnit
parseUnitExceptNewline
] -- A list line cannot contain newlines.
-- List of all parsers
allParsers :: [ReadP MdToken]
allParsers :: [Parser MdToken]
allParsers = parseHeader : lineParsers
-- Parse any of the line tokens.
parseLineToken :: ReadP MdToken
parseLineToken :: Parser MdToken
parseLineToken = fallthroughParser lineParsers
-- Parse any of the list line tokens.
parseListLineToken :: ReadP MdToken
parseListLineToken = fallthroughParser listLineParsers
parseListLineToken :: Parser MdToken
parseListLineToken = fallthroughParser lineParsersWithoutNewline
-- Parse a line, consisting of one or more tokens.
parseLine :: ReadP MdToken
parseLine :: Parser MdToken
parseLine = do
skipSpaces
space
-- Fail if we have reached the end of the document.
parsed <- manyTill parseLineToken eof
return (Line parsed)
-- Parse a paragraph, which is a 'Line' (can span multiple actual lines), separated by double-newlines.
parsePara :: ReadP MdToken
parsePara :: Parser MdToken
parsePara = do
parseMany (char '\n')
space
-- text <- many1 (lookaheadParse (\x -> ((length x) < 2) || (take 2 x) /= "\n\n")) -- Parse until a double-newline.
-- string "\n\n" <|> (eof >> return "") -- Consume the next double-newline or EOF.
text <- manyTill get (string "\n\n" <|> (eof >> return ""))
when (null text) pfail
let parsedText = fst $ leftmostLongestParse parseLine text -- Parse a line
parseMany (char '\n')
return (Para parsedText)
parsedText <- someTill parseLineToken (try paraEnding)
many (char '\n')
return (Para (Line parsedText))
where
paraEnding = void (char '\n' *> (char '\n' <|> lookAhead (char '>'))) <|> eof
-- Parse a line starting with '>', return the line except for the '>'.
parseQuotedLine :: ReadP String
parseQuotedLine :: Parser String
parseQuotedLine = do
char '>'
greedyParse (char ' ' +++ char '\t')
restOfLine <- munch (/= '\n')
Text.ParserCombinators.ReadP.optional (char '\n') >> return ""
many (char ' ' <|> char '\t')
restOfLine <- many (satisfy (/= '\n'))
void (char '\n') <|> eof
return restOfLine
-- Parse many 'quoted lines' until I see a non-quoted line.
parseQuotedLines :: ReadP [String]
parseQuotedLines =
greedyParse1 $ do
look >>= \line ->
case line of
('>' : _) -> parseQuotedLine
_ -> pfail
parseQuotedLines :: Parser [String]
parseQuotedLines = some parseQuotedLine
-- some $ do
-- getInput >>= \line ->
-- case T.unpack line of
-- ('>' : _) -> parseQuotedLine
-- _ -> empty
-- Parse a blockquote, which is a greater-than sign followed by a paragraph.
parseBlockquote :: ReadP MdToken
parseBlockquote :: Parser MdToken
parseBlockquote = do
quotedLines <- parseQuotedLines
-- remaining <- look
-- let quotedLines = fst $ leftmostLongestParse parseQuotedLines remaining
-- string (init $ unlines quotedLines)
let parsedQuotedLines = fst $ leftmostLongestParse (many1 (parseBlockquote <++ parsePara)) (init $ unlines quotedLines) -- unlines joins the lines together with a newline, and adds a trailing newline. init removes the trailing newline.
let parsedQuotedLines = leftmostLongestParse (some (parseBlockquote <|> parsePara)) (init $ unlines quotedLines) -- unlines joins the lines together with a newline, and adds a trailing newline. init removes the trailing newline.
return (Blockquote parsedQuotedLines)
-- Parse a nested list item.
parseListNested :: ReadP MdToken
parseListNested :: Parser MdToken
parseListNested = do
let firstCharParser = string " " <++ string "\t"
let restOfLineParser = manyTill get (void (char '\n') <++ eof)
let firstCharParser = string (T.pack " ") <|> string (T.pack "\t")
let restOfLineParser = manyTill anySingle (void (char '\n') <|> eof)
lines <- greedyParse1 (firstCharParser *> restOfLineParser)
let linesParsed = fst $ leftmostLongestParse (parseUnorderedList <++ parseOrderedList) (init $ unlines lines)
when (null (show linesParsed)) pfail
let linesParsed = leftmostLongestParse (parseUnorderedList <|> parseOrderedList) (init $ unlines lines)
when (null (show linesParsed)) empty
return linesParsed
-- Parse an unordered list line item.
parseUListLineItem :: ReadP MdToken
parseUListLineItem :: Parser MdToken
parseUListLineItem = do
firstChar <- choice (map char ['*', '+', '-'])
char ' ' -- At least one space between list indicator and list text.
parseListLineItemCommon
-- Parse an ordered list line item.
parseOListLineItem :: ReadP MdToken
parseOListLineItem :: Parser MdToken
parseOListLineItem = do
num <- greedyParse1 (satisfy isDigit)
char '.'
@ -343,23 +428,22 @@ parseOListLineItem = do
parseListLineItemCommon
-- Common code for parsing list line items
parseListLineItemCommon :: ReadP MdToken
parseListLineItemCommon :: Parser MdToken
parseListLineItemCommon = do
skipSpaces
restOfLine <- many1 parseListLineToken
void (char '\n') <++ eof
nestedList <- parseListNested <++ return (Unit "")
space
restOfLine <- manyTill parseListLineToken (void (char '\n') <|> eof)
nestedList <- try parseListNested <|> return (Unit "")
return $ Line [Line restOfLine, nestedList]
-- Parse an unordered list paragraph item.
parseUListParaItem :: ReadP MdToken
parseUListParaItem :: Parser MdToken
parseUListParaItem = do
firstLine <- parseUListLineItem
res <- parseListParaItemCommon
return $ Document (Para firstLine : res) -- I only wrap this in a document because I want some way of converting [MdToken] to MdToken, without any overhead. There is no other reason to wrap it in a Document.
-- Parse an ordered list paragraph item.
parseOListParaItem :: ReadP MdToken
parseOListParaItem :: Parser MdToken
parseOListParaItem = do
firstLine <- parseOListLineItem
res <- parseListParaItemCommon
@ -369,50 +453,65 @@ parseOListParaItem = do
-- A list paragraph item is defined as a line item, followed by an empty line, followed by one or more
-- lines indented by a space or tab.
-- A list paragraph item can also be a blockquote.
parseListParaItemCommon :: ReadP [MdToken]
parseListParaItemCommon :: Parser [MdToken]
parseListParaItemCommon = do
char '\n'
lines <- greedyParse1 ((string " " <|> string "\t") *> parseTillEol)
let res = fst $ leftmostLongestParse (greedyParse1 parseBlockquote <++ greedyParse1 parsePara) (init $ unlines lines)
lines <- greedyParse1 ((string (T.pack " ") <|> string (T.pack "\t")) *> parseTillEol)
let res = leftmostLongestParse (greedyParse1 parseBlockquote <|> greedyParse1 parsePara) (init $ unlines lines)
char '\n'
return res -- I only wrap this in a document because I want some way of converting [MdToken] to MdToken, without any overhead. There is no other reason to wrap it in a Document.
-- Parse an unordered list item, which can be a line item or another list.
parseUListItem :: ReadP MdToken
parseUListItem = parseUListParaItem <++ parseUListLineItem
parseUListItem :: Parser MdToken
parseUListItem = space *> (try parseUListParaItem <|> parseUListLineItem)
-- Parse an unordered list.
parseUnorderedList :: ReadP MdToken
parseUnorderedList :: Parser MdToken
parseUnorderedList = do
lineItems <- greedyParse1 parseUListItem
void (char '\n') <++ eof -- A list must end in an extra newline or eof
lineItems <- some $ try parseUListItem
void (char '\n') <|> eof -- A list must end in an extra newline or eof
return $ UnordList lineItems
-- --------
parseOListItem :: ReadP MdToken
parseOListItem = parseOListParaItem <++ parseOListLineItem
parseOListItem :: Parser MdToken
parseOListItem = space *> (try parseOListParaItem <|> parseOListLineItem)
-- Parses the first element of an ordered list, which must start with '1.'
parseFirstOListItem :: ReadP MdToken
parseFirstOListItem :: Parser MdToken
parseFirstOListItem = do
remaining <- look
when (take 2 remaining /= "1.") pfail
space
remaining <- getInput
when (take 2 (T.unpack remaining) /= "1.") empty
parseOListLineItem
parseOrderedList :: ReadP MdToken
parseOrderedList :: Parser MdToken
parseOrderedList = do
firstLine <- parseFirstOListItem
lineItems <- greedyParse1 parseOListItem
void (char '\n') <++ eof
firstLine <- try parseFirstOListItem
lineItems <- many $ try parseOListItem
void (char '\n') <|> eof
return $ OrdList (firstLine : lineItems)
parseHorizontalRule :: ReadP MdToken
parseHorizontalRule = string "---" *> (void (string "\n\n") <++ eof) *> return HorizontalRule
-- | The marker for a horizontal rule: three hyphens.
horizontalRuleText :: T.Text
horizontalRuleText = T.pack (replicate 3 '-')

-- | Two consecutive newline characters.
doubleNewlineText :: T.Text
doubleNewlineText = T.pack (replicate 2 '\n')
parseHorizontalRule :: Parser MdToken
parseHorizontalRule = string horizontalRuleText *> (void (string doubleNewlineText) <|> eof) *> return HorizontalRule
-- | Parse a fenced code block.
-- The block opens with three backticks immediately followed by a newline and
-- closes with a newline immediately followed by three backticks. At least one
-- character is required between the fences. The contents are HTML-escaped
-- character by character and returned as a single 'Unit' inside a 'Codeblock'.
parseCodeblock :: Parser MdToken
parseCodeblock = do
  _ <- string openingFence
  body <- someTill anySingle (string closingFence)
  pure . Codeblock . Unit $ concatMap escapeChar body
  where
    openingFence = T.pack "```\n"
    closingFence = T.pack "\n```"
documentParsers :: [ReadP MdToken]
documentParsers :: [Parser MdToken]
documentParsers =
[ parseHorizontalRule,
parseCodeblock,
parseTable,
parseHeader,
parseBlockquote,
parseUnorderedList,
@ -422,7 +521,7 @@ documentParsers =
]
-- Parse a document, which is multiple paragraphs.
parseDocument :: ReadP MdToken
parseDocument :: Parser MdToken
parseDocument = do
res <- manyTill (fallthroughParser documentParsers) eof
return (Document res)

@ -7,7 +7,7 @@ check_equal :: String -> String -> String -> Test
check_equal desc expected actual = TestCase (assertEqual desc expected actual)
convert :: String -> String
convert md = show . fst $ leftmostLongestParse parseDocument md
convert md = show $ leftmostLongestParse parseDocument md
headerTests =
TestList
@ -24,8 +24,8 @@ boldTests =
[ check_equal "Should convert bold" "<p><b>Hello</b></p>" (convert "__Hello__"),
check_equal "Should convert italic" "<p><i>Hello</i></p>" (convert "_Hello_"),
check_equal "Should convert bold and italic in a sentence" "<p>It <i>is</i> a <b>wonderful</b> day</p>" (convert "It _is_ a __wonderful__ day"),
check_equal "Should convert nested bold and italic" "<p><b>Bold then <i>Italic</i></b></p>" (convert "**Bold then *Italic***"),
check_equal "Should convert nested bold and italic" "<p><i>Italic then <b>Bold</b></i></p>" (convert "*Italic then **Bold***")
check_equal "Should convert nested bold and italic" "<p><b>Bold then <i>Italic</i></b></p>" (convert "**Bold then _Italic_**"),
check_equal "Should convert nested bold and italic" "<p><i>Italic then <b>Bold</b></i></p>" (convert "*Italic then __Bold__*")
]
strikethroughTests =
@ -90,19 +90,25 @@ orderedListTests =
check_equal "Paragraph before list" "<h3>This is a list</h3><ol><li>Item 1</li><li>Item 2</li></ol>" (convert "### This is a list\n\n1. Item 1\n200. Item 2"),
check_equal "Nested list then back" "<ol><li>Item 1</li><li>Item 2<ol><li>Item 3</li><li>Item 4</li></ol></li><li>Item 5</li></ol>" (convert "1. Item 1\n2. Item 2\n 1. Item 3\n 3. Item 4\n5. Item 5"),
check_equal "Blockquote in list" "<ol><li>Item 1</li><li><p>Item 2</p><blockquote><p>Quote</p></blockquote></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n\n > Quote\n\n3. Item 3"),
check_equal "Unordered list in ordered list" "<ol><li>Item 1</li><li>Item 2<ul><li>Item 1</li><li>Item 2</li></ul></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n - Item 1\n * Item 2\n4. Item 3")
check_equal "Unordered list in ordered list" "<ol><li>Item 1</li><li>Item 2<ul><li>Item 1</li><li>Item 2</li></ul></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n - Item 1\n * Item 2\n4. Item 3"),
check_equal "List with just 1 item" "<ol><li>Item 1</li></ol>" (convert "1. Item 1")
]
htmlTests =
TestList
[check_equal "Convert HTML element" "<p><center>a</center></p>" (convert "<center>a</center>")]
codeTests =
TestList
[ check_equal "Code by itself" "<p><code>Hello world!</code></p>" (convert "`Hello world!`"),
check_equal "Code in a paragraph" "<p>The following <code>text</code> is code</p>" (convert "The following `text` is code"),
check_equal "Code across paragraphs (shouldn't work" "<p></p><p></p>" (convert "`Incomplete\n\nCode`") -- At the moment, this is just treated as a syntax error, so nothing is rendered.
check_equal "Code across paragraphs (shouldn't work)" "<p>`Incomplete</p><p>Code`</p>" (convert "`Incomplete\n\nCode`") -- At the moment, this is just treated as a syntax error, so nothing is rendered.
]
imageTests =
TestList
[ check_equal "Image with text" "<p>This is an image <img src=\"img.png\" alt=\"Image 1\" /></p>" (convert "This is an image ![Image 1](img.png)")
[ check_equal "Image with text" "<p>This is an image <img src=\"img.png\" alt=\"Image 1\"/></p>" (convert "This is an image ![Image 1](img.png)"),
check_equal "Image with classes" "<p>This is an image <img src=\"img.png\" alt=\"Image 1\" class=\"new-img\"/></p>" (convert "This is an image ![Image 1](img.png){.new-img}")
]
figureTests =
@ -114,6 +120,17 @@ horizontalRuleTests =
TestList
[check_equal "Horizontal Rule" "<p>a</p><hr><p>b</p>" (convert "a\n\n---\n\nb")]
tableTests =
TestList
[ check_equal
"Basic table"
"<table>\
\<thead><tr><th>Col 1</th><th>Col 2</th><th>Col 3</th></tr></thead>\
\<tbody><tr><td>Data 1</td><td>Data 2</td><td>Data 3</td></tr>\
\<tr><td>More Data 1</td><td>More Data 2</td><td>More Data 3</td></tr></tbody></table>"
(convert "| Col 1 | Col 2 | Col 3 |\n|---|---|---|\n| Data 1 | Data 2 | Data 3 |\n| More Data 1 | More Data 2 | More Data 3 |")
]
integrationTests =
TestList
[ check_equal "Integration 1" "<h1>Sample Markdown</h1><p>This is some basic, sample markdown.</p><h2><b>Second</b> <i>Heading</i></h2>" (convert "# Sample Markdown\n\n This is some basic, sample markdown.\n\n ## __Second__ _Heading_"),
@ -121,7 +138,7 @@ integrationTests =
check_equal "Integration 3" "<h1>Hello</h1><p>World</p>" (convert "# Hello\nWorld"),
check_equal "Integration 4" "<p>a b</p>" (convert "a\nb"),
check_equal "Integration 5" "<h1>Hello</h1>" (convert "# Hello\n"),
check_equal "Integration 6" "<p>First line<br>Second line</p>" (convert "First line \nSecond line"),
check_equal "Integration 6" "<p>First line<br />Second line</p>" (convert "First line \nSecond line"),
check_equal
"Integration 7"
"<h1>Sample Markdown</h1><p>This is some basic, sample markdown.</p><h2>Second \
@ -134,7 +151,7 @@ integrationTests =
"# Sample Markdown\n\nThis is some basic, sample markdown.\n\n## Second \
\Heading\n\n- Unordered lists, and:\n 1. One\n 2. Two\n 3. Three\n\
\- More\n\n> Blockquote\n\nAnd **bold**, *italics*, and even *italics and \
\later **bold***. Even ~~strikethrough~~. [A link](https://markdowntohtml.com) to somewhere."
\later __bold__*. Even ~~strikethrough~~. [A link](https://markdowntohtml.com) to somewhere."
)
]
@ -149,9 +166,11 @@ tests =
unorderedListTests,
orderedListTests,
imageTests,
htmlTests,
figureTests,
codeTests,
horizontalRuleTests,
tableTests,
integrationTests
]

Loading…
Cancel
Save