34 Commits

Author SHA1 Message Date
e5795e0d75 Added more tests 2025-07-30 15:34:43 -04:00
eae897a2d6 Improve document parser to parse extra newlines 2025-07-30 15:32:35 -04:00
8152b89a23 Fix bug in fallthrough parser 2025-07-30 15:29:25 -04:00
9d3d656065 Improved parser for horizontal rule and codeblock 2025-07-30 15:29:03 -04:00
d4a550f6a7 Added token and parser for list checkbox 2025-07-30 15:28:35 -04:00
2b21aeae89 Added implementation and tests for subscript and superscript; fixed nested list parsing 2025-07-22 09:56:41 -04:00
ca328a464a Add prettyPrint definition for horizontal rule 2025-07-15 10:51:10 -04:00
7d45b1123f Renamed md-to-html-runner to mdtoh 2025-06-18 15:21:54 -04:00
9627abcd12 Updated test 2025-06-10 14:14:56 -04:00
82277e9ea8 Only add newlines for linebreak when pretty printing 2025-06-10 14:14:26 -04:00
d074b0131c Parse linebreaks as a backslash before a newline 2025-06-10 14:10:45 -04:00
57cb3e68fa Import module for word wrapping; add package to cabal file 2025-06-10 14:02:20 -04:00
4e9f84c2bb Add function to pretty print; commented out my word wrap and use a
built-in one instead
2025-06-10 14:01:52 -04:00
e025614324 Print extra newline if output text doesn't include a newline 2025-06-10 09:13:40 -04:00
e711444066 Add more packages to cabal file 2025-06-10 09:13:15 -04:00
6b99a1835d Created a separate parser list for all parsers (except the unit parser
is replaced with the non-newline unit parser); use that parser when
parsing list lines
2025-06-10 09:12:42 -04:00
04167e0f96 Parse CSS classes in image and figure 2025-06-10 09:11:56 -04:00
0528e813c5 Parser for CSS classes 2025-06-10 09:11:34 -04:00
b1b99189c9 Link tect can be empty; inline code cannot be empty and can have nested
backticks; created a unit parser for all characters except newline
2025-06-10 09:11:19 -04:00
ade3768e29 Try and backtrack 2025-06-10 09:10:10 -04:00
fd6d39ecd6 Parse space at beginning of list 2025-06-10 09:09:53 -04:00
0f04342867 More trying and backtracking; parse and discard extraneous spaces at
beginning of list
2025-06-10 09:09:31 -04:00
80ef93bbc9 Try parsing an ordered list item, backtrack if not possible 2025-06-10 09:08:52 -04:00
b73d4131b6 Added support for tables and codeblocks
Defined the types, defined 'show', created the parsers, added them to
parser list
2025-06-10 09:08:30 -04:00
c48b8c5ae8 Images and figures now support CSS classes 2025-06-10 09:05:31 -04:00
cf4282b26e More imports 2025-06-10 09:04:28 -04:00
7b40d6fe7c Imports 2025-06-10 09:04:18 -04:00
c4255d4578 Added a test for a list with just one item 2025-06-10 09:04:02 -04:00
dcbbff13cb Spacing change 2025-06-10 09:03:48 -04:00
592fad2b46 Added tests for tables 2025-06-10 09:03:32 -04:00
b8ba27f240 Strip newlines when comparing in test 2025-06-03 15:37:27 -04:00
bb08b40512 Replaced nested bold with asterisks, with asterisks and underscores 2025-06-03 15:37:12 -04:00
93548a4533 Never mind, doesn't seem to work well 2025-06-03 15:31:13 -04:00
160cb0edeb Trying to get nested bold and italic to work 2025-06-03 15:30:35 -04:00
4 changed files with 229 additions and 47 deletions

View File

@@ -24,4 +24,7 @@ main = do
[] -> getContents [] -> getContents
x : _ -> readFile x x : _ -> readFile x
let res = leftmostLongestParse parseDocument fileContents let res = leftmostLongestParse parseDocument fileContents
print res let toPrint = prettyPrint res
case reverse toPrint of
'\n' : _ -> putStr toPrint
_ -> putStrLn toPrint

View File

@@ -60,9 +60,12 @@ library
build-depends: base ^>=4.19.1.0, build-depends: base ^>=4.19.1.0,
HUnit, HUnit,
megaparsec, megaparsec,
text parser-combinators,
text,
MissingH,
word-wrap
executable md-to-html-runner executable mdtoh
-- Import common warning flags. -- Import common warning flags.
import: warnings import: warnings

View File

@@ -6,20 +6,25 @@ module MdToHTML where
import Control.Applicative hiding (many, some) import Control.Applicative hiding (many, some)
import Control.Monad import Control.Monad
import Control.Monad.Combinators (count)
import Data.Char import Data.Char
import Data.List import Data.List
import Data.Ord (comparing) import Data.Ord (comparing)
import Data.String.Utils
import qualified Data.Text as T import qualified Data.Text as T
import Data.Void import Data.Void
import Debug.Trace import Debug.Trace
import Text.Megaparsec import Text.Megaparsec
import Text.Megaparsec.Char import Text.Megaparsec.Char
import Text.Printf import Text.Printf
import Text.Wrap
type Parser = Parsec Void T.Text type Parser = Parsec Void T.Text
type HeaderLevel = Int type HeaderLevel = Int
type CssClass = String
newtype URL = URL {getUrl :: String} deriving (Eq) newtype URL = URL {getUrl :: String} deriving (Eq)
newtype ImgPath = ImgPath {getPath :: String} deriving (Eq) newtype ImgPath = ImgPath {getPath :: String} deriving (Eq)
@@ -35,13 +40,17 @@ data MdToken
| Blockquote [MdToken] | Blockquote [MdToken]
| UnordList [MdToken] | UnordList [MdToken]
| OrdList [MdToken] | OrdList [MdToken]
| Checkbox Bool
| Code MdToken | Code MdToken
| Codeblock String | Table [[MdToken]]
| Codeblock MdToken
| Link MdToken URL | Link MdToken URL
| Image MdToken URL | Image MdToken URL (Maybe [CssClass])
| Figure MdToken URL | Figure MdToken URL (Maybe [CssClass])
| Bold MdToken | Bold MdToken
| Italic MdToken | Italic MdToken
| Superscript MdToken
| Subscript MdToken
| Strikethrough MdToken | Strikethrough MdToken
| Unit String | Unit String
deriving (Eq) deriving (Eq)
@@ -50,24 +59,40 @@ data MdToken
instance Show MdToken where instance Show MdToken where
show (Document tokens) = concatMap show tokens show (Document tokens) = concatMap show tokens
show (Header level token) = "<h" ++ show level ++ ">" ++ show token ++ "</h" ++ show level ++ ">" show (Header level token) = "<h" ++ show level ++ ">" ++ show token ++ "</h" ++ show level ++ ">"
show (Para token) = "<p>" ++ show token ++ "</p>\n" show (Para token) = "<p>" ++ show token ++ "</p>"
show (Line tokens) = concatMap show tokens show (Line tokens) = concatMap show tokens
show Linebreak = "<br>" show Linebreak = "<br />"
show SingleNewline = " " show SingleNewline = " "
show HorizontalRule = "<hr>" show HorizontalRule = "<hr>"
show (Blockquote tokens) = "<blockquote>" ++ concatMap show tokens ++ "</blockquote>" show (Blockquote tokens) = "<blockquote>" ++ concatMap show tokens ++ "</blockquote>"
show (UnordList tokens) = "<ul>" ++ concatMap (prepend "<li>" . append "</li>" . show) tokens ++ "</ul>" show (UnordList tokens) = "<ul>" ++ concatMap (prepend "<li>" . append "</li>" . show) tokens ++ "</ul>"
show (OrdList tokens) = "<ol>" ++ concatMap (prepend "<li>" . append "</li>" . show) tokens ++ "</ol>" show (OrdList tokens) = "<ol>" ++ concatMap (prepend "<li>" . append "</li>" . show) tokens ++ "</ol>"
show (Code code) = "<code>" ++ show code ++ "</code>" show (Checkbox isChecked) = "<input type=\"checkbox\"" ++ (if isChecked then " checked=\"\"" else "") ++ " />"
show (Codeblock code) = show code show (Code code) = "<code>" ++ strip (show code) ++ "</code>"
show (Table (thead : tokenGrid)) = "<table><thead><tr>" ++ concatMap (\x -> "<th>" ++ rstrip (show x) ++ "</th>") thead ++ "</tr></thead>" ++ "<tbody>" ++ concatMap (\x -> "<tr>" ++ concatMap (\y -> "<td>" ++ rstrip (show y) ++ "</td>") x ++ "</tr>") tokenGrid ++ "</tbody></table>"
show (Codeblock code) = "<pre><code>" ++ show code ++ "</code></pre>"
show (Link txt url) = "<a href=\"" ++ getUrl url ++ "\">" ++ show txt ++ "</a>" show (Link txt url) = "<a href=\"" ++ getUrl url ++ "\">" ++ show txt ++ "</a>"
show (Image txt url) = "<img src=\"" ++ getUrl url ++ "\"" ++ " alt=\"" ++ show txt ++ "\" />" show (Image txt url cssClasses) = "<img src=\"" ++ getUrl url ++ "\"" ++ " alt=\"" ++ show txt ++ "\"" ++ maybe "" (\classes -> " class=\"" ++ unwords classes ++ "\"") cssClasses ++ "/>"
show (Figure txt url) = "<figure><img src=\"" ++ getUrl url ++ "\" alt=\"" ++ show txt ++ "\"/><figcaption aria-hidden=\"true\">" ++ show txt ++ "</figcaption></figure>" show (Figure txt url cssClasses) = "<figure><img src=\"" ++ getUrl url ++ "\" alt=\"" ++ show txt ++ "\"" ++ maybe "" (\classes -> " class=\"" ++ unwords classes ++ "\"") cssClasses ++ "/><figcaption aria-hidden=\"true\">" ++ show txt ++ "</figcaption></figure>"
show (Bold token) = "<b>" ++ show token ++ "</b>" show (Bold token) = "<b>" ++ show token ++ "</b>"
show (Italic token) = "<i>" ++ show token ++ "</i>" show (Italic token) = "<i>" ++ show token ++ "</i>"
show (Superscript token) = "<sup>" ++ show token ++ "</sup>"
show (Subscript token) = "<sub>" ++ show token ++ "</sub>"
show (Strikethrough token) = "<s>" ++ show token ++ "</s>" show (Strikethrough token) = "<s>" ++ show token ++ "</s>"
show (Unit unit) = printf "%s" unit show (Unit unit) = printf "%s" unit
-- Pretty print the given token into a string.
-- This is the same as calling 'show' for most tokens, but is different for paragraphs and tables,
-- which have newlines inserted into them.
prettyPrint :: MdToken -> String
prettyPrint (Para token) = "<p>" ++ T.unpack (wrapText defaultWrapSettings 70 (T.pack $ prettyPrint token)) ++ "</p>\n"
prettyPrint (Table (thead : tokenGrid)) = "<table>\n<thead>\n<tr>\n" ++ concatMap (\x -> "<th>" ++ rstrip (prettyPrint x) ++ "</th>\n") thead ++ "</tr>\n</thead>\n" ++ "<tbody>\n" ++ concatMap (\x -> "<tr>\n" ++ concatMap (\y -> "<td>" ++ rstrip (prettyPrint y) ++ "</td>\n") x ++ "</tr>\n") tokenGrid ++ "</tbody>\n</table>\n"
prettyPrint Linebreak = "<br />\n"
prettyPrint HorizontalRule = "<hr>\n"
prettyPrint (Line tokens) = concatMap prettyPrint tokens
prettyPrint (Document tokens) = concatMap prettyPrint tokens
prettyPrint token = show token
instance Semigroup MdToken where instance Semigroup MdToken where
a <> b = Document [a, b] a <> b = Document [a, b]
@@ -120,7 +145,7 @@ parseTillEol = manyTill anySingle (void (char '\n') <|> eof)
-- Takes a list of parsers. Returns a parser that will try them in -- Takes a list of parsers. Returns a parser that will try them in
-- order, moving to the next one only if the current one fails. -- order, moving to the next one only if the current one fails.
fallthroughParser :: [Parser a] -> Parser a fallthroughParser :: [Parser a] -> Parser a
fallthroughParser [x] = x fallthroughParser [x] = try x
fallthroughParser (x : xs) = try x <|> fallthroughParser xs fallthroughParser (x : xs) = try x <|> fallthroughParser xs
escapeChar :: Char -> String escapeChar :: Char -> String
@@ -132,6 +157,19 @@ escapeChar x = [x]
htmlEscapeChars :: T.Text -> T.Text htmlEscapeChars :: T.Text -> T.Text
htmlEscapeChars = T.concatMap (T.pack . escapeChar) htmlEscapeChars = T.concatMap (T.pack . escapeChar)
-- -- Wraps a list of words after (at most) the given number of characters, trying to prevent word-breaks
-- wordwrap :: Int -> String -> String
-- wordwrap wraplength str = if (length str) < wraplength
-- then str
-- else
-- let spaceIndex = lastgtSpaceIndex 0 (takeRev (length str) - wraplength str)
--
-- where
-- takeRev n = (reverse . take n . reverse)
-- lastSpaceIndex counter str = case str of
-- [] -> counter
-- x:xs -> if (isSpace x) counter else lastSpaceIndex counter+1 xs
-- --------------- -- ---------------
-- Parse a markdown header, denoted by 1-6 #'s followed by some text, followed by EOL. -- Parse a markdown header, denoted by 1-6 #'s followed by some text, followed by EOL.
@@ -169,6 +207,20 @@ parseItalic = parseItalicWith '*' <|> parseItalicWith '_'
inside <- someTill parseLineToken (char delim) inside <- someTill parseLineToken (char delim)
return (Italic (Line inside)) return (Italic (Line inside))
-- Parse subscript
parseSubscript :: Parser MdToken
parseSubscript = do
char '~'
inside <- someTill parseLineToken (char '~')
return (Subscript (Line inside))
-- Parse superscript
parseSuperscript :: Parser MdToken
parseSuperscript = do
char '^'
inside <- someTill parseLineToken (char '^')
return (Superscript (Line inside))
-- Parse strikethrough text -- Parse strikethrough text
parseStrikethrough :: Parser MdToken parseStrikethrough :: Parser MdToken
parseStrikethrough = do parseStrikethrough = do
@@ -179,26 +231,49 @@ parseStrikethrough = do
-- Parse code -- Parse code
parseCode :: Parser MdToken parseCode :: Parser MdToken
parseCode = do parseCode = do
char '`' opening <- some $ char '`'
inside <- manyTill (satisfy (/= '\n')) (char '`') inside <- someTill (satisfy (/= '\n')) (char '`')
closing <- count (length opening - 1) (char '`')
return (Code (Unit (concatMap escapeChar inside))) return (Code (Unit (concatMap escapeChar inside)))
-- Parse a link -- Parse a link
parseLink :: Parser MdToken parseLink :: Parser MdToken
parseLink = do parseLink = do
char '[' char '['
linkText <- someTill parseLineToken (char ']') linkText <- manyTill parseLineToken (char ']')
char '(' char '('
linkURL <- manyTill anySingle (char ')') linkURL <- manyTill anySingle (char ')')
return $ Link (Line linkText) (URL linkURL) return $ Link (Line linkText) (URL linkURL)
-- Parse a linebreak character -- Parse a linebreak character
parseLinebreak :: Parser MdToken parseLinebreak :: Parser MdToken
parseLinebreak = do parseLinebreak = parseLinebreakSpace <|> parseLinebreakBackslash
char ' ' where
some (char ' ') parseLinebreakSpace = do
char ' '
some (char ' ')
char '\n'
return Linebreak
parseLinebreakBackslash = try $ do
char '\\'
char '\n'
return Linebreak
parseTableRow :: Parser [MdToken]
parseTableRow = do
char '|'
row <- some (many (satisfy (\x -> x == ' ' || x == '\t')) *> someTill parseListLineToken (char '|'))
return (map Line row)
parseTable :: Parser MdToken
parseTable = do
tableHead <- parseTableRow
char '\n' char '\n'
return Linebreak char '|'
sepEndBy1 (some (char '-')) (char '|') *> char '\n'
tableBody <- sepEndBy parseTableRow (char '\n')
many (char '\n') -- Parse trailing newlines, if any
return $ Table (tableHead : tableBody)
parseSingleNewline :: Parser MdToken parseSingleNewline :: Parser MdToken
parseSingleNewline = do parseSingleNewline = do
@@ -208,19 +283,36 @@ parseSingleNewline = do
[] -> return $ Unit "" [] -> return $ Unit ""
_ -> return SingleNewline _ -> return SingleNewline
parseCssClasses :: Parser [CssClass]
parseCssClasses = do
char '{'
classes <- some parseCssClass
char '}'
return classes
where
parseCssClass :: Parser CssClass
parseCssClass = do
char '.'
let firstLetterParser = char '_' <|> char '-' <|> label "letter" (satisfy isAlpha)
cssClassFirstLetter <- firstLetterParser
cssClass <- many (firstLetterParser <|> label "digit" (satisfy isDigit))
space
return (cssClassFirstLetter : cssClass)
parseImage :: Parser MdToken parseImage :: Parser MdToken
parseImage = do parseImage = do
char '!' char '!'
link <- parseLink link <- parseLink
cssClasses <- optional $ try parseCssClasses
case link of case link of
Link text path -> return $ Image text path Link text path -> return $ Image text path cssClasses
_ -> empty -- This should never be reached _ -> empty -- This should never be reached
parseFigure = do parseFigure = do
img <- parseImage img <- parseImage
void (string doubleNewlineText) <|> eof void (string doubleNewlineText) <|> eof
case img of case img of
Image text path -> return $ Figure text path Image text path cssClasses -> return $ Figure text path cssClasses
_ -> return img _ -> return img
-- Parse an escaped character -- Parse an escaped character
@@ -237,6 +329,13 @@ parseUnit = do
text <- anySingle text <- anySingle
return (Unit [text]) return (Unit [text])
-- Parse any character except a newline
parseUnitExceptNewline :: Parser MdToken
parseUnitExceptNewline = do
-- text <- satisfy (`notElem` specialChars)
text <- satisfy (/= '\n')
return (Unit [text])
lineParsers :: [Parser MdToken] lineParsers :: [Parser MdToken]
lineParsers = lineParsers =
[ parseLinebreak, [ parseLinebreak,
@@ -247,21 +346,24 @@ lineParsers =
parseBold, parseBold,
parseItalic, parseItalic,
parseStrikethrough, parseStrikethrough,
parseSubscript,
parseSuperscript,
parseLink, parseLink,
parseUnit parseUnit
] -- A 'line' doesn't include a 'header' ] -- A 'line' doesn't include a 'header'
listLineParsers :: [Parser MdToken] lineParsersWithoutNewline :: [Parser MdToken]
listLineParsers = lineParsersWithoutNewline =
[ parseLinebreak, [ parseEscapedChar,
parseEscapedChar,
parseCode, parseCode,
parseImage, parseImage,
parseBold, parseBold,
parseItalic, parseItalic,
parseStrikethrough, parseStrikethrough,
parseSubscript,
parseSuperscript,
parseLink, parseLink,
parseUnit parseUnitExceptNewline
] -- A list line cannot contain newlines. ] -- A list line cannot contain newlines.
-- List of all parsers -- List of all parsers
@@ -274,7 +376,7 @@ parseLineToken = fallthroughParser lineParsers
-- Parse any of the list line tokens. -- Parse any of the list line tokens.
parseListLineToken :: Parser MdToken parseListLineToken :: Parser MdToken
parseListLineToken = fallthroughParser listLineParsers parseListLineToken = fallthroughParser lineParsersWithoutNewline
-- Parse a line, consisting of one or more tokens. -- Parse a line, consisting of one or more tokens.
parseLine :: Parser MdToken parseLine :: Parser MdToken
@@ -325,13 +427,28 @@ parseBlockquote = do
let parsedQuotedLines = leftmostLongestParse (some (parseBlockquote <|> parsePara)) (init $ unlines quotedLines) -- unlines joins the lines together with a newline, and adds a trailing newline. init removes the trailing newline. let parsedQuotedLines = leftmostLongestParse (some (parseBlockquote <|> parsePara)) (init $ unlines quotedLines) -- unlines joins the lines together with a newline, and adds a trailing newline. init removes the trailing newline.
return (Blockquote parsedQuotedLines) return (Blockquote parsedQuotedLines)
-- Parse a checkbox
parseCheckbox :: Parser MdToken
parseCheckbox = do
char '['
inside <- char ' ' <|> char 'x'
char ']'
space
return (if inside == 'x' then Checkbox True else Checkbox False)
-- Parse a nested list item. -- Parse a nested list item.
parseListNested :: Parser MdToken parseListNested :: Parser MdToken
parseListNested = do parseListNested = do
let firstCharParser = string (T.pack " ") <|> string (T.pack "\t") let firstCharParser = (<>) <$> (string (T.pack " ") <|> string (T.pack "\t")) <*> (T.pack <$> many (char ' '))
let restOfLineParser = manyTill anySingle (void (char '\n') <|> eof) let restOfLineParser = manyTill anySingle (void (char '\n') <|> eof)
lines <- greedyParse1 (firstCharParser *> restOfLineParser) -- For the first line, I manually run firstCharParser and restOfLineParser. The
let linesParsed = leftmostLongestParse (parseUnorderedList <|> parseOrderedList) (init $ unlines lines) -- result of firstCharParser is saved. For every subsequent line, I parse exactly
-- the same string as firstCharParser.
firstLineSpaces <- firstCharParser
firstLine <- restOfLineParser
lines <- greedyParse (string firstLineSpaces *> restOfLineParser)
let allLines = firstLine : lines
let linesParsed = leftmostLongestParse (parseUnorderedList <|> parseOrderedList) (init $ unlines allLines)
when (null (show linesParsed)) empty when (null (show linesParsed)) empty
return linesParsed return linesParsed
@@ -354,9 +471,12 @@ parseOListLineItem = do
parseListLineItemCommon :: Parser MdToken parseListLineItemCommon :: Parser MdToken
parseListLineItemCommon = do parseListLineItemCommon = do
space space
checkbox <- optional $ try parseCheckbox
restOfLine <- manyTill parseListLineToken (void (char '\n') <|> eof) restOfLine <- manyTill parseListLineToken (void (char '\n') <|> eof)
nestedList <- parseListNested <|> return (Unit "") nestedList <- try parseListNested <|> return (Unit "")
return $ Line [Line restOfLine, nestedList] case checkbox of
Just box -> return $ Line [box, Line restOfLine, nestedList]
Nothing -> return $ Line [Line restOfLine, nestedList]
-- Parse an unordered list paragraph item. -- Parse an unordered list paragraph item.
parseUListParaItem :: Parser MdToken parseUListParaItem :: Parser MdToken
@@ -386,31 +506,32 @@ parseListParaItemCommon = do
-- Parse an unordered list item, which can be a line item or another list. -- Parse an unordered list item, which can be a line item or another list.
parseUListItem :: Parser MdToken parseUListItem :: Parser MdToken
parseUListItem = try parseUListParaItem <|> parseUListLineItem parseUListItem = space *> (try parseUListParaItem <|> parseUListLineItem)
-- Parse an unordered list. -- Parse an unordered list.
parseUnorderedList :: Parser MdToken parseUnorderedList :: Parser MdToken
parseUnorderedList = do parseUnorderedList = do
lineItems <- some parseUListItem lineItems <- some $ try parseUListItem
void (char '\n') <|> eof -- A list must end in an extra newline or eof void (char '\n') <|> eof -- A list must end in an extra newline or eof
return $ UnordList lineItems return $ UnordList lineItems
-- -------- -- --------
parseOListItem :: Parser MdToken parseOListItem :: Parser MdToken
parseOListItem = try parseOListParaItem <|> parseOListLineItem parseOListItem = space *> (try parseOListParaItem <|> parseOListLineItem)
-- Parses the first element of an ordered list, which must start with '1.' -- Parses the first element of an ordered list, which must start with '1.'
parseFirstOListItem :: Parser MdToken parseFirstOListItem :: Parser MdToken
parseFirstOListItem = do parseFirstOListItem = do
space
remaining <- getInput remaining <- getInput
when (take 2 (T.unpack remaining) /= "1.") empty when (take 2 (T.unpack remaining) /= "1.") empty
parseOListLineItem parseOListLineItem
parseOrderedList :: Parser MdToken parseOrderedList :: Parser MdToken
parseOrderedList = do parseOrderedList = do
firstLine <- parseFirstOListItem firstLine <- try parseFirstOListItem
lineItems <- some parseOListItem lineItems <- many $ try parseOListItem
void (char '\n') <|> eof void (char '\n') <|> eof
return $ OrdList (firstLine : lineItems) return $ OrdList (firstLine : lineItems)
@@ -421,11 +542,23 @@ doubleNewlineText :: T.Text
doubleNewlineText = T.pack "\n\n" doubleNewlineText = T.pack "\n\n"
parseHorizontalRule :: Parser MdToken parseHorizontalRule :: Parser MdToken
parseHorizontalRule = string horizontalRuleText *> (void (string doubleNewlineText) <|> eof) *> return HorizontalRule parseHorizontalRule = parseHorizontalRuleLine *> (void (string doubleNewlineText) <|> eof) *> return HorizontalRule
where
parseHorizontalRuleLine = fallthroughParser (map (string . T.pack) ["---", "***", "___", "- - -", "* * *", "_ _ _"])
parseCodeblock :: Parser MdToken
parseCodeblock = do
string (T.pack "```")
_ <- many $ satisfy (/= '\n') -- Language name
char '\n'
inside <- someTill anySingle (string (T.pack "\n```"))
return $ Codeblock (Unit (concatMap escapeChar inside))
documentParsers :: [Parser MdToken] documentParsers :: [Parser MdToken]
documentParsers = documentParsers =
[ parseHorizontalRule, [ parseHorizontalRule,
parseCodeblock,
parseTable,
parseHeader, parseHeader,
parseBlockquote, parseBlockquote,
parseUnorderedList, parseUnorderedList,
@@ -437,5 +570,8 @@ documentParsers =
-- Parse a document, which is multiple paragraphs. -- Parse a document, which is multiple paragraphs.
parseDocument :: Parser MdToken parseDocument :: Parser MdToken
parseDocument = do parseDocument = do
res <- manyTill (fallthroughParser documentParsers) eof -- res <- manyTill (fallthroughParser documentParsers <|> (char '\n' *> return $ Unit "")) eof
res <- sepEndBy (fallthroughParser documentParsers) (many $ char '\n')
-- many $ char '\n'
eof
return (Document res) return (Document res)

View File

@@ -31,7 +31,7 @@ boldTests =
strikethroughTests = strikethroughTests =
TestList TestList
[ check_equal "Should convert strikethrough" "<p><s>Hello</s></p>" (convert "~~Hello~~"), [ check_equal "Should convert strikethrough" "<p><s>Hello</s></p>" (convert "~~Hello~~"),
check_equal "Should convert long sentence with tilde" "<p><s>The universe is ~7 days old</s>. The universe is 13 billion years old.</p>" (convert "~~The universe is ~7 days old~~. The universe is 13 billion years old.") check_equal "Should convert long sentence with tilde" "<p><s>The universe is ~7 days old</s>. The universe is 13 billion years old.</p>" (convert "~~The universe is \\~7 days old~~. The universe is 13 billion years old.")
] ]
linkTests = linkTests =
@@ -74,8 +74,17 @@ unorderedListTests =
check_equal "Paragraph before list" "<p>This is a list</p><ul><li>Item 1</li><li>Item 2</li></ul>" (convert "This is a list\n\n* Item 1\n* Item 2"), check_equal "Paragraph before list" "<p>This is a list</p><ul><li>Item 1</li><li>Item 2</li></ul>" (convert "This is a list\n\n* Item 1\n* Item 2"),
check_equal "Paragraph before list" "<h3>This is a list</h3><ul><li>Item 1</li><li>Item 2</li></ul>" (convert "### This is a list\n\n* Item 1\n* Item 2"), check_equal "Paragraph before list" "<h3>This is a list</h3><ul><li>Item 1</li><li>Item 2</li></ul>" (convert "### This is a list\n\n* Item 1\n* Item 2"),
check_equal "Nested list then back" "<ul><li>Item 1</li><li>Item 2<ul><li>Item 3</li><li>Item 4</li></ul></li><li>Item 5</li></ul>" (convert "- Item 1\n- Item 2\n - Item 3\n - Item 4\n- Item 5"), check_equal "Nested list then back" "<ul><li>Item 1</li><li>Item 2<ul><li>Item 3</li><li>Item 4</li></ul></li><li>Item 5</li></ul>" (convert "- Item 1\n- Item 2\n - Item 3\n - Item 4\n- Item 5"),
check_equal "Triply nested list" "<ul><li>Item 1</li><li>Item 2<ul><li>Item 3<ul><li>Item 4</li></ul></li></ul></li><li>Item 5</li></ul>" (convert "- Item 1\n- Item 2\n - Item 3\n - Item 4\n- Item 5"),
check_equal "Blockquote in list" "<ul><li>Item 1</li><li><p>Item 2</p><blockquote><p>Quote</p></blockquote></li><li>Item 3</li></ul>" (convert "- Item 1\n- Item 2\n\n > Quote\n\n- Item 3"), check_equal "Blockquote in list" "<ul><li>Item 1</li><li><p>Item 2</p><blockquote><p>Quote</p></blockquote></li><li>Item 3</li></ul>" (convert "- Item 1\n- Item 2\n\n > Quote\n\n- Item 3"),
check_equal "Ordered list in unordered list" "<ul><li>Item 1</li><li>Item 2<ol><li>Item 1</li><li>Item 2</li></ol></li><li>Item 3</li></ul>" (convert "- Item 1\n- Item 2\n 1. Item 1\n 2. Item 2\n- Item 3") check_equal "Ordered list in unordered list" "<ul><li>Item 1</li><li>Item 2<ol><li>Item 1</li><li>Item 2</li></ol></li><li>Item 3</li></ul>" (convert "- Item 1\n- Item 2\n 1. Item 1\n 2. Item 2\n- Item 3"),
check_equal
"Checkbox in unordered list"
"<ul>\
\<li><input type=\"checkbox\" />Not checked</li>\
\<li><input type=\"checkbox\" checked=\"\" />Checked</li>\
\<li>Normal list item</li></ul>"
(convert "- [ ] Not checked\n- [x] Checked\n- Normal list item"),
check_equal "List with link at the start" "<ul><li><a href=\"b\">a</a></li><li><a href=\"d\">c</a></li></ul>" (convert "- [a](b)\n- [c](d)")
] ]
orderedListTests = orderedListTests =
@@ -90,7 +99,15 @@ orderedListTests =
check_equal "Paragraph before list" "<h3>This is a list</h3><ol><li>Item 1</li><li>Item 2</li></ol>" (convert "### This is a list\n\n1. Item 1\n200. Item 2"), check_equal "Paragraph before list" "<h3>This is a list</h3><ol><li>Item 1</li><li>Item 2</li></ol>" (convert "### This is a list\n\n1. Item 1\n200. Item 2"),
check_equal "Nested list then back" "<ol><li>Item 1</li><li>Item 2<ol><li>Item 3</li><li>Item 4</li></ol></li><li>Item 5</li></ol>" (convert "1. Item 1\n2. Item 2\n 1. Item 3\n 3. Item 4\n5. Item 5"), check_equal "Nested list then back" "<ol><li>Item 1</li><li>Item 2<ol><li>Item 3</li><li>Item 4</li></ol></li><li>Item 5</li></ol>" (convert "1. Item 1\n2. Item 2\n 1. Item 3\n 3. Item 4\n5. Item 5"),
check_equal "Blockquote in list" "<ol><li>Item 1</li><li><p>Item 2</p><blockquote><p>Quote</p></blockquote></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n\n > Quote\n\n3. Item 3"), check_equal "Blockquote in list" "<ol><li>Item 1</li><li><p>Item 2</p><blockquote><p>Quote</p></blockquote></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n\n > Quote\n\n3. Item 3"),
check_equal "Unordered list in ordered list" "<ol><li>Item 1</li><li>Item 2<ul><li>Item 1</li><li>Item 2</li></ul></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n - Item 1\n * Item 2\n4. Item 3") check_equal "Unordered list in ordered list" "<ol><li>Item 1</li><li>Item 2<ul><li>Item 1</li><li>Item 2</li></ul></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n - Item 1\n * Item 2\n4. Item 3"),
check_equal "List with just 1 item" "<ol><li>Item 1</li></ol>" (convert "1. Item 1"),
check_equal
"Checkbox in ordered list"
"<ol>\
\<li><input type=\"checkbox\" />Not checked</li>\
\<li><input type=\"checkbox\" checked=\"\" />Checked</li>\
\<li>Normal list item</li></ol>"
(convert "1. [ ] Not checked\n2. [x] Checked\n3. Normal list item")
] ]
htmlTests = htmlTests =
@@ -101,12 +118,15 @@ codeTests =
TestList TestList
[ check_equal "Code by itself" "<p><code>Hello world!</code></p>" (convert "`Hello world!`"), [ check_equal "Code by itself" "<p><code>Hello world!</code></p>" (convert "`Hello world!`"),
check_equal "Code in a paragraph" "<p>The following <code>text</code> is code</p>" (convert "The following `text` is code"), check_equal "Code in a paragraph" "<p>The following <code>text</code> is code</p>" (convert "The following `text` is code"),
check_equal "Code across paragraphs (shouldn't work)" "<p>`Incomplete</p><p>Code`</p>" (convert "`Incomplete\n\nCode`") -- At the moment, this is just treated as a syntax error, so nothing is rendered. check_equal "Code across paragraphs (shouldn't work)" "<p>`Incomplete</p><p>Code`</p>" (convert "`Incomplete\n\nCode`"), -- At the moment, this is just treated as a syntax error, so nothing is rendered.
check_equal "Code block" "<pre><code>Test code block</code></pre>" (convert "```\nTest code block\n```"),
check_equal "Multiple code blocks" "<pre><code>Test code block</code></pre><pre><code>Next block</code></pre>" (convert "```\nTest code block\n```\n\n```\nNext block\n```")
] ]
imageTests = imageTests =
TestList TestList
[ check_equal "Image with text" "<p>This is an image <img src=\"img.png\" alt=\"Image 1\" /></p>" (convert "This is an image ![Image 1](img.png)") [ check_equal "Image with text" "<p>This is an image <img src=\"img.png\" alt=\"Image 1\"/></p>" (convert "This is an image ![Image 1](img.png)"),
check_equal "Image with classes" "<p>This is an image <img src=\"img.png\" alt=\"Image 1\" class=\"new-img\"/></p>" (convert "This is an image ![Image 1](img.png){.new-img}")
] ]
figureTests = figureTests =
@@ -118,6 +138,25 @@ horizontalRuleTests =
TestList TestList
[check_equal "Horizontal Rule" "<p>a</p><hr><p>b</p>" (convert "a\n\n---\n\nb")] [check_equal "Horizontal Rule" "<p>a</p><hr><p>b</p>" (convert "a\n\n---\n\nb")]
subscriptTests =
TestList
[check_equal "Should convert subscript" "A<sub>b</sub>" (convert "A~b~")]
superscriptTests =
TestList
[check_equal "Should convert superscript" "A<sup>b</sup>" (convert "A^b^")]
tableTests =
TestList
[ check_equal
"Basic table"
"<table>\
\<thead><tr><th>Col 1</th><th>Col 2</th><th>Col 3</th></tr></thead>\
\<tbody><tr><td>Data 1</td><td>Data 2</td><td>Data 3</td></tr>\
\<tr><td>More Data 1</td><td>More Data 2</td><td>More Data 3</td></tr></tbody></table>"
(convert "| Col 1 | Col 2 | Col 3 |\n|---|---|---|\n| Data 1 | Data 2 | Data 3 |\n| More Data 1 | More Data 2 | More Data 3 |")
]
integrationTests = integrationTests =
TestList TestList
[ check_equal "Integration 1" "<h1>Sample Markdown</h1><p>This is some basic, sample markdown.</p><h2><b>Second</b> <i>Heading</i></h2>" (convert "# Sample Markdown\n\n This is some basic, sample markdown.\n\n ## __Second__ _Heading_"), [ check_equal "Integration 1" "<h1>Sample Markdown</h1><p>This is some basic, sample markdown.</p><h2><b>Second</b> <i>Heading</i></h2>" (convert "# Sample Markdown\n\n This is some basic, sample markdown.\n\n ## __Second__ _Heading_"),
@@ -125,7 +164,7 @@ integrationTests =
check_equal "Integration 3" "<h1>Hello</h1><p>World</p>" (convert "# Hello\nWorld"), check_equal "Integration 3" "<h1>Hello</h1><p>World</p>" (convert "# Hello\nWorld"),
check_equal "Integration 4" "<p>a b</p>" (convert "a\nb"), check_equal "Integration 4" "<p>a b</p>" (convert "a\nb"),
check_equal "Integration 5" "<h1>Hello</h1>" (convert "# Hello\n"), check_equal "Integration 5" "<h1>Hello</h1>" (convert "# Hello\n"),
check_equal "Integration 6" "<p>First line<br>Second line</p>" (convert "First line \nSecond line"), check_equal "Integration 6" "<p>First line<br />Second line</p>" (convert "First line \nSecond line"),
check_equal check_equal
"Integration 7" "Integration 7"
"<h1>Sample Markdown</h1><p>This is some basic, sample markdown.</p><h2>Second \ "<h1>Sample Markdown</h1><p>This is some basic, sample markdown.</p><h2>Second \
@@ -138,7 +177,7 @@ integrationTests =
"# Sample Markdown\n\nThis is some basic, sample markdown.\n\n## Second \ "# Sample Markdown\n\nThis is some basic, sample markdown.\n\n## Second \
\Heading\n\n- Unordered lists, and:\n 1. One\n 2. Two\n 3. Three\n\ \Heading\n\n- Unordered lists, and:\n 1. One\n 2. Two\n 3. Three\n\
\- More\n\n> Blockquote\n\nAnd **bold**, *italics*, and even *italics and \ \- More\n\n> Blockquote\n\nAnd **bold**, *italics*, and even *italics and \
\later **bold***. Even ~~strikethrough~~. [A link](https://markdowntohtml.com) to somewhere." \later __bold__*. Even ~~strikethrough~~. [A link](https://markdowntohtml.com) to somewhere."
) )
] ]
@@ -157,6 +196,7 @@ tests =
figureTests, figureTests,
codeTests, codeTests,
horizontalRuleTests, horizontalRuleTests,
tableTests,
integrationTests integrationTests
] ]