Compare commits

16 Commits

Author SHA1 Message Date
e051c87f08 Factor list line common parsing into a separate function; refactored
OList and UList line parsing to us it
2025-05-20 16:48:31 -04:00
9b1c51897c A nested list can be ordered or unordered 2025-05-20 16:47:58 -04:00
2a3dddc7b0 Rename function 2025-05-20 16:47:31 -04:00
a8793b5adb Remove obsolete comments 2025-05-20 16:47:16 -04:00
eecec764ad A list can contain a blockquote as well. 2025-05-20 16:47:04 -04:00
cdca6ea95e A list item doesn't need to have a nested list item parser, because the
line item parser handles the nested list.
2025-05-20 16:46:52 -04:00
540b5430e5 Use list of document parsers in parseDocument 2025-05-20 16:45:56 -04:00
00dfba81eb Created a list of document parsers; implemented ordered list parsing. 2025-05-20 16:45:37 -04:00
39152c0034 Factor out common code for UList and OList parsing into a separate
function. Refactored UList function; wrote OList function.
2025-05-20 16:45:07 -04:00
41b35be7c9 Rename function 2025-05-20 16:43:35 -04:00
d2c8565f62 Import Data.Char for isDigit 2025-05-20 16:43:22 -04:00
62eeef2abb Removed unused functions 2025-05-20 16:43:10 -04:00
9c6634cfec Added ordered list tests 2025-05-20 16:42:40 -04:00
2a5a68b1de Fixed test name 2025-05-20 16:42:12 -04:00
f8e1a98bdf Remove obsolete comment 2025-05-20 14:03:30 -04:00
05433c31f1 Remove unused functions 2025-05-20 14:02:50 -04:00
2 changed files with 93 additions and 62 deletions

View File

@@ -6,6 +6,7 @@ module MdToHTML where
import Control.Applicative import Control.Applicative
import Control.Monad import Control.Monad
import Data.Char
import Data.List import Data.List
import Data.Ord (comparing) import Data.Ord (comparing)
import Debug.Trace import Debug.Trace
@@ -87,17 +88,6 @@ leftmostLongestParse parser input =
Nothing -> (mempty, mempty) Nothing -> (mempty, mempty)
Just x -> x Just x -> x
-- Parse if the string that's left matches the string comparator function
lookaheadParse :: (String -> Bool) -> ReadP Char
lookaheadParse stringCmp = do
lookahead <- look
case stringCmp lookahead of
True -> get
False -> pfail
lineToList :: MdToken -> [MdToken]
lineToList (Line tokens) = tokens
specialChars = "\\#*_[\n" specialChars = "\\#*_[\n"
-- Makes a parser greedy. Instead of returning all possible parses, only the longest one is returned. -- Makes a parser greedy. Instead of returning all possible parses, only the longest one is returned.
@@ -118,25 +108,6 @@ prepend x1 x2 = x1 ++ x2
append :: [a] -> [a] -> [a] append :: [a] -> [a] -> [a]
append x1 x2 = x2 ++ x1 append x1 x2 = x2 ++ x1
-- Sequence two parsers, running one after the other and returning the result.
sequenceParse :: ReadP a -> ReadP a -> ReadP [a]
sequenceParse p1 p2 = twoElemList <$> p1 <*> p2
where
twoElemList elem1 elem2 = [elem1, elem2]
-- Parses p1 until p2 succeeds, but doesn't actually consume anything from p2.
-- Similar to manyTill, except manyTill's second parser actually consumes characters.
manyTillLazy :: ReadP a -> ReadP b -> ReadP [a]
manyTillLazy p1 p2 = do
res <- p1
remaining <- look
let p2res = readP_to_S p2 remaining
case p2res of
[] -> do
res2 <- manyTillLazy p1 p2
return (res : res2)
_ -> return [res]
-- Parse until EOL or EOF -- Parse until EOL or EOF
parseTillEol :: ReadP String parseTillEol :: ReadP String
parseTillEol = manyTill get (void (char '\n') <++ eof) parseTillEol = manyTill get (void (char '\n') <++ eof)
@@ -176,9 +147,9 @@ parseBold = parseBoldWith "**" <|> parseBoldWith "__"
-- Parse italic text -- Parse italic text
parseItalic :: ReadP MdToken parseItalic :: ReadP MdToken
parseItalic = parseBoldWith "*" <|> parseBoldWith "_" parseItalic = parseItalicWith "*" <|> parseItalicWith "_"
where where
parseBoldWith delim = do parseItalicWith delim = do
string delim string delim
inside <- greedyParse1 parseLineToken inside <- greedyParse1 parseLineToken
string delim string delim
@@ -316,17 +287,13 @@ parseBlockquote = do
return (Blockquote parsedQuotedLines) return (Blockquote parsedQuotedLines)
-- Parse a nested list item. -- Parse a nested list item.
parseUListNested :: ReadP MdToken parseListNested :: ReadP MdToken
parseUListNested = do parseListNested = do
-- firstChar <- string " " <++ string "\t"
-- skipSpaces
-- restOfLine <- manyTill get (void (char '\n') <++ eof)
-- let restOfLineParsed = fst $ leftmostLongestParse parseLine restOfLine
-- return restOfLineParsed
let firstCharParser = string " " <++ string "\t" let firstCharParser = string " " <++ string "\t"
let restOfLineParser = manyTill get (void (char '\n') <++ eof) let restOfLineParser = manyTill get (void (char '\n') <++ eof)
lines <- greedyParse1 (firstCharParser *> restOfLineParser) lines <- greedyParse1 (firstCharParser *> restOfLineParser)
let linesParsed = fst $ leftmostLongestParse parseUnorderedList (init $ unlines lines) let linesParsed = fst $ leftmostLongestParse (parseUnorderedList <++ parseOrderedList) (init $ unlines lines)
when (null (show linesParsed)) pfail
return linesParsed return linesParsed
-- Parse an unordered list line item. -- Parse an unordered list line item.
@@ -334,36 +301,54 @@ parseUListLineItem :: ReadP MdToken
parseUListLineItem = do parseUListLineItem = do
firstChar <- choice (map char ['*', '+', '-']) firstChar <- choice (map char ['*', '+', '-'])
char ' ' -- At least one space between list indicator and list text. char ' ' -- At least one space between list indicator and list text.
parseListLineItemCommon
-- Parse an ordered list line item.
parseOListLineItem :: ReadP MdToken
parseOListLineItem = do
num <- greedyParse1 (satisfy isDigit)
char '.'
char ' ' -- At least one space between list indicator and list text.
parseListLineItemCommon
-- Common code for parsing list line items
parseListLineItemCommon :: ReadP MdToken
parseListLineItemCommon = do
skipSpaces skipSpaces
restOfLine <- many1 parseListLineToken restOfLine <- many1 parseListLineToken
void (char '\n') <++ eof void (char '\n') <++ eof
nestedList <- parseUListNested <++ return (Unit "") nestedList <- parseListNested <++ return (Unit "")
return $ Line [Line restOfLine, nestedList] return $ Line [Line restOfLine, nestedList]
-- restOfLine <- manyTill get (void (char '\n') <++ eof)
-- let restOfLineParsed = fst $ leftmostLongestParse parseLine restOfLine
-- nestedList <- parseUListNested <++ return (Unit "")
-- return $ Line [restOfLineParsed, nestedList]
-- Parse an unordered list paragraph item. -- Parse an unordered list paragraph item.
-- This is defined as a line item, followed by an empty line, followed by one or more
-- lines indented by a space or tab.
parseUListParaItem :: ReadP MdToken parseUListParaItem :: ReadP MdToken
parseUListParaItem = do parseUListParaItem = do
firstLine <- parseUListLineItem firstLine <- parseUListLineItem
char '\n' res <- parseListParaItemCommon
lines <- greedyParse1 ((string " " <|> string "\t") *> parseTillEol)
let res = fst $ leftmostLongestParse (greedyParse1 parsePara) (init $ unlines lines)
char '\n'
return $ Document (Para firstLine : res) -- I only wrap this in a document because I want some way of converting [MdToken] to MdToken, without any overhead. There is no other reason to wrap it in a Document. return $ Document (Para firstLine : res) -- I only wrap this in a document because I want some way of converting [MdToken] to MdToken, without any overhead. There is no other reason to wrap it in a Document.
-- This is hacky as hell -- Parse an unordered list paragraph item.
-- parsedParas <- manyTillLazy parsePara (string "\n\n" *> choice (map char "*-+")) parseOListParaItem :: ReadP MdToken
-- return $ Document parsedParas -- I wrap this in a document because I want some way of converting [MdToken] to MdToken, without any overhead. There is no other reason to wrap it in a Document. parseOListParaItem = do
firstLine <- parseOListLineItem
res <- parseListParaItemCommon
return $ Document (Para firstLine : res) -- I only wrap this in a document because I want some way of converting [MdToken] to MdToken, without any overhead. There is no other reason to wrap it in a Document.
-- Common code for parsing list paragraph items.
-- A list paragraph item is defined as a line item, followed by an empty line, followed by one or more
-- lines indented by a space or tab.
-- A list paragraph item can also be a blockquote.
parseListParaItemCommon :: ReadP [MdToken]
parseListParaItemCommon = do
char '\n'
lines <- greedyParse1 ((string " " <|> string "\t") *> parseTillEol)
let res = fst $ leftmostLongestParse (greedyParse1 parseBlockquote <++ greedyParse1 parsePara) (init $ unlines lines)
char '\n'
return res -- I only wrap this in a document because I want some way of converting [MdToken] to MdToken, without any overhead. There is no other reason to wrap it in a Document.
-- Parse an unordered list item, which can be a line item or another list. -- Parse an unordered list item, which can be a line item or another list.
parseUListItem :: ReadP MdToken parseUListItem :: ReadP MdToken
parseUListItem = parseUListParaItem <++ parseUListLineItem <++ parseUListNested parseUListItem = parseUListParaItem <++ parseUListLineItem
-- Parse an unordered list. -- Parse an unordered list.
parseUnorderedList :: ReadP MdToken parseUnorderedList :: ReadP MdToken
@@ -372,8 +357,36 @@ parseUnorderedList = do
void (char '\n') <++ eof -- A list must end in an extra newline or eof void (char '\n') <++ eof -- A list must end in an extra newline or eof
return $ UnordList lineItems return $ UnordList lineItems
-- --------
parseOListItem :: ReadP MdToken
parseOListItem = parseOListParaItem <++ parseOListLineItem
-- Parses the first element of an ordered list, which must start with '1.'
parseFirstOListItem :: ReadP MdToken
parseFirstOListItem = do
remaining <- look
when (take 2 remaining /= "1.") pfail
parseOListLineItem
parseOrderedList :: ReadP MdToken
parseOrderedList = do
firstLine <- parseFirstOListItem
lineItems <- greedyParse1 parseOListItem
void (char '\n') <++ eof
return $ OrdList (firstLine : lineItems)
documentParsers :: [ReadP MdToken]
documentParsers =
[ parseHeader,
parseBlockquote,
parseUnorderedList,
parseOrderedList,
parsePara
]
-- Parse a document, which is multiple paragraphs. -- Parse a document, which is multiple paragraphs.
parseDocument :: ReadP MdToken parseDocument :: ReadP MdToken
parseDocument = do parseDocument = do
res <- manyTill (parseHeader <++ parseBlockquote <++ parseUnorderedList <++ parsePara) eof res <- manyTill (fallthroughParser documentParsers) eof
return (Document res) return (Document res)

View File

@@ -66,14 +66,31 @@ blockquoteTests =
unorderedListTests = unorderedListTests =
TestList TestList
[ check_equal "Basic ordered list" "<ul><li>Item 1</li><li>Item 2</li><li>Item 3</li></ul>" (convert "* Item 1\n* Item 2\n* Item 3"), [ check_equal "Basic unordered list" "<ul><li>Item 1</li><li>Item 2</li><li>Item 3</li></ul>" (convert "* Item 1\n* Item 2\n* Item 3"),
check_equal "Mixing list indicators" "<ul><li>Item 1</li><li>Item 2</li><li>Item 3</li></ul>" (convert "* Item 1\n+ Item 2\n- Item 3"), check_equal "Mixing list indicators" "<ul><li>Item 1</li><li>Item 2</li><li>Item 3</li></ul>" (convert "* Item 1\n+ Item 2\n- Item 3"),
check_equal "Formatted lists" "<ul><li><b>Item 1</b></li><li><i>Item 2</i></li><li><b><i>Item 3</i></b></li></ul>" (convert "* __Item 1__\n+ _Item 2_\n- ***Item 3***"), check_equal "Formatted lists" "<ul><li><b>Item 1</b></li><li><i>Item 2</i></li><li><b><i>Item 3</i></b></li></ul>" (convert "* __Item 1__\n+ _Item 2_\n- ***Item 3***"),
check_equal "Nested list" "<ul><li>Item 1</li><li>Item 2</li><li>Item 3<ul><li>Subitem 1</li><li>Subitem 2</li></ul></li></ul>" (convert "* Item 1\n* Item 2\n* Item 3\n * Subitem 1\n * Subitem 2"), check_equal "Nested list" "<ul><li>Item 1</li><li>Item 2</li><li>Item 3<ul><li>Subitem 1</li><li>Subitem 2</li></ul></li></ul>" (convert "* Item 1\n* Item 2\n* Item 3\n * Subitem 1\n * Subitem 2"),
check_equal "Paragraph in list" "<ul><li>Item 1</li><li><p>Item 2</p><p>More stuff</p></li><li>Item 3</li></ul>" (convert "- Item 1\n- Item 2\n\n More stuff\n\n- Item 3"), check_equal "Paragraph in list" "<ul><li>Item 1</li><li><p>Item 2</p><p>More stuff</p></li><li>Item 3</li></ul>" (convert "- Item 1\n- Item 2\n\n More stuff\n\n- Item 3"),
check_equal "Paragraph before list" "<p>This is a list</p><ul><li>Item 1</li><li>Item 2</li></ul>" (convert "This is a list\n\n* Item 1\n* Item 2"), check_equal "Paragraph before list" "<p>This is a list</p><ul><li>Item 1</li><li>Item 2</li></ul>" (convert "This is a list\n\n* Item 1\n* Item 2"),
check_equal "Paragraph before list" "<h3>This is a list</h3><ul><li>Item 1</li><li>Item 2</li></ul>" (convert "### This is a list\n\n* Item 1\n* Item 2"), check_equal "Paragraph before list" "<h3>This is a list</h3><ul><li>Item 1</li><li>Item 2</li></ul>" (convert "### This is a list\n\n* Item 1\n* Item 2"),
check_equal "Nested list then back" "<ul><li>Item 1</li><li>Item 2<ul><li>Item 3</li><li>Item 4</li></ul></li><li>Item 5</li></ul>" (convert "- Item 1\n- Item 2\n - Item 3\n - Item 4\n- Item 5") check_equal "Nested list then back" "<ul><li>Item 1</li><li>Item 2<ul><li>Item 3</li><li>Item 4</li></ul></li><li>Item 5</li></ul>" (convert "- Item 1\n- Item 2\n - Item 3\n - Item 4\n- Item 5"),
check_equal "Blockquote in list" "<ul><li>Item 1</li><li><p>Item 2</p><blockquote><p>Quote</p></blockquote></li><li>Item 3</li></ul>" (convert "- Item 1\n- Item 2\n\n > Quote\n\n- Item 3"),
check_equal "Ordered list in unordered list" "<ul><li>Item 1</li><li>Item 2<ol><li>Item 1</li><li>Item 2</li></ol></li><li>Item 3</li></ul>" (convert "- Item 1\n- Item 2\n 1. Item 1\n 2. Item 2\n- Item 3")
]
orderedListTests =
TestList
[ check_equal "Basic ordered list" "<ol><li>Item 1</li><li>Item 2</li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n3. Item 3"),
check_equal "Mixing list numbering" "<ol><li>Item 1</li><li>Item 2</li><li>Item 3</li></ol>" (convert "1. Item 1\n3. Item 2\n2. Item 3"),
check_equal "Should not convert list without number 1" "<p>2. Item 1 1. Item 2</p>" (convert "2. Item 1\n1. Item 2"),
check_equal "Formatted lists" "<ol><li><b>Item 1</b></li><li><i>Item 2</i></li><li><b><i>Item 3</i></b></li></ol>" (convert "1. __Item 1__\n2. _Item 2_\n3. ***Item 3***"),
check_equal "Nested list" "<ol><li>Item 1</li><li>Item 2</li><li>Item 3<ol><li>Subitem 1</li><li>Subitem 2</li></ol></li></ol>" (convert "1. Item 1\n2. Item 2\n3. Item 3\n 1. Subitem 1\n 2. Subitem 2"),
check_equal "Paragraph in list" "<ol><li>Item 1</li><li><p>Item 2</p><p>More stuff</p></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n\n More stuff\n\n1. Item 3"),
check_equal "Paragraph before list" "<p>This is a list</p><ol><li>Item 1</li><li>Item 2</li></ol>" (convert "This is a list\n\n1. Item 1\n1. Item 2"),
check_equal "Paragraph before list" "<h3>This is a list</h3><ol><li>Item 1</li><li>Item 2</li></ol>" (convert "### This is a list\n\n1. Item 1\n200. Item 2"),
check_equal "Nested list then back" "<ol><li>Item 1</li><li>Item 2<ol><li>Item 3</li><li>Item 4</li></ol></li><li>Item 5</li></ol>" (convert "1. Item 1\n2. Item 2\n 1. Item 3\n 3. Item 4\n5. Item 5"),
check_equal "Blockquote in list" "<ol><li>Item 1</li><li><p>Item 2</p><blockquote><p>Quote</p></blockquote></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n\n > Quote\n\n3. Item 3"),
check_equal "Unordered list in ordered list" "<ol><li>Item 1</li><li>Item 2<ul><li>Item 1</li><li>Item 2</li></ul></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n - Item 1\n * Item 2\n4. Item 3")
] ]
integrationTests = integrationTests =
@@ -87,14 +104,14 @@ integrationTests =
check_equal check_equal
"Integration 7" "Integration 7"
"<h1>Sample Markdown</h1><p>This is some basic, sample markdown.</p><h2>Second \ "<h1>Sample Markdown</h1><p>This is some basic, sample markdown.</p><h2>Second \
\Heading</h2><ul><li>Unordered lists, and:<ul><li>One</li><li>Two</li><li>\ \Heading</h2><ul><li>Unordered lists, and:<ol><li>One</li><li>Two</li><li>\
\Three</li></ul></li><li>More</li></ul><blockquote><p>Blockquote</p>\ \Three</li></ol></li><li>More</li></ul><blockquote><p>Blockquote</p>\
\</blockquote><p>And <b>bold</b>, <i>italics</i>, and even <i>italics \ \</blockquote><p>And <b>bold</b>, <i>italics</i>, and even <i>italics \
\and later <b>bold</b></i>. Even <s>strikethrough</s>. \ \and later <b>bold</b></i>. Even <s>strikethrough</s>. \
\<a href=\"https://markdowntohtml.com\">A link</a> to somewhere.</p>" \<a href=\"https://markdowntohtml.com\">A link</a> to somewhere.</p>"
( convert ( convert
"# Sample Markdown\n\nThis is some basic, sample markdown.\n\n## Second \ "# Sample Markdown\n\nThis is some basic, sample markdown.\n\n## Second \
\Heading\n\n- Unordered lists, and:\n - One\n - Two\n - Three\n\ \Heading\n\n- Unordered lists, and:\n 1. One\n 2. Two\n 3. Three\n\
\- More\n\n> Blockquote\n\nAnd **bold**, *italics*, and even *italics and \ \- More\n\n> Blockquote\n\nAnd **bold**, *italics*, and even *italics and \
\later **bold***. Even ~~strikethrough~~. [A link](https://markdowntohtml.com) to somewhere." \later **bold***. Even ~~strikethrough~~. [A link](https://markdowntohtml.com) to somewhere."
) )
@@ -109,6 +126,7 @@ tests =
escapedCharTests, escapedCharTests,
blockquoteTests, blockquoteTests,
unorderedListTests, unorderedListTests,
orderedListTests,
integrationTests integrationTests
] ]