Compare commits
16 Commits
5c871f2b25
...
e051c87f08
Author | SHA1 | Date | |
---|---|---|---|
e051c87f08 | |||
9b1c51897c | |||
2a3dddc7b0 | |||
a8793b5adb | |||
eecec764ad | |||
cdca6ea95e | |||
540b5430e5 | |||
00dfba81eb | |||
39152c0034 | |||
41b35be7c9 | |||
d2c8565f62 | |||
62eeef2abb | |||
9c6634cfec | |||
2a5a68b1de | |||
f8e1a98bdf | |||
05433c31f1 |
127
src/MdToHTML.hs
127
src/MdToHTML.hs
@@ -6,6 +6,7 @@ module MdToHTML where
|
||||
|
||||
import Control.Applicative
|
||||
import Control.Monad
|
||||
import Data.Char
|
||||
import Data.List
|
||||
import Data.Ord (comparing)
|
||||
import Debug.Trace
|
||||
@@ -87,17 +88,6 @@ leftmostLongestParse parser input =
|
||||
Nothing -> (mempty, mempty)
|
||||
Just x -> x
|
||||
|
||||
-- Parse if the string that's left matches the string comparator function
|
||||
lookaheadParse :: (String -> Bool) -> ReadP Char
|
||||
lookaheadParse stringCmp = do
|
||||
lookahead <- look
|
||||
case stringCmp lookahead of
|
||||
True -> get
|
||||
False -> pfail
|
||||
|
||||
lineToList :: MdToken -> [MdToken]
|
||||
lineToList (Line tokens) = tokens
|
||||
|
||||
specialChars = "\\#*_[\n"
|
||||
|
||||
-- Makes a parser greedy. Instead of returning all possible parses, only the longest one is returned.
|
||||
@@ -118,25 +108,6 @@ prepend x1 x2 = x1 ++ x2
|
||||
append :: [a] -> [a] -> [a]
|
||||
append x1 x2 = x2 ++ x1
|
||||
|
||||
-- Sequence two parsers, running one after the other and returning the result.
|
||||
sequenceParse :: ReadP a -> ReadP a -> ReadP [a]
|
||||
sequenceParse p1 p2 = twoElemList <$> p1 <*> p2
|
||||
where
|
||||
twoElemList elem1 elem2 = [elem1, elem2]
|
||||
|
||||
-- Parses p1 until p2 succeeds, but doesn't actually consume anything from p2.
|
||||
-- Similar to manyTill, except manyTill's second parser actually consumes characters.
|
||||
manyTillLazy :: ReadP a -> ReadP b -> ReadP [a]
|
||||
manyTillLazy p1 p2 = do
|
||||
res <- p1
|
||||
remaining <- look
|
||||
let p2res = readP_to_S p2 remaining
|
||||
case p2res of
|
||||
[] -> do
|
||||
res2 <- manyTillLazy p1 p2
|
||||
return (res : res2)
|
||||
_ -> return [res]
|
||||
|
||||
-- Parse until EOL or EOF
|
||||
parseTillEol :: ReadP String
|
||||
parseTillEol = manyTill get (void (char '\n') <++ eof)
|
||||
@@ -176,9 +147,9 @@ parseBold = parseBoldWith "**" <|> parseBoldWith "__"
|
||||
|
||||
-- Parse italic text
|
||||
parseItalic :: ReadP MdToken
|
||||
parseItalic = parseBoldWith "*" <|> parseBoldWith "_"
|
||||
parseItalic = parseItalicWith "*" <|> parseItalicWith "_"
|
||||
where
|
||||
parseBoldWith delim = do
|
||||
parseItalicWith delim = do
|
||||
string delim
|
||||
inside <- greedyParse1 parseLineToken
|
||||
string delim
|
||||
@@ -316,17 +287,13 @@ parseBlockquote = do
|
||||
return (Blockquote parsedQuotedLines)
|
||||
|
||||
-- Parse a nested list item.
|
||||
parseUListNested :: ReadP MdToken
|
||||
parseUListNested = do
|
||||
-- firstChar <- string " " <++ string "\t"
|
||||
-- skipSpaces
|
||||
-- restOfLine <- manyTill get (void (char '\n') <++ eof)
|
||||
-- let restOfLineParsed = fst $ leftmostLongestParse parseLine restOfLine
|
||||
-- return restOfLineParsed
|
||||
parseListNested :: ReadP MdToken
|
||||
parseListNested = do
|
||||
let firstCharParser = string " " <++ string "\t"
|
||||
let restOfLineParser = manyTill get (void (char '\n') <++ eof)
|
||||
lines <- greedyParse1 (firstCharParser *> restOfLineParser)
|
||||
let linesParsed = fst $ leftmostLongestParse parseUnorderedList (init $ unlines lines)
|
||||
let linesParsed = fst $ leftmostLongestParse (parseUnorderedList <++ parseOrderedList) (init $ unlines lines)
|
||||
when (null (show linesParsed)) pfail
|
||||
return linesParsed
|
||||
|
||||
-- Parse an unordered list line item.
|
||||
@@ -334,36 +301,54 @@ parseUListLineItem :: ReadP MdToken
|
||||
parseUListLineItem = do
|
||||
firstChar <- choice (map char ['*', '+', '-'])
|
||||
char ' ' -- At least one space between list indicator and list text.
|
||||
parseListLineItemCommon
|
||||
|
||||
-- Parse an ordered list line item.
|
||||
parseOListLineItem :: ReadP MdToken
|
||||
parseOListLineItem = do
|
||||
num <- greedyParse1 (satisfy isDigit)
|
||||
char '.'
|
||||
char ' ' -- At least one space between list indicator and list text.
|
||||
parseListLineItemCommon
|
||||
|
||||
-- Common code for parsing list line items
|
||||
parseListLineItemCommon :: ReadP MdToken
|
||||
parseListLineItemCommon = do
|
||||
skipSpaces
|
||||
restOfLine <- many1 parseListLineToken
|
||||
void (char '\n') <++ eof
|
||||
nestedList <- parseUListNested <++ return (Unit "")
|
||||
nestedList <- parseListNested <++ return (Unit "")
|
||||
return $ Line [Line restOfLine, nestedList]
|
||||
|
||||
-- restOfLine <- manyTill get (void (char '\n') <++ eof)
|
||||
-- let restOfLineParsed = fst $ leftmostLongestParse parseLine restOfLine
|
||||
-- nestedList <- parseUListNested <++ return (Unit "")
|
||||
-- return $ Line [restOfLineParsed, nestedList]
|
||||
|
||||
-- Parse an unordered list paragraph item.
|
||||
-- This is defined as a line item, followed by an empty line, followed by one or more
|
||||
-- lines indented by a space or tab.
|
||||
parseUListParaItem :: ReadP MdToken
|
||||
parseUListParaItem = do
|
||||
firstLine <- parseUListLineItem
|
||||
char '\n'
|
||||
lines <- greedyParse1 ((string " " <|> string "\t") *> parseTillEol)
|
||||
let res = fst $ leftmostLongestParse (greedyParse1 parsePara) (init $ unlines lines)
|
||||
char '\n'
|
||||
res <- parseListParaItemCommon
|
||||
return $ Document (Para firstLine : res) -- I only wrap this in a document because I want some way of converting [MdToken] to MdToken, without any overhead. There is no other reason to wrap it in a Document.
|
||||
|
||||
-- This is hacky as hell
|
||||
-- parsedParas <- manyTillLazy parsePara (string "\n\n" *> choice (map char "*-+"))
|
||||
-- return $ Document parsedParas -- I wrap this in a document because I want some way of converting [MdToken] to MdToken, without any overhead. There is no other reason to wrap it in a Document.
|
||||
-- Parse an unordered list paragraph item.
|
||||
parseOListParaItem :: ReadP MdToken
|
||||
parseOListParaItem = do
|
||||
firstLine <- parseOListLineItem
|
||||
res <- parseListParaItemCommon
|
||||
return $ Document (Para firstLine : res) -- I only wrap this in a document because I want some way of converting [MdToken] to MdToken, without any overhead. There is no other reason to wrap it in a Document.
|
||||
|
||||
-- Common code for parsing list paragraph items.
|
||||
-- A list paragraph item is defined as a line item, followed by an empty line, followed by one or more
|
||||
-- lines indented by a space or tab.
|
||||
-- A list paragraph item can also be a blockquote.
|
||||
parseListParaItemCommon :: ReadP [MdToken]
|
||||
parseListParaItemCommon = do
|
||||
char '\n'
|
||||
lines <- greedyParse1 ((string " " <|> string "\t") *> parseTillEol)
|
||||
let res = fst $ leftmostLongestParse (greedyParse1 parseBlockquote <++ greedyParse1 parsePara) (init $ unlines lines)
|
||||
char '\n'
|
||||
return res -- I only wrap this in a document because I want some way of converting [MdToken] to MdToken, without any overhead. There is no other reason to wrap it in a Document.
|
||||
|
||||
-- Parse an unordered list item, which can be a line item or another list.
|
||||
parseUListItem :: ReadP MdToken
|
||||
parseUListItem = parseUListParaItem <++ parseUListLineItem <++ parseUListNested
|
||||
parseUListItem = parseUListParaItem <++ parseUListLineItem
|
||||
|
||||
-- Parse an unordered list.
|
||||
parseUnorderedList :: ReadP MdToken
|
||||
@@ -372,8 +357,36 @@ parseUnorderedList = do
|
||||
void (char '\n') <++ eof -- A list must end in an extra newline or eof
|
||||
return $ UnordList lineItems
|
||||
|
||||
-- --------
|
||||
|
||||
parseOListItem :: ReadP MdToken
|
||||
parseOListItem = parseOListParaItem <++ parseOListLineItem
|
||||
|
||||
-- Parses the first element of an ordered list, which must start with '1.'
|
||||
parseFirstOListItem :: ReadP MdToken
|
||||
parseFirstOListItem = do
|
||||
remaining <- look
|
||||
when (take 2 remaining /= "1.") pfail
|
||||
parseOListLineItem
|
||||
|
||||
parseOrderedList :: ReadP MdToken
|
||||
parseOrderedList = do
|
||||
firstLine <- parseFirstOListItem
|
||||
lineItems <- greedyParse1 parseOListItem
|
||||
void (char '\n') <++ eof
|
||||
return $ OrdList (firstLine : lineItems)
|
||||
|
||||
documentParsers :: [ReadP MdToken]
|
||||
documentParsers =
|
||||
[ parseHeader,
|
||||
parseBlockquote,
|
||||
parseUnorderedList,
|
||||
parseOrderedList,
|
||||
parsePara
|
||||
]
|
||||
|
||||
-- Parse a document, which is multiple paragraphs.
|
||||
parseDocument :: ReadP MdToken
|
||||
parseDocument = do
|
||||
res <- manyTill (parseHeader <++ parseBlockquote <++ parseUnorderedList <++ parsePara) eof
|
||||
res <- manyTill (fallthroughParser documentParsers) eof
|
||||
return (Document res)
|
||||
|
@@ -66,14 +66,31 @@ blockquoteTests =
|
||||
|
||||
unorderedListTests =
|
||||
TestList
|
||||
[ check_equal "Basic ordered list" "<ul><li>Item 1</li><li>Item 2</li><li>Item 3</li></ul>" (convert "* Item 1\n* Item 2\n* Item 3"),
|
||||
[ check_equal "Basic unordered list" "<ul><li>Item 1</li><li>Item 2</li><li>Item 3</li></ul>" (convert "* Item 1\n* Item 2\n* Item 3"),
|
||||
check_equal "Mixing list indicators" "<ul><li>Item 1</li><li>Item 2</li><li>Item 3</li></ul>" (convert "* Item 1\n+ Item 2\n- Item 3"),
|
||||
check_equal "Formatted lists" "<ul><li><b>Item 1</b></li><li><i>Item 2</i></li><li><b><i>Item 3</i></b></li></ul>" (convert "* __Item 1__\n+ _Item 2_\n- ***Item 3***"),
|
||||
check_equal "Nested list" "<ul><li>Item 1</li><li>Item 2</li><li>Item 3<ul><li>Subitem 1</li><li>Subitem 2</li></ul></li></ul>" (convert "* Item 1\n* Item 2\n* Item 3\n * Subitem 1\n * Subitem 2"),
|
||||
check_equal "Paragraph in list" "<ul><li>Item 1</li><li><p>Item 2</p><p>More stuff</p></li><li>Item 3</li></ul>" (convert "- Item 1\n- Item 2\n\n More stuff\n\n- Item 3"),
|
||||
check_equal "Paragraph before list" "<p>This is a list</p><ul><li>Item 1</li><li>Item 2</li></ul>" (convert "This is a list\n\n* Item 1\n* Item 2"),
|
||||
check_equal "Paragraph before list" "<h3>This is a list</h3><ul><li>Item 1</li><li>Item 2</li></ul>" (convert "### This is a list\n\n* Item 1\n* Item 2"),
|
||||
check_equal "Nested list then back" "<ul><li>Item 1</li><li>Item 2<ul><li>Item 3</li><li>Item 4</li></ul></li><li>Item 5</li></ul>" (convert "- Item 1\n- Item 2\n - Item 3\n - Item 4\n- Item 5")
|
||||
check_equal "Nested list then back" "<ul><li>Item 1</li><li>Item 2<ul><li>Item 3</li><li>Item 4</li></ul></li><li>Item 5</li></ul>" (convert "- Item 1\n- Item 2\n - Item 3\n - Item 4\n- Item 5"),
|
||||
check_equal "Blockquote in list" "<ul><li>Item 1</li><li><p>Item 2</p><blockquote><p>Quote</p></blockquote></li><li>Item 3</li></ul>" (convert "- Item 1\n- Item 2\n\n > Quote\n\n- Item 3"),
|
||||
check_equal "Ordered list in unordered list" "<ul><li>Item 1</li><li>Item 2<ol><li>Item 1</li><li>Item 2</li></ol></li><li>Item 3</li></ul>" (convert "- Item 1\n- Item 2\n 1. Item 1\n 2. Item 2\n- Item 3")
|
||||
]
|
||||
|
||||
orderedListTests =
|
||||
TestList
|
||||
[ check_equal "Basic ordered list" "<ol><li>Item 1</li><li>Item 2</li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n3. Item 3"),
|
||||
check_equal "Mixing list numbering" "<ol><li>Item 1</li><li>Item 2</li><li>Item 3</li></ol>" (convert "1. Item 1\n3. Item 2\n2. Item 3"),
|
||||
check_equal "Should not convert list without number 1" "<p>2. Item 1 1. Item 2</p>" (convert "2. Item 1\n1. Item 2"),
|
||||
check_equal "Formatted lists" "<ol><li><b>Item 1</b></li><li><i>Item 2</i></li><li><b><i>Item 3</i></b></li></ol>" (convert "1. __Item 1__\n2. _Item 2_\n3. ***Item 3***"),
|
||||
check_equal "Nested list" "<ol><li>Item 1</li><li>Item 2</li><li>Item 3<ol><li>Subitem 1</li><li>Subitem 2</li></ol></li></ol>" (convert "1. Item 1\n2. Item 2\n3. Item 3\n 1. Subitem 1\n 2. Subitem 2"),
|
||||
check_equal "Paragraph in list" "<ol><li>Item 1</li><li><p>Item 2</p><p>More stuff</p></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n\n More stuff\n\n1. Item 3"),
|
||||
check_equal "Paragraph before list" "<p>This is a list</p><ol><li>Item 1</li><li>Item 2</li></ol>" (convert "This is a list\n\n1. Item 1\n1. Item 2"),
|
||||
check_equal "Paragraph before list" "<h3>This is a list</h3><ol><li>Item 1</li><li>Item 2</li></ol>" (convert "### This is a list\n\n1. Item 1\n200. Item 2"),
|
||||
check_equal "Nested list then back" "<ol><li>Item 1</li><li>Item 2<ol><li>Item 3</li><li>Item 4</li></ol></li><li>Item 5</li></ol>" (convert "1. Item 1\n2. Item 2\n 1. Item 3\n 3. Item 4\n5. Item 5"),
|
||||
check_equal "Blockquote in list" "<ol><li>Item 1</li><li><p>Item 2</p><blockquote><p>Quote</p></blockquote></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n\n > Quote\n\n3. Item 3"),
|
||||
check_equal "Unordered list in ordered list" "<ol><li>Item 1</li><li>Item 2<ul><li>Item 1</li><li>Item 2</li></ul></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n - Item 1\n * Item 2\n4. Item 3")
|
||||
]
|
||||
|
||||
integrationTests =
|
||||
@@ -87,14 +104,14 @@ integrationTests =
|
||||
check_equal
|
||||
"Integration 7"
|
||||
"<h1>Sample Markdown</h1><p>This is some basic, sample markdown.</p><h2>Second \
|
||||
\Heading</h2><ul><li>Unordered lists, and:<ul><li>One</li><li>Two</li><li>\
|
||||
\Three</li></ul></li><li>More</li></ul><blockquote><p>Blockquote</p>\
|
||||
\Heading</h2><ul><li>Unordered lists, and:<ol><li>One</li><li>Two</li><li>\
|
||||
\Three</li></ol></li><li>More</li></ul><blockquote><p>Blockquote</p>\
|
||||
\</blockquote><p>And <b>bold</b>, <i>italics</i>, and even <i>italics \
|
||||
\and later <b>bold</b></i>. Even <s>strikethrough</s>. \
|
||||
\<a href=\"https://markdowntohtml.com\">A link</a> to somewhere.</p>"
|
||||
( convert
|
||||
"# Sample Markdown\n\nThis is some basic, sample markdown.\n\n## Second \
|
||||
\Heading\n\n- Unordered lists, and:\n - One\n - Two\n - Three\n\
|
||||
\Heading\n\n- Unordered lists, and:\n 1. One\n 2. Two\n 3. Three\n\
|
||||
\- More\n\n> Blockquote\n\nAnd **bold**, *italics*, and even *italics and \
|
||||
\later **bold***. Even ~~strikethrough~~. [A link](https://markdowntohtml.com) to somewhere."
|
||||
)
|
||||
@@ -109,6 +126,7 @@ tests =
|
||||
escapedCharTests,
|
||||
blockquoteTests,
|
||||
unorderedListTests,
|
||||
orderedListTests,
|
||||
integrationTests
|
||||
]
|
||||
|
||||
|
Reference in New Issue
Block a user