Compare commits
24 Commits
e7d94f225a
...
5c871f2b25
Author | SHA1 | Date | |
---|---|---|---|
5c871f2b25 | |||
5273c99e6e | |||
50888c9c3d | |||
45115c765c | |||
5b0d42fd2d | |||
2a585d00f2 | |||
11a3b14cb1 | |||
58d3142855 | |||
0fb651fffc | |||
bc05dede06 | |||
b69e34f823 | |||
2514ecdafc | |||
c52d5556a2 | |||
5fc1b1122a | |||
83dd0024c4 | |||
70761649ad | |||
b9c6cc4470 | |||
23691f9cfe | |||
8c220cc800 | |||
ee453c0259 | |||
c90d23617a | |||
c574699a8a | |||
f55e160e25 | |||
dddcca0185 |
193
src/MdToHTML.hs
193
src/MdToHTML.hs
@@ -7,15 +7,16 @@ module MdToHTML where
|
|||||||
import Control.Applicative
|
import Control.Applicative
|
||||||
import Control.Monad
|
import Control.Monad
|
||||||
import Data.List
|
import Data.List
|
||||||
|
import Data.Ord (comparing)
|
||||||
import Debug.Trace
|
import Debug.Trace
|
||||||
import Text.ParserCombinators.ReadP
|
import Text.ParserCombinators.ReadP
|
||||||
import Text.Printf
|
import Text.Printf
|
||||||
|
|
||||||
type HeaderLevel = Int
|
type HeaderLevel = Int
|
||||||
|
|
||||||
newtype URL = URL {getUrl :: String}
|
newtype URL = URL {getUrl :: String} deriving (Eq)
|
||||||
|
|
||||||
newtype ImgPath = ImgPath {getPath :: String}
|
newtype ImgPath = ImgPath {getPath :: String} deriving (Eq)
|
||||||
|
|
||||||
parseMany :: ReadP a -> ReadP [a]
|
parseMany :: ReadP a -> ReadP [a]
|
||||||
parseMany = Text.ParserCombinators.ReadP.many
|
parseMany = Text.ParserCombinators.ReadP.many
|
||||||
@@ -28,7 +29,7 @@ data MdToken
|
|||||||
| SingleNewline -- A single newline is rendered as a space.
|
| SingleNewline -- A single newline is rendered as a space.
|
||||||
| Linebreak
|
| Linebreak
|
||||||
| HorizontalRule
|
| HorizontalRule
|
||||||
| Blockquote MdToken
|
| Blockquote [MdToken]
|
||||||
| UnordList [MdToken]
|
| UnordList [MdToken]
|
||||||
| OrdList [MdToken]
|
| OrdList [MdToken]
|
||||||
| Code String
|
| Code String
|
||||||
@@ -39,6 +40,7 @@ data MdToken
|
|||||||
| Italic MdToken
|
| Italic MdToken
|
||||||
| Strikethrough MdToken
|
| Strikethrough MdToken
|
||||||
| Unit String
|
| Unit String
|
||||||
|
deriving (Eq)
|
||||||
|
|
||||||
-- Show instance for MdToken
|
-- Show instance for MdToken
|
||||||
instance Show MdToken where
|
instance Show MdToken where
|
||||||
@@ -96,7 +98,54 @@ lookaheadParse stringCmp = do
|
|||||||
lineToList :: MdToken -> [MdToken]
|
lineToList :: MdToken -> [MdToken]
|
||||||
lineToList (Line tokens) = tokens
|
lineToList (Line tokens) = tokens
|
||||||
|
|
||||||
specialChars = "\\#*_[\n "
|
specialChars = "\\#*_[\n"
|
||||||
|
|
||||||
|
-- Makes a parser greedy. Instead of returning all possible parses, only the longest one is returned.
|
||||||
|
greedyParse :: ReadP a -> ReadP [a]
|
||||||
|
greedyParse parser = do
|
||||||
|
greedyParse1 parser <++ return []
|
||||||
|
|
||||||
|
-- Like greedyParse, but the parser must succeed at least once.
|
||||||
|
greedyParse1 :: ReadP a -> ReadP [a]
|
||||||
|
greedyParse1 parser = do
|
||||||
|
parsed1 <- parser
|
||||||
|
parsed2 <- greedyParse1 parser <++ return []
|
||||||
|
return (parsed1 : parsed2)
|
||||||
|
|
||||||
|
prepend :: [a] -> [a] -> [a]
|
||||||
|
prepend x1 x2 = x1 ++ x2
|
||||||
|
|
||||||
|
append :: [a] -> [a] -> [a]
|
||||||
|
append x1 x2 = x2 ++ x1
|
||||||
|
|
||||||
|
-- Sequence two parsers, running one after the other and returning the result.
|
||||||
|
sequenceParse :: ReadP a -> ReadP a -> ReadP [a]
|
||||||
|
sequenceParse p1 p2 = twoElemList <$> p1 <*> p2
|
||||||
|
where
|
||||||
|
twoElemList elem1 elem2 = [elem1, elem2]
|
||||||
|
|
||||||
|
-- Parses p1 until p2 succeeds, but doesn't actually consume anything from p2.
|
||||||
|
-- Similar to manyTill, except manyTill's second parser actually consumes characters.
|
||||||
|
manyTillLazy :: ReadP a -> ReadP b -> ReadP [a]
|
||||||
|
manyTillLazy p1 p2 = do
|
||||||
|
res <- p1
|
||||||
|
remaining <- look
|
||||||
|
let p2res = readP_to_S p2 remaining
|
||||||
|
case p2res of
|
||||||
|
[] -> do
|
||||||
|
res2 <- manyTillLazy p1 p2
|
||||||
|
return (res : res2)
|
||||||
|
_ -> return [res]
|
||||||
|
|
||||||
|
-- Parse until EOL or EOF
|
||||||
|
parseTillEol :: ReadP String
|
||||||
|
parseTillEol = manyTill get (void (char '\n') <++ eof)
|
||||||
|
|
||||||
|
-- Takes a list of parsers. Returns a parser that will try them in
|
||||||
|
-- order, moving to the next one only if the current one fails.
|
||||||
|
fallthroughParser :: [ReadP a] -> ReadP a
|
||||||
|
fallthroughParser [x] = x
|
||||||
|
fallthroughParser (x : xs) = x <++ fallthroughParser xs
|
||||||
|
|
||||||
-- ---------------
|
-- ---------------
|
||||||
|
|
||||||
@@ -110,31 +159,38 @@ parseHeader = do
|
|||||||
pfail
|
pfail
|
||||||
skipSpaces
|
skipSpaces
|
||||||
text <- munch1 (/= '\n')
|
text <- munch1 (/= '\n')
|
||||||
Text.ParserCombinators.ReadP.optional (char '\n')
|
-- Text.ParserCombinators.ReadP.optional (char '\n')
|
||||||
|
skipSpaces
|
||||||
let parsedText = fst $ leftmostLongestParse parseLine text
|
let parsedText = fst $ leftmostLongestParse parseLine text
|
||||||
return (Header (length headers) parsedText)
|
return (Header (length headers) parsedText)
|
||||||
|
|
||||||
-- Parse bold text
|
-- Parse bold text
|
||||||
parseBold :: ReadP MdToken
|
parseBold :: ReadP MdToken
|
||||||
parseBold = do
|
parseBold = parseBoldWith "**" <|> parseBoldWith "__"
|
||||||
text <-
|
where
|
||||||
choice
|
parseBoldWith delim = do
|
||||||
[ between (string "__") (string "__") (many1 (lookaheadParse (/= "__"))),
|
string delim
|
||||||
between (string "**") (string "**") (many1 (lookaheadParse (/= "**")))
|
inside <- greedyParse1 parseLineToken
|
||||||
]
|
string delim
|
||||||
let parsedText = fst $ leftmostLongestParse parseLine text
|
return (Bold (Line inside))
|
||||||
return (Bold parsedText)
|
|
||||||
|
|
||||||
-- Parse italic text
|
-- Parse italic text
|
||||||
parseItalic :: ReadP MdToken
|
parseItalic :: ReadP MdToken
|
||||||
parseItalic = do
|
parseItalic = parseBoldWith "*" <|> parseBoldWith "_"
|
||||||
text <-
|
where
|
||||||
choice
|
parseBoldWith delim = do
|
||||||
[ between (string "_") (string "_") (munch1 (/= '_')),
|
string delim
|
||||||
between (string "*") (string "*") (munch1 (/= '*'))
|
inside <- greedyParse1 parseLineToken
|
||||||
]
|
string delim
|
||||||
let parsedText = fst $ leftmostLongestParse parseLine text
|
return (Italic (Line inside))
|
||||||
return (Italic parsedText)
|
|
||||||
|
-- Parse strikethrough text
|
||||||
|
parseStrikethrough :: ReadP MdToken
|
||||||
|
parseStrikethrough = do
|
||||||
|
string "~~"
|
||||||
|
inside <- many1 parseLineToken
|
||||||
|
string "~~"
|
||||||
|
return (Strikethrough (Line inside))
|
||||||
|
|
||||||
-- Parse a link
|
-- Parse a link
|
||||||
parseLink :: ReadP MdToken
|
parseLink :: ReadP MdToken
|
||||||
@@ -164,6 +220,12 @@ parseEscapedChar = do
|
|||||||
escapedChar <- choice (map char specialChars) -- Parse any of the special chars.
|
escapedChar <- choice (map char specialChars) -- Parse any of the special chars.
|
||||||
return (Unit [escapedChar])
|
return (Unit [escapedChar])
|
||||||
|
|
||||||
|
-- Parse a character as a Unit.
|
||||||
|
parseUnit :: ReadP MdToken
|
||||||
|
parseUnit = do
|
||||||
|
text <- satisfy (`notElem` specialChars)
|
||||||
|
return (Unit [text])
|
||||||
|
|
||||||
-- Parse a regular string as a Unit.
|
-- Parse a regular string as a Unit.
|
||||||
parseString :: ReadP MdToken
|
parseString :: ReadP MdToken
|
||||||
parseString = do
|
parseString = do
|
||||||
@@ -178,17 +240,33 @@ lineParsers =
|
|||||||
parseEscapedChar,
|
parseEscapedChar,
|
||||||
parseBold,
|
parseBold,
|
||||||
parseItalic,
|
parseItalic,
|
||||||
|
parseStrikethrough,
|
||||||
parseLink,
|
parseLink,
|
||||||
parseString
|
parseUnit
|
||||||
] -- A 'line' doesn't include a 'header'
|
] -- A 'line' doesn't include a 'header'
|
||||||
|
|
||||||
|
listLineParsers :: [ReadP MdToken]
|
||||||
|
listLineParsers =
|
||||||
|
[ parseLinebreak,
|
||||||
|
parseEscapedChar,
|
||||||
|
parseBold,
|
||||||
|
parseItalic,
|
||||||
|
parseStrikethrough,
|
||||||
|
parseLink,
|
||||||
|
parseUnit
|
||||||
|
] -- A list line cannot contain newlines.
|
||||||
|
|
||||||
-- List of all parsers
|
-- List of all parsers
|
||||||
allParsers :: [ReadP MdToken]
|
allParsers :: [ReadP MdToken]
|
||||||
allParsers = parseHeader : lineParsers
|
allParsers = parseHeader : lineParsers
|
||||||
|
|
||||||
-- Parse any of the above tokens.
|
-- Parse any of the line tokens.
|
||||||
parseLineToken :: ReadP MdToken
|
parseLineToken :: ReadP MdToken
|
||||||
parseLineToken = choice lineParsers
|
parseLineToken = fallthroughParser lineParsers
|
||||||
|
|
||||||
|
-- Parse any of the list line tokens.
|
||||||
|
parseListLineToken :: ReadP MdToken
|
||||||
|
parseListLineToken = fallthroughParser listLineParsers
|
||||||
|
|
||||||
-- Parse a line, consisting of one or more tokens.
|
-- Parse a line, consisting of one or more tokens.
|
||||||
parseLine :: ReadP MdToken
|
parseLine :: ReadP MdToken
|
||||||
@@ -230,11 +308,72 @@ parseQuotedLines =
|
|||||||
-- Parse a blockquote, which is a greater-than sign followed by a paragraph.
|
-- Parse a blockquote, which is a greater-than sign followed by a paragraph.
|
||||||
parseBlockquote :: ReadP MdToken
|
parseBlockquote :: ReadP MdToken
|
||||||
parseBlockquote = do
|
parseBlockquote = do
|
||||||
char '>'
|
quotedLines <- parseQuotedLines
|
||||||
Blockquote <$> (parseBlockquote <++ parsePara) -- Parse another blockquote or a regular paragraph, wrap it in a blockquote.
|
-- remaining <- look
|
||||||
|
-- let quotedLines = fst $ leftmostLongestParse parseQuotedLines remaining
|
||||||
|
-- string (init $ unlines quotedLines)
|
||||||
|
let parsedQuotedLines = fst $ leftmostLongestParse (many1 (parseBlockquote <++ parsePara)) (init $ unlines quotedLines) -- unlines joins the lines together with a newline, and adds a trailing newline. init removes the trailing newline.
|
||||||
|
return (Blockquote parsedQuotedLines)
|
||||||
|
|
||||||
|
-- Parse a nested list item.
|
||||||
|
parseUListNested :: ReadP MdToken
|
||||||
|
parseUListNested = do
|
||||||
|
-- firstChar <- string " " <++ string "\t"
|
||||||
|
-- skipSpaces
|
||||||
|
-- restOfLine <- manyTill get (void (char '\n') <++ eof)
|
||||||
|
-- let restOfLineParsed = fst $ leftmostLongestParse parseLine restOfLine
|
||||||
|
-- return restOfLineParsed
|
||||||
|
let firstCharParser = string " " <++ string "\t"
|
||||||
|
let restOfLineParser = manyTill get (void (char '\n') <++ eof)
|
||||||
|
lines <- greedyParse1 (firstCharParser *> restOfLineParser)
|
||||||
|
let linesParsed = fst $ leftmostLongestParse parseUnorderedList (init $ unlines lines)
|
||||||
|
return linesParsed
|
||||||
|
|
||||||
|
-- Parse an unordered list line item.
|
||||||
|
parseUListLineItem :: ReadP MdToken
|
||||||
|
parseUListLineItem = do
|
||||||
|
firstChar <- choice (map char ['*', '+', '-'])
|
||||||
|
char ' ' -- At least one space between list indicator and list text.
|
||||||
|
skipSpaces
|
||||||
|
restOfLine <- many1 parseListLineToken
|
||||||
|
void (char '\n') <++ eof
|
||||||
|
nestedList <- parseUListNested <++ return (Unit "")
|
||||||
|
return $ Line [Line restOfLine, nestedList]
|
||||||
|
|
||||||
|
-- restOfLine <- manyTill get (void (char '\n') <++ eof)
|
||||||
|
-- let restOfLineParsed = fst $ leftmostLongestParse parseLine restOfLine
|
||||||
|
-- nestedList <- parseUListNested <++ return (Unit "")
|
||||||
|
-- return $ Line [restOfLineParsed, nestedList]
|
||||||
|
|
||||||
|
-- Parse an unordered list paragraph item.
|
||||||
|
-- This is defined as a line item, followed by an empty line, followed by one or more
|
||||||
|
-- lines indented by a space or tab.
|
||||||
|
parseUListParaItem :: ReadP MdToken
|
||||||
|
parseUListParaItem = do
|
||||||
|
firstLine <- parseUListLineItem
|
||||||
|
char '\n'
|
||||||
|
lines <- greedyParse1 ((string " " <|> string "\t") *> parseTillEol)
|
||||||
|
let res = fst $ leftmostLongestParse (greedyParse1 parsePara) (init $ unlines lines)
|
||||||
|
char '\n'
|
||||||
|
return $ Document (Para firstLine : res) -- I only wrap this in a document because I want some way of converting [MdToken] to MdToken, without any overhead. There is no other reason to wrap it in a Document.
|
||||||
|
|
||||||
|
-- This is hacky as hell
|
||||||
|
-- parsedParas <- manyTillLazy parsePara (string "\n\n" *> choice (map char "*-+"))
|
||||||
|
-- return $ Document parsedParas -- I wrap this in a document because I want some way of converting [MdToken] to MdToken, without any overhead. There is no other reason to wrap it in a Document.
|
||||||
|
|
||||||
|
-- Parse an unordered list item, which can be a line item or another list.
|
||||||
|
parseUListItem :: ReadP MdToken
|
||||||
|
parseUListItem = parseUListParaItem <++ parseUListLineItem <++ parseUListNested
|
||||||
|
|
||||||
|
-- Parse an unordered list.
|
||||||
|
parseUnorderedList :: ReadP MdToken
|
||||||
|
parseUnorderedList = do
|
||||||
|
lineItems <- greedyParse1 parseUListItem
|
||||||
|
void (char '\n') <++ eof -- A list must end in an extra newline or eof
|
||||||
|
return $ UnordList lineItems
|
||||||
|
|
||||||
-- Parse a document, which is multiple paragraphs.
|
-- Parse a document, which is multiple paragraphs.
|
||||||
parseDocument :: ReadP MdToken
|
parseDocument :: ReadP MdToken
|
||||||
parseDocument = do
|
parseDocument = do
|
||||||
res <- manyTill (parseHeader <++ parseBlockquote <++ parsePara) eof
|
res <- manyTill (parseHeader <++ parseBlockquote <++ parseUnorderedList <++ parsePara) eof
|
||||||
return (Document res)
|
return (Document res)
|
||||||
|
@@ -23,7 +23,15 @@ boldTests =
|
|||||||
TestList
|
TestList
|
||||||
[ check_equal "Should convert bold" "<p><b>Hello</b></p>" (convert "__Hello__"),
|
[ check_equal "Should convert bold" "<p><b>Hello</b></p>" (convert "__Hello__"),
|
||||||
check_equal "Should convert italic" "<p><i>Hello</i></p>" (convert "_Hello_"),
|
check_equal "Should convert italic" "<p><i>Hello</i></p>" (convert "_Hello_"),
|
||||||
check_equal "Should convert bold and italic in a sentence" "<p>It <i>is</i> a <b>wonderful</b> day</p>" (convert "It _is_ a __wonderful__ day")
|
check_equal "Should convert bold and italic in a sentence" "<p>It <i>is</i> a <b>wonderful</b> day</p>" (convert "It _is_ a __wonderful__ day"),
|
||||||
|
check_equal "Should convert nested bold and italic" "<p><b>Bold then <i>Italic</i></b></p>" (convert "**Bold then *Italic***"),
|
||||||
|
check_equal "Should convert nested bold and italic" "<p><i>Italic then <b>Bold</b></i></p>" (convert "*Italic then **Bold***")
|
||||||
|
]
|
||||||
|
|
||||||
|
strikethroughTests =
|
||||||
|
TestList
|
||||||
|
[ check_equal "Should convert strikethrough" "<p><s>Hello</s></p>" (convert "~~Hello~~"),
|
||||||
|
check_equal "Should convert long sentence with tilde" "<p><s>The universe is ~7 days old</s>. The universe is 13 billion years old.</p>" (convert "~~The universe is ~7 days old~~. The universe is 13 billion years old.")
|
||||||
]
|
]
|
||||||
|
|
||||||
linkTests =
|
linkTests =
|
||||||
@@ -56,9 +64,16 @@ blockquoteTests =
|
|||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
|
||||||
orderedListTests =
|
unorderedListTests =
|
||||||
TestList
|
TestList
|
||||||
[ check_equal "Basic ordered list" "<ol><li>Item 1</li><li>Item 2</li><li>Item 3</li></ol" (convert "1. Item 1\n2. Item2\n3. Item3")
|
[ check_equal "Basic ordered list" "<ul><li>Item 1</li><li>Item 2</li><li>Item 3</li></ul>" (convert "* Item 1\n* Item 2\n* Item 3"),
|
||||||
|
check_equal "Mixing list indicators" "<ul><li>Item 1</li><li>Item 2</li><li>Item 3</li></ul>" (convert "* Item 1\n+ Item 2\n- Item 3"),
|
||||||
|
check_equal "Formatted lists" "<ul><li><b>Item 1</b></li><li><i>Item 2</i></li><li><b><i>Item 3</i></b></li></ul>" (convert "* __Item 1__\n+ _Item 2_\n- ***Item 3***"),
|
||||||
|
check_equal "Nested list" "<ul><li>Item 1</li><li>Item 2</li><li>Item 3<ul><li>Subitem 1</li><li>Subitem 2</li></ul></li></ul>" (convert "* Item 1\n* Item 2\n* Item 3\n * Subitem 1\n * Subitem 2"),
|
||||||
|
check_equal "Paragraph in list" "<ul><li>Item 1</li><li><p>Item 2</p><p>More stuff</p></li><li>Item 3</li></ul>" (convert "- Item 1\n- Item 2\n\n More stuff\n\n- Item 3"),
|
||||||
|
check_equal "Paragraph before list" "<p>This is a list</p><ul><li>Item 1</li><li>Item 2</li></ul>" (convert "This is a list\n\n* Item 1\n* Item 2"),
|
||||||
|
check_equal "Paragraph before list" "<h3>This is a list</h3><ul><li>Item 1</li><li>Item 2</li></ul>" (convert "### This is a list\n\n* Item 1\n* Item 2"),
|
||||||
|
check_equal "Nested list then back" "<ul><li>Item 1</li><li>Item 2<ul><li>Item 3</li><li>Item 4</li></ul></li><li>Item 5</li></ul>" (convert "- Item 1\n- Item 2\n - Item 3\n - Item 4\n- Item 5")
|
||||||
]
|
]
|
||||||
|
|
||||||
integrationTests =
|
integrationTests =
|
||||||
@@ -68,17 +83,32 @@ integrationTests =
|
|||||||
check_equal "Integration 3" "<h1>Hello</h1><p>World</p>" (convert "# Hello\nWorld"),
|
check_equal "Integration 3" "<h1>Hello</h1><p>World</p>" (convert "# Hello\nWorld"),
|
||||||
check_equal "Integration 4" "<p>a b</p>" (convert "a\nb"),
|
check_equal "Integration 4" "<p>a b</p>" (convert "a\nb"),
|
||||||
check_equal "Integration 5" "<h1>Hello</h1>" (convert "# Hello\n"),
|
check_equal "Integration 5" "<h1>Hello</h1>" (convert "# Hello\n"),
|
||||||
check_equal "Integration 6" "<p>First line<br>Second line</p>" (convert "First line \nSecond line")
|
check_equal "Integration 6" "<p>First line<br>Second line</p>" (convert "First line \nSecond line"),
|
||||||
|
check_equal
|
||||||
|
"Integration 7"
|
||||||
|
"<h1>Sample Markdown</h1><p>This is some basic, sample markdown.</p><h2>Second \
|
||||||
|
\Heading</h2><ul><li>Unordered lists, and:<ul><li>One</li><li>Two</li><li>\
|
||||||
|
\Three</li></ul></li><li>More</li></ul><blockquote><p>Blockquote</p>\
|
||||||
|
\</blockquote><p>And <b>bold</b>, <i>italics</i>, and even <i>italics \
|
||||||
|
\and later <b>bold</b></i>. Even <s>strikethrough</s>. \
|
||||||
|
\<a href=\"https://markdowntohtml.com\">A link</a> to somewhere.</p>"
|
||||||
|
( convert
|
||||||
|
"# Sample Markdown\n\nThis is some basic, sample markdown.\n\n## Second \
|
||||||
|
\Heading\n\n- Unordered lists, and:\n - One\n - Two\n - Three\n\
|
||||||
|
\- More\n\n> Blockquote\n\nAnd **bold**, *italics*, and even *italics and \
|
||||||
|
\later **bold***. Even ~~strikethrough~~. [A link](https://markdowntohtml.com) to somewhere."
|
||||||
|
)
|
||||||
]
|
]
|
||||||
|
|
||||||
tests =
|
tests =
|
||||||
TestList
|
TestList
|
||||||
[ headerTests,
|
[ headerTests,
|
||||||
boldTests,
|
boldTests,
|
||||||
|
strikethroughTests,
|
||||||
linkTests,
|
linkTests,
|
||||||
escapedCharTests,
|
escapedCharTests,
|
||||||
blockquoteTests,
|
blockquoteTests,
|
||||||
orderedListTests,
|
unorderedListTests,
|
||||||
integrationTests
|
integrationTests
|
||||||
]
|
]
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user