Compare commits
15 Commits
b8ba27f240
...
6b99a1835d
Author | SHA1 | Date | |
---|---|---|---|
6b99a1835d | |||
04167e0f96 | |||
0528e813c5 | |||
b1b99189c9 | |||
ade3768e29 | |||
fd6d39ecd6 | |||
0f04342867 | |||
80ef93bbc9 | |||
b73d4131b6 | |||
c48b8c5ae8 | |||
cf4282b26e | |||
7b40d6fe7c | |||
c4255d4578 | |||
dcbbff13cb | |||
592fad2b46 |
101
src/MdToHTML.hs
101
src/MdToHTML.hs
@@ -6,9 +6,11 @@ module MdToHTML where
|
|||||||
|
|
||||||
import Control.Applicative hiding (many, some)
|
import Control.Applicative hiding (many, some)
|
||||||
import Control.Monad
|
import Control.Monad
|
||||||
|
import Control.Monad.Combinators (count)
|
||||||
import Data.Char
|
import Data.Char
|
||||||
import Data.List
|
import Data.List
|
||||||
import Data.Ord (comparing)
|
import Data.Ord (comparing)
|
||||||
|
import Data.String.Utils
|
||||||
import qualified Data.Text as T
|
import qualified Data.Text as T
|
||||||
import Data.Void
|
import Data.Void
|
||||||
import Debug.Trace
|
import Debug.Trace
|
||||||
@@ -20,6 +22,8 @@ type Parser = Parsec Void T.Text
|
|||||||
|
|
||||||
type HeaderLevel = Int
|
type HeaderLevel = Int
|
||||||
|
|
||||||
|
type CssClass = String
|
||||||
|
|
||||||
newtype URL = URL {getUrl :: String} deriving (Eq)
|
newtype URL = URL {getUrl :: String} deriving (Eq)
|
||||||
|
|
||||||
newtype ImgPath = ImgPath {getPath :: String} deriving (Eq)
|
newtype ImgPath = ImgPath {getPath :: String} deriving (Eq)
|
||||||
@@ -36,10 +40,11 @@ data MdToken
|
|||||||
| UnordList [MdToken]
|
| UnordList [MdToken]
|
||||||
| OrdList [MdToken]
|
| OrdList [MdToken]
|
||||||
| Code MdToken
|
| Code MdToken
|
||||||
| Codeblock String
|
| Table [[MdToken]]
|
||||||
|
| Codeblock MdToken
|
||||||
| Link MdToken URL
|
| Link MdToken URL
|
||||||
| Image MdToken URL
|
| Image MdToken URL (Maybe [CssClass])
|
||||||
| Figure MdToken URL
|
| Figure MdToken URL (Maybe [CssClass])
|
||||||
| Bold MdToken
|
| Bold MdToken
|
||||||
| Italic MdToken
|
| Italic MdToken
|
||||||
| Strikethrough MdToken
|
| Strikethrough MdToken
|
||||||
@@ -59,10 +64,11 @@ instance Show MdToken where
|
|||||||
show (UnordList tokens) = "<ul>" ++ concatMap (prepend "<li>" . append "</li>" . show) tokens ++ "</ul>"
|
show (UnordList tokens) = "<ul>" ++ concatMap (prepend "<li>" . append "</li>" . show) tokens ++ "</ul>"
|
||||||
show (OrdList tokens) = "<ol>" ++ concatMap (prepend "<li>" . append "</li>" . show) tokens ++ "</ol>"
|
show (OrdList tokens) = "<ol>" ++ concatMap (prepend "<li>" . append "</li>" . show) tokens ++ "</ol>"
|
||||||
show (Code code) = "<code>" ++ show code ++ "</code>"
|
show (Code code) = "<code>" ++ show code ++ "</code>"
|
||||||
show (Codeblock code) = show code
|
show (Table (thead : tokenGrid)) = "<table>\n<thead>\n<tr>\n" ++ concatMap (\x -> "<th>" ++ rstrip (show x) ++ "</th>\n") thead ++ "</tr>\n</thead>\n" ++ "<tbody>\n" ++ concatMap (\x -> "<tr>\n" ++ concatMap (\y -> "<td>" ++ rstrip (show y) ++ "</td>\n") x ++ "</tr>\n") tokenGrid ++ "</tbody>\n</table>\n"
|
||||||
|
show (Codeblock code) = "<pre><code>" ++ show code ++ "</code></pre>"
|
||||||
show (Link txt url) = "<a href=\"" ++ getUrl url ++ "\">" ++ show txt ++ "</a>"
|
show (Link txt url) = "<a href=\"" ++ getUrl url ++ "\">" ++ show txt ++ "</a>"
|
||||||
show (Image txt url) = "<img src=\"" ++ getUrl url ++ "\"" ++ " alt=\"" ++ show txt ++ "\" />"
|
show (Image txt url cssClasses) = "<img src=\"" ++ getUrl url ++ "\"" ++ " alt=\"" ++ show txt ++ "\"" ++ maybe "" (\classes -> " class=\"" ++ unwords classes ++ "\"") cssClasses ++ "/>"
|
||||||
show (Figure txt url) = "<figure><img src=\"" ++ getUrl url ++ "\" alt=\"" ++ show txt ++ "\"/><figcaption aria-hidden=\"true\">" ++ show txt ++ "</figcaption></figure>"
|
show (Figure txt url cssClasses) = "<figure><img src=\"" ++ getUrl url ++ "\" alt=\"" ++ show txt ++ "\"" ++ maybe "" (\classes -> " class=\"" ++ unwords classes ++ "\"") cssClasses ++ "/><figcaption aria-hidden=\"true\">" ++ show txt ++ "</figcaption></figure>"
|
||||||
show (Bold token) = "<b>" ++ show token ++ "</b>"
|
show (Bold token) = "<b>" ++ show token ++ "</b>"
|
||||||
show (Italic token) = "<i>" ++ show token ++ "</i>"
|
show (Italic token) = "<i>" ++ show token ++ "</i>"
|
||||||
show (Strikethrough token) = "<s>" ++ show token ++ "</s>"
|
show (Strikethrough token) = "<s>" ++ show token ++ "</s>"
|
||||||
@@ -179,15 +185,16 @@ parseStrikethrough = do
|
|||||||
-- Parse code
|
-- Parse code
|
||||||
parseCode :: Parser MdToken
|
parseCode :: Parser MdToken
|
||||||
parseCode = do
|
parseCode = do
|
||||||
char '`'
|
opening <- some $ char '`'
|
||||||
inside <- manyTill (satisfy (/= '\n')) (char '`')
|
inside <- someTill (satisfy (/= '\n')) (char '`')
|
||||||
|
closing <- count (length opening - 1) (char '`')
|
||||||
return (Code (Unit (concatMap escapeChar inside)))
|
return (Code (Unit (concatMap escapeChar inside)))
|
||||||
|
|
||||||
-- Parse a link
|
-- Parse a link
|
||||||
parseLink :: Parser MdToken
|
parseLink :: Parser MdToken
|
||||||
parseLink = do
|
parseLink = do
|
||||||
char '['
|
char '['
|
||||||
linkText <- someTill parseLineToken (char ']')
|
linkText <- manyTill parseLineToken (char ']')
|
||||||
char '('
|
char '('
|
||||||
linkURL <- manyTill anySingle (char ')')
|
linkURL <- manyTill anySingle (char ')')
|
||||||
return $ Link (Line linkText) (URL linkURL)
|
return $ Link (Line linkText) (URL linkURL)
|
||||||
@@ -200,6 +207,22 @@ parseLinebreak = do
|
|||||||
char '\n'
|
char '\n'
|
||||||
return Linebreak
|
return Linebreak
|
||||||
|
|
||||||
|
parseTableRow :: Parser [MdToken]
|
||||||
|
parseTableRow = do
|
||||||
|
char '|'
|
||||||
|
row <- some (many (satisfy (\x -> x == ' ' || x == '\t')) *> someTill parseListLineToken (char '|'))
|
||||||
|
return (map Line row)
|
||||||
|
|
||||||
|
parseTable :: Parser MdToken
|
||||||
|
parseTable = do
|
||||||
|
tableHead <- parseTableRow
|
||||||
|
char '\n'
|
||||||
|
char '|'
|
||||||
|
sepEndBy1 (some (char '-')) (char '|') *> char '\n'
|
||||||
|
tableBody <- sepEndBy parseTableRow (char '\n')
|
||||||
|
many (char '\n') -- Parse trailing newlines, if any
|
||||||
|
return $ Table (tableHead : tableBody)
|
||||||
|
|
||||||
parseSingleNewline :: Parser MdToken
|
parseSingleNewline :: Parser MdToken
|
||||||
parseSingleNewline = do
|
parseSingleNewline = do
|
||||||
char '\n'
|
char '\n'
|
||||||
@@ -208,19 +231,36 @@ parseSingleNewline = do
|
|||||||
[] -> return $ Unit ""
|
[] -> return $ Unit ""
|
||||||
_ -> return SingleNewline
|
_ -> return SingleNewline
|
||||||
|
|
||||||
|
parseCssClasses :: Parser [CssClass]
|
||||||
|
parseCssClasses = do
|
||||||
|
char '{'
|
||||||
|
classes <- some parseCssClass
|
||||||
|
char '}'
|
||||||
|
return classes
|
||||||
|
where
|
||||||
|
parseCssClass :: Parser CssClass
|
||||||
|
parseCssClass = do
|
||||||
|
char '.'
|
||||||
|
let firstLetterParser = char '_' <|> char '-' <|> label "letter" (satisfy isAlpha)
|
||||||
|
cssClassFirstLetter <- firstLetterParser
|
||||||
|
cssClass <- many (firstLetterParser <|> label "digit" (satisfy isDigit))
|
||||||
|
space
|
||||||
|
return (cssClassFirstLetter : cssClass)
|
||||||
|
|
||||||
parseImage :: Parser MdToken
|
parseImage :: Parser MdToken
|
||||||
parseImage = do
|
parseImage = do
|
||||||
char '!'
|
char '!'
|
||||||
link <- parseLink
|
link <- parseLink
|
||||||
|
cssClasses <- optional $ try parseCssClasses
|
||||||
case link of
|
case link of
|
||||||
Link text path -> return $ Image text path
|
Link text path -> return $ Image text path cssClasses
|
||||||
_ -> empty -- This should never be reached
|
_ -> empty -- This should never be reached
|
||||||
|
|
||||||
parseFigure = do
|
parseFigure = do
|
||||||
img <- parseImage
|
img <- parseImage
|
||||||
void (string doubleNewlineText) <|> eof
|
void (string doubleNewlineText) <|> eof
|
||||||
case img of
|
case img of
|
||||||
Image text path -> return $ Figure text path
|
Image text path cssClasses -> return $ Figure text path cssClasses
|
||||||
_ -> return img
|
_ -> return img
|
||||||
|
|
||||||
-- Parse an escaped character
|
-- Parse an escaped character
|
||||||
@@ -237,6 +277,13 @@ parseUnit = do
|
|||||||
text <- anySingle
|
text <- anySingle
|
||||||
return (Unit [text])
|
return (Unit [text])
|
||||||
|
|
||||||
|
-- Parse any character except a newline
|
||||||
|
parseUnitExceptNewline :: Parser MdToken
|
||||||
|
parseUnitExceptNewline = do
|
||||||
|
-- text <- satisfy (`notElem` specialChars)
|
||||||
|
text <- satisfy (/= '\n')
|
||||||
|
return (Unit [text])
|
||||||
|
|
||||||
lineParsers :: [Parser MdToken]
|
lineParsers :: [Parser MdToken]
|
||||||
lineParsers =
|
lineParsers =
|
||||||
[ parseLinebreak,
|
[ parseLinebreak,
|
||||||
@@ -251,17 +298,16 @@ lineParsers =
|
|||||||
parseUnit
|
parseUnit
|
||||||
] -- A 'line' doesn't include a 'header'
|
] -- A 'line' doesn't include a 'header'
|
||||||
|
|
||||||
listLineParsers :: [Parser MdToken]
|
lineParsersWithoutNewline :: [Parser MdToken]
|
||||||
listLineParsers =
|
lineParsersWithoutNewline =
|
||||||
[ parseLinebreak,
|
[ parseEscapedChar,
|
||||||
parseEscapedChar,
|
|
||||||
parseCode,
|
parseCode,
|
||||||
parseImage,
|
parseImage,
|
||||||
parseBold,
|
parseBold,
|
||||||
parseItalic,
|
parseItalic,
|
||||||
parseStrikethrough,
|
parseStrikethrough,
|
||||||
parseLink,
|
parseLink,
|
||||||
parseUnit
|
parseUnitExceptNewline
|
||||||
] -- A list line cannot contain newlines.
|
] -- A list line cannot contain newlines.
|
||||||
|
|
||||||
-- List of all parsers
|
-- List of all parsers
|
||||||
@@ -274,7 +320,7 @@ parseLineToken = fallthroughParser lineParsers
|
|||||||
|
|
||||||
-- Parse any of the list line tokens.
|
-- Parse any of the list line tokens.
|
||||||
parseListLineToken :: Parser MdToken
|
parseListLineToken :: Parser MdToken
|
||||||
parseListLineToken = fallthroughParser listLineParsers
|
parseListLineToken = fallthroughParser lineParsersWithoutNewline
|
||||||
|
|
||||||
-- Parse a line, consisting of one or more tokens.
|
-- Parse a line, consisting of one or more tokens.
|
||||||
parseLine :: Parser MdToken
|
parseLine :: Parser MdToken
|
||||||
@@ -355,7 +401,7 @@ parseListLineItemCommon :: Parser MdToken
|
|||||||
parseListLineItemCommon = do
|
parseListLineItemCommon = do
|
||||||
space
|
space
|
||||||
restOfLine <- manyTill parseListLineToken (void (char '\n') <|> eof)
|
restOfLine <- manyTill parseListLineToken (void (char '\n') <|> eof)
|
||||||
nestedList <- parseListNested <|> return (Unit "")
|
nestedList <- try parseListNested <|> return (Unit "")
|
||||||
return $ Line [Line restOfLine, nestedList]
|
return $ Line [Line restOfLine, nestedList]
|
||||||
|
|
||||||
-- Parse an unordered list paragraph item.
|
-- Parse an unordered list paragraph item.
|
||||||
@@ -386,31 +432,32 @@ parseListParaItemCommon = do
|
|||||||
|
|
||||||
-- Parse an unordered list item, which can be a line item or another list.
|
-- Parse an unordered list item, which can be a line item or another list.
|
||||||
parseUListItem :: Parser MdToken
|
parseUListItem :: Parser MdToken
|
||||||
parseUListItem = try parseUListParaItem <|> parseUListLineItem
|
parseUListItem = space *> (try parseUListParaItem <|> parseUListLineItem)
|
||||||
|
|
||||||
-- Parse an unordered list.
|
-- Parse an unordered list.
|
||||||
parseUnorderedList :: Parser MdToken
|
parseUnorderedList :: Parser MdToken
|
||||||
parseUnorderedList = do
|
parseUnorderedList = do
|
||||||
lineItems <- some parseUListItem
|
lineItems <- some $ try parseUListItem
|
||||||
void (char '\n') <|> eof -- A list must end in an extra newline or eof
|
void (char '\n') <|> eof -- A list must end in an extra newline or eof
|
||||||
return $ UnordList lineItems
|
return $ UnordList lineItems
|
||||||
|
|
||||||
-- --------
|
-- --------
|
||||||
|
|
||||||
parseOListItem :: Parser MdToken
|
parseOListItem :: Parser MdToken
|
||||||
parseOListItem = try parseOListParaItem <|> parseOListLineItem
|
parseOListItem = space *> (try parseOListParaItem <|> parseOListLineItem)
|
||||||
|
|
||||||
-- Parses the first element of an ordered list, which must start with '1.'
|
-- Parses the first element of an ordered list, which must start with '1.'
|
||||||
parseFirstOListItem :: Parser MdToken
|
parseFirstOListItem :: Parser MdToken
|
||||||
parseFirstOListItem = do
|
parseFirstOListItem = do
|
||||||
|
space
|
||||||
remaining <- getInput
|
remaining <- getInput
|
||||||
when (take 2 (T.unpack remaining) /= "1.") empty
|
when (take 2 (T.unpack remaining) /= "1.") empty
|
||||||
parseOListLineItem
|
parseOListLineItem
|
||||||
|
|
||||||
parseOrderedList :: Parser MdToken
|
parseOrderedList :: Parser MdToken
|
||||||
parseOrderedList = do
|
parseOrderedList = do
|
||||||
firstLine <- parseFirstOListItem
|
firstLine <- try parseFirstOListItem
|
||||||
lineItems <- some parseOListItem
|
lineItems <- many $ try parseOListItem
|
||||||
void (char '\n') <|> eof
|
void (char '\n') <|> eof
|
||||||
return $ OrdList (firstLine : lineItems)
|
return $ OrdList (firstLine : lineItems)
|
||||||
|
|
||||||
@@ -423,9 +470,17 @@ doubleNewlineText = T.pack "\n\n"
|
|||||||
parseHorizontalRule :: Parser MdToken
|
parseHorizontalRule :: Parser MdToken
|
||||||
parseHorizontalRule = string horizontalRuleText *> (void (string doubleNewlineText) <|> eof) *> return HorizontalRule
|
parseHorizontalRule = string horizontalRuleText *> (void (string doubleNewlineText) <|> eof) *> return HorizontalRule
|
||||||
|
|
||||||
|
parseCodeblock :: Parser MdToken
|
||||||
|
parseCodeblock = do
|
||||||
|
string (T.pack "```\n")
|
||||||
|
inside <- someTill anySingle (string (T.pack "\n```"))
|
||||||
|
return $ Codeblock (Unit (concatMap escapeChar inside))
|
||||||
|
|
||||||
documentParsers :: [Parser MdToken]
|
documentParsers :: [Parser MdToken]
|
||||||
documentParsers =
|
documentParsers =
|
||||||
[ parseHorizontalRule,
|
[ parseHorizontalRule,
|
||||||
|
parseCodeblock,
|
||||||
|
parseTable,
|
||||||
parseHeader,
|
parseHeader,
|
||||||
parseBlockquote,
|
parseBlockquote,
|
||||||
parseUnorderedList,
|
parseUnorderedList,
|
||||||
|
@@ -90,7 +90,8 @@ orderedListTests =
|
|||||||
check_equal "Paragraph before list" "<h3>This is a list</h3><ol><li>Item 1</li><li>Item 2</li></ol>" (convert "### This is a list\n\n1. Item 1\n200. Item 2"),
|
check_equal "Paragraph before list" "<h3>This is a list</h3><ol><li>Item 1</li><li>Item 2</li></ol>" (convert "### This is a list\n\n1. Item 1\n200. Item 2"),
|
||||||
check_equal "Nested list then back" "<ol><li>Item 1</li><li>Item 2<ol><li>Item 3</li><li>Item 4</li></ol></li><li>Item 5</li></ol>" (convert "1. Item 1\n2. Item 2\n 1. Item 3\n 3. Item 4\n5. Item 5"),
|
check_equal "Nested list then back" "<ol><li>Item 1</li><li>Item 2<ol><li>Item 3</li><li>Item 4</li></ol></li><li>Item 5</li></ol>" (convert "1. Item 1\n2. Item 2\n 1. Item 3\n 3. Item 4\n5. Item 5"),
|
||||||
check_equal "Blockquote in list" "<ol><li>Item 1</li><li><p>Item 2</p><blockquote><p>Quote</p></blockquote></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n\n > Quote\n\n3. Item 3"),
|
check_equal "Blockquote in list" "<ol><li>Item 1</li><li><p>Item 2</p><blockquote><p>Quote</p></blockquote></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n\n > Quote\n\n3. Item 3"),
|
||||||
check_equal "Unordered list in ordered list" "<ol><li>Item 1</li><li>Item 2<ul><li>Item 1</li><li>Item 2</li></ul></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n - Item 1\n * Item 2\n4. Item 3")
|
check_equal "Unordered list in ordered list" "<ol><li>Item 1</li><li>Item 2<ul><li>Item 1</li><li>Item 2</li></ul></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n - Item 1\n * Item 2\n4. Item 3"),
|
||||||
|
check_equal "List with just 1 item" "<ol><li>Item 1</li></ol>" (convert "1. Item 1")
|
||||||
]
|
]
|
||||||
|
|
||||||
htmlTests =
|
htmlTests =
|
||||||
@@ -106,7 +107,8 @@ codeTests =
|
|||||||
|
|
||||||
imageTests =
|
imageTests =
|
||||||
TestList
|
TestList
|
||||||
[ check_equal "Image with text" "<p>This is an image <img src=\"img.png\" alt=\"Image 1\" /></p>" (convert "This is an image ")
|
[ check_equal "Image with text" "<p>This is an image <img src=\"img.png\" alt=\"Image 1\"/></p>" (convert "This is an image "),
|
||||||
|
check_equal "Image with classes" "<p>This is an image <img src=\"img.png\" alt=\"Image 1\" class=\"new-img\"/></p>" (convert "This is an image {.new-img}")
|
||||||
]
|
]
|
||||||
|
|
||||||
figureTests =
|
figureTests =
|
||||||
@@ -118,6 +120,17 @@ horizontalRuleTests =
|
|||||||
TestList
|
TestList
|
||||||
[check_equal "Horizontal Rule" "<p>a</p><hr><p>b</p>" (convert "a\n\n---\n\nb")]
|
[check_equal "Horizontal Rule" "<p>a</p><hr><p>b</p>" (convert "a\n\n---\n\nb")]
|
||||||
|
|
||||||
|
tableTests =
|
||||||
|
TestList
|
||||||
|
[ check_equal
|
||||||
|
"Basic table"
|
||||||
|
"<table>\
|
||||||
|
\<thead><tr><th>Col 1</th><th>Col 2</th><th>Col 3</th></tr></thead>\
|
||||||
|
\<tbody><tr><td>Data 1</td><td>Data 2</td><td>Data 3</td></tr>\
|
||||||
|
\<tr><td>More Data 1</td><td>More Data 2</td><td>More Data 3</td></tr></tbody></table>"
|
||||||
|
(convert "| Col 1 | Col 2 | Col 3 |\n|---|---|---|\n| Data 1 | Data 2 | Data 3 |\n| More Data 1 | More Data 2 | More Data 3 |")
|
||||||
|
]
|
||||||
|
|
||||||
integrationTests =
|
integrationTests =
|
||||||
TestList
|
TestList
|
||||||
[ check_equal "Integration 1" "<h1>Sample Markdown</h1><p>This is some basic, sample markdown.</p><h2><b>Second</b> <i>Heading</i></h2>" (convert "# Sample Markdown\n\n This is some basic, sample markdown.\n\n ## __Second__ _Heading_"),
|
[ check_equal "Integration 1" "<h1>Sample Markdown</h1><p>This is some basic, sample markdown.</p><h2><b>Second</b> <i>Heading</i></h2>" (convert "# Sample Markdown\n\n This is some basic, sample markdown.\n\n ## __Second__ _Heading_"),
|
||||||
@@ -157,6 +170,7 @@ tests =
|
|||||||
figureTests,
|
figureTests,
|
||||||
codeTests,
|
codeTests,
|
||||||
horizontalRuleTests,
|
horizontalRuleTests,
|
||||||
|
tableTests,
|
||||||
integrationTests
|
integrationTests
|
||||||
]
|
]
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user