Compare commits

..

No commits in common. '6b99a1835d2c819bba554073d839fee17a6a6879' and 'b8ba27f24034d7d9ca09b14f4d882efd393850e8' have entirely different histories.

@ -6,11 +6,9 @@ module MdToHTML where
import Control.Applicative hiding (many, some) import Control.Applicative hiding (many, some)
import Control.Monad import Control.Monad
import Control.Monad.Combinators (count)
import Data.Char import Data.Char
import Data.List import Data.List
import Data.Ord (comparing) import Data.Ord (comparing)
import Data.String.Utils
import qualified Data.Text as T import qualified Data.Text as T
import Data.Void import Data.Void
import Debug.Trace import Debug.Trace
@ -22,8 +20,6 @@ type Parser = Parsec Void T.Text
type HeaderLevel = Int type HeaderLevel = Int
type CssClass = String
newtype URL = URL {getUrl :: String} deriving (Eq) newtype URL = URL {getUrl :: String} deriving (Eq)
newtype ImgPath = ImgPath {getPath :: String} deriving (Eq) newtype ImgPath = ImgPath {getPath :: String} deriving (Eq)
@ -40,11 +36,10 @@ data MdToken
| UnordList [MdToken] | UnordList [MdToken]
| OrdList [MdToken] | OrdList [MdToken]
| Code MdToken | Code MdToken
| Table [[MdToken]] | Codeblock String
| Codeblock MdToken
| Link MdToken URL | Link MdToken URL
| Image MdToken URL (Maybe [CssClass]) | Image MdToken URL
| Figure MdToken URL (Maybe [CssClass]) | Figure MdToken URL
| Bold MdToken | Bold MdToken
| Italic MdToken | Italic MdToken
| Strikethrough MdToken | Strikethrough MdToken
@ -64,11 +59,10 @@ instance Show MdToken where
show (UnordList tokens) = "<ul>" ++ concatMap (prepend "<li>" . append "</li>" . show) tokens ++ "</ul>" show (UnordList tokens) = "<ul>" ++ concatMap (prepend "<li>" . append "</li>" . show) tokens ++ "</ul>"
show (OrdList tokens) = "<ol>" ++ concatMap (prepend "<li>" . append "</li>" . show) tokens ++ "</ol>" show (OrdList tokens) = "<ol>" ++ concatMap (prepend "<li>" . append "</li>" . show) tokens ++ "</ol>"
show (Code code) = "<code>" ++ show code ++ "</code>" show (Code code) = "<code>" ++ show code ++ "</code>"
show (Table (thead : tokenGrid)) = "<table>\n<thead>\n<tr>\n" ++ concatMap (\x -> "<th>" ++ rstrip (show x) ++ "</th>\n") thead ++ "</tr>\n</thead>\n" ++ "<tbody>\n" ++ concatMap (\x -> "<tr>\n" ++ concatMap (\y -> "<td>" ++ rstrip (show y) ++ "</td>\n") x ++ "</tr>\n") tokenGrid ++ "</tbody>\n</table>\n" show (Codeblock code) = show code
show (Codeblock code) = "<pre><code>" ++ show code ++ "</code></pre>"
show (Link txt url) = "<a href=\"" ++ getUrl url ++ "\">" ++ show txt ++ "</a>" show (Link txt url) = "<a href=\"" ++ getUrl url ++ "\">" ++ show txt ++ "</a>"
show (Image txt url cssClasses) = "<img src=\"" ++ getUrl url ++ "\"" ++ " alt=\"" ++ show txt ++ "\"" ++ maybe "" (\classes -> " class=\"" ++ unwords classes ++ "\"") cssClasses ++ "/>" show (Image txt url) = "<img src=\"" ++ getUrl url ++ "\"" ++ " alt=\"" ++ show txt ++ "\" />"
show (Figure txt url cssClasses) = "<figure><img src=\"" ++ getUrl url ++ "\" alt=\"" ++ show txt ++ "\"" ++ maybe "" (\classes -> " class=\"" ++ unwords classes ++ "\"") cssClasses ++ "/><figcaption aria-hidden=\"true\">" ++ show txt ++ "</figcaption></figure>" show (Figure txt url) = "<figure><img src=\"" ++ getUrl url ++ "\" alt=\"" ++ show txt ++ "\"/><figcaption aria-hidden=\"true\">" ++ show txt ++ "</figcaption></figure>"
show (Bold token) = "<b>" ++ show token ++ "</b>" show (Bold token) = "<b>" ++ show token ++ "</b>"
show (Italic token) = "<i>" ++ show token ++ "</i>" show (Italic token) = "<i>" ++ show token ++ "</i>"
show (Strikethrough token) = "<s>" ++ show token ++ "</s>" show (Strikethrough token) = "<s>" ++ show token ++ "</s>"
@ -185,16 +179,15 @@ parseStrikethrough = do
-- Parse code -- Parse code
parseCode :: Parser MdToken parseCode :: Parser MdToken
parseCode = do parseCode = do
opening <- some $ char '`' char '`'
inside <- someTill (satisfy (/= '\n')) (char '`') inside <- manyTill (satisfy (/= '\n')) (char '`')
closing <- count (length opening - 1) (char '`')
return (Code (Unit (concatMap escapeChar inside))) return (Code (Unit (concatMap escapeChar inside)))
-- Parse a link -- Parse a link
parseLink :: Parser MdToken parseLink :: Parser MdToken
parseLink = do parseLink = do
char '[' char '['
linkText <- manyTill parseLineToken (char ']') linkText <- someTill parseLineToken (char ']')
char '(' char '('
linkURL <- manyTill anySingle (char ')') linkURL <- manyTill anySingle (char ')')
return $ Link (Line linkText) (URL linkURL) return $ Link (Line linkText) (URL linkURL)
@ -207,22 +200,6 @@ parseLinebreak = do
char '\n' char '\n'
return Linebreak return Linebreak
parseTableRow :: Parser [MdToken]
parseTableRow = do
char '|'
row <- some (many (satisfy (\x -> x == ' ' || x == '\t')) *> someTill parseListLineToken (char '|'))
return (map Line row)
parseTable :: Parser MdToken
parseTable = do
tableHead <- parseTableRow
char '\n'
char '|'
sepEndBy1 (some (char '-')) (char '|') *> char '\n'
tableBody <- sepEndBy parseTableRow (char '\n')
many (char '\n') -- Parse trailing newlines, if any
return $ Table (tableHead : tableBody)
parseSingleNewline :: Parser MdToken parseSingleNewline :: Parser MdToken
parseSingleNewline = do parseSingleNewline = do
char '\n' char '\n'
@ -231,36 +208,19 @@ parseSingleNewline = do
[] -> return $ Unit "" [] -> return $ Unit ""
_ -> return SingleNewline _ -> return SingleNewline
parseCssClasses :: Parser [CssClass]
parseCssClasses = do
char '{'
classes <- some parseCssClass
char '}'
return classes
where
parseCssClass :: Parser CssClass
parseCssClass = do
char '.'
let firstLetterParser = char '_' <|> char '-' <|> label "letter" (satisfy isAlpha)
cssClassFirstLetter <- firstLetterParser
cssClass <- many (firstLetterParser <|> label "digit" (satisfy isDigit))
space
return (cssClassFirstLetter : cssClass)
parseImage :: Parser MdToken parseImage :: Parser MdToken
parseImage = do parseImage = do
char '!' char '!'
link <- parseLink link <- parseLink
cssClasses <- optional $ try parseCssClasses
case link of case link of
Link text path -> return $ Image text path cssClasses Link text path -> return $ Image text path
_ -> empty -- This should never be reached _ -> empty -- This should never be reached
parseFigure = do parseFigure = do
img <- parseImage img <- parseImage
void (string doubleNewlineText) <|> eof void (string doubleNewlineText) <|> eof
case img of case img of
Image text path cssClasses -> return $ Figure text path cssClasses Image text path -> return $ Figure text path
_ -> return img _ -> return img
-- Parse an escaped character -- Parse an escaped character
@ -277,13 +237,6 @@ parseUnit = do
text <- anySingle text <- anySingle
return (Unit [text]) return (Unit [text])
-- Parse any character except a newline
parseUnitExceptNewline :: Parser MdToken
parseUnitExceptNewline = do
-- text <- satisfy (`notElem` specialChars)
text <- satisfy (/= '\n')
return (Unit [text])
lineParsers :: [Parser MdToken] lineParsers :: [Parser MdToken]
lineParsers = lineParsers =
[ parseLinebreak, [ parseLinebreak,
@ -298,16 +251,17 @@ lineParsers =
parseUnit parseUnit
] -- A 'line' doesn't include a 'header' ] -- A 'line' doesn't include a 'header'
lineParsersWithoutNewline :: [Parser MdToken] listLineParsers :: [Parser MdToken]
lineParsersWithoutNewline = listLineParsers =
[ parseEscapedChar, [ parseLinebreak,
parseEscapedChar,
parseCode, parseCode,
parseImage, parseImage,
parseBold, parseBold,
parseItalic, parseItalic,
parseStrikethrough, parseStrikethrough,
parseLink, parseLink,
parseUnitExceptNewline parseUnit
] -- A list line cannot contain newlines. ] -- A list line cannot contain newlines.
-- List of all parsers -- List of all parsers
@ -320,7 +274,7 @@ parseLineToken = fallthroughParser lineParsers
-- Parse any of the list line tokens. -- Parse any of the list line tokens.
parseListLineToken :: Parser MdToken parseListLineToken :: Parser MdToken
parseListLineToken = fallthroughParser lineParsersWithoutNewline parseListLineToken = fallthroughParser listLineParsers
-- Parse a line, consisting of one or more tokens. -- Parse a line, consisting of one or more tokens.
parseLine :: Parser MdToken parseLine :: Parser MdToken
@ -401,7 +355,7 @@ parseListLineItemCommon :: Parser MdToken
parseListLineItemCommon = do parseListLineItemCommon = do
space space
restOfLine <- manyTill parseListLineToken (void (char '\n') <|> eof) restOfLine <- manyTill parseListLineToken (void (char '\n') <|> eof)
nestedList <- try parseListNested <|> return (Unit "") nestedList <- parseListNested <|> return (Unit "")
return $ Line [Line restOfLine, nestedList] return $ Line [Line restOfLine, nestedList]
-- Parse an unordered list paragraph item. -- Parse an unordered list paragraph item.
@ -432,32 +386,31 @@ parseListParaItemCommon = do
-- Parse an unordered list item, which can be a line item or another list. -- Parse an unordered list item, which can be a line item or another list.
parseUListItem :: Parser MdToken parseUListItem :: Parser MdToken
parseUListItem = space *> (try parseUListParaItem <|> parseUListLineItem) parseUListItem = try parseUListParaItem <|> parseUListLineItem
-- Parse an unordered list. -- Parse an unordered list.
parseUnorderedList :: Parser MdToken parseUnorderedList :: Parser MdToken
parseUnorderedList = do parseUnorderedList = do
lineItems <- some $ try parseUListItem lineItems <- some parseUListItem
void (char '\n') <|> eof -- A list must end in an extra newline or eof void (char '\n') <|> eof -- A list must end in an extra newline or eof
return $ UnordList lineItems return $ UnordList lineItems
-- -------- -- --------
parseOListItem :: Parser MdToken parseOListItem :: Parser MdToken
parseOListItem = space *> (try parseOListParaItem <|> parseOListLineItem) parseOListItem = try parseOListParaItem <|> parseOListLineItem
-- Parses the first element of an ordered list, which must start with '1.' -- Parses the first element of an ordered list, which must start with '1.'
parseFirstOListItem :: Parser MdToken parseFirstOListItem :: Parser MdToken
parseFirstOListItem = do parseFirstOListItem = do
space
remaining <- getInput remaining <- getInput
when (take 2 (T.unpack remaining) /= "1.") empty when (take 2 (T.unpack remaining) /= "1.") empty
parseOListLineItem parseOListLineItem
parseOrderedList :: Parser MdToken parseOrderedList :: Parser MdToken
parseOrderedList = do parseOrderedList = do
firstLine <- try parseFirstOListItem firstLine <- parseFirstOListItem
lineItems <- many $ try parseOListItem lineItems <- some parseOListItem
void (char '\n') <|> eof void (char '\n') <|> eof
return $ OrdList (firstLine : lineItems) return $ OrdList (firstLine : lineItems)
@ -470,17 +423,9 @@ doubleNewlineText = T.pack "\n\n"
parseHorizontalRule :: Parser MdToken parseHorizontalRule :: Parser MdToken
parseHorizontalRule = string horizontalRuleText *> (void (string doubleNewlineText) <|> eof) *> return HorizontalRule parseHorizontalRule = string horizontalRuleText *> (void (string doubleNewlineText) <|> eof) *> return HorizontalRule
parseCodeblock :: Parser MdToken
parseCodeblock = do
string (T.pack "```\n")
inside <- someTill anySingle (string (T.pack "\n```"))
return $ Codeblock (Unit (concatMap escapeChar inside))
documentParsers :: [Parser MdToken] documentParsers :: [Parser MdToken]
documentParsers = documentParsers =
[ parseHorizontalRule, [ parseHorizontalRule,
parseCodeblock,
parseTable,
parseHeader, parseHeader,
parseBlockquote, parseBlockquote,
parseUnorderedList, parseUnorderedList,

@ -90,8 +90,7 @@ orderedListTests =
check_equal "Paragraph before list" "<h3>This is a list</h3><ol><li>Item 1</li><li>Item 2</li></ol>" (convert "### This is a list\n\n1. Item 1\n200. Item 2"), check_equal "Paragraph before list" "<h3>This is a list</h3><ol><li>Item 1</li><li>Item 2</li></ol>" (convert "### This is a list\n\n1. Item 1\n200. Item 2"),
check_equal "Nested list then back" "<ol><li>Item 1</li><li>Item 2<ol><li>Item 3</li><li>Item 4</li></ol></li><li>Item 5</li></ol>" (convert "1. Item 1\n2. Item 2\n 1. Item 3\n 3. Item 4\n5. Item 5"), check_equal "Nested list then back" "<ol><li>Item 1</li><li>Item 2<ol><li>Item 3</li><li>Item 4</li></ol></li><li>Item 5</li></ol>" (convert "1. Item 1\n2. Item 2\n 1. Item 3\n 3. Item 4\n5. Item 5"),
check_equal "Blockquote in list" "<ol><li>Item 1</li><li><p>Item 2</p><blockquote><p>Quote</p></blockquote></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n\n > Quote\n\n3. Item 3"), check_equal "Blockquote in list" "<ol><li>Item 1</li><li><p>Item 2</p><blockquote><p>Quote</p></blockquote></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n\n > Quote\n\n3. Item 3"),
check_equal "Unordered list in ordered list" "<ol><li>Item 1</li><li>Item 2<ul><li>Item 1</li><li>Item 2</li></ul></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n - Item 1\n * Item 2\n4. Item 3"), check_equal "Unordered list in ordered list" "<ol><li>Item 1</li><li>Item 2<ul><li>Item 1</li><li>Item 2</li></ul></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n - Item 1\n * Item 2\n4. Item 3")
check_equal "List with just 1 item" "<ol><li>Item 1</li></ol>" (convert "1. Item 1")
] ]
htmlTests = htmlTests =
@ -107,8 +106,7 @@ codeTests =
imageTests = imageTests =
TestList TestList
[ check_equal "Image with text" "<p>This is an image <img src=\"img.png\" alt=\"Image 1\"/></p>" (convert "This is an image ![Image 1](img.png)"), [ check_equal "Image with text" "<p>This is an image <img src=\"img.png\" alt=\"Image 1\" /></p>" (convert "This is an image ![Image 1](img.png)")
check_equal "Image with classes" "<p>This is an image <img src=\"img.png\" alt=\"Image 1\" class=\"new-img\"/></p>" (convert "This is an image ![Image 1](img.png){.new-img}")
] ]
figureTests = figureTests =
@ -120,17 +118,6 @@ horizontalRuleTests =
TestList TestList
[check_equal "Horizontal Rule" "<p>a</p><hr><p>b</p>" (convert "a\n\n---\n\nb")] [check_equal "Horizontal Rule" "<p>a</p><hr><p>b</p>" (convert "a\n\n---\n\nb")]
tableTests =
TestList
[ check_equal
"Basic table"
"<table>\
\<thead><tr><th>Col 1</th><th>Col 2</th><th>Col 3</th></tr></thead>\
\<tbody><tr><td>Data 1</td><td>Data 2</td><td>Data 3</td></tr>\
\<tr><td>More Data 1</td><td>More Data 2</td><td>More Data 3</td></tr></tbody></table>"
(convert "| Col 1 | Col 2 | Col 3 |\n|---|---|---|\n| Data 1 | Data 2 | Data 3 |\n| More Data 1 | More Data 2 | More Data 3 |")
]
integrationTests = integrationTests =
TestList TestList
[ check_equal "Integration 1" "<h1>Sample Markdown</h1><p>This is some basic, sample markdown.</p><h2><b>Second</b> <i>Heading</i></h2>" (convert "# Sample Markdown\n\n This is some basic, sample markdown.\n\n ## __Second__ _Heading_"), [ check_equal "Integration 1" "<h1>Sample Markdown</h1><p>This is some basic, sample markdown.</p><h2><b>Second</b> <i>Heading</i></h2>" (convert "# Sample Markdown\n\n This is some basic, sample markdown.\n\n ## __Second__ _Heading_"),
@ -170,7 +157,6 @@ tests =
figureTests, figureTests,
codeTests, codeTests,
horizontalRuleTests, horizontalRuleTests,
tableTests,
integrationTests integrationTests
] ]

Loading…
Cancel
Save