Compare commits

..

No commits in common. '6b99a1835d2c819bba554073d839fee17a6a6879' and 'b8ba27f24034d7d9ca09b14f4d882efd393850e8' have entirely different histories.

@ -6,11 +6,9 @@ module MdToHTML where
import Control.Applicative hiding (many, some)
import Control.Monad
import Control.Monad.Combinators (count)
import Data.Char
import Data.List
import Data.Ord (comparing)
import Data.String.Utils
import qualified Data.Text as T
import Data.Void
import Debug.Trace
@ -22,8 +20,6 @@ type Parser = Parsec Void T.Text
type HeaderLevel = Int
type CssClass = String
newtype URL = URL {getUrl :: String} deriving (Eq)
newtype ImgPath = ImgPath {getPath :: String} deriving (Eq)
@ -40,11 +36,10 @@ data MdToken
| UnordList [MdToken]
| OrdList [MdToken]
| Code MdToken
| Table [[MdToken]]
| Codeblock MdToken
| Codeblock String
| Link MdToken URL
| Image MdToken URL (Maybe [CssClass])
| Figure MdToken URL (Maybe [CssClass])
| Image MdToken URL
| Figure MdToken URL
| Bold MdToken
| Italic MdToken
| Strikethrough MdToken
@ -64,11 +59,10 @@ instance Show MdToken where
show (UnordList tokens) = "<ul>" ++ concatMap (prepend "<li>" . append "</li>" . show) tokens ++ "</ul>"
show (OrdList tokens) = "<ol>" ++ concatMap (prepend "<li>" . append "</li>" . show) tokens ++ "</ol>"
show (Code code) = "<code>" ++ show code ++ "</code>"
show (Table (thead : tokenGrid)) = "<table>\n<thead>\n<tr>\n" ++ concatMap (\x -> "<th>" ++ rstrip (show x) ++ "</th>\n") thead ++ "</tr>\n</thead>\n" ++ "<tbody>\n" ++ concatMap (\x -> "<tr>\n" ++ concatMap (\y -> "<td>" ++ rstrip (show y) ++ "</td>\n") x ++ "</tr>\n") tokenGrid ++ "</tbody>\n</table>\n"
show (Codeblock code) = "<pre><code>" ++ show code ++ "</code></pre>"
show (Codeblock code) = show code
show (Link txt url) = "<a href=\"" ++ getUrl url ++ "\">" ++ show txt ++ "</a>"
show (Image txt url cssClasses) = "<img src=\"" ++ getUrl url ++ "\"" ++ " alt=\"" ++ show txt ++ "\"" ++ maybe "" (\classes -> " class=\"" ++ unwords classes ++ "\"") cssClasses ++ "/>"
show (Figure txt url cssClasses) = "<figure><img src=\"" ++ getUrl url ++ "\" alt=\"" ++ show txt ++ "\"" ++ maybe "" (\classes -> " class=\"" ++ unwords classes ++ "\"") cssClasses ++ "/><figcaption aria-hidden=\"true\">" ++ show txt ++ "</figcaption></figure>"
show (Image txt url) = "<img src=\"" ++ getUrl url ++ "\"" ++ " alt=\"" ++ show txt ++ "\" />"
show (Figure txt url) = "<figure><img src=\"" ++ getUrl url ++ "\" alt=\"" ++ show txt ++ "\"/><figcaption aria-hidden=\"true\">" ++ show txt ++ "</figcaption></figure>"
show (Bold token) = "<b>" ++ show token ++ "</b>"
show (Italic token) = "<i>" ++ show token ++ "</i>"
show (Strikethrough token) = "<s>" ++ show token ++ "</s>"
@ -185,16 +179,15 @@ parseStrikethrough = do
-- Parse code
parseCode :: Parser MdToken
parseCode = do
opening <- some $ char '`'
inside <- someTill (satisfy (/= '\n')) (char '`')
closing <- count (length opening - 1) (char '`')
char '`'
inside <- manyTill (satisfy (/= '\n')) (char '`')
return (Code (Unit (concatMap escapeChar inside)))
-- Parse a link
parseLink :: Parser MdToken
parseLink = do
char '['
linkText <- manyTill parseLineToken (char ']')
linkText <- someTill parseLineToken (char ']')
char '('
linkURL <- manyTill anySingle (char ')')
return $ Link (Line linkText) (URL linkURL)
@ -207,22 +200,6 @@ parseLinebreak = do
char '\n'
return Linebreak
parseTableRow :: Parser [MdToken]
parseTableRow = do
char '|'
row <- some (many (satisfy (\x -> x == ' ' || x == '\t')) *> someTill parseListLineToken (char '|'))
return (map Line row)
parseTable :: Parser MdToken
parseTable = do
tableHead <- parseTableRow
char '\n'
char '|'
sepEndBy1 (some (char '-')) (char '|') *> char '\n'
tableBody <- sepEndBy parseTableRow (char '\n')
many (char '\n') -- Parse trailing newlines, if any
return $ Table (tableHead : tableBody)
parseSingleNewline :: Parser MdToken
parseSingleNewline = do
char '\n'
@ -231,36 +208,19 @@ parseSingleNewline = do
[] -> return $ Unit ""
_ -> return SingleNewline
parseCssClasses :: Parser [CssClass]
parseCssClasses = do
char '{'
classes <- some parseCssClass
char '}'
return classes
where
parseCssClass :: Parser CssClass
parseCssClass = do
char '.'
let firstLetterParser = char '_' <|> char '-' <|> label "letter" (satisfy isAlpha)
cssClassFirstLetter <- firstLetterParser
cssClass <- many (firstLetterParser <|> label "digit" (satisfy isDigit))
space
return (cssClassFirstLetter : cssClass)
parseImage :: Parser MdToken
parseImage = do
char '!'
link <- parseLink
cssClasses <- optional $ try parseCssClasses
case link of
Link text path -> return $ Image text path cssClasses
Link text path -> return $ Image text path
_ -> empty -- This should never be reached
parseFigure = do
img <- parseImage
void (string doubleNewlineText) <|> eof
case img of
Image text path cssClasses -> return $ Figure text path cssClasses
Image text path -> return $ Figure text path
_ -> return img
-- Parse an escaped character
@ -277,13 +237,6 @@ parseUnit = do
text <- anySingle
return (Unit [text])
-- Parse any character except a newline
parseUnitExceptNewline :: Parser MdToken
parseUnitExceptNewline = do
-- text <- satisfy (`notElem` specialChars)
text <- satisfy (/= '\n')
return (Unit [text])
lineParsers :: [Parser MdToken]
lineParsers =
[ parseLinebreak,
@ -298,16 +251,17 @@ lineParsers =
parseUnit
] -- A 'line' doesn't include a 'header'
lineParsersWithoutNewline :: [Parser MdToken]
lineParsersWithoutNewline =
[ parseEscapedChar,
listLineParsers :: [Parser MdToken]
listLineParsers =
[ parseLinebreak,
parseEscapedChar,
parseCode,
parseImage,
parseBold,
parseItalic,
parseStrikethrough,
parseLink,
parseUnitExceptNewline
parseUnit
] -- A list line cannot contain newlines.
-- List of all parsers
@ -320,7 +274,7 @@ parseLineToken = fallthroughParser lineParsers
-- Parse any of the list line tokens.
parseListLineToken :: Parser MdToken
parseListLineToken = fallthroughParser lineParsersWithoutNewline
parseListLineToken = fallthroughParser listLineParsers
-- Parse a line, consisting of one or more tokens.
parseLine :: Parser MdToken
@ -401,7 +355,7 @@ parseListLineItemCommon :: Parser MdToken
parseListLineItemCommon = do
space
restOfLine <- manyTill parseListLineToken (void (char '\n') <|> eof)
nestedList <- try parseListNested <|> return (Unit "")
nestedList <- parseListNested <|> return (Unit "")
return $ Line [Line restOfLine, nestedList]
-- Parse an unordered list paragraph item.
@ -432,32 +386,31 @@ parseListParaItemCommon = do
-- Parse an unordered list item, which can be a line item or another list.
parseUListItem :: Parser MdToken
parseUListItem = space *> (try parseUListParaItem <|> parseUListLineItem)
parseUListItem = try parseUListParaItem <|> parseUListLineItem
-- Parse an unordered list.
parseUnorderedList :: Parser MdToken
parseUnorderedList = do
lineItems <- some $ try parseUListItem
lineItems <- some parseUListItem
void (char '\n') <|> eof -- A list must end in an extra newline or eof
return $ UnordList lineItems
-- --------
parseOListItem :: Parser MdToken
parseOListItem = space *> (try parseOListParaItem <|> parseOListLineItem)
parseOListItem = try parseOListParaItem <|> parseOListLineItem
-- Parses the first element of an ordered list, which must start with '1.'
parseFirstOListItem :: Parser MdToken
parseFirstOListItem = do
space
remaining <- getInput
when (take 2 (T.unpack remaining) /= "1.") empty
parseOListLineItem
parseOrderedList :: Parser MdToken
parseOrderedList = do
firstLine <- try parseFirstOListItem
lineItems <- many $ try parseOListItem
firstLine <- parseFirstOListItem
lineItems <- some parseOListItem
void (char '\n') <|> eof
return $ OrdList (firstLine : lineItems)
@ -470,17 +423,9 @@ doubleNewlineText = T.pack "\n\n"
parseHorizontalRule :: Parser MdToken
parseHorizontalRule = string horizontalRuleText *> (void (string doubleNewlineText) <|> eof) *> return HorizontalRule
parseCodeblock :: Parser MdToken
parseCodeblock = do
string (T.pack "```\n")
inside <- someTill anySingle (string (T.pack "\n```"))
return $ Codeblock (Unit (concatMap escapeChar inside))
documentParsers :: [Parser MdToken]
documentParsers =
[ parseHorizontalRule,
parseCodeblock,
parseTable,
parseHeader,
parseBlockquote,
parseUnorderedList,

@ -90,8 +90,7 @@ orderedListTests =
check_equal "Paragraph before list" "<h3>This is a list</h3><ol><li>Item 1</li><li>Item 2</li></ol>" (convert "### This is a list\n\n1. Item 1\n200. Item 2"),
check_equal "Nested list then back" "<ol><li>Item 1</li><li>Item 2<ol><li>Item 3</li><li>Item 4</li></ol></li><li>Item 5</li></ol>" (convert "1. Item 1\n2. Item 2\n 1. Item 3\n 3. Item 4\n5. Item 5"),
check_equal "Blockquote in list" "<ol><li>Item 1</li><li><p>Item 2</p><blockquote><p>Quote</p></blockquote></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n\n > Quote\n\n3. Item 3"),
check_equal "Unordered list in ordered list" "<ol><li>Item 1</li><li>Item 2<ul><li>Item 1</li><li>Item 2</li></ul></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n - Item 1\n * Item 2\n4. Item 3"),
check_equal "List with just 1 item" "<ol><li>Item 1</li></ol>" (convert "1. Item 1")
check_equal "Unordered list in ordered list" "<ol><li>Item 1</li><li>Item 2<ul><li>Item 1</li><li>Item 2</li></ul></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n - Item 1\n * Item 2\n4. Item 3")
]
htmlTests =
@ -107,8 +106,7 @@ codeTests =
imageTests =
TestList
[ check_equal "Image with text" "<p>This is an image <img src=\"img.png\" alt=\"Image 1\"/></p>" (convert "This is an image ![Image 1](img.png)"),
check_equal "Image with classes" "<p>This is an image <img src=\"img.png\" alt=\"Image 1\" class=\"new-img\"/></p>" (convert "This is an image ![Image 1](img.png){.new-img}")
[ check_equal "Image with text" "<p>This is an image <img src=\"img.png\" alt=\"Image 1\" /></p>" (convert "This is an image ![Image 1](img.png)")
]
figureTests =
@ -120,17 +118,6 @@ horizontalRuleTests =
TestList
[check_equal "Horizontal Rule" "<p>a</p><hr><p>b</p>" (convert "a\n\n---\n\nb")]
tableTests =
TestList
[ check_equal
"Basic table"
"<table>\
\<thead><tr><th>Col 1</th><th>Col 2</th><th>Col 3</th></tr></thead>\
\<tbody><tr><td>Data 1</td><td>Data 2</td><td>Data 3</td></tr>\
\<tr><td>More Data 1</td><td>More Data 2</td><td>More Data 3</td></tr></tbody></table>"
(convert "| Col 1 | Col 2 | Col 3 |\n|---|---|---|\n| Data 1 | Data 2 | Data 3 |\n| More Data 1 | More Data 2 | More Data 3 |")
]
integrationTests =
TestList
[ check_equal "Integration 1" "<h1>Sample Markdown</h1><p>This is some basic, sample markdown.</p><h2><b>Second</b> <i>Heading</i></h2>" (convert "# Sample Markdown\n\n This is some basic, sample markdown.\n\n ## __Second__ _Heading_"),
@ -170,7 +157,6 @@ tests =
figureTests,
codeTests,
horizontalRuleTests,
tableTests,
integrationTests
]

Loading…
Cancel
Save