Compare commits

..

15 Commits

Author SHA1 Message Date
Aadhavan Srinivasan 6b99a1835d Created a separate parser list for all parsers (except the unit parser
is replaced with the non-newline unit parser); use that parser when
parsing list lines
3 weeks ago
Aadhavan Srinivasan 04167e0f96 Parse CSS classes in image and figure 3 weeks ago
Aadhavan Srinivasan 0528e813c5 Parser for CSS classes 3 weeks ago
Aadhavan Srinivasan b1b99189c9 Link tect can be empty; inline code cannot be empty and can have nested
backticks; created a unit parser for all characters except newline
3 weeks ago
Aadhavan Srinivasan ade3768e29 Try and backtrack 3 weeks ago
Aadhavan Srinivasan fd6d39ecd6 Parse space at beginning of list 3 weeks ago
Aadhavan Srinivasan 0f04342867 More trying and backtracking; parse and discard extraneous spaces at
beginning of list
3 weeks ago
Aadhavan Srinivasan 80ef93bbc9 Try parsing an ordered list item, backtrack if not possible 3 weeks ago
Aadhavan Srinivasan b73d4131b6 Added support for tables and codeblocks
Defined the types, defined 'show', created the parsers, added them to
parser list
3 weeks ago
Aadhavan Srinivasan c48b8c5ae8 Images and figures now support CSS classes 3 weeks ago
Aadhavan Srinivasan cf4282b26e More imports 3 weeks ago
Aadhavan Srinivasan 7b40d6fe7c Imports 3 weeks ago
Aadhavan Srinivasan c4255d4578 Added a test for a list with just one item 3 weeks ago
Aadhavan Srinivasan dcbbff13cb Spacing change 3 weeks ago
Aadhavan Srinivasan 592fad2b46 Added tests for tables 3 weeks ago

@ -6,9 +6,11 @@ module MdToHTML where
import Control.Applicative hiding (many, some) import Control.Applicative hiding (many, some)
import Control.Monad import Control.Monad
import Control.Monad.Combinators (count)
import Data.Char import Data.Char
import Data.List import Data.List
import Data.Ord (comparing) import Data.Ord (comparing)
import Data.String.Utils
import qualified Data.Text as T import qualified Data.Text as T
import Data.Void import Data.Void
import Debug.Trace import Debug.Trace
@ -20,6 +22,8 @@ type Parser = Parsec Void T.Text
type HeaderLevel = Int type HeaderLevel = Int
type CssClass = String
newtype URL = URL {getUrl :: String} deriving (Eq) newtype URL = URL {getUrl :: String} deriving (Eq)
newtype ImgPath = ImgPath {getPath :: String} deriving (Eq) newtype ImgPath = ImgPath {getPath :: String} deriving (Eq)
@ -36,10 +40,11 @@ data MdToken
| UnordList [MdToken] | UnordList [MdToken]
| OrdList [MdToken] | OrdList [MdToken]
| Code MdToken | Code MdToken
| Codeblock String | Table [[MdToken]]
| Codeblock MdToken
| Link MdToken URL | Link MdToken URL
| Image MdToken URL | Image MdToken URL (Maybe [CssClass])
| Figure MdToken URL | Figure MdToken URL (Maybe [CssClass])
| Bold MdToken | Bold MdToken
| Italic MdToken | Italic MdToken
| Strikethrough MdToken | Strikethrough MdToken
@ -59,10 +64,11 @@ instance Show MdToken where
show (UnordList tokens) = "<ul>" ++ concatMap (prepend "<li>" . append "</li>" . show) tokens ++ "</ul>" show (UnordList tokens) = "<ul>" ++ concatMap (prepend "<li>" . append "</li>" . show) tokens ++ "</ul>"
show (OrdList tokens) = "<ol>" ++ concatMap (prepend "<li>" . append "</li>" . show) tokens ++ "</ol>" show (OrdList tokens) = "<ol>" ++ concatMap (prepend "<li>" . append "</li>" . show) tokens ++ "</ol>"
show (Code code) = "<code>" ++ show code ++ "</code>" show (Code code) = "<code>" ++ show code ++ "</code>"
show (Codeblock code) = show code show (Table (thead : tokenGrid)) = "<table>\n<thead>\n<tr>\n" ++ concatMap (\x -> "<th>" ++ rstrip (show x) ++ "</th>\n") thead ++ "</tr>\n</thead>\n" ++ "<tbody>\n" ++ concatMap (\x -> "<tr>\n" ++ concatMap (\y -> "<td>" ++ rstrip (show y) ++ "</td>\n") x ++ "</tr>\n") tokenGrid ++ "</tbody>\n</table>\n"
show (Codeblock code) = "<pre><code>" ++ show code ++ "</code></pre>"
show (Link txt url) = "<a href=\"" ++ getUrl url ++ "\">" ++ show txt ++ "</a>" show (Link txt url) = "<a href=\"" ++ getUrl url ++ "\">" ++ show txt ++ "</a>"
show (Image txt url) = "<img src=\"" ++ getUrl url ++ "\"" ++ " alt=\"" ++ show txt ++ "\" />" show (Image txt url cssClasses) = "<img src=\"" ++ getUrl url ++ "\"" ++ " alt=\"" ++ show txt ++ "\"" ++ maybe "" (\classes -> " class=\"" ++ unwords classes ++ "\"") cssClasses ++ "/>"
show (Figure txt url) = "<figure><img src=\"" ++ getUrl url ++ "\" alt=\"" ++ show txt ++ "\"/><figcaption aria-hidden=\"true\">" ++ show txt ++ "</figcaption></figure>" show (Figure txt url cssClasses) = "<figure><img src=\"" ++ getUrl url ++ "\" alt=\"" ++ show txt ++ "\"" ++ maybe "" (\classes -> " class=\"" ++ unwords classes ++ "\"") cssClasses ++ "/><figcaption aria-hidden=\"true\">" ++ show txt ++ "</figcaption></figure>"
show (Bold token) = "<b>" ++ show token ++ "</b>" show (Bold token) = "<b>" ++ show token ++ "</b>"
show (Italic token) = "<i>" ++ show token ++ "</i>" show (Italic token) = "<i>" ++ show token ++ "</i>"
show (Strikethrough token) = "<s>" ++ show token ++ "</s>" show (Strikethrough token) = "<s>" ++ show token ++ "</s>"
@ -179,15 +185,16 @@ parseStrikethrough = do
-- Parse code -- Parse code
parseCode :: Parser MdToken parseCode :: Parser MdToken
parseCode = do parseCode = do
char '`' opening <- some $ char '`'
inside <- manyTill (satisfy (/= '\n')) (char '`') inside <- someTill (satisfy (/= '\n')) (char '`')
closing <- count (length opening - 1) (char '`')
return (Code (Unit (concatMap escapeChar inside))) return (Code (Unit (concatMap escapeChar inside)))
-- Parse a link -- Parse a link
parseLink :: Parser MdToken parseLink :: Parser MdToken
parseLink = do parseLink = do
char '[' char '['
linkText <- someTill parseLineToken (char ']') linkText <- manyTill parseLineToken (char ']')
char '(' char '('
linkURL <- manyTill anySingle (char ')') linkURL <- manyTill anySingle (char ')')
return $ Link (Line linkText) (URL linkURL) return $ Link (Line linkText) (URL linkURL)
@ -200,6 +207,22 @@ parseLinebreak = do
char '\n' char '\n'
return Linebreak return Linebreak
parseTableRow :: Parser [MdToken]
parseTableRow = do
char '|'
row <- some (many (satisfy (\x -> x == ' ' || x == '\t')) *> someTill parseListLineToken (char '|'))
return (map Line row)
parseTable :: Parser MdToken
parseTable = do
tableHead <- parseTableRow
char '\n'
char '|'
sepEndBy1 (some (char '-')) (char '|') *> char '\n'
tableBody <- sepEndBy parseTableRow (char '\n')
many (char '\n') -- Parse trailing newlines, if any
return $ Table (tableHead : tableBody)
parseSingleNewline :: Parser MdToken parseSingleNewline :: Parser MdToken
parseSingleNewline = do parseSingleNewline = do
char '\n' char '\n'
@ -208,19 +231,36 @@ parseSingleNewline = do
[] -> return $ Unit "" [] -> return $ Unit ""
_ -> return SingleNewline _ -> return SingleNewline
parseCssClasses :: Parser [CssClass]
parseCssClasses = do
char '{'
classes <- some parseCssClass
char '}'
return classes
where
parseCssClass :: Parser CssClass
parseCssClass = do
char '.'
let firstLetterParser = char '_' <|> char '-' <|> label "letter" (satisfy isAlpha)
cssClassFirstLetter <- firstLetterParser
cssClass <- many (firstLetterParser <|> label "digit" (satisfy isDigit))
space
return (cssClassFirstLetter : cssClass)
parseImage :: Parser MdToken parseImage :: Parser MdToken
parseImage = do parseImage = do
char '!' char '!'
link <- parseLink link <- parseLink
cssClasses <- optional $ try parseCssClasses
case link of case link of
Link text path -> return $ Image text path Link text path -> return $ Image text path cssClasses
_ -> empty -- This should never be reached _ -> empty -- This should never be reached
parseFigure = do parseFigure = do
img <- parseImage img <- parseImage
void (string doubleNewlineText) <|> eof void (string doubleNewlineText) <|> eof
case img of case img of
Image text path -> return $ Figure text path Image text path cssClasses -> return $ Figure text path cssClasses
_ -> return img _ -> return img
-- Parse an escaped character -- Parse an escaped character
@ -237,6 +277,13 @@ parseUnit = do
text <- anySingle text <- anySingle
return (Unit [text]) return (Unit [text])
-- Parse any character except a newline
parseUnitExceptNewline :: Parser MdToken
parseUnitExceptNewline = do
-- text <- satisfy (`notElem` specialChars)
text <- satisfy (/= '\n')
return (Unit [text])
lineParsers :: [Parser MdToken] lineParsers :: [Parser MdToken]
lineParsers = lineParsers =
[ parseLinebreak, [ parseLinebreak,
@ -251,17 +298,16 @@ lineParsers =
parseUnit parseUnit
] -- A 'line' doesn't include a 'header' ] -- A 'line' doesn't include a 'header'
listLineParsers :: [Parser MdToken] lineParsersWithoutNewline :: [Parser MdToken]
listLineParsers = lineParsersWithoutNewline =
[ parseLinebreak, [ parseEscapedChar,
parseEscapedChar,
parseCode, parseCode,
parseImage, parseImage,
parseBold, parseBold,
parseItalic, parseItalic,
parseStrikethrough, parseStrikethrough,
parseLink, parseLink,
parseUnit parseUnitExceptNewline
] -- A list line cannot contain newlines. ] -- A list line cannot contain newlines.
-- List of all parsers -- List of all parsers
@ -274,7 +320,7 @@ parseLineToken = fallthroughParser lineParsers
-- Parse any of the list line tokens. -- Parse any of the list line tokens.
parseListLineToken :: Parser MdToken parseListLineToken :: Parser MdToken
parseListLineToken = fallthroughParser listLineParsers parseListLineToken = fallthroughParser lineParsersWithoutNewline
-- Parse a line, consisting of one or more tokens. -- Parse a line, consisting of one or more tokens.
parseLine :: Parser MdToken parseLine :: Parser MdToken
@ -355,7 +401,7 @@ parseListLineItemCommon :: Parser MdToken
parseListLineItemCommon = do parseListLineItemCommon = do
space space
restOfLine <- manyTill parseListLineToken (void (char '\n') <|> eof) restOfLine <- manyTill parseListLineToken (void (char '\n') <|> eof)
nestedList <- parseListNested <|> return (Unit "") nestedList <- try parseListNested <|> return (Unit "")
return $ Line [Line restOfLine, nestedList] return $ Line [Line restOfLine, nestedList]
-- Parse an unordered list paragraph item. -- Parse an unordered list paragraph item.
@ -386,31 +432,32 @@ parseListParaItemCommon = do
-- Parse an unordered list item, which can be a line item or another list. -- Parse an unordered list item, which can be a line item or another list.
parseUListItem :: Parser MdToken parseUListItem :: Parser MdToken
parseUListItem = try parseUListParaItem <|> parseUListLineItem parseUListItem = space *> (try parseUListParaItem <|> parseUListLineItem)
-- Parse an unordered list. -- Parse an unordered list.
parseUnorderedList :: Parser MdToken parseUnorderedList :: Parser MdToken
parseUnorderedList = do parseUnorderedList = do
lineItems <- some parseUListItem lineItems <- some $ try parseUListItem
void (char '\n') <|> eof -- A list must end in an extra newline or eof void (char '\n') <|> eof -- A list must end in an extra newline or eof
return $ UnordList lineItems return $ UnordList lineItems
-- -------- -- --------
parseOListItem :: Parser MdToken parseOListItem :: Parser MdToken
parseOListItem = try parseOListParaItem <|> parseOListLineItem parseOListItem = space *> (try parseOListParaItem <|> parseOListLineItem)
-- Parses the first element of an ordered list, which must start with '1.' -- Parses the first element of an ordered list, which must start with '1.'
parseFirstOListItem :: Parser MdToken parseFirstOListItem :: Parser MdToken
parseFirstOListItem = do parseFirstOListItem = do
space
remaining <- getInput remaining <- getInput
when (take 2 (T.unpack remaining) /= "1.") empty when (take 2 (T.unpack remaining) /= "1.") empty
parseOListLineItem parseOListLineItem
parseOrderedList :: Parser MdToken parseOrderedList :: Parser MdToken
parseOrderedList = do parseOrderedList = do
firstLine <- parseFirstOListItem firstLine <- try parseFirstOListItem
lineItems <- some parseOListItem lineItems <- many $ try parseOListItem
void (char '\n') <|> eof void (char '\n') <|> eof
return $ OrdList (firstLine : lineItems) return $ OrdList (firstLine : lineItems)
@ -423,9 +470,17 @@ doubleNewlineText = T.pack "\n\n"
parseHorizontalRule :: Parser MdToken parseHorizontalRule :: Parser MdToken
parseHorizontalRule = string horizontalRuleText *> (void (string doubleNewlineText) <|> eof) *> return HorizontalRule parseHorizontalRule = string horizontalRuleText *> (void (string doubleNewlineText) <|> eof) *> return HorizontalRule
parseCodeblock :: Parser MdToken
parseCodeblock = do
string (T.pack "```\n")
inside <- someTill anySingle (string (T.pack "\n```"))
return $ Codeblock (Unit (concatMap escapeChar inside))
documentParsers :: [Parser MdToken] documentParsers :: [Parser MdToken]
documentParsers = documentParsers =
[ parseHorizontalRule, [ parseHorizontalRule,
parseCodeblock,
parseTable,
parseHeader, parseHeader,
parseBlockquote, parseBlockquote,
parseUnorderedList, parseUnorderedList,

@ -90,7 +90,8 @@ orderedListTests =
check_equal "Paragraph before list" "<h3>This is a list</h3><ol><li>Item 1</li><li>Item 2</li></ol>" (convert "### This is a list\n\n1. Item 1\n200. Item 2"), check_equal "Paragraph before list" "<h3>This is a list</h3><ol><li>Item 1</li><li>Item 2</li></ol>" (convert "### This is a list\n\n1. Item 1\n200. Item 2"),
check_equal "Nested list then back" "<ol><li>Item 1</li><li>Item 2<ol><li>Item 3</li><li>Item 4</li></ol></li><li>Item 5</li></ol>" (convert "1. Item 1\n2. Item 2\n 1. Item 3\n 3. Item 4\n5. Item 5"), check_equal "Nested list then back" "<ol><li>Item 1</li><li>Item 2<ol><li>Item 3</li><li>Item 4</li></ol></li><li>Item 5</li></ol>" (convert "1. Item 1\n2. Item 2\n 1. Item 3\n 3. Item 4\n5. Item 5"),
check_equal "Blockquote in list" "<ol><li>Item 1</li><li><p>Item 2</p><blockquote><p>Quote</p></blockquote></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n\n > Quote\n\n3. Item 3"), check_equal "Blockquote in list" "<ol><li>Item 1</li><li><p>Item 2</p><blockquote><p>Quote</p></blockquote></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n\n > Quote\n\n3. Item 3"),
check_equal "Unordered list in ordered list" "<ol><li>Item 1</li><li>Item 2<ul><li>Item 1</li><li>Item 2</li></ul></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n - Item 1\n * Item 2\n4. Item 3") check_equal "Unordered list in ordered list" "<ol><li>Item 1</li><li>Item 2<ul><li>Item 1</li><li>Item 2</li></ul></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n - Item 1\n * Item 2\n4. Item 3"),
check_equal "List with just 1 item" "<ol><li>Item 1</li></ol>" (convert "1. Item 1")
] ]
htmlTests = htmlTests =
@ -106,7 +107,8 @@ codeTests =
imageTests = imageTests =
TestList TestList
[ check_equal "Image with text" "<p>This is an image <img src=\"img.png\" alt=\"Image 1\" /></p>" (convert "This is an image ![Image 1](img.png)") [ check_equal "Image with text" "<p>This is an image <img src=\"img.png\" alt=\"Image 1\"/></p>" (convert "This is an image ![Image 1](img.png)"),
check_equal "Image with classes" "<p>This is an image <img src=\"img.png\" alt=\"Image 1\" class=\"new-img\"/></p>" (convert "This is an image ![Image 1](img.png){.new-img}")
] ]
figureTests = figureTests =
@ -118,6 +120,17 @@ horizontalRuleTests =
TestList TestList
[check_equal "Horizontal Rule" "<p>a</p><hr><p>b</p>" (convert "a\n\n---\n\nb")] [check_equal "Horizontal Rule" "<p>a</p><hr><p>b</p>" (convert "a\n\n---\n\nb")]
tableTests =
TestList
[ check_equal
"Basic table"
"<table>\
\<thead><tr><th>Col 1</th><th>Col 2</th><th>Col 3</th></tr></thead>\
\<tbody><tr><td>Data 1</td><td>Data 2</td><td>Data 3</td></tr>\
\<tr><td>More Data 1</td><td>More Data 2</td><td>More Data 3</td></tr></tbody></table>"
(convert "| Col 1 | Col 2 | Col 3 |\n|---|---|---|\n| Data 1 | Data 2 | Data 3 |\n| More Data 1 | More Data 2 | More Data 3 |")
]
integrationTests = integrationTests =
TestList TestList
[ check_equal "Integration 1" "<h1>Sample Markdown</h1><p>This is some basic, sample markdown.</p><h2><b>Second</b> <i>Heading</i></h2>" (convert "# Sample Markdown\n\n This is some basic, sample markdown.\n\n ## __Second__ _Heading_"), [ check_equal "Integration 1" "<h1>Sample Markdown</h1><p>This is some basic, sample markdown.</p><h2><b>Second</b> <i>Heading</i></h2>" (convert "# Sample Markdown\n\n This is some basic, sample markdown.\n\n ## __Second__ _Heading_"),
@ -157,6 +170,7 @@ tests =
figureTests, figureTests,
codeTests, codeTests,
horizontalRuleTests, horizontalRuleTests,
tableTests,
integrationTests integrationTests
] ]

Loading…
Cancel
Save