@ -6,9 +6,11 @@ module MdToHTML where
import Control.Applicative hiding ( many , some )
import Control.Monad
import Control.Monad.Combinators ( count )
import Data.Char
import Data.List
import Data.Ord ( comparing )
import Data.String.Utils
import qualified Data.Text as T
import Data.Void
import Debug.Trace
@ -20,6 +22,8 @@ type Parser = Parsec Void T.Text
type HeaderLevel = Int
type CssClass = String
newtype URL = URL { getUrl :: String } deriving ( Eq )
newtype ImgPath = ImgPath { getPath :: String } deriving ( Eq )
@ -36,10 +40,11 @@ data MdToken
| UnordList [ MdToken ]
| OrdList [ MdToken ]
| Code MdToken
| Codeblock String
| Table [ [ MdToken ] ]
| Codeblock MdToken
| Link MdToken URL
| Image MdToken URL
| Figure MdToken URL
| Image MdToken URL ( Maybe [ CssClass ] )
| Figure MdToken URL ( Maybe [ CssClass ] )
| Bold MdToken
| Italic MdToken
| Strikethrough MdToken
@ -59,10 +64,11 @@ instance Show MdToken where
show ( UnordList tokens ) = " <ul> " ++ concatMap ( prepend " <li> " . append " </li> " . show ) tokens ++ " </ul> "
show ( OrdList tokens ) = " <ol> " ++ concatMap ( prepend " <li> " . append " </li> " . show ) tokens ++ " </ol> "
show ( Code code ) = " <code> " ++ show code ++ " </code> "
show ( Codeblock code ) = show code
show ( Table ( thead : tokenGrid ) ) = " <table> \ n <thead> \ n <tr> \ n " ++ concatMap ( \ x -> " <th> " ++ rstrip ( show x ) ++ " </th> \ n " ) thead ++ " </tr> \ n </thead> \ n " ++ " <tbody> \ n " ++ concatMap ( \ x -> " <tr> \ n " ++ concatMap ( \ y -> " <td> " ++ rstrip ( show y ) ++ " </td> \ n " ) x ++ " </tr> \ n " ) tokenGrid ++ " </tbody> \ n </table> \ n "
show ( Codeblock code ) = " <pre><code> " ++ show code ++ " </code></pre> "
show ( Link txt url ) = " <a href= \ " " ++ getUrl url ++ " \ " > " ++ show txt ++ " </a> "
show ( Image txt url ) = " <img src= \ " " ++ getUrl url ++ " \ " " ++ " alt= \ " " ++ show txt ++ " \ " />"
show ( Figure txt url ) = " <figure><img src= \ " " ++ getUrl url ++ " \ " alt= \ " " ++ show txt ++ " \ "/><figcaption aria-hidden= \ " true \ " > " ++ show txt ++ " </figcaption></figure> "
show ( Image txt url cssClasses ) = " <img src= \ " " ++ getUrl url ++ " \ " " ++ " alt= \ " " ++ show txt ++ " \ " " ++ maybe " " ( \ classes -> " class= \ " " ++ unwords classes ++ " \ " " ) cssClasses ++ " />"
show ( Figure txt url cssClasses ) = " <figure><img src= \ " " ++ getUrl url ++ " \ " alt= \ " " ++ show txt ++ " \ "" ++ maybe " " ( \ classes -> " class= \ " " ++ unwords classes ++ " \ " " ) cssClasses ++ "/><figcaption aria-hidden= \ " true \ " > " ++ show txt ++ " </figcaption></figure> "
show ( Bold token ) = " <b> " ++ show token ++ " </b> "
show ( Italic token ) = " <i> " ++ show token ++ " </i> "
show ( Strikethrough token ) = " <s> " ++ show token ++ " </s> "
@ -179,15 +185,16 @@ parseStrikethrough = do
-- Parse code
parseCode :: Parser MdToken
parseCode = do
char '`'
inside <- manyTill ( satisfy ( /= ' \ n ' ) ) ( char '`' )
opening <- some $ char '`'
inside <- someTill ( satisfy ( /= ' \ n ' ) ) ( char '`' )
closing <- count ( length opening - 1 ) ( char '`' )
return ( Code ( Unit ( concatMap escapeChar inside ) ) )
-- Parse a link
parseLink :: Parser MdToken
parseLink = do
char '['
linkText <- some Till parseLineToken ( char ']' )
linkText <- many Till parseLineToken ( char ']' )
char '('
linkURL <- manyTill anySingle ( char ')' )
return $ Link ( Line linkText ) ( URL linkURL )
@ -200,6 +207,22 @@ parseLinebreak = do
char ' \ n '
return Linebreak
parseTableRow :: Parser [ MdToken ]
parseTableRow = do
char '|'
row <- some ( many ( satisfy ( \ x -> x == ' ' || x == ' \ t ' ) ) *> someTill parseListLineToken ( char '|' ) )
return ( map Line row )
parseTable :: Parser MdToken
parseTable = do
tableHead <- parseTableRow
char ' \ n '
char '|'
sepEndBy1 ( some ( char '-' ) ) ( char '|' ) *> char ' \ n '
tableBody <- sepEndBy parseTableRow ( char ' \ n ' )
many ( char ' \ n ' ) -- Parse trailing newlines, if any
return $ Table ( tableHead : tableBody )
parseSingleNewline :: Parser MdToken
parseSingleNewline = do
char ' \ n '
@ -208,19 +231,36 @@ parseSingleNewline = do
[] -> return $ Unit " "
_ -> return SingleNewline
parseCssClasses :: Parser [ CssClass ]
parseCssClasses = do
char '{'
classes <- some parseCssClass
char '}'
return classes
where
parseCssClass :: Parser CssClass
parseCssClass = do
char '.'
let firstLetterParser = char '_' <|> char '-' <|> label " letter " ( satisfy isAlpha )
cssClassFirstLetter <- firstLetterParser
cssClass <- many ( firstLetterParser <|> label " digit " ( satisfy isDigit ) )
space
return ( cssClassFirstLetter : cssClass )
parseImage :: Parser MdToken
parseImage = do
char '!'
link <- parseLink
cssClasses <- optional $ try parseCssClasses
case link of
Link text path -> return $ Image text path
Link text path -> return $ Image text path cssClasses
_ -> empty -- This should never be reached
parseFigure = do
img <- parseImage
void ( string doubleNewlineText ) <|> eof
case img of
Image text path -> return $ Figure text path
Image text path cssClasses -> return $ Figure text path cssClasses
_ -> return img
-- Parse an escaped character
@ -237,6 +277,13 @@ parseUnit = do
text <- anySingle
return ( Unit [ text ] )
-- Parse any character except a newline
parseUnitExceptNewline :: Parser MdToken
parseUnitExceptNewline = do
-- text <- satisfy (`notElem` specialChars)
text <- satisfy ( /= ' \ n ' )
return ( Unit [ text ] )
lineParsers :: [ Parser MdToken ]
lineParsers =
[ parseLinebreak ,
@ -251,17 +298,16 @@ lineParsers =
parseUnit
] -- A 'line' doesn't include a 'header'
listLineParsers :: [ Parser MdToken ]
listLineParsers =
[ parseLinebreak ,
parseEscapedChar ,
lineParsersWithoutNewline :: [ Parser MdToken ]
lineParsersWithoutNewline =
[ parseEscapedChar ,
parseCode ,
parseImage ,
parseBold ,
parseItalic ,
parseStrikethrough ,
parseLink ,
parseUnit
parseUnit ExceptNewline
] -- A list line cannot contain newlines.
-- List of all parsers
@ -274,7 +320,7 @@ parseLineToken = fallthroughParser lineParsers
-- Parse any of the list line tokens.
parseListLineToken :: Parser MdToken
parseListLineToken = fallthroughParser li stLi neParsers
parseListLineToken = fallthroughParser li neParsersWithoutNewline
-- Parse a line, consisting of one or more tokens.
parseLine :: Parser MdToken
@ -355,7 +401,7 @@ parseListLineItemCommon :: Parser MdToken
parseListLineItemCommon = do
space
restOfLine <- manyTill parseListLineToken ( void ( char ' \ n ' ) <|> eof )
nestedList <- parseListNested <|> return ( Unit " " )
nestedList <- try parseListNested <|> return ( Unit " " )
return $ Line [ Line restOfLine , nestedList ]
-- Parse an unordered list paragraph item.
@ -386,31 +432,32 @@ parseListParaItemCommon = do
-- Parse an unordered list item, which can be a line item or another list.
parseUListItem :: Parser MdToken
parseUListItem = try parseUListParaItem <|> parseUListLineItem
parseUListItem = space *> ( try parseUListParaItem <|> parseUListLineItem )
-- Parse an unordered list.
parseUnorderedList :: Parser MdToken
parseUnorderedList = do
lineItems <- some parseUListItem
lineItems <- some $ try parseUListItem
void ( char ' \ n ' ) <|> eof -- A list must end in an extra newline or eof
return $ UnordList lineItems
-- --------
parseOListItem :: Parser MdToken
parseOListItem = try parseOListParaItem <|> parseOListLineItem
parseOListItem = space *> ( try parseOListParaItem <|> parseOListLineItem )
-- Parses the first element of an ordered list, which must start with '1.'
parseFirstOListItem :: Parser MdToken
parseFirstOListItem = do
space
remaining <- getInput
when ( take 2 ( T . unpack remaining ) /= " 1. " ) empty
parseOListLineItem
parseOrderedList :: Parser MdToken
parseOrderedList = do
firstLine <- parseFirstOListItem
lineItems <- some parseOListItem
firstLine <- try parseFirstOListItem
lineItems <- many $ try parseOListItem
void ( char ' \ n ' ) <|> eof
return $ OrdList ( firstLine : lineItems )
@ -423,9 +470,17 @@ doubleNewlineText = T.pack "\n\n"
parseHorizontalRule :: Parser MdToken
parseHorizontalRule = string horizontalRuleText *> ( void ( string doubleNewlineText ) <|> eof ) *> return HorizontalRule
parseCodeblock :: Parser MdToken
parseCodeblock = do
string ( T . pack " ``` \ n " )
inside <- someTill anySingle ( string ( T . pack " \ n ``` " ) )
return $ Codeblock ( Unit ( concatMap escapeChar inside ) )
documentParsers :: [ Parser MdToken ]
documentParsers =
[ parseHorizontalRule ,
parseCodeblock ,
parseTable ,
parseHeader ,
parseBlockquote ,
parseUnorderedList ,