You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

216 lines
6.4 KiB
Haskell

{-# LANGUAGE InstanceSigs #-}
module MdToHTML where
import Control.Applicative
import Control.Monad
import Data.List
import Debug.Trace
import Text.ParserCombinators.ReadP
import Text.Printf
type HeaderLevel = Int
newtype URL = URL {getUrl :: String}
newtype ImgPath = ImgPath {getPath :: String}
parseMany :: ReadP a -> ReadP [a]
parseMany = Text.ParserCombinators.ReadP.many
data MdToken
= Document [MdToken]
| Header HeaderLevel MdToken
| Para MdToken
| Line [MdToken]
| SingleNewline -- A single newline is rendered as a space.
| Linebreak
| HorizontalRule
| Blockquote MdToken
| UnordList [MdToken]
| OrdList [MdToken]
| Code String
| Codeblock String
| Link MdToken URL
| Image MdToken ImgPath
| Bold MdToken
| Italic MdToken
| Strikethrough MdToken
| Unit String
-- Deriving Show for MdToken
instance Show MdToken where
show :: MdToken -> String
show (Document tokens) = concatMap show tokens
show (Header level token) = "<h" ++ show level ++ ">" ++ show token ++ "</h" ++ show level ++ ">"
show (Para token) = "<p>" ++ show token ++ "</p>"
show (Line tokens) = concatMap show tokens
show Linebreak = "<br>"
show SingleNewline = " "
show HorizontalRule = "---------"
show (Blockquote token) = "<blockquote>" ++ show token ++ "</blockquote>"
show (UnordList tokens) = "UNORD" ++ concatMap show tokens
show (OrdList tokens) = "ORD" ++ concatMap show tokens
show (Code code) = show code
show (Codeblock code) = show code
show (Link txt url) = "<a href=\"" ++ getUrl url ++ "\">" ++ show txt ++ "</a>"
show (Image txt imgPath) = "<img src=" ++ getPath imgPath ++ ">" ++ show txt ++ "</img>"
show (Bold token) = "<b>" ++ show token ++ "</b>"
show (Italic token) = "<i>" ++ show token ++ "</i>"
show (Strikethrough token) = "<s>" ++ show token ++ "</s>"
show (Unit unit) = printf "%s" unit
instance Semigroup MdToken where
a <> b = Document [a, b]
instance Monoid MdToken where
mempty = Unit ""
-- ---------------
-- Helpers
leftmostLongest :: (Foldable t) => [(a, t b)] -> (a, t b)
leftmostLongest xs =
let lastElem = last xs
filteredLst = filter (\val -> length (snd val) == length (snd lastElem)) xs
in head filteredLst
-- Get the first parse returned by readP_to_S that consumed the most input
leftmostLongestParse :: ReadP a -> String -> (a, String)
leftmostLongestParse parser input = leftmostLongest $ readP_to_S parser input
-- Parse if the string that's left matches the string comparator function
lookaheadParse :: (String -> Bool) -> ReadP Char
lookaheadParse stringCmp = do
lookahead <- look
case stringCmp lookahead of
True -> get
False -> pfail
lineToList :: MdToken -> [MdToken]
lineToList (Line tokens) = tokens
specialChars = "\\#*_[\n "
-- ---------------
-- Parse a markdown header, denoted by 1-6 #'s followed by some text, followed by EOL.
parseHeader :: ReadP MdToken
parseHeader = do
skipSpaces
headers <- munch1 (== '#')
when
(length headers > 6)
pfail
skipSpaces
text <- munch1 (/= '\n')
Text.ParserCombinators.ReadP.optional (char '\n')
let parsedText = fst $ leftmostLongestParse parseLine text
return (Header (length headers) parsedText)
-- Parse bold text
parseBold :: ReadP MdToken
parseBold = do
text <-
choice
[ between (string "__") (string "__") (many1 (lookaheadParse (/= "__"))),
between (string "**") (string "**") (many1 (lookaheadParse (/= "**")))
]
let parsedText = fst $ leftmostLongestParse parseLine text
return (Bold parsedText)
-- Parse italic text
parseItalic :: ReadP MdToken
parseItalic = do
text <-
choice
[ between (string "_") (string "_") (munch1 (/= '_')),
between (string "*") (string "*") (munch1 (/= '*'))
]
let parsedText = fst $ leftmostLongestParse parseLine text
return (Italic parsedText)
-- Parse a link
parseLink :: ReadP MdToken
parseLink = do
linkText <- between (string "[") (string "]") (many1 get)
linkURL <- between (string "(") (string ")") (many1 get)
let parsedLinkText = fst $ leftmostLongestParse parseLine linkText
return $ Link parsedLinkText (URL linkURL)
-- Parse a linebreak character
parseLinebreak :: ReadP MdToken
parseLinebreak = do
char ' '
many1 (char ' ')
char '\n'
return Linebreak
parseSingleNewline :: ReadP MdToken
parseSingleNewline = do
char '\n'
return SingleNewline
-- Parse an escaped character
parseEscapedChar :: ReadP MdToken
parseEscapedChar = do
char '\\'
escapedChar <- choice (map char specialChars) -- Parse any of the special chars.
return (Unit [escapedChar])
-- Parse a regular string as a Unit.
parseString :: ReadP MdToken
parseString = do
firstChar <- satisfy (/= '\n') -- Must parse at least one non-newline character here
text <- munch (`notElem` specialChars)
return (Unit (firstChar : text))
lineParsers :: [ReadP MdToken]
lineParsers =
[ parseLinebreak,
parseSingleNewline,
parseEscapedChar,
parseBold,
parseItalic,
parseLink,
parseString
] -- A 'line' doesn't include a 'header'
-- List of all parsers
allParsers :: [ReadP MdToken]
allParsers = parseHeader : lineParsers
-- Parse any of the above tokens.
parseLineToken :: ReadP MdToken
parseLineToken = choice lineParsers
-- Parse a line, consisting of one or more tokens.
parseLine :: ReadP MdToken
parseLine = do
skipSpaces
-- Fail if we have reached the end of the document.
parsed <- manyTill parseLineToken eof
return (Line parsed)
-- Parse a paragraph, which is a 'Line' (can span multiple actual lines), separated by double-newlines.
parsePara :: ReadP MdToken
parsePara = do
parseMany (char '\n')
-- text <- many1 (lookaheadParse (\x -> ((length x) < 2) || (take 2 x) /= "\n\n")) -- Parse until a double-newline.
-- string "\n\n" <|> (eof >> return "") -- Consume the next double-newline or EOF.
text <- manyTill get (string "\n\n" <|> (eof >> return ""))
when (null text) pfail
let parsedText = fst $ leftmostLongestParse parseLine text -- Parse a line
return (Para parsedText)
-- Parse a blockquote, which is a greater-than sign followed by a paragraph.
parseBlockquote :: ReadP MdToken
parseBlockquote = do
char '>'
Blockquote <$> (parseBlockquote <++ parsePara) -- Parse another blockquote or a regular paragraph, wrap it in a blockquote.
-- Parse a document, which is multiple paragraphs.
parseDocument :: ReadP MdToken
parseDocument = do
res <- manyTill (parseHeader <++ parseBlockquote <++ parsePara) eof
return (Document res)