@ -1,21 +1,23 @@
module MdToHTML where
module MdToHTML where
import Text.ParserCombinators.ReadP
import Control.Monad
import Control.Applicative
import Control.Applicative
import Text.Printf
import Control.Monad
import Debug.Trace
import Data.List
import Data.List
import Debug.Trace
import Text.ParserCombinators.ReadP
import Text.Printf
type HeaderLevel = Int
type HeaderLevel = Int
newtype URL = URL { getUrl :: String }
newtype URL = URL { getUrl :: String }
newtype ImgPath = ImgPath { getPath :: String }
newtype ImgPath = ImgPath { getPath :: String }
parseMany :: ReadP a -> ReadP [ a ]
parseMany :: ReadP a -> ReadP [ a ]
parseMany = Text . ParserCombinators . ReadP . many
parseMany = Text . ParserCombinators . ReadP . many
data MdToken = Document [ MdToken ]
data MdToken
= Document [ MdToken ]
| Header HeaderLevel MdToken
| Header HeaderLevel MdToken
| Para MdToken
| Para MdToken
| Line [ MdToken ]
| Line [ MdToken ]
@ -35,15 +37,15 @@ data MdToken = Document [MdToken]
-- Deriving Show for MdToken
-- Deriving Show for MdToken
instance Show MdToken where
instance Show MdToken where
show ( Document tokens ) = concat ( map show tokens )
show ( Document tokens ) = concat ( map show tokens )
show ( Header level token ) = " <h " ++ show level ++ " > " ++ show token ++ " </h " ++ show level ++ " > "
show ( Header level token ) = " <h " ++ show level ++ " > " ++ show token ++ " </h " ++ show level ++ " > "
show ( Para token ) = " <p> " ++ show token ++ " </p> "
show ( Para token ) = " <p> " ++ show token ++ " </p> "
show ( Line tokens ) = concat ( map show tokens )
show ( Line tokens ) = concat ( map show tokens )
show Linebreak = " <br> "
show Linebreak = " <br> "
show HorizontalRule = " --------- "
show HorizontalRule = " --------- "
show ( Blockquote token ) = " BLOCK " ++ show token
show ( Blockquote token ) = " BLOCK " ++ show token
show ( UnordList tokens ) = " UNORD " ++ concat ( map show tokens )
show ( UnordList tokens ) = " UNORD " ++ concat ( map show tokens )
show ( OrdList tokens ) = " ORD " ++ concat ( map show tokens )
show ( OrdList tokens ) = " ORD " ++ concat ( map show tokens )
show ( Code code ) = show code
show ( Code code ) = show code
show ( Codeblock code ) = show code
show ( Codeblock code ) = show code
show ( Link txt url ) = " <a href= " ++ ( getUrl url ) ++ " > " ++ show txt ++ " </a> "
show ( Link txt url ) = " <a href= " ++ ( getUrl url ) ++ " > " ++ show txt ++ " </a> "
@ -53,8 +55,6 @@ instance Show MdToken where
show ( Strikethrough token ) = " <s> " ++ show token ++ " </s> "
show ( Strikethrough token ) = " <s> " ++ show token ++ " </s> "
show ( Unit unit ) = printf " %s " unit
show ( Unit unit ) = printf " %s " unit
-- ---------------
-- ---------------
-- Helpers
-- Helpers
mustBeHash :: ReadP Char
mustBeHash :: ReadP Char
@ -80,27 +80,33 @@ lookaheadParse stringCmp = do
lineToList :: MdToken -> [ MdToken ]
lineToList :: MdToken -> [ MdToken ]
lineToList ( Line tokens ) = tokens
lineToList ( Line tokens ) = tokens
-- ---------------
-- ---------------
-- Parse a markdown header, denoted by 1-6 #'s followed by some text, followed by EOL.
-- Parse a markdown header, denoted by 1-6 #'s followed by some text, followed by EOL.
parseHeader :: ReadP MdToken
parseHeader :: ReadP MdToken
parseHeader = do
parseHeader = do
traceM " Reached parseHeader "
skipSpaces
skipSpaces
headers <- many1 mustBeHash
headers <- many1 mustBeHash
when ( ( length headers ) > 6 )
when
( ( length headers ) > 6 )
pfail
pfail
_ <- string " "
_ <- string " "
text <- munch1 ( \ x -> x /= ' \ n ' ) -- Parse until EOL
text <- munch1 ( \ x -> x /= ' \ n ' ) -- Parse until EOL
-- traceM text
let parsedText = fst $ leftmostLongestParse parseLine text
let parsedText = fst $ leftmostLongestParse parseLine text
traceM ( show parsedText )
traceM ( show ( length headers ) )
return ( Header ( length headers ) parsedText )
return ( Header ( length headers ) parsedText )
-- Parse bold text
-- Parse bold text
parseBold :: ReadP MdToken
parseBold :: ReadP MdToken
parseBold = do
parseBold = do
text <- choice [
traceM " Reached parseBold "
( between ( string " __ " ) ( string " __ " ) ( many1 ( lookaheadParse ( /= " __ " ) ) ) ) ,
text <-
( between ( string " ** " ) ( string " ** " ) ( many1 ( lookaheadParse ( /= " ** " ) ) ) )
choice
[ between ( string " __ " ) ( string " __ " ) ( many1 ( lookaheadParse ( /= " __ " ) ) ) ,
between ( string " ** " ) ( string " ** " ) ( many1 ( lookaheadParse ( /= " ** " ) ) )
]
]
let parsedText = fst $ leftmostLongestParse parseLine text
let parsedText = fst $ leftmostLongestParse parseLine text
return ( Bold parsedText )
return ( Bold parsedText )
@ -108,8 +114,10 @@ parseBold = do
-- Parse italic text
-- Parse italic text
parseItalic :: ReadP MdToken
parseItalic :: ReadP MdToken
parseItalic = do
parseItalic = do
text <- choice [
traceM " Reached parseItalic "
( between ( string " _ " ) ( string " _ " ) ( munch1 ( /= '_' ) ) ) ,
text <-
choice
[ ( between ( string " _ " ) ( string " _ " ) ( munch1 ( /= '_' ) ) ) ,
( between ( string " * " ) ( string " * " ) ( munch1 ( /= '*' ) ) )
( between ( string " * " ) ( string " * " ) ( munch1 ( /= '*' ) ) )
]
]
let parsedText = fst $ leftmostLongestParse parseLine text
let parsedText = fst $ leftmostLongestParse parseLine text
@ -118,22 +126,26 @@ parseItalic = do
-- Parse a linebreak character
-- Parse a linebreak character
parseLinebreak :: ReadP MdToken
parseLinebreak :: ReadP MdToken
parseLinebreak = do
parseLinebreak = do
traceM " Reached parseLinebreak "
char ' '
many1 ( char ' ' )
char ' \ n '
char ' \ n '
return Linebreak
return Linebreak
-- Parse a regular string as a Unit.
-- Parse a regular string as a Unit.
parseString :: ReadP MdToken
parseString :: ReadP MdToken
parseString = do
parseString = do
traceM " Reached parseString "
firstChar <- get -- Must parse at least one character here
firstChar <- get -- Must parse at least one character here
text <- munch ( \ x -> not ( elem x " #*_[ \ n " ) )
text <- munch ( \ x -> not ( elem x " #*_[ \ n " ) )
return ( Unit ( firstChar : text ) )
return ( Unit ( firstChar : text ) )
lineParsers :: [ ReadP MdToken ]
lineParsers :: [ ReadP MdToken ]
lineParsers = [ parse Header, parse Linebreak, parseBold , parseItalic , parseString ] -- A 'line' doesn't include a 'header'
lineParsers = [ parse Linebreak, parseBold , parseItalic , parseString ] -- A 'line' doesn't include a 'header'
-- List of all parsers
-- List of all parsers
allParsers :: [ ReadP MdToken ]
allParsers :: [ ReadP MdToken ]
allParsers = parseHeader : lineParsers
allParsers = parseHeader : lineParsers
-- Parse any of the above tokens.
-- Parse any of the above tokens.
parseLineToken :: ReadP MdToken
parseLineToken :: ReadP MdToken
@ -142,22 +154,27 @@ parseLineToken = choice lineParsers
-- Parse a line, consisting of one or more tokens.
-- Parse a line, consisting of one or more tokens.
parseLine :: ReadP MdToken
parseLine :: ReadP MdToken
parseLine = do
parseLine = do
traceM " Reached parseLine "
skipSpaces
skipSpaces
-- Fail if we have reached the end of the document.
-- Fail if we have reached the end of the document.
remaining <- look
remaining <- look
when ( null remaining ) pfail
when ( null remaining ) pfail
parsed <- parseMany parseLineToken
parsed <- parseMany parseLineToken
-- traceM $ show parsed
-- traceM $ show parsed
return ( Line parsed )
return ( Line parsed )
-- Parse a paragraph, which is a 'Line' (can span multiple actual lines), separated by double-newlines.
-- Parse a paragraph, which is a 'Line' (can span multiple actual lines), separated by double-newlines.
-- As a weird special case, a 'Paragraph' can also be a 'Header'.
-- As a weird special case, a 'Paragraph' can also be a 'Header'.
parsePara :: ReadP MdToken
parsePara :: ReadP MdToken
parsePara = do
parsePara = do
traceM " Reached parsePara "
parseMany ( char ' \ n ' )
parseMany ( char ' \ n ' )
text <- many1 ( lookaheadParse ( \ x -> ( ( length x ) < 2 ) || ( take 2 x ) /= " \ n \ n " ) ) -- Parse until a double-newline.
-- text <- many1 (lookaheadParse (\x -> ((length x) < 2) || (take 2 x) /= "\n\n")) -- Parse until a double-newline.
string " \ n \ n " <|> ( eof >> return " " ) -- Consume the next double-newline or EOF.
-- string "\n\n" <|> (eof >> return "") -- Consume the next double-newline or EOF.
text <- ( manyTill get ( ( string " \ n \ n " ) <|> ( eof >> return " " ) ) )
when ( null text ) pfail
let parsedText = fst $ leftmostLongestParse ( parseHeader <|> parseLine ) text -- Parse either a line or a header.
let parsedText = fst $ leftmostLongestParse ( parseHeader <|> parseLine ) text -- Parse either a line or a header.
traceM ( show parsedText )
-- If the paragraph is a header, return a Header token. Otheriwse return a Para token.
-- If the paragraph is a header, return a Header token. Otheriwse return a Para token.
case parsedText of
case parsedText of
Header level token -> return ( Header level token )
Header level token -> return ( Header level token )