@ -1,59 +1,59 @@
module MdToHTML where
import Text.ParserCombinators.ReadP
import Control.Monad
import Control.Applicative
import Text.Printf
import Debug.Trace
import Control.Monad
import Data.List
import Debug.Trace
import Text.ParserCombinators.ReadP
import Text.Printf
type HeaderLevel = Int
newtype URL = URL { getUrl :: String }
newtype ImgPath = ImgPath { getPath :: String }
parseMany :: ReadP a -> ReadP [ a ]
parseMany = Text . ParserCombinators . ReadP . many
data MdToken = Document [ MdToken ]
| Header HeaderLevel MdToken
| Para MdToken
| Line [ MdToken ]
| Linebreak
| HorizontalRule
| Blockquote MdToken
| UnordList [ MdToken ]
| OrdList [ MdToken ]
| Code String
| Codeblock String
| Link MdToken URL
| Image MdToken ImgPath
| Bold MdToken
| Italic MdToken
| Strikethrough MdToken
| Unit String
data MdToken
= Document [ MdToken ]
| Header HeaderLevel MdToken
| Para MdToken
| Line [ MdToken ]
| Linebreak
| HorizontalRule
| Blockquote MdToken
| UnordList [ MdToken ]
| OrdList [ MdToken ]
| Code String
| Codeblock String
| Link MdToken URL
| Image MdToken ImgPath
| Bold MdToken
| Italic MdToken
| Strikethrough MdToken
| Unit String
-- Deriving Show for MdToken
instance Show MdToken where
show ( Document tokens ) = concat ( map show tokens )
show ( Header level token ) = " <h " ++ show level ++ " > " ++ show token ++ " </h " ++ show level ++ " > "
show ( Para token ) = " <p> " ++ show token ++ " </p> "
show ( Line tokens ) = concat ( map show tokens )
show Linebreak = " <br> "
show HorizontalRule = " --------- "
show ( Blockquote token ) = " BLOCK " ++ show token
show ( UnordList tokens ) = " UNORD " ++ concat ( map show tokens )
show ( OrdList tokens ) = " ORD " ++ concat ( map show tokens )
show ( Code code ) = show code
show ( Codeblock code ) = show code
show ( Link txt url ) = " <a href= " ++ ( getUrl url ) ++ " > " ++ show txt ++ " </a> "
show ( Image txt imgPath ) = " <img src= " ++ ( getPath imgPath ) ++ " > " ++ show txt ++ " </img> "
show ( Bold token ) = " <b> " ++ show token ++ " </b> "
show ( Italic token ) = " <i> " ++ show token ++ " </i> "
show ( Strikethrough token ) = " <s> " ++ show token ++ " </s> "
show ( Unit unit ) = printf " %s " unit
show ( Document tokens ) = concat ( map show tokens )
show ( Header level token ) = " <h " ++ show level ++ " > " ++ show token ++ " </h " ++ show level ++ " > "
show ( Para token ) = " <p> " ++ show token ++ " </p> "
show ( Line tokens ) = concat ( map show tokens )
show Linebreak = " <br> "
show HorizontalRule = " --------- "
show ( Blockquote token ) = " BLOCK " ++ show token
show ( UnordList tokens ) = " UNORD " ++ concat ( map show tokens )
show ( OrdList tokens ) = " ORD " ++ concat ( map show tokens )
show ( Code code ) = show code
show ( Codeblock code ) = show code
show ( Link txt url ) = " <a href= " ++ ( getUrl url ) ++ " > " ++ show txt ++ " </a> "
show ( Image txt imgPath ) = " <img src= " ++ ( getPath imgPath ) ++ " > " ++ show txt ++ " </img> "
show ( Bold token ) = " <b> " ++ show token ++ " </b> "
show ( Italic token ) = " <i> " ++ show token ++ " </i> "
show ( Strikethrough token ) = " <s> " ++ show token ++ " </s> "
show ( Unit unit ) = printf " %s " unit
-- ---------------
-- Helpers
@ -62,9 +62,9 @@ mustBeHash = satisfy (\x -> x == '#')
leftmostLongest :: ( Foldable t ) => [ ( a , t b ) ] -> ( a , t b )
leftmostLongest xs =
let lastElem = ( last xs )
filteredLst = ( filter ( \ val -> ( length $ snd val ) == ( length $ snd lastElem ) ) xs )
in head filteredLst
let lastElem = ( last xs )
filteredLst = ( filter ( \ val -> ( length $ snd val ) == ( length $ snd lastElem ) ) xs )
in head filteredLst
-- Get the first parse returned by readP_to_S that consumed the most input
leftmostLongestParse :: ReadP a -> String -> ( a , String )
@ -73,67 +73,79 @@ leftmostLongestParse parser input = leftmostLongest $ readP_to_S parser input
-- Parse if the string that's left matches the string comparator function
lookaheadParse :: ( String -> Bool ) -> ReadP Char
lookaheadParse stringCmp = do
lookahead <- look
case stringCmp lookahead of
True -> get
False -> pfail
lookahead <- look
case stringCmp lookahead of
True -> get
False -> pfail
lineToList :: MdToken -> [ MdToken ]
lineToList ( Line tokens ) = tokens
-- ---------------
-- Parse a markdown header, denoted by 1-6 #'s followed by some text, followed by EOL.
parseHeader :: ReadP MdToken
parseHeader = do
skipSpaces
headers <- many1 mustBeHash
when ( ( length headers ) > 6 )
pfail
_ <- string " "
text <- munch1 ( \ x -> x /= ' \ n ' ) -- Parse until EOL
-- traceM text
let parsedText = fst $ leftmostLongestParse parseLine text
return ( Header ( length headers ) parsedText )
traceM " Reached parseHeader "
skipSpaces
headers <- many1 mustBeHash
when
( ( length headers ) > 6 )
pfail
_ <- string " "
text <- munch1 ( \ x -> x /= ' \ n ' ) -- Parse until EOL
let parsedText = fst $ leftmostLongestParse parseLine text
traceM ( show parsedText )
traceM ( show ( length headers ) )
return ( Header ( length headers ) parsedText )
-- Parse bold text
parseBold :: ReadP MdToken
parseBold = do
text <- choice [
( between ( string " __ " ) ( string " __ " ) ( many1 ( lookaheadParse ( /= " __ " ) ) ) ) ,
( between ( string " ** " ) ( string " ** " ) ( many1 ( lookaheadParse ( /= " ** " ) ) ) )
]
let parsedText = fst $ leftmostLongestParse parseLine text
return ( Bold parsedText )
traceM " Reached parseBold "
text <-
choice
[ between ( string " __ " ) ( string " __ " ) ( many1 ( lookaheadParse ( /= " __ " ) ) ) ,
between ( string " ** " ) ( string " ** " ) ( many1 ( lookaheadParse ( /= " ** " ) ) )
]
let parsedText = fst $ leftmostLongestParse parseLine text
return ( Bold parsedText )
-- Parse italic text
parseItalic :: ReadP MdToken
parseItalic = do
text <- choice [
( between ( string " _ " ) ( string " _ " ) ( munch1 ( /= '_' ) ) ) ,
( between ( string " * " ) ( string " * " ) ( munch1 ( /= '*' ) ) )
]
let parsedText = fst $ leftmostLongestParse parseLine text
return ( Italic parsedText )
traceM " Reached parseItalic "
text <-
choice
[ ( between ( string " _ " ) ( string " _ " ) ( munch1 ( /= '_' ) ) ) ,
( between ( string " * " ) ( string " * " ) ( munch1 ( /= '*' ) ) )
]
let parsedText = fst $ leftmostLongestParse parseLine text
return ( Italic parsedText )
-- Parse a linebreak character
parseLinebreak :: ReadP MdToken
parseLinebreak = do
char ' \ n '
return Linebreak
traceM " Reached parseLinebreak "
char ' '
many1 ( char ' ' )
char ' \ n '
return Linebreak
-- Parse a regular string as a Unit.
parseString :: ReadP MdToken
parseString = do
firstChar <- get -- Must parse at least one character here
text <- munch ( \ x -> not ( elem x " #*_[ \ n " ) )
return ( Unit ( firstChar : text ) )
traceM " Reached parseString "
firstChar <- get -- Must parse at least one character here
text <- munch ( \ x -> not ( elem x " #*_[ \ n " ) )
return ( Unit ( firstChar : text ) )
lineParsers :: [ ReadP MdToken ]
lineParsers = [ parse Header, parse Linebreak, parseBold , parseItalic , parseString ] -- A 'line' doesn't include a 'header'
lineParsers = [ parse Linebreak, parseBold , parseItalic , parseString ] -- A 'line' doesn't include a 'header'
-- List of all parsers
allParsers :: [ ReadP MdToken ]
allParsers = parseHeader : lineParsers
allParsers = parseHeader : lineParsers
-- Parse any of the above tokens.
parseLineToken :: ReadP MdToken
@ -141,27 +153,32 @@ parseLineToken = choice lineParsers
-- Parse a line, consisting of one or more tokens.
parseLine :: ReadP MdToken
parseLine = do
skipSpaces
-- Fail if we have reached the end of the document.
remaining <- look
when ( null remaining ) pfail
parsed <- parseMany parseLineToken
-- traceM $ show parsed
return ( Line parsed )
parseLine = do
traceM " Reached parseLine "
skipSpaces
-- Fail if we have reached the end of the document.
remaining <- look
when ( null remaining ) pfail
parsed <- parseMany parseLineToken
-- traceM $ show parsed
return ( Line parsed )
-- Parse a paragraph, which is a 'Line' (can span multiple actual lines), separated by double-newlines.
-- As a weird special case, a 'Paragraph' can also be a 'Header'.
parsePara :: ReadP MdToken
parsePara = do
parseMany ( char ' \ n ' )
text <- many1 ( lookaheadParse ( \ x -> ( ( length x ) < 2 ) || ( take 2 x ) /= " \ n \ n " ) ) -- Parse until a double-newline.
string " \ n \ n " <|> ( eof >> return " " ) -- Consume the next double-newline or EOF.
let parsedText = fst $ leftmostLongestParse ( parseHeader <|> parseLine ) text -- Parse either a line or a header.
-- If the paragraph is a header, return a Header token. Otheriwse return a Para token.
case parsedText of
Header level token -> return ( Header level token )
_ -> return ( Para parsedText )
traceM " Reached parsePara "
parseMany ( char ' \ n ' )
-- text <- many1 (lookaheadParse (\x -> ((length x) < 2) || (take 2 x) /= "\n\n")) -- Parse until a double-newline.
-- string "\n\n" <|> (eof >> return "") -- Consume the next double-newline or EOF.
text <- ( manyTill get ( ( string " \ n \ n " ) <|> ( eof >> return " " ) ) )
when ( null text ) pfail
let parsedText = fst $ leftmostLongestParse ( parseHeader <|> parseLine ) text -- Parse either a line or a header.
traceM ( show parsedText )
-- If the paragraph is a header, return a Header token. Otheriwse return a Para token.
case parsedText of
Header level token -> return ( Header level token )
_ -> return ( Para parsedText )
-- Parse a document, which is multiple paragraphs.
parseDocument :: ReadP MdToken