@ -1,21 +1,23 @@
module MdToHTML where
import Text.ParserCombinators.ReadP
import Control.Monad
import Control.Applicative
import Text.Printf
import Debug.Trace
import Control.Monad
import Data.List
import Debug.Trace
import Text.ParserCombinators.ReadP
import Text.Printf
-- Level of a markdown header. parseHeader only produces values from 1 to 6.
type HeaderLevel = Int
-- A URL kept as a String; unwrap it with getUrl.
newtype URL = URL { getUrl :: String }
-- An image path kept as a String; unwrap it with getPath.
newtype ImgPath = ImgPath { getPath :: String }
-- ReadP's own 'many': run a parser zero or more times and collect the results.
-- The name is written fully qualified because this module also imports
-- Control.Applicative, which exports a different 'many'; an unqualified use
-- would be ambiguous.
parseMany :: ReadP a -> ReadP [a]
parseMany = Text.ParserCombinators.ReadP.many
data MdToken = Document [ MdToken ]
data MdToken
= Document [ MdToken ]
| Header HeaderLevel MdToken
| Para MdToken
| Line [ MdToken ]
@ -53,8 +55,6 @@ instance Show MdToken where
show ( Strikethrough token ) = " <s> " ++ show token ++ " </s> "
show ( Unit unit ) = printf " %s " unit
-- ---------------
-- Helpers
mustBeHash :: ReadP Char
@ -80,27 +80,33 @@ lookaheadParse stringCmp = do
-- Unwrap the tokens held by a Line.
-- Any other token is returned as a one-element list, so the function is total
-- instead of failing with a pattern-match error on non-Line input.
lineToList :: MdToken -> [MdToken]
lineToList (Line tokens) = tokens
lineToList token = [token]
-- ---------------
-- Parse a markdown header: 1-6 #'s, a single space, then text up to the end of the line.
-- The header text is parsed again with parseLine, and that result becomes the
-- body of the returned Header token.
parseHeader :: ReadP MdToken
parseHeader = do
  traceM " Reached parseHeader "
  skipSpaces
  hashes <- many1 mustBeHash
  let level = length hashes
  -- many1 guarantees at least one '#'; more than six is not a header.
  when (level > 6) pfail
  _ <- string " "
  text <- munch1 (/= '\n') -- Stops before the newline; the newline itself is not consumed.
  let parsedText = fst $ leftmostLongestParse parseLine text
  traceM (show parsedText)
  traceM (show level)
  return (Header level parsedText)
-- Parse bold text: content wrapped in a matching pair of "__" or "**".
-- The content between the markers is parsed again with parseLine and wrapped
-- in a Bold token.
parseBold :: ReadP MdToken
parseBold = do
  traceM " Reached parseBold "
  text <- choice (map delimited ["__", "**"])
  let parsedText = fst $ leftmostLongestParse parseLine text
  return (Bold parsedText)
  where
    -- One or more characters enclosed by the same marker on both sides.
    -- The (/= marker) test is handed to lookaheadParse exactly as before.
    delimited marker =
      between
        (string marker)
        (string marker)
        (many1 (lookaheadParse (/= marker)))
@ -108,8 +114,10 @@ parseBold = do
-- Parse italic text
parseItalic :: ReadP MdToken
parseItalic = do
text <- choice [
( between ( string " _ " ) ( string " _ " ) ( munch1 ( /= '_' ) ) ) ,
traceM " Reached parseItalic "
text <-
choice
[ ( between ( string " _ " ) ( string " _ " ) ( munch1 ( /= '_' ) ) ) ,
( between ( string " * " ) ( string " * " ) ( munch1 ( /= '*' ) ) )
]
let parsedText = fst $ leftmostLongestParse parseLine text
@ -118,18 +126,22 @@ parseItalic = do
-- Parse a linebreak: two or more spaces immediately followed by a newline.
parseLinebreak :: ReadP MdToken
parseLinebreak = do
  traceM " Reached parseLinebreak "
  -- One mandatory space plus many1 more gives the "at least two" requirement.
  _ <- char ' '
  _ <- many1 (char ' ')
  _ <- char '\n'
  return Linebreak
-- Parse a regular string as a Unit.
-- The first character is taken unconditionally, so a successful parse always
-- consumes input. The rest runs up to the next '#', '*', '_', '[' or newline.
parseString :: ReadP MdToken
parseString = do
  traceM " Reached parseString "
  firstChar <- get -- Must parse at least one character here
  text <- munch (`notElem` "#*_[\n")
  return (Unit (firstChar : text))
-- Parsers for the tokens that may appear inside a line.
lineParsers :: [ReadP MdToken]
lineParsers = [parseLinebreak, parseBold, parseItalic, parseString] -- A 'line' doesn't include a 'header'
-- List of all parsers
allParsers :: [ ReadP MdToken ]
@ -142,6 +154,7 @@ parseLineToken = choice lineParsers
-- Parse a line, consisting of one or more tokens.
parseLine :: ReadP MdToken
parseLine = do
traceM " Reached parseLine "
skipSpaces
-- Fail if we have reached the end of the document.
remaining <- look
@ -154,10 +167,14 @@ parseLine = do
-- As a weird special case, a 'Paragraph' can also be a 'Header'.
parsePara :: ReadP MdToken
parsePara = do
traceM " Reached parsePara "
parseMany ( char ' \ n ' )
text <- many1 ( lookaheadParse ( \ x -> ( ( length x ) < 2 ) || ( take 2 x ) /= " \ n \ n " ) ) -- Parse until a double-newline.
string " \ n \ n " <|> ( eof >> return " " ) -- Consume the next double-newline or EOF.
-- text <- many1 (lookaheadParse (\x -> ((length x) < 2) || (take 2 x) /= "\n\n")) -- Parse until a double-newline.
-- string "\n\n" <|> (eof >> return "") -- Consume the next double-newline or EOF.
text <- ( manyTill get ( ( string " \ n \ n " ) <|> ( eof >> return " " ) ) )
when ( null text ) pfail
let parsedText = fst $ leftmostLongestParse ( parseHeader <|> parseLine ) text -- Parse either a line or a header.
traceM ( show parsedText )
-- If the paragraph is a header, return a Header token. Otherwise return a Para token.
case parsedText of
Header level token -> return ( Header level token )