diff --git a/app/Main.hs b/app/Main.hs
index 64cbe5f..1dddb85 100644
--- a/app/Main.hs
+++ b/app/Main.hs
@@ -4,5 +4,5 @@ import MdToHTML
main :: IO ()
main = do
- let res = fst $ leftmostLongestParse parseDocument "# _Hello_\n\n # Hello"
+ let res = fst $ leftmostLongestParse parseDocument "# _Hello_\n"
putStrLn (show res)
diff --git a/src/MdToHTML.hs b/src/MdToHTML.hs
index 6eff55e..82b6b02 100644
--- a/src/MdToHTML.hs
+++ b/src/MdToHTML.hs
@@ -1,59 +1,59 @@
module MdToHTML where
-import Text.ParserCombinators.ReadP
-import Control.Monad
import Control.Applicative
-import Text.Printf
-import Debug.Trace
+import Control.Monad
import Data.List
+import Debug.Trace
+import Text.ParserCombinators.ReadP
+import Text.Printf
type HeaderLevel = Int
newtype URL = URL {getUrl :: String}
+
newtype ImgPath = ImgPath {getPath :: String}
parseMany :: ReadP a -> ReadP [a]
parseMany = Text.ParserCombinators.ReadP.many
-data MdToken = Document [MdToken]
- | Header HeaderLevel MdToken
- | Para MdToken
- | Line [MdToken]
- | Linebreak
- | HorizontalRule
- | Blockquote MdToken
- | UnordList [MdToken]
- | OrdList [MdToken]
- | Code String
- | Codeblock String
- | Link MdToken URL
- | Image MdToken ImgPath
- | Bold MdToken
- | Italic MdToken
- | Strikethrough MdToken
- | Unit String
+data MdToken
+ = Document [MdToken]
+ | Header HeaderLevel MdToken
+ | Para MdToken
+ | Line [MdToken]
+ | Linebreak
+ | HorizontalRule
+ | Blockquote MdToken
+ | UnordList [MdToken]
+ | OrdList [MdToken]
+ | Code String
+ | Codeblock String
+ | Link MdToken URL
+ | Image MdToken ImgPath
+ | Bold MdToken
+ | Italic MdToken
+ | Strikethrough MdToken
+ | Unit String
-- Deriving Show for MdToken
instance Show MdToken where
- show (Document tokens) = concat(map show tokens)
- show (Header level token) = "" ++ show token ++ ""
- show (Para token) = "
" ++ show token ++ "
"
- show (Line tokens) = concat(map show tokens)
- show Linebreak = "
"
- show HorizontalRule = "---------"
- show (Blockquote token) = "BLOCK" ++ show token
- show (UnordList tokens) = "UNORD" ++ concat(map show tokens)
- show (OrdList tokens) = "ORD" ++ concat(map show tokens)
- show (Code code) = show code
- show (Codeblock code) = show code
- show (Link txt url) = "" ++ show txt ++ ""
- show (Image txt imgPath) = "
" ++ show txt ++ ""
- show (Bold token) = "" ++ show token ++ ""
- show (Italic token) = "" ++ show token ++ ""
- show (Strikethrough token) = "" ++ show token ++ ""
- show (Unit unit) = printf "%s" unit
-
-
+ show (Document tokens) = concat (map show tokens)
+ show (Header level token) = "" ++ show token ++ ""
+ show (Para token) = "" ++ show token ++ "
"
+ show (Line tokens) = concat (map show tokens)
+ show Linebreak = "
"
+ show HorizontalRule = "---------"
+ show (Blockquote token) = "BLOCK" ++ show token
+ show (UnordList tokens) = "UNORD" ++ concat (map show tokens)
+ show (OrdList tokens) = "ORD" ++ concat (map show tokens)
+ show (Code code) = show code
+ show (Codeblock code) = show code
+ show (Link txt url) = "" ++ show txt ++ ""
+ show (Image txt imgPath) = "
" ++ show txt ++ ""
+ show (Bold token) = "" ++ show token ++ ""
+ show (Italic token) = "" ++ show token ++ ""
+ show (Strikethrough token) = "" ++ show token ++ ""
+ show (Unit unit) = printf "%s" unit
-- ---------------
-- Helpers
@@ -61,79 +61,91 @@ mustBeHash :: ReadP Char
mustBeHash = satisfy (\x -> x == '#')
leftmostLongest :: (Foldable t) => [(a, t b)] -> (a, t b)
-leftmostLongest xs =
- let lastElem = (last xs)
- filteredLst = (filter (\val -> (length $ snd val) == (length $ snd lastElem)) xs)
- in head filteredLst
+leftmostLongest xs =
+ let lastElem = (last xs)
+ filteredLst = (filter (\val -> (length $ snd val) == (length $ snd lastElem)) xs)
+ in head filteredLst
-- Get the first parse returned by readP_to_S that consumed the most input
leftmostLongestParse :: ReadP a -> String -> (a, String)
-leftmostLongestParse parser input = leftmostLongest $ readP_to_S parser input
+leftmostLongestParse parser input = leftmostLongest $ readP_to_S parser input
-- Parse if the string that's left matches the string comparator function
lookaheadParse :: (String -> Bool) -> ReadP Char
lookaheadParse stringCmp = do
- lookahead <- look
- case stringCmp lookahead of
- True -> get
- False -> pfail
+ lookahead <- look
+ case stringCmp lookahead of
+ True -> get
+ False -> pfail
lineToList :: MdToken -> [MdToken]
lineToList (Line tokens) = tokens
--- ---------------
+
+-- ---------------
-- Parse a markdown header, denoted by 1-6 #'s followed by some text, followed by EOL.
parseHeader :: ReadP MdToken
parseHeader = do
- skipSpaces
- headers <- many1 mustBeHash
- when ((length headers) > 6)
- pfail
- _ <- string " "
- text <- munch1 (\x -> x /= '\n') -- Parse until EOL
--- traceM text
- let parsedText = fst $ leftmostLongestParse parseLine text
- return (Header (length headers) parsedText)
-
+ traceM "Reached parseHeader"
+ skipSpaces
+ headers <- many1 mustBeHash
+ when
+ ((length headers) > 6)
+ pfail
+ _ <- string " "
+ text <- munch1 (\x -> x /= '\n') -- Parse until EOL
+ let parsedText = fst $ leftmostLongestParse parseLine text
+ traceM (show parsedText)
+ traceM (show (length headers))
+ return (Header (length headers) parsedText)
+
-- Parse bold text
parseBold :: ReadP MdToken
parseBold = do
- text <- choice[
- (between (string "__") (string "__") (many1 (lookaheadParse (/= "__")))),
- (between (string "**") (string "**") (many1 (lookaheadParse (/= "**"))))
- ]
- let parsedText = fst $ leftmostLongestParse parseLine text
- return (Bold parsedText)
+ traceM "Reached parseBold"
+ text <-
+ choice
+ [ between (string "__") (string "__") (many1 (lookaheadParse (/= "__"))),
+ between (string "**") (string "**") (many1 (lookaheadParse (/= "**")))
+ ]
+ let parsedText = fst $ leftmostLongestParse parseLine text
+ return (Bold parsedText)
-- Parse italic text
parseItalic :: ReadP MdToken
parseItalic = do
- text <- choice[
- (between (string "_") (string "_") (munch1 (/= '_'))),
- (between (string "*") (string "*") (munch1 (/= '*')))
- ]
- let parsedText = fst $ leftmostLongestParse parseLine text
- return (Italic parsedText)
+ traceM "Reached parseItalic"
+ text <-
+ choice
+ [ (between (string "_") (string "_") (munch1 (/= '_'))),
+ (between (string "*") (string "*") (munch1 (/= '*')))
+ ]
+ let parsedText = fst $ leftmostLongestParse parseLine text
+ return (Italic parsedText)
-- Parse a linebreak character
parseLinebreak :: ReadP MdToken
parseLinebreak = do
- char '\n'
- return Linebreak
+ traceM "Reached parseLinebreak"
+ char ' '
+ many1 (char ' ')
+ char '\n'
+ return Linebreak
-- Parse a regular string as a Unit.
parseString :: ReadP MdToken
parseString = do
- firstChar <- get -- Must parse at least one character here
- text <- munch (\x -> not (elem x "#*_[\n"))
- return (Unit (firstChar:text))
+ traceM "Reached parseString"
+ firstChar <- get -- Must parse at least one character here
+ text <- munch (\x -> not (elem x "#*_[\n "))
+ return (Unit (firstChar : text))
lineParsers :: [ReadP MdToken]
-lineParsers = [parseHeader, parseLinebreak, parseBold, parseItalic, parseString] -- A 'line' doesn't include a 'header'
+lineParsers = [parseLinebreak, parseBold, parseItalic, parseString] -- A 'line' doesn't include a 'header'
-- List of all parsers
allParsers :: [ReadP MdToken]
-allParsers = parseHeader:lineParsers
+allParsers = parseHeader : lineParsers
-- Parse any of the above tokens.
parseLineToken :: ReadP MdToken
@@ -141,27 +153,32 @@ parseLineToken = choice lineParsers
-- Parse a line, consisting of one or more tokens.
parseLine :: ReadP MdToken
-parseLine = do
- skipSpaces
- -- Fail if we have reached the end of the document.
- remaining <- look
- when (null remaining) pfail
- parsed <- parseMany parseLineToken
--- traceM $ show parsed
- return (Line parsed)
+parseLine = do
+ traceM "Reached parseLine"
+ skipSpaces
+ -- Fail if we have reached the end of the document.
+ remaining <- look
+ when (null remaining) pfail
+ parsed <- parseMany parseLineToken
+ -- traceM $ show parsed
+ return (Line parsed)
-- Parse a paragraph, which is a 'Line' (can span multiple actual lines), separated by double-newlines.
-- As a weird special case, a 'Paragraph' can also be a 'Header'.
parsePara :: ReadP MdToken
parsePara = do
- parseMany (char '\n')
- text <- many1 (lookaheadParse (\x -> ((length x) < 2) || (take 2 x) /= "\n\n")) -- Parse until a double-newline.
- string "\n\n" <|> (eof >> return "") -- Consume the next double-newline or EOF.
- let parsedText = fst $ leftmostLongestParse (parseHeader <|> parseLine) text -- Parse either a line or a header.
- -- If the paragraph is a header, return a Header token. Otheriwse return a Para token.
- case parsedText of
- Header level token -> return (Header level token)
- _ -> return (Para parsedText)
+ traceM "Reached parsePara"
+ parseMany (char '\n')
+ -- text <- many1 (lookaheadParse (\x -> ((length x) < 2) || (take 2 x) /= "\n\n")) -- Parse until a double-newline.
+ -- string "\n\n" <|> (eof >> return "") -- Consume the next double-newline or EOF.
+ text <- (manyTill get ((string "\n\n") <|> (eof >> return "")))
+ when (null text) pfail
+ let parsedText = fst $ leftmostLongestParse (parseHeader <|> parseLine) text -- Parse either a line or a header.
+ traceM (show parsedText)
+ -- If the paragraph is a header, return a Header token. Otheriwse return a Para token.
+ case parsedText of
+ Header level token -> return (Header level token)
+ _ -> return (Para parsedText)
-- Parse a document, which is multiple paragraphs.
parseDocument :: ReadP MdToken
diff --git a/src/Test.hs b/src/Test.hs
index 4a72c64..d415adb 100644
--- a/src/Test.hs
+++ b/src/Test.hs
@@ -28,8 +28,9 @@ boldTests = TestList
integrationTests = TestList
[
- check_equal "Integration 1" "Sample Markdown
This is some basic, sample markdown.
Second Heading
" (convert "# Sample Markdown\n\n This is some basic, sample markdown.\n\n ## __Second__ _Heading_")
- -- Add a test for single-newlines.
+ check_equal "Integration 1" "Sample Markdown
This is some basic, sample markdown.
Second Heading
" (convert "# Sample Markdown\n\n This is some basic, sample markdown.\n\n ## __Second__ _Heading_"),
+ check_equal "Integration 2" "Hello World
" (convert "__Hello__\n_World_"),
+ check_equal "Integration 3" "Hello
WorldM/p>" (convert "# Hello\nWorld")
]