Trying to fix a bug where a header with a newline isn't recognized as a header

master
Aadhavan Srinivasan 6 days ago
parent d771460bb1
commit 7fdc1bcbf1

@ -4,5 +4,5 @@ import MdToHTML
main :: IO () main :: IO ()
main = do main = do
let res = fst $ leftmostLongestParse parseDocument "# _Hello_\n\n # Hello" let res = fst $ leftmostLongestParse parseDocument "# _Hello_\n"
putStrLn (show res) putStrLn (show res)

@ -1,21 +1,23 @@
module MdToHTML where module MdToHTML where
import Text.ParserCombinators.ReadP
import Control.Monad
import Control.Applicative import Control.Applicative
import Text.Printf import Control.Monad
import Debug.Trace
import Data.List import Data.List
import Debug.Trace
import Text.ParserCombinators.ReadP
import Text.Printf
type HeaderLevel = Int type HeaderLevel = Int
newtype URL = URL {getUrl :: String} newtype URL = URL {getUrl :: String}
newtype ImgPath = ImgPath {getPath :: String} newtype ImgPath = ImgPath {getPath :: String}
parseMany :: ReadP a -> ReadP [a] parseMany :: ReadP a -> ReadP [a]
parseMany = Text.ParserCombinators.ReadP.many parseMany = Text.ParserCombinators.ReadP.many
data MdToken = Document [MdToken] data MdToken
= Document [MdToken]
| Header HeaderLevel MdToken | Header HeaderLevel MdToken
| Para MdToken | Para MdToken
| Line [MdToken] | Line [MdToken]
@ -53,8 +55,6 @@ instance Show MdToken where
show (Strikethrough token) = "<s>" ++ show token ++ "</s>" show (Strikethrough token) = "<s>" ++ show token ++ "</s>"
show (Unit unit) = printf "%s" unit show (Unit unit) = printf "%s" unit
-- --------------- -- ---------------
-- Helpers -- Helpers
mustBeHash :: ReadP Char mustBeHash :: ReadP Char
@ -80,27 +80,33 @@ lookaheadParse stringCmp = do
lineToList :: MdToken -> [MdToken] lineToList :: MdToken -> [MdToken]
lineToList (Line tokens) = tokens lineToList (Line tokens) = tokens
-- --------------- -- ---------------
-- Parse a markdown header, denoted by 1-6 #'s followed by some text, followed by EOL. -- Parse a markdown header, denoted by 1-6 #'s followed by some text, followed by EOL.
parseHeader :: ReadP MdToken parseHeader :: ReadP MdToken
parseHeader = do parseHeader = do
traceM "Reached parseHeader"
skipSpaces skipSpaces
headers <- many1 mustBeHash headers <- many1 mustBeHash
when ((length headers) > 6) when
((length headers) > 6)
pfail pfail
_ <- string " " _ <- string " "
text <- munch1 (\x -> x /= '\n') -- Parse until EOL text <- munch1 (\x -> x /= '\n') -- Parse until EOL
-- traceM text
let parsedText = fst $ leftmostLongestParse parseLine text let parsedText = fst $ leftmostLongestParse parseLine text
traceM (show parsedText)
traceM (show (length headers))
return (Header (length headers) parsedText) return (Header (length headers) parsedText)
-- Parse bold text -- Parse bold text
parseBold :: ReadP MdToken parseBold :: ReadP MdToken
parseBold = do parseBold = do
text <- choice[ traceM "Reached parseBold"
(between (string "__") (string "__") (many1 (lookaheadParse (/= "__")))), text <-
(between (string "**") (string "**") (many1 (lookaheadParse (/= "**")))) choice
[ between (string "__") (string "__") (many1 (lookaheadParse (/= "__"))),
between (string "**") (string "**") (many1 (lookaheadParse (/= "**")))
] ]
let parsedText = fst $ leftmostLongestParse parseLine text let parsedText = fst $ leftmostLongestParse parseLine text
return (Bold parsedText) return (Bold parsedText)
@ -108,8 +114,10 @@ parseBold = do
-- Parse italic text -- Parse italic text
parseItalic :: ReadP MdToken parseItalic :: ReadP MdToken
parseItalic = do parseItalic = do
text <- choice[ traceM "Reached parseItalic"
(between (string "_") (string "_") (munch1 (/= '_'))), text <-
choice
[ (between (string "_") (string "_") (munch1 (/= '_'))),
(between (string "*") (string "*") (munch1 (/= '*'))) (between (string "*") (string "*") (munch1 (/= '*')))
] ]
let parsedText = fst $ leftmostLongestParse parseLine text let parsedText = fst $ leftmostLongestParse parseLine text
@ -118,18 +126,22 @@ parseItalic = do
-- Parse a linebreak character -- Parse a linebreak character
parseLinebreak :: ReadP MdToken parseLinebreak :: ReadP MdToken
parseLinebreak = do parseLinebreak = do
traceM "Reached parseLinebreak"
char ' '
many1 (char ' ')
char '\n' char '\n'
return Linebreak return Linebreak
-- Parse a regular string as a Unit. -- Parse a regular string as a Unit.
parseString :: ReadP MdToken parseString :: ReadP MdToken
parseString = do parseString = do
traceM "Reached parseString"
firstChar <- get -- Must parse at least one character here firstChar <- get -- Must parse at least one character here
text <- munch (\x -> not (elem x "#*_[\n ")) text <- munch (\x -> not (elem x "#*_[\n "))
return (Unit (firstChar : text)) return (Unit (firstChar : text))
lineParsers :: [ReadP MdToken] lineParsers :: [ReadP MdToken]
lineParsers = [parseHeader, parseLinebreak, parseBold, parseItalic, parseString] -- A 'line' doesn't include a 'header' lineParsers = [parseLinebreak, parseBold, parseItalic, parseString] -- A 'line' doesn't include a 'header'
-- List of all parsers -- List of all parsers
allParsers :: [ReadP MdToken] allParsers :: [ReadP MdToken]
@ -142,6 +154,7 @@ parseLineToken = choice lineParsers
-- Parse a line, consisting of one or more tokens. -- Parse a line, consisting of one or more tokens.
parseLine :: ReadP MdToken parseLine :: ReadP MdToken
parseLine = do parseLine = do
traceM "Reached parseLine"
skipSpaces skipSpaces
-- Fail if we have reached the end of the document. -- Fail if we have reached the end of the document.
remaining <- look remaining <- look
@ -154,10 +167,14 @@ parseLine = do
-- As a weird special case, a 'Paragraph' can also be a 'Header'. -- As a weird special case, a 'Paragraph' can also be a 'Header'.
parsePara :: ReadP MdToken parsePara :: ReadP MdToken
parsePara = do parsePara = do
traceM "Reached parsePara"
parseMany (char '\n') parseMany (char '\n')
text <- many1 (lookaheadParse (\x -> ((length x) < 2) || (take 2 x) /= "\n\n")) -- Parse until a double-newline. -- text <- many1 (lookaheadParse (\x -> ((length x) < 2) || (take 2 x) /= "\n\n")) -- Parse until a double-newline.
string "\n\n" <|> (eof >> return "") -- Consume the next double-newline or EOF. -- string "\n\n" <|> (eof >> return "") -- Consume the next double-newline or EOF.
text <- (manyTill get ((string "\n\n") <|> (eof >> return "")))
when (null text) pfail
let parsedText = fst $ leftmostLongestParse (parseHeader <|> parseLine) text -- Parse either a line or a header. let parsedText = fst $ leftmostLongestParse (parseHeader <|> parseLine) text -- Parse either a line or a header.
traceM (show parsedText)
-- If the paragraph is a header, return a Header token. Otheriwse return a Para token. -- If the paragraph is a header, return a Header token. Otheriwse return a Para token.
case parsedText of case parsedText of
Header level token -> return (Header level token) Header level token -> return (Header level token)

@ -28,8 +28,9 @@ boldTests = TestList
integrationTests = TestList integrationTests = TestList
[ [
check_equal "Integration 1" "<h1>Sample Markdown</h1><p>This is some basic, sample markdown.</p><h2><b>Second</b> <i>Heading</i></h2>" (convert "# Sample Markdown\n\n This is some basic, sample markdown.\n\n ## __Second__ _Heading_") check_equal "Integration 1" "<h1>Sample Markdown</h1><p>This is some basic, sample markdown.</p><h2><b>Second</b> <i>Heading</i></h2>" (convert "# Sample Markdown\n\n This is some basic, sample markdown.\n\n ## __Second__ _Heading_"),
-- Add a test for single-newlines. check_equal "Integration 2" "<p><b>Hello</b> <i>World</i></p>" (convert "__Hello__\n_World_"),
check_equal "Integration 3" "<h1>Hello</h1><p>WorldM/p>" (convert "# Hello\nWorld")
] ]

Loading…
Cancel
Save