Compare commits

..

10 Commits

@ -1,8 +1,27 @@
module Main where
import MdToHTML
import System.Environment
import System.IO
-- | Accumulate lines from standard input until end of file.
--
-- Every line read is consed onto the front of the accumulator, so the result
-- holds the lines in reverse reading order, followed by the initial @xs@.
readLinesHelper :: [String] -> IO [String]
readLinesHelper xs = isEOF >>= continue
  where
    -- End of input: hand back what has been collected so far.
    continue True = pure xs
    -- Otherwise read one more line and recurse with it prepended.
    continue False = getLine >>= \line -> readLinesHelper (line : xs)
-- | Read every line of standard input, returned in reading order.
--
-- 'readLinesHelper' collects lines newest-first, hence the 'reverse'.
readLines :: IO [String]
readLines = fmap reverse (readLinesHelper [])
-- | Parse a Markdown document and print the parse result to standard output.
--
-- The input is read from the file named by the first command-line argument,
-- or from standard input when no argument is given. Any further arguments
-- are ignored.
main :: IO ()
main = do
  args <- getArgs
  fileContents <- case args of
    [] -> getContents
    x : _ -> readFile x
  -- Only the first component of the parse result is printed.
  let res = fst $ leftmostLongestParse parseDocument fileContents
  print res

@ -67,10 +67,6 @@ executable md-to-html-runner
-- .hs or .lhs file containing the Main module.
main-is: Main.hs
-- Modules included in this executable, other than Main.
other-modules:
MdToHTML
MdToHtmlTest
-- LANGUAGE extensions used by modules in this package.
-- other-extensions:

@ -37,6 +37,7 @@ data MdToken
| Codeblock String
| Link MdToken URL
| Image MdToken ImgPath
| Figure MdToken ImgPath
| Bold MdToken
| Italic MdToken
| Strikethrough MdToken
@ -58,7 +59,8 @@ instance Show MdToken where
show (Code code) = "<code>" ++ show code ++ "</code>"
show (Codeblock code) = show code
show (Link txt url) = "<a href=\"" ++ getUrl url ++ "\">" ++ show txt ++ "</a>"
show (Image txt imgPath) = "<img src=" ++ getPath imgPath ++ ">" ++ show txt ++ "</img>"
show (Image txt imgPath) = "<img src=\"" ++ getPath imgPath ++ "\"" ++ " alt=\"" ++ show txt ++ "\" />"
show (Figure txt imgPath) = "<figure><img src=\"" ++ getPath imgPath ++ "\" alt=\"" ++ show txt ++ "\"/><figcaption aria-hidden=\"true\">" ++ show txt ++ "</figcaption></figure>"
show (Bold token) = "<b>" ++ show token ++ "</b>"
show (Italic token) = "<i>" ++ show token ++ "</i>"
show (Strikethrough token) = "<s>" ++ show token ++ "</s>"
@ -88,9 +90,9 @@ leftmostLongestParse parser input =
Nothing -> (mempty, mempty)
Just x -> x
-- | Characters that carry syntactic meaning in the grammar. 'parseUnit'
-- refuses to consume any of these as plain text.
specialChars :: String
specialChars = "\n\\`*_{}[]()<>#+|"

-- | Every special character plus a few extra punctuation marks.
-- NOTE(review): presumably the set accepted after a backslash by
-- 'parseEscapedChar' — confirm against that parser.
escapableChars :: String
escapableChars = "-~!." ++ specialChars
-- Makes a parser greedy. Instead of returning all possible parses, only the longest one is returned.
greedyParse :: ReadP a -> ReadP [a]
@ -120,16 +122,6 @@ fallthroughParser :: [ReadP a] -> ReadP a
fallthroughParser [x] = x
fallthroughParser (x : xs) = x <++ fallthroughParser xs
-- | Zero or more repetitions of @p@.
--
-- At end of input the only result is the empty list. Otherwise the empty
-- result is offered alongside every one-or-more parse (symmetric choice).
myMany :: (Monoid a) => ReadP a -> ReadP [a]
myMany p = look >>= go
  where
    go [] = pure []
    go _ = pure [] +++ myMany1 p

-- | One or more repetitions of @p@: a single @p@ followed by 'myMany' @p@.
myMany1 :: (Monoid a) => ReadP a -> ReadP [a]
myMany1 p = (:) <$> p <*> myMany p
-- ---------------
-- Parse a markdown header, denoted by 1-6 #'s followed by some text, followed by EOL.
@ -153,25 +145,19 @@ parseBold = parseBoldWith "**" <|> parseBoldWith "__"
where
parseBoldWith delim = do
string delim
inside <- myMany1 parseLineToken
inside <- greedyParse1 parseLineToken
string delim
return (Bold (Line inside))
-- | Parse italic text: one or more line tokens enclosed in a matching pair of
-- @*@ or @_@ delimiters, returned as @Italic (Line tokens)@.
--
-- Both delimiter styles are tried; the opening and closing delimiter of one
-- span must be the same string.
parseItalic :: ReadP MdToken
parseItalic = parseItalicWith "*" <|> parseItalicWith "_"
  where
    parseItalicWith delim = do
      _ <- string delim
      inside <- greedyParse1 parseLineToken
      _ <- string delim
      return (Italic (Line inside))
-- Parse strikethrough text
parseStrikethrough :: ReadP MdToken
@ -208,7 +194,28 @@ parseLinebreak = do
-- | Parse a single newline character.
--
-- A newline that is the very last character of the input produces an empty
-- 'Unit'; a newline followed by more input produces 'SingleNewline'.
parseSingleNewline :: ReadP MdToken
parseSingleNewline = do
  _ <- char '\n'
  -- Peek at what follows without consuming it.
  remaining <- look
  return $ if null remaining then Unit "" else SingleNewline
-- | Parse an inline image of the form @![alt text](path)@.
--
-- The alt text is one or more escaped characters or plain units; the path is
-- one or more characters up to the first closing parenthesis.
parseImage :: ReadP MdToken
parseImage = do
  _ <- string "!["
  altText <- many1 (parseEscapedChar <++ parseUnit)
  _ <- string "]("
  -- Stop at the first ')': accepting arbitrary characters here would let the
  -- path run on to a later ')' and swallow the text in between.
  path <- many1 (satisfy (/= ')'))
  _ <- char ')'
  return $ Image (Line altText) (ImgPath path)
-- | Parse a stand-alone image as a figure.
--
-- Succeeds when 'parseImage' succeeds and is immediately followed by a blank
-- line (@"\\n\\n"@) or by end of input. An 'Image' result is re-wrapped as a
-- 'Figure' with the same text and path; any other token is passed through.
parseFigure :: ReadP MdToken
parseFigure = do
  img <- parseImage
  void (string "\n\n") <++ eof
  return $ case img of
    Image text path -> Figure text path
    other -> other
-- Parse an escaped character
parseEscapedChar :: ReadP MdToken
@ -220,8 +227,7 @@ parseEscapedChar = do
-- | Parse a single character as a 'Unit'.
--
-- Only characters outside 'specialChars' are accepted, and exactly one
-- character is consumed.
parseUnit :: ReadP MdToken
parseUnit = toUnit <$> satisfy (`notElem` specialChars)
  where
    toUnit c = Unit [c]
lineParsers :: [ReadP MdToken]
@ -230,6 +236,7 @@ lineParsers =
parseSingleNewline,
parseEscapedChar,
parseCode,
parseImage,
parseBold,
parseItalic,
parseStrikethrough,
@ -242,6 +249,7 @@ listLineParsers =
[ parseLinebreak,
parseEscapedChar,
parseCode,
parseImage,
parseBold,
parseItalic,
parseStrikethrough,
@ -266,7 +274,7 @@ parseLine :: ReadP MdToken
parseLine = do
  -- Leading whitespace is not part of the line.
  skipSpaces
  -- Collect line tokens until end of input. The whole remaining input must
  -- be tokenizable; at end of input the token list is empty.
  parsed <- manyTill parseLineToken eof
  return (Line parsed)
-- Parse a paragraph, which is a 'Line' (can span multiple actual lines), separated by double-newlines.
@ -278,6 +286,7 @@ parsePara = do
text <- manyTill get (string "\n\n" <|> (eof >> return ""))
when (null text) pfail
let parsedText = fst $ leftmostLongestParse parseLine text -- Parse a line
parseMany (char '\n')
return (Para parsedText)
-- Parse a line starting with '>', return the line except for the '>'.
@ -398,12 +407,17 @@ parseOrderedList = do
void (char '\n') <++ eof
return $ OrdList (firstLine : lineItems)
-- | Parse a horizontal rule: the literal @---@ followed by either a blank
-- line (@"\\n\\n"@) or end of input.
parseHorizontalRule :: ReadP MdToken
parseHorizontalRule = do
  _ <- string "---"
  void (string "\n\n") <++ eof
  return HorizontalRule
-- | The block-level parsers that make up a document.
-- NOTE(review): presumably tried in list order, which would explain why the
-- catch-all 'parsePara' sits last — confirm at the use site.
documentParsers :: [ReadP MdToken]
documentParsers =
  [ parseHorizontalRule,
    parseHeader,
    parseBlockquote,
    parseUnorderedList,
    parseOrderedList,
    parseFigure,
    parsePara
  ]

@ -22,7 +22,6 @@ headerTests =
boldTests =
TestList
[ check_equal "Should convert bold" "<p><b>Hello</b></p>" (convert "__Hello__"),
check_equal " Should not convert incomplete bold" "<p>**Hello</p>" (convert "**Hello"),
check_equal "Should convert italic" "<p><i>Hello</i></p>" (convert "_Hello_"),
check_equal "Should convert bold and italic in a sentence" "<p>It <i>is</i> a <b>wonderful</b> day</p>" (convert "It _is_ a __wonderful__ day"),
check_equal "Should convert nested bold and italic" "<p><b>Bold then <i>Italic</i></b></p>" (convert "**Bold then *Italic***"),
@ -98,9 +97,23 @@ codeTests =
TestList
[ check_equal "Code by itself" "<p><code>Hello world!</code></p>" (convert "`Hello world!`"),
check_equal "Code in a paragraph" "<p>The following <code>text</code> is code</p>" (convert "The following `text` is code"),
check_equal "Code across paragraphs (shouldn't work" "<p>`Incomplete</p><p>Code`</p>" (convert "`Incomplete\n\nCode`")
check_equal "Code across paragraphs (shouldn't work" "<p></p><p></p>" (convert "`Incomplete\n\nCode`") -- At the moment, this is just treated as a syntax error, so nothing is rendered.
]
-- | An image embedded in running text should render as an inline @<img>@
-- inside the paragraph.
imageTests =
  TestList [inlineImage]
  where
    inlineImage =
      check_equal
        "Image with text"
        "<p>This is an image <img src=\"img.png\" alt=\"Image 1\" /></p>"
        (convert "This is an image ![Image 1](img.png)")
-- | An image that makes up the whole input should render as a @<figure>@
-- with a caption.
figureTests =
  TestList [standaloneImage]
  where
    standaloneImage =
      check_equal
        "Image by itself"
        "<figure><img src=\"img.png\" alt=\"Image 1\"/><figcaption aria-hidden=\"true\">Image 1</figcaption></figure>"
        (convert "![Image 1](img.png)")
-- | A @---@ line between two paragraphs should render as @<hr>@.
horizontalRuleTests =
  TestList [ruleBetweenParagraphs]
  where
    ruleBetweenParagraphs =
      check_equal
        "Horizontal Rule"
        "<p>a</p><hr><p>b</p>"
        (convert "a\n\n---\n\nb")
integrationTests =
TestList
[ check_equal "Integration 1" "<h1>Sample Markdown</h1><p>This is some basic, sample markdown.</p><h2><b>Second</b> <i>Heading</i></h2>" (convert "# Sample Markdown\n\n This is some basic, sample markdown.\n\n ## __Second__ _Heading_"),
@ -135,7 +148,10 @@ tests =
blockquoteTests,
unorderedListTests,
orderedListTests,
imageTests,
figureTests,
codeTests,
horizontalRuleTests,
integrationTests
]

Loading…
Cancel
Save