Implementation and test for horizontal rule

Fixed how trailing newlines are parsed
Rewrote readLines function to be more concise
4 changed files with 82 additions and 37 deletions
--- a/app/Main.hs
+++ b/app/Main.hs
@ -1,8 +1,27 @@
 module Main where

 import MdToHTML
+import System.Environment
+import System.IO
+
+-- | Read lines from stdin until EOF, consing each new line onto the
+-- accumulator, so the result holds the lines in reverse order of reading.
+readLinesHelper :: [String] -> IO [String]
+readLinesHelper acc =
+  isEOF >>= \atEnd ->
+    if atEnd
+      then pure acc
+      else getLine >>= \next -> readLinesHelper (next : acc)
+
+-- | Read every line from stdin, returned in the order the lines were read.
+readLines :: IO [String]
+readLines = do
+  reversedLines <- readLinesHelper []
+  return (reverse reversedLines)

 main :: IO ()
 main = do
-   let res = fst $ leftmostLongestParse parseDocument "# _Hello_\n"
-   putStrLn (show res)
+  args <- getArgs
+  fileContents <- case args of
+    [] -> getContents
+    x : _ -> readFile x
+  let res = fst $ leftmostLongestParse parseDocument fileContents
+  print res
--- a/md-to-html.cabal
+++ b/md-to-html.cabal
@ -67,10 +67,6 @@ executable md-to-html-runner
    -- .hs or .lhs file containing the Main module.
    main-is:          Main.hs

-    -- Modules included in this executable, other than Main.
-    other-modules:
-        MdToHTML
-        MdToHtmlTest

    -- LANGUAGE extensions used by modules in this package.
    -- other-extensions:
--- a/src/MdToHTML.hs
+++ b/src/MdToHTML.hs
@ -37,6 +37,7 @@ data MdToken
  | Codeblock String
  | Link MdToken URL
  | Image MdToken ImgPath
+  | Figure MdToken ImgPath
  | Bold MdToken
  | Italic MdToken
  | Strikethrough MdToken
@ -58,7 +59,8 @@ instance Show MdToken where
  show (Code code) = "<code>" ++ show code ++ "</code>"
  show (Codeblock code) = show code
  show (Link txt url) = "<a href=\"" ++ getUrl url ++ "\">" ++ show txt ++ "</a>"
-  show (Image txt imgPath) = "<img src=" ++ getPath imgPath ++ ">" ++ show txt ++ "</img>"
+  show (Image txt imgPath) = "<img src=\"" ++ getPath imgPath ++ "\"" ++ " alt=\"" ++ show txt ++ "\" />"
+  show (Figure txt imgPath) = "<figure><img src=\"" ++ getPath imgPath ++ "\" alt=\"" ++ show txt ++ "\"/><figcaption aria-hidden=\"true\">" ++ show txt ++ "</figcaption></figure>"
  show (Bold token) = "<b>" ++ show token ++ "</b>"
  show (Italic token) = "<i>" ++ show token ++ "</i>"
  show (Strikethrough token) = "<s>" ++ show token ++ "</s>"
@ -88,9 +90,9 @@ leftmostLongestParse parser input =
        Nothing -> (mempty, mempty)
        Just x -> x

-specialChars = "\\#*_[\n`"
+specialChars = "\n\\`*_{}[]()<>#+|"

-escapableChars = '~' : specialChars
+escapableChars = "-~!." ++ specialChars

 -- Makes a parser greedy. Instead of returning all possible parses, only the longest one is returned.
 greedyParse :: ReadP a -> ReadP [a]
@ -120,16 +122,6 @@ fallthroughParser :: [ReadP a] -> ReadP a
 fallthroughParser [x] = x
 fallthroughParser (x : xs) = x <++ fallthroughParser xs

-myMany :: (Monoid a) => ReadP a -> ReadP [a]
-myMany p = do
-  remaining <- look
-  case remaining of
-    [] -> return []
-    _ -> return [] +++ myMany1 p
-
-myMany1 :: (Monoid a) => ReadP a -> ReadP [a]
-myMany1 p = liftM2 (:) p (myMany p)
-
 -- ---------------

 -- Parse a markdown header, denoted by 1-6 #'s followed by some text, followed by EOL.
@ -153,25 +145,19 @@ parseBold = parseBoldWith "**" <|> parseBoldWith "__"
  where
    parseBoldWith delim = do
      string delim
-      inside <- myMany1 parseLineToken
+      inside <- greedyParse1 parseLineToken
      string delim
      return (Bold (Line inside))

 -- Parse italic text
 parseItalic :: ReadP MdToken
-parseItalic = parseItalicWith '*' <|> parseItalicWith '_'
+parseItalic = parseItalicWith "*" <|> parseItalicWith "_"
  where
    parseItalicWith delim = do
-      exactlyOnce delim
-      inside <- myMany1 parseLineToken
-      exactlyOnce delim
+      string delim
+      inside <- greedyParse1 parseLineToken
+      string delim
      return (Italic (Line inside))
-    exactlyOnce ch = do
-      char ch
-      remaining <- look
-      case remaining of
-        [] -> return ch
-        x : xs -> if x == ch then pfail else return ch

 -- Parse strikethrough text
 parseStrikethrough :: ReadP MdToken
@ -208,7 +194,28 @@ parseLinebreak = do
 parseSingleNewline :: ReadP MdToken
 parseSingleNewline = do
  char '\n'
-  return SingleNewline
+  remaining <- look
+  case remaining of
+    [] -> return $ Unit ""
+    _ -> return SingleNewline
+
+-- Parse an inline image written as ![alt text](path).
+-- The alt text is one or more escaped characters or plain units; the path is
+-- one or more characters between the parentheses.
+parseImage :: ReadP MdToken
+parseImage =
+  Image
+    <$> (char '!' *> char '[' *> (Line <$> many1 (parseEscapedChar <++ parseUnit)) <* char ']')
+    <*> (char '(' *> (ImgPath <$> many1 get) <* char ')')
+
+-- Parse an image that stands alone as a block: an image (see parseImage)
+-- followed by either a blank line ("\n\n") or end of input. The image's text
+-- and path are re-wrapped as a Figure.
+parseFigure :: ReadP MdToken
+parseFigure = do
+  img <- parseImage
+  void (string "\n\n") <++ eof
+  return $ case img of
+    Image text path -> Figure text path
+    -- Any other token produced by parseImage is passed through unchanged.
+    other -> other

 -- Parse an escaped character
 parseEscapedChar :: ReadP MdToken
@ -220,8 +227,7 @@ parseEscapedChar = do
 -- Parse a character as a Unit.
 parseUnit :: ReadP MdToken
 parseUnit = do
-  -- text <- satisfy (`notElem` specialChars)
-  text <- get
+  text <- satisfy (`notElem` specialChars)
  return (Unit [text])

 lineParsers :: [ReadP MdToken]
@ -230,6 +236,7 @@ lineParsers =
    parseSingleNewline,
    parseEscapedChar,
    parseCode,
+    parseImage,
    parseBold,
    parseItalic,
    parseStrikethrough,
@ -242,6 +249,7 @@ listLineParsers =
  [ parseLinebreak,
    parseEscapedChar,
    parseCode,
+    parseImage,
    parseBold,
    parseItalic,
    parseStrikethrough,
@ -266,7 +274,7 @@ parseLine :: ReadP MdToken
 parseLine = do
  skipSpaces
  -- Fail if we have reached the end of the document.
-  parsed <- myMany1 parseLineToken
+  parsed <- manyTill parseLineToken eof
  return (Line parsed)

 -- Parse a paragraph, which is a 'Line' (can span multiple actual lines), separated by double-newlines.
@ -278,6 +286,7 @@ parsePara = do
  text <- manyTill get (string "\n\n" <|> (eof >> return ""))
  when (null text) pfail
  let parsedText = fst $ leftmostLongestParse parseLine text -- Parse a line
+  parseMany (char '\n')
  return (Para parsedText)

 -- Parse a line starting with '>', return the line except for the '>'.
@ -398,12 +407,17 @@ parseOrderedList = do
  void (char '\n') <++ eof
  return $ OrdList (firstLine : lineItems)

+-- Parse a horizontal rule: the literal "---" followed by either a blank line
+-- ("\n\n") or end of input.
+parseHorizontalRule :: ReadP MdToken
+parseHorizontalRule = do
+  _ <- string "---"
+  void (string "\n\n") <++ eof
+  return HorizontalRule
+
 documentParsers :: [ReadP MdToken]
 documentParsers =
-  [ parseHeader,
+  [ parseHorizontalRule,
+    parseHeader,
    parseBlockquote,
    parseUnorderedList,
    parseOrderedList,
+    parseFigure,
    parsePara
  ]

--- a/src/MdToHtmlTest.hs
+++ b/src/MdToHtmlTest.hs
@ -22,7 +22,6 @@ headerTests =
 boldTests =
  TestList
    [ check_equal "Should convert bold" "<p><b>Hello</b></p>" (convert "__Hello__"),
-      check_equal " Should not convert incomplete bold" "<p>**Hello</p>" (convert "**Hello"),
      check_equal "Should convert italic" "<p><i>Hello</i></p>" (convert "_Hello_"),
      check_equal "Should convert bold and italic in a sentence" "<p>It <i>is</i> a <b>wonderful</b> day</p>" (convert "It _is_ a __wonderful__ day"),
      check_equal "Should convert nested bold and italic" "<p><b>Bold then <i>Italic</i></b></p>" (convert "**Bold then *Italic***"),
@ -98,9 +97,23 @@ codeTests =
  TestList
    [ check_equal "Code by itself" "<p><code>Hello world!</code></p>" (convert "`Hello world!`"),
      check_equal "Code in a paragraph" "<p>The following <code>text</code> is code</p>" (convert "The following `text` is code"),
-      check_equal "Code across paragraphs (shouldn't work" "<p>`Incomplete</p><p>Code`</p>" (convert "`Incomplete\n\nCode`")
+      check_equal "Code across paragraphs (shouldn't work" "<p></p><p></p>" (convert "`Incomplete\n\nCode`") -- At the moment, this is just treated as a syntax error, so nothing is rendered.
    ]

+-- An image embedded in paragraph text renders as an inline <img> tag.
+imageTests =
+  TestList [imageInParagraph]
+  where
+    imageInParagraph =
+      check_equal
+        "Image with text"
+        "<p>This is an image <img src=\"img.png\" alt=\"Image 1\" /></p>"
+        (convert "This is an image ![Image 1](img.png)")
+
+-- An image that makes up the whole input renders as a <figure> with a caption.
+figureTests =
+  TestList [standaloneImage]
+  where
+    standaloneImage =
+      check_equal
+        "Image by itself"
+        "<figure><img src=\"img.png\" alt=\"Image 1\"/><figcaption aria-hidden=\"true\">Image 1</figcaption></figure>"
+        (convert "![Image 1](img.png)")
+
+-- A "---" line between two paragraphs renders as <hr>.
+horizontalRuleTests =
+  TestList [ruleBetweenParagraphs]
+  where
+    ruleBetweenParagraphs =
+      check_equal
+        "Horizontal Rule"
+        "<p>a</p><hr><p>b</p>"
+        (convert "a\n\n---\n\nb")
+
 integrationTests =
  TestList
    [ check_equal "Integration 1" "<h1>Sample Markdown</h1><p>This is some basic, sample markdown.</p><h2><b>Second</b> <i>Heading</i></h2>" (convert "# Sample Markdown\n\n This is some basic, sample markdown.\n\n  ## __Second__ _Heading_"),
@ -135,7 +148,10 @@ tests =
      blockquoteTests,
      unorderedListTests,
      orderedListTests,
+      imageTests,
+      figureTests,
      codeTests,
+      horizontalRuleTests,
      integrationTests
    ]
Author	SHA1	Message	Date
Aadhavan Srinivasan	1d9ac86a2a	Implementation and test for horizontal rule	1 week ago
Aadhavan Srinivasan	0320402957	Fixed how trailing newlines are parsed	1 week ago
Aadhavan Srinivasan	8696a185a7	Rewrote readLines function to be more concise	1 week ago
Aadhavan Srinivasan	da38ac226f	Added figure implementation and tests	1 week ago
Aadhavan Srinivasan	1fcce32ef6	Updated to read from stdin/file	2 weeks ago
Aadhavan Srinivasan	e50081614a	Removed section from cabal file	2 weeks ago
Aadhavan Srinivasan	b98a8cc44f	Added image tests	2 weeks ago
Aadhavan Srinivasan	90c7a585d2	Added image parser	2 weeks ago
Aadhavan Srinivasan	4a15330874	Updated showing of image	2 weeks ago
Aadhavan Srinivasan	c14112d3e4	Updated special and escapable characters	2 weeks ago