Implementation and test for horizontal rule

Fixed how trailing newlines are parsed
Rewrote readLines function to be more concise
2025-05-27 13:41:38 -04:00 · 2025-05-27 09:28:19 -04:00 · 2025-05-27 08:55:10 -04:00 · 2025-05-27 08:54:52 -04:00 · 2025-05-23 19:03:23 -04:00 · 2025-05-23 19:03:14 -04:00
6 changed files with 505 additions and 135 deletions
--- a/app/Main.hs
+++ b/app/Main.hs
@@ -1,8 +1,27 @@
 module Main where

 import MdToHTML
+import System.Environment
+import System.IO
+
+-- | Read stdin line by line until EOF, pushing each line onto the front of
+-- the accumulator, so the result comes back in reverse input order.
+readLinesHelper :: [String] -> IO [String]
+readLinesHelper acc = do
+  atEnd <- isEOF
+  if atEnd
+    then return acc
+    else getLine >>= \line -> readLinesHelper (line : acc)
+
+-- | Read every remaining line of stdin, returned in input order.
+readLines :: IO [String]
+readLines = fmap reverse (readLinesHelper [])

 main :: IO ()
 main = do
-   let res = fst $ leftmostLongestParse parseDocument "# _Hello_\n"
-   putStrLn (show res)
+  args <- getArgs
+  fileContents <- case args of
+    [] -> getContents
+    x : _ -> readFile x
+  let res = fst $ leftmostLongestParse parseDocument fileContents
+  print res
--- a/md-to-html.cabal
+++ b/md-to-html.cabal
@@ -56,6 +56,7 @@ common warnings
 library
   hs-source-dirs:   src
   exposed-modules: MdToHTML
+   other-modules: MdToHtmlTest
   build-depends:    base ^>=4.19.1.0,
                      HUnit

@@ -66,8 +67,6 @@ executable md-to-html-runner
    -- .hs or .lhs file containing the Main module.
    main-is:          Main.hs

-    -- Modules included in this executable, other than Main.
-    -- other-modules:

    -- LANGUAGE extensions used by modules in this package.
    -- other-extensions:
--- a/src/MdToHTML.hs
+++ b/src/MdToHTML.hs
@@ -1,17 +1,23 @@
+{-# OPTIONS_GHC -Wno-unrecognised-pragmas #-}
+
+{-# HLINT ignore "Use lambda-case" #-}
+
 module MdToHTML where

 import Control.Applicative
 import Control.Monad
+import Data.Char
 import Data.List
+import Data.Ord (comparing)
 import Debug.Trace
 import Text.ParserCombinators.ReadP
 import Text.Printf

 type HeaderLevel = Int

-newtype URL = URL {getUrl :: String}
+newtype URL = URL {getUrl :: String} deriving (Eq)

-newtype ImgPath = ImgPath {getPath :: String}
+newtype ImgPath = ImgPath {getPath :: String} deriving (Eq)

 parseMany :: ReadP a -> ReadP [a]
 parseMany = Text.ParserCombinators.ReadP.many
@@ -21,165 +27,402 @@ data MdToken
  | Header HeaderLevel MdToken
  | Para MdToken
  | Line [MdToken]
+  | SingleNewline -- A single newline is rendered as a space.
  | Linebreak
  | HorizontalRule
-  | Blockquote MdToken
+  | Blockquote [MdToken]
  | UnordList [MdToken]
  | OrdList [MdToken]
-  | Code String
+  | Code MdToken
  | Codeblock String
  | Link MdToken URL
  | Image MdToken ImgPath
+  | Figure MdToken ImgPath
  | Bold MdToken
  | Italic MdToken
  | Strikethrough MdToken
  | Unit String
+  deriving (Eq)

 -- Deriving Show for MdToken
 instance Show MdToken where
-  show (Document tokens) = concat (map show tokens)
+  show (Document tokens) = concatMap show tokens
  show (Header level token) = "<h" ++ show level ++ ">" ++ show token ++ "</h" ++ show level ++ ">"
  show (Para token) = "<p>" ++ show token ++ "</p>"
-  show (Line tokens) = concat (map show tokens)
+  show (Line tokens) = concatMap show tokens
  show Linebreak = "<br>"
-  show HorizontalRule = "---------"
-  show (Blockquote token) = "BLOCK" ++ show token
-  show (UnordList tokens) = "UNORD" ++ concat (map show tokens)
-  show (OrdList tokens) = "ORD" ++ concat (map show tokens)
-  show (Code code) = show code
+  show SingleNewline = " "
+  show HorizontalRule = "<hr>"
+  show (Blockquote tokens) = "<blockquote>" ++ concatMap show tokens ++ "</blockquote>"
+  show (UnordList tokens) = "<ul>" ++ concatMap (prepend "<li>" . append "</li>" . show) tokens ++ "</ul>"
+  show (OrdList tokens) = "<ol>" ++ concatMap (prepend "<li>" . append "</li>" . show) tokens ++ "</ol>"
+  show (Code code) = "<code>" ++ show code ++ "</code>"
  show (Codeblock code) = show code
-  show (Link txt url) = "<a href=" ++ (getUrl url) ++ ">" ++ show txt ++ "</a>"
-  show (Image txt imgPath) = "<img src=" ++ (getPath imgPath) ++ ">" ++ show txt ++ "</img>"
+  show (Link txt url) = "<a href=\"" ++ getUrl url ++ "\">" ++ show txt ++ "</a>"
+  show (Image txt imgPath) = "<img src=\"" ++ getPath imgPath ++ "\"" ++ " alt=\"" ++ show txt ++ "\" />"
+  show (Figure txt imgPath) = "<figure><img src=\"" ++ getPath imgPath ++ "\" alt=\"" ++ show txt ++ "\"/><figcaption aria-hidden=\"true\">" ++ show txt ++ "</figcaption></figure>"
  show (Bold token) = "<b>" ++ show token ++ "</b>"
  show (Italic token) = "<i>" ++ show token ++ "</i>"
  show (Strikethrough token) = "<s>" ++ show token ++ "</s>"
  show (Unit unit) = printf "%s" unit

+-- | Combine two tokens into a 'Document'. The empty unit is the identity and
+-- documents on either side are flattened, which keeps '<>' associative and
+-- makes 'mempty' a genuine identity (the Semigroup/Monoid laws).
+instance Semigroup MdToken where
+  Unit "" <> b = b
+  a <> Unit "" = a
+  a <> b = Document (flatten a ++ flatten b)
+    where
+      flatten (Document tokens) = tokens
+      flatten token = [token]
+
+-- | The empty unit renders as the empty string and is the identity for '<>'.
+instance Monoid MdToken where
+  mempty = Unit ""
+
 -- ---------------
 -- Helpers
-mustBeHash :: ReadP Char
-mustBeHash = satisfy (\x -> x == '#')
-
-leftmostLongest :: (Foldable t) => [(a, t b)] -> (a, t b)
+leftmostLongest :: (Foldable t) => [(a, t b)] -> Maybe (a, t b)
 leftmostLongest xs =
-  let lastElem = (last xs)
-      filteredLst = (filter (\val -> (length $ snd val) == (length $ snd lastElem)) xs)
-   in head filteredLst
+  let lastElem = last xs
+      filteredLst = filter (\val -> length (snd val) == length (snd lastElem)) xs
+   in case filteredLst of
+        [] -> Nothing
+        (x : xs) -> Just x

 -- Get the first parse returned by readP_to_S that consumed the most input
-leftmostLongestParse :: ReadP a -> String -> (a, String)
-leftmostLongestParse parser input = leftmostLongest $ readP_to_S parser input
+leftmostLongestParse :: (Monoid a) => ReadP a -> String -> (a, String)
+leftmostLongestParse parser input = orEmpty (leftmostLongest (readP_to_S parser input))
+  where
+    -- No parse at all: fall back to an empty result and empty remaining input.
+    orEmpty Nothing = (mempty, mempty)
+    orEmpty (Just best) = best

-- Parse if the string that's left matches the string comparator function
-lookaheadParse :: (String -> Bool) -> ReadP Char
-lookaheadParse stringCmp = do
-  lookahead <- look
-  case stringCmp lookahead of
-    True -> get
-    False -> pfail
+-- | Characters that 'parseUnit' refuses to consume as plain text.
+specialChars :: String
+specialChars = "\n\\`*_{}[]()<>#+|"

-lineToList :: MdToken -> [MdToken]
-lineToList (Line tokens) = tokens
+-- | Characters that 'parseEscapedChar' accepts after a backslash.
+escapableChars :: String
+escapableChars = "-~!." ++ specialChars
+
+-- Turns a parser into a greedy repetition: instead of yielding every possible
+-- number of matches, only the longest run is returned (possibly empty).
+greedyParse :: ReadP a -> ReadP [a]
+greedyParse parser = greedyParse1 parser <++ return []
+
+-- Like greedyParse, but the parser must succeed at least once.
+greedyParse1 :: ReadP a -> ReadP [a]
+greedyParse1 parser = (:) <$> parser <*> moreMatches
+  where
+    -- Keep going while the parser succeeds; stop with [] at the first failure.
+    moreMatches = greedyParse1 parser <++ return []
+
+-- Put the first list in front of the second.
+prepend :: [a] -> [a] -> [a]
+prepend = (++)
+
+-- Put the first list after the second.
+append :: [a] -> [a] -> [a]
+append suffix = (++ suffix)
+
+-- Consume characters up to (and including) the next newline, or up to EOF.
+parseTillEol :: ReadP String
+parseTillEol = manyTill get lineEnd
+  where
+    lineEnd = void (char '\n') <++ eof
+
+-- Takes a list of parsers. Returns a parser that will try them in
+-- order, moving to the next one only if the current one fails.
+-- An empty list yields a parser that always fails, so the function is total
+-- (the previous definition had no equation for []).
+fallthroughParser :: [ReadP a] -> ReadP a
+fallthroughParser = foldr (<++) pfail

 -- ---------------

 -- Parse a markdown header, denoted by 1-6 #'s followed by some text, followed by EOL.
 parseHeader :: ReadP MdToken
 parseHeader = do
-  traceM "Reached parseHeader"
  skipSpaces
-  headers <- many1 mustBeHash
+  headers <- munch1 (== '#')
  when
-    ((length headers) > 6)
+    (length headers > 6)
    pfail
-  _ <- string " "
-  text <- munch1 (\x -> x /= '\n') -- Parse until EOL
+  skipSpaces
+  text <- munch1 (/= '\n')
+  --  Text.ParserCombinators.ReadP.optional (char '\n')
+  skipSpaces
  let parsedText = fst $ leftmostLongestParse parseLine text
-  traceM (show parsedText)
-  traceM (show (length headers))
  return (Header (length headers) parsedText)

 -- Parse bold text
 parseBold :: ReadP MdToken
-parseBold = do
-  traceM "Reached parseBold"
-  text <-
-    choice
-      [ between (string "__") (string "__") (many1 (lookaheadParse (/= "__"))),
-        between (string "**") (string "**") (many1 (lookaheadParse (/= "**")))
-      ]
-  let parsedText = fst $ leftmostLongestParse parseLine text
-  return (Bold parsedText)
+parseBold = parseBoldWith "**" <|> parseBoldWith "__"
+  where
+    parseBoldWith delim = do
+      string delim
+      inside <- greedyParse1 parseLineToken
+      string delim
+      return (Bold (Line inside))

 -- Parse italic text
 parseItalic :: ReadP MdToken
-parseItalic = do
-  traceM "Reached parseItalic"
-  text <-
-    choice
-      [ (between (string "_") (string "_") (munch1 (/= '_'))),
-        (between (string "*") (string "*") (munch1 (/= '*')))
-      ]
-  let parsedText = fst $ leftmostLongestParse parseLine text
-  return (Italic parsedText)
+parseItalic = parseItalicWith "*" <|> parseItalicWith "_"
+  where
+    parseItalicWith delim = do
+      string delim
+      inside <- greedyParse1 parseLineToken
+      string delim
+      return (Italic (Line inside))
+
+-- Parse strikethrough text: one or more line tokens wrapped in "~~".
+parseStrikethrough :: ReadP MdToken
+parseStrikethrough = Strikethrough . Line <$> between tildes tildes (many1 parseLineToken)
+  where
+    tildes = string "~~"
+
+-- Parse inline code: one or more raw characters between backticks.
+parseCode :: ReadP MdToken
+parseCode = Code . Unit <$> between backtick backtick (many1 get)
+  where
+    backtick = string "`"
+
+-- Parse a link of the form [text](url). The text is parsed again as a line,
+-- so it may carry inline formatting; the URL is kept verbatim.
+parseLink :: ReadP MdToken
+parseLink = do
+  rawText <- between (string "[") (string "]") (many1 get)
+  target <- between (string "(") (string ")") (many1 get)
+  let (label, _) = leftmostLongestParse parseLine rawText
+  return (Link label (URL target))

 -- Parse a linebreak character
 parseLinebreak :: ReadP MdToken
 parseLinebreak = do
-  traceM "Reached parseLinebreak"
  char ' '
  many1 (char ' ')
  char '\n'
  return Linebreak

-- Parse a regular string as a Unit.
-parseString :: ReadP MdToken
-parseString = do
-  traceM "Reached parseString"
-  firstChar <- get -- Must parse at least one character here
-  text <- munch (\x -> not (elem x "#*_[\n "))
-  return (Unit (firstChar : text))
+-- Parse a lone newline. It becomes a SingleNewline, except when nothing
+-- follows it, in which case it contributes an empty Unit instead.
+parseSingleNewline :: ReadP MdToken
+parseSingleNewline = do
+  char '\n'
+  rest <- look
+  return $ if null rest then Unit "" else SingleNewline
+
+-- Parse an inline image of the form ![alt text](path). The alt text is a run
+-- of escaped characters and plain units; the path is kept verbatim.
+parseImage :: ReadP MdToken
+parseImage = do
+  char '!'
+  altTokens <- between (char '[') (char ']') (many1 (parseEscapedChar <++ parseUnit))
+  src <- between (char '(') (char ')') (many1 get)
+  return (Image (Line altTokens) (ImgPath src))
+
+-- Parse a figure: an image followed by a blank line ("\n\n") or the end of
+-- input. The image's text and path are carried over unchanged.
+parseFigure :: ReadP MdToken
+parseFigure = do
+  img <- parseImage
+  void (string "\n\n") <++ eof
+  case img of
+    Image text path -> return $ Figure text path
+    -- parseImage only builds Image values; anything else is passed through.
+    _ -> return img
+
+-- Parse a backslash escape: the backslash is dropped and the escaped
+-- character (one of escapableChars) is returned as a literal Unit.
+parseEscapedChar :: ReadP MdToken
+parseEscapedChar = toUnit <$> (char '\\' *> choice (map char escapableChars))
+  where
+    toUnit c = Unit [c]
+
+-- Parse one character that is not in specialChars, wrapped as a Unit.
+parseUnit :: ReadP MdToken
+parseUnit = Unit . pure <$> satisfy (`notElem` specialChars)

 lineParsers :: [ReadP MdToken]
-lineParsers = [parseLinebreak, parseBold, parseItalic, parseString] -- A 'line' doesn't include a 'header'
+lineParsers =
+  [ parseLinebreak,
+    parseSingleNewline,
+    parseEscapedChar,
+    parseCode,
+    parseImage,
+    parseBold,
+    parseItalic,
+    parseStrikethrough,
+    parseLink,
+    parseUnit
+  ] -- A 'line' doesn't include a 'header'
+
+-- Inline parsers used for the text of a list item. This is the same set as
+-- lineParsers with parseSingleNewline left out; parseListLineToken tries
+-- them in the order given here.
+listLineParsers :: [ReadP MdToken]
+listLineParsers =
+  [ parseLinebreak,
+    parseEscapedChar,
+    parseCode,
+    parseImage,
+    parseBold,
+    parseItalic,
+    parseStrikethrough,
+    parseLink,
+    parseUnit
+  ] -- A list line cannot contain newlines.

 -- List of all parsers
 allParsers :: [ReadP MdToken]
 allParsers = parseHeader : lineParsers

-- Parse any of the above tokens.
+-- Parse any of the line tokens.
 parseLineToken :: ReadP MdToken
-parseLineToken = choice lineParsers
+parseLineToken = fallthroughParser lineParsers
+
+-- Parse any of the list line tokens.
+parseListLineToken :: ReadP MdToken
+parseListLineToken = fallthroughParser listLineParsers

 -- Parse a line, consisting of one or more tokens.
 parseLine :: ReadP MdToken
 parseLine = do
-  traceM "Reached parseLine"
  skipSpaces
  -- Fail if we have reached the end of the document.
-  remaining <- look
-  when (null remaining) pfail
-  parsed <- parseMany parseLineToken
-  --         traceM $ show parsed
+  parsed <- manyTill parseLineToken eof
  return (Line parsed)

 -- Parse a paragraph, which is a 'Line' (can span multiple actual lines), separated by double-newlines.
-- As a weird special case, a 'Paragraph' can also be a 'Header'.
 parsePara :: ReadP MdToken
 parsePara = do
-  traceM "Reached parsePara"
  parseMany (char '\n')
  --  text <- many1 (lookaheadParse (\x -> ((length x) < 2) || (take 2 x) /= "\n\n")) -- Parse until a double-newline.
  --  string "\n\n" <|> (eof >> return "") -- Consume the next double-newline or EOF.
-  text <- (manyTill get ((string "\n\n") <|> (eof >> return "")))
+  text <- manyTill get (string "\n\n" <|> (eof >> return ""))
  when (null text) pfail
-  let parsedText = fst $ leftmostLongestParse (parseHeader <|> parseLine) text -- Parse either a line or a header.
-  traceM (show parsedText)
-  -- If the paragraph is a header, return a Header token. Otheriwse return a Para token.
-  case parsedText of
-    Header level token -> return (Header level token)
-    _ -> return (Para parsedText)
+  let parsedText = fst $ leftmostLongestParse parseLine text -- Parse a line
+  parseMany (char '\n')
+  return (Para parsedText)
+
+-- Parse one line that starts with '>' and return its text without the marker,
+-- the spaces/tabs right after the marker, or the terminating newline.
+parseQuotedLine :: ReadP String
+parseQuotedLine =
+  char '>'
+    *> greedyParse (char ' ' +++ char '\t')
+    *> munch (/= '\n')
+    <* Text.ParserCombinators.ReadP.optional (char '\n')
+
+-- Greedily collect one or more consecutive lines that begin with '>'.
+parseQuotedLines :: ReadP [String]
+parseQuotedLines = greedyParse1 quotedLine
+  where
+    -- Peek at the remaining input and only commit when it starts with '>'.
+    quotedLine = do
+      upcoming <- look
+      case upcoming of
+        ('>' : _) -> parseQuotedLine
+        _ -> pfail
+
+-- Parse a blockquote: a run of '>' lines whose contents (markers removed) are
+-- parsed again as nested blockquotes and paragraphs.
+parseBlockquote :: ReadP MdToken
+parseBlockquote = do
+  quotedLines <- parseQuotedLines
+  -- unlines joins the lines and appends a trailing newline; init strips it again.
+  let quotedText = init (unlines quotedLines)
+      innerParser = many1 (parseBlockquote <++ parsePara)
+  return (Blockquote (fst (leftmostLongestParse innerParser quotedText)))
+
+-- Parse a nested list: consecutive lines indented by four spaces or a tab are
+-- stripped of that indent and parsed again as an unordered or ordered list.
+parseListNested :: ReadP MdToken
+parseListNested = do
+  let indent = string "    " <++ string "\t"
+      lineBody = manyTill get (void (char '\n') <++ eof)
+  indented <- greedyParse1 (indent *> lineBody)
+  let nested = fst $ leftmostLongestParse (parseUnorderedList <++ parseOrderedList) (init $ unlines indented)
+  -- Fail when the nested parse rendered to nothing.
+  when (null (show nested)) pfail
+  return nested
+
+-- Parse an unordered list line item: a '*', '+' or '-' marker, a space, then
+-- the item itself.
+parseUListLineItem :: ReadP MdToken
+parseUListLineItem =
+  choice (map char "*+-")
+    *> char ' ' -- At least one space between list indicator and list text.
+    *> parseListLineItemCommon
+
+-- Parse an ordered list line item: one or more digits, a '.', a space, then
+-- the item itself. The number's value is not used.
+parseOListLineItem :: ReadP MdToken
+parseOListLineItem =
+  greedyParse1 (satisfy isDigit)
+    *> char '.'
+    *> char ' ' -- At least one space between list indicator and list text.
+    *> parseListLineItemCommon
+
+-- Shared tail of a list line item: the item's inline tokens up to the end of
+-- the line, followed by an optional nested list (an empty Unit when absent).
+parseListLineItemCommon :: ReadP MdToken
+parseListLineItemCommon = do
+  skipSpaces
+  itemTokens <- many1 parseListLineToken
+  void (char '\n') <++ eof
+  sublist <- parseListNested <++ return (Unit "")
+  return (Line [Line itemTokens, sublist])
+
+-- Parse an unordered list paragraph item: a line item followed by the
+-- indented blocks that belong to it.
+parseUListParaItem :: ReadP MdToken
+parseUListParaItem = do
+  heading <- parseUListLineItem
+  followUp <- parseListParaItemCommon
+  -- Document is used purely as a cheap way to turn [MdToken] into one MdToken.
+  return (Document (Para heading : followUp))
+
+-- Parse an ordered list paragraph item: a line item followed by the
+-- indented blocks that belong to it.
+parseOListParaItem :: ReadP MdToken
+parseOListParaItem = do
+  heading <- parseOListLineItem
+  followUp <- parseListParaItemCommon
+  -- Document is used purely as a cheap way to turn [MdToken] into one MdToken.
+  return (Document (Para heading : followUp))
+
+-- Common code for parsing list paragraph items.
+-- A list paragraph item is defined as a line item, followed by an empty line, followed by one or more
+-- lines indented by four spaces or a tab.
+-- A list paragraph item can also be a blockquote.
+-- This parser handles the part after the line item: it consumes the empty line,
+-- the indented lines, and one more newline, and returns the parsed blocks.
+parseListParaItemCommon :: ReadP [MdToken]
+parseListParaItemCommon = do
+  char '\n'
+  lines <- greedyParse1 ((string "    " <|> string "\t") *> parseTillEol)
+  let res = fst $ leftmostLongestParse (greedyParse1 parseBlockquote <++ greedyParse1 parsePara) (init $ unlines lines)
+  char '\n'
+  return res -- Returned as a plain list; the callers wrap it together with the first line.
+
+-- Parse an unordered list item, which can be a line item or another list.
+parseUListItem :: ReadP MdToken
+parseUListItem = parseUListParaItem <++ parseUListLineItem
+
+-- Parse an unordered list: one or more items, then an extra newline or EOF.
+parseUnorderedList :: ReadP MdToken
+parseUnorderedList = UnordList <$> greedyParse1 parseUListItem <* listEnd
+  where
+    listEnd = void (char '\n') <++ eof -- A list must end in an extra newline or eof
+
+-- --------
+
+parseOListItem :: ReadP MdToken
+parseOListItem = parseOListParaItem <++ parseOListLineItem
+
+-- Parses the first element of an ordered list, which must start with '1.'
+parseFirstOListItem :: ReadP MdToken
+parseFirstOListItem = do
+  upcoming <- look
+  if take 2 upcoming == "1." then parseOListLineItem else pfail
+
+-- Parse an ordered list. The first item must be numbered "1."; it may be
+-- followed by any number of further items, including none, so a one-item
+-- list is still recognised as a list (as it already is for unordered lists).
+parseOrderedList :: ReadP MdToken
+parseOrderedList = do
+  firstLine <- parseFirstOListItem
+  lineItems <- greedyParse parseOListItem
+  void (char '\n') <++ eof -- A list must end in an extra newline or eof
+  return $ OrdList (firstLine : lineItems)
+
+-- Parse a horizontal rule: "---" followed by a blank line or the end of input.
+parseHorizontalRule :: ReadP MdToken
+parseHorizontalRule = do
+  _ <- string "---"
+  void (string "\n\n") <++ eof
+  return HorizontalRule
+
+-- Block-level parsers. parseDocument runs them through fallthroughParser, so
+-- they are tried in the order listed and parsePara acts as the last resort.
+documentParsers :: [ReadP MdToken]
+documentParsers =
+  [ parseHorizontalRule,
+    parseHeader,
+    parseBlockquote,
+    parseUnorderedList,
+    parseOrderedList,
+    parseFigure,
+    parsePara
+  ]

 -- Parse a document, which is multiple paragraphs.
 parseDocument :: ReadP MdToken
-parseDocument = (many1 parsePara) >>= (\res -> return (Document (res)))
+parseDocument = do
+  res <- manyTill (fallthroughParser documentParsers) eof
+  return (Document res)
--- a/src/MdToHtmlTest.hs
+++ b/src/MdToHtmlTest.hs
@@ -0,0 +1,158 @@
+module MdToHtmlTest where
+
+import MdToHTML
+import Test.HUnit
+
+-- | Build a test case asserting that the actual string equals the expected one.
+check_equal :: String -> String -> String -> Test
+check_equal desc expected = TestCase . assertEqual desc expected
+
+-- | Parse a Markdown string as a document and render the result via 'show'.
+convert :: String -> String
+convert = show . fst . leftmostLongestParse parseDocument
+
+headerTests =
+  TestList
+    [ check_equal "Should convert H1 heading" "<h1>Hello</h1>" (convert "# Hello"),
+      check_equal "Should convert H2 heading" "<h2>Hello</h2>" (convert "## Hello"),
+      check_equal "Should convert H3 heading" "<h3>Hello</h3>" (convert "### Hello"),
+      check_equal "Should convert H4 heading" "<h4>Hello</h4>" (convert "#### Hello"),
+      check_equal "Should convert H5 heading" "<h5>Hello</h5>" (convert "##### Hello"),
+      check_equal "Should convert H6 heading" "<h6>Hello</h6>" (convert "###### Hello")
+    ]
+
+boldTests =
+  TestList
+    [ check_equal "Should convert bold" "<p><b>Hello</b></p>" (convert "__Hello__"),
+      check_equal "Should convert italic" "<p><i>Hello</i></p>" (convert "_Hello_"),
+      check_equal "Should convert bold and italic in a sentence" "<p>It <i>is</i> a <b>wonderful</b> day</p>" (convert "It _is_ a __wonderful__ day"),
+      check_equal "Should convert nested bold and italic" "<p><b>Bold then <i>Italic</i></b></p>" (convert "**Bold then *Italic***"),
+      check_equal "Should convert nested bold and italic" "<p><i>Italic then <b>Bold</b></i></p>" (convert "*Italic then **Bold***")
+    ]
+
+strikethroughTests =
+  TestList
+    [ check_equal "Should convert strikethrough" "<p><s>Hello</s></p>" (convert "~~Hello~~"),
+      check_equal "Should convert long sentence with tilde" "<p><s>The universe is ~7 days old</s>. The universe is 13 billion years old.</p>" (convert "~~The universe is ~7 days old~~. The universe is 13 billion years old.")
+    ]
+
+linkTests =
+  TestList
+    [ check_equal "Should convert normal link" "<p><a href=\"https://example.com\">This is an example link.</a></p>" (convert "[This is an example link.](https://example.com)"),
+      check_equal "Should convert styled link" "<p><a href=\"https://example.com\"><b>Fancy</b>!!!</a></p>" (convert "[__Fancy__!!!](https://example.com)")
+    ]
+
+escapedCharTests =
+  TestList
+    [ check_equal "Should print literal underscore" "<p>This is an underscore - _</p>" (convert "This is an underscore - \\_"),
+      check_equal "Should print literal asterisk" "<p>This is an asterisk - *</p>" (convert "This is an asterisk - \\*"),
+      check_equal "Should print literal asterisk in bold" "<p>This is a bolded asterisk - <b>*</b></p>" (convert "This is a bolded asterisk - **\\***")
+    ]
+
+blockquoteTests =
+  TestList
+    [ check_equal "Should wrap para in blockquote" "<blockquote><p>What a <b>truly</b> <i>lovely</i> day!!!</p></blockquote>" (convert "> What a __truly__ _lovely_ day!!!"),
+      check_equal "Simple nested blockquotes" "<blockquote><p>Hello</p><blockquote><p>World</p></blockquote></blockquote>" (convert "> Hello\n>\n>> World"),
+      check_equal
+        "Nested blockquotes"
+        "<blockquote><p>Dorothy followed her through many \
+        \of the beautiful rooms in her castle.</p><blockquote><p>The Witch \
+        \bade her clean the pots and kettles and sweep the floor and keep the fire \
+        \fed with wood.</p></blockquote></blockquote>"
+        ( convert
+            "> Dorothy followed her through many of the \
+            \beautiful rooms in her castle.\n> \n>> The Witch bade her \
+            \clean the pots and kettles and sweep the floor and keep the fire fed with wood."
+        )
+    ]
+
+unorderedListTests =
+  TestList
+    [ check_equal "Basic unordered list" "<ul><li>Item 1</li><li>Item 2</li><li>Item 3</li></ul>" (convert "* Item 1\n* Item 2\n* Item 3"),
+      check_equal "Mixing list indicators" "<ul><li>Item 1</li><li>Item 2</li><li>Item 3</li></ul>" (convert "* Item 1\n+ Item 2\n- Item 3"),
+      check_equal "Formatted lists" "<ul><li><b>Item 1</b></li><li><i>Item 2</i></li><li><b><i>Item 3</i></b></li></ul>" (convert "* __Item 1__\n+ _Item 2_\n- ***Item 3***"),
+      check_equal "Nested list" "<ul><li>Item 1</li><li>Item 2</li><li>Item 3<ul><li>Subitem 1</li><li>Subitem 2</li></ul></li></ul>" (convert "* Item 1\n* Item 2\n* Item 3\n    * Subitem 1\n    * Subitem 2"),
+      check_equal "Paragraph in list" "<ul><li>Item 1</li><li><p>Item 2</p><p>More stuff</p></li><li>Item 3</li></ul>" (convert "- Item 1\n- Item 2\n\n    More stuff\n\n- Item 3"),
+      check_equal "Paragraph before list" "<p>This is a list</p><ul><li>Item 1</li><li>Item 2</li></ul>" (convert "This is a list\n\n* Item 1\n* Item 2"),
+      check_equal "Paragraph before list" "<h3>This is a list</h3><ul><li>Item 1</li><li>Item 2</li></ul>" (convert "### This is a list\n\n* Item 1\n* Item 2"),
+      check_equal "Nested list then back" "<ul><li>Item 1</li><li>Item 2<ul><li>Item 3</li><li>Item 4</li></ul></li><li>Item 5</li></ul>" (convert "- Item 1\n- Item 2\n    - Item 3\n    - Item 4\n- Item 5"),
+      check_equal "Blockquote in list" "<ul><li>Item 1</li><li><p>Item 2</p><blockquote><p>Quote</p></blockquote></li><li>Item 3</li></ul>" (convert "- Item 1\n- Item 2\n\n    > Quote\n\n- Item 3"),
+      check_equal "Ordered list in unordered list" "<ul><li>Item 1</li><li>Item 2<ol><li>Item 1</li><li>Item 2</li></ol></li><li>Item 3</li></ul>" (convert "- Item 1\n- Item 2\n    1. Item 1\n    2. Item 2\n- Item 3")
+    ]
+
+orderedListTests =
+  TestList
+    [ check_equal "Basic ordered list" "<ol><li>Item 1</li><li>Item 2</li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n3. Item 3"),
+      check_equal "Mixing list numbering" "<ol><li>Item 1</li><li>Item 2</li><li>Item 3</li></ol>" (convert "1. Item 1\n3. Item 2\n2. Item 3"),
+      check_equal "Should not convert list without number 1" "<p>2. Item 1 1. Item 2</p>" (convert "2. Item 1\n1. Item 2"),
+      check_equal "Formatted lists" "<ol><li><b>Item 1</b></li><li><i>Item 2</i></li><li><b><i>Item 3</i></b></li></ol>" (convert "1. __Item 1__\n2. _Item 2_\n3. ***Item 3***"),
+      check_equal "Nested list" "<ol><li>Item 1</li><li>Item 2</li><li>Item 3<ol><li>Subitem 1</li><li>Subitem 2</li></ol></li></ol>" (convert "1. Item 1\n2. Item 2\n3. Item 3\n    1. Subitem 1\n    2. Subitem 2"),
+      check_equal "Paragraph in list" "<ol><li>Item 1</li><li><p>Item 2</p><p>More stuff</p></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n\n    More stuff\n\n1. Item 3"),
+      check_equal "Paragraph before list" "<p>This is a list</p><ol><li>Item 1</li><li>Item 2</li></ol>" (convert "This is a list\n\n1. Item 1\n1. Item 2"),
+      check_equal "Paragraph before list" "<h3>This is a list</h3><ol><li>Item 1</li><li>Item 2</li></ol>" (convert "### This is a list\n\n1. Item 1\n200. Item 2"),
+      check_equal "Nested list then back" "<ol><li>Item 1</li><li>Item 2<ol><li>Item 3</li><li>Item 4</li></ol></li><li>Item 5</li></ol>" (convert "1. Item 1\n2. Item 2\n    1. Item 3\n    3. Item 4\n5. Item 5"),
+      check_equal "Blockquote in list" "<ol><li>Item 1</li><li><p>Item 2</p><blockquote><p>Quote</p></blockquote></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n\n    > Quote\n\n3. Item 3"),
+      check_equal "Unordered list in ordered list" "<ol><li>Item 1</li><li>Item 2<ul><li>Item 1</li><li>Item 2</li></ul></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n    - Item 1\n    * Item 2\n4. Item 3")
+    ]
+
+codeTests =
+  TestList
+    [ check_equal "Code by itself" "<p><code>Hello world!</code></p>" (convert "`Hello world!`"),
+      check_equal "Code in a paragraph" "<p>The following <code>text</code> is code</p>" (convert "The following `text` is code"),
+      check_equal "Code across paragraphs (shouldn't work" "<p></p><p></p>" (convert "`Incomplete\n\nCode`") -- At the moment, this is just treated as a syntax error, so nothing is rendered.
+    ]
+
+imageTests =
+  TestList
+    [ check_equal "Image with text" "<p>This is an image <img src=\"img.png\" alt=\"Image 1\" /></p>" (convert "This is an image ![Image 1](img.png)")
+    ]
+
+figureTests =
+  TestList
+    [ check_equal "Image by itself" "<figure><img src=\"img.png\" alt=\"Image 1\"/><figcaption aria-hidden=\"true\">Image 1</figcaption></figure>" (convert "![Image 1](img.png)")
+    ]
+
+horizontalRuleTests =
+  TestList
+    [check_equal "Horizontal Rule" "<p>a</p><hr><p>b</p>" (convert "a\n\n---\n\nb")]
+
+integrationTests =
+  TestList
+    [ check_equal "Integration 1" "<h1>Sample Markdown</h1><p>This is some basic, sample markdown.</p><h2><b>Second</b> <i>Heading</i></h2>" (convert "# Sample Markdown\n\n This is some basic, sample markdown.\n\n  ## __Second__ _Heading_"),
+      check_equal "Integration 2" "<p><b>Hello</b> <i>World</i></p>" (convert "__Hello__\n_World_"),
+      check_equal "Integration 3" "<h1>Hello</h1><p>World</p>" (convert "# Hello\nWorld"),
+      check_equal "Integration 4" "<p>a b</p>" (convert "a\nb"),
+      check_equal "Integration 5" "<h1>Hello</h1>" (convert "# Hello\n"),
+      check_equal "Integration 6" "<p>First line<br>Second line</p>" (convert "First line  \nSecond line"),
+      check_equal
+        "Integration 7"
+        "<h1>Sample Markdown</h1><p>This is some basic, sample markdown.</p><h2>Second \
+        \Heading</h2><ul><li>Unordered lists, and:<ol><li>One</li><li>Two</li><li>\
+        \Three</li></ol></li><li>More</li></ul><blockquote><p>Blockquote</p>\
+        \</blockquote><p>And <b>bold</b>, <i>italics</i>, and even <i>italics \
+        \and later <b>bold</b></i>. Even <s>strikethrough</s>. \
+        \<a href=\"https://markdowntohtml.com\">A link</a> to somewhere.</p>"
+        ( convert
+            "# Sample Markdown\n\nThis is some basic, sample markdown.\n\n## Second \
+            \Heading\n\n- Unordered lists, and:\n    1. One\n    2. Two\n    3. Three\n\
+            \- More\n\n> Blockquote\n\nAnd **bold**, *italics*, and even *italics and \
+            \later **bold***. Even ~~strikethrough~~. [A link](https://markdowntohtml.com) to somewhere."
+        )
+    ]
+
+tests =
+  TestList
+    [ headerTests,
+      boldTests,
+      strikethroughTests,
+      linkTests,
+      escapedCharTests,
+      blockquoteTests,
+      unorderedListTests,
+      orderedListTests,
+      imageTests,
+      figureTests,
+      codeTests,
+      horizontalRuleTests,
+      integrationTests
+    ]
+
+-- | Run the whole suite with HUnit's text runner and return its counts.
+runTests :: IO Counts
+runTests = runTestTT tests
--- a/src/Test.hs
+++ b/src/Test.hs
@@ -1,44 +0,0 @@
-module MdToHtmlTest where
-
-import MdToHTML
-import Test.HUnit
-
-check_equal :: String -> String -> String -> Test
-check_equal desc expected actual = TestCase (assertEqual desc expected actual)
-
-convert :: String -> String
-convert md = show . fst $ leftmostLongestParse parseDocument md
-
-headerTests = TestList
-      [
-         check_equal "Should convert H1 heading" "<h1>Hello</h1>" (convert "# Hello"),
-         check_equal "Should convert H2 heading" "<h2>Hello</h2>" (convert "## Hello"),
-         check_equal "Should convert H3 heading" "<h3>Hello</h3>" (convert "### Hello"),
-         check_equal "Should convert H4 heading" "<h4>Hello</h4>" (convert "#### Hello"),
-         check_equal "Should convert H5 heading" "<h5>Hello</h5>" (convert "##### Hello"),
-         check_equal "Should convert H6 heading" "<h6>Hello</h6>" (convert "###### Hello")
-      ]
-
-boldTests = TestList
-   [
-         check_equal "Should convert bold" "<p><b>Hello</b></p>" (convert "__Hello__"),
-         check_equal "Should convert italic" "<p><i>Hello</i></p>" (convert "_Hello_"),
-         check_equal "Should convert bold and italic in a sentence" "<p>It <i>is</i> a <b>wonderful</b> day</p>" (convert "It _is_ a __wonderful__ day")
-   ]
-
-integrationTests = TestList
-    [
-        check_equal "Integration 1" "<h1>Sample Markdown</h1><p>This is some basic, sample markdown.</p><h2><b>Second</b> <i>Heading</i></h2>" (convert "# Sample Markdown\n\n This is some basic, sample markdown.\n\n  ## __Second__ _Heading_"),
-        check_equal "Integration 2" "<p><b>Hello</b> <i>World</i></p>" (convert "__Hello__\n_World_"),
-        check_equal "Integration 3" "<h1>Hello</h1><p>WorldM/p>" (convert "# Hello\nWorld")
-    ]
-
-
-tests = TestList
-   [
-      headerTests,
-      boldTests,
-      integrationTests
-   ]
-
-runTests = runTestTT tests
--- a/test.hs
+++ b/test.hs
@@ -1,5 +0,0 @@
-leftmostLongest :: (Foldable t) => [t a] -> t a
-leftmostLongest xs = 
-        let lastElem = (last xs)
-            filteredLst = (filter (\val -> (length val) == (length lastElem)) xs)
-        in head filteredLst
Author	SHA1	Message	Date
Aadhavan Srinivasan	1d9ac86a2a	Implementation and test for horizontal rule	2025-05-27 13:41:38 -04:00
Aadhavan Srinivasan	0320402957	Fixed how trailing newlines are parsed	2025-05-27 09:28:19 -04:00
Aadhavan Srinivasan	8696a185a7	Rewrote readLines function to be more concise	2025-05-27 08:55:10 -04:00
Aadhavan Srinivasan	da38ac226f	Added figure implementation and tests	2025-05-27 08:54:52 -04:00
Aadhavan Srinivasan	1fcce32ef6	Updated to read from stdin/file	2025-05-23 19:03:23 -04:00
Aadhavan Srinivasan	e50081614a	Removed section from cabal file	2025-05-23 19:03:14 -04:00
Aadhavan Srinivasan	b98a8cc44f	Added image tests	2025-05-23 14:55:58 -04:00
Aadhavan Srinivasan	90c7a585d2	Added image parser	2025-05-23 14:04:37 -04:00
Aadhavan Srinivasan	4a15330874	Updated showing of image	2025-05-23 14:04:21 -04:00
Aadhavan Srinivasan	c14112d3e4	Updated special and escapable characters	2025-05-23 14:04:05 -04:00
Aadhavan Srinivasan	ed7d2c1ef1	Added code parser to list line parser list	2025-05-20 22:23:41 -04:00
Aadhavan Srinivasan	eb20f154a4	Removed unused function	2025-05-20 22:20:36 -04:00
Aadhavan Srinivasan	172985131b	Fixed HTML rendering for horizontal rule	2025-05-20 22:20:23 -04:00
Aadhavan Srinivasan	3781e67ab1	Created a separate list of escapable chars	2025-05-20 22:20:05 -04:00
Aadhavan Srinivasan	f2d54edd3f	Implemented inline code parsing	2025-05-20 22:19:44 -04:00
Aadhavan Srinivasan	5393dc4eb9	Added code tests	2025-05-20 22:18:46 -04:00
Aadhavan Srinivasan	e051c87f08	Factor list line common parsing into a separate function; refactored OList and UList line parsing to use it	2025-05-20 16:48:31 -04:00
Aadhavan Srinivasan	9b1c51897c	A nested list can be ordered or unordered	2025-05-20 16:47:58 -04:00
Aadhavan Srinivasan	2a3dddc7b0	Rename function	2025-05-20 16:47:31 -04:00
Aadhavan Srinivasan	a8793b5adb	Remove obsolete comments	2025-05-20 16:47:16 -04:00
Aadhavan Srinivasan	eecec764ad	A list can contain a blockquote as well.	2025-05-20 16:47:04 -04:00
Aadhavan Srinivasan	cdca6ea95e	A list item doesn't need to have a nested list item parser, because the line item parser handles the nested list.	2025-05-20 16:46:52 -04:00
Aadhavan Srinivasan	540b5430e5	Use list of document parsers in parseDocument	2025-05-20 16:45:56 -04:00
Aadhavan Srinivasan	00dfba81eb	Created a list of document parsers; implemented ordered list parsing.	2025-05-20 16:45:37 -04:00
Aadhavan Srinivasan	39152c0034	Factor out common code for UList and OList parsing into a separate function. Refactored UList function; wrote OList function.	2025-05-20 16:45:07 -04:00
Aadhavan Srinivasan	41b35be7c9	Rename function	2025-05-20 16:43:35 -04:00
Aadhavan Srinivasan	d2c8565f62	Import Data.Char for isDigit	2025-05-20 16:43:22 -04:00
Aadhavan Srinivasan	62eeef2abb	Removed unused functions	2025-05-20 16:43:10 -04:00
Aadhavan Srinivasan	9c6634cfec	Added ordered list tests	2025-05-20 16:42:40 -04:00
Aadhavan Srinivasan	2a5a68b1de	Fixed test name	2025-05-20 16:42:12 -04:00
Aadhavan Srinivasan	f8e1a98bdf	Remove obsolete comment	2025-05-20 14:03:30 -04:00
Aadhavan Srinivasan	05433c31f1	Remove unused functions	2025-05-20 14:02:50 -04:00
Rockingcool	5c871f2b25	Removed test file	2025-05-20 12:25:07 -05:00
Rockingcool	5273c99e6e	Added unordered list tests and integration tests; added strikethrough tests to test list	2025-05-20 12:24:20 -05:00
Rockingcool	50888c9c3d	Added bold and strikethrough tests	2025-05-20 12:23:47 -05:00
Rockingcool	45115c765c	An unordered list must end in a blank line.	2025-05-20 12:23:27 -05:00
Rockingcool	5b0d42fd2d	Use the in-order parsing approach instead of the post-order one.	2025-05-20 12:23:13 -05:00
Rockingcool	2a585d00f2	Enforce at least one space between list indicator and list text.	2025-05-20 12:22:37 -05:00
Rockingcool	11a3b14cb1	Define a parser for list line tokens, update line token parser	2025-05-20 12:22:19 -05:00
Rockingcool	58d3142855	Update comment	2025-05-20 12:21:45 -05:00
Rockingcool	0fb651fffc	Add parseUnit instead of parseString to lineParsers	2025-05-20 12:20:51 -05:00
Rockingcool	bc05dede06	Create a list of parsers that are used for list line items.	2025-05-20 12:20:35 -05:00
Rockingcool	b69e34f823	Parse a single character as a unit	2025-05-20 12:20:10 -05:00
Rockingcool	2514ecdafc	Parse bold, italic and strikethrough in-order, instead of trying to find the end, then parsing everything in the middle. The current approach parses the opening bold (or italic), some text, then the closing bold (or italic), instead of parsing the opening, closing, then everything in between.	2025-05-20 12:19:59 -05:00
Rockingcool	c52d5556a2	Allow for multiple blank lines after header	2025-05-20 12:18:52 -05:00
Rockingcool	5fc1b1122a	Create a function to 'fallthrough parse' ie. try the second parser only if the first one fails.	2025-05-20 12:18:23 -05:00
Rockingcool	83dd0024c4	Space doesn't have to be a reserved character anymore.	2025-05-20 12:17:59 -05:00
Rockingcool	70761649ad	Derive Eq for defined types	2025-05-20 12:17:43 -05:00
Rockingcool	b9c6cc4470	Implemented strikethrough parser	2025-05-16 19:29:39 -05:00
Rockingcool	23691f9cfe	Add strikethrough parser to line parser list	2025-05-16 19:28:41 -05:00
Rockingcool	8c220cc800	A document can consist of unordered lists as well	2025-05-14 21:40:03 -05:00
Rockingcool	ee453c0259	Fixed blockquote parser; implemented unordered list parser and relevant sub-parsers	2025-05-14 21:39:27 -05:00
Rockingcool	c90d23617a	A blockquote must have a list of tokens	2025-05-14 21:38:50 -05:00
Rockingcool	c574699a8a	Added an import	2025-05-14 21:38:38 -05:00
Rockingcool	f55e160e25	Added tests for unordered lists	2025-05-14 21:37:58 -05:00
Rockingcool	dddcca0185	Wrote a lot of helper functions - most importantly greedyParse	2025-05-13 21:40:42 -05:00
Rockingcool	e7d94f225a	Updated show definitions for some tokens; add HTML syntax	2025-05-09 23:14:09 -05:00
Rockingcool	e8eb22f3ae	Added pragmas to suppress LSP warnings; removed a random line that was added by the LSP	2025-05-09 23:13:31 -05:00
Rockingcool	ef1809970b	Greatly shortened parseQuotedLine, because I can just use greedyParse instead of using manyTill	2025-05-09 23:12:53 -05:00
Rockingcool	549504d650	Consume whitespace between greater-than sign and text in a blockquote line	2025-05-09 23:12:12 -05:00
Rockingcool	4f23592aeb	Add relevant modules to other-modules section, to get rid of cabal's warnings	2025-05-09 23:10:34 -05:00
Rockingcool	b00d79b9aa	Renamed Test.hs to MdToHtmlTest.hs so that the filename matches the module name; updated blockquote tests; started working on ordered list tests	2025-05-09 23:10:04 -05:00
Rockingcool	3cd9f24935	Wrote helper functions for parseBlockquote, to parse a quoted line and multiple quoted lines.	2025-05-09 16:12:09 -05:00
Rockingcool	a60b3754e4	Rewrite leftmostLongest and leftmostLongestParse so that they don't rely on 'head' which is a partial function	2025-05-09 12:17:53 -05:00
Rockingcool	3330185393	Make MdToken a Semigroup and a Monoid, so that I can use 'mempty' for it.	2025-05-09 12:17:22 -05:00
Rockingcool	1df7f64aec	Started working on blockquote parser	2025-05-08 17:54:00 -05:00
Rockingcool	bfd627c763	Added blockquote tests	2025-05-08 17:53:37 -05:00
Rockingcool	81671727b2	Added more parsers for escaped characters and links.	2025-05-07 14:21:13 -05:00
Rockingcool	1b821c4315	Declare separate variable for escaped characters.	2025-05-07 14:20:51 -05:00
Rockingcool	51728dd3a1	Added double-quotes to link URL when printing link.	2025-05-07 14:20:29 -05:00
Rockingcool	56e1514213	Added more tests	2025-05-07 14:19:50 -05:00
Rockingcool	e7ea7b6ba6	Removed unnecessary case statement	2025-05-06 17:10:54 -05:00
Rockingcool	ef132791a1	Applied lots of hints, mostly redundant brackets	2025-05-06 17:09:54 -05:00
Rockingcool	ca0d09dfab	Added another test	2025-05-06 17:07:46 -05:00
Rockingcool	d1b0ce6b10	Move parseHeader up in the chain - parseDocument can either parse headers or a paragraph	2025-05-06 17:06:22 -05:00
Rockingcool	b6f51c33c7	Parse until EOF instead of adding a manual check.	2025-05-06 17:05:19 -05:00
Rockingcool	9ffbb7365c	Removed commented line	2025-05-06 17:05:02 -05:00
Rockingcool	71aacdd26a	Removed unnecessary parentheses	2025-05-06 17:04:24 -05:00
Rockingcool	873795e267	Removed unnecessary function	2025-05-06 17:03:45 -05:00
Aadhavan Srinivasan	9a128407cc	Fixed newline bug; working on fixing the last failing test	2025-05-05 11:31:24 -04:00
Rockingcool	a18d03e4ac	Working on parsing single newlines	2025-05-05 09:42:53 -05:00
Rockingcool	f916267d29	Changed parsing of header to consume optional newline; removed debug statements	2025-05-04 16:22:53 -05:00
Rockingcool	9555f55575	Fixed typo in test	2025-05-04 16:22:35 -05:00