Made some changes

Merge remote-tracking branch 'origin' into fixingIncompleteElements
Added code parser to list line parser list
2025-05-23 08:07:02 -04:00 · 2025-05-20 22:24:24 -04:00 · 2025-05-20 22:23:41 -04:00 · 2025-05-20 22:21:59 -04:00 · 2025-05-20 22:20:36 -04:00 · 2025-05-20 22:20:23 -04:00
5 changed files with 457 additions and 132 deletions
--- a/md-to-html.cabal
+++ b/md-to-html.cabal
@@ -56,6 +56,7 @@ common warnings
 library
   hs-source-dirs:   src
   exposed-modules: MdToHTML
   other-modules: MdToHtmlTest
   build-depends:    base ^>=4.19.1.0,
                      HUnit
@@ -67,7 +68,9 @@ executable md-to-html-runner
    main-is:          Main.hs
    -- Modules included in this executable, other than Main.
-    -- other-modules:
+    other-modules:
        MdToHTML
        MdToHtmlTest
    -- LANGUAGE extensions used by modules in this package.
    -- other-extensions:
--- a/src/MdToHTML.hs
+++ b/src/MdToHTML.hs
@@ -1,17 +1,23 @@
 {-# OPTIONS_GHC -Wno-unrecognised-pragmas #-}
 {-# HLINT ignore "Use lambda-case" #-}
 module MdToHTML where
 import Control.Applicative
 import Control.Monad
 import Data.Char
 import Data.List
 import Data.Ord (comparing)
 import Debug.Trace
 import Text.ParserCombinators.ReadP
 import Text.Printf
 type HeaderLevel = Int
-newtype URL = URL {getUrl :: String}
+newtype URL = URL {getUrl :: String} deriving (Eq)
-newtype ImgPath = ImgPath {getPath :: String}
+newtype ImgPath = ImgPath {getPath :: String} deriving (Eq)
 -- Zero-or-more repetition of a parser using ReadP's own 'many', which
 -- yields every possible number of repetitions. The name is spelled fully
 -- qualified because Control.Applicative also exports a 'many'.
 parseMany :: ReadP a -> ReadP [a]
 parseMany parser = Text.ParserCombinators.ReadP.many parser
@@ -21,12 +27,13 @@ data MdToken
  | Header HeaderLevel MdToken
  | Para MdToken
  | Line [MdToken]
  | SingleNewline -- A single newline is rendered as a space.
  | Linebreak
  | HorizontalRule
-  | Blockquote MdToken
+  | Blockquote [MdToken]
  | UnordList [MdToken]
  | OrdList [MdToken]
-  | Code String
+  | Code MdToken
  | Codeblock String
  | Link MdToken URL
  | Image MdToken ImgPath
@@ -34,152 +41,374 @@ data MdToken
  | Italic MdToken
  | Strikethrough MdToken
  | Unit String
  deriving (Eq)
 -- Deriving Show for MdToken
 instance Show MdToken where
-  show (Document tokens) = concat (map show tokens)
+  show (Document tokens) = concatMap show tokens
  show (Header level token) = "<h" ++ show level ++ ">" ++ show token ++ "</h" ++ show level ++ ">"
  show (Para token) = "<p>" ++ show token ++ "</p>"
-  show (Line tokens) = concat (map show tokens)
+  show (Line tokens) = concatMap show tokens
  show Linebreak = "<br>"
-  show HorizontalRule = "---------"
+  show SingleNewline = " "
-  show (Blockquote token) = "BLOCK" ++ show token
+  show HorizontalRule = "<hr>"
-  show (UnordList tokens) = "UNORD" ++ concat (map show tokens)
+  show (Blockquote tokens) = "<blockquote>" ++ concatMap show tokens ++ "</blockquote>"
-  show (OrdList tokens) = "ORD" ++ concat (map show tokens)
+  show (UnordList tokens) = "<ul>" ++ concatMap (prepend "<li>" . append "</li>" . show) tokens ++ "</ul>"
-  show (Code code) = show code
+  show (OrdList tokens) = "<ol>" ++ concatMap (prepend "<li>" . append "</li>" . show) tokens ++ "</ol>"
  show (Code code) = "<code>" ++ show code ++ "</code>"
  show (Codeblock code) = show code
-  show (Link txt url) = "<a href=" ++ (getUrl url) ++ ">" ++ show txt ++ "</a>"
+  show (Link txt url) = "<a href=\"" ++ getUrl url ++ "\">" ++ show txt ++ "</a>"
-  show (Image txt imgPath) = "<img src=" ++ (getPath imgPath) ++ ">" ++ show txt ++ "</img>"
+  show (Image txt imgPath) = "<img src=" ++ getPath imgPath ++ ">" ++ show txt ++ "</img>"
  show (Bold token) = "<b>" ++ show token ++ "</b>"
  show (Italic token) = "<i>" ++ show token ++ "</i>"
  show (Strikethrough token) = "<s>" ++ show token ++ "</s>"
  show (Unit unit) = printf "%s" unit
 -- Combining two tokens concatenates them into one flat 'Document'.
 -- 'Unit ""' (the 'mempty' below) is treated as the identity and nested
 -- 'Document's are flattened, so the class laws hold:
 --   (a <> b) <> c == a <> (b <> c)   and   mempty <> a == a == a <> mempty.
 -- The rendered HTML is unaffected, because showing a 'Document' simply
 -- concatenates the rendering of its children.
 instance Semigroup MdToken where
  Unit "" <> b = b
  a <> Unit "" = a
  Document xs <> Document ys = Document (xs ++ ys)
  Document xs <> b = Document (xs ++ [b])
  a <> Document ys = Document (a : ys)
  a <> b = Document [a, b]
 -- The empty token: a 'Unit' holding no text, which renders as "".
 instance Monoid MdToken where
  mempty = Unit ""
 -- ---------------
 -- Helpers
-mustBeHash :: ReadP Char
+leftmostLongest :: (Foldable t) => [(a, t b)] -> Maybe (a, t b)
 mustBeHash = satisfy (\x -> x == '#')
 leftmostLongest :: (Foldable t) => [(a, t b)] -> (a, t b)
 leftmostLongest xs =
-  let lastElem = (last xs)
+  let lastElem = last xs
-      filteredLst = (filter (\val -> (length $ snd val) == (length $ snd lastElem)) xs)
+      filteredLst = filter (\val -> length (snd val) == length (snd lastElem)) xs
-   in head filteredLst
+   in case filteredLst of
        [] -> Nothing
        (x : xs) -> Just x
 -- Get the first parse returned by readP_to_S that consumed the most input
-leftmostLongestParse :: ReadP a -> String -> (a, String)
+leftmostLongestParse :: (Monoid a) => ReadP a -> String -> (a, String)
-leftmostLongestParse parser input = leftmostLongest $ readP_to_S parser input
+leftmostLongestParse parser input =
  let res = leftmostLongest $ readP_to_S parser input
   in case res of
        Nothing -> (mempty, mempty)
        Just x -> x
-- Parse if the string that's left matches the string comparator function
+specialChars = "\\#*_[\n`"
 -- Consume and return the next character, but only when the whole of the
 -- remaining input satisfies the given predicate; otherwise fail without
 -- consuming anything.
 lookaheadParse :: (String -> Bool) -> ReadP Char
 lookaheadParse stringCmp = do
  rest <- look
  if stringCmp rest then get else pfail
-lineToList :: MdToken -> [MdToken]
+escapableChars = '~' : specialChars
-lineToList (Line tokens) = tokens
+
 -- Greedy zero-or-more repetition. Rather than offering every possible
 -- number of repetitions, it keeps applying the parser for as long as it
 -- succeeds and returns only that run. Yields [] when the parser does not
 -- match at all.
 greedyParse :: ReadP a -> ReadP [a]
 greedyParse parser = greedyParse1 parser <++ return []
 -- Like greedyParse, but the parser must succeed at least once.
 greedyParse1 :: ReadP a -> ReadP [a]
 greedyParse1 parser =
  -- '<++' commits to a further repetition whenever one is possible and only
  -- falls back to ending the list when the parser no longer matches.
  (:) <$> parser <*> (greedyParse1 parser <++ pure [])
 -- Put the first list in front of the second: prepend "<li>" "x" == "<li>x".
 prepend :: [a] -> [a] -> [a]
 prepend = (++)
 -- Put the first list after the second: append "</li>" "x" == "x</li>".
 append :: [a] -> [a] -> [a]
 append = flip (++)
 -- Collect characters up to the end of the current line. The terminating
 -- newline, if there is one, is consumed but not returned; end of input
 -- also terminates the line.
 parseTillEol :: ReadP String
 parseTillEol = manyTill get endOfLine
  where
   endOfLine = (char '\n' >> return ()) <++ eof
 -- Takes a list of parsers. Returns a parser that will try them in
 -- order, moving to the next one only if the current one produces no
 -- result at all (ReadP's left-biased '<++').
 -- An empty list gives a parser that always fails, instead of crashing
 -- with a pattern-match failure.
 fallthroughParser :: [ReadP a] -> ReadP a
 fallthroughParser = foldr (<++) pfail
 -- Zero-or-more repetition of a parser. At end of input the only result is
 -- the empty list; otherwise it offers both stopping here and one or more
 -- further repetitions (symmetric choice).
 myMany :: (Monoid a) => ReadP a -> ReadP [a]
 myMany p = look >>= continue
  where
   continue rest
     | null rest = return []
     | otherwise = return [] +++ myMany1 p
 -- One-or-more repetition: a mandatory first result followed by whatever
 -- myMany produces.
 myMany1 :: (Monoid a) => ReadP a -> ReadP [a]
 myMany1 p = (:) <$> p <*> myMany p
 -- ---------------
 -- Parse a markdown header, denoted by 1-6 #'s followed by some text, followed by EOL.
 parseHeader :: ReadP MdToken
 parseHeader = do
  traceM "Reached parseHeader"
  skipSpaces
-  headers <- many1 mustBeHash
+  headers <- munch1 (== '#')
  when
-    ((length headers) > 6)
+    (length headers > 6)
    pfail
-  _ <- string " "
+  skipSpaces
-  text <- munch1 (\x -> x /= '\n') -- Parse until EOL
+  text <- munch1 (/= '\n')
  --  Text.ParserCombinators.ReadP.optional (char '\n')
  skipSpaces
  let parsedText = fst $ leftmostLongestParse parseLine text
  traceM (show parsedText)
  traceM (show (length headers))
  return (Header (length headers) parsedText)
 -- Parse bold text
 parseBold :: ReadP MdToken
-parseBold = do
+parseBold = parseBoldWith "**" <|> parseBoldWith "__"
-  traceM "Reached parseBold"
+  where
-  text <-
+    parseBoldWith delim = do
-    choice
+      string delim
-      [ between (string "__") (string "__") (many1 (lookaheadParse (/= "__"))),
+      inside <- myMany1 parseLineToken
-        between (string "**") (string "**") (many1 (lookaheadParse (/= "**")))
+      string delim
-      ]
+      return (Bold (Line inside))
  let parsedText = fst $ leftmostLongestParse parseLine text
  return (Bold parsedText)
 -- Parse italic text
 parseItalic :: ReadP MdToken
-parseItalic = do
+parseItalic = parseItalicWith '*' <|> parseItalicWith '_'
-  traceM "Reached parseItalic"
+  where
-  text <-
+    parseItalicWith delim = do
-    choice
+      exactlyOnce delim
-      [ (between (string "_") (string "_") (munch1 (/= '_'))),
+      inside <- myMany1 parseLineToken
-        (between (string "*") (string "*") (munch1 (/= '*')))
+      exactlyOnce delim
-      ]
+      return (Italic (Line inside))
-  let parsedText = fst $ leftmostLongestParse parseLine text
+    exactlyOnce ch = do
-  return (Italic parsedText)
+      char ch
      remaining <- look
      case remaining of
        [] -> return ch
        x : xs -> if x == ch then pfail else return ch
 -- Parse strikethrough text: one or more line tokens enclosed in "~~".
 parseStrikethrough :: ReadP MdToken
 parseStrikethrough =
  Strikethrough . Line <$> between tildes tildes (many1 parseLineToken)
  where
   tildes = string "~~"
 -- Parse inline code: one or more arbitrary characters between backticks.
 -- The contents are kept verbatim in a single Unit; they are not parsed
 -- for further markup.
 parseCode :: ReadP MdToken
 parseCode = Code . Unit <$> between tick tick (many1 get)
  where
   tick = string "`"
 -- Parse a link of the form [text](url). The text is parsed again as a
 -- line, so it may carry inline formatting; the URL is taken verbatim.
 parseLink :: ReadP MdToken
 parseLink = do
  rawText <- enclosed "[" "]"
  rawUrl <- enclosed "(" ")"
  let label = fst (leftmostLongestParse parseLine rawText)
  return (Link label (URL rawUrl))
  where
   -- One or more characters between the given delimiters.
   enclosed open close = between (string open) (string close) (many1 get)
 -- Parse a hard line break: two or more spaces followed by a newline.
 -- Emits a debug trace message each time the parser is entered.
 parseLinebreak :: ReadP MdToken
 parseLinebreak =
  traceM "Reached parseLinebreak"
    >> char ' '
    >> many1 (char ' ')
    >> char '\n'
    >> return Linebreak
-- Parse a regular string as a Unit.
+parseSingleNewline :: ReadP MdToken
-parseString :: ReadP MdToken
+parseSingleNewline = do
-parseString = do
+  char '\n'
-  traceM "Reached parseString"
+  return SingleNewline
-  firstChar <- get -- Must parse at least one character here
+
-  text <- munch (\x -> not (elem x "#*_[\n "))
+-- Parse an escaped character
-  return (Unit (firstChar : text))
+parseEscapedChar :: ReadP MdToken
 parseEscapedChar =
  -- A backslash followed by any one of the escapable characters yields
  -- that character as literal text; the backslash itself is dropped.
  (\escaped -> Unit [escaped]) <$> (char '\\' *> choice (map char escapableChars))
 -- Parse any single character as a one-character Unit. No character is
 -- excluded, so this also accepts markup characters (the earlier
 -- restriction to non-special characters is no longer applied).
 parseUnit :: ReadP MdToken
 parseUnit = (\c -> Unit [c]) <$> get
 lineParsers :: [ReadP MdToken]
-lineParsers = [parseLinebreak, parseBold, parseItalic, parseString] -- A 'line' doesn't include a 'header'
+lineParsers =
  [ parseLinebreak,
    parseSingleNewline,
    parseEscapedChar,
    parseCode,
    parseBold,
    parseItalic,
    parseStrikethrough,
    parseLink,
    parseUnit
  ] -- A 'line' doesn't include a 'header'
 -- Inline parsers allowed inside a list line, in the order they are tried.
 -- There is deliberately no single-newline parser here: a list line cannot
 -- contain newlines.
 listLineParsers :: [ReadP MdToken]
 listLineParsers =
  [ parseLinebreak
  , parseEscapedChar
  , parseCode
  , parseBold
  , parseItalic
  , parseStrikethrough
  , parseLink
  , parseUnit
  ]
 -- Every parser: the header parser followed by all of the line parsers.
 allParsers :: [ReadP MdToken]
 allParsers = [parseHeader] ++ lineParsers
-- Parse any of the above tokens.
+-- Parse any of the line tokens.
 parseLineToken :: ReadP MdToken
-parseLineToken = choice lineParsers
+parseLineToken = fallthroughParser lineParsers
 -- Parse any one of the list line tokens: the parsers in listLineParsers
 -- are tried in order and the first one that succeeds is used.
 parseListLineToken :: ReadP MdToken
 parseListLineToken = fallthroughParser listLineParsers
 -- Parse a line, consisting of one or more tokens.
 parseLine :: ReadP MdToken
 parseLine = do
  traceM "Reached parseLine"
  skipSpaces
  -- Fail if we have reached the end of the document.
-  remaining <- look
+  parsed <- myMany1 parseLineToken
  when (null remaining) pfail
  parsed <- parseMany parseLineToken
  --         traceM $ show parsed
  return (Line parsed)
 -- Parse a paragraph, which is a 'Line' (can span multiple actual lines), separated by double-newlines.
 -- As a weird special case, a 'Paragraph' can also be a 'Header'.
 parsePara :: ReadP MdToken
 parsePara = do
  traceM "Reached parsePara"
  parseMany (char '\n')
  --  text <- many1 (lookaheadParse (\x -> ((length x) < 2) || (take 2 x) /= "\n\n")) -- Parse until a double-newline.
  --  string "\n\n" <|> (eof >> return "") -- Consume the next double-newline or EOF.
-  text <- (manyTill get ((string "\n\n") <|> (eof >> return "")))
+  text <- manyTill get (string "\n\n" <|> (eof >> return ""))
  when (null text) pfail
-  let parsedText = fst $ leftmostLongestParse (parseHeader <|> parseLine) text -- Parse either a line or a header.
+  let parsedText = fst $ leftmostLongestParse parseLine text -- Parse a line
-  traceM (show parsedText)
+  return (Para parsedText)
-  -- If the paragraph is a header, return a Header token. Otheriwse return a Para token.
+
-  case parsedText of
+-- Parse a line starting with '>', return the line except for the '>'.
-    Header level token -> return (Header level token)
+parseQuotedLine :: ReadP String
-    _ -> return (Para parsedText)
+parseQuotedLine = do
  char '>'
  greedyParse (char ' ' +++ char '\t')
  restOfLine <- munch (/= '\n')
  Text.ParserCombinators.ReadP.optional (char '\n') >> return ""
  return restOfLine
 -- Parse one or more consecutive quoted lines, stopping at the first line
 -- that does not begin with '>'. Each element is whatever parseQuotedLine
 -- returns for that line.
 parseQuotedLines :: ReadP [String]
 parseQuotedLines = greedyParse1 quotedLine
  where
   quotedLine = do
     upcoming <- look
     if take 1 upcoming == ">" then parseQuotedLine else pfail
 -- Parse a blockquote: a run of consecutive lines starting with '>'.
 -- The quoted lines (as returned by parseQuotedLines) are joined back
 -- together with newlines and that text is parsed again as one or more
 -- nested blockquotes or paragraphs.
 parseBlockquote :: ReadP MdToken
 parseBlockquote = do
  quotedLines <- parseQuotedLines
  -- intercalate joins the lines with newlines without a trailing newline,
  -- and unlike `init . unlines` it is total.
  let quotedText = intercalate "\n" quotedLines
      innerParser = many1 (parseBlockquote <++ parsePara)
  return (Blockquote (fst (leftmostLongestParse innerParser quotedText)))
 -- Parse a nested list: one or more consecutive lines indented by four
 -- spaces or a tab. The indentation is stripped, the lines are joined with
 -- newlines, and the result is parsed as an unordered or ordered list.
 -- Fails when that inner parse produces nothing.
 parseListNested :: ReadP MdToken
 parseListNested = do
  let indent = string "    " <++ string "\t"
  nestedLines <- greedyParse1 (indent *> parseTillEol)
  let nestedText = intercalate "\n" nestedLines
      nestedList = fst (leftmostLongestParse (parseUnorderedList <++ parseOrderedList) nestedText)
  -- leftmostLongestParse falls back to mempty when nothing parses, and
  -- mempty renders as the empty string, so an empty rendering means failure.
  when (null (show nestedList)) pfail
  return nestedList
 -- Parse an unordered list line item: a '*', '+' or '-' marker, at least
 -- one space, then the item text (and any nested list below it).
 parseUListLineItem :: ReadP MdToken
 parseUListLineItem =
  choice (map char "*+-") *> char ' ' *> parseListLineItemCommon
 -- Parse an ordered list line item: one or more digits, a '.', at least
 -- one space, then the item text (and any nested list below it). The
 -- number itself is discarded.
 parseOListLineItem :: ReadP MdToken
 parseOListLineItem =
  munch1 isDigit *> char '.' *> char ' ' *> parseListLineItemCommon
 -- Shared tail of a list line item, run after the list marker: skip
 -- leading whitespace, read one or more inline tokens up to the end of the
 -- line, then pick up an optional nested list. The result is a Line holding
 -- the item's own text followed by the nested list (an empty Unit when
 -- there is none).
 parseListLineItemCommon :: ReadP MdToken
 parseListLineItemCommon = do
  skipSpaces
  inlineTokens <- many1 parseListLineToken
  void (char '\n') <++ eof
  nested <- parseListNested <++ pure (Unit "")
  pure (Line [Line inlineTokens, nested])
 -- Parse an unordered list paragraph item: a line item followed by the
 -- indented blocks that belong to it.
 parseUListParaItem :: ReadP MdToken
 parseUListParaItem = wrap <$> parseUListLineItem <*> parseListParaItemCommon
  where
   -- Document is used purely as a way of packing [MdToken] into a single
   -- MdToken; it carries no other meaning here.
   wrap firstLine rest = Document (Para firstLine : rest)
 -- Parse an ordered list paragraph item: a line item followed by the
 -- indented blocks that belong to it.
 parseOListParaItem :: ReadP MdToken
 parseOListParaItem = wrap <$> parseOListLineItem <*> parseListParaItemCommon
  where
   -- Document is used purely as a way of packing [MdToken] into a single
   -- MdToken; it carries no other meaning here.
   wrap firstLine rest = Document (Para firstLine : rest)
 -- Common code for parsing list paragraph items.
 -- Runs after the item's first line: expects an empty line, then one or
 -- more lines indented by four spaces or a tab, then another newline.
 -- The indentation is stripped and the text is parsed either as a run of
 -- blockquotes or as a run of paragraphs, which are returned as a list.
 parseListParaItemCommon :: ReadP [MdToken]
 parseListParaItemCommon = do
  char '\n'
  indentedLines <- greedyParse1 ((string "    " <|> string "\t") *> parseTillEol)
  -- intercalate joins the lines with newlines without a trailing newline,
  -- and unlike `init . unlines` it is total.
  let body = intercalate "\n" indentedLines
      blocks = fst (leftmostLongestParse (greedyParse1 parseBlockquote <++ greedyParse1 parsePara) body)
  char '\n'
  return blocks
 -- Parse an unordered list item: the paragraph form is tried first, and
 -- the plain line form is used only when that fails.
 parseUListItem :: ReadP MdToken
 parseUListItem = fallthroughParser [parseUListParaItem, parseUListLineItem]
 -- Parse an unordered list: one or more list items, terminated by an
 -- extra newline or by end of input.
 parseUnorderedList :: ReadP MdToken
 parseUnorderedList =
  UnordList <$> greedyParse1 parseUListItem <* (void (char '\n') <++ eof)
 -- --------
 -- Parse an ordered list item: the paragraph form is tried first, and the
 -- plain line form is used only when that fails.
 parseOListItem :: ReadP MdToken
 parseOListItem = fallthroughParser [parseOListParaItem, parseOListLineItem]
 -- Parses the first element of an ordered list, which must start with
 -- "1."; anything else fails without consuming input.
 parseFirstOListItem :: ReadP MdToken
 parseFirstOListItem = do
  upcoming <- look
  if "1." `isPrefixOf` upcoming then parseOListLineItem else pfail
 -- Parse an ordered list: a first item numbered "1.", then any number of
 -- further items (their numbers are not checked), terminated by an extra
 -- newline or by end of input.
 parseOrderedList :: ReadP MdToken
 parseOrderedList = do
  firstItem <- parseFirstOListItem
  -- Zero or more further items, so that a list consisting only of "1. ..."
  -- is still recognised as a list, just like a one-item unordered list.
  otherItems <- greedyParse parseOListItem
  void (char '\n') <++ eof
  return (OrdList (firstItem : otherItems))
 -- Block-level parsers, listed in the order in which they are tried.
 documentParsers :: [ReadP MdToken]
 documentParsers =
  [ parseHeader
  , parseBlockquote
  , parseUnorderedList
  , parseOrderedList
  , parsePara
  ]
 -- Parse a document, which is multiple paragraphs.
 parseDocument :: ReadP MdToken
-parseDocument = (many1 parsePara) >>= (\res -> return (Document (res)))
+parseDocument = do
  res <- manyTill (fallthroughParser documentParsers) eof
  return (Document res)
--- a/src/MdToHtmlTest.hs
+++ b/src/MdToHtmlTest.hs
@@ -0,0 +1,142 @@
 module MdToHtmlTest where
 import MdToHTML
 import Test.HUnit
 -- Build a test case asserting that the actual string equals the expected
 -- one; the description is passed to assertEqual as its message.
 check_equal :: String -> String -> String -> Test
 check_equal desc expected = TestCase . assertEqual desc expected
 -- Convert Markdown to HTML: parse the input as a document and render the
 -- resulting token with its Show instance, which emits the HTML.
 convert :: String -> String
 convert = show . fst . leftmostLongestParse parseDocument
 -- Heading levels 1 to 6: n '#' characters followed by "Hello" must render
 -- as <hn>Hello</hn>. The six cases are generated from the level instead
 -- of being spelled out by hand.
 headerTests :: Test
 headerTests =
  TestList
    [ check_equal
        ("Should convert H" ++ show level ++ " heading")
        ("<h" ++ show level ++ ">Hello</h" ++ show level ++ ">")
        (convert (replicate level '#' ++ " Hello"))
      | level <- [1 .. 6 :: Int]
    ]
 -- Bold and italic, alone, combined and nested. Every case has its own
 -- label so that a failure report identifies exactly one case.
 boldTests :: Test
 boldTests =
  TestList
    [ check_equal "Should convert bold" "<p><b>Hello</b></p>" (convert "__Hello__"),
      check_equal "Should not convert incomplete bold" "<p>**Hello</p>" (convert "**Hello"),
      check_equal "Should convert italic" "<p><i>Hello</i></p>" (convert "_Hello_"),
      check_equal "Should convert bold and italic in a sentence" "<p>It <i>is</i> a <b>wonderful</b> day</p>" (convert "It _is_ a __wonderful__ day"),
      check_equal "Should convert italic nested in bold" "<p><b>Bold then <i>Italic</i></b></p>" (convert "**Bold then *Italic***"),
      check_equal "Should convert bold nested in italic" "<p><i>Italic then <b>Bold</b></i></p>" (convert "*Italic then **Bold***")
    ]
 -- Strikethrough, including text that contains a lone tilde.
 strikethroughTests :: Test
 strikethroughTests =
  TestList
    [ check_equal "Should convert strikethrough" "<p><s>Hello</s></p>" (convert "~~Hello~~"),
      check_equal "Should convert long sentence with tilde" "<p><s>The universe is ~7 days old</s>. The universe is 13 billion years old.</p>" (convert "~~The universe is ~7 days old~~. The universe is 13 billion years old.")
    ]
 -- Links with plain and with formatted link text.
 linkTests :: Test
 linkTests =
  TestList
    [ check_equal "Should convert normal link" "<p><a href=\"https://example.com\">This is an example link.</a></p>" (convert "[This is an example link.](https://example.com)"),
      check_equal "Should convert styled link" "<p><a href=\"https://example.com\"><b>Fancy</b>!!!</a></p>" (convert "[__Fancy__!!!](https://example.com)")
    ]
 -- Backslash-escaped markup characters must come out as literal text.
 escapedCharTests :: Test
 escapedCharTests =
  TestList
    [ check_equal "Should print literal underscore" "<p>This is an underscore - _</p>" (convert "This is an underscore - \\_"),
      check_equal "Should print literal asterisk" "<p>This is an asterisk - *</p>" (convert "This is an asterisk - \\*"),
      check_equal "Should print literal asterisk in bold" "<p>This is a bolded asterisk - <b>*</b></p>" (convert "This is a bolded asterisk - **\\***")
    ]
 -- Blockquotes: a single quoted paragraph and nested quotes.
 blockquoteTests :: Test
 blockquoteTests =
  TestList
    [ check_equal "Should wrap para in blockquote" "<blockquote><p>What a <b>truly</b> <i>lovely</i> day!!!</p></blockquote>" (convert "> What a __truly__ _lovely_ day!!!"),
      check_equal "Simple nested blockquotes" "<blockquote><p>Hello</p><blockquote><p>World</p></blockquote></blockquote>" (convert "> Hello\n>\n>> World"),
      check_equal
        "Nested blockquotes"
        "<blockquote><p>Dorothy followed her through many \
        \of the beautiful rooms in her castle.</p><blockquote><p>The Witch \
        \bade her clean the pots and kettles and sweep the floor and keep the fire \
        \fed with wood.</p></blockquote></blockquote>"
        ( convert
            "> Dorothy followed her through many of the \
            \beautiful rooms in her castle.\n> \n>> The Witch bade her \
            \clean the pots and kettles and sweep the floor and keep the fire fed with wood."
        )
    ]
 -- Unordered lists: markers, inline formatting, nesting, paragraph and
 -- blockquote items, and lists that follow other blocks.
 unorderedListTests :: Test
 unorderedListTests =
  TestList
    [ check_equal "Basic unordered list" "<ul><li>Item 1</li><li>Item 2</li><li>Item 3</li></ul>" (convert "* Item 1\n* Item 2\n* Item 3"),
      check_equal "Mixing list indicators" "<ul><li>Item 1</li><li>Item 2</li><li>Item 3</li></ul>" (convert "* Item 1\n+ Item 2\n- Item 3"),
      check_equal "Formatted lists" "<ul><li><b>Item 1</b></li><li><i>Item 2</i></li><li><b><i>Item 3</i></b></li></ul>" (convert "* __Item 1__\n+ _Item 2_\n- ***Item 3***"),
      check_equal "Nested list" "<ul><li>Item 1</li><li>Item 2</li><li>Item 3<ul><li>Subitem 1</li><li>Subitem 2</li></ul></li></ul>" (convert "* Item 1\n* Item 2\n* Item 3\n    * Subitem 1\n    * Subitem 2"),
      check_equal "Paragraph in list" "<ul><li>Item 1</li><li><p>Item 2</p><p>More stuff</p></li><li>Item 3</li></ul>" (convert "- Item 1\n- Item 2\n\n    More stuff\n\n- Item 3"),
      check_equal "Paragraph before list" "<p>This is a list</p><ul><li>Item 1</li><li>Item 2</li></ul>" (convert "This is a list\n\n* Item 1\n* Item 2"),
      check_equal "Header before list" "<h3>This is a list</h3><ul><li>Item 1</li><li>Item 2</li></ul>" (convert "### This is a list\n\n* Item 1\n* Item 2"),
      check_equal "Nested list then back" "<ul><li>Item 1</li><li>Item 2<ul><li>Item 3</li><li>Item 4</li></ul></li><li>Item 5</li></ul>" (convert "- Item 1\n- Item 2\n    - Item 3\n    - Item 4\n- Item 5"),
      check_equal "Blockquote in list" "<ul><li>Item 1</li><li><p>Item 2</p><blockquote><p>Quote</p></blockquote></li><li>Item 3</li></ul>" (convert "- Item 1\n- Item 2\n\n    > Quote\n\n- Item 3"),
      check_equal "Ordered list in unordered list" "<ul><li>Item 1</li><li>Item 2<ol><li>Item 1</li><li>Item 2</li></ol></li><li>Item 3</li></ul>" (convert "- Item 1\n- Item 2\n    1. Item 1\n    2. Item 2\n- Item 3")
    ]
 -- Ordered lists: numbering rules, inline formatting, nesting, paragraph
 -- and blockquote items, and lists that follow other blocks.
 orderedListTests :: Test
 orderedListTests =
  TestList
    [ check_equal "Basic ordered list" "<ol><li>Item 1</li><li>Item 2</li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n3. Item 3"),
      check_equal "Mixing list numbering" "<ol><li>Item 1</li><li>Item 2</li><li>Item 3</li></ol>" (convert "1. Item 1\n3. Item 2\n2. Item 3"),
      check_equal "Should not convert list without number 1" "<p>2. Item 1 1. Item 2</p>" (convert "2. Item 1\n1. Item 2"),
      check_equal "Formatted lists" "<ol><li><b>Item 1</b></li><li><i>Item 2</i></li><li><b><i>Item 3</i></b></li></ol>" (convert "1. __Item 1__\n2. _Item 2_\n3. ***Item 3***"),
      check_equal "Nested list" "<ol><li>Item 1</li><li>Item 2</li><li>Item 3<ol><li>Subitem 1</li><li>Subitem 2</li></ol></li></ol>" (convert "1. Item 1\n2. Item 2\n3. Item 3\n    1. Subitem 1\n    2. Subitem 2"),
      check_equal "Paragraph in list" "<ol><li>Item 1</li><li><p>Item 2</p><p>More stuff</p></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n\n    More stuff\n\n1. Item 3"),
      check_equal "Paragraph before list" "<p>This is a list</p><ol><li>Item 1</li><li>Item 2</li></ol>" (convert "This is a list\n\n1. Item 1\n1. Item 2"),
      check_equal "Header before list" "<h3>This is a list</h3><ol><li>Item 1</li><li>Item 2</li></ol>" (convert "### This is a list\n\n1. Item 1\n200. Item 2"),
      check_equal "Nested list then back" "<ol><li>Item 1</li><li>Item 2<ol><li>Item 3</li><li>Item 4</li></ol></li><li>Item 5</li></ol>" (convert "1. Item 1\n2. Item 2\n    1. Item 3\n    3. Item 4\n5. Item 5"),
      check_equal "Blockquote in list" "<ol><li>Item 1</li><li><p>Item 2</p><blockquote><p>Quote</p></blockquote></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n\n    > Quote\n\n3. Item 3"),
      check_equal "Unordered list in ordered list" "<ol><li>Item 1</li><li>Item 2<ul><li>Item 1</li><li>Item 2</li></ul></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n    - Item 1\n    * Item 2\n4. Item 3")
    ]
 -- Inline code spans, including one whose backticks are split across two
 -- paragraphs and must therefore stay literal.
 codeTests :: Test
 codeTests =
  TestList
    [ check_equal "Code by itself" "<p><code>Hello world!</code></p>" (convert "`Hello world!`"),
      check_equal "Code in a paragraph" "<p>The following <code>text</code> is code</p>" (convert "The following `text` is code"),
      check_equal "Code across paragraphs (shouldn't work)" "<p>`Incomplete</p><p>Code`</p>" (convert "`Incomplete\n\nCode`")
    ]
 -- End-to-end documents that mix several block and inline constructs.
 integrationTests :: Test
 integrationTests =
  TestList
    [ check_equal "Integration 1" "<h1>Sample Markdown</h1><p>This is some basic, sample markdown.</p><h2><b>Second</b> <i>Heading</i></h2>" (convert "# Sample Markdown\n\n This is some basic, sample markdown.\n\n  ## __Second__ _Heading_"),
      check_equal "Integration 2" "<p><b>Hello</b> <i>World</i></p>" (convert "__Hello__\n_World_"),
      check_equal "Integration 3" "<h1>Hello</h1><p>World</p>" (convert "# Hello\nWorld"),
      check_equal "Integration 4" "<p>a b</p>" (convert "a\nb"),
      check_equal "Integration 5" "<h1>Hello</h1>" (convert "# Hello\n"),
      check_equal "Integration 6" "<p>First line<br>Second line</p>" (convert "First line  \nSecond line"),
      check_equal
        "Integration 7"
        "<h1>Sample Markdown</h1><p>This is some basic, sample markdown.</p><h2>Second \
        \Heading</h2><ul><li>Unordered lists, and:<ol><li>One</li><li>Two</li><li>\
        \Three</li></ol></li><li>More</li></ul><blockquote><p>Blockquote</p>\
        \</blockquote><p>And <b>bold</b>, <i>italics</i>, and even <i>italics \
        \and later <b>bold</b></i>. Even <s>strikethrough</s>. \
        \<a href=\"https://markdowntohtml.com\">A link</a> to somewhere.</p>"
        ( convert
            "# Sample Markdown\n\nThis is some basic, sample markdown.\n\n## Second \
            \Heading\n\n- Unordered lists, and:\n    1. One\n    2. Two\n    3. Three\n\
            \- More\n\n> Blockquote\n\nAnd **bold**, *italics*, and even *italics and \
            \later **bold***. Even ~~strikethrough~~. [A link](https://markdowntohtml.com) to somewhere."
        )
    ]
 -- The complete suite: every test group defined in this module.
 tests :: Test
 tests =
  TestList
    [ headerTests,
      boldTests,
      strikethroughTests,
      linkTests,
      escapedCharTests,
      blockquoteTests,
      unorderedListTests,
      orderedListTests,
      codeTests,
      integrationTests
    ]
 -- Run the complete suite with HUnit's text-based runner and return its
 -- counts.
 runTests :: IO Counts
 runTests = runTestTT tests
--- a/src/Test.hs
+++ b/src/Test.hs
@@ -1,44 +0,0 @@
 module MdToHtmlTest where
 import MdToHTML
 import Test.HUnit
 check_equal :: String -> String -> String -> Test
 check_equal desc expected actual = TestCase (assertEqual desc expected actual)
 convert :: String -> String
 convert md = show . fst $ leftmostLongestParse parseDocument md
 headerTests = TestList
      [
         check_equal "Should convert H1 heading" "<h1>Hello</h1>" (convert "# Hello"),
         check_equal "Should convert H2 heading" "<h2>Hello</h2>" (convert "## Hello"),
         check_equal "Should convert H3 heading" "<h3>Hello</h3>" (convert "### Hello"),
         check_equal "Should convert H4 heading" "<h4>Hello</h4>" (convert "#### Hello"),
         check_equal "Should convert H5 heading" "<h5>Hello</h5>" (convert "##### Hello"),
         check_equal "Should convert H6 heading" "<h6>Hello</h6>" (convert "###### Hello")
      ]
 boldTests = TestList
   [
         check_equal "Should convert bold" "<p><b>Hello</b></p>" (convert "__Hello__"),
         check_equal "Should convert italic" "<p><i>Hello</i></p>" (convert "_Hello_"),
         check_equal "Should convert bold and italic in a sentence" "<p>It <i>is</i> a <b>wonderful</b> day</p>" (convert "It _is_ a __wonderful__ day")
   ]
 integrationTests = TestList
    [
        check_equal "Integration 1" "<h1>Sample Markdown</h1><p>This is some basic, sample markdown.</p><h2><b>Second</b> <i>Heading</i></h2>" (convert "# Sample Markdown\n\n This is some basic, sample markdown.\n\n  ## __Second__ _Heading_"),
        check_equal "Integration 2" "<p><b>Hello</b> <i>World</i></p>" (convert "__Hello__\n_World_"),
        check_equal "Integration 3" "<h1>Hello</h1><p>WorldM/p>" (convert "# Hello\nWorld")
    ]
 tests = TestList
   [
      headerTests,
      boldTests,
      integrationTests
   ]
 runTests = runTestTT tests
--- a/test.hs
+++ b/test.hs
@@ -1,5 +0,0 @@
 -- | Return the first element whose length equals the length of the final
 -- element of the list.
 --
 -- NOTE(review): the name suggests the caller expects the last element to be
 -- the longest one -- confirm against the call site. The function is partial:
 -- an empty input list raises an error from 'head'.
 leftmostLongest :: (Foldable t) => [t a] -> t a
 leftmostLongest candidates = head (dropWhile lengthDiffers candidates)
   where
     -- Length of the final candidate; only forced when the list is non-empty.
     targetLen = length (last candidates)
     lengthDiffers c = length c /= targetLen
Author	SHA1	Message	Date
Aadhavan Srinivasan	09982f4ab1	Made some changes	2025-05-23 08:07:02 -04:00
Aadhavan Srinivasan	4101767aff	Merge remote-tracking branch 'origin' into fixingIncompleteElements	2025-05-20 22:24:24 -04:00
Aadhavan Srinivasan	ed7d2c1ef1	Added code parser to list line parser list	2025-05-20 22:23:41 -04:00
Aadhavan Srinivasan	234145bcb3	Started working on parsing changes to allow incomplete elements to be parsed (eg. opening bold, followed by text, without closing bold)	2025-05-20 22:21:59 -04:00
Aadhavan Srinivasan	eb20f154a4	Removed unused function	2025-05-20 22:20:36 -04:00
Aadhavan Srinivasan	172985131b	Fixed HTML rendering for horizontal rule	2025-05-20 22:20:23 -04:00
Aadhavan Srinivasan	3781e67ab1	Created a separate list of escapable chars	2025-05-20 22:20:05 -04:00
Aadhavan Srinivasan	f2d54edd3f	Implemented inline code parsing	2025-05-20 22:19:44 -04:00
Aadhavan Srinivasan	5393dc4eb9	Added code tests	2025-05-20 22:18:46 -04:00
Aadhavan Srinivasan	e051c87f08	Factor list line common parsing into a separate function; refactored OList and UList line parsing to use it	2025-05-20 16:48:31 -04:00
Aadhavan Srinivasan	9b1c51897c	A nested list can be ordered or unordered	2025-05-20 16:47:58 -04:00
Aadhavan Srinivasan	2a3dddc7b0	Rename function	2025-05-20 16:47:31 -04:00
Aadhavan Srinivasan	a8793b5adb	Remove obsolete comments	2025-05-20 16:47:16 -04:00
Aadhavan Srinivasan	eecec764ad	A list can contain a blockquote as well.	2025-05-20 16:47:04 -04:00
Aadhavan Srinivasan	cdca6ea95e	A list item doesn't need to have a nested list item parser, because the line item parser handles the nested list.	2025-05-20 16:46:52 -04:00
Aadhavan Srinivasan	540b5430e5	Use list of document parsers in parseDocument	2025-05-20 16:45:56 -04:00
Aadhavan Srinivasan	00dfba81eb	Created a list of document parsers; implemented ordered list parsing.	2025-05-20 16:45:37 -04:00
Aadhavan Srinivasan	39152c0034	Factor out common code for UList and OList parsing into a separate function. Refactored UList function; wrote OList function.	2025-05-20 16:45:07 -04:00
Aadhavan Srinivasan	41b35be7c9	Rename function	2025-05-20 16:43:35 -04:00
Aadhavan Srinivasan	d2c8565f62	Import Data.Char for isDigit	2025-05-20 16:43:22 -04:00
Aadhavan Srinivasan	62eeef2abb	Removed unused functions	2025-05-20 16:43:10 -04:00
Aadhavan Srinivasan	9c6634cfec	Added ordered list tests	2025-05-20 16:42:40 -04:00
Aadhavan Srinivasan	2a5a68b1de	Fixed test name	2025-05-20 16:42:12 -04:00
Aadhavan Srinivasan	f8e1a98bdf	Remove obsolete comment	2025-05-20 14:03:30 -04:00
Aadhavan Srinivasan	05433c31f1	Remove unused functions	2025-05-20 14:02:50 -04:00
Rockingcool	5c871f2b25	Removed test file	2025-05-20 12:25:07 -05:00
Rockingcool	5273c99e6e	Added unordered list tests and integration tests; added strikethrough tests to test list	2025-05-20 12:24:20 -05:00
Rockingcool	50888c9c3d	Added bold and strikethrough tests	2025-05-20 12:23:47 -05:00
Rockingcool	45115c765c	An unordered list must end in a blank line.	2025-05-20 12:23:27 -05:00
Rockingcool	5b0d42fd2d	Use the in-order parsing approach instead of the post-order one.	2025-05-20 12:23:13 -05:00
Rockingcool	2a585d00f2	Enforce at least one space between list indicator and list text.	2025-05-20 12:22:37 -05:00
Rockingcool	11a3b14cb1	Define a parser for list line tokens, update line token parser	2025-05-20 12:22:19 -05:00
Rockingcool	58d3142855	Update comment	2025-05-20 12:21:45 -05:00
Rockingcool	0fb651fffc	Add parseUnit instead of parseString to lineParsers	2025-05-20 12:20:51 -05:00
Rockingcool	bc05dede06	Create a list of parsers that are used for list line items.	2025-05-20 12:20:35 -05:00
Rockingcool	b69e34f823	Parse a single character as a unit	2025-05-20 12:20:10 -05:00
Rockingcool	2514ecdafc	Parse bold, italic and strikethrough in-order, instead of trying to find the end, then parsing everything in the middle. The current approach parses the opening bold (or italic), some text, then the closing bold (or italic), instead of parsing the opening, closing, then everything in between.	2025-05-20 12:19:59 -05:00
Rockingcool	c52d5556a2	Allow for multiple blank lines after header	2025-05-20 12:18:52 -05:00
Rockingcool	5fc1b1122a	Create a function to 'fallthrough parse' ie. try the second parser only if the first one fails.	2025-05-20 12:18:23 -05:00
Rockingcool	83dd0024c4	Space doesn't have to be a reserved character anymore.	2025-05-20 12:17:59 -05:00
Rockingcool	70761649ad	Derive Eq for defined types	2025-05-20 12:17:43 -05:00
Rockingcool	b9c6cc4470	Implemented strikethrough parser	2025-05-16 19:29:39 -05:00
Rockingcool	23691f9cfe	Add strikethrough parser to line parser list	2025-05-16 19:28:41 -05:00
Rockingcool	8c220cc800	A document can consist of unordered lists as well	2025-05-14 21:40:03 -05:00
Rockingcool	ee453c0259	Fixed blockquote parser; implemented unordered list parser and relevant sub-parsers	2025-05-14 21:39:27 -05:00
Rockingcool	c90d23617a	A blockquote must have a list of tokens	2025-05-14 21:38:50 -05:00
Rockingcool	c574699a8a	Added an import	2025-05-14 21:38:38 -05:00
Rockingcool	f55e160e25	Added tests for unordered lists	2025-05-14 21:37:58 -05:00
Rockingcool	dddcca0185	Wrote a lot of helper functions - most importantly greedyParse	2025-05-13 21:40:42 -05:00
Rockingcool	e7d94f225a	Updated show definitions for some tokens; add HTML syntax	2025-05-09 23:14:09 -05:00
Rockingcool	e8eb22f3ae	Added pragmas to suppress LSP warnings; removed a random line that was added by the LSP	2025-05-09 23:13:31 -05:00
Rockingcool	ef1809970b	Greatly shortened parseQuotedLine, because I can just use greedyParse instead of using manyTill	2025-05-09 23:12:53 -05:00
Rockingcool	549504d650	Consume whitespace between greater-than sign and text in a blockquote line	2025-05-09 23:12:12 -05:00
Rockingcool	4f23592aeb	Add relevant modules to other-modules section, to get rid of cabal's warnings	2025-05-09 23:10:34 -05:00
Rockingcool	b00d79b9aa	Renamed Test.hs to MdToHtmlTest.hs so that the filename matches the module name; updated blockquote tests; started working on ordered list tests	2025-05-09 23:10:04 -05:00
Rockingcool	3cd9f24935	Wrote helper functions for parseBlockquote, to parse a quoted line and multiple quoted lines.	2025-05-09 16:12:09 -05:00
Rockingcool	a60b3754e4	Rewrite leftmostLongest and leftmostLongestParse so that they don't rely on 'head' which is a partial function	2025-05-09 12:17:53 -05:00
Rockingcool	3330185393	Make MdToken a Semigroup and a Monoid, so that I can use 'mempty' for it.	2025-05-09 12:17:22 -05:00
Rockingcool	1df7f64aec	Started working on blockquote parser	2025-05-08 17:54:00 -05:00
Rockingcool	bfd627c763	Added blockquote tests	2025-05-08 17:53:37 -05:00
Rockingcool	81671727b2	Added more parsers for escaped characters and links.	2025-05-07 14:21:13 -05:00
Rockingcool	1b821c4315	Declare separate variable for escaped characters.	2025-05-07 14:20:51 -05:00
Rockingcool	51728dd3a1	Added double-quotes to link URL when printing link.	2025-05-07 14:20:29 -05:00
Rockingcool	56e1514213	Added more tests	2025-05-07 14:19:50 -05:00
Rockingcool	e7ea7b6ba6	Removed unnecessary case statement	2025-05-06 17:10:54 -05:00
Rockingcool	ef132791a1	Applied lots of hints, mostly redundant brackets	2025-05-06 17:09:54 -05:00
Rockingcool	ca0d09dfab	Added another test	2025-05-06 17:07:46 -05:00
Rockingcool	d1b0ce6b10	Move parseHeader up in the chain - parseDocument can either parse headers or a paragraph	2025-05-06 17:06:22 -05:00
Rockingcool	b6f51c33c7	Parse until EOF instead of adding a manual check.	2025-05-06 17:05:19 -05:00
Rockingcool	9ffbb7365c	Removed commented line	2025-05-06 17:05:02 -05:00
Rockingcool	71aacdd26a	Removed unnecessary parentheses	2025-05-06 17:04:24 -05:00
Rockingcool	873795e267	Removed unnecessary function	2025-05-06 17:03:45 -05:00
Aadhavan Srinivasan	9a128407cc	Fixed newline bug; working on fixing the last failing test	2025-05-05 11:31:24 -04:00
Rockingcool	a18d03e4ac	Working on parsing single newlines	2025-05-05 09:42:53 -05:00
Rockingcool	f916267d29	Changed parsing of header to consume optional newline; removed debug statements	2025-05-04 16:22:53 -05:00
Rockingcool	9555f55575	Fixed typo in test	2025-05-04 16:22:35 -05:00