Added more tests

Improve document parser to parse extra newlines
Fix bug in fallthrough parser
2025-07-30 15:34:43 -04:00 · 2025-07-30 15:32:35 -04:00 · 2025-07-30 15:29:25 -04:00 · 2025-07-30 15:29:03 -04:00 · 2025-07-30 15:28:35 -04:00 · 2025-07-22 09:56:41 -04:00
6 changed files with 738 additions and 166 deletions
--- a/app/Main.hs
+++ b/app/Main.hs
@@ -1,8 +1,30 @@
 module Main where

 import MdToHTML
+import System.Environment
+import System.IO
+
+readLinesHelper :: [String] -> IO [String]
+readLinesHelper xs = do
+  done <- isEOF
+  if done
+    then return xs
+    else do
+      line <- getLine
+      let xs' = line : xs
+      readLinesHelper xs'
+
+readLines :: IO [String]
+readLines = reverse <$> readLinesHelper []

 main :: IO ()
 main = do
-   let res = fst $ leftmostLongestParse parseDocument "# _Hello_\n"
-   putStrLn (show res)
+  args <- getArgs
+  fileContents <- case args of
+    [] -> getContents
+    x : _ -> readFile x
+  let res = leftmostLongestParse parseDocument fileContents
+  let toPrint = prettyPrint res
+  case reverse toPrint of
+    '\n' : _ -> putStr toPrint
+    _ -> putStrLn toPrint
--- a/md-to-html.cabal
+++ b/md-to-html.cabal
@@ -56,18 +56,22 @@ common warnings
 library
   hs-source-dirs:   src
   exposed-modules: MdToHTML
+   other-modules: MdToHtmlTest
   build-depends:    base ^>=4.19.1.0,
-                      HUnit
+                      HUnit,
+                      megaparsec,
+                      parser-combinators,
+                      text,
+                      MissingH,
+                      word-wrap

-executable md-to-html-runner
+executable mdtoh
    -- Import common warning flags.
    import:           warnings

    -- .hs or .lhs file containing the Main module.
    main-is:          Main.hs

-    -- Modules included in this executable, other than Main.
-    -- other-modules:

    -- LANGUAGE extensions used by modules in this package.
    -- other-extensions:
--- a/src/MdToHTML.hs
+++ b/src/MdToHTML.hs
@@ -1,185 +1,577 @@
+{-# OPTIONS_GHC -Wno-unrecognised-pragmas #-}
+
+{-# HLINT ignore "Use lambda-case" #-}
+
 module MdToHTML where

-import Control.Applicative
+import Control.Applicative hiding (many, some)
 import Control.Monad
+import Control.Monad.Combinators (count)
+import Data.Char
 import Data.List
+import Data.Ord (comparing)
+import Data.String.Utils
+import qualified Data.Text as T
+import Data.Void
 import Debug.Trace
-import Text.ParserCombinators.ReadP
+import Text.Megaparsec
+import Text.Megaparsec.Char
 import Text.Printf
+import Text.Wrap
+
+type Parser = Parsec Void T.Text

 type HeaderLevel = Int

-newtype URL = URL {getUrl :: String}
+type CssClass = String

-newtype ImgPath = ImgPath {getPath :: String}
+newtype URL = URL {getUrl :: String} deriving (Eq)

-parseMany :: ReadP a -> ReadP [a]
-parseMany = Text.ParserCombinators.ReadP.many
+newtype ImgPath = ImgPath {getPath :: String} deriving (Eq)

 data MdToken
  = Document [MdToken]
  | Header HeaderLevel MdToken
  | Para MdToken
  | Line [MdToken]
+  | SingleNewline -- A single newline is rendered as a space.
  | Linebreak
  | HorizontalRule
-  | Blockquote MdToken
+  | Blockquote [MdToken]
  | UnordList [MdToken]
  | OrdList [MdToken]
-  | Code String
-  | Codeblock String
+  | Checkbox Bool
+  | Code MdToken
+  | Table [[MdToken]]
+  | Codeblock MdToken
  | Link MdToken URL
-  | Image MdToken ImgPath
+  | Image MdToken URL (Maybe [CssClass])
+  | Figure MdToken URL (Maybe [CssClass])
  | Bold MdToken
  | Italic MdToken
+  | Superscript MdToken
+  | Subscript MdToken
  | Strikethrough MdToken
  | Unit String
+  deriving (Eq)

 -- Deriving Show for MdToken
 instance Show MdToken where
-  show (Document tokens) = concat (map show tokens)
+  show (Document tokens) = concatMap show tokens
  show (Header level token) = "<h" ++ show level ++ ">" ++ show token ++ "</h" ++ show level ++ ">"
  show (Para token) = "<p>" ++ show token ++ "</p>"
-  show (Line tokens) = concat (map show tokens)
-  show Linebreak = "<br>"
-  show HorizontalRule = "---------"
-  show (Blockquote token) = "BLOCK" ++ show token
-  show (UnordList tokens) = "UNORD" ++ concat (map show tokens)
-  show (OrdList tokens) = "ORD" ++ concat (map show tokens)
-  show (Code code) = show code
-  show (Codeblock code) = show code
-  show (Link txt url) = "<a href=" ++ (getUrl url) ++ ">" ++ show txt ++ "</a>"
-  show (Image txt imgPath) = "<img src=" ++ (getPath imgPath) ++ ">" ++ show txt ++ "</img>"
+  show (Line tokens) = concatMap show tokens
+  show Linebreak = "<br />"
+  show SingleNewline = " "
+  show HorizontalRule = "<hr>"
+  show (Blockquote tokens) = "<blockquote>" ++ concatMap show tokens ++ "</blockquote>"
+  show (UnordList tokens) = "<ul>" ++ concatMap (prepend "<li>" . append "</li>" . show) tokens ++ "</ul>"
+  show (OrdList tokens) = "<ol>" ++ concatMap (prepend "<li>" . append "</li>" . show) tokens ++ "</ol>"
+  show (Checkbox isChecked) = "<input type=\"checkbox\"" ++ (if isChecked then " checked=\"\"" else "") ++ " />"
+  show (Code code) = "<code>" ++ strip (show code) ++ "</code>"
+  show (Table (thead : tokenGrid)) = "<table><thead><tr>" ++ concatMap (\x -> "<th>" ++ rstrip (show x) ++ "</th>") thead ++ "</tr></thead>" ++ "<tbody>" ++ concatMap (\x -> "<tr>" ++ concatMap (\y -> "<td>" ++ rstrip (show y) ++ "</td>") x ++ "</tr>") tokenGrid ++ "</tbody></table>"
+  show (Codeblock code) = "<pre><code>" ++ show code ++ "</code></pre>"
+  show (Link txt url) = "<a href=\"" ++ getUrl url ++ "\">" ++ show txt ++ "</a>"
+  show (Image txt url cssClasses) = "<img src=\"" ++ getUrl url ++ "\"" ++ " alt=\"" ++ show txt ++ "\"" ++ maybe "" (\classes -> " class=\"" ++ unwords classes ++ "\"") cssClasses ++ "/>"
+  show (Figure txt url cssClasses) = "<figure><img src=\"" ++ getUrl url ++ "\" alt=\"" ++ show txt ++ "\"" ++ maybe "" (\classes -> " class=\"" ++ unwords classes ++ "\"") cssClasses ++ "/><figcaption aria-hidden=\"true\">" ++ show txt ++ "</figcaption></figure>"
  show (Bold token) = "<b>" ++ show token ++ "</b>"
  show (Italic token) = "<i>" ++ show token ++ "</i>"
+  show (Superscript token) = "<sup>" ++ show token ++ "</sup>"
+  show (Subscript token) = "<sub>" ++ show token ++ "</sub>"
  show (Strikethrough token) = "<s>" ++ show token ++ "</s>"
  show (Unit unit) = printf "%s" unit

+-- Pretty print the given token into a string.
+-- This is the same as calling 'show' for most tokens, but is different for paragraphs and tables,
+-- which have newlines inserted into them.
+prettyPrint :: MdToken -> String
+prettyPrint (Para token) = "<p>" ++ T.unpack (wrapText defaultWrapSettings 70 (T.pack $ prettyPrint token)) ++ "</p>\n"
+prettyPrint (Table (thead : tokenGrid)) = "<table>\n<thead>\n<tr>\n" ++ concatMap (\x -> "<th>" ++ rstrip (prettyPrint x) ++ "</th>\n") thead ++ "</tr>\n</thead>\n" ++ "<tbody>\n" ++ concatMap (\x -> "<tr>\n" ++ concatMap (\y -> "<td>" ++ rstrip (prettyPrint y) ++ "</td>\n") x ++ "</tr>\n") tokenGrid ++ "</tbody>\n</table>\n"
+prettyPrint Linebreak = "<br />\n"
+prettyPrint HorizontalRule = "<hr>\n"
+prettyPrint (Line tokens) = concatMap prettyPrint tokens
+prettyPrint (Document tokens) = concatMap prettyPrint tokens
+prettyPrint token = show token
+
+instance Semigroup MdToken where
+  a <> b = Document [a, b]
+
+instance Monoid MdToken where
+  mempty = Unit ""
+
 -- ---------------
 -- Helpers
-mustBeHash :: ReadP Char
-mustBeHash = satisfy (\x -> x == '#')
-
-leftmostLongest :: (Foldable t) => [(a, t b)] -> (a, t b)
+leftmostLongest :: (Foldable t) => [(a, t b)] -> Maybe (a, t b)
 leftmostLongest xs =
-  let lastElem = (last xs)
-      filteredLst = (filter (\val -> (length $ snd val) == (length $ snd lastElem)) xs)
-   in head filteredLst
+  let lastElem = last xs
+      filteredLst = filter (\val -> length (snd val) == length (snd lastElem)) xs
+   in case filteredLst of
+        [] -> Nothing
+        (x : xs) -> Just x

 -- Get the first parse returned by readP_to_S that consumed the most input
-leftmostLongestParse :: ReadP a -> String -> (a, String)
-leftmostLongestParse parser input = leftmostLongest $ readP_to_S parser input
+leftmostLongestParse :: (Monoid a) => Parser a -> String -> a
+leftmostLongestParse parser input =
+  case runParser parser "input" (T.pack input) of
+    (Left a) -> mempty
+    (Right a) -> a

-- Parse if the string that's left matches the string comparator function
-lookaheadParse :: (String -> Bool) -> ReadP Char
-lookaheadParse stringCmp = do
-  lookahead <- look
-  case stringCmp lookahead of
-    True -> get
-    False -> pfail
+specialChars = ">\n\\`*_{}[]#+|"

-lineToList :: MdToken -> [MdToken]
-lineToList (Line tokens) = tokens
+escapableChars = "-~!.$()" ++ specialChars
+
+-- Makes a parser greedy. Instead of returning all possible parses, only the longest one is returned.
+greedyParse :: Parser a -> Parser [a]
+greedyParse parser = do
+  greedyParse1 parser <|> return []
+
+-- Like greedyParse, but the parser must succeed atleast once.
+greedyParse1 :: Parser a -> Parser [a]
+greedyParse1 parser = do
+  parsed1 <- parser
+  parsed2 <- greedyParse1 parser <|> return []
+  return (parsed1 : parsed2)
+
+prepend :: [a] -> [a] -> [a]
+prepend x1 x2 = x1 ++ x2
+
+append :: [a] -> [a] -> [a]
+append x1 x2 = x2 ++ x1
+
+-- Parse until EOL or EOF
+parseTillEol :: Parser String
+parseTillEol = manyTill anySingle (void (char '\n') <|> eof)
+
+-- Takes a list of parsers. Returns a parser that will try them in
+-- order, moving to the next one only if the current one fails.
+fallthroughParser :: [Parser a] -> Parser a
+fallthroughParser [x] = try x
+fallthroughParser (x : xs) = try x <|> fallthroughParser xs
+
+escapeChar :: Char -> String
+escapeChar '>' = "&gt;"
+escapeChar '<' = "&lt;"
+escapeChar '&' = "&amp;"
+escapeChar x = [x]
+
+htmlEscapeChars :: T.Text -> T.Text
+htmlEscapeChars = T.concatMap (T.pack . escapeChar)
+
+-- -- Wraps a list of words after (at most) the given number of characters, trying to prevent word-breaks
+-- wordwrap :: Int -> String -> String
+-- wordwrap wraplength str = if (length str) < wraplength
+--     then str
+--     else
+--         let spaceIndex = lastgtSpaceIndex 0 (takeRev (length str) - wraplength str)
+--
+--     where
+--         takeRev n = (reverse . take n . reverse)
+--         lastSpaceIndex counter str = case str of
+--             [] -> counter
+--             x:xs -> if (isSpace x) counter else lastSpaceIndex counter+1 xs

 -- ---------------

 -- Parse a markdown header, denoted by 1-6 #'s followed by some text, followed by EOL.
-parseHeader :: ReadP MdToken
+parseHeader :: Parser MdToken
 parseHeader = do
-  traceM "Reached parseHeader"
-  skipSpaces
-  headers <- many1 mustBeHash
+  space
+  headers <- greedyParse1 (char '#')
  when
-    ((length headers) > 6)
-    pfail
-  _ <- string " "
-  text <- munch1 (\x -> x /= '\n') -- Parse until EOL
-  let parsedText = fst $ leftmostLongestParse parseLine text
-  traceM (show parsedText)
-  traceM (show (length headers))
-  return (Header (length headers) parsedText)
+    (length headers > 6)
+    empty
+  space
+  parsedText <- manyTill parseLineToken (void (char '\n') <|> eof)
+  greedyParse (char '\n')
+  return (Header (length headers) (Line parsedText))
+
+asteriskBold = T.pack "**"
+
+underscoreBold = T.pack "__"

 -- Parse bold text
-parseBold :: ReadP MdToken
-parseBold = do
-  traceM "Reached parseBold"
-  text <-
-    choice
-      [ between (string "__") (string "__") (many1 (lookaheadParse (/= "__"))),
-        between (string "**") (string "**") (many1 (lookaheadParse (/= "**")))
-      ]
-  let parsedText = fst $ leftmostLongestParse parseLine text
-  return (Bold parsedText)
+parseBold :: Parser MdToken
+parseBold = parseBoldWith asteriskBold <|> parseBoldWith underscoreBold
+  where
+    parseBoldWith delim = do
+      string delim
+      inside <- someTill parseLineToken $ string delim
+      return (Bold (Line inside))

 -- Parse italic text
-parseItalic :: ReadP MdToken
-parseItalic = do
-  traceM "Reached parseItalic"
-  text <-
-    choice
-      [ (between (string "_") (string "_") (munch1 (/= '_'))),
-        (between (string "*") (string "*") (munch1 (/= '*')))
-      ]
-  let parsedText = fst $ leftmostLongestParse parseLine text
-  return (Italic parsedText)
+parseItalic :: Parser MdToken
+parseItalic = parseItalicWith '*' <|> parseItalicWith '_'
+  where
+    parseItalicWith delim = do
+      char delim
+      inside <- someTill parseLineToken (char delim)
+      return (Italic (Line inside))
+
+-- Parse subscript
+parseSubscript :: Parser MdToken
+parseSubscript = do
+  char '~'
+  inside <- someTill parseLineToken (char '~')
+  return (Subscript (Line inside))
+
+-- Parse superscript
+parseSuperscript :: Parser MdToken
+parseSuperscript = do
+  char '^'
+  inside <- someTill parseLineToken (char '^')
+  return (Superscript (Line inside))
+
+-- Parse strikethrough text
+parseStrikethrough :: Parser MdToken
+parseStrikethrough = do
+  string (T.pack "~~")
+  inside <- someTill parseLineToken $ string (T.pack "~~")
+  return (Strikethrough (Line inside))
+
+-- Parse code
+parseCode :: Parser MdToken
+parseCode = do
+  opening <- some $ char '`'
+  inside <- someTill (satisfy (/= '\n')) (char '`')
+  closing <- count (length opening - 1) (char '`')
+  return (Code (Unit (concatMap escapeChar inside)))
+
+-- Parse a link
+parseLink :: Parser MdToken
+parseLink = do
+  char '['
+  linkText <- manyTill parseLineToken (char ']')
+  char '('
+  linkURL <- manyTill anySingle (char ')')
+  return $ Link (Line linkText) (URL linkURL)

 -- Parse a linebreak character
-parseLinebreak :: ReadP MdToken
-parseLinebreak = do
-  traceM "Reached parseLinebreak"
+parseLinebreak :: Parser MdToken
+parseLinebreak = parseLinebreakSpace <|> parseLinebreakBackslash
+  where
+    parseLinebreakSpace = do
      char ' '
-  many1 (char ' ')
+      some (char ' ')
+      char '\n'
+      return Linebreak
+    parseLinebreakBackslash = try $ do
+      char '\\'
      char '\n'
      return Linebreak

-- Parse a regular string as a Unit.
-parseString :: ReadP MdToken
-parseString = do
-  traceM "Reached parseString"
-  firstChar <- get -- Must parse at least one character here
-  text <- munch (\x -> not (elem x "#*_[\n "))
-  return (Unit (firstChar : text))
+parseTableRow :: Parser [MdToken]
+parseTableRow = do
+  char '|'
+  row <- some (many (satisfy (\x -> x == ' ' || x == '\t')) *> someTill parseListLineToken (char '|'))
+  return (map Line row)

-lineParsers :: [ReadP MdToken]
-lineParsers = [parseLinebreak, parseBold, parseItalic, parseString] -- A 'line' doesn't include a 'header'
+parseTable :: Parser MdToken
+parseTable = do
+  tableHead <- parseTableRow
+  char '\n'
+  char '|'
+  sepEndBy1 (some (char '-')) (char '|') *> char '\n'
+  tableBody <- sepEndBy parseTableRow (char '\n')
+  many (char '\n') -- Parse trailing newlines, if any
+  return $ Table (tableHead : tableBody)
+
+parseSingleNewline :: Parser MdToken
+parseSingleNewline = do
+  char '\n'
+  remaining <- getInput
+  case T.unpack remaining of
+    [] -> return $ Unit ""
+    _ -> return SingleNewline
+
+parseCssClasses :: Parser [CssClass]
+parseCssClasses = do
+  char '{'
+  classes <- some parseCssClass
+  char '}'
+  return classes
+  where
+    parseCssClass :: Parser CssClass
+    parseCssClass = do
+      char '.'
+      let firstLetterParser = char '_' <|> char '-' <|> label "letter" (satisfy isAlpha)
+      cssClassFirstLetter <- firstLetterParser
+      cssClass <- many (firstLetterParser <|> label "digit" (satisfy isDigit))
+      space
+      return (cssClassFirstLetter : cssClass)
+
+parseImage :: Parser MdToken
+parseImage = do
+  char '!'
+  link <- parseLink
+  cssClasses <- optional $ try parseCssClasses
+  case link of
+    Link text path -> return $ Image text path cssClasses
+    _ -> empty -- This should never be reached
+
+parseFigure = do
+  img <- parseImage
+  void (string doubleNewlineText) <|> eof
+  case img of
+    Image text path cssClasses -> return $ Figure text path cssClasses
+    _ -> return img
+
+-- Parse an escaped character
+parseEscapedChar :: Parser MdToken
+parseEscapedChar = do
+  char '\\'
+  escapedChar <- choice (map char escapableChars) -- Parse any of the special chars.
+  return (Unit [escapedChar])
+
+-- Parse a character as a Unit.
+parseUnit :: Parser MdToken
+parseUnit = do
+  -- text <- satisfy (`notElem` specialChars)
+  text <- anySingle
+  return (Unit [text])
+
+-- Parse any character except a newline
+parseUnitExceptNewline :: Parser MdToken
+parseUnitExceptNewline = do
+  -- text <- satisfy (`notElem` specialChars)
+  text <- satisfy (/= '\n')
+  return (Unit [text])
+
+lineParsers :: [Parser MdToken]
+lineParsers =
+  [ parseLinebreak,
+    parseSingleNewline,
+    parseEscapedChar,
+    parseCode,
+    parseImage,
+    parseBold,
+    parseItalic,
+    parseStrikethrough,
+    parseSubscript,
+    parseSuperscript,
+    parseLink,
+    parseUnit
+  ] -- A 'line' doesn't include a 'header'
+
+lineParsersWithoutNewline :: [Parser MdToken]
+lineParsersWithoutNewline =
+  [ parseEscapedChar,
+    parseCode,
+    parseImage,
+    parseBold,
+    parseItalic,
+    parseStrikethrough,
+    parseSubscript,
+    parseSuperscript,
+    parseLink,
+    parseUnitExceptNewline
+  ] -- A list line cannot contain newlines.

 -- List of all parsers
-allParsers :: [ReadP MdToken]
+allParsers :: [Parser MdToken]
 allParsers = parseHeader : lineParsers

-- Parse any of the above tokens.
-parseLineToken :: ReadP MdToken
-parseLineToken = choice lineParsers
+-- Parse any of the line tokens.
+parseLineToken :: Parser MdToken
+parseLineToken = fallthroughParser lineParsers
+
+-- Parse any of the list line tokens.
+parseListLineToken :: Parser MdToken
+parseListLineToken = fallthroughParser lineParsersWithoutNewline

 -- Parse a line, consisting of one or more tokens.
-parseLine :: ReadP MdToken
+parseLine :: Parser MdToken
 parseLine = do
-  traceM "Reached parseLine"
-  skipSpaces
+  space
  -- Fail if we have reached the end of the document.
-  remaining <- look
-  when (null remaining) pfail
-  parsed <- parseMany parseLineToken
-  --         traceM $ show parsed
+  parsed <- manyTill parseLineToken eof
  return (Line parsed)

 -- Parse a paragraph, which is a 'Line' (can span multiple actual lines), separated by double-newlines.
-- As a weird special case, a 'Paragraph' can also be a 'Header'.
-parsePara :: ReadP MdToken
+parsePara :: Parser MdToken
 parsePara = do
-  traceM "Reached parsePara"
-  parseMany (char '\n')
+  space
  --  text <- many1 (lookaheadParse (\x -> ((length x) < 2) || (take 2 x) /= "\n\n")) -- Parse until a double-newline.
  --  string "\n\n" <|> (eof >> return "") -- Consume the next double-newline or EOF.
-  text <- (manyTill get ((string "\n\n") <|> (eof >> return "")))
-  when (null text) pfail
-  let parsedText = fst $ leftmostLongestParse (parseHeader <|> parseLine) text -- Parse either a line or a header.
-  traceM (show parsedText)
-  -- If the paragraph is a header, return a Header token. Otheriwse return a Para token.
-  case parsedText of
-    Header level token -> return (Header level token)
-    _ -> return (Para parsedText)
+  parsedText <- someTill parseLineToken (try paraEnding)
+  many (char '\n')
+  return (Para (Line parsedText))
+  where
+    paraEnding = void (char '\n' *> (char '\n' <|> lookAhead (char '>'))) <|> eof
+
+-- Parse a line starting with '>', return the line except for the '>'.
+parseQuotedLine :: Parser String
+parseQuotedLine = do
+  char '>'
+  many (char ' ' <|> char '\t')
+  restOfLine <- many (satisfy (/= '\n'))
+  void (char '\n') <|> eof
+  return restOfLine
+
+-- Parse many 'quoted lines' until I see a non-quoted line.
+parseQuotedLines :: Parser [String]
+parseQuotedLines = some parseQuotedLine
+
+-- some $ do
+--   getInput >>= \line ->
+--     case T.unpack line of
+--       ('>' : _) -> parseQuotedLine
+--       _ -> empty
+
+-- Parse a blockquote, which is a greater-than sign followed by a paragraph.
+parseBlockquote :: Parser MdToken
+parseBlockquote = do
+  quotedLines <- parseQuotedLines
+  -- remaining <- look
+  -- let quotedLines = fst $ leftmostLongestParse parseQuotedLines remaining
+  -- string (init $ unlines quotedLines)
+  let parsedQuotedLines = leftmostLongestParse (some (parseBlockquote <|> parsePara)) (init $ unlines quotedLines) -- unlines joins the lines together with a newline, and adds a trailing newline. init removes the trailing newline.
+  return (Blockquote parsedQuotedLines)
+
+-- Parse a checkbox
+parseCheckbox :: Parser MdToken
+parseCheckbox = do
+  char '['
+  inside <- char ' ' <|> char 'x'
+  char ']'
+  space
+  return (if inside == 'x' then Checkbox True else Checkbox False)
+
+-- Parse a nested list item.
+parseListNested :: Parser MdToken
+parseListNested = do
+  let firstCharParser = (<>) <$> (string (T.pack "  ") <|> string (T.pack "\t")) <*> (T.pack <$> many (char ' '))
+  let restOfLineParser = manyTill anySingle (void (char '\n') <|> eof)
+  -- For the first line, I manually run firstCharParser and restOfLineParser. The
+  -- result of firstCharParser is saved. For every subsequent line, I parse exactly
+  -- the same string as firstCharParser.
+  firstLineSpaces <- firstCharParser
+  firstLine <- restOfLineParser
+  lines <- greedyParse (string firstLineSpaces *> restOfLineParser)
+  let allLines = firstLine : lines
+  let linesParsed = leftmostLongestParse (parseUnorderedList <|> parseOrderedList) (init $ unlines allLines)
+  when (null (show linesParsed)) empty
+  return linesParsed
+
+-- Parse an unordered list line item.
+parseUListLineItem :: Parser MdToken
+parseUListLineItem = do
+  firstChar <- choice (map char ['*', '+', '-'])
+  char ' ' -- At least one space between list indicator and list text.
+  parseListLineItemCommon
+
+-- Parse an ordered list line item.
+parseOListLineItem :: Parser MdToken
+parseOListLineItem = do
+  num <- greedyParse1 (satisfy isDigit)
+  char '.'
+  char ' ' -- At least one space between list indicator and list text.
+  parseListLineItemCommon
+
+-- Common code for parsing list line items
+parseListLineItemCommon :: Parser MdToken
+parseListLineItemCommon = do
+  space
+  checkbox <- optional $ try parseCheckbox
+  restOfLine <- manyTill parseListLineToken (void (char '\n') <|> eof)
+  nestedList <- try parseListNested <|> return (Unit "")
+  case checkbox of
+    Just box -> return $ Line [box, Line restOfLine, nestedList]
+    Nothing -> return $ Line [Line restOfLine, nestedList]
+
+-- Parse an unordered list paragraph item.
+parseUListParaItem :: Parser MdToken
+parseUListParaItem = do
+  firstLine <- parseUListLineItem
+  res <- parseListParaItemCommon
+  return $ Document (Para firstLine : res) -- I only wrap this in a document because I want some way of converting [MdToken] to MdToken, without any overhead. There is no other reason to wrap it in a Document.
+
+-- Parse an unordered list paragraph item.
+parseOListParaItem :: Parser MdToken
+parseOListParaItem = do
+  firstLine <- parseOListLineItem
+  res <- parseListParaItemCommon
+  return $ Document (Para firstLine : res) -- I only wrap this in a document because I want some way of converting [MdToken] to MdToken, without any overhead. There is no other reason to wrap it in a Document.
+
+-- Common code for parsing list paragraph items.
+-- A list paragraph item is defined as a line item, followed by an empty line, followed by one or more
+-- lines indented by a space or tab.
+-- A list paragraph item can also be a blockquote.
+parseListParaItemCommon :: Parser [MdToken]
+parseListParaItemCommon = do
+  char '\n'
+  lines <- greedyParse1 ((string (T.pack "    ") <|> string (T.pack "\t")) *> parseTillEol)
+  let res = leftmostLongestParse (greedyParse1 parseBlockquote <|> greedyParse1 parsePara) (init $ unlines lines)
+  char '\n'
+  return res -- I only wrap this in a document because I want some way of converting [MdToken] to MdToken, without any overhead. There is no other reason to wrap it in a Document.
+
+-- Parse an unordered list item, which can be a line item or another list.
+parseUListItem :: Parser MdToken
+parseUListItem = space *> (try parseUListParaItem <|> parseUListLineItem)
+
+-- Parse an unordered list.
+parseUnorderedList :: Parser MdToken
+parseUnorderedList = do
+  lineItems <- some $ try parseUListItem
+  void (char '\n') <|> eof -- A list must end in an extra newline or eof
+  return $ UnordList lineItems
+
+-- --------
+
+parseOListItem :: Parser MdToken
+parseOListItem = space *> (try parseOListParaItem <|> parseOListLineItem)
+
+-- Parses the first element of an ordered list, which must start with '1.'
+parseFirstOListItem :: Parser MdToken
+parseFirstOListItem = do
+  space
+  remaining <- getInput
+  when (take 2 (T.unpack remaining) /= "1.") empty
+  parseOListLineItem
+
+parseOrderedList :: Parser MdToken
+parseOrderedList = do
+  firstLine <- try parseFirstOListItem
+  lineItems <- many $ try parseOListItem
+  void (char '\n') <|> eof
+  return $ OrdList (firstLine : lineItems)
+
+horizontalRuleText :: T.Text
+horizontalRuleText = T.pack "---"
+
+doubleNewlineText :: T.Text
+doubleNewlineText = T.pack "\n\n"
+
+parseHorizontalRule :: Parser MdToken
+parseHorizontalRule = parseHorizontalRuleLine *> (void (string doubleNewlineText) <|> eof) *> return HorizontalRule
+  where
+    parseHorizontalRuleLine = fallthroughParser (map (string . T.pack) ["---", "***", "___", "- - -", "* * *", "_ _ _"])
+
+parseCodeblock :: Parser MdToken
+parseCodeblock = do
+  string (T.pack "```")
+  _ <- many $ satisfy (/= '\n') -- Language name
+  char '\n'
+  inside <- someTill anySingle (string (T.pack "\n```"))
+  return $ Codeblock (Unit (concatMap escapeChar inside))
+
+documentParsers :: [Parser MdToken]
+documentParsers =
+  [ parseHorizontalRule,
+    parseCodeblock,
+    parseTable,
+    parseHeader,
+    parseBlockquote,
+    parseUnorderedList,
+    parseOrderedList,
+    parseFigure,
+    parsePara
+  ]

 -- Parse a document, which is multiple paragraphs.
-parseDocument :: ReadP MdToken
-parseDocument = (many1 parsePara) >>= (\res -> return (Document (res)))
+parseDocument :: Parser MdToken
+parseDocument = do
+  --  res <- manyTill (fallthroughParser documentParsers <|> (char '\n' *> return $ Unit "")) eof
+  res <- sepEndBy (fallthroughParser documentParsers) (many $ char '\n')
+  -- many $ char '\n'
+  eof
+  return (Document res)
--- a/src/MdToHtmlTest.hs
+++ b/src/MdToHtmlTest.hs
@@ -0,0 +1,203 @@
+module MdToHtmlTest where
+
+import MdToHTML
+import Test.HUnit
+
+check_equal :: String -> String -> String -> Test
+check_equal desc expected actual = TestCase (assertEqual desc expected actual)
+
+convert :: String -> String
+convert md = show $ leftmostLongestParse parseDocument md
+
+headerTests =
+  TestList
+    [ check_equal "Should convert H1 heading" "<h1>Hello</h1>" (convert "# Hello"),
+      check_equal "Should convert H2 heading" "<h2>Hello</h2>" (convert "## Hello"),
+      check_equal "Should convert H3 heading" "<h3>Hello</h3>" (convert "### Hello"),
+      check_equal "Should convert H4 heading" "<h4>Hello</h4>" (convert "#### Hello"),
+      check_equal "Should convert H5 heading" "<h5>Hello</h5>" (convert "##### Hello"),
+      check_equal "Should convert H6 heading" "<h6>Hello</h6>" (convert "###### Hello")
+    ]
+
+boldTests =
+  TestList
+    [ check_equal "Should convert bold" "<p><b>Hello</b></p>" (convert "__Hello__"),
+      check_equal "Should convert italic" "<p><i>Hello</i></p>" (convert "_Hello_"),
+      check_equal "Should convert bold and italic in a sentence" "<p>It <i>is</i> a <b>wonderful</b> day</p>" (convert "It _is_ a __wonderful__ day"),
+      check_equal "Should convert nested bold and italic" "<p><b>Bold then <i>Italic</i></b></p>" (convert "**Bold then _Italic_**"),
+      check_equal "Should convert nested bold and italic" "<p><i>Italic then <b>Bold</b></i></p>" (convert "*Italic then __Bold__*")
+    ]
+
+strikethroughTests =
+  TestList
+    [ check_equal "Should convert strikethrough" "<p><s>Hello</s></p>" (convert "~~Hello~~"),
+      check_equal "Should convert long sentence with tilde" "<p><s>The universe is ~7 days old</s>. The universe is 13 billion years old.</p>" (convert "~~The universe is \\~7 days old~~. The universe is 13 billion years old.")
+    ]
+
+linkTests =
+  TestList
+    [ check_equal "Should convert normal link" "<p><a href=\"https://example.com\">This is an example link.</a></p>" (convert "[This is an example link.](https://example.com)"),
+      check_equal "Should convert styled link" "<p><a href=\"https://example.com\"><b>Fancy</b>!!!</a></p>" (convert "[__Fancy__!!!](https://example.com)")
+    ]
+
+escapedCharTests =
+  TestList
+    [ check_equal "Should print literal underscore" "<p>This is an underscore - _</p>" (convert "This is an underscore - \\_"),
+      check_equal "Should print literal asterisk" "<p>This is an asterisk - *</p>" (convert "This is an asterisk - \\*"),
+      check_equal "Should print literal asterisk in bold" "<p>This is a bolded asterisk - <b>*</b></p>" (convert "This is a bolded asterisk - **\\***")
+    ]
+
+blockquoteTests =
+  TestList
+    [ check_equal "Should wrap para in blockquote" "<blockquote><p>What a <b>truly</b> <i>lovely</i> day!!!</p></blockquote>" (convert "> What a __truly__ _lovely_ day!!!"),
+      check_equal "Simple nested blockquotes" "<blockquote><p>Hello</p><blockquote><p>World</p></blockquote></blockquote>" (convert "> Hello\n>\n>> World"),
+      check_equal
+        "Nested blockquotes"
+        "<blockquote><p>Dorothy followed her through many \
+        \of the beautiful rooms in her castle.</p><blockquote><p>The Witch \
+        \bade her clean the pots and kettles and sweep the floor and keep the fire \
+        \fed with wood.</p></blockquote></blockquote>"
+        ( convert
+            "> Dorothy followed her through many of the \
+            \beautiful rooms in her castle.\n> \n>> The Witch bade her \
+            \clean the pots and kettles and sweep the floor and keep the fire fed with wood."
+        )
+    ]
+
+unorderedListTests =
+  TestList
+    [ check_equal "Basic unordered list" "<ul><li>Item 1</li><li>Item 2</li><li>Item 3</li></ul>" (convert "* Item 1\n* Item 2\n* Item 3"),
+      check_equal "Mixing list indicators" "<ul><li>Item 1</li><li>Item 2</li><li>Item 3</li></ul>" (convert "* Item 1\n+ Item 2\n- Item 3"),
+      check_equal "Formatted lists" "<ul><li><b>Item 1</b></li><li><i>Item 2</i></li><li><b><i>Item 3</i></b></li></ul>" (convert "* __Item 1__\n+ _Item 2_\n- ***Item 3***"),
+      check_equal "Nested list" "<ul><li>Item 1</li><li>Item 2</li><li>Item 3<ul><li>Subitem 1</li><li>Subitem 2</li></ul></li></ul>" (convert "* Item 1\n* Item 2\n* Item 3\n    * Subitem 1\n    * Subitem 2"),
+      check_equal "Paragraph in list" "<ul><li>Item 1</li><li><p>Item 2</p><p>More stuff</p></li><li>Item 3</li></ul>" (convert "- Item 1\n- Item 2\n\n    More stuff\n\n- Item 3"),
+      check_equal "Paragraph before list" "<p>This is a list</p><ul><li>Item 1</li><li>Item 2</li></ul>" (convert "This is a list\n\n* Item 1\n* Item 2"),
+      check_equal "Paragraph before list" "<h3>This is a list</h3><ul><li>Item 1</li><li>Item 2</li></ul>" (convert "### This is a list\n\n* Item 1\n* Item 2"),
+      check_equal "Nested list then back" "<ul><li>Item 1</li><li>Item 2<ul><li>Item 3</li><li>Item 4</li></ul></li><li>Item 5</li></ul>" (convert "- Item 1\n- Item 2\n    - Item 3\n    - Item 4\n- Item 5"),
+      check_equal "Triply nested list" "<ul><li>Item 1</li><li>Item 2<ul><li>Item 3<ul><li>Item 4</li></ul></li></ul></li><li>Item 5</li></ul>" (convert "- Item 1\n- Item 2\n    - Item 3\n      - Item 4\n- Item 5"),
+      check_equal "Blockquote in list" "<ul><li>Item 1</li><li><p>Item 2</p><blockquote><p>Quote</p></blockquote></li><li>Item 3</li></ul>" (convert "- Item 1\n- Item 2\n\n    > Quote\n\n- Item 3"),
+      check_equal "Ordered list in unordered list" "<ul><li>Item 1</li><li>Item 2<ol><li>Item 1</li><li>Item 2</li></ol></li><li>Item 3</li></ul>" (convert "- Item 1\n- Item 2\n    1. Item 1\n    2. Item 2\n- Item 3"),
+      check_equal
+        "Checkbox in unordered list"
+        "<ul>\
+        \<li><input type=\"checkbox\" />Not checked</li>\
+        \<li><input type=\"checkbox\" checked=\"\" />Checked</li>\
+        \<li>Normal list item</li></ul>"
+        (convert "- [ ]     Not checked\n- [x]   Checked\n- Normal list item"),
+      check_equal "List with link at the start" "<ul><li><a href=\"b\">a</a></li><li><a href=\"d\">c</a></li></ul>" (convert "- [a](b)\n- [c](d)")
+    ]
+
+orderedListTests =
+  TestList
+    [ check_equal "Basic ordered list" "<ol><li>Item 1</li><li>Item 2</li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n3. Item 3"),
+      check_equal "Mixing list numbering" "<ol><li>Item 1</li><li>Item 2</li><li>Item 3</li></ol>" (convert "1. Item 1\n3. Item 2\n2. Item 3"),
+      check_equal "Should not convert list without number 1" "<p>2. Item 1 1. Item 2</p>" (convert "2. Item 1\n1. Item 2"),
+      check_equal "Formatted lists" "<ol><li><b>Item 1</b></li><li><i>Item 2</i></li><li><b><i>Item 3</i></b></li></ol>" (convert "1. __Item 1__\n2. _Item 2_\n3. ***Item 3***"),
+      check_equal "Nested list" "<ol><li>Item 1</li><li>Item 2</li><li>Item 3<ol><li>Subitem 1</li><li>Subitem 2</li></ol></li></ol>" (convert "1. Item 1\n2. Item 2\n3. Item 3\n    1. Subitem 1\n    2. Subitem 2"),
+      check_equal "Paragraph in list" "<ol><li>Item 1</li><li><p>Item 2</p><p>More stuff</p></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n\n    More stuff\n\n1. Item 3"),
+      check_equal "Paragraph before list" "<p>This is a list</p><ol><li>Item 1</li><li>Item 2</li></ol>" (convert "This is a list\n\n1. Item 1\n1. Item 2"),
+      check_equal "Paragraph before list" "<h3>This is a list</h3><ol><li>Item 1</li><li>Item 2</li></ol>" (convert "### This is a list\n\n1. Item 1\n200. Item 2"),
+      check_equal "Nested list then back" "<ol><li>Item 1</li><li>Item 2<ol><li>Item 3</li><li>Item 4</li></ol></li><li>Item 5</li></ol>" (convert "1. Item 1\n2. Item 2\n    1. Item 3\n    3. Item 4\n5. Item 5"),
+      check_equal "Blockquote in list" "<ol><li>Item 1</li><li><p>Item 2</p><blockquote><p>Quote</p></blockquote></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n\n    > Quote\n\n3. Item 3"),
+      check_equal "Unordered list in ordered list" "<ol><li>Item 1</li><li>Item 2<ul><li>Item 1</li><li>Item 2</li></ul></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n    - Item 1\n    * Item 2\n4. Item 3"),
+      check_equal "List with just 1 item" "<ol><li>Item 1</li></ol>" (convert "1. Item 1"),
+      check_equal
+        "Checkbox in ordered list"
+        "<ol>\
+        \<li><input type=\"checkbox\" />Not checked</li>\
+        \<li><input type=\"checkbox\" checked=\"\" />Checked</li>\
+        \<li>Normal list item</li></ol>"
+        (convert "1. [ ]     Not checked\n2. [x]   Checked\n3. Normal list item")
+    ]
+
+htmlTests =
+  TestList
+    [check_equal "Convert HTML element" "<p><center>a</center></p>" (convert "<center>a</center>")]
+
+codeTests =
+  TestList
+    [ check_equal "Code by itself" "<p><code>Hello world!</code></p>" (convert "`Hello world!`"),
+      check_equal "Code in a paragraph" "<p>The following <code>text</code> is code</p>" (convert "The following `text` is code"),
+      check_equal "Code across paragraphs (shouldn't work)" "<p>`Incomplete</p><p>Code`</p>" (convert "`Incomplete\n\nCode`"), -- At the moment, this is just treated as a syntax error, so nothing is rendered.
+      check_equal "Code block" "<pre><code>Test code block</code></pre>" (convert "```\nTest code block\n```"),
+      check_equal "Multiple code blocks" "<pre><code>Test code block</code></pre><pre><code>Next block</code></pre>" (convert "```\nTest code block\n```\n\n```\nNext block\n```")
+    ]
+
+imageTests =
+  TestList
+    [ check_equal "Image with text" "<p>This is an image <img src=\"img.png\" alt=\"Image 1\"/></p>" (convert "This is an image ![Image 1](img.png)"),
+      check_equal "Image with classes" "<p>This is an image <img src=\"img.png\" alt=\"Image 1\" class=\"new-img\"/></p>" (convert "This is an image ![Image 1](img.png){.new-img}")
+    ]
+
+figureTests =
+  TestList
+    [ check_equal "Image by itself" "<figure><img src=\"img.png\" alt=\"Image 1\"/><figcaption aria-hidden=\"true\">Image 1</figcaption></figure>" (convert "![Image 1](img.png)")
+    ]
+
+horizontalRuleTests =
+  TestList
+    [check_equal "Horizontal Rule" "<p>a</p><hr><p>b</p>" (convert "a\n\n---\n\nb")]
+
+subscriptTests =
+  TestList
+    [check_equal "Should convert subscript" "A<sub>b</sub>" (convert "A~b~")]
+
+superscriptTests =
+  TestList
+    [check_equal "Should convert superscript" "A<sup>b</sup>" (convert "A^b^")]
+
+tableTests =
+  TestList
+    [ check_equal
+        "Basic table"
+        "<table>\
+        \<thead><tr><th>Col 1</th><th>Col 2</th><th>Col 3</th></tr></thead>\
+        \<tbody><tr><td>Data 1</td><td>Data 2</td><td>Data 3</td></tr>\
+        \<tr><td>More Data 1</td><td>More Data 2</td><td>More Data 3</td></tr></tbody></table>"
+        (convert "| Col 1 | Col 2 | Col 3 |\n|---|---|---|\n| Data 1 | Data 2 | Data 3 |\n| More Data 1 | More Data 2 | More Data 3 |")
+    ]
+
+integrationTests =
+  TestList
+    [ check_equal "Integration 1" "<h1>Sample Markdown</h1><p>This is some basic, sample markdown.</p><h2><b>Second</b> <i>Heading</i></h2>" (convert "# Sample Markdown\n\n This is some basic, sample markdown.\n\n  ## __Second__ _Heading_"),
+      check_equal "Integration 2" "<p><b>Hello</b> <i>World</i></p>" (convert "__Hello__\n_World_"),
+      check_equal "Integration 3" "<h1>Hello</h1><p>World</p>" (convert "# Hello\nWorld"),
+      check_equal "Integration 4" "<p>a b</p>" (convert "a\nb"),
+      check_equal "Integration 5" "<h1>Hello</h1>" (convert "# Hello\n"),
+      check_equal "Integration 6" "<p>First line<br />Second line</p>" (convert "First line  \nSecond line"),
+      check_equal
+        "Integration 7"
+        "<h1>Sample Markdown</h1><p>This is some basic, sample markdown.</p><h2>Second \
+        \Heading</h2><ul><li>Unordered lists, and:<ol><li>One</li><li>Two</li><li>\
+        \Three</li></ol></li><li>More</li></ul><blockquote><p>Blockquote</p>\
+        \</blockquote><p>And <b>bold</b>, <i>italics</i>, and even <i>italics \
+        \and later <b>bold</b></i>. Even <s>strikethrough</s>. \
+        \<a href=\"https://markdowntohtml.com\">A link</a> to somewhere.</p>"
+        ( convert
+            "# Sample Markdown\n\nThis is some basic, sample markdown.\n\n## Second \
+            \Heading\n\n- Unordered lists, and:\n    1. One\n    2. Two\n    3. Three\n\
+            \- More\n\n> Blockquote\n\nAnd **bold**, *italics*, and even *italics and \
+            \later __bold__*. Even ~~strikethrough~~. [A link](https://markdowntohtml.com) to somewhere."
+        )
+    ]
+
+tests =
+  TestList
+    [ headerTests,
+      boldTests,
+      strikethroughTests,
+      linkTests,
+      escapedCharTests,
+      blockquoteTests,
+      unorderedListTests,
+      orderedListTests,
+      imageTests,
+      htmlTests,
+      figureTests,
+      codeTests,
+      horizontalRuleTests,
+      tableTests,
+      integrationTests
+    ]
+
+runTests = runTestTT tests
--- a/src/Test.hs
+++ b/src/Test.hs
@@ -1,44 +0,0 @@
-module MdToHtmlTest where
-
-import MdToHTML
-import Test.HUnit
-
-check_equal :: String -> String -> String -> Test
-check_equal desc expected actual = TestCase (assertEqual desc expected actual)
-
-convert :: String -> String
-convert md = show . fst $ leftmostLongestParse parseDocument md
-
-headerTests = TestList
-      [
-         check_equal "Should convert H1 heading" "<h1>Hello</h1>" (convert "# Hello"),
-         check_equal "Should convert H2 heading" "<h2>Hello</h2>" (convert "## Hello"),
-         check_equal "Should convert H3 heading" "<h3>Hello</h3>" (convert "### Hello"),
-         check_equal "Should convert H4 heading" "<h4>Hello</h4>" (convert "#### Hello"),
-         check_equal "Should convert H5 heading" "<h5>Hello</h5>" (convert "##### Hello"),
-         check_equal "Should convert H6 heading" "<h6>Hello</h6>" (convert "###### Hello")
-      ]
-
-boldTests = TestList
-   [
-         check_equal "Should convert bold" "<p><b>Hello</b></p>" (convert "__Hello__"),
-         check_equal "Should convert italic" "<p><i>Hello</i></p>" (convert "_Hello_"),
-         check_equal "Should convert bold and italic in a sentence" "<p>It <i>is</i> a <b>wonderful</b> day</p>" (convert "It _is_ a __wonderful__ day")
-   ]
-
-integrationTests = TestList
-    [
-        check_equal "Integration 1" "<h1>Sample Markdown</h1><p>This is some basic, sample markdown.</p><h2><b>Second</b> <i>Heading</i></h2>" (convert "# Sample Markdown\n\n This is some basic, sample markdown.\n\n  ## __Second__ _Heading_"),
-        check_equal "Integration 2" "<p><b>Hello</b> <i>World</i></p>" (convert "__Hello__\n_World_"),
-        check_equal "Integration 3" "<h1>Hello</h1><p>WorldM/p>" (convert "# Hello\nWorld")
-    ]
-
-
-tests = TestList
-   [
-      headerTests,
-      boldTests,
-      integrationTests
-   ]
-
-runTests = runTestTT tests
--- a/test.hs
+++ b/test.hs
@@ -1,5 +0,0 @@
-leftmostLongest :: (Foldable t) => [t a] -> t a
-leftmostLongest xs = 
-        let lastElem = (last xs)
-            filteredLst = (filter (\val -> (length val) == (length lastElem)) xs)
-        in head filteredLst
Author	SHA1	Message	Date
Aadhavan Srinivasan	e5795e0d75	Added more tests	2025-07-30 15:34:43 -04:00
Aadhavan Srinivasan	eae897a2d6	Improve document parser to parse extra newlines	2025-07-30 15:32:35 -04:00
Aadhavan Srinivasan	8152b89a23	Fix bug in fallthrough parser	2025-07-30 15:29:25 -04:00
Aadhavan Srinivasan	9d3d656065	Improved parser for horizontal rule and codeblock	2025-07-30 15:29:03 -04:00
Aadhavan Srinivasan	d4a550f6a7	Added token and parser for list checkbox	2025-07-30 15:28:35 -04:00
Aadhavan Srinivasan	2b21aeae89	Added implementation and tests for subscript and superscript; fixed nested list parsing	2025-07-22 09:56:41 -04:00
Aadhavan Srinivasan	ca328a464a	Add prettyPrint definition for horizontal rule	2025-07-15 10:51:10 -04:00
Aadhavan Srinivasan	7d45b1123f	Renamed md-to-html-runner to mdtoh	2025-06-18 15:21:54 -04:00
Aadhavan Srinivasan	9627abcd12	Updated test	2025-06-10 14:14:56 -04:00
Aadhavan Srinivasan	82277e9ea8	Only add newlines for linebreak when pretty printing	2025-06-10 14:14:26 -04:00
Aadhavan Srinivasan	d074b0131c	Parse linebreaks as a backslash before a newline	2025-06-10 14:10:45 -04:00
Aadhavan Srinivasan	57cb3e68fa	Import module for word wrapping; add package to cabal file	2025-06-10 14:02:20 -04:00
Aadhavan Srinivasan	4e9f84c2bb	Add function to pretty print; commented out my word wrap and use a built-in one instead	2025-06-10 14:01:52 -04:00
Aadhavan Srinivasan	e025614324	Print extra newline if output text doesn't include a newline	2025-06-10 09:13:40 -04:00
Aadhavan Srinivasan	e711444066	Add more packages to cabal file	2025-06-10 09:13:15 -04:00
Aadhavan Srinivasan	6b99a1835d	Created a separate parser list for all parsers (except the unit parser is replaced with the non-newline unit parser); use that parser when parsing list lines	2025-06-10 09:12:42 -04:00
Aadhavan Srinivasan	04167e0f96	Parse CSS classes in image and figure	2025-06-10 09:11:56 -04:00
Aadhavan Srinivasan	0528e813c5	Parser for CSS classes	2025-06-10 09:11:34 -04:00
Aadhavan Srinivasan	b1b99189c9	Link tect can be empty; inline code cannot be empty and can have nested backticks; created a unit parser for all characters except newline	2025-06-10 09:11:19 -04:00
Aadhavan Srinivasan	ade3768e29	Try and backtrack	2025-06-10 09:10:10 -04:00
Aadhavan Srinivasan	fd6d39ecd6	Parse space at beginning of list	2025-06-10 09:09:53 -04:00
Aadhavan Srinivasan	0f04342867	More trying and backtracking; parse and discard extraneous spaces at beginning of list	2025-06-10 09:09:31 -04:00
Aadhavan Srinivasan	80ef93bbc9	Try parsing an ordered list item, backtrack if not possible	2025-06-10 09:08:52 -04:00
Aadhavan Srinivasan	b73d4131b6	Added support for tables and codeblocks Defined the types, defined 'show', created the parsers, added them to parser list	2025-06-10 09:08:30 -04:00
Aadhavan Srinivasan	c48b8c5ae8	Images and figures now support CSS classes	2025-06-10 09:05:31 -04:00
Aadhavan Srinivasan	cf4282b26e	More imports	2025-06-10 09:04:28 -04:00
Aadhavan Srinivasan	7b40d6fe7c	Imports	2025-06-10 09:04:18 -04:00
Aadhavan Srinivasan	c4255d4578	Added a test for a list with just one item	2025-06-10 09:04:02 -04:00
Aadhavan Srinivasan	dcbbff13cb	Spacing change	2025-06-10 09:03:48 -04:00
Aadhavan Srinivasan	592fad2b46	Added tests for tables	2025-06-10 09:03:32 -04:00
Aadhavan Srinivasan	b8ba27f240	Strip newlines when comparing in test	2025-06-03 15:37:27 -04:00
Aadhavan Srinivasan	bb08b40512	Replaced nested bold with asterisks, with asterisks and underscores	2025-06-03 15:37:12 -04:00
Aadhavan Srinivasan	93548a4533	Never mind, doesn't seem to work well	2025-06-03 15:31:13 -04:00
Aadhavan Srinivasan	160cb0edeb	Trying to get nested bold and italic to work	2025-06-03 15:30:35 -04:00
Aadhavan Srinivasan	2893fa25e6	Include new packages	2025-06-03 11:20:02 -04:00
Aadhavan Srinivasan	324e5da82d	Use new definition for lefmostLongestParse	2025-06-03 11:19:42 -04:00
Aadhavan Srinivasan	05e5548aa9	Huge rewrite - use megaparsec instead of readP	2025-06-03 11:19:19 -04:00
Aadhavan Srinivasan	1915628a2b	Used 'in-order' parsing for headers, instead of leftmostLongestParse	2025-05-27 14:12:13 -04:00
Aadhavan Srinivasan	1d9ac86a2a	Implementation and test for horizontal rule	2025-05-27 13:41:38 -04:00
Aadhavan Srinivasan	0320402957	Fixed how trailing newlines are parsed	2025-05-27 09:28:19 -04:00
Aadhavan Srinivasan	8696a185a7	Rewrote readLines function to be more concise	2025-05-27 08:55:10 -04:00
Aadhavan Srinivasan	da38ac226f	Added figure implementation and tests	2025-05-27 08:54:52 -04:00
Aadhavan Srinivasan	1fcce32ef6	Updated to read from stdin/file	2025-05-23 19:03:23 -04:00
Aadhavan Srinivasan	e50081614a	Removed section from cabal file	2025-05-23 19:03:14 -04:00
Aadhavan Srinivasan	b98a8cc44f	Added image tests	2025-05-23 14:55:58 -04:00
Aadhavan Srinivasan	90c7a585d2	Added image parser	2025-05-23 14:04:37 -04:00
Aadhavan Srinivasan	4a15330874	Updated showing of image	2025-05-23 14:04:21 -04:00
Aadhavan Srinivasan	c14112d3e4	Updated special and escapable characters	2025-05-23 14:04:05 -04:00
Aadhavan Srinivasan	ed7d2c1ef1	Added code parser to list line parser list	2025-05-20 22:23:41 -04:00
Aadhavan Srinivasan	eb20f154a4	Removed unused function	2025-05-20 22:20:36 -04:00
Aadhavan Srinivasan	172985131b	Fixed HTML rendering for horizontal rule	2025-05-20 22:20:23 -04:00
Aadhavan Srinivasan	3781e67ab1	Created a separate list of escapable chars	2025-05-20 22:20:05 -04:00
Aadhavan Srinivasan	f2d54edd3f	Implemented inline code parsing	2025-05-20 22:19:44 -04:00
Aadhavan Srinivasan	5393dc4eb9	Added code tests	2025-05-20 22:18:46 -04:00
Aadhavan Srinivasan	e051c87f08	Factor list line common parsing into a separate function; refactored OList and UList line parsing to us it	2025-05-20 16:48:31 -04:00
Aadhavan Srinivasan	9b1c51897c	A nested list can be ordered or unordered	2025-05-20 16:47:58 -04:00
Aadhavan Srinivasan	2a3dddc7b0	Rename function	2025-05-20 16:47:31 -04:00
Aadhavan Srinivasan	a8793b5adb	Remove obsolete comments	2025-05-20 16:47:16 -04:00
Aadhavan Srinivasan	eecec764ad	A list can contain a blockquote as well.	2025-05-20 16:47:04 -04:00
Aadhavan Srinivasan	cdca6ea95e	A list item doesn't need to have a nested list item parser, because the line item parser handles the nested list.	2025-05-20 16:46:52 -04:00
Aadhavan Srinivasan	540b5430e5	Use list of document parsers in parseDocument	2025-05-20 16:45:56 -04:00
Aadhavan Srinivasan	00dfba81eb	Created a list of document parsers; implemented ordered list parsing.	2025-05-20 16:45:37 -04:00
Aadhavan Srinivasan	39152c0034	Factor out common code for UList and OList parsing into a separate function. Refactored UList function; wrote OList function.	2025-05-20 16:45:07 -04:00
Aadhavan Srinivasan	41b35be7c9	Rename function	2025-05-20 16:43:35 -04:00
Aadhavan Srinivasan	d2c8565f62	Import Data.Char for isDigit	2025-05-20 16:43:22 -04:00
Aadhavan Srinivasan	62eeef2abb	Removed unused functions	2025-05-20 16:43:10 -04:00
Aadhavan Srinivasan	9c6634cfec	Added ordered list tests	2025-05-20 16:42:40 -04:00
Aadhavan Srinivasan	2a5a68b1de	Fixed test name	2025-05-20 16:42:12 -04:00
Aadhavan Srinivasan	f8e1a98bdf	Remove obsolete comment	2025-05-20 14:03:30 -04:00
Aadhavan Srinivasan	05433c31f1	Remove unused functions	2025-05-20 14:02:50 -04:00
Rockingcool	5c871f2b25	Removed test file	2025-05-20 12:25:07 -05:00
Rockingcool	5273c99e6e	Added unordered list tests and integration tests; added strikethrough tests to test list	2025-05-20 12:24:20 -05:00
Rockingcool	50888c9c3d	Added bold and strikethrough tests	2025-05-20 12:23:47 -05:00
Rockingcool	45115c765c	An unordered list must end in a blank line.	2025-05-20 12:23:27 -05:00
Rockingcool	5b0d42fd2d	Use the in-order parsing approach instead of the post-order one.	2025-05-20 12:23:13 -05:00
Rockingcool	2a585d00f2	Enforce at least one space between list indicator and list text.	2025-05-20 12:22:37 -05:00
Rockingcool	11a3b14cb1	Define a parser for list line tokens, update line token parser	2025-05-20 12:22:19 -05:00
Rockingcool	58d3142855	Update comment	2025-05-20 12:21:45 -05:00
Rockingcool	0fb651fffc	Add parseUnit instead of parseString to lineParsers	2025-05-20 12:20:51 -05:00
Rockingcool	bc05dede06	Create a list of parsers that are used for list line items.	2025-05-20 12:20:35 -05:00
Rockingcool	b69e34f823	Parse a single character as a unit	2025-05-20 12:20:10 -05:00
Rockingcool	2514ecdafc	Parse bold, italic and strikethrough in-order, instead of trying to find the end, then parsing everything in the middle. The current approach parses the opening bold (or italic), some text, then the closing bold (or italic), instead of parsing the opening, closing, then everything in between.	2025-05-20 12:19:59 -05:00
Rockingcool	c52d5556a2	Allow for multiple blank lines after header	2025-05-20 12:18:52 -05:00
Rockingcool	5fc1b1122a	Create a function to 'fallthrough parse' ie. try the second parser only if the first one fails.	2025-05-20 12:18:23 -05:00
Rockingcool	83dd0024c4	Space doesn't have to be a reserved character anymore.	2025-05-20 12:17:59 -05:00
Rockingcool	70761649ad	Derive Eq for defined types	2025-05-20 12:17:43 -05:00
Rockingcool	b9c6cc4470	Implemented strikethrough parser	2025-05-16 19:29:39 -05:00
Rockingcool	23691f9cfe	Add strikethrough parser to line parser list	2025-05-16 19:28:41 -05:00
Rockingcool	8c220cc800	A document can consist of unordered lists as well	2025-05-14 21:40:03 -05:00
Rockingcool	ee453c0259	Fixed blockquote parser; implemented unordered list parser and relevant sub-parsers	2025-05-14 21:39:27 -05:00
Rockingcool	c90d23617a	A blockquote must have a list of tokens	2025-05-14 21:38:50 -05:00
Rockingcool	c574699a8a	Added an import	2025-05-14 21:38:38 -05:00
Rockingcool	f55e160e25	Added tests for unordered lists	2025-05-14 21:37:58 -05:00
Rockingcool	dddcca0185	Wrote a lot of helper functions - most importantly greedyParse	2025-05-13 21:40:42 -05:00
Rockingcool	e7d94f225a	Updated show definitions for some tokens; add HTML syntax	2025-05-09 23:14:09 -05:00
Rockingcool	e8eb22f3ae	Added pragmas to suppress LSP warnings; removed a random line that was added by the LSP	2025-05-09 23:13:31 -05:00
Rockingcool	ef1809970b	Greatly shortened parseQuotedLine, because I can just use greedyParse instead of using manyTill	2025-05-09 23:12:53 -05:00
Rockingcool	549504d650	Consume whitespace between greater-than sign and text in a blockquote line	2025-05-09 23:12:12 -05:00
Rockingcool	4f23592aeb	Add relevant modules to other-modules section, to get rid of cabal's warnings	2025-05-09 23:10:34 -05:00
Rockingcool	b00d79b9aa	Renamed Test.hs to MdToHtmlTest.hs so that the filename matches the module name; updated blockquote tests; started working on ordered list tests	2025-05-09 23:10:04 -05:00
Rockingcool	3cd9f24935	Wrote helper functions for parseBlockquote, to parse a quoted line and multiple quoted lines.	2025-05-09 16:12:09 -05:00
Rockingcool	a60b3754e4	Rewrite leftmostLongest and leftmostLongestParse so that they don't rely on 'head' which is a partial function	2025-05-09 12:17:53 -05:00
Rockingcool	3330185393	Make MdToken a Semigroup and a Monoid, so that I can use 'mempty' for it.	2025-05-09 12:17:22 -05:00
Rockingcool	1df7f64aec	Started working on blockquote parser	2025-05-08 17:54:00 -05:00
Rockingcool	bfd627c763	Added blockquote tests	2025-05-08 17:53:37 -05:00
Rockingcool	81671727b2	Added more parsers for escaped characters and links.	2025-05-07 14:21:13 -05:00
Rockingcool	1b821c4315	Declare separate variable for escaped characters.	2025-05-07 14:20:51 -05:00
Rockingcool	51728dd3a1	Added double-quotes to link URL when printing link.	2025-05-07 14:20:29 -05:00
Rockingcool	56e1514213	Added more tests	2025-05-07 14:19:50 -05:00
Rockingcool	e7ea7b6ba6	Removed unecessary case statement	2025-05-06 17:10:54 -05:00
Rockingcool	ef132791a1	Applied lots of hints, mostly redundant brackets	2025-05-06 17:09:54 -05:00
Rockingcool	ca0d09dfab	Added another test	2025-05-06 17:07:46 -05:00
Rockingcool	d1b0ce6b10	Move parseHeader up in the chain - parseDocument can either parse headers or a paragraph	2025-05-06 17:06:22 -05:00
Rockingcool	b6f51c33c7	Parse until EOF instead of adding a manual check.	2025-05-06 17:05:19 -05:00
Rockingcool	9ffbb7365c	Removed commented line	2025-05-06 17:05:02 -05:00
Rockingcool	71aacdd26a	Removed unecessary parentheses	2025-05-06 17:04:24 -05:00
Rockingcool	873795e267	Removed unecessary function	2025-05-06 17:03:45 -05:00
Aadhavan Srinivasan	9a128407cc	Fixed newline bug; working on fixing the last failing test	2025-05-05 11:31:24 -04:00
Rockingcool	a18d03e4ac	Working on parsing single newlines	2025-05-05 09:42:53 -05:00
Rockingcool	f916267d29	Changed parsing of header to consume optional newline; removed debug statements	2025-05-04 16:22:53 -05:00
Rockingcool	9555f55575	Fixed typo in test	2025-05-04 16:22:35 -05:00