Made some changes

Merge remote-tracking branch 'origin' into fixingIncompleteElements
Added code parser to list line parser list
2025-05-23 08:07:02 -04:00 · 2025-05-20 22:24:24 -04:00 · 2025-05-20 22:23:41 -04:00 · 2025-05-20 22:21:59 -04:00 · 2025-05-20 22:20:36 -04:00 · 2025-05-20 22:20:23 -04:00
2 changed files with 50 additions and 19 deletions
--- a/src/MdToHTML.hs
+++ b/src/MdToHTML.hs
@@ -33,7 +33,7 @@ data MdToken
  | Blockquote [MdToken]
  | UnordList [MdToken]
  | OrdList [MdToken]
-  | Code String
+  | Code MdToken
  | Codeblock String
  | Link MdToken URL
  | Image MdToken ImgPath
@@ -51,11 +51,11 @@ instance Show MdToken where
  show (Line tokens) = concatMap show tokens
  show Linebreak = "<br>"
  show SingleNewline = " "
-  show HorizontalRule = "---------"
+  show HorizontalRule = "<hr>"
  show (Blockquote tokens) = "<blockquote>" ++ concatMap show tokens ++ "</blockquote>"
  show (UnordList tokens) = "<ul>" ++ concatMap (prepend "<li>" . append "</li>" . show) tokens ++ "</ul>"
  show (OrdList tokens) = "<ol>" ++ concatMap (prepend "<li>" . append "</li>" . show) tokens ++ "</ol>"
-  show (Code code) = show code
+  show (Code code) = "<code>" ++ show code ++ "</code>"
  show (Codeblock code) = show code
  show (Link txt url) = "<a href=\"" ++ getUrl url ++ "\">" ++ show txt ++ "</a>"
  show (Image txt imgPath) = "<img src=" ++ getPath imgPath ++ ">" ++ show txt ++ "</img>"
@@ -88,7 +88,9 @@ leftmostLongestParse parser input =
        Nothing -> (mempty, mempty)
        Just x -> x

-specialChars = "\\#*_[\n"
+specialChars = "\\#*_[\n`" -- backslash, '#', '*', '_', '[', newline and backtick: the characters the inline parsers treat as markup

+escapableChars = '~' : specialChars -- every character parseEscapedChar accepts after a backslash

 -- Makes a parser greedy. Instead of returning all possible parses, only the longest one is returned.
 greedyParse :: ReadP a -> ReadP [a]
@@ -118,6 +120,16 @@ fallthroughParser :: [ReadP a] -> ReadP a
 fallthroughParser [x] = x
 fallthroughParser (x : xs) = x <++ fallthroughParser xs

+myMany :: ReadP a -> ReadP [a] -- zero or more runs of p; yields every possible parse, not just the longest
+myMany p = do
+  remaining <- look
+  case remaining of
+    [] -> return [] -- nothing left to consume, so the empty list is the only result
+    _ -> return [] +++ myMany1 p -- offer both "stop here" and "parse one more"

+myMany1 :: ReadP a -> ReadP [a] -- like myMany, but p must succeed at least once
+myMany1 p = liftM2 (:) p (myMany p)
+
 -- ---------------

 -- Parse a markdown header, denoted by 1-6 #'s followed by some text, followed by EOL.
@@ -141,19 +153,25 @@ parseBold = parseBoldWith "**" <|> parseBoldWith "__"
  where
    parseBoldWith delim = do
      string delim
-      inside <- greedyParse1 parseLineToken
+      inside <- myMany1 parseLineToken
      string delim
      return (Bold (Line inside))

 -- Parse italic text
 parseItalic :: ReadP MdToken
-parseItalic = parseItalicWith "*" <|> parseItalicWith "_"
+parseItalic = parseItalicWith '*' <|> parseItalicWith '_'
  where
    parseItalicWith delim = do
-      string delim
-      inside <- greedyParse1 parseLineToken
-      string delim
+      exactlyOnce delim -- opening delimiter must not be doubled (a doubled delimiter belongs to parseBold)
+      inside <- myMany1 parseLineToken
+      exactlyOnce delim -- NOTE(review): also fails when the closer is directly followed by the same char, e.g. the inner '*' of "*Italic***" - confirm intended
      return (Italic (Line inside))
+    exactlyOnce ch = do -- match ch only when the next character is not ch again
+      char ch
+      remaining <- look
+      case remaining of
+        [] -> return ch -- end of input: nothing can follow
+        x : _ -> if x == ch then pfail else return ch

 -- Parse strikethrough text
 parseStrikethrough :: ReadP MdToken
@@ -163,6 +181,14 @@ parseStrikethrough = do
  string "~~"
  return (Strikethrough (Line inside))

+-- Parse inline code: one or more characters between two backticks, ending at the first closing backtick.
+parseCode :: ReadP MdToken
+parseCode = do
+  string "`"
+  inside <- many1 (satisfy (/= '`')) -- 'many1 get' could also consume backticks and merge separate code spans into one
+  string "`"
+  return (Code (Unit inside))
+
 -- Parse a link
 parseLink :: ReadP MdToken
 parseLink = do
@@ -188,27 +214,22 @@ parseSingleNewline = do
 parseEscapedChar :: ReadP MdToken
 parseEscapedChar = do
  char '\\'
-  escapedChar <- choice (map char specialChars) -- Parse any of the special chars.
+  escapedChar <- choice (map char escapableChars) -- Parse any of the escapable chars (the special chars plus '~').
  return (Unit [escapedChar])

 -- Parse a character as a Unit.
 parseUnit :: ReadP MdToken
 parseUnit = do
-  text <- satisfy (`notElem` specialChars)
+  -- Accept any character, special ones included, so an unmatched delimiter (e.g. the "**" in "**Hello") can still be read as plain text.
+  text <- get
  return (Unit [text])

-- Parse a regular string as a Unit.
-parseString :: ReadP MdToken
-parseString = do
-  firstChar <- satisfy (/= '\n') -- Must parse at least one non-newline character here
-  text <- munch (`notElem` specialChars)
-  return (Unit (firstChar : text))
-
 lineParsers :: [ReadP MdToken]
 lineParsers =
  [ parseLinebreak,
    parseSingleNewline,
    parseEscapedChar,
+    parseCode,
    parseBold,
    parseItalic,
    parseStrikethrough,
@@ -220,6 +241,7 @@ listLineParsers :: [ReadP MdToken]
 listLineParsers =
  [ parseLinebreak,
    parseEscapedChar,
+    parseCode,
    parseBold,
    parseItalic,
    parseStrikethrough,
@@ -244,7 +266,7 @@ parseLine :: ReadP MdToken
 parseLine = do
  skipSpaces
  -- Fail if we have reached the end of the document.
-  parsed <- manyTill parseLineToken eof
+  parsed <- myMany1 parseLineToken
  return (Line parsed)

 -- Parse a paragraph, which is a 'Line' (can span multiple actual lines), separated by double-newlines.
--- a/src/MdToHtmlTest.hs
+++ b/src/MdToHtmlTest.hs
@@ -22,6 +22,7 @@ headerTests =
 boldTests =
  TestList
    [ check_equal "Should convert bold" "<p><b>Hello</b></p>" (convert "__Hello__"),
+      check_equal " Should not convert incomplete bold" "<p>**Hello</p>" (convert "**Hello"),
      check_equal "Should convert italic" "<p><i>Hello</i></p>" (convert "_Hello_"),
      check_equal "Should convert bold and italic in a sentence" "<p>It <i>is</i> a <b>wonderful</b> day</p>" (convert "It _is_ a __wonderful__ day"),
      check_equal "Should convert nested bold and italic" "<p><b>Bold then <i>Italic</i></b></p>" (convert "**Bold then *Italic***"),
@@ -93,6 +94,13 @@ orderedListTests =
      check_equal "Unordered list in ordered list" "<ol><li>Item 1</li><li>Item 2<ul><li>Item 1</li><li>Item 2</li></ul></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n    - Item 1\n    * Item 2\n4. Item 3")
    ]

+codeTests = -- inline code spans: alone, inside a paragraph, and unterminated across a paragraph break
+  TestList
+    [ check_equal "Code by itself" "<p><code>Hello world!</code></p>" (convert "`Hello world!`"),
+      check_equal "Code in a paragraph" "<p>The following <code>text</code> is code</p>" (convert "The following `text` is code"),
+      check_equal "Code across paragraphs (shouldn't work)" "<p>`Incomplete</p><p>Code`</p>" (convert "`Incomplete\n\nCode`")
+    ]
+
 integrationTests =
  TestList
    [ check_equal "Integration 1" "<h1>Sample Markdown</h1><p>This is some basic, sample markdown.</p><h2><b>Second</b> <i>Heading</i></h2>" (convert "# Sample Markdown\n\n This is some basic, sample markdown.\n\n  ## __Second__ _Heading_"),
@@ -127,6 +135,7 @@ tests =
      blockquoteTests,
      unorderedListTests,
      orderedListTests,
+      codeTests,
      integrationTests
    ]
Author	SHA1	Message	Date
Aadhavan Srinivasan	09982f4ab1	Made some changes	2025-05-23 08:07:02 -04:00
Aadhavan Srinivasan	4101767aff	Merge remote-tracking branch 'origin' into fixingIncompleteElements	2025-05-20 22:24:24 -04:00
Aadhavan Srinivasan	ed7d2c1ef1	Added code parser to list line parser list	2025-05-20 22:23:41 -04:00
Aadhavan Srinivasan	234145bcb3	Started working on parsing changes to allow incomplete elements to be parsed (eg. opening bold, followed by text, without closing bold)	2025-05-20 22:21:59 -04:00
Aadhavan Srinivasan	eb20f154a4	Removed unused function	2025-05-20 22:20:36 -04:00
Aadhavan Srinivasan	172985131b	Fixed HTML rendering for horizontal rule	2025-05-20 22:20:23 -04:00
Aadhavan Srinivasan	3781e67ab1	Created a separate list of escapable chars	2025-05-20 22:20:05 -04:00
Aadhavan Srinivasan	f2d54edd3f	Implemented inline code parsing	2025-05-20 22:19:44 -04:00
Aadhavan Srinivasan	5393dc4eb9	Added code tests	2025-05-20 22:18:46 -04:00