From 2b21aeae89f2775ac08c8ea3bded3dc1ccc2989d Mon Sep 17 00:00:00 2001 From: Aadhavan Srinivasan Date: Tue, 22 Jul 2025 09:56:41 -0400 Subject: [PATCH] Added implementation and tests for subscript and superscript; fixed nested list parsing --- src/MdToHTML.hs | 34 +++++++++++++++++++++++++++++++--- src/MdToHtmlTest.hs | 11 ++++++++++- 2 files changed, 41 insertions(+), 4 deletions(-) diff --git a/src/MdToHTML.hs b/src/MdToHTML.hs index d7ec5f0..c9712a7 100644 --- a/src/MdToHTML.hs +++ b/src/MdToHTML.hs @@ -48,6 +48,8 @@ data MdToken | Figure MdToken URL (Maybe [CssClass]) | Bold MdToken | Italic MdToken + | Superscript MdToken + | Subscript MdToken | Strikethrough MdToken | Unit String deriving (Eq) @@ -72,6 +74,8 @@ instance Show MdToken where show (Figure txt url cssClasses) = "
\"" " class=\"" ++ unwords classes ++ "\"") cssClasses ++ "/>
" ++ show txt ++ "
" show (Bold token) = "" ++ show token ++ "" show (Italic token) = "" ++ show token ++ "" + show (Superscript token) = "" ++ show token ++ "" + show (Subscript token) = "" ++ show token ++ "" show (Strikethrough token) = "" ++ show token ++ "" show (Unit unit) = printf "%s" unit @@ -201,6 +205,20 @@ parseItalic = parseItalicWith '*' <|> parseItalicWith '_' inside <- someTill parseLineToken (char delim) return (Italic (Line inside)) +-- Parse subscript +parseSubscript :: Parser MdToken +parseSubscript = do + char '~' + inside <- someTill parseLineToken (char '~') + return (Subscript (Line inside)) + +-- Parse superscript +parseSuperscript :: Parser MdToken +parseSuperscript = do + char '^' + inside <- someTill parseLineToken (char '^') + return (Superscript (Line inside)) + -- Parse strikethrough text parseStrikethrough :: Parser MdToken parseStrikethrough = do @@ -326,6 +344,8 @@ lineParsers = parseBold, parseItalic, parseStrikethrough, + parseSubscript, + parseSuperscript, parseLink, parseUnit ] -- A 'line' doesn't include a 'header' @@ -338,6 +358,8 @@ lineParsersWithoutNewline = parseBold, parseItalic, parseStrikethrough, + parseSubscript, + parseSuperscript, parseLink, parseUnitExceptNewline ] -- A list line cannot contain newlines. @@ -406,10 +428,16 @@ parseBlockquote = do -- Parse a nested list item. parseListNested :: Parser MdToken parseListNested = do - let firstCharParser = string (T.pack " ") <|> string (T.pack "\t") + let firstCharParser = (<>) <$> (string (T.pack " ") <|> string (T.pack "\t")) <*> (T.pack <$> many (char ' ')) let restOfLineParser = manyTill anySingle (void (char '\n') <|> eof) - lines <- greedyParse1 (firstCharParser *> restOfLineParser) - let linesParsed = leftmostLongestParse (parseUnorderedList <|> parseOrderedList) (init $ unlines lines) + -- For the first line, I manually run firstCharParser and restOfLineParser. The + -- result of firstCharParser is saved. For every subsequent line, I parse exactly + -- the same string as firstCharParser. + firstLineSpaces <- firstCharParser + firstLine <- restOfLineParser + lines <- greedyParse (string firstLineSpaces *> restOfLineParser) + let allLines = firstLine : lines + let linesParsed = leftmostLongestParse (parseUnorderedList <|> parseOrderedList) (init $ unlines allLines) when (null (show linesParsed)) empty return linesParsed diff --git a/src/MdToHtmlTest.hs b/src/MdToHtmlTest.hs index 324afb0..5773ebe 100644 --- a/src/MdToHtmlTest.hs +++ b/src/MdToHtmlTest.hs @@ -31,7 +31,7 @@ boldTests = strikethroughTests = TestList [ check_equal "Should convert strikethrough" "

Hello

" (convert "~~Hello~~"), - check_equal "Should convert long sentence with tilde" "

The universe is ~7 days old. The universe is 13 billion years old.

" (convert "~~The universe is ~7 days old~~. The universe is 13 billion years old.") + check_equal "Should convert long sentence with tilde" "

The universe is ~7 days old. The universe is 13 billion years old.

" (convert "~~The universe is \\~7 days old~~. The universe is 13 billion years old.") ] linkTests = @@ -74,6 +74,7 @@ unorderedListTests = check_equal "Paragraph before list" "

This is a list

" (convert "This is a list\n\n* Item 1\n* Item 2"), check_equal "Paragraph before list" "

This is a list

" (convert "### This is a list\n\n* Item 1\n* Item 2"), check_equal "Nested list then back" "" (convert "- Item 1\n- Item 2\n - Item 3\n - Item 4\n- Item 5"), + check_equal "Triply nested list" "" (convert "- Item 1\n- Item 2\n - Item 3\n - Item 4\n- Item 5"), check_equal "Blockquote in list" "" (convert "- Item 1\n- Item 2\n\n > Quote\n\n- Item 3"), check_equal "Ordered list in unordered list" "" (convert "- Item 1\n- Item 2\n 1. Item 1\n 2. Item 2\n- Item 3") ] @@ -120,6 +121,14 @@ horizontalRuleTests = TestList [check_equal "Horizontal Rule" "

a


b

" (convert "a\n\n---\n\nb")] +subscriptTests = + TestList + [check_equal "Should convert subscript" "Ab" (convert "A~b~")] + +superscriptTests = + TestList + [check_equal "Should convert superscript" "Ab" (convert "A^b^")] + tableTests = TestList [ check_equal