Added implementation and tests for subscript and superscript; fixed nested list parsing

This commit is contained in:
2025-07-22 09:56:41 -04:00
parent ca328a464a
commit 2b21aeae89
2 changed files with 41 additions and 4 deletions

View File

@@ -48,6 +48,8 @@ data MdToken
| Figure MdToken URL (Maybe [CssClass])
| Bold MdToken
| Italic MdToken
| Superscript MdToken
| Subscript MdToken
| Strikethrough MdToken
| Unit String
deriving (Eq)
@@ -72,6 +74,8 @@ instance Show MdToken where
show (Figure txt url cssClasses) = "<figure><img src=\"" ++ getUrl url ++ "\" alt=\"" ++ show txt ++ "\"" ++ maybe "" (\classes -> " class=\"" ++ unwords classes ++ "\"") cssClasses ++ "/><figcaption aria-hidden=\"true\">" ++ show txt ++ "</figcaption></figure>"
show (Bold token) = "<b>" ++ show token ++ "</b>"
show (Italic token) = "<i>" ++ show token ++ "</i>"
show (Superscript token) = "<sup>" ++ show token ++ "</sup>"
show (Subscript token) = "<sub>" ++ show token ++ "</sub>"
show (Strikethrough token) = "<s>" ++ show token ++ "</s>"
show (Unit unit) = printf "%s" unit
@@ -201,6 +205,20 @@ parseItalic = parseItalicWith '*' <|> parseItalicWith '_'
inside <- someTill parseLineToken (char delim)
return (Italic (Line inside))
-- Parse subscript
parseSubscript :: Parser MdToken
parseSubscript = do
char '~'
inside <- someTill parseLineToken (char '~')
return (Subscript (Line inside))
-- Parse superscript
parseSuperscript :: Parser MdToken
parseSuperscript = do
char '^'
inside <- someTill parseLineToken (char '^')
return (Superscript (Line inside))
-- Parse strikethrough text
parseStrikethrough :: Parser MdToken
parseStrikethrough = do
@@ -326,6 +344,8 @@ lineParsers =
parseBold,
parseItalic,
parseStrikethrough,
parseSubscript,
parseSuperscript,
parseLink,
parseUnit
] -- A 'line' doesn't include a 'header'
@@ -338,6 +358,8 @@ lineParsersWithoutNewline =
parseBold,
parseItalic,
parseStrikethrough,
parseSubscript,
parseSuperscript,
parseLink,
parseUnitExceptNewline
] -- A list line cannot contain newlines.
@@ -406,10 +428,16 @@ parseBlockquote = do
-- Parse a nested list item.
parseListNested :: Parser MdToken
parseListNested = do
let firstCharParser = string (T.pack " ") <|> string (T.pack "\t")
let firstCharParser = (<>) <$> (string (T.pack " ") <|> string (T.pack "\t")) <*> (T.pack <$> many (char ' '))
let restOfLineParser = manyTill anySingle (void (char '\n') <|> eof)
lines <- greedyParse1 (firstCharParser *> restOfLineParser)
let linesParsed = leftmostLongestParse (parseUnorderedList <|> parseOrderedList) (init $ unlines lines)
-- For the first line, I manually run firstCharParser and restOfLineParser. The
-- result of firstCharParser is saved. For every subsequent line, I parse exactly
-- the same string as firstCharParser.
firstLineSpaces <- firstCharParser
firstLine <- restOfLineParser
lines <- greedyParse (string firstLineSpaces *> restOfLineParser)
let allLines = firstLine : lines
let linesParsed = leftmostLongestParse (parseUnorderedList <|> parseOrderedList) (init $ unlines allLines)
when (null (show linesParsed)) empty
return linesParsed

View File

@@ -31,7 +31,7 @@ boldTests =
strikethroughTests =
TestList
[ check_equal "Should convert strikethrough" "<p><s>Hello</s></p>" (convert "~~Hello~~"),
check_equal "Should convert long sentence with tilde" "<p><s>The universe is ~7 days old</s>. The universe is 13 billion years old.</p>" (convert "~~The universe is ~7 days old~~. The universe is 13 billion years old.")
check_equal "Should convert long sentence with tilde" "<p><s>The universe is ~7 days old</s>. The universe is 13 billion years old.</p>" (convert "~~The universe is \\~7 days old~~. The universe is 13 billion years old.")
]
linkTests =
@@ -74,6 +74,7 @@ unorderedListTests =
check_equal "Paragraph before list" "<p>This is a list</p><ul><li>Item 1</li><li>Item 2</li></ul>" (convert "This is a list\n\n* Item 1\n* Item 2"),
check_equal "Paragraph before list" "<h3>This is a list</h3><ul><li>Item 1</li><li>Item 2</li></ul>" (convert "### This is a list\n\n* Item 1\n* Item 2"),
check_equal "Nested list then back" "<ul><li>Item 1</li><li>Item 2<ul><li>Item 3</li><li>Item 4</li></ul></li><li>Item 5</li></ul>" (convert "- Item 1\n- Item 2\n - Item 3\n - Item 4\n- Item 5"),
check_equal "Triply nested list" "<ul><li>Item 1</li><li>Item 2<ul><li>Item 3<ul><li>Item 4</li></ul></li></ul></li><li>Item 5</li></ul>" (convert "- Item 1\n- Item 2\n - Item 3\n - Item 4\n- Item 5"),
check_equal "Blockquote in list" "<ul><li>Item 1</li><li><p>Item 2</p><blockquote><p>Quote</p></blockquote></li><li>Item 3</li></ul>" (convert "- Item 1\n- Item 2\n\n > Quote\n\n- Item 3"),
check_equal "Ordered list in unordered list" "<ul><li>Item 1</li><li>Item 2<ol><li>Item 1</li><li>Item 2</li></ol></li><li>Item 3</li></ul>" (convert "- Item 1\n- Item 2\n 1. Item 1\n 2. Item 2\n- Item 3")
]
@@ -120,6 +121,14 @@ horizontalRuleTests =
TestList
[check_equal "Horizontal Rule" "<p>a</p><hr><p>b</p>" (convert "a\n\n---\n\nb")]
subscriptTests =
TestList
[check_equal "Should convert subscript" "A<sub>b</sub>" (convert "A~b~")]
superscriptTests =
TestList
[check_equal "Should convert superscript" "A<sup>b</sup>" (convert "A^b^")]
tableTests =
TestList
[ check_equal