113 Commits

Author SHA1 Message Date
ca328a464a Add prettyPrint definition for horizontal rule 2025-07-15 10:51:10 -04:00
7d45b1123f Renamed md-to-html-runner to mdtoh 2025-06-18 15:21:54 -04:00
9627abcd12 Updated test 2025-06-10 14:14:56 -04:00
82277e9ea8 Only add newlines for linebreak when pretty printing 2025-06-10 14:14:26 -04:00
d074b0131c Parse linebreaks as a backslash before a newline 2025-06-10 14:10:45 -04:00
57cb3e68fa Import module for word wrapping; add package to cabal file 2025-06-10 14:02:20 -04:00
4e9f84c2bb Add function to pretty print; commented out my word wrap and use a
built-in one instead
2025-06-10 14:01:52 -04:00
e025614324 Print extra newline if output text doesn't include a newline 2025-06-10 09:13:40 -04:00
e711444066 Add more packages to cabal file 2025-06-10 09:13:15 -04:00
6b99a1835d Created a separate parser list for all parsers (except the unit parser
is replaced with the non-newline unit parser); use that parser when
parsing list lines
2025-06-10 09:12:42 -04:00
04167e0f96 Parse CSS classes in image and figure 2025-06-10 09:11:56 -04:00
0528e813c5 Parser for CSS classes 2025-06-10 09:11:34 -04:00
b1b99189c9 Link tect can be empty; inline code cannot be empty and can have nested
backticks; created a unit parser for all characters except newline
2025-06-10 09:11:19 -04:00
ade3768e29 Try and backtrack 2025-06-10 09:10:10 -04:00
fd6d39ecd6 Parse space at beginning of list 2025-06-10 09:09:53 -04:00
0f04342867 More trying and backtracking; parse and discard extraneous spaces at
beginning of list
2025-06-10 09:09:31 -04:00
80ef93bbc9 Try parsing an ordered list item, backtrack if not possible 2025-06-10 09:08:52 -04:00
b73d4131b6 Added support for tables and codeblocks
Defined the types, defined 'show', created the parsers, added them to
parser list
2025-06-10 09:08:30 -04:00
c48b8c5ae8 Images and figures now support CSS classes 2025-06-10 09:05:31 -04:00
cf4282b26e More imports 2025-06-10 09:04:28 -04:00
7b40d6fe7c Imports 2025-06-10 09:04:18 -04:00
c4255d4578 Added a test for a list with just one item 2025-06-10 09:04:02 -04:00
dcbbff13cb Spacing change 2025-06-10 09:03:48 -04:00
592fad2b46 Added tests for tables 2025-06-10 09:03:32 -04:00
b8ba27f240 Strip newlines when comparing in test 2025-06-03 15:37:27 -04:00
bb08b40512 Replaced nested bold with asterisks, with asterisks and underscores 2025-06-03 15:37:12 -04:00
93548a4533 Never mind, doesn't seem to work well 2025-06-03 15:31:13 -04:00
160cb0edeb Trying to get nested bold and italic to work 2025-06-03 15:30:35 -04:00
2893fa25e6 Include new packages 2025-06-03 11:20:02 -04:00
324e5da82d Use new definition for lefmostLongestParse 2025-06-03 11:19:42 -04:00
05e5548aa9 Huge rewrite - use megaparsec instead of readP 2025-06-03 11:19:19 -04:00
1915628a2b Used 'in-order' parsing for headers, instead of leftmostLongestParse 2025-05-27 14:12:13 -04:00
1d9ac86a2a Implementation and test for horizontal rule 2025-05-27 13:41:38 -04:00
0320402957 Fixed how trailing newlines are parsed 2025-05-27 09:28:19 -04:00
8696a185a7 Rewrote readLines function to be more concise 2025-05-27 08:55:10 -04:00
da38ac226f Added figure implementation and tests 2025-05-27 08:54:52 -04:00
1fcce32ef6 Updated to read from stdin/file 2025-05-23 19:03:23 -04:00
e50081614a Removed section from cabal file 2025-05-23 19:03:14 -04:00
b98a8cc44f Added image tests 2025-05-23 14:55:58 -04:00
90c7a585d2 Added image parser 2025-05-23 14:04:37 -04:00
4a15330874 Updated showing of image 2025-05-23 14:04:21 -04:00
c14112d3e4 Updated special and escapable characters 2025-05-23 14:04:05 -04:00
ed7d2c1ef1 Added code parser to list line parser list 2025-05-20 22:23:41 -04:00
eb20f154a4 Removed unused function 2025-05-20 22:20:36 -04:00
172985131b Fixed HTML rendering for horizontal rule 2025-05-20 22:20:23 -04:00
3781e67ab1 Created a separate list of escapable chars 2025-05-20 22:20:05 -04:00
f2d54edd3f Implemented inline code parsing 2025-05-20 22:19:44 -04:00
5393dc4eb9 Added code tests 2025-05-20 22:18:46 -04:00
e051c87f08 Factor list line common parsing into a separate function; refactored
OList and UList line parsing to us it
2025-05-20 16:48:31 -04:00
9b1c51897c A nested list can be ordered or unordered 2025-05-20 16:47:58 -04:00
2a3dddc7b0 Rename function 2025-05-20 16:47:31 -04:00
a8793b5adb Remove obsolete comments 2025-05-20 16:47:16 -04:00
eecec764ad A list can contain a blockquote as well. 2025-05-20 16:47:04 -04:00
cdca6ea95e A list item doesn't need to have a nested list item parser, because the
line item parser handles the nested list.
2025-05-20 16:46:52 -04:00
540b5430e5 Use list of document parsers in parseDocument 2025-05-20 16:45:56 -04:00
00dfba81eb Created a list of document parsers; implemented ordered list parsing. 2025-05-20 16:45:37 -04:00
39152c0034 Factor out common code for UList and OList parsing into a separate
function. Refactored UList function; wrote OList function.
2025-05-20 16:45:07 -04:00
41b35be7c9 Rename function 2025-05-20 16:43:35 -04:00
d2c8565f62 Import Data.Char for isDigit 2025-05-20 16:43:22 -04:00
62eeef2abb Removed unused functions 2025-05-20 16:43:10 -04:00
9c6634cfec Added ordered list tests 2025-05-20 16:42:40 -04:00
2a5a68b1de Fixed test name 2025-05-20 16:42:12 -04:00
f8e1a98bdf Remove obsolete comment 2025-05-20 14:03:30 -04:00
05433c31f1 Remove unused functions 2025-05-20 14:02:50 -04:00
5c871f2b25 Removed test file 2025-05-20 12:25:07 -05:00
5273c99e6e Added unordered list tests and integration tests; added strikethrough
tests to test list
2025-05-20 12:24:20 -05:00
50888c9c3d Added bold and strikethrough tests 2025-05-20 12:23:47 -05:00
45115c765c An unordered list must end in a blank line. 2025-05-20 12:23:27 -05:00
5b0d42fd2d Use the in-order parsing approach instead of the post-order one. 2025-05-20 12:23:13 -05:00
2a585d00f2 Enforce at least one space between list indicator and list text. 2025-05-20 12:22:37 -05:00
11a3b14cb1 Define a parser for list line tokens, update line token parser 2025-05-20 12:22:19 -05:00
58d3142855 Update comment 2025-05-20 12:21:45 -05:00
0fb651fffc Add parseUnit instead of parseString to lineParsers 2025-05-20 12:20:51 -05:00
bc05dede06 Create a list of parsers that are used for list line items. 2025-05-20 12:20:35 -05:00
b69e34f823 Parse a single character as a unit 2025-05-20 12:20:10 -05:00
2514ecdafc Parse bold, italic and strikethrough in-order, instead of trying to find
the end, then parsing everything in the middle.

The current approach parses the opening bold (or italic), some text,
then the closing bold (or italic), instead of parsing the opening,
closing, then everything in between.
2025-05-20 12:19:59 -05:00
c52d5556a2 Allow for multiple blank lines after header 2025-05-20 12:18:52 -05:00
5fc1b1122a Create a function to 'fallthrough parse' ie. try the second parser only
if the first one fails.
2025-05-20 12:18:23 -05:00
83dd0024c4 Space doesn't have to be a reserved character anymore. 2025-05-20 12:17:59 -05:00
70761649ad Derive Eq for defined types 2025-05-20 12:17:43 -05:00
b9c6cc4470 Implemented strikethrough parser 2025-05-16 19:29:39 -05:00
23691f9cfe Add strikethrough parser to line parser list 2025-05-16 19:28:41 -05:00
8c220cc800 A document can consist of unordered lists as well 2025-05-14 21:40:03 -05:00
ee453c0259 Fixed blockquote parser; implemented unordered list parser and relevant
sub-parsers
2025-05-14 21:39:27 -05:00
c90d23617a A blockquote must have a list of tokens 2025-05-14 21:38:50 -05:00
c574699a8a Added an import 2025-05-14 21:38:38 -05:00
f55e160e25 Added tests for unordered lists 2025-05-14 21:37:58 -05:00
dddcca0185 Wrote a lot of helper functions - most importantly greedyParse 2025-05-13 21:40:42 -05:00
e7d94f225a Updated show definitions for some tokens; add HTML syntax 2025-05-09 23:14:09 -05:00
e8eb22f3ae Added pragmas to suppress LSP warnings; removed a random line that was
added by the LSP
2025-05-09 23:13:31 -05:00
ef1809970b Greatly shortened parseQuotedLine, because I can just use greedyParse
instead of using manyTill
2025-05-09 23:12:53 -05:00
549504d650 Consume whitespace between greater-than sign and text in a blockquote
line
2025-05-09 23:12:12 -05:00
4f23592aeb Add relevant modules to other-modules section, to get rid of cabal's
warnings
2025-05-09 23:10:34 -05:00
b00d79b9aa Renamed Test.hs to MdToHtmlTest.hs so that the filename matches the
module name; updated blockquote tests; started working on ordered list
tests
2025-05-09 23:10:04 -05:00
3cd9f24935 Wrote helper functions for parseBlockquote, to parse a quoted line and
multiple quoted lines.
2025-05-09 16:12:09 -05:00
a60b3754e4 Rewrite leftmostLongest and leftmostLongestParse so that they don't rely
on 'head' which is a partial function
2025-05-09 12:17:53 -05:00
3330185393 Make MdToken a Semigroup and a Monoid, so that I can use 'mempty' for
it.
2025-05-09 12:17:22 -05:00
1df7f64aec Started working on blockquote parser 2025-05-08 17:54:00 -05:00
bfd627c763 Added blockquote tests 2025-05-08 17:53:37 -05:00
81671727b2 Added more parsers for escaped characters and links. 2025-05-07 14:21:13 -05:00
1b821c4315 Declare separate variable for escaped characters. 2025-05-07 14:20:51 -05:00
51728dd3a1 Added double-quotes to link URL when printing link. 2025-05-07 14:20:29 -05:00
56e1514213 Added more tests 2025-05-07 14:19:50 -05:00
e7ea7b6ba6 Removed unecessary case statement 2025-05-06 17:10:54 -05:00
ef132791a1 Applied lots of hints, mostly redundant brackets 2025-05-06 17:09:54 -05:00
ca0d09dfab Added another test 2025-05-06 17:07:46 -05:00
d1b0ce6b10 Move parseHeader up in the chain - parseDocument can either parse
headers or a paragraph
2025-05-06 17:06:22 -05:00
b6f51c33c7 Parse until EOF instead of adding a manual check. 2025-05-06 17:05:19 -05:00
9ffbb7365c Removed commented line 2025-05-06 17:05:02 -05:00
71aacdd26a Removed unecessary parentheses 2025-05-06 17:04:24 -05:00
873795e267 Removed unecessary function 2025-05-06 17:03:45 -05:00
9a128407cc Fixed newline bug; working on fixing the last failing test 2025-05-05 11:31:24 -04:00
a18d03e4ac Working on parsing single newlines 2025-05-05 09:42:53 -05:00
6 changed files with 663 additions and 158 deletions

View File

@@ -1,8 +1,30 @@
module Main where module Main where
import MdToHTML import MdToHTML
import System.Environment
import System.IO
readLinesHelper :: [String] -> IO [String]
readLinesHelper xs = do
done <- isEOF
if done
then return xs
else do
line <- getLine
let xs' = line : xs
readLinesHelper xs'
readLines :: IO [String]
readLines = reverse <$> readLinesHelper []
main :: IO () main :: IO ()
main = do main = do
let res = fst $ leftmostLongestParse parseDocument "# _Hello_\n" args <- getArgs
putStrLn (show res) fileContents <- case args of
[] -> getContents
x : _ -> readFile x
let res = leftmostLongestParse parseDocument fileContents
let toPrint = prettyPrint res
case reverse toPrint of
'\n' : _ -> putStr toPrint
_ -> putStrLn toPrint

View File

@@ -56,18 +56,22 @@ common warnings
library library
hs-source-dirs: src hs-source-dirs: src
exposed-modules: MdToHTML exposed-modules: MdToHTML
other-modules: MdToHtmlTest
build-depends: base ^>=4.19.1.0, build-depends: base ^>=4.19.1.0,
HUnit HUnit,
megaparsec,
parser-combinators,
text,
MissingH,
word-wrap
executable md-to-html-runner executable mdtoh
-- Import common warning flags. -- Import common warning flags.
import: warnings import: warnings
-- .hs or .lhs file containing the Main module. -- .hs or .lhs file containing the Main module.
main-is: Main.hs main-is: Main.hs
-- Modules included in this executable, other than Main.
-- other-modules:
-- LANGUAGE extensions used by modules in this package. -- LANGUAGE extensions used by modules in this package.
-- other-extensions: -- other-extensions:

View File

@@ -1,177 +1,528 @@
{-# OPTIONS_GHC -Wno-unrecognised-pragmas #-}
{-# HLINT ignore "Use lambda-case" #-}
module MdToHTML where module MdToHTML where
import Control.Applicative import Control.Applicative hiding (many, some)
import Control.Monad import Control.Monad
import Control.Monad.Combinators (count)
import Data.Char
import Data.List import Data.List
import Data.Ord (comparing)
import Data.String.Utils
import qualified Data.Text as T
import Data.Void
import Debug.Trace import Debug.Trace
import Text.ParserCombinators.ReadP import Text.Megaparsec
import Text.Megaparsec.Char
import Text.Printf import Text.Printf
import Text.Wrap
type Parser = Parsec Void T.Text
type HeaderLevel = Int type HeaderLevel = Int
newtype URL = URL {getUrl :: String} type CssClass = String
newtype ImgPath = ImgPath {getPath :: String} newtype URL = URL {getUrl :: String} deriving (Eq)
parseMany :: ReadP a -> ReadP [a] newtype ImgPath = ImgPath {getPath :: String} deriving (Eq)
parseMany = Text.ParserCombinators.ReadP.many
data MdToken data MdToken
= Document [MdToken] = Document [MdToken]
| Header HeaderLevel MdToken | Header HeaderLevel MdToken
| Para MdToken | Para MdToken
| Line [MdToken] | Line [MdToken]
| SingleNewline -- A single newline is rendered as a space.
| Linebreak | Linebreak
| HorizontalRule | HorizontalRule
| Blockquote MdToken | Blockquote [MdToken]
| UnordList [MdToken] | UnordList [MdToken]
| OrdList [MdToken] | OrdList [MdToken]
| Code String | Code MdToken
| Codeblock String | Table [[MdToken]]
| Codeblock MdToken
| Link MdToken URL | Link MdToken URL
| Image MdToken ImgPath | Image MdToken URL (Maybe [CssClass])
| Figure MdToken URL (Maybe [CssClass])
| Bold MdToken | Bold MdToken
| Italic MdToken | Italic MdToken
| Strikethrough MdToken | Strikethrough MdToken
| Unit String | Unit String
deriving (Eq)
-- Deriving Show for MdToken -- Deriving Show for MdToken
instance Show MdToken where instance Show MdToken where
show (Document tokens) = concat (map show tokens) show (Document tokens) = concatMap show tokens
show (Header level token) = "<h" ++ show level ++ ">" ++ show token ++ "</h" ++ show level ++ ">" show (Header level token) = "<h" ++ show level ++ ">" ++ show token ++ "</h" ++ show level ++ ">"
show (Para token) = "<p>" ++ show token ++ "</p>" show (Para token) = "<p>" ++ show token ++ "</p>"
show (Line tokens) = concat (map show tokens) show (Line tokens) = concatMap show tokens
show Linebreak = "<br>" show Linebreak = "<br />"
show HorizontalRule = "---------" show SingleNewline = " "
show (Blockquote token) = "BLOCK" ++ show token show HorizontalRule = "<hr>"
show (UnordList tokens) = "UNORD" ++ concat (map show tokens) show (Blockquote tokens) = "<blockquote>" ++ concatMap show tokens ++ "</blockquote>"
show (OrdList tokens) = "ORD" ++ concat (map show tokens) show (UnordList tokens) = "<ul>" ++ concatMap (prepend "<li>" . append "</li>" . show) tokens ++ "</ul>"
show (Code code) = show code show (OrdList tokens) = "<ol>" ++ concatMap (prepend "<li>" . append "</li>" . show) tokens ++ "</ol>"
show (Codeblock code) = show code show (Code code) = "<code>" ++ strip (show code) ++ "</code>"
show (Link txt url) = "<a href=" ++ (getUrl url) ++ ">" ++ show txt ++ "</a>" show (Table (thead : tokenGrid)) = "<table><thead><tr>" ++ concatMap (\x -> "<th>" ++ rstrip (show x) ++ "</th>") thead ++ "</tr></thead>" ++ "<tbody>" ++ concatMap (\x -> "<tr>" ++ concatMap (\y -> "<td>" ++ rstrip (show y) ++ "</td>") x ++ "</tr>") tokenGrid ++ "</tbody></table>"
show (Image txt imgPath) = "<img src=" ++ (getPath imgPath) ++ ">" ++ show txt ++ "</img>" show (Codeblock code) = "<pre><code>" ++ show code ++ "</code></pre>"
show (Link txt url) = "<a href=\"" ++ getUrl url ++ "\">" ++ show txt ++ "</a>"
show (Image txt url cssClasses) = "<img src=\"" ++ getUrl url ++ "\"" ++ " alt=\"" ++ show txt ++ "\"" ++ maybe "" (\classes -> " class=\"" ++ unwords classes ++ "\"") cssClasses ++ "/>"
show (Figure txt url cssClasses) = "<figure><img src=\"" ++ getUrl url ++ "\" alt=\"" ++ show txt ++ "\"" ++ maybe "" (\classes -> " class=\"" ++ unwords classes ++ "\"") cssClasses ++ "/><figcaption aria-hidden=\"true\">" ++ show txt ++ "</figcaption></figure>"
show (Bold token) = "<b>" ++ show token ++ "</b>" show (Bold token) = "<b>" ++ show token ++ "</b>"
show (Italic token) = "<i>" ++ show token ++ "</i>" show (Italic token) = "<i>" ++ show token ++ "</i>"
show (Strikethrough token) = "<s>" ++ show token ++ "</s>" show (Strikethrough token) = "<s>" ++ show token ++ "</s>"
show (Unit unit) = printf "%s" unit show (Unit unit) = printf "%s" unit
-- Pretty print the given token into a string.
-- This is the same as calling 'show' for most tokens, but is different for paragraphs and tables,
-- which have newlines inserted into them.
prettyPrint :: MdToken -> String
prettyPrint (Para token) = "<p>" ++ T.unpack (wrapText defaultWrapSettings 70 (T.pack $ prettyPrint token)) ++ "</p>\n"
prettyPrint (Table (thead : tokenGrid)) = "<table>\n<thead>\n<tr>\n" ++ concatMap (\x -> "<th>" ++ rstrip (prettyPrint x) ++ "</th>\n") thead ++ "</tr>\n</thead>\n" ++ "<tbody>\n" ++ concatMap (\x -> "<tr>\n" ++ concatMap (\y -> "<td>" ++ rstrip (prettyPrint y) ++ "</td>\n") x ++ "</tr>\n") tokenGrid ++ "</tbody>\n</table>\n"
prettyPrint Linebreak = "<br />\n"
prettyPrint HorizontalRule = "<hr>\n"
prettyPrint (Line tokens) = concatMap prettyPrint tokens
prettyPrint (Document tokens) = concatMap prettyPrint tokens
prettyPrint token = show token
instance Semigroup MdToken where
a <> b = Document [a, b]
instance Monoid MdToken where
mempty = Unit ""
-- --------------- -- ---------------
-- Helpers -- Helpers
mustBeHash :: ReadP Char leftmostLongest :: (Foldable t) => [(a, t b)] -> Maybe (a, t b)
mustBeHash = satisfy (\x -> x == '#')
leftmostLongest :: (Foldable t) => [(a, t b)] -> (a, t b)
leftmostLongest xs = leftmostLongest xs =
let lastElem = (last xs) let lastElem = last xs
filteredLst = (filter (\val -> (length $ snd val) == (length $ snd lastElem)) xs) filteredLst = filter (\val -> length (snd val) == length (snd lastElem)) xs
in head filteredLst in case filteredLst of
[] -> Nothing
(x : xs) -> Just x
-- Get the first parse returned by readP_to_S that consumed the most input -- Get the first parse returned by readP_to_S that consumed the most input
leftmostLongestParse :: ReadP a -> String -> (a, String) leftmostLongestParse :: (Monoid a) => Parser a -> String -> a
leftmostLongestParse parser input = leftmostLongest $ readP_to_S parser input leftmostLongestParse parser input =
case runParser parser "input" (T.pack input) of
(Left a) -> mempty
(Right a) -> a
-- Parse if the string that's left matches the string comparator function specialChars = ">\n\\`*_{}[]#+|"
lookaheadParse :: (String -> Bool) -> ReadP Char
lookaheadParse stringCmp = do
lookahead <- look
case stringCmp lookahead of
True -> get
False -> pfail
lineToList :: MdToken -> [MdToken] escapableChars = "-~!.$()" ++ specialChars
lineToList (Line tokens) = tokens
-- Makes a parser greedy. Instead of returning all possible parses, only the longest one is returned.
greedyParse :: Parser a -> Parser [a]
greedyParse parser = do
greedyParse1 parser <|> return []
-- Like greedyParse, but the parser must succeed atleast once.
greedyParse1 :: Parser a -> Parser [a]
greedyParse1 parser = do
parsed1 <- parser
parsed2 <- greedyParse1 parser <|> return []
return (parsed1 : parsed2)
prepend :: [a] -> [a] -> [a]
prepend x1 x2 = x1 ++ x2
append :: [a] -> [a] -> [a]
append x1 x2 = x2 ++ x1
-- Parse until EOL or EOF
parseTillEol :: Parser String
parseTillEol = manyTill anySingle (void (char '\n') <|> eof)
-- Takes a list of parsers. Returns a parser that will try them in
-- order, moving to the next one only if the current one fails.
fallthroughParser :: [Parser a] -> Parser a
fallthroughParser [x] = x
fallthroughParser (x : xs) = try x <|> fallthroughParser xs
escapeChar :: Char -> String
escapeChar '>' = "&gt;"
escapeChar '<' = "&lt;"
escapeChar '&' = "&amp;"
escapeChar x = [x]
htmlEscapeChars :: T.Text -> T.Text
htmlEscapeChars = T.concatMap (T.pack . escapeChar)
-- -- Wraps a list of words after (at most) the given number of characters, trying to prevent word-breaks
-- wordwrap :: Int -> String -> String
-- wordwrap wraplength str = if (length str) < wraplength
-- then str
-- else
-- let spaceIndex = lastgtSpaceIndex 0 (takeRev (length str) - wraplength str)
--
-- where
-- takeRev n = (reverse . take n . reverse)
-- lastSpaceIndex counter str = case str of
-- [] -> counter
-- x:xs -> if (isSpace x) counter else lastSpaceIndex counter+1 xs
-- --------------- -- ---------------
-- Parse a markdown header, denoted by 1-6 #'s followed by some text, followed by EOL. -- Parse a markdown header, denoted by 1-6 #'s followed by some text, followed by EOL.
parseHeader :: ReadP MdToken parseHeader :: Parser MdToken
parseHeader = do parseHeader = do
skipSpaces space
headers <- many1 mustBeHash headers <- greedyParse1 (char '#')
when when
((length headers) > 6) (length headers > 6)
pfail empty
_ <- string " " space
-- text <- manyTill (get) ((string "\n") <|> (eof >> return ""))-- Parse until EOL or EOF parsedText <- manyTill parseLineToken (void (char '\n') <|> eof)
text <- munch1 (/= '\n') greedyParse (char '\n')
Text.ParserCombinators.ReadP.optional (char '\n') return (Header (length headers) (Line parsedText))
let parsedText = fst $ leftmostLongestParse parseLine text
return (Header (length headers) parsedText) asteriskBold = T.pack "**"
underscoreBold = T.pack "__"
-- Parse bold text -- Parse bold text
parseBold :: ReadP MdToken parseBold :: Parser MdToken
parseBold = do parseBold = parseBoldWith asteriskBold <|> parseBoldWith underscoreBold
text <- where
choice parseBoldWith delim = do
[ between (string "__") (string "__") (many1 (lookaheadParse (/= "__"))), string delim
between (string "**") (string "**") (many1 (lookaheadParse (/= "**"))) inside <- someTill parseLineToken $ string delim
] return (Bold (Line inside))
let parsedText = fst $ leftmostLongestParse parseLine text
return (Bold parsedText)
-- Parse italic text -- Parse italic text
parseItalic :: ReadP MdToken parseItalic :: Parser MdToken
parseItalic = do parseItalic = parseItalicWith '*' <|> parseItalicWith '_'
text <- where
choice parseItalicWith delim = do
[ (between (string "_") (string "_") (munch1 (/= '_'))), char delim
(between (string "*") (string "*") (munch1 (/= '*'))) inside <- someTill parseLineToken (char delim)
] return (Italic (Line inside))
let parsedText = fst $ leftmostLongestParse parseLine text
return (Italic parsedText) -- Parse strikethrough text
parseStrikethrough :: Parser MdToken
parseStrikethrough = do
string (T.pack "~~")
inside <- someTill parseLineToken $ string (T.pack "~~")
return (Strikethrough (Line inside))
-- Parse code
parseCode :: Parser MdToken
parseCode = do
opening <- some $ char '`'
inside <- someTill (satisfy (/= '\n')) (char '`')
closing <- count (length opening - 1) (char '`')
return (Code (Unit (concatMap escapeChar inside)))
-- Parse a link
parseLink :: Parser MdToken
parseLink = do
char '['
linkText <- manyTill parseLineToken (char ']')
char '('
linkURL <- manyTill anySingle (char ')')
return $ Link (Line linkText) (URL linkURL)
-- Parse a linebreak character -- Parse a linebreak character
parseLinebreak :: ReadP MdToken parseLinebreak :: Parser MdToken
parseLinebreak = do parseLinebreak = parseLinebreakSpace <|> parseLinebreakBackslash
char ' ' where
many1 (char ' ') parseLinebreakSpace = do
char ' '
some (char ' ')
char '\n'
return Linebreak
parseLinebreakBackslash = try $ do
char '\\'
char '\n'
return Linebreak
parseTableRow :: Parser [MdToken]
parseTableRow = do
char '|'
row <- some (many (satisfy (\x -> x == ' ' || x == '\t')) *> someTill parseListLineToken (char '|'))
return (map Line row)
parseTable :: Parser MdToken
parseTable = do
tableHead <- parseTableRow
char '\n' char '\n'
return Linebreak char '|'
sepEndBy1 (some (char '-')) (char '|') *> char '\n'
tableBody <- sepEndBy parseTableRow (char '\n')
many (char '\n') -- Parse trailing newlines, if any
return $ Table (tableHead : tableBody)
-- Parse a regular string as a Unit. parseSingleNewline :: Parser MdToken
parseString :: ReadP MdToken parseSingleNewline = do
parseString = do char '\n'
firstChar <- get -- Must parse at least one character here remaining <- getInput
text <- munch (\x -> not (elem x "#*_[\n ")) case T.unpack remaining of
return (Unit (firstChar : text)) [] -> return $ Unit ""
_ -> return SingleNewline
lineParsers :: [ReadP MdToken] parseCssClasses :: Parser [CssClass]
lineParsers = [parseLinebreak, parseBold, parseItalic, parseString] -- A 'line' doesn't include a 'header' parseCssClasses = do
char '{'
classes <- some parseCssClass
char '}'
return classes
where
parseCssClass :: Parser CssClass
parseCssClass = do
char '.'
let firstLetterParser = char '_' <|> char '-' <|> label "letter" (satisfy isAlpha)
cssClassFirstLetter <- firstLetterParser
cssClass <- many (firstLetterParser <|> label "digit" (satisfy isDigit))
space
return (cssClassFirstLetter : cssClass)
parseImage :: Parser MdToken
parseImage = do
char '!'
link <- parseLink
cssClasses <- optional $ try parseCssClasses
case link of
Link text path -> return $ Image text path cssClasses
_ -> empty -- This should never be reached
parseFigure = do
img <- parseImage
void (string doubleNewlineText) <|> eof
case img of
Image text path cssClasses -> return $ Figure text path cssClasses
_ -> return img
-- Parse an escaped character
parseEscapedChar :: Parser MdToken
parseEscapedChar = do
char '\\'
escapedChar <- choice (map char escapableChars) -- Parse any of the special chars.
return (Unit [escapedChar])
-- Parse a character as a Unit.
parseUnit :: Parser MdToken
parseUnit = do
-- text <- satisfy (`notElem` specialChars)
text <- anySingle
return (Unit [text])
-- Parse any character except a newline
parseUnitExceptNewline :: Parser MdToken
parseUnitExceptNewline = do
-- text <- satisfy (`notElem` specialChars)
text <- satisfy (/= '\n')
return (Unit [text])
lineParsers :: [Parser MdToken]
lineParsers =
[ parseLinebreak,
parseSingleNewline,
parseEscapedChar,
parseCode,
parseImage,
parseBold,
parseItalic,
parseStrikethrough,
parseLink,
parseUnit
] -- A 'line' doesn't include a 'header'
lineParsersWithoutNewline :: [Parser MdToken]
lineParsersWithoutNewline =
[ parseEscapedChar,
parseCode,
parseImage,
parseBold,
parseItalic,
parseStrikethrough,
parseLink,
parseUnitExceptNewline
] -- A list line cannot contain newlines.
-- List of all parsers -- List of all parsers
allParsers :: [ReadP MdToken] allParsers :: [Parser MdToken]
allParsers = parseHeader : lineParsers allParsers = parseHeader : lineParsers
-- Parse any of the above tokens. -- Parse any of the line tokens.
parseLineToken :: ReadP MdToken parseLineToken :: Parser MdToken
parseLineToken = choice lineParsers parseLineToken = fallthroughParser lineParsers
-- Parse any of the list line tokens.
parseListLineToken :: Parser MdToken
parseListLineToken = fallthroughParser lineParsersWithoutNewline
-- Parse a line, consisting of one or more tokens. -- Parse a line, consisting of one or more tokens.
parseLine :: ReadP MdToken parseLine :: Parser MdToken
parseLine = do parseLine = do
skipSpaces space
-- Fail if we have reached the end of the document. -- Fail if we have reached the end of the document.
remaining <- look parsed <- manyTill parseLineToken eof
when (null remaining) pfail
parsed <- parseMany parseLineToken
-- traceM $ show parsed
return (Line parsed) return (Line parsed)
-- Parse a paragraph, which is a 'Line' (can span multiple actual lines), separated by double-newlines. -- Parse a paragraph, which is a 'Line' (can span multiple actual lines), separated by double-newlines.
-- As a weird special case, a 'Paragraph' can also be a 'Header'. parsePara :: Parser MdToken
parsePara :: ReadP MdToken
parsePara = do parsePara = do
parseMany (char '\n') space
-- text <- many1 (lookaheadParse (\x -> ((length x) < 2) || (take 2 x) /= "\n\n")) -- Parse until a double-newline. -- text <- many1 (lookaheadParse (\x -> ((length x) < 2) || (take 2 x) /= "\n\n")) -- Parse until a double-newline.
-- string "\n\n" <|> (eof >> return "") -- Consume the next double-newline or EOF. -- string "\n\n" <|> (eof >> return "") -- Consume the next double-newline or EOF.
text <- (manyTill get ((string "\n\n") <|> (eof >> return ""))) parsedText <- someTill parseLineToken (try paraEnding)
when (null text) pfail many (char '\n')
let parsedText = fst $ leftmostLongestParse (parseHeader <|> parseLine) text -- Parse either a line or a header. return (Para (Line parsedText))
-- If the paragraph is a header, return a Header token. Otheriwse return a Para token. where
case parsedText of paraEnding = void (char '\n' *> (char '\n' <|> lookAhead (char '>'))) <|> eof
Header level token -> return (Header level token)
_ -> return (Para parsedText) -- Parse a line starting with '>', return the line except for the '>'.
parseQuotedLine :: Parser String
parseQuotedLine = do
char '>'
many (char ' ' <|> char '\t')
restOfLine <- many (satisfy (/= '\n'))
void (char '\n') <|> eof
return restOfLine
-- Parse many 'quoted lines' until I see a non-quoted line.
parseQuotedLines :: Parser [String]
parseQuotedLines = some parseQuotedLine
-- some $ do
-- getInput >>= \line ->
-- case T.unpack line of
-- ('>' : _) -> parseQuotedLine
-- _ -> empty
-- Parse a blockquote, which is a greater-than sign followed by a paragraph.
parseBlockquote :: Parser MdToken
parseBlockquote = do
quotedLines <- parseQuotedLines
-- remaining <- look
-- let quotedLines = fst $ leftmostLongestParse parseQuotedLines remaining
-- string (init $ unlines quotedLines)
let parsedQuotedLines = leftmostLongestParse (some (parseBlockquote <|> parsePara)) (init $ unlines quotedLines) -- unlines joins the lines together with a newline, and adds a trailing newline. init removes the trailing newline.
return (Blockquote parsedQuotedLines)
-- Parse a nested list item.
parseListNested :: Parser MdToken
parseListNested = do
let firstCharParser = string (T.pack " ") <|> string (T.pack "\t")
let restOfLineParser = manyTill anySingle (void (char '\n') <|> eof)
lines <- greedyParse1 (firstCharParser *> restOfLineParser)
let linesParsed = leftmostLongestParse (parseUnorderedList <|> parseOrderedList) (init $ unlines lines)
when (null (show linesParsed)) empty
return linesParsed
-- Parse an unordered list line item.
parseUListLineItem :: Parser MdToken
parseUListLineItem = do
firstChar <- choice (map char ['*', '+', '-'])
char ' ' -- At least one space between list indicator and list text.
parseListLineItemCommon
-- Parse an ordered list line item.
parseOListLineItem :: Parser MdToken
parseOListLineItem = do
num <- greedyParse1 (satisfy isDigit)
char '.'
char ' ' -- At least one space between list indicator and list text.
parseListLineItemCommon
-- Common code for parsing list line items
parseListLineItemCommon :: Parser MdToken
parseListLineItemCommon = do
space
restOfLine <- manyTill parseListLineToken (void (char '\n') <|> eof)
nestedList <- try parseListNested <|> return (Unit "")
return $ Line [Line restOfLine, nestedList]
-- Parse an unordered list paragraph item.
parseUListParaItem :: Parser MdToken
parseUListParaItem = do
firstLine <- parseUListLineItem
res <- parseListParaItemCommon
return $ Document (Para firstLine : res) -- I only wrap this in a document because I want some way of converting [MdToken] to MdToken, without any overhead. There is no other reason to wrap it in a Document.
-- Parse an unordered list paragraph item.
parseOListParaItem :: Parser MdToken
parseOListParaItem = do
firstLine <- parseOListLineItem
res <- parseListParaItemCommon
return $ Document (Para firstLine : res) -- I only wrap this in a document because I want some way of converting [MdToken] to MdToken, without any overhead. There is no other reason to wrap it in a Document.
-- Common code for parsing list paragraph items.
-- A list paragraph item is defined as a line item, followed by an empty line, followed by one or more
-- lines indented by a space or tab.
-- A list paragraph item can also be a blockquote.
parseListParaItemCommon :: Parser [MdToken]
parseListParaItemCommon = do
char '\n'
lines <- greedyParse1 ((string (T.pack " ") <|> string (T.pack "\t")) *> parseTillEol)
let res = leftmostLongestParse (greedyParse1 parseBlockquote <|> greedyParse1 parsePara) (init $ unlines lines)
char '\n'
return res -- I only wrap this in a document because I want some way of converting [MdToken] to MdToken, without any overhead. There is no other reason to wrap it in a Document.
-- Parse an unordered list item, which can be a line item or another list.
parseUListItem :: Parser MdToken
parseUListItem = space *> (try parseUListParaItem <|> parseUListLineItem)
-- Parse an unordered list.
parseUnorderedList :: Parser MdToken
parseUnorderedList = do
lineItems <- some $ try parseUListItem
void (char '\n') <|> eof -- A list must end in an extra newline or eof
return $ UnordList lineItems
-- --------
parseOListItem :: Parser MdToken
parseOListItem = space *> (try parseOListParaItem <|> parseOListLineItem)
-- Parses the first element of an ordered list, which must start with '1.'
parseFirstOListItem :: Parser MdToken
parseFirstOListItem = do
space
remaining <- getInput
when (take 2 (T.unpack remaining) /= "1.") empty
parseOListLineItem
parseOrderedList :: Parser MdToken
parseOrderedList = do
firstLine <- try parseFirstOListItem
lineItems <- many $ try parseOListItem
void (char '\n') <|> eof
return $ OrdList (firstLine : lineItems)
horizontalRuleText :: T.Text
horizontalRuleText = T.pack "---"
doubleNewlineText :: T.Text
doubleNewlineText = T.pack "\n\n"
parseHorizontalRule :: Parser MdToken
parseHorizontalRule = string horizontalRuleText *> (void (string doubleNewlineText) <|> eof) *> return HorizontalRule
parseCodeblock :: Parser MdToken
parseCodeblock = do
string (T.pack "```\n")
inside <- someTill anySingle (string (T.pack "\n```"))
return $ Codeblock (Unit (concatMap escapeChar inside))
documentParsers :: [Parser MdToken]
documentParsers =
[ parseHorizontalRule,
parseCodeblock,
parseTable,
parseHeader,
parseBlockquote,
parseUnorderedList,
parseOrderedList,
parseFigure,
parsePara
]
-- Parse a document, which is multiple paragraphs. -- Parse a document, which is multiple paragraphs.
parseDocument :: ReadP MdToken parseDocument :: Parser MdToken
parseDocument = (many1 parsePara) >>= (\res -> return (Document (res))) parseDocument = do
res <- manyTill (fallthroughParser documentParsers) eof
return (Document res)

177
src/MdToHtmlTest.hs Normal file
View File

@@ -0,0 +1,177 @@
module MdToHtmlTest where
import MdToHTML
import Test.HUnit
check_equal :: String -> String -> String -> Test
check_equal desc expected actual = TestCase (assertEqual desc expected actual)
convert :: String -> String
convert md = show $ leftmostLongestParse parseDocument md
headerTests =
TestList
[ check_equal "Should convert H1 heading" "<h1>Hello</h1>" (convert "# Hello"),
check_equal "Should convert H2 heading" "<h2>Hello</h2>" (convert "## Hello"),
check_equal "Should convert H3 heading" "<h3>Hello</h3>" (convert "### Hello"),
check_equal "Should convert H4 heading" "<h4>Hello</h4>" (convert "#### Hello"),
check_equal "Should convert H5 heading" "<h5>Hello</h5>" (convert "##### Hello"),
check_equal "Should convert H6 heading" "<h6>Hello</h6>" (convert "###### Hello")
]
boldTests =
TestList
[ check_equal "Should convert bold" "<p><b>Hello</b></p>" (convert "__Hello__"),
check_equal "Should convert italic" "<p><i>Hello</i></p>" (convert "_Hello_"),
check_equal "Should convert bold and italic in a sentence" "<p>It <i>is</i> a <b>wonderful</b> day</p>" (convert "It _is_ a __wonderful__ day"),
check_equal "Should convert nested bold and italic" "<p><b>Bold then <i>Italic</i></b></p>" (convert "**Bold then _Italic_**"),
check_equal "Should convert nested bold and italic" "<p><i>Italic then <b>Bold</b></i></p>" (convert "*Italic then __Bold__*")
]
strikethroughTests =
TestList
[ check_equal "Should convert strikethrough" "<p><s>Hello</s></p>" (convert "~~Hello~~"),
check_equal "Should convert long sentence with tilde" "<p><s>The universe is ~7 days old</s>. The universe is 13 billion years old.</p>" (convert "~~The universe is ~7 days old~~. The universe is 13 billion years old.")
]
linkTests =
TestList
[ check_equal "Should convert normal link" "<p><a href=\"https://example.com\">This is an example link.</a></p>" (convert "[This is an example link.](https://example.com)"),
check_equal "Should convert styled link" "<p><a href=\"https://example.com\"><b>Fancy</b>!!!</a></p>" (convert "[__Fancy__!!!](https://example.com)")
]
escapedCharTests =
TestList
[ check_equal "Should print literal underscore" "<p>This is an underscore - _</p>" (convert "This is an underscore - \\_"),
check_equal "Should print literal asterisk" "<p>This is an asterisk - *</p>" (convert "This is an asterisk - \\*"),
check_equal "Should print literal asterisk in bold" "<p>This is a bolded asterisk - <b>*</b></p>" (convert "This is a bolded asterisk - **\\***")
]
blockquoteTests =
TestList
[ check_equal "Should wrap para in blockquote" "<blockquote><p>What a <b>truly</b> <i>lovely</i> day!!!</p></blockquote>" (convert "> What a __truly__ _lovely_ day!!!"),
check_equal "Simple nested blockquotes" "<blockquote><p>Hello</p><blockquote><p>World</p></blockquote></blockquote>" (convert "> Hello\n>\n>> World"),
check_equal
"Nested blockquotes"
"<blockquote><p>Dorothy followed her through many \
\of the beautiful rooms in her castle.</p><blockquote><p>The Witch \
\bade her clean the pots and kettles and sweep the floor and keep the fire \
\fed with wood.</p></blockquote></blockquote>"
( convert
"> Dorothy followed her through many of the \
\beautiful rooms in her castle.\n> \n>> The Witch bade her \
\clean the pots and kettles and sweep the floor and keep the fire fed with wood."
)
]
unorderedListTests =
TestList
[ check_equal "Basic unordered list" "<ul><li>Item 1</li><li>Item 2</li><li>Item 3</li></ul>" (convert "* Item 1\n* Item 2\n* Item 3"),
check_equal "Mixing list indicators" "<ul><li>Item 1</li><li>Item 2</li><li>Item 3</li></ul>" (convert "* Item 1\n+ Item 2\n- Item 3"),
check_equal "Formatted lists" "<ul><li><b>Item 1</b></li><li><i>Item 2</i></li><li><b><i>Item 3</i></b></li></ul>" (convert "* __Item 1__\n+ _Item 2_\n- ***Item 3***"),
check_equal "Nested list" "<ul><li>Item 1</li><li>Item 2</li><li>Item 3<ul><li>Subitem 1</li><li>Subitem 2</li></ul></li></ul>" (convert "* Item 1\n* Item 2\n* Item 3\n * Subitem 1\n * Subitem 2"),
check_equal "Paragraph in list" "<ul><li>Item 1</li><li><p>Item 2</p><p>More stuff</p></li><li>Item 3</li></ul>" (convert "- Item 1\n- Item 2\n\n More stuff\n\n- Item 3"),
check_equal "Paragraph before list" "<p>This is a list</p><ul><li>Item 1</li><li>Item 2</li></ul>" (convert "This is a list\n\n* Item 1\n* Item 2"),
check_equal "Paragraph before list" "<h3>This is a list</h3><ul><li>Item 1</li><li>Item 2</li></ul>" (convert "### This is a list\n\n* Item 1\n* Item 2"),
check_equal "Nested list then back" "<ul><li>Item 1</li><li>Item 2<ul><li>Item 3</li><li>Item 4</li></ul></li><li>Item 5</li></ul>" (convert "- Item 1\n- Item 2\n - Item 3\n - Item 4\n- Item 5"),
check_equal "Blockquote in list" "<ul><li>Item 1</li><li><p>Item 2</p><blockquote><p>Quote</p></blockquote></li><li>Item 3</li></ul>" (convert "- Item 1\n- Item 2\n\n > Quote\n\n- Item 3"),
check_equal "Ordered list in unordered list" "<ul><li>Item 1</li><li>Item 2<ol><li>Item 1</li><li>Item 2</li></ol></li><li>Item 3</li></ul>" (convert "- Item 1\n- Item 2\n 1. Item 1\n 2. Item 2\n- Item 3")
]
orderedListTests =
TestList
[ check_equal "Basic ordered list" "<ol><li>Item 1</li><li>Item 2</li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n3. Item 3"),
check_equal "Mixing list numbering" "<ol><li>Item 1</li><li>Item 2</li><li>Item 3</li></ol>" (convert "1. Item 1\n3. Item 2\n2. Item 3"),
check_equal "Should not convert list without number 1" "<p>2. Item 1 1. Item 2</p>" (convert "2. Item 1\n1. Item 2"),
check_equal "Formatted lists" "<ol><li><b>Item 1</b></li><li><i>Item 2</i></li><li><b><i>Item 3</i></b></li></ol>" (convert "1. __Item 1__\n2. _Item 2_\n3. ***Item 3***"),
check_equal "Nested list" "<ol><li>Item 1</li><li>Item 2</li><li>Item 3<ol><li>Subitem 1</li><li>Subitem 2</li></ol></li></ol>" (convert "1. Item 1\n2. Item 2\n3. Item 3\n 1. Subitem 1\n 2. Subitem 2"),
check_equal "Paragraph in list" "<ol><li>Item 1</li><li><p>Item 2</p><p>More stuff</p></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n\n More stuff\n\n1. Item 3"),
check_equal "Paragraph before list" "<p>This is a list</p><ol><li>Item 1</li><li>Item 2</li></ol>" (convert "This is a list\n\n1. Item 1\n1. Item 2"),
check_equal "Paragraph before list" "<h3>This is a list</h3><ol><li>Item 1</li><li>Item 2</li></ol>" (convert "### This is a list\n\n1. Item 1\n200. Item 2"),
check_equal "Nested list then back" "<ol><li>Item 1</li><li>Item 2<ol><li>Item 3</li><li>Item 4</li></ol></li><li>Item 5</li></ol>" (convert "1. Item 1\n2. Item 2\n 1. Item 3\n 3. Item 4\n5. Item 5"),
check_equal "Blockquote in list" "<ol><li>Item 1</li><li><p>Item 2</p><blockquote><p>Quote</p></blockquote></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n\n > Quote\n\n3. Item 3"),
check_equal "Unordered list in ordered list" "<ol><li>Item 1</li><li>Item 2<ul><li>Item 1</li><li>Item 2</li></ul></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n - Item 1\n * Item 2\n4. Item 3"),
check_equal "List with just 1 item" "<ol><li>Item 1</li></ol>" (convert "1. Item 1")
]
htmlTests =
TestList
[check_equal "Convert HTML element" "<p><center>a</center></p>" (convert "<center>a</center>")]
codeTests =
TestList
[ check_equal "Code by itself" "<p><code>Hello world!</code></p>" (convert "`Hello world!`"),
check_equal "Code in a paragraph" "<p>The following <code>text</code> is code</p>" (convert "The following `text` is code"),
check_equal "Code across paragraphs (shouldn't work)" "<p>`Incomplete</p><p>Code`</p>" (convert "`Incomplete\n\nCode`") -- At the moment, this is just treated as a syntax error, so nothing is rendered.
]
imageTests =
TestList
[ check_equal "Image with text" "<p>This is an image <img src=\"img.png\" alt=\"Image 1\"/></p>" (convert "This is an image ![Image 1](img.png)"),
check_equal "Image with classes" "<p>This is an image <img src=\"img.png\" alt=\"Image 1\" class=\"new-img\"/></p>" (convert "This is an image ![Image 1](img.png){.new-img}")
]
figureTests =
TestList
[ check_equal "Image by itself" "<figure><img src=\"img.png\" alt=\"Image 1\"/><figcaption aria-hidden=\"true\">Image 1</figcaption></figure>" (convert "![Image 1](img.png)")
]
horizontalRuleTests =
TestList
[check_equal "Horizontal Rule" "<p>a</p><hr><p>b</p>" (convert "a\n\n---\n\nb")]
tableTests =
TestList
[ check_equal
"Basic table"
"<table>\
\<thead><tr><th>Col 1</th><th>Col 2</th><th>Col 3</th></tr></thead>\
\<tbody><tr><td>Data 1</td><td>Data 2</td><td>Data 3</td></tr>\
\<tr><td>More Data 1</td><td>More Data 2</td><td>More Data 3</td></tr></tbody></table>"
(convert "| Col 1 | Col 2 | Col 3 |\n|---|---|---|\n| Data 1 | Data 2 | Data 3 |\n| More Data 1 | More Data 2 | More Data 3 |")
]
integrationTests =
TestList
[ check_equal "Integration 1" "<h1>Sample Markdown</h1><p>This is some basic, sample markdown.</p><h2><b>Second</b> <i>Heading</i></h2>" (convert "# Sample Markdown\n\n This is some basic, sample markdown.\n\n ## __Second__ _Heading_"),
check_equal "Integration 2" "<p><b>Hello</b> <i>World</i></p>" (convert "__Hello__\n_World_"),
check_equal "Integration 3" "<h1>Hello</h1><p>World</p>" (convert "# Hello\nWorld"),
check_equal "Integration 4" "<p>a b</p>" (convert "a\nb"),
check_equal "Integration 5" "<h1>Hello</h1>" (convert "# Hello\n"),
check_equal "Integration 6" "<p>First line<br />Second line</p>" (convert "First line \nSecond line"),
check_equal
"Integration 7"
"<h1>Sample Markdown</h1><p>This is some basic, sample markdown.</p><h2>Second \
\Heading</h2><ul><li>Unordered lists, and:<ol><li>One</li><li>Two</li><li>\
\Three</li></ol></li><li>More</li></ul><blockquote><p>Blockquote</p>\
\</blockquote><p>And <b>bold</b>, <i>italics</i>, and even <i>italics \
\and later <b>bold</b></i>. Even <s>strikethrough</s>. \
\<a href=\"https://markdowntohtml.com\">A link</a> to somewhere.</p>"
( convert
"# Sample Markdown\n\nThis is some basic, sample markdown.\n\n## Second \
\Heading\n\n- Unordered lists, and:\n 1. One\n 2. Two\n 3. Three\n\
\- More\n\n> Blockquote\n\nAnd **bold**, *italics*, and even *italics and \
\later __bold__*. Even ~~strikethrough~~. [A link](https://markdowntohtml.com) to somewhere."
)
]
tests =
TestList
[ headerTests,
boldTests,
strikethroughTests,
linkTests,
escapedCharTests,
blockquoteTests,
unorderedListTests,
orderedListTests,
imageTests,
htmlTests,
figureTests,
codeTests,
horizontalRuleTests,
tableTests,
integrationTests
]
runTests = runTestTT tests

View File

@@ -1,44 +0,0 @@
module MdToHtmlTest where
import MdToHTML
import Test.HUnit
check_equal :: String -> String -> String -> Test
check_equal desc expected actual = TestCase (assertEqual desc expected actual)
convert :: String -> String
convert md = show . fst $ leftmostLongestParse parseDocument md
headerTests = TestList
[
check_equal "Should convert H1 heading" "<h1>Hello</h1>" (convert "# Hello"),
check_equal "Should convert H2 heading" "<h2>Hello</h2>" (convert "## Hello"),
check_equal "Should convert H3 heading" "<h3>Hello</h3>" (convert "### Hello"),
check_equal "Should convert H4 heading" "<h4>Hello</h4>" (convert "#### Hello"),
check_equal "Should convert H5 heading" "<h5>Hello</h5>" (convert "##### Hello"),
check_equal "Should convert H6 heading" "<h6>Hello</h6>" (convert "###### Hello")
]
boldTests = TestList
[
check_equal "Should convert bold" "<p><b>Hello</b></p>" (convert "__Hello__"),
check_equal "Should convert italic" "<p><i>Hello</i></p>" (convert "_Hello_"),
check_equal "Should convert bold and italic in a sentence" "<p>It <i>is</i> a <b>wonderful</b> day</p>" (convert "It _is_ a __wonderful__ day")
]
integrationTests = TestList
[
check_equal "Integration 1" "<h1>Sample Markdown</h1><p>This is some basic, sample markdown.</p><h2><b>Second</b> <i>Heading</i></h2>" (convert "# Sample Markdown\n\n This is some basic, sample markdown.\n\n ## __Second__ _Heading_"),
check_equal "Integration 2" "<p><b>Hello</b> <i>World</i></p>" (convert "__Hello__\n_World_"),
check_equal "Integration 3" "<h1>Hello</h1><p>World</p>" (convert "# Hello\nWorld")
]
tests = TestList
[
headerTests,
boldTests,
integrationTests
]
runTests = runTestTT tests

View File

@@ -1,5 +0,0 @@
leftmostLongest :: (Foldable t) => [t a] -> t a
leftmostLongest xs =
let lastElem = (last xs)
filteredLst = (filter (\val -> (length val) == (length lastElem)) xs)
in head filteredLst