Compare commits
113 Commits
f916267d29
...
usingMegap
Author | SHA1 | Date | |
---|---|---|---|
ca328a464a | |||
7d45b1123f | |||
9627abcd12 | |||
82277e9ea8 | |||
d074b0131c | |||
57cb3e68fa | |||
4e9f84c2bb | |||
e025614324 | |||
e711444066 | |||
6b99a1835d | |||
04167e0f96 | |||
0528e813c5 | |||
b1b99189c9 | |||
ade3768e29 | |||
fd6d39ecd6 | |||
0f04342867 | |||
80ef93bbc9 | |||
b73d4131b6 | |||
c48b8c5ae8 | |||
cf4282b26e | |||
7b40d6fe7c | |||
c4255d4578 | |||
dcbbff13cb | |||
592fad2b46 | |||
b8ba27f240 | |||
bb08b40512 | |||
93548a4533 | |||
160cb0edeb | |||
2893fa25e6 | |||
324e5da82d | |||
05e5548aa9 | |||
1915628a2b | |||
1d9ac86a2a | |||
0320402957 | |||
8696a185a7 | |||
da38ac226f | |||
1fcce32ef6 | |||
e50081614a | |||
b98a8cc44f | |||
90c7a585d2 | |||
4a15330874 | |||
c14112d3e4 | |||
ed7d2c1ef1 | |||
eb20f154a4 | |||
172985131b | |||
3781e67ab1 | |||
f2d54edd3f | |||
5393dc4eb9 | |||
e051c87f08 | |||
9b1c51897c | |||
2a3dddc7b0 | |||
a8793b5adb | |||
eecec764ad | |||
cdca6ea95e | |||
540b5430e5 | |||
00dfba81eb | |||
39152c0034 | |||
41b35be7c9 | |||
d2c8565f62 | |||
62eeef2abb | |||
9c6634cfec | |||
2a5a68b1de | |||
f8e1a98bdf | |||
05433c31f1 | |||
5c871f2b25 | |||
5273c99e6e | |||
50888c9c3d | |||
45115c765c | |||
5b0d42fd2d | |||
2a585d00f2 | |||
11a3b14cb1 | |||
58d3142855 | |||
0fb651fffc | |||
bc05dede06 | |||
b69e34f823 | |||
2514ecdafc | |||
c52d5556a2 | |||
5fc1b1122a | |||
83dd0024c4 | |||
70761649ad | |||
b9c6cc4470 | |||
23691f9cfe | |||
8c220cc800 | |||
ee453c0259 | |||
c90d23617a | |||
c574699a8a | |||
f55e160e25 | |||
dddcca0185 | |||
e7d94f225a | |||
e8eb22f3ae | |||
ef1809970b | |||
549504d650 | |||
4f23592aeb | |||
b00d79b9aa | |||
3cd9f24935 | |||
a60b3754e4 | |||
3330185393 | |||
1df7f64aec | |||
bfd627c763 | |||
81671727b2 | |||
1b821c4315 | |||
51728dd3a1 | |||
56e1514213 | |||
e7ea7b6ba6 | |||
ef132791a1 | |||
ca0d09dfab | |||
d1b0ce6b10 | |||
b6f51c33c7 | |||
9ffbb7365c | |||
71aacdd26a | |||
873795e267 | |||
9a128407cc | |||
a18d03e4ac |
26
app/Main.hs
26
app/Main.hs
@@ -1,8 +1,30 @@
|
|||||||
module Main where
|
module Main where
|
||||||
|
|
||||||
import MdToHTML
|
import MdToHTML
|
||||||
|
import System.Environment
|
||||||
|
import System.IO
|
||||||
|
|
||||||
|
-- Read stdin line by line until EOF, pushing every line onto the front of
-- the accumulator. The most recently read line therefore comes first in the
-- result, and the initial accumulator ends up at the tail.
readLinesHelper :: [String] -> IO [String]
readLinesHelper acc = do
  atEnd <- isEOF
  if atEnd
    then pure acc
    else getLine >>= readLinesHelper . (: acc)
|
||||||
|
|
||||||
|
-- Read every line of stdin and return them in input order.
-- 'readLinesHelper' accumulates newest-first, hence the 'reverse'.
readLines :: IO [String]
readLines = fmap reverse (readLinesHelper [])
|
||||||
|
|
||||||
main :: IO ()
|
main :: IO ()
|
||||||
main = do
|
main = do
|
||||||
let res = fst $ leftmostLongestParse parseDocument "# _Hello_\n"
|
args <- getArgs
|
||||||
putStrLn (show res)
|
fileContents <- case args of
|
||||||
|
[] -> getContents
|
||||||
|
x : _ -> readFile x
|
||||||
|
let res = leftmostLongestParse parseDocument fileContents
|
||||||
|
let toPrint = prettyPrint res
|
||||||
|
case reverse toPrint of
|
||||||
|
'\n' : _ -> putStr toPrint
|
||||||
|
_ -> putStrLn toPrint
|
||||||
|
@@ -56,18 +56,22 @@ common warnings
|
|||||||
library
|
library
|
||||||
hs-source-dirs: src
|
hs-source-dirs: src
|
||||||
exposed-modules: MdToHTML
|
exposed-modules: MdToHTML
|
||||||
|
other-modules: MdToHtmlTest
|
||||||
build-depends: base ^>=4.19.1.0,
|
build-depends: base ^>=4.19.1.0,
|
||||||
HUnit
|
HUnit,
|
||||||
|
megaparsec,
|
||||||
|
parser-combinators,
|
||||||
|
text,
|
||||||
|
MissingH,
|
||||||
|
word-wrap
|
||||||
|
|
||||||
executable md-to-html-runner
|
executable mdtoh
|
||||||
-- Import common warning flags.
|
-- Import common warning flags.
|
||||||
import: warnings
|
import: warnings
|
||||||
|
|
||||||
-- .hs or .lhs file containing the Main module.
|
-- .hs or .lhs file containing the Main module.
|
||||||
main-is: Main.hs
|
main-is: Main.hs
|
||||||
|
|
||||||
-- Modules included in this executable, other than Main.
|
|
||||||
-- other-modules:
|
|
||||||
|
|
||||||
-- LANGUAGE extensions used by modules in this package.
|
-- LANGUAGE extensions used by modules in this package.
|
||||||
-- other-extensions:
|
-- other-extensions:
|
||||||
|
553
src/MdToHTML.hs
553
src/MdToHTML.hs
@@ -1,177 +1,528 @@
|
|||||||
|
{-# OPTIONS_GHC -Wno-unrecognised-pragmas #-}
|
||||||
|
|
||||||
|
{-# HLINT ignore "Use lambda-case" #-}
|
||||||
|
|
||||||
module MdToHTML where
|
module MdToHTML where
|
||||||
|
|
||||||
import Control.Applicative
|
import Control.Applicative hiding (many, some)
|
||||||
import Control.Monad
|
import Control.Monad
|
||||||
|
import Control.Monad.Combinators (count)
|
||||||
|
import Data.Char
|
||||||
import Data.List
|
import Data.List
|
||||||
|
import Data.Ord (comparing)
|
||||||
|
import Data.String.Utils
|
||||||
|
import qualified Data.Text as T
|
||||||
|
import Data.Void
|
||||||
import Debug.Trace
|
import Debug.Trace
|
||||||
import Text.ParserCombinators.ReadP
|
import Text.Megaparsec
|
||||||
|
import Text.Megaparsec.Char
|
||||||
import Text.Printf
|
import Text.Printf
|
||||||
|
import Text.Wrap
|
||||||
|
|
||||||
|
type Parser = Parsec Void T.Text
|
||||||
|
|
||||||
type HeaderLevel = Int
|
type HeaderLevel = Int
|
||||||
|
|
||||||
newtype URL = URL {getUrl :: String}
|
type CssClass = String
|
||||||
|
|
||||||
newtype ImgPath = ImgPath {getPath :: String}
|
newtype URL = URL {getUrl :: String} deriving (Eq)
|
||||||
|
|
||||||
parseMany :: ReadP a -> ReadP [a]
|
newtype ImgPath = ImgPath {getPath :: String} deriving (Eq)
|
||||||
parseMany = Text.ParserCombinators.ReadP.many
|
|
||||||
|
|
||||||
data MdToken
|
data MdToken
|
||||||
= Document [MdToken]
|
= Document [MdToken]
|
||||||
| Header HeaderLevel MdToken
|
| Header HeaderLevel MdToken
|
||||||
| Para MdToken
|
| Para MdToken
|
||||||
| Line [MdToken]
|
| Line [MdToken]
|
||||||
|
| SingleNewline -- A single newline is rendered as a space.
|
||||||
| Linebreak
|
| Linebreak
|
||||||
| HorizontalRule
|
| HorizontalRule
|
||||||
| Blockquote MdToken
|
| Blockquote [MdToken]
|
||||||
| UnordList [MdToken]
|
| UnordList [MdToken]
|
||||||
| OrdList [MdToken]
|
| OrdList [MdToken]
|
||||||
| Code String
|
| Code MdToken
|
||||||
| Codeblock String
|
| Table [[MdToken]]
|
||||||
|
| Codeblock MdToken
|
||||||
| Link MdToken URL
|
| Link MdToken URL
|
||||||
| Image MdToken ImgPath
|
| Image MdToken URL (Maybe [CssClass])
|
||||||
|
| Figure MdToken URL (Maybe [CssClass])
|
||||||
| Bold MdToken
|
| Bold MdToken
|
||||||
| Italic MdToken
|
| Italic MdToken
|
||||||
| Strikethrough MdToken
|
| Strikethrough MdToken
|
||||||
| Unit String
|
| Unit String
|
||||||
|
deriving (Eq)
|
||||||
|
|
||||||
-- Deriving Show for MdToken
|
-- Hand-written Show instance: renders an MdToken as an HTML string.
|
||||||
instance Show MdToken where
|
instance Show MdToken where
|
||||||
show (Document tokens) = concat (map show tokens)
|
show (Document tokens) = concatMap show tokens
|
||||||
show (Header level token) = "<h" ++ show level ++ ">" ++ show token ++ "</h" ++ show level ++ ">"
|
show (Header level token) = "<h" ++ show level ++ ">" ++ show token ++ "</h" ++ show level ++ ">"
|
||||||
show (Para token) = "<p>" ++ show token ++ "</p>"
|
show (Para token) = "<p>" ++ show token ++ "</p>"
|
||||||
show (Line tokens) = concat (map show tokens)
|
show (Line tokens) = concatMap show tokens
|
||||||
show Linebreak = "<br>"
|
show Linebreak = "<br />"
|
||||||
show HorizontalRule = "---------"
|
show SingleNewline = " "
|
||||||
show (Blockquote token) = "BLOCK" ++ show token
|
show HorizontalRule = "<hr>"
|
||||||
show (UnordList tokens) = "UNORD" ++ concat (map show tokens)
|
show (Blockquote tokens) = "<blockquote>" ++ concatMap show tokens ++ "</blockquote>"
|
||||||
show (OrdList tokens) = "ORD" ++ concat (map show tokens)
|
show (UnordList tokens) = "<ul>" ++ concatMap (prepend "<li>" . append "</li>" . show) tokens ++ "</ul>"
|
||||||
show (Code code) = show code
|
show (OrdList tokens) = "<ol>" ++ concatMap (prepend "<li>" . append "</li>" . show) tokens ++ "</ol>"
|
||||||
show (Codeblock code) = show code
|
show (Code code) = "<code>" ++ strip (show code) ++ "</code>"
|
||||||
show (Link txt url) = "<a href=" ++ (getUrl url) ++ ">" ++ show txt ++ "</a>"
|
show (Table (thead : tokenGrid)) = "<table><thead><tr>" ++ concatMap (\x -> "<th>" ++ rstrip (show x) ++ "</th>") thead ++ "</tr></thead>" ++ "<tbody>" ++ concatMap (\x -> "<tr>" ++ concatMap (\y -> "<td>" ++ rstrip (show y) ++ "</td>") x ++ "</tr>") tokenGrid ++ "</tbody></table>"
|
||||||
show (Image txt imgPath) = "<img src=" ++ (getPath imgPath) ++ ">" ++ show txt ++ "</img>"
|
show (Codeblock code) = "<pre><code>" ++ show code ++ "</code></pre>"
|
||||||
|
show (Link txt url) = "<a href=\"" ++ getUrl url ++ "\">" ++ show txt ++ "</a>"
|
||||||
|
show (Image txt url cssClasses) = "<img src=\"" ++ getUrl url ++ "\"" ++ " alt=\"" ++ show txt ++ "\"" ++ maybe "" (\classes -> " class=\"" ++ unwords classes ++ "\"") cssClasses ++ "/>"
|
||||||
|
show (Figure txt url cssClasses) = "<figure><img src=\"" ++ getUrl url ++ "\" alt=\"" ++ show txt ++ "\"" ++ maybe "" (\classes -> " class=\"" ++ unwords classes ++ "\"") cssClasses ++ "/><figcaption aria-hidden=\"true\">" ++ show txt ++ "</figcaption></figure>"
|
||||||
show (Bold token) = "<b>" ++ show token ++ "</b>"
|
show (Bold token) = "<b>" ++ show token ++ "</b>"
|
||||||
show (Italic token) = "<i>" ++ show token ++ "</i>"
|
show (Italic token) = "<i>" ++ show token ++ "</i>"
|
||||||
show (Strikethrough token) = "<s>" ++ show token ++ "</s>"
|
show (Strikethrough token) = "<s>" ++ show token ++ "</s>"
|
||||||
show (Unit unit) = printf "%s" unit
|
show (Unit unit) = printf "%s" unit
|
||||||
|
|
||||||
|
-- Render a token as HTML with newlines inserted for readability.
-- Paragraphs are word-wrapped at 70 columns and followed by a newline, tables
-- get one element per line, and line breaks / horizontal rules are followed by
-- a newline. 'Line' and 'Document' recurse into their children. Every other
-- token (including a 'Table' with no rows) falls back to 'show'.
prettyPrint :: MdToken -> String
prettyPrint tok = case tok of
  Para inner ->
    let wrapped = wrapText defaultWrapSettings 70 (T.pack $ prettyPrint inner)
     in "<p>" ++ T.unpack wrapped ++ "</p>\n"
  Table (thead : tokenGrid) ->
    "<table>\n<thead>\n<tr>\n"
      ++ concatMap headCell thead
      ++ "</tr>\n</thead>\n"
      ++ "<tbody>\n"
      ++ concatMap bodyRow tokenGrid
      ++ "</tbody>\n</table>\n"
  Linebreak -> "<br />\n"
  HorizontalRule -> "<hr>\n"
  Line tokens -> concatMap prettyPrint tokens
  Document tokens -> concatMap prettyPrint tokens
  other -> show other
  where
    -- Cell contents have trailing whitespace removed before being wrapped in tags.
    headCell cell = "<th>" ++ rstrip (prettyPrint cell) ++ "</th>\n"
    dataCell cell = "<td>" ++ rstrip (prettyPrint cell) ++ "</td>\n"
    bodyRow row = "<tr>\n" ++ concatMap dataCell row ++ "</tr>\n"
|
||||||
|
|
||||||
|
-- Combining two tokens wraps them, in order, in a fresh two-element 'Document'.
-- NOTE(review): under the derived 'Eq' this is not associative
-- ((a <> b) <> c nests differently from a <> (b <> c)), so the Semigroup law
-- does not hold.
instance Semigroup MdToken where
  (<>) left right = Document [left, right]

-- The neutral token is an empty 'Unit', which renders as the empty string.
-- NOTE(review): 'mempty <> x' yields a 'Document', not 'x', so this is not a
-- true identity for '<>'.
instance Monoid MdToken where
  mempty = Unit ""
|
||||||
|
|
||||||
-- ---------------
|
-- ---------------
|
||||||
-- Helpers
|
-- Helpers
|
||||||
mustBeHash :: ReadP Char
|
leftmostLongest :: (Foldable t) => [(a, t b)] -> Maybe (a, t b)
|
||||||
mustBeHash = satisfy (\x -> x == '#')
|
|
||||||
|
|
||||||
leftmostLongest :: (Foldable t) => [(a, t b)] -> (a, t b)
|
|
||||||
leftmostLongest xs =
|
leftmostLongest xs =
|
||||||
let lastElem = (last xs)
|
let lastElem = last xs
|
||||||
filteredLst = (filter (\val -> (length $ snd val) == (length $ snd lastElem)) xs)
|
filteredLst = filter (\val -> length (snd val) == length (snd lastElem)) xs
|
||||||
in head filteredLst
|
in case filteredLst of
|
||||||
|
[] -> Nothing
|
||||||
|
(x : xs) -> Just x
|
||||||
|
|
||||||
-- Get the first parse returned by readP_to_S that consumed the most input
|
-- Run the parser over the whole input and return its result, or 'mempty' if the parse fails.
|
||||||
leftmostLongestParse :: ReadP a -> String -> (a, String)
|
leftmostLongestParse :: (Monoid a) => Parser a -> String -> a
|
||||||
leftmostLongestParse parser input = leftmostLongest $ readP_to_S parser input
|
leftmostLongestParse parser input =
|
||||||
|
case runParser parser "input" (T.pack input) of
|
||||||
|
(Left a) -> mempty
|
||||||
|
(Right a) -> a
|
||||||
|
|
||||||
-- Parse if the string that's left matches the string comparator function
|
specialChars = ">\n\\`*_{}[]#+|"
|
||||||
lookaheadParse :: (String -> Bool) -> ReadP Char
|
|
||||||
lookaheadParse stringCmp = do
|
|
||||||
lookahead <- look
|
|
||||||
case stringCmp lookahead of
|
|
||||||
True -> get
|
|
||||||
False -> pfail
|
|
||||||
|
|
||||||
lineToList :: MdToken -> [MdToken]
|
escapableChars = "-~!.$()" ++ specialChars
|
||||||
lineToList (Line tokens) = tokens
|
|
||||||
|
-- Apply the parser as many times as it succeeds and collect the results in
-- order. Falls back to the empty list when 'greedyParse1' fails.
greedyParse :: Parser a -> Parser [a]
greedyParse parser = option [] (greedyParse1 parser)
|
||||||
|
|
||||||
|
-- Like greedyParse, but the parser must succeed at least once.
-- The first result is consed onto whatever further repetitions produce.
greedyParse1 :: Parser a -> Parser [a]
greedyParse1 parser = (:) <$> parser <*> option [] (greedyParse1 parser)
|
||||||
|
|
||||||
|
-- Put the first list in front of the second: prepend "ab" "cd" == "abcd".
prepend :: [a] -> [a] -> [a]
prepend = (++)
|
||||||
|
|
||||||
|
-- Put the first list after the second: append "ab" "cd" == "cdab".
-- Argument order mirrors 'prepend' so both can be used in a composition chain.
append :: [a] -> [a] -> [a]
append suffix body = body ++ suffix
|
||||||
|
|
||||||
|
-- Consume characters up to the next newline or the end of input and return
-- them. A terminating newline is consumed but not included in the result.
parseTillEol :: Parser String
parseTillEol = manyTill anySingle lineEnd
  where
    lineEnd :: Parser ()
    lineEnd = void (char '\n') <|> eof
|
||||||
|
|
||||||
|
-- Takes a list of parsers. Returns a parser that will try them in
-- order, moving to the next one only if the current one fails.
-- Every alternative except the last is wrapped in 'try', so a failed
-- alternative does not keep the input it consumed. The last parser runs
-- unwrapped.
-- An empty list yields a parser that always fails ('empty') instead of
-- crashing with a pattern-match failure.
fallthroughParser :: [Parser a] -> Parser a
fallthroughParser [] = empty
fallthroughParser [x] = x
fallthroughParser (x : xs) = try x <|> fallthroughParser xs
|
||||||
|
|
||||||
|
-- Escape a single character for inclusion in HTML text.
-- '>', '<' and '&' become their named entities. Every other character is
-- returned unchanged as a one-character string.
escapeChar :: Char -> String
escapeChar '>' = "&gt;"
escapeChar '<' = "&lt;"
escapeChar '&' = "&amp;"
escapeChar x = [x]
|
||||||
|
|
||||||
|
-- Apply 'escapeChar' to every character of the text and join the results.
htmlEscapeChars :: T.Text -> T.Text
htmlEscapeChars = T.concatMap (\ch -> T.pack (escapeChar ch))
|
||||||
|
|
||||||
|
-- -- Wraps a list of words after (at most) the given number of characters, trying to prevent word-breaks
|
||||||
|
-- wordwrap :: Int -> String -> String
|
||||||
|
-- wordwrap wraplength str = if (length str) < wraplength
|
||||||
|
-- then str
|
||||||
|
-- else
|
||||||
|
-- let spaceIndex = lastgtSpaceIndex 0 (takeRev (length str) - wraplength str)
|
||||||
|
--
|
||||||
|
-- where
|
||||||
|
-- takeRev n = (reverse . take n . reverse)
|
||||||
|
-- lastSpaceIndex counter str = case str of
|
||||||
|
-- [] -> counter
|
||||||
|
-- x:xs -> if (isSpace x) counter else lastSpaceIndex counter+1 xs
|
||||||
|
|
||||||
-- ---------------
|
-- ---------------
|
||||||
|
|
||||||
-- Parse a markdown header, denoted by 1-6 #'s followed by some text, followed by EOL.
|
-- Parse a markdown header, denoted by 1-6 #'s followed by some text, followed by EOL.
|
||||||
parseHeader :: ReadP MdToken
|
parseHeader :: Parser MdToken
|
||||||
parseHeader = do
|
parseHeader = do
|
||||||
skipSpaces
|
space
|
||||||
headers <- many1 mustBeHash
|
headers <- greedyParse1 (char '#')
|
||||||
when
|
when
|
||||||
((length headers) > 6)
|
(length headers > 6)
|
||||||
pfail
|
empty
|
||||||
_ <- string " "
|
space
|
||||||
-- text <- manyTill (get) ((string "\n") <|> (eof >> return ""))-- Parse until EOL or EOF
|
parsedText <- manyTill parseLineToken (void (char '\n') <|> eof)
|
||||||
text <- munch1 (/= '\n')
|
greedyParse (char '\n')
|
||||||
Text.ParserCombinators.ReadP.optional (char '\n')
|
return (Header (length headers) (Line parsedText))
|
||||||
let parsedText = fst $ leftmostLongestParse parseLine text
|
|
||||||
return (Header (length headers) parsedText)
|
asteriskBold = T.pack "**"
|
||||||
|
|
||||||
|
underscoreBold = T.pack "__"
|
||||||
|
|
||||||
-- Parse bold text
|
-- Parse bold text
|
||||||
parseBold :: ReadP MdToken
|
parseBold :: Parser MdToken
|
||||||
parseBold = do
|
parseBold = parseBoldWith asteriskBold <|> parseBoldWith underscoreBold
|
||||||
text <-
|
where
|
||||||
choice
|
parseBoldWith delim = do
|
||||||
[ between (string "__") (string "__") (many1 (lookaheadParse (/= "__"))),
|
string delim
|
||||||
between (string "**") (string "**") (many1 (lookaheadParse (/= "**")))
|
inside <- someTill parseLineToken $ string delim
|
||||||
]
|
return (Bold (Line inside))
|
||||||
let parsedText = fst $ leftmostLongestParse parseLine text
|
|
||||||
return (Bold parsedText)
|
|
||||||
|
|
||||||
-- Parse italic text
|
-- Parse italic text
|
||||||
parseItalic :: ReadP MdToken
|
parseItalic :: Parser MdToken
|
||||||
parseItalic = do
|
parseItalic = parseItalicWith '*' <|> parseItalicWith '_'
|
||||||
text <-
|
where
|
||||||
choice
|
parseItalicWith delim = do
|
||||||
[ (between (string "_") (string "_") (munch1 (/= '_'))),
|
char delim
|
||||||
(between (string "*") (string "*") (munch1 (/= '*')))
|
inside <- someTill parseLineToken (char delim)
|
||||||
]
|
return (Italic (Line inside))
|
||||||
let parsedText = fst $ leftmostLongestParse parseLine text
|
|
||||||
return (Italic parsedText)
|
-- Parse struck-through text: one or more line tokens enclosed in "~~".
parseStrikethrough :: Parser MdToken
parseStrikethrough =
  Strikethrough . Line <$> (string tildes *> someTill parseLineToken (string tildes))
  where
    tildes = T.pack "~~"
|
||||||
|
|
||||||
|
-- Parse inline code delimited by backticks.
-- After the opening run of N backticks, at least one non-newline character is
-- read up to the next backtick, and then N-1 further backticks are required.
-- The captured characters are passed through 'escapeChar'.
parseCode :: Parser MdToken
parseCode = do
  ticks <- some (char '`')
  body <- someTill (satisfy (/= '\n')) (char '`')
  _ <- count (length ticks - 1) (char '`')
  pure . Code . Unit $ concatMap escapeChar body
|
||||||
|
|
||||||
|
-- Parse a link of the form [text](url).
-- The bracketed part is parsed as line tokens. The parenthesised part is taken
-- verbatim up to the first ')'.
parseLink :: Parser MdToken
parseLink = do
  caption <- char '[' *> manyTill parseLineToken (char ']')
  target <- char '(' *> manyTill anySingle (char ')')
  pure (Link (Line caption) (URL target))
|
||||||
|
|
||||||
-- Parse a linebreak character
|
-- Parse a linebreak character
|
||||||
parseLinebreak :: ReadP MdToken
|
parseLinebreak :: Parser MdToken
|
||||||
parseLinebreak = do
|
parseLinebreak = parseLinebreakSpace <|> parseLinebreakBackslash
|
||||||
|
where
|
||||||
|
parseLinebreakSpace = do
|
||||||
char ' '
|
char ' '
|
||||||
many1 (char ' ')
|
some (char ' ')
|
||||||
|
char '\n'
|
||||||
|
return Linebreak
|
||||||
|
parseLinebreakBackslash = try $ do
|
||||||
|
char '\\'
|
||||||
char '\n'
|
char '\n'
|
||||||
return Linebreak
|
return Linebreak
|
||||||
|
|
||||||
-- Parse a regular string as a Unit.
|
parseTableRow :: Parser [MdToken]
|
||||||
parseString :: ReadP MdToken
|
parseTableRow = do
|
||||||
parseString = do
|
char '|'
|
||||||
firstChar <- get -- Must parse at least one character here
|
row <- some (many (satisfy (\x -> x == ' ' || x == '\t')) *> someTill parseListLineToken (char '|'))
|
||||||
text <- munch (\x -> not (elem x "#*_[\n "))
|
return (map Line row)
|
||||||
return (Unit (firstChar : text))
|
|
||||||
|
|
||||||
lineParsers :: [ReadP MdToken]
|
parseTable :: Parser MdToken
|
||||||
lineParsers = [parseLinebreak, parseBold, parseItalic, parseString] -- A 'line' doesn't include a 'header'
|
parseTable = do
|
||||||
|
tableHead <- parseTableRow
|
||||||
|
char '\n'
|
||||||
|
char '|'
|
||||||
|
sepEndBy1 (some (char '-')) (char '|') *> char '\n'
|
||||||
|
tableBody <- sepEndBy parseTableRow (char '\n')
|
||||||
|
many (char '\n') -- Parse trailing newlines, if any
|
||||||
|
return $ Table (tableHead : tableBody)
|
||||||
|
|
||||||
|
-- Parse one newline character.
-- If nothing follows it in the input, the result is an empty 'Unit'.
-- Otherwise the result is 'SingleNewline'.
parseSingleNewline :: Parser MdToken
parseSingleNewline = do
  _ <- char '\n'
  rest <- getInput
  pure (if T.null rest then Unit "" else SingleNewline)
|
||||||
|
|
||||||
|
-- Parse a brace-enclosed list of one or more CSS classes, e.g. {.a .b-c}.
-- Each class starts with '.', then an underscore, hyphen or letter, followed
-- by any number of those characters or digits. Whitespace after each class is
-- skipped. The returned names do not include the leading dot.
parseCssClasses :: Parser [CssClass]
parseCssClasses = char '{' *> some cssClass <* char '}'
  where
    leadChar :: Parser Char
    leadChar = char '_' <|> char '-' <|> label "letter" (satisfy isAlpha)

    cssClass :: Parser CssClass
    cssClass = do
      _ <- char '.'
      firstCh <- leadChar
      restChs <- many (leadChar <|> label "digit" (satisfy isDigit))
      space
      pure (firstCh : restChs)
|
||||||
|
|
||||||
|
-- Parse an image: '!' followed by a link, optionally followed by a CSS class
-- list. The link text and URL are carried over from the link.
parseImage :: Parser MdToken
parseImage = do
  parsedLink <- char '!' *> parseLink
  classes <- optional (try parseCssClasses)
  case parsedLink of
    Link alt src -> pure (Image alt src classes)
    _ -> empty -- parseLink only builds 'Link', so this should never be reached
|
||||||
|
|
||||||
|
-- Parse a figure: an image that is immediately followed by a blank line
-- (double newline) or the end of input. The image's text, URL and CSS classes
-- are carried over unchanged into the 'Figure'.
parseFigure :: Parser MdToken
parseFigure = do
  img <- parseImage
  void (string doubleNewlineText) <|> eof
  case img of
    Image text path cssClasses -> return $ Figure text path cssClasses
    _ -> return img -- parseImage only builds 'Image'; any other token passes through
|
||||||
|
|
||||||
|
-- Parse a backslash followed by one of 'escapableChars' and return that
-- character, without the backslash, as a 'Unit'.
parseEscapedChar :: Parser MdToken
parseEscapedChar = Unit . (: []) <$> (char '\\' *> choice (map char escapableChars))
|
||||||
|
|
||||||
|
-- Parse any single character and wrap it in a 'Unit'.
parseUnit :: Parser MdToken
parseUnit = Unit . (: []) <$> anySingle
|
||||||
|
|
||||||
|
-- Parse any single character other than a newline and wrap it in a 'Unit'.
parseUnitExceptNewline :: Parser MdToken
parseUnitExceptNewline = Unit . (: []) <$> satisfy (/= '\n')
|
||||||
|
|
||||||
|
-- Inline-level parsers, in the order they are attempted.
-- 'parseUnit' accepts any character, so it sits last as the catch-all.
-- A 'line' doesn't include a 'header'.
lineParsers :: [Parser MdToken]
lineParsers =
  [ parseLinebreak
  , parseSingleNewline
  , parseEscapedChar
  , parseCode
  , parseImage
  , parseBold
  , parseItalic
  , parseStrikethrough
  , parseLink
  , parseUnit
  ]
|
||||||
|
|
||||||
|
-- Inline-level parsers for contexts where a line cannot contain newlines
-- (e.g. list lines). Same order as 'lineParsers', minus the linebreak and
-- single-newline parsers, and with a catch-all that rejects '\n'.
lineParsersWithoutNewline :: [Parser MdToken]
lineParsersWithoutNewline =
  [ parseEscapedChar
  , parseCode
  , parseImage
  , parseBold
  , parseItalic
  , parseStrikethrough
  , parseLink
  , parseUnitExceptNewline
  ]
|
||||||
|
|
||||||
-- List of all parsers
|
-- List of all parsers
|
||||||
allParsers :: [ReadP MdToken]
|
allParsers :: [Parser MdToken]
|
||||||
allParsers = parseHeader : lineParsers
|
allParsers = parseHeader : lineParsers
|
||||||
|
|
||||||
-- Parse any of the above tokens.
|
-- Parse any of the line tokens.
|
||||||
parseLineToken :: ReadP MdToken
|
parseLineToken :: Parser MdToken
|
||||||
parseLineToken = choice lineParsers
|
parseLineToken = fallthroughParser lineParsers
|
||||||
|
|
||||||
|
-- Parse any of the list line tokens.
|
||||||
|
parseListLineToken :: Parser MdToken
|
||||||
|
parseListLineToken = fallthroughParser lineParsersWithoutNewline
|
||||||
|
|
||||||
-- Parse a line, consisting of one or more tokens.
|
-- Parse a line, consisting of one or more tokens.
|
||||||
parseLine :: ReadP MdToken
|
parseLine :: Parser MdToken
|
||||||
parseLine = do
|
parseLine = do
|
||||||
skipSpaces
|
space
|
||||||
-- Fail if we have reached the end of the document.
|
-- Collect line tokens until the end of the input.
|
||||||
remaining <- look
|
parsed <- manyTill parseLineToken eof
|
||||||
when (null remaining) pfail
|
|
||||||
parsed <- parseMany parseLineToken
|
|
||||||
-- traceM $ show parsed
|
|
||||||
return (Line parsed)
|
return (Line parsed)
|
||||||
|
|
||||||
-- Parse a paragraph, which is a 'Line' (can span multiple actual lines), separated by double-newlines.
|
-- Parse a paragraph, which is a 'Line' (can span multiple actual lines), separated by double-newlines.
|
||||||
-- As a weird special case, a 'Paragraph' can also be a 'Header'.
|
parsePara :: Parser MdToken
|
||||||
parsePara :: ReadP MdToken
|
|
||||||
parsePara = do
|
parsePara = do
|
||||||
parseMany (char '\n')
|
space
|
||||||
-- text <- many1 (lookaheadParse (\x -> ((length x) < 2) || (take 2 x) /= "\n\n")) -- Parse until a double-newline.
|
-- text <- many1 (lookaheadParse (\x -> ((length x) < 2) || (take 2 x) /= "\n\n")) -- Parse until a double-newline.
|
||||||
-- string "\n\n" <|> (eof >> return "") -- Consume the next double-newline or EOF.
|
-- string "\n\n" <|> (eof >> return "") -- Consume the next double-newline or EOF.
|
||||||
text <- (manyTill get ((string "\n\n") <|> (eof >> return "")))
|
parsedText <- someTill parseLineToken (try paraEnding)
|
||||||
when (null text) pfail
|
many (char '\n')
|
||||||
let parsedText = fst $ leftmostLongestParse (parseHeader <|> parseLine) text -- Parse either a line or a header.
|
return (Para (Line parsedText))
|
||||||
-- If the paragraph is a header, return a Header token. Otheriwse return a Para token.
|
where
|
||||||
case parsedText of
|
paraEnding = void (char '\n' *> (char '\n' <|> lookAhead (char '>'))) <|> eof
|
||||||
Header level token -> return (Header level token)
|
|
||||||
_ -> return (Para parsedText)
|
-- Parse a line starting with '>', return the line except for the '>'.
|
||||||
|
-- Parse one line that begins with '>'. The marker and any spaces or tabs
-- after it are dropped. The rest of the line is returned without its
-- terminating newline. The line may also end at the end of input.
parseQuotedLine :: Parser String
parseQuotedLine =
  char '>'
    *> many (char ' ' <|> char '\t')
    *> many (satisfy (/= '\n'))
    <* (void (char '\n') <|> eof)
|
||||||
|
|
||||||
|
-- Parse many 'quoted lines' until I see a non-quoted line.
|
||||||
|
parseQuotedLines :: Parser [String]
|
||||||
|
parseQuotedLines = some parseQuotedLine
|
||||||
|
|
||||||
|
-- some $ do
|
||||||
|
-- getInput >>= \line ->
|
||||||
|
-- case T.unpack line of
|
||||||
|
-- ('>' : _) -> parseQuotedLine
|
||||||
|
-- _ -> empty
|
||||||
|
|
||||||
|
-- Parse a blockquote: one or more consecutive '>'-prefixed lines.
-- The markers are stripped, the lines are re-joined with newlines (no trailing
-- newline), and the result is parsed again as a sequence of nested
-- blockquotes and paragraphs.
parseBlockquote :: Parser MdToken
parseBlockquote = Blockquote . reparse <$> parseQuotedLines
  where
    -- parseQuotedLines yields at least one line, so joining with "\n" gives
    -- the same text as dropping the final newline of 'unlines'.
    reparse quoted =
      leftmostLongestParse (some (parseBlockquote <|> parsePara)) (intercalate "\n" quoted)
|
||||||
|
|
||||||
|
-- Parse a nested list item.
-- Collects one or more lines that start with the indent marker (a space string
-- or a tab), strips that marker, and re-parses the joined lines as an
-- unordered or ordered list. Fails when the re-parse renders as the empty
-- string.
parseListNested :: Parser MdToken
parseListNested = do
  indented <- greedyParse1 (indent *> restOfLine)
  -- At least one line was collected, so joining with "\n" gives the same text
  -- as dropping the final newline of 'unlines'.
  let nested = leftmostLongestParse (parseUnorderedList <|> parseOrderedList) (intercalate "\n" indented)
  when (null (show nested)) empty
  pure nested
  where
    indent :: Parser T.Text
    indent = string (T.pack " ") <|> string (T.pack "\t")

    restOfLine :: Parser String
    restOfLine = manyTill anySingle (void (char '\n') <|> eof)
|
||||||
|
|
||||||
|
-- Parse an unordered list line item: a '*', '+' or '-' bullet, one mandatory
-- space, then the shared list-line body.
parseUListLineItem :: Parser MdToken
parseUListLineItem = bullet *> char ' ' *> parseListLineItemCommon
  where
    bullet :: Parser Char
    bullet = choice (map char "*+-")
|
||||||
|
|
||||||
|
-- Parse an ordered list line item: one or more digits, a '.', one mandatory
-- space, then the shared list-line body. The number itself is discarded.
parseOListLineItem :: Parser MdToken
parseOListLineItem =
  greedyParse1 (satisfy isDigit) *> char '.' *> char ' ' *> parseListLineItemCommon
|
||||||
|
|
||||||
|
-- Shared tail of a list line item.
-- Skips leading whitespace, parses the item text up to the end of the line (or
-- input), then an optional nested list. The result is a 'Line' holding the
-- item text and the nested list, or an empty 'Unit' when there is none.
parseListLineItemCommon :: Parser MdToken
parseListLineItemCommon = space *> (assemble <$> itemText <*> sublist)
  where
    assemble body nested = Line [Line body, nested]
    itemText = manyTill parseListLineToken (void (char '\n') <|> eof)
    sublist = try parseListNested <|> pure (Unit "")
|
||||||
|
|
||||||
|
-- Parse an unordered list paragraph item: a line item followed by the
-- indented continuation blocks from 'parseListParaItemCommon'.
-- The first line becomes a 'Para', and everything is wrapped in a 'Document'
-- purely to turn the [MdToken] into a single MdToken.
parseUListParaItem :: Parser MdToken
parseUListParaItem = toItem <$> parseUListLineItem <*> parseListParaItemCommon
  where
    toItem firstLine rest = Document (Para firstLine : rest)
|
||||||
|
|
||||||
|
-- Parse an ordered list paragraph item: a line item followed by the
-- indented continuation blocks from 'parseListParaItemCommon'.
-- The first line becomes a 'Para', and everything is wrapped in a 'Document'
-- purely to turn the [MdToken] into a single MdToken.
parseOListParaItem :: Parser MdToken
parseOListParaItem = toItem <$> parseOListLineItem <*> parseListParaItemCommon
  where
    toItem firstLine rest = Document (Para firstLine : rest)
|
||||||
|
|
||||||
|
-- Common code for parsing list paragraph items.
-- Expects an empty line (a newline), then one or more lines that start with
-- the indent marker (a space string or a tab), then a closing newline. The
-- indented lines are stripped of the marker, re-joined, and re-parsed as
-- either a run of blockquotes or a run of paragraphs.
parseListParaItemCommon :: Parser [MdToken]
parseListParaItemCommon = do
  _ <- char '\n'
  indented <- greedyParse1 (indent *> parseTillEol)
  -- At least one line was collected, so joining with "\n" gives the same text
  -- as dropping the final newline of 'unlines'.
  let blocks = leftmostLongestParse (greedyParse1 parseBlockquote <|> greedyParse1 parsePara) (intercalate "\n" indented)
  blocks <$ char '\n'
  where
    indent :: Parser T.Text
    indent = string (T.pack " ") <|> string (T.pack "\t")
|
||||||
|
|
||||||
|
-- Parse an unordered list item, which can be a line item or another list.
|
||||||
|
parseUListItem :: Parser MdToken
|
||||||
|
parseUListItem = space *> (try parseUListParaItem <|> parseUListLineItem)
|
||||||
|
|
||||||
|
-- Parse an unordered list: one or more items, each attempted with
-- backtracking. The list must be followed by an extra newline or the end of
-- input.
parseUnorderedList :: Parser MdToken
parseUnorderedList =
  UnordList <$> some (try parseUListItem) <* (void (char '\n') <|> eof)
|
||||||
|
|
||||||
|
-- --------
|
||||||
|
|
||||||
|
parseOListItem :: Parser MdToken
|
||||||
|
parseOListItem = space *> (try parseOListParaItem <|> parseOListLineItem)
|
||||||
|
|
||||||
|
-- Parse the first element of an ordered list.
-- After skipping whitespace, the remaining input must start with "1.".
-- Otherwise the parser fails. The item itself is then parsed by
-- 'parseOListLineItem'.
parseFirstOListItem :: Parser MdToken
parseFirstOListItem = do
  space
  upcoming <- getInput
  unless (T.pack "1." `T.isPrefixOf` upcoming) empty
  parseOListLineItem
|
||||||
|
|
||||||
|
-- Parse an ordered list: a first item that must be numbered "1.", then any
-- number of further items. The list must be followed by an extra newline or
-- the end of input.
parseOrderedList :: Parser MdToken
parseOrderedList = do
  items <- (:) <$> try parseFirstOListItem <*> many (try parseOListItem)
  OrdList items <$ (void (char '\n') <|> eof)
|
||||||
|
|
||||||
|
-- | The literal text that introduces a horizontal rule: three hyphens.
horizontalRuleText :: T.Text
horizontalRuleText = T.pack (replicate 3 '-')
|
||||||
|
|
||||||
|
-- | Two consecutive newline characters.
doubleNewlineText :: T.Text
doubleNewlineText = T.replicate 2 (T.singleton '\n')
|
||||||
|
|
||||||
|
-- | Parse a horizontal rule: 'horizontalRuleText' followed by either
-- 'doubleNewlineText' or end of input. Yields 'HorizontalRule'.
parseHorizontalRule :: Parser MdToken
parseHorizontalRule = HorizontalRule <$ string horizontalRuleText <* ruleEnd
  where
    ruleEnd = void (string doubleNewlineText) <|> eof
|
||||||
|
|
||||||
|
-- | Parse a fenced code block.
--
-- The block opens with three backticks followed by a newline and closes with
-- a newline followed by three backticks. At least one character is required
-- between the fences. Every character in between is passed through
-- 'escapeChar' and the result is wrapped as @Codeblock (Unit ...)@.
parseCodeblock :: Parser MdToken
parseCodeblock = toToken <$> (openFence *> someTill anySingle closeFence)
  where
    openFence = string (T.pack "```\n")
    closeFence = string (T.pack "\n```")
    toToken = Codeblock . Unit . concatMap escapeChar
|
||||||
|
|
||||||
|
-- | The block-level parsers that 'parseDocument' hands to 'fallthroughParser'.
--
-- NOTE(review): the list is presumably tried front to back, which would make
-- 'parsePara' the catch-all at the end - confirm against 'fallthroughParser'
-- before reordering.
documentParsers :: [Parser MdToken]
documentParsers =
  [ parseHorizontalRule
  , parseCodeblock
  , parseTable
  , parseHeader
  , parseBlockquote
  , parseUnorderedList
  , parseOrderedList
  , parseFigure
  , parsePara
  ]
|
||||||
|
|
||||||
-- Parse a document, which is multiple paragraphs.
|
-- Parse a document, which is multiple paragraphs.
|
||||||
parseDocument :: ReadP MdToken
|
parseDocument :: Parser MdToken
|
||||||
parseDocument = (many1 parsePara) >>= (\res -> return (Document (res)))
|
-- Repeatedly run the block-level parsers until end of input and wrap the
-- resulting tokens in a 'Document'.
parseDocument = Document <$> manyTill (fallthroughParser documentParsers) eof
|
||||||
|
177
src/MdToHtmlTest.hs
Normal file
177
src/MdToHtmlTest.hs
Normal file
@@ -0,0 +1,177 @@
|
|||||||
|
module MdToHtmlTest where
|
||||||
|
|
||||||
|
import MdToHTML
|
||||||
|
import Test.HUnit
|
||||||
|
|
||||||
|
-- | Build a test case asserting that two strings are equal.
--
-- Arguments, in order: the description shown on failure, the expected
-- string, and the actual string.
check_equal :: String -> String -> String -> Test
check_equal desc expected = TestCase . assertEqual desc expected
|
||||||
|
|
||||||
|
-- | Parse a Markdown string with 'parseDocument' and render the result with
-- 'show'.
convert :: String -> String
convert = show . leftmostLongestParse parseDocument
|
||||||
|
|
||||||
|
-- | Heading levels 1 through 6: @n@ hash marks followed by a space must render
-- as the matching @<hN>@ element.
headerTests :: Test
headerTests = TestList (map headingCase [1 .. 6])
  where
    headingCase :: Int -> Test
    headingCase level =
      check_equal
        ("Should convert H" ++ show level ++ " heading")
        ("<h" ++ show level ++ ">Hello</h" ++ show level ++ ">")
        (convert (replicate level '#' ++ " Hello"))
|
||||||
|
|
||||||
|
-- | Bold and italic markup, alone, mixed in a sentence, and nested both ways.
--
-- Each case has a distinct description so that a failure message identifies
-- which nesting direction broke.
boldTests :: Test
boldTests =
  TestList
    [ check_equal "Should convert bold" "<p><b>Hello</b></p>" (convert "__Hello__"),
      check_equal "Should convert italic" "<p><i>Hello</i></p>" (convert "_Hello_"),
      check_equal "Should convert bold and italic in a sentence" "<p>It <i>is</i> a <b>wonderful</b> day</p>" (convert "It _is_ a __wonderful__ day"),
      check_equal "Should convert nested bold and italic" "<p><b>Bold then <i>Italic</i></b></p>" (convert "**Bold then _Italic_**"),
      check_equal "Should convert bold nested in italic" "<p><i>Italic then <b>Bold</b></i></p>" (convert "*Italic then __Bold__*")
    ]
|
||||||
|
|
||||||
|
-- | Strikethrough markup, including a lone tilde inside the struck-through span.
strikethroughTests :: Test
strikethroughTests =
  TestList [check_equal label html (convert markdown) | (label, html, markdown) <- cases]
  where
    cases =
      [ ( "Should convert strikethrough",
          "<p><s>Hello</s></p>",
          "~~Hello~~"
        ),
        ( "Should convert long sentence with tilde",
          "<p><s>The universe is ~7 days old</s>. The universe is 13 billion years old.</p>",
          "~~The universe is ~7 days old~~. The universe is 13 billion years old."
        )
      ]
|
||||||
|
|
||||||
|
-- | Links with plain link text and with inline formatting inside the link text.
linkTests :: Test
linkTests = TestList (map toCase cases)
  where
    toCase (label, html, markdown) = check_equal label html (convert markdown)
    cases =
      [ ( "Should convert normal link",
          "<p><a href=\"https://example.com\">This is an example link.</a></p>",
          "[This is an example link.](https://example.com)"
        ),
        ( "Should convert styled link",
          "<p><a href=\"https://example.com\"><b>Fancy</b>!!!</a></p>",
          "[__Fancy__!!!](https://example.com)"
        )
      ]
|
||||||
|
|
||||||
|
-- | Backslash-escaped markup characters must be emitted literally.
escapedCharTests :: Test
escapedCharTests =
  TestList [check_equal label html (convert markdown) | (label, html, markdown) <- cases]
  where
    cases =
      [ ( "Should print literal underscore",
          "<p>This is an underscore - _</p>",
          "This is an underscore - \\_"
        ),
        ( "Should print literal asterisk",
          "<p>This is an asterisk - *</p>",
          "This is an asterisk - \\*"
        ),
        ( "Should print literal asterisk in bold",
          "<p>This is a bolded asterisk - <b>*</b></p>",
          "This is a bolded asterisk - **\\***"
        )
      ]
|
||||||
|
|
||||||
|
-- | Blockquotes: a single formatted paragraph, and quotes nested one level deep.
blockquoteTests :: Test
blockquoteTests = TestList (map toCase cases)
  where
    toCase (label, html, markdown) = check_equal label html (convert markdown)
    cases =
      [ ( "Should wrap para in blockquote",
          "<blockquote><p>What a <b>truly</b> <i>lovely</i> day!!!</p></blockquote>",
          "> What a __truly__ _lovely_ day!!!"
        ),
        ( "Simple nested blockquotes",
          "<blockquote><p>Hello</p><blockquote><p>World</p></blockquote></blockquote>",
          "> Hello\n>\n>> World"
        ),
        ( "Nested blockquotes",
          "<blockquote><p>Dorothy followed her through many \
          \of the beautiful rooms in her castle.</p><blockquote><p>The Witch \
          \bade her clean the pots and kettles and sweep the floor and keep the fire \
          \fed with wood.</p></blockquote></blockquote>",
          "> Dorothy followed her through many of the \
          \beautiful rooms in her castle.\n> \n>> The Witch bade her \
          \clean the pots and kettles and sweep the floor and keep the fire fed with wood."
        )
      ]
|
||||||
|
|
||||||
|
-- | Unordered lists: basic items, mixed bullet characters, inline formatting,
-- nesting, paragraph and blockquote items, lists that follow other blocks, and
-- an ordered list nested inside an unordered one.
--
-- The case that feeds a @###@ heading is labelled "Heading before list" so it
-- is distinguishable from the paragraph case in failure output.
unorderedListTests :: Test
unorderedListTests =
  TestList
    [ check_equal "Basic unordered list" "<ul><li>Item 1</li><li>Item 2</li><li>Item 3</li></ul>" (convert "* Item 1\n* Item 2\n* Item 3"),
      check_equal "Mixing list indicators" "<ul><li>Item 1</li><li>Item 2</li><li>Item 3</li></ul>" (convert "* Item 1\n+ Item 2\n- Item 3"),
      check_equal "Formatted lists" "<ul><li><b>Item 1</b></li><li><i>Item 2</i></li><li><b><i>Item 3</i></b></li></ul>" (convert "* __Item 1__\n+ _Item 2_\n- ***Item 3***"),
      check_equal "Nested list" "<ul><li>Item 1</li><li>Item 2</li><li>Item 3<ul><li>Subitem 1</li><li>Subitem 2</li></ul></li></ul>" (convert "* Item 1\n* Item 2\n* Item 3\n * Subitem 1\n * Subitem 2"),
      check_equal "Paragraph in list" "<ul><li>Item 1</li><li><p>Item 2</p><p>More stuff</p></li><li>Item 3</li></ul>" (convert "- Item 1\n- Item 2\n\n More stuff\n\n- Item 3"),
      check_equal "Paragraph before list" "<p>This is a list</p><ul><li>Item 1</li><li>Item 2</li></ul>" (convert "This is a list\n\n* Item 1\n* Item 2"),
      check_equal "Heading before list" "<h3>This is a list</h3><ul><li>Item 1</li><li>Item 2</li></ul>" (convert "### This is a list\n\n* Item 1\n* Item 2"),
      check_equal "Nested list then back" "<ul><li>Item 1</li><li>Item 2<ul><li>Item 3</li><li>Item 4</li></ul></li><li>Item 5</li></ul>" (convert "- Item 1\n- Item 2\n - Item 3\n - Item 4\n- Item 5"),
      check_equal "Blockquote in list" "<ul><li>Item 1</li><li><p>Item 2</p><blockquote><p>Quote</p></blockquote></li><li>Item 3</li></ul>" (convert "- Item 1\n- Item 2\n\n > Quote\n\n- Item 3"),
      check_equal "Ordered list in unordered list" "<ul><li>Item 1</li><li>Item 2<ol><li>Item 1</li><li>Item 2</li></ol></li><li>Item 3</li></ul>" (convert "- Item 1\n- Item 2\n 1. Item 1\n 2. Item 2\n- Item 3")
    ]
|
||||||
|
|
||||||
|
-- | Ordered lists: basic items, out-of-order numbering, the requirement that a
-- list starts with @1.@, inline formatting, nesting, paragraph and blockquote
-- items, lists that follow other blocks, an unordered list nested inside an
-- ordered one, and a single-item list.
--
-- The case that feeds a @###@ heading is labelled "Heading before list" so it
-- is distinguishable from the paragraph case in failure output.
orderedListTests :: Test
orderedListTests =
  TestList
    [ check_equal "Basic ordered list" "<ol><li>Item 1</li><li>Item 2</li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n3. Item 3"),
      check_equal "Mixing list numbering" "<ol><li>Item 1</li><li>Item 2</li><li>Item 3</li></ol>" (convert "1. Item 1\n3. Item 2\n2. Item 3"),
      check_equal "Should not convert list without number 1" "<p>2. Item 1 1. Item 2</p>" (convert "2. Item 1\n1. Item 2"),
      check_equal "Formatted lists" "<ol><li><b>Item 1</b></li><li><i>Item 2</i></li><li><b><i>Item 3</i></b></li></ol>" (convert "1. __Item 1__\n2. _Item 2_\n3. ***Item 3***"),
      check_equal "Nested list" "<ol><li>Item 1</li><li>Item 2</li><li>Item 3<ol><li>Subitem 1</li><li>Subitem 2</li></ol></li></ol>" (convert "1. Item 1\n2. Item 2\n3. Item 3\n 1. Subitem 1\n 2. Subitem 2"),
      check_equal "Paragraph in list" "<ol><li>Item 1</li><li><p>Item 2</p><p>More stuff</p></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n\n More stuff\n\n1. Item 3"),
      check_equal "Paragraph before list" "<p>This is a list</p><ol><li>Item 1</li><li>Item 2</li></ol>" (convert "This is a list\n\n1. Item 1\n1. Item 2"),
      check_equal "Heading before list" "<h3>This is a list</h3><ol><li>Item 1</li><li>Item 2</li></ol>" (convert "### This is a list\n\n1. Item 1\n200. Item 2"),
      check_equal "Nested list then back" "<ol><li>Item 1</li><li>Item 2<ol><li>Item 3</li><li>Item 4</li></ol></li><li>Item 5</li></ol>" (convert "1. Item 1\n2. Item 2\n 1. Item 3\n 3. Item 4\n5. Item 5"),
      check_equal "Blockquote in list" "<ol><li>Item 1</li><li><p>Item 2</p><blockquote><p>Quote</p></blockquote></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n\n > Quote\n\n3. Item 3"),
      check_equal "Unordered list in ordered list" "<ol><li>Item 1</li><li>Item 2<ul><li>Item 1</li><li>Item 2</li></ul></li><li>Item 3</li></ol>" (convert "1. Item 1\n2. Item 2\n - Item 1\n * Item 2\n4. Item 3"),
      check_equal "List with just 1 item" "<ol><li>Item 1</li></ol>" (convert "1. Item 1")
    ]
|
||||||
|
|
||||||
|
-- | Inline HTML must pass through unchanged inside the enclosing paragraph.
htmlTests :: Test
htmlTests = TestList [check_equal "Convert HTML element" expected (convert markdown)]
  where
    markdown = "<center>a</center>"
    expected = "<p><center>a</center></p>"
|
||||||
|
|
||||||
|
-- | Inline code spans.
--
-- The last case checks that an unterminated backtick span does not extend
-- across a paragraph break: both paragraphs are expected to come out with
-- their backticks kept as literal characters.
codeTests :: Test
codeTests =
  TestList [check_equal label html (convert markdown) | (label, html, markdown) <- cases]
  where
    cases =
      [ ( "Code by itself",
          "<p><code>Hello world!</code></p>",
          "`Hello world!`"
        ),
        ( "Code in a paragraph",
          "<p>The following <code>text</code> is code</p>",
          "The following `text` is code"
        ),
        ( "Code across paragraphs (shouldn't work)",
          "<p>`Incomplete</p><p>Code`</p>",
          "`Incomplete\n\nCode`"
        )
      ]
|
||||||
|
|
||||||
|
-- | Inline images inside a paragraph, with and without a trailing class
-- attribute block.
--
-- The Markdown inputs carry the image syntax (@![alt](src)@) that the expected
-- @<img>@ elements are derived from: alt text "Image 1", source "img.png".
imageTests :: Test
imageTests =
  TestList
    [ check_equal "Image with text" "<p>This is an image <img src=\"img.png\" alt=\"Image 1\"/></p>" (convert "This is an image ![Image 1](img.png)"),
      check_equal "Image with classes" "<p>This is an image <img src=\"img.png\" alt=\"Image 1\" class=\"new-img\"/></p>" (convert "This is an image ![Image 1](img.png){.new-img}")
    ]
|
||||||
|
|
||||||
|
-- | An image that stands alone (not inside paragraph text) must render as a
-- @<figure>@ whose caption repeats the alt text.
--
-- The Markdown input is the bare image syntax matching the expected output:
-- alt text "Image 1", source "img.png".
figureTests :: Test
figureTests =
  TestList
    [ check_equal "Image by itself" "<figure><img src=\"img.png\" alt=\"Image 1\"/><figcaption aria-hidden=\"true\">Image 1</figcaption></figure>" (convert "![Image 1](img.png)")
    ]
|
||||||
|
|
||||||
|
-- | A @---@ line between two paragraphs must render as @<hr>@ between them.
horizontalRuleTests :: Test
horizontalRuleTests = TestList [check_equal "Horizontal Rule" expected (convert markdown)]
  where
    markdown = "a\n\n---\n\nb"
    expected = "<p>a</p><hr><p>b</p>"
|
||||||
|
|
||||||
|
-- | A pipe table with a header row, a separator row and two data rows.
tableTests :: Test
tableTests = TestList [check_equal "Basic table" expected (convert markdown)]
  where
    markdown = "| Col 1 | Col 2 | Col 3 |\n|---|---|---|\n| Data 1 | Data 2 | Data 3 |\n| More Data 1 | More Data 2 | More Data 3 |"
    expected =
      "<table>\
      \<thead><tr><th>Col 1</th><th>Col 2</th><th>Col 3</th></tr></thead>\
      \<tbody><tr><td>Data 1</td><td>Data 2</td><td>Data 3</td></tr>\
      \<tr><td>More Data 1</td><td>More Data 2</td><td>More Data 3</td></tr></tbody></table>"
|
||||||
|
|
||||||
|
-- | End-to-end cases that combine several block and inline constructs in one
-- input.
integrationTests :: Test
integrationTests = TestList (map toCase cases)
  where
    toCase (label, html, markdown) = check_equal label html (convert markdown)
    cases =
      [ ( "Integration 1",
          "<h1>Sample Markdown</h1><p>This is some basic, sample markdown.</p><h2><b>Second</b> <i>Heading</i></h2>",
          "# Sample Markdown\n\n This is some basic, sample markdown.\n\n ## __Second__ _Heading_"
        ),
        ( "Integration 2",
          "<p><b>Hello</b> <i>World</i></p>",
          "__Hello__\n_World_"
        ),
        ( "Integration 3",
          "<h1>Hello</h1><p>World</p>",
          "# Hello\nWorld"
        ),
        ( "Integration 4",
          "<p>a b</p>",
          "a\nb"
        ),
        ( "Integration 5",
          "<h1>Hello</h1>",
          "# Hello\n"
        ),
        ( "Integration 6",
          "<p>First line<br />Second line</p>",
          "First line \nSecond line"
        ),
        ( "Integration 7",
          "<h1>Sample Markdown</h1><p>This is some basic, sample markdown.</p><h2>Second \
          \Heading</h2><ul><li>Unordered lists, and:<ol><li>One</li><li>Two</li><li>\
          \Three</li></ol></li><li>More</li></ul><blockquote><p>Blockquote</p>\
          \</blockquote><p>And <b>bold</b>, <i>italics</i>, and even <i>italics \
          \and later <b>bold</b></i>. Even <s>strikethrough</s>. \
          \<a href=\"https://markdowntohtml.com\">A link</a> to somewhere.</p>",
          "# Sample Markdown\n\nThis is some basic, sample markdown.\n\n## Second \
          \Heading\n\n- Unordered lists, and:\n 1. One\n 2. Two\n 3. Three\n\
          \- More\n\n> Blockquote\n\nAnd **bold**, *italics*, and even *italics and \
          \later __bold__*. Even ~~strikethrough~~. [A link](https://markdowntohtml.com) to somewhere."
        )
      ]
|
||||||
|
|
||||||
|
-- | Every test group in this module, combined into a single suite.
tests :: Test
tests =
  TestList
    [ headerTests
    , boldTests
    , strikethroughTests
    , linkTests
    , escapedCharTests
    , blockquoteTests
    , unorderedListTests
    , orderedListTests
    , imageTests
    , htmlTests
    , figureTests
    , codeTests
    , horizontalRuleTests
    , tableTests
    , integrationTests
    ]
|
||||||
|
|
||||||
|
-- | Run the whole suite with HUnit's text-based runner and return its counts.
runTests :: IO Counts
runTests = runTestTT tests
|
44
src/Test.hs
44
src/Test.hs
@@ -1,44 +0,0 @@
|
|||||||
module MdToHtmlTest where
|
|
||||||
|
|
||||||
import MdToHTML
|
|
||||||
import Test.HUnit
|
|
||||||
|
|
||||||
check_equal :: String -> String -> String -> Test
|
|
||||||
check_equal desc expected actual = TestCase (assertEqual desc expected actual)
|
|
||||||
|
|
||||||
convert :: String -> String
|
|
||||||
convert md = show . fst $ leftmostLongestParse parseDocument md
|
|
||||||
|
|
||||||
headerTests = TestList
|
|
||||||
[
|
|
||||||
check_equal "Should convert H1 heading" "<h1>Hello</h1>" (convert "# Hello"),
|
|
||||||
check_equal "Should convert H2 heading" "<h2>Hello</h2>" (convert "## Hello"),
|
|
||||||
check_equal "Should convert H3 heading" "<h3>Hello</h3>" (convert "### Hello"),
|
|
||||||
check_equal "Should convert H4 heading" "<h4>Hello</h4>" (convert "#### Hello"),
|
|
||||||
check_equal "Should convert H5 heading" "<h5>Hello</h5>" (convert "##### Hello"),
|
|
||||||
check_equal "Should convert H6 heading" "<h6>Hello</h6>" (convert "###### Hello")
|
|
||||||
]
|
|
||||||
|
|
||||||
boldTests = TestList
|
|
||||||
[
|
|
||||||
check_equal "Should convert bold" "<p><b>Hello</b></p>" (convert "__Hello__"),
|
|
||||||
check_equal "Should convert italic" "<p><i>Hello</i></p>" (convert "_Hello_"),
|
|
||||||
check_equal "Should convert bold and italic in a sentence" "<p>It <i>is</i> a <b>wonderful</b> day</p>" (convert "It _is_ a __wonderful__ day")
|
|
||||||
]
|
|
||||||
|
|
||||||
integrationTests = TestList
|
|
||||||
[
|
|
||||||
check_equal "Integration 1" "<h1>Sample Markdown</h1><p>This is some basic, sample markdown.</p><h2><b>Second</b> <i>Heading</i></h2>" (convert "# Sample Markdown\n\n This is some basic, sample markdown.\n\n ## __Second__ _Heading_"),
|
|
||||||
check_equal "Integration 2" "<p><b>Hello</b> <i>World</i></p>" (convert "__Hello__\n_World_"),
|
|
||||||
check_equal "Integration 3" "<h1>Hello</h1><p>World</p>" (convert "# Hello\nWorld")
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
tests = TestList
|
|
||||||
[
|
|
||||||
headerTests,
|
|
||||||
boldTests,
|
|
||||||
integrationTests
|
|
||||||
]
|
|
||||||
|
|
||||||
runTests = runTestTT tests
|
|
Reference in New Issue
Block a user