Split package into library module 'src' and executable 'app'
parent
626eb71097
commit
d63cd98288
@ -1,121 +1,8 @@
|
||||
module Main where
|
||||
|
||||
import Text.ParserCombinators.ReadP
|
||||
import Control.Monad
|
||||
import Control.Applicative
|
||||
import Text.Printf
|
||||
import Debug.Trace
|
||||
import Data.List
|
||||
|
||||
-- | Depth of a heading; 'parseHeader' only ever produces values 1 to 6.
type HeaderLevel = Int

-- | Wrapper around the target address of a 'Link'.
newtype URL = URL
  { getUrl :: String
  }

-- | Wrapper around the source path of an 'Image'.
newtype ImgPath = ImgPath
  { getPath :: String
  }
|
||||
|
||||
-- | Zero-or-more repetition of a parser.
--
-- This is ReadP's own @many@ under a distinct name: "Control.Applicative" is
-- imported unqualified as well and exports a @many@ of its own, so the
-- unqualified name would be ambiguous.
parseMany :: ReadP a -> ReadP [a]
parseMany parser = Text.ParserCombinators.ReadP.many parser
|
||||
|
||||
-- | A node of the parsed Markdown tree.
data MdToken
  = Header HeaderLevel MdToken -- ^ Heading: level and content.
  | Para [MdToken] -- ^ Sequence of tokens (what 'parseLine' produces).
  | Linebreak -- ^ Line break.
  | HorizontalRule -- ^ Horizontal rule.
  | Blockquote MdToken -- ^ Block quote around a token.
  | UnordList [MdToken] -- ^ Unordered list of items.
  | OrdList [MdToken] -- ^ Ordered list of items.
  | Code String -- ^ Inline code.
  | Codeblock String -- ^ Code block.
  | Link MdToken URL -- ^ Hyperlink: label and target.
  | Image MdToken ImgPath -- ^ Image: label and source path.
  | Bold MdToken -- ^ Bold content.
  | Italic MdToken -- ^ Italic content.
  | Strikethrough MdToken -- ^ Struck-through content.
  | Unit String -- ^ Plain text.
|
||||
|
||||
-- Hand-written Show instance for MdToken: renders the token tree as text,
-- using HTML tags for headings, links, images and inline emphasis.
instance Show MdToken where
  show mdToken = case mdToken of
    Header level inner ->
      concat ["<h", show level, ">", show inner, "</h", show level, ">"]
    Para inners -> concatMap show inners
    Linebreak -> "\n"
    HorizontalRule -> "---------"
    Blockquote inner -> "BLOCK" ++ show inner
    UnordList items -> "UNORD" ++ concatMap show items
    OrdList items -> "ORD" ++ concatMap show items
    -- Code is shown through the String instance, i.e. quoted and escaped.
    Code code -> show code
    Codeblock code -> show code
    Link label url -> concat ["<a href=", getUrl url, ">", show label, "</a>"]
    Image label path ->
      concat ["<img src=", getPath path, ">", show label, "</img>"]
    Bold inner -> "<b>" ++ show inner ++ "</b>"
    Italic inner -> "<i>" ++ show inner ++ "</i>"
    Strikethrough inner -> "<s>" ++ show inner ++ "</s>"
    -- Plain text is emitted verbatim (no quoting).
    Unit text -> text
|
||||
|
||||
|
||||
|
||||
-- ---------------
|
||||
-- Helpers
|
||||
-- | Consume exactly one @#@ character; fail on anything else.
mustBeHash :: ReadP Char
mustBeHash = char '#'
|
||||
|
||||
-- | Pick the preferred entry from a list of @(value, remainder)@ parse
-- results.
--
-- The last entry is the reference for "most input consumed": the first entry
-- whose remainder has the same length as the last entry's remainder is
-- returned. (This assumes the list is ordered by increasing consumption, as
-- 'readP_to_S' results are expected to be -- TODO confirm.)
--
-- Calls 'error' with a descriptive message when the list is empty, i.e. when
-- the parser produced no result at all.
leftmostLongest :: (Foldable t) => [(a, t b)] -> (a, t b)
leftmostLongest [] =
  error "leftmostLongest: empty result list (the parser did not match the input)"
leftmostLongest results@(firstResult : _) =
  firstOf (dropWhile ((/= targetLen) . length . snd) results)
  where
    -- Computed once; 'last' is safe because the list is non-empty here.
    targetLen = length (snd (last results))
    firstOf (best : _) = best
    -- Unreachable: the last entry always has the target length.
    firstOf [] = firstResult
|
||||
|
||||
-- | Run a parser over the input with 'readP_to_S' and keep the result that
-- 'leftmostLongest' selects: the parsed value paired with the unconsumed
-- rest of the input.
leftmostLongestParse :: ReadP a -> String -> (a, String)
leftmostLongestParse parser = leftmostLongest . readP_to_S parser
|
||||
-- ---------------
|
||||
|
||||
-- | Parse a heading: one to six @#@ characters, a single space, and then the
-- rest of the line, which is parsed on its own as inline content.
parseHeader :: ReadP MdToken
parseHeader = do
  hashes <- many1 mustBeHash
  let level = length hashes
  guard (level <= 6) -- more than six hashes is not a heading
  _ <- string " "
  restOfLine <- munch1 (/= '\n') -- up to, but not including, the newline
  pure (Header level (fst (leftmostLongestParse parseLine restOfLine)))
|
||||
|
||||
-- | Parse bold text: content enclosed in @__@ or in @**@. The content may
-- not contain the delimiter character itself.
parseBold :: ReadP MdToken
parseBold = do
  inner <- delimited '_' +++ delimited '*'
  pure (Bold (fst (leftmostLongestParse parseLine inner)))
  where
    -- A doubled marker character on both sides of a non-empty run of
    -- characters other than that marker.
    delimited mark =
      between (string [mark, mark]) (string [mark, mark]) (munch1 (/= mark))
|
||||
|
||||
-- | Parse italic text: content enclosed in single @_@ or single @*@
-- characters. The content may not contain the delimiter character itself.
parseItalic :: ReadP MdToken
parseItalic = do
  inner <- delimited '_' +++ delimited '*'
  pure (Italic (fst (leftmostLongestParse parseLine inner)))
  where
    -- One marker character on both sides of a non-empty run of characters
    -- other than that marker.
    delimited mark = between (string [mark]) (string [mark]) (munch1 (/= mark))
|
||||
|
||||
-- | Parse plain text as a 'Unit'.
--
-- The first character is taken unconditionally (so at least one character is
-- always consumed, even a markup character); the following characters are
-- taken up to the next @#@, @*@, @_@, @[@ or newline.
parseString :: ReadP MdToken
parseString = Unit <$> ((:) <$> get <*> munch (`notElem` "#*_[\n"))
|
||||
|
||||
-- | Parse a single token: a heading, bold text, italic text or plain text.
-- The alternatives are combined symmetrically, so every one that matches
-- contributes a result.
parseToken :: ReadP MdToken
parseToken = parseHeader +++ parseBold +++ parseItalic +++ parseString
|
||||
|
||||
-- | Parse zero or more tokens and collect them in a 'Para'.
parseLine :: ReadP MdToken
parseLine = Para <$> parseMany parseToken
|
||||
|
||||
import MdToHTML
|
||||
|
||||
-- | Parse a sample Markdown heading with 'parseLine' and print the selected
-- result (the rendered token together with the unconsumed input).
main :: IO ()
main = print (leftmostLongestParse parseLine "## Hello ___world___")
|
||||
|
@ -0,0 +1,149 @@
|
||||
module MdToHTML where
|
||||
|
||||
import Text.ParserCombinators.ReadP
|
||||
import Control.Monad
|
||||
import Control.Applicative
|
||||
import Text.Printf
|
||||
import Debug.Trace
|
||||
import Data.List
|
||||
|
||||
-- | Depth of a heading; 'parseHeader' only ever produces values 1 to 6.
type HeaderLevel = Int

-- | Wrapper around the target address of a 'Link'.
newtype URL = URL
  { getUrl :: String
  }

-- | Wrapper around the source path of an 'Image'.
newtype ImgPath = ImgPath
  { getPath :: String
  }
|
||||
|
||||
-- | Zero-or-more repetition of a parser.
--
-- This is ReadP's own @many@ under a distinct name: "Control.Applicative" is
-- imported unqualified as well and exports a @many@ of its own, so the
-- unqualified name would be ambiguous.
parseMany :: ReadP a -> ReadP [a]
parseMany parser = Text.ParserCombinators.ReadP.many parser
|
||||
|
||||
-- | A node of the parsed Markdown tree.
data MdToken
  = Header HeaderLevel MdToken -- ^ Heading: level and content.
  | Para MdToken -- ^ Paragraph wrapping a single token.
  | Line [MdToken] -- ^ Sequence of tokens (what 'parseLine' produces).
  | Linebreak -- ^ Line break.
  | HorizontalRule -- ^ Horizontal rule.
  | Blockquote MdToken -- ^ Block quote around a token.
  | UnordList [MdToken] -- ^ Unordered list of items.
  | OrdList [MdToken] -- ^ Ordered list of items.
  | Code String -- ^ Inline code.
  | Codeblock String -- ^ Code block.
  | Link MdToken URL -- ^ Hyperlink: label and target.
  | Image MdToken ImgPath -- ^ Image: label and source path.
  | Bold MdToken -- ^ Bold content.
  | Italic MdToken -- ^ Italic content.
  | Strikethrough MdToken -- ^ Struck-through content.
  | Unit String -- ^ Plain text.
|
||||
|
||||
-- Hand-written Show instance for MdToken: renders the token tree as text,
-- using HTML tags for headings, paragraphs, links, images and emphasis.
instance Show MdToken where
  show mdToken = case mdToken of
    Header level inner ->
      concat ["<h", show level, ">", show inner, "</h", show level, ">"]
    Para inner -> "<p>" ++ show inner ++ "</p>"
    Line inners -> concatMap show inners
    Linebreak -> "<br>"
    HorizontalRule -> "---------"
    Blockquote inner -> "BLOCK" ++ show inner
    UnordList items -> "UNORD" ++ concatMap show items
    OrdList items -> "ORD" ++ concatMap show items
    -- Code is shown through the String instance, i.e. quoted and escaped.
    Code code -> show code
    Codeblock code -> show code
    Link label url -> concat ["<a href=", getUrl url, ">", show label, "</a>"]
    Image label imgPath ->
      concat ["<img src=", getPath imgPath, ">", show label, "</img>"]
    Bold inner -> "<b>" ++ show inner ++ "</b>"
    Italic inner -> "<i>" ++ show inner ++ "</i>"
    Strikethrough inner -> "<s>" ++ show inner ++ "</s>"
    -- Plain text is emitted verbatim (no quoting).
    Unit text -> text
|
||||
|
||||
|
||||
|
||||
-- ---------------
|
||||
-- Helpers
|
||||
-- | Consume exactly one @#@ character; fail on anything else.
mustBeHash :: ReadP Char
mustBeHash = char '#'
|
||||
|
||||
-- | Pick the preferred entry from a list of @(value, remainder)@ parse
-- results.
--
-- The last entry is the reference for "most input consumed": the first entry
-- whose remainder has the same length as the last entry's remainder is
-- returned. (This assumes the list is ordered by increasing consumption, as
-- 'readP_to_S' results are expected to be -- TODO confirm.)
--
-- Calls 'error' with a descriptive message when the list is empty, i.e. when
-- the parser produced no result at all.
leftmostLongest :: (Foldable t) => [(a, t b)] -> (a, t b)
leftmostLongest [] =
  error "leftmostLongest: empty result list (the parser did not match the input)"
leftmostLongest results@(firstResult : _) =
  firstOf (dropWhile ((/= targetLen) . length . snd) results)
  where
    -- Computed once; 'last' is safe because the list is non-empty here.
    targetLen = length (snd (last results))
    firstOf (best : _) = best
    -- Unreachable: the last entry always has the target length.
    firstOf [] = firstResult
|
||||
|
||||
-- | Run a parser over the input with 'readP_to_S' and keep the result that
-- 'leftmostLongest' selects: the parsed value paired with the unconsumed
-- rest of the input.
leftmostLongestParse :: ReadP a -> String -> (a, String)
leftmostLongestParse parser = leftmostLongest . readP_to_S parser
|
||||
|
||||
-- | Consume a single character, but only if the entire remaining input
-- satisfies the given predicate; otherwise fail without consuming anything.
-- The predicate is applied to the input before the character is taken.
lookaheadParse :: (String -> Bool) -> ReadP Char
lookaheadParse stringCmp =
  look >>= \rest -> if stringCmp rest then get else pfail
|
||||
|
||||
-- | Return the tokens contained in a 'Line'.
--
-- Any other token is returned as a one-element list, so the function is
-- total instead of failing with a pattern-match error on non-'Line' input.
lineToList :: MdToken -> [MdToken]
lineToList (Line tokens) = tokens
lineToList other = [other]
|
||||
-- ---------------
|
||||
|
||||
-- | Parse a heading: one to six @#@ characters, a single space, and then the
-- rest of the line, which is parsed on its own as inline content.
parseHeader :: ReadP MdToken
parseHeader = do
  hashes <- many1 mustBeHash
  let level = length hashes
  guard (level <= 6) -- more than six hashes is not a heading
  _ <- string " "
  restOfLine <- munch1 (/= '\n') -- up to, but not including, the newline
  pure (Header level (fst (leftmostLongestParse parseLine restOfLine)))
|
||||
|
||||
-- | Parse bold text: content enclosed in a pair of @__@ or a pair of @**@.
-- The enclosed text is parsed again as inline content.
parseBold :: ReadP MdToken
parseBold = do
  inner <- delimited "__" +++ delimited "**"
  pure (Bold (fst (leftmostLongestParse parseLine inner)))
  where
    -- The marker on both sides of one or more characters; a character is
    -- only taken while the remaining input is not exactly the marker itself.
    delimited marker =
      between
        (string marker)
        (string marker)
        (many1 (lookaheadParse (/= marker)))
|
||||
|
||||
-- | Parse italic text: content enclosed in single @_@ or single @*@
-- characters. The content may not contain the delimiter character itself.
parseItalic :: ReadP MdToken
parseItalic = do
  inner <- delimited '_' +++ delimited '*'
  pure (Italic (fst (leftmostLongestParse parseLine inner)))
  where
    -- One marker character on both sides of a non-empty run of characters
    -- other than that marker.
    delimited mark = between (string [mark]) (string [mark]) (munch1 (/= mark))
|
||||
|
||||
-- | Consume a single newline character and produce a 'Linebreak'.
parseLinebreak :: ReadP MdToken
parseLinebreak = Linebreak <$ char '\n'
|
||||
|
||||
-- | Parse plain text as a 'Unit'.
--
-- The first character is taken unconditionally (so at least one character is
-- always consumed, even a markup character); the following characters are
-- taken up to the next @#@, @*@, @_@, @[@ or newline.
parseString :: ReadP MdToken
parseString = Unit <$> ((:) <$> get <*> munch (`notElem` "#*_[\n"))
|
||||
|
||||
-- | Parse a single token: a heading, a line break, bold text, italic text or
-- plain text. The alternatives are combined symmetrically, so every one that
-- matches contributes a result.
parseToken :: ReadP MdToken
parseToken =
  parseHeader +++ parseLinebreak +++ parseBold +++ parseItalic +++ parseString
|
||||
|
||||
-- | Parse a sequence of tokens into a 'Line'. Fails when no input is left;
-- otherwise zero or more tokens are collected.
parseLine :: ReadP MdToken
parseLine = do
  rest <- look
  guard (not (null rest)) -- nothing left to read: no line here
  Line <$> parseMany parseToken
|
||||
|
||||
-- | Parse a paragraph: leading newlines are skipped, then everything up to
-- the next blank line (@"\n\n"@) is parsed with 'parseLine' and wrapped in
-- 'Para'. The text may span several physical lines.
--
-- The terminating @"\n\n"@ is consumed by this parser. Text that is not
-- followed by @"\n\n"@ does not match.
parsePara :: ReadP MdToken
parsePara = do
  -- 'munch' is greedy and yields exactly one result, so all leading newlines
  -- are skipped and none of them can end up inside the paragraph text.
  _ <- munch (== '\n')
  -- Take characters until the remaining input starts with a blank line.
  -- 'isPrefixOf' only inspects the first two characters, unlike measuring
  -- the length of the whole remaining input for every character.
  text <- many1 (lookaheadParse (not . isPrefixOf "\n\n"))
  _ <- string "\n\n"
  let parsedText = fst $ leftmostLongestParse parseLine text
  return (Para parsedText)
|
@ -0,0 +1,22 @@
|
||||
module MdToHtmlTest where
|
||||
|
||||
import MdToHTML
|
||||
import Test.HUnit
|
||||
|
||||
-- | Heading levels one through six must render as @<h1>@ .. @<h6>@.
headerTests :: Test
headerTests =
  TestList
    [ rendersAs "Should convert H1 heading" "<h1>Hello</h1>" "# Hello"
    , rendersAs "Should convert H2 heading" "<h2>Hello</h2>" "## Hello"
    , rendersAs "Should convert H3 heading" "<h3>Hello</h3>" "### Hello"
    , rendersAs "Should convert H4 heading" "<h4>Hello</h4>" "#### Hello"
    , rendersAs "Should convert H5 heading" "<h5>Hello</h5>" "##### Hello"
    , rendersAs "Should convert H6 heading" "<h6>Hello</h6>" "###### Hello"
    ]
  where
    -- One case: parse the Markdown input with 'parseLine' and compare the
    -- rendered token against the expected output.
    rendersAs label expected markdown =
      TestCase
        ( assertEqual
            label
            expected
            (show (fst (leftmostLongestParse parseLine markdown)))
        )
|
||||
|
||||
|
||||
-- | All test groups of this module.
tests :: Test
tests = TestList [headerTests]
|
||||
|
||||
-- | Run every test in 'tests' with HUnit's text runner.
runTests :: IO Counts
runTests = runTestTT tests
|
Loading…
Reference in New Issue