From d63cd9828851608734e9121bee2c1ad5771cd8f0 Mon Sep 17 00:00:00 2001 From: Aadhavan Srinivasan Date: Fri, 2 May 2025 10:30:08 -0400 Subject: [PATCH] Split package into library module 'src' and executable 'app' --- app/Main.hs | 117 +------------------------------------ src/MdToHtml.hs | 149 ++++++++++++++++++++++++++++++++++++++++++++++++ src/Test.hs | 22 +++++++ 3 files changed, 173 insertions(+), 115 deletions(-) create mode 100644 src/MdToHtml.hs create mode 100644 src/Test.hs diff --git a/app/Main.hs b/app/Main.hs index 05d1fd1..1b4c972 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -1,121 +1,8 @@ module Main where -import Text.ParserCombinators.ReadP -import Control.Monad -import Control.Applicative -import Text.Printf -import Debug.Trace -import Data.List - -type HeaderLevel = Int - -newtype URL = URL {getUrl :: String} -newtype ImgPath = ImgPath {getPath :: String} - -parseMany :: ReadP a -> ReadP [a] -parseMany = Text.ParserCombinators.ReadP.many - -data MdToken = Header HeaderLevel MdToken - | Para [MdToken] - | Linebreak - | HorizontalRule - | Blockquote MdToken - | UnordList [MdToken] - | OrdList [MdToken] - | Code String - | Codeblock String - | Link MdToken URL - | Image MdToken ImgPath - | Bold MdToken - | Italic MdToken - | Strikethrough MdToken - | Unit String - --- Deriving Show for MdToken -instance Show MdToken where - show (Header level token) = "" ++ show token ++ "" - show (Para tokens) = concat(map show tokens) - show Linebreak = "\n" - show HorizontalRule = "---------" - show (Blockquote token) = "BLOCK" ++ show token - show (UnordList tokens) = "UNORD" ++ concat(map show tokens) - show (OrdList tokens) = "ORD" ++ concat(map show tokens) - show (Code code) = show code - show (Codeblock code) = show code - show (Link txt url) = "" ++ show txt ++ "" - show (Image txt path) = "" ++ show txt ++ "" - show (Bold token) = "" ++ show token ++ "" - show (Italic token) = "" ++ show token ++ "" - show (Strikethrough token) = "" ++ show token ++ "" - show (Unit unit) = printf "%s" unit - - - --- --------------- --- Helpers -mustBeHash :: ReadP Char -mustBeHash = satisfy (\x -> x == '#') - -leftmostLongest :: (Foldable t) => [(a, t b)] -> (a, t b) -leftmostLongest xs = - let lastElem = (last xs) - filteredLst = (filter (\val -> (length $ snd val) == (length $ snd lastElem)) xs) - in head filteredLst - -leftmostLongestParse :: ReadP a -> String -> (a, String) -leftmostLongestParse parser input = leftmostLongest $ readP_to_S parser input --- --------------- - -parseHeader :: ReadP MdToken -parseHeader = do - headers <- many1 mustBeHash - when ((length headers) > 6) - pfail - _ <- string " " - text <- munch1 (\x -> x /= '\n') -- Parse until EOL --- traceM text - let parsedText = fst $ leftmostLongestParse parseLine text - return (Header (length headers) parsedText) - -parseBold :: ReadP MdToken -parseBold = do - text <- choice[ - (between (string "__") (string "__") (munch1 (/= '_'))), - (between (string "**") (string "**") (munch1 (/= '*'))) - ] --- text <- munch1 (\x -> x /= '_' && x /= '*') -- Parse until first asterisk/underscore - -- traceM text - -- _ <- char '_' <|> char '*' -- Throw away the second asterisk/underscore - let parsedText = fst $ leftmostLongestParse parseLine text - return (Bold parsedText) - -parseItalic :: ReadP MdToken -parseItalic = do - text <- choice[ - (between (string "_") (string "_") (munch1 (/= '_'))), - (between (string "*") (string "*") (munch1 (/= '*'))) - ] - let parsedText = fst $ leftmostLongestParse parseLine text - return (Italic parsedText) - -parseString :: ReadP MdToken -parseString = do - firstChar <- get -- Must parse at least one character here - text <- munch (\x -> not (elem x "#*_[\n")) - return (Unit (firstChar:text)) - --return (Unit text) - -parseToken :: ReadP MdToken -parseToken = choice [parseHeader, parseBold, parseItalic, parseString] - -parseLine :: ReadP MdToken -parseLine = do - parsed <- parseMany parseToken --- traceM $ show parsed - return (Para parsed) - +import MdToHTML main :: IO () main = do - let res = leftmostLongestParse parseLine "## Hello __world_*" + let res = leftmostLongestParse parseLine "## Hello ___world___" putStrLn (show res) diff --git a/src/MdToHtml.hs b/src/MdToHtml.hs new file mode 100644 index 0000000..568b6f0 --- /dev/null +++ b/src/MdToHtml.hs @@ -0,0 +1,149 @@ +module MdToHTML where + +import Text.ParserCombinators.ReadP +import Control.Monad +import Control.Applicative +import Text.Printf +import Debug.Trace +import Data.List + +type HeaderLevel = Int + +newtype URL = URL {getUrl :: String} +newtype ImgPath = ImgPath {getPath :: String} + +parseMany :: ReadP a -> ReadP [a] +parseMany = Text.ParserCombinators.ReadP.many + +data MdToken = Header HeaderLevel MdToken + | Para MdToken + | Line [MdToken] + | Linebreak + | HorizontalRule + | Blockquote MdToken + | UnordList [MdToken] + | OrdList [MdToken] + | Code String + | Codeblock String + | Link MdToken URL + | Image MdToken ImgPath + | Bold MdToken + | Italic MdToken + | Strikethrough MdToken + | Unit String + +-- Deriving Show for MdToken +instance Show MdToken where + show (Header level token) = "" ++ show token ++ "" + show (Para token) = "

" ++ show token ++ "

" + show (Line tokens) = concat(map show tokens) + show Linebreak = "
" + show HorizontalRule = "---------" + show (Blockquote token) = "BLOCK" ++ show token + show (UnordList tokens) = "UNORD" ++ concat(map show tokens) + show (OrdList tokens) = "ORD" ++ concat(map show tokens) + show (Code code) = show code + show (Codeblock code) = show code + show (Link txt url) = "" ++ show txt ++ "" + show (Image txt imgPath) = "" ++ show txt ++ "" + show (Bold token) = "" ++ show token ++ "" + show (Italic token) = "" ++ show token ++ "" + show (Strikethrough token) = "" ++ show token ++ "" + show (Unit unit) = printf "%s" unit + + + +-- --------------- +-- Helpers +mustBeHash :: ReadP Char +mustBeHash = satisfy (\x -> x == '#') + +leftmostLongest :: (Foldable t) => [(a, t b)] -> (a, t b) +leftmostLongest xs = + let lastElem = (last xs) + filteredLst = (filter (\val -> (length $ snd val) == (length $ snd lastElem)) xs) + in head filteredLst + +-- Get the first parse returned by readP_to_S that consumed the most input +leftmostLongestParse :: ReadP a -> String -> (a, String) +leftmostLongestParse parser input = leftmostLongest $ readP_to_S parser input + +-- Parse if the string that's left matches the string comparator function +lookaheadParse :: (String -> Bool) -> ReadP Char +lookaheadParse stringCmp = do + lookahead <- look + case stringCmp lookahead of + True -> get + False -> pfail + +lineToList :: MdToken -> [MdToken] +lineToList (Line tokens) = tokens +-- --------------- + +-- Parse a markdown header, denoted by 1-6 #'s followed by some text, followed by EOL. +parseHeader :: ReadP MdToken +parseHeader = do + headers <- many1 mustBeHash + when ((length headers) > 6) + pfail + _ <- string " " + text <- munch1 (\x -> x /= '\n') -- Parse until EOL +-- traceM text + let parsedText = fst $ leftmostLongestParse parseLine text + return (Header (length headers) parsedText) + +-- Parse bold text +parseBold :: ReadP MdToken +parseBold = do + text <- choice[ + (between (string "__") (string "__") (many1 (lookaheadParse (/= "__")))), + (between (string "**") (string "**") (many1 (lookaheadParse (/= "**")))) + ] + let parsedText = fst $ leftmostLongestParse parseLine text + return (Bold parsedText) + +-- Parse italic text +parseItalic :: ReadP MdToken +parseItalic = do + text <- choice[ + (between (string "_") (string "_") (munch1 (/= '_'))), + (between (string "*") (string "*") (munch1 (/= '*'))) + ] + let parsedText = fst $ leftmostLongestParse parseLine text + return (Italic parsedText) + +-- Parse a linebreak character +parseLinebreak :: ReadP MdToken +parseLinebreak = do + char '\n' + return Linebreak + +-- Parse a regular string as a Unit. +parseString :: ReadP MdToken +parseString = do + firstChar <- get -- Must parse at least one character here + text <- munch (\x -> not (elem x "#*_[\n")) + return (Unit (firstChar:text)) + +-- Parse any of the above tokens. +parseToken :: ReadP MdToken +parseToken = choice [parseHeader, parseLinebreak, parseBold, parseItalic, parseString] + +-- Parse a line, consisting of one or more tokens. +parseLine :: ReadP MdToken +parseLine = do + remaining <- look + when (null remaining) pfail + parsed <- parseMany parseToken +-- traceM $ show parsed + return (Line parsed) + +-- Parse a paragraph, which is a 'Line' (can span multiple actual lines), separated by double-newlines. +parsePara :: ReadP MdToken +parsePara = do + parseMany (char '\n') + text <- many1 (lookaheadParse (\x -> ((length x) < 2) || (take 2 x) /= "\n\n")) + string "\n\n" + -- I don't consume the ending double-newline, because the next paragraph will consume it as part of its starting double-newline. + let parsedText = fst $ leftmostLongestParse parseLine text + return (Para parsedText) diff --git a/src/Test.hs b/src/Test.hs new file mode 100644 index 0000000..e4333d1 --- /dev/null +++ b/src/Test.hs @@ -0,0 +1,22 @@ +module MdToHtmlTest where + +import MdToHTML +import Test.HUnit + +headerTests = TestList + [ + (TestCase (assertEqual "Should convert H1 heading" "

Hello

" (show . fst $ leftmostLongestParse parseLine "# Hello"))), + (TestCase (assertEqual "Should convert H2 heading" "

Hello

" (show . fst $ leftmostLongestParse parseLine "## Hello"))), + (TestCase (assertEqual "Should convert H3 heading" "

Hello

" (show . fst $ leftmostLongestParse parseLine "### Hello"))), + (TestCase (assertEqual "Should convert H4 heading" "

Hello

" (show . fst $ leftmostLongestParse parseLine "#### Hello"))), + (TestCase (assertEqual "Should convert H5 heading" "
Hello
" (show . fst $ leftmostLongestParse parseLine "##### Hello"))), + (TestCase (assertEqual "Should convert H6 heading" "
Hello
" (show . fst $ leftmostLongestParse parseLine "###### Hello"))) + ] + + +tests = TestList + [ + headerTests + ] + +runTests = runTestTT tests