From 1a838e343e052a530b5f6afb34ca166c5317da20 Mon Sep 17 00:00:00 2001 From: Aadhavan Srinivasan Date: Mon, 28 Apr 2025 17:20:01 -0400 Subject: [PATCH] Got recursive parsing working --- app/Main.hs | 66 +++++++++++++++++++++++++++++++++++------------------ 1 file changed, 44 insertions(+), 22 deletions(-) diff --git a/app/Main.hs b/app/Main.hs index 86547cf..1ca827c 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -3,12 +3,17 @@ module Main where import Text.ParserCombinators.ReadP import Control.Monad import Control.Applicative +import Text.Printf +import Debug.Trace type HeaderLevel = Int newtype URL = URL {getUrl :: String} newtype ImgPath = ImgPath {getPath :: String} +parseMany :: ReadP a -> ReadP [a] +parseMany = Text.ParserCombinators.ReadP.many + data MdToken = Header HeaderLevel MdToken | Para [MdToken] | Linebreak @@ -18,11 +23,11 @@ data MdToken = Header HeaderLevel MdToken | OrdList [MdToken] | Code String | Codeblock String - | Link [MdToken] URL - | Image [MdToken] ImgPath - | Bold [MdToken] - | Italic [MdToken] - | Strikethrough [MdToken] + | Link MdToken URL + | Image MdToken ImgPath + | Bold MdToken + | Italic MdToken + | Strikethrough MdToken | Unit String -- Deriving Show for MdToken @@ -34,14 +39,14 @@ instance Show MdToken where show (Blockquote token) = "BLOCK" ++ show token show (UnordList tokens) = "UNORD" ++ concat(map show tokens) show (OrdList tokens) = "ORD" ++ concat(map show tokens) - show (Code code) = concat(map show code) - show (Codeblock code) = concat(map show code) - show (Link txt url) = "" ++ concat(map show txt) ++ "" - show (Image txt path) = "" ++ concat(map show txt) ++ "" - show (Bold tokens) = "" ++ concat(map show tokens) ++ "" - show (Italic tokens) = "" ++ concat(map show tokens) ++ "" - show (Strikethrough tokens) = "" ++ concat (map show tokens) ++ "" - show (Unit unit) = show unit + show (Code code) = show code + show (Codeblock code) = show code + show (Link txt url) = "" ++ show txt ++ "" + show (Image txt path) = "" ++ show txt ++ "" + show (Bold token) = "" ++ show token ++ "" + show (Italic token) = "" ++ show token ++ "" + show (Strikethrough token) = "" ++ show token ++ "" + show (Unit unit) = printf "%s" unit @@ -61,21 +66,38 @@ parseHeader = do when ((length headers) > 6) pfail _ <- string " " - text <- munch1 (const True) -- Parse until EOL - return (Header (length headers) text) + text <- munch1 (\x -> x /= '\n') -- Parse until EOL +-- traceM text + let allParsedText = readP_to_S parsePara text + let parsedText = fst . last $ allParsedText + return (Header (length headers) parsedText) parseBold :: ReadP MdToken parseBold = do _ <- string "__" <|> string "**" text <- munch1 (\x -> x /= '_' && x /= '*') -- Parse until first asterisk/underscore + -- traceM text _ <- char '_' <|> char '*' -- Throw away the second asterisk/underscore - return (Bold text) - - -parseMain :: String -> []MdToken - -parseMain + let parsedText = fst . last $ readP_to_S parsePara text + return (Bold parsedText) + +parseString :: ReadP MdToken +parseString = do + text <- munch1 (\x -> not (elem x "#*_[")) + -- traceM text + return (Unit text) + +parseLine :: ReadP MdToken +parseLine = choice [parseHeader,parseBold,parseString] + +parsePara :: ReadP MdToken +parsePara = do + parsed <- parseMany parseLine +-- traceM $ show parsed + return (Para parsed) main :: IO () -main = putStrLn "Hello, Haskell!" +main = do + let res = readP_to_S parsePara "## Hello **world**" + putStrLn (show res)