module Main where import Text.ParserCombinators.ReadP import Control.Monad import Control.Applicative import Text.Printf import Debug.Trace import Data.List type HeaderLevel = Int newtype URL = URL {getUrl :: String} newtype ImgPath = ImgPath {getPath :: String} parseMany :: ReadP a -> ReadP [a] parseMany = Text.ParserCombinators.ReadP.many data MdToken = Header HeaderLevel MdToken | Para [MdToken] | Linebreak | HorizontalRule | Blockquote MdToken | UnordList [MdToken] | OrdList [MdToken] | Code String | Codeblock String | Link MdToken URL | Image MdToken ImgPath | Bold MdToken | Italic MdToken | Strikethrough MdToken | Unit String -- Deriving Show for MdToken instance Show MdToken where show (Header level token) = "" ++ show token ++ "" show (Para tokens) = concat(map show tokens) show Linebreak = "\n" show HorizontalRule = "---------" show (Blockquote token) = "BLOCK" ++ show token show (UnordList tokens) = "UNORD" ++ concat(map show tokens) show (OrdList tokens) = "ORD" ++ concat(map show tokens) show (Code code) = show code show (Codeblock code) = show code show (Link txt url) = "" ++ show txt ++ "" show (Image txt path) = "" ++ show txt ++ "" show (Bold token) = "" ++ show token ++ "" show (Italic token) = "" ++ show token ++ "" show (Strikethrough token) = "" ++ show token ++ "" show (Unit unit) = printf "%s" unit -- --------------- -- Helpers mustBeHash :: ReadP Char mustBeHash = satisfy (\x -> x == '#') leftmostLongest :: (Foldable t) => [(a, t b)] -> (a, t b) leftmostLongest xs = let lastElem = (last xs) filteredLst = (filter (\val -> (length $ snd val) == (length $ snd lastElem)) xs) in head filteredLst leftmostLongestParse :: ReadP a -> String -> (a, String) leftmostLongestParse parser input = leftmostLongest $ readP_to_S parser input -- --------------- parseHeader :: ReadP MdToken parseHeader = do headers <- many1 mustBeHash when ((length headers) > 6) pfail _ <- string " " text <- munch1 (\x -> x /= '\n') -- Parse until EOL -- traceM text let parsedText = fst $ leftmostLongestParse parseLine text return (Header (length headers) parsedText) parseBold :: ReadP MdToken parseBold = do text <- choice[ (between (string "__") (string "__") (munch1 (/= '_'))), (between (string "**") (string "**") (munch1 (/= '*'))) ] -- text <- munch1 (\x -> x /= '_' && x /= '*') -- Parse until first asterisk/underscore -- traceM text -- _ <- char '_' <|> char '*' -- Throw away the second asterisk/underscore let parsedText = fst $ leftmostLongestParse parseLine text return (Bold parsedText) parseItalic :: ReadP MdToken parseItalic = do text <- choice[ (between (string "_") (string "_") (munch1 (/= '_'))), (between (string "*") (string "*") (munch1 (/= '*'))) ] let parsedText = fst $ leftmostLongestParse parseLine text return (Italic parsedText) parseString :: ReadP MdToken parseString = do firstChar <- get -- Must parse at least one character here text <- munch (\x -> not (elem x "#*_[\n")) return (Unit (firstChar:text)) --return (Unit text) parseToken :: ReadP MdToken parseToken = choice [parseHeader, parseBold, parseItalic, parseString] parseLine :: ReadP MdToken parseLine = do parsed <- parseMany parseToken -- traceM $ show parsed return (Para parsed) main :: IO () main = do let res = leftmostLongestParse parseLine "## Hello __world_*" putStrLn (show res)