Compare commits

...

5 Commits

2
.gitignore vendored

@ -0,0 +1,2 @@
dist-newstyle/
src/dist-newstyle/

@ -1,121 +1,8 @@
module Main where
import Text.ParserCombinators.ReadP
import Control.Monad
import Control.Applicative
import Text.Printf
import Debug.Trace
import Data.List
type HeaderLevel = Int
newtype URL = URL {getUrl :: String}
newtype ImgPath = ImgPath {getPath :: String}
parseMany :: ReadP a -> ReadP [a]
parseMany = Text.ParserCombinators.ReadP.many
data MdToken = Header HeaderLevel MdToken
| Para [MdToken]
| Linebreak
| HorizontalRule
| Blockquote MdToken
| UnordList [MdToken]
| OrdList [MdToken]
| Code String
| Codeblock String
| Link MdToken URL
| Image MdToken ImgPath
| Bold MdToken
| Italic MdToken
| Strikethrough MdToken
| Unit String
-- Deriving Show for MdToken
instance Show MdToken where
show (Header level token) = "<h" ++ show level ++ ">" ++ show token ++ "</h" ++ show level ++ ">"
show (Para tokens) = concat(map show tokens)
show Linebreak = "\n"
show HorizontalRule = "---------"
show (Blockquote token) = "BLOCK" ++ show token
show (UnordList tokens) = "UNORD" ++ concat(map show tokens)
show (OrdList tokens) = "ORD" ++ concat(map show tokens)
show (Code code) = show code
show (Codeblock code) = show code
show (Link txt url) = "<a href=" ++ (getUrl url) ++ ">" ++ show txt ++ "</a>"
show (Image txt path) = "<img src=" ++ (getPath path) ++ ">" ++ show txt ++ "</img>"
show (Bold token) = "<b>" ++ show token ++ "</b>"
show (Italic token) = "<i>" ++ show token ++ "</i>"
show (Strikethrough token) = "<s>" ++ show token ++ "</s>"
show (Unit unit) = printf "%s" unit
-- ---------------
-- Helpers
mustBeHash :: ReadP Char
mustBeHash = satisfy (\x -> x == '#')
leftmostLongest :: (Foldable t) => [(a, t b)] -> (a, t b)
leftmostLongest xs =
let lastElem = (last xs)
filteredLst = (filter (\val -> (length $ snd val) == (length $ snd lastElem)) xs)
in head filteredLst
leftmostLongestParse :: ReadP a -> String -> (a, String)
leftmostLongestParse parser input = leftmostLongest $ readP_to_S parser input
-- ---------------
parseHeader :: ReadP MdToken
parseHeader = do
headers <- many1 mustBeHash
when ((length headers) > 6)
pfail
_ <- string " "
text <- munch1 (\x -> x /= '\n') -- Parse until EOL
-- traceM text
let parsedText = fst $ leftmostLongestParse parseLine text
return (Header (length headers) parsedText)
parseBold :: ReadP MdToken
parseBold = do
text <- choice[
(between (string "__") (string "__") (munch1 (/= '_'))),
(between (string "**") (string "**") (munch1 (/= '*')))
]
-- text <- munch1 (\x -> x /= '_' && x /= '*') -- Parse until first asterisk/underscore
-- traceM text
-- _ <- char '_' <|> char '*' -- Throw away the second asterisk/underscore
let parsedText = fst $ leftmostLongestParse parseLine text
return (Bold parsedText)
parseItalic :: ReadP MdToken
parseItalic = do
text <- choice[
(between (string "_") (string "_") (munch1 (/= '_'))),
(between (string "*") (string "*") (munch1 (/= '*')))
]
let parsedText = fst $ leftmostLongestParse parseLine text
return (Italic parsedText)
parseString :: ReadP MdToken
parseString = do
firstChar <- get -- Must parse at least one character here
text <- munch (\x -> not (elem x "#*_[\n"))
return (Unit (firstChar:text))
--return (Unit text)
parseToken :: ReadP MdToken
parseToken = choice [parseHeader, parseBold, parseItalic, parseString]
parseLine :: ReadP MdToken
parseLine = do
parsed <- parseMany parseToken
-- traceM $ show parsed
return (Para parsed)
import MdToHTML
main :: IO ()
main = do
let res = leftmostLongestParse parseLine "## Hello __world_*"
let res = leftmostLongestParse parseLine "## Hello ___world___"
putStrLn (show res)

@ -1,25 +0,0 @@
{-# LANGUAGE NoRebindableSyntax #-}
{-# OPTIONS_GHC -fno-warn-missing-import-lists #-}
{-# OPTIONS_GHC -w #-}
module PackageInfo_md_to_html (
name,
version,
synopsis,
copyright,
homepage,
) where
import Data.Version (Version(..))
import Prelude
name :: String
name = "md_to_html"
version :: Version
version = Version [0,1,0,0] []
synopsis :: String
synopsis = ""
copyright :: String
copyright = ""
homepage :: String
homepage = ""

@ -1,77 +0,0 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE NoRebindableSyntax #-}
#if __GLASGOW_HASKELL__ >= 810
{-# OPTIONS_GHC -Wno-prepositive-qualified-module #-}
#endif
{-# OPTIONS_GHC -fno-warn-missing-import-lists #-}
{-# OPTIONS_GHC -w #-}
module Paths_md_to_html (
version,
getBinDir, getLibDir, getDynLibDir, getDataDir, getLibexecDir,
getDataFileName, getSysconfDir
) where
import qualified Control.Exception as Exception
import qualified Data.List as List
import Data.Version (Version(..))
import System.Environment (getEnv)
import Prelude
#if defined(VERSION_base)
#if MIN_VERSION_base(4,0,0)
catchIO :: IO a -> (Exception.IOException -> IO a) -> IO a
#else
catchIO :: IO a -> (Exception.Exception -> IO a) -> IO a
#endif
#else
catchIO :: IO a -> (Exception.IOException -> IO a) -> IO a
#endif
catchIO = Exception.catch
version :: Version
version = Version [0,1,0,0] []
getDataFileName :: FilePath -> IO FilePath
getDataFileName name = do
dir <- getDataDir
return (dir `joinFileName` name)
getBinDir, getLibDir, getDynLibDir, getDataDir, getLibexecDir, getSysconfDir :: IO FilePath
bindir, libdir, dynlibdir, datadir, libexecdir, sysconfdir :: FilePath
bindir = "/home/aadhavan/.cabal/bin"
libdir = "/home/aadhavan/.cabal/lib/x86_64-linux-ghc-9.8.2-32bd/md-to-html-0.1.0.0-inplace-md-to-html"
dynlibdir = "/home/aadhavan/.cabal/lib/x86_64-linux-ghc-9.8.2-32bd"
datadir = "/home/aadhavan/.cabal/share/x86_64-linux-ghc-9.8.2-32bd/md-to-html-0.1.0.0"
libexecdir = "/home/aadhavan/.cabal/libexec/x86_64-linux-ghc-9.8.2-32bd/md-to-html-0.1.0.0"
sysconfdir = "/home/aadhavan/.cabal/etc"
getBinDir = catchIO (getEnv "md_to_html_bindir") (\_ -> return bindir)
getLibDir = catchIO (getEnv "md_to_html_libdir") (\_ -> return libdir)
getDynLibDir = catchIO (getEnv "md_to_html_dynlibdir") (\_ -> return dynlibdir)
getDataDir = catchIO (getEnv "md_to_html_datadir") (\_ -> return datadir)
getLibexecDir = catchIO (getEnv "md_to_html_libexecdir") (\_ -> return libexecdir)
getSysconfDir = catchIO (getEnv "md_to_html_sysconfdir") (\_ -> return sysconfdir)
joinFileName :: String -> String -> FilePath
joinFileName "" fname = fname
joinFileName "." fname = fname
joinFileName dir "" = dir
joinFileName dir fname
| isPathSeparator (List.last dir) = dir ++ fname
| otherwise = dir ++ pathSeparator : fname
pathSeparator :: Char
pathSeparator = '/'
isPathSeparator :: Char -> Bool
isPathSeparator c = c == '/'

@ -1,120 +0,0 @@
/* DO NOT EDIT: This file is automatically generated by Cabal */
/* package md-to-html-0.1.0.0 */
#ifndef VERSION_md_to_html
#define VERSION_md_to_html "0.1.0.0"
#endif /* VERSION_md_to_html */
#ifndef MIN_VERSION_md_to_html
#define MIN_VERSION_md_to_html(major1,major2,minor) (\
(major1) < 0 || \
(major1) == 0 && (major2) < 1 || \
(major1) == 0 && (major2) == 1 && (minor) <= 0)
#endif /* MIN_VERSION_md_to_html */
/* package base-4.19.1.0 */
#ifndef VERSION_base
#define VERSION_base "4.19.1.0"
#endif /* VERSION_base */
#ifndef MIN_VERSION_base
#define MIN_VERSION_base(major1,major2,minor) (\
(major1) < 4 || \
(major1) == 4 && (major2) < 19 || \
(major1) == 4 && (major2) == 19 && (minor) <= 1)
#endif /* MIN_VERSION_base */
/* tool gcc-14.2.1 */
#ifndef TOOL_VERSION_gcc
#define TOOL_VERSION_gcc "14.2.1"
#endif /* TOOL_VERSION_gcc */
#ifndef MIN_TOOL_VERSION_gcc
#define MIN_TOOL_VERSION_gcc(major1,major2,minor) (\
(major1) < 14 || \
(major1) == 14 && (major2) < 2 || \
(major1) == 14 && (major2) == 2 && (minor) <= 1)
#endif /* MIN_TOOL_VERSION_gcc */
/* tool ghc-9.8.2 */
#ifndef TOOL_VERSION_ghc
#define TOOL_VERSION_ghc "9.8.2"
#endif /* TOOL_VERSION_ghc */
#ifndef MIN_TOOL_VERSION_ghc
#define MIN_TOOL_VERSION_ghc(major1,major2,minor) (\
(major1) < 9 || \
(major1) == 9 && (major2) < 8 || \
(major1) == 9 && (major2) == 8 && (minor) <= 2)
#endif /* MIN_TOOL_VERSION_ghc */
/* tool ghc-pkg-9.8.2 */
#ifndef TOOL_VERSION_ghc_pkg
#define TOOL_VERSION_ghc_pkg "9.8.2"
#endif /* TOOL_VERSION_ghc_pkg */
#ifndef MIN_TOOL_VERSION_ghc_pkg
#define MIN_TOOL_VERSION_ghc_pkg(major1,major2,minor) (\
(major1) < 9 || \
(major1) == 9 && (major2) < 8 || \
(major1) == 9 && (major2) == 8 && (minor) <= 2)
#endif /* MIN_TOOL_VERSION_ghc_pkg */
/* tool haddock-2.30.0 */
#ifndef TOOL_VERSION_haddock
#define TOOL_VERSION_haddock "2.30.0"
#endif /* TOOL_VERSION_haddock */
#ifndef MIN_TOOL_VERSION_haddock
#define MIN_TOOL_VERSION_haddock(major1,major2,minor) (\
(major1) < 2 || \
(major1) == 2 && (major2) < 30 || \
(major1) == 2 && (major2) == 30 && (minor) <= 0)
#endif /* MIN_TOOL_VERSION_haddock */
/* tool hpc-0.69 */
#ifndef TOOL_VERSION_hpc
#define TOOL_VERSION_hpc "0.69"
#endif /* TOOL_VERSION_hpc */
#ifndef MIN_TOOL_VERSION_hpc
#define MIN_TOOL_VERSION_hpc(major1,major2,minor) (\
(major1) < 0 || \
(major1) == 0 && (major2) < 69 || \
(major1) == 0 && (major2) == 69 && (minor) <= 0)
#endif /* MIN_TOOL_VERSION_hpc */
/* tool hsc2hs-0.68.10 */
#ifndef TOOL_VERSION_hsc2hs
#define TOOL_VERSION_hsc2hs "0.68.10"
#endif /* TOOL_VERSION_hsc2hs */
#ifndef MIN_TOOL_VERSION_hsc2hs
#define MIN_TOOL_VERSION_hsc2hs(major1,major2,minor) (\
(major1) < 0 || \
(major1) == 0 && (major2) < 68 || \
(major1) == 0 && (major2) == 68 && (minor) <= 10)
#endif /* MIN_TOOL_VERSION_hsc2hs */
/* tool pkg-config-2.4.3 */
#ifndef TOOL_VERSION_pkg_config
#define TOOL_VERSION_pkg_config "2.4.3"
#endif /* TOOL_VERSION_pkg_config */
#ifndef MIN_TOOL_VERSION_pkg_config
#define MIN_TOOL_VERSION_pkg_config(major1,major2,minor) (\
(major1) < 2 || \
(major1) == 2 && (major2) < 4 || \
(major1) == 2 && (major2) == 4 && (minor) <= 3)
#endif /* MIN_TOOL_VERSION_pkg_config */
/* tool runghc-9.8.2 */
#ifndef TOOL_VERSION_runghc
#define TOOL_VERSION_runghc "9.8.2"
#endif /* TOOL_VERSION_runghc */
#ifndef MIN_TOOL_VERSION_runghc
#define MIN_TOOL_VERSION_runghc(major1,major2,minor) (\
(major1) < 9 || \
(major1) == 9 && (major2) < 8 || \
(major1) == 9 && (major2) == 8 && (minor) <= 2)
#endif /* MIN_TOOL_VERSION_runghc */
/* tool strip-2.44 */
#ifndef TOOL_VERSION_strip
#define TOOL_VERSION_strip "2.44"
#endif /* TOOL_VERSION_strip */
#ifndef MIN_TOOL_VERSION_strip
#define MIN_TOOL_VERSION_strip(major1,major2,minor) (\
(major1) < 2 || \
(major1) == 2 && (major2) < 44 || \
(major1) == 2 && (major2) == 44 && (minor) <= 0)
#endif /* MIN_TOOL_VERSION_strip */
#ifndef CURRENT_COMPONENT_ID
#define CURRENT_COMPONENT_ID "md-to-html-0.1.0.0-inplace-md-to-html"
#endif /* CURRENT_COMPONENT_ID */
#ifndef CURRENT_PACKAGE_VERSION
#define CURRENT_PACKAGE_VERSION "0.1.0.0"
#endif /* CURRENT_PACKAGE_VERSION */

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

@ -1 +0,0 @@
{"cabal-version":"3.12.1.0","cabal-lib-version":"3.12.1.0","compiler-id":"ghc-9.8.2","os":"linux","arch":"x86_64","install-plan":[{"type":"pre-existing","id":"base-4.19.1.0-9254","pkg-name":"base","pkg-version":"4.19.1.0","depends":["ghc-bignum-1.3-c93f","ghc-prim-0.11.0-d19b","rts-1.0.2"]},{"type":"pre-existing","id":"ghc-bignum-1.3-c93f","pkg-name":"ghc-bignum","pkg-version":"1.3","depends":["ghc-prim-0.11.0-d19b"]},{"type":"pre-existing","id":"ghc-prim-0.11.0-d19b","pkg-name":"ghc-prim","pkg-version":"0.11.0","depends":["rts-1.0.2"]},{"type":"configured","id":"md-to-html-0.1.0.0-inplace-md-to-html","pkg-name":"md-to-html","pkg-version":"0.1.0.0","flags":{},"style":"local","pkg-src":{"type":"local","path":"/home/aadhavan/Programming/Haskell/md-to-html/."},"dist-dir":"/home/aadhavan/Programming/Haskell/md-to-html/dist-newstyle/build/x86_64-linux/ghc-9.8.2/md-to-html-0.1.0.0/x/md-to-html","build-info":"/home/aadhavan/Programming/Haskell/md-to-html/dist-newstyle/build/x86_64-linux/ghc-9.8.2/md-to-html-0.1.0.0/x/md-to-html/build-info.json","depends":["base-4.19.1.0-9254"],"exe-depends":[],"component-name":"exe:md-to-html","bin-file":"/home/aadhavan/Programming/Haskell/md-to-html/dist-newstyle/build/x86_64-linux/ghc-9.8.2/md-to-html-0.1.0.0/x/md-to-html/build/md-to-html/md-to-html"},{"type":"pre-existing","id":"rts-1.0.2","pkg-name":"rts","pkg-version":"1.0.2","depends":[]}]}

Binary file not shown.

Binary file not shown.

Binary file not shown.

@ -53,7 +53,13 @@ extra-doc-files: CHANGELOG.md
common warnings
ghc-options: -Wall
executable md-to-html
library
hs-source-dirs: src
exposed-modules: MdToHTML
build-depends: base ^>=4.19.1.0,
HUnit
executable md-to-html-runner
-- Import common warning flags.
import: warnings
@ -67,7 +73,9 @@ executable md-to-html
-- other-extensions:
-- Other library packages from which modules are imported.
build-depends: base ^>=4.19.1.0
build-depends: base ^>=4.19.1.0,
md-to-html,
HUnit
-- Directories containing source files.
hs-source-dirs: app

@ -0,0 +1,149 @@
module MdToHTML where
import Text.ParserCombinators.ReadP
import Control.Monad
import Control.Applicative
import Text.Printf
import Debug.Trace
import Data.List
type HeaderLevel = Int
newtype URL = URL {getUrl :: String}
newtype ImgPath = ImgPath {getPath :: String}
parseMany :: ReadP a -> ReadP [a]
parseMany = Text.ParserCombinators.ReadP.many
data MdToken = Header HeaderLevel MdToken
| Para MdToken
| Line [MdToken]
| Linebreak
| HorizontalRule
| Blockquote MdToken
| UnordList [MdToken]
| OrdList [MdToken]
| Code String
| Codeblock String
| Link MdToken URL
| Image MdToken ImgPath
| Bold MdToken
| Italic MdToken
| Strikethrough MdToken
| Unit String
-- Deriving Show for MdToken
instance Show MdToken where
show (Header level token) = "<h" ++ show level ++ ">" ++ show token ++ "</h" ++ show level ++ ">"
show (Para token) = "<p>" ++ show token ++ "</p>"
show (Line tokens) = concat(map show tokens)
show Linebreak = "<br>"
show HorizontalRule = "---------"
show (Blockquote token) = "BLOCK" ++ show token
show (UnordList tokens) = "UNORD" ++ concat(map show tokens)
show (OrdList tokens) = "ORD" ++ concat(map show tokens)
show (Code code) = show code
show (Codeblock code) = show code
show (Link txt url) = "<a href=" ++ (getUrl url) ++ ">" ++ show txt ++ "</a>"
show (Image txt imgPath) = "<img src=" ++ (getPath imgPath) ++ ">" ++ show txt ++ "</img>"
show (Bold token) = "<b>" ++ show token ++ "</b>"
show (Italic token) = "<i>" ++ show token ++ "</i>"
show (Strikethrough token) = "<s>" ++ show token ++ "</s>"
show (Unit unit) = printf "%s" unit
-- ---------------
-- Helpers
mustBeHash :: ReadP Char
mustBeHash = satisfy (\x -> x == '#')
leftmostLongest :: (Foldable t) => [(a, t b)] -> (a, t b)
leftmostLongest xs =
let lastElem = (last xs)
filteredLst = (filter (\val -> (length $ snd val) == (length $ snd lastElem)) xs)
in head filteredLst
-- Get the first parse returned by readP_to_S that consumed the most input
leftmostLongestParse :: ReadP a -> String -> (a, String)
leftmostLongestParse parser input = leftmostLongest $ readP_to_S parser input
-- Parse if the string that's left matches the string comparator function
lookaheadParse :: (String -> Bool) -> ReadP Char
lookaheadParse stringCmp = do
lookahead <- look
case stringCmp lookahead of
True -> get
False -> pfail
lineToList :: MdToken -> [MdToken]
lineToList (Line tokens) = tokens
-- ---------------
-- Parse a markdown header, denoted by 1-6 #'s followed by some text, followed by EOL.
parseHeader :: ReadP MdToken
parseHeader = do
headers <- many1 mustBeHash
when ((length headers) > 6)
pfail
_ <- string " "
text <- munch1 (\x -> x /= '\n') -- Parse until EOL
-- traceM text
let parsedText = fst $ leftmostLongestParse parseLine text
return (Header (length headers) parsedText)
-- Parse bold text
parseBold :: ReadP MdToken
parseBold = do
text <- choice[
(between (string "__") (string "__") (many1 (lookaheadParse (/= "__")))),
(between (string "**") (string "**") (many1 (lookaheadParse (/= "**"))))
]
let parsedText = fst $ leftmostLongestParse parseLine text
return (Bold parsedText)
-- Parse italic text
parseItalic :: ReadP MdToken
parseItalic = do
text <- choice[
(between (string "_") (string "_") (munch1 (/= '_'))),
(between (string "*") (string "*") (munch1 (/= '*')))
]
let parsedText = fst $ leftmostLongestParse parseLine text
return (Italic parsedText)
-- Parse a linebreak character
parseLinebreak :: ReadP MdToken
parseLinebreak = do
char '\n'
return Linebreak
-- Parse a regular string as a Unit.
parseString :: ReadP MdToken
parseString = do
firstChar <- get -- Must parse at least one character here
text <- munch (\x -> not (elem x "#*_[\n"))
return (Unit (firstChar:text))
-- Parse any of the above tokens.
parseToken :: ReadP MdToken
parseToken = choice [parseHeader, parseLinebreak, parseBold, parseItalic, parseString]
-- Parse a line, consisting of one or more tokens.
parseLine :: ReadP MdToken
parseLine = do
remaining <- look
when (null remaining) pfail
parsed <- parseMany parseToken
-- traceM $ show parsed
return (Line parsed)
-- Parse a paragraph, which is a 'Line' (can span multiple actual lines), separated by double-newlines.
parsePara :: ReadP MdToken
parsePara = do
parseMany (char '\n')
text <- many1 (lookaheadParse (\x -> ((length x) < 2) || (take 2 x) /= "\n\n"))
string "\n\n"
-- I don't consume the ending double-newline, because the next paragraph will consume it as part of its starting double-newline.
let parsedText = fst $ leftmostLongestParse parseLine text
return (Para parsedText)

@ -0,0 +1,31 @@
module MdToHtmlTest where
import MdToHTML
import Test.HUnit
check_equal expected actual
headerTests = TestList
[
(TestCase (assertEqual "Should convert H1 heading" "<h1>Hello</h1>" (show . fst $ leftmostLongestParse parseLine "# Hello"))),
(TestCase (assertEqual "Should convert H2 heading" "<h2>Hello</h2>" (show . fst $ leftmostLongestParse parseLine "## Hello"))),
(TestCase (assertEqual "Should convert H3 heading" "<h3>Hello</h3>" (show . fst $ leftmostLongestParse parseLine "### Hello"))),
(TestCase (assertEqual "Should convert H4 heading" "<h4>Hello</h4>" (show . fst $ leftmostLongestParse parseLine "#### Hello"))),
(TestCase (assertEqual "Should convert H5 heading" "<h5>Hello</h5>" (show . fst $ leftmostLongestParse parseLine "##### Hello"))),
(TestCase (assertEqual "Should convert H6 heading" "<h6>Hello</h6>" (show . fst $ leftmostLongestParse parseLine "###### Hello")))
]
boldTests = TestList
[
(TestCase (assertEqual "Should convert bold" "<b>Hello</b>" (show . fst $ leftmostLongestParse parseLine "__Hello__"))),
(TestCase (assertEqual "Should convert italic" "<i>Hello</i>" (show . fst $ leftmostLongestParse parseLine "_Hello_"))),
(TestCase (assertEqual "Should convert bold and italic in a sentence" "It <i>is</i> a <b>wonderful</b> day" (show . fst $ leftmostLongestParse parseLine "It _is_ a __wonderful__ day")))
]
tests = TestList
[
headerTests,
boldTests
]
runTests = runTestTT tests
Loading…
Cancel
Save