Skip newlines and YAML metadata at the beginning of a markdown file

Allow files to be placed inside 'drafts' inside 'source'; will be converted to HTML, but not shown on sitemap
Concatenate all lua filters into 1 file, then provide that file to 'pandoc'
2025-04-19 11:28:56 -04:00 · 2025-02-16 15:36:04 -05:00 · 2025-02-15 14:21:47 -05:00 · 2025-01-07 12:07:34 -06:00 · 2024-10-11 10:52:27 -04:00 · 2024-10-11 10:33:18 -04:00
1 changed files with 72 additions and 12 deletions
--- a/build.sh
+++ b/build.sh
@@ -13,7 +13,15 @@
 #
 #  To use it, create a directory for your project (e.g. 'website'). Inside 'website', create
 #  two directories: 'source' (which holds your Markdown files) and 'output' (which holds the
-#  converted CSS.
+#  converted HTML).
 #
 #  To exclude files from the conversion process, place them inside a directory
 #  named 'exclude' inside 'source'. This directory will not be copied over into 'output', and 
 #  any files inside it will not be converted.
 #
 #  If you want a file to be converted, but not displayed in the sitemap, then place it inside a directory
 #  called 'drafts' inside 'source'. The converted file will still be accessible via its path, but it will not
 #  be shown on the sitemap.
 #
 #  In addition to these directories, three files are needed in 'website':
 #  1. 'header.html' - A header, which is prepended to every source file. Unfortunately, this must
@@ -21,6 +29,8 @@
 #  2. 'footer.html' - A footer, which is appended to every source file. Also must be written in HTML.
 #  3. 'styles.css' - A global stylesheet.
 #
 # The script takes in a parameter, which is the directory that contains the 'source' and 'output' folders.
 #
 # If you have any comments or questions, please email aadhavan@twomorecents.org.
@@ -31,8 +41,11 @@ set -o pipefail # Treat a pipeline as failing, even if one command in the pipeli
 if [[ "${TRACE-0}" == "1" ]]; then set -o xtrace; fi # Enable tracing (output of each command) if the TRACE variable is set
-
+if [ "$#" -ne 1 ]; then
-BASE_PATH="$(dirname "$0")"
+	echo "ERROR: Invalid number of paramters. Read script for more details."
 	exit
 fi
 BASE_PATH=$(cd "$1"; pwd)
 check_for_dirs() {
 	if [[ ! -d "${BASE_PATH}/source" ]]; then
@@ -69,11 +82,16 @@ setup_output_dir() {
 del_files_in_output() {
 	# Remove everything from 'output' that was copied over from 'source'
 	# but must not be published. Reads the global BASE_PATH.
 	local out_root="$BASE_PATH/output"
 	local excluded_dir="${out_root}/exclude"
 	# The raw .md files were copied over from 'source'; drop every one of them.
 	find "$out_root" -type f -name "*.md" -delete
 	# 'exclude' contains markdown files which shouldn't be converted to HTML,
 	# so the whole directory is removed from the output folder when it exists.
 	[[ ! -d "$excluded_dir" ]] || rm -r "$excluded_dir"
 }
 read_metadata() {
 #	Read the metadata from the top of a .md file into a string.
 #	$1 - path of the markdown file to read.
 #	The result is stored in the 'metadata' variable (not declared local here).
 #	awk is told to split the file into records on "\n\n" (a blank line) and exits after printing one record.
 #	NOTE(review): a multi-character RS depends on the awk implementation (e.g. gawk) - confirm the awk in use supports it.
-	metadata=$(awk 'BEGIN{RS = "\n\n"} {print $0}; {exit}' "$1")  # Reads from the .md file until a double-newline is encountered
+	metadata=$(awk 'BEGIN{RS = "\n\n"} ($0 != "") && ($0 !~ /^---/) {print $0; exit}' "$1")  # Reads from the .md file until a double-newline is encountered, as long as 1) the text read is non-empty and 2) it doesn't start with a triple hyphen (the triple hyphen denotes a pandoc metadata block)
 }
 convert_to_array() {
@@ -103,7 +121,8 @@ add_header_and_footer() {
 	cp "$BASE_PATH/header.html" "$BASE_PATH/temp/$parent_dir/temp_header.html"
 #	Check for relevant metadata, and perform corresponding action
-	if [[ ! -v "meta_array["date"]" ]]; then # If there is no date
+	# This syntax is intended (although it doesn't follow typical Bash array syntax). See https://stackoverflow.com/a/45385463 for more info.
 	if [[ ! -v "meta_array[date]" ]]; then # If there is no date
 		sed -i '$ d' "$BASE_PATH/temp/$parent_dir/temp_header.html" # remove the 'date published' section of the header
 	fi
@@ -138,7 +157,7 @@ add_header_and_footer_to_index() {
 replace_vars() {
 #	Loop through the keys of the 'meta_array' array, search for all occurrences of the key in the HTML doc, and replace them with the corresponding value.
 	for arr_key in "${!meta_array[@]}"; do
-		meta_array["$arr_key"]="${meta_array["$arr_key"]/\//\\/}" # Escape all forward slashes in the value
+		meta_array["$arr_key"]="${meta_array["$arr_key"]//\//\\/}" # Escape all forward slashes in the value
 		sed -i "s/[\$][\$]$arr_key[\$][\$]/${meta_array[$arr_key]}/g" "$1"
 	done
@@ -156,7 +175,7 @@ convert_file() {
 #	one that doesn't exist in the 'temp' directory. This string
 #	will serve as the parent directory of our file.
 	parent_dir="$(tr -dc A-Za-z </dev/urandom | head -c 8; echo)"
-	while ls -r temp | grep -q "$parent_dir" ; do
+	while ls -r "$BASE_PATH"/temp | grep -q "$parent_dir" ; do
 		parent_dir="$(tr -dc A-Za-z </dev/urandom | head -c 8; echo)"
 	done
@@ -171,8 +190,10 @@ convert_file() {
 	path_for_output="${BASE_PATH}/output/${path_for_output}"
 	path_for_output="$(dirname $path_for_output)/$(basename $path_for_output .md).html"
 #	Get the combined contents of all lua filters
 	all_lua_filters="$(cat $BASE_PATH/pandoc_filters/*.lua)"
 #	Convert the file (using the given filters), and place the output in the correct location.
-	pandoc --lua-filter "$BASE_PATH"/pandoc_filters/* -f markdown --wrap=preserve "$BASE_PATH/temp/$parent_dir/$(basename "$file_to_conv")" > "${path_for_output}"
+	pandoc --lua-filter <(echo "$all_lua_filters") -f markdown --wrap=preserve "$BASE_PATH/temp/$parent_dir/$(basename "$file_to_conv")" > "${path_for_output}"
 	convert_to_array "$metadata" #Sets the 'meta_array' array
@@ -186,7 +207,8 @@ convert_file() {
 md_to_html() {
 #	Convert .md files from 'source' and place them into the correct locations into 'output'
-	local files=$(find "$BASE_PATH/source" -name "*.md")
+#	Exclude all files and folders inside the 'exclude' directory
 	local files=$(find "${BASE_PATH}/source" -not -path "${BASE_PATH}/source/exclude/*"  -name "*.md")
 #	Concurrently convert each document
 	for file in $files; do
@@ -199,8 +221,8 @@ md_to_html() {
 	rm -rf "$BASE_PATH"/temp/*
 }
-gen_sorted_file_list() { # Generate a list of the HTMl files, sorted by when they were last modified (read from the contents of the HTML file)
+gen_sorted_file_list() { # Generate a list of the HTMl files, sorted by when they were last modified (read from the contents of the HTML file). Exclude all files in the 'drafts' directory.
-	local files=$(find "$BASE_PATH/output" -name "*.html")
+	local files=$(find "$BASE_PATH/output" -name "*.html" -not -path "${BASE_PATH}/output/drafts/*")
 	local date_mod
 	for file in $files; do
@@ -228,11 +250,48 @@ gen_sorted_file_list() { # Generate a list of the HTMl files, sorted by when the
 	sorted_file_list=$(echo "$sorted_file_list" | awk '{print $1}') # Store only the first column (the file path) in the variable
 }
 gen_rss_feed() { # Generate 'output/rss.xml' from a list of HTML files
 #	Arguments: $1 - whitespace/newline-separated list of HTML file paths, newest first
 #	                (the caller passes the 'sorted_file_list' variable).
 #	Globals:   BASE_PATH (read).
 #	Outputs:   writes the feed to "$BASE_PATH/output/rss.xml"; prints a progress line to stdout.
 #	Only the first 10 files of the list become <item> entries.
 	echo "Generating RSS Feed..."
 	local rss_feed_path="${BASE_PATH}/output/rss.xml"
 	local -r max_items=10
 	local nl=$'\n'
 	# Everything is local so that no loop state (previously 'counter') leaks into the rest of the script.
 	local rss_content item_title item_path file
 	local item_count=0
 
 	rss_content="<rss version=\"2.0\">${nl}"
 	rss_content+="<channel>${nl}"
 	rss_content+="<title>Two More Cents</title>${nl}"
 	rss_content+="<link>http://twomorecents.org/</link>${nl}"
 	rss_content+="<description>The personal website of Aadhavan Srinivasan.</description>${nl}"
 	rss_content+="<language>en-us</language>${nl}"
 	rss_content+="<lastBuildDate>$(date -R)</lastBuildDate>${nl}"
 	rss_content+="<generator>s4g - Stupid Simple Static Site Generator</generator>${nl}"
 
 	# $1 is intentionally unquoted: the list is split into one path per word.
 	for file in $1; do
 		if (( item_count >= max_items )); then
 			break
 		fi
 		# Title = text of the first line containing "<title>". A single awk call replaces
 		# 'cat | grep | head', which can fail spuriously under 'set -o pipefail'.
 		item_title=$(awk -F'[<>]' '/<title>/ {print $3; exit}' "$file")
 		item_path=$(realpath --relative-to="${BASE_PATH}/output" "$file")
 		rss_content+="<item>${nl}"
 		rss_content+="<title>${nl}"
 		rss_content+="${item_title}${nl}"
 		rss_content+="</title>${nl}"
 		rss_content+="<link>${nl}"
 		rss_content+="https://twomorecents.org/${item_path}</link>${nl}"
 		rss_content+="</item>${nl}"
 		item_count=$((item_count + 1))
 	done
 	rss_content+="</channel>${nl}</rss>"
 
 	# printf '%s' writes titles and paths verbatim; 'echo -e' would expand any
 	# backslash sequence they contain. The target is quoted so a base path with
 	# spaces does not produce an "ambiguous redirect".
 	printf '%s\n' "$rss_content" > "$rss_feed_path"
 }
 gen_index_page() { # Generate an index page (site map) that includes links to the other pages
 	echo "Generating index page..."
-	local index_file_html="<nav class=\"toc\">"$'\n' # Variable to store the body HTML of the index page; enclose the list in a nav
+	local index_file_html="<nav class=\"toc\">"$'\n' # Variable to store the body HTML of the index page, enclose the list in a nav
 	index_file_html+="<p>(All dates are in MM/DD/YYYY format)</p>"$'\n'
 	for file in $1; do
 		local title=$(cat "$file" | grep "<title>" | head -n 1 | awk -F'[<>]' '{print $3}') # Find the title of the web page
@@ -279,6 +338,7 @@ setup_output_dir
 del_files_in_output
 md_to_html
 gen_sorted_file_list # Sets the 'sorted_file_list' variable
 gen_rss_feed "$sorted_file_list" # Uses the 'sorted_file_list' variable
 gen_index_page "$sorted_file_list" # Uses the 'sorted_file_list' variable
 copy_things_in
 clean_up
Author	SHA1	Message	Date
Aadhavan Srinivasan	9d5f038dad	Skip newlines and YAML metadata at the beginning of a markdown file	2025-04-19 11:28:56 -04:00
Aadhavan Srinivasan	afccdc5463	Allow files to be placed inside 'drafts' inside 'source'; will be converted to HTML, but not shown on sitemap	2025-02-16 15:36:04 -05:00
Aadhavan Srinivasan	5e48d58561	Concatenate all lua filters into 1 file, then provide that file to 'pandoc'	2025-02-15 14:21:47 -05:00
Rockingcool	628a03b2da	New feature: Exclude all files and directories inside the 'exclude' directory in 'source'	2025-01-07 12:07:34 -06:00
Aadhavan Srinivasan	71a242e1d6	Added double-slash to escape _all_ forward slashes	2024-10-11 10:52:27 -04:00
Aadhavan Srinivasan	423eba2213	Added comment to explain weird use of associative array syntax	2024-10-11 10:33:18 -04:00
Rockingcool	f1f5ab51b2	Fixed error where I would create a file called RSS_FEED_PATH	2024-07-25 10:55:03 -05:00
Rockingcool	dc61c92a4e	Added function to generate RSS file	2024-07-25 10:43:12 -05:00
Aadhavan Srinivasan	e0c79984b2	Added info to top of site-map page, and fixed bug with 'temp' directory name	2024-06-05 10:34:34 -05:00
Rockingcool	54dda40de4	Made the script take in a paramter, which is the directory containing 'source' and 'output'	2024-05-06 10:27:02 -05:00