#!/usr/bin/env bash
# TODO: Add a comment block at the top of the script, describing what it does.
# TODO: Add a '-h' or '--help' flag, that displays information about the script, and how to use it.
# s4g - Stupid Simple Static-Site Generator
# ---------------------------------------------
#
# This is a static-site generator, that can be used to convert Markdown files into HTML.
# It is extremely simple and extremely opinionated, as you can see if you read the code below.
# A lot of paths and directories are hard-coded, to suit my workflow.
#
# To use it, create a directory for your project (e.g. 'website'). Inside 'website', create
# two directories: 'source' (which holds your Markdown files) and 'output' (which holds the
# converted HTML). To exclude files from the conversion process, place them inside a directory
# named 'exclude' inside 'source'. This directory will not be copied over into 'output', and
# any files inside it will not be converted.
#
# In addition to these directories, three files are needed in 'website':
# 1. 'header.html' - A header, which is prepended to every source file. Unfortunately, this must
# be written in regular HTML.
# 2. 'footer.html' - A footer, which is appended to every source file. Also must be written in HTML.
# 3. 'styles.css' - A global stylesheet.
#
# The script takes in a parameter, which is the directory that contains the 'source' and 'output' folders.
#
# If you have any comments or questions, please email aadhavan@twomorecents.org.
set -o errexit   # Stop executing when a command fails
set -o nounset   # Stop executing when accessing an unset variable
set -o pipefail  # Treat a pipeline as failing if any command in the pipeline fails

# Enable tracing (output of each command) if the TRACE variable is set to 1
if [[ "${TRACE-0}" == "1" ]]; then set -o xtrace; fi

if [[ "$#" -ne 1 ]]; then
	echo "ERROR: Invalid number of parameters. Read script for more details." >&2
	exit 1
fi

# Absolute path of the project directory (must contain 'source', and will
# receive 'output'). Resolved with cd+pwd so a relative argument also works.
BASE_PATH=$(cd "$1" && pwd)
# Verify the project layout before doing anything destructive:
# a 'source' folder must exist, no stale 'temp' folder may be present,
# and the global header.html / footer.html files must exist.
# Exits with a non-zero status and a message on stderr otherwise.
check_for_dirs() {
	if [[ ! -d "${BASE_PATH}/source" ]]; then
		echo "ERROR: 'source' folder does not exist. Your content is sourced from this folder." >&2
		exit 1
	fi

	if [[ -d "${BASE_PATH}/temp" ]]; then
		echo "ERROR: You have an existing 'temp' folder. Please delete this folder, and run the script again." >&2
		exit 1
	fi

	if [[ ! -f "${BASE_PATH}/header.html" ]]; then
		echo "ERROR: You do not have a header.html file. This file is used as a global header. Please create this file, and run the script again." >&2
		exit 1
	fi

	if [[ ! -f "${BASE_PATH}/footer.html" ]]; then
		echo "ERROR: You do not have a footer.html file. This file is used as a global footer. Please create this file, and run the script again." >&2
		exit 1
	fi
}
# Create the 'temp' working directory (scratch space used during conversion).
# check_for_dirs has already ensured it does not exist.
setup_temp_dir() {
	mkdir "${BASE_PATH}/temp"
}
# Rebuild 'output' from scratch as a copy of 'source' (directory structure
# and all files; the .md files are deleted later by del_files_in_output).
setup_output_dir() {
	rm -rf "${BASE_PATH}/output"                      # Delete existing 'output' directory
	cp -r "${BASE_PATH}/source" "${BASE_PATH}/output" # Copy directory structure from 'source' to 'output'
}
# Remove from 'output' everything that should not be published as-is:
# the .md sources (their .html conversions are generated later) and the
# 'exclude' directory, which holds markdown that must not be converted.
del_files_in_output() {
	find "$BASE_PATH/output" -type f -name "*.md" -delete

	if [[ -d "${BASE_PATH}/output/exclude" ]]; then
		rm -r "${BASE_PATH}/output/exclude"
	fi
}
# Read the metadata block from the top of the .md file ($1) into the global
# 'metadata' variable. Reads until the first blank line (double newline).
read_metadata() {
	metadata=$(awk 'BEGIN{RS = "\n\n"} {print $0}; {exit}' "$1")
}
# Convert a metadata string ($1, "key: value" per line) into the global
# associative array 'meta_array' (key -> value).
convert_to_array() {
	local meta_key
	local meta_value
	local index
	# Split the metadata into two parallel arrays: keys and values.
	# The value is everything after the first ':', with its leading space cut.
	readarray -t meta_key < <(echo -e "$1" | awk -F: '{print $1}')
	readarray -t meta_value < <(echo -e "$1" | awk -F: '{st = index($0,":"); values = substr($0,st+1); print values}' | cut -c 2-)

	# Merge both arrays into an associative array (-g: global, so callers see it)
	declare -Ag meta_array
	for index in "${!meta_key[@]}"; do
		meta_array["${meta_key[$index]}"]="${meta_value[$index]}"
	done
}
# If the 'date' metadata field is set to 'auto', replace it with the
# modification date of the source file ($1), formatted like "Jan 02, 2024".
add_date_to_array() {
	if [[ "${meta_array[date]-}" == "auto" ]]; then
		meta_array["date"]="$(date -r "$1" +'%b %d, %Y')"
	fi
}
# Prepend the global header and append the global footer to the HTML file ($1).
# Uses the 'meta_array' array and the 'parent_dir' variable. The header is
# first copied into the file's own temp directory ('parent_dir' keeps each
# concurrent conversion isolated) so it can be edited per-file.
add_header_and_footer() {
	local tmp_header="$BASE_PATH/temp/$parent_dir/temp_header.html"
	local tmp_file="$BASE_PATH/temp/$parent_dir/temp.html"

	cp "$BASE_PATH/header.html" "$tmp_header"

	# Check for relevant metadata, and perform the corresponding action.
	# The '-v array[key]' syntax is intended (although it doesn't follow typical
	# Bash array syntax). See https://stackoverflow.com/a/45385463 for more info.
	if [[ ! -v "meta_array[date]" ]]; then
		sed -i '$ d' "$tmp_header" # no date -> remove the 'date published' section (last line) of the header
	fi
	if [[ "${meta_array[noappend]-}" == "true" ]]; then
		sed -i 's/ - Two More Cents//g' "$tmp_header" # 'noappend' removes the suffix from the title
	fi

	# Header + content, then a newline, then the footer
	cat "$tmp_header" "$1" > "$tmp_file"
	echo >> "$tmp_file"
	cat "$BASE_PATH/footer.html" >> "$tmp_file"

	# Move the assembled file back to its original location
	mv "$tmp_file" "$1"
}
# Add the global header and footer to the index page ($1). For the index
# page the last line of the header (the 'date published' section) is dropped.
add_header_and_footer_to_index() {
	local tmp_dir="$BASE_PATH/temp/index_page"
	mkdir "$tmp_dir"

	# Header (minus its last line) + content, then a newline, then the footer
	head -n -1 "$BASE_PATH/header.html" | cat - "$1" > "$tmp_dir/temp.html"
	echo >> "$tmp_dir/temp.html"
	cat "$BASE_PATH/footer.html" >> "$tmp_dir/temp.html"

	# Move the assembled file back to its original location
	mv "$tmp_dir/temp.html" "$1"
}
# Replace every occurrence of $$key$$ in the HTML file ($1) with the
# corresponding value from the 'meta_array' associative array.
replace_vars() {
	local arr_key
	for arr_key in "${!meta_array[@]}"; do
		# Escape forward slashes in the value so it is safe as a sed replacement
		meta_array["$arr_key"]="${meta_array["$arr_key"]//\//\\/}"
		sed -i "s/[\$][\$]$arr_key[\$][\$]/${meta_array[$arr_key]}/g" "$1"
	done
}
# Helper function for md_to_html(). Takes the .md file to convert as $1,
# strips its metadata block, converts it to HTML with pandoc, and places the
# result at the mirrored path under 'output' with header/footer applied.
convert_file() {
	local file_to_conv="$1"
	echo "Converting $file_to_conv"
	read_metadata "$file_to_conv" # Sets the 'metadata' variable

	# Generate a random 8-character alphabetic string, until we find
	# one that doesn't exist in the 'temp' directory. This string
	# will serve as the parent directory of our file.
	parent_dir="$(tr -dc A-Za-z </dev/urandom | head -c 8; echo)"
	while ls -r "$BASE_PATH"/temp | grep -q "$parent_dir"; do
		parent_dir="$(tr -dc A-Za-z </dev/urandom | head -c 8; echo)"
	done

	# Copy the file to the temp dir and strip the metadata block
	# (metadata lines plus the blank separator line that follows them)
	mkdir -p "$BASE_PATH/temp/$parent_dir/"
	cp "$file_to_conv" "$BASE_PATH/temp/$parent_dir/"
	local num_lines=$(( $(echo "$metadata" | wc -l) + 1 ))
	sed -i "1,${num_lines}d" "$BASE_PATH/temp/$parent_dir/$(basename "$file_to_conv")"

	# Construct the path for the output file: same relative location under
	# 'output', with the .md extension swapped for .html
	local path_for_output
	path_for_output=$(realpath --relative-to="${BASE_PATH}/source" "$file_to_conv")
	path_for_output="${BASE_PATH}/output/${path_for_output}"
	path_for_output="$(dirname "$path_for_output")/$(basename "$path_for_output" .md).html"

	# Convert the file (using the given filters), and place the output in the correct location.
	pandoc --lua-filter "$BASE_PATH"/pandoc_filters/* -f markdown --wrap=preserve \
		"$BASE_PATH/temp/$parent_dir/$(basename "$file_to_conv")" > "${path_for_output}"

	convert_to_array "$metadata"             # Sets the 'meta_array' array
	add_date_to_array "$file_to_conv"        # Uses 'meta_array' array
	add_header_and_footer "$path_for_output" # Uses 'meta_array' array and 'parent_dir' variable
	replace_vars "$path_for_output"          # Uses 'meta_array' array
	unset metadata meta_key meta_value meta_array path_for_output
}
# Convert the .md files from 'source' and place them into the correct
# locations in 'output'. Files and folders inside the 'exclude' directory
# are skipped.
md_to_html() {
	local files
	files=$(find "${BASE_PATH}/source" -not -path "${BASE_PATH}/source/exclude/*" -name "*.md")

	# Concurrently convert each document; each conversion runs in its own
	# subshell, so the per-file globals (metadata, meta_array, ...) don't leak.
	local file
	for file in $files; do
		(convert_file "$file") &
	done

	# Wait for all documents to finish converting, then remove all temporary files.
	wait
	rm -rf "$BASE_PATH"/temp/*
}
# Generate a list of the HTML files, sorted by their publication date
# (newest first), read from the contents of each HTML file.
# Sets the global 'sorted_file_list' variable.
gen_sorted_file_list() {
	local files
	files=$(find "$BASE_PATH/output" -name "*.html")
	local date_mod
	local file
	for file in $files; do
		if grep -q "date-published" "$file"; then
			# Only files with a 'date published' can be listed (with their
			# date) on the site map, so only those are recorded.
			echo "$file" >> "$BASE_PATH/temp/file_listing.txt"
			date_mod+=$(grep "date-published" "$file" | awk -F'[<>]' '{print $3}' \
				| cut -d' ' -f '1,2' --complement | tr -d "," | awk '{print $2" "$1" "$3}' \
				| date -f - +"%s")
			# Explanation:
			# Line 1 extracts the published date from the HTML file
			# Line 2 re-arranges this information, and converts it into DD MM YY format
			# Line 3 converts this into a UNIX timestamp
			date_mod+=$'\n'
		fi
	done

	date_mod=$(echo "${date_mod-}" | head -n -1)      # Remove last (empty) line from variable
	echo "${date_mod-}" > "$BASE_PATH/temp/date_mod.txt" # Write the 'date published' timestamps to a temp file

	# Combine file list and timestamps into one file, sort it by timestamp
	# (newest to oldest), and keep only the first column (the file path).
	paste "$BASE_PATH/temp/file_listing.txt" "$BASE_PATH/temp/date_mod.txt" > "$BASE_PATH/temp/new_file_list.txt"
	sorted_file_list=$(sort -r -k 2 "$BASE_PATH/temp/new_file_list.txt")
	sorted_file_list=$(echo "$sorted_file_list" | awk '{print $1}')
}
gen_rss_feed( ) { # Uses the sorted_file_list variable to generate an RSS feed
echo "Generating RSS Feed..."
local RSS_FEED_PATH = " ${ BASE_PATH } /output/rss.xml "
touch " $RSS_FEED_PATH " # Create the RSS file
local RSS_CONTENT = "<rss version=\"2.0\">\n"
counter = 0
RSS_CONTENT += "<channel>\n"
RSS_CONTENT += "<title>Two More Cents</title>\n"
RSS_CONTENT += "<link>http://twomorecents.org/</link>\n"
RSS_CONTENT += "<description>The personal website of Aadhavan Srinivasan.</description>\n"
RSS_CONTENT += "<language>en-us</language>\n"
RSS_CONTENT += " <lastBuildDate> $( date -R) </lastBuildDate>\n "
RSS_CONTENT += "<generator>s4g - Stupid Simple Static Site Generator</generator>\n"
for file in $1 ; do
if [ $counter -gt 9 ] ; then
break
fi
RSS_CONTENT += "<item>\n"
RSS_CONTENT += "<title>\n"
RSS_CONTENT += $( cat " $file " | grep "<title>" | head -n 1 | awk -F'[<>]' '{print $3}' ) $'\n'
RSS_CONTENT += "</title>\n"
RSS_CONTENT += "<link>\n"
RSS_CONTENT += "https://twomorecents.org/"
RSS_CONTENT += $( realpath --relative-to= " ${ BASE_PATH } /output " " $file " )
RSS_CONTENT += "</link>\n"
RSS_CONTENT += "</item>\n"
( ( ++counter) )
done
RSS_CONTENT += "</channel>\n</rss>"
echo -e " $RSS_CONTENT " > $RSS_FEED_PATH
}
# Generate an index page (site map) with a dated link to every published
# page. $1 is the sorted file list (newline-separated paths, newest first).
gen_index_page() {
	echo "Generating index page..."
	# Body HTML of the index page; the list is enclosed in a <nav>
	local index_file_html="<nav class=\"toc\">"$'\n'
	index_file_html+="<p>(All dates are in MM/DD/YYYY format)</p>"$'\n'

	local file
	for file in $1; do
		# Find the title of the web page and strip the site-name suffix
		local title
		title=$(grep "<title>" "$file" | head -n 1 | awk -F'[<>]' '{print $3}')
		local suffix=" - Two More Cents"
		title="${title%"$suffix"}"

		# Find the date published, strip its prefix, then re-arrange the
		# date and convert it to MM/DD/YYYY
		local pub_date
		pub_date=$(grep "date-published" "$file" | head -n 1 | awk -F'[<>]' '{print $3}')
		prefix="Published on "
		pub_date="${pub_date#"$prefix"}"
		pub_date=$(echo "$pub_date" | tr -d "," | awk '{print $2" "$1" "$3}' | date -f - +"%m/%d/%Y")

		local file_path
		file_path=$(realpath --relative-to="${BASE_PATH}/output" "$file")
		# Add a line of HTML containing the date and title of the article
		index_file_html+="<li><time>${pub_date}</time> - <a href=\"$file_path\">$title</a></li>"
		index_file_html+=$'\n'
	done

	index_file_html=$(echo "$index_file_html" | head -n -1) # Remove last (empty) line from variable
	index_file_html+="</nav>"

	path_for_output="${BASE_PATH}/output/site-map.html"
	echo "$index_file_html" > "$path_for_output"         # Output variable to file
	add_header_and_footer_to_index "$path_for_output"    # Add header and footer to index file
	sed -i 's/[\$][\$]title[\$][\$]/Site Map/g' "$path_for_output" # Replace title variable with 'Site Map'
}
# Copy static assets (global stylesheet, 'files' and 'fonts' directories)
# from the project root into 'output'.
copy_things_in() {
	cp "${BASE_PATH}/styles.css" "${BASE_PATH}/output/"
	cp -r "${BASE_PATH}/files" "${BASE_PATH}/output/"
	cp -r "${BASE_PATH}/fonts" "${BASE_PATH}/output/"
}
# Remove the temporary working directory created by setup_temp_dir.
clean_up() {
	rm -r "${BASE_PATH}/temp"
}
# Main sequence: validate the project layout, convert every source file,
# then generate the RSS feed and site map and copy static assets in.
check_for_dirs
setup_temp_dir
setup_output_dir
del_files_in_output
md_to_html
gen_sorted_file_list             # Sets the 'sorted_file_list' variable
gen_rss_feed "$sorted_file_list"   # Uses the 'sorted_file_list' variable
gen_index_page "$sorted_file_list" # Uses the 'sorted_file_list' variable
copy_things_in
clean_up