#!/usr/bin/env bash # TODO: Add a comment block at the top of the script, describing what it does. # TODO: Add a '-h' or '--help' flag, that displays information about the script, and how to use it. # s4g - Stupid Simple Static-Site Generator # --------------------------------------------- # # This is a static-site generator, that can be used to convert Markdown files into HTML. # It is extremely simple and extremely opinionated, as you can see if you read the code below. # A lot of paths and directories are hard-coded, to suit my workflow. # # To use it, create a directory for your project (e.g. 'website'). Inside 'website', create # two directories: 'source' (which holds your Markdown files) and 'output' (which holds the # converted HTML. To exclude files from the conversion process, place them inside a directory # named 'exclude' inside 'source'. This directory will not be copied over into 'output', and # any files inside it will not be converted. # # In addition to these directories, three files are needed in 'website': # 1. 'header.html' - A header, which is prepended to every source file. Unfortunately, this must # be written in regular HTML. # 2. 'footer.html' - A footer, which is appended to every source file. Also must be written in HTML. # 3. 'styles.css' - A global stylesheet. # # The script takes in a parameter, which is the directory that contains the 'source' and 'output' folders. # # If you have any comments or questions, please email aadhavan@twomorecents.org. set -o errexit # Stop executing when a command fails set -o nounset # Stop executing when accessing an unset variable set -o pipefail # Treat a pipeline as failing, even if one command in the pipeline fails if [[ "${TRACE-0}" == "1" ]]; then set -o xtrace; fi # Enable tracing (output of each command) if the TRACE variable is set if [ "$#" -ne 1 ]; then echo "ERROR: Invalid number of paramters. Read script for more details." exit fi BASE_PATH=$(cd "$1"; pwd) check_for_dirs() { if [[ ! -d "${BASE_PATH}/source" ]]; then echo "ERROR: 'source' folder does not exist. Your content is sourced from this folder." >&2 exit fi if [[ -d "${BASE_PATH}/temp" ]]; then echo "ERROR: You have an existing 'temp' folder. Please delete this folder, and run the script again." >&2 exit fi if [[ ! -f "${BASE_PATH}/header.html" ]]; then echo "ERROR: You do not have a header.html file. This file is used as a global header. Please create this file, and run the script again." >&2 exit fi if [[ ! -f "${BASE_PATH}/footer.html" ]]; then echo "ERROR: You do not have a footer.html file. This file is used as a global footer. Please create this file, and run the script again." >&2 exit fi } setup_temp_dir() { # Check if 'temp' already exists mkdir "${BASE_PATH}/temp" } setup_output_dir() { rm -rf "${BASE_PATH}/output" # Delete existing 'output' directory cp -r "${BASE_PATH}/source" "${BASE_PATH}/output" #Copy directory structure from 'source' to 'output' } del_files_in_output() { find "$BASE_PATH/output" -type f -name "*.md" -delete #Delete all .md files (which were copied over from 'source') in 'output' # Delete the 'exclude' directory from the output folder. # This folder contains markdown files which shouldn't be converted to HTML. if [[ -d "${BASE_PATH}/output/exclude" ]]; then rm -r "${BASE_PATH}/output/exclude" fi } read_metadata() { # Read the metadata from the top of a .md file into a string metadata=$(awk 'BEGIN{RS = "\n\n"} {print $0}; {exit}' "$1") # Reads from the .md file until a double-newline is encountered } convert_to_array() { local meta_key local meta_value # Converts the metadata into two arrays: one with the key, and the other with the value. readarray -t meta_key < <(echo -e "$1" | awk -F: '{print $1}') readarray -t meta_value < <(echo -e "$1" | awk -F: '{st = index($0,":"); values = substr($0,st+1); print values}' | cut -c 2-) # Merge both arrays into an associative array declare -Ag meta_array for index in $(seq 0 `expr "${#meta_key[@]}" - 1`); do meta_array["${meta_key[$index]}"]="${meta_value[$index]}" done } add_date_to_array() { if [[ "${meta_array[date]-}" == "auto" ]]; then # If the date is set to 'auto' meta_array["date"]="$(date -r $1 +'%b %d, %Y')" fi } add_header_and_footer() { # Copy header to temporary location - 'parent_dir' is used to ensure that # each temporary header is in its own directory cp "$BASE_PATH/header.html" "$BASE_PATH/temp/$parent_dir/temp_header.html" # Check for relevant metadata, and perform corresponding action # This syntax is intended (although it doesn't follow typical Bash array syntax). See https://stackoverflow.com/a/45385463 for more info. if [[ ! -v "meta_array[date]" ]]; then # If there is no date sed -i '$ d' "$BASE_PATH/temp/$parent_dir/temp_header.html" # remove the 'date published' section of the header fi if [[ "${meta_array[noappend]-}" == "true" ]]; then sed -i 's/ - Two More Cents//g' "$BASE_PATH/temp/$parent_dir/temp_header.html" # 'noappend' removes the suffix from the title fi # Add header cat "$BASE_PATH/temp/$parent_dir/temp_header.html" | cat - "$1" > "$BASE_PATH/temp/$parent_dir/temp.html" # Add footer echo >> "$BASE_PATH/temp/$parent_dir/temp.html" # Add newline cat "$BASE_PATH/footer.html" >> "$BASE_PATH/temp/$parent_dir/temp.html" # Move temp file to original location mv "$BASE_PATH/temp/$parent_dir/temp.html" "$1" } add_header_and_footer_to_index() { mkdir "$BASE_PATH/temp/index_page" # Add header cat "$BASE_PATH/header.html" | head -n -1 | cat - "$1" > "$BASE_PATH/temp/index_page/temp.html" # For the index page, remove the last line of the header (date published) # Add footer echo >> "$BASE_PATH/temp/index_page/temp.html" # Add newline cat "$BASE_PATH/footer.html" >> "$BASE_PATH/temp/index_page/temp.html" # Move temp file to original location mv "$BASE_PATH/temp/index_page/temp.html" "$1" } replace_vars() { # Loop through the keys of the 'meta_array' array, search for all occurences of the key in the HTML doc, and replace them with the corresponding value.. for arr_key in "${!meta_array[@]}"; do meta_array["$arr_key"]="${meta_array["$arr_key"]//\//\\/}" # Escape all forward slashes in the value sed -i "s/[\$][\$]$arr_key[\$][\$]/${meta_array[$arr_key]}/g" "$1" done } convert_file() { # Helper function for md_to_html(). It takes in the file to convert as an argument, # and converts that file. file_to_conv="$1" echo "Converting $file_to_conv" read_metadata "$file_to_conv" # Sets the 'metadata' variable # Generate a random 8-character alphabetic string, until we find # one that doesn't exist in the 'temp' directory. This string # will serve as the parent directory of our file. parent_dir="$(tr -dc A-Za-z "${path_for_output}" convert_to_array "$metadata" #Sets the 'meta_array' array add_date_to_array "$file_to_conv" #Uses 'meta_array' array add_header_and_footer "$path_for_output" # Uses 'meta_array' array and 'parent_dir' variable replace_vars "$path_for_output" #Uses 'meta_array' array unset metadata meta_key meta_value meta_array path_for_output } md_to_html() { # Convert .md files from 'source' and place them into the correct locations into 'output' # Exclude all files and folders inside the 'exclude' directory local files=$(find "${BASE_PATH}/source" -not -path "${BASE_PATH}/source/exclude/*" -name "*.md") # Concurrently convert each document for file in $files; do (convert_file "$file") & unset metadata path_for_output done # Wait for all documents to finish converting, then remove all temporary files. wait rm -rf "$BASE_PATH"/temp/* } gen_sorted_file_list() { # Generate a list of the HTMl files, sorted by when they were last modified (read from the contents of the HTML file) local files=$(find "$BASE_PATH/output" -name "*.html") local date_mod for file in $files; do if grep -q "date-published" "$file" ; then echo "$file" >> "$BASE_PATH/temp/file_listing.txt" # Write files that have a date published to a temp file (we only want the files with date modified, because only these files can be listed with their date on the site map) date_mod+=$(cat "$file" | grep "date-published" | awk -F'[<>]' '{print $3}' \ | cut -d' ' -f '1,2' --complement | tr -d "," | awk '{print $2" "$1" "$3}' \ | date -f - +"%s") # Explanation: # Line 1 extracts the published date from the HTML file # Line 2 re-arranges this information, and converts it into DD MM YY format # Line 3 converts this into a UNIX timestamp date_mod+=$'\n' fi done date_mod=$(echo "${date_mod-}" | head -n -1) # Remove last (empty) line from variable echo "${date_mod-}" > "$BASE_PATH/temp/date_mod.txt" # Write the corresponding 'date modified' timestamps to a temp file paste "$BASE_PATH/temp/file_listing.txt" "$BASE_PATH/temp/date_mod.txt" > "$BASE_PATH/temp/new_file_list.txt" # Combine file list and date modified into a single file sorted_file_list=$(sort -r -k 2 "$BASE_PATH/temp/new_file_list.txt") # Sort the data in the file based on the timestamp (from newest to oldest), and store it into a variable sorted_file_list=$(echo "$sorted_file_list" | awk '{print $1}') # Store only the first column (the file path) in the variable } gen_rss_feed() { # Uses the sorted_file_list variable to generate an RSS feed echo "Generating RSS Feed..." local RSS_FEED_PATH="${BASE_PATH}/output/rss.xml" touch "$RSS_FEED_PATH" # Create the RSS file local RSS_CONTENT="\n" counter=0 RSS_CONTENT+="\n" RSS_CONTENT+="Two More Cents\n" RSS_CONTENT+="http://twomorecents.org/\n" RSS_CONTENT+="The personal website of Aadhavan Srinivasan.\n" RSS_CONTENT+="en-us\n" RSS_CONTENT+="$(date -R)\n" RSS_CONTENT+="s4g - Stupid Simple Static Site Generator\n" for file in $1; do if [ $counter -gt 9 ]; then break fi RSS_CONTENT+="\n" RSS_CONTENT+="\n" RSS_CONTENT+=$(cat "$file" | grep "<title>" | head -n 1 | awk -F'[<>]' '{print $3}')$'\n' RSS_CONTENT+="\n" RSS_CONTENT+="\n" RSS_CONTENT+="https://twomorecents.org/" RSS_CONTENT+=$(realpath --relative-to="${BASE_PATH}/output" "$file") RSS_CONTENT+="\n" RSS_CONTENT+="\n" ((++counter)) done RSS_CONTENT+="\n" echo -e "$RSS_CONTENT" > $RSS_FEED_PATH } gen_index_page() { # Generate an index page (site map) that includes links to the other pages echo "Generating index page..." local index_file_html="