#!/bin/bash
BASE_PATH="/home/aadhavan/Programming/Bash/sitegen"
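
# Expected layout under BASE_PATH (as implied by the checks and steps below;
# adjust BASE_PATH to wherever your copy of the site lives):
#   source/        - Markdown content; each .md file starts with a metadata block
#   header.html    - global header, prepended to every generated page
#   footer.html    - global footer, appended to every generated page
#   output/        - the generated site (recreated on every run)
#   temp/          - scratch directory (created and removed by this script)
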
check_for_dirs() {
    if [ ! -d "${BASE_PATH}/source" ]; then
        echo "ERROR: 'source' folder does not exist. Your content is sourced from this folder."
        exit 1
    fi

    if [ -d "${BASE_PATH}/temp" ]; then
        echo "ERROR: You have an existing 'temp' folder. Please delete this folder, and run the script again."
        exit 1
    fi

    if [ ! -f "${BASE_PATH}/header.html" ]; then
        echo "ERROR: You do not have a header.html file. This file is used as a global header. Please create this file, and run the script again."
        exit 1
    fi

    if [ ! -f "${BASE_PATH}/footer.html" ]; then
        echo "ERROR: You do not have a footer.html file. This file is used as a global footer. Please create this file, and run the script again."
        exit 1
    fi
}

setup_temp_dir() {
    # 'temp' is guaranteed not to exist at this point (checked in check_for_dirs)
    mkdir "${BASE_PATH}/temp"
}

setup_output_dir() {
    rm -rf "${BASE_PATH}/output"                       # Delete existing 'output' directory, if any
    cp -r "${BASE_PATH}/source" "${BASE_PATH}/output"  # Copy directory structure from 'source' to 'output'
}

del_files_in_output() {
    find "${BASE_PATH}/output" -type f -name "*.md" -delete  # Delete all .md files (which were copied over from 'source') in 'output'
}

read_metadata() {
    # Read the metadata from the top of a .md file into a string
    metadata=$(awk 'BEGIN{RS = "\n\n"} {print $0}; {exit}' "$1")  # Reads from the .md file until a double-newline is encountered
}

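# Illustration of the metadata block this expects at the top of each .md file:
# "key: value" lines ending at the first blank line. Key names other than
# 'nodate'/'notitle' are hypothetical examples, not required by the script.
#
#   title: My First Post
#   nodate: false
#
#   The article body starts after the blank line...
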
convert_to_array() {
    # Converts the metadata into two arrays: one with the keys, and the other with the values.
    readarray -t meta_key < <(echo -e "$1" | awk -F: '{print $1}')
    readarray -t meta_value < <(echo -e "$1" | awk -F: '{st = index($0,":"); values = substr($0,st+1); print values}' | cut -c 2-)

    # Merge both arrays into an associative array
    declare -Ag meta_array
    for index in "${!meta_key[@]}"; do
        meta_array["${meta_key[$index]}"]="${meta_value[$index]}"
    done
}

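# For the metadata block sketched above, the resulting associative array would be:
#   meta_array[title]="My First Post"
#   meta_array[nodate]="false"
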
add_date_to_array() {
    meta_array["date"]="$(date -r "$1" +'%b %d, %Y')"  # File modification time, formatted e.g. "Jan 05, 2024"
}

add_header_and_footer() {
    # Copy header to temporary location
    cp "$BASE_PATH/header.html" "$BASE_PATH/temp/temp_header.html"

    # Check for relevant metadata, and perform the corresponding action
    if [[ "${meta_array[nodate]}" == "true" ]]; then
        sed -i '$ d' "$BASE_PATH/temp/temp_header.html"  # 'nodate' removes the 'date published' section of the header
    fi
    if [[ "${meta_array[notitle]}" == "true" ]]; then
        sed -i 's/ - Two More Cents//g' "$BASE_PATH/temp/temp_header.html"  # 'notitle' removes the suffix from the title
    fi

    # Add header
    cat "$BASE_PATH/temp/temp_header.html" "$1" > "$BASE_PATH/temp/temp.html"

    # Add footer
    echo >> "$BASE_PATH/temp/temp.html"
    cat "$BASE_PATH/footer.html" >> "$BASE_PATH/temp/temp.html"

    # Move temp file to original location
    mv "$BASE_PATH/temp/temp.html" "$1"
}

add_header_and_footer_to_index() {
    # Add header; for the index page, remove the last line of the header (date published)
    head -n -1 "$BASE_PATH/header.html" | cat - "$1" > "$BASE_PATH/temp/temp.html"

    # Add footer
    echo >> "$BASE_PATH/temp/temp.html"
    cat "$BASE_PATH/footer.html" >> "$BASE_PATH/temp/temp.html"

    # Move temp file to original location
    mv "$BASE_PATH/temp/temp.html" "$1"
}

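# The two functions above assume a header.html shaped roughly like this sketch
# (inferred from the sed/head commands; not a verbatim copy of the real file):
#   <title>$$title$$ - Two More Cents</title>
#   ...
#   <p id="date-published">Published on $$date$$</p>   <- must be the last line, so
#                                                          'sed $ d' / 'head -n -1' can drop it
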
replace_vars() {
    # Loop through the keys of the 'meta_array' array, search for all occurrences of $$key$$ in the HTML doc, and replace them with the corresponding value.
    for arr_key in "${!meta_array[@]}"; do
        sed -i "s/[\$][\$]$arr_key[\$][\$]/${meta_array[$arr_key]}/g" "$1"
    done
}

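# Example: with meta_array[title]="My First Post", every literal $$title$$ in the
# page is replaced with "My First Post".
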
md_to_html() {
    # Convert .md files from 'source' and place them into the correct locations in 'output'
    files=$(find "$BASE_PATH/source" -name "*.md")
    for file in $files; do
        read_metadata "$file"          # Sets the 'metadata' variable
        convert_to_array "$metadata"   # Sets the 'meta_array' array
        add_date_to_array "$file"      # Uses 'meta_array' array

        # Copy file to temp dir and strip metadata
        cp "$file" "$BASE_PATH/temp/"
        num_lines=$(( $(echo "$metadata" | wc -l) + 1 ))
        sed -i "1,${num_lines}d" "$BASE_PATH/temp/$(basename "$file")"

        # Construct path for output file
        path_for_output=$(realpath --relative-to="${BASE_PATH}/source" "$file")
        path_for_output="${BASE_PATH}/output/${path_for_output}"
        path_for_output="$(dirname "$path_for_output")/$(basename "$path_for_output" .md).html"

        # Convert the file, and place the output in the correct location
        pandoc -f markdown --wrap=preserve "$BASE_PATH/temp/$(basename "$file")" > "${path_for_output}"
        rm "$BASE_PATH"/temp/*

        add_header_and_footer "$path_for_output"  # Uses 'meta_array' array
        replace_vars "$path_for_output"           # Uses 'meta_array' array

        unset metadata meta_key meta_value meta_array
    done
}

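# Example of the source-to-output path mapping performed above (the 'essays'
# subdirectory is hypothetical):
#   source/essays/first-post.md  ->  output/essays/first-post.html
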
gen_sorted_file_list() {
    # Generate a list of the HTML files, sorted by their publication date (read from the contents of each HTML file)
    files=$(find "$BASE_PATH/output" -name "*.html")
    for file in $files; do
        if grep -q "date-published" "$file"; then
            # Write files that have a published date to a temp file (only these files can be listed with their date on the site map)
            echo "$file" >> "$BASE_PATH/temp/file_listing.txt"
            date_mod+=$(grep "date-published" "$file" | awk -F'[<>]' '{print $3}' \
                | cut -d' ' -f '1,2' --complement | tr -d "," | awk '{print $2" "$1" "$3}' \
                | date -f - +"%s")
            # Explanation:
            # Line 1 extracts the published date from the HTML file
            # Line 2 re-arranges this information into "DD Mon YYYY" order
            # Line 3 converts this into a UNIX timestamp
            date_mod+=$'\n'
        fi
    done
    date_mod=$(echo "$date_mod" | head -n -1)  # Remove last (empty) line from variable

    echo "$date_mod" > "$BASE_PATH/temp/date_mod.txt"  # Write the corresponding publication timestamps to a temp file
    paste "$BASE_PATH/temp/file_listing.txt" "$BASE_PATH/temp/date_mod.txt" > "$BASE_PATH/temp/new_file_list.txt"  # Combine file list and timestamps into a single file
    sorted_file_list=$(sort -r -k 2 "$BASE_PATH/temp/new_file_list.txt")  # Sort the entries by timestamp (newest to oldest)
    sorted_file_list=$(echo "$sorted_file_list" | awk '{print $1}')       # Keep only the first column (the file path)
}

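# Example of what the pipeline above expects and produces, assuming a
# date-published element like the header sketch earlier:
#   <p id="date-published">Published on Jan 05, 2024</p>
#     -> "05 Jan 2024" -> epoch seconds (used as the sort key)
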
gen_index_page() {
    # Generate an index page (site map) that includes links to the other pages
    index_file_html="<nav class=\"toc\">"$'\n'  # Variable to store the body HTML of the index page; enclose the list in a nav
    for file in $1; do
        title=$(grep "<title>" "$file" | head -n 1 | awk -F'[<>]' '{print $3}')  # Find the title of the web page
        suffix=" - Two More Cents"
        title=${title%"$suffix"}  # Remove the website name from it

        pub_date=$(grep "date-published" "$file" | head -n 1 | awk -F'[<>]' '{print $3}')  # Find the date published
        prefix="Published on "
        pub_date=${pub_date#"$prefix"}  # Remove the prefix from it
        pub_date=$(echo "$pub_date" | tr -d "," | awk '{print $2" "$1" "$3}' | date -f - +"%m/%d/%Y")  # Re-arrange the date and convert to mm/dd/yyyy

        file_path=$(realpath --relative-to="${BASE_PATH}/output" "$file")
        index_file_html+="<li><time>${pub_date}</time> - <a href=\"$file_path\">$title</a></li>"  # Add a line of HTML containing the date and title of the article
        index_file_html+=$'\n'
    done
    index_file_html=$(echo "$index_file_html" | head -n -1)  # Remove last (empty) line from variable
    index_file_html+="</nav>"

    path_for_output="${BASE_PATH}/output/site-map.html"
    echo "$index_file_html" > "$path_for_output"       # Output variable to file
    add_header_and_footer_to_index "$path_for_output"  # Add header and footer to index file
    sed -i 's/[\$][\$]title[\$][\$]/Site Map - Two More Cents/g' "$path_for_output"  # Replace title variable with 'site map' title
}

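# Each generated entry looks like this (title and path are illustrative):
#   <li><time>01/05/2024</time> - <a href="essays/first-post.html">My First Post</a></li>
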
clean_up() {
    rm -r "${BASE_PATH}/temp"
}

check_for_dirs
setup_temp_dir
setup_output_dir
del_files_in_output
md_to_html
gen_sorted_file_list # Sets the 'sorted_file_list' variable
gen_index_page "$sorted_file_list" # Uses the 'sorted_file_list' variable
clean_up