You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
217 lines
7.9 KiB
Bash
217 lines
7.9 KiB
Bash
#!/bin/bash

# Root directory of the site. It is expected to contain the 'source' folder,
# 'header.html' and 'footer.html'; 'output' and 'temp' are created inside it.
# Set SITEGEN_BASE_PATH in the environment to build a site that lives
# somewhere else; when it is unset or empty the original location is used.
BASE_PATH="${SITEGEN_BASE_PATH:-/home/aadhavan/Programming/Bash/sitegen}"
|
|
|
|
|
|
# Pre-flight checks: verify that everything the generator needs is present
# under BASE_PATH, and that no stale 'temp' folder is left over.
# Globals:  BASE_PATH (read)
# Outputs:  an error message on stderr when a check fails
# Returns:  0 when all checks pass; otherwise terminates the script with
#           exit status 1 so that callers can detect the failure.
check_for_dirs() {
  if [[ ! -d "${BASE_PATH}/source" ]]; then
    echo "ERROR: 'source' folder does not exist. Your content is sourced from this folder." >&2
    exit 1
  fi

  if [[ -d "${BASE_PATH}/temp" ]]; then
    echo "ERROR: You have an existing 'temp' folder. Please delete this folder, and run the script again." >&2
    exit 1
  fi

  if [[ ! -f "${BASE_PATH}/header.html" ]]; then
    echo "ERROR: You do not have a header.html file. This file is used as a global header. Please create this file, and run the script again." >&2
    exit 1
  fi

  if [[ ! -f "${BASE_PATH}/footer.html" ]]; then
    echo "ERROR: You do not have a footer.html file. This file is used as a global footer. Please create this file, and run the script again." >&2
    exit 1
  fi
}
|
|
|
|
|
|
# Create the scratch directory used while pages are being built.
# No existence check is done here: mkdir itself fails (and its status is
# returned) when 'temp' is already present.
# Globals:  BASE_PATH (read)
setup_temp_dir() {
  local scratch_dir="${BASE_PATH}/temp"
  mkdir "${scratch_dir}"
}
|
|
|
|
# Recreate 'output' as a fresh copy of 'source'.
# Globals:  BASE_PATH (read)
setup_output_dir() {
  # -f: a missing 'output' (e.g. on the very first run) is not an error.
  # ':?' aborts instead of running 'rm -rf /output' if BASE_PATH is empty.
  rm -rf -- "${BASE_PATH:?}/output"

  # Copy the directory structure (and files) from 'source' to 'output'
  cp -r -- "${BASE_PATH}/source" "${BASE_PATH}/output"
}
|
|
|
|
# Delete every .md file under 'output'. They were copied over from 'source'
# together with the directory structure, and are not wanted in the result.
# Globals:  BASE_PATH (read)
del_files_in_output() {
  # Quoted so that a BASE_PATH containing spaces or glob characters works.
  find "${BASE_PATH}/output" -type f -name "*.md" -delete
}
|
|
|
|
# Read the metadata block from the top of a .md file into the global
# 'metadata' string.
# Globals:    metadata (written)
# Arguments:  $1 - path of the .md file
read_metadata() {
  # With a double newline as record separator, the first awk record is
  # everything up to the first blank line; print it and stop.
  # (A multi-character RS relies on an awk that treats RS as a regex.)
  metadata=$(awk 'BEGIN{RS = "\n\n"} {print $0}; {exit}' "$1")
}
|
|
|
|
# Parse 'key: value' metadata lines into arrays.
# Globals:    meta_key, meta_value (written) - parallel indexed arrays of
#                                              the keys and the values
#             meta_array (written)           - associative array key -> value
# Arguments:  $1 - the metadata text, one 'key: value' pair per line
# Notes:
#   - The line is split at the FIRST colon, so values may contain colons.
#   - A single space after the colon is dropped; 'key:value' keeps its value
#     intact.
#   - The text is taken literally (backslashes are not interpreted).
#   - Empty lines and lines with an empty key are skipped; a line without any
#     colon yields the whole line as key with an empty value.
#   - Existing entries of meta_array are not cleared here.
convert_to_array() {
  local line key value

  meta_key=()
  meta_value=()
  declare -Ag meta_array

  while IFS= read -r line || [[ -n "${line}" ]]; do
    [[ -n "${line}" ]] || continue

    key=${line%%:*}
    [[ -n "${key}" ]] || continue # An empty key is not a valid subscript

    if [[ "${line}" == *:* ]]; then
      value=${line#*:}
      value=${value# }
    else
      value=""
    fi

    meta_key+=("${key}")
    meta_value+=("${value}")
    meta_array["${key}"]=${value}
  done <<< "$1"
}
|
|
|
|
# Make sure the metadata has a 'date' entry, then print it.
# If the metadata carries no date, the file's last-modification time is used,
# formatted like 'Jan 01, 2020'.
# Globals:    meta_array (read, written)
# Arguments:  $1 - path of the .md file the metadata belongs to
# Outputs:    the resulting date on stdout
add_date_to_array() {
  if [[ -z "${meta_array[date]}" ]]; then # No existing date in the metadata
    meta_array["date"]="$(date -r "$1" +'%b %d, %Y')"
  fi
  echo "${meta_array[date]}"
}
|
|
|
|
# Wrap a generated HTML page in the global header and footer, in place.
# Globals:    BASE_PATH (read), meta_array (read)
# Arguments:  $1 - path of the HTML file to wrap
# Metadata switches:
#   nodate: true   - drop the last line of the header (the 'date published'
#                    section)
#   notitle: true  - drop the ' - Two More Cents' suffix from the header
# The working copy of the header is left in 'temp' as temp_header.html.
add_header_and_footer() {
  local page=$1
  local temp_header="${BASE_PATH}/temp/temp_header.html"
  local temp_page="${BASE_PATH}/temp/temp.html"

  # Work on a copy of the header so the per-page tweaks do not touch the
  # original
  cp -- "${BASE_PATH}/header.html" "${temp_header}"

  if [[ "${meta_array[nodate]}" == "true" ]]; then
    sed -i '$ d' "${temp_header}"
  fi

  if [[ "${meta_array[notitle]}" == "true" ]]; then
    sed -i 's/ - Two More Cents//g' "${temp_header}"
  fi

  # Header, then the page body
  cat -- "${temp_header}" "${page}" > "${temp_page}"

  # Blank line, then the footer
  echo >> "${temp_page}"
  cat -- "${BASE_PATH}/footer.html" >> "${temp_page}"

  # Move the assembled page over the original
  mv -- "${temp_page}" "${page}"
}
|
|
|
|
# Wrap the index page (site map) in the global header and footer, in place.
# The last line of the header (date published) is always left out here.
# Globals:    BASE_PATH (read)
# Arguments:  $1 - path of the index HTML file
add_header_and_footer_to_index() {
  local page=$1
  local temp_page="${BASE_PATH}/temp/temp.html"

  {
    head -n -1 -- "${BASE_PATH}/header.html" # Header without its last line
    cat -- "${page}"                         # Page body
    echo                                     # Blank line before the footer
    cat -- "${BASE_PATH}/footer.html"        # Footer
  } > "${temp_page}"

  # Move the assembled page over the original
  mv -- "${temp_page}" "${page}"
}
|
|
|
|
# Replace every $$key$$ placeholder in an HTML file with the matching value
# from the metadata, in place.
# Globals:    meta_array (read)
# Arguments:  $1 - path of the HTML file
replace_vars() {
  local page=$1
  local arr_key key_pattern replacement

  for arr_key in "${!meta_array[@]}"; do
    # Escape regex metacharacters in the key, and the characters that are
    # special in a sed replacement ('\', '/' and '&') in the value, so that
    # e.g. a title such as 'TCP/IP & friends' is inserted literally instead
    # of breaking the sed expression.
    key_pattern=$(printf '%s' "${arr_key}" | sed 's|[][\\/.*^$]|\\&|g')
    replacement=$(printf '%s' "${meta_array[$arr_key]}" | sed 's|[\\/&]|\\&|g')

    sed -i "s/[\$][\$]${key_pattern}[\$][\$]/${replacement}/g" "${page}"
  done
}
|
|
|
|
# Convert every .md file under 'source' to HTML and place the result at the
# corresponding location under 'output', wrapped in header/footer and with
# the metadata placeholders filled in.
# Globals:  BASE_PATH (read); metadata, meta_key, meta_value, meta_array
#           (set by the helper functions for each file, unset afterwards)
# Outputs:  the date of each page on stdout (printed by add_date_to_array)
md_to_html() {
  local file temp_copy num_lines path_for_output

  # NUL-delimited find output, read on fd 3, keeps paths containing spaces or
  # glob characters intact and leaves stdin free for the commands in the loop.
  while IFS= read -r -d '' -u 3 file; do
    read_metadata "${file}"       # Sets the 'metadata' variable
    convert_to_array "${metadata}" # Sets the 'meta_array' array
    add_date_to_array "${file}"   # Uses 'meta_array' array

    # Copy file to temp dir and strip metadata: the metadata lines plus the
    # blank line that follows them
    cp -- "${file}" "${BASE_PATH}/temp/"
    temp_copy="${BASE_PATH}/temp/$(basename -- "${file}")"
    num_lines=$(($(echo "${metadata}" | wc -l) + 1))
    sed -i "1,${num_lines}d" "${temp_copy}"

    # Construct path for output file: same relative path as under 'source',
    # with the .md extension replaced by .html
    path_for_output=$(realpath --relative-to="${BASE_PATH}/source" -- "${file}")
    path_for_output="${BASE_PATH}/output/${path_for_output}"
    path_for_output="$(dirname -- "${path_for_output}")/$(basename -- "${path_for_output}" .md).html"

    # Convert the file, and place the output in the correct location
    pandoc -f markdown --wrap=preserve "${temp_copy}" > "${path_for_output}"
    rm -- "${BASE_PATH:?}/temp/"* # ':?' guards against an empty BASE_PATH

    add_header_and_footer "${path_for_output}" # Uses 'meta_array' array
    replace_vars "${path_for_output}"          # Uses 'meta_array' array

    unset metadata meta_key meta_value meta_array
  done 3< <(find "${BASE_PATH}/source" -name "*.md" -print0)
}
|
|
|
|
|
|
# Generate a list of the HTML files under 'output', sorted from newest to
# oldest by their published date (read from the contents of each HTML file).
# Only files containing a 'date-published' line are listed, because only
# these can be shown with their date on the site map.
# Globals:  BASE_PATH (read)
#           sorted_file_list (written) - one file path per line
# Outputs:  a warning on stderr for a page whose date cannot be parsed
gen_sorted_file_list() {
  local file published timestamp
  local listing=""

  while IFS= read -r -d '' -u 3 file; do
    grep -q "date-published" "${file}" || continue

    # Step 1 extracts the text of the first 'date-published' element
    # Step 2 drops the first two words ('Published on') and the comma
    # Step 3 re-arranges 'Mon DD YYYY' into 'DD Mon YYYY'
    published=$(grep -m 1 "date-published" "${file}" \
      | awk -F'[<>]' '{print $3}' \
      | cut -d' ' -f '1,2' --complement \
      | tr -d "," \
      | awk '{print $2" "$1" "$3}')

    # Convert to a UNIX timestamp. A page with an unreadable date is kept
    # (sorted last) rather than silently shifting the other pages' dates.
    if [[ -z "${published}" ]] \
      || ! timestamp=$(date -d "${published}" +"%s" 2> /dev/null); then
      echo "WARNING: could not read the published date of '${file}'" >&2
      timestamp=0
    fi

    listing+="${timestamp}"$'\t'"${file}"$'\n'
  done 3< <(find "${BASE_PATH}/output" -name "*.html" -print0)

  # Numeric sort on the timestamp (newest first), then keep only the path.
  # The tab separator keeps paths that contain spaces in one piece.
  sorted_file_list=$(printf '%s' "${listing}" \
    | sort -t $'\t' -k 1,1 -n -r \
    | cut -f 2-)
}
|
|
|
|
# Generate an index page (site map) that links to the other pages.
# Globals:    BASE_PATH (read)
# Arguments:  $1 - the HTML files to list, one path per line, in the order
#                  they should appear
# Writes ${BASE_PATH}/output/site-map.html, wrapped in header and footer.
gen_index_page() {
  local file title pub_date file_path
  local suffix=" - Two More Cents"
  local prefix="Published on "
  local path_for_output="${BASE_PATH}/output/site-map.html"
  # Body HTML of the index page; the list is enclosed in a nav
  local index_file_html="<nav class=\"toc\">"$'\n'

  # Read the list line by line (on fd 3) so that paths containing spaces are
  # not split into several words.
  while IFS= read -r -u 3 file; do
    [[ -n "${file}" ]] || continue

    # Title of the web page, without the website name
    title=$(grep "<title>" "${file}" | head -n 1 | awk -F'[<>]' '{print $3}')
    title=${title%"$suffix"}

    # Date published, without its prefix, re-arranged and converted to
    # mm/dd/yyyy
    pub_date=$(grep "date-published" "${file}" | head -n 1 | awk -F'[<>]' '{print $3}')
    pub_date=${pub_date#"$prefix"}
    pub_date=$(echo "${pub_date}" | tr -d "," | awk '{print $2" "$1" "$3}' | date -f - +"%m/%d/%Y")

    file_path=$(realpath --relative-to="${BASE_PATH}/output" -- "${file}")

    # One line of HTML with the date and title of the article
    index_file_html+="<li><time>${pub_date}</time> - <a href=\"${file_path}\">${title}</a></li>"$'\n'
  done 3<<< "$1"

  # Drop the trailing newline so the closing tag follows the last entry
  index_file_html="${index_file_html%$'\n'}</nav>"

  echo "${index_file_html}" > "${path_for_output}"

  add_header_and_footer_to_index "${path_for_output}"

  # Replace title variable with 'site map' title
  sed -i 's/[\$][\$]title[\$][\$]/Site Map - Two More Cents/g' "${path_for_output}"
}
|
|
|
|
# Remove the scratch directory once the site has been generated.
# Globals:  BASE_PATH (read)
clean_up() {
  # Quoted for paths with spaces; ':?' aborts instead of running
  # 'rm -r /temp' should BASE_PATH ever be empty or unset.
  rm -r -- "${BASE_PATH:?}/temp"
}
|
|
|
|
|
|
# Entry point: run the generation steps in order.
main() {
  check_for_dirs      # Stop early if the expected files/folders are not in place
  setup_temp_dir
  setup_output_dir
  del_files_in_output
  md_to_html
  gen_sorted_file_list               # Sets the 'sorted_file_list' variable
  gen_index_page "$sorted_file_list" # Uses the 'sorted_file_list' variable
  clean_up
}

main
|
|
|