Switched to using associative arrays instead of two regular arrays for metadata, and ensured that the site map does not contain any file without a 'date-published' line

master
Aadhavan Srinivasan 1 year ago
parent 5e8445febf
commit 93fe404571

@ -51,24 +51,32 @@ convert_to_array() {
readarray -t meta_value < <(echo -e "$1" | awk -F: '{st = index($0,":"); values = substr($0,st+1); print values}' | cut -c 2-)
# Merge both arrays into an associative array
declare -A meta_array
declare -Ag meta_array
for index in $(seq 0 `expr "${#meta_key[@]}" - 1`); do
meta_array["${meta_key[$index]}"]="${meta_value[$index]}"
done
# for array_key in "${!meta_array[@]}"; do
# printf "[%s] = %s\n" "$array_key" "${meta_array[$array_key]}"
# done
}
# Record the modification date of a file under the "date" metadata key.
# Globals:
#   meta_array (written) - gets meta_array[date]; the caller must already have
#                          declared it as an associative array (declare -A).
#   meta_key, meta_value (written) - "date" and the formatted date are appended.
#     NOTE(review): these look like the older parallel-array path; they are
#     kept so existing readers keep working - confirm whether still needed.
# Arguments:
#   $1 - path of the file whose modification time is read (via 'date -r').
# Returns:
#   The exit status of 'date' (non-zero if the file cannot be read); the
#   metadata entries are still written (with an empty value) in that case.
add_date_to_array() {
  local formatted_date
  local status=0
  # Quote the path so names containing spaces or glob characters reach 'date'
  # as a single argument, and run 'date' only once for all consumers.
  formatted_date="$(date -r "$1" +'%b %d, %Y')" || status=$?
  meta_key+=("date")
  meta_value+=("$formatted_date")
  meta_array["date"]="$formatted_date"
  return "$status"
}
add_header_and_footer() {
# Copy header to temporary location
cp $BASE_PATH/header.html $BASE_PATH/temp/temp_header.html
# Check for relevant metadata, and perform corresponding action
if [[ "${meta_array[nodate]}" == "true" ]]; then
sed -i '$ d' $BASE_PATH/temp/temp_header.html # 'nodate' removes the 'date published' section of the header
fi
if [[ "${meta_array[notitle]}" == "true" ]]; then
sed -i 's/ - Two More Cents//g' $BASE_PATH/temp/temp_header.html # 'notitle' removes the suffix from the title
fi
# Add header
cat $BASE_PATH/header.html | cat - $1 > $BASE_PATH/temp/temp.html
cat $BASE_PATH/temp/temp_header.html | cat - $1 > $BASE_PATH/temp/temp.html
# Add footer
echo >> $BASE_PATH/temp/temp.html
@ -91,9 +99,9 @@ add_header_and_footer_to_index() {
}
replace_vars() {
# Replace every $$key$$ placeholder in the file given as $1 with the matching metadata value, editing the file in place with 'sed -i'.
# NOTE(review): two 'for' headers but only one 'done' are visible in this span; it looks like the parallel-array loop ('meta_key'/'meta_value') and the associative-array loop ('meta_array') are shown together - confirm which loop is live.
# Parallel-array form: loop through the 'meta_key' array, search for all occurrences of each key in the HTML doc, and replace them with the corresponding entry in 'meta_value'.
for index in $(seq 0 `expr "${#meta_key[@]}" - 1`); do
sed -i "s/[\$][\$]${meta_key[$index]}[\$][\$]/${meta_value[index]}/g" $1
# Loop through the keys of the 'meta_array' array, search for all occurrences of the key in the HTML doc, and replace them with the corresponding value.
# The value is interpolated into the sed replacement unescaped, so a value containing '/', '&' or '\' is interpreted by sed instead of being inserted literally.
for arr_key in "${!meta_array[@]}"; do
sed -i "s/[\$][\$]$arr_key[\$][\$]/${meta_array[$arr_key]}/g" $1
done
}
@ -106,12 +114,12 @@ md_to_html() {
for file in $files; do
read_metadata $file # Sets the 'metadata' variable
convert_to_array "$metadata" #Sets the 'meta_key' and 'meta_value' arrays
add_date_to_array "$file" #Uses 'meta_key' and 'meta_value' arrays
convert_to_array "$metadata" #Sets the 'meta_array' array
add_date_to_array "$file" #Uses 'meta_array' array
# Copy file to temp dir and strip metadata
cp $file $BASE_PATH/temp/
let num_lines=$(echo $metadata | wc -l)+1
let num_lines=$(echo "$metadata" | wc -l)+1
sed -i "1,${num_lines}d" $BASE_PATH/temp/`basename $file`
# Construct path for output file
@ -123,29 +131,31 @@ md_to_html() {
pandoc -f markdown --wrap=preserve $BASE_PATH/temp/`basename $file` > ${path_for_output}
rm $BASE_PATH/temp/*
add_header_and_footer $path_for_output
replace_vars $path_for_output #Uses 'meta_key' and 'meta_value' arrays
add_header_and_footer $path_for_output # Uses 'meta_array' array
replace_vars $path_for_output #Uses 'meta_array' array
unset metadata meta_key meta_value
unset metadata meta_key meta_value meta_array
done
}
gen_sorted_file_list() { # Generate a list of the HTMl files, sorted by when they were last modified (read from the contents of the HTML file)
files=$(find $BASE_PATH/output -name "*.html")
echo "$files" > $BASE_PATH/temp/file_listing.txt # Write file list to a temp file
for file in $files; do
date_mod+=$(cat "$file" | grep "date-published" | awk -F'[<>]' '{print $3}' \
| cut -d' ' -f '1,2' --complement | tr -d "," | awk '{print $2" "$1" "$3}' \
| date -f - +"%s")
if grep -q "date-published" $file; then
echo "$file" >> $BASE_PATH/temp/file_listing.txt # Write files that have a date published to a temp file (we only want the files with date modified, because only these files can be listed with their date on the site map)
# Explanation:
date_mod+=$(cat "$file" | grep "date-published" | awk -F'[<>]' '{print $3}' \
| cut -d' ' -f '1,2' --complement | tr -d "," | awk '{print $2" "$1" "$3}' \
| date -f - +"%s")
# Explanation:
# Line 1 extracts the published date from the HTML file
# Line 2 re-arranges this information, and converts it into DD MM YY format
# Line 3 converts this into a UNIX timestamp
date_mod+=$'\n'
fi
done
date_mod=$(echo "$date_mod" | head -n -1) # Remove last (empty) line from variable
@ -201,3 +211,4 @@ md_to_html
gen_sorted_file_list # Sets the 'sorted_file_list' variable
gen_index_page "$sorted_file_list" # Uses the 'sorted_file_list' variable
clean_up

Loading…
Cancel
Save