Fixed issues with concurrent document conversion

Before, the documents all used to have the same content due to a bug
in the concurrent conversion. I fixed this by creating a function,
'convert_file()', that converts an individual document. I then called
this function concurrently for each file that I want to convert. To
prevent race conditions, where two parallel conversions access the same
file, I ensured that each file gets placed into its own unique directory,
with a randomly-generated name.
master
Aadhavan Srinivasan 9 months ago
parent 0023d8ec7f
commit 1f05a69106

@ -15,6 +15,15 @@
# two directories: 'source' (which holds your Markdown files) and 'output' (which holds the
# converted HTML).
#
# --------------------------------------------------------------------------
# IMPORTANT - Each Markdown file MUST be in its own directory!
# --------------------------------------------------------------------------
# This is because of the way I handle conversion. The conversion occurs
# concurrently, and each file is copied into the same temporary location.
# Each file is placed into a directory with the same name as the parent
# directory.
# --------------------------------------------------------------------------
#
# In addition to these directories, three files are needed in 'website':
# 1. 'header.html' - A header, which is prepended to every source file. Unfortunately, this must
# be written in regular HTML.
@ -98,39 +107,41 @@ add_date_to_array() {
}
add_header_and_footer() {
  # Wrap the converted document "$1" with the site header and footer, in place.
  #
  # Globals:
  #   BASE_PATH  (read) - directory holding 'header.html', 'footer.html' and 'temp/'
  #   parent_dir (read) - name of this conversion's private directory inside 'temp/'
  #   meta_array (read) - document metadata; the 'date' and 'noappend' keys are consulted
  # Arguments:
  #   $1 - path of the HTML file to wrap (it is overwritten with the result)
  #
  # Every scratch file lives in "$BASE_PATH/temp/$parent_dir", never directly in
  # "$BASE_PATH/temp", so that conversions running in parallel cannot overwrite
  # each other's header copy or assembled output.
  local work_dir="$BASE_PATH/temp/$parent_dir"
  local header_copy="$work_dir/temp_header.html"
  local assembled="$work_dir/temp.html"

  mkdir -p "$work_dir"

  # The header is edited per document, so work on a private copy of it
  cp "$BASE_PATH/header.html" "$header_copy"

  # Check for relevant metadata, and perform corresponding action
  if [[ ! -v 'meta_array[date]' ]]; then # If there is no date
    sed -i '$ d' "$header_copy" # remove the 'date published' section of the header (its last line)
  fi
  if [[ "${meta_array[noappend]-}" == "true" ]]; then
    sed -i 's/ - Two More Cents//g' "$header_copy" # 'noappend' removes the suffix from the title
  fi

  # Assemble: header, then the document, then a newline, then the footer
  cat "$header_copy" "$1" > "$assembled"
  echo >> "$assembled"
  cat "$BASE_PATH/footer.html" >> "$assembled"

  # Move temp file to original location
  mv "$assembled" "$1"
}
add_header_and_footer_to_index() {
  # Wrap the index page "$1" with the site header and footer, in place.
  #
  # Globals:
  #   BASE_PATH (read) - directory holding 'header.html', 'footer.html' and 'temp/'
  # Arguments:
  #   $1 - path of the index HTML file (it is overwritten with the result)
  #
  # The page is assembled inside its own 'temp/index_page' directory rather than
  # directly in 'temp/', so it cannot collide with other scratch files there.
  local work_dir="$BASE_PATH/temp/index_page"
  local assembled="$work_dir/temp.html"

  # '-p' so that a directory left over from an earlier call is not an error
  mkdir -p "$work_dir"

  # Add header
  head -n -1 "$BASE_PATH/header.html" | cat - "$1" > "$assembled" # For the index page, remove the last line of the header (date published)
  # Add footer
  echo >> "$assembled" # Add newline
  cat "$BASE_PATH/footer.html" >> "$assembled"
  # Move temp file to original location
  mv "$assembled" "$1"
}
replace_vars() {
@ -142,62 +153,60 @@ replace_vars() {
}
convert_file() {
  # Helper function for md_to_html(). It takes in the file to convert as an argument,
  # and converts that file.
  #
  # Globals:
  #   BASE_PATH  (read)    - directory holding 'source/', 'output/', 'temp/' and 'pandoc_filters/'
  #   metadata   (read)    - set by read_metadata(); unset again before returning
  #   parent_dir (written) - name of this file's private directory inside 'temp/';
  #                          read afterwards by add_header_and_footer()
  # Arguments:
  #   $1 - path of a Markdown file below "$BASE_PATH/source"
  # Returns:
  #   non-zero if the private working directory cannot be created
  #
  # md_to_html() runs this function in a subshell, one per document.
  local file_to_conv="$1"
  echo "Converting $file_to_conv"

  read_metadata "$file_to_conv" # Sets the 'metadata' variable

  # Give this file its own uniquely-named directory inside 'temp'. 'mktemp -d'
  # picks the name and creates the directory in one atomic step, so two
  # conversions running at the same time can never end up sharing a directory.
  mkdir -p "$BASE_PATH/temp"
  local work_dir
  work_dir="$(mktemp -d "$BASE_PATH/temp/XXXXXXXX")" || return
  parent_dir="$(basename "$work_dir")"

  # Copy file to temp dir and strip metadata
  local stripped_copy
  stripped_copy="$work_dir/$(basename "$file_to_conv")"
  cp "$file_to_conv" "$stripped_copy"
  # One more line than the metadata itself is removed from the top of the copy
  local num_lines=$(( $(echo "$metadata" | wc -l) + 1 ))
  sed -i "1,${num_lines}d" "$stripped_copy"

  # Construct path for output file: same relative location as in 'source',
  # but below 'output' and with '.md' replaced by '.html'
  local path_for_output
  path_for_output="$(realpath --relative-to="${BASE_PATH}/source" "$file_to_conv")"
  path_for_output="${BASE_PATH}/output/${path_for_output}"
  path_for_output="$(dirname "$path_for_output")/$(basename "$path_for_output" .md).html"
  mkdir -p "$(dirname "$path_for_output")"

  # Pass every filter with its own '--lua-filter' flag. A bare glob after a single
  # flag would attach the flag to the first filter only, and pandoc would read
  # the remaining filter scripts as input documents.
  local -a filter_args=()
  local filter
  for filter in "$BASE_PATH"/pandoc_filters/*; do
    [[ -e "$filter" ]] || continue # glob matched nothing
    filter_args+=("--lua-filter=$filter")
  done

  # Convert the file (using the given filters), and place the output in the correct location.
  # (The '+' expansion keeps an empty filter list safe under 'set -u' on older bash.)
  pandoc ${filter_args[@]+"${filter_args[@]}"} -f markdown --wrap=preserve "$stripped_copy" > "${path_for_output}"

  convert_to_array "$metadata" #Sets the 'meta_array' array

  add_date_to_array "$file_to_conv" #Uses 'meta_array' array
  add_header_and_footer "$path_for_output" # Uses 'meta_array' array and 'parent_dir' variable
  replace_vars "$path_for_output" #Uses 'meta_array' array

  unset metadata meta_key meta_value meta_array path_for_output
}

md_to_html() {
  # Convert .md files from 'source' and place them into the correct locations into 'output'
  #
  # Globals:
  #   BASE_PATH (read) - directory holding 'source/', 'output/' and 'temp/'
  #
  # Each document is converted by convert_file() in its own background subshell.
  local file

  # Concurrently convert each document. The file names are read NUL-delimited
  # so that paths containing spaces are not split into several words.
  while IFS= read -r -d '' file; do
    (convert_file "$file") &

    unset metadata path_for_output
  done < <(find "$BASE_PATH/source" -name "*.md" -print0)

  # Wait for all documents to finish converting, then remove all temporary files.
  wait
  # ':?' aborts instead of expanding to '/temp/*' should BASE_PATH be empty
  rm -rf -- "${BASE_PATH:?}"/temp/*
}
gen_sorted_file_list() { # Generate a list of the HTMl files, sorted by when they were last modified (read from the contents of the HTML file)
local files=$(find "$BASE_PATH/output" -name "*.html")

Loading…
Cancel
Save