Fixed issues with concurrent document conversion
Previously, all converted documents ended up with the same content because of a bug in the concurrent conversion. I fixed this by creating a function, 'convert_file()', that converts an individual document, and then calling this function concurrently for each file that I want to convert. To prevent race conditions, where two parallel conversions access the same file, I ensured that each file gets placed into its own unique directory with a randomly generated name.
This commit is contained in:
125
build.sh
125
build.sh
@@ -15,6 +15,15 @@
|
||||
# two directories: 'source' (which holds your Markdown files) and 'output' (which holds the
|
||||
# converted HTML).
|
||||
#
|
||||
# --------------------------------------------------------------------------
|
||||
# IMPORTANT - Each Markdown file MUST be in its own directory!
|
||||
# --------------------------------------------------------------------------
|
||||
# This is because of the way I handle conversion. The conversion occurs
|
||||
# concurrently, and each file is copied into the same temporary location.
|
||||
# Each file is placed into a directory with the same name as the parent
|
||||
# directory.
|
||||
# --------------------------------------------------------------------------
|
||||
#
|
||||
# In addition to these directories, three files are needed in 'website':
|
||||
# 1. 'header.html' - A header, which is prepended to every source file. Unfortunately, this must
|
||||
# be written in regular HTML.
|
||||
@@ -98,39 +107,41 @@ add_date_to_array() {
|
||||
}
|
||||
|
||||
add_header_and_footer() {
    # Wrap the converted HTML file "$1" with the site header and footer, in place.
    #
    # Globals read:
    #   BASE_PATH  - site root; 'header.html', 'footer.html' and 'temp/' live here
    #   parent_dir - name of this conversion's private directory under 'temp/'
    #   meta_array - metadata of the document ('date' and 'noappend' are consulted)
    #
    # Every scratch file is created inside "$BASE_PATH/temp/$parent_dir" and never
    # directly in 'temp/', so conversions that run concurrently cannot overwrite
    # each other's temporary header or page.
    local work_dir="$BASE_PATH/temp/${parent_dir-}"
    local tmp_header="$work_dir/temp_header.html"
    local tmp_page="$work_dir/temp.html"

    # Normally created by the caller already; harmless if it exists
    mkdir -p "$work_dir"

    # Work on a private copy of the header, because it is edited below
    cp "$BASE_PATH/header.html" "$tmp_header"

    # Check for relevant metadata, and perform corresponding action
    if [[ ! -v 'meta_array[date]' ]]; then # If there is no date
        sed -i '$ d' "$tmp_header" # drop the last header line (the 'date published' section)
    fi

    if [[ "${meta_array[noappend]-}" == "true" ]]; then
        sed -i 's/ - Two More Cents//g' "$tmp_header" # 'noappend' removes the suffix from the title
    fi

    # Header first, then the page content
    cat "$tmp_header" "$1" > "$tmp_page"

    # Footer, separated from the content by a newline
    echo >> "$tmp_page"
    cat "$BASE_PATH/footer.html" >> "$tmp_page"

    # Move the assembled page over the original file
    mv "$tmp_page" "$1"
}
|
||||
|
||||
add_header_and_footer_to_index() {
    # Wrap the index page "$1" with the site header and footer, in place.
    #
    # Globals read:
    #   BASE_PATH - site root; 'header.html', 'footer.html' and 'temp/' live here
    #
    # The page is assembled in its own 'temp/index_page' directory rather than
    # directly in 'temp/', keeping it apart from the per-document scratch files.
    local work_dir="$BASE_PATH/temp/index_page"
    local tmp_page="$work_dir/temp.html"

    # -p: do not fail if the directory is left over from an earlier run
    mkdir -p "$work_dir"

    # Add header. For the index page, remove the last line of the header (date published)
    sed '$d' "$BASE_PATH/header.html" | cat - "$1" > "$tmp_page"

    # Add footer, separated from the content by a newline
    echo >> "$tmp_page"
    cat "$BASE_PATH/footer.html" >> "$tmp_page"

    # Move temp file to original location
    mv "$tmp_page" "$1"
}
|
||||
|
||||
replace_vars() {
|
||||
@@ -142,63 +153,61 @@ replace_vars() {
|
||||
|
||||
}
|
||||
|
||||
convert_file() {
    # Helper function for md_to_html(). It takes in the file to convert as an
    # argument, converts it to HTML and writes the result to the matching
    # location under 'output'.
    #
    # Arguments: $1 - path of the Markdown file (inside "$BASE_PATH/source")
    # Globals read:    BASE_PATH
    # Globals written: 'metadata' (by read_metadata) and 'meta_array' (by
    #                  convert_to_array); both are unset again before returning.
    #
    # 'parent_dir' is local to this function, but remains visible to
    # add_header_and_footer() because bash locals are dynamically scoped.
    local file_to_conv="$1"
    local parent_dir work_dir staged_file num_lines path_for_output lua_filter
    local -a pandoc_args

    echo "Converting $file_to_conv"

    read_metadata "$file_to_conv" # Sets the 'metadata' variable

    # Create a uniquely named directory under 'temp' for this file. mktemp both
    # picks the name and creates the directory in one atomic step, so two
    # conversions running at the same time can never end up sharing a directory.
    mkdir -p "$BASE_PATH/temp"
    work_dir="$(mktemp -d "$BASE_PATH/temp/XXXXXXXX")" || return 1
    parent_dir="${work_dir##*/}"

    # Copy file to temp dir and strip metadata (one line more than 'metadata' holds)
    staged_file="$work_dir/$(basename -- "$file_to_conv")"
    cp -- "$file_to_conv" "$staged_file"
    num_lines=$(( $(printf '%s\n' "$metadata" | wc -l) + 1 ))
    sed -i "1,${num_lines}d" "$staged_file"

    # Construct path for output file: same relative path as below 'source',
    # with the '.md' extension replaced by '.html'
    path_for_output="$(realpath --relative-to="${BASE_PATH}/source" "$file_to_conv")"
    path_for_output="${BASE_PATH}/output/${path_for_output}"
    path_for_output="$(dirname -- "$path_for_output")/$(basename -- "$path_for_output" .md).html"
    mkdir -p "$(dirname -- "$path_for_output")"

    # Every filter needs its own '--lua-filter' flag; passing the bare glob would
    # make pandoc treat the second and later filters as input documents.
    pandoc_args=(-f markdown --wrap=preserve)
    for lua_filter in "$BASE_PATH"/pandoc_filters/*; do
        [[ -e "$lua_filter" ]] || continue # glob matched nothing
        pandoc_args+=(--lua-filter "$lua_filter")
    done

    # Convert the file (using the given filters), and place the output in the correct location.
    pandoc "${pandoc_args[@]}" "$staged_file" > "$path_for_output"

    convert_to_array "$metadata" #Sets the 'meta_array' array

    add_date_to_array "$file_to_conv" #Uses 'meta_array' array
    add_header_and_footer "$path_for_output" # Uses 'meta_array' array and 'parent_dir' variable
    replace_vars "$path_for_output" #Uses 'meta_array' array

    unset metadata meta_key meta_value meta_array path_for_output
}
|
||||
|
||||
md_to_html() {
    # Convert .md files from 'source' and place them into the correct locations into 'output'.
    #
    # Since the conversion is the slowest part of the script, every document is
    # handed to convert_file() in its own background subshell. convert_file()
    # does the whole job for one document (conversion, header and footer,
    # variable replacement), so no second pass over the files is needed here.
    #
    # Globals read: BASE_PATH
    local md_file

    # Concurrently convert each document. The file names are read NUL-delimited
    # so that paths containing spaces or newlines reach convert_file() intact.
    while IFS= read -r -d '' md_file; do
        (convert_file "$md_file") &
    done < <(find "$BASE_PATH/source" -name "*.md" -print0)

    # Wait for all documents to finish converting, then remove all temporary files.
    # -r is required because each conversion works in its own sub-directory;
    # ':?' aborts instead of expanding to '/temp/*' should BASE_PATH be empty.
    wait
    rm -rf -- "${BASE_PATH:?}"/temp/*
}
|
||||
|
||||
|
||||
gen_sorted_file_list() { # Generate a list of the HTMl files, sorted by when they were last modified (read from the contents of the HTML file)
|
||||
local files=$(find "$BASE_PATH/output" -name "*.html")
|
||||
local date_mod
|
||||
|
Reference in New Issue
Block a user