Fixed issues with concurrent document conversion

Previously, every converted document ended up with the same content because of a bug in the concurrent conversion. I fixed this by creating a function, 'convert_file()', that converts a single document, and by calling that function concurrently for each file that I want to convert. To prevent race conditions, where two parallel conversions access the same file, I ensured that each file is placed into its own unique directory with a randomly generated name.
2024-04-29 17:56:58 -04:00
parent 0023d8ec7f
commit 1f05a69106
1 changed files with 67 additions and 58 deletions
--- a/build.sh
+++ b/build.sh
@@ -15,6 +15,15 @@
 #  two directories: 'source' (which holds your Markdown files) and 'output' (which holds the
 #  converted CSS.
 #
+#  --------------------------------------------------------------------------
+#  IMPORTANT - Each Markdown file MUST be in its own directory!
+#  --------------------------------------------------------------------------
+#  This is because of the way I handle conversion. The conversion occurs 
+#  concurrently, and each file is copied into the same temporary location. 
+#  Each file is placed into a directory with the same name as the parent 
+#  directory.
+#  --------------------------------------------------------------------------
+#
 #  In addition to these directories, three files are needed in 'website':
 #  1. 'header.html' - A header, which is prepended to every source file. Unfortunately, this must
 #		      be written in regular HTML.
@@ -98,39 +107,41 @@ add_date_to_array() {
 }

 add_header_and_footer() {
-#	Copy header to temporary location
-	cp "$BASE_PATH/header.html" "$BASE_PATH/temp/temp_header.html"
+#	Copy header to temporary location - 'parent_dir' is used to ensure that
+#	each temporary header is in its own directory
+	cp "$BASE_PATH/header.html" "$BASE_PATH/temp/$parent_dir/temp_header.html"

 #	Check for relevant metadata, and perform corresponding action
 	if [[ ! -v "meta_array["date"]" ]]; then # If there is no date
-		sed -i '$ d' "$BASE_PATH/temp/temp_header.html" # remove the 'date published' section of the header
+		sed -i '$ d' "$BASE_PATH/temp/$parent_dir/temp_header.html" # remove the 'date published' section of the header
 	fi

 	if [[ "${meta_array[noappend]-}" == "true" ]]; then
-		sed -i 's/ - Two More Cents//g' "$BASE_PATH/temp/temp_header.html" # 'noappend' removes the suffix from the title
+		sed -i 's/ - Two More Cents//g' "$BASE_PATH/temp/$parent_dir/temp_header.html" # 'noappend' removes the suffix from the title
 	fi

 #	Add header
-	cat "$BASE_PATH/temp/temp_header.html" | cat - "$1" > "$BASE_PATH/temp/temp.html"
+	cat "$BASE_PATH/temp/$parent_dir/temp_header.html" | cat - "$1" > "$BASE_PATH/temp/$parent_dir/temp.html"

 #	Add footer
-	echo >> "$BASE_PATH/temp/temp.html" # Add newline
-	cat "$BASE_PATH/footer.html" >> "$BASE_PATH/temp/temp.html"
+	echo >> "$BASE_PATH/temp/$parent_dir/temp.html" # Add newline
+	cat "$BASE_PATH/footer.html" >> "$BASE_PATH/temp/$parent_dir/temp.html"

 #	Move temp file to original location
-	mv "$BASE_PATH/temp/temp.html" "$1"
+	mv "$BASE_PATH/temp/$parent_dir/temp.html" "$1"
 }

 add_header_and_footer_to_index() {
+	mkdir "$BASE_PATH/temp/index_page"
 #	Add header
-	cat "$BASE_PATH/header.html" | head -n -1 | cat - "$1" > "$BASE_PATH/temp/temp.html" # For the index page, remove the last line of the header (date published)
+	cat "$BASE_PATH/header.html" | head -n -1 | cat - "$1" > "$BASE_PATH/temp/index_page/temp.html" # For the index page, remove the last line of the header (date published)

 #	Add footer
- 	echo >> "$BASE_PATH/temp/temp.html" # Add newline
-	cat "$BASE_PATH/footer.html" >> "$BASE_PATH/temp/temp.html"
+ 	echo >> "$BASE_PATH/temp/index_page/temp.html" # Add newline
+	cat "$BASE_PATH/footer.html" >> "$BASE_PATH/temp/index_page/temp.html"

 #	Move temp file to original location
-	mv "$BASE_PATH/temp/temp.html" "$1"
+	mv "$BASE_PATH/temp/index_page/temp.html" "$1"
 }

 replace_vars() {
@@ -142,63 +153,61 @@ replace_vars() {

 }

+convert_file() {
+#	Helper function for md_to_html(). It takes in the file to convert as an argument,
+#	and converts that file.
+	file_to_conv="$1"
+	echo "Converting $file_to_conv"
+
+	read_metadata "$file_to_conv" # Sets the 'metadata' variable
+
+#	Generate a random 8-character alphabetic string, until we find
+#	one that doesn't exist in the 'temp' directory. This string
+#	will serve as the parent directory of our file.
+	parent_dir="$(tr -dc A-Za-z </dev/urandom | head -c 8; echo)"
+	while ls -r temp | grep -q "$parent_dir" ; do
+		parent_dir="$(tr -dc A-Za-z </dev/urandom | head -c 8; echo)"
+	done
+
+#	Copy file to temp dir and strip metadata
+	mkdir -p "$BASE_PATH/temp/$parent_dir/"
+	cp "$file_to_conv" "$BASE_PATH/temp/$parent_dir/"
+	let num_lines=$(echo "$metadata" | wc -l)+1
+	sed -i "1,${num_lines}d" "$BASE_PATH/temp/$parent_dir/$(basename "$file_to_conv")"
+
+#	Construct path for output file
+	local path_for_output=$(realpath --relative-to="${BASE_PATH}/source" "$file_to_conv")
+	path_for_output="${BASE_PATH}/output/${path_for_output}"
+	path_for_output="$(dirname $path_for_output)/$(basename $path_for_output .md).html"
+
+#	Convert the file (using the given filters), and place the output in the correct location.
+	pandoc --lua-filter "$BASE_PATH"/pandoc_filters/* -f markdown --wrap=preserve "$BASE_PATH/temp/$parent_dir/$(basename "$file_to_conv")" > "${path_for_output}"
+
+	convert_to_array "$metadata" #Sets the 'meta_array' array
+
+	add_date_to_array "$file_to_conv" #Uses 'meta_array' array
+	add_header_and_footer "$path_for_output" # Uses 'meta_array' array and 'parent_dir' variable
+	replace_vars "$path_for_output" #Uses 'meta_array' array
+
+	unset metadata meta_key meta_value meta_array path_for_output
+
+}
+
 md_to_html() {
 #	Convert .md files from 'source' and place them into the correct locations into 'output'
-#	Since the conversion is the slowest part of the script, I run each conversion process
-#	in a separate thread (first loop). Then I wait for all of them to finish, and add the 
-#	header and footers and replace the variables in each converted document (second loop).
 	local files=$(find "$BASE_PATH/source" -name "*.md")

+#	Concurrently convert each document
 	for file in $files; do
-
-		echo "Converting $file"
-
-		read_metadata "$file" # Sets the 'metadata' variable
-
-#		Copy file to temp dir and strip metadata
-		cp "$file" "$BASE_PATH/temp/"
-		let num_lines=$(echo "$metadata" | wc -l)+1
-		sed -i "1,${num_lines}d" "$BASE_PATH/temp/$(basename "$file")"
-
-#		Construct path for output file
-		local path_for_output=$(realpath --relative-to="${BASE_PATH}/source" "$file")
-		path_for_output="${BASE_PATH}/output/${path_for_output}"
-		path_for_output="$(dirname $path_for_output)/$(basename $path_for_output .md).html"
-
-#		Convert the file (using the given filters), and place the output in the correct location.
-#		The process is forked, so that all documents can be converted concurrently.
-		(pandoc --lua-filter "$BASE_PATH"/pandoc_filters/* -f markdown --wrap=preserve "$BASE_PATH/temp/$(basename "$file")" > "${path_for_output}") &
-
-		unset metadata
+		(convert_file "$file") &
+		unset metadata path_for_output
 	done

 #	Wait for all documents to finish converting, then remove all temporary files.
 	wait
-	rm "$BASE_PATH"/temp/*
-	
-	for file in $files; do
-#		This function was already called above in the previous loop.
-#		However, I need to call it again, since I need the metadata 
-#		again for each file to create the 'meta_array' variable.
-		read_metadata "$file" # Sets the 'metadata' variable
-
-		convert_to_array "$metadata" #Sets the 'meta_array' array
-		add_date_to_array "$file" #Uses 'meta_array' array
-
-#		Construct path for output file. This is the same path that the converted
-#		document is written to, in the loop above.
-		local path_for_output=$(realpath --relative-to="${BASE_PATH}/source" "$file")
-		path_for_output="${BASE_PATH}/output/${path_for_output}"
-		path_for_output="$(dirname $path_for_output)/$(basename $path_for_output .md).html"
-
-		add_header_and_footer "$path_for_output" # Uses 'meta_array' array
-		replace_vars "$path_for_output" #Uses 'meta_array' array
-
-		unset metadata meta_key meta_value meta_array
-	done
+	rm -rf "$BASE_PATH"/temp/*
 }

-
 gen_sorted_file_list() { # Generate a list of the HTMl files, sorted by when they were last modified (read from the contents of the HTML file)
 	local files=$(find "$BASE_PATH/output" -name "*.html")
 	local date_mod