Fixed issues with concurrent document conversion
Previously, every converted document ended up with the same content because of a bug in the concurrent conversion. I fixed this by creating a function, 'convert_file()', that converts a single document, and calling that function concurrently for each file to be converted. To prevent race conditions, where two parallel conversions touch the same file, each file is now placed into its own unique directory with a randomly-generated name.
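In outline, the new flow looks roughly like this. This is a simplified sketch, not the script itself: it assumes flat 'source' and 'output' directories and uses 'mktemp -d' where build.sh generates its own random directory names.

convert_file() {
	local file="$1"
	local work_dir
	work_dir="$(mktemp -d)"   # sketch only: a unique temp directory per conversion, so parallel runs never share paths
	cp "$file" "$work_dir/"
	pandoc -f markdown "$work_dir/$(basename "$file")" > "output/$(basename "$file" .md).html"
	rm -rf "$work_dir"
}

for file in source/*.md; do
	(convert_file "$file") &  # fork one conversion per file
done
wait                              # block until every conversion has finished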
build.sh
@@ -15,6 +15,15 @@
#  two directories: 'source' (which holds your Markdown files) and 'output' (which holds the
#  converted CSS.
#
#  --------------------------------------------------------------------------
#  IMPORTANT - Each Markdown file MUST be in its own directory!
#  --------------------------------------------------------------------------
#  This is because of the way I handle conversion. The conversion occurs
#  concurrently, and each file is copied into the same temporary location.
#  Each file is placed into a directory with the same name as the parent
#  directory.
#  --------------------------------------------------------------------------
#
#  In addition to these directories, three files are needed in 'website':
#  1. 'header.html' - A header, which is prepended to every source file. Unfortunately, this must
#		      be written in regular HTML.
@@ -98,39 +107,41 @@ add_date_to_array() {
}

add_header_and_footer() {
#	Copy header to temporary location
	cp "$BASE_PATH/header.html" "$BASE_PATH/temp/temp_header.html"
#	Copy header to temporary location - 'parent_dir' is used to ensure that
#	each temporary header is in its own directory
	cp "$BASE_PATH/header.html" "$BASE_PATH/temp/$parent_dir/temp_header.html"

#	Check for relevant metadata, and perform corresponding action
	if [[ ! -v "meta_array["date"]" ]]; then # If there is no date
		sed -i '$ d' "$BASE_PATH/temp/temp_header.html" # remove the 'date published' section of the header
		sed -i '$ d' "$BASE_PATH/temp/$parent_dir/temp_header.html" # remove the 'date published' section of the header
	fi

	if [[ "${meta_array[noappend]-}" == "true" ]]; then
		sed -i 's/ - Two More Cents//g' "$BASE_PATH/temp/temp_header.html" # 'noappend' removes the suffix from the title
		sed -i 's/ - Two More Cents//g' "$BASE_PATH/temp/$parent_dir/temp_header.html" # 'noappend' removes the suffix from the title
	fi

#	Add header
	cat "$BASE_PATH/temp/temp_header.html" | cat - "$1" > "$BASE_PATH/temp/temp.html"
	cat "$BASE_PATH/temp/$parent_dir/temp_header.html" | cat - "$1" > "$BASE_PATH/temp/$parent_dir/temp.html"

#	Add footer
	echo >> "$BASE_PATH/temp/temp.html" # Add newline
	cat "$BASE_PATH/footer.html" >> "$BASE_PATH/temp/temp.html"
	echo >> "$BASE_PATH/temp/$parent_dir/temp.html" # Add newline
	cat "$BASE_PATH/footer.html" >> "$BASE_PATH/temp/$parent_dir/temp.html"

#	Move temp file to original location
	mv "$BASE_PATH/temp/temp.html" "$1"
	mv "$BASE_PATH/temp/$parent_dir/temp.html" "$1"
}

add_header_and_footer_to_index() {
	mkdir "$BASE_PATH/temp/index_page"
#	Add header
	cat "$BASE_PATH/header.html" | head -n -1 | cat - "$1" > "$BASE_PATH/temp/temp.html" # For the index page, remove the last line of the header (date published)
	cat "$BASE_PATH/header.html" | head -n -1 | cat - "$1" > "$BASE_PATH/temp/index_page/temp.html" # For the index page, remove the last line of the header (date published)

#	Add footer
	echo >> "$BASE_PATH/temp/temp.html" # Add newline
	cat "$BASE_PATH/footer.html" >> "$BASE_PATH/temp/temp.html"
	echo >> "$BASE_PATH/temp/index_page/temp.html" # Add newline
	cat "$BASE_PATH/footer.html" >> "$BASE_PATH/temp/index_page/temp.html"

#	Move temp file to original location
	mv "$BASE_PATH/temp/temp.html" "$1"
	mv "$BASE_PATH/temp/index_page/temp.html" "$1"
}

replace_vars() {
@@ -142,63 +153,61 @@ replace_vars() {

}

convert_file() {
#	Helper function for md_to_html(). It takes in the file to convert as an argument,
#	and converts that file.
	file_to_conv="$1"
	echo "Converting $file_to_conv"

	read_metadata "$file_to_conv" # Sets the 'metadata' variable

#	Generate a random 8-character alphabetic string, until we find
#	one that doesn't exist in the 'temp' directory. This string
#	will serve as the parent directory of our file.
	parent_dir="$(tr -dc A-Za-z </dev/urandom | head -c 8; echo)"
	while ls -r temp | grep -q "$parent_dir" ; do
		parent_dir="$(tr -dc A-Za-z </dev/urandom | head -c 8; echo)"
	done

#	Copy file to temp dir and strip metadata
	mkdir -p "$BASE_PATH/temp/$parent_dir/"
	cp "$file_to_conv" "$BASE_PATH/temp/$parent_dir/"
	let num_lines=$(echo "$metadata" | wc -l)+1
	sed -i "1,${num_lines}d" "$BASE_PATH/temp/$parent_dir/$(basename "$file_to_conv")"

#	Construct path for output file
	local path_for_output=$(realpath --relative-to="${BASE_PATH}/source" "$file_to_conv")
	path_for_output="${BASE_PATH}/output/${path_for_output}"
	path_for_output="$(dirname $path_for_output)/$(basename $path_for_output .md).html"

#	Convert the file (using the given filters), and place the output in the correct location.
	pandoc --lua-filter "$BASE_PATH"/pandoc_filters/* -f markdown --wrap=preserve "$BASE_PATH/temp/$parent_dir/$(basename "$file_to_conv")" > "${path_for_output}"

	convert_to_array "$metadata" #Sets the 'meta_array' array

	add_date_to_array "$file_to_conv" #Uses 'meta_array' array
	add_header_and_footer "$path_for_output" # Uses 'meta_array' array and 'parent_dir' variable
	replace_vars "$path_for_output" #Uses 'meta_array' array

	unset metadata meta_key meta_value meta_array path_for_output

}

md_to_html() {
#	Convert .md files from 'source' and place them into the correct locations into 'output'
#	Since the conversion is the slowest part of the script, I run each conversion process
#	in a separate thread (first loop). Then I wait for all of them to finish, and add the
#	header and footers and replace the variables in each converted document (second loop).
	local files=$(find "$BASE_PATH/source" -name "*.md")

#	Concurrently convert each document
	for file in $files; do

		echo "Converting $file"

		read_metadata "$file" # Sets the 'metadata' variable

#		Copy file to temp dir and strip metadata
		cp "$file" "$BASE_PATH/temp/"
		let num_lines=$(echo "$metadata" | wc -l)+1
		sed -i "1,${num_lines}d" "$BASE_PATH/temp/$(basename "$file")"

#		Construct path for output file
		local path_for_output=$(realpath --relative-to="${BASE_PATH}/source" "$file")
		path_for_output="${BASE_PATH}/output/${path_for_output}"
		path_for_output="$(dirname $path_for_output)/$(basename $path_for_output .md).html"

#		Convert the file (using the given filters), and place the output in the correct location.
#		The process is forked, so that all documents can be converted concurrently.
		(pandoc --lua-filter "$BASE_PATH"/pandoc_filters/* -f markdown --wrap=preserve "$BASE_PATH/temp/$(basename "$file")" > "${path_for_output}") &

		unset metadata
		(convert_file "$file") &
		unset metadata path_for_output
	done

#	Wait for all documents to finish converting, then remove all temporary files.
	wait
	rm "$BASE_PATH"/temp/*

	for file in $files; do
#		This function was already called above in the previous loop.
#		However, I need to call it again, since I need the metadata
#		again for each file to create the 'meta_array' variable.
		read_metadata "$file" # Sets the 'metadata' variable

		convert_to_array "$metadata" #Sets the 'meta_array' array
		add_date_to_array "$file" #Uses 'meta_array' array

#		Construct path for output file. This is the same path that the converted
#		document is written to, in the loop above.
		local path_for_output=$(realpath --relative-to="${BASE_PATH}/source" "$file")
		path_for_output="${BASE_PATH}/output/${path_for_output}"
		path_for_output="$(dirname $path_for_output)/$(basename $path_for_output .md).html"

		add_header_and_footer "$path_for_output" # Uses 'meta_array' array
		replace_vars "$path_for_output" #Uses 'meta_array' array

		unset metadata meta_key meta_value meta_array
	done
	rm -rf "$BASE_PATH"/temp/*
}


gen_sorted_file_list() { # Generate a list of the HTMl files, sorted by when they were last modified (read from the contents of the HTML file)
	local files=$(find "$BASE_PATH/output" -name "*.html")
	local date_mod