Made the document conversion concurrent (each document is converted in its own background process), to speed up the script (50% reduction in time)

master
Aadhavan Srinivasan
parent 554071f7f4
commit 0023d8ec7f

@ -144,7 +144,9 @@ replace_vars() {
md_to_html() { md_to_html() {
# Convert .md files from 'source' and place them into the correct locations into 'output' # Convert .md files from 'source' and place them into the correct locations into 'output'
# Since the conversion is the slowest part of the script, each conversion is run as a
# separate background process (first loop). The script then waits for all of them to finish,
# and adds the header and footer and replaces the variables in each converted document (second loop).
local files=$(find "$BASE_PATH/source" -name "*.md") local files=$(find "$BASE_PATH/source" -name "*.md")
for file in $files; do for file in $files; do
@ -153,9 +155,6 @@ md_to_html() {
read_metadata "$file" # Sets the 'metadata' variable read_metadata "$file" # Sets the 'metadata' variable
convert_to_array "$metadata" #Sets the 'meta_array' array
add_date_to_array "$file" #Uses 'meta_array' array
# Copy file to temp dir and strip metadata # Copy file to temp dir and strip metadata
cp "$file" "$BASE_PATH/temp/" cp "$file" "$BASE_PATH/temp/"
let num_lines=$(echo "$metadata" | wc -l)+1 let num_lines=$(echo "$metadata" | wc -l)+1
@ -166,10 +165,32 @@ md_to_html() {
path_for_output="${BASE_PATH}/output/${path_for_output}" path_for_output="${BASE_PATH}/output/${path_for_output}"
path_for_output="$(dirname $path_for_output)/$(basename $path_for_output .md).html" path_for_output="$(dirname $path_for_output)/$(basename $path_for_output .md).html"
# Convert the file (using the given filters), and place the output in the correct location # Convert the file (using the given filters), and place the output in the correct location.
pandoc --lua-filter "$BASE_PATH"/pandoc_filters/* -f markdown --wrap=preserve "$BASE_PATH/temp/$(basename "$file")" > "${path_for_output}" # The process is forked, so that all documents can be converted concurrently.
(pandoc --lua-filter "$BASE_PATH"/pandoc_filters/* -f markdown --wrap=preserve "$BASE_PATH/temp/$(basename "$file")" > "${path_for_output}") &
unset metadata
done
# Wait for all documents to finish converting, then remove all temporary files.
wait
rm "$BASE_PATH"/temp/* rm "$BASE_PATH"/temp/*
for file in $files; do
# read_metadata was already called for this file in the first loop.
# However, 'metadata' is per-file and is unset at the end of each iteration
# there, so it must be read again here to rebuild the 'meta_array' variable.
read_metadata "$file" # Sets the 'metadata' variable
convert_to_array "$metadata" #Sets the 'meta_array' array
add_date_to_array "$file" #Uses 'meta_array' array
# Construct path for output file. This is the same path that the converted
# document is written to, in the loop above.
local path_for_output=$(realpath --relative-to="${BASE_PATH}/source" "$file")
path_for_output="${BASE_PATH}/output/${path_for_output}"
path_for_output="$(dirname $path_for_output)/$(basename $path_for_output .md).html"
add_header_and_footer "$path_for_output" # Uses 'meta_array' array add_header_and_footer "$path_for_output" # Uses 'meta_array' array
replace_vars "$path_for_output" #Uses 'meta_array' array replace_vars "$path_for_output" #Uses 'meta_array' array

Loading…
Cancel
Save