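In this example, all files in the current directory whose names end in doseweighted.mrc are listed, sorted numerically by the third underscore-separated field of the file name, in a text file called doseweighted.txt.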
# Collect every regular file directly inside the current directory (no
# recursion) whose name ends in doseweighted.mrc, order the names numerically
# by their third underscore-separated field, and save the result to
# doseweighted.txt.
find . -maxdepth 1 -type f -name '*doseweighted.mrc' -print \
  | sort -t _ -k 3,3n \
  > doseweighted.txt
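The following script reads the list file created above (doseweighted.txt) and creates a directory of symbolic links to the listed files. This is useful for TOPAZ or crYOLO when the original directory is too big to process at once. In this example, the new directory receives links to the first 4,000 files (lines 1 to 4000 of the list).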
#!/usr/bin/env bash
#
# Create symbolic links for one chunk (a contiguous range of lines) of a
# sorted file list, so that a tool can be pointed at a smaller directory of
# links instead of the full source directory.
#
# Usage: adjust the configuration below and run the script from the directory
# that contains the list file. Every setting can also be overridden from the
# environment without editing the script, e.g.:
#   START_LINE=4001 END_LINE=8000 DEST_ROOT=/data/links_2 bash <script>
#
# Exit status: 0 on success; 1 on invalid configuration, a missing list file,
# or a destination path that is occupied by something other than a symlink.
set -euo pipefail

# ---------------------- CONFIGURATION ----------------------
# 1. Path to your sorted list file (adjust this if needed)
SORTED_LIST="${SORTED_LIST:-doseweighted.txt}"
# 2. Directory where the original files actually reside (where you ran 'find .')
# This is used to construct the absolute source path.
SRC_DIR="${SRC_DIR:-/data/path/to/source}"
# 3. Destination folder for the symlinks, add the new directory at the end
DEST_ROOT="${DEST_ROOT:-/data/path/to/destination/new_directory_name}"
# 4. Chunk definitions (1-based, inclusive line numbers of the list file)
START_LINE="${START_LINE:-1}"
END_LINE="${END_LINE:-4000}"
# -----------------------------------------------------------

# Print an error message to stderr and abort the script.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

# Reject anything that is not a positive decimal integer without leading
# zeros: a leading zero would be read as octal in arithmetic context, and a
# reversed range would otherwise produce a zero or negative chunk size.
[[ "${START_LINE}" =~ ^[1-9][0-9]*$ ]] \
  || die "START_LINE must be a positive integer (got '${START_LINE}')."
[[ "${END_LINE}" =~ ^[1-9][0-9]*$ ]] \
  || die "END_LINE must be a positive integer (got '${END_LINE}')."
((END_LINE >= START_LINE)) \
  || die "END_LINE (${END_LINE}) must not be smaller than START_LINE (${START_LINE})."

# The links store SRC_DIR verbatim; a relative target would be resolved
# relative to the link's own directory and point at the wrong place.
[[ "${SRC_DIR}" == /* ]] || die "SRC_DIR must be an absolute path (got '${SRC_DIR}')."

# Maximum number of files this run can link.
CHUNK_SIZE=$((END_LINE - START_LINE + 1))

echo "Starting symlink creation for files ${START_LINE} to ${END_LINE}..."

# Check if the list file exists
if [[ ! -f "${SORTED_LIST}" ]]; then
  die "Sorted list file '${SORTED_LIST}' not found in the current directory."
fi

linked=0
missing=0
last_dir=""

# Read the requested line range from the list file.
# A single sed call prints lines START_LINE..END_LINE and quits right after
# END_LINE, so no pipeline stage can be killed by SIGPIPE (which 'pipefail'
# would turn into a script failure). Feeding the loop through process
# substitution keeps it in the current shell, so the counters survive.
while IFS= read -r relpath || [[ -n "${relpath}" ]]; do
  # Ignore blank lines in the list.
  [[ -n "${relpath}" ]] || continue

  # relpath is relative (e.g., ./file_00001_...); drop the leading './' so it
  # can be appended to both the source and the destination root.
  name="${relpath#./}"
  source_path="${SRC_DIR}/${name}"
  dest_path="${DEST_ROOT}/${name}"

  # Do not create dangling links: report and skip entries whose source file
  # is not present under SRC_DIR.
  if [[ ! -e "${source_path}" ]]; then
    printf 'WARNING: source file not found, skipping: %s\n' "${source_path}" >&2
    missing=$((missing + 1))
    continue
  fi

  # Create the necessary subdirectory structure, but only when the target
  # directory changes, to avoid spawning mkdir once per file.
  dest_dir="${dest_path%/*}"
  if [[ "${dest_dir}" != "${last_dir}" ]]; then
    mkdir -p -- "${dest_dir}"
    last_dir="${dest_dir}"
  fi

  # Never overwrite real data: only an absent path or an existing symlink
  # may be (re)linked.
  if [[ -e "${dest_path}" && ! -L "${dest_path}" ]]; then
    die "Destination '${dest_path}' exists and is not a symlink; refusing to replace it."
  fi

  # Create the absolute symlink. -f replaces a link left by a previous run so
  # the script can be re-run; -n stops ln from descending into an existing
  # link that points at a directory.
  ln -sfn -- "${source_path}" "${dest_path}"
  linked=$((linked + 1))
done < <(sed -n -e "${START_LINE},${END_LINE}p" -e "${END_LINE}q" -- "${SORTED_LIST}")

# Report what was actually done rather than the requested chunk size.
echo "Successfully linked ${linked} files to ${DEST_ROOT}."
if ((missing > 0)); then
  printf 'WARNING: skipped %d list entries whose source file was not found.\n' \
    "${missing}" >&2
fi
if ((linked + missing < CHUNK_SIZE)); then
  printf 'NOTE: the list provided fewer than the %d requested entries for lines %d-%d.\n' \
    "${CHUNK_SIZE}" "${START_LINE}" "${END_LINE}" >&2
fi