#!/bin/bash

# Prints the usage message and terminates the script.
# Arguments: none
# Outputs:   the help text, written to stderr (nothing goes to stdout)
# Exits:     always with status 1, also when help was explicitly requested
function print_help_and_exit
{
cat >&2 << 'EOF'

'voronota-pocket' script provides a way for identifying and describing
pockets, cavities and channels using the Voronoi tessellation vertices.

Options:
    --input | -i                   string   *  input structure file in PDB or mmCIF format
    --input-filter-query           string      input atoms filtering query, default is '--match-tags-not het'
    --probe-min                    number      scanning probe radius minimum, default is 2.0
    --probe-max                    number      scanning probe radius maximum, default is 30.0
    --buriedness-core              number      buriedness minimum for pocket start, default is 0.7
    --buriedness-rim               number      buriedness maximum for pocket end, default is 0.4
    --subpockets                   number      number of sorted subpockets to include, default is 999999
    --tangent-leeway               number      tangent sphere radius expansion for atom checks, default is 0.5
    --tangent-radius-min           number      tangent sphere radius minimum, default is 1.4
    --tetrahedron-edge-max         number      tetrahedron edge maximum, default is 999999
    --voxelization-factor          number      voxelization factor, default is 1.0
    --output-atoms                 string      file to output analyzed atoms with annotations
    --output-buriedness-pdb        string      file to output PDB file with buriedness in b-factors
    --output-pocketness-pdb        string      file to output PDB file with pocketness in b-factors
    --output-vertices              string      file to output Voronoi vertices of pocket
    --output-voxels-pdb            string      file to output voxels as PDB file
    --draw-tetrahedrons            string      file to output PyMol script for drawing pocket tetrahedrons
    --draw-spheres                 string      file to output PyMol script for drawing pocket spheres
    --output-log                   string      file to output detailed log on calculations
    --output-header                            flag to output header before result line
    --help | -h                                flag to display help message and exit

Standard output (one line):
    {input file} {max buriedness} {all atoms} {all vertices} {pocket atoms} {pocket vertices} {volume of pocket tetrahedrons}

Suggested parameters
	for pocket analysis:
		--probe-min 2 --probe-max 30 --buriedness-core 0.7 --buriedness-rim 0.4
	for large channel analysis:
		--probe-min 5 --probe-max 30 --buriedness-core 0.8 --buriedness-rim 0.6

Other suggestions:
	For large pockets or channels the spheres drawing option may
	produce very large files (too large for PyMol).

EOF
exit 1
}

# Path this script was invoked as; read-only for the rest of the run.
readonly ZEROARG=$0

# When the script was started through a path (not found via PATH lookup),
# prepend its directory to PATH - presumably so that the executables installed
# next to this script are the ones that get used.
if [[ "$ZEROARG" == *"/"* ]]
then
	# The directory is resolved in a subshell, so the caller's working directory
	# is never touched, and every expansion is quoted so that paths containing
	# spaces work. 'cd' output is discarded because it prints the target when
	# CDPATH is in effect. If the directory cannot be entered, PATH is left as is.
	if SCRIPT_DIR="$(cd -- "$(dirname -- "$ZEROARG")" > /dev/null && pwd)"
	then
		export PATH="$SCRIPT_DIR:$PATH"
	fi
fi

# Abort early unless the required external programs can be found on PATH.
# They are probed in a fixed order; the first missing one is reported.
for REQUIRED_TOOL in voronota voronota-resources jq bc
do
	if ! command -v "$REQUIRED_TOOL" &> /dev/null
	then
		echo >&2 "Error: '$REQUIRED_TOOL' executable not in binaries path"
		exit 1
	fi
done
unset REQUIRED_TOOL

# Default values of all command line options (described in the help text).
INFILE=""
INPUT_FILTER_QUERY="--match-tags-not het"
PROBE_MIN="2.0"
PROBE_MAX="30.0"
BURIEDNESS_CORE="0.7"
BURIEDNESS_RIM="0.4"
SUBPOCKETS="999999"
TANGENT_LEEWAY="0.5"
TANGENT_RADIUS_MIN="1.4"
TETRAHEDRON_EDGE_MAX="999999"
VOXELIZATION_FACTOR="1.0"
OUTPUT_ATOMS=""
OUTPUT_BURIEDNESS_PDB=""
OUTPUT_POCKETNESS_PDB=""
OUTPUT_VERTICES=""
OUTPUT_VOXELS_PDB=""
DRAW_TETRAHEDRONS=""
DRAW_SPHERES=""
OUTPUT_LOG=""
OUTPUT_HEADER=false
HELP_MODE=false

# Parse the command line. Each iteration consumes one option and, for options
# that take a value, the argument following it. The loop condition is an
# arithmetic test ('>' inside [[ ]] would compare strings).
while (( $# > 0 ))
do
	OPTION="$1"
	OPTARG="$2"
	shift

	# Map the option to the name of the variable that stores its value.
	# Flags are set right away and skip the value handling below.
	OPTION_VARIABLE=""
	case "$OPTION" in
	-i|--input)               OPTION_VARIABLE=INFILE ;;
	--input-filter-query)     OPTION_VARIABLE=INPUT_FILTER_QUERY ;;
	--probe-min)              OPTION_VARIABLE=PROBE_MIN ;;
	--probe-max)              OPTION_VARIABLE=PROBE_MAX ;;
	--buriedness-core)        OPTION_VARIABLE=BURIEDNESS_CORE ;;
	--buriedness-rim)         OPTION_VARIABLE=BURIEDNESS_RIM ;;
	--subpockets)             OPTION_VARIABLE=SUBPOCKETS ;;
	--tangent-leeway)         OPTION_VARIABLE=TANGENT_LEEWAY ;;
	--tangent-radius-min)     OPTION_VARIABLE=TANGENT_RADIUS_MIN ;;
	--tetrahedron-edge-max)   OPTION_VARIABLE=TETRAHEDRON_EDGE_MAX ;;
	--voxelization-factor)    OPTION_VARIABLE=VOXELIZATION_FACTOR ;;
	--output-atoms)           OPTION_VARIABLE=OUTPUT_ATOMS ;;
	--output-buriedness-pdb)  OPTION_VARIABLE=OUTPUT_BURIEDNESS_PDB ;;
	--output-pocketness-pdb)  OPTION_VARIABLE=OUTPUT_POCKETNESS_PDB ;;
	--output-vertices)        OPTION_VARIABLE=OUTPUT_VERTICES ;;
	--output-voxels-pdb)      OPTION_VARIABLE=OUTPUT_VOXELS_PDB ;;
	--draw-tetrahedrons)      OPTION_VARIABLE=DRAW_TETRAHEDRONS ;;
	--draw-spheres)           OPTION_VARIABLE=DRAW_SPHERES ;;
	--output-log)             OPTION_VARIABLE=OUTPUT_LOG ;;
	--output-header)
		OUTPUT_HEADER=true
		continue
		;;
	-h|--help)
		HELP_MODE=true
		continue
		;;
	*)
		echo >&2 "Error: invalid command line option '$OPTION'"
		exit 1
		;;
	esac

	# A value-taking option given as the very last argument has no value;
	# report it instead of silently storing an empty string. An explicitly
	# passed empty value ('') is still accepted.
	if (( $# < 1 ))
	then
		echo >&2 "Error: missing value for command line option '$OPTION'"
		exit 1
	fi

	printf -v "$OPTION_VARIABLE" '%s' "$OPTARG"
	shift
done

# Show the usage text (and exit) when help was requested or no input was named.
# HELP_MODE holds 'true' or 'false' and is executed as a command.
if [[ -z "$INFILE" ]] || $HELP_MODE; then
	print_help_and_exit
fi

# The input must be present with a size greater than zero.
[[ -s "$INFILE" ]] || {
	echo >&2 "Error: input file does not exist"
	exit 1
}

# Validate the option values. Numeric relations are evaluated by 'bc -l',
# which prints "1" when the relation holds; the values may be fractional,
# so shell integer arithmetic cannot be used here.

# Buriedness core must lie in (0,1].
if [ "$(echo "$BURIEDNESS_CORE <= 0.0" | bc -l)" == "1" ] || [ "$(echo "$BURIEDNESS_CORE > 1.0" | bc -l)" == "1" ]
then
	echo >&2 "Error: buriedness core value '$BURIEDNESS_CORE' not in range (0,1]"
	exit 1
fi

# Buriedness rim must lie in (0,core].
if [ "$(echo "$BURIEDNESS_RIM <= 0.0" | bc -l)" == "1" ] || [ "$(echo "$BURIEDNESS_RIM > $BURIEDNESS_CORE" | bc -l)" == "1" ]
then
	echo >&2 "Error: buriedness rim value '$BURIEDNESS_RIM' not in range (0,$BURIEDNESS_CORE] (rim value must be not greater than core value)"
	exit 1
fi

# Probe minimum must lie in (0,probe max].
if [ "$(echo "$PROBE_MIN <= 0.0" | bc -l)" == "1" ] || [ "$(echo "$PROBE_MIN > $PROBE_MAX" | bc -l)" == "1" ]
then
	echo >&2 "Error: probe min value '$PROBE_MIN' not in range (0,$PROBE_MAX] (probe min value must be not greater than probe max value)"
	exit 1
fi

# At least one subpocket must be requested.
if [ "$(echo "$SUBPOCKETS < 1" | bc -l)" == "1" ]
then
	echo >&2 "Error: number of subpockets '$SUBPOCKETS' is less than 1"
	exit 1
fi

# Tangent leeway must lie in [0,1]; the message reports the leeway itself.
if [ "$(echo "$TANGENT_LEEWAY < 0.0" | bc -l)" == "1" ] || [ "$(echo "$TANGENT_LEEWAY > 1" | bc -l)" == "1" ]
then
	echo >&2 "Error: tangent leeway value '$TANGENT_LEEWAY' not in range [0,1]"
	exit 1
fi

# Tangent radius minimum must lie in (0,probe max].
if [ "$(echo "$TANGENT_RADIUS_MIN <= 0.0" | bc -l)" == "1" ] || [ "$(echo "$TANGENT_RADIUS_MIN > $PROBE_MAX" | bc -l)" == "1" ]
then
	echo >&2 "Error: tangent radius min value '$TANGENT_RADIUS_MIN' not in range (0,$PROBE_MAX] (tangent radius min value must be not greater than probe max value)"
	exit 1
fi

# Tetrahedron edge maximum must be strictly greater than the tangent radius
# minimum (equal values are rejected as well, hence "not greater than").
if [ "$(echo "$TETRAHEDRON_EDGE_MAX <= $TANGENT_RADIUS_MIN" | bc -l)" == "1" ]
then
	echo >&2 "Error: tetrahedron edge max value '$TETRAHEDRON_EDGE_MAX' is not greater than tangent radius min value '$TANGENT_RADIUS_MIN'"
	exit 1
fi

# Voxelization factor must lie in [0.4,1.0].
if [ "$(echo "$VOXELIZATION_FACTOR < 0.4" | bc -l)" == "1" ] || [ "$(echo "$VOXELIZATION_FACTOR > 1" | bc -l)" == "1" ]
then
	echo >&2 "Error: voxelization factor value '$VOXELIZATION_FACTOR' not in range [0.4,1.0]"
	exit 1
fi

# Drawing scripts are Python files for PyMol, so their names must end in '.py'.
if [ -n "$DRAW_TETRAHEDRONS" ] && [[ "$DRAW_TETRAHEDRONS" != *".py" ]]
then
	echo >&2 "Error: tetrahedrons drawing script file name '$DRAW_TETRAHEDRONS' does not end with '.py'"
	exit 1
fi

if [ -n "$DRAW_SPHERES" ] && [[ "$DRAW_SPHERES" != *".py" ]]
then
	echo >&2 "Error: spheres drawing script file name '$DRAW_SPHERES' does not end with '.py'"
	exit 1
fi

# Scratch directory for all intermediate files; removed automatically on exit.
# The assignment is kept separate from 'readonly' so that a mktemp failure is
# detected instead of being masked by the exit status of the readonly builtin.
TMPLDIR="$(mktemp -d)" || { echo >&2 "Error: failed to create temporary directory"; exit 1; }
readonly TMPLDIR
trap 'rm -rf -- "$TMPLDIR"' EXIT

# Stream the input (decompressed with zcat when its name ends in '.gz') through
# 'voronota get-balls-from-atoms-file' and two 'voronota query-balls' passes,
# storing the result as $TMPLDIR/balls.
# $INPUT_FILTER_QUERY is deliberately left unquoted: it holds several words
# (by default '--match-tags-not het') that must reach voronota as separate
# arguments.
{
	if [[ "$INFILE" == *".gz" ]]
	then
		zcat "$INFILE"
	else
		cat "$INFILE"
	fi
} \
| voronota get-balls-from-atoms-file \
  --annotated \
  --input-format detect \
  --radii-file <(voronota-resources radii) \
  --include-heteroatoms \
| voronota query-balls \
  --drop-altloc-indicators \
| voronota query-balls $INPUT_FILTER_QUERY \
> "$TMPLDIR/balls"

# An empty result means nothing was read or everything was filtered out.
if [ ! -s "$TMPLDIR/balls" ]
then
	echo >&2 "Error: no atoms in input file '$INFILE'"
	exit 1
fi

# Assemble a command script on the standard output of the group below and pipe
# it into 'voronota run-script'; whatever run-script prints is stored in
# $TMPLDIR/result_log.txt, which the rest of this file parses with jq.
# The heredocs use an unquoted EOF delimiter, so the shell substitutes the
# option values and $TMPLDIR paths before the text reaches voronota.
# The group is a pipeline stage, so variables assigned inside it (such as
# CGO_NAME_OF_TETRAHEDRONS) are not visible after the pipeline.
{
# Commands that are always emitted. The 'spectrum-atoms' line must stay exactly
# as written: it is looked up verbatim in the log by a later jq query.
cat << EOF
import -format plain -file $TMPLDIR/balls
construct-contacts
describe-exposure -probe-min $PROBE_MIN -probe-max $PROBE_MAX -adj-atom-exposure-value buriedness -weight-power 3 -expansion $TANGENT_LEEWAY -smoothing-iterations 3 -smoothing-depth 1
spectrum-atoms -adjunct buriedness -only-summarize
find-connected-components -atoms-core-use [-v buriedness=${BURIEDNESS_CORE}:1.0] -atoms-all-use [-v buriedness=${BURIEDNESS_RIM}:1.0] -adj-component-number pocket_id -contacts-use [-t peripherial]
select-atoms -use [-v pocket_id=1:${SUBPOCKETS}] -name pocket_atoms_main
select-atoms-by-triangulation-query -use [pocket_atoms_main] -expansion $TANGENT_LEEWAY -min-radius $TANGENT_RADIUS_MIN -max-edge $TETRAHEDRON_EDGE_MAX -name pocket_atoms_all
EOF

# Optional: export the atoms when --output-atoms was given.
if [ -n "$OUTPUT_ATOMS" ]
then
cat << EOF
export-atoms -file $TMPLDIR/result_atoms
EOF
fi

# Optional: PDB export with buriedness as b-factor; the first command sets
# buriedness to 0 for the atoms selected by (not [-v buriedness]).
if [ -n "$OUTPUT_BURIEDNESS_PDB" ]
then
cat << EOF
set-adjunct-of-atoms -use (not [-v buriedness]) -name buriedness -value 0
export-atoms -as-pdb -pdb-b-factor buriedness -file $TMPLDIR/result_buriedness.pdb
EOF
fi

# Optional: PDB export with pocketness as b-factor; pocketness is set to 0
# first and then to 1 for the atoms in the pocket_atoms_all selection.
if [ -n "$OUTPUT_POCKETNESS_PDB" ]
then
cat << EOF
set-adjunct-of-atoms -name pocketness -value 0
set-adjunct-of-atoms -use [pocket_atoms_all] -name pocketness -value 1
export-atoms -as-pdb -pdb-b-factor pocketness -file $TMPLDIR/result_pocketness.pdb
EOF
fi

# The vertices file is needed for --output-vertices and for --draw-spheres
# (both later steps read $TMPLDIR/result_vertices).
if [ -n "$OUTPUT_VERTICES" ] || [ -n "$DRAW_SPHERES" ]
then
cat << EOF
export-triangulation [pocket_atoms_all] -strict -min-radius $TANGENT_RADIUS_MIN -max-edge $TETRAHEDRON_EDGE_MAX -file $TMPLDIR/result_vertices
EOF
fi

# Optional: PyMol CGO script of the pocket tetrahedrons. The CGO name is the
# output file name without directory and '.py', with spaces turned into '_'.
if [ -n "$DRAW_TETRAHEDRONS" ]
then
CGO_NAME_OF_TETRAHEDRONS="$(basename "$DRAW_TETRAHEDRONS" .py | tr ' ' '_')"
cat << EOF
add-figure-of-triangulation [pocket_atoms_all] -strict -min-radius $TANGENT_RADIUS_MIN -max-edge $TETRAHEDRON_EDGE_MAX -figure-name pocket_tetrahedrons
color-figures -name pocket_tetrahedrons -col 0x00FFFF
export-figures-as-pymol-cgo -figure-name pocket_tetrahedrons -cgo-name '$CGO_NAME_OF_TETRAHEDRONS' -file $TMPLDIR/result_draw_tetrahedrons.py
EOF
fi

# Optional: raw voxel export; converted to a PDB file in a later step.
if [ -n "$OUTPUT_VOXELS_PDB" ]
then
cat << EOF
export-triangulation-voxels [pocket_atoms_all] -strict -min-radius $TANGENT_RADIUS_MIN -max-edge $TETRAHEDRON_EDGE_MAX -search-step-factor $VOXELIZATION_FACTOR -file $TMPLDIR/voxels_raw
EOF
fi

# Must remain the last command: the summary values printed at the end of this
# script are read from the final entry of the log ('.commands[-1].output').
echo "print-triangulation [pocket_atoms_all] -strict -min-radius $TANGENT_RADIUS_MIN -max-edge $TETRAHEDRON_EDGE_MAX -only-summary"
} \
| voronota run-script --exit-on-first-failure --max-unfolding 5 \
> "$TMPLDIR/result_log.txt"

# "true" only when the log reports as many successful commands as commands run.
GLOBAL_SUCCESS="$(jq -c '.script_summary | .commands_successful == .commands_all' < "$TMPLDIR/result_log.txt")"

# Deliver the log when requested: '-' sends it to stderr, anything else is
# treated as a file name.
case "$OUTPUT_LOG" in
"")
	;;
"-")
	cat "$TMPLDIR/result_log.txt" >&2
	;;
*)
	cat "$TMPLDIR/result_log.txt" > "$OUTPUT_LOG"
	;;
esac

if [[ "$GLOBAL_SUCCESS" != "true" ]]
then
	# Show the log with the error, unless it already went to stderr above.
	[[ "$OUTPUT_LOG" == "-" ]] || cat "$TMPLDIR/result_log.txt" >&2
	echo >&2 "Error: failed to complete all steps, see the log above."
	exit 1
fi

# Every requested output must have produced a non-empty intermediate file.
if [[ -n "$OUTPUT_ATOMS" && ! -s "$TMPLDIR/result_atoms" ]]
then
	echo >&2 "Error: failed to output atoms."
	exit 1
fi

if [[ -n "$OUTPUT_BURIEDNESS_PDB" && ! -s "$TMPLDIR/result_buriedness.pdb" ]]
then
	echo >&2 "Error: failed to output buriedness as PDB file."
	exit 1
fi

if [[ -n "$OUTPUT_POCKETNESS_PDB" && ! -s "$TMPLDIR/result_pocketness.pdb" ]]
then
	echo >&2 "Error: failed to output pocketness as PDB file."
	exit 1
fi

if [[ -n "$OUTPUT_VERTICES" && ! -s "$TMPLDIR/result_vertices" ]]
then
	echo >&2 "Error: failed to output vertices."
	exit 1
fi

if [[ -n "$OUTPUT_VOXELS_PDB" && ! -s "$TMPLDIR/voxels_raw" ]]
then
	echo >&2 "Error: failed to output voxels."
	exit 1
fi

if [[ -n "$DRAW_TETRAHEDRONS" && ! -s "$TMPLDIR/result_draw_tetrahedrons.py" ]]
then
	echo >&2 "Error: failed to output tetrahedrons drawing script."
	exit 1
fi

# The spheres script is generated from the exported vertices file.
if [[ -n "$DRAW_SPHERES" && ! -s "$TMPLDIR/result_vertices" ]]
then
	echo >&2 "Error: failed to output spheres drawing script."
	exit 1
fi

# Maximum buriedness, taken from the log entry of the spectrum-atoms command
# (the entry is located by its exact command line).
OBSERVED_MAX_BURIEDNESS="$(jq '.commands[] | select(.line=="spectrum-atoms -adjunct buriedness -only-summarize") | .output.spectrum_summary.max_value' "$TMPLDIR/result_log.txt")"

# Output object of the last command recorded in the log.
POCKET_INFO="$(jq -c '.commands[-1].output' "$TMPLDIR/result_log.txt")"

# Individual figures extracted from that object.
NUM_OF_ALL_ATOMS="$(jq -c '.full_triangulation_summary.number_input_balls' <<< "$POCKET_INFO")"
NUM_OF_ALL_VERTICES="$(jq -c '.full_triangulation_summary.number_voronoi_vertices' <<< "$POCKET_INFO")"
NUM_OF_POCKET_ATOMS="$(jq -c '.atoms_summary.number_total' <<< "$POCKET_INFO")"
NUM_OF_POCKET_VERTICES="$(jq -c '.number_of_relevant_voronoi_vertices' <<< "$POCKET_INFO")"
VOLUME_OF_POCKET_TETRAHEDRONS="$(jq -c '.total_relevant_tetrahedron_volume' <<< "$POCKET_INFO")"

# Optional column names, followed by the single result line on stdout.
if $OUTPUT_HEADER
then
	echo "input_file max_buriedness all_atoms all_vertices pocket_atoms pocket_vertices volume_of_pocket_tetrahedrons"
fi

echo "$INFILE $OBSERVED_MAX_BURIEDNESS $NUM_OF_ALL_ATOMS $NUM_OF_ALL_VERTICES $NUM_OF_POCKET_ATOMS $NUM_OF_POCKET_VERTICES $VOLUME_OF_POCKET_TETRAHEDRONS"

# Hand the requested data files over to their destinations. Each step runs
# only when the output was requested and its intermediate file is non-empty.

# Atoms, buriedness PDB and pocketness PDB are moved as they are.
if [[ -n "$OUTPUT_ATOMS" && -s "$TMPLDIR/result_atoms" ]]; then
	mv "$TMPLDIR/result_atoms" "$OUTPUT_ATOMS"
fi

if [[ -n "$OUTPUT_BURIEDNESS_PDB" && -s "$TMPLDIR/result_buriedness.pdb" ]]; then
	mv "$TMPLDIR/result_buriedness.pdb" "$OUTPUT_BURIEDNESS_PDB"
fi

if [[ -n "$OUTPUT_POCKETNESS_PDB" && -s "$TMPLDIR/result_pocketness.pdb" ]]; then
	mv "$TMPLDIR/result_pocketness.pdb" "$OUTPUT_POCKETNESS_PDB"
fi

# Vertices get a header row and are aligned into columns. The intermediate
# file is copied rather than moved, since the spheres drawing step reads it too.
if [[ -n "$OUTPUT_VERTICES" && -s "$TMPLDIR/result_vertices" ]]; then
	{
		echo "A B C D S.x S.y S.z S.r Vol"
		cat "$TMPLDIR/result_vertices"
	} | column -t > "$OUTPUT_VERTICES"
fi

# Each raw voxel line becomes one ball record (first three fields, radius 1.4),
# which voronota then writes out as a PDB file.
if [[ -n "$OUTPUT_VOXELS_PDB" && -s "$TMPLDIR/voxels_raw" ]]; then
	awk '{print "R<VRT>A<O> " $1 " " $2 " " $3 " 1.4 . ."}' "$TMPLDIR/voxels_raw" \
	| voronota write-balls-to-atoms-file --pdb-output "$OUTPUT_VOXELS_PDB" \
	> /dev/null
fi

# Write the PyMol drawing scripts. For both scripts the CGO object name is the
# output file name without directory and '.py', with spaces turned into '_'.
# A name of the form '<short>_state_<N>' is split at its last '_state_' and
# loaded as object '<short>' with <N> as the third argument of cmd.load_cgo.

if [ -n "$DRAW_TETRAHEDRONS" ] && [ -s "$TMPLDIR/result_draw_tetrahedrons.py" ]
then
	CGO_LONG_NAME="$(basename "$DRAW_TETRAHEDRONS" .py | tr ' ' '_')"
	if [[ "$CGO_LONG_NAME" == *"_state_"* ]]
	then
		CGO_STATE="${CGO_LONG_NAME##*_state_}"
		CGO_SHORT_NAME="${CGO_LONG_NAME%_state_*}"
		# Rewrite the load call of the generated script. '..*' is the POSIX
		# spelling of "one or more characters"; '\+' is a GNU sed extension.
		sed "s/cmd.load_cgo($CGO_LONG_NAME,..*)/cmd.load_cgo($CGO_LONG_NAME, '$CGO_SHORT_NAME', $CGO_STATE)/" \
		  "$TMPLDIR/result_draw_tetrahedrons.py" \
		> "$DRAW_TETRAHEDRONS"
	else
		mv -- "$TMPLDIR/result_draw_tetrahedrons.py" "$DRAW_TETRAHEDRONS"
	fi
fi

if [ -n "$DRAW_SPHERES" ] && [ -s "$TMPLDIR/result_vertices" ]
then
	CGO_LONG_NAME="$(basename "$DRAW_SPHERES" .py | tr ' ' '_')"
	{
		echo 'from pymol.cgo import *'
		echo 'from pymol import cmd'
		echo "$CGO_LONG_NAME = [COLOR, 1.0, 1.0, 0.0,"
		echo 'ALPHA, 1.0,'
		# One SPHERE entry per vertex line: fields 5-7 are the sphere center
		# and field 8 its radius (columns S.x S.y S.z S.r of the vertices file).
		awk '{print "SPHERE, " $5 ", " $6 ", " $7 ", " $8 ","}' "$TMPLDIR/result_vertices"
		echo ']'
		if [[ "$CGO_LONG_NAME" == *"_state_"* ]]
		then
			CGO_STATE="${CGO_LONG_NAME##*_state_}"
			CGO_SHORT_NAME="${CGO_LONG_NAME%_state_*}"
			echo "cmd.load_cgo($CGO_LONG_NAME, '$CGO_SHORT_NAME', $CGO_STATE)"
		else
			echo "cmd.load_cgo($CGO_LONG_NAME, '$CGO_LONG_NAME')"
		fi
	} > "$DRAW_SPHERES"
fi

