Current File : //opt/managed_servers/scripts/puppet.sh
# Files used by the puppet check, both located under BACKGROUND_DATA:
# the JSON result written by puppet_check_main and the errors file that is
# handed to run_check_in_background.
readonly PUPPET_CHECK_DATA="${BACKGROUND_DATA}/puppet_check.data"
readonly PUPPET_CHECK_ERRORS="${BACKGROUND_DATA}/puppet_check.errors"

# Substring of the puppet agent output that triggers the lock-file timeout check.
readonly PUPPET_CHECK_IN_PROGRESS="client already in progress"

# Prefix the nagios check_puppet output must start with to count as healthy.
readonly NAGIOS_PUPPET_OK="OK:"

# Known puppet errors that can be repaired automatically.
# Entry N of this list is repaired by entry N of AUTO_FIX_CMD, so the two
# arrays must stay aligned.
readonly AUTO_FIX_ERR_LIST=(
  "Failed to generate additional resources using 'eval_generate': getaddrinfo: Name or service not known"
  "Error: rpmdb open failed"
)

# Repair commands for the known errors above (same order as AUTO_FIX_ERR_LIST).
# Each entry is a shell snippet that is executed through eval.
readonly AUTO_FIX_CMD=(
  'csf -a 199.188.202.73 1>/dev/null 2>&1; csf -ra'
  '/usr/local/cpanel/scripts/find_and_fix_rpm_issues'
)
#######################################
# Re-runs the puppet agent after an auto-fix was applied and records whether
# the fix worked.
# Globals:
#   PUPPET_BIN       (read, optional) puppet executable to run; defaults to
#                    /opt/puppetlabs/bin/puppet
#   SERVICE_DOWN, SERVICE_ACTIVE, AUTO_FIX_FAIL, AUTO_FIX_SUCCESS (read)
#   puppetsync_data  (written) keys service_status and auto_fix_status; the
#                    array is expected to be declared by the caller
# Arguments:
#   None
#######################################
puppet_fix_check() {
  local puppet_bin="${PUPPET_BIN:-/opt/puppetlabs/bin/puppet}"
  local puppetsync_fix_res
  local puppetsync_fix_rc=0

  # Declaration and assignment are deliberately separate: with
  # `local var=$(cmd)` the status seen afterwards is that of `local`
  # (always 0), which would hide every agent failure.
  puppetsync_fix_res=$("$puppet_bin" agent -tv --server=pmc04.dcops.tech \
    --ssldir=/etc/puppetlabs/puppet/ssl/pmc04 2>&1) || puppetsync_fix_rc=$?

  # A non-zero status alone is not a failure (the agent also exits non-zero
  # when it only reported notices); the fix counts as failed only when the
  # output additionally mentions an error or failure.
  if (( puppetsync_fix_rc != 0 )) \
    && grep -qi -e error -e fail <<< "$puppetsync_fix_res"; then
    puppetsync_data[service_status]="$SERVICE_DOWN"
    puppetsync_data[auto_fix_status]="$AUTO_FIX_FAIL"
  else
    puppetsync_data[service_status]="$SERVICE_ACTIVE"
    puppetsync_data[auto_fix_status]="$AUTO_FIX_SUCCESS"
  fi
}
#######################################
# Kills a hung puppet process, restarts the puppet service and re-runs the
# agent. Does nothing when pgrep finds no puppet process.
# Globals:
#   PUPPET_BIN      (read, optional) puppet executable to run; defaults to
#                   /opt/puppetlabs/bin/puppet
#   puppetsync_res  (written) replaced with the output of the new agent run;
#                   not declared here, so the caller's variable is updated
# Arguments:
#   None
#######################################
puppet_halted_process_kill() {
  local puppet_bin="${PUPPET_BIN:-/opt/puppetlabs/bin/puppet}"
  local puppet_proc_id

  # pgrep exits non-zero when nothing matches; that is an expected outcome
  # here, only its output matters.
  puppet_proc_id=$(pgrep puppet) || true
  [[ -n "$puppet_proc_id" ]] || return 0

  write_log "Killing the halted puppet process."
  # killall needs the process name; without it nothing is signalled.
  # SIGKILL is used because the process is considered hung at this point.
  killall -9 puppet 2>/dev/null
  systemctl restart puppet 2>/dev/null
  puppetsync_res=$("$puppet_bin" agent -tv --server=pmc04.dcops.tech \
    --ssldir=/etc/puppetlabs/puppet/ssl/pmc04 2>&1)
}
#######################################
# Handles an "agent run already in progress" result: extracts the lock file
# path from the agent output and decides whether the run is stale.
#   - lock file missing            -> puppet_halted_process_kill
#   - lock file older than 1 hour  -> puppet_halted_process_kill, then the
#                                     lock file is removed
#   - lock file younger than that  -> only logged, the run is left alone
# Globals:
#   puppetsync_res  (read) agent output; the lock path is the text between
#                   "(" and " exists"
# Arguments:
#   None
#######################################
puppetsync_timeout_check() {
  local -r lock_file_timeout=3600 # 1 hour
  local lock_file
  local lock_file_age

  # grep exits non-zero when the output names no lock file; that case is
  # covered by the existence test below (an empty path never exists).
  lock_file=$(grep -oP "(?<=\().*(?= exists)" <<< "$puppetsync_res") || true

  if [[ ! -e "$lock_file" ]]; then
    puppet_halted_process_kill
    return
  fi

  lock_file_age=$(get_file_age_in_sec "$lock_file")
  if [[ $lock_file_age -gt $lock_file_timeout ]]; then
    write_log "The puppet lock file timeout."
    puppet_halted_process_kill
    # The path comes from command output: quote it and end option parsing so
    # it reaches rm as exactly one operand.
    rm -f -- "$lock_file"
  else
    write_log "The puppet lock file is present, puppetsync in progress."
  fi
}
#######################################
# Applies every known auto-fix whose error text occurs in the puppet output.
# When at least one fix was applied, puppet_fix_check is called to verify the
# result; otherwise the service is marked as down.
# Globals:
#   AUTO_FIX_ERR_LIST  (read) known error texts
#   AUTO_FIX_CMD       (read) repair command for the error at the same index
#   SERVICE_DOWN       (read)
#   puppetsync_data    (written) service_status when no fix matched
# Arguments:
#   $1 - puppet agent output to scan
#######################################
puppet_err_autofix() {
  local puppet_res="$1"
  local fix_applied=""
  local idx # local on purpose, so no counter of the caller is clobbered

  for idx in "${!AUTO_FIX_ERR_LIST[@]}"; do
    # Literal substring match of the known error text.
    if [[ "$puppet_res" == *"${AUTO_FIX_ERR_LIST[idx]}"* ]]; then
      # eval is limited to entries of the AUTO_FIX_CMD table; puppet output is
      # never evaluated. Quoted so the command string is passed through
      # exactly as written (no word splitting or globbing beforehand).
      eval "${AUTO_FIX_CMD[idx]}" 1>/dev/null 2>&1
      fix_applied=1
    fi
  done

  # Check if the error is still present after the fix was applied.
  if [[ -n "$fix_applied" ]]; then
    puppet_fix_check
  else
    puppetsync_data[service_status]="$SERVICE_DOWN"
  fi
}
#######################################
# Determines the puppet sync status and writes it as JSON to PUPPET_CHECK_DATA.
# The nagios check_puppet plugin is consulted first; only when its output does
# not start with NAGIOS_PUPPET_OK is the puppet agent run directly and its
# output inspected (in-progress handling, then error auto-fix).
# Globals:
#   PUPPET_BIN                (read, optional) puppet executable to run;
#                             defaults to /opt/puppetlabs/bin/puppet
#   NAGIOS_PUPPET_OK, PUPPET_CHECK_IN_PROGRESS, SERVICE_ACTIVE (read)
#   PUPPET_CHECK_DATA         (read) path the JSON result is written to
# Arguments:
#   None
#######################################
puppet_check_main() {
  # Both variables are local to this function but are read and written by the
  # helpers called below (puppetsync_timeout_check, puppet_err_autofix).
  declare -A puppetsync_data
  local puppetsync_res
  local puppetsync_rc=0
  local puppet_bin="${PUPPET_BIN:-/opt/puppetlabs/bin/puppet}"
  local nagios_check_puppet

  # Only the plugin's output is evaluated; its exit status is not used.
  nagios_check_puppet=$(/usr/local/nagios/plugins/check_puppet) || true

  if [[ $nagios_check_puppet =~ ^$NAGIOS_PUPPET_OK ]]; then
    puppetsync_data[service_status]="$SERVICE_ACTIVE"
  else
    puppetsync_res=$("$puppet_bin" agent -tv --server=pmc04.dcops.tech \
      --ssldir=/etc/puppetlabs/puppet/ssl/pmc04 2>&1) || puppetsync_rc=$?

    if (( puppetsync_rc != 0 )); then
      if [[ $puppetsync_res =~ "$PUPPET_CHECK_IN_PROGRESS" ]]; then
        # May replace puppetsync_res with the output of a fresh agent run.
        puppetsync_timeout_check
      fi
      # When puppetsync contains "Notice" results, the exit code will be
      # non 0; in such cases the result is additionally checked for the
      # presence of errors/failures, ignoring all Info/Notice results.
      if grep -qi -e error -e fail <<< "$puppetsync_res"; then
        puppet_err_autofix "$puppetsync_res"
      else
        puppetsync_data[service_status]="$SERVICE_ACTIVE"
      fi
    else
      puppetsync_data[service_status]="$SERVICE_ACTIVE"
    fi
  fi

  # Keys are quoted so each one reaches bash_arr_to_json as a single argument
  # regardless of the current IFS.
  bash_arr_to_json puppetsync_data "${!puppetsync_data[@]}" > "$PUPPET_CHECK_DATA"
}
#######################################
# Entry point of the puppet check: hands puppet_check_main to
# run_check_in_background (defined elsewhere) together with the data file,
# the errors file and a lock file named after this function.
# Globals:
#   PUPPET_CHECK_DATA, PUPPET_CHECK_ERRORS, CHECK_LOCKS_DIR (read)
# Arguments:
#   None
#######################################
puppet_check() {
  # The whitespace before the line continuation matters: without it the
  # errors path and the lock path are joined into a single argument.
  run_check_in_background "puppet_check_main" "$PUPPET_CHECK_DATA" "$PUPPET_CHECK_ERRORS" \
    "${CHECK_LOCKS_DIR}/${FUNCNAME[0]}.lock"
}