# Current File : //usr/../opt/managed_servers/python/../scripts/cpu_load.sh
# File that cpu_load_check_main writes the JSON result of the check to.
# NOTE(review): BACKGROUND_DATA is not defined in this part of the file - presumably set by the loader.
readonly CPU_LOAD_DATA="${BACKGROUND_DATA}/cpu_load_check.data";
# Values stored in cpu_load_check_data[cpu_load_reason].
readonly REASON_MYSQL="mysql";
readonly REASON_PHP="php_processes";
readonly REASON_NONE="unknown";
# Error file handed to run_check_in_background by cpu_load_check.
readonly CPU_LOAD_CHECK_ERRORS="${BACKGROUND_DATA}/cpu_load_check.errors";
# Check functions tried in this order by cpu_load_check_main; the first one returning 0 wins.
readonly CPU_LOAD_CASES=(check_if_mysql_is_top_process php_high_cpu_usage);
# File used by find_domains_by_work_dir to map a document root to its domain(s).
USERDATADOMAINS="/etc/userdatadomains";
# Directories searched by find_domain_logs_by_domain_name for per-domain access logs.
NGINX_DOMAIN_LOGS="/var/log/nginx/domains";
APACHE_DOMAIN_LOGS="/var/log/apache2/domlogs";
#######################################
# Print a plain-text traffic report for a chunk of access-log lines.
# The report lists bot hits, failed WordPress logins (POST to wp-login.php
# answered with HTTP 200) and the top 10 client IPs, user-agents and URLs.
# NOTE(review): field positions ($1 IP, $7 URL, $9 status, 6th quote-delimited
# field = user-agent) presumably assume the "combined" log format - confirm.
# Arguments: $@ - log entries, one per line (several arguments are joined)
# Outputs:   the report on stdout
#######################################
domain_log_breakdown(){
    # All working variables are local so a direct (non-subshell) call does not
    # clobber the caller's variables.
    local -a log=() bot_hits=() failed_wp_logins=();
    local entry bot wp_login log_text;
    local top_10_ips top_10_useragents top_10_urls;
    local separator="----------------------";

    readarray -t log <<< "$*";
    for entry in "${log[@]}"; do
        # Removing the "robot" part from the log entry is necessary to avoid double-match with the "bot" regex value
        bot=$(sed -e "s:robot::g" <<< "$entry" | grep -ioP "[a-z0-9_\.]+(bot|spider|crawler)|facebookexternalhit|meta-externalagent");
        if [[ -n $bot ]]; then
            # Deliberately unquoted: one entry may yield several matches (one per line),
            # each of them is counted as a separate hit.
            bot_hits+=($bot);
            continue;
        fi;
        # Account only for POST requests
        wp_login=$(grep -i "wp-login.php" <<< "$entry" | grep POST);
        # A failed WordPress login attempt ends with the HTTP 200 response code, as it opens
        # the domain.com/wp-login.php page again.
        # String comparison: a malformed (non-numeric) status field must not raise an arithmetic error.
        if [[ -n $wp_login && $(awk '{print $9}' <<< "$wp_login") == 200 ]]; then
            failed_wp_logins+=("$(awk '{print $1}' <<< "$wp_login")");
            continue;
        fi;
    done;

    if [[ ${#bot_hits[@]} -gt 0 ]]; then
        # The high CPU load by PHP processes caused by bots
        echo "Total bot hit count - ${#bot_hits[@]}";
        echo "Hits per bot type:"
        printf '%s\n' "${bot_hits[@]}" | sort | uniq -c | sort -rn;
        printf "\n%s\n\n" "$separator";
    fi;
    if [[ ${#failed_wp_logins[@]} -gt 0 ]]; then
        # The high CPU load by PHP processes caused by brute-force WordPress login attempts
        echo "Total WordPress login count - ${#failed_wp_logins[@]}";
        echo "Failed WordPress login attempts per IP:"
        printf '%s\n' "${failed_wp_logins[@]}" | sort | uniq -c | sort -rn;
        printf "\n%s\n\n" "$separator";
    fi;

    printf "%s\n\n" "General log breakdown.";
    log_text=$(printf '%s\n' "${log[@]}");
    # `head` keeps the first 10 lines; its early exit may make `sort` complain, hence 2>/dev/null.
    top_10_ips=$(awk '{print $1}' <<< "$log_text" | sort | uniq -c | sort -rn | head 2>/dev/null);
    top_10_useragents=$(awk -F\" '{print $6}' <<< "$log_text" | sort | uniq -c | sort -rn | head 2>/dev/null);
    top_10_urls=$(awk '{print $7}' <<< "$log_text" | sort | uniq -c | sort -rn | head 2>/dev/null);
    printf "%s\n%s\n\n%s\n\n" "Top 10 IP:" "$top_10_ips" "-------------------------";
    printf "%s\n%s\n\n%s\n\n" "Top 10 user-agents:" "$top_10_useragents" "-------------------------";
    printf "%s\n%s\n\n%s\n\n" "Top 10 URL:" "$top_10_urls" "-------------------------";
    echo "=============================";
}
#######################################
# Store a traffic report per domain log in cpu_load_check_data.
# Globals:
#   last_15_minutes_per_log (read)  - associative array: log path -> log entries
#   cpu_load_check_data     (write) - key "<domain>_log_breakdown" gets the
#                                     report passed through json_escape
#######################################
domain_log_stats(){
    local d_log domain log_breakdown;
    # Keys are quoted so a log path containing whitespace stays one key.
    for d_log in "${!last_15_minutes_per_log[@]}"; do
        # Domain = file name of the log without the "-ssl_log" suffix.
        domain=${d_log##*/};
        domain=${domain%-ssl_log};
        log_breakdown=$(domain_log_breakdown "${last_15_minutes_per_log[$d_log]}");
        cpu_load_check_data["${domain}_log_breakdown"]=$(
            printf "%s\n\n%s\n" "Domain - $domain" "$log_breakdown" | json_escape);
    done;
}
#######################################
# Collect the entries of a log file that are not older than 15 minutes.
# The file is read from its end; reading stops at the first entry that is
# too old or whose timestamp cannot be parsed.
# Globals:   log_last_15_min (write) - matching entries are appended, newest first
# Arguments: $1 - path of the log file
#######################################
filter_out_older_than_15_min_log_entries(){
    local log="$1";
    local last_15_min=$(( $(date +%s) - 900 )); # Current date in seconds minus last 15 minutes
    local line entry_time;
    # Stream the reversed file instead of loading it completely: usually only
    # the tail of a (potentially huge) log is needed.
    while IFS= read -r line; do
        entry_time=$(log_date_to_seconds "$line");
        # Ignore log entries older than 15 minutes
        if [[ -n $entry_time && $entry_time -gt $last_15_min ]]; then
            log_last_15_min+=("$line");
        else
            return 0;
        fi;
    done < <(tac -- "$log");
}
#######################################
# Convert the timestamp of an access-log entry to seconds since the Epoch.
# The timestamp is the text inside the FIRST [...] pair of the entry, e.g.
# [10/Oct/2024:13:55:36 +0000].
# Arguments: $1 - log entry
# Outputs:   seconds since the Epoch on stdout
# Returns:   non-zero (and prints nothing) when the entry has no timestamp
#            or `date` cannot parse it
#######################################
log_date_to_seconds(){
    local log_entry="$1";
    # [^]]+ keeps the match inside the first bracket pair; a greedy match would
    # run up to the last "]" of the line (e.g. a URL such as /?a[]=1).
    local date_re='\[([^]]+)\]';
    local unformatted_date formatted_date;
    # Without a timestamp `date -d ""` would silently report today's midnight.
    [[ $log_entry =~ $date_re ]] || return 1;
    unformatted_date=${BASH_REMATCH[1]};
    # "10/Oct/2024:13:55:36 +0000" -> "10 Oct 2024 13:55:36 +0000"
    formatted_date=${unformatted_date/:/ };
    formatted_date=${formatted_date//\// };
    # seconds since the Epoch
    date -d "$formatted_date" +%s;
}
#######################################
# Print the given files that `file` does not report as empty.
# Globals:   EMPTY_FILE (read) - regex matched against the output of `file`
#            NOTE(review): defined outside this part of the file; if it is
#            empty, every file matches and nothing is printed - confirm.
# Arguments: $@ - whitespace/newline separated list of file paths
# Outputs:   one non-empty file path per line on stdout
#######################################
filter_out_empty_files(){
    local files="$*";
    local f;
    # Unquoted on purpose: the list arrives as one whitespace-separated string.
    for f in $files; do
        if [[ ! $(file "$f") =~ $EMPTY_FILE ]]; then
            printf '%s\n' "$f";
        fi;
    done;
}
#######################################
# Fill last_15_minutes_per_log with the recent entries of every non-empty log.
# Globals:   last_15_minutes_per_log (write) - log path -> entries of the last
#            15 minutes, ONE ENTRY PER LINE (newest first)
# Arguments: $@ - whitespace/newline separated list of log file paths
#######################################
get_last_15_min_from_domain_logs(){
    local logs="$*";
    local non_empty_logs log;
    # Filled by filter_out_older_than_15_min_log_entries through dynamic scoping.
    local -a log_last_15_min;
    non_empty_logs=$(filter_out_empty_files "$logs");
    for log in $non_empty_logs; do
        log_last_15_min=();
        filter_out_older_than_15_min_log_entries "$log";
        # Join with newlines: the consumer (domain_log_breakdown) splits the
        # value into entries line by line; a space-joined value would be
        # treated as one single entry.
        last_15_minutes_per_log["$log"]=$(printf '%s\n' "${log_last_15_min[@]}");
    done;
}
# Print the PIDs of running PHP processes, highest CPU usage (3rd `ps` column) first.
# Lines ending in " lsphp" are skipped.
php_process_id(){
    # '[p]hp' still matches "php" but not this grep's own command line, so the
    # short-lived grep process no longer shows up in the PID list.
    ps faux | grep '[p]hp' | grep -v " lsphp$" | sort -k 3 -rn | awk '{print $2}';
}
# Print the `pwdx` line (working directory) of the process with the given PID.
# Errors, e.g. for a process that already exited, are discarded.
get_process_cwd(){
    local pid=$1;
    pwdx $pid 2>/dev/null;
}
# Filter: reads "<pid>: <dir>" lines on stdin and prints the unique, sorted
# first three path components of every directory that contains "home" but
# not "virtfs" (e.g. /home/user/public_html).
php_process_work_dir_filter_and_sort_dirs(){
    cut -d' ' -f2 \
        | grep home \
        | grep -v virtfs \
        | cut -d/ -f1-4 \
        | sort \
        | uniq;
}
#######################################
# Sample the working directories of the running PHP processes and print the
# unique website directories they belong to.
# Arguments: $1 - number of samples (optional, default 60)
#            $2 - pause between samples in seconds (optional, default 1)
# Outputs:   output of php_process_work_dir_filter_and_sort_dirs
#######################################
php_process_work_dir(){
    local samples="${1:-60}";
    local interval="${2:-1}";
    local c proc;
    for (( c = 0; c < samples; ++c )); do
        for proc in $(php_process_id); do
            get_process_cwd "$proc";
        done;
        sleep "$interval";
    done | php_process_work_dir_filter_and_sort_dirs;
}
#######################################
# Map document roots to domain names using the USERDATADOMAINS file.
# For every directory the addon domain(s) are printed when there are any,
# otherwise all main/sub domains pointing to that directory.
# Globals:   USERDATADOMAINS (read) - path of the userdatadomains file
# Arguments: $1 - whitespace/newline separated list of directories
# Outputs:   one domain per line on stdout
#######################################
find_domains_by_work_dir(){
    local work_dirs="$1";
    local dir userdata_dom userdata_addon;
    for dir in $work_dirs; do
        # -F: the directory is literal text, not a regex ("." must not match any character).
        userdata_dom=$(grep -F -- "=${dir}=" "$USERDATADOMAINS" | grep -e "=main=" -e "=sub=" -e "=addon=");
        # Nothing known about this directory: print nothing instead of a blank line.
        [[ -n $userdata_dom ]] || continue;
        userdata_addon=$(grep "=addon=" <<< "$userdata_dom");
        if [[ -n $userdata_addon ]]; then
            cut -d: -f1 <<< "$userdata_addon";
        else
            cut -d: -f1 <<< "$userdata_dom";
        fi;
    done;
}
#######################################
# Print the access-log files (plain and "-ssl_log") of the given domains.
# Globals:   NGINX_DOMAIN_LOGS, APACHE_DOMAIN_LOGS (read) - log directories
# Arguments: $1 - whitespace/newline separated list of domain names
# Outputs:   one log file path per line on stdout
#######################################
find_domain_logs_by_domain_name(){
    local doms="$1";
    local d log_dir;
    local -a log_dirs=();
    # Only search directories that exist: a server usually runs just one of
    # the two web servers and `find` fails on a missing start directory.
    for log_dir in "$NGINX_DOMAIN_LOGS" "$APACHE_DOMAIN_LOGS"; do
        if [[ -d $log_dir ]]; then
            log_dirs+=("$log_dir");
        fi;
    done;
    if [[ ${#log_dirs[@]} -eq 0 ]]; then
        return 0;
    fi;
    for d in $doms; do
        find "${log_dirs[@]}" -maxdepth 1 \( -name "${d}" -o -name "${d}-ssl_log" \);
    done;
}
#######################################
# CPU load case: PHP processes.
# When php_high_total_cpu_usage confirms that PHP causes the load, the reason,
# the PHP process list, per-domain log statistics and the Apache status are
# stored in cpu_load_check_data.
# Globals:   processes, processes_sorted (read), cpu_load_check_data (write)
# Returns:   0 when PHP is the load reason, 1 otherwise
#######################################
php_high_cpu_usage(){
    local website_php_proc_work_dirs php_proc_domains domlogs apache_status;
    # Filled by get_last_15_min_from_domain_logs, consumed by domain_log_stats.
    declare -A last_15_minutes_per_log;

    # Guard: nothing to report when PHP is not the heavy consumer.
    if ! php_high_total_cpu_usage "$processes_sorted"; then
        return 1;
    fi;

    cpu_load_check_data[cpu_load_reason]="$REASON_PHP";
    unset 'cpu_load_check_data[auto_fix_status]';
    cpu_load_check_data[ps_data]=$(
        printf "%s\n%s\n" "$(head -1 <<< "$processes")" "$(grep php <<< "$processes")" | json_escape);

    # work dirs of the PHP processes -> domains (/etc/userdatadomains) -> domain logs
    # (/var/log/nginx/domains or /var/log/apache2/domlogs) -> entries of the last 15 minutes
    website_php_proc_work_dirs=$(php_process_work_dir);
    php_proc_domains=$(find_domains_by_work_dir "$website_php_proc_work_dirs");
    domlogs=$(find_domain_logs_by_domain_name "$php_proc_domains");
    get_last_15_min_from_domain_logs "$domlogs";
    # Provide domain log statistics
    domain_log_stats;

    apache_status=$(apachectl status 2>/dev/null);
    if [[ -n $apache_status ]]; then
        cpu_load_check_data[apache_status]="$(json_escape <<< "$apache_status")";
    fi;
    return 0;
}
#######################################
# Tell whether the PHP processes together use at least 50% of the total CPU.
# Globals:   VIRT_CORE_COUNT (read) - number of cores
#            php_proc (write)       - every process line containing "php" is
#            appended; NOTE(review): not read in this part of the file,
#            kept in case it is consumed elsewhere.
# Arguments: $1 - process lines, CPU usage in the 9th column
# Returns:   0 when the summed PHP CPU usage reaches the threshold, else 1
#######################################
php_high_total_cpu_usage(){
    local -a top_proc;
    local line;
    local cpu_usage_threshold=$(( VIRT_CORE_COUNT * 50 )); # %50 of total cpu usage
    readarray -t top_proc <<< "$1";
    for line in "${top_proc[@]}"; do
        if [[ $line =~ php ]]; then
            php_proc+=("$line");
        fi;
    done;
    # One awk run sums the 9th column of all "php" lines and compares it with
    # the threshold:
    #   exit 0 - php processes exceed the cpu usage threshold (%50)
    #   exit 1 - high cpu usage is caused by something else
    awk -v threshold="$cpu_usage_threshold" \
        '/php/ { total += $9 } END { exit !(total >= threshold) }' <<< "$1";
}
# Print 80% (the value minus an integer fifth of it) of the 7th column of the
# "Mem:" line of `free -b`.
innodb_extra_ram(){
    local mem_available;
    mem_available=$(free -b | awk '/Mem:/ {print $7}');
    printf '%s\n' "$(( $mem_available - $mem_available / 5 ))";
}
# Helper: put the backup of the MySQL config back in place and delete the backup.
# Arguments: $1 - backup file, $2 - config file
_cpu_check_restore_mysql_config(){
    local backup="$1" config="$2";
    rsync -q "$backup" "$config";
    rm -f -- "$backup";
}

#######################################
# Raise innodb_buffer_pool_size / innodb_log_file_size in the MySQL config and
# restart MySQL. 80% of the extra RAM goes to the buffer pool, 20% to the
# other variable(s). Any failure restores the original config.
# Globals (provided by the caller through dynamic scoping):
#   mysql_config           (read) - path of the MySQL config file
#   innodb_total_extra_ram (read) - bytes that may be handed to InnoDB
# Arguments: $@ - space separated "name=value" pairs as found in the config
#                 ("name=" when the variable is not set)
# Returns:   0 when the config was changed and MySQL restarted, 1 otherwise
#######################################
cpu_check_set_new_innodb_vars(){
    # MySQL size suffixes are powers of 1024, not of 1000.
    local -A human_multipliers=([K]=1024 [M]=1048576 [G]=1073741824);
    local innodb_vars="$*";
    local mysql_config_backup="${mysql_config}_$(date +%s)";
    local ram_total;
    local i_var var_name raw_value multiplier i_value i_extra_ram i_value_new;

    ram_total=$(free -b | awk '/Mem:/ {print $2}');
    rsync -q "$mysql_config" "$mysql_config_backup";

    for i_var in $innodb_vars; do
        var_name=$(cut -d= -f1 <<< "$i_var");
        raw_value=$(cut -d= -f2 <<< "$i_var");
        multiplier="";
        i_value="";
        i_value_new="";
        if [[ $raw_value =~ [a-zA-Z]$ ]]; then
            multiplier=${BASH_REMATCH[0]^^};
        fi;
        if [[ $raw_value =~ [0-9]+ ]]; then
            i_value=${BASH_REMATCH[0]};
        fi;

        if [[ $i_var =~ innodb_buffer_pool_size ]]; then
            i_extra_ram=$(( innodb_total_extra_ram - innodb_total_extra_ram / 5 ));
        else
            i_extra_ram=$(( innodb_total_extra_ram / 5 ));
        fi;

        # Convert a value with a known suffix to bytes (10# guards against leading zeros).
        if [[ -n $i_value && -n $multiplier ]] && [[ -n ${human_multipliers[$multiplier]:-} ]]; then
            i_value=$(( 10#$i_value * ${human_multipliers[$multiplier]} ));
        fi;

        if [[ -z $i_value ]]; then
            # For cases when innodb variable is not set
            i_value_new=$i_extra_ram;
            sed -i "s:\[mysqld\]:&\n$var_name=$i_value_new:" "$mysql_config";
        elif [[ $var_name == "innodb_buffer_pool_size" && $i_value -ge $(( ram_total - ram_total / 4 )) ||
                $var_name == "innodb_log_file_size" && $i_value -ge $(( ram_total / 6 )) ]]; then
            # An additional protection in case the current innodb variables are set too high.
            # A previous iteration may already have edited the config, so undo everything
            # and do not leave the backup file behind.
            _cpu_check_restore_mysql_config "$mysql_config_backup" "$mysql_config";
            return 1;
        else
            i_value_new=$(( i_value + i_extra_ram ));
            sed -i "s/$i_var/$var_name=$i_value_new/" "$mysql_config";
        fi;

        # Restore the mysql config backup if a value wasn't set in the mysql config file
        if ! grep -q "^$var_name=$i_value_new$" "$mysql_config"; then
            _cpu_check_restore_mysql_config "$mysql_config_backup" "$mysql_config";
            return 1;
        fi;
    done;

    if /scripts/restartsrv_mysql &>/dev/null; then
        rm -f -- "$mysql_config_backup";
        return 0;
    fi;
    _cpu_check_restore_mysql_config "$mysql_config_backup" "$mysql_config";
    # Best effort: bring MySQL back up with the restored configuration, otherwise
    # the failed restart would leave the service stopped.
    /scripts/restartsrv_mysql &>/dev/null;
    return 1;
}
#######################################
# MySQL load handling for the "complete" management type: try to enlarge the
# InnoDB settings in /etc/my.cnf when enough RAM is available, otherwise store
# recommendations.
# Globals:   cpu_load_check_data (write) - service_status / auto_fix_status
#            or recommendations
#######################################
mysql_top_process_complete_mgt(){
    local mysql_config="/etc/my.cnf";
    local innodb_buffer_pool_size innodb_log_file_size;
    local innodb_total_extra_ram ram_usage_threshold;

    # Last matching config line of each variable, or "name=" when it is not set.
    innodb_buffer_pool_size=$(
        { grep ^innodb_buffer_pool_size "$mysql_config" || echo "innodb_buffer_pool_size="; } | tail -1);
    innodb_log_file_size=$(
        { grep ^innodb_log_file_size "$mysql_config" || echo "innodb_log_file_size="; } | tail -1);
    innodb_total_extra_ram=$(innodb_extra_ram);
    ram_usage_threshold=$(free -b | awk '/Mem:/ {printf "%d", ($2 * .2 )}') # %20 of total ram

    # If available memory that can be used for innodb variables is less than %20 of total RAM,
    # the optimization can be skipped
    if [[ ! $innodb_total_extra_ram -gt $ram_usage_threshold ]]; then
        # innodb values cannot be optimized due to high memory usage
        cpu_load_check_data[recommendations]=$(printf "%s\n\n%s\n\n%s" \
            "$(cpu_load_recommendation_mysql_optimization_unavailable)" \
            "$(check_if_wp_is_installed_by_active_db)" \
            "$(plan_upgrade_recommendation_if_optimization_fails)" | json_escape);
        return;
    fi;

    if cpu_check_set_new_innodb_vars "$innodb_buffer_pool_size $innodb_log_file_size"; then
        cpu_load_check_data[service_status]=$SERVICE_ACTIVE;
        cpu_load_check_data[auto_fix_status]=$AUTO_FIX_SUCCESS;
    else
        cpu_load_check_data[auto_fix_status]=$AUTO_FIX_FAIL;
    fi;
}
#######################################
# Print the process lines whose CPU usage (9th column, integer part) is
# greater than 9 and that do not belong to this application.
# Globals:   PROJECT_ROOT (read) - lines containing this path are dropped;
#            an empty value disables that filter
# Arguments: $1 - process lines, one per line
# Outputs:   matching lines on stdout
#######################################
ps_filter_cpu_usage() {
    local minimal_threshold=9;
    # A single awk run replaces the per-line awk/cut/sed forks. The path is
    # passed through the environment so it is taken literally.
    CPU_FILTER_ROOT="$PROJECT_ROOT" awk -v min="$minimal_threshold" '
        BEGIN { root = ENVIRON["CPU_FILTER_ROOT"] }
        # Filter out the auto_report app process from the ps command results
        root != "" && index($0, root) { next }
        int($9) > min
    ' <<< "$1";
}
#######################################
# Print the process lines whose CPU usage (9th column) is greater than 1 and
# that do not belong to this application.
# Globals:   PROJECT_ROOT (read) - lines containing this path are dropped;
#            an empty value disables that filter
# Arguments: $1 - process lines, one per line
# Outputs:   matching lines on stdout (blank lines never match)
#######################################
top_filter_cpu_usage() {
    local minimal_threshold=1;
    # A single awk run replaces the per-line awk/sed forks. The path is passed
    # through the environment so it is taken literally.
    CPU_FILTER_ROOT="$PROJECT_ROOT" awk -v min="$minimal_threshold" '
        BEGIN { root = ENVIRON["CPU_FILTER_ROOT"] }
        # Filter out the auto_report app process from the ps command results
        root != "" && index($0, root) { next }
        ($9 + 0) > min
    ' <<< "$1";
}
#######################################
# Sample the MySQL process list, collect the databases with running queries
# and print WordPress-specific recommendations when one of them is referenced
# by a wp-config.php of a hosted site, generic MySQL recommendations otherwise.
# Globals:   USERDATADOMAINS (read) - userdatadomains file, falls back to
#            /etc/userdatadomains when unset
# Arguments: $1 - number of samples (optional, default 31)
#            $2 - pause between samples in seconds (optional, default 1)
# Outputs:   the recommendation text on stdout
#######################################
check_if_wp_is_installed_by_active_db(){
    local samples="${1:-31}";
    local interval="${2:-1}";
    local userdata_file="${USERDATADOMAINS:-/etc/userdatadomains}";
    local queries vhosts_root db_names wp_websites;
    local n vh_root db;

    queries=$(for (( n = 0; n < samples; ++n )); do
        mysql -e "show full processlist;" | grep -e Query -e Execute | grep -v -e NULL -e Progress;
        sleep "$interval";
    done);
    # 5th "="-separated field (after squeezing repeated "=") of every line: the document root.
    vhosts_root=$(tr -s "=" < "$userdata_file" | cut -d= -f5 | sed /^$/d | sort | uniq);
    # 4th column of a process list row is the database. Every sampled row is
    # used: the header line is already removed by the greps above.
    db_names=$(tr -s '\t' ' ' <<< "$queries" | cut -d' ' -f4 | sed /^$/d | sort | uniq);
    wp_websites=$(for vh_root in $vhosts_root; do
        for db in $db_names; do
            # -F: the database name is literal text, not a regex.
            if grep -qF -- "'$db'" "${vh_root}/wp-config.php" 2>/dev/null; then
                echo "$vh_root";
                break;
            fi;
        done;
    done);
    if [[ -n $wp_websites ]]; then
        cpu_load_wordpress_mysql_recommendations;
    else
        cpu_load_generic_mysql_recommendation;
    fi;
}
#######################################
# MySQL load handling for the "basic" management type: store the
# recommendation texts (mysqltuner, optional WordPress part, plan upgrade)
# separated by blank lines, with newlines and tabs escaped as \n and \t.
# Globals:   cpu_load_check_data (write) - key "recommendations"
#######################################
mysql_top_process_basic_mgt(){
    local mtuner_rec wp_rec upgrade_rec;
    mtuner_rec="$(cpu_load_mysqltuner_recommendations)";
    wp_rec="$(check_if_wp_is_installed_by_active_db)";
    upgrade_rec="$(plan_upgrade_recommendation_if_optimization_fails)";
    cpu_load_check_data[recommendations]=$(
        {
            printf "%s\n\n" "$mtuner_rec";
            # The WordPress section is only included when there is one.
            if [[ -n $wp_rec ]]; then
                printf "%s\n\n" "$wp_rec";
            fi;
            printf "%s\n" "$upgrade_rec";
        } | sed -z -e "s/\n/\\\n/g" -e "s/\t/\\\t/g");
}
#######################################
# CPU load case: MySQL.
# MySQL is the load reason when the busiest process is a mysql/maria process
# using at least 50% of all cores. The follow-up depends on the management
# type reported by management_type_check.
# Globals:   processes, processes_sorted, VIRT_CORE_COUNT, BASIC, COMPLETE (read)
#            cpu_load_check_data (write)
# Returns:   0 when MySQL is the reason and a management handler ran, else 1
#######################################
check_if_mysql_is_top_process(){
    local mysql_match="mysql|maria";
    local top_proc mysql_cpu_usage average_load management_type;
    top_proc=$(head -1 <<< "$processes_sorted");
    mysql_cpu_usage=$(awk '{print $9}' <<< "$top_proc" | cut -d. -f1);
    average_load=$(( VIRT_CORE_COUNT * 100 / 2 ));

    # If the top process is using under %50 cores, then check for something else.
    if [[ ! ${mysql_cpu_usage:-0} -ge $average_load ]]; then
        return 1;
    fi;
    # Only blame MySQL when the top process really is MySQL; otherwise the
    # reason would be recorded as "mysql" for any heavy process.
    if ! grep -qiE "$mysql_match" <<< "$top_proc"; then
        return 1;
    fi;

    # The mysql process is the cause of the high CPU usage if it has over %50 cores used.
    cpu_load_check_data[cpu_load_reason]="$REASON_MYSQL";
    cpu_load_check_data[ps_data]=$(printf "%s\n%s\n" "$(head -1 <<< "$processes")" "$top_proc" |
        json_escape);

    management_type=$(management_type_check);
    if [[ $management_type == "$BASIC" ]]; then
        mysql_top_process_basic_mgt;
        return 0;
    elif [[ $management_type == "$COMPLETE" ]]; then
        unset 'cpu_load_check_data[auto_fix_status]';
        mysql_top_process_complete_mgt;
        return 0;
    fi;
    return 1;
}
#######################################
# Compare the 1/5/15 minute load averages with the Icinga critical thresholds
# and store the verdict. The service is "down" only when all three averages
# reach their threshold.
# Globals:   SERVICE_DOWN, SERVICE_ACTIVE (read)
#            cpu_load_check_data (write) - key "service_status"
# Arguments: $1 - load average file (optional, default /proc/loadavg)
#            $2 - cpu info file     (optional, default /proc/cpuinfo)
#######################################
cpu_threshold_check(){
    local loadavg_file="${1:-/proc/loadavg}";
    local cpuinfo_file="${2:-/proc/cpuinfo}";
    # Icinga per-core load thresholds, in percent of one core.
    # The warning level (1 min: 110, 5 min: 90, 15 min: 70) is currently not evaluated.
    local -A icinga_thresholds=([1_min_critical]=120 [5_min_critical]=100 [15_min_critical]=80);
    local core_count;
    local load_average_1_min load_average_5_min load_average_15_min;
    local la_1_min_critical_threshold la_5_min_critical_threshold la_15_min_critical_threshold;

    # Count only the "processor : N" entries. A case-insensitive match on
    # "processor" also counts "model name : ... Processor" lines and can double
    # the result.
    core_count=$(grep -c '^processor' "$cpuinfo_file");
    if [[ ! ${core_count:-0} -gt 0 ]]; then
        core_count=1;
    fi;

    # Load averages in percent; read once so the three values belong together.
    # +0.5 rounds instead of truncating (0.29 * 100 would otherwise become 28).
    read -r load_average_1_min load_average_5_min load_average_15_min < <(
        awk '{printf "%d %d %d\n", $1 * 100 + 0.5, $2 * 100 + 0.5, $3 * 100 + 0.5}' "$loadavg_file");

    # Converting the Icinga per-core thresholds into all-core load thresholds to match the uptime utility values
    la_1_min_critical_threshold=$(( core_count * ${icinga_thresholds[1_min_critical]} ));
    la_5_min_critical_threshold=$(( core_count * ${icinga_thresholds[5_min_critical]} ));
    la_15_min_critical_threshold=$(( core_count * ${icinga_thresholds[15_min_critical]} ));

    if [[ $load_average_1_min -ge $la_1_min_critical_threshold &&
          $load_average_5_min -ge $la_5_min_critical_threshold &&
          $load_average_15_min -ge $la_15_min_critical_threshold ]]; then
        cpu_load_check_data[service_status]=$SERVICE_DOWN;
    else
        cpu_load_check_data[service_status]=$SERVICE_ACTIVE;
    fi;
}
#######################################
# Run the CPU load check and write its result as JSON to CPU_LOAD_DATA.
# When the load is above the thresholds, the functions listed in
# CPU_LOAD_CASES are tried in order until one identifies the reason.
# Globals:   CPU_LOAD_CASES, CPU_LOAD_DATA, SERVICE_ACTIVE, REASON_NONE (read)
# The locals cpu_load_check_data, processes and processes_sorted are read and
# written by the called functions through dynamic scoping.
#######################################
cpu_load_check_main(){
    local -A cpu_load_check_data;
    local top_data_full cpu_count processes processes_sorted check_fn;

    top_data_full=$(top -bn1 -o %CPU);
    # Count only the "processor : N" entries; "model name : ... Processor"
    # lines must not be counted as CPUs.
    cpu_count=$(grep -c '^processor' /proc/cpuinfo);
    cpu_load_check_data[top_data]=$(echo "Total CPUs: $cpu_count" | json_escape;
        head -5 <<< "$top_data_full" | json_escape);
    # Process table of top: header line first, then the processes.
    processes=$(tail -n +7 <<< "$top_data_full");
    processes_sorted=$(top_filter_cpu_usage "$(tail -n +2 <<< "$processes")");

    cpu_threshold_check;
    if [[ ${cpu_load_check_data[service_status]} != "$SERVICE_ACTIVE" ]]; then
        for check_fn in "${CPU_LOAD_CASES[@]}"; do
            # Each cpu load check function should return 0 if the load reason is found and set the
            # cpu_load_check_data[cpu_load_reason] value to describe a specific reason
            if "$check_fn"; then
                break;
            fi;
        done;
        if [[ -z ${cpu_load_check_data[cpu_load_reason]} ]]; then
            cpu_load_check_data[cpu_load_reason]="$REASON_NONE";
            cpu_load_check_data[ps_data]=$(printf "%s\n%s\n" "$(head -1 <<< "$processes")" "$processes_sorted" |
                json_escape);
        fi;
    fi;
    # Keys are quoted: "<domain>_log_breakdown" keys must reach the serializer unsplit.
    bash_arr_to_json cpu_load_check_data "${!cpu_load_check_data[@]}" > "$CPU_LOAD_DATA";
}
# Entry point: hand cpu_load_check_main to run_check_in_background together
# with the data file, the error file and a lock file named after this function.
cpu_load_check(){
    local lock_file="${CHECK_LOCKS_DIR}/${FUNCNAME[0]}.lock";
    run_check_in_background "cpu_load_check_main" "$CPU_LOAD_DATA" "$CPU_LOAD_CHECK_ERRORS" "$lock_file";
}