Add script for docker events/metrics and support running TA outside of Splunk

* Add docker.sh and docker_metric.sh for collecting docker events/metrics
* Add helper script to extra/ to run the TA commands on systems without
  a Splunk forwarder. The commands can be sent to a syslog server.
  This script is useful for systems with small or read-only filesystems that
  cannot support a Universal Forwarder.
* Add syslog_inputs_nix_ta app to extra/ for ingesting the data from syslog
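A quick usage sketch for the extra/ helper mentioned above (the install path is site-specific; the flags are the ones defined in extra/run_nix_ta_commands):

    /path/to/run_nix_ta_commands -l    # list scripts, enabled state, and interval
    /path/to/run_nix_ta_commands -f    # run every enabled script immediately
    * * * * * /path/to/run_nix_ta_commands    # cron entry; the script applies each input's interval itself
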
Michael Erdely 2025-01-11 23:28:44 -05:00
parent 5e766d84d5
commit 5551b8973d
Signed by: mike
SSH key fingerprint: SHA256:ukbnfrRMaRYlBZXENtBTyO2jLnql5AA5m+SzZCfYQe0
13 changed files with 322 additions and 13 deletions


@ -6,10 +6,10 @@ TAR_FILE := ./ta-for-unix-and-linux-$(VERSION).tgz
all: release
updateversion:
ifndef NEWVERSION
$(error NEWVERSION is not specified. Usage make NEWVERSION=<newversion> updateversion)
ifndef NEW
$(error NEW is not specified. Usage make NEW=<newversion> updateversion)
endif
sed -ri "s/$(VERSION)/$(NEWVERSION)/g" app.manifest default/app.conf VERSION
sed -ri "s/$(VERSION)/$(NEW)/g" app.manifest default/app.conf VERSION
release:
mkdir -p $(WORK_DIR)


@ -1,2 +1,2 @@
9.2.0.4
9.2.0.4
9.2.0.5
9.2.0.5


@ -29,7 +29,7 @@
"id": {
"group": null,
"name": "TA-nix",
"version": "9.2.0.4"
"version": "9.2.0.5"
},
"license": {
"name": "Splunk Software License Agreement",

bin/docker.sh Executable file

@ -0,0 +1,92 @@
#!/bin/bash
# SPDX-FileCopyrightText: 2022 Michael Erdely <mike@erdelynet.com>
# SPDX-License-Identifier: MIT
# shellcheck disable=SC1091
. "$(dirname "$0")"/common.sh
assertHaveCommand docker
assertHaveCommand bc
assertHaveCommand ip
assertHaveCommand awk
declare -A pids
declare -A time_start
declare -A cpu_start
declare -A rx_start
declare -A tx_start
declare -A br_start
declare -A bw_start
[[ $0 =~ .*_metric.sh ]] && mode=metric
# Either add the splunk user to the docker group or add the following to /etc/sudoers:
# splunk ALL=(root) NOPASSWD: /usr/bin/docker stats --no-stream --no-trunc --all
# splunk ALL=(root) NOPASSWD: /usr/bin/docker ps --all --no-trunc --format *
# splunk ALL=(root) NOPASSWD: /usr/bin/docker inspect -f *
docker_cmd=docker
! groups | grep -q "\bdocker\b" && docker_cmd="sudo -n $docker_cmd"
docker_list=$($docker_cmd ps --all --no-trunc --format '{{ .ID }}')
header_string="ContainerId Name CPUPct MemUsage MemTotal MemPct NetRX RXps NetTX TXps BlockRead BRps BlockWrite BWps Pids"
metric_string=""
header_format="%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n"
string_format="%s\t%s\t%s\t%.2f\t%s\t%s\t%.2f\t%s\t%.2f\t%s\t%.2f\t%s\t%.2f\t%s\t%.2f\t%s\n"
json_format='{ "time": "%s", "ContainerId": "%s", "Name": "%s", "CPUPct": %.2f, "MemUsage": %s, "MemTotal": %s, "MemPct": %.2f, "NetRX": %s, "RXps": %.2f, "NetTX": %s, "TXps": %.2f, "BlockRead": %s, "BRps": %.2f, "BlockWrite": %s, "BWps": %.2f, "Pids": %s }\n'
if [ "$mode" = "metric" ]; then
metric_name=docker_metric
if [ ! -f "/etc/os-release" ] ; then
OSName=$(cat /etc/*release | head -n 1| awk -F" release " '{print $1}'| tr ' ' '_')
OS_version=$(cat /etc/*release | head -n 1| awk -F" release " '{print $2}' | cut -d\. -f1)
IP_address=$(ip addr show dev $(ip route show | awk 'BEGIN{m=1000}$1=="default"$0!~/ metric /{print $5;exit}$1=="default"{if($NF<m){m=$NF;i=$5}}END{print i}') | awk '$1=="inet"{print gensub(/\/[0-9]+/,"","g",$2)}')
else
OSName=$(cat /etc/*release | grep '\bNAME=' | cut -d\= -f2 | tr ' ' '_' | cut -d\" -f2)
OS_version=$(cat /etc/*release | grep '\bVERSION_ID=' | cut -d\= -f2 | cut -d\" -f2)
IP_address=$(ip addr show dev $(ip route show | awk 'BEGIN{m=1000}$1=="default"$0!~/ metric /{print $5;exit}$1=="default"{if($NF<m){m=$NF;i=$5}}END{print i}') | awk '$1=="inet"{print gensub(/\/[0-9]+/,"","g",$2)}')
fi
[ -z "$OSName" ] && OSName="?"
[ $OSName = Arch_Linux ] && OS_version=rolling
[ -z "$OS_version" ] && OS_version="?"
header_string="$header_string OSName OS_version IP_address"
metric_string=" $OSName $OS_version $IP_address"
header_format="${header_format::-2}\t%s\t%s\t%s\n"
string_format="${string_format::-2}\t%s\t%s\t%s\n"
json_format='{ "time": "%s", "ContainerId": "%s", "Name": "%s", "CPUPct": %.2f, "MemUsage": %.2f, "MemTotal": %.2f, "MemPct": %.2f, "NetRX": %.2f, "RXps": %.2f, "NetTX": %.2f, "TXps": %.2f, "BlockRead": %.2f, "BRps": %.2f, "BlockWrite": %.2f, "BWps": %.2f, "Pids": %s, "OSName": "%s", "OS_version": "%s", "IP_address": "%s", "event": "metric" }\n'
fi
# CPU % is calculated over the sampling window between the two loops below (about 2 seconds), not as an instantaneous reading
for id in $docker_list; do
[ ! -d /sys/fs/cgroup/system.slice/docker-$id.scope ] && continue
pids[$id]=$($docker_cmd inspect -f '{{ .State.Pid }}' $id)
read time_start[$id] _ < /proc/uptime
read _ cpu_start[$id] < /sys/fs/cgroup/system.slice/docker-$id.scope/cpu.stat
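# /proc/<pid>/net/dev: field 2 is RX bytes and field 10 is TX bytes; the loop below keeps the
# values from the last interface listed. io.stat lines look like "MAJ:MIN rbytes=N wbytes=N ...";
# ${_br:7}/${_bw:7} strip the 7-character "rbytes="/"wbytes=" prefixes and the values are summed across devices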
while read _if _rx _ _ _ _ _ _ _ _tx _ _ _ _ _ _ _ ; do if=$_if rx_start[$id]=$_rx tx_start[$id]=$_tx; done < /proc/${pids[$id]}/net/dev
br_start[$id]=0;bw_start[$id]=0;while read _ _br _bw _ _ _ _; do br_start[$id]=$((${br_start[$id]}+${_br:7}));bw_start[$id]=$((${bw_start[$id]}+${_bw:7})); done < /sys/fs/cgroup/system.slice/docker-$id.scope/io.stat
done
sleep 2 # Sleep 2 seconds to give the script time to get CPU stats
MemTotal=$(awk '$1=="MemTotal:" {print $2*1024}' /proc/meminfo)
#printf "$header_format" $header_string
for id in $docker_list; do
name=$($docker_cmd inspect -f '{{ .Name }}' $id)
if [ ! -d /sys/fs/cgroup/system.slice/docker-$id.scope ]; then
printf "$json_format" $id ${name:1} 0 0 0 0 0 0 0 0 0 0 0 0 0$metric_string
continue
fi
read cpu_stop _ < /proc/uptime
read _ proc_stop < /sys/fs/cgroup/system.slice/docker-$id.scope/cpu.stat
while read _if _rx _ _ _ _ _ _ _ _tx _ _ _ _ _ _ _ ; do if=$_if NetRX=$_rx NetTX=$_tx; done < /proc/${pids[$id]}/net/dev
BlockRead=0;BlockWrite=0;while read _ _br _bw _ _ _ _; do BlockRead=$((BlockRead+${_br:7}));BlockWrite=$((BlockWrite+${_bw:7})); done < /sys/fs/cgroup/system.slice/docker-$id.scope/io.stat
read MemUsage < /sys/fs/cgroup/system.slice/docker-$id.scope/memory.current
read Pids < /sys/fs/cgroup/system.slice/docker-$id.scope/pids.current
read _ CPU < /sys/fs/cgroup/cpu.stat
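# CPUPct: usage_usec delta from the container's cpu.stat divided by the wall-clock delta
# (seconds from /proc/uptime scaled to microseconds), multiplied by 100 to get a percentage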
CpuUsage=$(echo "($proc_stop - ${cpu_start[$id]}) / ($cpu_stop * 1000000 - ${time_start[$id]} * 1000000) * 100" | bc -l)
RXps=$(echo "($NetRX - ${rx_start[$id]}) / ($cpu_stop * 1000000 - ${time_start[$id]} * 1000000) * 100" | bc -l)
TXps=$(echo "($NetTX - ${tx_start[$id]}) / ($cpu_stop * 1000000 - ${time_start[$id]} * 1000000) * 100" | bc -l)
BRps=$(echo "($BlockRead - ${br_start[$id]}) / ($cpu_stop * 1000000 - ${time_start[$id]} * 1000000) * 100" | bc -l)
BWps=$(echo "($BlockWrite - ${bw_start[$id]}) / ($cpu_stop * 1000000 - ${time_start[$id]} * 1000000) * 100" | bc -l)
printf "$json_format" "$(env TZ=UTC date "+%FT%T.%NZ")" $id ${name:1} $CpuUsage $MemUsage $MemTotal $(echo "$MemUsage*100/$MemTotal"|bc -l) $NetRX $RXps $NetTX $TXps $BlockRead $BRps $BlockWrite $BWps $Pids$metric_string
done

bin/docker_metric.sh Symbolic link

@ -0,0 +1 @@
docker.sh


@ -17,7 +17,7 @@ docs_section_override = AddOns:released
[launcher]
author = Michael Erdely
version = 9.2.0.4
version = 9.2.0.5
description = Technical Add-on for Unix and Linux
#[package]
@ -26,5 +26,5 @@ description = Technical Add-on for Unix and Linux
[id]
name = TA-unix
version = 9.2.0.4
version = 9.2.0.5


@ -8,7 +8,7 @@
search = NOT *
[nix_ta_data]
search = eventtype=nix_ta_custom_eventtype OR (sourcetype IN (vmstat_metric, iostat_metric, ps_metric, df_metric, interfaces_metric, cpu_metric, vmstat, iostat, ps, top, netstat, bandwidth, protocol, openPorts, time, lsof, df, who, usersWithLoginPrivs, lastlog, interfaces, cpu, auditd, package, hardware, bash_history, Unix:ListeningPorts, Unix:UserAccounts, Linux:SELinuxConfig, Unix:Service, Unix:SSHDConfig, Unix:Update, Unix:Uptime, Unix:Version, Unix:VSFTPDConfig, config_file, dhcpd, nfsiostat, ignored_type, aix_secure, osx_secure, linux_secure, linux_audit, syslog) OR source IN (/Library/Logs/*, /var/log/*, /var/adm/*, /etc/*))
search = eventtype=nix_ta_custom_eventtype OR (sourcetype IN (docker_metric, vmstat_metric, iostat_metric, ps_metric, df_metric, interfaces_metric, cpu_metric, docker, vmstat, iostat, ps, top, netstat, bandwidth, protocol, openPorts, time, lsof, df, who, usersWithLoginPrivs, lastlog, interfaces, cpu, auditd, package, hardware, bash_history, Unix:ListeningPorts, Unix:UserAccounts, Linux:SELinuxConfig, Unix:Service, Unix:SSHDConfig, Unix:Update, Unix:Uptime, Unix:Version, Unix:VSFTPDConfig, config_file, dhcpd, nfsiostat, ignored_type, aix_secure, osx_secure, linux_secure, linux_audit, syslog) OR source IN (/Library/Logs/*, /var/log/*, /var/adm/*, /etc/*))
###### Globals ######
[nix_security]
@ -112,6 +112,10 @@ search = sourcetype=time
[usersWithLoginPrivs]
search = sourcetype=usersWithLoginPrivs
[docker]
search = sourcetype=docker
#tags = performance os avail unix report docker
[vmstat]
search = sourcetype=vmstat
#tags = performance os avail unix report vmstat resource success memory


@ -4,6 +4,12 @@
##
##
[script://./bin/docker_metric.sh]
sourcetype = docker_metric
source = docker
interval = 60
disabled = 1
[script://./bin/vmstat_metric.sh]
sourcetype = vmstat_metric
source = vmstat
@ -44,6 +50,12 @@ disabled = 1
############### Event Inputs ###################
################################################
[script://./bin/docker.sh]
interval = 60
sourcetype = docker
source = docker
disabled = 1
[script://./bin/vmstat.sh]
interval = 60
sourcetype = vmstat
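Both docker stanzas above ship with disabled = 1; a minimal local/inputs.conf override to turn them on (standard Splunk configuration layering, not part of this diff) would be:

    [script://./bin/docker.sh]
    disabled = 0

    [script://./bin/docker_metric.sh]
    disabled = 0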


@ -91,6 +91,15 @@ FIELDALIAS-dest_nt_host = dest_host as dest_nt_host
## Scripted Metric Inputs
#########################
[docker_metric]
SHOULD_LINEMERGE=false
LINE_BREAKER = ([\r\n]+)
KV_MODE = json
NO_BINARY_CHECK = true
TRUNCATE=1000000
TRANSFORMS-docker-metric-dimensions=eval_dimensions
METRIC-SCHEMA-TRANSFORMS=metric-schema:extract_metrics_docker
[vmstat_metric]
SHOULD_LINEMERGE=false
LINE_BREAKER=(^$|[\r\n]+[\r\n]+)
@ -506,6 +515,14 @@ TRUNCATE=1000000
DATETIME_CONFIG = CURRENT
KV_MODE=multi
[docker]
SHOULD_LINEMERGE=false
LINE_BREAKER=(^$|[\r\n]+[\r\n]+)
TRUNCATE=1000000
KV_MODE = json
FIELDALIAS-dest_for_docker = host as dest
FIELDALIAS-src_for_docker = host as src
[vmstat]
LINE_BREAKER=(^$|[\r\n]+[\r\n]+)
TRUNCATE=1000000


@ -183,6 +183,9 @@ REGEX=[[dhcp_prefix_src]]reuse_lease:\s+lease\s+age.*under.*threshold,\s+reply\s
# Support for omitting the IPv6 Address field when the script output doesn't include an IPv6 Address
INGEST_EVAL = metric_name=sourcetype, entity_type="TA_Nix", OS_name=replace(OSName, "_", " "), IPv6_address = if(IPv6_Address=="?", null(), IPv6_Address)
#[extract_docker_metrics]
#INGEST_EVAL= CPUPct=CPUPct,MemUsage=MemUsage,MemTotal=MemTotal,MemPct=MemPct,NetRX=NetRX,RXps=RXps,NetTX=NetTX,TXps=TXps,BlockRead=BlockRead,BRps=BRps,BlockWrite=BlockWrite,BWps=BWps,Pids=Pids
[extract_df_metrics]
INGEST_EVAL = UsePct=coalesce('UsePct','Capacity','Use'), Size_KB=coalesce('Size','1K_blocks','1024_blocks'), Used_KB='Used', Avail_KB=coalesce('Avail','Available'), INodes=coalesce('INodes','Inodes'), IUsed=coalesce('IUsed','iused','Iused'), IFree=coalesce('IFree','ifree','Ifree'), IUsePct=coalesce('IUsePct','IUse'), Size=coalesce('Size','1K_blocks','1024_blocks'), Avail=coalesce('Avail','Available'), Type=coalesce('Type',"?")
@ -208,6 +211,10 @@ METRIC-SCHEMA-BLACKLIST-DIMS= OSName
METRIC-SCHEMA-MEASURES= memTotalMB,memFreeMB,memUsedMB,memFreePct,memUsedPct,pgPageOut,swapUsedPct,pgSwapOut,cSwitches,interrupts,forks,processes,threads,loadAvg1mi,waitThreads,interrupts_PS,pgPageIn_PS,pgPageOut_PS
METRIC-SCHEMA-BLACKLIST-DIMS= OSName
[metric-schema:extract_metrics_docker]
METRIC-SCHEMA-MEASURES= _NUMS_EXCEPT_ OS_version
METRIC-SCHEMA-BLACKLIST-DIMS= OSName
[metric-schema:extract_metrics_df]
METRIC-SCHEMA-MEASURES= _NUMS_EXCEPT_ OS_name, OS_version, IP_address, Filesystem, Type, MountedOn, IPv6_Address, IPv6_address
METRIC-SCHEMA-BLACKLIST-DIMS= IPv6_Address


@ -1,6 +1,19 @@
# Technical Add-on for Unix and Linux
## Version 9.2.0.4
## Version 9.2.0.5 (2025-01-11)
Add script for docker events/metrics and support running TA outside of Splunk
Changes:
* Add docker.sh and docker_metric.sh for collecting docker events/metrics
* Add helper script to extra/ to run the TA commands on systems without
a Splunk forwarder. The commands can be sent to a syslog server.
This script is useful for systems with small or read-only filesystems that
cannot support a Universal Forwarder.
* Add syslog_inputs_nix_ta app to extra/ for ingesting the data from syslog
## Version 9.2.0.4 (2025-01-11)
Make distro_name work everywhere
@ -9,7 +22,7 @@ Changes:
* For MacOS, print MacOS for distro_name
* For others, print $KERNEL for distro_name
## Version 9.2.0.3
## Version 9.2.0.3 (2025-01-11)
Fix bug in 9.2.0.2
@ -18,7 +31,7 @@ Changes:
* Add code I forgot for machine_arch for Linux
* Add Makefile to make making releases easier
## Version 9.2.0.2
## Version 9.2.0.2 (2025-01-11)
Improvements for version.sh
@ -28,7 +41,7 @@ Changes:
* For Linux and MacOS, use actual OS versions/releases instead of
kernel version/release
## Version 9.2.0.1
## Version 9.2.0.1 (2025-01-09)
Initial fork of the Splunk Add-on for Unix and Linux

extra/run_nix_ta_commands Executable file

@ -0,0 +1,159 @@
#!/bin/bash
# This script gets Technical Add-on for Unix and Linux data into Splunk from
# systems that are not running a Splunk Universal Forwarder.
# This is useful for systems with small or read-only filesystems.
#
# ## Sample rsyslog.conf
# # Config for handling remote logs
# template(name="RemoteLogs" type="string" string="/share/syslog/%FROMHOST%/%$.myprogramname%/%$.myprogramname%-%$YEAR%-%$MONTH%-%$DAY%.log")
# # Write raw messages for splunk logs
# template(name="RawMessageOnly" type="string" string="%$.mymsg%\n")
# # Look for logs whose tag starts with nix_ta_ to apply RawMessageOnly and send to RemoteLogs
# if ($syslogtag startswith 'nix_ta_') then {
# set $.mymsg = replace($msg, "#011", " ");
# action(type="omfile" dynaFile="RemoteLogs" template="RawMessageOnly"
# fileCreateMode="0644" dirCreateMode="0755"
# fileOwner="root" fileGroup="splunk"
# dirOwner="root" dirGroup="splunk")
# stop
# }
# # End of sample rsyslog.conf
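# # A hypothetical Splunk monitor stanza on the syslog host that would pick up one of the
# # files written by the RemoteLogs template above (the bundled syslog_inputs_nix_ta app in
# # extra/ is the supported route; the path and sourcetype here are only an example):
# # [monitor:///share/syslog/*/nix_ta_vmstat]
# # host_segment = 3
# # sourcetype = vmstat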
#
# To use:
# * Modify the variables below to fit your environment
# * ta_home: The directory to which you copied the Technical Add-on for Unix and Linux files
# * tag_prefix: The events will be sent to syslog with ${tag_prefix}SCRIPTNAME as a tag
# * syslog_server: The UDP syslog server to send events to
# * run_minute: For scripts that have intervals over an hour, which minute to run them
# * run_hour: For scripts that run once a day, which hour to run them
# * Create a cron job: * * * * * /path/to/script/run_nix_ta_commands
# Ensure the logger command is available
which logger > /dev/null 2>&1 || { echo "Error: The logger command is required for this script"; exit; }
ta_home=/srv/TA-unix
tag_prefix=nix_ta_
syslog_server=192.168.1.1
run_minute=2
run_hour=6
# Get the current minute now to be consistent through the script run
minute=$(date +%_M | tr -d ' ')
# Get the current hour now to be consistent through the script run
hour=$(date +%_H | tr -d ' ')
# Set defaults disabling force-mode and list-mode
force=0
list=0
usage() {
echo "usage: $(basename $0) [-h] [-f] [-l] [script]"
echo " -h: print this help text"
echo " -f: run all enabled scripts regardless of interval"
echo " -l: list scripts, enabled status, and interval (if enabled)"
exit
}
# Get the command line options
while getopts ":hlf" opt; do
case $opt in
f) force=1 ;;
l) list=1 ;;
*) usage ;;
esac
done
shift $((OPTIND -1))
# Function to actually run the script and pipe it to logger
runit() {
[ -z "$1" ] && return 1
if [ -x $ta_home/bin/$1.sh ]; then
{ $ta_home/bin/$1.sh 2> /dev/null; echo; } | logger -n $syslog_server -t ${tag_prefix}$(echo $1|tr '[A-Z]' '[a-z]')
else
echo Could not find $1 in $ta_home/bin
return 1
fi
}
# Check the inputs.conf to see if any of the checks are disabled
declare -A scripts
declare -A intervals
# Load defaults first
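# The awk program below turns each [script://./bin/NAME.sh] stanza into shell assignments
# (scripts[NAME]=<disabled flag>, intervals[NAME]=<interval in seconds>) that are eval'd to
# populate the associative arrays declared above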
if [ -r $ta_home/default/inputs.conf ]; then
eval $(awk -F '[=#]' '
/^\[/{name=""}
/^\[script:\/\//{n=split($1,a,"/");name=gensub(/\.[a-z]+\]/,"",1,a[n]);printf "scripts[%s]=1\nintervals[%s]=60\n",name,name}
name!="" && $1~/(^|\s*)disabled(\s*|$)/ {disabled=gensub(/(^ | $)/,"","g",gensub(/true/,"1",1,gensub(/false/,"0",1,$2)));printf "scripts[%s]=%s\n",name,disabled}
name!="" && $1~/(^|\s*)interval(\s*|$)/ {interval=gensub(/(^ | $)/,"","g",$2);printf "intervals[%s]=%s\n",name,interval}
' $ta_home/default/inputs.conf)
fi
# See if any defaults are overridden in the local directory
if [ -r $ta_home/local/inputs.conf ]; then
eval $(awk -F '[=#]' '
/^\[/{name="";disabled=1;interval=60}
/^\[script:\/\//{n=split($1,a,"/");name=gensub(/\.[a-z]+\]/,"",1,a[n])}
name!="" && $1~/(^|\s*)disabled(\s*|$)/ {disabled=gensub(/(^ | $)/,"","g",gensub(/true/,"1",1,gensub(/false/,"0",1,$2)));printf "scripts[%s]=%s\n",name,disabled}
name!="" && $1~/(^|\s*)interval(\s*|$)/ {interval=gensub(/(^ | $)/,"","g",$2);printf "intervals[%s]=%s\n",name,interval}
' $ta_home/local/inputs.conf)
fi
# If -l, just print the scripts
if [ $list = 1 ]; then
for script in "${!scripts[@]}"; do
if [ "${scripts[$script]}" = "0" ]; then
echo "$script is enabled (${intervals[$script]} seconds)"
else
echo "$script is disabled"
fi
done
exit
fi
# If a script is specified on the command line, run it (even if disabled)
if [ "$1" ]; then
runit $1
exit
fi
# Without -l or -f, loop through the enabled scripts and run them at their interval
for script in "${!scripts[@]}"; do
# Only run enabled scripts
if [ "${scripts[$script]}" = "0" ]; then
i=${intervals[$script]}
[ $i -lt 60 ] && i=60
min=$((i/60))
# If -f, always run each script
if [ $force = 1 ]; then
runit $script
# If interval is 60 seconds or less, run every minute
elif [ $min -le 1 ]; then
runit $script
# If the current minute is divisible by the number of interval minutes, run
# example: 600 is 10 minutes, it'll run at 0, 10, 20, 30, ... minutes
elif [ $((minute % min)) = 0 ]; then
runit $script
# If interval is an hour or more
elif [ $min -gt 60 ]; then
hr=$((i/60/60))
# If interval is 1 hour or less, run every hour on $run_minute
if [ $hr -le 1 ] && [ $minute = $run_minute ]; then
runit $script
# If the current hour is divisible by the number of interval hours, run
# example: 21600 is 6 hours, it'll run at 0, 6, 12, 18 hours
elif [ $((hour % hr)) = 0 ] && [ $minute = $run_minute ]; then
runit $script
# If the number of hours is 24 or more, run every day at $run_hour:$run_minute
elif [ $hr -ge 24 ] && [ $hour = $run_hour ] && [ $minute = $run_minute ]; then
runit $script
fi
fi
fi
done


@ -0,0 +1,4 @@
# Application-level permissions
[]
access = read : [ * ], write : [ admin , sc_admin ]
export = system