Consider the following awk script called **stat.awk**:

#
# AWK SCRIPT stat.awk does statistics...
# prints out arithmetic mean, standard deviation,
# standard error of column 1
#
# Usage: awk -f stat.awk [file ...]
#
BEGIN { n = 0; s = 0; ss = 0 }

# Every input record: accumulate the count, the sum and the sum of squares
# of column 1.
{
    n++
    s += $1
    ss += $1 * $1
}

END {
    print n " data points"
    if (n == 0) {
        # No records were read: s / n would be a division by zero.
        print "stat.awk: no input, nothing to average" > "/dev/stderr"
        exit
    }

    m = s / n
    print m " average"
    if (n < 2) {
        # The sample standard deviation divides by (n - 1).
        print "stat.awk: need at least 2 data points for the standard deviation" > "/dev/stderr"
        exit
    }

    # Round-off can leave ss - n*m*m slightly below zero when all values are
    # (nearly) equal; clamp so that sqrt() never receives a negative number.
    var = (ss - n * m * m) / (n - 1)
    if (var < 0)
        var = 0

    sd = sqrt(var)
    print sd " standard deviation"

    se = sd / sqrt(n)
    print se " standard error"
}

$ awk -f stat.awk

This prints out the statistics of column 1.

What if we wanted to find the statistics of an arbitrary column **ColumnNum** (especially if you want to wrap it in a bash script for repeated operation)? It is reasonably straightforward to modify the script above into **stat_general.awk**:

#
# AWK SCRIPT stat_general.awk does statistics...
# prints out arithmetic mean,
# standard deviation,
# standard error of an arbitrary column
#
# Usage: awk -f stat_general.awk -v ColumnNum=N [file ...]
#   ColumnNum  field number to analyse; defaults to 1 when not given
#
BEGIN {
    n = 0; s = 0; ss = 0

    # -v assignments are made before BEGIN runs, so ColumnNum can be
    # checked before any input is read.
    if (ColumnNum == "")
        ColumnNum = 1
    if (ColumnNum !~ /^[0-9]+$/) {
        print "stat_general.awk: ColumnNum must be a non-negative integer" > "/dev/stderr"
        # exit in BEGIN still runs the END rule; the flag keeps END silent.
        bad = 1
        exit 2
    }
}

# Every input record: accumulate the count, the sum and the sum of squares
# of the selected column.
{
    x = $ColumnNum
    n++
    s += x
    ss += x * x
}

END {
    if (bad)
        exit 2

    print n " data points"
    if (n == 0) {
        # No records were read: s / n would be a division by zero.
        print "stat_general.awk: no input, nothing to average" > "/dev/stderr"
        exit
    }

    m = s / n
    print m " average"
    if (n < 2) {
        # The sample standard deviation divides by (n - 1).
        print "stat_general.awk: need at least 2 data points for the standard deviation" > "/dev/stderr"
        exit
    }

    # Round-off can leave ss - n*m*m slightly below zero when all values are
    # (nearly) equal; clamp so that sqrt() never receives a negative number.
    var = (ss - n * m * m) / (n - 1)
    if (var < 0)
        var = 0

    sd = sqrt(var)
    print sd " standard deviation"

    se = sd / sqrt(n)
    print se " standard error"
}

$ x=5

$ awk -f stat_general.awk -v ColumnNum="$x"

Presto!