Consider the following awk script called stat.awk:
#
# AWK SCRIPT stat.awk does statistics...
# prints out the number of data points, arithmetic mean,
# sample standard deviation (n - 1 denominator) and
# standard error of column 1
#
# Every input record is counted as one data point; a record whose
# first field is empty or non-numeric contributes 0 to the sums.
#
BEGIN { n = 0; s = 0; ss = 0 }
{
    n++
    s += $1
    ss += $1 * $1
}
END {
    print n " data points"
    # No records at all: the mean would be a division by zero.
    if (n == 0)
        exit
    m = s / n
    print m " average"
    # The n - 1 denominator needs at least two data points.
    if (n < 2)
        exit
    # Rounding can leave ss - n*m*m a hair below zero for (nearly)
    # constant data; clamp so sqrt() never gets a negative argument.
    v = (ss - n * m * m) / (n - 1)
    if (v < 0)
        v = 0
    sd = sqrt(v)
    print sd " standard deviation"
    se = sd / sqrt(n)
    print se " standard error"
}
$ awk -f stat.awk
This prints out the statistics of column 1 of the data read from standard input.
What if we want the statistics of an arbitrary column, ColumnNum (for example, so that the script can be wrapped in a bash script and run repeatedly)? It is reasonably straightforward to modify the script above into stat_general.awk:
#
# AWK SCRIPT stat_general.awk does statistics...
# prints out the number of data points, arithmetic mean,
# sample standard deviation (n - 1 denominator) and
# standard error of an arbitrary column
#
# Select the column with  -v ColumnNum=N ; when ColumnNum is not
# set, column 1 is used.
#
BEGIN {
    n = 0; s = 0; ss = 0
    # -v assignments are made before BEGIN runs, so an empty value
    # here means the caller did not choose a column.
    if (ColumnNum == "")
        ColumnNum = 1
}
{
    n++
    s += $ColumnNum
    ss += $ColumnNum * $ColumnNum
}
END {
    print n " data points"
    # No records at all: the mean would be a division by zero.
    if (n == 0)
        exit
    m = s / n
    print m " average"
    # The n - 1 denominator needs at least two data points.
    if (n < 2)
        exit
    # Rounding can leave ss - n*m*m a hair below zero for (nearly)
    # constant data; clamp so sqrt() never gets a negative argument.
    v = (ss - n * m * m) / (n - 1)
    if (v < 0)
        v = 0
    sd = sqrt(v)
    print sd " standard deviation"
    se = sd / sqrt(n)
    print se " standard error"
}
Presto!
#
# AWK SCRIPT stat.awk does statistics...
# prints out the number of data points, arithmetic mean,
# sample standard deviation (n - 1 denominator) and
# standard error of column 1
#
# Every input record is counted as one data point; a record whose
# first field is empty or non-numeric contributes 0 to the sums.
#
BEGIN { n = 0; s = 0; ss = 0 }
{
    n++
    s += $1
    ss += $1 * $1
}
END {
    print n " data points"
    # No records at all: the mean would be a division by zero.
    if (n == 0)
        exit
    m = s / n
    print m " average"
    # The n - 1 denominator needs at least two data points.
    if (n < 2)
        exit
    # Rounding can leave ss - n*m*m a hair below zero for (nearly)
    # constant data; clamp so sqrt() never gets a negative argument.
    v = (ss - n * m * m) / (n - 1)
    if (v < 0)
        v = 0
    sd = sqrt(v)
    print sd " standard deviation"
    se = sd / sqrt(n)
    print se " standard error"
}
$ awk -f stat.awk
This prints out the statistics of column 1 of the data read from standard input.
What if we want the statistics of an arbitrary column, ColumnNum (for example, so that the script can be wrapped in a bash script and run repeatedly)? It is reasonably straightforward to modify the script above into stat_general.awk:
#
# AWK SCRIPT stat_general.awk does statistics...
# prints out the number of data points, arithmetic mean,
# sample standard deviation (n - 1 denominator) and
# standard error of an arbitrary column
#
# Select the column with  -v ColumnNum=N ; when ColumnNum is not
# set, column 1 is used.
#
BEGIN {
    n = 0; s = 0; ss = 0
    # -v assignments are made before BEGIN runs, so an empty value
    # here means the caller did not choose a column.
    if (ColumnNum == "")
        ColumnNum = 1
}
{
    n++
    s += $ColumnNum
    ss += $ColumnNum * $ColumnNum
}
END {
    print n " data points"
    # No records at all: the mean would be a division by zero.
    if (n == 0)
        exit
    m = s / n
    print m " average"
    # The n - 1 denominator needs at least two data points.
    if (n < 2)
        exit
    # Rounding can leave ss - n*m*m a hair below zero for (nearly)
    # constant data; clamp so sqrt() never gets a negative argument.
    v = (ss - n * m * m) / (n - 1)
    if (v < 0)
        v = 0
    sd = sqrt(v)
    print sd " standard deviation"
    se = sd / sqrt(n)
    print se " standard error"
}
$ x=5
$ awk -f stat_general.awk -v ColumnNum="$x"
Presto!
No comments:
Post a Comment