about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Mitch Riedstra <mitch@riedstra.us> 2019-03-09 10:33:42 -0500
committer: Mitch Riedstra <mitch@riedstra.us> 2019-03-09 10:33:42 -0500
commit: 7c4e0a7c598d823f00fae48b139eb16abef70575 (patch)
tree: aff710829b9371e3d5e2730529d3cfdc8497dda8
parent: cc9fb4ed26c47ff2b195b3a986b4beee7d0998e8 (diff)
download: stats-dev.tar.gz
stats-dev.tar.xz
Complete re-write (dev)
-rw-r--r-- print.go 21
-rw-r--r-- stats.go 93
2 files changed, 98 insertions, 16 deletions
diff --git a/print.go b/print.go
new file mode 100644
index 0000000..2a39c71
--- /dev/null
+++ b/print.go
@@ -0,0 +1,21 @@
+package stats
+
+import (
+ "fmt"
+)
+
+// Prints each calculated statistic on a line with one tab preceding it
+func (s *Stats) PrettyStats() string {
+ return fmt.Sprintf(`
+ Mean: %.2f
+ Variance: %.2f
+ Standard Deviation: %.2f
+ Skewness: %.2f
+ Kurtosis: %.2f
+`, s.Mean(), s.Variance(), s.StandardDeviation(), s.Skewness(), s.Kurtosis())
+}
+
+// Returns a string containing all of the data within the struct
+func (s *Stats) Internals() string {
+ return fmt.Sprintf("%#v", s)
+}
diff --git a/stats.go b/stats.go
index bc5be7c..ffe94eb 100644
--- a/stats.go
+++ b/stats.go
@@ -1,29 +1,90 @@
// Provides some basic statistics functionality. Specifically tailored around
-// dealing with large streams of data at the moment.
+// dealing with large streams of data.
+// It's a Go implementation of this:
+//
+// https://www.johndcook.com/blog/skewness_kurtosis/
+//
+// Example program:
+// package main
+//
+// import (
+// "git.riedstra.us/go/stats"
+//
+// "fmt"
+// )
+//
+// func main() {
+// myFakeStreamOfData := []float64{01, 04, 60, 55, 80, 06, 75, 51, 63, 10}
+// myStats := &stats.Stats{}
+// for _, entry := range myFakeStreamOfData {
+// myStats.Push(entry)
+// }
+// fmt.Println(myStats.PrettyStats())
+// }
+// Example output:
+//
+// Mean: 40.50
+// Variance: 996.72
+// Standard Deviation: 31.57
+// Skewness: -0.20
+// Kurtosis: -1.66
+//
+//
package stats
import (
"math"
)
-// Stores the average, standard deviation, and the current number of entities
-// processed
+// Structure used to hold the information about the dataset.
+// Several methods are exposed in order to make your life easy.
type Stats struct {
- Mean float64
- Variance float64
- N float64
+ n, m1, m2, m3, m4 float64
}
-// Returns the square root of 1/2 the variance, or Standard Deviation from the
-// mean
-func (s *Stats) Stdev() float64 {
- return math.Sqrt(s.Variance / 2)
+// Zeros out the struct for re-use
+func (s *Stats) Clear() {
+ s.n = 0
+ s.m1, s.m2, s.m3, s.m4 = 0, 0, 0, 0
}
-// Adds an entry to our struct, taking care of adjusting the Mean and Variance
-func (s *Stats) AddEntry(e float64) {
- m := s.Mean
- s.Mean += (e - m) / s.N
- s.Variance += (e - m) * (e - s.Mean)
- s.N++
+// This function is used to push numbers onto the struct and calculate on the
+// fly the necessary information to output relevant statistics
+func (s *Stats) Push(x float64) {
+ var delta, delta_n, delta_n2, term1, n1 float64
+
+ n1 = s.n
+ s.n++
+ delta = x - s.m1
+ delta_n = delta / s.n
+ delta_n2 = delta_n * delta_n
+ term1 = delta * delta_n * n1
+ s.m1 += delta_n
+ s.m4 += term1*delta_n2*(s.n*s.n-3*s.n+3) + 6*delta_n2*s.m2 - 4*delta_n*s.m3
+ s.m3 += term1*delta_n*(s.n-2) - 3*delta_n*s.m2
+ s.m2 += term1
+}
+
+func (s *Stats) NumValues() float64 {
+ return s.n
+}
+
+func (s *Stats) Mean() float64 {
+ return s.m1
+}
+
+func (s *Stats) Variance() float64 {
+ return s.m2 / (s.n - 1)
+}
+
+func (s *Stats) StandardDeviation() float64 {
+ return math.Sqrt(s.Variance())
+}
+
+func (s *Stats) Skewness() float64 {
+ return math.Sqrt(s.n) * s.m3 / math.Pow(s.m2, 1.5)
+}
+
+func (s *Stats) Kurtosis() float64 {
+ return s.n*s.m4/(s.m2*s.m2) - 3
}