aboutsummaryrefslogtreecommitdiff
path: root/stats.go
blob: 534d24edab177cc6ae8d52c5dd39e60bc05eaee5 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
// Package stats provides some basic statistics functionality, specifically
// tailored around dealing with large streams of data.
// It's a Go implementation of this:
//
// https://www.johndcook.com/blog/skewness_kurtosis/
//
// Example program:
//	package main
//
//	import (
//		"git.riedstra.us/go/stats"
//
//		"fmt"
//	)
//
//	func main() {
//		myFakeStreamOfData := []float64{01, 04, 60, 55, 80, 06, 75, 51, 63, 10}
//		myStats := &stats.Stats{}
//		for _, entry := range myFakeStreamOfData {
//			myStats.Push(entry)
//		}
//		fmt.Println(myStats.PrettyStats())
//	}
// Example output:
//
//		Mean: 40.50
//		Variance: 996.72
//		Standard Deviation: 31.57
//		Skewness: -0.20
//		Kurtosis: -1.66
//
//
package stats

import (
	"math"
)

// Stats accumulates running statistics over a stream of float64 samples
// without retaining the samples themselves. The zero value is ready to use:
// feed it with Push and read the results through the accessor methods.
type Stats struct {
	// n is the number of samples pushed so far.
	n float64
	// m1 is the running mean.
	m1 float64
	// m2, m3 and m4 are the running second, third and fourth moment
	// accumulators read by Variance, Skewness and Kurtosis.
	m2, m3, m4 float64

	// max and min point at the largest and smallest samples seen so far.
	// Both are nil until the first Push.
	max *float64
	min *float64
}

// Clear resets s to its zero state so it can be reused for a new data set.
//
// The sample count, the moment accumulators and the tracked minimum and
// maximum are all discarded. Dropping the extremes matters: if they were left
// in place, values from the previous data set would leak into the Min and Max
// of the next one.
func (s *Stats) Clear() {
	*s = Stats{}
}

// Push folds one sample x into the running statistics.
//
// In a single pass it updates the observed minimum and maximum, the sample
// count, the running mean and the second, third and fourth moment
// accumulators, so the samples themselves never have to be stored. The update
// formulas follow the streaming algorithm referenced in the package comment.
func (s *Stats) Push(x float64) {
	// Copy x into a fresh variable only when a bound actually moves. Taking
	// the address of the parameter itself would force it onto the heap on
	// every call, including the common case where neither extreme changes.
	// A sample equal to the current bound leaves that bound untouched.
	if s.max == nil || x > *s.max {
		v := x
		s.max = &v
	}
	if s.min == nil || x < *s.min {
		v := x
		s.min = &v
	}

	prevN := s.n
	s.n++
	delta := x - s.m1
	deltaN := delta / s.n
	deltaN2 := deltaN * deltaN
	term1 := delta * deltaN * prevN

	s.m1 += deltaN
	// Order matters below: the m4 update reads the old m3 and m2, and the m3
	// update reads the old m2, so m2 must be updated last.
	s.m4 += term1*deltaN2*(s.n*s.n-3*s.n+3) + 6*deltaN2*s.m2 - 4*deltaN*s.m3
	s.m3 += term1*deltaN*(s.n-2) - 3*deltaN*s.m2
	s.m2 += term1
}

// NumValues returns the sample count currently held in s, as a float64.
func (s *Stats) NumValues() float64 {
	return s.n
}

// Mean returns the running mean of the pushed samples. It is 0 for a
// zero-value Stats that has not received any sample.
func (s *Stats) Mean() float64 {
	return s.m1
}

// Variance reports the sample variance: the second-moment accumulator divided
// by n-1. No guard is applied for small counts, so the result is not
// meaningful with fewer than two samples (a single sample yields NaN).
func (s *Stats) Variance() float64 {
	degreesOfFreedom := s.n - 1
	return s.m2 / degreesOfFreedom
}

// StandardDeviation reports the sample standard deviation, i.e. the square
// root of Variance.
func (s *Stats) StandardDeviation() float64 {
	variance := s.Variance()
	return math.Sqrt(variance)
}

// Skewness reports the skewness of the pushed samples, computed from the
// accumulators as sqrt(n) * m3 / m2^1.5. No guard is applied when m2 is zero.
func (s *Stats) Skewness() float64 {
	scaledThird := math.Sqrt(s.n) * s.m3
	spread := math.Pow(s.m2, 1.5)
	return scaledThird / spread
}

// Kurtosis reports the excess kurtosis of the pushed samples, computed from
// the accumulators as n*m4/(m2*m2) - 3. No guard is applied when m2 is zero.
func (s *Stats) Kurtosis() float64 {
	m2Squared := s.m2 * s.m2
	ratio := s.n * s.m4 / m2Squared
	return ratio - 3
}

// Min returns the smallest sample recorded in s.
//
// When no sample has been recorded the minimum is undefined, so Min returns
// NaN instead of dereferencing the unset pointer and panicking.
func (s *Stats) Min() float64 {
	if s.min == nil {
		return math.NaN()
	}
	return *s.min
}

// Max returns the largest sample recorded in s.
//
// When no sample has been recorded the maximum is undefined, so Max returns
// NaN instead of dereferencing the unset pointer and panicking.
func (s *Stats) Max() float64 {
	if s.max == nil {
		return math.NaN()
	}
	return *s.max
}