1 | package de.uka.ipd.sdq.statistics.independence; |
2 | |
3 | import java.util.Collection; |
4 | |
5 | import org.apache.log4j.Logger; |
6 | |
7 | import de.uka.ipd.sdq.probfunction.math.IChiSquareDistribution; |
8 | import de.uka.ipd.sdq.probfunction.math.IContinousPDFFactory; |
9 | import de.uka.ipd.sdq.probfunction.math.apache.impl.PDFFactory; |
10 | |
11 | /** |
12 | * Implements the "run test" algorithm which tests a data sequence for |
13 | * independence. |
14 | * |
15 | * Confer [Donald E. Knuth: The Art of Computer Programming. Seminumerical |
16 | * Algorithms] |
17 | * |
18 | * @author Philipp Merkle |
19 | * |
20 | */ |
21 | public class RunUpTest implements IIndependenceTest { |
22 | |
23 | private static final Logger logger = Logger.getLogger(RunUpTest.class); |
24 | |
25 | private static final int LOWER_SAMPLE_LIMIT = 4000; |
26 | |
27 | private static final int CHI_SQUARE_DOF = 6; |
28 | |
29 | private static final double CHI_SQUARE_LOWER_QUANTILE = 0.01; |
30 | |
31 | private static final double CHI_SQUARE_UPPER_QUANTILE = 0.99; |
32 | |
33 | private static final double[][] A = { |
34 | { 4529.4, 9044.9, 13568, 18091, 22615, 27892 }, |
35 | { 9044.9, 18097, 27139, 36187, 45234, 55789 }, |
36 | { 13568, 27139, 40721, 54281, 67852, 83685 }, |
37 | { 18091, 36187, 54281, 72414, 90470, 111580 }, |
38 | { 22615, 45234, 67852, 90470, 113262, 139476 }, |
39 | { 27892, 55789, 83685, 111580, 139476, 172860 } }; |
40 | |
41 | private static final double[] B = { 1.0 / 6, 5.0 / 24, 11.0 / 120, |
42 | 19.0 / 720, 29.0 / 5040, 1.0 / 840 }; |
43 | |
44 | private IContinousPDFFactory pdfFactory; |
45 | |
46 | public RunUpTest() { |
47 | // use apache math factory as default |
48 | this(new PDFFactory()); |
49 | } |
50 | |
51 | public RunUpTest(IContinousPDFFactory pdfFactory) { |
52 | assert pdfFactory != null : "The passed PDF factory may not be null."; |
53 | this.pdfFactory = pdfFactory; |
54 | } |
55 | |
56 | @Override |
57 | public boolean testIndependence(Collection<Double> samples) { |
58 | int[] runCounts = calculateRunCounts(samples); |
59 | int n = samples.size(); |
60 | |
61 | // calculate a statistics denoted by V. According to Knuth, V should |
62 | // have the chi-square distribution with six degrees of freedom. |
63 | double sum = 0.0; |
64 | for (int i = 0; i < 6; i++) { |
65 | for (int j = 0; j < 6; j++) { |
66 | sum += (runCounts[i] - n * B[i]) * (runCounts[j] - n * B[j]) |
67 | * A[i][j]; |
68 | } |
69 | } |
70 | double V = 1.0 / (n - 6) * sum; |
71 | |
72 | // test for chi-square distribution |
73 | IChiSquareDistribution dist = pdfFactory.createChiSquareDistribution(CHI_SQUARE_DOF); |
74 | double upperQuantile = dist.inverseF(CHI_SQUARE_UPPER_QUANTILE); |
75 | double lowerQuantile = dist.inverseF(CHI_SQUARE_LOWER_QUANTILE); |
76 | |
77 | logger.info("Run Up Indepence test: F=" + V); |
78 | |
79 | return V >= lowerQuantile && V <= upperQuantile; |
80 | } |
81 | |
82 | @Override |
83 | public int getLowerSampleLimit() { |
84 | return LOWER_SAMPLE_LIMIT; |
85 | } |
86 | |
87 | private int[] calculateRunCounts(Collection<Double> sequence) { |
88 | int[] runCounts = new int[6]; |
89 | |
90 | int length = 0; |
91 | double highest = Double.MIN_VALUE; |
92 | for (Double d : sequence) { |
93 | if (d > highest) { |
94 | length++; |
95 | } else { |
96 | if (length > 6) |
97 | length = 6; |
98 | runCounts[length - 1]++; |
99 | length = 1; |
100 | } |
101 | highest = d; |
102 | } |
103 | |
104 | return runCounts; |
105 | } |
106 | |
107 | } |