| 1 | package de.uka.ipd.sdq.statistics.independence; |
| 2 | |
| 3 | import java.util.Collection; |
| 4 | |
| 5 | import org.apache.log4j.Logger; |
| 6 | |
| 7 | import de.uka.ipd.sdq.probfunction.math.IChiSquareDistribution; |
| 8 | import de.uka.ipd.sdq.probfunction.math.IContinousPDFFactory; |
| 9 | import de.uka.ipd.sdq.probfunction.math.apache.impl.PDFFactory; |
| 10 | |
| 11 | /** |
| 12 | * Implements the "run test" algorithm which tests a data sequence for |
| 13 | * independence. |
| 14 | * |
| 15 | * Confer [Donald E. Knuth: The Art of Computer Programming. Seminumerical |
| 16 | * Algorithms] |
| 17 | * |
| 18 | * @author Philipp Merkle |
| 19 | * |
| 20 | */ |
| 21 | public class RunUpTest implements IIndependenceTest { |
| 22 | |
| 23 | private static final Logger logger = Logger.getLogger(RunUpTest.class); |
| 24 | |
| 25 | private static final int LOWER_SAMPLE_LIMIT = 4000; |
| 26 | |
| 27 | private static final int CHI_SQUARE_DOF = 6; |
| 28 | |
| 29 | private static final double CHI_SQUARE_LOWER_QUANTILE = 0.01; |
| 30 | |
| 31 | private static final double CHI_SQUARE_UPPER_QUANTILE = 0.99; |
| 32 | |
| 33 | private static final double[][] A = { |
| 34 | { 4529.4, 9044.9, 13568, 18091, 22615, 27892 }, |
| 35 | { 9044.9, 18097, 27139, 36187, 45234, 55789 }, |
| 36 | { 13568, 27139, 40721, 54281, 67852, 83685 }, |
| 37 | { 18091, 36187, 54281, 72414, 90470, 111580 }, |
| 38 | { 22615, 45234, 67852, 90470, 113262, 139476 }, |
| 39 | { 27892, 55789, 83685, 111580, 139476, 172860 } }; |
| 40 | |
| 41 | private static final double[] B = { 1.0 / 6, 5.0 / 24, 11.0 / 120, |
| 42 | 19.0 / 720, 29.0 / 5040, 1.0 / 840 }; |
| 43 | |
| 44 | private IContinousPDFFactory pdfFactory; |
| 45 | |
| 46 | public RunUpTest() { |
| 47 | // use apache math factory as default |
| 48 | this(new PDFFactory()); |
| 49 | } |
| 50 | |
| 51 | public RunUpTest(IContinousPDFFactory pdfFactory) { |
| 52 | assert pdfFactory != null : "The passed PDF factory may not be null."; |
| 53 | this.pdfFactory = pdfFactory; |
| 54 | } |
| 55 | |
| 56 | @Override |
| 57 | public boolean testIndependence(Collection<Double> samples) { |
| 58 | int[] runCounts = calculateRunCounts(samples); |
| 59 | int n = samples.size(); |
| 60 | |
| 61 | // calculate a statistics denoted by V. According to Knuth, V should |
| 62 | // have the chi-square distribution with six degrees of freedom. |
| 63 | double sum = 0.0; |
| 64 | for (int i = 0; i < 6; i++) { |
| 65 | for (int j = 0; j < 6; j++) { |
| 66 | sum += (runCounts[i] - n * B[i]) * (runCounts[j] - n * B[j]) |
| 67 | * A[i][j]; |
| 68 | } |
| 69 | } |
| 70 | double V = 1.0 / (n - 6) * sum; |
| 71 | |
| 72 | // test for chi-square distribution |
| 73 | IChiSquareDistribution dist = pdfFactory.createChiSquareDistribution(CHI_SQUARE_DOF); |
| 74 | double upperQuantile = dist.inverseF(CHI_SQUARE_UPPER_QUANTILE); |
| 75 | double lowerQuantile = dist.inverseF(CHI_SQUARE_LOWER_QUANTILE); |
| 76 | |
| 77 | logger.info("Run Up Indepence test: F=" + V); |
| 78 | |
| 79 | return V >= lowerQuantile && V <= upperQuantile; |
| 80 | } |
| 81 | |
| 82 | @Override |
| 83 | public int getLowerSampleLimit() { |
| 84 | return LOWER_SAMPLE_LIMIT; |
| 85 | } |
| 86 | |
| 87 | private int[] calculateRunCounts(Collection<Double> sequence) { |
| 88 | int[] runCounts = new int[6]; |
| 89 | |
| 90 | int length = 0; |
| 91 | double highest = Double.MIN_VALUE; |
| 92 | for (Double d : sequence) { |
| 93 | if (d > highest) { |
| 94 | length++; |
| 95 | } else { |
| 96 | if (length > 6) |
| 97 | length = 6; |
| 98 | runCounts[length - 1]++; |
| 99 | length = 1; |
| 100 | } |
| 101 | highest = d; |
| 102 | } |
| 103 | |
| 104 | return runCounts; |
| 105 | } |
| 106 | |
| 107 | } |