[go: up one dir, main page]

File: Ewens.cpp

package info (click to toggle)
freebayes 1.3.9-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 6,984 kB
  • sloc: cpp: 125,778; ansic: 4,581; sh: 1,084; python: 672; asm: 271; javascript: 94; lisp: 85; makefile: 37; perl: 27
file content (51 lines) | stat: -rw-r--r-- 1,738 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#include "Ewens.h"


long double alleleFrequencyProbability(const map<int, int>& alleleFrequencyCounts, long double theta) {

    int M = 0;
    long double p = 1;

    for (map<int, int>::const_iterator f = alleleFrequencyCounts.begin(); f != alleleFrequencyCounts.end(); ++f) {
        int frequency = f->first;
        int count = f->second;
        M += frequency * count;
        p *= (double) pow((double) theta, (double) count) / ((double) pow((double) frequency, (double) count) * factorial(count));
    }

    long double thetaH = 1;
    for (int h = 1; h < M; ++h)
        thetaH *= theta + h;

    return factorial(M) / (theta * thetaH) * p;

}

AlleleFrequencyProbabilityCache alleleFrequencyProbabilityCache;

long double alleleFrequencyProbabilityln(const map<int, int>& alleleFrequencyCounts, long double theta) {
    return alleleFrequencyProbabilityCache.alleleFrequencyProbabilityln(alleleFrequencyCounts, theta);
}

// Implements Ewens' Sampling Formula, which provides probability of a given
// partition of alleles in a sample from a population
long double impl_alleleFrequencyProbabilityln(const map<int, int>& alleleFrequencyCounts, long double theta) {

    int M = 0; // multiplicity of site
    long double p = 0;
    long double thetaln = log(theta);

    for (map<int, int>::const_iterator f = alleleFrequencyCounts.begin(); f != alleleFrequencyCounts.end(); ++f) {
        int frequency = f->first;
        int count = f->second;
        M += frequency * count;
        p += powln(thetaln, count) - (powln(log(frequency), count) + factorialln(count));
    }

    long double thetaH = 0;
    for (int h = 1; h < M; ++h)
        thetaH += log(theta + h);

    return factorialln(M) - (thetaln + thetaH) + p;

}