[go: up one dir, main page]

File: avg.c

package info (click to toggle)
r-bioc-preprocesscore 1.36.0-1
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 1,108 kB
  • ctags: 463
  • sloc: ansic: 9,494; sh: 10; makefile: 2
file content (297 lines) | stat: -rw-r--r-- 10,661 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
/************************************************************************
 **
 ** avg.c
 **
 ** created by: B. M. Bolstad   <bmb@bmbolstad.com>
 ** created on: Sep 16, 2007  (but based on earlier work from Nov avg_log.c)
 **
 ** Copyright (C) 2007-2014 Ben Bolstad
 **
 ** last modified: Sept 1, 2014
 **
 ** License: LGPL V2 (same as the rest of the preprocessCore package)
 **
 ** General discussion
 **
 ** Implement average summarization
 **
 ** This file provides functions that take the mean (arithmetic average) by
 ** column of matrices. They may also return the standard error estimate of the mean.
 ** 
 ** There are four main functions (that are exposed to outside this file):
 ** colaverage  - computes averages of each column
 ** colaverage_no_copy - computes averages of each column (does not allocate extra space, which means may change values in input matrix)
 ** ColAverage  - computes averages (and SE of average) of each column using only a subset of rows (with subset specified and identical across columns)
 ** ColAverage_noSE - computes averages of each column using only a subset of rows (with subset specified and identical across columns)
 **
 ** History
 ** Sep 16, 2007 - Initial version
 ** Sep 2014 - Change to size_t rather than int for variables indexing pointers. Improve code documentation.
 ** 
 **
 ************************************************************************/

#include <R.h> 
#include <Rdefines.h>
#include <Rmath.h>
#include <Rinternals.h>

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <stddef.h>

#include "avg.h"



/***************************************************************************
 **
 ** double Avg(double *x, size_t length)
 **
 ** double *x - a vector of PM intensities  
 ** size_t length - length of *x
 **
 ** take the average of input intensities.
 **
 ***************************************************************************/

/* Arithmetic mean of the first `length` entries of x.
 *
 * x      - vector of values to average (only read, never written)
 * length - number of entries of x to use
 *
 * Returns sum(x[0..length-1]) / length. For length == 0 the division is
 * 0.0/0.0, so the result is NaN.
 */
static double Avg(double *x, size_t length){
  size_t i;   /* same type as length: no signed/unsigned comparison, no int overflow on long vectors */
  double sum = 0.0;

  for (i = 0; i < length; i++){
    sum += x[i];
  }

  return sum/(double)length;
}



/***************************************************************************
 **
 ** static double AvgSE(double *x, double mean, size_t length)
 **
 ** double *x - a vector of PM intensities
 ** double mean - the mean of x computed using Avg above
 ** size_t length - length of *x
 **
 ** compute the standard error of the average of the input intensities
 ** (sample standard deviation divided by sqrt(length)).
 ** 
 **
 ***************************************************************************/

static double AvgSE(double *x, double mean, size_t length){
  int i;
  double sum = 0.0;

  for (i=0; i < length; i++){
    sum = sum + (x[i]- mean)*(x[i] - mean);
  }
  
  sum = sqrt(sum/(double)(length -1));
  sum = sum/sqrt((double)length);

  return (sum);    
}



/***************************************************************************
 ** 
 ** void colaverage_no_copy(double *data, size_t rows, size_t cols, double *results, double *resultsSE)
 **
 ** aim: given a data matrix of probe intensities, compute averages in column wise manner. also return SE of mean 
 **      
 **
 ** double *data - Probe intensity matrix
 ** size_t rows - number of rows in matrix *data (probes)
 ** size_t cols - number of cols in matrix *data (chips)
 ** double *results - already allocated location to store expression measures (cols length)
 ** double *resultsSE - already allocated location to store SE of expression measures (cols length)
 **
 ***************************************************************************/

/*! \brief Compute the mean and SE of the mean
 * 
 *  Given a data matrix of probe intensities compute average expression measure and SE of this estimate
 *  on a column by column basis. Specifically, the arithmetic mean
 *  is computed for each column. The sample standard error is also computed. This function guarantees that 
 *  no additional memory is temporarily allocated to copy the input data matrix. However, this means that
 *  on output the input matrix may be changed.
 *    
 *
 * @param data a matrix containing data stored column-wise stored in rows*cols length of memory
 * @param rows the number of rows in the matrix 
 * @param cols the number of columns in the matrix
 * @param results pre-allocated space to store output averages. Should be of length cols
 * @param resultsSE pre-allocated space to store SE of averages. Should be of length cols
 *
 *  
 */

/* Column-wise mean and standard error of the mean, computed directly on the
 * caller's matrix (no scratch buffer is allocated).
 *
 * data is indexed as data[j*rows + i], i.e. column j occupies rows
 * consecutive doubles. results[j] and resultsSE[j] are written for every
 * column j in [0, cols).
 */
void colaverage_no_copy(double *data, size_t rows, size_t cols, double *results, double *resultsSE){
  size_t j;   /* size_t to match cols and keep j*rows free of int overflow */
  double *column;

  for (j = 0; j < cols; j++){
    column = &data[j*rows];
    results[j] = Avg(column, rows);
    resultsSE[j] = AvgSE(column, results[j], rows);
  }
}



/***************************************************************************
 ** 
 ** void colaverage(double *data, size_t rows, size_t cols, double *results, double *resultsSE)
 **
 ** aim: given a data matrix of probe intensities, compute averages in column wise manner 
 **      
 **
 ** double *data - Probe intensity matrix
 ** size_t rows - number of rows in matrix *data (probes)
 ** size_t cols - number of cols in matrix *data (chips)
 ** double *results - already allocated location to store expression measures (cols length)
 ** double *resultsSE - already allocated location to store SE of expression measures (cols length)
 **
 ***************************************************************************/

/*! \brief Compute the mean and SE of the mean
 * 
 *  Given a data matrix of probe intensities compute average expression measure and SE of this estimate
 *  on a column by column basis. Specifically, the arithmetic mean
 *  is computed for each column. The sample standard error is also computed. On output the data matrix will
 *  be unchanged.
 *    
 *
 * @param data a matrix containing data stored column-wise stored in rows*cols length of memory
 * @param rows the number of rows in the matrix 
 * @param cols the number of columns in the matrix
 * @param results pre-allocated space to store output averages. Should be of length cols
 * @param resultsSE pre-allocated space to store SE of averages. Should be of length cols
 *
 *  
 */

/* Column-wise mean and standard error of the mean; the input matrix is left
 * unchanged.
 *
 * data is indexed as data[j*rows + i], i.e. column j occupies rows
 * consecutive doubles. results[j] and resultsSE[j] are written for every
 * column j in [0, cols).
 *
 * Avg and AvgSE only read their input vector, so each column is passed to
 * them in place; no temporary copy of the column is required to keep data
 * unmodified.
 */
void colaverage(double *data, size_t rows, size_t cols, double *results, double *resultsSE){
  size_t j;   /* size_t to match cols and keep j*rows free of int overflow */
  double *column;

  for (j = 0; j < cols; j++){
    column = &data[j*rows];
    results[j] = Avg(column, rows);
    resultsSE[j] = AvgSE(column, results[j], rows);
  }
}



/***************************************************************************
 **
 ** void ColAverage(double *data, size_t rows, size_t cols, int *cur_rows, double *results, size_t nprobes, double *resultsSE)
 **
 ** aim: given a data matrix of probe intensities, and a list of rows in the matrix
 **      corresponding to a single probeset, compute average expression measure.
 **      Note that data is a probes by chips matrix.
 **
 ** double *data - Probe intensity matrix
 ** size_t rows - number of rows in matrix *data (probes)
 ** size_t cols - number of cols in matrix *data (chips)
 ** int *cur_rows - indices of rows corresponding to current probeset
 ** double *results - already allocated location to store expression measures (cols length)
 ** size_t nprobes - number of probes in current probeset.
 ** double *resultsSE - already allocated location to store expression measures SE (cols length)
 **
 ***************************************************************************/

/*! \brief Given a data matrix of probe intensities, and a list of rows in the matrix 
 *      corresponding to a single probeset, compute average expression measure. 
 *      Note that data is a probes by chips matrix. Also compute SE estimates
 *
 * @param data a matrix containing data stored column-wise stored in rows*cols length of memory
 * @param rows the number of rows in the matrix 
 * @param cols the number of columns in the matrix
 * @param cur_rows a vector containing row indices to use
 * @param results pre-allocated space to store output averages. Should be of length cols
 * @param nprobes number of probes in current set
 * @param resultsSE pre-allocated space to store SE of averages. Should be of length cols
 *
 *  
 */

/* Mean and standard error of the mean of a subset of rows, per column.
 *
 * For each column j in [0, cols) the values data[j*rows + cur_rows[i]],
 * i in [0, nprobes), are gathered into a contiguous scratch vector and
 * passed to Avg / AvgSE; the outcomes are stored in results[j] and
 * resultsSE[j]. data itself is only read.
 *
 * A single scratch vector of nprobes doubles is reused for every column, so
 * the temporary storage does not grow with cols.
 */
void ColAverage(double *data, size_t rows, size_t cols, int *cur_rows, double *results, size_t nprobes, double *resultsSE){
  size_t i, j;   /* size_t to match the bounds and keep index arithmetic free of int overflow */
  double *z = Calloc(nprobes,double);

  for (j = 0; j < cols; j++){
    /* gather the selected rows of column j */
    for (i = 0; i < nprobes; i++){
      z[i] = data[j*rows + cur_rows[i]];
    }
    results[j] = Avg(z,nprobes);
    resultsSE[j] = AvgSE(z,results[j],nprobes);
  }

  Free(z);
}



/***************************************************************************
 **
 ** void ColAverage_noSE(double *data, size_t rows, size_t cols, int *cur_rows, double *results, size_t nprobes)
 **
 ** aim: given a data matrix of probe intensities, and a list of rows in the matrix
 **      corresponding to a single probeset, compute average expression measure.
 **      Note that data is a probes by chips matrix.
 **
 ** double *data - Probe intensity matrix
 ** size_t rows - number of rows in matrix *data (probes)
 ** size_t cols - number of cols in matrix *data (chips)
 ** int *cur_rows - indices of rows corresponding to current probeset
 ** double *results - already allocated location to store expression measures (cols length)
 ** size_t nprobes - number of probes in current probeset.
 **
 ***************************************************************************/

/*! \brief Given a data matrix of probe intensities, and a list of rows in the matrix 
 *      corresponding to a single probeset, compute average expression measure. 
 *      Note that data is a probes by chips matrix. 
 *
 * @param data a matrix containing data stored column-wise stored in rows*cols length of memory
 * @param rows the number of rows in the matrix 
 * @param cols the number of columns in the matrix
 * @param cur_rows a vector containing row indices to use
 * @param results pre-allocated space to store output averages. Should be of length cols
 * @param nprobes number of probes in current set
 *
 *  
 */

/* Mean of a subset of rows, per column (no standard error is computed).
 *
 * For each column j in [0, cols) the values data[j*rows + cur_rows[i]],
 * i in [0, nprobes), are gathered into a contiguous scratch vector and
 * passed to Avg; the outcome is stored in results[j]. data itself is only
 * read.
 *
 * A single scratch vector of nprobes doubles is reused for every column, so
 * the temporary storage does not grow with cols.
 */
void ColAverage_noSE(double *data, size_t rows, size_t cols, int *cur_rows, double *results, size_t nprobes){
  size_t i, j;   /* size_t to match the bounds and keep index arithmetic free of int overflow */
  double *z = Calloc(nprobes,double);

  for (j = 0; j < cols; j++){
    /* gather the selected rows of column j */
    for (i = 0; i < nprobes; i++){
      z[i] = data[j*rows + cur_rows[i]];
    }
    results[j] = Avg(z,nprobes);
  }

  Free(z);
}