1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297
|
/************************************************************************
**
** avg.c
**
** created by: B. M. Bolstad <bmb@bmbolstad.com>
** created on: Sep 16, 2007 (but based on earlier work from Nov avg_log.c)
**
** Copyright (C) 2007-2014 Ben Bolstad
**
** last modified: Sept 1, 2014
**
** License: LGPL V2 (same as the rest of the preprocessCore package)
**
** General discussion
**
** Implement average summarization
**
** This file provides functions that compute the arithmetic mean of each
** column of a matrix. Some of them also return the standard error estimate of the mean.
**
** There are four main functions (that are exposed to outside this file):
** colaverage - computes averages of each column
** colaverage_no_copy - computes averages of each column (does not allocate extra space, which means may change values in input matrix)
** ColAverage - computes averages (and SE of average) of each column using only a subset of rows (with subset specified and identical across columns)
** ColAverage_noSE - computes averages of each column using only a subset of rows (with subset specified and identical across columns)
**
** History
** Sep 16, 2007 - Initial version
** Sep 2014 - Change to size_t rather than int for variables indexing pointers. Improve code documentation.
**
**
************************************************************************/
#include <R.h>
#include <Rdefines.h>
#include <Rmath.h>
#include <Rinternals.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <stddef.h>
#include "avg.h"
/***************************************************************************
**
** static double Avg(double *x, size_t length)
**
** double *x - vector of values to average (only read, never written)
** size_t length - number of elements in x
**
** Returns the arithmetic mean of x[0] .. x[length-1].
**
** No check is made for length == 0; in that case the result is the
** floating point division 0.0/0.0.
**
***************************************************************************/
static double Avg(double *x, size_t length){
  /* The index has the same type as length: this avoids a signed/unsigned
     comparison and keeps the loop well defined for vectors longer than
     INT_MAX elements. */
  size_t i;
  double sum = 0.0;

  for (i = 0; i < length; i++){
    sum += x[i];
  }
  return sum/(double)length;
}
/***************************************************************************
**
** static double AvgSE(double *x, double mean, size_t length)
**
** double *x - a vector of PM intensities
** double mean - the mean of x computed using Avg above
** int length - length of *x
**
** compute the standard error of the average of log2 PM intensities.
**
**
***************************************************************************/
static double AvgSE(double *x, double mean, size_t length){
int i;
double sum = 0.0;
for (i=0; i < length; i++){
sum = sum + (x[i]- mean)*(x[i] - mean);
}
sum = sqrt(sum/(double)(length -1));
sum = sum/sqrt((double)length);
return (sum);
}
/***************************************************************************
**
** void colaverage_no_copy(double *data, size_t rows, size_t cols, double *results, double *resultsSE)
**
** aim: given a data matrix of probe intensities, compute averages in column wise manner. also return SE of mean
**
**
** double *data - Probe intensity matrix (column-major, rows*cols values)
** size_t rows - number of rows in matrix *data (probes)
** size_t cols - number of cols in matrix *data (chips)
** double *results - already allocated location to store column averages (cols length)
** double *resultsSE - already allocated location to store SE of the averages (cols length)
**
***************************************************************************/
/*! \brief Compute the mean and SE of the mean
 *
 * Given a data matrix of probe intensities compute average expression measure and SE of this estimate
 * on a column by column basis. Specifically, the arithmetic mean
 * is computed for each column. The sample standard error is also computed. No temporary copy of
 * the input is allocated: each column is handed to the summary helpers in place. The interface
 * therefore does not promise that the input matrix is left unchanged.
 *
 *
 * @param data a matrix containing data stored column-wise stored in rows*cols length of memory
 * @param rows the number of rows in the matrix
 * @param cols the number of columns in the matrix
 * @param results pre-allocated space to store output averages. Should be of length cols
 * @param resultsSE pre-allocated space to store SE of averages. Should be of length cols
 *
 *
 */
void colaverage_no_copy(double *data, size_t rows, size_t cols, double *results, double *resultsSE){
  /* size_t index: same type as cols, and j*rows is computed without any
     int -> size_t conversion. */
  size_t j;

  for (j = 0; j < cols; j++){
    double *column = &data[j*rows];   /* start of column j */

    results[j] = Avg(column,rows);
    resultsSE[j] = AvgSE(column,results[j],rows);
  }
}
/***************************************************************************
**
** void colaverage(double *data, size_t rows, size_t cols, double *results, double *resultsSE)
**
** aim: given a data matrix of probe intensities, compute averages in column wise manner
**
**
** double *data - Probe intensity matrix (column-major, rows*cols values)
** size_t rows - number of rows in matrix *data (probes)
** size_t cols - number of cols in matrix *data (chips)
** double *results - already allocated location to store column averages (cols length)
** double *resultsSE - already allocated location to store SE of the averages (cols length)
**
***************************************************************************/
/*! \brief Compute the mean and SE of the mean
 *
 * Given a data matrix of probe intensities compute average expression measure and SE of this estimate
 * on a column by column basis. Specifically, the arithmetic mean
 * is computed for each column. The sample standard error is also computed. Each column is copied
 * into a scratch buffer of length rows before being summarized, so on output the data matrix will
 * be unchanged.
 *
 *
 * @param data a matrix containing data stored column-wise stored in rows*cols length of memory
 * @param rows the number of rows in the matrix
 * @param cols the number of columns in the matrix
 * @param results pre-allocated space to store output averages. Should be of length cols
 * @param resultsSE pre-allocated space to store SE of averages. Should be of length cols
 *
 *
 */
void colaverage(double *data, size_t rows, size_t cols, double *results, double *resultsSE){
  /* size_t indices: same type as rows/cols, so the loop conditions and
     the j*rows + i offset involve no signed/unsigned mixing. */
  size_t i, j;
  double *z = Calloc(rows,double);   /* scratch copy of one column */

  for (j = 0; j < cols; j++){
    for (i = 0; i < rows; i++){
      z[i] = data[j*rows + i];
    }
    results[j] = Avg(z,rows);
    resultsSE[j] = AvgSE(z,results[j],rows);
  }
  Free(z);
}
/***************************************************************************
**
** void ColAverage(double *data, size_t rows, size_t cols, int *cur_rows, double *results, size_t nprobes, double *resultsSE)
**
** aim: given a data matrix of probe intensities, and a list of rows in the matrix
** corresponding to a single probeset, compute average expression measure.
** Note that data is a probes by chips matrix.
**
** double *data - Probe intensity matrix (column-major, rows*cols values)
** size_t rows - number of rows in matrix *data (probes)
** size_t cols - number of cols in matrix *data (chips)
** int *cur_rows - indices of rows corresponding to current probeset (nprobes length)
** double *results - already allocated location to store expression measures (cols length)
** size_t nprobes - number of probes in current probeset.
** double *resultsSE - already allocated location to store expression measures SE (cols length)
**
***************************************************************************/
/*! \brief Given a data matrix of probe intensities, and a list of rows in the matrix
 * corresponding to a single probeset, compute average expression measure.
 * Note that data is a probes by chips matrix. Also compute SE estimates
 *
 * The selected rows of every column are first gathered into a temporary nprobes*cols
 * buffer; the mean and its SE are then computed per column from that buffer. The input
 * matrix is only read.
 *
 * @param data a matrix containing data stored column-wise stored in rows*cols length of memory
 * @param rows the number of rows in the matrix
 * @param cols the number of columns in the matrix
 * @param cur_rows a vector containing row indices to use. Each entry is used directly as an
 *        offset within a column (data[j*rows + cur_rows[i]]); no bounds checking is performed
 * @param results pre-allocated space to store output averages. Should be of length cols
 * @param nprobes number of probes in current set
 * @param resultsSE pre-allocated space to store SE of averages. Should be of length cols
 *
 *
 */
void ColAverage(double *data, size_t rows, size_t cols, int *cur_rows, double *results, size_t nprobes, double *resultsSE){
  /* size_t indices: same type as cols/nprobes, so the loop conditions and
     buffer offsets involve no signed/unsigned mixing. */
  size_t i, j;
  double *z = Calloc(nprobes*cols,double);   /* gathered rows, column-major */

  /* gather the requested rows of each column */
  for (j = 0; j < cols; j++){
    for (i = 0; i < nprobes; i++){
      z[j*nprobes + i] = data[j*rows + cur_rows[i]];
    }
  }

  /* summarize each gathered column */
  for (j = 0; j < cols; j++){
    results[j] = Avg(&z[j*nprobes],nprobes);
    resultsSE[j] = AvgSE(&z[j*nprobes],results[j],nprobes);
  }
  Free(z);
}
/***************************************************************************
**
** void ColAverage_noSE(double *data, size_t rows, size_t cols, int *cur_rows, double *results, size_t nprobes)
**
** aim: given a data matrix of probe intensities, and a list of rows in the matrix
** corresponding to a single probeset, compute average expression measure.
** Note that data is a probes by chips matrix.
**
** double *data - Probe intensity matrix (column-major, rows*cols values)
** size_t rows - number of rows in matrix *data (probes)
** size_t cols - number of cols in matrix *data (chips)
** int *cur_rows - indices of rows corresponding to current probeset (nprobes length)
** double *results - already allocated location to store expression measures (cols length)
** size_t nprobes - number of probes in current probeset.
**
***************************************************************************/
/*! \brief Given a data matrix of probe intensities, and a list of rows in the matrix
 * corresponding to a single probeset, compute average expression measure.
 * Note that data is a probes by chips matrix.
 *
 * The selected rows of every column are first gathered into a temporary nprobes*cols
 * buffer; the mean is then computed per column from that buffer. No SE is computed.
 * The input matrix is only read.
 *
 * @param data a matrix containing data stored column-wise stored in rows*cols length of memory
 * @param rows the number of rows in the matrix
 * @param cols the number of columns in the matrix
 * @param cur_rows a vector containing row indices to use. Each entry is used directly as an
 *        offset within a column (data[j*rows + cur_rows[i]]); no bounds checking is performed
 * @param results pre-allocated space to store output averages. Should be of length cols
 * @param nprobes number of probes in current set
 *
 *
 */
void ColAverage_noSE(double *data, size_t rows, size_t cols, int *cur_rows, double *results, size_t nprobes){
  /* size_t indices: same type as cols/nprobes, so the loop conditions and
     buffer offsets involve no signed/unsigned mixing. */
  size_t i, j;
  double *z = Calloc(nprobes*cols,double);   /* gathered rows, column-major */

  /* gather the requested rows of each column */
  for (j = 0; j < cols; j++){
    for (i = 0; i < nprobes; i++){
      z[j*nprobes + i] = data[j*rows + cur_rows[i]];
    }
  }

  /* summarize each gathered column */
  for (j = 0; j < cols; j++){
    results[j] = Avg(&z[j*nprobes],nprobes);
  }
  Free(z);
}
|