[go: up one dir, main page]

File: lm.c

package info (click to toggle)
r-bioc-preprocesscore 1.36.0-1
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 1,108 kB
  • ctags: 463
  • sloc: ansic: 9,494; sh: 10; makefile: 2
file content (236 lines) | stat: -rw-r--r-- 7,128 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
/*********************************************************************
 **
 ** file: lm.c
 **
 ** Aim: implement linear models.
 **
 ** Copyright (C) 2003 Ben Bolstad
 **
 ** created by: B. M. Bolstad <bolstad@stat.berkeley.edu>
 ** 
 ** created on: Jun 05, 2003
 **
 ** Last modified: Jun 05, 2003
 **
 ** The aim is to provide functions for fitting linear models.
 **
 **
 ** History
 **
 ** Jun 05, 2003 - Moved lm_wfit from rlm.c to this file lm.c
 **                modify lm_wfit to handle cases where weight is 0.
 **                otherwise bad things will happen when using a psi_fn that 
 **                gives weights of 0.
 ** Jul 27, 2003 - better handling of cases where a parameter
 **                is not estimable (set it to NA)
 ** Sep 14, 2003 - fix a bug where k was where a j should be in a for loop
 ** Mar 1, 2006 - change all comments to ansi style
 ** May 19, 2007 - branch out of affyPLM into a new package preprocessCore, then restructure the code. Add doxygen style documentation
 **
 ********************************************************************/

#include <R.h>
#include <Rdefines.h>
#include <Rmath.h>
#include <Rinternals.h>
#include "dqrls.h"
#include "lm.h"

/*************************************************************************
 **
 ** void lm_wfit(double *x, double *y, double *w, int rows, int cols, double tol, double *outbeta, double *outresid)
 ** 
 ** double *x - coefficient matrix: dimension rows*cols
 ** double *y - dependent variable: length rows
 ** double *w - weights for each observation: length rows
 ** int rows, cols - dimension of input
 ** double tol - machine tolerance used in qr decomp
 ** double *outbeta - place to output beta estimates: lenght cols
 ** double *outresid - place to output residuals: length rows
 **
 ** This function computes weighted linear regression estimates using QR decomposition
 ** Note that a linpack routine is used to do the actual fit.
 **
 ** For now we will assume that singularities will not exist when doing QR, but will fix later
 ** if becomes problematic.  NB a printf comment is returned when matrix is of full rank
 ** but still no proper checking.
 **
 *************************************************************************/

void lm_wfit(double *x, double *y, double *w, int rows, int cols, double tol, double *out_beta, double *out_resids){
  int i,j;
  int ny = 1;
  int k;
  int numzero_weights = 0,nrows,totnumzero=0;
  
  double fittedvalue;

  double *wts = Calloc(rows,double);
  double *x_wts_f = Calloc(rows*cols,double);
  double *y_wts_f = Calloc(rows,double);
  double *beta = Calloc(cols,double);
  double *resid = Calloc(rows,double);
  double *qraux = Calloc(cols,double);
  double *qty = Calloc(rows,double);
  double *work = Calloc(2*cols,double);
  
  int *jpvt = Calloc(cols,int);

  for (i=0; i < rows; i++){
    if (w[i] == 0.0){
      totnumzero++;
    }
  }

  if (totnumzero > 0){
    /* we need to chop up the X and Y matricies a little more then removing the 
       observations that have weight = 0. In particular fit the model removing the weight
       0 observations. then to compute the residuals for the weight 0 observations used fitted
       values and observed X to compute fitted values */
    numzero_weights = 0;
    for (i=0; i < rows; i++){
      if (w[i] > 0.0){
	wts[i - numzero_weights] = sqrt(w[i]);
	y_wts_f[i - numzero_weights] =  wts[i - numzero_weights]*y[i];
	for (j = 0; j < cols; j++){
	  x_wts_f[j*(rows-totnumzero)+(i-numzero_weights)] = wts[i - numzero_weights]*x[j*rows+i];
	}
      } else {
	numzero_weights++;
      }
    }
    
    for (j=0;j < cols; j++){
      jpvt[j] = j;
    }
    
    nrows = rows - numzero_weights;
    
    /* now fit the model */
    dqrls_(x_wts_f,&nrows,&cols,y_wts_f,&ny,&tol,beta,resid,qty,&k,jpvt,qraux,work);

    if (k != cols){
      /* the solution is not of full rank */
      /* printf("Improper fit\n"); */
       
      for (j = 0; j < k; j++){
	out_beta[j] = beta[jpvt[j]];
      }
      for(j =k; j < cols; j++){
	out_beta[jpvt[j]] =  R_NaN;
      }

    } else {
    
    /* detangle beta and residual estimates */
      
      for (j = 0; j < cols; j++){
	out_beta[j] = beta[jpvt[j]];
      }
    }
    /* now the model is fitted, lets compute residuals for the 0 weighted observations
       by first computing fitted values. */
    
    numzero_weights = 0;
    for (i=0; i < rows; i++){
      if (w[i] > 0){
	out_resids[i] = resid[i- numzero_weights]/wts[i- numzero_weights];
      } else {
	/* compute the fitted value */
	numzero_weights++;
	fittedvalue = 0.0;
	for (j=0; j <cols; j++){
	  if (out_beta[j] != R_NaN){
	    fittedvalue+=out_beta[j]*x[j*rows+i];
	  }
	}
	out_resids[i] = y[i] -  fittedvalue;
      }
    }

  } else {
    for (i=0; i < rows; i++){
      wts[i] = sqrt(w[i]);
    }
    
    /* dqrls is a linpack routine for least squares solution */
  
    for (i=0; i < rows; i++){
      for (j = 0; j < cols; j++){
	x_wts_f[j*rows+i] = wts[i]*x[j*rows+i];
      }
    }
    
    
    for (i=0; i < rows; i++){
      y_wts_f[i] =  wts[i]*y[i];
    }
    
    for (j=0;j < cols; j++){
      jpvt[j] = j;
    }
    
    /* using function from linpack to fit linear model
       dqrls_(double *x, int *n, int *p, double *y, int *ny, double *tol, double *b, double *rsd, double *qty, int *k, int *jpvt, double *qraux, double *work);

    */

    dqrls_(x_wts_f,&rows,&cols,y_wts_f,&ny,&tol,beta,resid,qty,&k,jpvt,qraux,work);
    
    if (k != cols){
      /* the solution is not of full rank */
      /* printf("Improper fit\n");*/ 
      for (j = 0; j < k; j++){
	out_beta[j] = beta[jpvt[j]];
      }
      for(j =k; j < cols; j++){
	out_beta[j] =  R_NaN; /* out_beta[jpvt[j]] =  R_NaN; */
      }
    } else {
    
      /* detangle beta and residual estimates */
      for (j = 0; j < cols; j++){
	out_beta[j] = beta[jpvt[j]];
      }
    }
    for (i=0; i < rows; i++){
      out_resids[i] = resid[i]/wts[i];
      /* resid[i] = resid[i]/wts[i]; */
    }
  }
  
  Free(wts); 
  Free(x_wts_f); 
  Free(y_wts_f);
  Free(beta);
  Free(resid);
  Free(qraux);
  Free(qty);
  Free(work);
  Free(jpvt );
}


/**********************************************************************************
 **
 ** void lm_wfit_R(double *x, double *y, double *w, int *rows, int *cols, double *tol, double *out_beta, double *out_resids)
 **
 ** double *x - design matrix: dimension rows*cols
 ** double *y - dependent variable: length rows
 ** double *w - weights for each observation: length rows
 ** int *rows, *cols - pointers to the dimensions of the input
 ** double *tol - pointer to the tolerance used in the QR decomposition
 ** double *out_beta - place to output beta estimates: length cols
 ** double *out_resids - place to output residuals: length rows
 **
 ** A wrapper around lm_wfit() in which every scalar argument is passed by
 ** pointer, so that the fitting function can be tested through .C() calls
 ** from R.
 **
 ********************************************************************************/

void lm_wfit_R(double *x, double *y, double *w, int *rows, int *cols, double *tol, double *out_beta, double *out_resids){
  /* unpack the scalars that arrive by pointer */
  const int n_obs = *rows;
  const int n_params = *cols;
  const double qr_tol = *tol;

  lm_wfit(x, y, w, n_obs, n_params, qr_tol, out_beta, out_resids);
}