File: fftw_threads.c

package info (click to toggle)
fftw 2.1.5-7
links: PTS, VCS
area: main
in suites: forky, sid, trixie
size: 6,772 kB
sloc: ansic: 65,239; sh: 7,399; ml: 3,084; perl: 2,894; makefile: 380; fortran: 102
file content (180 lines) | stat: -rw-r--r-- 5,775 bytes
parent folder | download | duplicates (5)
/*
 * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */

/* Note: this header file *must* be the first thing in this file,
   due to AIX alloca lossage. */
#include "fftw_threads-int.h"

/* Distribute a loop from 0 to loopmax-1 over at most nthreads threads.
   proc(d) is called to execute a block of iterations from d->min
   to d->max-1.  d->thread_num indicates the number of the thread
   that is executing proc (counting from 0), and d->data is the same
   as the data parameter passed to fftw_thread_spawn_loop.

   Fewer than nthreads threads may be used when that gives the same
   critical path.  An nthreads value below 1 is treated as 1.  If
   loopmax is not positive there are no iterations to execute, so
   proc is never called.

   This function returns only when all the threads have completed. */
void fftw_thread_spawn_loop(int loopmax, int nthreads,
			    fftw_loop_function proc, void *data)
{
     int block_size;

     /* Nothing to do for an empty loop.  This must be checked before
        the block size computation: loopmax == 0 would give
        block_size == 0 and then a division by zero below. */
     if (loopmax <= 0)
	  return;

     /* Zero or negative thread counts mean "run serially"; negative
        values would otherwise also be able to produce block_size == 0. */
     if (nthreads < 1)
	  nthreads = 1;

     /* Choose the block size and number of threads in order to (1)
        minimize the critical path and (2) use the fewest threads that
        achieve the same critical path (to minimize overhead).
        e.g. if loopmax is 5 and nthreads is 4, we should use only 3
        threads with block sizes of 2, 2, and 1. */
     block_size = (loopmax + nthreads - 1) / nthreads;
     nthreads = (loopmax + block_size - 1) / block_size;

     if (nthreads <= 1) {
	  /* Serial case: run the whole range in the calling thread. */
	  fftw_loop_data d;
	  d.min = 0; d.max = loopmax;
	  d.thread_num = 0;
	  d.data = data;
	  proc(&d);
     }
     else {
#ifdef FFTW_USING_COMPILER_THREADS
	  fftw_loop_data d;
#else
	  fftw_loop_data *d;
	  fftw_thread_id *tid;
#endif
	  int i;
	  
#ifdef FFTW_USING_COMPILER_THREADS
	  
	  /* Let the compiler parallelize the loop over blocks; each
	     iteration gets its own private copy of d. */
#if defined(FFTW_USING_SGIMP_THREADS)
#pragma parallel local(d,i)
	  {
#pragma pfor iterate(i=0; nthreads; 1)
#elif defined(FFTW_USING_OPENMP_THREADS)
#pragma omp parallel for private(d)
#endif	
	       for (i = 0; i < nthreads; ++i) {
		    d.max = (d.min = i * block_size) + block_size;
		    /* the last block may be shorter than block_size */
                    if (d.max > loopmax)
                         d.max = loopmax;
		    d.thread_num = i;
		    d.data = data;
		    proc(&d);
	       }
#if defined(FFTW_USING_SGIMP_THREADS)
	  }
#endif

#else /* ! FFTW_USING_COMPILER_THREADS, i.e. explicit thread spawning: */

	  /* One fftw_loop_data per block (nthreads of them) ... */
	  d = (fftw_loop_data *) ALLOCA(sizeof(fftw_loop_data) * nthreads);

	  /* ... but only nthreads-1 spawned threads, because the calling
	     thread executes the last block itself.  The decrement is
	     done as its own statement rather than inside the ALLOCA
	     argument, in case ALLOCA is a macro that evaluates its
	     argument more than once. */
	  --nthreads;
	  tid = (fftw_thread_id *) ALLOCA(sizeof(fftw_thread_id) * nthreads);

	  /* All spawned blocks are full-sized: only the last block
	     (run below by the caller) can be short. */
	  for (i = 0; i < nthreads; ++i) {
	       d[i].max = (d[i].min = i * block_size) + block_size;
	       d[i].thread_num = i;
	       d[i].data = data;
	       fftw_thread_spawn(&tid[i],
				 (fftw_thread_function) proc, (void *) &d[i]);
	  }

	  /* Here i == nthreads, i.e. the index of the final block. */
	  d[i].min = i * block_size;
	  d[i].max = loopmax;
	  d[i].thread_num = i;
	  d[i].data = data;
	  proc(&d[i]);
	  
	  /* Wait for every spawned thread before releasing d, which
	     they were handed pointers into. */
	  for (i = 0; i < nthreads; ++i)
	       fftw_thread_wait(tid[i]);

	  ALLOCA_CLEANUP(tid);
	  ALLOCA_CLEANUP(d);

#endif /* ! FFTW_USING_COMPILER_THREADS */
     }
}

#ifdef FFTW_USING_POSIX_THREADS

/* Backing storage for the thread-creation attributes.  fftw_threads_init
   initializes it and, if it had to override a default setting, publishes
   it through fftw_pthread_attributes_p below. */
static pthread_attr_t fftw_pthread_attributes; /* attrs for POSIX threads */

/* Attributes to create threads with.  NULL (the initial value) means
   "use the default attributes"; fftw_threads_init points this at
   fftw_pthread_attributes when non-default attributes are required.
   NOTE(review): has external linkage, presumably so that the thread
   spawning code outside this file can pass it to pthread_create --
   confirm against fftw_threads-int.h. */
pthread_attr_t *fftw_pthread_attributes_p = NULL;

#endif /* FFTW_USING_POSIX_THREADS */

/* fftw_threads_init does any initialization that is necessary to use
   threads.  It must be called before calling fftw_threads or
   fftwnd_threads. 
   
   Returns 0 if successful, and non-zero if there is an error (for
   POSIX threads, the error number reported by the failing pthread
   call).  Do not call any fftw_threads routines if fftw_threads_init
   is not successful!

   With POSIX threads, on failure no attributes object is left
   allocated and fftw_pthread_attributes_p is NULL.  Calling this
   function again after a successful call is allowed. */

int fftw_threads_init(void)
{
#ifdef FFTW_USING_POSIX_THREADS
     /* Set the thread creation attributes as necessary.  If we don't
	change anything, just use the default attributes (NULL). */
     int err, attr, attr_changed = 0;

     /* If an earlier call published our attributes object it is still
	initialized; initializing it a second time is undefined in
	POSIX, so release it first. */
     if (fftw_pthread_attributes_p == &fftw_pthread_attributes) {
	  fftw_pthread_attributes_p = NULL;
	  err = pthread_attr_destroy(&fftw_pthread_attributes);
	  if (err) return err;
     }

     err = pthread_attr_init(&fftw_pthread_attributes); /* set to defaults */
     if (err) return err;

     /* Make sure that threads are joinable!  (they aren't on AIX) */
     err = pthread_attr_getdetachstate(&fftw_pthread_attributes, &attr);
     if (!err && attr != PTHREAD_CREATE_JOINABLE) {
	  err = pthread_attr_setdetachstate(&fftw_pthread_attributes,
					    PTHREAD_CREATE_JOINABLE);
	  attr_changed = 1;
     }

     /* Make sure threads parallelize (they don't by default on Solaris) */
     if (!err) {
	  err = pthread_attr_getscope(&fftw_pthread_attributes, &attr);
	  if (!err && attr != PTHREAD_SCOPE_SYSTEM) {
	       err = pthread_attr_setscope(&fftw_pthread_attributes,
					   PTHREAD_SCOPE_SYSTEM);
	       attr_changed = 1;
	  }
     }

     if (!err && attr_changed)  /* we aren't using the defaults */
	  fftw_pthread_attributes_p = &fftw_pthread_attributes;
     else {
	  /* Either something failed or the defaults already suit us:
	     in both cases the attributes object is not needed, so
	     release it instead of leaking it.  The first error seen
	     takes precedence over any error from the destroy call. */
	  int destroy_err = pthread_attr_destroy(&fftw_pthread_attributes);

	  fftw_pthread_attributes_p = NULL;  /* use default attributes */
	  if (!err)
	       err = destroy_err;
	  if (err) return err;
     }
#endif /* FFTW_USING_POSIX_THREADS */

#ifdef FFTW_USING_MACOS_THREADS
     /* Must use MPAllocate and MPFree instead of malloc and free: */
     if (MPLibraryIsLoaded()) {
	  fftw_malloc_hook = MPAllocate;
	  fftw_free_hook = MPFree;
     }
#endif /* FFTW_USING_MACOS_THREADS */

#if defined(FFTW_USING_OPENMP_THREADS) && ! defined(_OPENMP)
#error OpenMP enabled but not using an OpenMP compiler
#endif

     return 0; /* no error */
}