1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
|
/*
* Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*/
/* Note: this header file *must* be the first thing in this file,
due to AIX alloca lossage. */
#include "fftw_threads-int.h"
/* Distribute a loop from 0 to loopmax-1 over at most nthreads threads.

   proc(d) is called to execute a block of iterations from d->min
   to d->max-1.  d->thread_num indicates the number of the thread
   that is executing proc (starting at 0 and never exceeding
   nthreads-1; fewer threads than requested may be used), and d->data
   is the same as the data parameter passed to fftw_thread_spawn_loop.

   An nthreads value less than 1 is treated as 1.  If loopmax <= 0
   there are no iterations to execute, so the function returns
   immediately without calling proc.

   This function returns only when all the threads have completed. */
void fftw_thread_spawn_loop(int loopmax, int nthreads,
                            fftw_loop_function proc, void *data)
{
     int block_size;

     /* An empty loop has nothing to distribute.  Returning here also
        keeps block_size from becoming 0 below, which would otherwise
        lead to a division by zero. */
     if (loopmax <= 0)
          return;

     /* Zero or negative thread counts mean "run serially". */
     if (nthreads < 1)
          nthreads = 1;

     /* Choose the block size and number of threads in order to (1)
        minimize the critical path and (2) use the fewest threads that
        achieve the same critical path (to minimize overhead).
        e.g. if loopmax is 5 and nthreads is 4, we should use only 3
        threads with block sizes of 2, 2, and 1.

        Both lines are ceiling divisions, written as quotient plus
        "is there a remainder" so that the intermediate sum
        loopmax + nthreads - 1 cannot overflow an int. */
     block_size = loopmax / nthreads + (loopmax % nthreads != 0);
     nthreads = loopmax / block_size + (loopmax % block_size != 0);

     if (nthreads <= 1) {
          /* Serial case: run the whole range in the calling thread. */
          fftw_loop_data d;

          d.min = 0;
          d.max = loopmax;
          d.thread_num = 0;
          d.data = data;
          proc(&d);
     }
     else {
#ifdef FFTW_USING_COMPILER_THREADS
          fftw_loop_data d;
#else
          fftw_loop_data *d;
          fftw_thread_id *tid;
          /* Number of threads to spawn; the calling thread executes
             the final block itself. */
          int nspawn = nthreads - 1;
#endif
          int i;

#ifdef FFTW_USING_COMPILER_THREADS

          /* The compiler parallelizes the loop; d and i are named in
             the pragmas below so that each thread gets its own copy. */
#if defined(FFTW_USING_SGIMP_THREADS)
#pragma parallel local(d,i)
          {
#pragma pfor iterate(i=0; nthreads; 1)
#elif defined(FFTW_USING_OPENMP_THREADS)
#pragma omp parallel for private(d)
#endif
          for (i = 0; i < nthreads; ++i) {
               d.max = (d.min = i * block_size) + block_size;
               if (d.max > loopmax)  /* the last block may be short */
                    d.max = loopmax;
               d.thread_num = i;
               d.data = data;
               proc(&d);
          }
#if defined(FFTW_USING_SGIMP_THREADS)
          }
#endif

#else /* ! FFTW_USING_COMPILER_THREADS, i.e. explicit thread spawning: */

          /* One descriptor per block, one thread id per spawned thread.
             The sizes are computed without side effects so that they
             stay correct however the ALLOCA macro expands. */
          d = (fftw_loop_data *) ALLOCA(sizeof(fftw_loop_data) * nthreads);
          tid = (fftw_thread_id *) ALLOCA(sizeof(fftw_thread_id) * nspawn);

          /* Every block except the last is full-sized and gets its
             own thread. */
          for (i = 0; i < nspawn; ++i) {
               d[i].max = (d[i].min = i * block_size) + block_size;
               d[i].thread_num = i;
               d[i].data = data;
               fftw_thread_spawn(&tid[i],
                                 (fftw_thread_function) proc,
                                 (void *) &d[i]);
          }

          /* Here i == nspawn: run the last (possibly shorter) block in
             the calling thread. */
          d[i].min = i * block_size;
          d[i].max = loopmax;
          d[i].thread_num = i;
          d[i].data = data;
          proc(&d[i]);

          /* Wait for every spawned thread before releasing the
             descriptors they were handed. */
          for (i = 0; i < nspawn; ++i)
               fftw_thread_wait(tid[i]);

          ALLOCA_CLEANUP(tid);
          ALLOCA_CLEANUP(d);

#endif /* ! FFTW_USING_COMPILER_THREADS */
     }
}
#ifdef FFTW_USING_POSIX_THREADS
/* Backing storage for non-default thread-creation attributes; it is
   initialized (and, if unneeded, destroyed again) by fftw_threads_init. */
static pthread_attr_t fftw_pthread_attributes; /* attrs for POSIX threads */
/* Attributes to create threads with: fftw_threads_init points this at
   fftw_pthread_attributes when it had to change an attribute, and
   leaves it NULL when the default attributes suffice.
   NOTE(review): presumably consumed by the thread-spawning code in the
   header, which is not visible here -- confirm. */
pthread_attr_t *fftw_pthread_attributes_p = NULL;
#endif /* FFTW_USING_POSIX_THREADS */
/* fftw_threads_init does any initialization that is necessary to use
   threads.  It must be called before calling fftw_threads or
   fftwnd_threads.

   Returns 0 if successful, and non-zero if there is an error (with
   POSIX threads, the error code of the failing pthread_attr_* call).

   Calling it again after a successful call is harmless: custom thread
   attributes that are already installed are left in place.

   Do not call any fftw_threads routines if fftw_threads_init
   is not successful! */
int fftw_threads_init(void)
{
#ifdef FFTW_USING_POSIX_THREADS
     /* Set the thread creation attributes as necessary.  If we don't
        change anything, just use the default attributes (NULL).

        Skip this when our own attributes object is already installed
        by an earlier call: re-initializing a live pthread_attr_t is
        undefined behavior. */
     if (fftw_pthread_attributes_p != &fftw_pthread_attributes) {
          int err, attr, attr_changed = 0;

          err = pthread_attr_init(&fftw_pthread_attributes); /* defaults */
          if (err)
               return err;

          /* Make sure that threads are joinable! (they aren't on AIX) */
          err = pthread_attr_getdetachstate(&fftw_pthread_attributes, &attr);
          if (!err && attr != PTHREAD_CREATE_JOINABLE) {
               err = pthread_attr_setdetachstate(&fftw_pthread_attributes,
                                                 PTHREAD_CREATE_JOINABLE);
               attr_changed = 1;
          }

          /* Make sure threads parallelize (they don't by default on
             Solaris) */
          if (!err)
               err = pthread_attr_getscope(&fftw_pthread_attributes, &attr);
          if (!err && attr != PTHREAD_SCOPE_SYSTEM) {
               err = pthread_attr_setscope(&fftw_pthread_attributes,
                                           PTHREAD_SCOPE_SYSTEM);
               attr_changed = 1;
          }

          if (err) {
               /* Don't leave an initialized attributes object behind
                  on failure; report the original error, not the
                  result of the cleanup. */
               (void) pthread_attr_destroy(&fftw_pthread_attributes);
               return err;
          }

          if (attr_changed) /* we aren't using the defaults */
               fftw_pthread_attributes_p = &fftw_pthread_attributes;
          else {
               fftw_pthread_attributes_p = NULL; /* use default attributes */
               err = pthread_attr_destroy(&fftw_pthread_attributes);
               if (err)
                    return err;
          }
     }
#endif /* FFTW_USING_POSIX_THREADS */

#ifdef FFTW_USING_MACOS_THREADS
     /* Must use MPAllocate and MPFree instead of malloc and free: */
     if (MPLibraryIsLoaded()) {
          fftw_malloc_hook = MPAllocate;
          fftw_free_hook = MPFree;
     }
#endif /* FFTW_USING_MACOS_THREADS */

#if defined(FFTW_USING_OPENMP_THREADS) && ! defined(_OPENMP)
#error OpenMP enabled but not using an OpenMP compiler
#endif

     return 0; /* no error */
}
|