Current Path : /compat/linux/proc/self/root/usr/src/contrib/gcclibs/libgomp/ |
FreeBSD hs32.drive.ne.jp 9.1-RELEASE FreeBSD 9.1-RELEASE #1: Wed Jan 14 12:18:08 JST 2015 root@hs32.drive.ne.jp:/sys/amd64/compile/hs32 amd64 |
Current File : //compat/linux/proc/self/root/usr/src/contrib/gcclibs/libgomp/iter.c |
/* Copyright (C) 2005 Free Software Foundation, Inc. Contributed by Richard Henderson <rth@redhat.com>. This file is part of the GNU OpenMP Library (libgomp). Libgomp is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with libgomp; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ /* As a special exception, if you link this library with other files, some of which are compiled with GCC, to produce an executable, this library does not by itself cause the resulting executable to be covered by the GNU General Public License. This exception does not however invalidate any other reasons why the executable file might be covered by the GNU General Public License. */ /* This file contains routines for managing work-share iteration, both for loops and sections. */ #include "libgomp.h" #include <stdlib.h> /* This function implements the STATIC scheduling method. The caller should iterate *pstart <= x < *pend. Return zero if there are more iterations to perform; nonzero if not. Return less than 0 if this thread had received the absolutely last iteration. */ int gomp_iter_static_next (long *pstart, long *pend) { struct gomp_thread *thr = gomp_thread (); struct gomp_team *team = thr->ts.team; struct gomp_work_share *ws = thr->ts.work_share; unsigned long nthreads = team ? team->nthreads : 1; if (thr->ts.static_trip == -1) return -1; /* Quick test for degenerate teams and orphaned constructs. */ if (nthreads == 1) { *pstart = ws->next; *pend = ws->end; thr->ts.static_trip = -1; return ws->next == ws->end; } /* We interpret chunk_size zero as "unspecified", which means that we should break up the iterations such that each thread makes only one trip through the outer loop. */ if (ws->chunk_size == 0) { unsigned long n, q, i; unsigned long s0, e0; long s, e; if (thr->ts.static_trip > 0) return 1; /* Compute the total number of iterations. */ s = ws->incr + (ws->incr > 0 ? -1 : 1); n = (ws->end - ws->next + s) / ws->incr; i = thr->ts.team_id; /* Compute the "zero-based" start and end points. That is, as if the loop began at zero and incremented by one. */ q = n / nthreads; q += (q * nthreads != n); s0 = q * i; e0 = s0 + q; if (e0 > n) e0 = n; /* Notice when no iterations allocated for this thread. */ if (s0 >= e0) { thr->ts.static_trip = 1; return 1; } /* Transform these to the actual start and end numbers. */ s = (long)s0 * ws->incr + ws->next; e = (long)e0 * ws->incr + ws->next; *pstart = s; *pend = e; thr->ts.static_trip = (e0 == n ? -1 : 1); return 0; } else { unsigned long n, s0, e0, i, c; long s, e; /* Otherwise, each thread gets exactly chunk_size iterations (if available) each time through the loop. */ s = ws->incr + (ws->incr > 0 ? -1 : 1); n = (ws->end - ws->next + s) / ws->incr; i = thr->ts.team_id; c = ws->chunk_size; /* Initial guess is a C sized chunk positioned nthreads iterations in, offset by our thread number. */ s0 = (thr->ts.static_trip * nthreads + i) * c; e0 = s0 + c; /* Detect overflow. */ if (s0 >= n) return 1; if (e0 > n) e0 = n; /* Transform these to the actual start and end numbers. */ s = (long)s0 * ws->incr + ws->next; e = (long)e0 * ws->incr + ws->next; *pstart = s; *pend = e; if (e0 == n) thr->ts.static_trip = -1; else thr->ts.static_trip++; return 0; } } /* This function implements the DYNAMIC scheduling method. Arguments are as for gomp_iter_static_next. This function must be called with ws->lock held. */ bool gomp_iter_dynamic_next_locked (long *pstart, long *pend) { struct gomp_thread *thr = gomp_thread (); struct gomp_work_share *ws = thr->ts.work_share; long start, end, chunk, left; start = ws->next; if (start == ws->end) return false; chunk = ws->chunk_size * ws->incr; left = ws->end - start; if (ws->incr < 0) { if (chunk < left) chunk = left; } else { if (chunk > left) chunk = left; } end = start + chunk; ws->next = end; *pstart = start; *pend = end; return true; } #ifdef HAVE_SYNC_BUILTINS /* Similar, but doesn't require the lock held, and uses compare-and-swap instead. Note that the only memory value that changes is ws->next. */ bool gomp_iter_dynamic_next (long *pstart, long *pend) { struct gomp_thread *thr = gomp_thread (); struct gomp_work_share *ws = thr->ts.work_share; long start, end, nend, chunk, incr; start = ws->next; end = ws->end; incr = ws->incr; chunk = ws->chunk_size * incr; while (1) { long left = end - start; long tmp; if (start == end) return false; if (incr < 0) { if (chunk < left) chunk = left; } else { if (chunk > left) chunk = left; } nend = start + chunk; tmp = __sync_val_compare_and_swap (&ws->next, start, nend); if (__builtin_expect (tmp == start, 1)) break; start = tmp; } *pstart = start; *pend = nend; return true; } #endif /* HAVE_SYNC_BUILTINS */ /* This function implements the GUIDED scheduling method. Arguments are as for gomp_iter_static_next. This function must be called with the work share lock held. */ bool gomp_iter_guided_next_locked (long *pstart, long *pend) { struct gomp_thread *thr = gomp_thread (); struct gomp_work_share *ws = thr->ts.work_share; struct gomp_team *team = thr->ts.team; unsigned long nthreads = team ? team->nthreads : 1; unsigned long n, q; long start, end; if (ws->next == ws->end) return false; n = (ws->end - ws->next) / ws->incr; q = (n + nthreads - 1) / nthreads; if (q < ws->chunk_size) q = ws->chunk_size; if (q > n) q = n; start = ws->next; end = start + q * ws->incr; ws->next = end; *pstart = start; *pend = end; return true; } #ifdef HAVE_SYNC_BUILTINS /* Similar, but doesn't require the lock held, and uses compare-and-swap instead. Note that the only memory value that changes is ws->next. */ bool gomp_iter_guided_next (long *pstart, long *pend) { struct gomp_thread *thr = gomp_thread (); struct gomp_work_share *ws = thr->ts.work_share; struct gomp_team *team = thr->ts.team; unsigned long nthreads = team ? team->nthreads : 1; long start, end, nend, incr; unsigned long chunk_size; start = ws->next; end = ws->end; incr = ws->incr; chunk_size = ws->chunk_size; while (1) { unsigned long n, q; long tmp; if (start == end) return false; n = (end - start) / ws->incr; q = (n + nthreads - 1) / nthreads; if (q < chunk_size) q = chunk_size; if (q > n) q = n; nend = start + q * incr; tmp = __sync_val_compare_and_swap (&ws->next, start, nend); if (__builtin_expect (tmp == start, 1)) break; start = tmp; } *pstart = start; *pend = nend; return true; } #endif /* HAVE_SYNC_BUILTINS */