Retired Document
Important: This sample code may not represent best practices for current development. The project may use deprecated symbols and illustrate technologies and techniques that are no longer recommended.
TOMS_transpose.c
/* |
* Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology |
* |
* This program is free software; you can redistribute it and/or modify |
* it under the terms of the GNU General Public License as published by |
* the Free Software Foundation; either version 2 of the License, or |
* (at your option) any later version. |
* |
* This program is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
* GNU General Public License for more details. |
* |
* You should have received a copy of the GNU General Public License |
* along with this program; if not, write to the Free Software |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
* |
*/ |
/* |
* TOMS Transpose. Revised version of algorithm 380. |
* |
* These routines do in-place transposes of arrays. |
* |
* [ Cate, E.G. and Twigg, D.W., ACM Transactions on Mathematical Software, |
* vol. 3, no. 1, 104-110 (1977) ] |
* |
* C version by Steven G. Johnson. February 1997. |
*/ |
/* |
* Changes made by Advanced Computation Group July 2004 |
* Copyright (c) 2004 Apple Computer, Inc |
*/ |
#include <stdlib.h> |
#include <string.h> |
#include "TOMS_transpose.h" |
#include "dist_fft_transpose.h" |
static int TOMS_gcd(int a, int b); |
/* |
* "a" is a 1D array of length ny*nx which constains the nx x ny matrix to be |
* transposed. "a" is stored in C order (last index varies fastest). move |
* is a 1D array of length move_size used to store information to speed up |
* the process. The value move_size=(ny+nx)/2 is recommended. |
* |
* The return value indicates the success or failure of the routine. Returns 0 |
* if okay, -1 if ny or nx < 0, and -2 if move_size < 1. The return value |
* should never be positive, but it it is, it is set to the final position in |
* a when the search is completed but some elements have not been moved. |
* |
* Note: move[i] will stay zero for fixed points. |
*/ |
short TOMS_transpose_2d(TOMS_el_type * a, |
int nx, int ny, |
char *move, |
int move_size) |
{ |
int i, j, im, mn; |
TOMS_el_type b, c, d; |
int ncount; |
int k; |
/* check arguments and initialize: */ |
if (ny < 0 || nx < 0) |
return -1; |
if (ny < 2 || nx < 2) |
return 0; |
if (move_size < 1) |
return -2; |
if (ny == nx) { |
// if it's a multiple of 4, and vector aligned memory, use vector |
if ((ny & 0x3) == 0 && |
((((long)a) & 0xF) == 0) && |
(sizeof(TOMS_el_type) == sizeof(float))) { |
// and we're absolutely sure we're using floats |
float *a_float = (float *)a; |
dist_fft_transpose_square(a_float, ny, ny*sizeof(float)); |
return 0; |
} |
/* |
* if matrix is square, exchange elements a(i,j) and a(j,i): |
*/ |
for (i = 0; i < nx; ++i) |
for (j = i + 1; j < nx; ++j) { |
b = a[i + j * nx]; |
a[i + j * nx] = a[j + i * nx]; |
a[j + i * nx] = b; |
} |
return 0; |
} |
ncount = 2; /* always at least 2 fixed points */ |
k = (mn = ny * nx) - 1; |
for (i = 0; i < move_size; ++i) |
move[i] = 0; |
if (ny >= 3 && nx >= 3) |
ncount += TOMS_gcd(ny - 1, nx - 1) - 1; /* # fixed points */ |
i = 1; |
im = ny; |
while (1) { |
int i1, i2, i1c, i2c; |
int kmi; |
/** Rearrange the elements of a loop |
and its companion loop: **/ |
i1 = i; |
kmi = k - i; |
b = a[i1]; |
i1c = kmi; |
c = a[i1c]; |
while (1) { |
i2 = ny * i1 - k * (i1 / nx); |
i2c = k - i2; |
if (i1 < move_size) |
move[i1] = 1; |
if (i1c < move_size) |
move[i1c] = 1; |
ncount += 2; |
if (i2 == i) |
break; |
if (i2 == kmi) { |
d = b; |
b = c; |
c = d; |
break; |
} |
a[i1] = a[i2]; |
a[i1c] = a[i2c]; |
i1 = i2; |
i1c = i2c; |
} |
a[i1] = b; |
a[i1c] = c; |
if (ncount >= mn) |
break; /* we've moved all elements */ |
/** Search for loops to rearrange: **/ |
while (1) { |
int max; |
max = k - i; |
++i; |
if (i > max) |
return i; |
im += ny; |
if (im > k) |
im -= k; |
i2 = im; |
if (i == i2) |
continue; |
if (i >= move_size) { |
while (i2 > i && i2 < max) { |
i1 = i2; |
i2 = ny * i1 - k * (i1 / nx); |
} |
if (i2 == i) |
break; |
} else if (!move[i]) |
break; |
} |
} |
return 0; |
} |
/* |
* "a" is a 1D array of length ny*nx which constains the nx x ny matrix to be |
* transposed. "a" is stored in C order (last index varies fastest). move |
* is a 1D array of length move_size used to store information to speed up |
* the process. The value move_size=(ny+nx)/2 is recommended. |
* |
* Here, instead of each element of "a" being a single value of type |
* TOMS_el_type, each element is el_size values of type TOMS_el_type. |
* |
* The return value indicates the success or failure of the routine. Returns 0 |
* if okay, -1 if ny or nx < 0, and -2 if move_size < 1. Also, returns -3 if |
* it ran out of memory. The return value should never be positive, but it |
* it is, it is set to the final position in a when the search is completed |
* but some elements have not been moved. |
* |
* Note: move[i] will stay zero for fixed points. |
*/ |
short TOMS_transpose_2d_arbitrary(TOMS_el_type * a, |
int nx, int ny, |
int el_size, |
char *move, |
int move_size) |
{ |
int i, j, im, mn; |
TOMS_el_type *b, *c, *d; |
int ncount; |
int k; |
/* check arguments and initialize: */ |
if (ny < 0 || nx < 0) |
return -1; |
if (ny < 2 || nx < 2 || el_size < 1) |
return 0; |
if (move_size < 1) |
return -2; |
b = (TOMS_el_type *) malloc(sizeof(TOMS_el_type) * el_size); |
if (!b) |
return -3; |
if (ny == nx) { |
/* |
* if matrix is square, exchange elements a(i,j) and a(j,i): |
*/ |
for (i = 0; i < nx; ++i) |
for (j = i + 1; j < nx; ++j) { |
memcpy(b, &a[el_size * (i + j * nx)], el_size * sizeof(TOMS_el_type)); |
memcpy(&a[el_size * (i + j * nx)], &a[el_size * (j + i * nx)], el_size * sizeof(TOMS_el_type)); |
memcpy(&a[el_size * (j + i * nx)], b, el_size * sizeof(TOMS_el_type)); |
} |
free(b); |
return 0; |
} |
c = (TOMS_el_type *) malloc(sizeof(TOMS_el_type) * el_size); |
if (!c) { |
free(b); |
return -3; |
} |
ncount = 2; /* always at least 2 fixed points */ |
k = (mn = ny * nx) - 1; |
for (i = 0; i < move_size; ++i) |
move[i] = 0; |
if (ny >= 3 && nx >= 3) |
ncount += TOMS_gcd(ny - 1, nx - 1) - 1; /* # fixed points */ |
i = 1; |
im = ny; |
while (1) { |
int i1, i2, i1c, i2c; |
int kmi; |
/** Rearrange the elements of a loop |
and its companion loop: **/ |
i1 = i; |
kmi = k - i; |
memcpy(b, &a[el_size * i1], el_size * sizeof(TOMS_el_type)); |
i1c = kmi; |
memcpy(c, &a[el_size * i1c], el_size * sizeof(TOMS_el_type)); |
while (1) { |
i2 = ny * i1 - k * (i1 / nx); |
i2c = k - i2; |
if (i1 < move_size) |
move[i1] = 1; |
if (i1c < move_size) |
move[i1c] = 1; |
ncount += 2; |
if (i2 == i) |
break; |
if (i2 == kmi) { |
d = b; |
b = c; |
c = d; |
break; |
} |
memcpy(&a[el_size * i1], &a[el_size * i2], |
el_size * sizeof(TOMS_el_type)); |
memcpy(&a[el_size * i1c], &a[el_size * i2c], |
el_size * sizeof(TOMS_el_type)); |
i1 = i2; |
i1c = i2c; |
} |
memcpy(&a[el_size * i1], b, el_size * sizeof(TOMS_el_type)); |
memcpy(&a[el_size * i1c], c, el_size * sizeof(TOMS_el_type)); |
if (ncount >= mn) |
break; /* we've moved all elements */ |
/** Search for loops to rearrange: **/ |
while (1) { |
int max; |
max = k - i; |
++i; |
if (i > max) { |
free(b); |
free(c); |
return i; |
} |
im += ny; |
if (im > k) |
im -= k; |
i2 = im; |
if (i == i2) |
continue; |
if (i >= move_size) { |
while (i2 > i && i2 < max) { |
i1 = i2; |
i2 = ny * i1 - k * (i1 / nx); |
} |
if (i2 == i) |
break; |
} else if (!move[i]) |
break; |
} |
} |
free(b); |
free(c); |
return 0; |
} |
static int TOMS_gcd(int a, int b) |
{ |
int r; |
do { |
r = a % b; |
a = b; |
b = r; |
} while (r != 0); |
return a; |
} |
Copyright © 2004 Apple Computer, Inc. All Rights Reserved. Terms of Use | Privacy Policy | Updated: 2004-08-23