Next Previous

Retired Document

Important: This sample code may not represent best practices for current development. The project may use deprecated symbols and illustrate technologies and techniques that are no longer recommended.

TOMS_transpose.c

/*

 * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology

 * This program is free software; you can redistribute it and/or modify

 * it under the terms of the GNU General Public License as published by

 * the Free Software Foundation; either version 2 of the License, or

 * (at your option) any later version.

 * This program is distributed in the hope that it will be useful,

 * but WITHOUT ANY WARRANTY; without even the implied warranty of

 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

 * GNU General Public License for more details.

 * You should have received a copy of the GNU General Public License

 * along with this program; if not, write to the Free Software

 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

*/

/*

 * TOMS Transpose.  Revised version of algorithm 380.

 * These routines do in-place transposes of arrays.

 * [ Cate, E.G. and Twigg, D.W., ACM Transactions on Mathematical Software,

 *   vol. 3, no. 1, 104-110 (1977) ]

 * C version by Steven G. Johnson. February 1997.

*/

/*

 * Changes made by Advanced Computation Group July 2004

 * Copyright (c) 2004 Apple Computer, Inc

*/

#include <stdlib.h>

#include <string.h>

#include "TOMS_transpose.h"

#include "dist_fft_transpose.h"

static int TOMS_gcd(int a, int b);

/*

 * "a" is a 1D array of length ny*nx which constains the nx x ny matrix to be

 * transposed.  "a" is stored in C order (last index varies fastest).  move

 * is a 1D array of length move_size used to store information to speed up

 * the process.  The value move_size=(ny+nx)/2 is recommended.

 * The return value indicates the success or failure of the routine. Returns 0

 * if okay, -1 if ny or nx < 0, and -2 if move_size < 1. The return value

 * should never be positive, but it it is, it is set to the final position in

 * a when the search is completed but some elements have not been moved.

 * Note: move[i] will stay zero for fixed points.

*/

short TOMS_transpose_2d(TOMS_el_type * a,

                        int nx, int ny,

                        char *move,

                        int move_size)

        int             i, j, im, mn;

        TOMS_el_type    b, c, d;

        int             ncount;

        int             k;

        /* check arguments and initialize: */

        if (ny < 0 || nx < 0)

                return -1;

        if (ny < 2 || nx < 2)

                return 0;

        if (move_size < 1)

                return -2;

        if (ny == nx) {

                // if it's a multiple of 4, and vector aligned memory, use vector

                if ((ny & 0x3) == 0 &&

                    ((((long)a) & 0xF) == 0) &&

                    (sizeof(TOMS_el_type) == sizeof(float))) {

                    // and we're absolutely sure we're using floats

                    float *a_float = (float *)a;

                    dist_fft_transpose_square(a_float, ny, ny*sizeof(float));

                    return 0;

/*

                 * if matrix is square, exchange elements a(i,j) and a(j,i):

*/

                for (i = 0; i < nx; ++i)

                        for (j = i + 1; j < nx; ++j) {

                                b = a[i + j * nx];

                                a[i + j * nx] = a[j + i * nx];

                                a[j + i * nx] = b;

                return 0;

        ncount = 2;             /* always at least 2 fixed points */

        k = (mn = ny * nx) - 1;

        for (i = 0; i < move_size; ++i)

                move[i] = 0;

        if (ny >= 3 && nx >= 3)

                ncount += TOMS_gcd(ny - 1, nx - 1) - 1; /* # fixed points */

        i = 1;

        im = ny;

        while (1) {

                int             i1, i2, i1c, i2c;

                int             kmi;

                /** Rearrange the elements of a loop

                      and its companion loop: **/

                i1 = i;

                kmi = k - i;

                b = a[i1];

                i1c = kmi;

                c = a[i1c];

                while (1) {

                        i2 = ny * i1 - k * (i1 / nx);

                        i2c = k - i2;

                        if (i1 < move_size)

                                move[i1] = 1;

                        if (i1c < move_size)

                                move[i1c] = 1;

                        ncount += 2;

                        if (i2 == i)

                                break;

                        if (i2 == kmi) {

                                d = b;

                                b = c;

                                c = d;

                                break;

                        a[i1] = a[i2];

                        a[i1c] = a[i2c];

                        i1 = i2;

                        i1c = i2c;

                a[i1] = b;

                a[i1c] = c;

                if (ncount >= mn)

                        break;  /* we've moved all elements */

                /** Search for loops to rearrange: **/

                while (1) {

                        int             max;

                        max = k - i;

                        ++i;

                        if (i > max)

                                return i;

                        im += ny;

                        if (im > k)

                                im -= k;

                        i2 = im;

                        if (i == i2)

                                continue;

                        if (i >= move_size) {

                                while (i2 > i && i2 < max) {

                                        i1 = i2;

                                        i2 = ny * i1 - k * (i1 / nx);

                                if (i2 == i)

                                        break;

                        } else if (!move[i])

                                break;

        return 0;

/*

 * "a" is a 1D array of length ny*nx which constains the nx x ny matrix to be

 * transposed.  "a" is stored in C order (last index varies fastest).  move

 * is a 1D array of length move_size used to store information to speed up

 * the process.  The value move_size=(ny+nx)/2 is recommended.

 * Here, instead of each element of "a" being a single value of type

 * TOMS_el_type, each element is el_size values of type TOMS_el_type.

 * The return value indicates the success or failure of the routine. Returns 0

 * if okay, -1 if ny or nx < 0, and -2 if move_size < 1. Also, returns -3 if

 * it ran out of memory.  The return value should never be positive, but it

 * it is, it is set to the final position in a when the search is completed

 * but some elements have not been moved.

 * Note: move[i] will stay zero for fixed points.

*/

short TOMS_transpose_2d_arbitrary(TOMS_el_type * a,

                                  int nx, int ny,

                                  int el_size,

                                  char *move,

                                  int move_size)

        int             i, j, im, mn;

        TOMS_el_type   *b, *c, *d;

        int             ncount;

        int             k;

        /* check arguments and initialize: */

        if (ny < 0 || nx < 0)

                return -1;

        if (ny < 2 || nx < 2 || el_size < 1)

                return 0;

        if (move_size < 1)

                return -2;

        b = (TOMS_el_type *) malloc(sizeof(TOMS_el_type) * el_size);

        if (!b)

                return -3;

        if (ny == nx) {

/*

                 * if matrix is square, exchange elements a(i,j) and a(j,i):

*/

                for (i = 0; i < nx; ++i)

                        for (j = i + 1; j < nx; ++j) {

                                memcpy(b, &a[el_size * (i + j * nx)], el_size * sizeof(TOMS_el_type));

                                memcpy(&a[el_size * (i + j * nx)], &a[el_size * (j + i * nx)], el_size * sizeof(TOMS_el_type));

                                memcpy(&a[el_size * (j + i * nx)], b, el_size * sizeof(TOMS_el_type));

                free(b);

                return 0;

        c = (TOMS_el_type *) malloc(sizeof(TOMS_el_type) * el_size);

        if (!c) {

                free(b);

                return -3;

        ncount = 2;             /* always at least 2 fixed points */

        k = (mn = ny * nx) - 1;

        for (i = 0; i < move_size; ++i)

                move[i] = 0;

        if (ny >= 3 && nx >= 3)

                ncount += TOMS_gcd(ny - 1, nx - 1) - 1; /* # fixed points */

        i = 1;

        im = ny;

        while (1) {

                int             i1, i2, i1c, i2c;

                int             kmi;

                /** Rearrange the elements of a loop

                      and its companion loop: **/

                i1 = i;

                kmi = k - i;

                memcpy(b, &a[el_size * i1], el_size * sizeof(TOMS_el_type));

                i1c = kmi;

                memcpy(c, &a[el_size * i1c], el_size * sizeof(TOMS_el_type));

                while (1) {

                        i2 = ny * i1 - k * (i1 / nx);

                        i2c = k - i2;

                        if (i1 < move_size)

                                move[i1] = 1;

                        if (i1c < move_size)

                                move[i1c] = 1;

                        ncount += 2;

                        if (i2 == i)

                                break;

                        if (i2 == kmi) {

                                d = b;

                                b = c;

                                c = d;

                                break;

                        memcpy(&a[el_size * i1], &a[el_size * i2],

                               el_size * sizeof(TOMS_el_type));

                        memcpy(&a[el_size * i1c], &a[el_size * i2c],

                               el_size * sizeof(TOMS_el_type));

                        i1 = i2;

                        i1c = i2c;

                memcpy(&a[el_size * i1], b, el_size * sizeof(TOMS_el_type));

                memcpy(&a[el_size * i1c], c, el_size * sizeof(TOMS_el_type));

                if (ncount >= mn)

                        break;  /* we've moved all elements */

                /** Search for loops to rearrange: **/

                while (1) {

                        int             max;

                        max = k - i;

                        ++i;

                        if (i > max) {

                                free(b);

                                free(c);

                                return i;

                        im += ny;

                        if (im > k)

                                im -= k;

                        i2 = im;

                        if (i == i2)

                                continue;

                        if (i >= move_size) {

                                while (i2 > i && i2 < max) {

                                        i1 = i2;

                                        i2 = ny * i1 - k * (i1 / nx);

                                if (i2 == i)

                                        break;

                        } else if (!move[i])

                                break;

        free(b);

        free(c);

        return 0;

static int TOMS_gcd(int a, int b)

        int r;

        do {

                r = a % b;

                a = b;

                b = r;

        } while (r != 0);

        return a;

Next Previous