Commit 34fca3c8 authored by Mitch Burnett's avatar Mitch Burnett Committed by GitHub
Browse files

Merge pull request #1 from mitchburnett/master

Integration of pfb codes
parents 8a5cfafa 529276b0
Showing with 1695 additions and 7 deletions
+1695 -7
......@@ -9,12 +9,13 @@ else
fi
# Change this to match your destination directory
prefix=/users/$USER/fresh_hash
prefix=~/fresh_hash
# Set library source directories
HASHPIPE_SRC=$FLAG_DIR/lib/hashpipe/src
XGPU_SRC=$FLAG_DIR/lib/xGPU/src
BEAM_SRC=$FLAG_DIR/lib/beamformer/src
PFB_SRC=$FLAG_DIR/lib/pfb/src
POWER_SRC=$FLAG_DIR/lib/flagPower/src
# Compile the hashpipe library
......@@ -59,13 +60,23 @@ make install prefix=$prefix
echo "========================================"
echo
# Compile the pfb library
echo "Installing flagpfb"
echo "========================================"
echo "Navigating to $PFB_SRC"
cd $PFB_SRC
make
make install prefix=$prefix
echo "========================================"
echo
# Compile the flag HASHPIPE plugins
echo "Installing the FLAG plugins"
echo "========================================"
echo "Navigating to $FLAG_DIR/src"
cd $FLAG_DIR/src
autoreconf -is
./configure --prefix=$prefix --with-hashpipe=$prefix --with-xgpu=$prefix --with-xgpufrb=$prefix --with-xgpupfb=$prefix --with-flagbeamformer=$prefix --with-flagpower=$prefix --silent
./configure --prefix=$prefix --with-hashpipe=$prefix --with-xgpu=$prefix --with-xgpufrb=$prefix --with-xgpupfb=$prefix --with-flagpfb=$prefix --with-flagbeamformer=$prefix --with-flagpower=$prefix --silent
make
make install
echo "========================================"
......
#!/usr/bin/python
# flag_gen_coeff.py
# Generate PFB filter coefficients for FLAG. The
# filter coefficients array contains duplicates for optimised reading
# from the GPU.
import sys
import getopt
import math
import numpy
import scipy.signal as sp
import matplotlib.pyplot as plotter
# function definitions
def PrintUsage(ProgName):
"Prints usage information."
print "Usage: " + ProgName + " [options]"
print " -h --help Display this usage information"
print " -n --nfft <value> Number of points in FFT"
print " -t --taps <value> Number of taps in PFB"
print " -w --window <value> Window to apply i.e \"cheb-win\", default: rect."
print " -b --sub-bands <value> Number of sub-bands in data"
print " -d --data-type <value> Data type - \"float\" or " \
+ "\"signedchar\""
print " -p --no-plot Do not plot coefficients"
print " Window types:"
print " rect"
print " hanning"
print " cheb-win"
return
def genCoeff(window, M):
# CHEBWIN
if window == "cheb-win":
PFBCoeff = sp.chebwin(M, at=-30)
# HANNING WINDOW
elif window == "hanning":
X = numpy.array([(float(i) / NFFT) - (float(NTaps) / 2) for i in range(M)])
PFBCoeff = numpy.sinc(X) * numpy.hanning(M)
else:
PFBCoeff = numpy.ones(M)
return PFBCoeff
# default values
NFFT = 32768 # number of points in FFT
NTaps = 8 # number of taps in PFB
Window = "rect" # rectangular window
NSubBands = 1 # number of sub-bands in data
DataType = "signedchar" # data type - "float" or "signedchar"
Plot = True # plot flag
# get the command line arguments
ProgName = sys.argv[0]
OptsShort = "hn:t:w:b:d:p"
OptsLong = ["help", "nfft=", "taps=","window=", "sub-bands=", "data-type=", "no-plot"]
# check if the minimum expected number of arguments has been passed
# to the program
if (1 == len(sys.argv)):
sys.stderr.write("ERROR: No arguments passed to the program!\n")
PrintUsage(ProgName)
sys.exit(1)
# get the arguments using the getopt module
try:
(Opts, Args) = getopt.getopt(sys.argv[1:], OptsShort, OptsLong)
except getopt.GetoptError, ErrMsg:
# print usage information and exit
sys.stderr.write("ERROR: " + str(ErrMsg) + "!\n")
PrintUsage(ProgName)
sys.exit(1)
# parse the arguments
for o, a in Opts:
if o in ("-h", "--help"):
PrintUsage(ProgName)
sys.exit()
elif o in ("-n", "--nfft"):
NFFT = int(a)
elif o in ("-t", "--taps"):
NTaps = int(a)
elif o in ("-w", "--window"):
Window = a
elif o in ("-b", "--sub-bands"):
NSubBands = int(a)
elif o in ("-d", "--data-type"):
DataType = a
elif o in ("-p", "--no-plot"):
Plot = False
else:
PrintUsage(ProgName)
sys.exit(1)
M = NTaps * NFFT
PFBCoeff = genCoeff(Window, M)
# create conversion map
if ("signedchar" == DataType):
Map = numpy.zeros(256, numpy.float32)
for i in range(0, 128):
Map[i] = float(i) / 128
for i in range(128, 256):
Map[i] = - (float(256 -i) / 128)
# 32-bit (float) coefficients
PFBCoeffFloat32 = numpy.zeros(M * NSubBands, numpy.float32)
# 8-bit (signedchar) coefficients
if ("signedchar" == DataType):
PFBCoeffInt8 = numpy.zeros(M * NSubBands, numpy.int8)
k = 0
for i in range(len(PFBCoeff)):
Coeff = float(PFBCoeff[i])
if ("signedchar" == DataType):
for j in range(256):
# if (math.fabs(Coeff - Map[j]) <= (0.0078125 / 2)):
if (math.fabs(Coeff - Map[j]) <= 0.0078125):
for m in range(NSubBands):
PFBCoeffInt8[k + m] = j
break
elif ("float" == DataType):
for m in range(NSubBands):
PFBCoeffFloat32[k + m] = Coeff
else:
# print usage information and exit
sys.stderr.write("ERROR: Invalid data type!\n")
PrintUsage(ProgName)
sys.exit(1)
k = k + NSubBands
# write the coefficients to disk and also plot it
FileCoeff = open("coeff_" \
+ DataType + "_" \
+ str(NTaps) + "_" \
+ str(NFFT) + "_" \
+ str(NSubBands) + ".dat", \
"wb")
if ("signedchar" == DataType):
FileCoeff.write(PFBCoeffInt8)
# plot the coefficients
if (Plot):
plotter.plot(PFBCoeffInt8)
else:
FileCoeff.write(PFBCoeffFloat32)
# plot the coefficients
if (Plot):
plotter.plot(PFBCoeffFloat32)
FileCoeff.close()
if (Plot):
plotter.show()
-include Makefile.local
prefix ?= /usr/local
libdir ?= $(prefix)/lib
includedir ?= $(prefix)/include
bindir ?= $(prefix)/bin
CUDA_DIR ?= $(prefix)/cuda
#CUDA_DIR ?= /usr/local/cuda
#prefix ?= tmp
#libdir ?= $(prefix)/lib
#includedir ?= $(prefix)/include
#bindir ?= $(prefix)/bin
#CUDA_DIR ?= /Developer/NVIDIA/CUDA-6.5
INCLUDES = -I$(CUDA_DIR)/include
CUDA_LIBDIR = -L$(CUDA_DIR)/lib
#INCLUDES = -I/Developer/NVIDIA/CUDA-6.5/include
#CUDA_LIBDIR = -L/Developer/NVIDIA/CUDA-6.5/lib
CLIBDIR = -L/usr/local/lib
CU_LIB = -lcufft -lcudart
C_LIB = -lpython2.7 -ldl -lutil
LINKER = -Xlinker -export-dynamic
ifneq ($(strip $(OSTYPE)),osx)
CUDA_LIBDIR ?= $(CUDA_DIR)/lib64
else
CUDA_LIBDIR ?= $(CUDA_DIR)/lib
endif
NVCC = $(CUDA_DIR)/bin/nvcc
all: libflagpfb.so
libflagpfb.so: pfb.o kernels.o
$(NVCC) $(INCLUDS) -Xcompiler -fPIC -Xcompiler -D_REENTRANT -O3 pfb.o kernels.o -o libflagpfb.so $(CUDA_LIBDIR) $(CU_LIB) $(CLIBDIR) $(C_LIB) $(LINKER) --shared
pfb.o: kernels.cu kernels.h
$(NVCC) $(INCLUDES) $(CUDA_LIBDIR) $(CU_LIB) -c -o pfb.o -Xcompiler -fPIC -Xcompiler -D_REENTRANT -O3 pfb.cu
kernels.o: kernels.cu kernels.h
$(NVCC) $(INCLUDES) $(CUDA_LIBDIR) $(CU_LIB) -c -o kernels.o kernels.cu -Xcompiler -fPIC -Xcompiler -D_REENTRANT -O3
clean:
rm -f *.o
rm -f *.so
install: kernels.o pfb.o libflagpfb.so
mkdir -p $(includedir)
cp pfb.h $(includedir)
cp kernels.h $(includedir)
mkdir -p $(libdir)
cp libflagpfb.so $(libdir)
#mkdir -p $(bindir)
#cp cublas_main $(bindir)
File added
#ifdef __cplusplus
extern "C" {
#include "kernels.h"
}
#endif
__global__ void map(char* dataIn,
char2* dataOut,
int channelSelect,
params pfbParams)
{
// select the channel range
int channelMin = pfbParams.fine_channels*channelSelect;
int absIdx = 2 * blockDim.y*(blockIdx.x*pfbParams.coarse_channels + (channelMin+blockIdx.y)) + 2 * threadIdx.y; // times 2 because we are mapping a sequence of values to char2 array.
int mapIdx = blockDim.y*(blockIdx.x*gridDim.y + blockIdx.y) + threadIdx.y;
dataOut[mapIdx].x = dataIn[absIdx];
dataOut[mapIdx].y = dataIn[absIdx+1];
return;
}
/* prepare data for PFB */
__global__ void PFB_kernel(char2* pc2Data,
float2* pf2FFTIn,
float* pfPFBCoeff,
params pfbParams)
{
int i = (blockIdx.x * blockDim.x) + threadIdx.x;
int iNFFT = (gridDim.x * blockDim.x);
int j = 0;
int iAbsIdx = 0;
float2 f2PFBOut = make_float2(0.0, 0.0);
char2 c2Data = make_char2(0, 0);
for (j = 0; j < pfbParams.taps; ++j)
{
/* calculate the absolute index */
iAbsIdx = (j * iNFFT) + i;
/* get the address of the block */
c2Data = pc2Data[iAbsIdx];
f2PFBOut.x += (float) c2Data.x * pfPFBCoeff[iAbsIdx];
f2PFBOut.y += (float) c2Data.y * pfPFBCoeff[iAbsIdx];
}
pf2FFTIn[i] = f2PFBOut;
return;
}
__global__ void CopyDataForFFT(char2 *pc2Data, float2 *pf2FFTIn)
{
int i = (blockIdx.x * blockDim.x) + threadIdx.x;
pf2FFTIn[i].x = (float) pc2Data[i].x;
pf2FFTIn[i].y = (float) pc2Data[i].y;
return;
}
// prepares for the next PFB.
__global__ void saveData(char2* dataIn, char2* dataOut){
int i = blockIdx.y*(gridDim.x*blockDim.x) + blockIdx.x*blockDim.x + threadIdx.x;
dataOut[i] = dataIn[i];
return;
}
\ No newline at end of file
#ifndef __KERNELS_H__
#define __KERNELS_H__
#include <cuda.h>
#include <cufft.h>
// stuct of parameters for PFB. Values indicate default values.
#define DEFAULT_PFB_PARAMS {4000, 32, 8, 25, 5, 64, 320, 0, (char*)"hanning\0", (char*)"float\0", 1};
// plot 1 mean to hide the plot of the filter before continuing.
typedef struct {
int samples;
int nfft;
int taps;
int coarse_channels;
int fine_channels;
int elements;
int subbands;
int select;
char* window;
char* dataType;
int plot;
} params;
__global__ void PFB_kernel(char2* pc2Data, float2* pf2FFTIn, float* pfPFBCoeff, params pfbParams);
__global__ void map(char* dataIn, char2* dataOut, int channelSelect, params pfbParams);
__global__ void CopyDataForFFT(char2* pc2Data, float2* pf2FFTIn);
__global__ void saveData(char2* dataIn, char2* dataOut);
#endif
\ No newline at end of file
#ifdef __cplusplus
extern "C" {
#include "pfb.h"
}
#endif
// data ptrs
char2* g_pc2InBuf = NULL;
char2* g_pc2InBufRead = NULL;
char2* g_pc2Data_d = NULL;
char2* g_pc2DataRead_d = NULL;
float2* g_pf2FFTIn_d = NULL;
float2* g_pf2FFTOut_d = NULL;
float *g_pfPFBCoeff = NULL;
float *g_pfPFBCoeff_d = NULL;
char* g_pcInputData_d = NULL;
// pfb params
int g_iNFFT = DEF_LEN_SPEC;
int g_iNTaps = NUM_TAPS;
int g_iNumSubBands = PFB_CHANNELS * DEF_NUM_ELEMENTS;
// process flags
int g_IsDataReadDone = FALSE;
int g_IsProcDone = FALSE;
// size vars
int g_iSizeFile = 0;
int g_iReadCount = 0;
int g_iSizeRead = DEF_SIZE_READ;
int g_iFileCoeff = 0;
char g_acFileCoeff[256] = {0};
// GPU params
dim3 g_dimBPFB(1, 1, 1);
dim3 g_dimGPFB(1, 1);
dim3 g_dimBCopy(1, 1, 1);
dim3 g_dimGCopy(1, 1);
dim3 mapGSize(1,1,1);
dim3 mapBSize(1,1,1);
dim3 saveGSize(1, 1, 1 ); // (5, 256, 1)
dim3 saveBSize(1, 1, 1); // (64, 1, 1)
cufftHandle g_stPlan = {0};
int g_iMaxThreadsPerBlock = 0;
int g_iMaxPhysThreads = 0;
int runPFB(signed char* inputData_h, float* outputData_h, params pfbParams) {
//process variables
int iRet = EXIT_SUCCESS;
int countPFB = 0; // count number of times pfb fires.
int countCpyFFT = 0;
int countFFT = 0; // count number of FFT's computed.
long lProcData = 0; // count how much data processed
long ltotData = pfbParams.samples * pfbParams.fine_channels * pfbParams.elements + pfbParams.fine_channels*pfbParams.elements*pfbParams.nfft*pfbParams.taps; // total amount of data to proc (includes the padding for the saved filter state.)
int start = pfbParams.fine_channels*pfbParams.elements*(pfbParams.nfft*pfbParams.taps); // starting point to copy over the map data.
// copy data to device
CUDASafeCallWithCleanUp(cudaMemcpy(g_pcInputData_d, inputData_h, g_iSizeRead, cudaMemcpyHostToDevice)); //g_iSizeRead = samples*coarse_channels*elements*(2*sizeof(char));
// map - extract channel data from full data stream and load into buffer.
map<<<mapGSize, mapBSize>>>(g_pcInputData_d, &g_pc2Data_d[start], pfbParams.select, pfbParams);
CUDASafeCallWithCleanUp(cudaGetLastError());
CUDASafeCallWithCleanUp(cudaThreadSynchronize());
// Begin PFB
g_pc2DataRead_d = g_pc2Data_d; // p_pc2Data_d contains all the data. DataRead will update with each pass through the PFB.
int pfb_on = 1; // Enable pfb flag. Extendable.
while(!g_IsProcDone){
if(pfb_on) {
//PFB
PFB_kernel<<<g_dimGPFB, g_dimBPFB>>>(g_pc2DataRead_d, g_pf2FFTIn_d, g_pfPFBCoeff_d, pfbParams);
CUDASafeCallWithCleanUp(cudaGetLastError());
CUDASafeCallWithCleanUp(cudaThreadSynchronize());
//update data read pointer
g_pc2DataRead_d += g_iNumSubBands * g_iNFFT;
++countPFB;
} else {
CopyDataForFFT<<<g_dimGPFB, g_dimBPFB>>>(g_pc2DataRead_d, g_pf2FFTIn_d);
g_pc2DataRead_d += g_iNumSubBands * g_iNFFT;
++countCpyFFT;
}
//FFT
iRet = doFFT();
if(iRet != EXIT_SUCCESS) {
(void) fprintf(stderr, "ERROR: FFT failed\n");
cleanUp();
return EXIT_FAILURE;
}
CUDASafeCallWithCleanUp(cudaGetLastError());
++countFFT;
//update output fft pointer.
g_pf2FFTOut_d += g_iNumSubBands * g_iNFFT;
//update proc data
lProcData += g_iNumSubBands * g_iNFFT;
if(lProcData >= ltotData - NUM_TAPS*g_iNumSubBands*g_iNFFT){ // >= process 117 ffts leaving 256 time samples, > process 118 ffts leaving 224 time samples.
(void) fprintf(stdout, "\nINFO: Processed finished!\n");
(void) fprintf(stdout, "\tCounters--PFB:%d FFT:%d\n",countPFB, countFFT);
(void) fprintf(stdout, "\tData process by the numbers:\n \t\tProcessed:%ld (Samples) \n \t\tTo Process:%ld (Samples)\n\n",lProcData, ltotData);
g_IsProcDone = TRUE;
// prepare next filter
saveData<<<saveGSize, saveBSize>>>(g_pc2DataRead_d, g_pc2Data_d);
CUDASafeCallWithCleanUp(cudaGetLastError());
// copy back to host.
//wind back out ptr - should put in another pointer as a process read ptr.
int outDataSize = countFFT * g_iNumSubBands * g_iNFFT * sizeof(cufftComplex);
g_pf2FFTOut_d = g_pf2FFTOut_d - countFFT*g_iNumSubBands*g_iNFFT;
//fprintf(stdout, "Copyting back: %d\n", outDataSize);
CUDASafeCallWithCleanUp(cudaMemcpy(outputData_h, g_pf2FFTOut_d, outDataSize, cudaMemcpyDeviceToHost));
}
}
return iRet;
}
// return true or false upon successful setup.
int initPFB(int iCudaDevice, params pfbParams){
int iRet = EXIT_SUCCESS;
// set pfb params from input parameters.
pfbParams.subbands = pfbParams.elements*pfbParams.fine_channels;
g_iNFFT = pfbParams.nfft;
g_iNTaps = pfbParams.taps;
g_iNumSubBands = pfbParams.subbands; // equal to elements*fine_channels. (The fine channels are the channels processed.)
g_iSizeRead = pfbParams.samples*pfbParams.coarse_channels*pfbParams.elements*(2*sizeof(char));
int iDevCount = 0;
cudaDeviceProp stDevProp = {0};
cufftResult iCUFFTRet = CUFFT_SUCCESS;
int i = 0;
//Register signal handlers?
/********************************************/
/* Look for eligable Cuda Device and select */
/********************************************/
(void) fprintf(stdout, "Querying CUDA devices.\n");
(void) cudaGetDeviceCount(&iDevCount);
if (0 == iDevCount) {
(void) fprintf(stderr, "ERROR: No CUDA-capable device found!\n");
return EXIT_FAILURE;
}
// Look for requested device (if applicable)
if (iCudaDevice >= iDevCount) {
(void) fprintf(stderr,
"ERROR: Requested device %d no found in present %d device list.\n",
iCudaDevice,
iDevCount);
return EXIT_FAILURE;
}
// Query devices and setup selected device.
for(i = 0; i < iDevCount; i++) {
CUDASafeCallWithCleanUp(cudaGetDeviceProperties(&stDevProp, i));
printf("\tDevice %d: %s, Compute Capability %d.%d, %d physical threads %s\n",
i,
stDevProp.name, stDevProp.major, stDevProp.minor,
stDevProp.multiProcessorCount * stDevProp.maxThreadsPerMultiProcessor,
(iCudaDevice == i) ? "<<SELECTED>>" : "");
}
CUDASafeCallWithCleanUp(cudaSetDevice(iCudaDevice));
// Setup block and thread paramters
CUDASafeCallWithCleanUp(cudaGetDeviceProperties(&stDevProp, 0));
g_iMaxThreadsPerBlock = stDevProp.maxThreadsPerBlock;
g_iMaxPhysThreads = stDevProp.multiProcessorCount * stDevProp.maxThreadsPerMultiProcessor;
// Check if valid operation lengths. i.e. The input buffer is long enough (should this be done here or elsewhere?)
// Set malloc size - lTotCUDAMalloc is used only to calculate the total amount of memory not used for the allocation.
size_t cudaMem_total, cudaMem_available;
size_t lTotCUDAMalloc = 0;
cudaMemGetInfo(&cudaMem_available, &cudaMem_total);
lTotCUDAMalloc += g_iSizeRead; // size data
lTotCUDAMalloc += (g_iNumSubBands * g_iNFFT * sizeof(float(2))); // size of FFT input array This should be different since our data is unsigned char?
lTotCUDAMalloc += (g_iNumSubBands * pfbParams.samples * sizeof(float(2))); // size of FFT output array
lTotCUDAMalloc += (g_iNumSubBands * g_iNFFT * sizeof(float)); // size of PFB Coefficients
// Check CUDA device can handle the memory request
if(lTotCUDAMalloc > stDevProp.totalGlobalMem) {
(void) fprintf(stderr,
"ERROR: Total memory requested on GPU is %g MB of %g possible MB (Total Global Memory: %g MB).\n"
"\t**** Memory breakdown *****\n"
"\tInput data buffer:\t%g MB\n"
"\tFFT in array:\t%g MB\n"
"\tFFT out array:\t%g MB\n"
"\tPFB Coefficients: %f KB\n",
((float) lTotCUDAMalloc) / (1024*1024),
((float) cudaMem_available) / (1024*1024), //stDevProp.totalGlobalMem
((float) cudaMem_total) / (1024*1024),
((float) g_iSizeRead) / (1024 * 1024),
((float) g_iNumSubBands * g_iNFFT * sizeof(float2)) / (1024 * 1024),
((float) g_iNumSubBands * pfbParams.samples * sizeof(float2)) / (1024 * 1024),
((float) g_iNumSubBands * g_iNFFT * sizeof(float)));
return EXIT_FAILURE;
}
// print memory usage report.
(void) fprintf(stdout,
"INFO: Total memory requested on GPU is %g MB of %g possible MB (Total Global Memory: %g MB).\n"
"\t**** Memory breakdown ****\n"
"\tInput data buffer:\t%g MB\n"
"\tFFT in array:\t%g MB\n"
"\tFFT out array:\t%g MB\n"
"\tPFB Coefficients: %f KB\n",
((float) lTotCUDAMalloc) / (1024*1024),
((float) cudaMem_available) / (1024*1024), //stDevProp.totalGlobalMem
((float) cudaMem_total) / (1024*1024),
((float) g_iSizeRead) / (1024 * 1024),
((float) g_iNumSubBands * g_iNFFT * sizeof(float2)) / (1024 * 1024),
((float) g_iNumSubBands * pfbParams.samples * sizeof(float2)) / (1024 * 1024),
((float) g_iNumSubBands * g_iNFFT * sizeof(float)));
/*************************/
/* Load PFB coefficients */
/*************************/
(void) fprintf(stdout, "\nSetting up PFB filter coefficients...\n");
int sizePFB = g_iNumSubBands * g_iNTaps * g_iNFFT * sizeof(float);
// Allocate memory for PFB coefficients to be read in
g_pfPFBCoeff = (float *) malloc(sizePFB); // allocate the memory needed for the size of one pfb pass through
if(NULL == g_pfPFBCoeff) {
(void) fprintf(stderr, "ERROR: Memory allocation for the PFB coefficients failed. %s\n",
strerror(errno));
return EXIT_FAILURE;
}
char relPath[256] = "/home/mburnett/GitHub/flag_gpu/lib/pfb/src/";
// Read filter coefficients from file
(void) fprintf(stdout, "\tReading in coefficients...\n");
(void) sprintf(g_acFileCoeff,
"%s%s_%s_%d_%d_%d%s",
relPath,
FILE_COEFF_PREFIX,
FILE_COEFF_DATATYPE,
g_iNTaps,
g_iNFFT,
g_iNumSubBands,
FILE_COEFF_SUFFIX);
g_iFileCoeff = open(g_acFileCoeff, O_RDONLY);
if(g_iFileCoeff < EXIT_SUCCESS) {
(void) fprintf(stderr, "ERROR: Failed to open coefficient file %s. %s\n",
g_acFileCoeff,
strerror(errno));
return EXIT_FAILURE;
}
iRet = read(g_iFileCoeff, g_pfPFBCoeff, sizePFB);
if(iRet != sizePFB) {
(void) fprintf(stderr, "ERROR: Failed reading filter coefficients. %s\n", strerror(errno));
return EXIT_FAILURE;
}
(void) close(g_iFileCoeff);
/********************************************/
/* Allocate memory and setup on CUDA device */
/********************************************/
(void) fprintf(stdout, "\nSetting up CUDA device.\n");
//malloc map array and copy data to device
(void) fprintf(stdout, "\tAllocating memory for MAP...\n");
// creates a size that is paddedd in the front to store the filter state. Worth one 256 (nfft*taps) time sample amount of data
int sizeMap = pfbParams.samples * pfbParams.fine_channels * pfbParams.elements * (2*sizeof(char)) + pfbParams.fine_channels*pfbParams.elements*pfbParams.nfft*pfbParams.taps * (2*sizeof(char));
CUDASafeCallWithCleanUp(cudaMalloc((void **) &g_pcInputData_d, g_iSizeRead));
CUDASafeCallWithCleanUp(cudaMemset((void *) g_pcInputData_d, 0, g_iSizeRead));
CUDASafeCallWithCleanUp(cudaMalloc((void **) &g_pc2Data_d, sizeMap));
CUDASafeCallWithCleanUp(cudaMemset((void *) g_pc2Data_d, 0, sizeMap));
// allocate memory for pfb coefficients on GPU
(void) fprintf(stdout, "\tAllocating memory for PFB...\n");
CUDASafeCallWithCleanUp(cudaMalloc((void **) &g_pfPFBCoeff_d, sizePFB));
// copy coeff to device
(void) fprintf(stdout, "\tCopying filter coefficients...\n");
CUDASafeCallWithCleanUp(cudaMemcpy(g_pfPFBCoeff_d, g_pfPFBCoeff, sizePFB, cudaMemcpyHostToDevice));
// allocate memory for FFT in and out arrays
(void) fprintf(stdout, "\tAllocate memory for FFT arrays...\n");
int sizeDataBlock_in = g_iNumSubBands * g_iNFFT * sizeof(float2);
int sizeTotalDataBlock_out = pfbParams.samples*g_iNumSubBands * sizeof(float2); // output fft array same size as output data for convinence the full size is not used. In the pfb function the output data will be the fft counter times block amount in the fft.
CUDASafeCallWithCleanUp(cudaMalloc((void **) &g_pf2FFTIn_d, sizeDataBlock_in));
CUDASafeCallWithCleanUp(cudaMalloc((void **) &g_pf2FFTOut_d, sizeTotalDataBlock_out)); // goal will be to update the output ptr each time it fires.
// set kernel parameters
(void) fprintf(stdout, "\tSetting kernel parameters...\n");
if(g_iNFFT < g_iMaxThreadsPerBlock) {
g_dimBPFB.x = g_iNFFT;
g_dimBCopy.x = g_iNFFT;
} else {
g_dimBPFB.x = g_iMaxThreadsPerBlock;
g_dimBCopy.x = g_iMaxThreadsPerBlock;
}
g_dimGPFB.x = (g_iNumSubBands * g_iNFFT) / g_dimBPFB.x;
g_dimGCopy.x = (g_iNumSubBands * g_iNFFT) / g_dimBCopy.x;
// map kernel params
mapGSize.x = pfbParams.samples;
mapGSize.y = pfbParams.fine_channels;
mapGSize.z = 1;
mapBSize.x = 1;
mapBSize.y = pfbParams.elements;
mapBSize.z = 1;
// copy kernel params
saveGSize.x = pfbParams.fine_channels;
saveGSize.y = pfbParams.nfft*pfbParams.taps;
saveGSize.z = 1;
saveBSize.x = pfbParams.elements;
saveBSize.y = 1;
saveBSize.z = 1;
(void) fprintf(stdout, "\t\tPFB Kernel Parmaters are:\n\t\tgridDim(%d,%d,%d) blockDim(%d,%d,%d)\n\n",
g_dimGPFB.x, g_dimGPFB.y, g_dimGPFB.z,
g_dimBPFB.x, g_dimBPFB.y, g_dimBPFB.z);
(void) fprintf(stdout, "\t\tMAP Kernel Parmaters are:\n\t\tgridDim(%d,%d,%d) blockDim(%d,%d,%d)\n\n",
mapGSize.x, mapGSize.y, mapGSize.z,
mapBSize.x, mapBSize.y, mapBSize.z);
(void) fprintf(stdout, "\t\tSave Kernel Parmaters are:\n\t\tgridDim(%d,%d,%d) blockDim(%d,%d,%d)\n",
saveGSize.x, saveGSize.y, saveGSize.z,
saveBSize.x, saveBSize.y, saveBSize.z);
// create a CUFFT plan
(void) fprintf(stdout, "\tCreating cuFFT plan...\n");
iCUFFTRet = cufftPlanMany(&g_stPlan,
FFTPLAN_RANK,
&g_iNFFT,
&g_iNFFT,
FFTPLAN_ISTRIDE,
FFTPLAN_IDIST,
&g_iNFFT,
FFTPLAN_OSTRIDE,
FFTPLAN_ODIST,
CUFFT_C2C,
FFTPLAN_BATCH);
if(iCUFFTRet != CUFFT_SUCCESS) {
(void) fprintf(stderr, "ERROR: Plan creation failed!\n");
return EXIT_FAILURE;
}
fprintf(stdout, "\nDevice for PFB successfully initialized!\n");
return EXIT_SUCCESS;
}
// make a call to execute a ptyhon program.
void genCoeff(char* procName, params pfbParams) {
system("python --version");
FILE* file;
char fname[256] = {"/home/mburnett/GitHub/flag_gpu/lib/pfb/python/flag_gen_coeff.py"};
int argCount = 11;
char* arguments[32] = {}; // come back and create a dynamic structure, i.e definetly do not need 32, always 10 or 11.
int i = 0;
for(i = 0; i < 32; i++) {
arguments[i] = (char*) malloc(256*sizeof(char*));
}
arguments[0] = procName;
arguments[1] = (char*) "-n\0"; // (char*) acknowledges that I am assigning a const literal to a mutable and removes compile warnings for now.
sprintf(arguments[2], "%d", pfbParams.nfft);
arguments[3] = (char*) "-t\0";
sprintf(arguments[4], "%d", pfbParams.taps);
arguments[5] = (char*) "-b\0";
sprintf(arguments[6], "%d", pfbParams.subbands);
arguments[7] = (char*) "-w\0";
sprintf(arguments[8], "%s", pfbParams.window);
arguments[9] = (char*) "-d\0";
sprintf(arguments[10], "%s", pfbParams.dataType);
if(pfbParams.plot) {
arguments[11] = (char*) "-p\0";
argCount++;
}
for(i = 0; i < argCount; i++){
fprintf(stdout, " %s", arguments[i]); // Add a gen coeff output for feedback.c
}
fprintf(stdout, "\n");
// initalize and run python script
Py_SetProgramName(procName);
Py_Initialize();
PySys_SetArgv(argCount, arguments);
file = fopen(fname, "r");
PyRun_SimpleFile(file, fname);
Py_Finalize();
return;
}
int doFFT() {
cufftResult iCUFFTRet = CUFFT_SUCCESS;
/* execute plan */
iCUFFTRet = cufftExecC2C(g_stPlan,
(cufftComplex*) g_pf2FFTIn_d,
(cufftComplex*) g_pf2FFTOut_d,
CUFFT_FORWARD);
if (iCUFFTRet != CUFFT_SUCCESS)
{
(void) fprintf(stderr, "ERROR! FFT failed!\n");
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
}
int resetDevice() {
cudaError_t cuErr = cudaDeviceReset();
if (cuErr != cudaSuccess) {
fprintf(stderr, "Device Reset Failed.\n");
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
}
void cleanUp() {
/* free resources */
if (g_pc2InBuf != NULL) {
free(g_pc2InBuf);
g_pc2InBuf = NULL;
}
if (g_pc2Data_d != NULL) {
(void) cudaFree(g_pc2Data_d);
g_pc2Data_d = NULL;
}
if (g_pf2FFTIn_d != NULL) {
(void) cudaFree(g_pf2FFTIn_d);
g_pf2FFTIn_d = NULL;
}
if (g_pf2FFTOut_d != NULL) {
(void) cudaFree(g_pf2FFTOut_d);
g_pf2FFTOut_d = NULL;
}
free(g_pfPFBCoeff);
(void) cudaFree(g_pfPFBCoeff_d);
/* destroy plan */
/* TODO: check for plan */
(void) cufftDestroy(g_stPlan);
return;
}
void __CUDASafeCallWithCleanUp(cudaError_t iRet, const char* pcFile, const int iLine, void (*pcleanUp)(void)) {
if (iRet != cudaSuccess)
{
(void) fprintf(stderr,
"ERROR: File <%s>, Line %d: %s\n",
pcFile,
iLine,
cudaGetErrorString(iRet));
/* free resources */
(*pcleanUp)();
exit(EXIT_FAILURE);
}
return;
}
#ifndef __PFB_H__
#define __PFB_H__
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h> /* for open() */
#include <sys/stat.h> /* for open() */
#include <fcntl.h> /* for open() */
#include <python2.7/Python.h> /* for executing coeff gen file */
#include "kernels.h"
#define FALSE 0
#define TRUE 1
#define DEBUG 1
#define DEF_CUDA_DEVICE 0
#define DEF_SIZE_READ 262144 // data block size. should this be set dynamically once I get the data?
#define DEF_LEN_SPEC 32 // Transform size
#define NUM_TAPS 8 // PFB Decimation factor
#define DEF_NUM_CHANNELS 25 // System spec for total number of channels
#define PFB_CHANNELS 5 // Number of coarse channels through PFB
#define DEF_NUM_ELEMENTS 64 // System spec for number of elements
#define SAMPLES 4000// Time samples.
#define PFB_OUTPUT_BLOCK_SIZE SAMPLES*PFB_CHANNELS*2
// FFT Plan configuration
#define FFTPLAN_RANK 1 // dimension of the transform
#define FFTPLAN_ISTRIDE (g_iNumSubBands) // The distance between two successive input time elements. - (polarization*numsubbands).
#define FFTPLAN_OSTRIDE (g_iNumSubBands) // Similar to ostride to maintain data structure
#define FFTPLAN_IDIST 1 // The distance between the first elements of two consecutive batches in the input data. Each FFT operation is a 'batch'. Each subband is a time series and we need a FFT for each subband. Since we have interleaved samples the distance between consecutive batches is 1 sample.
#define FFTPLAN_ODIST 1 // Simailar to odist to maintian data structure
#define FFTPLAN_BATCH (g_iNumSubBands) // The total number of FFTs to perform per call to DoFFT().
// coeff file configuration
#define FILE_COEFF_PREFIX "coeff"
#define FILE_COEFF_DATATYPE "float"
#define FILE_COEFF_SUFFIX ".dat"
#define USEC2SEC 1e-6
typedef unsigned char BYTE;
#define CUDASafeCallWithCleanUp(iRet) __CUDASafeCallWithCleanUp(iRet, __FILE__, __LINE__, &cleanUp)
void __CUDASafeCallWithCleanUp(cudaError_t iRet, const char* pcFile, const int iLine, void (*pcleanUp)(void));
void genCoeff(char* procName, params pfbParams);
int initPFB(int iCudaDevice, params pfbParams);
int runPFB(signed char* inputData_h, float* outputData_h, params pfbParams);
int doFFT();
int resetDevice(void);
void cleanUp(void);
#endif
\ No newline at end of file
#include "main.h"
char* g_inputData = NULL;
float2* g_outputData = NULL;
params pfbParams = DEFAULT_PFB_PARAMS;
int main(int argc, char *argv[]) {
int ret = EXIT_SUCCESS;
/*********************************** PARSE INPUT *****************************************/
/* valid short and long options */
const char* const pcOptsShort = ":hn:t:w:k:c:f:e:b:d:s:p";
const struct option stOptsLong[] = {
{ "help", 0, NULL, 'h' },
{ "nfft", 1, NULL, 'n' },
{ "taps", 1, NULL, 't' },
{ "window", 1, NULL, 'w' },
{ "samples", 1, NULL, 'k' },
{ "coarse", 1, NULL, 'c' },
{ "fine", 1, NULL, 'f' },
{ "elements", 1, NULL, 'e' },
{ "nsub", 1, NULL, 'b' },
{ "datatype", 1, NULL, 'd' },
{ "select", 1, NULL, 's' },
{ "plot", 0, NULL, 'p' },
{ NULL, 0, NULL, 0 }
};
const char* progName = argv[0];
int errFlag = 0;
/* parse input */
int opt = 0; //
int prevInd = 0; // used to track optind to manual check missing arguments.
do {
/*
Getopt will load the next option if the argument is missing, getopt's ':' error check
really only works on the last option. This assumes that no argument has a '-' in it.
*/
prevInd = optind;
opt = getopt_long(argc, argv, pcOptsShort, stOptsLong, NULL);
if(optind == prevInd + 2 && (*optarg == '-' || *optarg == '.')) { // assumes arguments cannot start with '-' or '.'. Also, if optarg is null this causes a seg fault and the first logical comparisson catches the null case. The parans for the or helps not cause the fault.
optopt = opt; // update getopt's optopt variable to contain the violating variable.
opt = ':'; // trigger the error character.
--optind; // decrement optind since it was incremented incorrectly.
}
switch(opt)
{
case 'h':
printUsage(progName);
return EXIT_SUCCESS;
case 'n':
pfbParams.nfft = (int) atoi(optarg);
break;
case 't':
pfbParams.taps = (int) atoi(optarg);
break;
case 'w':
pfbParams.window = optarg;
break;
case 'k':
pfbParams.samples = (int) atoi(optarg);
break;
case 'c':
pfbParams.coarse_channels = (int) atoi(optarg);
break;
case 'e':
pfbParams.elements = (int) atoi(optarg);
break;
case 'f':
pfbParams.fine_channels = (int) atoi(optarg);
break;
case 'b':
pfbParams.subbands = (int) atoi(optarg);
break;
case 'd':
pfbParams.dataType = optarg;
break;
case 's':
pfbParams.select = (int) atoi(optarg);
// check valid select range.
if(pfbParams.select < 0 || pfbParams.select > 4) {
(void) fprintf(stderr, "ERROR: Channel select range [0, 4]\n");
errFlag++;
}
break;
case 'p':
pfbParams.plot = 0;
break;
case ':':
(void) fprintf(stderr, "-%c option requires a parameter.\n", optopt);
errFlag++;
break;
case '?':
(void) fprintf(stderr, "Unrecognized option -%c.\n", optopt);
errFlag++;
break;
case -1: /* done with options */
break;
default: /* unexpected */
assert(0);
}
} while (opt != -1);
if(errFlag) {
printUsage(progName);
return EXIT_FAILURE;
}
// no data file presented
int genFlag = 0;
if(argc <= optind) {
(void) fprintf(stderr, "ERROR: Missing data file.\n");
genFlag = 1;
}
// init input data array
int readSize = pfbParams.samples * pfbParams.coarse_channels * pfbParams.elements * (2*sizeof(char));
g_inputData = (char*) malloc(readSize);
memset(g_inputData, 0, readSize);
// Determine wether to get data from a file or generate it.
if(!genFlag) {
// get data filename
char filename[256] = {0};
(void) strncpy(filename, argv[optind], 256);
filename[255] = '\0';
ret = loadData(filename, g_inputData, readSize);
if (ret == EXIT_FAILURE) {
return EXIT_FAILURE;
}
} else {
// generate data
//generate freq array for data
int i = 0;
float fs = 303.0; // KHz - a default sample rate.
int channelBandgap = 10.0; // KHz jumps
float* freq = (float *) malloc(pfbParams.coarse_channels*sizeof(float));
for(i = 0; i <= pfbParams.coarse_channels; i++) {
freq[i] = channelBandgap * i + 5.0;
}
genData(g_inputData, freq, fs, pfbParams.samples, pfbParams.coarse_channels, pfbParams.elements);
}
/****************************** SETUP PFB ******************************/
/* init cuda device */
int iCudaDevice = DEF_CUDA_DEVICE;
// create coeff and write to a file that is read in initPFB.
pfbParams.subbands = pfbParams.elements*pfbParams.fine_channels;
genCoeff(argc, argv, pfbParams);
// init the device, loads coeff
ret = initPFB(iCudaDevice, pfbParams);
// malloc data arrays
int outputSize = pfbParams.samples * pfbParams.fine_channels * pfbParams.elements * (2*sizeof(float)); // need to convince myself of this output data size.
g_outputData = (float2*) malloc(outputSize);
memset(g_outputData, 0, outputSize);
// run the pfb function
clock_t start, end;
start = clock();
ret = runPFB(g_inputData, g_outputData, pfbParams);
end = clock();
double timeTaken = 0;
timeTaken = ((double) (end - start))/CLOCKS_PER_SEC;
(void) printf("Time taken (barring Init()): %gs\n", timeTaken);
if (ret == EXIT_FAILURE) {
(void) fprintf(stderr, "ERROR: runPFB failed!\n");
free(g_inputData);
free(g_outputData);
return EXIT_FAILURE;
}
// process return from pfb - write to file
int file = 0;
char outfile[256] = "output/outfile.dat\0";
file = open(outfile,
O_CREAT | O_TRUNC | O_WRONLY,
S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
if(file < EXIT_SUCCESS) {
(void) fprintf(stderr, "ERROR: writing outfile failed\n");
free(g_inputData);
free(g_outputData);
return EXIT_FAILURE;
}
// clean up and exit
cleanUp();
ret = resetDevice();
(void) write(file, g_outputData, outputSize);
(void) close(file);
free(g_inputData);
free(g_outputData);
return EXIT_SUCCESS;
}
\ No newline at end of file
#include <stdio.h>
#include <stdlib.h>
#include <string.h> /* for strncopy(), memcpy(), strerror()*/
#include <sys/types.h> /* for open() */
#include <sys/stat.h> /* for open() */
#include <sys/time.h>
#include <fcntl.h> /* for open() */
#include <unistd.h> /* for close() */
#include <errno.h> /* for errno */
#include <getopt.h> /* for option parsing */
#include "pfb.h"
#include "helper.h"
#include "tools/tools.h"
\ No newline at end of file
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <sys/types.h>
#include <fcntl.h>
#include <unistd.h>
#include <float.h>
#include <getopt.h>
#include <string.h>
#include <errno.h>
#include <assert.h>
#define LEN_GENSTRING 256
#define SCALE_FACTOR 127
#define F_S 256.0 // MHz
#define N 256 // Time samples
#define MAX_FREQ 256
#define CHANNELS 25 // Freq Channels
#define NUM_EL 64 // Antenna Elements
void printUsage(const char* progName) {
(void) printf("Usage: %s [options] <data-file>\n", progName);
(void) printf(" -h --help ");
(void) printf("Display this usage information\n");
(void) printf(" -s --samples <value> ");
(void) printf("Number of samples in the data\n");
(void) printf(" -f --fs <value> ");
(void) printf("Max freq\n");
(void) printf(" -w --band <value> ");
(void) printf("Sample rate\n");
(void) printf(" -c --channels <value> ");
(void) printf("Number of channels in data\n");
(void) printf(" -e --elemnets <value> ");
(void) printf("Number of elements in data\n");
return;
}
int main(int argc, char *argv[]) {
int samples = N;
int fs = F_S;
int coarseChannels = CHANNELS;
int elements = NUM_EL;
int genFreq = MAX_FREQ;
/* valid short and long options */
const char* const pcOptsShort = ":hs:f:w:c:e:";
const struct option stOptsLong[] = {
{ "help", 0, NULL, 'h' },
{ "samples", 1, NULL, 's' },
{ "fs", 1, NULL, 'f' },
{ "band", 1, NULL, 'w' },
{ "channels", 1, NULL, 'c' },
{ "elements", 1, NULL, 'e' },
{ NULL, 0, NULL, 0 }
};
const char* progName = argv[0];
int errFlag = 0;
/* parse input */
int opt = 0; //
int prevInd = 0; // used to track optind to manual check missing arguments.
do {
/*
Getopt will load the next option if the argument is missing, getopt's ':' error check
really only works on the last option. This assumes that no argument has a '-' in it.
*/
prevInd = optind;
opt = getopt_long(argc, argv, pcOptsShort, stOptsLong, NULL);
if(optind == prevInd + 2 && (*optarg == '-' || *optarg == '.')) { // assumes arguments cannot start with '-' or '.'. Also, if optarg is null this causes a seg fault and the first logical comparisson catches the null case. The parans for the or helps not cause the fault.
optopt = opt; // update getopt's optopt variable to contain the violating variable.
opt = ':'; // trigger the error character.
--optind; // decrement optind since it was incremented incorrectly.
}
switch(opt)
{
case 'h':
printUsage(progName);
return EXIT_SUCCESS;
case 's':
samples = (int) atoi(optarg);
break;
case 'c':
coarseChannels = (int) atoi(optarg);
break;
case 'e':
elements = (int) atoi(optarg);
break;
case 'f':
fs = (int) atoi(optarg);
break;
case 'w':
genFreq = (int) atoi(optarg);
break;
case ':':
(void) fprintf(stderr, "-%c option requires a parameter.\n", optopt);
errFlag++;
break;
case '?':
(void) fprintf(stderr, "Unrecognized option -%c.\n", optopt);
errFlag++;
break;
case -1: /* done with options */
break;
default: /* unexpected */
assert(0);
}
} while (opt != -1);
if(errFlag) {
printUsage(progName);
return EXIT_FAILURE;
}
// no data file presented
if(argc <= optind) {
(void) fprintf(stderr, "ERROR: Missing output data filename.\n");
return EXIT_FAILURE;
}
int iFile = 0;
char acDataFilename[LEN_GENSTRING] = {0};
(void) strncpy(acDataFilename, argv[optind], LEN_GENSTRING);
acDataFilename[LEN_GENSTRING-1] = '\0'; //NUll terminator at end of filename string.
iFile = open(acDataFilename,
O_CREAT | O_TRUNC | O_WRONLY,
S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
if(EXIT_FAILURE == iFile) {
(void) fprintf(stderr, "ERROR: Failed to open output file. %s\n", strerror(errno));
return EXIT_FAILURE;
}
int i = 0;
//const int genFreq = 256;
//Generate freq array.
float* freq = NULL;
freq = (float*) malloc(genFreq*sizeof(float));
//float freq[genFreq] = {};
for(i = 1; i <= genFreq; i++) {
freq[i-1] = i*1.0;
//fprintf(stdout, "freq: %f\n", freq[i-1]);
}
fprintf(stdout, "i: %d\n", i);
int f = 0;
int n = 0;
int c = 0;
int e = 0;
signed char cDataRe = 0;
signed char cDataIm = 0;
signed char* toWrite = (signed char *) malloc(samples*coarseChannels*elements*(2*sizeof(signed char)));
fprintf(stdout,
"INFO: Generating samples...\n"
"\tSamples:\t %d\n"
"\tSample rate:\t %d\n"
"\tMax Range:\t %d\n"
"\tChannels:\t %d\n"
"\tElements:\t %d\n",
samples, fs, genFreq, coarseChannels, elements);
for(f = 0; f < genFreq; f++) {
for(n = 0; n < samples; n++) {
cDataRe = SCALE_FACTOR * (0.1 * cos(2*M_PI * freq[f] * n / fs));
cDataIm = SCALE_FACTOR * (0.1 * sin(2*M_PI * freq[f] * n / fs));
for(c = 0; c < coarseChannels; c++) {
for(e = 0; e < 2*elements; e++) {
int idx = e + c * (2 * elements) + n * (coarseChannels * 2*elements);
if( !(e%2) ) {
//create interleaved samples for real and Im
toWrite[idx] = cDataRe;
} else {
toWrite[idx] = cDataIm;
}
}
}
}
(void) write(iFile, toWrite, samples*coarseChannels*elements*(2*sizeof(signed char)));
}
(void) close(iFile);
return EXIT_SUCCESS;
}
\ No newline at end of file
File added
#include "helper.h"
// File containing helper functions for main
void printUsage(const char* progName) {
(void) printf("Usage: %s [options] <data-file>\n", progName);
(void) printf(" -h --help ");
(void) printf("Display this usage information\n");
(void) printf(" -b --nsub ");
(void) printf("Number of sub-bands in the data\n");
(void) printf(" -n --nfft <value> ");
(void) printf("Number of points in FFT\n");
(void) printf(" -w --window <string> ");
(void) printf("Filter window type, hanning, cheb-win\n");
(void) printf(" -k --samples <value> ");
(void) printf("Number of time samples processed\n");
(void) printf(" -c --coarse <value> ");
(void) printf("Number of coarse channels in data\n");
(void) printf(" -f --fine <value> ");
(void) printf("Number of channels selected to process\n");
(void) printf(" -e --elements <value> ");
(void) printf("Number of elements in data\n");
(void) printf(" -d --datatype <string> ");
(void) printf("Filter coefficient data type, float or int\n");
(void) printf(" -s --select <value> ");
(void) printf("Where in channels to begin selecting fine\n");
return;
}
int loadData(char* f, char* inputData, int size) {
int ret = EXIT_SUCCESS;
int file = 0;
//int readSize = SAMPLES * DEF_NUM_CHANNELS * DEF_NUM_ELEMENTS * (2*sizeof(char));
//inputData = (char*) malloc(readSize);
if(NULL == inputData) {
(void) fprintf(stderr, "ERROR: Memory allocation failed! %s.\n", strerror(errno));
return EXIT_FAILURE;
}
file = open(f, O_RDONLY);
if (file < EXIT_SUCCESS) {
(void) fprintf(stderr, "ERROR: failed to open data file. %s\n", strerror(errno));
return EXIT_FAILURE;
}
ret = read(file, inputData, size);
if (ret < EXIT_SUCCESS) {
(void) fprintf(stderr, "ERROR: failed to read data file. %s\n", strerror(errno));
(void) close(file);
return EXIT_FAILURE;
}
(void) close(file);
return EXIT_SUCCESS;
}
\ No newline at end of file
#ifndef __HELPER_H
#define __HELPER_H
#include <stdio.h>
#include <stdlib.h>
// Had to copy these over form pfb.h since they are part of a load memory helper function most likely not needed in full pfb implementation
#include <string.h> /* for strncopy(), memcpy(), strerror()*/
#include <sys/types.h> /* for open() */
#include <sys/stat.h> /* for open() */
#include <fcntl.h> /* for open() */
#include <unistd.h> /* for close() */
#include <errno.h> /* for errno */
void printUsage(const char* progName);
int loadData(char* f, char* inputData, int size);
#endif
\ No newline at end of file
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <sys/types.h>
#include <fcntl.h>
#include <unistd.h>
#include <float.h>
#include <getopt.h>
#include <string.h>
#include <errno.h>
#include <assert.h>
#define LEN_GENSTRING 256
#define SCALE_FACTOR 127
#define F_S 256.0 // MHz
#define N 256 // Time samples
#define MAX_FREQ 256
#define CHANNELS 25 // Freq Channels
#define NUM_EL 64 // Antenna Elements
void printUsage(const char* progName) {
(void) printf("Usage: %s [options] <data-file>\n", progName);
(void) printf(" -h --help ");
(void) printf("Display this usage information\n");
(void) printf(" -s --samples <value> ");
(void) printf("Number of samples in the data\n");
(void) printf(" -f --fs <value> ");
(void) printf("Max freq\n");
(void) printf(" -w --band <value> ");
(void) printf("Sample rate\n");
(void) printf(" -c --channels <value> ");
(void) printf("Number of channels in data\n");
(void) printf(" -e --elemnets <value> ");
(void) printf("Number of elements in data\n");
return;
}
int main(int argc, char *argv[]) {
int samples = N;
int fs = F_S;
int coarseChannels = CHANNELS;
int elements = NUM_EL;
int genFreq = MAX_FREQ;
/* valid short and long options */
const char* const pcOptsShort = ":hs:f:w:c:e:";
const struct option stOptsLong[] = {
{ "help", 0, NULL, 'h' },
{ "samples", 1, NULL, 's' },
{ "fs", 1, NULL, 'f' },
{ "band", 1, NULL, 'w' },
{ "channels", 1, NULL, 'c' },
{ "elements", 1, NULL, 'e' },
{ NULL, 0, NULL, 0 }
};
const char* progName = argv[0];
int errFlag = 0;
/* parse input */
int opt = 0; //
int prevInd = 0; // used to track optind to manual check missing arguments.
do {
/*
Getopt will load the next option if the argument is missing, getopt's ':' error check
really only works on the last option. This assumes that no argument has a '-' in it.
*/
prevInd = optind;
opt = getopt_long(argc, argv, pcOptsShort, stOptsLong, NULL);
if(optind == prevInd + 2 && (*optarg == '-' || *optarg == '.')) { // assumes arguments cannot start with '-' or '.'. Also, if optarg is null this causes a seg fault and the first logical comparisson catches the null case. The parans for the or helps not cause the fault.
optopt = opt; // update getopt's optopt variable to contain the violating variable.
opt = ':'; // trigger the error character.
--optind; // decrement optind since it was incremented incorrectly.
}
switch(opt)
{
case 'h':
printUsage(progName);
return EXIT_SUCCESS;
case 's':
samples = (int) atoi(optarg);
break;
case 'c':
coarseChannels = (int) atoi(optarg);
break;
case 'e':
elements = (int) atoi(optarg);
break;
case 'f':
fs = (int) atoi(optarg);
break;
case 'w':
genFreq = (int) atoi(optarg);
break;
case ':':
(void) fprintf(stderr, "-%c option requires a parameter.\n", optopt);
errFlag++;
break;
case '?':
(void) fprintf(stderr, "Unrecognized option -%c.\n", optopt);
errFlag++;
break;
case -1: /* done with options */
break;
default: /* unexpected */
assert(0);
}
} while (opt != -1);
if(errFlag) {
printUsage(progName);
return EXIT_FAILURE;
}
int i = 0;
//Generate freq array.
float* freq = NULL;
freq = (float*) malloc(genFreq*sizeof(float));
for(i = 1; i <= genFreq; i++) {
freq[i-1] = i*1.0;
}
int p = 0; // phase iterations
int f = 0;
int n = 0;
int c = 0;
int e = 0;
signed char cDataRe = 0;
signed char cDataIm = 0;
signed char* toWrite = (signed char *) malloc(samples*coarseChannels*elements*(2*sizeof(signed char)));
fprintf(stdout,
"INFO: Generating samples...\n"
"\tSamples:\t %d\n"
"\tSample rate:\t %d\n"
"\tChannels:\t %d\n"
"\tElements:\t %d\n",
samples, fs, coarseChannels, elements);
for(p = 0; p < 6; p++){
int iFile = 0;
char acDataFilename[LEN_GENSTRING] = "phase_data/file_%d.dat";
sprintf(acDataFilename, acDataFilename, p);
fprintf(stdout, "%s\n",acDataFilename);
acDataFilename[LEN_GENSTRING-1] = '\0'; //NUll terminator at end of filename string.
iFile = open(acDataFilename,
O_CREAT | O_TRUNC | O_WRONLY,
S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
if(EXIT_FAILURE == iFile) {
(void) fprintf(stderr, "ERROR: Failed to open output file. %s\n", strerror(errno));
return EXIT_FAILURE;
}
for(f = 0; f < genFreq; f++) {
for(n = 0; n < samples; n++) {
cDataRe = SCALE_FACTOR * (0.99 * cos(2*M_PI * freq[f] * n / fs + 2*M_PI/(double)(p+1)));
cDataIm = SCALE_FACTOR * (0.99 * sin(2*M_PI * freq[f] * n / fs + 2*M_PI/(double)(p+1)));
for(c = 0; c < coarseChannels; c++) {
for(e = 0; e < 2*elements; e++) {
int idx = e + c * (2 * elements) + n * (coarseChannels * 2*elements);
if( !(e%2) ) {
//create interleaved samples for real and Im
toWrite[idx] = cDataRe;
} else {
toWrite[idx] = cDataIm;
}
}
}
}
(void) write(iFile, toWrite, samples*coarseChannels*elements*(2*sizeof(signed char)));
}
(void) close(iFile);
}
return EXIT_SUCCESS;
}
\ No newline at end of file
#include "tools.h"
void genData(char* data, float* freq, float fs, int samples, int channels, int elements){
fprintf(stdout,
"INFO: Generating samples...\n"
"\tSamples:\t %d\n"
"\tSample rate:\t %f\n"
"\tChannels:\t %d\n"
"\tElements:\t %d\n",
samples, fs, channels, elements);
int n = 0;
int f = 0;
int e = 0;
signed char dataRe = 0;
signed char dataIm = 0;
//int size = elements*channels*(2*sizeof(char)); // 2 for complex data.
for(n = 0; n < samples; n++) {
for(f = 0; f < channels; f++) {
//if(f==5){ // only insert one tone
//use the same sample for all elements
dataRe = SCALE_FACTOR * (.1 * cos(2*M_PI * freq[f] * n / fs));
dataIm = SCALE_FACTOR * (.1 * sin(2*M_PI * freq[f] * n / fs));
for(e = 0; e < 2*elements; e++) {
int idx = e + f * (2 * elements) + n * channels * (2*elements);
if( !(e%2) ) {
//create interleaved samples for real and Im
data[idx] = dataRe;
} else {
data[idx] = dataIm;
}
}
//} else {
// cDataReX = 0;
// cDataImY = 0;
//}
}
}
return;
}
\ No newline at end of file
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <float.h>
#define SCALE_FACTOR 127
void genData(char* data, float* freq, float fs, int samples, int channels, int elements);
\ No newline at end of file
......@@ -4,10 +4,13 @@ AM_CPPFLAGS =
AM_CPPFLAGS += -I"@HASHPIPE_INCDIR@"
AM_CPPFLAGS += -I"@XGPU_INCDIR@"
AM_CPPFLAGS += -I"@FLAGBEAM_INCDIR@"
AM_CPPFLAGS += -I"@FLAGPFB_INCDIR@"
AM_CPPFLAGS += -I"@FLAGPOW_INCDIR@"
AM_CPPFLAGS += -I"@XGPU_FRB_INCDIR@"
AM_CPPFLAGS += -I"@XGPU_PFB_INCDIR@"
AM_CPPFLAGS += "@CUDA_INCDIR@"
# AM_CFLAGS is used for all C compiles
AM_CFLAGS = -fPIC -O3 -Wall -Werror -fno-strict-aliasing -mavx
# AM_CFLAGS = -ggdb -fPIC -O3 -Wall -Werror -fno-strict-aliasing
......@@ -43,6 +46,11 @@ flag_b_threads = flag_net_thread.c \
flag_beamform_thread.c \
flag_beamsave_thread.c
flag_f_threads = flag_net_thread.c \
flag_transpose_thread.c \
flag_pfb_thread.c \
flag_pfbsave_thread.c
flag_bx_threads = flag_net_thread.c \
flag_transpose_thread.c \
flag_dual_thread.c \
......@@ -53,14 +61,14 @@ flag_x_frb_threads = flag_net_thread.c \
flag_frb_correlator_thread.c \
flag_frb_corsave_thread.c
# TODO: Add the pfb thread
flag_fx_threads = flag_net_thread.c \
flag_transpose_thread.c \
flag_pfb_thread.c \
flag_pfb_correlator_thread.c \
flag_pfb_corsave_thread.c
# This lists all of the plugins that will be created
lib_LTLIBRARIES = flag_x.la flag_b.la flag_x_frb.la flag_bx.la flag_fx.la
lib_LTLIBRARIES = flag_x.la flag_b.la flag_f.la flag_x_frb.la flag_bx.la flag_fx.la
# flag_x.la sources and libraries
flag_x_la_SOURCES = $(flag_databuf) $(flag_x_threads) $(fifo_codes)
......@@ -77,6 +85,14 @@ flag_b_la_LDFLAGS += -L"@XGPU_LIBDIR@" -Wl,-rpath,"@XGPU_LIBDIR@"
flag_b_la_LDFLAGS += -L"@HASHPIPE_LIBDIR@" -Wl,-rpath,"@HASHPIPE_LIBDIR@"
flag_b_la_LDFLAGS += -L"@FLAGBEAM_LIBDIR@" -Wl, -rpath, "@FLAGBEAM_LIBDIR@"
# flab_f.la sources and libraries
flag_f_la_SOURCES = $(flag_databuf) $(flag_f_threads) $(fifo_codes)
flag_f_la_LIBADD = -lrt -ldl -lutil -L/usr/local/lib -lpython2.7 -lxgpu -lflagpfb -lcufft -lcudart -L/usr/local/cuda/lib64 -lcfitsio
flag_f_la_LDFLAGS = -avoid-version -module -shared -export-dynamic --enable-shared
flag_f_la_LDFLAGS += -L"@XGPU_LIBDIR@" -Wl, -rpath, "@XGPU_LIBDIR@"
flag_f_la_LDFLAGS += -L"@HASHPIPE_LIBDIR@" -Wl, -rpath,"@HASHPIPE_LIBDIR@"
flag_f_la_LDFLAGS += -L"@FLAGPFB_LIBDIR@" -Wl, -rpath,"@FLAGPFB_LIBDIR@"
# flag_bx.la sources and libraries
flag_bx_la_SOURCES = $(flag_databuf) $(flag_bx_threads) $(fifo_codes)
flag_bx_la_LIBADD = -lrt -lxgpu -lflagbeamformer -lcublas -L/usr/local/cuda/lib64 -lcfitsio
......@@ -94,11 +110,11 @@ flag_x_frb_la_LDFLAGS += -L"@HASHPIPE_LIBDIR@" -Wl,-rpath,"@HASHPIPE_LIBDIR@"
# flag_fx.la sources and libraries
flag_fx_la_SOURCES = $(flag_databuf) $(flag_fx_threads) $(fifo_codes)
flag_fx_la_LIBADD = -lrt -lxgpupfb -lcublas -L/usr/local/cuda/lib64 -lcfitsio
flag_fx_la_LIBADD = -lrt -lxgpupfb -lcublas -L/usr/local/cuda/lib64 -lcfitsio -lflagpfb -lcufft -lcudart
flag_fx_la_LDFLAGS = -avoid-version -module -shared -export-dynamic --enable-shared
flag_fx_la_LDFLAGS += -L"@XGPU_PFB_LIBDIR@" -Wl,-rpath,"@XGPU_PFB_LIBDIR@"
flag_fx_la_LDFLAGS += -L"@HASHPIPE_LIBDIR@" -Wl,-rpath,"@HASHPIPE_LIBDIR@"
# TODO: Add the PFB library flags
flag_fx_la_LDFLAGS += -L"@FLAGPFB_LIBDIR@" -Wl, -rpath,"@FLAGPFB_LIBDIR@"
# Installed scripts
......
......@@ -28,6 +28,10 @@ AX_CHECK_XGPU_FRB_INFO
AX_CHECK_XGPU_PFB_INFO
AX_CHECK_FLAGBEAMFORM
AX_CHECK_FLAGPOW
AX_CHECK_FLAGPFB
# Check for CUDA
AX_CHECK_CUDA
# Checks for libraries.
AC_CHECK_LIB([pthread], [pthread_create])
......@@ -35,7 +39,7 @@ AC_CHECK_LIB([rt], [clock_gettime])
AC_CHECK_LIB([z], [crc32])
# Checks for header files.
AC_CHECK_HEADERS([netdb.h stdint.h stdlib.h string.h sys/socket.h sys/time.h unistd.h zlib.h])
AC_CHECK_HEADERS([cuda.h netdb.h stdint.h stdlib.h string.h sys/socket.h sys/time.h unistd.h zlib.h])
# Checks for typedefs, structures, and compiler characteristics.
AC_C_INLINE
......
......@@ -177,6 +177,29 @@ int flag_gpu_beamformer_output_databuf_set_filled(flag_gpu_beamformer_output_dat
return hashpipe_databuf_set_filled((hashpipe_databuf_t *)d, block_id);
}
hashpipe_databuf_t * flag_gpu_pfb_output_databuf_create(int instance_id, int databuf_id) {
size_t header_size = sizeof(hashpipe_databuf_t) + sizeof(hashpipe_databuf_cache_alignment);
size_t block_size = sizeof(flag_gpu_pfb_output_block_t);
int n_block = N_GPU_OUT_BLOCKS;
return hashpipe_databuf_create(
instance_id, databuf_id, header_size, block_size, n_block);
}
int flag_gpu_pfb_output_databuf_wait_free(flag_gpu_pfb_output_databuf_t* d, int block_id) {
return hashpipe_databuf_wait_free((hashpipe_databuf_t *)d, block_id);
}
int flag_gpu_pfb_output_databuf_wait_filled(flag_gpu_pfb_output_databuf_t* d, int block_id) {
return hashpipe_databuf_wait_filled((hashpipe_databuf_t *)d, block_id);
}
int flag_gpu_pfb_output_databuf_set_free(flag_gpu_pfb_output_databuf_t* d, int block_id) {
return hashpipe_databuf_set_free((hashpipe_databuf_t *)d, block_id);
}
int flag_gpu_pfb_output_databuf_set_filled(flag_gpu_pfb_output_databuf_t* d, int block_id) {
return hashpipe_databuf_set_filled((hashpipe_databuf_t *)d, block_id);
}
hashpipe_databuf_t * flag_gpu_power_output_databuf_create(int instance_id, int databuf_id) {
size_t header_size = sizeof(hashpipe_databuf_t) + sizeof(hashpipe_databuf_cache_alignment);
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment