Commit 47613404 authored by Mitch Burnett's avatar Mitch Burnett
Browse files

simple read weights and metadata from file

bring back in a simple way to read weight files and load them to the
device

simple program to write the steering vector weights that have been used
to produce beam patterns for dummy input time data that iare the angles
for the beam pattern of a ULA.

simple program to test loading those weights and use them in the same
way that the test program does
parent 99dbf0d9
No related merge requests found
Showing with 598 additions and 109 deletions
+598 -109
......@@ -152,108 +152,84 @@ void rtbfInfo(RTBFInfo* info) {
return;
}
/*
// TODO need to keep the load from file capability
int update_weights(RTBFContext* context, char* filename) {
printf("RTBF: In update_weights()...\n");
// TODO no rigurous testing of valid file format or error handling
int load_weights_from_file(RTBFContext* context, char* filename) {
int rtbf_status = RTBF_OK;
RTBFInternalContext* internal = (RTBFInternalContext *)context->internal;
if (internal == NULL) {
return RTBF_NOT_INITIALIZED;
rtbf_status = RTBF_NOT_INITIALIZED;
return rtbf_status;
}
bf_metadata my_metadata = context->metadata;
long long unsigned int weight_len = compiletime_info.weight_len;
unsigned int nbeam = compiletime_info.nbeam;
// update_weights can accept `filename` of PATHLEN, but looks like CHARLEN is
// used everywhere else, and what is saved. Could be a problem.
char weight_filename[PATHLEN];
strcpy(weight_filename, filename);
FILE* weights;
float* bf_weights;
cuComplex* weights_dc;
cuComplex* weights_dc_n;
rtbf_metadata* md = &(context->metadata);
// Allocate heap memory for file data
bf_weights = (float *) malloc(2*RTBF_WEIGHT_SIZE*sizeof(float));
weights_dc = (cuComplex *) malloc(RTBF_WEIGHT_SIZE*sizeof(cuComplex *));
weights_dc_n = (cuComplex *) malloc(RTBF_WEIGHT_SIZE*sizeof(cuComplex *));
// open weight file
weights = fopen(weight_filename, "r");
float* rtbf_weights;
rtbf_weights = (float *) malloc(weight_len* 2*sizeof(float));
if (weights != NULL) {
fread(bf_weights, sizeof(float), 2*RTBF_WEIGHT_SIZE, weights);
// open and parse weight file
// TODO update_weights can accept `filename` of PATHLEN, but looks like CHARLEN is
// used everywhere else, and what is saved. Could be a problem.
// TODO do we even need to make this copy to `weight_filename`?
FILE* weight_stream;
char weight_filename[PATHLEN];
strcpy(weight_filename, filename);
printf("opening file: %s\n", weight_filename);
weight_stream = fopen(weight_filename, "r");
fread(my_metadata.offsets, sizeof(float), RTBF_NBEAM, weights);
fread(my_metadata.cal_filename, sizeof(char), (CHARLEN-1), weights);
fread(my_metadata.algorithm, sizeof(char), (CHARLEN-1), weights);
fread(&(my_metadata.xid), sizeof(long long unsigned int), 1, weights);
if (weight_stream != NULL) {
fread(rtbf_weights, sizeof(float), 2*weight_len, weight_stream);
fread(md->offsets, sizeof(float), nbeam, weight_stream);
fread(md->cal_filename, sizeof(char), (CHARLEN-1), weight_stream);
fread(md->algorithm, sizeof(char), (CHARLEN-1), weight_stream);
fread(&(md->xid), sizeof(long long unsigned int), 1, weight_stream);
my_metadata.cal_filename[CHARLEN-1] = '\0';
my_metadata.algorithm[CHARLEN-1] = '\0';
md->cal_filename[CHARLEN-1] = '\0';
md->algorithm[CHARLEN-1] = '\0';
// Extract all path information from weight_filename for metadata
char* short_filename = strrchr(weight_filename, '/');
if (short_filename != NULL) {
strcpy(my_metadata.weight_filename, short_filename+1);
}
else {
strcpy(my_metadata.weight_filename, weight_filename);
}
// Convert to complex numbers (do a conjugate at the same time)
for(int j = 0; j < RTBF_WEIGHT_SIZE; j++){
weights_dc_n[j].x = bf_weights[2*j];
weights_dc_n[j].y = -(bf_weights[(2*j)+1]);
}
// Transpose the weights
cuComplex transpose[RTBF_NBEAM][RTBF_NANTENNA*RTBF_NBIN];
for(int m=0; m<RTBF_NBEAM; m++){
for(int n=0; n<RTBF_NANTENNA*RTBF_NBIN; n++){
transpose[m][n] = weights_dc_n[m*RTBF_NANTENNA*RTBF_NBIN + n];
}
}
for(int n=0; n<RTBF_NANTENNA*RTBF_NBIN; n++){
for(int m=0; m<RTBF_NBEAM; m++){
weights_dc[n*RTBF_NBEAM + m] = transpose[m][n];
}
strcpy(md->weight_filename, short_filename+1);
} else {
strcpy(md->weight_filename, weight_filename);
}
fclose(weights);
fclose(weight_stream);
} else {
rtbf_status = RTBF_FATAL;
return rtbf_status;
}
// Copy weights to device
//(TODO leftover, what does it mean) r_weights instead of weights_dc //*RTBF_NTIME
cudaMemcpy(internal->d_weights, weights_dc, RTBF_WEIGHT_SIZE*sizeof(cuComplex), cudaMemcpyHostToDevice);
// free memory
free(weights_dc);
free(weights_dc_n);
free(bf_weights);
// update beamformer weights on the device and free temp buffer
update_weights(context, rtbf_weights);
free(rtbf_weights);
return RTBF_OK;
}
*/
void bf_get_offsets(RTBFContext* context, float* offsets) {
for(int i = 0; i<RTBF_NBEAM; i++){
void get_rtbf_offsets(RTBFContext* context, float* offsets) {
for(int i = 0; i<compiletime_info.nbeam; i++){
offsets[i] = context->metadata.offsets[i];
}
}
void bf_get_cal_filename(RTBFContext* context, char* cal_filename) {
void get_rtbf_cal_filename(RTBFContext* context, char* cal_filename) {
for(int i = 0; i < CHARLEN; i++){
cal_filename[i] = context->metadata.cal_filename[i];
}
}
void bf_get_algorithm(RTBFContext* context, char* algorithm) {
void get_rtbf_algorithm(RTBFContext* context, char* algorithm) {
for(int i = 0; i < CHARLEN; i++){
algorithm[i] = context->metadata.algorithm[i];
}
}
void bf_get_weight_filename(RTBFContext* context, char* weight_filename) {
void get_rtbf_weight_filename(RTBFContext* context, char* weight_filename) {
int num_chars = strlen(context->metadata.weight_filename);
for (int i=0; i<num_chars; i++) {
weight_filename[i] = context->metadata.weight_filename[i];
......@@ -264,7 +240,7 @@ void bf_get_weight_filename(RTBFContext* context, char* weight_filename) {
weight_filename[CHARLEN-1] = '\0';
}
long long unsigned int bf_get_xid(RTBFContext* context) {
long long unsigned int get_rtbf_xid(RTBFContext* context) {
return context->metadata.xid;
}
......@@ -276,20 +252,22 @@ int update_weights(RTBFContext* context, float* weights) {
return rtbf_status;
}
// setup for weight update
long long unsigned int weight_len = compiletime_info.weight_len;
unsigned int Ne = compiletime_info.narray_elements;
unsigned int Nc = compiletime_info.nbin;
unsigned int Nb = compiletime_info.nbeam;
// setup the weights
long unsigned int Ne_x_Nc = Ne*Nc;
cuComplex* weights_cx = (cuComplex *) weights;
// Transpose the weights and conjugate
// note, this assumes the weights to be set have dimensions RTBF_NANTENNA
cuComplex* weights_Htranspose = (cuComplex *) malloc(weight_len*sizeof(cuComplex));
for(int n=0; n<(Ne*Nc); n++) {
for(int n=0; n<(Ne_x_Nc); n++) {
for(int m=0; m<Nb; m++) {
weights_Htranspose[n*Nb+ m].x = weights_cx[m*(Ne*Nc)+ n].x;
weights_Htranspose[n*Nb+ m].y = -weights_cx[m*(Ne*Nc)+ n].y;
weights_Htranspose[n*Nb+ m].x = weights_cx[m*(Ne_x_Nc)+ n].x;
weights_Htranspose[n*Nb+ m].y = -weights_cx[m*(Ne_x_Nc)+ n].y;
}
}
......@@ -301,7 +279,7 @@ int update_weights(RTBFContext* context, float* weights) {
return rtbf_status;
}
int init_beamformer(RTBFContext* context) { // [NBEAM][ELE_BLOC*BIN]
int init_beamformer(RTBFContext* context) {
switch (context->operational_mode) {
case BEAM_OP_RAW:
case BEAM_OP_STI:
......@@ -375,10 +353,10 @@ int init_beamformer(RTBFContext* context) { // [NBEAM][ELE_BLOC*BIN]
printf("there is a memory collision!!\n");
if (ptr_aligned_ibuf < ptr_aligned_obuf) {
ptr_aligned_min = ptr_aligned_ibuf;
length = (ptr_aligned_obuf+obuf_len) - ptr_aligned_min; // minus 1 or not?
length = (ptr_aligned_obuf+obuf_len) - ptr_aligned_min;
} else {
ptr_aligned_min = ptr_aligned_obuf;
length = (ptr_aligned_ibuf+ibuf_len) - ptr_aligned_min; // minus 1 or not?
length = (ptr_aligned_ibuf+ibuf_len) - ptr_aligned_min;
}
printf("aligned buf min = %p\n", ptr_aligned_min);
printf("length = %lx\n", length);
......@@ -397,7 +375,7 @@ int init_beamformer(RTBFContext* context) { // [NBEAM][ELE_BLOC*BIN]
}
} else {
// must initialize rtbf with user allocated input and output array, for now.
// for now, user must initialize rtbf with user allocated input and output array.
return RTBF_INVALID_INIT;
}
......@@ -415,7 +393,7 @@ int init_beamformer(RTBFContext* context) { // [NBEAM][ELE_BLOC*BIN]
internal->nr_cols_C = compiletime_info.ntime;
internal->batchCount = compiletime_info.nbin;
// allocate device memory for beamformer and post-beamform operation
// allocate device memory for transpose, beamformer, and post-beamform operation
cudaMalloc((void **) &(internal->d_restruct_in), array_len*2*sizeof(signed char)); // input data
cudaMalloc((void **) &(internal->d_array_data), array_len*sizeof(cuComplex)); // transpose
cudaMalloc((void **) &(internal->d_beamformed), beamformed_len*sizeof(cuComplex)); // beamformed output
......@@ -428,7 +406,7 @@ int init_beamformer(RTBFContext* context) { // [NBEAM][ELE_BLOC*BIN]
update_weights(context, (float *)w_all_ones);
free(w_all_ones);
// setup output based on for operational mode
// setup output based on operational mode
if (context->operational_mode == BEAM_OP_RAW) {
internal->matLength = beamformed_len*2*sizeof(float);
// RAW mode, skip post-processing kernel and dump the beamformed outputs
......@@ -461,38 +439,36 @@ int init_beamformer(RTBFContext* context) { // [NBEAM][ELE_BLOC*BIN]
ncols_C = internal->nr_cols_C;
batchCount = internal->batchCount;
// Allocate memory to host arrays - This is all memory allocated to arrays
// that are used by gemmBatched. Allocate 3 arrays on CPU
// allocate 3 arrays on host, used to allocated device arrays needed for gemmBatched.
const cuComplex** h_arr_A = NULL;
const cuComplex** h_arr_B = NULL;
cuComplex** h_arr_C = NULL;
// Pinned memory
cudaMallocHost((void **) &h_arr_A, batchCount*sizeof(cuComplex*));
cudaMallocHost((void **) &h_arr_B, batchCount*sizeof(cuComplex*));
cudaMallocHost((void **) &h_arr_C, batchCount*sizeof(cuComplex*));
checkCudaError();
// Allocate memory for each batch in an array.
// allocate memory for each batch in an array.
for (int i=0; i<batchCount; i++) {
h_arr_A[i] = internal->d_weights + i*nrows_A*ncols_A;
h_arr_B[i] = internal->d_array_data + i*nrows_B*ncols_B;
h_arr_C[i] = internal->d_beamformed + i*nrows_C*ncols_C;
}
// Allocate memory to arrays on device.
// allocate memory to arrays on device.
cudaMalloc((void **) &(internal->d_arr_A), batchCount*sizeof(cuComplex*));
cudaMalloc((void **) &(internal->d_arr_B), batchCount*sizeof(cuComplex*));
cudaMalloc((void **) &(internal->d_arr_C), batchCount*sizeof(cuComplex*));
checkCudaError();
// Copy memory from host to device.
// copy memory from host to device.
cudaMemcpy(internal->d_arr_A, h_arr_A, batchCount*sizeof(cuComplex*), cudaMemcpyHostToDevice);
cudaMemcpy(internal->d_arr_B, h_arr_B, batchCount*sizeof(cuComplex*), cudaMemcpyHostToDevice);
cudaMemcpy(internal->d_arr_C, h_arr_C, batchCount*sizeof(cuComplex*), cudaMemcpyHostToDevice);
checkCudaError();
// Free pinned memory
// free the temporary memory
cudaFreeHost(h_arr_A);
cudaFreeHost(h_arr_B);
cudaFreeHost(h_arr_C);
......@@ -729,20 +705,20 @@ int run_beamformer(RTBFContext* context) {
cudaMemcpy(internal->d_restruct_in, context->array_h, array_len*2*sizeof(signed char), cudaMemcpyHostToDevice);
checkCudaError();
// Restructure data (transpose/corner turn) for cublasCgemmBatched function.
// restructure data (transpose/corner turn) where bin is the slowest moving dimension for cublasCgemmBatched
data_restructure<<<gridDim_transpose, blockDim_transpose>>>(internal->d_restruct_in, internal->d_array_data);
checkCudaError();
// beamform
rtbf_status = beamform(internal);
// post-beamform kernels
// post-beamform kernel
if (internal->post_beamform_func) {
(*(internal->post_beamform_func))<<<dimGrid, dimBlock>>>(internal->d_beamformed, internal->d_output);
checkCudaError();
}
// Copy output data from device to host.
// copy output data from device to host
cudaMemcpy(context->beamformed_h, internal->d_output, internal->matLength, cudaMemcpyDeviceToHost);
checkCudaError();
......
......@@ -2,13 +2,13 @@
#define CUBLAS_BEAMFORMER_H
// SANDBOX
#define RTBF_NANTENNA 160 // number of elements in array
#define RTBF_NBIN 25 // channels (bins)
#define RTBF_NTIME 4704 // time per block
#define RTBF_NBEAM 80 // single pol. beams
#define RTBF_STI_LENGTH 32
#define RTBF_STI_GPU_BLOCK 32
//#define RTBF_NANTENNA 160 // number of elements in array
//#define RTBF_NBIN 25 // channels (bins)
//#define RTBF_NTIME 4704 // time per block
//#define RTBF_NBEAM 80 // single pol. beams
//
//#define RTBF_STI_LENGTH 32
//#define RTBF_STI_GPU_BLOCK 32
// configurable parameters
// TODO should check for multiples of 8 or 16 for GPU optimizations
......@@ -127,13 +127,13 @@ typedef struct RTBFInfoStruct {
#define PATHLEN 128
// Struct defintion for beamformer metadata
typedef struct bf_metadata_struct {
typedef struct rtbf_metadata_struct {
float offsets[RTBF_NBEAM];
char cal_filename[CHARLEN];
char algorithm[CHARLEN];
char weight_filename[CHARLEN];
long long unsigned int xid;
} bf_metadata;
} rtbf_metadata;
#define BEAM_OP_RAW 0 // raw iq outputs
#define BEAM_OP_STI 1 // full-stokes w/ STI
......@@ -173,22 +173,23 @@ typedef struct RTBFContextStruct {
int freq_per_xid; // frequency channels per xeng
int input_per_fid; // antennas per f-engine
bf_metadata metadata;
rtbf_metadata metadata;
// for rtbf internal use only
void* internal;
} RTBFContext;
void bf_get_offsets(RTBFContext* context, float* offsets);
void bf_get_cal_filename(RTBFContext* context, char* cal_filename);
void bf_get_algorithm(RTBFContext* context, char* algorithm);
void bf_get_weight_filename(RTBFContext* context, char* weight_filename);
long long unsigned int bf_get_xid(RTBFContext* context);
void get_rtbf_offsets(RTBFContext* context, float* offsets);
void get_rtbf_cal_filename(RTBFContext* context, char* cal_filename);
void get_rtbf_algorithm(RTBFContext* context, char* algorithm);
void get_rtbf_weight_filename(RTBFContext* context, char* weight_filename);
long long unsigned int get_rtbf_xid(RTBFContext* context);
void rtbfInfo(RTBFInfo* info);
int init_beamformer(RTBFContext* context);
int update_weights(RTBFContext* context, float* weights);
int load_weights_from_file(RTBFContext* context, char* weight_filename);
int run_beamformer(RTBFContext* context);
void rtbfCleanup(RTBFContext* context);
#ifdef __cplusplus
......
......@@ -119,20 +119,21 @@ int main(int argc, char * argv[]) {
init_beamformer(&context);
/*********************************************************
* Update in weights
* Update weights
*********************************************************/
float* w; // TODO define weights, library defaults to all ones in the real component [CMPLXF(1,0)]
// TODO define some weights, otherwise the library defaults to all ones
// in the real component [CMPLXF(1,0)]
float offsets[RTBF_NBEAM];
char cal_filename[CHARLEN];
char algorithm[CHARLEN];
char weight_file[CHARLEN];
long long unsigned int xid;
bf_get_offsets(&context, offsets);
bf_get_cal_filename(&context, cal_filename);
bf_get_algorithm(&context, algorithm);
bf_get_weight_filename(&context, weight_file);
xid = bf_get_xid(&context);
get_rtbf_offsets(&context, offsets);
get_rtbf_cal_filename(&context, cal_filename);
get_rtbf_algorithm(&context, algorithm);
get_rtbf_weight_filename(&context, weight_file);
xid = get_rtbf_xid(&context);
for (int i=0; i<rtbf_info.crosspol_offset; i++) {
printf("Offset %d = (%f, %f)\n", i, offsets[2*i], offsets[2*i+1]);
......
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h> // write
#include <stdint.h>
#include <time.h> // timeval
#include <sys/time.h> // timersub
#include <math.h>
#include <complex.h>
#include "cublas_beamformer.h"
#define RUN_UNIT_TEST
#ifdef RUN_UNIT_TEST
// unit test definition
#define Ni 4 // inputs per fid
#define Nf 2 // number of fids
#define Nt 10 // time per network packet
#define Nm 40 // mcounts per block
#define Bk 1 // number of data blocks
#else
// xb system definition
#define Ni 10 // inputs per fid
#define Nf 16 // number of fids
#define Nt 14 // time per network packet
#define Nm 336 // mcounts per block
#define Bk 1 // number of data blocks
#endif
typedef struct {
int8_t re;
int8_t im;
} cx_input_t;
typedef struct {
float re;
float im;
} cx_output_t;
int main(int argc, char* argv[]) {
RTBFInfo rtbf_info;
rtbfInfo(&rtbf_info);
int Ne = rtbf_info.narray_elements; // elements in array
int Nc = rtbf_info.nbin; // frequency channels (bins)
int Nb = rtbf_info.nbeam; // single pol. formed beams
int NT = rtbf_info.ntime; // time per block
long long unsigned int S = rtbf_info.array_len; // number of complex elements in a block size
// validate xb-engine system parameters with library compiled parameters
if (Ni*Nf != Ne) {
printf("element configuration error\n");
return -1;
}
if (Nm*Nt != NT) {
printf("tiem configuration error\n");
return -1;
}
if (Ne*Nc*NT != S) {
printf("data size configuration error\n");
return -1;
}
// array constants
const double SPEED_OF_LIGHT = 3e8; // c
const double SRC_FREQUENCY = 1600e6; // f0
const double LAMBDA = SPEED_OF_LIGHT/SRC_FREQUENCY; // lambda
const double ELEMENT_SPACING = LAMBDA/2; // d
const double arg_tau = 2*M_PI*SRC_FREQUENCY*(ELEMENT_SPACING/SPEED_OF_LIGHT);
// angles for steered beams
const double THETA_MAX = 90.0*(M_PI/180.0);
const double THETA_MIN = -THETA_MAX;
const int PATTERN_POINTS = NT; // all time, need to keep track across packets
// channels and beams make up the
double* angles_rad = malloc(Nb*sizeof(double));//{0.0};
double* angles_deg = malloc(Nb*sizeof(double));//{0.0};
float* angles_degreesf = malloc(Nb*sizeof(float));//{0.0};
double delta_rad = (THETA_MAX-THETA_MIN)/Nb;
printf("angles: {");
for (int b=0; b<Nb; b++) {
int idx = b;
int offset = -Nb/2; // generate pointings (-90, 90] using integers (-Nb/2, Nb/2)
angles_rad[idx] = (delta_rad*(idx+offset));
angles_deg[idx] = angles_rad[idx]*(180/M_PI);
angles_degreesf[idx] = (float) angles_deg[idx];
printf("%f ", angles_deg[idx]);
}
printf("}\n");
// propagation delay across elements
double tau[Nb];
for (int b=0; b<Nb; b++) {
int idx = b;
tau[idx] = arg_tau * sin(angles_rad[idx]);
}
// steering vector (weights) for direction of arrival for sources
// NB: for this to work Ne here needs to match RTBF_NANTENNA
//printf("steering vectors:\n");
complex float (*steering_vectors)[Nc*Ne] = malloc(sizeof(complex float[Nb][Nc*Ne])); // NB here
for (int b=0; b<Nb; b++) { // steering vector (direction)
for (int c=0; c<Nc; c++) { // NB: channel added here to match `update_weights` accessing
for (int e=0; e<Ne;e++) { // element in vector
int eidx = e + c*Ne; // channels grouped together
steering_vectors[b][eidx] = cexpf(-I*e*tau[b]);
//printf("{%f, %f}\n", creal(steering_vectors[c][b][e]), cimag(steering_vectors[c][b][e]));
}
//printf("\n");
}
}
// write steering vectors to file to test load
char cal_filename[CHARLEN] = "calibration_file.fits";
cal_filename[CHARLEN-1] = '\0';
char algorithm[CHARLEN] = "CFM for ULA pattern scan";
algorithm[CHARLEN-1] = '\0';
long long unsigned int xid = 49;
FILE* weight_stream;
char filename[CHARLEN] = "steering_vectors.xb";
weight_stream = fopen(filename, "w+");
if (weight_stream != NULL) {
fwrite(steering_vectors, sizeof(float), 2*rtbf_info.weight_len, weight_stream);
fwrite(angles_degreesf, sizeof(float), rtbf_info.nbeam, weight_stream);
fwrite(cal_filename, sizeof(char), (CHARLEN-1), weight_stream);
fwrite(algorithm, sizeof(char), (CHARLEN-1), weight_stream);
fwrite(&xid, sizeof(long long unsigned int), 1, weight_stream);
} else {
printf("could not open file to create weight file\n");
free(angles_rad);
free(angles_deg);
free(angles_degreesf);
free(steering_vectors);
return -1;
}
fclose(weight_stream);
free(angles_rad);
free(angles_deg);
free(angles_degreesf);
free(steering_vectors);
return 0;
}
......@@ -4,3 +4,5 @@ make
gcc rtbf_info.c -o rtbfinfo -L /home/mcb/git/xb-engine/lib/rtbf/src/ -L/usr/local/cuda/lib64/ -Wl,-rpath,/home/mcb/git/xb-engine/lib/rtbf/src/ -lflagbeamformer -lcublas
gcc testbeam.c -o testbeam -L /home/mcb/git/xb-engine/lib/rtbf/src/ -L/usr/local/cuda/lib64/ -Wl,-rpath,/home/mcb/git/xb-engine/lib/rtbf/src/ -lflagbeamformer -lcublas -lm -lcpgplot -lpgplot -lgfortran -lX11 -lpng -lz
gcc testbeam_sti.c -o sti_testbeam -L /home/mcb/git/xb-engine/lib/rtbf/src/ -L/usr/local/cuda/lib64/ -Wl,-rpath,/home/mcb/git/xb-engine/lib/rtbf/src/ -lflagbeamformer -lcublas -lm
gcc ex_write_weightfile.c -o write_weights -L /home/mcb/git/xb-engine/lib/rtbf/src/ -L/usr/local/cuda/lib64/ -Wl,-rpath,/home/mcb/git/xb-engine/lib/rtbf/src/ -lflagbeamformer -lcublas -lm
gcc testload_beam.c -o loadbeam_test -L /home/mcb/git/xb-engine/lib/rtbf/src/ -L/usr/local/cuda/lib64/ -Wl,-rpath,/home/mcb/git/xb-engine/lib/rtbf/src/ -lflagbeamformer -lcublas -lm -lcpgplot -lpgplot -lgfortran -lX11 -lpng -lz
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h> // write
#include <stdint.h>
#include <time.h> // timeval
#include <sys/time.h> // timersub
#include <math.h>
#include <complex.h>
#include "cublas_beamformer.h"
#include "cpgplot.h"
#define RUN_UNIT_TEST
#ifdef RUN_UNIT_TEST
#include "unit_test.h"
// unit test definition
#define Ni 4 // inputs per fid
#define Nf 2 // number of fids
#define Nt 10 // time per network packet
#define Nm 40 // mcounts per block
#define Bk 1 // number of data blocks
#else
// xb system definition
#define Ni 10 // inputs per fid
#define Nf 16 // number of fids
#define Nt 14 // time per network packet
#define Nm 336 // mcounts per block
#define Bk 1 // number of data blocks
#endif
typedef struct {
int8_t re;
int8_t im;
} cx_input_t;
typedef struct {
float re;
float im;
} cx_output_t;
// RTBF library has no notion of blocks because it operates on a single
// "block" of data.
// But it does have a notion of mcount because a transpose from network order to
// GPU order is implemented within the library (at least for now)
int main(int argc, char* argv[]) {
RTBFInfo rtbf_info;
rtbfInfo(&rtbf_info);
int Ne = rtbf_info.narray_elements; // elements in array
int Nc = rtbf_info.nbin; // frequency channels (bins)
int Nb = rtbf_info.nbeam; // single pol. formed beams
int NT = rtbf_info.ntime; // time per block
long long unsigned int S = rtbf_info.array_len; // number of complex elements in a block size
// validate xb-engine system parameters with library compiled parameters
if (Ni*Nf != Ne) {
printf("element configuration error\n");
return -1;
}
if (Nm*Nt != NT) {
printf("tiem configuration error\n");
return -1;
}
if (Ne*Nc*NT != S) {
printf("data size configuration error\n");
return -1;
}
// array constants
const double SPEED_OF_LIGHT = 3e8; // c
const double SRC_FREQUENCY = 1600e6; // f0
const double LAMBDA = SPEED_OF_LIGHT/SRC_FREQUENCY; // lambda
const double ELEMENT_SPACING = LAMBDA/2; // d
const double arg_tau = 2*M_PI*SRC_FREQUENCY*(ELEMENT_SPACING/SPEED_OF_LIGHT);
// angles for steered beams
const double THETA_MAX = 90.0*(M_PI/180.0);
const double THETA_MIN = -THETA_MAX;
const int PATTERN_POINTS = NT; // all time, need to keep track across packets
// channels and beams make up the
double* angles_rad = malloc(Nb*sizeof(double));//{0.0};
double* angles_deg = malloc(Nb*sizeof(double));//{0.0};
float* angles_degreesf = malloc(Nb*sizeof(float));//{0.0};
double delta_rad = (THETA_MAX-THETA_MIN)/Nb;
printf("angles: {");
for (int b=0; b<Nb; b++) {
int idx = b;
int offset = -Nb/2; // generate pointings (-90, 90] using integers (-Nb/2, Nb/2)
angles_rad[idx] = (delta_rad*(idx+offset));
angles_deg[idx] = angles_rad[idx]*(180/M_PI);
angles_degreesf[idx] = (float) angles_deg[idx];
printf("%f ", angles_deg[idx]);
}
printf("}\n");
// angles and steering vectors for beam pattern
double* angles_pattern = malloc(PATTERN_POINTS * sizeof(double));
complex float tau_patterns;
complex float (*steering_vectors_patterns)[Ne] = malloc(sizeof(complex float[PATTERN_POINTS][Ne]));
double dtheta = (THETA_MAX-THETA_MIN)/PATTERN_POINTS;
for (int e=0; e<Ne; e++) {
for (int a=0; a<PATTERN_POINTS; a++) {
angles_pattern[a] = THETA_MIN + a*dtheta;
tau_patterns = arg_tau * sin(angles_pattern[a]);
steering_vectors_patterns[a][e] = cexpf(-I*e*tau_patterns);
}
}
// BEGIN TO FILL DATA -- same for each channel at this time
printf("data size %u\n", S);
cx_input_t* d_cx_ptr = (cx_input_t *) malloc(S*sizeof(cx_input_t)); // input data
cx_output_t* b_cx_ptr = (cx_output_t*) malloc(rtbf_info.beamformed_len*sizeof(cx_output_t));// output data
printf("input ptr: %p (size: %u), output ptr: %p (size: %u)\n",
d_cx_ptr, S*sizeof(cx_input_t), b_cx_ptr, rtbf_info.beamformed_len*sizeof(cx_output_t));
signed char* d_char_ptr = (signed char*) d_cx_ptr;
float* b_float_ptr = (float*) b_cx_ptr;
for (int m=0; m<Nm; m++) { // mcnt
for (int f=0; f<Nf; f++) { // fid
for (int t=0; t<Nt; t++) { // time
for (int c=0; c<Nc; c++) { // frequency
for (int i=0; i<Ni; i++) { // elements per fid
int midx = m*(Ni*Nc*Nt*Nf); // number of complex items per mcount
int fidx = f*(Ni*Nc*Nt); // number of complex items per fid
int eidx = i + c*Ni + t*(Nc*Ni) + fidx + midx;
//printf("%u, %u, %u, %u, %u (%u)\n", m, f, t, c, i, eidx);
int antenna = i + f*Ni;
int pattern = t + m*Nt;
complex float d = steering_vectors_patterns[pattern][antenna];
// quantize for 8-bit two-complement data
d = d*(127); // 128 saturates... what is the correct op here?
float dre = roundf(creal(d));
float dim = roundf(cimag(d));
d_cx_ptr[eidx].re = dre;
d_cx_ptr[eidx].im = dim;
//d_cx_ptr[eidx].re = 1.0;
//d_cx_ptr[eidx].im = 0.0;
}
}
}
}
}
// initialize beamformer
RTBFContext context;
context.operational_mode = BEAM_OP_RAW;
context.array_h = d_char_ptr;
context.array_len = S;
context.beamformed_h = b_float_ptr;
context.beamformed_len = rtbf_info.beamformed_len;
context.mcnt_per_data_block = Nm;
context.nfengines = Nf;
context.time_per_mcnt = Nt;
context.freq_per_xid = Nc;
context.input_per_fid = Ni;
int rtbf_status = RTBF_OK;
rtbf_status = init_beamformer(&context);
if (rtbf_status != RTBF_OK) {
printf("rtbf failure with code: %d\n", rtbf_status);
free(angles_rad);
free(angles_deg);
free(angles_degreesf);
free(angles_pattern);
free(steering_vectors_patterns);
free(d_cx_ptr);
free(b_cx_ptr);
rtbfCleanup(&context);
return -1;
}
char filename[CHARLEN] = "steering_vectors.xb";
rtbf_status = load_weights_from_file(&context, filename);
if (rtbf_status != RTBF_OK) {
printf("rtbf failure with code: %d\n", rtbf_status);
free(angles_rad);
free(angles_deg);
free(angles_degreesf);
free(angles_pattern);
free(steering_vectors_patterns);
free(d_cx_ptr);
free(b_cx_ptr);
rtbfCleanup(&context);
return -1;
}
float md_offsets[RTBF_NBEAM];
char md_cal_filename[CHARLEN];
char md_algorithm[CHARLEN];
char md_weight_file[CHARLEN];
long long unsigned int md_xid;
get_rtbf_offsets(&context, md_offsets);
get_rtbf_cal_filename(&context, md_cal_filename);
get_rtbf_algorithm(&context, md_algorithm);
get_rtbf_weight_filename(&context, md_weight_file);
md_xid = get_rtbf_xid(&context);
for (int i=0; i<rtbf_info.nbeam; i++) {
printf("angle idx:%d = %f\n", i, md_offsets[i]);
}
printf("**** weight file info ****\n");
printf("Calibration Filename = %s\n", md_cal_filename);
printf("Algorithm for Weights = %s\n", md_algorithm);
printf("XID = %llu\n", md_xid);
printf("Weight filename = %s\n", md_weight_file);
// Run beamformer
struct timeval tval_before, tval_after, tval_result;
gettimeofday(&tval_before, NULL);
/////////////
run_beamformer(&context);
////////////
gettimeofday(&tval_after, NULL);
timersub(&tval_after, &tval_before, &tval_result);
printf("run_beamformer() time = %f ms\n", (float) tval_result.tv_usec/1000);
printf("Finished run_beamformer()!\n");
//printf("outputs (%u)\n", rtbf_info.beamformed_len);
for (int b=0; b<Nb; b++) { // beam
for (int t=0; t<NT; t++) { // time (NT=Nm*Nt)
for (int c=0; c<Nc; c++) { // frequency
cx_output_t tmp;
int idx = b + t*Nb + c*Nb*NT;
tmp = b_cx_ptr[idx];
//printf("%u {%f, %f}\n", idx, tmp.re, tmp.im);
}
}
}
// LET's PLOT
// prepare cpgplot
if (1)
{
cpgopen("/xwin"); // docs say cpgopen should be used over beg now
cpgask(1);
double pattern_power[PATTERN_POINTS];
float xv[PATTERN_POINTS];
float yv[PATTERN_POINTS];
// used to select the beam/channel
int chan_index = 0;
int beam_index = 0;
// loop getting cursor position and updating selected frame in second panel
// from selected cursor position
//
// for getting, setting, and using cursor info
float cx, cy;
char pressed;
// setup env and labels before plotting
cpgenv((180/M_PI)*angles_pattern[0], (180/M_PI)*angles_pattern[PATTERN_POINTS-1], 0.0, 90.0, 0, 1);
cpglab("theta", "dB arb. units", "beam patterns");
//cpgline(PATTERN_POINTS, xv, yv);
// the approach here is to loop accepting input, and redraw the line instead
// of starting a new window (e.g., call to cpgenv)
printf("press 0-8, or x to end\n");
do {
// ask for cursor position (could have used cpgcurs, but this method
// provides the "mode" to put a vertical line through the cursor)
cpgcurs(&cx, &cy, &pressed);
//printf("cx=%f, cy=%f, pressed=%c\n", cx, cy, pressed);
if (pressed >= '0' && pressed <= '8') {
beam_index = atoi(&pressed);
}
// take pressed input
for (int t=0; t<PATTERN_POINTS; t++) {
int idx = beam_index + t*Nb + chan_index*Nb*NT;
//int idx = b + t*Nb + c*Nb*NT;
complex float y = CMPLXF(b_cx_ptr[idx].re, b_cx_ptr[idx].im);
pattern_power[t] = 10*log10((creal(conj(y)*y)));
//printf("(%u)t=%u: {%f, %f} \n", idx, t, creal(y), cimag(y));
}
// draw by redrawing the line, this was similar to what was done to animate
// from pgdemo11.f and my example analyzer codes
//
// TODO: pixels on edge are erased, need to do something like pgdemo11.f (and
// other demos) that slightly pad the edge so that the axis and labels are
// not erased
cpgbbuf();
cpgsci(0);
cpgline(PATTERN_POINTS, xv, yv);
cpgsci(1);
for (int p=0; p<PATTERN_POINTS; p++) {
xv[p] = (float) (180/M_PI)*angles_pattern[p];
yv[p] = (float) pattern_power[p];
}
cpgsci(1);
cpgline(PATTERN_POINTS, xv, yv);
cpgebuf();
} while (pressed!='x');
cpgend();
}
// dump bytes for creating unit test
// `./a.out | xxd -i >> unit_test.h`
if (0) {
void *p = (void *) b_cx_ptr;
if(write(1, p, rtbf_info.beamformed_len*sizeof(cx_output_t)) == -1) {
fprintf(stdout, "error dumping with write\n");
}
}
#ifdef RUN_UNIT_TEST
// compare bytes from unit test
printf("checking output...\n");
int32_t error = 0;
cx_output_t* unit_test = (cx_output_t*) UNIT_TEST;
for (int i=0; i<S; i++) {
if (unit_test[i].re != b_cx_ptr[i].re ||
unit_test[i].im != b_cx_ptr[i].im)
{
error += 1;
}
}
if (error) {
printf("completed with %u errors.\n", error);
} else {
printf("completed without errors!\n");
}
#endif
// clean up
free(angles_rad);
free(angles_deg);
free(angles_degreesf);
free(angles_pattern);
free(steering_vectors_patterns);
free(d_cx_ptr);
free(b_cx_ptr);
rtbfCleanup(&context);
return 0;
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment