Skip to content

Commit

Permalink
firmware: arm_loader: Adding dtdebug directive, and improving debug messages
Browse files Browse the repository at this point in the history

See: http://www.raspberrypi.org/forums/viewtopic.php?f=29&t=93015

firmware: arm_loader: Allow set_clock to skip enabling turbo
See: https://github.com/raspberrypi/firmware/wiki/Mailbox-property-interface

userland: hello_fft: Update to release 2.0
See: http://www.aholme.co.uk/GPU_FFT/Main.htm
  • Loading branch information
popcornmix committed Dec 29, 2014
1 parent ee67812 commit 0868c6e
Show file tree
Hide file tree
Showing 44 changed files with 1,580 additions and 35 deletions.
Binary file modified fixup.dat
Binary file not shown.
Binary file modified fixup_cd.dat
Binary file not shown.
Binary file modified fixup_x.dat
Binary file not shown.
Binary file modified start.elf
Binary file not shown.
Binary file modified start_cd.elf
Binary file not shown.
Binary file modified start_x.elf
Binary file not shown.
Binary file modified vc/hardfp/opt/vc/lib/libEGL_static.a
Binary file not shown.
Binary file modified vc/hardfp/opt/vc/lib/libGLESv2_static.a
Binary file not shown.
Binary file modified vc/hardfp/opt/vc/lib/libdebug_sym_static.a
Binary file not shown.
Binary file modified vc/hardfp/opt/vc/lib/libkhrn_client.a
Binary file not shown.
Binary file modified vc/hardfp/opt/vc/lib/libkhrn_static.a
Binary file not shown.
Binary file modified vc/hardfp/opt/vc/lib/libvcfiled_check.a
Binary file not shown.
Binary file modified vc/hardfp/opt/vc/lib/libvchostif.a
Binary file not shown.
Binary file modified vc/hardfp/opt/vc/lib/libvcilcs.a
Binary file not shown.
Binary file modified vc/hardfp/opt/vc/lib/libvmcs_rpc_client.a
Binary file not shown.
4 changes: 2 additions & 2 deletions vc/sdk/opt/vc/src/hello_pi/hello_fft/gpu_fft.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
BCM2835 "GPU_FFT" release 2.0 BETA
BCM2835 "GPU_FFT" release 2.0
Copyright (c) 2014, Andrew Holme.
All rights reserved.
Expand Down Expand Up @@ -127,7 +127,7 @@ int gpu_fft_prepare(
}

unsigned gpu_fft_execute(struct GPU_FFT *info) {
gpu_fft_base_exec(&info->base, GPU_FFT_QPUS);
return gpu_fft_base_exec(&info->base, GPU_FFT_QPUS);
}

void gpu_fft_release(struct GPU_FFT *info) {
Expand Down
2 changes: 1 addition & 1 deletion vc/sdk/opt/vc/src/hello_pi/hello_fft/gpu_fft.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
BCM2835 "GPU_FFT" release 2.0 BETA
BCM2835 "GPU_FFT" release 2.0
Copyright (c) 2014, Andrew Holme.
All rights reserved.
Expand Down
16 changes: 8 additions & 8 deletions vc/sdk/opt/vc/src/hello_pi/hello_fft/gpu_fft.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
BCM2835 "GPU_FFT" release 2.0 BETA by Andrew Holme, 2014.
BCM2835 "GPU_FFT" release 2.0 by Andrew Holme, 2014.

GPU_FFT is an FFT library for the Raspberry Pi which exploits the BCM2835 SoC
3D hardware to deliver ten times more data throughput than is possible on the
700 MHz ARM. Kernels are provided for all power-of-2 FFT lengths between 256
and 1,048,576 points inclusive. A transpose function, which also uses the 3D
and 2,097,152 points inclusive. A transpose function, which also uses the 3D
hardware, is provided to support 2-dimensional transforms.


Expand All @@ -16,8 +16,8 @@ is not scaled. The relative root-mean-square (rms) error in parts-per-million
log2(N) | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17
ppm rms | 0.27 | 0.42 | 0.50 | 0.70 | 2.3 | 4.4 | 7.6 | 9.2 | 18 | 70

log2(N) | 18 | 19 | 20 | 8...17 batch of 10
ppm rms | 100 | 180 | 360 | 18...20 batch of 1
log2(N) | 18 | 19 | 20 | 21 | 8...17 batch of 10
ppm rms | 100 | 180 | 360 | 720 | 18...21 batch of 1


*** Throughput ***
Expand All @@ -36,9 +36,9 @@ log2(N) | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 |
10 | 0.016 | 0.027 | 0.045 | 0.095 | 0.25 | 0.61 | 1.2 | 3.2 |
FFTW | 0.092 | 0.22 | 0.48 | 0.95 | 3.0 | 5.1 | 12 | 31 |

log2(N) | 16 | 17 | 18 | 19 | 20 | All times in
1 | 6.8 | 16 | 42 | 95 | 190 | milliseconds
FFTW | 83 | 180 | 560 | 670 | 1600 | 2 sig. figs.
log2(N) | 16 | 17 | 18 | 19 | 20 | 21 | All times in
1 | 6.8 | 16 | 42 | 95 | 190 | 380 | milliseconds
FFTW | 83 | 180 | 560 | 670 | 1600 | 3400 | 2 sig. figs.


*** API functions ***
Expand All @@ -57,7 +57,7 @@ log2(N) | 16 | 17 | 18 | 19 | 20 | All times in

int mb Mailbox file descriptor obtained by calling mbox_open()

int log2_N log2(FFT length) = 8 to 20
int log2_N log2(FFT length) = 8 to 21

int direction FFT direction: GPU_FFT_FWD for forward FFT
GPU_FFT_REV for inverse FFT
Expand Down
5 changes: 3 additions & 2 deletions vc/sdk/opt/vc/src/hello_pi/hello_fft/gpu_fft_base.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
BCM2835 "GPU_FFT" release 2.0 BETA
BCM2835 "GPU_FFT" release 2.0
Copyright (c) 2014, Andrew Holme.
All rights reserved.
Expand Down Expand Up @@ -46,7 +46,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define GPU_FFT_MEM_MAP 0x0 // cached=0x0; direct=0x20000000

#define GPU_FFT_NO_FLUSH 1
#define GPU_FFT_TIMEOUT 1000 // ms
#define GPU_FFT_TIMEOUT 2000 // ms

unsigned gpu_fft_base_exec_direct (
struct GPU_FFT_BASE *base,
Expand Down Expand Up @@ -82,6 +82,7 @@ unsigned gpu_fft_base_exec(

if (base->vc_msg) {
// Use mailbox
// Returns: 0x0 for success; 0x80000000 for timeout
return execute_qpu(base->mb, num_qpus, base->vc_msg, GPU_FFT_NO_FLUSH, GPU_FFT_TIMEOUT);
}
else {
Expand Down
8 changes: 6 additions & 2 deletions vc/sdk/opt/vc/src/hello_pi/hello_fft/gpu_fft_shaders.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
BCM2835 "GPU_FFT" release 2.0 BETA
BCM2835 "GPU_FFT" release 2.0
Copyright (c) 2014, Andrew Holme.
All rights reserved.
Expand Down Expand Up @@ -65,6 +65,9 @@ static unsigned int shader_512k[] = {
static unsigned int shader_1024k[] = {
#include "hex/shader_1024k.hex"
};
static unsigned int shader_2048k[] = {
#include "hex/shader_2048k.hex"
};

static struct {
unsigned int size, *code;
Expand All @@ -82,7 +85,8 @@ shaders[] = {
{sizeof(shader_128k), shader_128k},
{sizeof(shader_256k), shader_256k},
{sizeof(shader_512k), shader_512k},
{sizeof(shader_1024k), shader_1024k}
{sizeof(shader_1024k), shader_1024k},
{sizeof(shader_2048k), shader_2048k}
};

unsigned int gpu_fft_shader_size(int log2_N) {
Expand Down
4 changes: 2 additions & 2 deletions vc/sdk/opt/vc/src/hello_pi/hello_fft/gpu_fft_trans.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
BCM2835 "GPU_FFT" release 2.0 BETA
BCM2835 "GPU_FFT" release 2.0
Copyright (c) 2014, Andrew Holme.
All rights reserved.
Expand Down Expand Up @@ -87,7 +87,7 @@ int gpu_fft_trans_prepare(
}

unsigned gpu_fft_trans_execute(struct GPU_FFT_TRANS *info) {
gpu_fft_base_exec(&info->base, 1);
return gpu_fft_base_exec(&info->base, 1);
}

void gpu_fft_trans_release(struct GPU_FFT_TRANS *info) {
Expand Down
2 changes: 1 addition & 1 deletion vc/sdk/opt/vc/src/hello_pi/hello_fft/gpu_fft_trans.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
BCM2835 "GPU_FFT" release 2.0 BETA
BCM2835 "GPU_FFT" release 2.0
Copyright (c) 2014, Andrew Holme.
All rights reserved.
Expand Down
20 changes: 17 additions & 3 deletions vc/sdk/opt/vc/src/hello_pi/hello_fft/gpu_fft_twiddles.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
BCM2835 "GPU_FFT" release 2.0 BETA
BCM2835 "GPU_FFT" release 2.0
Copyright (c) 2014, Andrew Holme.
All rights reserved.
Expand Down Expand Up @@ -243,6 +243,19 @@ static void twiddles_1024k(double two_pi, float *out) {
out = twiddles_base_32(two_pi, out, two_pi/N*q);
}

/*
 * Fill the twiddle table used for the 2048k-point transform (N = 2048*1024).
 *
 * two_pi  passed unchanged to every helper and used to form the per-call
 *         angle arguments (two_pi/N * k); presumably 2*pi -- confirm at caller
 * out     output cursor into the float table; each helper's return value is
 *         assigned back to it before the next call, so the call order below
 *         determines the order in which the sub-tables are laid out
 *
 * NOTE(review): twiddles_base_64 / twiddles_step_32 / twiddles_base_32 are
 * defined outside this excerpt; how many floats each one writes is not
 * visible here.
 */
static void twiddles_2048k(double two_pi, float *out) {
double N=2048*1024;
int q;

// One base_64 table, followed by three step_32 tables whose angle argument
// is two_pi/N scaled by 32*32, then 32, then GPU_FFT_QPUS.
out = twiddles_base_64(two_pi, out);
out = twiddles_step_32(two_pi, out, two_pi/N * 32*32);
out = twiddles_step_32(two_pi, out, two_pi/N * 32);
out = twiddles_step_32(two_pi, out, two_pi/N * GPU_FFT_QPUS);

// One base_32 table per q in [0, GPU_FFT_QPUS), each given the angle
// argument two_pi/N*q.
for (q=0; q<GPU_FFT_QPUS; q++)
out = twiddles_base_32(two_pi, out, two_pi/N*q);
}

/****************************************************************************/

static struct {
Expand All @@ -262,11 +275,12 @@ shaders[] = {
{4, 5, 1, twiddles_128k},
{4, 6, 2, twiddles_256k},
{4, 7, 2, twiddles_512k},
{4, 8, 2, twiddles_1024k}
{4, 8, 2, twiddles_1024k},
{4,10, 2, twiddles_2048k}
};

int gpu_fft_twiddle_size(int log2_N, int *shared, int *unique, int *passes) {
if (log2_N<8 || log2_N>20) return -1;
if (log2_N<8 || log2_N>21) return -1;
*shared = shaders[log2_N-8].shared;
*unique = shaders[log2_N-8].unique;
*passes = shaders[log2_N-8].passes;
Expand Down
8 changes: 4 additions & 4 deletions vc/sdk/opt/vc/src/hello_pi/hello_fft/hello_fft.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
BCM2835 "GPU_FFT" release 2.0 BETA
BCM2835 "GPU_FFT" release 2.0
Copyright (c) 2014, Andrew Holme.
All rights reserved.
Expand Down Expand Up @@ -37,7 +37,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

char Usage[] =
"Usage: hello_fft.bin log2_N [jobs [loops]]\n"
"log2_N = log2(FFT_length), log2_N = 8...20\n"
"log2_N = log2(FFT_length), log2_N = 8...21\n"
"jobs = transforms per batch, jobs>0, default 1\n"
"loops = number of test repeats, loops>0, default 1\n";

Expand All @@ -55,7 +55,7 @@ int main(int argc, char *argv[]) {
struct GPU_FFT_COMPLEX *base;
struct GPU_FFT *fft;

log2_N = argc>1? atoi(argv[1]) : 12; // 8 <= log2_N <= 20
log2_N = argc>1? atoi(argv[1]) : 12; // 8 <= log2_N <= 21
jobs = argc>2? atoi(argv[2]) : 1; // transforms per batch
loops = argc>3? atoi(argv[3]) : 1; // test repetitions

Expand All @@ -69,7 +69,7 @@ int main(int argc, char *argv[]) {

switch(ret) {
case -1: printf("Unable to enable V3D. Please check your firmware is up to date.\n"); return -1;
case -2: printf("log2_N=%d not supported. Try between 8 and 20.\n", log2_N); return -1;
case -2: printf("log2_N=%d not supported. Try between 8 and 21.\n", log2_N); return -1;
case -3: printf("Out of memory. Try a smaller batch or increase GPU memory.\n"); return -1;
case -4: printf("Unable to map Videocore peripherals into ARM memory space.\n"); return -1;
}
Expand Down
6 changes: 3 additions & 3 deletions vc/sdk/opt/vc/src/hello_pi/hello_fft/hello_fft_2d.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
BCM2835 "GPU_FFT" release 2.0 BETA
BCM2835 "GPU_FFT" release 2.0
Copyright (c) 2014, Andrew Holme.
All rights reserved.
Expand Down Expand Up @@ -69,8 +69,8 @@ int main(int argc, char *argv[]) {
// Write bitmap info
memset(&bih, 0, sizeof(bih));
bih.biSize = sizeof(bih);
bih.biWidth = N-1;
bih.biHeight = N-1;
bih.biWidth = N;
bih.biHeight = N;
bih.biPlanes = 1;
bih.biBitCount = 24;
bih.biCompression = BI_RGB;
Expand Down
Loading

0 comments on commit 0868c6e

Please sign in to comment.