firmware: arm_loader: Adding dtdebug directive, and improving debug messages
See: http://www.raspberrypi.org/forums/viewtopic.php?f=29&t=93015

firmware: arm_loader: Allow set_clock to skip enabling turbo
See: https://github.com/raspberrypi/firmware/wiki/Mailbox-property-interface

userland: hello_fft: Update to release 2.0
See: http://www.aholme.co.uk/GPU_FFT/Main.htm
Hexxeh · Dec 29, 2014 · 0868c6e · 0868c6e
1 parent ee67812
commit 0868c6e
Show file tree

Hide file tree

Showing 44 changed files with 1,580 additions and 35 deletions.
diff --git a/fixup.dat b/fixup.dat
diff --git a/fixup_cd.dat b/fixup_cd.dat
diff --git a/fixup_x.dat b/fixup_x.dat
diff --git a/start.elf b/start.elf
diff --git a/start_cd.elf b/start_cd.elf
diff --git a/start_x.elf b/start_x.elf
diff --git a/vc/hardfp/opt/vc/lib/libEGL_static.a b/vc/hardfp/opt/vc/lib/libEGL_static.a
diff --git a/vc/hardfp/opt/vc/lib/libGLESv2_static.a b/vc/hardfp/opt/vc/lib/libGLESv2_static.a
diff --git a/vc/hardfp/opt/vc/lib/libdebug_sym_static.a b/vc/hardfp/opt/vc/lib/libdebug_sym_static.a
diff --git a/vc/hardfp/opt/vc/lib/libkhrn_client.a b/vc/hardfp/opt/vc/lib/libkhrn_client.a
diff --git a/vc/hardfp/opt/vc/lib/libkhrn_static.a b/vc/hardfp/opt/vc/lib/libkhrn_static.a
diff --git a/vc/hardfp/opt/vc/lib/libvcfiled_check.a b/vc/hardfp/opt/vc/lib/libvcfiled_check.a
diff --git a/vc/hardfp/opt/vc/lib/libvchostif.a b/vc/hardfp/opt/vc/lib/libvchostif.a
diff --git a/vc/hardfp/opt/vc/lib/libvcilcs.a b/vc/hardfp/opt/vc/lib/libvcilcs.a
diff --git a/vc/hardfp/opt/vc/lib/libvmcs_rpc_client.a b/vc/hardfp/opt/vc/lib/libvmcs_rpc_client.a
diff --git a/vc/sdk/opt/vc/src/hello_pi/hello_fft/gpu_fft.c b/vc/sdk/opt/vc/src/hello_pi/hello_fft/gpu_fft.c
@@ -1,5 +1,5 @@
 /*
-BCM2835 "GPU_FFT" release 2.0 BETA
+BCM2835 "GPU_FFT" release 2.0
 Copyright (c) 2014, Andrew Holme.
 All rights reserved.
 
@@ -127,7 +127,7 @@ int gpu_fft_prepare(
 }
 
 unsigned gpu_fft_execute(struct GPU_FFT *info) {
-    gpu_fft_base_exec(&info->base, GPU_FFT_QPUS);
+    return gpu_fft_base_exec(&info->base, GPU_FFT_QPUS);
 }
 
 void gpu_fft_release(struct GPU_FFT *info) {

diff --git a/vc/sdk/opt/vc/src/hello_pi/hello_fft/gpu_fft.h b/vc/sdk/opt/vc/src/hello_pi/hello_fft/gpu_fft.h
@@ -1,5 +1,5 @@
 /*
-BCM2835 "GPU_FFT" release 2.0 BETA
+BCM2835 "GPU_FFT" release 2.0
 Copyright (c) 2014, Andrew Holme.
 All rights reserved.
 

diff --git a/vc/sdk/opt/vc/src/hello_pi/hello_fft/gpu_fft.txt b/vc/sdk/opt/vc/src/hello_pi/hello_fft/gpu_fft.txt
@@ -1,9 +1,9 @@
-BCM2835 "GPU_FFT" release 2.0 BETA by Andrew Holme, 2014.
+BCM2835 "GPU_FFT" release 2.0 by Andrew Holme, 2014.
 
 GPU_FFT is an FFT library for the Raspberry Pi which exploits the BCM2835 SoC
 3D hardware to deliver ten times more data throughput than is possible on the
 700 MHz ARM.  Kernels are provided for all power-of-2 FFT lengths between 256
-and 1,048,576 points inclusive.  A transpose function, which also uses the 3D
+and 2,097,152 points inclusive.  A transpose function, which also uses the 3D
 hardware, is provided to support 2-dimensional transforms.
 
 
@@ -16,8 +16,8 @@ is not scaled.  The relative root-mean-square (rms) error in parts-per-million
 log2(N) |  8    | 9    | 10   |  11   |  12  |  13  |  14  |  15  |  16 |  17
 ppm rms |  0.27 | 0.42 | 0.50 |  0.70 |  2.3 |  4.4 |  7.6 |  9.2 |  18 |  70
 
-log2(N) |  18 |  19 |  20 |                 8...17 batch of 10
-ppm rms | 100 | 180 | 360 |                18...20 batch of  1
+log2(N) |  18 |  19 |  20 |  21 |                8...17 batch of 10
+ppm rms | 100 | 180 | 360 | 720 |               18...21 batch of  1
 
 
 *** Throughput ***
@@ -36,9 +36,9 @@ log2(N) |   8   |   9   |  10   |  11   |  12  |  13  |  14  |  15  |
      10 | 0.016 | 0.027 | 0.045 | 0.095 | 0.25 | 0.61 |  1.2 |  3.2 |
    FFTW | 0.092 | 0.22  | 0.48  | 0.95  | 3.0  | 5.1  | 12   | 31   |
 
-log2(N) |  16  |  17 |  18 |  19 |   20 |           All times in
-      1 |  6.8 |  16 |  42 |  95 |  190 |           milliseconds
-   FFTW | 83   | 180 | 560 | 670 | 1600 |           2 sig. figs.
+log2(N) |  16  |  17 |  18 |  19 |   20 |   21 |       All times in
+      1 |  6.8 |  16 |  42 |  95 |  190 |  380 |       milliseconds
+   FFTW | 83   | 180 | 560 | 670 | 1600 | 3400 |       2 sig. figs.
 
 
 *** API functions ***
@@ -57,7 +57,7 @@ log2(N) |  16  |  17 |  18 |  19 |   20 |           All times in
 
     int mb          Mailbox file descriptor obtained by calling mbox_open()
 
-    int log2_N      log2(FFT length) = 8 to 20
+    int log2_N      log2(FFT length) = 8 to 21
 
     int direction   FFT direction:  GPU_FFT_FWD for forward FFT
                                     GPU_FFT_REV for inverse FFT

diff --git a/vc/sdk/opt/vc/src/hello_pi/hello_fft/gpu_fft_base.c b/vc/sdk/opt/vc/src/hello_pi/hello_fft/gpu_fft_base.c
@@ -1,5 +1,5 @@
 /*
-BCM2835 "GPU_FFT" release 2.0 BETA
+BCM2835 "GPU_FFT" release 2.0
 Copyright (c) 2014, Andrew Holme.
 All rights reserved.
 
@@ -46,7 +46,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define GPU_FFT_MEM_MAP 0x0 // cached=0x0; direct=0x20000000
 
 #define GPU_FFT_NO_FLUSH 1
-#define GPU_FFT_TIMEOUT 1000 // ms
+#define GPU_FFT_TIMEOUT 2000 // ms
 
 unsigned gpu_fft_base_exec_direct (
     struct GPU_FFT_BASE *base,
@@ -82,6 +82,7 @@ unsigned gpu_fft_base_exec(
 
     if (base->vc_msg) {
         // Use mailbox
+        // Returns: 0x0 for success; 0x80000000 for timeout
         return execute_qpu(base->mb, num_qpus, base->vc_msg, GPU_FFT_NO_FLUSH, GPU_FFT_TIMEOUT);
     }
     else {

diff --git a/vc/sdk/opt/vc/src/hello_pi/hello_fft/gpu_fft_shaders.c b/vc/sdk/opt/vc/src/hello_pi/hello_fft/gpu_fft_shaders.c
@@ -1,5 +1,5 @@
 /*
-BCM2835 "GPU_FFT" release 2.0 BETA
+BCM2835 "GPU_FFT" release 2.0
 Copyright (c) 2014, Andrew Holme.
 All rights reserved.
 
@@ -65,6 +65,9 @@ static unsigned int shader_512k[] = {
 static unsigned int shader_1024k[] = {
     #include "hex/shader_1024k.hex"
 };
+static unsigned int shader_2048k[] = {
+    #include "hex/shader_2048k.hex"
+};
 
 static struct {
     unsigned int size, *code;
@@ -82,7 +85,8 @@ shaders[] = {
     {sizeof(shader_128k), shader_128k},
     {sizeof(shader_256k), shader_256k},
     {sizeof(shader_512k), shader_512k},
-    {sizeof(shader_1024k), shader_1024k}
+    {sizeof(shader_1024k), shader_1024k},
+    {sizeof(shader_2048k), shader_2048k}
 };
 
 unsigned int  gpu_fft_shader_size(int log2_N) {

diff --git a/vc/sdk/opt/vc/src/hello_pi/hello_fft/gpu_fft_trans.c b/vc/sdk/opt/vc/src/hello_pi/hello_fft/gpu_fft_trans.c
@@ -1,5 +1,5 @@
 /*
-BCM2835 "GPU_FFT" release 2.0 BETA
+BCM2835 "GPU_FFT" release 2.0
 Copyright (c) 2014, Andrew Holme.
 All rights reserved.
 
@@ -87,7 +87,7 @@ int gpu_fft_trans_prepare(
 }
 
 unsigned gpu_fft_trans_execute(struct GPU_FFT_TRANS *info) {
-    gpu_fft_base_exec(&info->base, 1);
+    return gpu_fft_base_exec(&info->base, 1);
 }
 
 void gpu_fft_trans_release(struct GPU_FFT_TRANS *info) {

diff --git a/vc/sdk/opt/vc/src/hello_pi/hello_fft/gpu_fft_trans.h b/vc/sdk/opt/vc/src/hello_pi/hello_fft/gpu_fft_trans.h
@@ -1,5 +1,5 @@
 /*
-BCM2835 "GPU_FFT" release 2.0 BETA
+BCM2835 "GPU_FFT" release 2.0
 Copyright (c) 2014, Andrew Holme.
 All rights reserved.
 

diff --git a/vc/sdk/opt/vc/src/hello_pi/hello_fft/gpu_fft_twiddles.c b/vc/sdk/opt/vc/src/hello_pi/hello_fft/gpu_fft_twiddles.c
@@ -1,5 +1,5 @@
 /*
-BCM2835 "GPU_FFT" release 2.0 BETA
+BCM2835 "GPU_FFT" release 2.0
 Copyright (c) 2014, Andrew Holme.
 All rights reserved.
 
@@ -243,6 +243,19 @@ static void twiddles_1024k(double two_pi, float *out) {
         out = twiddles_base_32(two_pi, out, two_pi/N*q);
 }
 
+static void twiddles_2048k(double two_pi, float *out) {
+    double N=2048*1024;
+    int q;
+
+    out = twiddles_base_64(two_pi, out);
+    out = twiddles_step_32(two_pi, out, two_pi/N * 32*32);
+    out = twiddles_step_32(two_pi, out, two_pi/N * 32);
+    out = twiddles_step_32(two_pi, out, two_pi/N * GPU_FFT_QPUS);
+
+    for (q=0; q<GPU_FFT_QPUS; q++)
+        out = twiddles_base_32(two_pi, out, two_pi/N*q);
+}
+
 /****************************************************************************/
 
 static struct {
@@ -262,11 +275,12 @@ shaders[] = {
     {4, 5, 1, twiddles_128k},
     {4, 6, 2, twiddles_256k},
     {4, 7, 2, twiddles_512k},
-    {4, 8, 2, twiddles_1024k}
+    {4, 8, 2, twiddles_1024k},
+    {4,10, 2, twiddles_2048k}
 };
 
 int gpu_fft_twiddle_size(int log2_N, int *shared, int *unique, int *passes) {
-    if (log2_N<8 || log2_N>20) return -1;
+    if (log2_N<8 || log2_N>21) return -1;
     *shared = shaders[log2_N-8].shared;
     *unique = shaders[log2_N-8].unique;
     *passes = shaders[log2_N-8].passes;

diff --git a/vc/sdk/opt/vc/src/hello_pi/hello_fft/hello_fft.c b/vc/sdk/opt/vc/src/hello_pi/hello_fft/hello_fft.c
@@ -1,5 +1,5 @@
 /*
-BCM2835 "GPU_FFT" release 2.0 BETA
+BCM2835 "GPU_FFT" release 2.0
 Copyright (c) 2014, Andrew Holme.
 All rights reserved.
 
@@ -37,7 +37,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 char Usage[] =
     "Usage: hello_fft.bin log2_N [jobs [loops]]\n"
-    "log2_N = log2(FFT_length),       log2_N = 8...20\n"
+    "log2_N = log2(FFT_length),       log2_N = 8...21\n"
     "jobs   = transforms per batch,   jobs>0,        default 1\n"
     "loops  = number of test repeats, loops>0,       default 1\n";
 
@@ -55,7 +55,7 @@ int main(int argc, char *argv[]) {
     struct GPU_FFT_COMPLEX *base;
     struct GPU_FFT *fft;
 
-    log2_N = argc>1? atoi(argv[1]) : 12; // 8 <= log2_N <= 20
+    log2_N = argc>1? atoi(argv[1]) : 12; // 8 <= log2_N <= 21
     jobs   = argc>2? atoi(argv[2]) : 1;  // transforms per batch
     loops  = argc>3? atoi(argv[3]) : 1;  // test repetitions
 
@@ -69,7 +69,7 @@ int main(int argc, char *argv[]) {
 
     switch(ret) {
         case -1: printf("Unable to enable V3D. Please check your firmware is up to date.\n"); return -1;
-        case -2: printf("log2_N=%d not supported.  Try between 8 and 20.\n", log2_N);         return -1;
+        case -2: printf("log2_N=%d not supported.  Try between 8 and 21.\n", log2_N);         return -1;
         case -3: printf("Out of memory.  Try a smaller batch or increase GPU memory.\n");     return -1;
         case -4: printf("Unable to map Videocore peripherals into ARM memory space.\n");      return -1;
     }

diff --git a/vc/sdk/opt/vc/src/hello_pi/hello_fft/hello_fft_2d.c b/vc/sdk/opt/vc/src/hello_pi/hello_fft/hello_fft_2d.c
@@ -1,5 +1,5 @@
 /*
-BCM2835 "GPU_FFT" release 2.0 BETA
+BCM2835 "GPU_FFT" release 2.0
 Copyright (c) 2014, Andrew Holme.
 All rights reserved.
 
@@ -69,8 +69,8 @@ int main(int argc, char *argv[]) {
     // Write bitmap info
     memset(&bih, 0, sizeof(bih));
     bih.biSize = sizeof(bih);
-    bih.biWidth = N-1;
-    bih.biHeight = N-1;
+    bih.biWidth = N;
+    bih.biHeight = N;
     bih.biPlanes = 1;
     bih.biBitCount = 24;
     bih.biCompression = BI_RGB;