sox-devel@lists.sourceforge.net unofficial mirror
 help / color / mirror / code / Atom feed
* sox spectrogram patches
@ 2015-12-26  1:23 Martin Guy
  2015-12-26 10:43 ` Eric Wong
  0 siblings, 1 reply; 8+ messages in thread
From: Martin Guy @ 2015-12-26  1:23 UTC (permalink / raw)
  To: sox-devel

[-- Attachment #1: Type: text/plain, Size: 993 bytes --]

On 26/12/2015, Eric Wong <normalperson@yhbt.net> wrote:
> I've started maintaining a branch of things in their absence.

Thanks Eric. I have three commits, all regarding "sox spectrogram":
- remove arbitrary limit on spectrogram output height (was 1200)
- add "spectrogram -n" flag to normalize the image brightness
regardless of audio file loudness
- add FFTW3 support, which is between 150 and 250 times faster than
the slow built-in FFT routine
see commentary in the patches for further details.

They are the last three commits on https://github.com/martinwguy/sox.git
which is a fork of the main sf.net repository and are also attached.

They only touch src/spectrogram.c, and are independent o each other
(i.e. you can apply any or all of them in any order as they do not
conflict with each other). They are also attached as patches.

This were all needed to be able to use "sox spectrogram" for the
production of log-frequency-axis spectrograms of music in
wikidelia.net

Cheers

    M

[-- Attachment #2: sox-spectrogram-raise-size-limits.patch --]
[-- Type: text/x-patch, Size: 4594 bytes --]

spectrogram: remove arbitrary limit on height of spectrogram

Sox had an arbitrary limit of 8193 on the vertical axis size
based on MAX_FFT_SIZE=4096 and had fixed-size arrays for its data.
This is both wasteful of memory for smaller FFTs and stops us producing
more detailed output for no obvious reason.

This patch removes the size limit on Y-axis-height by making array
allocation dynamic. In practice, you can't remove the limit as getopt
insists on minimum and maximum values for numeric arguments, so we
copy the similarly arbitrary limit of 200000 from MAX_X_SIZE.

diff --git a/src/spectrogram.c b/src/spectrogram.c
index afb0b0e..b38d212 100644
--- a/src/spectrogram.c
+++ b/src/spectrogram.c
@@ -36,10 +36,13 @@
   #include <io.h>
 #endif
 
-#define MAX_FFT_SIZE 4096
 #define is_p2(x) !(x & (x - 1))
 
+/* These are arbitrary as there is no upper limit, but
+ * sox's getopt() needs an upper limit for each option, so...
+ */
 #define MAX_X_SIZE 200000
+#define MAX_Y_SIZE 200000
 
 typedef enum {Window_Hann, Window_Hamming, Window_Bartlett, Window_Rectangular, Window_Kaiser, Window_Dolph} win_type_t;
 static lsx_enum_item const window_options[] = {
@@ -71,8 +74,11 @@ typedef struct {
   int        dft_size, step_size, block_steps, block_num, rows, cols, read;
   int        x_size, end, end_min, last_end;
   sox_bool   truncated;
-  double     buf[MAX_FFT_SIZE], dft_buf[MAX_FFT_SIZE], window[MAX_FFT_SIZE+1];
-  double     block_norm, max, magnitudes[(MAX_FFT_SIZE>>1) + 1];
+  double     * buf;		/* [dft_size] */
+  double     * dft_buf;		/* [dft_size] */
+  double     * window;		/* [dft_size + 1] */
+  double     block_norm, max;
+  double     * magnitudes;	/* [(dft_size / 2) + 1] */
   float      * dBfs;
 } priv_t;
 
@@ -114,9 +120,9 @@ static int getopts(sox_effect_t * effp, int argc, char **argv)
 
   while ((c = lsx_getopt(&optstate)) != -1) switch (c) {
     GETOPT_NUMERIC(optstate, 'x', x_size0       , 100, MAX_X_SIZE)
-    GETOPT_NUMERIC(optstate, 'X', pixels_per_sec,  1 , 5000)
-    GETOPT_NUMERIC(optstate, 'y', y_size        , 64 , 1200)
-    GETOPT_NUMERIC(optstate, 'Y', Y_size        , 130, MAX_FFT_SIZE / 2 + 2)
+    GETOPT_NUMERIC(optstate, 'X', pixels_per_sec,  1 , MAX_X_SIZE)
+    GETOPT_NUMERIC(optstate, 'y', y_size        , 64 , MAX_Y_SIZE)
+    GETOPT_NUMERIC(optstate, 'Y', Y_size        , 130, MAX_Y_SIZE)
     GETOPT_NUMERIC(optstate, 'z', dB_range      , 20 , 180)
     GETOPT_NUMERIC(optstate, 'Z', gain          ,-100, 100)
     GETOPT_NUMERIC(optstate, 'q', spectrum_points, 0 , p->spectrum_points)
@@ -172,7 +178,7 @@ static double make_window(priv_t * p, int end)
   double sum = 0, * w = end < 0? p->window : p->window + end;
   int i, n = 1 + p->dft_size - abs(end);
 
-  if (end) memset(p->window, 0, sizeof(p->window));
+  if (end) memset(p->window, 0, sizeof(*(p->window)) * p->dft_size);
   for (i = 0; i < n; ++i) w[i] = 1;
   switch (p->win_type) {
     case Window_Hann: lsx_apply_hann(w, n); break;
@@ -190,10 +196,10 @@ static double make_window(priv_t * p, int end)
   return sum;
 }
 
-static double * rdft_init(int n)
+static double * rdft_init(size_t n)
 {
   double * q = lsx_malloc(2 * (n / 2 + 1) * n * sizeof(*q)), * p = q;
-  int i, j;
+  size_t i, j;
   for (j = 0; j <= n / 2; ++j) for (i = 0; i < n; ++i)
     *p++ = cos(2 * M_PI * j * i / n), *p++ = sin(2 * M_PI * j * i / n);
   return q;
@@ -260,11 +266,18 @@ static int start(sox_effect_t * effp)
   if (p->y_size) {
     p->dft_size = 2 * (p->y_size - 1);
     if (!is_p2(p->dft_size) && !effp->flow)
-      p->shared = rdft_init(p->dft_size);
+      p->shared = rdft_init((size_t)(p->dft_size));
   } else {
    int y = max(32, (p->Y_size? p->Y_size : 550) / effp->in_signal.channels - 2);
    for (p->dft_size = 128; p->dft_size <= y; p->dft_size <<= 1);
   }
+
+  /* Now that dft_size is set, allocate variable-sized elements of priv_t */
+  p->buf	= lsx_calloc(p->dft_size, sizeof(*(p->buf)));
+  p->dft_buf	= lsx_calloc(p->dft_size, sizeof(*(p->dft_buf)));
+  p->window	= lsx_calloc(p->dft_size + 1, sizeof(*(p->window)));
+  p->magnitudes = lsx_calloc((p->dft_size / 2) + 1, sizeof(*(p->magnitudes)));
+
   if (is_p2(p->dft_size) && !effp->flow)
     lsx_safe_rdft(p->dft_size, 1, p->dft_buf);
   lsx_debug("duration=%g x_size=%i pixels_per_sec=%g dft_size=%i", duration, p->x_size, pixels_per_sec, p->dft_size);
@@ -651,6 +664,10 @@ error: png_destroy_write_struct(&png, &png_info);
   free(png_rows);
   free(pixels);
   free(p->dBfs);
+  free(p->buf);
+  free(p->dft_buf);
+  free(p->window);
+  free(p->magnitudes);
   return SOX_SUCCESS;
 }
 

[-- Attachment #3: sox-spectrogram-add-normalize-flag.patch --]
[-- Type: text/x-patch, Size: 4532 bytes --]

Add spectrogram -n flag to normalise the output to maximum brightness

This change adds a "normalize" flag, -n, to sox spectrogram to adjust
the spectrogram gain such that the highest values get the brightest
colours, allowing one to get uniform spectrograms regardless of
different volumes in music files.

diff --git a/sox.1 b/sox.1
index 2c4ca47..699ceda 100644
--- a/sox.1
+++ b/sox.1
@@ -3235,6 +3235,10 @@ A negative
 .I num
 effectively increases the `brightness' of the spectrogram display,
 and vice versa.
+.IP \fB\-n\fR
+Sets the upper limit of the Z axis so that the loudest pixels
+are shown using the brightest colour in the palette - a kind of
+automatic \fB\-Z\fR flag.
 .IP \fB\-q\ \fInum\fR
 Sets the Z-axis quantisation, i.e. the number of different colours (or
 intensities) in which to render Z-axis
diff --git a/src/spectrogram.c b/src/spectrogram.c
index 8cbb3f4..6866e7b 100644
--- a/src/spectrogram.c
+++ b/src/spectrogram.c
@@ -59,7 +59,7 @@ typedef struct {
   double     pixels_per_sec, window_adjust;
   int        x_size0, y_size, Y_size, dB_range, gain, spectrum_points, perm;
   sox_bool   monochrome, light_background, high_colour, slack_overlap, no_axes;
-  sox_bool   raw, alt_palette, truncate;
+  sox_bool   normalize, raw, alt_palette, truncate;
   win_type_t win_type;
   char const * out_name, * title, * comment;
   char const *duration_str, *start_time_str;
@@ -113,7 +113,7 @@ static int getopts(sox_effect_t * effp, int argc, char **argv)
   char const * next;
   int c;
   lsx_getopt_t optstate;
-  lsx_getopt_init(argc, argv, "+S:d:x:X:y:Y:z:Z:q:p:W:w:st:c:AarmlhTo:", NULL, lsx_getopt_flag_none, 1, &optstate);
+  lsx_getopt_init(argc, argv, "+S:d:x:X:y:Y:z:Z:q:p:W:w:st:c:AarmnlhTo:", NULL, lsx_getopt_flag_none, 1, &optstate);
 
   p->dB_range = 120, p->spectrum_points = 249, p->perm = 1; /* Non-0 defaults */
   p->out_name = "spectrogram.png", p->comment = "Created by SoX";
@@ -134,6 +134,7 @@ static int getopts(sox_effect_t * effp, int argc, char **argv)
     case 'a': p->no_axes          = sox_true;   break;
     case 'r': p->raw              = sox_true;   break;
     case 'm': p->monochrome       = sox_true;   break;
+    case 'n': p->normalize        = sox_true;   break;
     case 'l': p->light_background = sox_true;   break;
     case 'h': p->high_colour      = sox_true;   break;
     case 'T': p->truncate         = sox_true;   break;
@@ -557,6 +558,7 @@ static int stop(sox_effect_t * effp) /* only called, by end(), on flow 0 */
   int         i, j, k, base, step, tick_len = 3 - p->no_axes;
   char        text[200], * prefix;
   double      limit;
+  float       autogain = 0.0;	/* Is changed if the -n flag was supplied */
 
   free(p->shared);
   if (p->using_stdout) {
@@ -583,8 +585,22 @@ static int stop(sox_effect_t * effp) /* only called, by end(), on flow 0 */
     png_rows[rows - 1 - j] = (png_bytep)(pixels + j * cols);
 
   /* Spectrogram */
+
+  if (p->normalize)
+    /* values are already in dB, so we subtract the maximum value
+     * (which will normally be negative) to raise the maximum to 0.0.
+     */
+    autogain = -p->max;
+
   for (k = 0; k < chans; ++k) {
     priv_t * q = (priv_t *)(effp - effp->flow + k)->priv;
+
+    if (p->normalize) {
+      float *fp;
+
+      for (i = p->rows * p->cols, fp = q->dBfs; i > 0 ; fp++, i--)
+	*fp += autogain;
+    }
     base = !p->raw * below + (chans - 1 - k) * (p->rows + 1);
     for (j = 0; j < p->rows; ++j) {
       for (i = 0; i < p->cols; ++i)
@@ -651,7 +667,7 @@ static int stop(sox_effect_t * effp) /* only called, by end(), on flow 0 */
     step = 10 * ceil(p->dB_range / 10. * (font_y + 2) / (k - 1));
     for (i = 0; i <= p->dB_range; i += step) {           /* (Tick) labels */
       int y = (double)i / p->dB_range * (k - 1) + .5;
-      sprintf(text, "%+i", i - p->gain - p->dB_range);
+      sprintf(text, "%+i", i - p->gain - p->dB_range - (int)(autogain+0.5));
       print_at(cols - right + 1, base + y + 5, Labels, text);
     }
   }
@@ -692,6 +708,7 @@ sox_effect_handler_t const * lsx_spectrogram_effect_fn(void)
     "\t-Y num\tY-height total (i.e. not per channel); default 550",
     "\t-z num\tZ-axis range in dB; default 120",
     "\t-Z num\tZ-axis maximum in dBFS; default 0",
+    "\t-n\tSet Z-axis maximum to the brightest pixel",
     "\t-q num\tZ-axis quantisation (0 - 249); default 249",
     "\t-w name\tWindow: Hann(default)/Hamming/Bartlett/Rectangular/Kaiser/Dolph",
     "\t-W num\tWindow adjust parameter (-10 - 10); applies only to Kaiser/Dolph",

[-- Attachment #4: sox-spectrogram-fftw.patch --]
[-- Type: text/x-patch, Size: 6218 bytes --]

spectrogram: add FFTW3 support

Sox specrogram has two different algorithms to do spectrograms:
lsx_safe_rdft() for dft_size of powers of two (-ysize=2^n+1) and
rdft_p(), private to spectrogram.c, which does any size but is from
150 to 250 times slower.

This adds FFTW3 support, which is about the same speed as the old lsx
routine but works at any size:
- stuff in "configure" to autodetect it and "./configure --without-fftw"
  to forcibly disable it
- code in src/spectrogram.c to use FFTW if it is enabled or the old
  routines otherwise
- changes to debian/control to build with FFTW (though the debian package
  build dies for other reasons not concerning this patch).

The output from the old algorithms and FFTW (the PNGs) are identical.

diff --git a/configure.ac b/configure.ac
index 23138a9..4bc3064 100644
--- a/configure.ac
+++ b/configure.ac
@@ -332,6 +332,28 @@ AC_SUBST(PNG_LIBS)
 
 
 
+dnl Check for FFTW3 libraries
+AC_ARG_WITH(fftw,
+    AS_HELP_STRING([--without-fftw],
+        [Don't try to use FFTW]))
+using_fftw=no
+if test "$with_fftw" != "no"; then
+    AC_CHECK_HEADERS(fftw3.h, using_fftw=yes)
+    if test "$using_fftw" = "yes"; then
+        AC_CHECK_LIB(fftw3, fftw_execute, FFTW_LIBS="$FFTW_LIBS -lfftw3", using_fftw=no)
+    fi
+    if test "$with_fftw" = "yes" -a "$using_fftw" = "no"; then
+        AC_MSG_FAILURE([cannot find FFTW3])
+    fi
+fi
+if test "$using_fftw" = yes; then
+   AC_DEFINE(HAVE_FFTW, 1, [Define to 1 if you have FFTW3.])
+fi
+AM_CONDITIONAL(HAVE_FFTW, test x$using_fftw = xyes)
+AC_SUBST(FFTW_LIBS)
+
+
+
 dnl Test for LADSPA
 AC_ARG_WITH(ladspa,
     AS_HELP_STRING([--without-ladspa], [Don't try to use LADSPA]))
@@ -756,6 +778,7 @@ echo "OTHER OPTIONS"
 echo "ladspa effects.............$using_ladspa"
 echo "magic support..............$using_magic"
 echo "png support................$using_png"
+echo "FFTW support...............$using_fftw"
 if test "x$OPENMP_CFLAGS" = "x"; then
 echo "OpenMP support.............no"
 else
diff --git a/debian/control b/debian/control
index dc3c34b..94681e8 100644
--- a/debian/control
+++ b/debian/control
@@ -8,6 +8,7 @@ Build-Depends: dh-autoreconf,
                ladspa-sdk,
                libao-dev,
                libasound2-dev [linux-any],
+               libfftw3-dev,
                libgsm1-dev,
                libid3tag0-dev,
                libltdl3-dev,
diff --git a/src/Makefile.am b/src/Makefile.am
index 7cceaaf..5838f1f 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -90,6 +90,9 @@ libsox_la_LIBADD = @PNG_LIBS@
 if HAVE_MAGIC
 libsox_la_LIBADD += @MAGIC_LIBS@
 endif
+if HAVE_FFTW
+libsox_la_LIBADD += @FFTW_LIBS@
+endif
 
 libsox_la_LIBADD += @GOMP_LIBS@
 
diff --git a/src/spectrogram.c b/src/spectrogram.c
index 34457f2..3d8c208 100644
--- a/src/spectrogram.c
+++ b/src/spectrogram.c
@@ -30,6 +30,10 @@
 #endif
 #include <zlib.h>
 
+#ifdef HAVE_FFTW3_H
+#include <fftw3.h>
+#endif
+
 /* For SET_BINARY_MODE: */
 #include <fcntl.h>
 #ifdef HAVE_IO_H
@@ -80,6 +84,9 @@ typedef struct {
   double     block_norm, max;
   double     * magnitudes;	/* [(dft_size / 2) + 1] */
   float      * dBfs;
+#if HAVE_FFTW
+  fftw_plan  fftw_plan;		/* Used if FFT_type == FFT_fftw */
+#endif
 } priv_t;
 
 #define secs(cols) \
@@ -197,6 +204,8 @@ static double make_window(priv_t * p, int end)
   return sum;
 }
 
+#if !HAVE_FFTW
+
 static double * rdft_init(size_t n)
 {
   double * q = lsx_malloc(2 * (n / 2 + 1) * n * sizeof(*q)), * p = q;
@@ -218,6 +227,8 @@ static void rdft_p(double const * q, double const * in, double * out, int n)
   }
 }
 
+#endif /* HAVE_FFTW */
+
 static int start(sox_effect_t * effp)
 {
   priv_t * p = (priv_t *)effp->priv;
@@ -266,8 +277,10 @@ static int start(sox_effect_t * effp)
 
   if (p->y_size) {
     p->dft_size = 2 * (p->y_size - 1);
+#if !HAVE_FFTW
     if (!is_p2(p->dft_size) && !effp->flow)
       p->shared = rdft_init((size_t)(p->dft_size));
+#endif
   } else {
    int y = max(32, (p->Y_size? p->Y_size : 550) / effp->in_signal.channels - 2);
    for (p->dft_size = 128; p->dft_size <= y; p->dft_size <<= 1);
@@ -279,12 +292,20 @@ static int start(sox_effect_t * effp)
   p->window	= lsx_calloc(p->dft_size + 1, sizeof(*(p->window)));
   p->magnitudes = lsx_calloc((p->dft_size / 2) + 1, sizeof(*(p->magnitudes)));
 
+  /* Initialize the FFT routine */
+#if HAVE_FFTW
+  /* We have one FFT plan per flow because the input/output arrays differ. */
+  p->fftw_plan = fftw_plan_r2r_1d(p->dft_size, p->dft_buf, p->dft_buf,
+                      FFTW_R2HC, FFTW_MEASURE);
+#else
   if (is_p2(p->dft_size) && !effp->flow)
     lsx_safe_rdft(p->dft_size, 1, p->dft_buf);
+#endif
+
   lsx_debug("duration=%g x_size=%i pixels_per_sec=%g dft_size=%i", duration, p->x_size, pixels_per_sec, p->dft_size);
 
   p->end = p->dft_size;
-  p->rows = (p->dft_size >> 1) + 1;
+  p->rows = (p->dft_size / 2) + 1;
   actual = make_window(p, p->last_end = 0);
   lsx_debug("window_density=%g", actual / p->dft_size);
   p->step_size = (p->slack_overlap? sqrt(actual * p->dft_size) : actual) + .5;
@@ -359,6 +380,19 @@ static int flow(sox_effect_t * effp,
     if ((p->end = max(p->end, p->end_min)) != p->last_end)
       make_window(p, p->last_end = p->end);
     for (i = 0; i < p->dft_size; ++i) p->dft_buf[i] = p->buf[i] * p->window[i];
+#if HAVE_FFTW
+    fftw_execute(p->fftw_plan);
+    /* Convert from FFTW's "half complex" format to an array of magnitudes.
+     * In HC format, the values are stored:
+     * r0, r1, r2 ... r(n/2), i(n+1)/2-1 .. i2, i1
+     */
+    p->magnitudes[0] += sqr(p->dft_buf[0]);
+    for (i = 1; i < p->dft_size / 2; ++i) {
+      p->magnitudes[i] += sqr(p->dft_buf[i]) + sqr(p->dft_buf[p->dft_size - i
+]);
+    }
+    p->magnitudes[p->dft_size / 2] += sqr(p->dft_buf[p->dft_size / 2]);
+#else
     if (is_p2(p->dft_size)) {
       lsx_safe_rdft(p->dft_size, 1, p->dft_buf);
       p->magnitudes[0] += sqr(p->dft_buf[0]);
@@ -367,6 +401,8 @@ static int flow(sox_effect_t * effp,
       p->magnitudes[p->dft_size >> 1] += sqr(p->dft_buf[1]);
     }
     else rdft_p(*p->shared_ptr, p->dft_buf, p->magnitudes, p->dft_size);
+#endif
+
     if (++p->block_num == p->block_steps && do_column(effp) == SOX_EOF)
       return SOX_EOF;
   }

[-- Attachment #5: Type: text/plain, Size: 79 bytes --]

------------------------------------------------------------------------------

[-- Attachment #6: Type: text/plain, Size: 158 bytes --]

_______________________________________________
SoX-devel mailing list
SoX-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/sox-devel

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: sox spectrogram patches
  2015-12-26  1:23 sox spectrogram patches Martin Guy
@ 2015-12-26 10:43 ` Eric Wong
  2015-12-26 13:37   ` Martin Guy
  0 siblings, 1 reply; 8+ messages in thread
From: Eric Wong @ 2015-12-26 10:43 UTC (permalink / raw)
  To: sox-devel

Martin Guy <martinwguy@gmail.com> wrote:
> On 26/12/2015, Eric Wong <normalperson@yhbt.net> wrote:
> > I've started maintaining a branch of things in their absence.
> 
> Thanks Eric. I have three commits, all regarding "sox spectrogram":
> - remove arbitrary limit on spectrogram output height (was 1200)

Seems alright, did you check for possible integer overflow issues
from raising the limit?  I was mildly alarmed at this:

+  if (end) memset(p->window, 0, sizeof(*(p->window)) * p->dft_size);

Until I noticed p->window was allocated with calloc which does overflow
checking:

  p->window	= lsx_calloc(p->dft_size + 1, sizeof(*(p->window)));

> - add "spectrogram -n" flag to normalize the image brightness
> regardless of audio file loudness

No issues here.

> - add FFTW3 support, which is between 150 and 250 times faster than
> the slow built-in FFT routine

Nice!  I've tested with/without FFTW3 installed and can confirm the
speedup.  I do have problems with eyesight and do not see well;
so I can't comment on actual images.

I've made some minor edits to configure.ac for portability and
robustness (I realize you're taking nearby statements as a
guideline, but I think we can do better when introducing new code)

- prefer AS_IF macro to generate portable "if" statements in sh

I also realize you're only testing auto-generated variables,
so the next two are unnecessary, but can aid in avoiding red flags
for other reviewers:

- avoid "-a" with "test" since it is error-prone compared to
  using "&&" and multiple "test" statements.
  E.g.  test -n "$x" -a "$a" = "$b"
  is buggy if "$x" is "="
  (example taken from Documentation/CodingGuidelines in git.git)

- prefix variables with "x" in "test" conditionals.
  E.g. test "$var" = yes
  is buggy if "$var" is "-n" or other "test" operators
  We avoid this problem with: test "x$var" = xyes

I'm also trying to keep configure.ac wrapped at 80 columns
(I wish 64 columns were standard, I want to use bigger fonts).

I also added #endif labels for the larger flow function.
I'd prefer if we could avoid CPP inside C functions entirely;
but that's a larger task and can be split into another patch.

So I'll squash the following and merge into my "pu" branch
for now:

diff --git a/configure.ac b/configure.ac
index 4bc3064..794f19c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -337,19 +337,18 @@ AC_ARG_WITH(fftw,
     AS_HELP_STRING([--without-fftw],
         [Don't try to use FFTW]))
 using_fftw=no
-if test "$with_fftw" != "no"; then
-    AC_CHECK_HEADERS(fftw3.h, using_fftw=yes)
-    if test "$using_fftw" = "yes"; then
-        AC_CHECK_LIB(fftw3, fftw_execute, FFTW_LIBS="$FFTW_LIBS -lfftw3", using_fftw=no)
-    fi
-    if test "$with_fftw" = "yes" -a "$using_fftw" = "no"; then
-        AC_MSG_FAILURE([cannot find FFTW3])
-    fi
-fi
-if test "$using_fftw" = yes; then
-   AC_DEFINE(HAVE_FFTW, 1, [Define to 1 if you have FFTW3.])
-fi
-AM_CONDITIONAL(HAVE_FFTW, test x$using_fftw = xyes)
+AS_IF([test "x$with_fftw" != xno], [
+    AC_CHECK_HEADERS([fftw3.h], [using_fftw=yes])
+    AS_IF([test "x$using_fftw" = xyes],
+        [ AC_CHECK_LIB([fftw3], [fftw_execute],
+			[FFTW_LIBS="$FFTW_LIBS -lfftw3"], [using_fftw=no]) ])
+
+    AS_IF([test "x$with_fftw" = xyes && test "x$using_fftw" = xno],
+        [ AC_MSG_FAILURE([cannot find FFTW3]) ])
+])
+AS_IF([test "x$using_fftw" = xyes],
+   [ AC_DEFINE(HAVE_FFTW, 1, [Define to 1 if you have FFTW3.]) ])
+AM_CONDITIONAL(HAVE_FFTW, test x"$using_fftw" = xyes)
 AC_SUBST(FFTW_LIBS)
 
 
diff --git a/src/spectrogram.c b/src/spectrogram.c
index 3d8c208..fb7313e 100644
--- a/src/spectrogram.c
+++ b/src/spectrogram.c
@@ -392,7 +392,7 @@ static int flow(sox_effect_t * effp,
 ]);
     }
     p->magnitudes[p->dft_size / 2] += sqr(p->dft_buf[p->dft_size / 2]);
-#else
+#else /* ! HAVE_FFTW */
     if (is_p2(p->dft_size)) {
       lsx_safe_rdft(p->dft_size, 1, p->dft_buf);
       p->magnitudes[0] += sqr(p->dft_buf[0]);
@@ -401,7 +401,7 @@ static int flow(sox_effect_t * effp,
       p->magnitudes[p->dft_size >> 1] += sqr(p->dft_buf[1]);
     }
     else rdft_p(*p->shared_ptr, p->dft_buf, p->magnitudes, p->dft_size);
-#endif
+#endif /* ! HAVE_FFTW */
 
     if (++p->block_num == p->block_steps && do_column(effp) == SOX_EOF)
       return SOX_EOF;

> They are the last three commits on https://github.com/martinwguy/sox.git
> which is a fork of the main sf.net repository and are also attached.

Thanks, I've also setup a mirror of yours at:

	git://80x24.org/mirrors/martinwguy/sox.git

(as well as one for Mans @ s/martinwguy/mansr/)

And I forgot to mention I made a mirror of my own repo at:

	git://repo.or.cz/sox/ew.git

------------------------------------------------------------------------------

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: sox spectrogram patches
  2015-12-26 10:43 ` Eric Wong
@ 2015-12-26 13:37   ` Martin Guy
  2015-12-26 21:18     ` Eric Wong
  0 siblings, 1 reply; 8+ messages in thread
From: Martin Guy @ 2015-12-26 13:37 UTC (permalink / raw)
  To: sox-devel

On 26/12/2015, Eric Wong <normalperson@yhbt.net> wrote:
> Martin Guy <martinwguy@gmail.com> wrote:
>> - remove arbitrary limit on spectrogram output height (was 1200)
>
> Seems alright, did you check for possible integer overflow issues
> from raising the limit?

Only by inspection and testing a range of heights/widths.

>> - add FFTW3 support, which is between 150 and 250 times faster than
>> the slow built-in FFT routine
>
> I do have problems with eyesight and do not see well;
> so I can't comment on actual images.

"cmp" tells me that the PNG files are identical with/without FFTW

> I've made some minor edits to configure.ac for portability and
> robustness

Yes, I've done relatively little configure.ac hacking and was hoping
someone who knows better might improve things. Thanks for the heads-up
on these possible issues

> I'd prefer if we could avoid CPP inside C functions entirely;
> but that's a larger task and can be split into another patch.

Do you mean, where there is a configure option, have two separate
functions, one with the HAVE_ code and one with the HAVE_NOT code?
Yes, it's ugly, I agree.
One alternative would be to use "if (HAVE_FFTW)" and let the compiler
turn if(0) or if(1) into constant code removal.

Thanks again for stepping in to work on this. I was about to leave sox
to die, given that its fathers have abandoned it. If we can get a last
twitch of life out of them, it would be best they appoint you as its
official maintainer.

Blessings

    M

------------------------------------------------------------------------------

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: sox spectrogram patches
  2015-12-26 13:37   ` Martin Guy
@ 2015-12-26 21:18     ` Eric Wong
  2015-12-26 22:14       ` Martin Guy
  2015-12-28 22:11       ` Måns Rullgård
  0 siblings, 2 replies; 8+ messages in thread
From: Eric Wong @ 2015-12-26 21:18 UTC (permalink / raw)
  To: sox-devel

Martin Guy <martinwguy@gmail.com> wrote:
>
> "cmp" tells me that the PNG files are identical with/without FFTW

Thanks.  I'll note that if I get around to making a test suite.

> Yes, I've done relatively little configure.ac hacking and was hoping
> someone who knows better might improve things. Thanks for the heads-up
> on these possible issues

No problem.  I'm still not too experienced in this area,
either but I started forcing myself to learn in another project.

> On 26/12/2015, Eric Wong <normalperson@yhbt.net> wrote:
> >
> > I'd prefer if we could avoid CPP inside C functions entirely;
> > but that's a larger task and can be split into another patch.
> 
> Do you mean, where there is a configure option, have two separate
> functions, one with the HAVE_ code and one with the HAVE_NOT code?
> Yes, it's ugly, I agree.
> One alternative would be to use "if (HAVE_FFTW)" and let the compiler
> turn if(0) or if(1) into constant code removal.

I mean defining functions with common signatures so using
them is transparent to higher-level callers.

The #ifdef in the struct seems inevitable, but below I've created
shared_{start,stop} functions which do the same thing inside
their respective "start" and "stop" functions.  It helps identify
common code and even brought me to find a memory leak (see below).

diff --git a/src/spectrogram.c b/src/spectrogram.c
index fb7313e..1b82aad 100644
--- a/src/spectrogram.c
+++ b/src/spectrogram.c
@@ -70,7 +70,11 @@ typedef struct {
   sox_bool   using_stdout; /* output image to stdout */
 
   /* Shared work area */
+#if HAVE_FFTW
+  fftw_plan  fftw_plan;		/* Used if FFT_type == FFT_fftw */
+#else
   double     * shared, * * shared_ptr;
+#endif
 
   /* Per-channel work area */
   int        WORK;  /* Start of work area is marked by this dummy variable. */
@@ -84,9 +88,6 @@ typedef struct {
   double     block_norm, max;
   double     * magnitudes;	/* [(dft_size / 2) + 1] */
   float      * dBfs;
-#if HAVE_FFTW
-  fftw_plan  fftw_plan;		/* Used if FFT_type == FFT_fftw */
-#endif
 } priv_t;
 
 #define secs(cols) \
@@ -169,7 +170,6 @@ static int getopts(sox_effect_t * effp, int argc, char **argv)
   p->spectrum_points += 2;
   if (p->alt_palette)
     p->spectrum_points = min(p->spectrum_points, (int)alt_palette_len);
-  p->shared_ptr = &p->shared;
   if (!strcmp(p->out_name, "-")) {
     if (effp->global_info->global_info->stdout_in_use_by) {
       lsx_fail("stdout already in use by `%s'", effp->global_info->global_info->stdout_in_use_by);
@@ -204,8 +204,21 @@ static double make_window(priv_t * p, int end)
   return sum;
 }
 
-#if !HAVE_FFTW
+#if HAVE_FFTW
+/* Initialize the FFT routine */
+static void shared_start(sox_effect_t * effp)
+{
+  priv_t * p = (priv_t *)effp->priv;
+  /* We have one FFT plan per flow because the input/output arrays differ. */
+  p->fftw_plan = fftw_plan_r2r_1d(p->dft_size, p->dft_buf, p->dft_buf,
+                      FFTW_R2HC, FFTW_MEASURE);
+}
+
+static void shared_stop(priv_t * p) {
+  (void)p; /* nothing */
+}
 
+#else /* !HAVE_FFTW */
 static double * rdft_init(size_t n)
 {
   double * q = lsx_malloc(2 * (n / 2 + 1) * n * sizeof(*q)), * p = q;
@@ -227,7 +240,24 @@ static void rdft_p(double const * q, double const * in, double * out, int n)
   }
 }
 
-#endif /* HAVE_FFTW */
+static void shared_start(sox_effect_t * effp)
+{
+  priv_t * p = (priv_t *)effp->priv;
+
+  p->shared_ptr = &p->shared;
+  if (!is_p2(p->dft_size) && !effp->flow) {
+    if (p->y_size) {
+      p->shared = rdft_init((size_t)(p->dft_size));
+    }
+    lsx_safe_rdft(p->dft_size, 1, p->dft_buf);
+  }
+}
+
+static void shared_stop(priv_t * p)
+{
+  free(p->shared);
+}
+#endif /* !HAVE_FFTW */
 
 static int start(sox_effect_t * effp)
 {
@@ -277,10 +307,6 @@ static int start(sox_effect_t * effp)
 
   if (p->y_size) {
     p->dft_size = 2 * (p->y_size - 1);
-#if !HAVE_FFTW
-    if (!is_p2(p->dft_size) && !effp->flow)
-      p->shared = rdft_init((size_t)(p->dft_size));
-#endif
   } else {
    int y = max(32, (p->Y_size? p->Y_size : 550) / effp->in_signal.channels - 2);
    for (p->dft_size = 128; p->dft_size <= y; p->dft_size <<= 1);
@@ -292,15 +318,7 @@ static int start(sox_effect_t * effp)
   p->window	= lsx_calloc(p->dft_size + 1, sizeof(*(p->window)));
   p->magnitudes = lsx_calloc((p->dft_size / 2) + 1, sizeof(*(p->magnitudes)));
 
-  /* Initialize the FFT routine */
-#if HAVE_FFTW
-  /* We have one FFT plan per flow because the input/output arrays differ. */
-  p->fftw_plan = fftw_plan_r2r_1d(p->dft_size, p->dft_buf, p->dft_buf,
-                      FFTW_R2HC, FFTW_MEASURE);
-#else
-  if (is_p2(p->dft_size) && !effp->flow)
-    lsx_safe_rdft(p->dft_size, 1, p->dft_buf);
-#endif
+  shared_start(effp);
 
   lsx_debug("duration=%g x_size=%i pixels_per_sec=%g dft_size=%i", duration, p->x_size, pixels_per_sec, p->dft_size);
 
@@ -596,7 +614,7 @@ static int stop(sox_effect_t * effp) /* only called, by end(), on flow 0 */
   double      limit;
   float       autogain = 0.0;	/* Is changed if the -n flag was supplied */
 
-  free(p->shared);
+  shared_stop(p);
   if (p->using_stdout) {
     SET_BINARY_MODE(stdout);
     file = stdout;

Which also brings me to the question:

  Do we need to we need to teardown fftw_plan to avoid resource leaks?

valgrind --leak-check=full says we do:

18,040 (24 direct, 18,016 indirect) bytes in 1 blocks are definitely lost in loss record 204 of 206
   at 0x4C27ED6: memalign (vg_replace_malloc.c:755)
   by 0x5D67944: fftw_malloc_plain (in /usr/lib/x86_64-linux-gnu/libfftw3.so.3.3.2)
   by 0x5E2EFBE: fftw_mkapiplan (in /usr/lib/x86_64-linux-gnu/libfftw3.so.3.3.2)
   by 0x5E338E2: fftw_plan_many_r2r (in /usr/lib/x86_64-linux-gnu/libfftw3.so.3.3.2)
   by 0x5E33A28: fftw_plan_r2r (in /usr/lib/x86_64-linux-gnu/libfftw3.so.3.3.2)
   by 0x5E33948: fftw_plan_r2r_1d (in /usr/lib/x86_64-linux-gnu/libfftw3.so.3.3.2)
   by 0x4E8560F: start (spectrogram.c:298)
   by 0x4E5CF4F: sox_add_effect (effects.c:157)
   by 0x406EB0: add_effect (sox.c:708)
   by 0x403CED: main (sox.c:1073)

24,744 (24 direct, 24,720 indirect) bytes in 1 blocks are definitely lost in loss record 205 of 206
   at 0x4C27ED6: memalign (vg_replace_malloc.c:755)
   by 0x5D67944: fftw_malloc_plain (in /usr/lib/x86_64-linux-gnu/libfftw3.so.3.3.2)
   by 0x5E2EFBE: fftw_mkapiplan (in /usr/lib/x86_64-linux-gnu/libfftw3.so.3.3.2)
   by 0x5E338E2: fftw_plan_many_r2r (in /usr/lib/x86_64-linux-gnu/libfftw3.so.3.3.2)
   by 0x5E33A28: fftw_plan_r2r (in /usr/lib/x86_64-linux-gnu/libfftw3.so.3.3.2)
   by 0x5E33948: fftw_plan_r2r_1d (in /usr/lib/x86_64-linux-gnu/libfftw3.so.3.3.2)
   by 0x4E8560F: start (spectrogram.c:298)
   by 0x4E5D0D7: sox_add_effect (effects.c:202)
   by 0x406EB0: add_effect (sox.c:708)
   by 0x403CED: main (sox.c:1073)

Btw, I haven't touched "flow", that is an exercise for the reader :)

> Thanks again for stepping in to work on this. I was about to leave sox
> to die, given that its fathers have abandoned it. If we can get a last
> twitch of life out of them, it would be best they appoint you as its
> official maintainer.

No problem.  I hope they come back.  It is certainly a scary thought
if I were to become an official... anything :x

------------------------------------------------------------------------------

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: sox spectrogram patches
  2015-12-26 21:18     ` Eric Wong
@ 2015-12-26 22:14       ` Martin Guy
  2015-12-26 23:15         ` Eric Wong
  2015-12-28 22:11       ` Måns Rullgård
  1 sibling, 1 reply; 8+ messages in thread
From: Martin Guy @ 2015-12-26 22:14 UTC (permalink / raw)
  To: sox-devel

On 26/12/2015, Eric Wong <normalperson@yhbt.net> wrote:
> --- a/src/spectrogram.c
> +++ b/src/spectrogram.c
> @@ -70,7 +70,11 @@ typedef struct {
>    sox_bool   using_stdout; /* output image to stdout */
>
>    /* Shared work area */
> +#if HAVE_FFTW
> +  fftw_plan  fftw_plan;		/* Used if FFT_type == FFT_fftw */
> +#else
>    double     * shared, * * shared_ptr;
> +#endif
>
>    /* Per-channel work area */
>    int        WORK;  /* Start of work area is marked by this dummy variable.
> */
> @@ -84,9 +88,6 @@ typedef struct {
>    double     block_norm, max;
>    double     * magnitudes;	/* [(dft_size / 2) + 1] */
>    float      * dBfs;
> -#if HAVE_FFTW
> -  fftw_plan  fftw_plan;		/* Used if FFT_type == FFT_fftw */
> -#endif
>  } priv_t;
>
>  #define secs(cols) \
> +  /* We have one FFT plan per flow because the input/output arrays differ.
> */
> +  p->fftw_plan = fftw_plan_r2r_1d(p->dft_size, p->dft_buf, p->dft_buf,
> +                      FFTW_R2HC, FFTW_MEASURE);
> +}

This may not be right. FFTW3 plans depend on the memory addresses of
the input and output vectors, so if you have two FFTs that are exactly
the same except for the input and output buffer addresses, they need
separate plans.
In this case, the plan depends on dft_buf, which seems to be specific
to each flow, so I think you should have a separare plan for each
flow.

I don't know what "shared" is used for, so I just left it alone. AFter
all, it's ore important that the code work, than to save four bytes of
RAM and risk breaking it...

By the way, that "/* Used if FFT_type == FFT_fftw */" comment is
stale, dating back to when I had an option to choose the FFT algorithm
at runtime - sorry about that...

Thanks

    M

------------------------------------------------------------------------------

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: sox spectrogram patches
  2015-12-26 22:14       ` Martin Guy
@ 2015-12-26 23:15         ` Eric Wong
  0 siblings, 0 replies; 8+ messages in thread
From: Eric Wong @ 2015-12-26 23:15 UTC (permalink / raw)
  To: sox-devel

Martin Guy <martinwguy@gmail.com> wrote:
> On 26/12/2015, Eric Wong <normalperson@yhbt.net> wrote:
> > --- a/src/spectrogram.c
> > +++ b/src/spectrogram.c
> > @@ -70,7 +70,11 @@ typedef struct {
> >    sox_bool   using_stdout; /* output image to stdout */
> >
> >    /* Shared work area */
> > +#if HAVE_FFTW
> > +  fftw_plan  fftw_plan;		/* Used if FFT_type == FFT_fftw */
> > +#else
> >    double     * shared, * * shared_ptr;
> > +#endif
> >
> >    /* Per-channel work area */
> >    int        WORK;  /* Start of work area is marked by this dummy variable.
> > */
> > @@ -84,9 +88,6 @@ typedef struct {
> >    double     block_norm, max;
> >    double     * magnitudes;	/* [(dft_size / 2) + 1] */
> >    float      * dBfs;
> > -#if HAVE_FFTW
> > -  fftw_plan  fftw_plan;		/* Used if FFT_type == FFT_fftw */
> > -#endif
> >  } priv_t;
> >
> >  #define secs(cols) \
> > +  /* We have one FFT plan per flow because the input/output arrays differ.
> > */
> > +  p->fftw_plan = fftw_plan_r2r_1d(p->dft_size, p->dft_buf, p->dft_buf,
> > +                      FFTW_R2HC, FFTW_MEASURE);
> > +}
> 
> This may not be right. FFTW3 plans depend on the memory addresses of
> the input and output vectors, so if you have two FFTs that are exactly
> the same except for the input and output buffer addresses, they need
> separate plans.
> In this case, the plan depends on dft_buf, which seems to be specific
> to each flow, so I think you should have a separare plan for each
> flow.

I'm not entirely sure what you mean, here.  Basically the shared_start
function doesn't change anything for FFTW users; and I've verified the
PNGs of using FFTW with/without my work-in-progress patch via cmp(1)
with identical results.

Or did you mean the location of fftw_plan field inside priv_t?
That doesn't seem to matter, does it?

I don't know the FFT stuff and math stuff well at all,
so I could be wrong.

> I don't know what "shared" is used for, so I just left it alone. AFter
> all, it's ore important that the code work, than to save four bytes of
> RAM and risk breaking it...

Heh, I did break it --without-fftw :x  I wasn't even thinking about
the RAM usage, but about documenting how the structure would be used.

Here's my work-in-progress fix, output now matches the FFTW-using PNG:

diff --git a/src/spectrogram.c b/src/spectrogram.c
index 1b82aad..d1e98c5 100644
--- a/src/spectrogram.c
+++ b/src/spectrogram.c
@@ -72,9 +72,8 @@ typedef struct {
   /* Shared work area */
 #if HAVE_FFTW
   fftw_plan  fftw_plan;		/* Used if FFT_type == FFT_fftw */
-#else
-  double     * shared, * * shared_ptr;
 #endif
+  double     * shared, * * shared_ptr; /* unused for FFTW */
 
   /* Per-channel work area */
   int        WORK;  /* Start of work area is marked by this dummy variable. */
@@ -170,6 +169,7 @@ static int getopts(sox_effect_t * effp, int argc, char **argv)
   p->spectrum_points += 2;
   if (p->alt_palette)
     p->spectrum_points = min(p->spectrum_points, (int)alt_palette_len);
+  p->shared_ptr = &p->shared;
   if (!strcmp(p->out_name, "-")) {
     if (effp->global_info->global_info->stdout_in_use_by) {
       lsx_fail("stdout already in use by `%s'", effp->global_info->global_info->stdout_in_use_by);
@@ -244,12 +244,14 @@ static void shared_start(sox_effect_t * effp)
 {
   priv_t * p = (priv_t *)effp->priv;
 
-  p->shared_ptr = &p->shared;
-  if (!is_p2(p->dft_size) && !effp->flow) {
-    if (p->y_size) {
-      p->shared = rdft_init((size_t)(p->dft_size));
+  if (!effp->flow) {
+    if (!is_p2(p->dft_size)) {
+      if (p->y_size) {
+        p->shared = rdft_init((size_t)(p->dft_size));
+      }
+    } else {
+      lsx_safe_rdft(p->dft_size, 1, p->dft_buf);
     }
-    lsx_safe_rdft(p->dft_size, 1, p->dft_buf);
   }
 }
 

> By the way, that "/* Used if FFT_type == FFT_fftw */" comment is
> stale, dating back to when I had an option to choose the FFT algorithm
> at runtime - sorry about that...

No worries.  Better we catch our mistakes now instead of
throwing a nasty heap at Chris and the gang when they
decide to return :)

------------------------------------------------------------------------------

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: sox spectrogram patches
  2015-12-26 21:18     ` Eric Wong
  2015-12-26 22:14       ` Martin Guy
@ 2015-12-28 22:11       ` Måns Rullgård
  2015-12-29 22:26         ` Eric Wong
  1 sibling, 1 reply; 8+ messages in thread
From: Måns Rullgård @ 2015-12-28 22:11 UTC (permalink / raw)
  To: Eric Wong; +Cc: sox-devel

Eric Wong <normalperson@yhbt.net> writes:

>> Thanks again for stepping in to work on this. I was about to leave sox
>> to die, given that its fathers have abandoned it. If we can get a last
>> twitch of life out of them, it would be best they appoint you as its
>> official maintainer.
>
> No problem.  I hope they come back.  It is certainly a scary thought
> if I were to become an official... anything :x

I'd be willing to co-maintain it, should it come to that.

-- 
Måns Rullgård

------------------------------------------------------------------------------

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: sox spectrogram patches
  2015-12-28 22:11       ` Måns Rullgård
@ 2015-12-29 22:26         ` Eric Wong
  0 siblings, 0 replies; 8+ messages in thread
From: Eric Wong @ 2015-12-29 22:26 UTC (permalink / raw)
  To: Måns Rullgård; +Cc: sox-devel

Måns Rullgård <mans@mansr.com> wrote:
> I'd be willing to co-maintain it, should it come to that.

Thank you, much appreciated!

------------------------------------------------------------------------------
_______________________________________________
SoX-devel mailing list
SoX-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/sox-devel

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2015-12-29 22:26 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-12-26  1:23 sox spectrogram patches Martin Guy
2015-12-26 10:43 ` Eric Wong
2015-12-26 13:37   ` Martin Guy
2015-12-26 21:18     ` Eric Wong
2015-12-26 22:14       ` Martin Guy
2015-12-26 23:15         ` Eric Wong
2015-12-28 22:11       ` Måns Rullgård
2015-12-29 22:26         ` Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/sox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).