sox-devel@lists.sourceforge.net unofficial mirror
 help / color / mirror / code / Atom feed
* [PATCH 1/6] Add SOX_ENCODING_DSD
@ 2015-09-16 14:16 Mans Rullgard
  2015-09-16 14:16 ` [PATCH 2/6] Add DSF file support Mans Rullgard
                   ` (4 more replies)
  0 siblings, 5 replies; 18+ messages in thread
From: Mans Rullgard @ 2015-09-16 14:16 UTC (permalink / raw)
  To: sox-devel

---
 src/formats.c | 2 ++
 src/sox.h     | 1 +
 2 files changed, 3 insertions(+)

diff --git a/src/formats.c b/src/formats.c
index f3efe76..4bd4ea5 100644
--- a/src/formats.c
+++ b/src/formats.c
@@ -145,6 +145,7 @@ static sox_encodings_info_t const s_sox_encodings_info[] = {
   {sox_encodings_lossy2, "CVSD"         , "CVSD"},
   {sox_encodings_lossy2, "LPC10"        , "LPC10"},
   {sox_encodings_lossy2, "Opus"         , "Opus"},
+  {sox_encodings_none  , "DSD"          , "Direct Stream Digital"},
 };
 
 assert_static(array_length(s_sox_encodings_info) == SOX_ENCODINGS,
@@ -169,6 +170,7 @@ unsigned sox_precision(sox_encoding_t encoding, unsigned bits_per_sample)
 
     case SOX_ENCODING_ALAW:       return bits_per_sample == 8? 13: 0;
     case SOX_ENCODING_ULAW:       return bits_per_sample == 8? 14: 0;
+    case SOX_ENCODING_DSD:        return bits_per_sample;
 
     case SOX_ENCODING_CL_ADPCM:   return bits_per_sample? 8: 0;
     case SOX_ENCODING_CL_ADPCM16: return bits_per_sample == 4? 13: 0;
diff --git a/src/sox.h b/src/sox.h
index 155742e..20339ab 100644
--- a/src/sox.h
+++ b/src/sox.h
@@ -594,6 +594,7 @@ typedef enum sox_encoding_t {
   SOX_ENCODING_CVSD      , /**< Continuously Variable Slope Delta modulation */
   SOX_ENCODING_LPC10     , /**< Linear Predictive Coding */
   SOX_ENCODING_OPUS      , /**< Opus compression */
+  SOX_ENCODING_DSD       , /**< Direct Stream Digital */
 
   SOX_ENCODINGS            /**< End of list marker */
 } sox_encoding_t;
-- 
2.5.2


------------------------------------------------------------------------------
Monitor Your Dynamic Infrastructure at Any Scale With Datadog!
Get real-time metrics from all of your servers, apps and tools
in one place.
SourceForge users - Click here to start your Free Trial of Datadog now!
http://pubads.g.doubleclick.net/gampad/clk?id=241902991&iu=/4140

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 2/6] Add DSF file support
  2015-09-16 14:16 [PATCH 1/6] Add SOX_ENCODING_DSD Mans Rullgard
@ 2015-09-16 14:16 ` Mans Rullgard
  2015-09-16 14:16 ` [PATCH 3/6] Add support for reading DSDIFF files Mans Rullgard
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 18+ messages in thread
From: Mans Rullgard @ 2015-09-16 14:16 UTC (permalink / raw)
  To: sox-devel

This adds support for reading and writing DSF files as specified by
http://dsd-guide.com/sites/default/files/white-papers/DSFFileFormatSpec_E.pdf

The 1-bit DSD samples are simply unpacked to sox_sample_t values of
maximum amplitude.  The "rate" filter can be used directly on this to
obtain usable PCM samples.

DSF files may include an ID3v2 metadata tag, which is not handled here.
---
 msvc10/LibSoX.vcxproj         |   1 +
 msvc10/LibSoX.vcxproj.filters |   3 +
 soxformat.7                   |   4 +
 src/Makefile.am               |   2 +-
 src/dsf.c                     | 378 ++++++++++++++++++++++++++++++++++++++++++
 src/formats.h                 |   1 +
 6 files changed, 388 insertions(+), 1 deletion(-)
 create mode 100644 src/dsf.c

diff --git a/msvc10/LibSoX.vcxproj b/msvc10/LibSoX.vcxproj
index b2e9d5a..e257831 100644
--- a/msvc10/LibSoX.vcxproj
+++ b/msvc10/LibSoX.vcxproj
@@ -144,6 +144,7 @@
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
     </ClCompile>
+    <ClCompile Include="..\src\dsf.c" />
     <ClCompile Include="..\src\example0.c">
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
diff --git a/msvc10/LibSoX.vcxproj.filters b/msvc10/LibSoX.vcxproj.filters
index f39449c..6cc951c 100644
--- a/msvc10/LibSoX.vcxproj.filters
+++ b/msvc10/LibSoX.vcxproj.filters
@@ -597,5 +597,8 @@
     <ClCompile Include="..\src\downsample.c">
       <Filter>Effect Sources</Filter>
     </ClCompile>
+    <ClCompile Include="..\src\dsf.c">
+      <Filter>Format Sources</Filter>
+    </ClCompile>
   </ItemGroup>
 </Project>
diff --git a/soxformat.7 b/soxformat.7
index 5e53b9f..685b27e 100644
--- a/soxformat.7
+++ b/soxformat.7
@@ -304,6 +304,10 @@ Example containing only 2 stereo samples of silence:
     0.00012481278	0	0
 .EE
 .TP
+.B .dsf
+DSD Stream File.  Format defined by Sony for storing 1-bit DSD data.
+Commonly used for online distribution of audiophile recordings.
+.TP
 \&\fB.dvms\fR, \fB.vms\fR
 Used in Germany to compress speech audio for voice mail.
 A self-describing variant of
diff --git a/src/Makefile.am b/src/Makefile.am
index 7cceaaf..462d46d 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -121,7 +121,7 @@ libsox_la_SOURCES += raw-fmt.c s1-fmt.c s2-fmt.c s3-fmt.c \
   lu-fmt.c 8svx.c aiff-fmt.c aifc-fmt.c au.c avr.c cdr.c cvsd-fmt.c \
   dvms-fmt.c dat.c hcom.c htk.c maud.c prc.c sf.c smp.c \
   sounder.c soundtool.c sphere.c tx16w.c voc.c vox-fmt.c ima-fmt.c adpcm.c adpcm.h \
-  ima_rw.c ima_rw.h wav.c wve.c xa.c nulfile.c f4-fmt.c f8-fmt.c gsrt.c
+  ima_rw.c ima_rw.h wav.c wve.c xa.c nulfile.c f4-fmt.c f8-fmt.c gsrt.c dsf.c
 
 libsox_la_LIBADD += @GSM_LIBS@ @LIBGSM_LIBADD@
 libsox_la_LIBADD += @LPC10_LIBS@ @LIBLPC10_LIBADD@
diff --git a/src/dsf.c b/src/dsf.c
new file mode 100644
index 0000000..2f17c8d
--- /dev/null
+++ b/src/dsf.c
@@ -0,0 +1,378 @@
+/* DSF file support
+ *
+ * Copyright (c) 2015 Mans Rullgard <mans@mansr.com>
+ *
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+/* File format specification available at
+ * http://dsd-guide.com/sites/default/files/white-papers/DSFFileFormatSpec_E.pdf
+ */
+
+#include <stdint.h>
+#include "sox_i.h"
+
+struct dsf {
+	uint64_t file_size;
+	uint64_t metadata;
+	uint32_t version;
+	uint32_t format_id;
+	uint32_t chan_type;
+	uint32_t chan_num;
+	uint32_t sfreq;
+	uint32_t bps;
+	uint64_t scount;
+	uint32_t block_size;
+
+	uint32_t block_pos;
+	uint32_t bit_pos;
+	uint8_t *block;
+	uint64_t read_samp;
+};
+
+#define TAG(a, b, c, d) ((a) | (b) << 8 | (c) << 16 | (d) << 24)
+
+#define DSF_TAG  TAG('D', 'S', 'D', ' ')
+#define FMT_TAG  TAG('f', 'm', 't', ' ')
+#define DATA_TAG TAG('d', 'a', 't', 'a')
+
+#define HEADER_SIZE (28 + 52 + 12)
+
+static int dsf_startread(sox_format_t *ft)
+{
+	struct dsf *dsf = ft->priv;
+	uint32_t magic;
+	uint64_t csize;
+	uint32_t v;
+
+	if (lsx_readdw(ft, &magic) || magic != DSF_TAG) {
+		lsx_fail_errno(ft, SOX_EHDR, "DSF signature not found");
+		return SOX_EHDR;
+	}
+
+	if (lsx_readqw(ft, &csize) || csize != 28) {
+		lsx_fail_errno(ft, SOX_EHDR, "invalid DSD chunk size");
+		return SOX_EHDR;
+	}
+
+	lsx_readqw(ft, &dsf->file_size);
+	lsx_readqw(ft, &dsf->metadata);
+
+	if (lsx_readdw(ft, &magic) || magic != FMT_TAG) {
+		lsx_fail_errno(ft, SOX_EHDR, "fmt chunk not found");
+		return SOX_EHDR;
+	}
+
+	if (lsx_readqw(ft, &csize) || csize != 52) {
+		lsx_fail_errno(ft, SOX_EHDR, "invalid fmt chunk size");
+		return SOX_EHDR;
+	}
+
+	if (lsx_readdw(ft, &dsf->version)   ||
+	    lsx_readdw(ft, &dsf->format_id) ||
+	    lsx_readdw(ft, &dsf->chan_type) ||
+	    lsx_readdw(ft, &dsf->chan_num)  ||
+	    lsx_readdw(ft, &dsf->sfreq)     ||
+	    lsx_readdw(ft, &dsf->bps)       ||
+	    lsx_readqw(ft, &dsf->scount)    ||
+	    lsx_readdw(ft, &dsf->block_size))
+		return SOX_EHDR;
+
+	if (lsx_readdw(ft, &v) || v) /* reserved */
+		return SOX_EHDR;
+
+	if (lsx_readdw(ft, &magic) || magic != DATA_TAG) {
+		lsx_fail_errno(ft, SOX_EHDR, "data chunk not found");
+		return SOX_EHDR;
+	}
+
+	if (lsx_readqw(ft, &csize) ||
+	    csize < 12 + dsf->block_size * dsf->chan_num) {
+		lsx_fail_errno(ft, SOX_EHDR, "invalid data chunk size");
+		return SOX_EHDR;
+	}
+
+	if (dsf->version != 1) {
+		lsx_fail_errno(ft, SOX_EHDR, "unknown format version %d",
+			       dsf->version);
+		return SOX_EHDR;
+	}
+
+	if (dsf->format_id != 0) {
+		lsx_fail_errno(ft, SOX_EFMT, "unknown format ID %d",
+			       dsf->format_id);
+		return SOX_EFMT;
+	}
+
+	if (dsf->chan_num < 1 || dsf->chan_num > 6) {
+		lsx_fail_errno(ft, SOX_EHDR, "invalid channel count %d",
+			       dsf->chan_num);
+		return SOX_EHDR;
+	}
+
+	if (dsf->bps != 1) {
+		lsx_fail_errno(ft, SOX_EFMT, "unsupported bit depth %d",
+			       dsf->bps);
+		return SOX_EFMT;
+	}
+
+	dsf->block = lsx_calloc(dsf->chan_num, (size_t)dsf->block_size);
+	if (!dsf->block)
+		return SOX_ENOMEM;
+
+	dsf->block_pos = dsf->block_size;
+
+	ft->signal.rate = dsf->sfreq;
+	ft->signal.channels = dsf->chan_num;
+	ft->signal.precision = 1;
+	ft->signal.length = dsf->scount * dsf->chan_num;
+
+	ft->encoding.encoding = SOX_ENCODING_DSD;
+	ft->encoding.bits_per_sample = 1;
+
+	return SOX_SUCCESS;
+}
+
+static void dsf_read_bits(struct dsf *dsf, sox_sample_t *buf, unsigned len)
+{
+	uint8_t *dsd = dsf->block + dsf->block_pos;
+	unsigned i, j;
+
+	for (i = 0; i < dsf->chan_num; i++) {
+		unsigned d = dsd[i * dsf->block_size];
+
+		for (j = 0; j < len; j++) {
+			buf[i + j * dsf->chan_num] = d & 1 ?
+				SOX_SAMPLE_MAX : -SOX_SAMPLE_MAX;
+			d >>= 1;
+		}
+	}
+}
+
+static size_t dsf_read(sox_format_t *ft, sox_sample_t *buf, size_t len)
+{
+	struct dsf *dsf = ft->priv;
+	uint64_t samp_left = dsf->scount - dsf->read_samp;
+	size_t rsamp = 0;
+
+	len /= dsf->chan_num;
+	len = min(len, samp_left);
+
+	while (len >= 8) {
+		if (dsf->block_pos >= dsf->block_size) {
+			size_t rlen = dsf->chan_num * dsf->block_size;
+			if (lsx_read_b_buf(ft, dsf->block, rlen) < rlen)
+				return rsamp * dsf->chan_num;
+			dsf->block_pos = 0;
+		}
+
+		dsf_read_bits(dsf, buf, 8);
+		buf += 8 * dsf->chan_num;
+		dsf->block_pos++;
+		rsamp += 8;
+		len -= 8;
+	}
+
+	if (len && samp_left < 8) {
+		dsf_read_bits(dsf, buf, (unsigned)len);
+		rsamp += len;
+	}
+
+	dsf->read_samp += rsamp;
+
+	return rsamp * dsf->chan_num;
+}
+
+static int dsf_stopread(sox_format_t *ft)
+{
+	struct dsf *dsf = ft->priv;
+
+	free(dsf->block);
+
+	return SOX_SUCCESS;
+}
+
+static int dsf_writeheader(sox_format_t *ft)
+{
+	struct dsf *dsf = ft->priv;
+	uint64_t data_size = dsf->file_size ? dsf->file_size - HEADER_SIZE : 0;
+
+	if (lsx_writedw(ft, DSF_TAG) ||
+	    lsx_writeqw(ft, (uint64_t)28) ||
+	    lsx_writeqw(ft, dsf->file_size) ||
+	    lsx_writeqw(ft, dsf->metadata) ||
+	    lsx_writedw(ft, FMT_TAG) ||
+	    lsx_writeqw(ft, (uint64_t)52) ||
+	    lsx_writedw(ft, dsf->version) ||
+	    lsx_writedw(ft, dsf->format_id) ||
+	    lsx_writedw(ft, dsf->chan_type) ||
+	    lsx_writedw(ft, dsf->chan_num) ||
+	    lsx_writedw(ft, dsf->sfreq) ||
+	    lsx_writedw(ft, dsf->bps) ||
+	    lsx_writeqw(ft, dsf->scount) ||
+	    lsx_writedw(ft, dsf->block_size) ||
+	    lsx_writedw(ft, 0) || /* reserved */
+	    lsx_writedw(ft, DATA_TAG) ||
+	    lsx_writeqw(ft, data_size + 12))
+		return SOX_EOF;
+
+	return SOX_SUCCESS;
+}
+
+static int dsf_startwrite(sox_format_t *ft)
+{
+	struct dsf *dsf = ft->priv;
+
+	dsf->version = 1;
+	dsf->format_id = 0;
+	dsf->chan_type = ft->signal.channels + (ft->signal.channels > 4);
+	dsf->chan_num = ft->signal.channels;
+	dsf->sfreq = ft->signal.rate;
+	dsf->bps = ft->encoding.bits_per_sample;
+	dsf->block_size = 4096;
+
+	dsf->block = lsx_calloc(dsf->chan_num, (size_t)dsf->block_size);
+	if (!dsf->block)
+		return SOX_ENOMEM;
+
+	return dsf_writeheader(ft);
+}
+
+static int dsf_write_buf(sox_format_t *ft)
+{
+	struct dsf *dsf = ft->priv;
+
+	if (dsf->block_pos == dsf->block_size) {
+		size_t wlen = dsf->chan_num * dsf->block_size;
+		if (lsx_write_b_buf(ft, dsf->block, wlen) < wlen)
+			return SOX_EOF;
+		dsf->block_pos = 0;
+		memset(dsf->block, 0, wlen);
+	}
+
+	return SOX_SUCCESS;
+}
+
+static void dsf_write_bits(struct dsf *dsf, const sox_sample_t *buf,
+			   unsigned start_bit, unsigned len)
+{
+	uint8_t *dsd = dsf->block + dsf->block_pos;
+	unsigned i, j;
+
+	for (i = 0; i < dsf->chan_num; i++) {
+		unsigned d = dsd[i * dsf->block_size];
+
+		for (j = start_bit; j < start_bit + len; j++) {
+			d |= (buf[i + j * dsf->chan_num] > 0) << j;
+		}
+
+		dsd[i * dsf->block_size] = d;
+	}
+}
+
+static size_t dsf_write(sox_format_t *ft, const sox_sample_t *buf, size_t len)
+{
+	struct dsf *dsf = ft->priv;
+	unsigned nchan = dsf->chan_num;
+	size_t wsamp = 0;
+
+	len /= nchan;
+
+	if (dsf->bit_pos) {
+		unsigned pre = min(len, 8 - dsf->bit_pos);
+
+		dsf_write_bits(dsf, buf, dsf->bit_pos, pre);
+		buf += pre * nchan;
+		wsamp += pre;
+		len -= pre;
+		dsf->bit_pos += pre;
+
+		if (dsf->bit_pos == 8) {
+			dsf->block_pos++;
+			dsf->bit_pos = 0;
+			if (dsf_write_buf(ft))
+				return 0;
+		}
+	}
+
+	while (len >= 8) {
+		dsf_write_bits(dsf, buf, 0, 8);
+		buf += 8 * nchan;
+		dsf->block_pos++;
+		wsamp += 8;
+		len -= 8;
+
+		if (dsf_write_buf(ft))
+			return wsamp * nchan;
+	}
+
+	if (len) {
+		dsf_write_bits(dsf, buf, 0, (unsigned)len);
+		wsamp += len;
+		dsf->bit_pos = len;
+	}
+
+	dsf->scount += wsamp;
+
+	return wsamp * nchan;
+}
+
+static int dsf_stopwrite(sox_format_t *ft)
+{
+	struct dsf *dsf = ft->priv;
+	int err = SOX_SUCCESS;
+
+	if (dsf->bit_pos)
+		dsf->block_pos++;
+
+	if (dsf->block_pos) {
+		size_t wlen = dsf->chan_num * dsf->block_size;
+		if (lsx_write_b_buf(ft, dsf->block, wlen) < wlen)
+			err = SOX_EOF;
+	}
+
+	free(dsf->block);
+
+	if (err)
+		return err;
+
+	dsf->file_size = lsx_tell(ft);
+
+	if (lsx_seeki(ft, (off_t)0, SEEK_SET)) {
+		lsx_fail_errno(ft, SOX_EOF,
+			       "error rewinding output to update header");
+		return SOX_EOF;
+	}
+
+	return dsf_writeheader(ft);
+}
+
+LSX_FORMAT_HANDLER(dsf)
+{
+	static char const * const names[] = { "dsf", NULL };
+	static unsigned const write_encodings[] = {
+		SOX_ENCODING_DSD, 1, 0,
+		0 };
+	static sox_format_handler_t const handler = {
+		SOX_LIB_VERSION_CODE,
+		"Container for DSD data",
+		names, SOX_FILE_LIT_END,
+		dsf_startread, dsf_read, dsf_stopread,
+		dsf_startwrite, dsf_write, dsf_stopwrite,
+		NULL, write_encodings, NULL,
+		sizeof(struct dsf)
+	};
+	return &handler;
+}
diff --git a/src/formats.h b/src/formats.h
index a42ce27..4701efd 100644
--- a/src/formats.h
+++ b/src/formats.h
@@ -26,6 +26,7 @@
   FORMAT(cvsd)
   FORMAT(cvu)
   FORMAT(dat)
+  FORMAT(dsf)
   FORMAT(dvms)
   FORMAT(f4)
   FORMAT(f8)
-- 
2.5.2


------------------------------------------------------------------------------
Monitor Your Dynamic Infrastructure at Any Scale With Datadog!
Get real-time metrics from all of your servers, apps and tools
in one place.
SourceForge users - Click here to start your Free Trial of Datadog now!
http://pubads.g.doubleclick.net/gampad/clk?id=241902991&iu=/4140

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 3/6] Add support for reading DSDIFF files
  2015-09-16 14:16 [PATCH 1/6] Add SOX_ENCODING_DSD Mans Rullgard
  2015-09-16 14:16 ` [PATCH 2/6] Add DSF file support Mans Rullgard
@ 2015-09-16 14:16 ` Mans Rullgard
  2015-09-16 14:16 ` [PATCH 4/6] Add macros for increasing data alignment Mans Rullgard
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 18+ messages in thread
From: Mans Rullgard @ 2015-09-16 14:16 UTC (permalink / raw)
  To: sox-devel

This adds support for reading DSDIFF files containing uncompressed
DSD data.  1-bit samples are unpacked to sox_sample_t values of
maximum amplitude.  Optional file elements are ignored.
---
 msvc10/LibSoX.vcxproj         |   1 +
 msvc10/LibSoX.vcxproj.filters |   3 +
 soxformat.7                   |   5 +
 src/Makefile.am               |   3 +-
 src/dsdiff.c                  | 209 ++++++++++++++++++++++++++++++++++++++++++
 src/formats.h                 |   1 +
 6 files changed, 221 insertions(+), 1 deletion(-)
 create mode 100644 src/dsdiff.c

diff --git a/msvc10/LibSoX.vcxproj b/msvc10/LibSoX.vcxproj
index e257831..c38a764 100644
--- a/msvc10/LibSoX.vcxproj
+++ b/msvc10/LibSoX.vcxproj
@@ -144,6 +144,7 @@
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
     </ClCompile>
+    <ClCompile Include="..\src\dsdiff.c" />
     <ClCompile Include="..\src\dsf.c" />
     <ClCompile Include="..\src\example0.c">
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
diff --git a/msvc10/LibSoX.vcxproj.filters b/msvc10/LibSoX.vcxproj.filters
index 6cc951c..bed7cb2 100644
--- a/msvc10/LibSoX.vcxproj.filters
+++ b/msvc10/LibSoX.vcxproj.filters
@@ -597,6 +597,9 @@
     <ClCompile Include="..\src\downsample.c">
       <Filter>Effect Sources</Filter>
     </ClCompile>
+    <ClCompile Include="..\src\dsdiff.c">
+      <Filter>Format Sources</Filter>
+    </ClCompile>
     <ClCompile Include="..\src\dsf.c">
       <Filter>Format Sources</Filter>
     </ClCompile>
diff --git a/soxformat.7 b/soxformat.7
index 685b27e..645910e 100644
--- a/soxformat.7
+++ b/soxformat.7
@@ -304,6 +304,11 @@ Example containing only 2 stereo samples of silence:
     0.00012481278	0	0
 .EE
 .TP
+.B .dff
+Direct Stream Digital Interchange File Format (DSDIFF). Format defined
+by Philips for storing 1-bit DSD data.  Used in SACD mastering and
+occasionally for online distribution.
+.TP
 .B .dsf
 DSD Stream File.  Format defined by Sony for storing 1-bit DSD data.
 Commonly used for online distribution of audiophile recordings.
diff --git a/src/Makefile.am b/src/Makefile.am
index 462d46d..e047580 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -121,7 +121,8 @@ libsox_la_SOURCES += raw-fmt.c s1-fmt.c s2-fmt.c s3-fmt.c \
   lu-fmt.c 8svx.c aiff-fmt.c aifc-fmt.c au.c avr.c cdr.c cvsd-fmt.c \
   dvms-fmt.c dat.c hcom.c htk.c maud.c prc.c sf.c smp.c \
   sounder.c soundtool.c sphere.c tx16w.c voc.c vox-fmt.c ima-fmt.c adpcm.c adpcm.h \
-  ima_rw.c ima_rw.h wav.c wve.c xa.c nulfile.c f4-fmt.c f8-fmt.c gsrt.c dsf.c
+  ima_rw.c ima_rw.h wav.c wve.c xa.c nulfile.c f4-fmt.c f8-fmt.c gsrt.c dsf.c \
+  dsdiff.c
 
 libsox_la_LIBADD += @GSM_LIBS@ @LIBGSM_LIBADD@
 libsox_la_LIBADD += @LPC10_LIBS@ @LIBLPC10_LIBADD@
diff --git a/src/dsdiff.c b/src/dsdiff.c
new file mode 100644
index 0000000..8d3ccc3
--- /dev/null
+++ b/src/dsdiff.c
@@ -0,0 +1,209 @@
+/* DSDIFF file support
+ *
+ * Copyright (c) 2015 Mans Rullgard <mans@mansr.com>
+ *
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+/* File format specification available at
+ * http://dsd-guide.com/sites/default/files/white-papers/DSDIFF_1.5_Spec.pdf
+ */
+
+#include <stdint.h>
+#include "sox_i.h"
+
+struct dsdiff {
+	uint32_t sample_rate;
+	uint16_t num_channels;
+	uint64_t data_size;
+	uint8_t *buf;
+};
+
+#define ID(a, b, c, d) ((a) << 24 | (b) << 16 | (c) << 8 | (d))
+
+static int dff_startread(sox_format_t *ft)
+{
+	struct dsdiff *dff = ft->priv;
+	uint32_t ckid;
+	uint32_t cktype;
+	uint64_t cksize;
+	uint64_t f8size;
+	uint32_t fver;
+	uint64_t spos, epos;
+
+	if (lsx_readdw(ft, &ckid) || ckid != ID('F', 'R', 'M', '8')) {
+		lsx_fail_errno(ft, SOX_EHDR, "FRM8 tag not found");
+		return SOX_EHDR;
+	}
+
+	if (lsx_readqw(ft, &f8size)) {
+		lsx_fail_errno(ft, SOX_EHDR, "error reading chunk size");
+		return SOX_EHDR;
+	}
+
+	if (lsx_readdw(ft, &cktype) || cktype != ID('D', 'S', 'D', ' ')) {
+		lsx_fail_errno(ft, SOX_EHDR, "DSD tag not found");
+		return SOX_EHDR;
+	}
+
+	do {
+		if (lsx_readdw(ft, &ckid) || lsx_readqw(ft, &cksize)) {
+			lsx_fail_errno(ft, SOX_EHDR, "read error");
+			return SOX_EHDR;
+		}
+
+		spos = lsx_tell(ft);
+
+		switch (ckid) {
+		case ID('F', 'V', 'E', 'R'):
+			if (cksize != 4)
+				return SOX_EHDR;
+			if (lsx_readdw(ft, &fver))
+				return SOX_EHDR;
+			if (fver >> 24 != 1) {
+				lsx_fail_errno(ft, SOX_EHDR, "unknown version");
+				return SOX_EHDR;
+			}
+			break;
+
+		case ID('P', 'R', 'O', 'P'):
+			if (cksize < 4)
+				return SOX_EHDR;
+			if (lsx_readdw(ft, &cktype))
+				return SOX_EHDR;
+			if (cktype == ID('S', 'N', 'D', ' '))
+				cksize = 4;
+			break;
+
+		case ID('F', 'S', ' ', ' '):
+			if (cksize < 4)
+				return SOX_EHDR;
+			if (lsx_readdw(ft, &dff->sample_rate))
+				return SOX_EHDR;
+			break;
+
+		case ID('C', 'H', 'N', 'L'):
+			if (cksize < 4)
+				return SOX_EHDR;
+			if (lsx_readw(ft, &dff->num_channels))
+				return SOX_EHDR;
+			break;
+
+		case ID('C', 'M', 'P', 'R'):
+			if (cksize < 4)
+				return SOX_EHDR;
+			if (lsx_readdw(ft, &cktype))
+				return SOX_EHDR;
+			if (cktype != ID('D', 'S', 'D', ' ')) {
+				lsx_fail_errno(ft, SOX_EHDR,
+					       "unsupported compression");
+				return SOX_EHDR;
+			}
+			break;
+
+		case ID('D', 'S', 'D', ' '):
+			if (cksize < 8)
+				return SOX_EHDR;
+			dff->data_size = cksize;
+			cksize = 0;
+			break;
+		}
+
+		cksize += cksize & 1;
+		epos = lsx_tell(ft);
+		if (epos != spos + cksize)
+			lsx_seeki(ft, (off_t)(spos + cksize - epos), SEEK_CUR);
+	} while (cksize && epos < f8size);
+
+	if (!dff->sample_rate || !dff->num_channels || !dff->data_size) {
+		lsx_fail_errno(ft, SOX_EHDR, "invalid file header");
+		return SOX_EHDR;
+	}
+
+	if (ckid != ID('D', 'S', 'D', ' ')) {
+		lsx_fail_errno(ft, SOX_EHDR, "unsupported data type");
+		return SOX_EHDR;
+	}
+
+	dff->buf = lsx_malloc((size_t)dff->num_channels);
+	if (!dff->buf)
+		return SOX_ENOMEM;
+
+	ft->signal.rate = dff->sample_rate;
+	ft->signal.channels = dff->num_channels;
+	ft->signal.precision = 1;
+	ft->signal.length = dff->data_size * 8;
+
+	ft->encoding.encoding = SOX_ENCODING_DSD;
+	ft->encoding.bits_per_sample = 1;
+
+	return SOX_SUCCESS;
+}
+
+static size_t dff_read(sox_format_t *ft, sox_sample_t *buf, size_t len)
+{
+	struct dsdiff *dff = ft->priv;
+	size_t nc = dff->num_channels;
+	size_t rsamp = 0;
+	unsigned i, j;
+
+	len /= nc;
+
+	while (len >= 8) {
+		if (lsx_read_b_buf(ft, dff->buf, nc) < nc)
+			return rsamp * nc;
+
+		for (i = 0; i < nc; i++) {
+			unsigned d = dff->buf[i];
+
+			for (j = 0; j < 8; j++) {
+				buf[i + j * nc] = d & 128 ?
+					SOX_SAMPLE_MAX : -SOX_SAMPLE_MAX;
+				d <<= 1;
+			}
+		}
+
+		buf += 8 * nc;
+		rsamp += 8;
+		len -= 8;
+	}
+
+	return rsamp * nc;
+}
+
+static int dff_stopread(sox_format_t *ft)
+{
+	struct dsdiff *dff = ft->priv;
+
+	free(dff->buf);
+
+	return SOX_SUCCESS;
+}
+
+LSX_FORMAT_HANDLER(dsdiff)
+{
+	static char const * const names[] = { "dff", NULL };
+	static unsigned const write_encodings[] = { 0 };
+	static sox_format_handler_t const handler = {
+		SOX_LIB_VERSION_CODE,
+		"Direct Stream Digital Interchange File Format (DSDIFF)",
+		names, SOX_FILE_BIG_END,
+		dff_startread, dff_read, dff_stopread,
+		NULL, NULL, NULL,
+		NULL, write_encodings, NULL,
+		sizeof(struct dsdiff)
+	};
+	return &handler;
+}
diff --git a/src/formats.h b/src/formats.h
index 4701efd..eb33577 100644
--- a/src/formats.h
+++ b/src/formats.h
@@ -26,6 +26,7 @@
   FORMAT(cvsd)
   FORMAT(cvu)
   FORMAT(dat)
+  FORMAT(dsdiff)
   FORMAT(dsf)
   FORMAT(dvms)
   FORMAT(f4)
-- 
2.5.2


------------------------------------------------------------------------------
Monitor Your Dynamic Infrastructure at Any Scale With Datadog!
Get real-time metrics from all of your servers, apps and tools
in one place.
SourceForge users - Click here to start your Free Trial of Datadog now!
http://pubads.g.doubleclick.net/gampad/clk?id=241902991&iu=/4140

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 4/6] Add macros for increasing data alignment
  2015-09-16 14:16 [PATCH 1/6] Add SOX_ENCODING_DSD Mans Rullgard
  2015-09-16 14:16 ` [PATCH 2/6] Add DSF file support Mans Rullgard
  2015-09-16 14:16 ` [PATCH 3/6] Add support for reading DSDIFF files Mans Rullgard
@ 2015-09-16 14:16 ` Mans Rullgard
  2015-12-20 12:41   ` Eric Wong
  2015-09-16 14:16 ` [PATCH 5/6] Add a sigma-delta modulator for DSD encoding Mans Rullgard
  2015-09-16 14:16 ` [PATCH 6/6] Add DSD over PCM (dop) effect Mans Rullgard
  4 siblings, 1 reply; 18+ messages in thread
From: Mans Rullgard @ 2015-09-16 14:16 UTC (permalink / raw)
  To: sox-devel

This adds the LSX_ALIGN() macro to request a specified alignment of
static data and struct definitions using GCC and MSVC attributes.
Also check for the aligned_alloc() function and alias to Microsoft's
_aligned_malloc() if necessary.
---
 configure.ac |  2 +-
 src/util.h   | 20 ++++++++++++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/configure.ac b/configure.ac
index 23138a9..08f80e7 100644
--- a/configure.ac
+++ b/configure.ac
@@ -207,7 +207,7 @@ AC_HEADER_STDC
 AC_CHECK_HEADERS(fcntl.h unistd.h byteswap.h sys/stat.h sys/time.h sys/timeb.h sys/types.h sys/utsname.h termios.h glob.h fenv.h)
 
 dnl Checks for library functions.
-AC_CHECK_FUNCS(strcasecmp strdup popen vsnprintf gettimeofday mkstemp fmemopen)
+AC_CHECK_FUNCS(strcasecmp strdup popen vsnprintf gettimeofday mkstemp fmemopen aligned_alloc)
 
 dnl Check if math library is needed.
 AC_SEARCH_LIBS([pow], [m])
diff --git a/src/util.h b/src/util.h
index 87970bb..b5cc9b8 100644
--- a/src/util.h
+++ b/src/util.h
@@ -184,6 +184,26 @@
 #define field_offset(type, field) ((size_t)&(((type *)0)->field))
 #define unless(x) if (!(x))
 
+/*----------------------------- Data alignment -------------------------------*/
+
+#ifdef __GNUC__
+#define LSX_ALIGN(n) __attribute__((aligned(n)))
+#elif defined _MSC_VER
+#define LSX_ALIGN(n) __declspec(align(n))
+#else
+#define LSX_ALIGN(n)
+#endif
+
+#ifdef HAVE_ALIGNED_ALLOC
+  #define aligned_free(p) free(p)
+#elif defined _MSC_VER
+  #define aligned_alloc(a, s) _aligned_malloc(s, a)
+  #define aligned_free(p) _aligned_free(p)
+#else
+  #define aligned_alloc(a, s) malloc(s)
+  #define aligned_free(p) free(p)
+#endif
+
 /*------------------------------- Maths stuff --------------------------------*/
 
 #include <math.h>
-- 
2.5.2


------------------------------------------------------------------------------
Monitor Your Dynamic Infrastructure at Any Scale With Datadog!
Get real-time metrics from all of your servers, apps and tools
in one place.
SourceForge users - Click here to start your Free Trial of Datadog now!
http://pubads.g.doubleclick.net/gampad/clk?id=241902991&iu=/4140

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 5/6] Add a sigma-delta modulator for DSD encoding
  2015-09-16 14:16 [PATCH 1/6] Add SOX_ENCODING_DSD Mans Rullgard
                   ` (2 preceding siblings ...)
  2015-09-16 14:16 ` [PATCH 4/6] Add macros for increasing data alignment Mans Rullgard
@ 2015-09-16 14:16 ` Mans Rullgard
  2015-10-03 22:31   ` Eric Wong
  2015-09-16 14:16 ` [PATCH 6/6] Add DSD over PCM (dop) effect Mans Rullgard
  4 siblings, 1 reply; 18+ messages in thread
From: Mans Rullgard @ 2015-09-16 14:16 UTC (permalink / raw)
  To: sox-devel

This adds a sigma-delta modulator for 1-bit (DSD) encoding.  It is
invoked automatically by the "dither" effect when the output precision
is 1-bit, or manually as the "sdm" effect with a choice of the
following noise-shaping filters:

  fast   Reasonably good quality while fast enough for real-time
         operation.  This is the default.

  hq     Lower noise and distortion than "fast" at the expense of
         being much slower.

  audiophile
         Somewhat better quality than "hq" and almost twice as slow.

  goldenear
         Slightly higher quality than "audiophile" and considerably
         slower.

Prior to this encoder, the sampling rate should be increased, e.g. by
means of the "rate" effect.
---
 msvc10/LibSoX.vcxproj         |   1 +
 msvc10/LibSoX.vcxproj.filters |   3 +
 sox.1                         |  39 +++
 src/Makefile.am               |   4 +-
 src/dither.c                  |  46 ++-
 src/effects.h                 |   1 +
 src/sdm.c                     | 666 ++++++++++++++++++++++++++++++++++++++++++
 src/sdm.h                     |  42 +++
 src/sdm_x86.h                 | 235 +++++++++++++++
 9 files changed, 1025 insertions(+), 12 deletions(-)
 create mode 100644 src/sdm.c
 create mode 100644 src/sdm.h
 create mode 100644 src/sdm_x86.h

diff --git a/msvc10/LibSoX.vcxproj b/msvc10/LibSoX.vcxproj
index c38a764..4a475cf 100644
--- a/msvc10/LibSoX.vcxproj
+++ b/msvc10/LibSoX.vcxproj
@@ -174,6 +174,7 @@
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
     </ClCompile>
+    <ClCompile Include="..\src\sdm.c" />
     <ClCompile Include="..\src\skeleff.c">
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
diff --git a/msvc10/LibSoX.vcxproj.filters b/msvc10/LibSoX.vcxproj.filters
index bed7cb2..01bda8c 100644
--- a/msvc10/LibSoX.vcxproj.filters
+++ b/msvc10/LibSoX.vcxproj.filters
@@ -603,5 +603,8 @@
     <ClCompile Include="..\src\dsf.c">
       <Filter>Format Sources</Filter>
     </ClCompile>
+    <ClCompile Include="..\src\sdm.c">
+      <Filter>Effect Sources</Filter>
+    </ClCompile>
   </ItemGroup>
 </Project>
diff --git a/sox.1 b/sox.1
index 2c4ca47..98a84a1 100644
--- a/sox.1
+++ b/sox.1
@@ -2012,6 +2012,10 @@ option is not given, then the pseudo-random number generator used to
 generate the white noise will be `reseeded', i.e. the generated noise
 will be different between invocations.
 .SP
+If the target precision is 1-bit, the \fBsdm\fR effect is applied
+automatically with default settings. Invoke it manually to control its
+options.
+.SP
 This effect should not be followed by any other effect that
 affects the audio.
 .SP
@@ -2991,6 +2995,41 @@ The sampling rate must be one of: 44\*d1, 48, 88\*d2, 96 kHz.
 .SP
 This effect supports the \fB\-\-plot\fR global option.
 .TP
+\fBsdm\fR [\fB\-f \fIfilter\fR] [\fB\-t \fIorder\fR] [\fB\-n \fInum\fR] [\fB\-l \fIlatency\fR]
+Apply a 1-bit sigma-delta modulator producing DSD output.  The input
+should be previously upsampled, e.g. with the \fBrate\fR effect, to a
+high rate, 2\*d8224MHz for DSD64.  The \fB\-f\fR option selects the
+noise-shaping filter from the following list:
+.RS
+.IP \fBfast\fR
+Reasonably good quality while fast enough for real-time operation.
+This is the default.
+.IP \fBhq\fR
+Lower noise and distortion than \fBfast\fR at the expense of being
+much slower.
+.IP \fBaudiophile\fR
+Somewhat better quality than \fBhq\fR and almost twice as slow.
+.IP \fBgoldenear\fR
+Slightly higher quality than \fBaudiophile\fR and considerably slower.
+.RE
+.TP
+\ 
+All but the \fBfast\fR filter perform a partial trellis/viterbi search
+with preset parameters which can be overridden using the following
+options:
+.RS
+.IP \fB\-t \fIorder\fR
+Set the trellis order, max 32.
+.IP \fB\-n \fInum\fR
+Set the number of paths to consider, max 32.
+.IP \fB\-l \fIlatency\fR
+Set the output latency, max 2048.
+.RE
+.TP
+\ 
+The result of using these overrides is hard to predict and can include
+high noise levels or instability.  Caution is advised.
+.TP
 \fBsilence \fR[\fB\-l\fR] \fIabove-periods\fR [\fIduration threshold\fR[\fBd\fR\^|\^\fB%\fR]
 [\fIbelow-periods duration threshold\fR[\fBd\fR\^|\^\fB%\fR]]
 .SP
diff --git a/src/Makefile.am b/src/Makefile.am
index e047580..ca7bae7 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -76,8 +76,8 @@ libsox_la_SOURCES += \
 	mcompand_xover.h noiseprof.c noisered.c \
 	noisered.h output.c overdrive.c pad.c phaser.c rate.c \
 	rate_filters.h rate_half_fir.h rate_poly_fir0.h rate_poly_fir.h \
-	remix.c repeat.c reverb.c reverse.c silence.c sinc.c skeleff.c \
-	speed.c splice.c stat.c stats.c stretch.c swap.c \
+	remix.c repeat.c reverb.c reverse.c sdm.c sdm.h sdm_x86.h silence.c \
+	sinc.c skeleff.c speed.c splice.c stat.c stats.c stretch.c swap.c \
 	synth.c tempo.c tremolo.c trim.c upsample.c vad.c vol.c \
 	ignore-warning.h
 if HAVE_PNG
diff --git a/src/dither.c b/src/dither.c
index 3351615..2af827a 100644
--- a/src/dither.c
+++ b/src/dither.c
@@ -20,6 +20,7 @@
 #endif
 
 #include "sox_i.h"
+#include "sdm.h"
 #include <assert.h>
 
 #undef RANQD1
@@ -263,6 +264,7 @@ typedef struct {
   double const  * coefs;
   sox_bool      dither_off;
   sox_effect_handler_flow flow;
+  sdm_t         *sdm;
 } priv_t;
 
 #define CONVOLVE _ _ _ _
@@ -292,6 +294,13 @@ typedef struct {
 #define N 20
 #include "dither.h"
 
+static int flow_sdm(sox_effect_t * effp, const sox_sample_t * ibuf,
+    sox_sample_t * obuf, size_t * isamp, size_t * osamp)
+{
+  priv_t * p = (priv_t *)effp->priv;
+  return sdm_process(p->sdm, ibuf, obuf, isamp, osamp);
+}
+
 static int flow_no_shape(sox_effect_t * effp, const sox_sample_t * ibuf,
     sox_sample_t * obuf, size_t * isamp, size_t * osamp)
 {
@@ -364,17 +373,17 @@ static int start(sox_effect_t * effp)
   if (effp->in_signal.precision <= p->prec || p->prec > 24)
     return SOX_EFF_NULL;   /* Dithering not needed at this resolution */
 
-  if (p->prec == 1) {
-    /* The general dither routines don't work in this case, so notify
-       user and leave it at that for now.
-       TODO: Some special-case treatment of 1-bit noise shaping will be
-         needed for meaningful DSD write support. */
-    lsx_warn("Dithering/noise-shaping to 1 bit is currently not supported.");
-    return SOX_EFF_NULL;
-  }
-
   effp->out_signal.precision = p->prec;
 
+  if (p->prec == 1) {
+    p->sdm = sdm_init(NULL, 0, 0, 0);
+    if (!p->sdm)
+      return SOX_EOF;
+
+    p->flow = flow_sdm;
+    return SOX_SUCCESS;
+  }
+
   p->flow = flow_no_shape;
   if (p->filter_name) {
     filter_t const * f;
@@ -418,6 +427,23 @@ static int flow(sox_effect_t * effp, const sox_sample_t * ibuf,
   return p->flow(effp, ibuf, obuf, isamp, osamp);
 }
 
+static int drain(sox_effect_t * effp, sox_sample_t * obuf, size_t * osamp)
+{
+  priv_t * p = (priv_t *)effp->priv;
+  if (p->sdm)
+    return sdm_drain(p->sdm, obuf, osamp);
+  *osamp = 0;
+  return SOX_SUCCESS;
+}
+
+static int stop(sox_effect_t * effp)
+{
+  priv_t * p = (priv_t *)effp->priv;
+  if (p->sdm)
+    sdm_close(p->sdm);
+  return SOX_SUCCESS;
+}
+
 sox_effect_handler_t const * lsx_dither_effect_fn(void)
 {
   static sox_effect_handler_t handler = {
@@ -430,7 +456,7 @@ sox_effect_handler_t const * lsx_dither_effect_fn(void)
     "\n           shibata, low-shibata, high-shibata."
     "\n  -a       Automatically turn on & off dithering as needed (use with caution!)"
     "\n  -p bits  Override the target sample precision",
-    SOX_EFF_PREC, getopts, start, flow, 0, 0, 0, sizeof(priv_t)
+    SOX_EFF_PREC, getopts, start, flow, drain, stop, 0, sizeof(priv_t)
   };
   return &handler;
 }
diff --git a/src/effects.h b/src/effects.h
index 450a5c2..e8987de 100644
--- a/src/effects.h
+++ b/src/effects.h
@@ -66,6 +66,7 @@
   EFFECT(reverb)
   EFFECT(reverse)
   EFFECT(riaa)
+  EFFECT(sdm)
   EFFECT(silence)
   EFFECT(sinc)
 #ifdef HAVE_PNG
diff --git a/src/sdm.c b/src/sdm.c
new file mode 100644
index 0000000..4770ba7
--- /dev/null
+++ b/src/sdm.c
@@ -0,0 +1,666 @@
+/* Sigma-Delta modulator
+ * Copyright (c) 2015 Mans Rullgard <mans@mansr.com>
+ *
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+/*
+ * References:
+ *
+ * Derk Reefman, Erwin Janssen. 2002.
+ * "Signal processing for Direct Stream Digital: A tutorial for
+ * digital Sigma Delta modulation and 1-bit digital audio processing"
+ * http://www.emmlabs.com/pdf/papers/DerkSigmaDelta.pdf
+ *
+ * P.J.A. Harpe. 2003.
+ * "Trellis-type Sigma Delta Modulators for Super Audio CD applications"
+ * http://www.pieterharpe.nl/docs/report_trunc.pdf
+ *
+ * Richard Schreier. 2000-2011.
+ * "Delta Sigma Toolbox"
+ * http://www.mathworks.com/matlabcentral/fileexchange/19-delta-sigma-toolbox
+ */
+
+#define _ISOC11_SOURCE
+
+#include "sox_i.h"
+#include "sdm.h"
+
+#define MAX_FILTER_ORDER 8
+#define PATH_HASH_SIZE 128
+#define PATH_HASH_MASK (PATH_HASH_SIZE - 1)
+
+typedef struct {
+  const double  a[MAX_FILTER_ORDER];
+  const double  g[MAX_FILTER_ORDER];
+  int32_t       order;
+  double        scale;
+  const char   *name;
+  int           trellis_order;
+  int           trellis_num;
+  int           trellis_lat;
+} LSX_ALIGN(32) sdm_filter_t;
+
+typedef struct sdm_state {
+  double        state[MAX_FILTER_ORDER];
+  double        cost;
+  uint32_t      path;
+  uint8_t       next;
+  uint8_t       hist;
+  uint8_t       hist_used;
+  struct sdm_state *parent;
+  struct sdm_state *path_list;
+} LSX_ALIGN(32) sdm_state_t;
+
+typedef struct {
+  sdm_state_t   sdm[2 * SDM_TRELLIS_MAX_NUM];
+  sdm_state_t  *act[SDM_TRELLIS_MAX_NUM];
+} sdm_trellis_t;
+
+struct sdm {
+  sdm_trellis_t trellis[2];
+  sdm_state_t  *path_hash[PATH_HASH_SIZE];
+  uint8_t       hist_free[2 * SDM_TRELLIS_MAX_NUM];
+  unsigned      hist_fnum;
+  uint32_t      trellis_mask;
+  uint32_t      trellis_num;
+  uint32_t      trellis_lat;
+  unsigned      num_cands;
+  unsigned      pos;
+  unsigned      pending;
+  unsigned      draining;
+  unsigned      idx;
+  const sdm_filter_t *filter;
+  double        prev_y;
+  uint64_t      conv_fail;
+  uint8_t       hist[2 * SDM_TRELLIS_MAX_NUM][SDM_TRELLIS_MAX_LAT / 8];
+};
+
+static sdm_filter_t sdm_filter_fast = {
+  {
+    8.11979821108649e-01,  3.21578526301959e-01,
+    8.03842133084308e-02,  1.36652129069769e-02,
+    1.62614939720868e-03,  1.18730980344801e-04,
+    5.81753857463105e-06, -4.43443601283455e-08,
+  },
+  {
+    8.10778762576884e-05, 0, 6.65340842513387e-04, 0,
+    1.52852264942192e-03, 0, 2.22035724073886e-03, 0,
+  },
+  8,
+  0.492,
+  "fast",
+  0, 0, 0,
+};
+
+static sdm_filter_t sdm_filter_hq = {
+  {
+    1.05966158780858e+00, 5.47009636009057e-01,
+    1.76263553121650e-01, 3.79953988065231e-02,
+    5.31936695611806e-03, 4.64865473231071e-04,
+    1.21930947998838e-05,
+  },
+  {
+    0, 3.96825873999969e-04, 0, 1.32436089566069e-03,
+    0, 2.16898568341885e-03, 0,
+  },
+  7,
+  0.50,
+  "hq",
+  16,
+  10,
+  1280,
+};
+
+static sdm_filter_t sdm_filter_audiophile = {
+  {
+    1.17270840974752e+00, 6.69435755948125e-01,
+    2.38385844332401e-01, 5.67404687000751e-02,
+    8.79926385368848e-03, 8.47664163271991e-04,
+    2.69551713329985e-05,
+  },
+  {
+    0, 3.96825873999969e-04, 0, 1.32436089566069e-03,
+    0, 2.16898568341885e-03, 0,
+  },
+  7,
+  0.50,
+  "audiophile",
+  24,
+  16,
+  1664,
+};
+
+static sdm_filter_t sdm_filter_goldenear = {
+  {
+    1.33055162190254e+00, 8.60392723676436e-01,
+    3.46524494169335e-01, 9.31146164773126e-02,
+    1.63339758570028e-02, 1.76908163241072e-03,
+    6.86294038857449e-05,
+  },
+  {
+    0, 3.96825873999969e-04, 0, 1.32436089566069e-03,
+    0, 2.16898568341885e-03, 0,
+  },
+  7,
+  0.50,
+  "goldenear",
+  24,
+  24,
+  2048,
+};
+
+static const sdm_filter_t *sdm_filters[] = {
+  &sdm_filter_fast,
+  &sdm_filter_hq,
+  &sdm_filter_audiophile,
+  &sdm_filter_goldenear,
+  NULL,
+};
+
+static const sdm_filter_t *sdm_find_filter(const char *name)
+{
+  int i;
+
+  if (!name)
+    return sdm_filters[0];
+
+  for (i = 0; sdm_filters[i]; i++)
+    if (!strcmp(name, sdm_filters[i]->name))
+      return sdm_filters[i];
+
+  return NULL;
+}
+
+#include "sdm_x86.h"
+
+#ifndef sdm_filter_calc
+static double sdm_filter_calc(const double *s, double *d,
+                              const sdm_filter_t *f,
+                              double x, double y)
+{
+  const double *a = f->a;
+  const double *g = f->g;
+  double v;
+  int i;
+
+  d[0] = s[0] - g[0] * s[1] + x - y;
+  v = x + a[0] * d[0];
+
+  for (i = 1; i < f->order - 1; i++) {
+    d[i] = s[i] + s[i - 1] - g[i] * s[i + 1];
+    v += a[i] * d[i];
+  }
+
+  d[i] = s[i] + s[i - 1];
+  v += a[i] * d[i];
+
+  return v;
+}
+#endif
+
+#ifndef sdm_filter_calc2
+static void sdm_filter_calc2(sdm_state_t *src, sdm_state_t *dst,
+                             const sdm_filter_t *f, double x)
+{
+  const double *a = f->a;
+  double v;
+  int i;
+
+  v = sdm_filter_calc(src->state, dst[0].state, f, x, 0.0);
+
+  for (i = 0; i < f->order; i++)
+    dst[1].state[i] = dst[0].state[i];
+
+  dst[0].state[0] += 1.0;
+  dst[1].state[0] -= 1.0;
+
+  dst[0].cost = src->cost + sqr(v + a[0]);
+  dst[1].cost = src->cost + sqr(v - a[0]);
+}
+#endif
+
+static inline unsigned sdm_histbuf_get(sdm_t *p)
+{
+  return p->hist_free[--p->hist_fnum];
+}
+
+static inline void sdm_histbuf_put(sdm_t *p, unsigned h)
+{
+  p->hist_free[p->hist_fnum++] = h;
+}
+
+static inline unsigned get_bit(uint8_t *p, unsigned i)
+{
+  return (p[i >> 3] >> (i & 7)) & 1;
+}
+
+static inline void put_bit(uint8_t *p, unsigned i, unsigned v)
+{
+  int b = p[i >> 3];
+  int s = i & 7;
+  b &= ~(1 << s);
+  b |= v << s;
+  p[i >> 3] = b;
+}
+
+static inline unsigned sdm_hist_get(sdm_t *p, unsigned h, unsigned i)
+{
+  return get_bit(p->hist[h], i);
+}
+
+static inline void sdm_hist_put(sdm_t *p, unsigned h, unsigned i, unsigned v)
+{
+  put_bit(p->hist[h], i, v);
+}
+
+static inline void sdm_hist_copy(sdm_t *p, unsigned d, unsigned s)
+{
+  memcpy(p->hist[d], p->hist[s], (size_t)(p->trellis_lat + 7) / 8);
+}
+
+static inline int64_t dbl2int64(double a)
+{
+  union { double d; int64_t i; } v;
+  v.d = a;
+  return v.i;
+}
+
+static inline int sdm_cmplt(sdm_state_t *a, sdm_state_t *b)
+{
+  return dbl2int64(a->cost) < dbl2int64(b->cost);
+}
+
+static inline int sdm_cmple(sdm_state_t *a, sdm_state_t *b)
+{
+  return dbl2int64(a->cost) <= dbl2int64(b->cost);
+}
+
+static sdm_state_t *sdm_check_path(sdm_t *p, sdm_state_t *s)
+{
+  unsigned index = s->path & PATH_HASH_MASK;
+  sdm_state_t **hash = p->path_hash;
+  sdm_state_t *t = hash[index];
+
+  while (t) {
+    if (t->path == s->path)
+      return t;
+    t = t->path_list;
+  }
+
+  s->path_list = hash[index];
+  hash[index] = s;
+
+  return NULL;
+}
+
+static unsigned sdm_sort_cands(sdm_t *p, sdm_trellis_t *st)
+{
+  sdm_state_t *r, *s, *t;
+  sdm_state_t *min;
+  unsigned i, j, n;
+
+  for (i = 0; i < 2 * p->num_cands; i++) {
+    s = &st->sdm[i];
+    p->path_hash[s->path & PATH_HASH_MASK] = NULL;
+    if (!i || sdm_cmplt(s, min))
+      min = s;
+  }
+
+  for (i = 0, n = 0; i < 2 * p->num_cands; i++) {
+    s = &st->sdm[i];
+
+    if (s->next != min->next)
+      continue;
+
+    if (n == p->trellis_num && sdm_cmple(st->act[n - 1], s))
+      continue;
+
+    t = sdm_check_path(p, s);
+
+    if (!t) {
+      for (j = n; j > 0; j--) {
+        t = st->act[j - 1];
+        if (sdm_cmple(t, s))
+          break;
+        st->act[j] = t;
+      }
+      if (j < p->trellis_num)
+        st->act[j] = s;
+      if (n < p->trellis_num)
+        n++;
+      continue;
+    }
+
+    if (sdm_cmple(t, s))
+      continue;
+
+    for (j = 0; j < n; j++) {
+      r = st->act[j];
+      if (sdm_cmple(s, r))
+        break;
+    }
+
+    st->act[j++] = s;
+
+    while (r != t && j < n) {
+      sdm_state_t *u = st->act[j];
+      st->act[j] = r;
+      r = u;
+      j++;
+    }
+  }
+
+  return n;
+}
+
+static inline void sdm_step(sdm_t *p, sdm_state_t *cur, sdm_state_t *next,
+                            double x)
+{
+  const sdm_filter_t *f = p->filter;
+  int i;
+
+  sdm_filter_calc2(cur, next, f, x);
+
+  for (i = 0; i < 2; i++) {
+    next[i].path = (cur->path << 1 | i) & p->trellis_mask;
+    next[i].hist = cur->hist;
+    next[i].next = cur->next;
+    next[i].parent = cur;
+  }
+}
+
+static sox_sample_t sdm_sample_trellis(sdm_t *p, double x)
+{
+  sdm_trellis_t *st_cur = &p->trellis[p->idx];
+  sdm_trellis_t *st_next = &p->trellis[p->idx ^ 1];
+  double min_cost;
+  unsigned new_cands;
+  unsigned next_pos;
+  unsigned output;
+  unsigned i;
+
+  next_pos = p->pos + 1;
+  if (next_pos == p->trellis_lat)
+    next_pos = 0;
+
+  for (i = 0; i < p->num_cands; i++) {
+    sdm_state_t *cur = st_cur->act[i];
+    sdm_state_t *next = &st_next->sdm[2 * i];
+    sdm_step(p, cur, next, x);
+    cur->next = sdm_hist_get(p, cur->hist, next_pos);
+    cur->hist_used = 0;
+  }
+
+  new_cands = sdm_sort_cands(p, st_next);
+  min_cost = st_next->act[0]->cost;
+  output = st_next->act[0]->next;
+
+  for (i = 0; i < new_cands; i++) {
+    sdm_state_t *s = st_next->act[i];
+    if (s->parent->hist_used) {
+      unsigned h = sdm_histbuf_get(p);
+      sdm_hist_copy(p, h, s->hist);
+      s->hist = h;
+    } else {
+      s->parent->hist_used = 1;
+    }
+
+    s->cost -= min_cost;
+    s->next = s->parent->next;
+    sdm_hist_put(p, s->hist, p->pos, s->path & 1);
+  }
+
+  for (i = 0; i < p->num_cands; i++) {
+    sdm_state_t *s = st_cur->act[i];
+    if (!s->hist_used)
+      sdm_histbuf_put(p, s->hist);
+  }
+
+  if (new_cands < p->num_cands)
+    p->conv_fail++;
+
+  p->num_cands = new_cands;
+  p->pos = next_pos;
+  p->idx ^= 1;
+
+  return output ? SOX_SAMPLE_MAX : -SOX_SAMPLE_MAX;
+}
+
+static sox_sample_t sdm_sample(sdm_t *p, double x)
+{
+  const sdm_filter_t *f = p->filter;
+  double *s0 = p->trellis[0].sdm[p->idx].state;
+  double *s1 = p->trellis[0].sdm[p->idx ^ 1].state;
+  double y, v;
+
+  v = sdm_filter_calc(s0, s1, f, x, p->prev_y);
+  y = sign(v);
+
+  p->idx ^= 1;
+  p->prev_y = y;
+
+  return y * SOX_SAMPLE_MAX;
+}
+
+int sdm_process(sdm_t *p, const sox_sample_t *ibuf, sox_sample_t *obuf,
+                size_t *ilen, size_t *olen)
+{
+  sox_sample_t *out = obuf;
+  size_t len = *ilen = min(*ilen, *olen);  /* consume at most *olen inputs */
+  double scale = p->filter->scale;
+  double x;
+  /* A non-zero trellis_mask selects the trellis search, else the plain loop. */
+  if (p->trellis_mask) {
+    if (p->pending < p->trellis_lat) {
+      size_t pre = min(p->trellis_lat - p->pending, len);  /* latency pre-fill */
+      p->pending += pre;
+      len -= pre;
+      while (pre--) {
+        x = *ibuf++ * scale * (1.0 / SOX_SAMPLE_MAX);
+        sdm_sample_trellis(p, x);  /* return value dropped: no output yet */
+      }
+    }
+    while (len--) {
+      x = *ibuf++ * scale * (1.0 / SOX_SAMPLE_MAX);
+      *out++ = sdm_sample_trellis(p, x);
+    }
+  } else {
+    while (len--) {
+      x = *ibuf++ * scale * (1.0 / SOX_SAMPLE_MAX);
+      *out++ = sdm_sample(p, x);
+    }
+  }
+  /* Report how many samples were actually written to obuf. */
+  *olen = out - obuf;
+
+  return SOX_SUCCESS;
+}
+
+int sdm_drain(sdm_t *p, sox_sample_t *obuf, size_t *olen)
+{
+  if (p->trellis_mask) {
+    size_t len = *olen = min(p->pending, *olen);
+    /* First call with pending below trellis_lat: pad the input with zeros. */
+    if (!p->draining && p->pending < p->trellis_lat) {
+      unsigned flush = p->trellis_lat - p->pending;
+      while (flush--)
+        sdm_sample_trellis(p, 0.0);
+    }
+
+    p->draining = 1;
+    p->pending -= len;
+    /* Feed zero inputs; each step returns one output sample. */
+    while (len--)
+      *obuf++ = sdm_sample_trellis(p, 0.0);
+  } else {
+    *olen = 0;  /* sdm_process() already emitted one output per input */
+  }
+
+  return SOX_SUCCESS;
+}
+
+/* Create a modulator.  A NULL filter_name selects the first ("fast") filter.
+ * Zero trellis values fall back to the filter presets; the trellis search is
+ * skipped when both orders are 0.  Returns NULL on bad input or failed alloc. */
+sdm_t *sdm_init(const char *filter_name, unsigned trellis_order,
+                unsigned trellis_num, unsigned trellis_latency)
+{
+  sdm_t *p;
+  const sdm_filter_t *f;
+  sdm_trellis_t *st;
+  unsigned i;
+
+  if (trellis_order > SDM_TRELLIS_MAX_ORDER) {
+    lsx_fail("trellis order too high (max %d)", SDM_TRELLIS_MAX_ORDER);
+    return NULL;
+  }
+
+  if (trellis_num > SDM_TRELLIS_MAX_NUM) {
+    lsx_fail("trellis size too high (max %d)", SDM_TRELLIS_MAX_NUM);
+    return NULL;
+  }
+
+  if (trellis_latency > SDM_TRELLIS_MAX_LAT) {
+    lsx_fail("trellis latency too high (max %d)", SDM_TRELLIS_MAX_LAT);
+    return NULL;
+  }
+
+  /* Resolve the filter before allocating so a bad name cannot leak p. */
+  f = sdm_find_filter(filter_name);
+  if (!f) {
+    lsx_fail("invalid filter name `%s'", filter_name);
+    return NULL;
+  }
+
+  p = aligned_alloc((size_t)32, sizeof(*p));
+  if (!p)
+    return NULL;
+
+  memset(p, 0, sizeof(*p));
+  p->filter = f;
+  st = &p->trellis[0];
+
+  if (trellis_order || f->trellis_order) {
+    if (trellis_order < 1)
+      trellis_order = f->trellis_order ? f->trellis_order : 13;
+
+    if (trellis_num)
+      p->trellis_num = trellis_num;
+    else
+      p->trellis_num = f->trellis_num ? f->trellis_num : 8;
+
+    if (trellis_latency)
+      p->trellis_lat = trellis_latency;
+    else
+      p->trellis_lat = f->trellis_lat ? f->trellis_lat : 1024;
+
+    p->trellis_mask = ((uint64_t)1 << trellis_order) - 1;
+
+    for (i = 0; i < 2 * p->trellis_num; i++)
+      sdm_histbuf_put(p, i);
+    /* Start from a single candidate path that owns one history buffer. */
+    p->num_cands = 1;
+    st->sdm[0].hist = sdm_histbuf_get(p);
+    st->sdm[0].path = 0;
+    st->act[0] = &st->sdm[0];
+  }
+
+  return p;
+}
+
+void sdm_close(sdm_t *p)
+{
+  /* conv_fail is a uint64_t, so print it with the unsigned format macro. */
+  if (p->conv_fail)
+    lsx_warn("failed to converge %"PRIu64" times", p->conv_fail);
+  aligned_free(p);
+}
+
+typedef struct sdm_effect {
+  sdm_t        *sdm;
+  const char   *filter_name;
+  uint32_t      trellis_order;
+  uint32_t      trellis_num;
+  uint32_t      trellis_lat;
+} sdm_effect_t;
+
+static int getopts(sox_effect_t *effp, int argc, char **argv)
+{
+  sdm_effect_t *p = effp->priv;
+  lsx_getopt_t optstate;
+  int c;
+  /* Parse [-f filter] [-t order] [-n num] [-l latency]; else usage error. */
+  lsx_getopt_init(argc, argv, "+f:t:n:l:", NULL, lsx_getopt_flag_none,
+                  1, &optstate);
+
+  /* Each option stores into its own field; -t must not clobber -n. */
+  while ((c = lsx_getopt(&optstate)) != -1) switch (c) {
+    case 'f': p->filter_name = optstate.arg; break;
+    GETOPT_NUMERIC(optstate, 't', trellis_order, 8, SDM_TRELLIS_MAX_ORDER)
+    GETOPT_NUMERIC(optstate, 'n', trellis_num, 8, SDM_TRELLIS_MAX_NUM)
+    GETOPT_NUMERIC(optstate, 'l', trellis_lat, 100, SDM_TRELLIS_MAX_LAT)
+    default: lsx_fail("invalid option `-%c'", optstate.opt); return lsx_usage(effp);
+  }
+  return argc != optstate.ind ? lsx_usage(effp) : SOX_SUCCESS;
+}
+
+static int start(sox_effect_t *effp)
+{
+  sdm_effect_t *p = effp->priv;
+
+  p->sdm = sdm_init(p->filter_name, p->trellis_order,
+                    p->trellis_num, p->trellis_lat);
+  if (!p->sdm)
+    return SOX_EOF;
+
+  effp->out_signal.precision = 1;
+
+  return SOX_SUCCESS;
+}
+
+static int flow(sox_effect_t *effp, const sox_sample_t *ibuf,
+                sox_sample_t *obuf, size_t *isamp, size_t *osamp)
+{
+  sdm_effect_t *p = effp->priv;
+  return sdm_process(p->sdm, ibuf, obuf, isamp, osamp);
+}
+
+static int drain(sox_effect_t *effp, sox_sample_t *obuf, size_t *osamp)
+{
+  sdm_effect_t *p = effp->priv;
+  return sdm_drain(p->sdm, obuf, osamp);
+}
+
+static int stop(sox_effect_t *effp)
+{
+  sdm_effect_t *p = effp->priv;
+  sdm_close(p->sdm);
+  return SOX_SUCCESS;
+}
+
+const sox_effect_handler_t *lsx_sdm_effect_fn(void)
+{
+  static sox_effect_handler_t handler = {
+    "sdm", "[-f filter] [-t order] [-n num] [-l latency]"
+    "\n  -f       Set filter to one of: fast, hq, audiophile, goldenear"
+    "\n           Advanced options:"
+    "\n  -t       Override trellis order"
+    "\n  -n       Override number of trellis paths"
+    "\n  -l       Override trellis latency",
+    SOX_EFF_PREC, getopts, start, flow, drain, stop, 0, sizeof(sdm_effect_t),
+  };
+  return &handler;
+}
diff --git a/src/sdm.h b/src/sdm.h
new file mode 100644
index 0000000..98082aa
--- /dev/null
+++ b/src/sdm.h
@@ -0,0 +1,42 @@
+/* Sigma-Delta modulator
+ * Copyright (c) 2015 Mans Rullgard <mans@mansr.com>
+ *
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#ifndef SOX_SDM_H
+#define SOX_SDM_H
+
+#include "sox_i.h"
+
+#define SDM_TRELLIS_MAX_ORDER 32
+#define SDM_TRELLIS_MAX_NUM   32
+#define SDM_TRELLIS_MAX_LAT   2048
+
+typedef struct sdm sdm_t;
+
+sdm_t *sdm_init(const char *filter_name,
+                unsigned trellis_order,
+                unsigned trellis_num,
+                unsigned trellis_latency);
+
+int sdm_process(sdm_t *s, const sox_sample_t *ibuf, sox_sample_t *obuf,
+                size_t *ilen, size_t *olen);
+
+int sdm_drain(sdm_t *s, sox_sample_t *obuf, size_t *olen);
+
+void sdm_close(sdm_t *s);
+
+#endif
diff --git a/src/sdm_x86.h b/src/sdm_x86.h
new file mode 100644
index 0000000..a40833d
--- /dev/null
+++ b/src/sdm_x86.h
@@ -0,0 +1,235 @@
+/* Sigma-Delta modulator
+ * Copyright (c) 2015 Mans Rullgard <mans@mansr.com>
+ *
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#ifndef SOX_SDM_X86_H
+#define SOX_SDM_X86_H
+
+#ifdef __AVX__
+
+#include <immintrin.h>
+
+#define SDM_FILTER_AVX(s0, s1, src, x) do {                               \
+    __m256d tx, t0, t1, t2;                                               \
+    __m256d p0, p1;                                                       \
+    __m256d sx;                                                           \
+                                                                          \
+    sx = _mm256_set1_pd(x);                                               \
+    s0 = _mm256_load_pd(src);                                             \
+    s1 = _mm256_load_pd(src + 4);                                         \
+                                                                          \
+    p0 = _mm256_permute_pd(s0, 5);                                        \
+    p1 = _mm256_permute_pd(s1, 5);                                        \
+                                                                          \
+    tx = _mm256_blend_pd(p0, _mm256_permute2f128_pd(sx, p0, 0x21), 0x5);  \
+    t0 = _mm256_blend_pd(p1, _mm256_permute2f128_pd(p0, p1, 0x21), 0x5);  \
+    t1 = _mm256_permute2f128_pd(tx, t0, 0x21);                            \
+    t2 = _mm256_blend_pd(p1, _mm256_permute2f128_pd(p1, p1, 0x21), 0xa);  \
+                                                                          \
+    s0 = _mm256_add_pd(s0, tx);                                           \
+    s1 = _mm256_add_pd(s1, t0);                                           \
+                                                                          \
+    s0 = _mm256_sub_pd(s0, _mm256_mul_pd(t1, _mm256_load_pd(g)));         \
+    s1 = _mm256_sub_pd(s1, _mm256_mul_pd(t2, _mm256_load_pd(g + 4)));     \
+  } while (0)
+
+#define sdm_filter_calc sdm_filter_calc_avx
+static double sdm_filter_calc_avx(const double *src, double *dst,
+                                  const sdm_filter_t *f,
+                                  double x, double y)
+{
+  const double *a = f->a;
+  const double *g = f->g;
+  __m256d s0, s1;
+  __m256d v0, v1;
+  __m128d v;
+
+  SDM_FILTER_AVX(s0, s1, src, x - y);
+
+  _mm256_store_pd(dst,     s0);
+  _mm256_store_pd(dst + 4, s1);
+
+  v0 = _mm256_mul_pd(s0, _mm256_load_pd(a));
+  v1 = _mm256_mul_pd(s1, _mm256_load_pd(a + 4));
+  v0 = _mm256_add_pd(v0, v1);
+
+  v = _mm_add_pd(_mm256_castpd256_pd128(v0), _mm256_extractf128_pd(v0, 1));
+  v = _mm_add_pd(v, _mm_permute_pd(v, 1));
+
+  return x + _mm_cvtsd_f64(v);
+}
+
+#define sdm_filter_calc2 sdm_filter_calc2_avx
+static void sdm_filter_calc2_avx(sdm_state_t *src, sdm_state_t *dst,
+                                 const sdm_filter_t *f, double x)
+{
+  const double *a = f->a;
+  const double *g = f->g;
+  __m256d s0, s1;
+  __m256d t0, t1;
+  __m256d v0, v1, v2;
+  __m128d r0, r1;
+  __m256d a0;
+
+  SDM_FILTER_AVX(s0, s1, src->state, x);
+
+  t1 = _mm256_set_pd(0.0, 0.0, 0.0, 1.0);
+  t0 = _mm256_sub_pd(s0, t1);
+  s0 = _mm256_add_pd(s0, t1);
+
+  _mm256_store_pd(dst[0].state,     s0);
+  _mm256_store_pd(dst[0].state + 4, s1);
+
+  _mm256_store_pd(dst[1].state,     t0);
+  _mm256_store_pd(dst[1].state + 4, s1);
+
+  a0 = _mm256_load_pd(a);
+  v0 = _mm256_mul_pd(s0, a0);
+  v1 = _mm256_mul_pd(t0, a0);
+  v2 = _mm256_mul_pd(s1, _mm256_load_pd(a + 4));
+
+  v0 = _mm256_add_pd(v0, v2);
+  v1 = _mm256_add_pd(v1, v2);
+
+  r0 = _mm_add_pd(_mm256_castpd256_pd128(v0), _mm256_extractf128_pd(v0, 1));
+  r1 = _mm_add_pd(_mm256_castpd256_pd128(v1), _mm256_extractf128_pd(v1, 1));
+
+  r0 = _mm_add_pd(r0, _mm_permute_pd(r1, 1));
+  r0 = _mm_add_pd(r0, _mm_set1_pd(x));
+  r0 = _mm_mul_pd(r0, r0);
+
+  r0 = _mm_add_pd(r0, _mm_load1_pd(&src->cost));
+
+  _mm_storel_pd(&dst[0].cost, r0);
+  _mm_storeh_pd(&dst[1].cost, r0);
+}
+
+#elif defined __SSE2__ || defined _M_X64 || \
+  (defined _M_IX86_FP && _M_IX86_FP >= 2)
+
+#include <emmintrin.h>
+
+#define SWAPHL(x) \
+  _mm_castsi128_pd(_mm_shuffle_epi32(_mm_castpd_si128(x), 0x4e))
+
+#define SDM_FILTER_SSE2(s0, s1, s2, s3, src, x) do {            \
+    __m128d tx, t0, t1, t2, t3;                                 \
+    __m128d sx;                                                 \
+                                                                \
+    sx = _mm_set1_pd(x);                                        \
+    s0 = _mm_load_pd(src);                                      \
+    s1 = _mm_load_pd(src + 2);                                  \
+    s2 = _mm_load_pd(src + 4);                                  \
+    s3 = _mm_load_pd(src + 6);                                  \
+                                                                \
+    tx = _mm_shuffle_pd(sx, s0, 1);                             \
+    t0 = _mm_shuffle_pd(s0, s1, 1);                             \
+    t1 = _mm_shuffle_pd(s1, s2, 1);                             \
+    t2 = _mm_shuffle_pd(s2, s3, 1);                             \
+    t3 = _mm_shuffle_pd(s3, s3, 1);                             \
+                                                                \
+    s0 = _mm_add_pd(s0, tx);                                    \
+    s1 = _mm_add_pd(s1, t0);                                    \
+    s2 = _mm_add_pd(s2, t1);                                    \
+    s3 = _mm_add_pd(s3, t2);                                    \
+                                                                \
+    s0 = _mm_sub_pd(s0, _mm_mul_pd(t0, _mm_load_pd(g)));        \
+    s1 = _mm_sub_pd(s1, _mm_mul_pd(t1, _mm_load_pd(g + 2)));    \
+    s2 = _mm_sub_pd(s2, _mm_mul_pd(t2, _mm_load_pd(g + 4)));    \
+    s3 = _mm_sub_pd(s3, _mm_mul_pd(t3, _mm_load_pd(g + 6)));    \
+  } while (0)
+
+#define sdm_filter_calc sdm_filter_calc_sse2
+static double sdm_filter_calc_sse2(const double *src, double *dst,
+                                   const sdm_filter_t *f,
+                                   double x, double y)
+{
+  const double *a = f->a;
+  const double *g = f->g;
+  __m128d s0, s1, s2, s3;
+  __m128d v0, v1;
+
+  SDM_FILTER_SSE2(s0, s1, s2, s3, src, x - y);
+
+  _mm_store_pd(dst,     s0);
+  _mm_store_pd(dst + 2, s1);
+  _mm_store_pd(dst + 4, s2);
+  _mm_store_pd(dst + 6, s3);
+
+  v0 = _mm_mul_pd(s0, _mm_load_pd(a));
+  v1 = _mm_mul_pd(s1, _mm_load_pd(a + 2));
+  v0 = _mm_add_pd(v0, _mm_mul_pd(s2, _mm_load_pd(a + 4)));
+  v1 = _mm_add_pd(v1, _mm_mul_pd(s3, _mm_load_pd(a + 6)));
+  v0 = _mm_add_pd(v0, v1);
+  v0 = _mm_add_pd(v0, SWAPHL(v0));
+
+  return x + _mm_cvtsd_f64(v0);
+}
+
+#define sdm_filter_calc2 sdm_filter_calc2_sse2
+static void sdm_filter_calc2_sse2(sdm_state_t *src, sdm_state_t *dst,
+                                  const sdm_filter_t *f, double x)
+{
+  const double *a = f->a;
+  const double *g = f->g;
+  __m128d s0, s1, s2, s3;
+  __m128d v0, v1, v2, v3;
+  __m128d t0, t1;
+  __m128d a0;
+
+  SDM_FILTER_SSE2(s0, s1, s2, s3, src->state, x);
+
+  t1 = _mm_set_sd(1.0);
+  t0 = _mm_sub_pd(s0, t1);
+  s0 = _mm_add_pd(s0, t1);
+
+  _mm_store_pd(dst[0].state,     s0);
+  _mm_store_pd(dst[0].state + 2, s1);
+  _mm_store_pd(dst[0].state + 4, s2);
+  _mm_store_pd(dst[0].state + 6, s3);
+
+  _mm_store_pd(dst[1].state,     t0);
+  _mm_store_pd(dst[1].state + 2, s1);
+  _mm_store_pd(dst[1].state + 4, s2);
+  _mm_store_pd(dst[1].state + 6, s3);
+
+  a0 = _mm_load_pd(a);
+  v0 = _mm_mul_pd(s0, a0);
+  t0 = _mm_mul_pd(t0, a0);
+  v1 = _mm_mul_pd(s1, _mm_load_pd(a + 2));
+  v2 = _mm_mul_pd(s2, _mm_load_pd(a + 4));
+  v3 = _mm_mul_pd(s3, _mm_load_pd(a + 6));
+
+  v1 = _mm_add_pd(v1, v2);
+  v1 = _mm_add_pd(v1, v3);
+
+  v0 = _mm_add_pd(v0, v1);
+  t0 = _mm_add_pd(t0, v1);
+
+  v0 = _mm_add_pd(v0, SWAPHL(t0));
+  v0 = _mm_add_pd(v0, _mm_set1_pd(x));
+
+  v0 = _mm_mul_pd(v0, v0);
+  v0 = _mm_add_pd(v0, _mm_load1_pd(&src->cost));
+
+  _mm_storel_pd(&dst[0].cost, v0);
+  _mm_storeh_pd(&dst[1].cost, v0);
+}
+
+#endif
+
+#endif
-- 
2.5.2


------------------------------------------------------------------------------
Monitor Your Dynamic Infrastructure at Any Scale With Datadog!
Get real-time metrics from all of your servers, apps and tools
in one place.
SourceForge users - Click here to start your Free Trial of Datadog now!
http://pubads.g.doubleclick.net/gampad/clk?id=241902991&iu=/4140

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 6/6] Add DSD over PCM (dop) effect
  2015-09-16 14:16 [PATCH 1/6] Add SOX_ENCODING_DSD Mans Rullgard
                   ` (3 preceding siblings ...)
  2015-09-16 14:16 ` [PATCH 5/6] Add a sigma-delta modulator for DSD encoding Mans Rullgard
@ 2015-09-16 14:16 ` Mans Rullgard
  2015-12-19 12:09   ` Eric Wong
  4 siblings, 1 reply; 18+ messages in thread
From: Mans Rullgard @ 2015-09-16 14:16 UTC (permalink / raw)
  To: sox-devel

---
 msvc10/LibSoX.vcxproj         |   1 +
 msvc10/LibSoX.vcxproj.filters |   3 +
 sox.1                         |   4 ++
 src/Makefile.am               |   2 +-
 src/dop.c                     | 158 ++++++++++++++++++++++++++++++++++++++++++
 src/effects.h                 |   1 +
 6 files changed, 168 insertions(+), 1 deletion(-)
 create mode 100644 src/dop.c

diff --git a/msvc10/LibSoX.vcxproj b/msvc10/LibSoX.vcxproj
index 4a475cf..5136eb8 100644
--- a/msvc10/LibSoX.vcxproj
+++ b/msvc10/LibSoX.vcxproj
@@ -207,6 +207,7 @@
     <ClCompile Include="..\src\dft_filter.c" />
     <ClCompile Include="..\src\dither.c" />
     <ClCompile Include="..\src\divide.c" />
+    <ClCompile Include="..\src\dop.c" />
     <ClCompile Include="..\src\downsample.c" />
     <ClCompile Include="..\src\earwax.c" />
     <ClCompile Include="..\src\echo.c" />
diff --git a/msvc10/LibSoX.vcxproj.filters b/msvc10/LibSoX.vcxproj.filters
index 01bda8c..030e10f 100644
--- a/msvc10/LibSoX.vcxproj.filters
+++ b/msvc10/LibSoX.vcxproj.filters
@@ -213,6 +213,9 @@
     <ClCompile Include="..\src\divide.c">
       <Filter>Effect Sources</Filter>
     </ClCompile>
+    <ClCompile Include="..\src\dop.c">
+      <Filter>Effect Sources</Filter>
+    </ClCompile>
     <ClCompile Include="..\src\earwax.c">
       <Filter>Effect Sources</Filter>
     </ClCompile>
diff --git a/sox.1 b/sox.1
index 98a84a1..f26a061 100644
--- a/sox.1
+++ b/sox.1
@@ -2021,6 +2021,10 @@ affects the audio.
 .SP
 See also the `Dithering' section above.
 .TP
+\fBdop\fR
+DSD over PCM.  1-bit DSD data is packed into 24-bit samples for
+transport over non-DSD-aware links.
+.TP
 \fBdownsample\fR [\fIfactor\fR(2)]
 Downsample the signal by an integer factor: Only the first out of
 each \fIfactor\fR samples is retained, the others are discarded.
diff --git a/src/Makefile.am b/src/Makefile.am
index ca7bae7..4450061 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -69,7 +69,7 @@ libsox_la_SOURCES = adpcms.c adpcms.h aiff.c aiff.h cvsd.c cvsd.h cvsdfilt.h \
 libsox_la_SOURCES += \
 	band.h bend.c biquad.c biquad.h biquads.c chorus.c compand.c \
 	compandt.c compandt.h contrast.c dcshift.c delay.c dft_filter.c \
-	dft_filter.h dither.c dither.h divide.c downsample.c earwax.c \
+	dft_filter.h dither.c dither.h divide.c dop.c downsample.c earwax.c \
 	echo.c echos.c effects.c effects.h effects_i.c effects_i_dsp.c \
 	fade.c fft4g.c fft4g.h fifo.h fir.c firfit.c flanger.c gain.c \
 	hilbert.c input.c ladspa.h ladspa.c loudness.c mcompand.c \
diff --git a/src/dop.c b/src/dop.c
new file mode 100644
index 0000000..e06fa9e
--- /dev/null
+++ b/src/dop.c
@@ -0,0 +1,158 @@
+/* DSD over PCM
+ * Copyright (c) 2015 Mans Rullgard <mans@mansr.com>
+ *
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include "sox_i.h"
+
+/* Per-instance state of the dop effect. */
+typedef struct dop {
+  /* One partially assembled output word per channel (allocated in
+   * dop_start); holds the bits of a frame that spans two flow calls. */
+  sox_sample_t *buf;
+  /* Value placed in bits 24..31 of the next output word; XORed with 0xff
+   * after every emitted frame. */
+  unsigned marker;
+  /* Number of DSD bits already gathered in buf for the pending frame. */
+  unsigned pos;
+} dop_t;
+
+/* Initial marker value; the XOR with 0xff alternates it with 0xfa. */
+#define DOP_MARKER 0x05
+
+/*
+ * Check that the stream can be packed and prepare the per-channel state.
+ *
+ * The input must be 1-bit and its rate exactly 16 times the output rate
+ * (16 DSD bits travel in each output sample); otherwise an error is logged
+ * and SOX_EOF is returned.  On success the output precision is set to 24
+ * bits and SOX_SUCCESS is returned.
+ *
+ * pos is not initialised here -- presumably the framework hands over a
+ * zeroed priv area; TODO confirm.
+ */
+static int dop_start(sox_effect_t *eff)
+{
+  dop_t *dop = eff->priv;
+
+  if (eff->in_signal.precision != 1) {
+    lsx_fail("1-bit input required");
+    return SOX_EOF;
+  }
+  if (eff->in_signal.rate != 16 * eff->out_signal.rate) {
+    lsx_fail("incorrect output rate, should be %.1fk",
+             eff->in_signal.rate / 16 / 1000);
+    return SOX_EOF;
+  }
+
+  dop->marker = DOP_MARKER;
+  dop->buf = lsx_calloc(eff->out_signal.channels, sizeof(*dop->buf));
+  eff->out_signal.precision = 24;
+
+  return SOX_SUCCESS;
+}
+
+/*
+ * Gather num one-bit samples of a single channel into a word.
+ *
+ * ibuf  first sample of the channel
+ * step  distance between consecutive samples of that channel
+ * pos   number of bits already occupied; the first new bit lands at bit
+ *       23 - pos and later ones at successively lower positions
+ * num   number of samples to read
+ *
+ * A sample greater than zero yields a 1 bit, anything else a 0 bit.
+ * Callers must keep pos + num within the 24 available bit positions.
+ */
+static unsigned dop_load_bits(const sox_sample_t *ibuf, unsigned step,
+                              unsigned pos, unsigned num)
+{
+  unsigned word = 0;
+  unsigned k;
+
+  for (k = 0; k < num; k++, ibuf += step)
+    if (*ibuf > 0)
+      word |= 1u << (23 - pos - k);
+
+  return word;
+}
+
+/*
+ * Pack 1-bit input frames into output words, 16 input frames per output
+ * frame.
+ *
+ * Each output word carries the marker in bits 24..31 and 16 DSD bits in
+ * bits 23..8, oldest bit highest; the marker is XORed with 0xff after every
+ * output frame.  Input that does not fill a whole output frame is kept in
+ * p->buf / p->pos and completed by the next call (or flushed by drain).
+ *
+ * ibuf/isamp  interleaved input and its length in samples; on return *isamp
+ *             is the number of samples consumed
+ * obuf/osamp  output buffer and its capacity in samples; on return *osamp
+ *             is the number of samples written
+ *
+ * Always returns SOX_SUCCESS.
+ */
+static int dop_flow(sox_effect_t *eff, const sox_sample_t *ibuf,
+                    sox_sample_t *obuf, size_t *isamp, size_t *osamp)
+{
+  dop_t *p = eff->priv;
+  unsigned channels = eff->in_signal.channels;
+  const sox_sample_t *in = ibuf;
+  sox_sample_t *out = obuf;
+  size_t ilen = *isamp / channels;
+  size_t olen = *osamp / channels;
+  unsigned i;
+
+  /* Top up the frame left incomplete by the previous call.  Skipped when
+   * there is no output room, so a completed frame always has a slot. */
+  if (p->pos && olen) {
+    size_t n = min(16 - p->pos, ilen);
+    for (i = 0; i < channels; i++)
+      p->buf[i] |= dop_load_bits(in + i, channels, p->pos, n);
+    in += n * channels;
+    ilen -= n;
+    p->pos += n;
+    if (p->pos == 16) {
+      for (i = 0; i < channels; i++) {
+        *out++ = p->buf[i] | p->marker << 24;
+        p->buf[i] = 0;  /* bits are ORed in, so start the next frame clean */
+      }
+      olen--;
+      p->marker ^= 0xff;
+      p->pos = 0;
+    }
+  }
+
+  /* Whole frames.  If a partial frame is still pending here, either the
+   * input or the output room is exhausted, so this loop does not run. */
+  while (olen && ilen >= 16) {
+    for (i = 0; i < channels; i++)
+      *out++ = dop_load_bits(in + i, channels, 0, 16) | p->marker << 24;
+    olen--;
+    in += 16 * channels;
+    ilen -= 16;
+    p->marker ^= 0xff;
+  }
+
+  /* Fewer than 16 frames are left: stash them for the next call.  p->pos is
+   * 0 whenever input remains at this point, so the result stays below 16
+   * and nothing can be emitted here. */
+  if (olen && ilen && ilen < 16) {
+    for (i = 0; i < channels; i++)
+      p->buf[i] |= dop_load_bits(in + i, channels, p->pos, ilen);
+    in += ilen * channels;
+    p->pos += ilen;
+    ilen = 0;
+  }
+
+  *isamp = in - ibuf;
+  *osamp = out - obuf;
+
+  return SOX_SUCCESS;
+}
+
+/*
+ * Flush a partially filled frame at end of input.
+ *
+ * If bits are pending, one output frame (one word per channel) is written
+ * with the missing low-order DSD bits left at zero, and *osamp is set to the
+ * number of samples written.  The pending state is then cleared so that a
+ * further call reports no output instead of repeating the same frame.
+ *
+ * *osamp is the capacity of obuf on entry.  If it cannot hold one frame the
+ * pending bits are not written and 0 is reported rather than overrunning
+ * the buffer.
+ *
+ * Always returns SOX_SUCCESS.
+ */
+static int dop_drain(sox_effect_t *eff, sox_sample_t *obuf, size_t *osamp)
+{
+  dop_t *p = eff->priv;
+  unsigned channels = eff->in_signal.channels;
+  unsigned i;
+
+  if (!p->pos || *osamp < channels) {
+    *osamp = 0;
+    return SOX_SUCCESS;
+  }
+
+  for (i = 0; i < channels; i++) {
+    *obuf++ = p->buf[i] | p->marker << 24;
+    p->buf[i] = 0;
+  }
+  p->marker ^= 0xff;
+  p->pos = 0;
+  *osamp = channels;
+
+  return SOX_SUCCESS;
+}
+
+/* Release the per-channel buffer allocated by dop_start(). */
+static int dop_stop(sox_effect_t *eff)
+{
+  free(((dop_t *)eff->priv)->buf);
+
+  return SOX_SUCCESS;
+}
+
+/*
+ * Return the handler descriptor of the "dop" effect.  The descriptor is a
+ * function-local static, so every call returns the same address.
+ */
+const sox_effect_handler_t *lsx_dop_effect_fn(void)
+{
+  static sox_effect_handler_t handler = {
+    "dop", NULL,
+    SOX_EFF_MCHAN | SOX_EFF_PREC | SOX_EFF_RATE,
+    /* NOTE(review): positional initializer -- the slots used for the dop_*
+     * callbacks are assumed to follow the field order of
+     * sox_effect_handler_t, which is declared elsewhere; confirm there. */
+    NULL, dop_start, dop_flow, dop_drain, dop_stop, NULL,
+    sizeof(dop_t),
+  };
+  return &handler;
+}
diff --git a/src/effects.h b/src/effects.h
index e8987de..9e6ba10 100644
--- a/src/effects.h
+++ b/src/effects.h
@@ -32,6 +32,7 @@
   EFFECT(dft_filter) /* abstract */
   EFFECT(dither)
   EFFECT(divide)
+  EFFECT(dop)
   EFFECT(downsample)
   EFFECT(earwax)
   EFFECT(echo)
-- 
2.5.2


------------------------------------------------------------------------------
Monitor Your Dynamic Infrastructure at Any Scale With Datadog!
Get real-time metrics from all of your servers, apps and tools
in one place.
SourceForge users - Click here to start your Free Trial of Datadog now!
http://pubads.g.doubleclick.net/gampad/clk?id=241902991&iu=/4140

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH 5/6] Add a sigma-delta modulator for DSD encoding
  2015-09-16 14:16 ` [PATCH 5/6] Add a sigma-delta modulator for DSD encoding Mans Rullgard
@ 2015-10-03 22:31   ` Eric Wong
  2015-10-03 22:39     ` Måns Rullgård
  0 siblings, 1 reply; 18+ messages in thread
From: Eric Wong @ 2015-10-03 22:31 UTC (permalink / raw)
  To: sox-devel; +Cc: Mans Rullgard

Mans Rullgard <mans@mansr.com> wrote:
> This adds a sigma-delta modulator for 1-bit (DSD) encoding.  It is
> invoked by the "dither" effect when the output precision is 1-bit or
> manually with choice of the following noise-shaping filters:
> 
>   fast   Reasonably good quality while fast enough for real-time
>          operation.  This is the default.
> 
>   hq     Lower noise and distortion than "fast" at the expense of
>          being much slower.
> 
>   audiophile
>          Somewhat better quality than "hq" and almost twice as slow.
> 
>   goldenear
>          Slightly higher quality than "audiophile" and considerably
>          slower.

Cute names, but perhaps "fast", "high", "higher", "highest" or even a
numeric values be more obvious; especially to folks with a smaller
English vocabulary.

To me, hq/audiophile/goldenear might all be interchangeable.
The "dict" command on my system defines "hq" as Headquarters,
and doesn't even know about the other two.

Haven't had much time or interest in the rest of the series.

------------------------------------------------------------------------------

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 5/6] Add a sigma-delta modulator for DSD encoding
  2015-10-03 22:31   ` Eric Wong
@ 2015-10-03 22:39     ` Måns Rullgård
  0 siblings, 0 replies; 18+ messages in thread
From: Måns Rullgård @ 2015-10-03 22:39 UTC (permalink / raw)
  To: Eric Wong; +Cc: sox-devel

Eric Wong <normalperson@yhbt.net> writes:

> Mans Rullgard <mans@mansr.com> wrote:
>> This adds a sigma-delta modulator for 1-bit (DSD) encoding.  It is
>> invoked by the "dither" effect when the output precision is 1-bit or
>> manually with choice of the following noise-shaping filters:
>> 
>>   fast   Reasonably good quality while fast enough for real-time
>>          operation.  This is the default.
>> 
>>   hq     Lower noise and distortion than "fast" at the expense of
>>          being much slower.
>> 
>>   audiophile
>>          Somewhat better quality than "hq" and almost twice as slow.
>> 
>>   goldenear
>>          Slightly higher quality than "audiophile" and considerably
>>          slower.
>
> Cute names, but perhaps "fast", "high", "higher", "highest" or even a
> numeric values be more obvious; especially to folks with a smaller
> English vocabulary.

Those names were meant as a bit of a joke.  In fact, the last two modes
could be considered jokes in themselves since they provide only a
minuscule improvement at great expense in computing time.

-- 
Måns Rullgård
mans@mansr.com

------------------------------------------------------------------------------

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 6/6] Add DSD over PCM (dop) effect
  2015-09-16 14:16 ` [PATCH 6/6] Add DSD over PCM (dop) effect Mans Rullgard
@ 2015-12-19 12:09   ` Eric Wong
  2015-12-19 12:14     ` Måns Rullgård
  0 siblings, 1 reply; 18+ messages in thread
From: Eric Wong @ 2015-12-19 12:09 UTC (permalink / raw)
  To: sox-devel; +Cc: Mans Rullgard

Series tested with the following commands to create and play
a `test.dsf' file using an existing FLAC file:

	sox $INPUT.flac test.dsf rate 2822.4k sdm
	sox test.dsf -b 24 -r 48k -p | play -p

(I didn't test dsdiff nor building with non-autotools).

Pushed to the "mr/dsd" branch on git://80x24.org/sox.git
807d49a2384aef4be06ddeaf388aaa13aca8fbbb

------------------------------------------------------------------------------

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 6/6] Add DSD over PCM (dop) effect
  2015-12-19 12:09   ` Eric Wong
@ 2015-12-19 12:14     ` Måns Rullgård
  2015-12-20  5:04       ` Eric Wong
  0 siblings, 1 reply; 18+ messages in thread
From: Måns Rullgård @ 2015-12-19 12:14 UTC (permalink / raw)
  To: Eric Wong; +Cc: sox-devel

Eric Wong <normalperson@yhbt.net> writes:

> Series tested with the following commands to create and play
> a `test.dsf' file using an existing FLAC file:
>
> 	sox $INPUT.flac test.dsf rate 2822.4k sdm
> 	sox test.dsf -b 24 -r 48k -p | play -p
>
> (I didn't test dsdiff nor building with non-autotools).
>
> Pushed to the "mr/dsd" branch on git://80x24.org/sox.git
> 807d49a2384aef4be06ddeaf388aaa13aca8fbbb

There are some fixes and improvements in my github repo at
https://github.com/mansr/sox

You probably want to grab those as well.

-- 
Måns Rullgård

------------------------------------------------------------------------------

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 6/6] Add DSD over PCM (dop) effect
  2015-12-19 12:14     ` Måns Rullgård
@ 2015-12-20  5:04       ` Eric Wong
  2015-12-20 13:44         ` Måns Rullgård
  0 siblings, 1 reply; 18+ messages in thread
From: Eric Wong @ 2015-12-20  5:04 UTC (permalink / raw)
  To: Måns Rullgård; +Cc: sox-devel

Måns Rullgård <mans@mansr.com> wrote:
> There are some fixes and improvements in my github repo at
> https://github.com/mansr/sox
> 
> You probably want to grab those as well.

Thanks. I've split them into 3 topics (mr/{dsd,build,pad})
for easier review on my repo @ git://80x24.org/sox.git

I couldn't test the MSVC build patch, but it looked obvious enough.

I actually haven't tested the dop effect, either, and am not
sure how to use it.  I can only get it to play static with
a file I created:

  sox test.dsf -r 176.4k -p dop | play -p

Resampling the intermediate data to 48k still plays static:

  sox test.dsf -r 176.4k -p dop | sox -p -r48k -p | play -p

This sounds fine:

  sox test.dsf -r 48k -p | play -p

test.dsf was created with a stereo 44.1k / 16-bit FLAC file:

	sox $IN.flac test.dsf rate 2822.4k sdm

------------------------------------------------------------------------------
_______________________________________________
SoX-devel mailing list
SoX-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/sox-devel

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 4/6] Add macros for increasing data alignment
  2015-09-16 14:16 ` [PATCH 4/6] Add macros for increasing data alignment Mans Rullgard
@ 2015-12-20 12:41   ` Eric Wong
  2015-12-20 13:54     ` Måns Rullgård
  0 siblings, 1 reply; 18+ messages in thread
From: Eric Wong @ 2015-12-20 12:41 UTC (permalink / raw)
  To: sox-devel; +Cc: Mans Rullgard

Mans Rullgard <mans@mansr.com> wrote:
> Also check for the aligned_alloc() function and alias to Microsoft's
> _aligned_malloc() if necessary.

How much does aligned_alloc help performance for you (and with which
settings)?  I'm on an older system with only memalign and
posix_memalign, and trying to enable it didn't change performance with
"--multi-thread --buffer=131072" on a 4-core AMD Phenom II X4 945

--- a/src/util.h
+++ b/src/util.h
@@ -194,8 +194,13 @@
 #define LSX_ALIGN(n)
 #endif
 
+#define HAVE_MEMALIGN
+
 #ifdef HAVE_ALIGNED_ALLOC
   #define aligned_free(p) free(p)
+#elif defined(HAVE_MEMALIGN)
+  #define aligned_alloc(a, s) memalign(a, s)
+  #define aligned_free(p) free(p)
 #elif defined _MSC_VER
   #define aligned_alloc(a, s) _aligned_malloc(s, a)
   #define aligned_free(p) _aligned_free(p)

------------------------------------------------------------------------------

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 6/6] Add DSD over PCM (dop) effect
  2015-12-20  5:04       ` Eric Wong
@ 2015-12-20 13:44         ` Måns Rullgård
  0 siblings, 0 replies; 18+ messages in thread
From: Måns Rullgård @ 2015-12-20 13:44 UTC (permalink / raw)
  To: Eric Wong; +Cc: sox-devel

Eric Wong <normalperson@yhbt.net> writes:

> Måns Rullgård <mans@mansr.com> wrote:
>> There are some fixes and improvements in my github repo at
>> https://github.com/mansr/sox
>> 
>> You probably want to grab those as well.
>
> Thanks. I've split them into 3 topics (mr/{dsd,build,pad})
> for easier review on my repo @ git://80x24.org/sox.git
>
> I couldn't test the MSVC build patch, but it looked obvious enough.
>
> I actually haven't tested the dop effect, either, and am not
> sure how to use it.  I can only get it to play static with
> a file I created:
>
>   sox test.dsf -r 176.4k -p dop | play -p

You need a DAC that supports the format.  If yours doesn't you'll get
static.  The purpose of dop is to wrap DSD data for transport to a DAC
over a non-DSD-aware link.

> Resampling the intermediate data to 48k still plays static:
>
>   sox test.dsf -r 176.4k -p dop | sox -p -r48k -p | play -p

That makes no sense.  The dop output should not be touched further.

> This sounds fine:
>
>   sox test.dsf -r 48k -p | play -p

There you're downsampling the DSD stream to 48k in software and playing
it the normal way.

-- 
Måns Rullgård

------------------------------------------------------------------------------

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 4/6] Add macros for increasing data alignment
  2015-12-20 12:41   ` Eric Wong
@ 2015-12-20 13:54     ` Måns Rullgård
  2015-12-21 10:55       ` Eric Wong
  0 siblings, 1 reply; 18+ messages in thread
From: Måns Rullgård @ 2015-12-20 13:54 UTC (permalink / raw)
  To: Eric Wong; +Cc: sox-devel

Eric Wong <normalperson@yhbt.net> writes:

> Mans Rullgard <mans@mansr.com> wrote:
>> Also check for the aligned_alloc() function and alias to Microsoft's
>> _aligned_malloc() if necessary.
>
> How much does aligned_alloc help performance for you (and with which
> settings)? 

It's required for the SSE2 and AVX code.  Without proper alignment it
simply crashes (that's how the CPU works).  The speedup of the sdm
filter with SSE2/AVX is substantial.

> I'm on an older system with only memalign and posix_memalign, and
> trying to enable it didn't change performance with "--multi-thread
> --buffer=131072" on a 4-core AMD Phenom II X4 945

That CPU doesn't have AVX so 16-byte alignment is enough, and plain
malloc usually provides that.  It obviously doesn't hurt to add support
for memalign as well even though it is considered obsolete.  If you do
that, you should also take care of #including malloc.h.

> --- a/src/util.h
> +++ b/src/util.h
> @@ -194,8 +194,13 @@
>  #define LSX_ALIGN(n)
>  #endif
>
> +#define HAVE_MEMALIGN
> +
>  #ifdef HAVE_ALIGNED_ALLOC
>    #define aligned_free(p) free(p)
> +#elif defined(HAVE_MEMALIGN)
> +  #define aligned_alloc(a, s) memalign(a, s)
> +  #define aligned_free(p) free(p)
>  #elif defined _MSC_VER
>    #define aligned_alloc(a, s) _aligned_malloc(s, a)
>    #define aligned_free(p) _aligned_free(p)

-- 
Måns Rullgård

------------------------------------------------------------------------------

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 4/6] Add macros for increasing data alignment
  2015-12-20 13:54     ` Måns Rullgård
@ 2015-12-21 10:55       ` Eric Wong
  2015-12-21 11:37         ` Måns Rullgård
  0 siblings, 1 reply; 18+ messages in thread
From: Eric Wong @ 2015-12-21 10:55 UTC (permalink / raw)
  To: Måns Rullgård; +Cc: sox-devel

Måns Rullgård <mans@mansr.com> wrote:
> That CPU doesn't have AVX so 16-byte alignment is enough, and plain
> malloc usually provides that.  It obviously doesn't hurt to add support
> for memalign as well even though it is considered obsolete.  If you do
> that, you should also take care of #including malloc.h.

Yes, added memalign(3) and posix_memalign(3) fallbacks.
I also went ahead and stole a bit from Ruby to provide a
fallback when none of the 3 functions exist.

Will still need to deal with systems without LSX_ALIGN...

Anyways, pushed the following to git://bogomips.org/sox ew/align

-----------------8<------------------
Subject: [PATCH] always support aligned heap allocation

The new sdm effect will not work correctly on AVX systems without
32-byte alignment; so we need to support older systems without
C11 aligned_alloc.

The fallback emulation is based on code found in gc.c in Ruby 2.0+
(BSD-licensed)
---
 configure.ac | 11 ++++++++++-
 src/util.h   | 31 +++++++++++++++++++++++++++++--
 2 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/configure.ac b/configure.ac
index 8570638..017a9dd 100644
--- a/configure.ac
+++ b/configure.ac
@@ -207,7 +207,16 @@ AC_HEADER_STDC
 AC_CHECK_HEADERS(fcntl.h unistd.h byteswap.h sys/stat.h sys/time.h sys/timeb.h sys/types.h sys/utsname.h termios.h glob.h fenv.h)
 
 dnl Checks for library functions.
-AC_CHECK_FUNCS(strcasecmp strdup popen vsnprintf gettimeofday mkstemp fmemopen aligned_alloc)
+AC_CHECK_FUNCS(strcasecmp strdup popen vsnprintf gettimeofday mkstemp fmemopen)
+
+dnl aligned alloc required for sdm using AVX (32-byte) or SSE2 (16-byte)
+AC_CHECK_FUNCS(aligned_alloc)
+AS_IF([test "x$ac_cv_func_aligned_alloc" != xyes], [
+	AC_CHECK_FUNCS([memalign])
+	AS_IF([test "x$ac_cv_func_memalign" != xyes], [
+		AC_CHECK_FUNCS([posix_memalign])
+	])
+])
 
 dnl Check if math library is needed.
 AC_SEARCH_LIBS([pow], [m])
diff --git a/src/util.h b/src/util.h
index b5cc9b8..3a9686d 100644
--- a/src/util.h
+++ b/src/util.h
@@ -196,12 +196,39 @@
 
 #ifdef HAVE_ALIGNED_ALLOC
   #define aligned_free(p) free(p)
+#elif defined(HAVE_MEMALIGN)
+  #include <malloc.h>
+  #define aligned_alloc(a, s) memalign(a, s)
+  #define aligned_free(p) free(p)
+#elif defined(HAVE_POSIX_MEMALIGN)
+  #include <errno.h>
+/*
+ * aligned_alloc() look-alike built on posix_memalign(): returns the block on
+ * success; on failure stores posix_memalign()'s error code in errno and
+ * returns a null pointer.
+ */
+static inline void *sox_aligned_alloc_pm(size_t align, size_t size)
+{
+  void *mem = 0;
+  int rc = posix_memalign(&mem, align, size);
+
+  if (rc != 0) {
+    errno = rc;
+    return 0;
+  }
+  return mem;
+}
+  #define aligned_alloc(a, s) sox_aligned_alloc_pm(a, s)
+  #define aligned_free(p) free(p)
 #elif defined _MSC_VER
   #define aligned_alloc(a, s) _aligned_malloc(s, a)
   #define aligned_free(p) _aligned_free(p)
 #else
-  #define aligned_alloc(a, s) malloc(s)
-  #define aligned_free(p) free(p)
+/*
+ * Fallback aligned allocator for systems with no native one.
+ *
+ * Over-allocates with malloc(), rounds the address down to a multiple of
+ * align and stores the pointer malloc() returned in the slot just below the
+ * aligned block, where sox_aligned_free_m() finds it again.
+ *
+ * align must be a power of two (the rounding uses a bit mask) and is assumed
+ * to be at least the alignment of a pointer so the bookkeeping slot is
+ * itself aligned.  Returns a null pointer if the padded size would overflow
+ * or malloc() fails.
+ */
+static inline void *sox_aligned_alloc_m(size_t align, size_t size)
+{
+  void *res;
+  char *aligned;
+
+  /* Reject requests whose padded size would wrap around. */
+  if (align > (size_t)-1 - sizeof(void *) ||
+      size > (size_t)-1 - sizeof(void *) - align)
+    return 0;
+
+  res = malloc(align + size + sizeof(void *));
+  if (!res)
+    return 0;
+
+  aligned = (char *)res + align + sizeof(void *);
+  aligned -= ((size_t)aligned & (align - 1));
+  ((void **)aligned)[-1] = res;
+  return (void *)aligned;
+}
+
+/* Release a block from sox_aligned_alloc_m(); a null pointer is ignored,
+ * matching free() as used by the other aligned_free() variants. */
+static inline void sox_aligned_free_m(void *p)
+{
+  if (p)
+    free(((void **)p)[-1]);
+}
+
+  #define aligned_alloc(a, s) sox_aligned_alloc_m(a, s)
+  #define aligned_free(p) sox_aligned_free_m(p)
 #endif
 
 /*------------------------------- Maths stuff --------------------------------*/
-- 
EW

------------------------------------------------------------------------------
_______________________________________________
SoX-devel mailing list
SoX-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/sox-devel

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH 4/6] Add macros for increasing data alignment
  2015-12-21 10:55       ` Eric Wong
@ 2015-12-21 11:37         ` Måns Rullgård
  2015-12-21 18:33           ` Eric Wong
  0 siblings, 1 reply; 18+ messages in thread
From: Måns Rullgård @ 2015-12-21 11:37 UTC (permalink / raw)
  To: Eric Wong; +Cc: sox-devel

Eric Wong <normalperson@yhbt.net> writes:

> Måns Rullgård <mans@mansr.com> wrote:
>> That CPU doesn't have AVX so 16-byte alignment is enough, and plain
>> malloc usually provides that.  It obviously doesn't hurt to add support
>> for memalign as well even though it is considered obsolete.  If you do
>> that, you should also take care of #including malloc.h.
>
> Yes, added memalign(3) and posix_memalign(3) fallbacks.
> I also went ahead and stole a bit from Ruby to provide a
> fallback when none of the 3 functions exist.
>
> Will still need to deal with systems without LSX_ALIGN...

Almost all compilers support either the GNU or the MSVC syntax.  The few
that don't probably don't support AVX anyhow.  Mostly it's compilers for
obscure DSPs.

> Anyways, pushed the following to git://bogomips.org/sox ew/align
>
> -----------------8<------------------
> Subject: [PATCH] always support aligned heap allocation
>
> The new sdm effect will not work correctly on AVX systems without
> 32-byte alignment; so we need to support older systems without
> C11 aligned_alloc.
>
> The fallback emulation is based on code found in gc.c in Ruby 2.0+
> (BSD-licensed)
> ---
>  configure.ac | 11 ++++++++++-
>  src/util.h   | 31 +++++++++++++++++++++++++++++--
>  2 files changed, 39 insertions(+), 3 deletions(-)
>
> diff --git a/configure.ac b/configure.ac
> index 8570638..017a9dd 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -207,7 +207,16 @@ AC_HEADER_STDC
>  AC_CHECK_HEADERS(fcntl.h unistd.h byteswap.h sys/stat.h sys/time.h sys/timeb.h sys/types.h sys/utsname.h termios.h glob.h fenv.h)
>
>  dnl Checks for library functions.
> -AC_CHECK_FUNCS(strcasecmp strdup popen vsnprintf gettimeofday mkstemp fmemopen aligned_alloc)
> +AC_CHECK_FUNCS(strcasecmp strdup popen vsnprintf gettimeofday mkstemp fmemopen)
> +
> +dnl aligned alloc required for sdm using AVX (32-byte) or SSE2 (16-byte)
> +AC_CHECK_FUNCS(aligned_alloc)
> +AS_IF([test "x$ac_cv_func_aligned_alloc" != xyes], [
> +	AC_CHECK_FUNCS([memalign])
> +	AS_IF([test "x$ac_cv_func_memalign" != xyes], [
> +		AC_CHECK_FUNCS([posix_memalign])
> +	])
> +])

Why don't you just add (posix_)memalign to the existing list of
functions?  It's not an error if some of them don't exist, and you're
checking the resulting HAVE_ macros in the same order anyway.

>  dnl Check if math library is needed.
>  AC_SEARCH_LIBS([pow], [m])
> diff --git a/src/util.h b/src/util.h
> index b5cc9b8..3a9686d 100644
> --- a/src/util.h
> +++ b/src/util.h
> @@ -196,12 +196,39 @@
>
>  #ifdef HAVE_ALIGNED_ALLOC
>    #define aligned_free(p) free(p)
> +#elif defined(HAVE_MEMALIGN)
> +  #include <malloc.h>
> +  #define aligned_alloc(a, s) memalign(a, s)
> +  #define aligned_free(p) free(p)
> +#elif defined(HAVE_POSIX_MEMALIGN)
> +  #include <errno.h>
> +static inline void *sox_aligned_alloc_pm(size_t align, size_t size)
> +{
> +  void *ptr;
> +  int err = posix_memalign(&ptr, align, size);
> +
> +  if (!err) return ptr;
> +  errno = err;
> +  return 0;
> +}
> +  #define aligned_alloc(a, s) sox_aligned_alloc_pm(a, s)
> +  #define aligned_free(p) free(p)
>  #elif defined _MSC_VER
>    #define aligned_alloc(a, s) _aligned_malloc(s, a)
>    #define aligned_free(p) _aligned_free(p)
>  #else
> -  #define aligned_alloc(a, s) malloc(s)
> -  #define aligned_free(p) free(p)
> +static inline void *sox_aligned_alloc_m(size_t align, size_t size)
> +{
> +  void *res = malloc(align + size + sizeof(void *));
> +  char *aligned = (char *)res + align + sizeof(void *);
> +
> +  aligned -= ((size_t)aligned & (align - 1));

Use uintptr_t rather than size_t there.  Although they are usually the
same underlying type, there is no such guarantee, especially for systems
bizarre enough not to have an aligned allocation function.

> +  ((void **)aligned)[-1] = res;
> +  return (void *)aligned;
> +}
> +
> +  #define aligned_alloc(a, s) sox_aligned_alloc_m(a, s)
> +  #define aligned_free(p) free(((void**)p)[-1]);
>  #endif
>
>  /*------------------------------- Maths stuff --------------------------------*/
> -- 
> EW

-- 
Måns Rullgård

------------------------------------------------------------------------------

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 4/6] Add macros for increasing data alignment
  2015-12-21 11:37         ` Måns Rullgård
@ 2015-12-21 18:33           ` Eric Wong
  2015-12-21 18:37             ` Måns Rullgård
  0 siblings, 1 reply; 18+ messages in thread
From: Eric Wong @ 2015-12-21 18:33 UTC (permalink / raw)
  To: Måns Rullgård; +Cc: sox-devel

Måns Rullgård <mans@mansr.com> wrote:
> Eric Wong <normalperson@yhbt.net> writes:
> >  dnl Checks for library functions.
> > -AC_CHECK_FUNCS(strcasecmp strdup popen vsnprintf gettimeofday mkstemp fmemopen aligned_alloc)
> > +AC_CHECK_FUNCS(strcasecmp strdup popen vsnprintf gettimeofday mkstemp fmemopen)
> > +
> > +dnl aligned alloc required for sdm using AVX (32-byte) or SSE2 (16-byte)
> > +AC_CHECK_FUNCS(aligned_alloc)
> > +AS_IF([test "x$ac_cv_func_aligned_alloc" != xyes], [
> > +	AC_CHECK_FUNCS([memalign])
> > +	AS_IF([test "x$ac_cv_func_memalign" != xyes], [
> > +		AC_CHECK_FUNCS([posix_memalign])
> > +	])
> > +])
> 
> Why don't you just add (posix_)memalign to the existing list of
> functions?  It's not an error if some of them don't exist, and you're
> checking the resulting HAVE_ macros in the same order anyway.

I wanted to avoid the overhead of checking redundantly if one
function was already available; but perhaps it's not worth the effort
since `configure' is already slow in other places.

> > +static inline void *sox_aligned_alloc_m(size_t align, size_t size)
> > +{
> > +  void *res = malloc(align + size + sizeof(void *));
> > +  char *aligned = (char *)res + align + sizeof(void *);
> > +
> > +  aligned -= ((size_t)aligned & (align - 1));
> 
> Use uintptr_t rather than size_t there.  Although they are usually the
> same underlying type, there is no such guarantee, especially for systems
> bizarre enough not to have an aligned allocation function.

I wanted to, but I figure systems missing *memalign could also
be missing uintptr_t and stdint.h
Perhaps "unsigned long" is better?

------------------------------------------------------------------------------
_______________________________________________
SoX-devel mailing list
SoX-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/sox-devel

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 4/6] Add macros for increasing data alignment
  2015-12-21 18:33           ` Eric Wong
@ 2015-12-21 18:37             ` Måns Rullgård
  0 siblings, 0 replies; 18+ messages in thread
From: Måns Rullgård @ 2015-12-21 18:37 UTC (permalink / raw)
  To: Eric Wong; +Cc: sox-devel

Eric Wong <normalperson@yhbt.net> writes:

> Måns Rullgård <mans@mansr.com> wrote:
>> Eric Wong <normalperson@yhbt.net> writes:
>> >  dnl Checks for library functions.
>> > -AC_CHECK_FUNCS(strcasecmp strdup popen vsnprintf gettimeofday mkstemp fmemopen aligned_alloc)
>> > +AC_CHECK_FUNCS(strcasecmp strdup popen vsnprintf gettimeofday mkstemp fmemopen)
>> > +
>> > +dnl aligned alloc required for sdm using AVX (32-byte) or SSE2 (16-byte)
>> > +AC_CHECK_FUNCS(aligned_alloc)
>> > +AS_IF([test "x$ac_cv_func_aligned_alloc" != xyes], [
>> > +	AC_CHECK_FUNCS([memalign])
>> > +	AS_IF([test "x$ac_cv_func_memalign" != xyes], [
>> > +		AC_CHECK_FUNCS([posix_memalign])
>> > +	])
>> > +])
>> 
>> Why don't you just add (posix_)memalign to the existing list of
>> functions?  It's not an error if some of them don't exist, and you're
>> checking the resulting HAVE_ macros in the same order anyway.
>
> I wanted to avoid the overhead of checking redundantly if one
> function was already available; but perhaps it's not worth the effort
> since `configure' is already slow in other places.

The difference in time is barely noticeable on a modern system.

>> > +static inline void *sox_aligned_alloc_m(size_t align, size_t size)
>> > +{
>> > +  void *res = malloc(align + size + sizeof(void *));
>> > +  char *aligned = (char *)res + align + sizeof(void *);
>> > +
>> > +  aligned -= ((size_t)aligned & (align - 1));
>> 
>> Use uintptr_t rather than size_t there.  Although they are usually the
>> same underlying type, there is no such guarantee, especially for systems
>> bizarre enough not to have an aligned allocation function.
>
> I wanted to, but I figure systems missing *memalign could also
> be missing uintptr_t and stdint.h
> Perhaps "unsigned long" is better?

In that case size_t should be the safe choice.  It's more likely than
unsigned long to be the same size as a pointer.  Of course it doesn't
really matter what size it is since we only care about the low 5 bits,
but the compiler doesn't know that.

-- 
Måns Rullgård

------------------------------------------------------------------------------

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2015-12-21 18:38 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-09-16 14:16 [PATCH 1/6] Add SOX_ENCODING_DSD Mans Rullgard
2015-09-16 14:16 ` [PATCH 2/6] Add DSF file support Mans Rullgard
2015-09-16 14:16 ` [PATCH 3/6] Add support for reading DSDIFF files Mans Rullgard
2015-09-16 14:16 ` [PATCH 4/6] Add macros for increasing data alignment Mans Rullgard
2015-12-20 12:41   ` Eric Wong
2015-12-20 13:54     ` Måns Rullgård
2015-12-21 10:55       ` Eric Wong
2015-12-21 11:37         ` Måns Rullgård
2015-12-21 18:33           ` Eric Wong
2015-12-21 18:37             ` Måns Rullgård
2015-09-16 14:16 ` [PATCH 5/6] Add a sigma-delta modulator for DSD encoding Mans Rullgard
2015-10-03 22:31   ` Eric Wong
2015-10-03 22:39     ` Måns Rullgård
2015-09-16 14:16 ` [PATCH 6/6] Add DSD over PCM (dop) effect Mans Rullgard
2015-12-19 12:09   ` Eric Wong
2015-12-19 12:14     ` Måns Rullgård
2015-12-20  5:04       ` Eric Wong
2015-12-20 13:44         ` Måns Rullgård

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/sox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).