diff options
author | The Android Open Source Project <initial-contribution@android.com> | 2009-02-10 15:43:58 -0800 |
---|---|---|
committer | The Android Open Source Project <initial-contribution@android.com> | 2009-02-10 15:43:58 -0800 |
commit | 235397992afb6a676a075c8814388c4471bba6cf (patch) | |
tree | 0d83f1f0e6b25989a1f53a71ee7a3473c056291e | |
parent | 022cacc578c40632cc9f65efcbeff86bacdde635 (diff) | |
download | bluez-235397992afb6a676a075c8814388c4471bba6cf.tar.gz |
auto import from //branches/cupcake/...@130745
-rw-r--r-- | utils/audio/Android.mk | 7 | ||||
-rw-r--r-- | utils/audio/a2dp.c | 5 | ||||
-rw-r--r-- | utils/audio/control.c | 13 | ||||
-rw-r--r-- | utils/audio/liba2dp.c | 205 | ||||
-rw-r--r-- | utils/sbc/Makefile.am | 7 | ||||
-rw-r--r-- | utils/sbc/sbc.c | 375 | ||||
-rw-r--r-- | utils/sbc/sbc.h | 1 | ||||
-rw-r--r-- | utils/sbc/sbc_math.h | 2 | ||||
-rw-r--r-- | utils/sbc/sbc_primitives.c | 469 | ||||
-rw-r--r-- | utils/sbc/sbc_primitives.h | 74 | ||||
-rw-r--r-- | utils/sbc/sbc_primitives_mmx.c | 319 | ||||
-rw-r--r-- | utils/sbc/sbc_primitives_mmx.h | 40 | ||||
-rw-r--r-- | utils/sbc/sbc_primitives_neon.c | 245 | ||||
-rw-r--r-- | utils/sbc/sbc_primitives_neon.h | 40 | ||||
-rw-r--r-- | utils/sbc/sbc_tables.h | 436 | ||||
-rw-r--r-- | utils/sbc/sbcdec.c | 2 | ||||
-rw-r--r-- | utils/sbc/sbcenc.c | 132 | ||||
-rw-r--r-- | utils/sbc/sbcinfo.c | 5 |
18 files changed, 1879 insertions, 498 deletions
diff --git a/utils/audio/Android.mk b/utils/audio/Android.mk index 3dc2d40d..309cfa7b 100644 --- a/utils/audio/Android.mk +++ b/utils/audio/Android.mk @@ -20,6 +20,7 @@ LOCAL_CFLAGS:= \ -DVERSION=\"3.36\" \ -DSTORAGEDIR=\"/data/misc/hcid\" \ -DCONFIGDIR=\"/etc/bluez\" \ + -DANDROID \ -D__S_IFREG=0100000 # missing from bionic stat.h LOCAL_C_INCLUDES:= \ @@ -49,7 +50,11 @@ include $(CLEAR_VARS) LOCAL_SRC_FILES:= \ liba2dp.c \ ipc.c \ - ../sbc/sbc.c.arm + ../sbc/sbc.c.arm \ + ../sbc/sbc_primitives.c + +# to improve SBC performance +LOCAL_CFLAGS:= -funroll-loops LOCAL_C_INCLUDES:= \ $(call include-path-for, bluez-libs) \ diff --git a/utils/audio/a2dp.c b/utils/audio/a2dp.c index 3e89d6b1..67e17828 100644 --- a/utils/audio/a2dp.c +++ b/utils/audio/a2dp.c @@ -357,10 +357,14 @@ static gboolean sbc_getcap_ind(struct avdtp *session, struct avdtp_local_sep *se sbc_cap.cap.media_type = AVDTP_MEDIA_TYPE_AUDIO; sbc_cap.cap.media_codec_type = A2DP_CODEC_SBC; +#ifdef ANDROID + sbc_cap.frequency = SBC_SAMPLING_FREQ_44100; +#else sbc_cap.frequency = ( SBC_SAMPLING_FREQ_48000 | SBC_SAMPLING_FREQ_44100 | SBC_SAMPLING_FREQ_32000 | SBC_SAMPLING_FREQ_16000 ); +#endif sbc_cap.channel_mode = ( SBC_CHANNEL_MODE_JOINT_STEREO | SBC_CHANNEL_MODE_STEREO | @@ -1467,4 +1471,3 @@ gboolean a2dp_sep_unlock(struct a2dp_sep *sep, struct avdtp *session) return TRUE; } - diff --git a/utils/audio/control.c b/utils/audio/control.c index c16e79c8..4354493e 100644 --- a/utils/audio/control.c +++ b/utils/audio/control.c @@ -98,7 +98,9 @@ static DBusConnection *connection = NULL; static uint32_t tg_record_id = 0; +#ifndef ANDROID static uint32_t ct_record_id = 0; +#endif static GIOChannel *avctp_server = NULL; static gchar *input_device_name = NULL; @@ -179,7 +181,7 @@ static sdp_record_t *avrcp_ct_record() sdp_list_t *aproto, *proto[2]; sdp_record_t *record; sdp_data_t *psm, *version, *features; - uint16_t lp = AVCTP_PSM, ver = 0x0103, feat = 0x000f; + int16_t lp = AVCTP_PSM, ver = 0x0100, feat = 0x000f; 
record = sdp_record_alloc(); if (!record) @@ -242,7 +244,7 @@ static sdp_record_t *avrcp_tg_record() sdp_list_t *aproto, *proto[2]; sdp_record_t *record; sdp_data_t *psm, *version, *features; - uint16_t lp = AVCTP_PSM, ver = 0x0103, feat = 0x000f; + uint16_t lp = AVCTP_PSM, ver = 0x0100, feat = 0x000f; record = sdp_record_alloc(); if (!record) @@ -425,7 +427,7 @@ static void avctp_unref(struct avctp *session) if (session->io) g_source_remove(session->io); - if (session->dev) + if (session->dev && session->dev->control) session->dev->control->session = NULL; if (session->uinput >= 0) { @@ -857,6 +859,7 @@ int avrcp_init(DBusConnection *conn, GKeyFile *config) } tg_record_id = record->handle; +#ifndef ANDROID record = avrcp_ct_record(); if (!record) { error("Unable to allocate new service record"); @@ -869,6 +872,7 @@ int avrcp_init(DBusConnection *conn, GKeyFile *config) return -1; } ct_record_id = record->handle; +#endif avctp_server = avctp_server_socket(master); if (!avctp_server) @@ -886,9 +890,10 @@ void avrcp_exit(void) g_io_channel_unref(avctp_server); avctp_server = NULL; +#ifndef ANDROID remove_record_from_server(ct_record_id); ct_record_id = 0; - +#endif remove_record_from_server(tg_record_id); tg_record_id = 0; diff --git a/utils/audio/liba2dp.c b/utils/audio/liba2dp.c index 0b31cd2e..2b005127 100644 --- a/utils/audio/liba2dp.c +++ b/utils/audio/liba2dp.c @@ -76,8 +76,8 @@ #define ERR LOGE -/* Number of milliseconds worth of audio to buffer in our the data->stream.fd socket */ -#define SOCK_BUFFER_MS 50 +/* Number of packets to buffer in the stream socket */ +#define PACKET_BUFFER_COUNT 10 struct bluetooth_data { int link_mtu; /* MTU for selected transport channel */ @@ -97,36 +97,38 @@ struct bluetooth_data { uint16_t seq_num; /* Cumulative packet sequence */ int frame_count; /* Current frames in buffer*/ - int started; char address[20]; int rate; int channels; /* used for pacing our writes to the output socket */ - struct timeval last_write; - unsigned 
long last_duration; - - /* true if we already set the buffer size on the data->stream.fd socket */ - int adjusted_sock_buffer; + struct timeval next_write; }; -static int audioservice_send(int sk, const bt_audio_msg_header_t *msg); -static int audioservice_expect(int sk, bt_audio_msg_header_t *outmsg, +static int audioservice_send(struct bluetooth_data *data, const bt_audio_msg_header_t *msg); +static int audioservice_expect(struct bluetooth_data *data, bt_audio_msg_header_t *outmsg, int expected_type); static int bluetooth_a2dp_hw_params(struct bluetooth_data *data); -static void bluetooth_exit(struct bluetooth_data *data) +static void bluetooth_close(struct bluetooth_data *data) { - if (data->server.fd >= 0) + LOGD("bluetooth_close"); + if (data->server.fd >= 0) { bt_audio_service_close(data->server.fd); + data->server.fd = -1; + } - if (data->stream.fd >= 0) + if (data->stream.fd >= 0) { close(data->stream.fd); + data->stream.fd = -1; + } if (data->sbc_initialized) sbc_finish(&data->sbc); + + data->sbc_initialized = 0; } static int bluetooth_start(struct bluetooth_data *data) @@ -136,20 +138,17 @@ static int bluetooth_start(struct bluetooth_data *data) struct bt_streamstart_req *start_req = (void*) buf; bt_audio_rsp_msg_header_t *rsp_hdr = (void*) buf; struct bt_streamfd_ind *streamfd_ind = (void*) buf; - int opt_name, err; - int retry = 0; + int opt_name, err, bytes; -retry: /* send start */ memset(start_req, 0, BT_AUDIO_IPC_PACKET_SIZE); start_req->h.msg_type = BT_STREAMSTART_REQ; - err = audioservice_send(data->server.fd, &start_req->h); + err = audioservice_send(data, &start_req->h); if (err < 0) return err; - err = audioservice_expect(data->server.fd, &rsp_hdr->msg_h, - BT_STREAMSTART_RSP); + err = audioservice_expect(data, &rsp_hdr->msg_h, BT_STREAMSTART_RSP); if (err < 0) return err; @@ -157,40 +156,33 @@ retry: ERR("BT_START failed : %s(%d)", strerror(rsp_hdr->posix_errno), rsp_hdr->posix_errno); - - /* if the connection dropped, we may need to reset the 
configuration */ - if (!retry) { - retry = 1; - if (bluetooth_a2dp_hw_params(data) == 0) - goto retry; - } - return -rsp_hdr->posix_errno; } - err = audioservice_expect(data->server.fd, &streamfd_ind->h, - BT_STREAMFD_IND); + err = audioservice_expect(data, &streamfd_ind->h, BT_STREAMFD_IND); if (err < 0) return err; - if (data->stream.fd >= 0) { - close(data->stream.fd); - data->stream.fd = -1; - data->adjusted_sock_buffer = 0; - } - data->stream.fd = bt_audio_service_get_data_fd(data->server.fd); if (data->stream.fd < 0) { + LOGE("bt_audio_service_get_data_fd failed, errno: %d\n", errno); return -errno; } data->stream.events = POLLOUT; + /* set our socket buffer to the size of PACKET_BUFFER_COUNT packets */ + bytes = data->link_mtu * PACKET_BUFFER_COUNT; + setsockopt(data->stream.fd, SOL_SOCKET, SO_SNDBUF, &bytes, + sizeof(bytes)); + data->count = sizeof(struct rtp_header) + sizeof(struct rtp_payload); data->frame_count = 0; data->samples = 0; data->nsamples = 0; data->seq_num = 0; data->frame_count = 0; + data->next_write.tv_sec = 0; + data->next_write.tv_usec = 0; return 0; } @@ -202,23 +194,20 @@ static int bluetooth_stop(struct bluetooth_data *data) bt_audio_rsp_msg_header_t *rsp_hdr = (void*) buf; int err; - data->started = 0; - if (data->stream.fd >= 0) { close(data->stream.fd); - data->stream.fd = 0; + data->stream.fd = -1; } /* send stop request */ memset(stop_req, 0, BT_AUDIO_IPC_PACKET_SIZE); stop_req->h.msg_type = BT_STREAMSTOP_REQ; - err = audioservice_send(data->server.fd, &stop_req->h); + err = audioservice_send(data, &stop_req->h); if (err < 0) return err; - err = audioservice_expect(data->server.fd, &rsp_hdr->msg_h, - BT_STREAMSTOP_RSP); + err = audioservice_expect(data, &rsp_hdr->msg_h, BT_STREAMSTOP_RSP); if (err < 0) return err; @@ -510,12 +499,11 @@ static int bluetooth_a2dp_hw_params(struct bluetooth_data *data) DBG("\tmin_bitpool: %d\n", data->sbc_capabilities.min_bitpool); DBG("\tmax_bitpool: %d\n", data->sbc_capabilities.max_bitpool); - 
err = audioservice_send(data->server.fd, &setconf_req->h); + err = audioservice_send(data, &setconf_req->h); if (err < 0) return err; - err = audioservice_expect(data->server.fd, &rsp_hdr->msg_h, - BT_SETCONFIGURATION_RSP); + err = audioservice_expect(data, &rsp_hdr->msg_h, BT_SETCONFIGURATION_RSP); if (err < 0) return err; @@ -538,14 +526,13 @@ static int bluetooth_a2dp_hw_params(struct bluetooth_data *data) return 0; } -static int avdtp_write(struct bluetooth_data *data, unsigned long duration) +static int avdtp_write(struct bluetooth_data *data) { int ret = 0; struct rtp_header *header; struct rtp_payload *payload; - unsigned long delta; struct timeval now; - int microseconds, bytes; + long duration = data->frame_duration * data->frame_count; header = (struct rtp_header *)data->buffer; payload = (struct rtp_payload *)(data->buffer + sizeof(*header)); @@ -562,19 +549,32 @@ static int avdtp_write(struct bluetooth_data *data, unsigned long duration) data->stream.revents = 0; ret = poll(&data->stream, 1, -1); if (ret == 1 && data->stream.revents == POLLOUT) { + long ahead = 0; gettimeofday(&now, NULL); - if (data->last_write.tv_sec || data->last_write.tv_usec) { - delta = (now.tv_sec - data->last_write.tv_sec) * 1000000 + - now.tv_usec - data->last_write.tv_usec; - if (duration > delta) { - VDBG("duration: %ld delta: %ld, delay %ld us", - duration, delta, duration - delta); - usleep(duration - delta); + + if (data->next_write.tv_sec || data->next_write.tv_usec) { + ahead = (data->next_write.tv_sec - now.tv_sec) * 1000000 + + (data->next_write.tv_usec - now.tv_usec); + if (ahead > 0) { + /* too fast, need to throttle */ + usleep(ahead); } + } else { + data->next_write = now; } - data->last_write = now; - - ret = send(data->stream.fd, data->buffer, data->count, 0); + if (ahead < -duration * PACKET_BUFFER_COUNT) { + /* fallen too far behind, don't try to catch up */ + data->next_write.tv_sec = 0; + data->next_write.tv_usec = 0; + } else { + /* advance next_write by 
duration */ + data->next_write.tv_usec += duration; + data->next_write.tv_sec += + data->next_write.tv_usec / 1000000; + data->next_write.tv_usec %= 1000000; + } + + ret = send(data->stream.fd, data->buffer, data->count, MSG_NOSIGNAL); if (ret < 0) { ERR("send returned %d errno %s.", ret, strerror(errno)); ret = -errno; @@ -583,18 +583,6 @@ static int avdtp_write(struct bluetooth_data *data, unsigned long duration) ret = -errno; } - if (!data->adjusted_sock_buffer) { - /* microseconds: number of microseconds of audio for this write */ - microseconds = data->frame_duration * data->frame_count; - /* ret: number of bytes written */ - /* bytes: number of bytes corresponding to SOCK_BUFFER_MS milliseconds of audio playback */ - bytes = (ret * 1000 * SOCK_BUFFER_MS) / microseconds; - - VDBG("microseconds: %d, ret: %d, bytes: %d\n", microseconds, ret, bytes); - setsockopt(data->stream.fd, SOL_SOCKET, SO_SNDBUF, &bytes, sizeof(bytes)); - data->adjusted_sock_buffer = 1; - } - /* Reset buffer of data to send */ data->count = sizeof(struct rtp_header) + sizeof(struct rtp_payload); data->frame_count = 0; @@ -604,17 +592,20 @@ static int avdtp_write(struct bluetooth_data *data, unsigned long duration) return ret; } -static int audioservice_send(int sk, const bt_audio_msg_header_t *msg) +static int audioservice_send(struct bluetooth_data *data, + const bt_audio_msg_header_t *msg) { int err; VDBG("sending %s", bt_audio_strmsg(msg->msg_type)); - if (send(sk, msg, BT_AUDIO_IPC_PACKET_SIZE, 0) > 0) + if (send(data->server.fd, msg, BT_AUDIO_IPC_PACKET_SIZE, + MSG_NOSIGNAL) > 0) err = 0; else { err = -errno; ERR("Error sending data to audio service: %s(%d)", strerror(errno), errno); + bluetooth_close(data); } return err; @@ -646,10 +637,10 @@ static int audioservice_recv(int sk, bt_audio_msg_header_t *inmsg) return err; } -static int audioservice_expect(int sk, bt_audio_msg_header_t *rsp_hdr, - int expected_type) +static int audioservice_expect(struct bluetooth_data *data, + 
bt_audio_msg_header_t *rsp_hdr, int expected_type) { - int err = audioservice_recv(sk, rsp_hdr); + int err = audioservice_recv(data->server.fd, rsp_hdr); if (err == 0) { if (rsp_hdr->msg_type != expected_type) { err = -EINVAL; @@ -670,11 +661,7 @@ static int bluetooth_init(struct bluetooth_data *data) struct bt_getcapabilities_req *getcaps_req = (void*) buf; struct bt_getcapabilities_rsp *getcaps_rsp = (void*) buf; - memset(data, 0, sizeof(struct bluetooth_data)); - - data->server.fd = -1; - data->stream.fd = -1; - data->adjusted_sock_buffer = 0; + LOGD("bluetooth_init"); sk = bt_audio_service_open(); if (sk <= 0) { @@ -693,13 +680,13 @@ static int bluetooth_init(struct bluetooth_data *data) strncpy(getcaps_req->device, data->address, 18); getcaps_req->transport = BT_CAPABILITIES_TRANSPORT_A2DP; - err = audioservice_send(data->server.fd, &getcaps_req->h); + err = audioservice_send(data, &getcaps_req->h); if (err < 0) { ERR("audioservice_send failed for BT_GETCAPABILITIES_REQ\n"); goto failed; } - err = audioservice_expect(data->server.fd, &rsp_hdr->msg_h, BT_GETCAPABILITIES_RSP); + err = audioservice_expect(data, &rsp_hdr->msg_h, BT_GETCAPABILITIES_RSP); if (err < 0) { ERR("audioservice_expect failed for BT_GETCAPABILITIES_RSP\n"); goto failed; @@ -715,6 +702,12 @@ static int bluetooth_init(struct bluetooth_data *data) if (getcaps_rsp->transport == BT_CAPABILITIES_TRANSPORT_A2DP) data->sbc_capabilities = getcaps_rsp->sbc_capabilities; + err = bluetooth_a2dp_hw_params(data); + if (err < 0) { + ERR("bluetooth_a2dp_hw_params failed err: %d", err); + goto failed; + } + return 0; failed: @@ -728,32 +721,29 @@ int a2dp_init(const char* address, int rate, int channels, a2dpData* dataPtr) { int err; - DBG("a2dp_init"); + DBG("a2dp_init %s rate: %d channels: %d", address, rate, channels); *dataPtr = NULL; struct bluetooth_data* data = malloc(sizeof(struct bluetooth_data)); if (!data) return -1; - strncpy(data->address, address, 18); - - err = bluetooth_init(data); - if (err 
< 0) - goto error; + memset(data, 0, sizeof(struct bluetooth_data)); + data->server.fd = -1; + data->stream.fd = -1; + strncpy(data->address, address, 18); data->rate = rate; data->channels = channels; - err = bluetooth_a2dp_hw_params(data); - if (err < 0) { - ERR("bluetooth_a2dp_hw_params failed"); + err = bluetooth_init(data); + if (err < 0) goto error; - } *dataPtr = data; return 0; error: - bluetooth_exit(data); + bluetooth_close(data); free(data); return err; @@ -764,19 +754,23 @@ int a2dp_write(a2dpData d, const void* buffer, int count) struct bluetooth_data* data = (struct bluetooth_data*)d; uint8_t* src = (uint8_t *)buffer; int codesize = data->codesize; - long ret = 0; + int err, ret = 0; long frames_left = count; int encoded, written; const char *buff; - unsigned long duration = 0; - - if (!data->started) { - ret = bluetooth_start(data); - if (ret < 0) { - ERR("bluetooth_start failed"); - return ret; + + if (data->server.fd == -1) { + err = bluetooth_init(data); + if (err < 0) + return err; + } + + if (data->stream.fd == -1) { + err = bluetooth_start(data); + if (err < 0) { + ERR("bluetooth_start failed err: %d", err); + return err; } - data->started = 1; } while (frames_left >= codesize) { @@ -797,16 +791,15 @@ int a2dp_write(a2dpData d, const void* buffer, int count) data->frame_count++; data->samples += encoded; data->nsamples += encoded; - duration += data->frame_duration; /* No space left for another frame then send */ if (data->count + written >= data->link_mtu) { VDBG("sending packet %d, count %d, link_mtu %u", data->seq_num, data->count, data->link_mtu); - avdtp_write(data, data->last_duration); - data->last_duration = duration; - duration = 0; + err = avdtp_write(data); + if (err < 0) + return err; } ret += encoded; @@ -834,6 +827,6 @@ int a2dp_stop(a2dpData d) void a2dp_cleanup(a2dpData d) { struct bluetooth_data* data = (struct bluetooth_data*)d; - bluetooth_exit(data); + bluetooth_close(data); free(data); } diff --git a/utils/sbc/Makefile.am 
b/utils/sbc/Makefile.am index c42f1622..f8701641 100644 --- a/utils/sbc/Makefile.am +++ b/utils/sbc/Makefile.am @@ -8,9 +8,12 @@ endif if SBC noinst_LTLIBRARIES = libsbc.la -libsbc_la_SOURCES = sbc.h sbc.c sbc_math.h sbc_tables.h +libsbc_la_SOURCES = sbc.h sbc.c sbc_math.h sbc_tables.h \ + sbc_primitives.h sbc_primitives_mmx.h sbc_primitives_neon.h \ + sbc_primitives.c sbc_primitives_mmx.c sbc_primitives_neon.c -libsbc_la_CFLAGS = -finline-functions -funswitch-loops -fgcse-after-reload +libsbc_la_CFLAGS = -finline-functions -fgcse-after-reload \ + -funswitch-loops -funroll-loops noinst_PROGRAMS = sbcinfo sbcdec sbcenc $(sndfile_programs) diff --git a/utils/sbc/sbc.c b/utils/sbc/sbc.c index 651981fa..29258d05 100644 --- a/utils/sbc/sbc.c +++ b/utils/sbc/sbc.c @@ -46,6 +46,7 @@ #include "sbc_tables.h" #include "sbc.h" +#include "sbc_primitives.h" #define SBC_SYNCWORD 0x9C @@ -76,13 +77,16 @@ struct sbc_frame { uint8_t joint; /* only the lower 4 bits of every element are to be used */ - uint8_t scale_factor[2][8]; + uint32_t scale_factor[2][8]; /* raw integer subband samples in the frame */ + int32_t SBC_ALIGNED sb_sample_f[16][2][8]; - int32_t sb_sample_f[16][2][8]; - int32_t sb_sample[16][2][8]; /* modified subband samples */ - int16_t pcm_sample[2][16*8]; /* original pcm audio samples */ + /* modified subband samples */ + int32_t SBC_ALIGNED sb_sample[16][2][8]; + + /* original pcm audio samples */ + int16_t SBC_ALIGNED pcm_sample[2][16*8]; }; struct sbc_decoder_state { @@ -91,16 +95,6 @@ struct sbc_decoder_state { int offset[2][16]; }; -struct sbc_encoder_state { - int subbands; - int position[2]; - int16_t X[2][256]; - void (*sbc_analyze_4b_4s)(int16_t *pcm, int16_t *x, - int32_t *out, int out_stride); - void (*sbc_analyze_4b_8s)(int16_t *pcm, int16_t *x, - int32_t *out, int out_stride); -}; - /* * Calculates the CRC-8 of the first len bits in data */ @@ -368,7 +362,7 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8]) static int 
sbc_unpack_frame(const uint8_t *data, struct sbc_frame *frame, size_t len) { - int consumed; + unsigned int consumed; /* Will copy the parts of the header that are relevant to crc * calculation here */ uint8_t crc_header[11] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; @@ -653,180 +647,41 @@ static int sbc_synthesize_audio(struct sbc_decoder_state *state, } } -static inline void _sbc_analyze_four(const int16_t *in, int32_t *out) -{ - FIXED_A t1[4]; - FIXED_T t2[4]; - int i = 0, hop = 0; - - /* rounding coefficient */ - t1[0] = t1[1] = t1[2] = t1[3] = - (FIXED_A) 1 << (SBC_PROTO_FIXED4_SCALE - 1); - - /* low pass polyphase filter */ - for (hop = 0; hop < 40; hop += 8) { - t1[0] += (FIXED_A) in[hop] * _sbc_proto_fixed4[hop]; - t1[1] += (FIXED_A) in[hop + 1] * _sbc_proto_fixed4[hop + 1]; - t1[2] += (FIXED_A) in[hop + 2] * _sbc_proto_fixed4[hop + 2]; - t1[1] += (FIXED_A) in[hop + 3] * _sbc_proto_fixed4[hop + 3]; - t1[0] += (FIXED_A) in[hop + 4] * _sbc_proto_fixed4[hop + 4]; - t1[3] += (FIXED_A) in[hop + 5] * _sbc_proto_fixed4[hop + 5]; - t1[3] += (FIXED_A) in[hop + 7] * _sbc_proto_fixed4[hop + 7]; - } - - /* scaling */ - t2[0] = t1[0] >> SBC_PROTO_FIXED4_SCALE; - t2[1] = t1[1] >> SBC_PROTO_FIXED4_SCALE; - t2[2] = t1[2] >> SBC_PROTO_FIXED4_SCALE; - t2[3] = t1[3] >> SBC_PROTO_FIXED4_SCALE; - - /* do the cos transform */ - for (i = 0, hop = 0; i < 4; hop += 8, i++) { - out[i] = ((FIXED_A) t2[0] * cos_table_fixed_4[0 + hop] + - (FIXED_A) t2[1] * cos_table_fixed_4[1 + hop] + - (FIXED_A) t2[2] * cos_table_fixed_4[2 + hop] + - (FIXED_A) t2[3] * cos_table_fixed_4[5 + hop]) >> - (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS); - } -} - -static void sbc_analyze_4b_4s(int16_t *pcm, int16_t *x, - int32_t *out, int out_stride) -{ - int i; - - /* Input 4 x 4 Audio Samples */ - for (i = 0; i < 16; i += 4) { - x[64 + i] = x[0 + i] = pcm[15 - i]; - x[65 + i] = x[1 + i] = pcm[14 - i]; - x[66 + i] = x[2 + i] = pcm[13 - i]; - x[67 + i] = x[3 + i] = pcm[12 - i]; - } - - /* Analyze four blocks */ - 
_sbc_analyze_four(x + 12, out); - out += out_stride; - _sbc_analyze_four(x + 8, out); - out += out_stride; - _sbc_analyze_four(x + 4, out); - out += out_stride; - _sbc_analyze_four(x, out); -} - -static inline void _sbc_analyze_eight(const int16_t *in, int32_t *out) -{ - FIXED_A t1[8]; - FIXED_T t2[8]; - int i, hop; - - /* rounding coefficient */ - t1[0] = t1[1] = t1[2] = t1[3] = t1[4] = t1[5] = t1[6] = t1[7] = - (FIXED_A) 1 << (SBC_PROTO_FIXED8_SCALE-1); - - /* low pass polyphase filter */ - for (hop = 0; hop < 80; hop += 16) { - t1[0] += (FIXED_A) in[hop] * _sbc_proto_fixed8[hop]; - t1[1] += (FIXED_A) in[hop + 1] * _sbc_proto_fixed8[hop + 1]; - t1[2] += (FIXED_A) in[hop + 2] * _sbc_proto_fixed8[hop + 2]; - t1[3] += (FIXED_A) in[hop + 3] * _sbc_proto_fixed8[hop + 3]; - t1[4] += (FIXED_A) in[hop + 4] * _sbc_proto_fixed8[hop + 4]; - t1[3] += (FIXED_A) in[hop + 5] * _sbc_proto_fixed8[hop + 5]; - t1[2] += (FIXED_A) in[hop + 6] * _sbc_proto_fixed8[hop + 6]; - t1[1] += (FIXED_A) in[hop + 7] * _sbc_proto_fixed8[hop + 7]; - t1[0] += (FIXED_A) in[hop + 8] * _sbc_proto_fixed8[hop + 8]; - t1[5] += (FIXED_A) in[hop + 9] * _sbc_proto_fixed8[hop + 9]; - t1[6] += (FIXED_A) in[hop + 10] * _sbc_proto_fixed8[hop + 10]; - t1[7] += (FIXED_A) in[hop + 11] * _sbc_proto_fixed8[hop + 11]; - t1[7] += (FIXED_A) in[hop + 13] * _sbc_proto_fixed8[hop + 13]; - t1[6] += (FIXED_A) in[hop + 14] * _sbc_proto_fixed8[hop + 14]; - t1[5] += (FIXED_A) in[hop + 15] * _sbc_proto_fixed8[hop + 15]; - } - - /* scaling */ - t2[0] = t1[0] >> SBC_PROTO_FIXED8_SCALE; - t2[1] = t1[1] >> SBC_PROTO_FIXED8_SCALE; - t2[2] = t1[2] >> SBC_PROTO_FIXED8_SCALE; - t2[3] = t1[3] >> SBC_PROTO_FIXED8_SCALE; - t2[4] = t1[4] >> SBC_PROTO_FIXED8_SCALE; - t2[5] = t1[5] >> SBC_PROTO_FIXED8_SCALE; - t2[6] = t1[6] >> SBC_PROTO_FIXED8_SCALE; - t2[7] = t1[7] >> SBC_PROTO_FIXED8_SCALE; - - /* do the cos transform */ - for (i = 0, hop = 0; i < 8; hop += 16, i++) { - out[i] = ((FIXED_A) t2[0] * cos_table_fixed_8[0 + hop] + - (FIXED_A) 
t2[1] * cos_table_fixed_8[1 + hop] + - (FIXED_A) t2[2] * cos_table_fixed_8[2 + hop] + - (FIXED_A) t2[3] * cos_table_fixed_8[3 + hop] + - (FIXED_A) t2[4] * cos_table_fixed_8[4 + hop] + - (FIXED_A) t2[5] * cos_table_fixed_8[9 + hop] + - (FIXED_A) t2[6] * cos_table_fixed_8[10 + hop] + - (FIXED_A) t2[7] * cos_table_fixed_8[11 + hop]) >> - (SBC_COS_TABLE_FIXED8_SCALE - SCALE_OUT_BITS); - } -} - -static void sbc_analyze_4b_8s(int16_t *pcm, int16_t *x, - int32_t *out, int out_stride) -{ - int i; - - /* Input 4 x 8 Audio Samples */ - for (i = 0; i < 32; i += 8) { - x[128 + i] = x[0 + i] = pcm[31 - i]; - x[129 + i] = x[1 + i] = pcm[30 - i]; - x[130 + i] = x[2 + i] = pcm[29 - i]; - x[131 + i] = x[3 + i] = pcm[28 - i]; - x[132 + i] = x[4 + i] = pcm[27 - i]; - x[133 + i] = x[5 + i] = pcm[26 - i]; - x[134 + i] = x[6 + i] = pcm[25 - i]; - x[135 + i] = x[7 + i] = pcm[24 - i]; - } - - /* Analyze four blocks */ - _sbc_analyze_eight(x + 24, out); - out += out_stride; - _sbc_analyze_eight(x + 16, out); - out += out_stride; - _sbc_analyze_eight(x + 8, out); - out += out_stride; - _sbc_analyze_eight(x, out); -} - static int sbc_analyze_audio(struct sbc_encoder_state *state, struct sbc_frame *frame) { int ch, blk; + int16_t *x; switch (frame->subbands) { case 4: - for (ch = 0; ch < frame->channels; ch++) + for (ch = 0; ch < frame->channels; ch++) { + x = &state->X[ch][state->position - 16 + + frame->blocks * 4]; for (blk = 0; blk < frame->blocks; blk += 4) { state->sbc_analyze_4b_4s( - &frame->pcm_sample[ch][blk * 4], - &state->X[ch][state->position[ch]], + x, frame->sb_sample_f[blk][ch], frame->sb_sample_f[blk + 1][ch] - frame->sb_sample_f[blk][ch]); - state->position[ch] -= 16; - if (state->position[ch] < 0) - state->position[ch] = 64 - 16; + x -= 16; } + } return frame->blocks * 4; case 8: - for (ch = 0; ch < frame->channels; ch++) + for (ch = 0; ch < frame->channels; ch++) { + x = &state->X[ch][state->position - 32 + + frame->blocks * 8]; for (blk = 0; blk < frame->blocks; blk += 4) 
{ state->sbc_analyze_4b_8s( - &frame->pcm_sample[ch][blk * 8], - &state->X[ch][state->position[ch]], + x, frame->sb_sample_f[blk][ch], frame->sb_sample_f[blk + 1][ch] - frame->sb_sample_f[blk][ch]); - state->position[ch] -= 32; - if (state->position[ch] < 0) - state->position[ch] = 128 - 32; + x -= 32; } + } return frame->blocks * 8; default: @@ -836,23 +691,31 @@ static int sbc_analyze_audio(struct sbc_encoder_state *state, /* Supplementary bitstream writing macros for 'sbc_pack_frame' */ -#define PUT_BITS(v, n)\ - bits_cache = (v) | (bits_cache << (n));\ - bits_count += (n);\ - if (bits_count >= 16) {\ - bits_count -= 8;\ - *data_ptr++ = (uint8_t) (bits_cache >> bits_count);\ - bits_count -= 8;\ - *data_ptr++ = (uint8_t) (bits_cache >> bits_count);\ - }\ - -#define FLUSH_BITS()\ - while (bits_count >= 8) {\ - bits_count -= 8;\ - *data_ptr++ = (uint8_t) (bits_cache >> bits_count);\ - }\ - if (bits_count > 0)\ - *data_ptr++ = (uint8_t) (bits_cache << (8 - bits_count));\ +#define PUT_BITS(data_ptr, bits_cache, bits_count, v, n) \ + do { \ + bits_cache = (v) | (bits_cache << (n)); \ + bits_count += (n); \ + if (bits_count >= 16) { \ + bits_count -= 8; \ + *data_ptr++ = (uint8_t) \ + (bits_cache >> bits_count); \ + bits_count -= 8; \ + *data_ptr++ = (uint8_t) \ + (bits_cache >> bits_count); \ + } \ + } while (0) + +#define FLUSH_BITS(data_ptr, bits_cache, bits_count) \ + do { \ + while (bits_count >= 8) { \ + bits_count -= 8; \ + *data_ptr++ = (uint8_t) \ + (bits_cache >> bits_count); \ + } \ + if (bits_count > 0) \ + *data_ptr++ = (uint8_t) \ + (bits_cache << (8 - bits_count)); \ + } while (0) /* * Packs the SBC frame from frame into the memory at data. 
At most len @@ -869,7 +732,9 @@ static int sbc_analyze_audio(struct sbc_encoder_state *state, * -99 not implemented */ -static int sbc_pack_frame(uint8_t *data, struct sbc_frame *frame, size_t len) +static SBC_ALWAYS_INLINE int sbc_pack_frame_internal( + uint8_t *data, struct sbc_frame *frame, size_t len, + int frame_subbands, int frame_channels) { /* Bitstream writer starts from the fourth byte */ uint8_t *data_ptr = data + 4; @@ -887,8 +752,6 @@ static int sbc_pack_frame(uint8_t *data, struct sbc_frame *frame, size_t len) uint32_t levels[2][8]; /* levels are derived from that */ uint32_t sb_sample_delta[2][8]; - u_int32_t scalefactor[2][8]; /* derived from frame->scale_factor */ - data[0] = SBC_SYNCWORD; data[1] = (frame->frequency & 0x03) << 6; @@ -899,7 +762,7 @@ static int sbc_pack_frame(uint8_t *data, struct sbc_frame *frame, size_t len) data[1] |= (frame->allocation & 0x01) << 1; - switch (frame->subbands) { + switch (frame_subbands) { case 4: /* Nothing to do */ break; @@ -914,11 +777,11 @@ static int sbc_pack_frame(uint8_t *data, struct sbc_frame *frame, size_t len) data[2] = frame->bitpool; if ((frame->mode == MONO || frame->mode == DUAL_CHANNEL) && - frame->bitpool > frame->subbands << 4) + frame->bitpool > frame_subbands << 4) return -5; if ((frame->mode == STEREO || frame->mode == JOINT_STEREO) && - frame->bitpool > frame->subbands << 5) + frame->bitpool > frame_subbands << 5) return -5; /* Can't fill in crc yet */ @@ -927,36 +790,24 @@ static int sbc_pack_frame(uint8_t *data, struct sbc_frame *frame, size_t len) crc_header[1] = data[2]; crc_pos = 16; - for (ch = 0; ch < frame->channels; ch++) { - for (sb = 0; sb < frame->subbands; sb++) { - frame->scale_factor[ch][sb] = 0; - scalefactor[ch][sb] = 2 << SCALE_OUT_BITS; - for (blk = 0; blk < frame->blocks; blk++) { - while (scalefactor[ch][sb] < fabs(frame->sb_sample_f[blk][ch][sb])) { - frame->scale_factor[ch][sb]++; - scalefactor[ch][sb] *= 2; - } - } - } - } - if (frame->mode == JOINT_STEREO) { /* 
like frame->sb_sample but joint stereo */ int32_t sb_sample_j[16][2]; /* scalefactor and scale_factor in joint case */ - u_int32_t scalefactor_j[2]; + uint32_t scalefactor_j[2]; uint8_t scale_factor_j[2]; uint8_t joint = 0; frame->joint = 0; - for (sb = 0; sb < frame->subbands - 1; sb++) { + for (sb = 0; sb < frame_subbands - 1; sb++) { scale_factor_j[0] = 0; scalefactor_j[0] = 2 << SCALE_OUT_BITS; scale_factor_j[1] = 0; scalefactor_j[1] = 2 << SCALE_OUT_BITS; for (blk = 0; blk < frame->blocks; blk++) { + uint32_t tmp; /* Calculate joint stereo signal */ sb_sample_j[blk][0] = ASR(frame->sb_sample_f[blk][0][sb], 1) + @@ -966,11 +817,13 @@ static int sbc_pack_frame(uint8_t *data, struct sbc_frame *frame, size_t len) ASR(frame->sb_sample_f[blk][1][sb], 1); /* calculate scale_factor_j and scalefactor_j for joint case */ - while (scalefactor_j[0] < fabs(sb_sample_j[blk][0])) { + tmp = fabs(sb_sample_j[blk][0]); + while (scalefactor_j[0] < tmp) { scale_factor_j[0]++; scalefactor_j[0] *= 2; } - while (scalefactor_j[1] < fabs(sb_sample_j[blk][1])) { + tmp = fabs(sb_sample_j[blk][1]); + while (scalefactor_j[1] < tmp) { scale_factor_j[1]++; scalefactor_j[1] *= 2; } @@ -982,7 +835,7 @@ static int sbc_pack_frame(uint8_t *data, struct sbc_frame *frame, size_t len) (scale_factor_j[0] + scale_factor_j[1])) { /* use joint stereo for this subband */ - joint |= 1 << (frame->subbands - 1 - sb); + joint |= 1 << (frame_subbands - 1 - sb); frame->joint |= 1 << sb; frame->scale_factor[0][sb] = scale_factor_j[0]; frame->scale_factor[1][sb] = scale_factor_j[1]; @@ -995,14 +848,16 @@ static int sbc_pack_frame(uint8_t *data, struct sbc_frame *frame, size_t len) } } - PUT_BITS(joint, frame->subbands); + PUT_BITS(data_ptr, bits_cache, bits_count, + joint, frame_subbands); crc_header[crc_pos >> 3] = joint; - crc_pos += frame->subbands; + crc_pos += frame_subbands; } - for (ch = 0; ch < frame->channels; ch++) { - for (sb = 0; sb < frame->subbands; sb++) { - PUT_BITS(frame->scale_factor[ch][sb] & 
0x0F, 4); + for (ch = 0; ch < frame_channels; ch++) { + for (sb = 0; sb < frame_subbands; sb++) { + PUT_BITS(data_ptr, bits_cache, bits_count, + frame->scale_factor[ch][sb] & 0x0F, 4); crc_header[crc_pos >> 3] <<= 4; crc_header[crc_pos >> 3] |= frame->scale_factor[ch][sb] & 0x0F; crc_pos += 4; @@ -1017,8 +872,8 @@ static int sbc_pack_frame(uint8_t *data, struct sbc_frame *frame, size_t len) sbc_calculate_bits(frame, bits); - for (ch = 0; ch < frame->channels; ch++) { - for (sb = 0; sb < frame->subbands; sb++) { + for (ch = 0; ch < frame_channels; ch++) { + for (sb = 0; sb < frame_subbands; sb++) { levels[ch][sb] = ((1 << bits[ch][sb]) - 1) << (32 - (frame->scale_factor[ch][sb] + SCALE_OUT_BITS + 2)); @@ -1029,8 +884,8 @@ static int sbc_pack_frame(uint8_t *data, struct sbc_frame *frame, size_t len) } for (blk = 0; blk < frame->blocks; blk++) { - for (ch = 0; ch < frame->channels; ch++) { - for (sb = 0; sb < frame->subbands; sb++) { + for (ch = 0; ch < frame_channels; ch++) { + for (sb = 0; sb < frame_subbands; sb++) { if (bits[ch][sb] == 0) continue; @@ -1039,33 +894,46 @@ static int sbc_pack_frame(uint8_t *data, struct sbc_frame *frame, size_t len) (sb_sample_delta[ch][sb] + frame->sb_sample_f[blk][ch][sb])) >> 32; - PUT_BITS(audio_sample, bits[ch][sb]); + PUT_BITS(data_ptr, bits_cache, bits_count, + audio_sample, bits[ch][sb]); } } } - FLUSH_BITS(); + FLUSH_BITS(data_ptr, bits_cache, bits_count); return data_ptr - data; } +static int sbc_pack_frame(uint8_t *data, struct sbc_frame *frame, size_t len) +{ + if (frame->subbands == 4) { + if (frame->channels == 1) + return sbc_pack_frame_internal(data, frame, len, 4, 1); + else + return sbc_pack_frame_internal(data, frame, len, 4, 2); + } else { + if (frame->channels == 1) + return sbc_pack_frame_internal(data, frame, len, 8, 1); + else + return sbc_pack_frame_internal(data, frame, len, 8, 2); + } +} + static void sbc_encoder_init(struct sbc_encoder_state *state, const struct sbc_frame *frame) { memset(&state->X, 0, 
sizeof(state->X)); - state->subbands = frame->subbands; - state->position[0] = state->position[1] = 12 * frame->subbands; + state->position = SBC_X_BUFFER_SIZE - frame->subbands * 9; - /* Default implementation for analyze function */ - state->sbc_analyze_4b_4s = sbc_analyze_4b_4s; - state->sbc_analyze_4b_8s = sbc_analyze_4b_8s; + sbc_init_primitives(state); } struct sbc_priv { int init; - struct sbc_frame frame; - struct sbc_decoder_state dec_state; - struct sbc_encoder_state enc_state; + struct SBC_ALIGNED sbc_frame frame; + struct SBC_ALIGNED sbc_decoder_state dec_state; + struct SBC_ALIGNED sbc_encoder_state enc_state; }; static void sbc_set_defaults(sbc_t *sbc, unsigned long flags) @@ -1091,10 +959,13 @@ int sbc_init(sbc_t *sbc, unsigned long flags) memset(sbc, 0, sizeof(sbc_t)); - sbc->priv = malloc(sizeof(struct sbc_priv)); - if (!sbc->priv) + sbc->priv_alloc_base = malloc(sizeof(struct sbc_priv) + SBC_ALIGN_MASK); + if (!sbc->priv_alloc_base) return -ENOMEM; + sbc->priv = (void *) (((uintptr_t) sbc->priv_alloc_base + + SBC_ALIGN_MASK) & ~((uintptr_t) SBC_ALIGN_MASK)); + memset(sbc->priv, 0, sizeof(struct sbc_priv)); sbc_set_defaults(sbc, flags); @@ -1177,8 +1048,10 @@ int sbc_encode(sbc_t *sbc, void *input, int input_len, void *output, int output_len, int *written) { struct sbc_priv *priv; - char *ptr; - int i, ch, framelen, samples; + int framelen, samples; + int (*sbc_enc_process_input)(int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels); if (!sbc && !input) return -EIO; @@ -1213,22 +1086,34 @@ int sbc_encode(sbc_t *sbc, void *input, int input_len, void *output, if (!output || output_len < priv->frame.length) return -ENOSPC; - ptr = input; - - for (i = 0; i < priv->frame.subbands * priv->frame.blocks; i++) { - for (ch = 0; ch < priv->frame.channels; ch++) { - int16_t s; - if (sbc->endian == SBC_BE) - s = (ptr[0] & 0xff) << 8 | (ptr[1] & 0xff); - else - s = (ptr[0] & 0xff) | (ptr[1] & 0xff) << 8; - ptr += 2; 
- priv->frame.pcm_sample[ch][i] = s; - } + /* Select the needed input data processing function and call it */ + if (priv->frame.subbands == 8) { + if (sbc->endian == SBC_BE) + sbc_enc_process_input = + priv->enc_state.sbc_enc_process_input_8s_be; + else + sbc_enc_process_input = + priv->enc_state.sbc_enc_process_input_8s_le; + } else { + if (sbc->endian == SBC_BE) + sbc_enc_process_input = + priv->enc_state.sbc_enc_process_input_4s_be; + else + sbc_enc_process_input = + priv->enc_state.sbc_enc_process_input_4s_le; } + priv->enc_state.position = sbc_enc_process_input( + priv->enc_state.position, (const uint8_t *) input, + priv->enc_state.X, priv->frame.subbands * priv->frame.blocks, + priv->frame.channels); + samples = sbc_analyze_audio(&priv->enc_state, &priv->frame); + priv->enc_state.sbc_calc_scalefactors( + priv->frame.sb_sample_f, priv->frame.scale_factor, + priv->frame.blocks, priv->frame.channels, priv->frame.subbands); + framelen = sbc_pack_frame(output, &priv->frame, output_len); if (written) @@ -1242,8 +1127,8 @@ void sbc_finish(sbc_t *sbc) if (!sbc) return; - if (sbc->priv) - free(sbc->priv); + if (sbc->priv_alloc_base) + free(sbc->priv_alloc_base); memset(sbc, 0, sizeof(sbc_t)); } diff --git a/utils/sbc/sbc.h b/utils/sbc/sbc.h index 8ac59309..b0a14888 100644 --- a/utils/sbc/sbc.h +++ b/utils/sbc/sbc.h @@ -74,6 +74,7 @@ struct sbc_struct { uint8_t endian; void *priv; + void *priv_alloc_base; }; typedef struct sbc_struct sbc_t; diff --git a/utils/sbc/sbc_math.h b/utils/sbc/sbc_math.h index 6ca4f526..b87bc81c 100644 --- a/utils/sbc/sbc_math.h +++ b/utils/sbc/sbc_math.h @@ -29,8 +29,6 @@ #define ASR(val, bits) ((-2 >> 1 == -1) ? 
\ ((int32_t)(val)) >> (bits) : ((int32_t) (val)) / (1 << (bits))) -#define SCALE_OUT_BITS 15 - #define SCALE_SPROTO4_TBL 12 #define SCALE_SPROTO8_TBL 14 #define SCALE_NPROTO4_TBL 11 diff --git a/utils/sbc/sbc_primitives.c b/utils/sbc/sbc_primitives.c new file mode 100644 index 00000000..303f3fee --- /dev/null +++ b/utils/sbc/sbc_primitives.c @@ -0,0 +1,469 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2004-2009 Marcel Holtmann <marcel@holtmann.org> + * Copyright (C) 2004-2005 Henryk Ploetz <henryk@ploetzli.ch> + * Copyright (C) 2005-2006 Brad Midgley <bmidgley@xmission.com> + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include <stdint.h> +#include <limits.h> +#include <string.h> +#include "sbc.h" +#include "sbc_math.h" +#include "sbc_tables.h" + +#include "sbc_primitives.h" +#include "sbc_primitives_mmx.h" +#include "sbc_primitives_neon.h" + +/* + * A reference C code of analysis filter with SIMD-friendly tables + * reordering and code layout. This code can be used to develop platform + * specific SIMD optimizations. 
Also it may be used as some kind of test + * for compiler autovectorization capabilities (who knows, if the compiler + * is very good at this stuff, hand optimized assembly may be not strictly + * needed for some platform). + * + * Note: It is also possible to make a simple variant of analysis filter, + * which needs only a single constants table without taking care about + * even/odd cases. This simple variant of filter can be implemented without + * input data permutation. The only thing that would be lost is the + * possibility to use pairwise SIMD multiplications. But for some simple + * CPU cores without SIMD extensions it can be useful. If anybody is + * interested in implementing such variant of a filter, sourcecode from + * bluez versions 4.26/4.27 can be used as a reference and the history of + * the changes in git repository done around that time may be worth checking. + */ + +static inline void sbc_analyze_four_simd(const int16_t *in, int32_t *out, + const FIXED_T *consts) +{ + FIXED_A t1[4]; + FIXED_T t2[4]; + int hop = 0; + + /* rounding coefficient */ + t1[0] = t1[1] = t1[2] = t1[3] = + (FIXED_A) 1 << (SBC_PROTO_FIXED4_SCALE - 1); + + /* low pass polyphase filter */ + for (hop = 0; hop < 40; hop += 8) { + t1[0] += (FIXED_A) in[hop] * consts[hop]; + t1[0] += (FIXED_A) in[hop + 1] * consts[hop + 1]; + t1[1] += (FIXED_A) in[hop + 2] * consts[hop + 2]; + t1[1] += (FIXED_A) in[hop + 3] * consts[hop + 3]; + t1[2] += (FIXED_A) in[hop + 4] * consts[hop + 4]; + t1[2] += (FIXED_A) in[hop + 5] * consts[hop + 5]; + t1[3] += (FIXED_A) in[hop + 6] * consts[hop + 6]; + t1[3] += (FIXED_A) in[hop + 7] * consts[hop + 7]; + } + + /* scaling */ + t2[0] = t1[0] >> SBC_PROTO_FIXED4_SCALE; + t2[1] = t1[1] >> SBC_PROTO_FIXED4_SCALE; + t2[2] = t1[2] >> SBC_PROTO_FIXED4_SCALE; + t2[3] = t1[3] >> SBC_PROTO_FIXED4_SCALE; + + /* do the cos transform */ + t1[0] = (FIXED_A) t2[0] * consts[40 + 0]; + t1[0] += (FIXED_A) t2[1] * consts[40 + 1]; + t1[1] = (FIXED_A) t2[0] * consts[40 + 
2]; + t1[1] += (FIXED_A) t2[1] * consts[40 + 3]; + t1[2] = (FIXED_A) t2[0] * consts[40 + 4]; + t1[2] += (FIXED_A) t2[1] * consts[40 + 5]; + t1[3] = (FIXED_A) t2[0] * consts[40 + 6]; + t1[3] += (FIXED_A) t2[1] * consts[40 + 7]; + + t1[0] += (FIXED_A) t2[2] * consts[40 + 8]; + t1[0] += (FIXED_A) t2[3] * consts[40 + 9]; + t1[1] += (FIXED_A) t2[2] * consts[40 + 10]; + t1[1] += (FIXED_A) t2[3] * consts[40 + 11]; + t1[2] += (FIXED_A) t2[2] * consts[40 + 12]; + t1[2] += (FIXED_A) t2[3] * consts[40 + 13]; + t1[3] += (FIXED_A) t2[2] * consts[40 + 14]; + t1[3] += (FIXED_A) t2[3] * consts[40 + 15]; + + out[0] = t1[0] >> + (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS); + out[1] = t1[1] >> + (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS); + out[2] = t1[2] >> + (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS); + out[3] = t1[3] >> + (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS); +} + +static inline void sbc_analyze_eight_simd(const int16_t *in, int32_t *out, + const FIXED_T *consts) +{ + FIXED_A t1[8]; + FIXED_T t2[8]; + int i, hop; + + /* rounding coefficient */ + t1[0] = t1[1] = t1[2] = t1[3] = t1[4] = t1[5] = t1[6] = t1[7] = + (FIXED_A) 1 << (SBC_PROTO_FIXED8_SCALE-1); + + /* low pass polyphase filter */ + for (hop = 0; hop < 80; hop += 16) { + t1[0] += (FIXED_A) in[hop] * consts[hop]; + t1[0] += (FIXED_A) in[hop + 1] * consts[hop + 1]; + t1[1] += (FIXED_A) in[hop + 2] * consts[hop + 2]; + t1[1] += (FIXED_A) in[hop + 3] * consts[hop + 3]; + t1[2] += (FIXED_A) in[hop + 4] * consts[hop + 4]; + t1[2] += (FIXED_A) in[hop + 5] * consts[hop + 5]; + t1[3] += (FIXED_A) in[hop + 6] * consts[hop + 6]; + t1[3] += (FIXED_A) in[hop + 7] * consts[hop + 7]; + t1[4] += (FIXED_A) in[hop + 8] * consts[hop + 8]; + t1[4] += (FIXED_A) in[hop + 9] * consts[hop + 9]; + t1[5] += (FIXED_A) in[hop + 10] * consts[hop + 10]; + t1[5] += (FIXED_A) in[hop + 11] * consts[hop + 11]; + t1[6] += (FIXED_A) in[hop + 12] * consts[hop + 12]; + t1[6] += (FIXED_A) in[hop + 13] * consts[hop + 13]; + t1[7] += (FIXED_A) 
in[hop + 14] * consts[hop + 14]; + t1[7] += (FIXED_A) in[hop + 15] * consts[hop + 15]; + } + + /* scaling */ + t2[0] = t1[0] >> SBC_PROTO_FIXED8_SCALE; + t2[1] = t1[1] >> SBC_PROTO_FIXED8_SCALE; + t2[2] = t1[2] >> SBC_PROTO_FIXED8_SCALE; + t2[3] = t1[3] >> SBC_PROTO_FIXED8_SCALE; + t2[4] = t1[4] >> SBC_PROTO_FIXED8_SCALE; + t2[5] = t1[5] >> SBC_PROTO_FIXED8_SCALE; + t2[6] = t1[6] >> SBC_PROTO_FIXED8_SCALE; + t2[7] = t1[7] >> SBC_PROTO_FIXED8_SCALE; + + + /* do the cos transform */ + t1[0] = t1[1] = t1[2] = t1[3] = t1[4] = t1[5] = t1[6] = t1[7] = 0; + + for (i = 0; i < 4; i++) { + t1[0] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 0]; + t1[0] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 1]; + t1[1] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 2]; + t1[1] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 3]; + t1[2] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 4]; + t1[2] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 5]; + t1[3] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 6]; + t1[3] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 7]; + t1[4] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 8]; + t1[4] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 9]; + t1[5] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 10]; + t1[5] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 11]; + t1[6] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 12]; + t1[6] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 13]; + t1[7] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 14]; + t1[7] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 15]; + } + + for (i = 0; i < 8; i++) + out[i] = t1[i] >> + (SBC_COS_TABLE_FIXED8_SCALE - SCALE_OUT_BITS); +} + +static inline void sbc_analyze_4b_4s_simd(int16_t *x, + int32_t *out, int out_stride) +{ + /* Analyze blocks */ + sbc_analyze_four_simd(x + 12, out, analysis_consts_fixed4_simd_odd); + out += out_stride; + sbc_analyze_four_simd(x + 8, out, analysis_consts_fixed4_simd_even); + out += out_stride; + 
sbc_analyze_four_simd(x + 4, out, analysis_consts_fixed4_simd_odd); + out += out_stride; + sbc_analyze_four_simd(x + 0, out, analysis_consts_fixed4_simd_even); +} + +static inline void sbc_analyze_4b_8s_simd(int16_t *x, + int32_t *out, int out_stride) +{ + /* Analyze blocks */ + sbc_analyze_eight_simd(x + 24, out, analysis_consts_fixed8_simd_odd); + out += out_stride; + sbc_analyze_eight_simd(x + 16, out, analysis_consts_fixed8_simd_even); + out += out_stride; + sbc_analyze_eight_simd(x + 8, out, analysis_consts_fixed8_simd_odd); + out += out_stride; + sbc_analyze_eight_simd(x + 0, out, analysis_consts_fixed8_simd_even); +} + +static inline int16_t unaligned16_be(const uint8_t *ptr) +{ + return (int16_t) ((ptr[0] << 8) | ptr[1]); +} + +static inline int16_t unaligned16_le(const uint8_t *ptr) +{ + return (int16_t) (ptr[0] | (ptr[1] << 8)); +} + +/* + * Internal helper functions for input data processing. In order to get + * optimal performance, it is important to have "nsamples", "nchannels" + * and "big_endian" arguments used with this inline function as compile + * time constants. + */ + +static SBC_ALWAYS_INLINE int sbc_encoder_process_input_s4_internal( + int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels, int big_endian) +{ + /* handle X buffer wraparound */ + if (position < nsamples) { + if (nchannels > 0) + memcpy(&X[0][SBC_X_BUFFER_SIZE - 36], &X[0][position], + 36 * sizeof(int16_t)); + if (nchannels > 1) + memcpy(&X[1][SBC_X_BUFFER_SIZE - 36], &X[1][position], + 36 * sizeof(int16_t)); + position = SBC_X_BUFFER_SIZE - 36; + } + + #define PCM(i) (big_endian ? 
\ + unaligned16_be(pcm + (i) * 2) : unaligned16_le(pcm + (i) * 2)) + + /* copy/permutate audio samples */ + while ((nsamples -= 8) >= 0) { + position -= 8; + if (nchannels > 0) { + int16_t *x = &X[0][position]; + x[0] = PCM(0 + 7 * nchannels); + x[1] = PCM(0 + 3 * nchannels); + x[2] = PCM(0 + 6 * nchannels); + x[3] = PCM(0 + 4 * nchannels); + x[4] = PCM(0 + 0 * nchannels); + x[5] = PCM(0 + 2 * nchannels); + x[6] = PCM(0 + 1 * nchannels); + x[7] = PCM(0 + 5 * nchannels); + } + if (nchannels > 1) { + int16_t *x = &X[1][position]; + x[0] = PCM(1 + 7 * nchannels); + x[1] = PCM(1 + 3 * nchannels); + x[2] = PCM(1 + 6 * nchannels); + x[3] = PCM(1 + 4 * nchannels); + x[4] = PCM(1 + 0 * nchannels); + x[5] = PCM(1 + 2 * nchannels); + x[6] = PCM(1 + 1 * nchannels); + x[7] = PCM(1 + 5 * nchannels); + } + pcm += 16 * nchannels; + } + #undef PCM + + return position; +} + +static SBC_ALWAYS_INLINE int sbc_encoder_process_input_s8_internal( + int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels, int big_endian) +{ + /* handle X buffer wraparound */ + if (position < nsamples) { + if (nchannels > 0) + memcpy(&X[0][SBC_X_BUFFER_SIZE - 72], &X[0][position], + 72 * sizeof(int16_t)); + if (nchannels > 1) + memcpy(&X[1][SBC_X_BUFFER_SIZE - 72], &X[1][position], + 72 * sizeof(int16_t)); + position = SBC_X_BUFFER_SIZE - 72; + } + + #define PCM(i) (big_endian ? 
\ + unaligned16_be(pcm + (i) * 2) : unaligned16_le(pcm + (i) * 2)) + + /* copy/permutate audio samples */ + while ((nsamples -= 16) >= 0) { + position -= 16; + if (nchannels > 0) { + int16_t *x = &X[0][position]; + x[0] = PCM(0 + 15 * nchannels); + x[1] = PCM(0 + 7 * nchannels); + x[2] = PCM(0 + 14 * nchannels); + x[3] = PCM(0 + 8 * nchannels); + x[4] = PCM(0 + 13 * nchannels); + x[5] = PCM(0 + 9 * nchannels); + x[6] = PCM(0 + 12 * nchannels); + x[7] = PCM(0 + 10 * nchannels); + x[8] = PCM(0 + 11 * nchannels); + x[9] = PCM(0 + 3 * nchannels); + x[10] = PCM(0 + 6 * nchannels); + x[11] = PCM(0 + 0 * nchannels); + x[12] = PCM(0 + 5 * nchannels); + x[13] = PCM(0 + 1 * nchannels); + x[14] = PCM(0 + 4 * nchannels); + x[15] = PCM(0 + 2 * nchannels); + } + if (nchannels > 1) { + int16_t *x = &X[1][position]; + x[0] = PCM(1 + 15 * nchannels); + x[1] = PCM(1 + 7 * nchannels); + x[2] = PCM(1 + 14 * nchannels); + x[3] = PCM(1 + 8 * nchannels); + x[4] = PCM(1 + 13 * nchannels); + x[5] = PCM(1 + 9 * nchannels); + x[6] = PCM(1 + 12 * nchannels); + x[7] = PCM(1 + 10 * nchannels); + x[8] = PCM(1 + 11 * nchannels); + x[9] = PCM(1 + 3 * nchannels); + x[10] = PCM(1 + 6 * nchannels); + x[11] = PCM(1 + 0 * nchannels); + x[12] = PCM(1 + 5 * nchannels); + x[13] = PCM(1 + 1 * nchannels); + x[14] = PCM(1 + 4 * nchannels); + x[15] = PCM(1 + 2 * nchannels); + } + pcm += 32 * nchannels; + } + #undef PCM + + return position; +} + +/* + * Input data processing functions. The data is endian converted if needed, + * channels are deinterleaved and audio samples are reordered for use in + * SIMD-friendly analysis filter function. The results are put into "X" + * array, getting appended to the previous data (or it is better to say + * prepended, as the buffer is filled from top to bottom). Old data is + * discarded when needed, but availability of (10 * nrof_subbands) + * contiguous samples is always guaranteed for the input to the analysis + * filter.
This is achieved by copying a sufficient part of old data + * to the top of the buffer on buffer wraparound. + */ + +static int sbc_enc_process_input_4s_le(int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels) +{ + if (nchannels > 1) + return sbc_encoder_process_input_s4_internal( + position, pcm, X, nsamples, 2, 0); + else + return sbc_encoder_process_input_s4_internal( + position, pcm, X, nsamples, 1, 0); +} + +static int sbc_enc_process_input_4s_be(int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels) +{ + if (nchannels > 1) + return sbc_encoder_process_input_s4_internal( + position, pcm, X, nsamples, 2, 1); + else + return sbc_encoder_process_input_s4_internal( + position, pcm, X, nsamples, 1, 1); +} + +static int sbc_enc_process_input_8s_le(int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels) +{ + if (nchannels > 1) + return sbc_encoder_process_input_s8_internal( + position, pcm, X, nsamples, 2, 0); + else + return sbc_encoder_process_input_s8_internal( + position, pcm, X, nsamples, 1, 0); +} + +static int sbc_enc_process_input_8s_be(int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels) +{ + if (nchannels > 1) + return sbc_encoder_process_input_s8_internal( + position, pcm, X, nsamples, 2, 1); + else + return sbc_encoder_process_input_s8_internal( + position, pcm, X, nsamples, 1, 1); +} + +/* Supplementary function to count the number of leading zeros */ + +static inline int sbc_clz(uint32_t x) +{ +#ifdef __GNUC__ + return __builtin_clz(x); +#else + /* TODO: this should be replaced with something better if good + * performance is wanted when using compilers other than gcc */ + int cnt = 0; + while (x) { + cnt++; + x >>= 1; + } + return 32 - cnt; +#endif +} + +static void sbc_calc_scalefactors( + int32_t sb_sample_f[16][2][8], + uint32_t scale_factor[2][8], + int blocks, int 
channels, int subbands) +{ + int ch, sb, blk; + for (ch = 0; ch < channels; ch++) { + for (sb = 0; sb < subbands; sb++) { + uint32_t x = 1 << SCALE_OUT_BITS; + for (blk = 0; blk < blocks; blk++) { + int32_t tmp = fabs(sb_sample_f[blk][ch][sb]); + if (tmp != 0) + x |= tmp - 1; + } + scale_factor[ch][sb] = (31 - SCALE_OUT_BITS) - + sbc_clz(x); + } + } +} + +/* + * Detect CPU features and setup function pointers + */ +void sbc_init_primitives(struct sbc_encoder_state *state) +{ + /* Default implementation for analyze functions */ + state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_simd; + state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_simd; + + /* Default implementation for input reordering / deinterleaving */ + state->sbc_enc_process_input_4s_le = sbc_enc_process_input_4s_le; + state->sbc_enc_process_input_4s_be = sbc_enc_process_input_4s_be; + state->sbc_enc_process_input_8s_le = sbc_enc_process_input_8s_le; + state->sbc_enc_process_input_8s_be = sbc_enc_process_input_8s_be; + + /* Default implementation for scale factors calculation */ + state->sbc_calc_scalefactors = sbc_calc_scalefactors; + + /* X86/AMD64 optimizations */ +#ifdef SBC_BUILD_WITH_MMX_SUPPORT + sbc_init_primitives_mmx(state); +#endif + + /* ARM optimizations */ +#ifdef SBC_BUILD_WITH_NEON_SUPPORT + sbc_init_primitives_neon(state); +#endif +} diff --git a/utils/sbc/sbc_primitives.h b/utils/sbc/sbc_primitives.h new file mode 100644 index 00000000..2708c829 --- /dev/null +++ b/utils/sbc/sbc_primitives.h @@ -0,0 +1,74 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2004-2009 Marcel Holtmann <marcel@holtmann.org> + * Copyright (C) 2004-2005 Henryk Ploetz <henryk@ploetzli.ch> + * Copyright (C) 2005-2006 Brad Midgley <bmidgley@xmission.com> + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, 
or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef __SBC_PRIMITIVES_H +#define __SBC_PRIMITIVES_H + +#define SCALE_OUT_BITS 15 +#define SBC_X_BUFFER_SIZE 328 + +#ifdef __GNUC__ +#define SBC_ALWAYS_INLINE __attribute__((always_inline)) +#else +#define SBC_ALWAYS_INLINE inline +#endif + +struct sbc_encoder_state { + int position; + int16_t SBC_ALIGNED X[2][SBC_X_BUFFER_SIZE]; + /* Polyphase analysis filter for 4 subbands configuration, + * it handles 4 blocks at once */ + void (*sbc_analyze_4b_4s)(int16_t *x, int32_t *out, int out_stride); + /* Polyphase analysis filter for 8 subbands configuration, + * it handles 4 blocks at once */ + void (*sbc_analyze_4b_8s)(int16_t *x, int32_t *out, int out_stride); + /* Process input data (deinterleave, endian conversion, reordering), + * depending on the number of subbands and input data byte order */ + int (*sbc_enc_process_input_4s_le)(int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels); + int (*sbc_enc_process_input_4s_be)(int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels); + int (*sbc_enc_process_input_8s_le)(int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels); + int (*sbc_enc_process_input_8s_be)(int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels); + /* Scale factors calculation */ + void (*sbc_calc_scalefactors)(int32_t sb_sample_f[16][2][8], + 
uint32_t scale_factor[2][8], + int blocks, int channels, int subbands); +}; + +/* + * Initialize pointers to the functions which are the basic "building bricks" + * of SBC codec. Best implementation is selected based on target CPU + * capabilities. + */ +void sbc_init_primitives(struct sbc_encoder_state *encoder_state); + +#endif diff --git a/utils/sbc/sbc_primitives_mmx.c b/utils/sbc/sbc_primitives_mmx.c new file mode 100644 index 00000000..1870a9ba --- /dev/null +++ b/utils/sbc/sbc_primitives_mmx.c @@ -0,0 +1,319 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2004-2009 Marcel Holtmann <marcel@holtmann.org> + * Copyright (C) 2004-2005 Henryk Ploetz <henryk@ploetzli.ch> + * Copyright (C) 2005-2006 Brad Midgley <bmidgley@xmission.com> + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include <stdint.h> +#include <limits.h> +#include "sbc.h" +#include "sbc_math.h" +#include "sbc_tables.h" + +#include "sbc_primitives_mmx.h" + +/* + * MMX optimizations + */ + +#ifdef SBC_BUILD_WITH_MMX_SUPPORT + +static inline void sbc_analyze_four_mmx(const int16_t *in, int32_t *out, + const FIXED_T *consts) +{ + static const SBC_ALIGNED int32_t round_c[2] = { + 1 << (SBC_PROTO_FIXED4_SCALE - 1), + 1 << (SBC_PROTO_FIXED4_SCALE - 1), + }; + asm volatile ( + "movq (%0), %%mm0\n" + "movq 8(%0), %%mm1\n" + "pmaddwd (%1), %%mm0\n" + "pmaddwd 8(%1), %%mm1\n" + "paddd (%2), %%mm0\n" + "paddd (%2), %%mm1\n" + "\n" + "movq 16(%0), %%mm2\n" + "movq 24(%0), %%mm3\n" + "pmaddwd 16(%1), %%mm2\n" + "pmaddwd 24(%1), %%mm3\n" + "paddd %%mm2, %%mm0\n" + "paddd %%mm3, %%mm1\n" + "\n" + "movq 32(%0), %%mm2\n" + "movq 40(%0), %%mm3\n" + "pmaddwd 32(%1), %%mm2\n" + "pmaddwd 40(%1), %%mm3\n" + "paddd %%mm2, %%mm0\n" + "paddd %%mm3, %%mm1\n" + "\n" + "movq 48(%0), %%mm2\n" + "movq 56(%0), %%mm3\n" + "pmaddwd 48(%1), %%mm2\n" + "pmaddwd 56(%1), %%mm3\n" + "paddd %%mm2, %%mm0\n" + "paddd %%mm3, %%mm1\n" + "\n" + "movq 64(%0), %%mm2\n" + "movq 72(%0), %%mm3\n" + "pmaddwd 64(%1), %%mm2\n" + "pmaddwd 72(%1), %%mm3\n" + "paddd %%mm2, %%mm0\n" + "paddd %%mm3, %%mm1\n" + "\n" + "psrad %4, %%mm0\n" + "psrad %4, %%mm1\n" + "packssdw %%mm0, %%mm0\n" + "packssdw %%mm1, %%mm1\n" + "\n" + "movq %%mm0, %%mm2\n" + "pmaddwd 80(%1), %%mm0\n" + "pmaddwd 88(%1), %%mm2\n" + "\n" + "movq %%mm1, %%mm3\n" + "pmaddwd 96(%1), %%mm1\n" + "pmaddwd 104(%1), %%mm3\n" + "paddd %%mm1, %%mm0\n" + "paddd %%mm3, %%mm2\n" + "\n" + "movq %%mm0, (%3)\n" + "movq %%mm2, 8(%3)\n" + : + : "r" (in), "r" (consts), "r" (&round_c), "r" (out), + "i" (SBC_PROTO_FIXED4_SCALE) + : "memory"); +} + +static 
inline void sbc_analyze_eight_mmx(const int16_t *in, int32_t *out, + const FIXED_T *consts) +{ + static const SBC_ALIGNED int32_t round_c[2] = { + 1 << (SBC_PROTO_FIXED8_SCALE - 1), + 1 << (SBC_PROTO_FIXED8_SCALE - 1), + }; + asm volatile ( + "movq (%0), %%mm0\n" + "movq 8(%0), %%mm1\n" + "movq 16(%0), %%mm2\n" + "movq 24(%0), %%mm3\n" + "pmaddwd (%1), %%mm0\n" + "pmaddwd 8(%1), %%mm1\n" + "pmaddwd 16(%1), %%mm2\n" + "pmaddwd 24(%1), %%mm3\n" + "paddd (%2), %%mm0\n" + "paddd (%2), %%mm1\n" + "paddd (%2), %%mm2\n" + "paddd (%2), %%mm3\n" + "\n" + "movq 32(%0), %%mm4\n" + "movq 40(%0), %%mm5\n" + "movq 48(%0), %%mm6\n" + "movq 56(%0), %%mm7\n" + "pmaddwd 32(%1), %%mm4\n" + "pmaddwd 40(%1), %%mm5\n" + "pmaddwd 48(%1), %%mm6\n" + "pmaddwd 56(%1), %%mm7\n" + "paddd %%mm4, %%mm0\n" + "paddd %%mm5, %%mm1\n" + "paddd %%mm6, %%mm2\n" + "paddd %%mm7, %%mm3\n" + "\n" + "movq 64(%0), %%mm4\n" + "movq 72(%0), %%mm5\n" + "movq 80(%0), %%mm6\n" + "movq 88(%0), %%mm7\n" + "pmaddwd 64(%1), %%mm4\n" + "pmaddwd 72(%1), %%mm5\n" + "pmaddwd 80(%1), %%mm6\n" + "pmaddwd 88(%1), %%mm7\n" + "paddd %%mm4, %%mm0\n" + "paddd %%mm5, %%mm1\n" + "paddd %%mm6, %%mm2\n" + "paddd %%mm7, %%mm3\n" + "\n" + "movq 96(%0), %%mm4\n" + "movq 104(%0), %%mm5\n" + "movq 112(%0), %%mm6\n" + "movq 120(%0), %%mm7\n" + "pmaddwd 96(%1), %%mm4\n" + "pmaddwd 104(%1), %%mm5\n" + "pmaddwd 112(%1), %%mm6\n" + "pmaddwd 120(%1), %%mm7\n" + "paddd %%mm4, %%mm0\n" + "paddd %%mm5, %%mm1\n" + "paddd %%mm6, %%mm2\n" + "paddd %%mm7, %%mm3\n" + "\n" + "movq 128(%0), %%mm4\n" + "movq 136(%0), %%mm5\n" + "movq 144(%0), %%mm6\n" + "movq 152(%0), %%mm7\n" + "pmaddwd 128(%1), %%mm4\n" + "pmaddwd 136(%1), %%mm5\n" + "pmaddwd 144(%1), %%mm6\n" + "pmaddwd 152(%1), %%mm7\n" + "paddd %%mm4, %%mm0\n" + "paddd %%mm5, %%mm1\n" + "paddd %%mm6, %%mm2\n" + "paddd %%mm7, %%mm3\n" + "\n" + "psrad %4, %%mm0\n" + "psrad %4, %%mm1\n" + "psrad %4, %%mm2\n" + "psrad %4, %%mm3\n" + "\n" + "packssdw %%mm0, %%mm0\n" + "packssdw %%mm1, %%mm1\n" + 
"packssdw %%mm2, %%mm2\n" + "packssdw %%mm3, %%mm3\n" + "\n" + "movq %%mm0, %%mm4\n" + "movq %%mm0, %%mm5\n" + "pmaddwd 160(%1), %%mm4\n" + "pmaddwd 168(%1), %%mm5\n" + "\n" + "movq %%mm1, %%mm6\n" + "movq %%mm1, %%mm7\n" + "pmaddwd 192(%1), %%mm6\n" + "pmaddwd 200(%1), %%mm7\n" + "paddd %%mm6, %%mm4\n" + "paddd %%mm7, %%mm5\n" + "\n" + "movq %%mm2, %%mm6\n" + "movq %%mm2, %%mm7\n" + "pmaddwd 224(%1), %%mm6\n" + "pmaddwd 232(%1), %%mm7\n" + "paddd %%mm6, %%mm4\n" + "paddd %%mm7, %%mm5\n" + "\n" + "movq %%mm3, %%mm6\n" + "movq %%mm3, %%mm7\n" + "pmaddwd 256(%1), %%mm6\n" + "pmaddwd 264(%1), %%mm7\n" + "paddd %%mm6, %%mm4\n" + "paddd %%mm7, %%mm5\n" + "\n" + "movq %%mm4, (%3)\n" + "movq %%mm5, 8(%3)\n" + "\n" + "movq %%mm0, %%mm5\n" + "pmaddwd 176(%1), %%mm0\n" + "pmaddwd 184(%1), %%mm5\n" + "\n" + "movq %%mm1, %%mm7\n" + "pmaddwd 208(%1), %%mm1\n" + "pmaddwd 216(%1), %%mm7\n" + "paddd %%mm1, %%mm0\n" + "paddd %%mm7, %%mm5\n" + "\n" + "movq %%mm2, %%mm7\n" + "pmaddwd 240(%1), %%mm2\n" + "pmaddwd 248(%1), %%mm7\n" + "paddd %%mm2, %%mm0\n" + "paddd %%mm7, %%mm5\n" + "\n" + "movq %%mm3, %%mm7\n" + "pmaddwd 272(%1), %%mm3\n" + "pmaddwd 280(%1), %%mm7\n" + "paddd %%mm3, %%mm0\n" + "paddd %%mm7, %%mm5\n" + "\n" + "movq %%mm0, 16(%3)\n" + "movq %%mm5, 24(%3)\n" + : + : "r" (in), "r" (consts), "r" (&round_c), "r" (out), + "i" (SBC_PROTO_FIXED8_SCALE) + : "memory"); +} + +static inline void sbc_analyze_4b_4s_mmx(int16_t *x, int32_t *out, + int out_stride) +{ + /* Analyze blocks */ + sbc_analyze_four_mmx(x + 12, out, analysis_consts_fixed4_simd_odd); + out += out_stride; + sbc_analyze_four_mmx(x + 8, out, analysis_consts_fixed4_simd_even); + out += out_stride; + sbc_analyze_four_mmx(x + 4, out, analysis_consts_fixed4_simd_odd); + out += out_stride; + sbc_analyze_four_mmx(x + 0, out, analysis_consts_fixed4_simd_even); + + asm volatile ("emms\n"); +} + +static inline void sbc_analyze_4b_8s_mmx(int16_t *x, int32_t *out, + int out_stride) +{ + /* Analyze blocks */ + 
sbc_analyze_eight_mmx(x + 24, out, analysis_consts_fixed8_simd_odd); + out += out_stride; + sbc_analyze_eight_mmx(x + 16, out, analysis_consts_fixed8_simd_even); + out += out_stride; + sbc_analyze_eight_mmx(x + 8, out, analysis_consts_fixed8_simd_odd); + out += out_stride; + sbc_analyze_eight_mmx(x + 0, out, analysis_consts_fixed8_simd_even); + + asm volatile ("emms\n"); +} + +static int check_mmx_support(void) +{ +#ifdef __amd64__ + return 1; /* We assume that all 64-bit processors have MMX support */ +#else + int cpuid_feature_information; + asm volatile ( + /* According to Intel manual, CPUID instruction is supported + * if the value of ID bit (bit 21) in EFLAGS can be modified */ + "pushf\n" + "movl (%%esp), %0\n" + "xorl $0x200000, (%%esp)\n" /* try to modify ID bit */ + "popf\n" + "pushf\n" + "xorl (%%esp), %0\n" /* check if ID bit changed */ + "jz 1f\n" + "push %%eax\n" + "push %%ebx\n" + "push %%ecx\n" + "mov $1, %%eax\n" + "cpuid\n" + "pop %%ecx\n" + "pop %%ebx\n" + "pop %%eax\n" + "1:\n" + "popf\n" + : "=d" (cpuid_feature_information) + : + : "cc"); + return cpuid_feature_information & (1 << 23); +#endif +} + +void sbc_init_primitives_mmx(struct sbc_encoder_state *state) +{ + if (check_mmx_support()) { + state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_mmx; + state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_mmx; + } +} + +#endif diff --git a/utils/sbc/sbc_primitives_mmx.h b/utils/sbc/sbc_primitives_mmx.h new file mode 100644 index 00000000..c1e44a5d --- /dev/null +++ b/utils/sbc/sbc_primitives_mmx.h @@ -0,0 +1,40 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2004-2009 Marcel Holtmann <marcel@holtmann.org> + * Copyright (C) 2004-2005 Henryk Ploetz <henryk@ploetzli.ch> + * Copyright (C) 2005-2006 Brad Midgley <bmidgley@xmission.com> + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software 
Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef __SBC_PRIMITIVES_MMX_H +#define __SBC_PRIMITIVES_MMX_H + +#include "sbc_primitives.h" + +#if defined(__GNUC__) && (defined(__i386__) || defined(__amd64__)) && \ + !defined(SBC_HIGH_PRECISION) && (SCALE_OUT_BITS == 15) + +#define SBC_BUILD_WITH_MMX_SUPPORT + +void sbc_init_primitives_mmx(struct sbc_encoder_state *encoder_state); + +#endif + +#endif diff --git a/utils/sbc/sbc_primitives_neon.c b/utils/sbc/sbc_primitives_neon.c new file mode 100644 index 00000000..d9c12f9e --- /dev/null +++ b/utils/sbc/sbc_primitives_neon.c @@ -0,0 +1,245 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2004-2009 Marcel Holtmann <marcel@holtmann.org> + * Copyright (C) 2004-2005 Henryk Ploetz <henryk@ploetzli.ch> + * Copyright (C) 2005-2006 Brad Midgley <bmidgley@xmission.com> + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include <stdint.h> +#include <limits.h> +#include "sbc.h" +#include "sbc_math.h" +#include "sbc_tables.h" + +#include "sbc_primitives_neon.h" + +/* + * ARM NEON optimizations + */ + +#ifdef SBC_BUILD_WITH_NEON_SUPPORT + /* Four-output analysis step: multiply-accumulates 40 int16 samples read from 'in' with the first 40 entries of 'consts', rounds and narrows the four sums using SBC_PROTO_FIXED4_SCALE, multiplies them by the next 16 entries of 'consts' and stores 4 int32 results to 'out'. The :64 / :128 address qualifiers require 'in' to be 8-byte aligned and 'consts' / 'out' to be 16-byte aligned. */ +static inline void _sbc_analyze_four_neon(const int16_t *in, int32_t *out, + const FIXED_T *consts) +{ + /* TODO: merge even and odd cases (or even merge all four calls to this + * function) in order to have only aligned reads from 'in' array + * and reduce number of load instructions */ + asm volatile ( + "vld1.16 {d4, d5}, [%0, :64]!\n" + "vld1.16 {d8, d9}, [%1, :128]!\n" + + "vmull.s16 q0, d4, d8\n" + "vld1.16 {d6, d7}, [%0, :64]!\n" + "vmull.s16 q1, d5, d9\n" + "vld1.16 {d10, d11}, [%1, :128]!\n" + + "vmlal.s16 q0, d6, d10\n" + "vld1.16 {d4, d5}, [%0, :64]!\n" + "vmlal.s16 q1, d7, d11\n" + "vld1.16 {d8, d9}, [%1, :128]!\n" + + "vmlal.s16 q0, d4, d8\n" + "vld1.16 {d6, d7}, [%0, :64]!\n" + "vmlal.s16 q1, d5, d9\n" + "vld1.16 {d10, d11}, [%1, :128]!\n" + + "vmlal.s16 q0, d6, d10\n" + "vld1.16 {d4, d5}, [%0, :64]!\n" + "vmlal.s16 q1, d7, d11\n" + "vld1.16 {d8, d9}, [%1, :128]!\n" + + "vmlal.s16 q0, d4, d8\n" + "vmlal.s16 q1, d5, d9\n" + + "vpadd.s32 d0, d0, d1\n" + "vpadd.s32 d1, d2, d3\n" + + "vrshrn.s32 d0, q0, %3\n" /* %3 = SBC_PROTO_FIXED4_SCALE: rounding shift right, then narrow to 16 bits */ + + "vld1.16 {d2, d3, d4, d5}, [%1, :128]!\n" + + "vdup.i32 d1, d0[1]\n" /* TODO: can be eliminated */ + "vdup.i32 d0, d0[0]\n" /* TODO: can be eliminated */ + + "vmull.s16 q3, d2, d0\n" + "vmull.s16 q4, d3, d0\n" + "vmlal.s16 q3, d4, d1\n" + "vmlal.s16 q4, d5, d1\n" + + "vpadd.s32 d0, d6, d7\n" /* TODO: can be eliminated */ + "vpadd.s32 d1, d8, d9\n" /* TODO: can be eliminated */ + + "vst1.32 {d0, d1}, [%2, :128]\n" + : "+r" (in), "+r" (consts) /* %0, %1: advanced by the post-incrementing loads */ + : "r" (out), + "i" (SBC_PROTO_FIXED4_SCALE) + : 
"memory", + "d0", "d1", "d2", "d3", "d4", "d5", + "d6", "d7", "d8", "d9", "d10", "d11"); +} + /* Eight-output analysis step: multiply-accumulates 80 int16 samples read from 'in' with the first 80 entries of 'consts', rounds and narrows the eight sums using SBC_PROTO_FIXED8_SCALE, multiplies them by the next 64 entries of 'consts' and stores 8 int32 results to 'out'. The :64 / :128 address qualifiers require 'in' to be 8-byte aligned and 'consts' / 'out' to be 16-byte aligned. */ +static inline void _sbc_analyze_eight_neon(const int16_t *in, int32_t *out, + const FIXED_T *consts) +{ + /* TODO: merge even and odd cases (or even merge all four calls to this + * function) in order to have only aligned reads from 'in' array + * and reduce number of load instructions */ + asm volatile ( + "vld1.16 {d4, d5}, [%0, :64]!\n" + "vld1.16 {d8, d9}, [%1, :128]!\n" + + "vmull.s16 q6, d4, d8\n" + "vld1.16 {d6, d7}, [%0, :64]!\n" + "vmull.s16 q7, d5, d9\n" + "vld1.16 {d10, d11}, [%1, :128]!\n" + "vmull.s16 q8, d6, d10\n" + "vld1.16 {d4, d5}, [%0, :64]!\n" + "vmull.s16 q9, d7, d11\n" + "vld1.16 {d8, d9}, [%1, :128]!\n" + + "vmlal.s16 q6, d4, d8\n" + "vld1.16 {d6, d7}, [%0, :64]!\n" + "vmlal.s16 q7, d5, d9\n" + "vld1.16 {d10, d11}, [%1, :128]!\n" + "vmlal.s16 q8, d6, d10\n" + "vld1.16 {d4, d5}, [%0, :64]!\n" + "vmlal.s16 q9, d7, d11\n" + "vld1.16 {d8, d9}, [%1, :128]!\n" + + "vmlal.s16 q6, d4, d8\n" + "vld1.16 {d6, d7}, [%0, :64]!\n" + "vmlal.s16 q7, d5, d9\n" + "vld1.16 {d10, d11}, [%1, :128]!\n" + "vmlal.s16 q8, d6, d10\n" + "vld1.16 {d4, d5}, [%0, :64]!\n" + "vmlal.s16 q9, d7, d11\n" + "vld1.16 {d8, d9}, [%1, :128]!\n" + + "vmlal.s16 q6, d4, d8\n" + "vld1.16 {d6, d7}, [%0, :64]!\n" + "vmlal.s16 q7, d5, d9\n" + "vld1.16 {d10, d11}, [%1, :128]!\n" + "vmlal.s16 q8, d6, d10\n" + "vld1.16 {d4, d5}, [%0, :64]!\n" + "vmlal.s16 q9, d7, d11\n" + "vld1.16 {d8, d9}, [%1, :128]!\n" + + "vmlal.s16 q6, d4, d8\n" + "vld1.16 {d6, d7}, [%0, :64]!\n" + "vmlal.s16 q7, d5, d9\n" + "vld1.16 {d10, d11}, [%1, :128]!\n" + + "vmlal.s16 q8, d6, d10\n" + "vmlal.s16 q9, d7, d11\n" + + "vpadd.s32 d0, d12, d13\n" + "vpadd.s32 d1, d14, d15\n" + "vpadd.s32 d2, d16, d17\n" + "vpadd.s32 d3, d18, d19\n" + + "vrshr.s32 q0, q0, %3\n" + "vrshr.s32 q1, q1, %3\n" + "vmovn.s32 d0, q0\n" + "vmovn.s32 d1, q1\n" /* d0, d1 now hold the eight sums, rounded by %3 = SBC_PROTO_FIXED8_SCALE and narrowed to 16 bits */ + + "vdup.i32 d3, d1[1]\n" /* TODO: can be eliminated */ + "vdup.i32 d2, d1[0]\n" /* TODO: can 
be eliminated */ + "vdup.i32 d1, d0[1]\n" /* TODO: can be eliminated */ + "vdup.i32 d0, d0[0]\n" /* TODO: can be eliminated */ + + "vld1.16 {d4, d5}, [%1, :128]!\n" + "vmull.s16 q6, d4, d0\n" + "vld1.16 {d6, d7}, [%1, :128]!\n" + "vmull.s16 q7, d5, d0\n" + "vmull.s16 q8, d6, d0\n" + "vmull.s16 q9, d7, d0\n" + + "vld1.16 {d4, d5}, [%1, :128]!\n" + "vmlal.s16 q6, d4, d1\n" + "vld1.16 {d6, d7}, [%1, :128]!\n" + "vmlal.s16 q7, d5, d1\n" + "vmlal.s16 q8, d6, d1\n" + "vmlal.s16 q9, d7, d1\n" + + "vld1.16 {d4, d5}, [%1, :128]!\n" + "vmlal.s16 q6, d4, d2\n" + "vld1.16 {d6, d7}, [%1, :128]!\n" + "vmlal.s16 q7, d5, d2\n" + "vmlal.s16 q8, d6, d2\n" + "vmlal.s16 q9, d7, d2\n" + + "vld1.16 {d4, d5}, [%1, :128]!\n" + "vmlal.s16 q6, d4, d3\n" + "vld1.16 {d6, d7}, [%1, :128]!\n" + "vmlal.s16 q7, d5, d3\n" + "vmlal.s16 q8, d6, d3\n" + "vmlal.s16 q9, d7, d3\n" + + "vpadd.s32 d0, d12, d13\n" /* TODO: can be eliminated */ + "vpadd.s32 d1, d14, d15\n" /* TODO: can be eliminated */ + "vpadd.s32 d2, d16, d17\n" /* TODO: can be eliminated */ + "vpadd.s32 d3, d18, d19\n" /* TODO: can be eliminated */ + + "vst1.32 {d0, d1, d2, d3}, [%2, :128]\n" + : "+r" (in), "+r" (consts) /* %0, %1: advanced by the post-incrementing loads */ + : "r" (out), + "i" (SBC_PROTO_FIXED8_SCALE) + : "memory", + "d0", "d1", "d2", "d3", "d4", "d5", + "d6", "d7", "d8", "d9", "d10", "d11", + "d12", "d13", "d14", "d15", "d16", "d17", + "d18", "d19"); +} + /* Runs _sbc_analyze_four_neon at four input positions (x + 12, x + 8, x + 4, x + 0), alternating the odd and even constant tables; each result is written out_stride int32 elements after the previous one. */ +static inline void sbc_analyze_4b_4s_neon(int16_t *x, + int32_t *out, int out_stride) +{ + /* Analyze blocks */ + _sbc_analyze_four_neon(x + 12, out, analysis_consts_fixed4_simd_odd); + out += out_stride; + _sbc_analyze_four_neon(x + 8, out, analysis_consts_fixed4_simd_even); + out += out_stride; + _sbc_analyze_four_neon(x + 4, out, analysis_consts_fixed4_simd_odd); + out += out_stride; + _sbc_analyze_four_neon(x + 0, out, analysis_consts_fixed4_simd_even); +} + /* Runs _sbc_analyze_eight_neon at four input positions (x + 24, x + 16, x + 8, x + 0), alternating the odd and even constant tables; each result is written out_stride int32 elements after the previous one. */ +static inline void sbc_analyze_4b_8s_neon(int16_t *x, + int32_t *out, int out_stride) +{ + /* Analyze blocks */ + _sbc_analyze_eight_neon(x + 24, out, 
analysis_consts_fixed8_simd_odd); + out += out_stride; + _sbc_analyze_eight_neon(x + 16, out, analysis_consts_fixed8_simd_even); + out += out_stride; + _sbc_analyze_eight_neon(x + 8, out, analysis_consts_fixed8_simd_odd); + out += out_stride; + _sbc_analyze_eight_neon(x + 0, out, analysis_consts_fixed8_simd_even); +} + /* Points state->sbc_analyze_4b_4s and state->sbc_analyze_4b_8s at the NEON implementations unconditionally; no runtime CPU feature check is performed here. */ +void sbc_init_primitives_neon(struct sbc_encoder_state *state) +{ + state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_neon; + state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_neon; +} + +#endif diff --git a/utils/sbc/sbc_primitives_neon.h b/utils/sbc/sbc_primitives_neon.h new file mode 100644 index 00000000..30766ed8 --- /dev/null +++ b/utils/sbc/sbc_primitives_neon.h @@ -0,0 +1,40 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2004-2009 Marcel Holtmann <marcel@holtmann.org> + * Copyright (C) 2004-2005 Henryk Ploetz <henryk@ploetzli.ch> + * Copyright (C) 2005-2006 Brad Midgley <bmidgley@xmission.com> + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef __SBC_PRIMITIVES_NEON_H +#define __SBC_PRIMITIVES_NEON_H + +#include "sbc_primitives.h" + +#if defined(__GNUC__) && defined(__ARM_NEON__) && \ + !defined(SBC_HIGH_PRECISION) && (SCALE_OUT_BITS == 15) + +#define SBC_BUILD_WITH_NEON_SUPPORT + +void sbc_init_primitives_neon(struct sbc_encoder_state *encoder_state); + +#endif + +#endif diff --git a/utils/sbc/sbc_tables.h b/utils/sbc/sbc_tables.h index f1dfe6c0..0057c73f 100644 --- a/utils/sbc/sbc_tables.h +++ b/utils/sbc/sbc_tables.h @@ -157,33 +157,34 @@ static const int32_t synmatrix8[16][8] = { */ #define SBC_PROTO_FIXED4_SCALE \ ((sizeof(FIXED_T) * CHAR_BIT - 1) - SBC_FIXED_EXTRA_BITS + 1) -#define F(x) (FIXED_A) ((x * 2) * \ +#define F_PROTO4(x) (FIXED_A) ((x * 2) * \ ((FIXED_A) 1 << (sizeof(FIXED_T) * CHAR_BIT - 1)) + 0.5) +#define F(x) F_PROTO4(x) static const FIXED_T _sbc_proto_fixed4[40] = { - F(0.00000000E+00), F(5.36548976E-04), + F(0.00000000E+00), F(5.36548976E-04), -F(1.49188357E-03), F(2.73370904E-03), - F(3.83720193E-03), F(3.89205149E-03), - F(1.86581691E-03), F(3.06012286E-03), + F(3.83720193E-03), F(3.89205149E-03), + F(1.86581691E-03), F(3.06012286E-03), - F(1.09137620E-02), F(2.04385087E-02), + F(1.09137620E-02), F(2.04385087E-02), -F(2.88757392E-02), F(3.21939290E-02), - F(2.58767811E-02), F(6.13245186E-03), + F(2.58767811E-02), F(6.13245186E-03), -F(2.88217274E-02), F(7.76463494E-02), - F(1.35593274E-01), F(1.94987841E-01), + F(1.35593274E-01), F(1.94987841E-01), -F(2.46636662E-01), F(2.81828203E-01), - F(2.94315332E-01), F(2.81828203E-01), - F(2.46636662E-01), -F(1.94987841E-01), + F(2.94315332E-01), F(2.81828203E-01), + F(2.46636662E-01), -F(1.94987841E-01), -F(1.35593274E-01), -F(7.76463494E-02), - F(2.88217274E-02), F(6.13245186E-03), - F(2.58767811E-02), 
F(3.21939290E-02), - F(2.88757392E-02), -F(2.04385087E-02), + F(2.88217274E-02), F(6.13245186E-03), + F(2.58767811E-02), F(3.21939290E-02), + F(2.88757392E-02), -F(2.04385087E-02), -F(1.09137620E-02), -F(3.06012286E-03), -F(1.86581691E-03), F(3.89205149E-03), - F(3.83720193E-03), F(2.73370904E-03), - F(1.49188357E-03), -F(5.36548976E-04), + F(3.83720193E-03), F(2.73370904E-03), + F(1.49188357E-03), -F(5.36548976E-04), }; #undef F @@ -206,11 +207,12 @@ static const FIXED_T _sbc_proto_fixed4[40] = { */ #define SBC_COS_TABLE_FIXED4_SCALE \ ((sizeof(FIXED_T) * CHAR_BIT - 1) + SBC_FIXED_EXTRA_BITS) -#define F(x) (FIXED_A) ((x) * \ +#define F_COS4(x) (FIXED_A) ((x) * \ ((FIXED_A) 1 << (sizeof(FIXED_T) * CHAR_BIT - 1)) + 0.5) +#define F(x) F_COS4(x) static const FIXED_T cos_table_fixed_4[32] = { - F(0.7071067812), F(0.9238795325), -F(1.0000000000), F(0.9238795325), - F(0.7071067812), F(0.3826834324), F(0.0000000000), F(0.3826834324), + F(0.7071067812), F(0.9238795325), -F(1.0000000000), F(0.9238795325), + F(0.7071067812), F(0.3826834324), F(0.0000000000), F(0.3826834324), -F(0.7071067812), F(0.3826834324), -F(1.0000000000), F(0.3826834324), -F(0.7071067812), -F(0.9238795325), -F(0.0000000000), -F(0.9238795325), @@ -218,8 +220,8 @@ static const FIXED_T cos_table_fixed_4[32] = { -F(0.7071067812), -F(0.3826834324), -F(1.0000000000), -F(0.3826834324), -F(0.7071067812), F(0.9238795325), F(0.0000000000), F(0.9238795325), - F(0.7071067812), -F(0.9238795325), -F(1.0000000000), -F(0.9238795325), - F(0.7071067812), -F(0.3826834324), -F(0.0000000000), -F(0.3826834324), + F(0.7071067812), -F(0.9238795325), -F(1.0000000000), -F(0.9238795325), + F(0.7071067812), -F(0.3826834324), -F(0.0000000000), -F(0.3826834324), }; #undef F @@ -232,53 +234,54 @@ static const FIXED_T cos_table_fixed_4[32] = { * in order to compensate the same change applied to cos_table_fixed_8 */ #define SBC_PROTO_FIXED8_SCALE \ - ((sizeof(FIXED_T) * CHAR_BIT - 1) - SBC_FIXED_EXTRA_BITS + 2) -#define F(x) (FIXED_A) 
((x * 4) * \ + ((sizeof(FIXED_T) * CHAR_BIT - 1) - SBC_FIXED_EXTRA_BITS + 1) +#define F_PROTO8(x) (FIXED_A) ((x * 2) * \ ((FIXED_A) 1 << (sizeof(FIXED_T) * CHAR_BIT - 1)) + 0.5) +#define F(x) F_PROTO8(x) static const FIXED_T _sbc_proto_fixed8[80] = { - F(0.00000000E+00), F(1.56575398E-04), - F(3.43256425E-04), F(5.54620202E-04), + F(0.00000000E+00), F(1.56575398E-04), + F(3.43256425E-04), F(5.54620202E-04), -F(8.23919506E-04), F(1.13992507E-03), - F(1.47640169E-03), F(1.78371725E-03), - F(2.01182542E-03), F(2.10371989E-03), - F(1.99454554E-03), F(1.61656283E-03), - F(9.02154502E-04), F(1.78805361E-04), - F(1.64973098E-03), F(3.49717454E-03), - - F(5.65949473E-03), F(8.02941163E-03), - F(1.04584443E-02), F(1.27472335E-02), + F(1.47640169E-03), F(1.78371725E-03), + F(2.01182542E-03), F(2.10371989E-03), + F(1.99454554E-03), F(1.61656283E-03), + F(9.02154502E-04), F(1.78805361E-04), + F(1.64973098E-03), F(3.49717454E-03), + + F(5.65949473E-03), F(8.02941163E-03), + F(1.04584443E-02), F(1.27472335E-02), -F(1.46525263E-02), F(1.59045603E-02), - F(1.62208471E-02), F(1.53184106E-02), - F(1.29371806E-02), F(8.85757540E-03), - F(2.92408442E-03), -F(4.91578024E-03), + F(1.62208471E-02), F(1.53184106E-02), + F(1.29371806E-02), F(8.85757540E-03), + F(2.92408442E-03), -F(4.91578024E-03), -F(1.46404076E-02), F(2.61098752E-02), - F(3.90751381E-02), F(5.31873032E-02), + F(3.90751381E-02), F(5.31873032E-02), - F(6.79989431E-02), F(8.29847578E-02), - F(9.75753918E-02), F(1.11196689E-01), + F(6.79989431E-02), F(8.29847578E-02), + F(9.75753918E-02), F(1.11196689E-01), -F(1.23264548E-01), F(1.33264415E-01), - F(1.40753505E-01), F(1.45389847E-01), - F(1.46955068E-01), F(1.45389847E-01), - F(1.40753505E-01), F(1.33264415E-01), - F(1.23264548E-01), -F(1.11196689E-01), + F(1.40753505E-01), F(1.45389847E-01), + F(1.46955068E-01), F(1.45389847E-01), + F(1.40753505E-01), F(1.33264415E-01), + F(1.23264548E-01), -F(1.11196689E-01), -F(9.75753918E-02), -F(8.29847578E-02), -F(6.79989431E-02), 
-F(5.31873032E-02), -F(3.90751381E-02), -F(2.61098752E-02), - F(1.46404076E-02), -F(4.91578024E-03), - F(2.92408442E-03), F(8.85757540E-03), - F(1.29371806E-02), F(1.53184106E-02), - F(1.62208471E-02), F(1.59045603E-02), - F(1.46525263E-02), -F(1.27472335E-02), + F(1.46404076E-02), -F(4.91578024E-03), + F(2.92408442E-03), F(8.85757540E-03), + F(1.29371806E-02), F(1.53184106E-02), + F(1.62208471E-02), F(1.59045603E-02), + F(1.46525263E-02), -F(1.27472335E-02), -F(1.04584443E-02), -F(8.02941163E-03), -F(5.65949473E-03), -F(3.49717454E-03), -F(1.64973098E-03), -F(1.78805361E-04), -F(9.02154502E-04), F(1.61656283E-03), - F(1.99454554E-03), F(2.10371989E-03), - F(2.01182542E-03), F(1.78371725E-03), - F(1.47640169E-03), F(1.13992507E-03), - F(8.23919506E-04), -F(5.54620202E-04), + F(1.99454554E-03), F(2.10371989E-03), + F(2.01182542E-03), F(1.78371725E-03), + F(1.47640169E-03), F(1.13992507E-03), + F(8.23919506E-04), -F(5.54620202E-04), -F(3.43256425E-04), -F(1.56575398E-04), }; #undef F @@ -301,13 +304,14 @@ static const FIXED_T _sbc_proto_fixed8[80] = { */ #define SBC_COS_TABLE_FIXED8_SCALE \ ((sizeof(FIXED_T) * CHAR_BIT - 1) + SBC_FIXED_EXTRA_BITS) -#define F(x) (FIXED_A) ((x) * \ +#define F_COS8(x) (FIXED_A) ((x) * \ ((FIXED_A) 1 << (sizeof(FIXED_T) * CHAR_BIT - 1)) + 0.5) +#define F(x) F_COS8(x) static const FIXED_T cos_table_fixed_8[128] = { - F(0.7071067812), F(0.8314696123), F(0.9238795325), F(0.9807852804), + F(0.7071067812), F(0.8314696123), F(0.9238795325), F(0.9807852804), -F(1.0000000000), F(0.9807852804), F(0.9238795325), F(0.8314696123), - F(0.7071067812), F(0.5555702330), F(0.3826834324), F(0.1950903220), - F(0.0000000000), F(0.1950903220), F(0.3826834324), F(0.5555702330), + F(0.7071067812), F(0.5555702330), F(0.3826834324), F(0.1950903220), + F(0.0000000000), F(0.1950903220), F(0.3826834324), F(0.5555702330), -F(0.7071067812), -F(0.1950903220), F(0.3826834324), F(0.8314696123), -F(1.0000000000), F(0.8314696123), F(0.3826834324), -F(0.1950903220), @@ 
-317,17 +321,17 @@ static const FIXED_T cos_table_fixed_8[128] = { -F(0.7071067812), -F(0.9807852804), -F(0.3826834324), F(0.5555702330), -F(1.0000000000), F(0.5555702330), -F(0.3826834324), -F(0.9807852804), -F(0.7071067812), F(0.1950903220), F(0.9238795325), F(0.8314696123), - F(0.0000000000), F(0.8314696123), F(0.9238795325), F(0.1950903220), + F(0.0000000000), F(0.8314696123), F(0.9238795325), F(0.1950903220), - F(0.7071067812), -F(0.5555702330), -F(0.9238795325), F(0.1950903220), + F(0.7071067812), -F(0.5555702330), -F(0.9238795325), F(0.1950903220), -F(1.0000000000), F(0.1950903220), -F(0.9238795325), -F(0.5555702330), - F(0.7071067812), F(0.8314696123), -F(0.3826834324), -F(0.9807852804), + F(0.7071067812), F(0.8314696123), -F(0.3826834324), -F(0.9807852804), -F(0.0000000000), -F(0.9807852804), -F(0.3826834324), F(0.8314696123), - F(0.7071067812), F(0.5555702330), -F(0.9238795325), -F(0.1950903220), + F(0.7071067812), F(0.5555702330), -F(0.9238795325), -F(0.1950903220), -F(1.0000000000), -F(0.1950903220), -F(0.9238795325), F(0.5555702330), - F(0.7071067812), -F(0.8314696123), -F(0.3826834324), F(0.9807852804), - F(0.0000000000), F(0.9807852804), -F(0.3826834324), -F(0.8314696123), + F(0.7071067812), -F(0.8314696123), -F(0.3826834324), F(0.9807852804), + F(0.0000000000), F(0.9807852804), -F(0.3826834324), -F(0.8314696123), -F(0.7071067812), F(0.9807852804), -F(0.3826834324), -F(0.5555702330), -F(1.0000000000), -F(0.5555702330), -F(0.3826834324), F(0.9807852804), @@ -339,9 +343,317 @@ static const FIXED_T cos_table_fixed_8[128] = { -F(0.7071067812), F(0.9807852804), -F(0.9238795325), F(0.5555702330), -F(0.0000000000), F(0.5555702330), -F(0.9238795325), F(0.9807852804), - F(0.7071067812), -F(0.8314696123), F(0.9238795325), -F(0.9807852804), + F(0.7071067812), -F(0.8314696123), F(0.9238795325), -F(0.9807852804), -F(1.0000000000), -F(0.9807852804), F(0.9238795325), -F(0.8314696123), - F(0.7071067812), -F(0.5555702330), F(0.3826834324), -F(0.1950903220), + 
F(0.7071067812), -F(0.5555702330), F(0.3826834324), -F(0.1950903220), -F(0.0000000000), -F(0.1950903220), F(0.3826834324), -F(0.5555702330), }; #undef F + +/* + * Enforce 16 byte alignment for the data, which is supposed to be used + * with SIMD optimized code. + */ + +#define SBC_ALIGN_BITS 4 +#define SBC_ALIGN_MASK ((1 << (SBC_ALIGN_BITS)) - 1) + +#ifdef __GNUC__ +#define SBC_ALIGNED __attribute__((aligned(1 << (SBC_ALIGN_BITS)))) +#else +#define SBC_ALIGNED +#endif + +/* + * Constant tables for the use in SIMD optimized analysis filters + * Each table consists of two parts: + * 1. reordered "proto" table + * 2. reordered "cos" table + * + * Due to non-symmetrical reordering, separate tables for "even" + * and "odd" cases are needed + */ + +static const FIXED_T SBC_ALIGNED analysis_consts_fixed4_simd_even[40 + 16] = { +#define C0 1.0932568993 +#define C1 1.3056875580 +#define C2 1.3056875580 +#define C3 1.6772280856 + +#define F(x) F_PROTO4(x) + F(0.00000000E+00 * C0), F(3.83720193E-03 * C0), + F(5.36548976E-04 * C1), F(2.73370904E-03 * C1), + F(3.06012286E-03 * C2), F(3.89205149E-03 * C2), + F(0.00000000E+00 * C3), -F(1.49188357E-03 * C3), + F(1.09137620E-02 * C0), F(2.58767811E-02 * C0), + F(2.04385087E-02 * C1), F(3.21939290E-02 * C1), + F(7.76463494E-02 * C2), F(6.13245186E-03 * C2), + F(0.00000000E+00 * C3), -F(2.88757392E-02 * C3), + F(1.35593274E-01 * C0), F(2.94315332E-01 * C0), + F(1.94987841E-01 * C1), F(2.81828203E-01 * C1), + -F(1.94987841E-01 * C2), F(2.81828203E-01 * C2), + F(0.00000000E+00 * C3), -F(2.46636662E-01 * C3), + -F(1.35593274E-01 * C0), F(2.58767811E-02 * C0), + -F(7.76463494E-02 * C1), F(6.13245186E-03 * C1), + -F(2.04385087E-02 * C2), F(3.21939290E-02 * C2), + F(0.00000000E+00 * C3), F(2.88217274E-02 * C3), + -F(1.09137620E-02 * C0), F(3.83720193E-03 * C0), + -F(3.06012286E-03 * C1), F(3.89205149E-03 * C1), + -F(5.36548976E-04 * C2), F(2.73370904E-03 * C2), + F(0.00000000E+00 * C3), -F(1.86581691E-03 * C3), +#undef F +#define F(x) 
F_COS4(x) + F(0.7071067812 / C0), F(0.9238795325 / C1), + -F(0.7071067812 / C0), F(0.3826834324 / C1), + -F(0.7071067812 / C0), -F(0.3826834324 / C1), + F(0.7071067812 / C0), -F(0.9238795325 / C1), + F(0.3826834324 / C2), -F(1.0000000000 / C3), + -F(0.9238795325 / C2), -F(1.0000000000 / C3), + F(0.9238795325 / C2), -F(1.0000000000 / C3), + -F(0.3826834324 / C2), -F(1.0000000000 / C3), +#undef F + +#undef C0 +#undef C1 +#undef C2 +#undef C3 +}; + +static const FIXED_T SBC_ALIGNED analysis_consts_fixed4_simd_odd[40 + 16] = { +#define C0 1.3056875580 +#define C1 1.6772280856 +#define C2 1.0932568993 +#define C3 1.3056875580 + +#define F(x) F_PROTO4(x) + F(2.73370904E-03 * C0), F(5.36548976E-04 * C0), + -F(1.49188357E-03 * C1), F(0.00000000E+00 * C1), + F(3.83720193E-03 * C2), F(1.09137620E-02 * C2), + F(3.89205149E-03 * C3), F(3.06012286E-03 * C3), + F(3.21939290E-02 * C0), F(2.04385087E-02 * C0), + -F(2.88757392E-02 * C1), F(0.00000000E+00 * C1), + F(2.58767811E-02 * C2), F(1.35593274E-01 * C2), + F(6.13245186E-03 * C3), F(7.76463494E-02 * C3), + F(2.81828203E-01 * C0), F(1.94987841E-01 * C0), + -F(2.46636662E-01 * C1), F(0.00000000E+00 * C1), + F(2.94315332E-01 * C2), -F(1.35593274E-01 * C2), + F(2.81828203E-01 * C3), -F(1.94987841E-01 * C3), + F(6.13245186E-03 * C0), -F(7.76463494E-02 * C0), + F(2.88217274E-02 * C1), F(0.00000000E+00 * C1), + F(2.58767811E-02 * C2), -F(1.09137620E-02 * C2), + F(3.21939290E-02 * C3), -F(2.04385087E-02 * C3), + F(3.89205149E-03 * C0), -F(3.06012286E-03 * C0), + -F(1.86581691E-03 * C1), F(0.00000000E+00 * C1), + F(3.83720193E-03 * C2), F(0.00000000E+00 * C2), + F(2.73370904E-03 * C3), -F(5.36548976E-04 * C3), +#undef F +#define F(x) F_COS4(x) + F(0.9238795325 / C0), -F(1.0000000000 / C1), + F(0.3826834324 / C0), -F(1.0000000000 / C1), + -F(0.3826834324 / C0), -F(1.0000000000 / C1), + -F(0.9238795325 / C0), -F(1.0000000000 / C1), + F(0.7071067812 / C2), F(0.3826834324 / C3), + -F(0.7071067812 / C2), -F(0.9238795325 / C3), + 
-F(0.7071067812 / C2), F(0.9238795325 / C3), + F(0.7071067812 / C2), -F(0.3826834324 / C3), +#undef F + +#undef C0 +#undef C1 +#undef C2 +#undef C3 +}; + +static const FIXED_T SBC_ALIGNED analysis_consts_fixed8_simd_even[80 + 64] = { +#define C0 2.7906148894 +#define C1 2.4270044280 +#define C2 2.8015616024 +#define C3 3.1710363741 +#define C4 2.5377944043 +#define C5 2.4270044280 +#define C6 2.8015616024 +#define C7 3.1710363741 + +#define F(x) F_PROTO8(x) + F(0.00000000E+00 * C0), F(2.01182542E-03 * C0), + F(1.56575398E-04 * C1), F(1.78371725E-03 * C1), + F(3.43256425E-04 * C2), F(1.47640169E-03 * C2), + F(5.54620202E-04 * C3), F(1.13992507E-03 * C3), + -F(8.23919506E-04 * C4), F(0.00000000E+00 * C4), + F(2.10371989E-03 * C5), F(3.49717454E-03 * C5), + F(1.99454554E-03 * C6), F(1.64973098E-03 * C6), + F(1.61656283E-03 * C7), F(1.78805361E-04 * C7), + F(5.65949473E-03 * C0), F(1.29371806E-02 * C0), + F(8.02941163E-03 * C1), F(1.53184106E-02 * C1), + F(1.04584443E-02 * C2), F(1.62208471E-02 * C2), + F(1.27472335E-02 * C3), F(1.59045603E-02 * C3), + -F(1.46525263E-02 * C4), F(0.00000000E+00 * C4), + F(8.85757540E-03 * C5), F(5.31873032E-02 * C5), + F(2.92408442E-03 * C6), F(3.90751381E-02 * C6), + -F(4.91578024E-03 * C7), F(2.61098752E-02 * C7), + F(6.79989431E-02 * C0), F(1.46955068E-01 * C0), + F(8.29847578E-02 * C1), F(1.45389847E-01 * C1), + F(9.75753918E-02 * C2), F(1.40753505E-01 * C2), + F(1.11196689E-01 * C3), F(1.33264415E-01 * C3), + -F(1.23264548E-01 * C4), F(0.00000000E+00 * C4), + F(1.45389847E-01 * C5), -F(8.29847578E-02 * C5), + F(1.40753505E-01 * C6), -F(9.75753918E-02 * C6), + F(1.33264415E-01 * C7), -F(1.11196689E-01 * C7), + -F(6.79989431E-02 * C0), F(1.29371806E-02 * C0), + -F(5.31873032E-02 * C1), F(8.85757540E-03 * C1), + -F(3.90751381E-02 * C2), F(2.92408442E-03 * C2), + -F(2.61098752E-02 * C3), -F(4.91578024E-03 * C3), + F(1.46404076E-02 * C4), F(0.00000000E+00 * C4), + F(1.53184106E-02 * C5), -F(8.02941163E-03 * C5), + F(1.62208471E-02 * 
C6), -F(1.04584443E-02 * C6), + F(1.59045603E-02 * C7), -F(1.27472335E-02 * C7), + -F(5.65949473E-03 * C0), F(2.01182542E-03 * C0), + -F(3.49717454E-03 * C1), F(2.10371989E-03 * C1), + -F(1.64973098E-03 * C2), F(1.99454554E-03 * C2), + -F(1.78805361E-04 * C3), F(1.61656283E-03 * C3), + -F(9.02154502E-04 * C4), F(0.00000000E+00 * C4), + F(1.78371725E-03 * C5), -F(1.56575398E-04 * C5), + F(1.47640169E-03 * C6), -F(3.43256425E-04 * C6), + F(1.13992507E-03 * C7), -F(5.54620202E-04 * C7), +#undef F +#define F(x) F_COS8(x) + F(0.7071067812 / C0), F(0.8314696123 / C1), + -F(0.7071067812 / C0), -F(0.1950903220 / C1), + -F(0.7071067812 / C0), -F(0.9807852804 / C1), + F(0.7071067812 / C0), -F(0.5555702330 / C1), + F(0.7071067812 / C0), F(0.5555702330 / C1), + -F(0.7071067812 / C0), F(0.9807852804 / C1), + -F(0.7071067812 / C0), F(0.1950903220 / C1), + F(0.7071067812 / C0), -F(0.8314696123 / C1), + F(0.9238795325 / C2), F(0.9807852804 / C3), + F(0.3826834324 / C2), F(0.8314696123 / C3), + -F(0.3826834324 / C2), F(0.5555702330 / C3), + -F(0.9238795325 / C2), F(0.1950903220 / C3), + -F(0.9238795325 / C2), -F(0.1950903220 / C3), + -F(0.3826834324 / C2), -F(0.5555702330 / C3), + F(0.3826834324 / C2), -F(0.8314696123 / C3), + F(0.9238795325 / C2), -F(0.9807852804 / C3), + -F(1.0000000000 / C4), F(0.5555702330 / C5), + -F(1.0000000000 / C4), -F(0.9807852804 / C5), + -F(1.0000000000 / C4), F(0.1950903220 / C5), + -F(1.0000000000 / C4), F(0.8314696123 / C5), + -F(1.0000000000 / C4), -F(0.8314696123 / C5), + -F(1.0000000000 / C4), -F(0.1950903220 / C5), + -F(1.0000000000 / C4), F(0.9807852804 / C5), + -F(1.0000000000 / C4), -F(0.5555702330 / C5), + F(0.3826834324 / C6), F(0.1950903220 / C7), + -F(0.9238795325 / C6), -F(0.5555702330 / C7), + F(0.9238795325 / C6), F(0.8314696123 / C7), + -F(0.3826834324 / C6), -F(0.9807852804 / C7), + -F(0.3826834324 / C6), F(0.9807852804 / C7), + F(0.9238795325 / C6), -F(0.8314696123 / C7), + -F(0.9238795325 / C6), F(0.5555702330 / C7), + 
F(0.3826834324 / C6), -F(0.1950903220 / C7), +#undef F + +#undef C0 +#undef C1 +#undef C2 +#undef C3 +#undef C4 +#undef C5 +#undef C6 +#undef C7 +}; + +static const FIXED_T SBC_ALIGNED analysis_consts_fixed8_simd_odd[80 + 64] = { +#define C0 2.5377944043 +#define C1 2.4270044280 +#define C2 2.8015616024 +#define C3 3.1710363741 +#define C4 2.7906148894 +#define C5 2.4270044280 +#define C6 2.8015616024 +#define C7 3.1710363741 + +#define F(x) F_PROTO8(x) + F(0.00000000E+00 * C0), -F(8.23919506E-04 * C0), + F(1.56575398E-04 * C1), F(1.78371725E-03 * C1), + F(3.43256425E-04 * C2), F(1.47640169E-03 * C2), + F(5.54620202E-04 * C3), F(1.13992507E-03 * C3), + F(2.01182542E-03 * C4), F(5.65949473E-03 * C4), + F(2.10371989E-03 * C5), F(3.49717454E-03 * C5), + F(1.99454554E-03 * C6), F(1.64973098E-03 * C6), + F(1.61656283E-03 * C7), F(1.78805361E-04 * C7), + F(0.00000000E+00 * C0), -F(1.46525263E-02 * C0), + F(8.02941163E-03 * C1), F(1.53184106E-02 * C1), + F(1.04584443E-02 * C2), F(1.62208471E-02 * C2), + F(1.27472335E-02 * C3), F(1.59045603E-02 * C3), + F(1.29371806E-02 * C4), F(6.79989431E-02 * C4), + F(8.85757540E-03 * C5), F(5.31873032E-02 * C5), + F(2.92408442E-03 * C6), F(3.90751381E-02 * C6), + -F(4.91578024E-03 * C7), F(2.61098752E-02 * C7), + F(0.00000000E+00 * C0), -F(1.23264548E-01 * C0), + F(8.29847578E-02 * C1), F(1.45389847E-01 * C1), + F(9.75753918E-02 * C2), F(1.40753505E-01 * C2), + F(1.11196689E-01 * C3), F(1.33264415E-01 * C3), + F(1.46955068E-01 * C4), -F(6.79989431E-02 * C4), + F(1.45389847E-01 * C5), -F(8.29847578E-02 * C5), + F(1.40753505E-01 * C6), -F(9.75753918E-02 * C6), + F(1.33264415E-01 * C7), -F(1.11196689E-01 * C7), + F(0.00000000E+00 * C0), F(1.46404076E-02 * C0), + -F(5.31873032E-02 * C1), F(8.85757540E-03 * C1), + -F(3.90751381E-02 * C2), F(2.92408442E-03 * C2), + -F(2.61098752E-02 * C3), -F(4.91578024E-03 * C3), + F(1.29371806E-02 * C4), -F(5.65949473E-03 * C4), + F(1.53184106E-02 * C5), -F(8.02941163E-03 * C5), + F(1.62208471E-02 * C6), 
-F(1.04584443E-02 * C6), + F(1.59045603E-02 * C7), -F(1.27472335E-02 * C7), + F(0.00000000E+00 * C0), -F(9.02154502E-04 * C0), + -F(3.49717454E-03 * C1), F(2.10371989E-03 * C1), + -F(1.64973098E-03 * C2), F(1.99454554E-03 * C2), + -F(1.78805361E-04 * C3), F(1.61656283E-03 * C3), + F(2.01182542E-03 * C4), F(0.00000000E+00 * C4), + F(1.78371725E-03 * C5), -F(1.56575398E-04 * C5), + F(1.47640169E-03 * C6), -F(3.43256425E-04 * C6), + F(1.13992507E-03 * C7), -F(5.54620202E-04 * C7), +#undef F +#define F(x) F_COS8(x) + -F(1.0000000000 / C0), F(0.8314696123 / C1), + -F(1.0000000000 / C0), -F(0.1950903220 / C1), + -F(1.0000000000 / C0), -F(0.9807852804 / C1), + -F(1.0000000000 / C0), -F(0.5555702330 / C1), + -F(1.0000000000 / C0), F(0.5555702330 / C1), + -F(1.0000000000 / C0), F(0.9807852804 / C1), + -F(1.0000000000 / C0), F(0.1950903220 / C1), + -F(1.0000000000 / C0), -F(0.8314696123 / C1), + F(0.9238795325 / C2), F(0.9807852804 / C3), + F(0.3826834324 / C2), F(0.8314696123 / C3), + -F(0.3826834324 / C2), F(0.5555702330 / C3), + -F(0.9238795325 / C2), F(0.1950903220 / C3), + -F(0.9238795325 / C2), -F(0.1950903220 / C3), + -F(0.3826834324 / C2), -F(0.5555702330 / C3), + F(0.3826834324 / C2), -F(0.8314696123 / C3), + F(0.9238795325 / C2), -F(0.9807852804 / C3), + F(0.7071067812 / C4), F(0.5555702330 / C5), + -F(0.7071067812 / C4), -F(0.9807852804 / C5), + -F(0.7071067812 / C4), F(0.1950903220 / C5), + F(0.7071067812 / C4), F(0.8314696123 / C5), + F(0.7071067812 / C4), -F(0.8314696123 / C5), + -F(0.7071067812 / C4), -F(0.1950903220 / C5), + -F(0.7071067812 / C4), F(0.9807852804 / C5), + F(0.7071067812 / C4), -F(0.5555702330 / C5), + F(0.3826834324 / C6), F(0.1950903220 / C7), + -F(0.9238795325 / C6), -F(0.5555702330 / C7), + F(0.9238795325 / C6), F(0.8314696123 / C7), + -F(0.3826834324 / C6), -F(0.9807852804 / C7), + -F(0.3826834324 / C6), F(0.9807852804 / C7), + F(0.9238795325 / C6), -F(0.8314696123 / C7), + -F(0.9238795325 / C6), F(0.5555702330 / C7), + F(0.3826834324 / 
C6), -F(0.1950903220 / C7), +#undef F + +#undef C0 +#undef C1 +#undef C2 +#undef C3 +#undef C4 +#undef C5 +#undef C6 +#undef C7 +}; diff --git a/utils/sbc/sbcdec.c b/utils/sbc/sbcdec.c index 462663ab..dfe092f5 100644 --- a/utils/sbc/sbcdec.c +++ b/utils/sbc/sbcdec.c @@ -143,7 +143,7 @@ static void decode(char *filename, char *output, int tofile) au_hdr.channels = BE_INT(channels); written = write(ad, &au_hdr, sizeof(au_hdr)); - if (written < sizeof(au_hdr)) { + if (written < (int) sizeof(au_hdr)) { fprintf(stderr, "Failed to write header\n"); goto close; } diff --git a/utils/sbc/sbcenc.c b/utils/sbc/sbcenc.c index 9cbfb871..d284789a 100644 --- a/utils/sbc/sbcenc.c +++ b/utils/sbc/sbcenc.c @@ -40,45 +40,21 @@ static int verbose = 0; -static ssize_t __read(int fd, void *buf, size_t count) -{ - ssize_t len, pos = 0; - - while (count > 0) { - len = read(fd, buf + pos, count); - if (len <= 0) - return pos > len ? pos : len; - - count -= len; - pos += len; - } - - return pos; -} - -static ssize_t __write(int fd, const void *buf, size_t count) -{ - ssize_t len, pos = 0; - - while (count > 0) { - len = write(fd, buf + pos, count); - if (len <= 0) - return len; - - count -= len; - pos += len; - } - - return pos; -} +#define BUF_SIZE 32768 +static unsigned char input[BUF_SIZE], output[BUF_SIZE + BUF_SIZE / 4]; static void encode(char *filename, int subbands, int bitpool, int joint, int dualchannel, int snr, int blocks) { - struct au_header *au_hdr; - unsigned char input[2048], output[2048]; + struct au_header au_hdr; sbc_t sbc; - int fd, len, size, count, encoded, srate; + int fd, len, size, encoded, srate, codesize, nframes; + + if (sizeof(au_hdr) != 24) { + /* Sanity check just in case */ + fprintf(stderr, "FIXME: sizeof(au_hdr) != 24\n"); + return; + } if (strcmp(filename, "-")) { fd = open(filename, O_RDONLY); @@ -90,8 +66,8 @@ static void encode(char *filename, int subbands, int bitpool, int joint, } else fd = fileno(stdin); - len = __read(fd, input, sizeof(input)); - 
if (len < sizeof(*au_hdr)) { + len = read(fd, &au_hdr, sizeof(au_hdr)); + if (len < (int) sizeof(au_hdr)) { if (fd > fileno(stderr)) fprintf(stderr, "Can't read header from file %s: %s\n", filename, strerror(errno)); @@ -100,19 +76,17 @@ static void encode(char *filename, int subbands, int bitpool, int joint, goto done; } - au_hdr = (struct au_header *) input; - - if (au_hdr->magic != AU_MAGIC || - BE_INT(au_hdr->hdr_size) > 128 || - BE_INT(au_hdr->hdr_size) < 24 || - BE_INT(au_hdr->encoding) != AU_FMT_LIN16) { + if (au_hdr.magic != AU_MAGIC || + BE_INT(au_hdr.hdr_size) > 128 || + BE_INT(au_hdr.hdr_size) < sizeof(au_hdr) || + BE_INT(au_hdr.encoding) != AU_FMT_LIN16) { fprintf(stderr, "Not in Sun/NeXT audio S16_BE format\n"); goto done; } sbc_init(&sbc, 0L); - switch (BE_INT(au_hdr->sample_rate)) { + switch (BE_INT(au_hdr.sample_rate)) { case 16000: sbc.frequency = SBC_FREQ_16000; break; @@ -127,11 +101,11 @@ static void encode(char *filename, int subbands, int bitpool, int joint, break; } - srate = BE_INT(au_hdr->sample_rate); + srate = BE_INT(au_hdr.sample_rate); sbc.subbands = subbands == 4 ? SBC_SB_4 : SBC_SB_8; - if (BE_INT(au_hdr->channels) == 1) { + if (BE_INT(au_hdr.channels) == 1) { sbc.mode = SBC_MODE_MONO; if (joint || dualchannel) { fprintf(stderr, "Audio is mono but joint or " @@ -151,9 +125,9 @@ static void encode(char *filename, int subbands, int bitpool, int joint, } sbc.endian = SBC_BE; - count = BE_INT(au_hdr->data_size); - size = len - BE_INT(au_hdr->hdr_size); - memmove(input, input + BE_INT(au_hdr->hdr_size), size); + /* Skip extra bytes of the header if any */ + if (read(fd, input, BE_INT(au_hdr.hdr_size) - len) < 0) + goto done; sbc.bitpool = bitpool; sbc.allocation = snr ? 
SBC_AM_SNR : SBC_AM_LOUDNESS; @@ -172,37 +146,51 @@ static void encode(char *filename, int subbands, int bitpool, int joint, "STEREO" : "JOINTSTEREO"); } + codesize = sbc_get_codesize(&sbc); + nframes = sizeof(input) / codesize; while (1) { - if (size < sizeof(input)) { - len = __read(fd, input + size, sizeof(input) - size); - if (len == 0 && size == 0) - break; - - if (len < 0) { - perror("Can't read audio data"); + unsigned char *inp, *outp; + /* read data for up to 'nframes' frames of input data */ + size = read(fd, input, codesize * nframes); + if (size < 0) { + /* Something really bad happened */ + perror("Can't read audio data"); + break; + } + if (size < codesize) { + /* Not enough data for encoding even a single frame */ + break; + } + /* encode all the data from the input buffer in a loop */ + inp = input; + outp = output; + while (size >= codesize) { + len = sbc_encode(&sbc, inp, codesize, + outp, sizeof(output) - (outp - output), + &encoded); + if (len != codesize || encoded <= 0) { + fprintf(stderr, + "sbc_encode fail, len=%d, encoded=%d\n", + len, encoded); break; } - - size += len; + size -= len; + inp += len; + outp += encoded; } - - len = sbc_encode(&sbc, input, size, - output, sizeof(output), &encoded); - if (len <= 0) - break; - if (len < size) - memmove(input, input + len, size - len); - - size -= len; - - len = __write(fileno(stdout), output, encoded); - if (len == 0) - break; - - if (len < 0 || len != encoded) { + len = write(fileno(stdout), output, outp - output); + if (len != outp - output) { perror("Can't write SBC output"); break; } + if (size != 0) { + /* + * sbc_encode failure has been detected earlier or end + * of file reached (have trailing partial data which is + * insufficient to encode SBC frame) + */ + break; + } } sbc_finish(&sbc); diff --git a/utils/sbc/sbcinfo.c b/utils/sbc/sbcinfo.c index 7420bfd2..339518a2 100644 --- a/utils/sbc/sbcinfo.c +++ b/utils/sbc/sbcinfo.c @@ -174,7 +174,8 @@ static int analyze_file(char *filename) 
double rate; int bitpool[SIZE], frame_len[SIZE]; int subbands, blocks, freq, mode, method; - int n, p1, p2, fd, len, size, count, num; + int n, p1, p2, fd, len, size, num; + unsigned int count; if (strcmp(filename, "-")) { printf("Filename\t\t%s\n", basename(filename)); @@ -235,7 +236,7 @@ static int analyze_file(char *filename) if (len == 0) break; - if (len < sizeof(hdr) || hdr.syncword != 0x9c) { + if (len < (int) sizeof(hdr) || hdr.syncword != 0x9c) { fprintf(stderr, "Corrupted SBC stream " "(len %d syncword 0x%02x)\n", len, hdr.syncword); |