You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1716 lines
53 KiB

/* Copyright (C) 2002 Jean-Marc Valin
File: nb_celp.c
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of the Xiph.org Foundation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <math.h>
#include "nb_celp.h"
#include "lpc.h"
#include "lsp.h"
#include "ltp.h"
#include "quant_lsp.h"
#include "cb_search.h"
#include "filters.h"
#include "stack_alloc.h"
#include "vq.h"
#include "speex_bits.h"
#include "vbr.h"
#include "misc.h"
#include "speex_callbacks.h"
#ifdef SLOW_TRIG
#include "math_approx.h"
#define cos speex_cos
#endif
#ifndef M_PI
#define M_PI 3.14159265358979323846 /* pi */
#endif
#ifndef NULL
#define NULL 0
#endif
/* Access field `x` of the currently selected sub-mode. Expands to an
   expression on a variable named `st`, so it can only be used where such a
   variable is in scope, and only when st->submodes[st->submodeID] is not NULL. */
#define SUBMODE(x) st->submodes[st->submodeID]->x
/* Scalar codebook for the 3-bit sub-frame gain correction. The encoder picks
   an entry with vq_index() against log(gain) and applies exp() to the chosen
   entry, so the values are in the natural-log domain. */
float exc_gain_quant_scal3[8]={-2.794750, -1.810660, -1.169850, -0.848119, -0.587190, -0.329818, -0.063266, 0.282826};
/* Scalar codebook for the 1-bit sub-frame gain correction (same log domain). */
float exc_gain_quant_scal1[2]={-0.35, 0.05};
/* Square of x. The argument is evaluated twice: do not pass expressions with
   side effects. */
#define sqr(x) ((x)*(x))
/*
 * Create a narrowband encoder state for mode `m`.
 *
 * A single block is requested from speex_alloc(): the EncState structure
 * followed by room for 8000 floats.  Every per-encoder array below is carved
 * out of that trailing area with PUSH(), and whatever is left past the last
 * PUSH is what nb_encode() later uses as scratch space through st->stack.
 *
 * Returns the new state (as void *), or NULL if the allocation failed.
 */
void *nb_encoder_init(SpeexMode *m)
{
   SpeexNBMode *nb;
   EncState *enc;
   int k;

   nb = (SpeexNBMode *)m->mode;

   enc = (EncState*)speex_alloc(sizeof(EncState)+8000*sizeof(float));
   if (enc == NULL)
      return NULL;

   /* The allocation area starts right after the structure itself */
   enc->stack = ((char*)enc) + sizeof(EncState);
   enc->mode = m;

   /* Copy the parameters of the narrowband mode */
   enc->frameSize = nb->frameSize;
   enc->windowSize = enc->frameSize*3/2;
   enc->nbSubframes = nb->frameSize/nb->subframeSize;
   enc->subframeSize = nb->subframeSize;
   enc->lpcSize = nb->lpcSize;
   enc->bufSize = nb->bufSize;
   enc->gamma1 = nb->gamma1;
   enc->gamma2 = nb->gamma2;
   enc->min_pitch = nb->pitchStart;
   enc->max_pitch = nb->pitchEnd;
   enc->lag_factor = nb->lag_factor;
   enc->lpc_floor = nb->lpc_floor;
   enc->preemph = nb->preemph;

   /* Start in the mode's default sub-mode */
   enc->submodes = nb->submodes;
   enc->submodeSelect = nb->defaultSubmode;
   enc->submodeID = enc->submodeSelect;

   /* Pre-emphasis / de-emphasis memories */
   enc->pre_mem = 0;
   enc->pre_mem2 = 0;
   enc->bounded_pitch = 1;

   /* Signal buffers: each "current" pointer addresses the last windowSize
      samples of its buffer */
   enc->inBuf = PUSH(enc->stack, enc->bufSize, float);
   enc->frame = enc->inBuf + enc->bufSize - enc->windowSize;

   enc->excBuf = PUSH(enc->stack, enc->bufSize, float);
   enc->exc = enc->excBuf + enc->bufSize - enc->windowSize;

   enc->swBuf = PUSH(enc->stack, enc->bufSize, float);
   enc->sw = enc->swBuf + enc->bufSize - enc->windowSize;

   enc->exc2Buf = PUSH(enc->stack, enc->bufSize, float);
   enc->exc2 = enc->exc2Buf + enc->bufSize - enc->windowSize;

   enc->innov = PUSH(enc->stack, enc->frameSize, float);

   /* Asymmetric analysis window: a rising raised-cosine section of 3.5
      sub-frames followed by a falling section of 2.5 sub-frames */
   {
      int rise = enc->subframeSize*7/2;
      int fall = enc->subframeSize*5/2;

      enc->window = PUSH(enc->stack, enc->windowSize, float);
      for (k=0;k<rise;k++)
         enc->window[k] = .54-.46*cos(M_PI*k/rise);
      for (k=0;k<fall;k++)
         enc->window[rise+k] = .54+.46*cos(M_PI*k/fall);
   }

   /* Gaussian lag window applied to the autocorrelation in nb_encode() */
   enc->lagWindow = PUSH(enc->stack, enc->lpcSize+1, float);
   for (k=0;k<=enc->lpcSize;k++)
      enc->lagWindow[k] = exp(-.5*sqr(2*M_PI*enc->lag_factor*k));

   /* LPC analysis work areas */
   enc->autocorr = PUSH(enc->stack, enc->lpcSize+1, float);
   enc->buf2 = PUSH(enc->stack, enc->windowSize, float);

   /* Filter coefficient sets (lpcSize+1 entries each) */
   enc->lpc = PUSH(enc->stack, enc->lpcSize+1, float);
   enc->interp_lpc = PUSH(enc->stack, enc->lpcSize+1, float);
   enc->interp_qlpc = PUSH(enc->stack, enc->lpcSize+1, float);
   enc->bw_lpc1 = PUSH(enc->stack, enc->lpcSize+1, float);
   enc->bw_lpc2 = PUSH(enc->stack, enc->lpcSize+1, float);

   /* LSP vectors (lpcSize entries each) and reflection coefficients */
   enc->lsp = PUSH(enc->stack, enc->lpcSize, float);
   enc->qlsp = PUSH(enc->stack, enc->lpcSize, float);
   enc->old_lsp = PUSH(enc->stack, enc->lpcSize, float);
   enc->old_qlsp = PUSH(enc->stack, enc->lpcSize, float);
   enc->interp_lsp = PUSH(enc->stack, enc->lpcSize, float);
   enc->interp_qlsp = PUSH(enc->stack, enc->lpcSize, float);
   enc->rc = PUSH(enc->stack, enc->lpcSize, float);

   /* The first frame is handled specially by nb_encode() */
   enc->first = 1;

   /* Initial LSPs: evenly spaced angles strictly between 0 and pi */
   for (k=0;k<enc->lpcSize;k++)
   {
      enc->lsp[k] = (M_PI*((float)(k+1)))/(enc->lpcSize+1);
   }

   /* Filter memories */
   enc->mem_sp = PUSH(enc->stack, enc->lpcSize, float);
   enc->mem_sw = PUSH(enc->stack, enc->lpcSize, float);
   enc->mem_sw_whole = PUSH(enc->stack, enc->lpcSize, float);
   enc->mem_exc = PUSH(enc->stack, enc->lpcSize, float);

   /* Per-sub-frame results */
   enc->pi_gain = PUSH(enc->stack, enc->nbSubframes, float);
   enc->pitch = PUSH(enc->stack, enc->nbSubframes, int);

   /* VBR analysis state */
   enc->vbr = PUSHS(enc->stack, VBRState);
   vbr_init(enc->vbr);

   /* Default settings: VBR, VAD, DTX and ABR all disabled */
   enc->vbr_quality = 8;
   enc->vbr_enabled = 0;
   enc->vad_enabled = 0;
   enc->dtx_enabled = 0;
   enc->abr_enabled = 0;
   enc->abr_drift = 0;
   enc->complexity = 2;
   enc->sampling_rate = 8000;
   enc->dtx_count = 0;

   return enc;
}
/*
 * Release an encoder state created by nb_encoder_init().
 *
 * `state` may be NULL (nb_encoder_init() returns NULL when its allocation
 * fails), in which case the call does nothing.
 */
void nb_encoder_destroy(void *state)
{
   EncState *st=(EncState *)state;

   /* Nothing to release for a state that was never created */
   if (!st)
      return;

   /* Tear down the VBR analysis state first: it lives inside the state block */
   vbr_destroy(st->vbr);

   /*Free state memory... should be last*/
   speex_free(st);
}
/*
 * Encode one narrowband frame.
 *
 *   state  encoder state created by nb_encoder_init()
 *   in     st->frameSize input samples; overwritten on return with the
 *          encoder's own synthesis of the frame (after de-emphasis)
 *   bits   bit-stream the frame is packed into
 *
 * Returns 0 when the selected sub-mode is the null mode (only the
 * narrowband flag and the sub-mode ID are packed), 1 otherwise.
 *
 * Scratch memory is taken from the local copy of st->stack, so everything
 * PUSHed here is implicitly released when the function returns.
 */
int nb_encode(void *state, float *in, SpeexBits *bits)
{
   EncState *st;
   int i, sub, roots;
   int ol_pitch;
   float ol_pitch_coef;
   float ol_gain;
   float *res, *target, *mem;
   char *stack;
   float *syn_resp;
   float lsp_dist=0;

   st=(EncState *)state;
   stack=st->stack;

   /* Copy new data in input buffer (with pre-emphasis; pre_mem carries the
      last raw input sample across frames) */
   speex_move(st->inBuf, st->inBuf+st->frameSize, (st->bufSize-st->frameSize)*sizeof(float));
   st->inBuf[st->bufSize-st->frameSize] = in[0] - st->preemph*st->pre_mem;
   for (i=1;i<st->frameSize;i++)
      st->inBuf[st->bufSize-st->frameSize+i] = in[i] - st->preemph*in[i-1];
   st->pre_mem = in[st->frameSize-1];

   /* Move signals 1 frame towards the past */
   speex_move(st->exc2Buf, st->exc2Buf+st->frameSize, (st->bufSize-st->frameSize)*sizeof(float));
   speex_move(st->excBuf, st->excBuf+st->frameSize, (st->bufSize-st->frameSize)*sizeof(float));
   speex_move(st->swBuf, st->swBuf+st->frameSize, (st->bufSize-st->frameSize)*sizeof(float));

   /* Window for analysis */
   for (i=0;i<st->windowSize;i++)
      st->buf2[i] = st->frame[i] * st->window[i];

   /* Compute auto-correlation */
   _spx_autocorr(st->buf2, st->autocorr, st->lpcSize+1, st->windowSize);

   st->autocorr[0] += 10;        /* prevents NANs */
   st->autocorr[0] *= st->lpc_floor; /* Noise floor in auto-correlation domain */

   /* Lag windowing: equivalent to filtering in the power-spectrum domain */
   for (i=0;i<st->lpcSize+1;i++)
      st->autocorr[i] *= st->lagWindow[i];

   /* Levinson-Durbin */
   wld(st->lpc+1, st->autocorr, st->rc, st->lpcSize);
   st->lpc[0]=1;

   /* LPC to LSPs (x-domain) transform */
   roots=lpc_to_lsp (st->lpc, st->lpcSize, st->lsp, 15, 0.2, stack);
   /* Check if we found all the roots */
   if (roots==st->lpcSize)
   {
      /* LSP x-domain to angle domain*/
      for (i=0;i<st->lpcSize;i++)
         st->lsp[i] = acos(st->lsp[i]);
   } else {
      /* Search again if we can afford it */
      if (st->complexity>1)
         roots = lpc_to_lsp (st->lpc, st->lpcSize, st->lsp, 11, 0.05, stack);
      if (roots==st->lpcSize)
      {
         /* LSP x-domain to angle domain*/
         for (i=0;i<st->lpcSize;i++)
            st->lsp[i] = acos(st->lsp[i]);
      } else {
         /*If we can't find all LSP's, do some damage control and use previous filter*/
         for (i=0;i<st->lpcSize;i++)
         {
            st->lsp[i]=st->old_lsp[i];
         }
      }
   }

   /* Squared distance between this frame's LSPs and the previous ones; used
      below by the DTX decision */
   lsp_dist=0;
   for (i=0;i<st->lpcSize;i++)
      lsp_dist += (st->old_lsp[i] - st->lsp[i])*(st->old_lsp[i] - st->lsp[i]);

   /* Whole frame analysis (open-loop estimation of pitch and excitation gain) */
   {
      if (st->first)
         for (i=0;i<st->lpcSize;i++)
            st->interp_lsp[i] = st->lsp[i];
      else
         for (i=0;i<st->lpcSize;i++)
            st->interp_lsp[i] = .375*st->old_lsp[i] + .625*st->lsp[i];

      lsp_enforce_margin(st->interp_lsp, st->lpcSize, .002);

      /* Compute interpolated LPCs (unquantized) for whole frame*/
      for (i=0;i<st->lpcSize;i++)
         st->interp_lsp[i] = cos(st->interp_lsp[i]);
      lsp_to_lpc(st->interp_lsp, st->interp_lpc, st->lpcSize,stack);

      /*Open-loop pitch: only computed when something will use it.  The null
        sub-mode test comes first so SUBMODE() is never evaluated on NULL. */
      if (!st->submodes[st->submodeID] || st->vbr_enabled || st->vad_enabled || SUBMODE(forced_pitch_gain) ||
          SUBMODE(lbr_pitch) != -1)
      {
         int nol_pitch[6];
         float nol_pitch_coef[6];

         bw_lpc(st->gamma1, st->interp_lpc, st->bw_lpc1, st->lpcSize);
         bw_lpc(st->gamma2, st->interp_lpc, st->bw_lpc2, st->lpcSize);

         filter_mem2(st->frame, st->bw_lpc1, st->bw_lpc2, st->sw, st->frameSize, st->lpcSize, st->mem_sw_whole);

         open_loop_nbest_pitch(st->sw, st->min_pitch, st->max_pitch, st->frameSize,
                               nol_pitch, nol_pitch_coef, 6, stack);
         ol_pitch=nol_pitch[0];
         ol_pitch_coef = nol_pitch_coef[0];
         /*Try to remove pitch multiples: prefer a candidate within one sample
           of ol_pitch/2../5 when its coefficient is close to the best one */
         for (i=1;i<6;i++)
         {
            if ((nol_pitch_coef[i]>.85*ol_pitch_coef) &&
                (fabs(nol_pitch[i]-ol_pitch/2.0)<=1 || fabs(nol_pitch[i]-ol_pitch/3.0)<=1 ||
                 fabs(nol_pitch[i]-ol_pitch/4.0)<=1 || fabs(nol_pitch[i]-ol_pitch/5.0)<=1))
            {
               ol_pitch = nol_pitch[i];
            }
         }
      } else {
         ol_pitch=0;
         ol_pitch_coef=0;
      }

      /*Compute "real" excitation*/
      fir_mem2(st->frame, st->interp_lpc, st->exc, st->frameSize, st->lpcSize, st->mem_exc);

      /* Compute open-loop excitation gain (the +1 keeps it >= 1, so its log
         taken further down is never negative) */
      ol_gain=0;
      for (i=0;i<st->frameSize;i++)
         ol_gain += st->exc[i]*st->exc[i];

      ol_gain=sqrt(1+ol_gain/st->frameSize);
   }

   /*VBR stuff*/
   if (st->vbr && (st->vbr_enabled||st->vad_enabled))
   {
      if (st->abr_enabled)
      {
         float qual_change=0;
         if (st->abr_drift2 * st->abr_drift > 0)
         {
            /* Only adapt if long-term and short-term drift are the same sign */
            qual_change = -.00001*st->abr_drift/(1+st->abr_count);
            if (qual_change>.05)
               qual_change=.05;
            if (qual_change<-.05)
               qual_change=-.05;
         }
         st->vbr_quality += qual_change;
         if (st->vbr_quality>10)
            st->vbr_quality=10;
         if (st->vbr_quality<0)
            st->vbr_quality=0;
      }

      st->relative_quality = vbr_analysis(st->vbr, in, st->frameSize, ol_pitch, ol_pitch_coef);

      if (st->vbr_enabled)
      {
         int mode;
         int choice=0;
         float min_diff=100;

         /* Among modes 8..1, pick the one whose (quality-interpolated)
            threshold lies closest below relative_quality; 0 if none does */
         mode = 8;
         while (mode)
         {
            int v1;
            float thresh;
            v1=(int)floor(st->vbr_quality);
            if (v1==10)
               thresh = vbr_nb_thresh[mode][v1];
            else
               thresh = (st->vbr_quality-v1)*vbr_nb_thresh[mode][v1+1] + (1+v1-st->vbr_quality)*vbr_nb_thresh[mode][v1];
            if (st->relative_quality > thresh &&
                st->relative_quality-thresh<min_diff)
            {
               choice = mode;
               min_diff = st->relative_quality-thresh;
            }
            mode--;
         }
         mode=choice;
         if (mode==0)
         {
            /* Mode 0 (nothing transmitted) is only kept when DTX is enabled,
               a mode-1 frame was just sent, the spectrum is stable and
               fewer than 21 frames have been skipped in a row */
            if (st->dtx_count==0 || lsp_dist>.05 || !st->dtx_enabled || st->dtx_count>20)
            {
               mode=1;
               st->dtx_count=1;
            } else {
               mode=0;
               st->dtx_count++;
            }
         } else {
            st->dtx_count=0;
         }

         speex_encoder_ctl(state, SPEEX_SET_MODE, &mode);

         if (st->abr_enabled)
         {
            int bitrate;
            speex_encoder_ctl(state, SPEEX_GET_BITRATE, &bitrate);
            /* abr_enabled doubles as the target bit-rate here */
            st->abr_drift+=(bitrate-st->abr_enabled);
            st->abr_drift2 = .95*st->abr_drift2 + .05*(bitrate-st->abr_enabled);
            st->abr_count += 1.0;
         }

      } else {
         /*VAD only case*/
         int mode;
         if (st->relative_quality<2)
         {
            if (st->dtx_count==0 || lsp_dist>.05 || !st->dtx_enabled || st->dtx_count>20)
            {
               st->dtx_count=1;
               mode=1;
            } else {
               mode=0;
               st->dtx_count++;
            }
         } else {
            st->dtx_count = 0;
            mode=st->submodeSelect;
         }
         /* Set the ID directly so that submodeSelect is left untouched */
         st->submodeID=mode;
      }
   } else {
      st->relative_quality = -1;
   }

   /* First, transmit a zero for narrowband */
   speex_bits_pack(bits, 0, 1);

   /* Transmit the sub-mode we use for this frame */
   speex_bits_pack(bits, st->submodeID, NB_SUBMODE_BITS);

   /* If null mode (no transmission), just set a couple things to zero*/
   if (st->submodes[st->submodeID] == NULL)
   {
      for (i=0;i<st->frameSize;i++)
         st->exc[i]=st->exc2[i]=st->sw[i]=0;

      for (i=0;i<st->lpcSize;i++)
         st->mem_sw[i]=0;
      st->first=1;
      st->bounded_pitch = 1;

      /* Final signal synthesis from excitation */
      iir_mem2(st->exc, st->interp_qlpc, st->frame, st->frameSize, st->lpcSize, st->mem_sp);

      /* Replace input by synthesized speech (de-emphasis) */
      in[0] = st->frame[0] + st->preemph*st->pre_mem2;
      for (i=1;i<st->frameSize;i++)
         in[i]=st->frame[i] + st->preemph*in[i-1];
      st->pre_mem2=in[st->frameSize-1];

      return 0;
   }

   /* LSP Quantization */
   if (st->first)
   {
      for (i=0;i<st->lpcSize;i++)
         st->old_lsp[i] = st->lsp[i];
   }

   /*Quantize LSPs*/
#if 1 /*0 for unquantized*/
   SUBMODE(lsp_quant)(st->lsp, st->qlsp, st->lpcSize, bits);
#else
   for (i=0;i<st->lpcSize;i++)
      st->qlsp[i]=st->lsp[i];
#endif

   /*If we use low bit-rate pitch mode, transmit open-loop pitch*/
   if (SUBMODE(lbr_pitch)!=-1)
   {
      speex_bits_pack(bits, ol_pitch-st->min_pitch, 7);
   }

   if (SUBMODE(forced_pitch_gain))
   {
      int quant;
      quant = (int)floor(.5+15*ol_pitch_coef);
      if (quant>15)
         quant=15;
      if (quant<0)
         quant=0;
      speex_bits_pack(bits, quant, 4);
      /* Continue with the value the decoder will reconstruct */
      ol_pitch_coef=0.066667*quant;
   }

   /*Quantize and transmit open-loop excitation gain*/
   {
      int qe = (int)(floor(3.5*log(ol_gain)));
      if (qe<0)
         qe=0;
      if (qe>31)
         qe=31;
      ol_gain = exp(qe/3.5);
      speex_bits_pack(bits, qe, 5);
   }

   /* Special case for first frame */
   if (st->first)
   {
      for (i=0;i<st->lpcSize;i++)
         st->old_qlsp[i] = st->qlsp[i];
   }

   /* Filter response */
   res = PUSH(stack, st->subframeSize, float);
   /* Target signal */
   target = PUSH(stack, st->subframeSize, float);
   syn_resp = PUSH(stack, st->subframeSize, float);
   mem = PUSH(stack, st->lpcSize, float);

   /* Loop on sub-frames */
   for (sub=0;sub<st->nbSubframes;sub++)
   {
      float tmp;
      int   offset;
      float *sp, *sw, *exc, *exc2;
      int pitch;

      /* Offset relative to start of frame */
      offset = st->subframeSize*sub;
      /* Original signal */
      sp=st->frame+offset;
      /* Excitation */
      exc=st->exc+offset;
      /* Weighted signal */
      sw=st->sw+offset;

      exc2=st->exc2+offset;

      /* LSP interpolation (quantized and unquantized) */
      tmp = (1.0 + sub)/st->nbSubframes;
      for (i=0;i<st->lpcSize;i++)
         st->interp_lsp[i] = (1-tmp)*st->old_lsp[i] + tmp*st->lsp[i];
      for (i=0;i<st->lpcSize;i++)
         st->interp_qlsp[i] = (1-tmp)*st->old_qlsp[i] + tmp*st->qlsp[i];

      /* Make sure the filters are stable */
      lsp_enforce_margin(st->interp_lsp, st->lpcSize, .002);
      lsp_enforce_margin(st->interp_qlsp, st->lpcSize, .002);

      /* Compute interpolated LPCs (quantized and unquantized) */
      for (i=0;i<st->lpcSize;i++)
         st->interp_lsp[i] = cos(st->interp_lsp[i]);
      lsp_to_lpc(st->interp_lsp, st->interp_lpc, st->lpcSize,stack);

      for (i=0;i<st->lpcSize;i++)
         st->interp_qlsp[i] = cos(st->interp_qlsp[i]);
      lsp_to_lpc(st->interp_qlsp, st->interp_qlpc, st->lpcSize, stack);

      /* Compute analysis filter gain at w=pi (for use in SB-CELP):
         alternating-sign sum of the quantized LPC coefficients */
      tmp=1;
      st->pi_gain[sub]=0;
      for (i=0;i<=st->lpcSize;i++)
      {
         st->pi_gain[sub] += tmp*st->interp_qlpc[i];
         tmp = -tmp;
      }

      /* Compute bandwidth-expanded (unquantized) LPCs for perceptual weighting */
      bw_lpc(st->gamma1, st->interp_lpc, st->bw_lpc1, st->lpcSize);
      if (st->gamma2>=0)
         bw_lpc(st->gamma2, st->interp_lpc, st->bw_lpc2, st->lpcSize);
      else
      {
         /* Negative gamma2: use the first-order filter 1 - preemph*z^-1 */
         st->bw_lpc2[0]=1;
         st->bw_lpc2[1]=-st->preemph;
         for (i=2;i<=st->lpcSize;i++)
            st->bw_lpc2[i]=0;
      }

      /* Compute impulse response of A(z/g1) / ( A(z)*A(z/g2) )*/
      for (i=0;i<st->subframeSize;i++)
         exc[i]=0;
      exc[0]=1;
      syn_percep_zero(exc, st->interp_qlpc, st->bw_lpc1, st->bw_lpc2, syn_resp, st->subframeSize, st->lpcSize, stack);

      /* Reset excitation */
      for (i=0;i<st->subframeSize;i++)
         exc[i]=0;
      for (i=0;i<st->subframeSize;i++)
         exc2[i]=0;

      /* Compute zero response of A(z/g1) / ( A(z/g2) * A(z) ).  The filters
         run on copies of the memories so the real ones are not disturbed. */
      for (i=0;i<st->lpcSize;i++)
         mem[i]=st->mem_sp[i];
      iir_mem2(exc, st->interp_qlpc, exc, st->subframeSize, st->lpcSize, mem);

      for (i=0;i<st->lpcSize;i++)
         mem[i]=st->mem_sw[i];
      filter_mem2(exc, st->bw_lpc1, st->bw_lpc2, res, st->subframeSize, st->lpcSize, mem);

      /* Compute weighted signal */
      for (i=0;i<st->lpcSize;i++)
         mem[i]=st->mem_sw[i];
      filter_mem2(sp, st->bw_lpc1, st->bw_lpc2, sw, st->subframeSize, st->lpcSize, mem);

      /* Compute target signal */
      for (i=0;i<st->subframeSize;i++)
         target[i]=sw[i]-res[i];

      for (i=0;i<st->subframeSize;i++)
         exc[i]=exc2[i]=0;

      /* If we have a long-term predictor (otherwise, something's wrong) */
      if (SUBMODE(ltp_quant))
      {
         int pit_min, pit_max;
         /* Long-term prediction */
         if (SUBMODE(lbr_pitch) != -1)
         {
            /* Low bit-rate pitch handling: search only around the
               transmitted open-loop pitch */
            int margin;
            margin = SUBMODE(lbr_pitch);
            if (margin)
            {
               if (ol_pitch < st->min_pitch+margin-1)
                  ol_pitch=st->min_pitch+margin-1;
               if (ol_pitch > st->max_pitch-margin)
                  ol_pitch=st->max_pitch-margin;
               pit_min = ol_pitch-margin+1;
               pit_max = ol_pitch+margin;
            } else {
               pit_min=pit_max=ol_pitch;
            }
         } else {
            pit_min = st->min_pitch;
            pit_max = st->max_pitch;
         }

         /* Force pitch to use only the current frame if needed */
         if (st->bounded_pitch && pit_max>offset)
            pit_max=offset;

         /* Perform pitch search */
         pitch = SUBMODE(ltp_quant)(target, sw, st->interp_qlpc, st->bw_lpc1, st->bw_lpc2,
                                    exc, SUBMODE(ltp_params), pit_min, pit_max, ol_pitch_coef,
                                    st->lpcSize, st->subframeSize, bits, stack,
                                    exc2, syn_resp, st->complexity);

         st->pitch[sub]=pitch;
      } else {
         speex_error ("No pitch prediction, what's wrong");
      }

      /* Update target for adaptive codebook contribution */
      syn_percep_zero(exc, st->interp_qlpc, st->bw_lpc1, st->bw_lpc2, res, st->subframeSize, st->lpcSize, stack);
      for (i=0;i<st->subframeSize;i++)
         target[i]-=res[i];

      /* Quantization of innovation */
      {
         float *innov;
         float ener=0, ener_1;

         innov = st->innov+sub*st->subframeSize;
         for (i=0;i<st->subframeSize;i++)
            innov[i]=0;

         /* RMS of the remaining target brought back to the excitation
            domain, expressed relative to the open-loop gain */
         residue_percep_zero(target, st->interp_qlpc, st->bw_lpc1, st->bw_lpc2, st->buf2, st->subframeSize, st->lpcSize, stack);
         for (i=0;i<st->subframeSize;i++)
            ener+=st->buf2[i]*st->buf2[i];
         ener=sqrt(.1+ener/st->subframeSize);

         ener /= ol_gain;

         /* Calculate gain correction for the sub-frame (if any) */
         if (SUBMODE(have_subframe_gain))
         {
            int qe;
            ener=log(ener);
            if (SUBMODE(have_subframe_gain)==3)
            {
               qe = vq_index(&ener, exc_gain_quant_scal3, 1, 8);
               speex_bits_pack(bits, qe, 3);
               ener=exc_gain_quant_scal3[qe];
            } else {
               qe = vq_index(&ener, exc_gain_quant_scal1, 1, 2);
               speex_bits_pack(bits, qe, 1);
               ener=exc_gain_quant_scal1[qe];
            }
            ener=exp(ener);
         } else {
            ener=1;
         }

         ener*=ol_gain;

         ener_1 = 1/ener;

         /* Normalize innovation */
         for (i=0;i<st->subframeSize;i++)
            target[i]*=ener_1;

         /* Quantize innovation */
         if (SUBMODE(innovation_quant))
         {
            /* Codebook search */
            SUBMODE(innovation_quant)(target, st->interp_qlpc, st->bw_lpc1, st->bw_lpc2,
                                      SUBMODE(innovation_params), st->lpcSize, st->subframeSize,
                                      innov, syn_resp, bits, stack, st->complexity);

            /* De-normalize innovation and update excitation */
            for (i=0;i<st->subframeSize;i++)
               innov[i]*=ener;
            for (i=0;i<st->subframeSize;i++)
               exc[i] += innov[i];
         } else {
            speex_error("No fixed codebook");
         }

         /* In some (rare) modes, we do a second search (more bits) to reduce noise even more */
         if (SUBMODE(double_codebook)) {
            /* innov2 is carved from a private copy of the stack pointer, so
               it is released at the end of this block */
            char *tmp_stack=stack;
            float *innov2 = PUSH(tmp_stack, st->subframeSize, float);
            for (i=0;i<st->subframeSize;i++)
               innov2[i]=0;
            for (i=0;i<st->subframeSize;i++)
               target[i]*=2.2;
            SUBMODE(innovation_quant)(target, st->interp_qlpc, st->bw_lpc1, st->bw_lpc2,
                                      SUBMODE(innovation_params), st->lpcSize, st->subframeSize,
                                      innov2, syn_resp, bits, tmp_stack, st->complexity);
            for (i=0;i<st->subframeSize;i++)
               innov2[i]*=ener*(1/2.2);
            for (i=0;i<st->subframeSize;i++)
               exc[i] += innov2[i];
         }

         for (i=0;i<st->subframeSize;i++)
            target[i]*=ener;
      }

      /*Keep the previous memory*/
      for (i=0;i<st->lpcSize;i++)
         mem[i]=st->mem_sp[i];
      /* Final signal synthesis from excitation (overwrites the input
         samples of this sub-frame in st->frame) */
      iir_mem2(exc, st->interp_qlpc, sp, st->subframeSize, st->lpcSize, st->mem_sp);

      /* Compute weighted signal again, from synthesized speech (not sure it's the right thing) */
      filter_mem2(sp, st->bw_lpc1, st->bw_lpc2, sw, st->subframeSize, st->lpcSize, st->mem_sw);
      for (i=0;i<st->subframeSize;i++)
         exc2[i]=exc[i];
   }

   /* Store the LSPs for interpolation in the next frame */
   if (st->submodeID>=1)
   {
      for (i=0;i<st->lpcSize;i++)
         st->old_lsp[i] = st->lsp[i];
      for (i=0;i<st->lpcSize;i++)
         st->old_qlsp[i] = st->qlsp[i];
   }

   /* Sub-mode 1 carries 4 extra bits: 15 while a DTX run is active, else 0 */
   if (st->submodeID==1)
   {
      if (st->dtx_count)
         speex_bits_pack(bits, 15, 4);
      else
         speex_bits_pack(bits, 0, 4);
   }

   /* The next frame will not be the first (Duh!) */
   st->first = 0;

   /* Replace input by synthesized speech */
   in[0] = st->frame[0] + st->preemph*st->pre_mem2;
   for (i=1;i<st->frameSize;i++)
      in[i]=st->frame[i] + st->preemph*in[i-1];
   st->pre_mem2=in[st->frameSize-1];

   /* After a noise-codebook frame (or sub-mode 0) the next frame's pitch
      search is restricted to lags inside the current frame */
   if (SUBMODE(innovation_quant) == noise_codebook_quant || st->submodeID==0)
      st->bounded_pitch = 1;
   else
      st->bounded_pitch = 0;

   return 1;
}
/*
 * Create a narrowband decoder state for mode `m`.
 *
 * A single block is requested from speex_alloc(): the DecState structure
 * followed by room for 4000 floats.  The per-decoder arrays are carved out
 * of that trailing area with PUSH(); what remains is used as scratch space
 * by the decoding functions through st->stack.
 *
 * Returns the new state (as void *), or NULL if the allocation failed.
 */
void *nb_decoder_init(SpeexMode *m)
{
   DecState *st;
   SpeexNBMode *mode;
   int i;

   mode=(SpeexNBMode*)m->mode;
   st = (DecState *)speex_alloc(sizeof(DecState)+4000*sizeof(float));
   /* Same failure convention as nb_encoder_init(): report it to the caller
      instead of writing through a null pointer */
   if (!st)
      return NULL;
   st->mode=m;

   /* The allocation area starts right after the structure itself */
   st->stack = ((char*)st) + sizeof(DecState);

   st->first=1;
   /* Codec parameters, should eventually have several "modes"*/
   st->frameSize = mode->frameSize;
   st->windowSize = st->frameSize*3/2;
   st->nbSubframes=mode->frameSize/mode->subframeSize;
   st->subframeSize=mode->subframeSize;
   st->lpcSize = mode->lpcSize;
   st->bufSize = mode->bufSize;
   st->gamma1=mode->gamma1;
   st->gamma2=mode->gamma2;
   st->min_pitch=mode->pitchStart;
   st->max_pitch=mode->pitchEnd;
   st->preemph = mode->preemph;

   st->submodes=mode->submodes;
   st->submodeID=mode->defaultSubmode;

   st->pre_mem=0;
   st->lpc_enh_enabled=0;

   /* Signal buffers: each "current" pointer addresses the last windowSize
      samples of its buffer */
   st->inBuf = PUSH(st->stack, st->bufSize, float);
   st->frame = st->inBuf + st->bufSize - st->windowSize;
   st->excBuf = PUSH(st->stack, st->bufSize, float);
   st->exc = st->excBuf + st->bufSize - st->windowSize;
   for (i=0;i<st->bufSize;i++)
      st->inBuf[i]=0;
   for (i=0;i<st->bufSize;i++)
      st->excBuf[i]=0;
   st->innov = PUSH(st->stack, st->frameSize, float);

   st->interp_qlpc = PUSH(st->stack, st->lpcSize+1, float);
   st->qlsp = PUSH(st->stack, st->lpcSize, float);
   st->old_qlsp = PUSH(st->stack, st->lpcSize, float);
   st->interp_qlsp = PUSH(st->stack, st->lpcSize, float);
   /* Synthesis memory is 5*lpcSize long: the enhancement filters use
      sections of it beyond the first lpcSize entries */
   st->mem_sp = PUSH(st->stack, 5*st->lpcSize, float);
   st->comb_mem = PUSHS(st->stack, CombFilterMem);
   comp_filter_mem_init (st->comb_mem);

   st->pi_gain = PUSH(st->stack, st->nbSubframes, float);

   /* Packet-loss concealment history */
   st->last_pitch = 40;
   st->count_lost=0;
   st->pitch_gain_buf[0] = st->pitch_gain_buf[1] = st->pitch_gain_buf[2] = 0;
   st->pitch_gain_buf_idx = 0;

   st->sampling_rate=8000;
   st->last_ol_gain = 0;

   /* In-band request handlers: default user handler, no Speex callbacks */
   st->user_callback.func = &speex_default_user_handler;
   st->user_callback.data = NULL;
   for (i=0;i<16;i++)
      st->speex_callbacks[i].func = NULL;

   st->voc_m1=st->voc_m2=st->voc_mean=0;
   st->voc_offset=0;
   st->dtx_enabled=0;
   return st;
}
/*
 * Release a decoder state created by nb_decoder_init().
 *
 * The whole state lives in one block obtained from speex_alloc(), so a
 * single speex_free() releases everything.
 */
void nb_decoder_destroy(void *state)
{
   speex_free(state);
}
/* Median of three values.  Each argument may be evaluated several times, so
   only pass expressions without side effects. */
#define median3(a, b, c) \
   ((a) < (b) \
      ? ((b) < (c) ? (b) : ((a) < (c) ? (c) : (a))) \
      : ((c) < (b) ? (b) : ((c) < (a) ? (c) : (a))))
/*
 * Conceal a lost frame: synthesize st->frameSize output samples from the
 * decoder's history alone, without reading any bits.
 *
 *   st     decoder state
 *   out    receives st->frameSize synthesized samples
 *   stack  scratch memory (three arrays of lpcSize+1 floats are taken)
 *
 * The excitation is the past excitation repeated at lag st->last_pitch and
 * scaled by an attenuated pitch gain, plus random noise scaled by the RMS of
 * the last decoded innovation.  The attenuation grows with the number of
 * consecutive lost frames.  On return st->count_lost has been incremented
 * and the gain used has been stored in the pitch-gain history.
 */
static void nb_decode_lost(DecState *st, float *out, char *stack)
{
   int i, sub;
   float *awk1, *awk2, *awk3;
   float pitch_gain, fact, gain_med;
   float innov_gain=0;

   /* Attenuation factor: 1 for the first lost frame, then decaying as
      exp(-.04*n^2) with n the number of frames already lost */
   fact = exp(-.04*st->count_lost*st->count_lost);

   /* Never let the gain rise above the median of the last three gains */
   gain_med = median3(st->pitch_gain_buf[0], st->pitch_gain_buf[1], st->pitch_gain_buf[2]);
   if (gain_med < st->last_pitch_gain)
      st->last_pitch_gain = gain_med;

   pitch_gain = st->last_pitch_gain;
   if (pitch_gain>.95)
      pitch_gain=.95;

   pitch_gain *= fact;

   /* Shift all buffers by one frame */
   speex_move(st->inBuf, st->inBuf+st->frameSize, (st->bufSize-st->frameSize)*sizeof(float));
   speex_move(st->excBuf, st->excBuf+st->frameSize, (st->bufSize-st->frameSize)*sizeof(float));

   awk1=PUSH(stack, (st->lpcSize+1), float);
   awk2=PUSH(stack, (st->lpcSize+1), float);
   awk3=PUSH(stack, (st->lpcSize+1), float);

   /* RMS of the last decoded innovation.  st->innov is not modified in this
      function, so the value is the same for every sub-frame and is computed
      once here. */
   for (i=0;i<st->frameSize;i++)
      innov_gain += st->innov[i]*st->innov[i];
   innov_gain=sqrt(innov_gain/st->frameSize);

   for (sub=0;sub<st->nbSubframes;sub++)
   {
      int offset;
      float *sp, *exc;
      /* Offset relative to start of frame */
      offset = st->subframeSize*sub;
      /* Output signal for this sub-frame */
      sp=st->frame+offset;
      /* Excitation */
      exc=st->exc+offset;

      /* Calculate perceptually enhanced LPC filter */
      if (st->lpc_enh_enabled)
      {
         float r=.9;
         float k1,k2,k3;
         /* The null sub-mode has no enhancement parameters of its own */
         if (st->submodes[st->submodeID] != NULL)
         {
            k1=SUBMODE(lpc_enh_k1);
            k2=SUBMODE(lpc_enh_k2);
         } else {
            k1=k2=.7;
         }
         k3=(1-(1-r*k1)/(1-r*k2))/r;
         bw_lpc(k1, st->interp_qlpc, awk1, st->lpcSize);
         bw_lpc(k2, st->interp_qlpc, awk2, st->lpcSize);
         bw_lpc(k3, st->interp_qlpc, awk3, st->lpcSize);
      }

      /* Make up a plausible excitation */
      /* THIS CAN BE IMPROVED */
      for (i=0;i<st->subframeSize;i++)
      {
         /* exc[i-st->last_pitch] reaches back into the excitation history
            that precedes st->exc inside st->excBuf */
         exc[i]=pitch_gain*exc[i-st->last_pitch] +
            fact*sqrt(1-pitch_gain)*speex_rand(innov_gain);
      }

      for (i=0;i<st->subframeSize;i++)
         sp[i]=exc[i];

      /* Signal synthesis */
      if (st->lpc_enh_enabled)
      {
         filter_mem2(sp, awk2, awk1, sp, st->subframeSize, st->lpcSize,
                     st->mem_sp+st->lpcSize);
         filter_mem2(sp, awk3, st->interp_qlpc, sp, st->subframeSize, st->lpcSize,
                     st->mem_sp);
      } else {
         /* Clear the memory section used by the first enhancement filter */
         for (i=0;i<st->lpcSize;i++)
            st->mem_sp[st->lpcSize+i] = 0;
         iir_mem2(sp, st->interp_qlpc, sp, st->subframeSize, st->lpcSize,
                  st->mem_sp);
      }
   }

   /* De-emphasis into the output buffer */
   out[0] = st->frame[0] + st->preemph*st->pre_mem;
   for (i=1;i<st->frameSize;i++)
      out[i]=st->frame[i] + st->preemph*out[i-1];
   st->pre_mem=out[st->frameSize-1];

   st->first = 0;
   st->count_lost++;

   /* Record the gain actually used in the three-entry circular history */
   st->pitch_gain_buf[st->pitch_gain_buf_idx++] = pitch_gain;
   if (st->pitch_gain_buf_idx > 2) /* rollover */
      st->pitch_gain_buf_idx = 0;
}
int nb_decode(void *state, SpeexBits *bits, float *out)
{
DecState *st;
int i, sub;
int pitch;
float pitch_gain[3];
float ol_gain=0;
int ol_pitch=0;
float ol_pitch_coef=0;
int best_pitch=40;
float best_pitch_gain=0;
int wideband;
int m;
char *stack;
float *awk1, *awk2, *awk3;
float pitch_average=0;
st=(DecState*)state;
stack=st->stack;
/* Check if we're in DTX mode*/
if (!bits && st->dtx_enabled)
{
st->submodeID=0;
} else
{
/* If bits is NULL, consider the packet to be lost (what could we do anyway) */
if (!bits)
{
nb_decode_lost(st, out, stack);
return 0;
}
/* Search for next narrowband block (handle requests, skip wideband blocks) */
do {
wideband = speex_bits_unpack_unsigned(bits, 1);
if (wideband) /* Skip wideband block (for compatibility) */
{
int submode;
int advance;
advance = submode = speex_bits_unpack_unsigned(bits, SB_SUBMODE_BITS);
speex_mode_query(&speex_wb_mode, SPEEX_SUBMODE_BITS_PER_FRAME, &advance);
if (advance < 0)
{
speex_warning ("Invalid wideband mode encountered. Corrupted stream?");
return -2;
}
advance -= (SB_SUBMODE_BITS+1);
speex_bits_advance(bits, advance);
wideband = speex_bits_unpack_unsigned(bits, 1);
if (wideband)
{
advance = submode = speex_bits_unpack_unsigned(bits, SB_SUBMODE_BITS);
speex_mode_query(&speex_wb_mode, SPEEX_SUBMODE_BITS_PER_FRAME, &advance);
if (advance < 0)
{
speex_warning ("Invalid wideband mode encountered: corrupted stream?");
return -2;
}
advance -= (SB_SUBMODE_BITS+1);
speex_bits_advance(bits, advance);
wideband = speex_bits_unpack_unsigned(bits, 1);
if (wideband)
{
speex_warning ("More than to wideband layers found: corrupted stream?");
return -2;
}
}
}
/* FIXME: Check for overflow */
m = speex_bits_unpack_unsigned(bits, 4);
if (m==15) /* We found a terminator */
{
return -1;
} else if (m==14) /* Speex in-band request */
{
int ret = speex_inband_handler(bits, st->speex_callbacks, state);
if (ret)
return ret;
} else if (m==13) /* User in-band request */
{
int ret = st->user_callback.func(bits, state, st->user_callback.data);
if (ret)
return ret;
} else if (m>8) /* Invalid mode */
{
speex_warning("Invalid mode encountered: corrupted stream?");
return -2;
}
} while (m>8);
/* Get the sub-mode that was used */
st->submodeID = m;
}
/* Shift all buffers by one frame */
speex_move(st->inBuf, st->inBuf+st->frameSize, (st->bufSize-st->frameSize)*sizeof(float));
speex_move(st->excBuf, st->excBuf+st->frameSize, (st->bufSize-st->frameSize)*sizeof(float));
/* If null mode (no transmission), just set a couple things to zero*/
if (st->submodes[st->submodeID] == NULL)
{
float *lpc;
lpc = PUSH(stack,11, float);
bw_lpc(.93, st->interp_qlpc, lpc, 10);
/*for (i=0;i<st->frameSize;i++)
st->exc[i]=0;*/
{
float innov_gain=0;
float pgain=st->last_pitch_gain;
if (pgain>.6)
pgain=.6;
for (i=0;i<st->frameSize;i++)
innov_gain += st->innov[i]*st->innov[i];
innov_gain=sqrt(innov_gain/st->frameSize);
for (i=0;i<st->frameSize;i++)
st->exc[i]=0;
speex_rand_vec(innov_gain, st->exc, st->frameSize);
}
st->first=1;
/* Final signal synthesis from excitation */
iir_mem2(st->exc, lpc, st->frame, st->frameSize, st->lpcSize, st->mem_sp);
out[0] = st->frame[0] + st->preemph*st->pre_mem;
for (i=1;i<st->frameSize;i++)
out[i]=st->frame[i] + st->preemph*out[i-1];
st->pre_mem=out[st->frameSize-1];
st->count_lost=0;
return 0;
}
/* Unquantize LSPs */
SUBMODE(lsp_unquant)(st->qlsp, st->lpcSize, bits);
/*Damp memory if a frame was lost and the LSP changed too much*/
if (st->count_lost)
{
float lsp_dist=0, fact;
for (i=0;i<st->lpcSize;i++)
lsp_dist += fabs(st->old_qlsp[i] - st->qlsp[i]);
fact = .6*exp(-.2*lsp_dist);
for (i=0;i<2*st->lpcSize;i++)
st->mem_sp[i] *= fact;
}
/* Handle first frame and lost-packet case */
if (st->first || st->count_lost)
{
for (i=0;i<st->lpcSize;i++)
st->old_qlsp[i] = st->qlsp[i];
}
/* Get open-loop pitch estimation for low bit-rate pitch coding */
if (SUBMODE(lbr_pitch)!=-1)
{
ol_pitch = st->min_pitch+speex_bits_unpack_unsigned(bits, 7);
}
if (SUBMODE(forced_pitch_gain))
{
int quant;
quant = speex_bits_unpack_unsigned(bits, 4);
ol_pitch_coef=0.066667*quant;
}
/* Get global excitation gain */
{
int qe;
qe = speex_bits_unpack_unsigned(bits, 5);
ol_gain = exp(qe/3.5);
}
awk1=PUSH(stack, st->lpcSize+1, float);
awk2=PUSH(stack, st->lpcSize+1, float);
awk3=PUSH(stack, st->lpcSize+1, float);
if (st->submodeID==1)
{
int extra;
extra = speex_bits_unpack_unsigned(bits, 4);
if (extra==15)
st->dtx_enabled=1;
else
st->dtx_enabled=0;
}
if (st->submodeID>1)
st->dtx_enabled=0;
/*Loop on subframes */
for (sub=0;sub<st->nbSubframes;sub++)
{
int offset;
float *sp, *exc, tmp;
/* Offset relative to start of frame */
offset = st->subframeSize*sub;
/* Original signal */
sp=st->frame+offset;
/* Excitation */
exc=st->exc+offset;
/* Excitation after post-filter*/
/* LSP interpolation (quantized and unquantized) */
tmp = (1.0 + sub)/st->nbSubframes;
for (i=0;i<st->lpcSize;i++)
st->interp_qlsp[i] = (1-tmp)*st->old_qlsp[i] + tmp*st->qlsp[i];
/* Make sure the LSP's are stable */
lsp_enforce_margin(st->interp_qlsp, st->lpcSize, .002);
/* Compute interpolated LPCs (unquantized) */
for (i=0;i<st->lpcSize;i++)
st->interp_qlsp[i] = cos(st->interp_qlsp[i]);
lsp_to_lpc(st->interp_qlsp, st->interp_qlpc, st->lpcSize, stack);
/* Compute enhanced synthesis filter */
if (st->lpc_enh_enabled)
{
float r=.9;
float k1,k2,k3;
k1=SUBMODE(lpc_enh_k1);
k2=SUBMODE(lpc_enh_k2);
k3=(1-(1-r*k1)/(1-r*k2))/r;
if (!st->lpc_enh_enabled)
{
k1=k2;
k3=0;
}
bw_lpc(k1, st->interp_qlpc, awk1, st->lpcSize);
bw_lpc(k2, st->interp_qlpc, awk2, st->lpcSize);
bw_lpc(k3, st->interp_qlpc, awk3, st->lpcSize);
}
/* Compute analysis filter at w=pi */
tmp=1;
st->pi_gain[sub]=0;
for (i=0;i<=st->lpcSize;i++)
{
st->pi_gain[sub] += tmp*st->interp_qlpc[i];
tmp = -tmp;
}
/* Reset excitation */
for (i=0;i<st->subframeSize;i++)
exc[i]=0;
/*Adaptive codebook contribution*/
if (SUBMODE(ltp_unquant))
{
int pit_min, pit_max;
/* Handle pitch constraints if any */
if (SUBMODE(lbr_pitch) != -1)
{
int margin;
margin = SUBMODE(lbr_pitch);
if (margin)
{
/* GT - need optimization?
if (ol_pitch < st->min_pitch+margin-1)
ol_pitch=st->min_pitch+margin-1;
if (ol_pitch > st->max_pitch-margin)
ol_pitch=st->max_pitch-margin;
pit_min = ol_pitch-margin+1;
pit_max = ol_pitch+margin;
*/
pit_min = ol_pitch-margin+1;
if (pit_min < st->min_pitch)
pit_min = st->min_pitch;
pit_max = ol_pitch+margin;
if (pit_max > st->max_pitch)
pit_max = st->max_pitch;
} else {
pit_min = pit_max = ol_pitch;
}
} else {
pit_min = st->min_pitch;
pit_max = st->max_pitch;
}
/* Pitch synthesis */
SUBMODE(ltp_unquant)(exc, pit_min, pit_max, ol_pitch_coef, SUBMODE(ltp_params),
st->subframeSize, &pitch, &pitch_gain[0], bits, stack, st->count_lost, offset, st->last_pitch_gain);
/* If we had lost frames, check energy of last received frame */
if (st->count_lost && ol_gain < st->last_ol_gain)
{
float fact = ol_gain/(st->last_ol_gain+1);
for (i=0;i<st->subframeSize;i++)
exc[i]*=fact;
}
tmp = fabs(pitch_gain[0]+pitch_gain[1]+pitch_gain[2]);
tmp = fabs(pitch_gain[1]);
if (pitch_gain[0]>0)
tmp += pitch_gain[0];
else
tmp -= .5*pitch_gain[0];
if (pitch_gain[2]>0)
tmp += pitch_gain[2];
else
tmp -= .5*pitch_gain[0];
pitch_average += tmp;
if (tmp>best_pitch_gain)
{
best_pitch = pitch;
best_pitch_gain = tmp;
/* best_pitch_gain = tmp*.9;
if (best_pitch_gain>.85)
best_pitch_gain=.85;*/
}
} else {
speex_error("No pitch prediction, what's wrong");
}
/* Unquantize the innovation */
{
int q_energy;
float ener;
float *innov;
innov = st->innov+sub*st->subframeSize;
for (i=0;i<st->subframeSize;i++)
innov[i]=0;
/* Decode sub-frame gain correction */
if (SUBMODE(have_subframe_gain)==3)
{
q_energy = speex_bits_unpack_unsigned(bits, 3);
ener = ol_gain*exp(exc_gain_quant_scal3[q_energy]);
} else if (SUBMODE(have_subframe_gain)==1)
{
q_energy = speex_bits_unpack_unsigned(bits, 1);
ener = ol_gain*exp(exc_gain_quant_scal1[q_energy]);
} else {
ener = ol_gain;
}
if (SUBMODE(innovation_unquant))
{
/*Fixed codebook contribution*/
SUBMODE(innovation_unquant)(innov, SUBMODE(innovation_params), st->subframeSize, bits, stack);
} else {
speex_error("No fixed codebook");
}
/* De-normalize innovation and update excitation */
for (i=0;i<st->subframeSize;i++)
innov[i]*=ener;
/*Vocoder mode*/
if (st->submodeID==1)
{
float g=ol_pitch_coef;
for (i=0;i<st->subframeSize;i++)
exc[i]=0;
while (st->voc_offset<st->subframeSize)
{
if (st->voc_offset>=0)
exc[st->voc_offset]=sqrt(1.0*ol_pitch);
st->voc_offset+=ol_pitch;
}
st->voc_offset -= st->subframeSize;
g=.5+2*(g-.6);
if (g<0)
g=0;
if (g>1)
g=1;
for (i=0;i<st->subframeSize;i++)
{
float exci=exc[i];
exc[i]=.8*g*exc[i]*ol_gain + .6*g*st->voc_m1*ol_gain + .5*g*innov[i] - .5*g*st->voc_m2 + (1-g)*innov[i];
st->voc_m1 = exci;
st->voc_m2=innov[i];
st->voc_mean = .95*st->voc_mean + .05*exc[i];
exc[i]-=st->voc_mean;
}
} else {
for (i=0;i<st->subframeSize;i++)
exc[i]+=innov[i];
}
/* Decode second codebook (only for some modes) */
if (SUBMODE(double_codebook))
{
char *tmp_stack=stack;
float *innov2 = PUSH(tmp_stack, st->subframeSize, float);
for (i=0;i<st->subframeSize;i++)
innov2[i]=0;
SUBMODE(innovation_unquant)(innov2, SUBMODE(innovation_params), st->subframeSize, bits, tmp_stack);
for (i=0;i<st->subframeSize;i++)
innov2[i]*=ener*(1/2.2);
for (i=0;i<st->subframeSize;i++)
exc[i] += innov2[i];
}
}
for (i=0;i<st->subframeSize;i++)
sp[i]=exc[i];
/* Signal synthesis */
if (st->lpc_enh_enabled && SUBMODE(comb_gain)>0)
comb_filter(exc, sp, st->interp_qlpc, st->lpcSize, st->subframeSize,
pitch, pitch_gain, SUBMODE(comb_gain), st->comb_mem);
if (st->lpc_enh_enabled)
{
/* Use enhanced LPC filter */
filter_mem2(sp, awk2, awk1, sp, st->subframeSize, st->lpcSize,
st->mem_sp+st->lpcSize);
filter_mem2(sp, awk3, st->interp_qlpc, sp, st->subframeSize, st->lpcSize,
st->mem_sp);
} else {
/* Use regular filter */
for (i=0;i<st->lpcSize;i++)
st->mem_sp[st->lpcSize+i] = 0;
iir_mem2(sp, st->interp_qlpc, sp, st->subframeSize, st->lpcSize,
st->mem_sp);
}
}
/*Copy output signal*/
out[0] = st->frame[0] + st->preemph*st->pre_mem;
for (i=1;i<st->frameSize;i++)
out[i]=st->frame[i] + st->preemph*out[i-1];
st->pre_mem=out[st->frameSize-1];
/* Store the LSPs for interpolation in the next frame */
for (i=0;i<st->lpcSize;i++)
st->old_qlsp[i] = st->qlsp[i];
/* The next frame will not be the first (Duh!) */
st->first = 0;
st->count_lost=0;
st->last_pitch = best_pitch;
st->last_pitch_gain = .25*pitch_average;
st->pitch_gain_buf[st->pitch_gain_buf_idx++] = st->last_pitch_gain;
if (st->pitch_gain_buf_idx > 2) /* rollover */
st->pitch_gain_buf_idx = 0;
st->last_ol_gain = ol_gain;
return 0;
}
/* Walk the quality settings from 10 down to 0, applying each one to the
   encoder with SPEEX_SET_QUALITY and then reading back SPEEX_GET_BITRATE,
   and stop at the first setting whose bit-rate does not exceed target.
   Returns the quality that was selected, or -1 when no setting satisfied the
   target (the last quality requested in that case was 0). */
static int nb_quality_for_rate(EncState *st, int target)
{
   int quality;
   int rate;

   for (quality = 10; quality >= 0; quality--)
   {
      speex_encoder_ctl(st, SPEEX_SET_QUALITY, &quality);
      speex_encoder_ctl(st, SPEEX_GET_BITRATE, &rate);
      if (rate <= target)
         break;
   }
   return quality;
}

/* Narrowband encoder control: read or change one encoder parameter.
     state   - encoder state, used as an EncState
     request - SPEEX_* request code selecting the operation
     ptr     - in/out argument whose pointed-to type depends on the request:
               int for most requests, float for the VBR quality and relative
               quality requests, and a float array for the PI gain,
               excitation and innovation copies
   Returns 0 when the request was handled; for an unrecognised request a
   warning is emitted and -1 is returned. */
int nb_encoder_ctl(void *state, int request, void *ptr)
{
   EncState *st = (EncState*)state;

   switch (request)
   {
   case SPEEX_GET_FRAME_SIZE:
      *(int*)ptr = st->frameSize;
      break;

   case SPEEX_SET_LOW_MODE:
   case SPEEX_SET_MODE:
      /* The requested sub-mode is stored as-is, without validation */
      st->submodeID = *(int*)ptr;
      st->submodeSelect = st->submodeID;
      break;

   case SPEEX_GET_LOW_MODE:
   case SPEEX_GET_MODE:
      *(int*)ptr = st->submodeID;
      break;

   case SPEEX_SET_VBR:
      st->vbr_enabled = *(int*)ptr;
      break;

   case SPEEX_GET_VBR:
      *(int*)ptr = st->vbr_enabled;
      break;

   case SPEEX_SET_VAD:
      st->vad_enabled = *(int*)ptr;
      break;

   case SPEEX_GET_VAD:
      *(int*)ptr = st->vad_enabled;
      break;

   case SPEEX_SET_DTX:
      st->dtx_enabled = *(int*)ptr;
      break;

   case SPEEX_GET_DTX:
      *(int*)ptr = st->dtx_enabled;
      break;

   case SPEEX_SET_ABR:
      {
         int best;
         float vbr_qual;

         /* The value passed in is both the ABR setting and the target rate;
            ABR always switches VBR on */
         st->abr_enabled = *(int*)ptr;
         st->vbr_enabled = 1;

         best = nb_quality_for_rate(st, *(int*)ptr);

         /* Seed the VBR quality from the selected quality, floored at 0 */
         vbr_qual = best;
         if (vbr_qual < 0)
            vbr_qual = 0;
         speex_encoder_ctl(st, SPEEX_SET_VBR_QUALITY, &vbr_qual);

         /* Restart the ABR bookkeeping */
         st->abr_count = 0;
         st->abr_drift = 0;
         st->abr_drift2 = 0;
      }
      break;

   case SPEEX_GET_ABR:
      *(int*)ptr = st->abr_enabled;
      break;

   case SPEEX_SET_VBR_QUALITY:
      st->vbr_quality = *(float*)ptr;
      break;

   case SPEEX_GET_VBR_QUALITY:
      *(float*)ptr = st->vbr_quality;
      break;

   case SPEEX_SET_QUALITY:
      {
         int q = *(int*)ptr;

         /* Clamp to the 0..10 range covered by the quality map */
         if (q < 0)
            q = 0;
         else if (q > 10)
            q = 10;

         st->submodeID = ((SpeexNBMode*)(st->mode->mode))->quality_map[q];
         st->submodeSelect = st->submodeID;
      }
      break;

   case SPEEX_SET_COMPLEXITY:
      st->complexity = *(int*)ptr;
      /* Complexity is never allowed below 1 */
      if (st->complexity < 1)
         st->complexity = 1;
      break;

   case SPEEX_GET_COMPLEXITY:
      *(int*)ptr = st->complexity;
      break;

   case SPEEX_SET_BITRATE:
      /* Only the side effect matters here: the encoder is left at the
         quality chosen by the search */
      nb_quality_for_rate(st, *(int*)ptr);
      break;

   case SPEEX_GET_BITRATE:
      /* A sub-mode without a descriptor only accounts for the sub-mode
         selector bits plus one */
      if (st->submodes[st->submodeID])
         *(int*)ptr = st->sampling_rate*SUBMODE(bits_per_frame)/st->frameSize;
      else
         *(int*)ptr = st->sampling_rate*(NB_SUBMODE_BITS+1)/st->frameSize;
      break;

   case SPEEX_SET_SAMPLING_RATE:
      st->sampling_rate = *(int*)ptr;
      break;

   case SPEEX_GET_SAMPLING_RATE:
      *(int*)ptr = st->sampling_rate;
      break;

   case SPEEX_RESET_STATE:
      {
         int k;

         st->bounded_pitch = 1;
         st->first = 1;

         for (k = 0; k < st->lpcSize; k++)
         {
            /* LSPs are spread evenly between 0 and pi */
            st->lsp[k] = (M_PI*((float)(k+1)))/(st->lpcSize+1);

            /* Clear the filter memories */
            st->mem_exc[k] = 0;
            st->mem_sp[k] = 0;
            st->mem_sw_whole[k] = 0;
            st->mem_sw[k] = 0;
         }

         /* Clear the signal buffers */
         for (k = 0; k < st->bufSize; k++)
         {
            st->exc2Buf[k] = 0;
            st->inBuf[k] = 0;
            st->swBuf[k] = 0;
            st->excBuf[k] = 0;
         }
      }
      break;

   case SPEEX_GET_PI_GAIN:
      {
         float *dst = (float*)ptr;
         int sub;

         /* One value per sub-frame */
         for (sub = 0; sub < st->nbSubframes; sub++)
            dst[sub] = st->pi_gain[sub];
      }
      break;

   case SPEEX_GET_EXC:
      {
         float *dst = (float*)ptr;
         int n;

         /* Copy one frame of excitation */
         for (n = 0; n < st->frameSize; n++)
            dst[n] = st->exc[n];
      }
      break;

   case SPEEX_GET_INNOV:
      {
         float *dst = (float*)ptr;
         int n;

         /* Copy one frame of innovation */
         for (n = 0; n < st->frameSize; n++)
            dst[n] = st->innov[n];
      }
      break;

   case SPEEX_GET_RELATIVE_QUALITY:
      *(float*)ptr = st->relative_quality;
      break;

   default:
      speex_warning_int("Unknown nb_ctl request: ", request);
      return -1;
   }

   return 0;
}
int nb_decoder_ctl(void *state, int request, void *ptr)
{
DecState *st;
st=(DecState*)state;
switch(request)
{
case SPEEX_GET_LOW_MODE:
case SPEEX_GET_MODE:
(*(int*)ptr) = st->submodeID;
break;
case SPEEX_SET_ENH:
st->lpc_enh_enabled = *((int*)ptr);
break;
case SPEEX_GET_ENH:
*((int*)ptr) = st->lpc_enh_enabled;
break;
case SPEEX_GET_FRAME_SIZE:
(*(int*)ptr) = st->frameSize;
break;
case SPEEX_GET_BITRATE:
if (st->submodes[st->submodeID])
(*(int*)ptr) = st->sampling_rate*SUBMODE(bits_per_frame)/st->frameSize;
else
(*(int*)ptr) = st->sampling_rate*(NB_SUBMODE_BITS+1)/st->frameSize;
break;
case SPEEX_SET_SAMPLING_RATE:
st->sampling_rate = (*(int*)ptr);
break;
case SPEEX_GET_SAMPLING_RATE:
(*(int*)ptr)=st->sampling_rate;
break;
case SPEEX_SET_HANDLER:
{
SpeexCallback *c = (SpeexCallback*)ptr;
st->speex_callbacks[c->callback_id].func=c->func;
st->speex_callbacks[c->callback_id].data=c->data;
st->speex_callbacks[c->callback_id].callback_id=c->callback_id;
}
break;
case SPEEX_SET_USER_HANDLER:
{
SpeexCallback *c = (SpeexCallback*)ptr;
st->user_callback.func=c->func;
st->user_callback.data=c->data;
st->user_callback.callback_id=c->callback_id;
}
break;
case SPEEX_RESET_STATE:
{
int i;
for (i=0;i<2*st->lpcSize;i++)
st->mem_sp[i]=0;
for (i=0;i<st->bufSize;i++)
st->excBuf[i]=st->inBuf[i]=0;
}
break;
case SPEEX_GET_PI_GAIN:
{
int i;
float *g = (float*)ptr;
for (i=0;i<st->nbSubframes;i++)
g[i]=st->pi_gain[i];
}
break;
case SPEEX_GET_EXC:
{
int i;
float *e = (float*)ptr;
for (i=0;i<st->frameSize;i++)
e[i]=st->exc[i];
}
break;
case SPEEX_GET_INNOV:
{
int i;
float *e = (float*)ptr;
for (i=0;i<st->frameSize;i++)
e[i]=st->innov[i];
}
break;
case SPEEX_GET_DTX_STATUS:
*((int*)ptr) = st->dtx_enabled;
break;
default:
speex_warning_int("Unknown nb_ctl request: ", request);
return -1;
}
return 0;
}