g722enc.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) CMU 1993 Computer Science, Speech Group
3  * Chengxiang Lu and Alex Hauptmann
4  * Copyright (c) 2005 Steve Underwood <steveu at coppice.org>
5  * Copyright (c) 2009 Kenan Gillet
6  * Copyright (c) 2010 Martin Storsjo
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24 
30 #include "avcodec.h"
31 #include "internal.h"
32 #include "g722.h"
33 #include "libavutil/common.h"
34 
/* Number of trellis steps between forced flushes of the best path:
 * g722_encode_trellis() writes the path out whenever
 * i == froze + FREEZE_INTERVAL, and the init code sizes each path buffer
 * as (1 << trellis) * FREEZE_INTERVAL entries. */
35 #define FREEZE_INTERVAL 128
36 
37 /* This is an arbitrary value. Allowing insanely large values leads to strange
38  problems, so we limit it to a reasonable value */
39 #define MAX_FRAME_SIZE 32768
40 
41 /* We clip the value of avctx->trellis to prevent data type overflows and
42  undefined behavior. Using larger values is insanely slow anyway. */
43 #define MIN_TRELLIS 0
44 #define MAX_TRELLIS 16
45 
47 {
48  G722Context *c = avctx->priv_data;
49  int i;
50  for (i = 0; i < 2; i++) {
51  av_freep(&c->paths[i]);
52  av_freep(&c->node_buf[i]);
53  av_freep(&c->nodep_buf[i]);
54  }
55 #if FF_API_OLD_ENCODE_AUDIO
56  av_freep(&avctx->coded_frame);
57 #endif
58  return 0;
59 }
60 
62 {
63  G722Context *c = avctx->priv_data;
64  int ret;
65 
66  if (avctx->channels != 1) {
67  av_log(avctx, AV_LOG_ERROR, "Only mono tracks are allowed.\n");
68  return AVERROR_INVALIDDATA;
69  }
70 
71  c->band[0].scale_factor = 8;
72  c->band[1].scale_factor = 2;
73  c->prev_samples_pos = 22;
74 
75  if (avctx->trellis) {
76  int frontier = 1 << avctx->trellis;
77  int max_paths = frontier * FREEZE_INTERVAL;
78  int i;
79  for (i = 0; i < 2; i++) {
80  c->paths[i] = av_mallocz(max_paths * sizeof(**c->paths));
81  c->node_buf[i] = av_mallocz(2 * frontier * sizeof(**c->node_buf));
82  c->nodep_buf[i] = av_mallocz(2 * frontier * sizeof(**c->nodep_buf));
83  if (!c->paths[i] || !c->node_buf[i] || !c->nodep_buf[i]) {
84  ret = AVERROR(ENOMEM);
85  goto error;
86  }
87  }
88  }
89 
90  if (avctx->frame_size) {
91  /* validate frame size */
92  if (avctx->frame_size & 1 || avctx->frame_size > MAX_FRAME_SIZE) {
93  int new_frame_size;
94 
95  if (avctx->frame_size == 1)
96  new_frame_size = 2;
97  else if (avctx->frame_size > MAX_FRAME_SIZE)
98  new_frame_size = MAX_FRAME_SIZE;
99  else
100  new_frame_size = avctx->frame_size - 1;
101 
102  av_log(avctx, AV_LOG_WARNING, "Requested frame size is not "
103  "allowed. Using %d instead of %d\n", new_frame_size,
104  avctx->frame_size);
105  avctx->frame_size = new_frame_size;
106  }
107  } else {
108  /* This is arbitrary. We use 320 because it's 20ms @ 16kHz, which is
109  a common packet size for VoIP applications */
110  avctx->frame_size = 320;
111  }
112  avctx->delay = 22;
113 
114  if (avctx->trellis) {
115  /* validate trellis */
116  if (avctx->trellis < MIN_TRELLIS || avctx->trellis > MAX_TRELLIS) {
117  int new_trellis = av_clip(avctx->trellis, MIN_TRELLIS, MAX_TRELLIS);
118  av_log(avctx, AV_LOG_WARNING, "Requested trellis value is not "
119  "allowed. Using %d instead of %d\n", new_trellis,
120  avctx->trellis);
121  avctx->trellis = new_trellis;
122  }
123  }
124 
125 #if FF_API_OLD_ENCODE_AUDIO
126  avctx->coded_frame = avcodec_alloc_frame();
127  if (!avctx->coded_frame) {
128  ret = AVERROR(ENOMEM);
129  goto error;
130  }
131 #endif
132 
133  return 0;
134 error:
135  g722_encode_close(avctx);
136  return ret;
137 }
138 
/*
 * Ascending decision thresholds used by encode_low(), which multiplies
 * them by the band's scale_factor before comparing.
 *
 * The array is declared with 33 slots but only 29 values are given; the
 * four trailing slots are therefore zero. encode_low() never reads past
 * index 28.
 */
static const int16_t low_quant[33] = {
      35,   72,  110,  150,
     190,  233,  276,  323,
     370,  422,  473,  530,
     587,  650,  714,  786,
     858,  940, 1023, 1121,
    1219, 1339, 1458, 1612,
    1765, 1980, 2195, 2557,
    2919
};
145 
146 static inline void filter_samples(G722Context *c, const int16_t *samples,
147  int *xlow, int *xhigh)
148 {
149  int xout1, xout2;
150  c->prev_samples[c->prev_samples_pos++] = samples[0];
151  c->prev_samples[c->prev_samples_pos++] = samples[1];
152  ff_g722_apply_qmf(c->prev_samples + c->prev_samples_pos - 24, &xout1, &xout2);
153  *xlow = xout1 + xout2 >> 14;
154  *xhigh = xout1 - xout2 >> 14;
156  memmove(c->prev_samples,
157  c->prev_samples + c->prev_samples_pos - 22,
158  22 * sizeof(c->prev_samples[0]));
159  c->prev_samples_pos = 22;
160  }
161 }
162 
163 static inline int encode_high(const struct G722Band *state, int xhigh)
164 {
165  int diff = av_clip_int16(xhigh - state->s_predictor);
166  int pred = 141 * state->scale_factor >> 8;
167  /* = diff >= 0 ? (diff < pred) + 2 : diff >= -pred */
168  return ((diff ^ (diff >> (sizeof(diff)*8-1))) < pred) + 2*(diff >= 0);
169 }
170 
171 static inline int encode_low(const struct G722Band* state, int xlow)
172 {
173  int diff = av_clip_int16(xlow - state->s_predictor);
174  /* = diff >= 0 ? diff : -(diff + 1) */
175  int limit = diff ^ (diff >> (sizeof(diff)*8-1));
176  int i = 0;
177  limit = limit + 1 << 10;
178  if (limit > low_quant[8] * state->scale_factor)
179  i = 9;
180  while (i < 29 && limit > low_quant[i] * state->scale_factor)
181  i++;
182  return (diff < 0 ? (i < 2 ? 63 : 33) : 61) - i;
183 }
184 
185 static void g722_encode_trellis(G722Context *c, int trellis,
186  uint8_t *dst, int nb_samples,
187  const int16_t *samples)
188 {
189  int i, j, k;
190  int frontier = 1 << trellis;
191  struct TrellisNode **nodes[2];
192  struct TrellisNode **nodes_next[2];
193  int pathn[2] = {0, 0}, froze = -1;
194  struct TrellisPath *p[2];
195 
196  for (i = 0; i < 2; i++) {
197  nodes[i] = c->nodep_buf[i];
198  nodes_next[i] = c->nodep_buf[i] + frontier;
199  memset(c->nodep_buf[i], 0, 2 * frontier * sizeof(*c->nodep_buf[i]));
200  nodes[i][0] = c->node_buf[i] + frontier;
201  nodes[i][0]->ssd = 0;
202  nodes[i][0]->path = 0;
203  nodes[i][0]->state = c->band[i];
204  }
205 
206  for (i = 0; i < nb_samples >> 1; i++) {
207  int xlow, xhigh;
208  struct TrellisNode *next[2];
209  int heap_pos[2] = {0, 0};
210 
211  for (j = 0; j < 2; j++) {
212  next[j] = c->node_buf[j] + frontier*(i & 1);
213  memset(nodes_next[j], 0, frontier * sizeof(**nodes_next));
214  }
215 
216  filter_samples(c, &samples[2*i], &xlow, &xhigh);
217 
218  for (j = 0; j < frontier && nodes[0][j]; j++) {
219  /* Only k >> 2 affects the future adaptive state, therefore testing
220  * small steps that don't change k >> 2 is useless, the original
221  * value from encode_low is better than them. Since we step k
222  * in steps of 4, make sure range is a multiple of 4, so that
223  * we don't miss the original value from encode_low. */
224  int range = j < frontier/2 ? 4 : 0;
225  struct TrellisNode *cur_node = nodes[0][j];
226 
227  int ilow = encode_low(&cur_node->state, xlow);
228 
229  for (k = ilow - range; k <= ilow + range && k <= 63; k += 4) {
230  int decoded, dec_diff, pos;
231  uint32_t ssd;
232  struct TrellisNode* node;
233 
234  if (k < 0)
235  continue;
236 
237  decoded = av_clip((cur_node->state.scale_factor *
238  ff_g722_low_inv_quant6[k] >> 10)
239  + cur_node->state.s_predictor, -16384, 16383);
240  dec_diff = xlow - decoded;
241 
242 #define STORE_NODE(index, UPDATE, VALUE)\
243  ssd = cur_node->ssd + dec_diff*dec_diff;\
244  /* Check for wraparound. Using 64 bit ssd counters would \
245  * be simpler, but is slower on x86 32 bit. */\
246  if (ssd < cur_node->ssd)\
247  continue;\
248  if (heap_pos[index] < frontier) {\
249  pos = heap_pos[index]++;\
250  assert(pathn[index] < FREEZE_INTERVAL * frontier);\
251  node = nodes_next[index][pos] = next[index]++;\
252  node->path = pathn[index]++;\
253  } else {\
254  /* Try to replace one of the leaf nodes with the new \
255  * one, but not always testing the same leaf position */\
256  pos = (frontier>>1) + (heap_pos[index] & ((frontier>>1) - 1));\
257  if (ssd >= nodes_next[index][pos]->ssd)\
258  continue;\
259  heap_pos[index]++;\
260  node = nodes_next[index][pos];\
261  }\
262  node->ssd = ssd;\
263  node->state = cur_node->state;\
264  UPDATE;\
265  c->paths[index][node->path].value = VALUE;\
266  c->paths[index][node->path].prev = cur_node->path;\
267  /* Sift the newly inserted node up in the heap to restore \
268  * the heap property */\
269  while (pos > 0) {\
270  int parent = (pos - 1) >> 1;\
271  if (nodes_next[index][parent]->ssd <= ssd)\
272  break;\
273  FFSWAP(struct TrellisNode*, nodes_next[index][parent],\
274  nodes_next[index][pos]);\
275  pos = parent;\
276  }
277  STORE_NODE(0, ff_g722_update_low_predictor(&node->state, k >> 2), k);
278  }
279  }
280 
281  for (j = 0; j < frontier && nodes[1][j]; j++) {
282  int ihigh;
283  struct TrellisNode *cur_node = nodes[1][j];
284 
285  /* We don't try to get any initial guess for ihigh via
286  * encode_high - since there's only 4 possible values, test
287  * them all. Testing all of these gives a much, much larger
288  * gain than testing a larger range around ilow. */
289  for (ihigh = 0; ihigh < 4; ihigh++) {
290  int dhigh, decoded, dec_diff, pos;
291  uint32_t ssd;
292  struct TrellisNode* node;
293 
294  dhigh = cur_node->state.scale_factor *
295  ff_g722_high_inv_quant[ihigh] >> 10;
296  decoded = av_clip(dhigh + cur_node->state.s_predictor,
297  -16384, 16383);
298  dec_diff = xhigh - decoded;
299 
300  STORE_NODE(1, ff_g722_update_high_predictor(&node->state, dhigh, ihigh), ihigh);
301  }
302  }
303 
304  for (j = 0; j < 2; j++) {
305  FFSWAP(struct TrellisNode**, nodes[j], nodes_next[j]);
306 
307  if (nodes[j][0]->ssd > (1 << 16)) {
308  for (k = 1; k < frontier && nodes[j][k]; k++)
309  nodes[j][k]->ssd -= nodes[j][0]->ssd;
310  nodes[j][0]->ssd = 0;
311  }
312  }
313 
314  if (i == froze + FREEZE_INTERVAL) {
315  p[0] = &c->paths[0][nodes[0][0]->path];
316  p[1] = &c->paths[1][nodes[1][0]->path];
317  for (j = i; j > froze; j--) {
318  dst[j] = p[1]->value << 6 | p[0]->value;
319  p[0] = &c->paths[0][p[0]->prev];
320  p[1] = &c->paths[1][p[1]->prev];
321  }
322  froze = i;
323  pathn[0] = pathn[1] = 0;
324  memset(nodes[0] + 1, 0, (frontier - 1)*sizeof(**nodes));
325  memset(nodes[1] + 1, 0, (frontier - 1)*sizeof(**nodes));
326  }
327  }
328 
329  p[0] = &c->paths[0][nodes[0][0]->path];
330  p[1] = &c->paths[1][nodes[1][0]->path];
331  for (j = i; j > froze; j--) {
332  dst[j] = p[1]->value << 6 | p[0]->value;
333  p[0] = &c->paths[0][p[0]->prev];
334  p[1] = &c->paths[1][p[1]->prev];
335  }
336  c->band[0] = nodes[0][0]->state;
337  c->band[1] = nodes[1][0]->state;
338 }
339 
340 static av_always_inline void encode_byte(G722Context *c, uint8_t *dst,
341  const int16_t *samples)
342 {
343  int xlow, xhigh, ilow, ihigh;
344  filter_samples(c, samples, &xlow, &xhigh);
345  ihigh = encode_high(&c->band[1], xhigh);
346  ilow = encode_low (&c->band[0], xlow);
348  ff_g722_high_inv_quant[ihigh] >> 10, ihigh);
349  ff_g722_update_low_predictor(&c->band[0], ilow >> 2);
350  *dst = ihigh << 6 | ilow;
351 }
352 
353 static void g722_encode_no_trellis(G722Context *c,
354  uint8_t *dst, int nb_samples,
355  const int16_t *samples)
356 {
357  int i;
358  for (i = 0; i < nb_samples; i += 2)
359  encode_byte(c, dst++, &samples[i]);
360 }
361 
362 static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
363  const AVFrame *frame, int *got_packet_ptr)
364 {
365  G722Context *c = avctx->priv_data;
366  const int16_t *samples = (const int16_t *)frame->data[0];
367  int nb_samples, out_size, ret;
368 
369  out_size = (frame->nb_samples + 1) / 2;
370  if ((ret = ff_alloc_packet(avpkt, out_size))) {
371  av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
372  return ret;
373  }
374 
375  nb_samples = frame->nb_samples - (frame->nb_samples & 1);
376 
377  if (avctx->trellis)
378  g722_encode_trellis(c, avctx->trellis, avpkt->data, nb_samples, samples);
379  else
380  g722_encode_no_trellis(c, avpkt->data, nb_samples, samples);
381 
382  /* handle last frame with odd frame_size */
383  if (nb_samples < frame->nb_samples) {
384  int16_t last_samples[2] = { samples[nb_samples], samples[nb_samples] };
385  encode_byte(c, &avpkt->data[nb_samples >> 1], last_samples);
386  }
387 
388  if (frame->pts != AV_NOPTS_VALUE)
389  avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->delay);
390  *got_packet_ptr = 1;
391  return 0;
392 }
393 
395  .name = "g722",
396  .type = AVMEDIA_TYPE_AUDIO,
398  .priv_data_size = sizeof(G722Context),
401  .encode2 = g722_encode_frame,
402  .capabilities = CODEC_CAP_SMALL_LAST_FRAME,
403  .long_name = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
404  .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
406 };