Main Page
Related Pages
Modules
Data Structures
Files
Examples
File List
Globals
libavutil
x86
float_dsp_init.c
Go to the documentation of this file.
/*
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
18
19
#include "config.h"
20
21
#include "
libavutil/cpu.h
"
22
#include "
libavutil/float_dsp.h
"
23
#include "
cpu.h
"
24
#include "
asm.h
"
25
26
/*
 * Prototypes of the x86 vector routines that ff_float_dsp_init_x86() installs
 * into AVFloatDSPContext. None of them is defined in this file.
 * NOTE(review): presumably implemented in the external assembly sources --
 * confirm against the build.
 *
 * Function declarations have external linkage by default, so no explicit
 * "extern" storage class is needed.
 */

/* Candidates for AVFloatDSPContext.vector_fmul. */
void ff_vector_fmul_sse(float *dst, const float *src0, const float *src1,
                        int len);
void ff_vector_fmul_avx(float *dst, const float *src0, const float *src1,
                        int len);

/* Candidates for AVFloatDSPContext.vector_fmac_scalar. */
void ff_vector_fmac_scalar_sse(float *dst, const float *src, float mul,
                               int len);
void ff_vector_fmac_scalar_avx(float *dst, const float *src, float mul,
                               int len);

/* Candidate for AVFloatDSPContext.vector_fmul_scalar. */
void ff_vector_fmul_scalar_sse(float *dst, const float *src, float mul,
                               int len);

/* Candidates for AVFloatDSPContext.vector_dmul_scalar. */
void ff_vector_dmul_scalar_sse2(double *dst, const double *src,
                                double mul, int len);
void ff_vector_dmul_scalar_avx(double *dst, const double *src,
                               double mul, int len);
43
44
#if HAVE_6REGS
45
static
void
vector_fmul_window_3dnowext(
float
*dst,
const
float
*src0,
46
const
float
*src1,
const
float
*win,
47
int
len
)
48
{
49
x86_reg
i = -len * 4;
50
x86_reg
j = len * 4 - 8;
51
__asm__
volatile
(
52
"1: \n"
53
"pswapd (%5, %1), %%mm1 \n"
54
"movq (%5, %0), %%mm0 \n"
55
"pswapd (%4, %1), %%mm5 \n"
56
"movq (%3, %0), %%mm4 \n"
57
"movq %%mm0, %%mm2 \n"
58
"movq %%mm1, %%mm3 \n"
59
"pfmul %%mm4, %%mm2 \n"
// src0[len + i] * win[len + i]
60
"pfmul %%mm5, %%mm3 \n"
// src1[j] * win[len + j]
61
"pfmul %%mm4, %%mm1 \n"
// src0[len + i] * win[len + j]
62
"pfmul %%mm5, %%mm0 \n"
// src1[j] * win[len + i]
63
"pfadd %%mm3, %%mm2 \n"
64
"pfsub %%mm0, %%mm1 \n"
65
"pswapd %%mm2, %%mm2 \n"
66
"movq %%mm1, (%2, %0) \n"
67
"movq %%mm2, (%2, %1) \n"
68
"sub $8, %1 \n"
69
"add $8, %0 \n"
70
"jl 1b \n"
71
"femms \n"
72
:
"+r"
(i),
"+r"
(j)
73
:
"r"
(dst +
len
),
"r"
(src0 + len),
"r"
(src1),
"r"
(win + len)
74
);
75
}
76
77
static
void
vector_fmul_window_sse(
float
*dst,
const
float
*src0,
78
const
float
*src1,
const
float
*win,
int
len)
79
{
80
x86_reg
i = -len * 4;
81
x86_reg
j = len * 4 - 16;
82
__asm__
volatile
(
83
"1: \n"
84
"movaps (%5, %1), %%xmm1 \n"
85
"movaps (%5, %0), %%xmm0 \n"
86
"movaps (%4, %1), %%xmm5 \n"
87
"movaps (%3, %0), %%xmm4 \n"
88
"shufps $0x1b, %%xmm1, %%xmm1 \n"
89
"shufps $0x1b, %%xmm5, %%xmm5 \n"
90
"movaps %%xmm0, %%xmm2 \n"
91
"movaps %%xmm1, %%xmm3 \n"
92
"mulps %%xmm4, %%xmm2 \n"
// src0[len + i] * win[len + i]
93
"mulps %%xmm5, %%xmm3 \n"
// src1[j] * win[len + j]
94
"mulps %%xmm4, %%xmm1 \n"
// src0[len + i] * win[len + j]
95
"mulps %%xmm5, %%xmm0 \n"
// src1[j] * win[len + i]
96
"addps %%xmm3, %%xmm2 \n"
97
"subps %%xmm0, %%xmm1 \n"
98
"shufps $0x1b, %%xmm2, %%xmm2 \n"
99
"movaps %%xmm1, (%2, %0) \n"
100
"movaps %%xmm2, (%2, %1) \n"
101
"sub $16, %1 \n"
102
"add $16, %0 \n"
103
"jl 1b \n"
104
:
"+r"
(i),
"+r"
(j)
105
:
"r"
(dst +
len
),
"r"
(src0 + len),
"r"
(src1),
"r"
(win + len)
106
);
107
}
108
#endif
/* HAVE_6REGS */
109
110
/**
 * Point the function pointers of an AVFloatDSPContext at the x86
 * implementations selected by the flags from av_get_cpu_flags().
 *
 * The checks run in a fixed order and each one simply overwrites the
 * pointers it handles, so a later check wins over an earlier one:
 * SSE replaces 3DNow!ext for vector_fmul_window, and AVX replaces SSE / SSE2
 * for vector_fmul, vector_fmac_scalar and vector_dmul_scalar. Pointers not
 * matched by any check are left as the caller set them.
 *
 * @param fdsp context whose function pointers are updated in place
 */
void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
{
    const int cpu_flags = av_get_cpu_flags();

#if HAVE_6REGS
    /* Inline-assembly versions; only compiled when HAVE_6REGS is set. */
    if (INLINE_AMD3DNOWEXT(cpu_flags)) {
        fdsp->vector_fmul_window = vector_fmul_window_3dnowext;
    }
    if (INLINE_SSE(cpu_flags)) {
        fdsp->vector_fmul_window = vector_fmul_window_sse;
    }
#endif

    /* Externally defined versions. */
    if (EXTERNAL_SSE(cpu_flags)) {
        fdsp->vector_fmul        = ff_vector_fmul_sse;
        fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_sse;
        fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_sse;
    }
    if (EXTERNAL_SSE2(cpu_flags)) {
        fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_sse2;
    }
    if (EXTERNAL_AVX(cpu_flags)) {
        /* No AVX routine is installed for vector_fmul_scalar. */
        fdsp->vector_fmul        = ff_vector_fmul_avx;
        fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_avx;
        fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_avx;
    }
}