diff --git a/Makefile b/Makefile index f1078eea..83aea39a 100644 --- a/Makefile +++ b/Makefile @@ -130,7 +130,7 @@ OBJDIRS+= $(sort $(dir $(BUNDLE_OBJS))) # Common CFLAGS for all files CFLAGS_com = -g -funsigned-char -O2 CFLAGS_com += -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -CFLAGS_com += -I${BUILDDIR} -I${CURDIR}/src -I${CURDIR} +CFLAGS_com += -I${BUILDDIR} -I${CURDIR}/src -I${CURDIR} $(CFLAGS_ffdecsa) all: ${PROG} diff --git a/configure b/configure index 8e0cadde..ab6222c0 100755 --- a/configure +++ b/configure @@ -19,6 +19,7 @@ CPU=generic ARCH=`uname -m` OSENV="posix" PREFIX=/usr/local +PARALLEL_MODE=PARALLEL_64_MMX show_help(){ echo "Usage: configure [options]" @@ -30,6 +31,9 @@ show_help(){ echo " --arch=arch Build for this architecture [$ARCH]" echo " --cpu=cpu Build and optimize for specific CPU" echo " --cc=CC Build using the given compiler" + echo " --ffdecsa-mode=MODE Select Mode for ffdecsa" + echo " PARALLEL_64_MMX (default)" + echo " PARALLEL_128_SSE2" echo " --release Stage for release" exit 1 } @@ -51,6 +55,8 @@ for opt do ;; --cc=*) CC="$optval" ;; + --ffdecsa-mode=*) PARALLEL_MODE="$optval" + ;; --enable-?*|--disable-?*) eval $(echo "$opt" | sed 's/--/action=/;s/-/ option=/;s/-/_/g') $action $option @@ -107,6 +113,11 @@ if enabled avahi; then fi fi +# +# ffdecsa mode +# +echo >>${CONFIG_MAK} "CFLAGS_ffdecsa = -DPARALLEL_MODE=${PARALLEL_MODE}" + # # configure ffmpeg # diff --git a/src/ffdecsa/parallel_128_sse2.h b/src/ffdecsa/parallel_128_sse2.h new file mode 100644 index 00000000..5a537a9d --- /dev/null +++ b/src/ffdecsa/parallel_128_sse2.h @@ -0,0 +1,82 @@ +/* FFdecsa -- fast decsa algorithm + * + * Copyright (C) 2007 Dark Avenger + * 2003-2004 fatih89r + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include + +#define MEMALIGN __attribute__((aligned(16))) + +union __u128i { + unsigned int u[4]; + __m128i v; +}; + +static const union __u128i ff0 = {{0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U}}; +static const union __u128i ff1 = {{0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU}}; + +typedef __m128i group; +#define GROUP_PARALLELISM 128 +#define FF0() ff0.v +#define FF1() ff1.v +#define FFAND(a,b) _mm_and_si128((a),(b)) +#define FFOR(a,b) _mm_or_si128((a),(b)) +#define FFXOR(a,b) _mm_xor_si128((a),(b)) +#define FFNOT(a) _mm_xor_si128((a),FF1()) +#define MALLOC(X) _mm_malloc(X,16) +#define FREE(X) _mm_free(X) + +/* BATCH */ + +static const union __u128i ff29 = {{0x29292929U, 0x29292929U, 0x29292929U, 0x29292929U}}; +static const union __u128i ff02 = {{0x02020202U, 0x02020202U, 0x02020202U, 0x02020202U}}; +static const union __u128i ff04 = {{0x04040404U, 0x04040404U, 0x04040404U, 0x04040404U}}; +static const union __u128i ff10 = {{0x10101010U, 0x10101010U, 0x10101010U, 0x10101010U}}; +static const union __u128i ff40 = {{0x40404040U, 0x40404040U, 0x40404040U, 0x40404040U}}; +static const union __u128i ff80 = {{0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U}}; + +typedef __m128i batch; +#define BYTES_PER_BATCH 16 +#define B_FFN_ALL_29() ff29.v +#define B_FFN_ALL_02() ff02.v +#define B_FFN_ALL_04() ff04.v +#define B_FFN_ALL_10() ff10.v +#define B_FFN_ALL_40() ff40.v +#define B_FFN_ALL_80() ff80.v + +#define B_FFAND(a,b) FFAND(a,b) +#define B_FFOR(a,b) FFOR(a,b) +#define B_FFXOR(a,b) FFXOR(a,b) +#define B_FFSH8L(a,n) _mm_slli_epi64((a),(n)) +#define B_FFSH8R(a,n) _mm_srli_epi64((a),(n)) + +#define M_EMPTY() + +#undef BEST_SPAN +#define BEST_SPAN 16 + +#undef XOR_BEST_BY +static inline void XOR_BEST_BY(unsigned char *d, unsigned char *s1, unsigned char *s2) +{ + __m128i vs1 = _mm_load_si128((__m128i*)s1); + __m128i vs2 = _mm_load_si128((__m128i*)s2); + vs1 = _mm_xor_si128(vs1, vs2); + _mm_store_si128((__m128i*)d, vs1); +} + +#include "fftable.h"