summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'media-libs/gst-plugins-base/files/gst-0.10.32-0011-add-some-neon.patch')
-rw-r--r-- media-libs/gst-plugins-base/files/gst-0.10.32-0011-add-some-neon.patch | 293
1 files changed, 293 insertions, 0 deletions
diff --git a/media-libs/gst-plugins-base/files/gst-0.10.32-0011-add-some-neon.patch b/media-libs/gst-plugins-base/files/gst-0.10.32-0011-add-some-neon.patch
new file mode 100644
index 0000000..6737811
--- /dev/null
+++ b/media-libs/gst-plugins-base/files/gst-0.10.32-0011-add-some-neon.patch
@@ -0,0 +1,293 @@
+From 537d185b9e9b25f7dacb5e5c4dab47bb8524da34 Mon Sep 17 00:00:00 2001
+From: Rob Clark <rob@ti.com>
+Date: Thu, 8 Apr 2010 00:30:25 -0500
+Subject: [PATCH 11/24] add some neon
+
+---
+ configure.ac | 1 +
+ gst/stride/Makefile.am | 1 +
+ gst/stride/armv7.s | 119 ++++++++++++++++++++++++++++++++++++++++++++++++
+ gst/stride/convert.c | 76 ++++++++++++++++--------------
+ 4 files changed, 162 insertions(+), 35 deletions(-)
+ create mode 100644 gst/stride/armv7.s
+
+diff --git a/configure.ac b/configure.ac
+index af6cd52..8e7ba18 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -58,6 +58,7 @@ dnl AS_LIBTOOL_TAGS
+
+ AC_LIBTOOL_WIN32_DLL
+ AM_PROG_LIBTOOL
++AM_PROG_AS
+
+ dnl *** required versions of GStreamer stuff ***
+ GST_REQ=0.10.32
+diff --git a/gst/stride/Makefile.am b/gst/stride/Makefile.am
+index 0b61d55..3b466de 100644
+--- a/gst/stride/Makefile.am
++++ b/gst/stride/Makefile.am
+@@ -3,6 +3,7 @@ plugin_LTLIBRARIES = libgststridetransform.la
+ libgststridetransform_la_SOURCES = \
+ gststridetransform.c \
+ convert.c \
++ armv7.s \
+ plugin.c
+
+ libgststridetransform_la_CFLAGS = $(GST_PLUGINS_BASE_CFLAGS) $(GST_BASE_CFLAGS) $(GST_CFLAGS)
+diff --git a/gst/stride/armv7.s b/gst/stride/armv7.s
+new file mode 100644
+index 0000000..ed636f7
+--- /dev/null
++++ b/gst/stride/armv7.s
+@@ -0,0 +1,119 @@
++@ GStreamer
++@
++@ Copyright (C) 2009 Texas Instruments, Inc - http://www.ti.com/
++@
++@ Description: NEON/VFP accelerated functions for armv7 architecture
++@ Created on: Nov 27, 2009
++@ Author: Rob Clark <rob@ti.com>
++@
++@ This library is free software; you can redistribute it and/or
++@ modify it under the terms of the GNU Library General Public
++@ License as published by the Free Software Foundation; either
++@ version 2 of the License, or (at your option) any later version.
++@
++@ This library is distributed in the hope that it will be useful,
++@ but WITHOUT ANY WARRANTY; without even the implied warranty of
++@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++@ Library General Public License for more details.
++@
++@ You should have received a copy of the GNU Library General Public
++@ License along with this library; if not, write to the
++@ Free Software Foundation, Inc., 59 Temple Place - Suite 330,
++@ Boston, MA 02111-1307, USA.
++
++ .fpu neon
++ .text
++
++ .align
++ .global stride_copy_zip2
++ .type stride_copy_zip2, %function
++@ void stride_copy_zip2 (guchar *new_buf, guchar *orig_buf1, guchar *orig_buf2, gint sz)
++@ Interleaves bytes of orig_buf1 and orig_buf2 into new_buf as b1[0], b2[0], b1[1], b2[1], ... (2 output bytes per unit of sz).
++@ Register use below: r0 = destination, r1 = first source, r2 = second source, r3 = remaining count (presumably the AAPCS argument order -- confirm against the build ABI).
++@ note: r0-r3, q0-q3, and q8-q15 do not need to be preserved
++stride_copy_zip2:
++@ interleave while remaining >= 16 bytes per source:
++ pld [r1, #64]  @ preload hint for the first source, 64 bytes ahead
++ pld [r2, #64]  @ preload hint for the second source, 64 bytes ahead
++ cmp r3, #16
++ blt stride_copy_zip2_2  @ fewer than 16 left: skip the 16-byte loop
++stride_copy_zip2_1:
++ vld1.8 {q8}, [r1]!  @ q8 = next 16 bytes of first source, r1 += 16
++ vld1.8 {q9}, [r2]!  @ q9 = next 16 bytes of second source, r2 += 16
++
++ vzip.8 q8, q9  @ byte-interleave q8 and q9 in place; q8 holds the first 16 result bytes, q9 the next 16
++
++ pld [r1, #64]
++ vst1.8 {q8,q9}, [r0]!  @ store the 32 interleaved bytes, r0 += 32
++ pld [r2, #64]
++ sub r3, r3, #16
++
++ cmp r3, #16
++ bge stride_copy_zip2_1  @ loop while at least 16 remain
++@ interleave one 8-byte group if remaining >= 8 (runs at most once):
++stride_copy_zip2_2:
++ cmp r3, #8
++ blt stride_copy_zip2_3
++
++ vld1.8 {d16}, [r1]!  @ d16 = next 8 bytes of first source, r1 += 8
++ vld1.8 {d17}, [r2]!  @ d17 = next 8 bytes of second source, r2 += 8
++
++ vzip.8 d16, d17  @ byte-interleave the two 8-byte halves
++
++ vst1.8 {d16,d17}, [r0]!  @ store the 16 interleaved bytes, r0 += 16
++ sub r3, r3, #8
++
++@ interleave remaining < 8 bytes: NOT IMPLEMENTED, control falls straight through to the return
++stride_copy_zip2_3:
++@ XXX: the final (sz % 8) byte pairs are never written; only correct when sz is a multiple of 8
++ bx lr
++@ end of stride_copy_zip2
++
++ .align
++ .global stride_copy
++ .type stride_copy, %function
++@ void stride_copy (guchar *new_buf, guchar *orig_buf, gint sz)
++@ Copies from r1 (source) to r0 (destination) in 64/32/16/8-byte steps, ascending addresses; r2 is the remaining byte count (presumably AAPCS argument order -- confirm).
++@ NOTE(review): forward-only copy, so unlike memmove() an overlapping destination above the source looks unsafe -- confirm for in-place callers.
++@ note: r0-r3, q0-q3, and q8-q15 do not need to be preserved
++stride_copy:
++@ copy while remaining >= 64 bytes:
++ pld [r1, #64]  @ preload hint for the source, 64 bytes ahead
++ cmp r2, #64
++ blt stride_copy_2  @ fewer than 64 left: skip the 64-byte loop
++stride_copy_1:
++ vld1.8 {q8-q9}, [r1]!  @ load first 32 bytes, r1 += 32
++ sub r2, r2, #64
++ vld1.8 {q10-q11},[r1]!  @ load second 32 bytes, r1 += 32
++ vst1.8 {q8-q9}, [r0]!  @ store first 32 bytes, r0 += 32
++ pld [r1, #64]
++ cmp r2, #64
++ vst1.8 {q10-q11},[r0]!  @ store second 32 bytes, r0 += 32; placed between the cmp and its branch
++ bge stride_copy_1  @ uses the flags from the cmp two instructions above
++@ copy one 32-byte group if remaining >= 32:
++stride_copy_2:
++ cmp r2, #32
++ blt stride_copy_3
++ vld1.8 {q8-q9}, [r1]!
++ sub r2, r2, #32
++ vst1.8 {q8-q9}, [r0]!
++@ copy one 16-byte group if remaining >= 16:
++stride_copy_3:
++ cmp r2, #16
++ blt stride_copy_4
++ vld1.8 {q8}, [r1]!
++ sub r2, r2, #16
++ vst1.8 {q8}, [r0]!
++@ copy one 8-byte group if remaining >= 8:
++stride_copy_4:
++ cmp r2, #8
++ blt stride_copy_5
++ vld1.8 {d16}, [r1]!
++ sub r2, r2, #8
++ vst1.8 {d16}, [r0]!
++@ copy remaining < 8 bytes: NOT IMPLEMENTED, control falls straight through to the return
++stride_copy_5:
++@ XXX: the final (sz % 8) bytes are never copied; only correct when sz is a multiple of 8
++ bx lr
++@ end of stride_copy
++
+diff --git a/gst/stride/convert.c b/gst/stride/convert.c
+index 860f16c..a15063b 100644
+--- a/gst/stride/convert.c
++++ b/gst/stride/convert.c
+@@ -37,38 +37,43 @@ GST_DEBUG_CATEGORY_EXTERN (stridetransform_debug);
+ #define GST_CAT_DEFAULT stridetransform_debug
+
+
++/* note: some parts of code support in-place transform.. some do not.. I'm
++ * not sure if zip/interleave functions could really support in-place copy..
++ * I need to think about this after having some sleep ;-)
++ */
++
++#define WEAK __attribute__((weak))
++
+ /*
+ * Conversion utilities:
+ */
+
+-static void
+-memmove_demux (guchar *new_buf, guchar *orig_buf, gint sz, gint pxstride)
++WEAK void
++stride_copy_zip2 (guchar *new_buf, guchar *orig_buf1, guchar *orig_buf2, gint sz)
+ {
+- if (new_buf > orig_buf) {
+- /* copy backwards */
+- new_buf += ((sz - 1) * pxstride);
+- orig_buf += sz - 1;
+- while(sz--) {
+- *new_buf = *orig_buf;
+- new_buf -= pxstride;
+- orig_buf--;
+- }
+- } else {
+- while(sz--) {
+- *new_buf = *orig_buf;
+- new_buf += pxstride;
+- orig_buf++;
+- }
++ while (sz--) {
++ *new_buf++ = *orig_buf1++;
++ *new_buf++ = *orig_buf2++;
+ }
+ }
+
++WEAK void /* generic row-copy fallback; weak so an arch-specific stride_copy (e.g. armv7.s) can override it */
++stride_copy (guchar *new_buf, guchar *orig_buf, gint sz)
++{
++ memmove (new_buf, orig_buf, sz); /* memmove, not memcpy: stridemove() may run in place, so source and destination rows can overlap */
++}
++
++
++/**
++ * move to strided buffer, interleaving two planes of identical dimensions
++ */
+ static void
+-stridemove_demux (guchar *new_buf, guchar *orig_buf, gint new_width, gint orig_width, gint height, gint pxstride)
++stridemove_zip2 (guchar *new_buf, guchar *orig_buf1, guchar *orig_buf2, gint new_width, gint orig_width, gint height)
+ {
+ int row;
+
+- GST_DEBUG ("new_buf=%p, orig_buf=%p, new_width=%d, orig_width=%d, height=%d",
+- new_buf, orig_buf, new_width, orig_width, height);
++ GST_DEBUG ("new_buf=%p, orig_buf1=%p, orig_buf2=%p, new_width=%d, orig_width=%d, height=%d",
++ new_buf, orig_buf1, orig_buf2, new_width, orig_width, height);
+
+ /* if increasing the stride, work from bottom-up to avoid overwriting data
+ * that has not been moved yet.. otherwise, work in the opposite order,
+@@ -76,11 +81,19 @@ stridemove_demux (guchar *new_buf, guchar *orig_buf, gint new_width, gint orig_w
+ */
+ if (new_width > orig_width) {
+ for (row=height-1; row>=0; row--) {
+- memmove_demux (new_buf+(new_width*row), orig_buf+(orig_width*row), orig_width, pxstride);
++ stride_copy_zip2 (
++ new_buf+(new_width*row),
++ orig_buf1+(orig_width*row),
++ orig_buf2+(orig_width*row),
++ orig_width);
+ }
+ } else {
+ for (row=0; row<height; row++) {
+- memmove_demux (new_buf+(new_width*row), orig_buf+(orig_width*row), new_width, pxstride);
++ stride_copy_zip2 (
++ new_buf+(new_width*row),
++ orig_buf1+(orig_width*row),
++ orig_buf2+(orig_width*row),
++ new_width);
+ }
+ }
+ }
+@@ -106,11 +119,11 @@ stridemove (guchar *new_buf, guchar *orig_buf, gint new_width, gint orig_width,
+ */
+ if (new_width > orig_width) {
+ for (row=height-1; row>=0; row--) {
+- memmove (new_buf+(new_width*row), orig_buf+(orig_width*row), orig_width);
++ stride_copy (new_buf+(new_width*row), orig_buf+(orig_width*row), orig_width);
+ }
+ } else {
+ for (row=0; row<height; row++) {
+- memmove (new_buf+(new_width*row), orig_buf+(orig_width*row), new_width);
++ stride_copy (new_buf+(new_width*row), orig_buf+(orig_width*row), new_width);
+ }
+ }
+ }
+@@ -234,19 +247,12 @@ stridify_i420_nv12 (GstStrideTransform *self, guchar *strided, guchar *unstrided
+
+ g_return_val_if_fail (stride >= width, GST_FLOW_ERROR);
+
+- /* note: if not an in-place conversion, then doing the U&V in one pass
+- * would be more efficient... but if it is an in-place conversion, I'd
+- * need to think about whether it is potential for the new UV plane to
+- * corrupt the V plane before it is done copying..
+- */
+- stridemove_demux (
+- strided + (height*stride) + 1,
+- unstrided + (int)(height*width*1.25),
+- stride, width/2, height/2, 2); /* move V */
+- stridemove_demux (
++ /* XXX widths/heights/strides that are not multiple of four??: */
++ stridemove_zip2 (
+ strided + (height*stride),
+ unstrided + (height*width),
+- stride, width/2, height/2, 2); /* move U */
++ unstrided + (int)(height*width*1.25),
++ stride, width/2, height/2); /* interleave U&V */
+ stridemove (strided, unstrided, stride, width, height); /* move Y */
+
+ return GST_FLOW_OK;
+--
+1.7.1
+