- Added swipe left/right to change between tabs

- Added option to enable/disable infinite swipe between tabs
- Added a new Plus menu and moved all additional Plus features there
- Added option to show/hide direct share button in any chat
- Added switch to use direct share without quoting sender
- Added date indicator toast in chat
- Added current download/upload size in chat screen
- Added sort options (default/unread) to all tabs
- Added different options in groups/supergroups for admins when user avatar is clicked in chat screen: ‘Show profile’, ‘Delete from group’ and ‘Set as admin’
- Added option to remove from admin list if user is already an admin
- Added ‘Mark as read’ for individual chats and ‘Mark all as read’ for every tab
- Added option in Plus settings to show username instead of mobile number in menu
- Bug fixes
This commit is contained in:
rafalense 2016-02-25 10:36:39 +01:00
parent 6c9a71f6d1
commit a4e9947d42
273 changed files with 17155 additions and 27306 deletions

View File

@ -1,4 +1,4 @@
### Plus Messenger for Android ### Plus Messenger for Android (http://plusmessenger.org)
This is an UNOFFICIAL app that uses [Telegram's API](https://core.telegram.org/api) This is an UNOFFICIAL app that uses [Telegram's API](https://core.telegram.org/api)

View File

@ -6,7 +6,8 @@ repositories {
dependencies { dependencies {
compile 'com.android.support:support-v4:23.1.+' compile 'com.android.support:support-v4:23.1.+'
compile 'com.google.android.gms:play-services:3.2.+' compile "com.google.android.gms:play-services-gcm:8.4.0"
compile "com.google.android.gms:play-services-maps:8.4.0"
compile 'net.hockeyapp.android:HockeySDK:3.6.+' compile 'net.hockeyapp.android:HockeySDK:3.6.+'
compile 'com.googlecode.mp4parser:isoparser:1.0.+' compile 'com.googlecode.mp4parser:isoparser:1.0.+'
} }
@ -16,6 +17,8 @@ android {
buildToolsVersion '23.0.2' buildToolsVersion '23.0.2'
useLibrary 'org.apache.http.legacy' useLibrary 'org.apache.http.legacy'
//defaultConfig.applicationId = "org.telegram.messenger"
defaultConfig.applicationId = "org.telegram.plus"
compileOptions { compileOptions {
sourceCompatibility JavaVersion.VERSION_1_7 sourceCompatibility JavaVersion.VERSION_1_7
@ -25,6 +28,10 @@ android {
signingConfigs { signingConfigs {
debug { debug {
storeFile file("config/debug.keystore") storeFile file("config/debug.keystore")
//storeFile file("config/release.keystore")
//storePassword RELEASE_STORE_PASSWORD
//keyAlias RELEASE_KEY_ALIAS
//keyPassword RELEASE_KEY_PASSWORD
} }
release { release {
@ -47,6 +54,8 @@ android {
debuggable false debuggable false
jniDebuggable false jniDebuggable false
//signingConfig signingConfigs.release //signingConfig signingConfigs.release
minifyEnabled false
proguardFiles getDefaultProguardFile('proguard-android.txt'), 'proguard-rules.pro'
} }
foss { foss {
@ -79,10 +88,9 @@ android {
} }
defaultConfig { defaultConfig {
applicationId "org.telegram.plus" minSdkVersion 9
minSdkVersion 8
targetSdkVersion 23 targetSdkVersion 23
versionCode 689 versionCode 736
versionName "3.3.1.1" versionName "3.4.2.5"
} }
} }

View File

@ -27,7 +27,7 @@
android:label="Plus beta" android:label="Plus beta"
tools:replace="label" tools:replace="label"
android:theme="@style/Theme.TMessages.Start" android:theme="@style/Theme.TMessages.Start"
android:name=".ApplicationLoader" android:name="org.telegram.messenger.ApplicationLoader"
android:hardwareAccelerated="@bool/useHardwareAcceleration" android:hardwareAccelerated="@bool/useHardwareAcceleration"
android:largeHeap="true"> android:largeHeap="true">
@ -36,7 +36,8 @@
<activity android:name="net.hockeyapp.android.UpdateActivity" /> <activity android:name="net.hockeyapp.android.UpdateActivity" />
<receiver <receiver
android:name="org.telegram.messenger.GcmBroadcastReceiver" android:name="com.google.android.gms.gcm.GcmReceiver"
android:exported="true"
android:permission="com.google.android.c2dm.permission.SEND" > android:permission="com.google.android.c2dm.permission.SEND" >
<intent-filter> <intent-filter>
<action android:name="com.google.android.c2dm.intent.RECEIVE" /> <action android:name="com.google.android.c2dm.intent.RECEIVE" />
@ -44,6 +45,24 @@
<category android:name="org.telegram.plus.beta" /> <category android:name="org.telegram.plus.beta" />
</intent-filter> </intent-filter>
</receiver> </receiver>
<service
android:name="org.telegram.messenger.GcmPushListenerService"
android:exported="false" >
<intent-filter>
<action android:name="com.google.android.c2dm.intent.RECEIVE" />
</intent-filter>
</service>
<service
android:name="org.telegram.messenger.GcmInstanceIDListenerService"
android:exported="false">
<intent-filter>
<action android:name="com.google.android.gms.iid.InstanceID" />
</intent-filter>
</service>
<service
android:name="org.telegram.messenger.GcmRegistrationIntentService"
android:exported="false">
</service>
<uses-library android:name="com.google.android.maps" android:required="false"/> <uses-library android:name="com.google.android.maps" android:required="false"/>

View File

@ -25,7 +25,7 @@
android:icon="@drawable/ic_launcher" android:icon="@drawable/ic_launcher"
android:label="@string/ShortAppName" android:label="@string/ShortAppName"
android:theme="@style/Theme.TMessages.Start" android:theme="@style/Theme.TMessages.Start"
android:name=".ApplicationLoader" android:name="org.telegram.messenger.ApplicationLoader"
android:hardwareAccelerated="@bool/useHardwareAcceleration" android:hardwareAccelerated="@bool/useHardwareAcceleration"
android:largeHeap="true"> android:largeHeap="true">
@ -34,7 +34,8 @@
<activity android:name="net.hockeyapp.android.UpdateActivity" /> <activity android:name="net.hockeyapp.android.UpdateActivity" />
<receiver <receiver
android:name=".GcmBroadcastReceiver" android:name="com.google.android.gms.gcm.GcmReceiver"
android:exported="true"
android:permission="com.google.android.c2dm.permission.SEND" > android:permission="com.google.android.c2dm.permission.SEND" >
<intent-filter> <intent-filter>
<action android:name="com.google.android.c2dm.intent.RECEIVE" /> <action android:name="com.google.android.c2dm.intent.RECEIVE" />
@ -42,6 +43,24 @@
<category android:name="org.telegram.plus" /> <category android:name="org.telegram.plus" />
</intent-filter> </intent-filter>
</receiver> </receiver>
<service
android:name="org.telegram.messenger.GcmPushListenerService"
android:exported="false" >
<intent-filter>
<action android:name="com.google.android.c2dm.intent.RECEIVE" />
</intent-filter>
</service>
<service
android:name="org.telegram.messenger.GcmInstanceIDListenerService"
android:exported="false">
<intent-filter>
<action android:name="com.google.android.gms.iid.InstanceID" />
</intent-filter>
</service>
<service
android:name="org.telegram.messenger.GcmRegistrationIntentService"
android:exported="false">
</service>
<uses-library android:name="com.google.android.maps" android:required="false"/> <uses-library android:name="com.google.android.maps" android:required="false"/>

View File

@ -1,5 +1,57 @@
LOCAL_PATH := $(call my-dir) LOCAL_PATH := $(call my-dir)
LOCAL_MODULE := avutil
ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
LOCAL_SRC_FILES := ./ffmpeg/armv7-a/libavutil.a
else
ifeq ($(TARGET_ARCH_ABI),armeabi)
LOCAL_SRC_FILES := ./ffmpeg/armv5te/libavutil.a
else
ifeq ($(TARGET_ARCH_ABI),x86)
LOCAL_SRC_FILES := ./ffmpeg/i686/libavutil.a
endif
endif
endif
include $(PREBUILT_STATIC_LIBRARY)
include $(CLEAR_VARS)
LOCAL_MODULE := avformat
ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
LOCAL_SRC_FILES := ./ffmpeg/armv7-a/libavformat.a
else
ifeq ($(TARGET_ARCH_ABI),armeabi)
LOCAL_SRC_FILES := ./ffmpeg/armv5te/libavformat.a
else
ifeq ($(TARGET_ARCH_ABI),x86)
LOCAL_SRC_FILES := ./ffmpeg/i686/libavformat.a
endif
endif
endif
include $(PREBUILT_STATIC_LIBRARY)
include $(CLEAR_VARS)
LOCAL_MODULE := avcodec
ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
LOCAL_SRC_FILES := ./ffmpeg/armv7-a/libavcodec.a
else
ifeq ($(TARGET_ARCH_ABI),armeabi)
LOCAL_SRC_FILES := ./ffmpeg/armv5te/libavcodec.a
else
ifeq ($(TARGET_ARCH_ABI),x86)
LOCAL_SRC_FILES := ./ffmpeg/i686/libavcodec.a
endif
endif
endif
include $(PREBUILT_STATIC_LIBRARY)
include $(CLEAR_VARS) include $(CLEAR_VARS)
LOCAL_MODULE := crypto LOCAL_MODULE := crypto
@ -26,8 +78,8 @@ LOCAL_MODULE := breakpad
LOCAL_CPPFLAGS := -Wall -std=c++11 -DANDROID -finline-functions -ffast-math -Os -fno-strict-aliasing LOCAL_CPPFLAGS := -Wall -std=c++11 -DANDROID -finline-functions -ffast-math -Os -fno-strict-aliasing
LOCAL_C_INCLUDES := \ LOCAL_C_INCLUDES := \
./breakpad/common/android/include \ $(LOCAL_PATH)/breakpad/common/android/include \
./breakpad $(LOCAL_PATH)/breakpad
LOCAL_SRC_FILES := \ LOCAL_SRC_FILES := \
./breakpad/client/linux/crash_generation/crash_generation_client.cc \ ./breakpad/client/linux/crash_generation/crash_generation_client.cc \
@ -57,7 +109,7 @@ include $(BUILD_STATIC_LIBRARY)
include $(CLEAR_VARS) include $(CLEAR_VARS)
LOCAL_CPPFLAGS := -Wall -std=c++11 -DANDROID -frtti -DHAVE_PTHREAD -finline-functions -ffast-math -Os LOCAL_CPPFLAGS := -Wall -std=c++11 -DANDROID -frtti -DHAVE_PTHREAD -finline-functions -ffast-math -Os
LOCAL_C_INCLUDES += ./boringssl/include/ LOCAL_C_INCLUDES += $(LOCAL_PATH)/boringssl/include/
LOCAL_ARM_MODE := arm LOCAL_ARM_MODE := arm
LOCAL_MODULE := tgnet LOCAL_MODULE := tgnet
LOCAL_STATIC_LIBRARIES := crypto LOCAL_STATIC_LIBRARIES := crypto
@ -85,15 +137,12 @@ include $(BUILD_STATIC_LIBRARY)
include $(CLEAR_VARS) include $(CLEAR_VARS)
LOCAL_CFLAGS := -Wall -DANDROID -DHAVE_MALLOC_H -DHAVE_PTHREAD -DWEBP_USE_THREAD -finline-functions -ffast-math -ffunction-sections -fdata-sections -Os LOCAL_CFLAGS := -Wall -DANDROID -DHAVE_MALLOC_H -DHAVE_PTHREAD -DWEBP_USE_THREAD -finline-functions -ffast-math -ffunction-sections -fdata-sections -Os
LOCAL_C_INCLUDES += ./libwebp/src LOCAL_C_INCLUDES += $(LOCAL_PATH)/libwebp/src
LOCAL_ARM_MODE := arm LOCAL_ARM_MODE := arm
LOCAL_STATIC_LIBRARIES := cpufeatures LOCAL_STATIC_LIBRARIES := cpufeatures
LOCAL_MODULE := webp LOCAL_MODULE := webp
ifneq ($(findstring armeabi-v7a, $(TARGET_ARCH_ABI)),) ifneq ($(findstring armeabi-v7a, $(TARGET_ARCH_ABI)),)
# Setting LOCAL_ARM_NEON will enable -mfpu=neon which may cause illegal
# instructions to be generated for armv7a code. Instead target the neon code
# specifically.
NEON := c.neon NEON := c.neon
else else
NEON := c NEON := c
@ -185,19 +234,14 @@ include $(BUILD_STATIC_LIBRARY)
include $(CLEAR_VARS) include $(CLEAR_VARS)
LOCAL_PRELINK_MODULE := false LOCAL_PRELINK_MODULE := false
LOCAL_STATIC_LIBRARIES := webp sqlite tgnet breakpad
LOCAL_MODULE := tmessages.15 LOCAL_MODULE := tmessages.17
LOCAL_CFLAGS := -w -std=c11 -Os -DNULL=0 -DSOCKLEN_T=socklen_t -DLOCALE_NOT_USED -D_LARGEFILE_SOURCE=1 -D_FILE_OFFSET_BITS=64 LOCAL_CFLAGS := -w -std=c11 -Os -DNULL=0 -DSOCKLEN_T=socklen_t -DLOCALE_NOT_USED -D_LARGEFILE_SOURCE=1 -D_FILE_OFFSET_BITS=64
LOCAL_CFLAGS += -Drestrict='' -D__EMX__ -DOPUS_BUILD -DFIXED_POINT -DUSE_ALLOCA -DHAVE_LRINT -DHAVE_LRINTF -fno-math-errno LOCAL_CFLAGS += -Drestrict='' -D__EMX__ -DOPUS_BUILD -DFIXED_POINT -DUSE_ALLOCA -DHAVE_LRINT -DHAVE_LRINTF -fno-math-errno
LOCAL_CFLAGS += -DANDROID_NDK -DDISABLE_IMPORTGL -fno-strict-aliasing -fprefetch-loop-arrays -DAVOID_TABLES -DANDROID_TILE_BASED_DECODE -DANDROID_ARMV6_IDCT -ffast-math LOCAL_CFLAGS += -DANDROID_NDK -DDISABLE_IMPORTGL -fno-strict-aliasing -fprefetch-loop-arrays -DAVOID_TABLES -DANDROID_TILE_BASED_DECODE -DANDROID_ARMV6_IDCT -ffast-math -D__STDC_CONSTANT_MACROS
LOCAL_CPPFLAGS := -DBSD=1 -ffast-math -Os -funroll-loops -std=c++11 LOCAL_CPPFLAGS := -DBSD=1 -ffast-math -Os -funroll-loops -std=c++11
LOCAL_LDLIBS := -ljnigraphics -llog -lz LOCAL_LDLIBS := -ljnigraphics -llog -lz
ifeq ($(TARGET_ARCH_ABI),armeabi) LOCAL_STATIC_LIBRARIES := webp sqlite tgnet breakpad avformat avcodec avutil
LOCAL_ARM_MODE := thumb
else
LOCAL_ARM_MODE := arm
endif
LOCAL_SRC_FILES := \ LOCAL_SRC_FILES := \
./opus/src/opus.c \ ./opus/src/opus.c \
@ -211,6 +255,23 @@ LOCAL_SRC_FILES := \
./opus/src/mlp.c \ ./opus/src/mlp.c \
./opus/src/mlp_data.c ./opus/src/mlp_data.c
# Per-ABI settings for this module: every visible branch selects ARM mode,
# and armeabi-v7a additionally defines LIBYUV_NEON for both C and C++.
ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
LOCAL_ARM_MODE := arm
LOCAL_CPPFLAGS += -DLIBYUV_NEON
LOCAL_CFLAGS += -DLIBYUV_NEON
else
ifeq ($(TARGET_ARCH_ABI),armeabi)
LOCAL_ARM_MODE := arm
else
ifeq ($(TARGET_ARCH_ABI),x86)
LOCAL_ARM_MODE := arm
# NOTE(review): every other source list in this file is appended to
# LOCAL_SRC_FILES; LOCAL_SRC_FILE (singular) is not read anywhere visible,
# so row_x86.asm is presumably never added to the build - confirm intent.
LOCAL_SRC_FILE += \
./libyuv/source/row_x86.asm
endif
endif
endif
LOCAL_SRC_FILES += \ LOCAL_SRC_FILES += \
./opus/silk/CNG.c \ ./opus/silk/CNG.c \
./opus/silk/code_signs.c \ ./opus/silk/code_signs.c \
@ -346,21 +407,18 @@ LOCAL_SRC_FILES += \
./opus/opusfile/opusfile.c \ ./opus/opusfile/opusfile.c \
./opus/opusfile/stream.c ./opus/opusfile/stream.c
LOCAL_SRC_FILES += \
./giflib/dgif_lib.c \
./giflib/gifalloc.c
LOCAL_C_INCLUDES := \ LOCAL_C_INCLUDES := \
./opus/include \ $(LOCAL_PATH)/opus/include \
./opus/silk \ $(LOCAL_PATH)/opus/silk \
./opus/silk/fixed \ $(LOCAL_PATH)/opus/silk/fixed \
./opus/celt \ $(LOCAL_PATH)/opus/celt \
./opus/ \ $(LOCAL_PATH)/opus/ \
./opus/opusfile \ $(LOCAL_PATH)/opus/opusfile \
./libyuv/include \ $(LOCAL_PATH)/libyuv/include \
./boringssl/include \ $(LOCAL_PATH)/boringssl/include \
./breakpad/common/android/include \ $(LOCAL_PATH)/breakpad/common/android/include \
./breakpad $(LOCAL_PATH)/breakpad \
$(LOCAL_PATH)/ffmpeg/include
LOCAL_SRC_FILES += \ LOCAL_SRC_FILES += \
./libjpeg/jcapimin.c \ ./libjpeg/jcapimin.c \
@ -413,8 +471,8 @@ LOCAL_SRC_FILES += \
LOCAL_SRC_FILES += \ LOCAL_SRC_FILES += \
./libyuv/source/compare_common.cc \ ./libyuv/source/compare_common.cc \
./libyuv/source/compare_neon.cc \ ./libyuv/source/compare_gcc.cc \
./libyuv/source/compare_posix.cc \ ./libyuv/source/compare_neon64.cc \
./libyuv/source/compare_win.cc \ ./libyuv/source/compare_win.cc \
./libyuv/source/compare.cc \ ./libyuv/source/compare.cc \
./libyuv/source/convert_argb.cc \ ./libyuv/source/convert_argb.cc \
@ -425,32 +483,42 @@ LOCAL_SRC_FILES += \
./libyuv/source/convert_to_i420.cc \ ./libyuv/source/convert_to_i420.cc \
./libyuv/source/convert.cc \ ./libyuv/source/convert.cc \
./libyuv/source/cpu_id.cc \ ./libyuv/source/cpu_id.cc \
./libyuv/source/format_conversion.cc \
./libyuv/source/mjpeg_decoder.cc \ ./libyuv/source/mjpeg_decoder.cc \
./libyuv/source/mjpeg_validate.cc \ ./libyuv/source/mjpeg_validate.cc \
./libyuv/source/planar_functions.cc \ ./libyuv/source/planar_functions.cc \
./libyuv/source/rotate_any.cc \
./libyuv/source/rotate_argb.cc \ ./libyuv/source/rotate_argb.cc \
./libyuv/source/rotate_common.cc \
./libyuv/source/rotate_gcc.cc \
./libyuv/source/rotate_mips.cc \ ./libyuv/source/rotate_mips.cc \
./libyuv/source/rotate_neon.cc \
./libyuv/source/rotate_neon64.cc \ ./libyuv/source/rotate_neon64.cc \
./libyuv/source/rotate_win.cc \
./libyuv/source/rotate.cc \ ./libyuv/source/rotate.cc \
./libyuv/source/row_any.cc \ ./libyuv/source/row_any.cc \
./libyuv/source/row_common.cc \ ./libyuv/source/row_common.cc \
./libyuv/source/row_gcc.cc \
./libyuv/source/row_mips.cc \ ./libyuv/source/row_mips.cc \
./libyuv/source/row_neon.cc \
./libyuv/source/row_neon64.cc \ ./libyuv/source/row_neon64.cc \
./libyuv/source/row_posix.cc \
./libyuv/source/row_win.cc \ ./libyuv/source/row_win.cc \
./libyuv/source/scale_any.cc \
./libyuv/source/scale_argb.cc \ ./libyuv/source/scale_argb.cc \
./libyuv/source/scale_common.cc \ ./libyuv/source/scale_common.cc \
./libyuv/source/scale_gcc.cc \
./libyuv/source/scale_mips.cc \ ./libyuv/source/scale_mips.cc \
./libyuv/source/scale_neon.cc \
./libyuv/source/scale_neon64.cc \ ./libyuv/source/scale_neon64.cc \
./libyuv/source/scale_posix.cc \
./libyuv/source/scale_win.cc \ ./libyuv/source/scale_win.cc \
./libyuv/source/scale.cc \ ./libyuv/source/scale.cc \
./libyuv/source/video_common.cc ./libyuv/source/video_common.cc
ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
LOCAL_CFLAGS += -DLIBYUV_NEON
LOCAL_SRC_FILES += \
./libyuv/source/compare_neon.cc.neon \
./libyuv/source/rotate_neon.cc.neon \
./libyuv/source/row_neon.cc.neon \
./libyuv/source/scale_neon.cc.neon
endif
LOCAL_SRC_FILES += \ LOCAL_SRC_FILES += \
./jni.c \ ./jni.c \
./sqlite_cursor.c \ ./sqlite_cursor.c \
@ -458,10 +526,10 @@ LOCAL_SRC_FILES += \
./sqlite_statement.c \ ./sqlite_statement.c \
./sqlite.c \ ./sqlite.c \
./audio.c \ ./audio.c \
./gif.c \
./utils.c \ ./utils.c \
./image.c \ ./image.c \
./video.c \ ./video.c \
./gifvideo.cpp \
./TgNetWrapper.cpp \ ./TgNetWrapper.cpp \
./NativeLoader.cpp ./NativeLoader.cpp

View File

@ -1,847 +0,0 @@
//thanks to https://github.com/koral--/android-gif-drawable
/*
MIT License
Copyright (c)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
// Copyright (c) 2011 Google Inc. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
The GIFLIB distribution is Copyright (c) 1997 Eric S. Raymond
*/
#include <jni.h>
#include <stdio.h>
#include <time.h>
#include <limits.h>
#include "gif.h"
#include "giflib/gif_lib.h"
#define D_GIF_ERR_NO_FRAMES 1000
#define D_GIF_ERR_INVALID_SCR_DIMS 1001
#define D_GIF_ERR_INVALID_IMG_DIMS 1002
#define D_GIF_ERR_IMG_NOT_CONFINED 1003
/* One 32-bit pixel. The in-memory byte order is blue, green, red, alpha. */
typedef struct {
uint8_t blue;
uint8_t green;
uint8_t red;
uint8_t alpha;
} argb;
/* Per-frame metadata filled in by readExtensions() from a graphics control extension. */
typedef struct {
/* Stored as the raw GIF delay * 10, or 100 when the raw delay is <= 1. */
unsigned int duration;
/* Palette index treated as transparent, or -1 when the frame has none. */
int transpIndex;
/* 3-bit disposal code; the values 2 and 3 are acted on by disposeFrameIfNeeded(). */
unsigned char disposalMethod;
} FrameInfo;
/* Decoder state for one opened GIF; allocated in open() and released by cleanUp(). */
typedef struct {
/* giflib handle; its UserData is used as a FILE * by the file read/rewind helpers. */
GifFileType *gifFilePtr;
/* Initialised to ULONG_MAX in open(); consumers are outside this excerpt. */
unsigned long lastFrameReaminder;
/* Reset to 0 by open() and reset(). */
unsigned long nextStartTime;
/* Index of the frame to decode; -1 after open() and reset(). */
int currentIndex;
/* NOTE(review): not referenced in the visible part of the file. */
unsigned int lastDrawIndex;
/* Array of per-frame metadata, grown by DDGifSlurp() during the scan pass. */
FrameInfo *infos;
/* Full-canvas snapshot, allocated lazily once a frame with disposal method 3 is seen. */
argb *backupPtr;
/* File offset that fileRewindFun() seeks back to. */
int startPos;
/* Scratch buffer of SWidth * SHeight palette indices that receives each decoded frame. */
unsigned char *rasterBits;
/* Concatenated comment-extension text, NUL-terminated; NULL when none was read. */
char *comment;
/* Loop count read from a NETSCAPE2.0 / ANIMEXTS1.0 application extension; 0 by default. */
unsigned short loopCount;
/* Incremented when the last frame is decoded and loopCount > 0; starts at -1. */
int currentLoop;
/* Initialised to 1.0 in open(); consumers are outside this excerpt. */
jfloat speedFactor;
} GifInfo;
static ColorMapObject *defaultCmap = NULL;
static ColorMapObject *genDefColorMap(void) {
ColorMapObject *cmap = GifMakeMapObject(256, NULL);
if (cmap != NULL) {
int iColor;
for (iColor = 0; iColor < 256; iColor++) {
cmap->Colors[iColor].Red = (GifByteType) iColor;
cmap->Colors[iColor].Green = (GifByteType) iColor;
cmap->Colors[iColor].Blue = (GifByteType) iColor;
}
}
return cmap;
}
/*
 * Library-load hook: creates the shared default palette.
 * Returns JNI_VERSION_1_6 on success, or -1 when the palette could not be built.
 */
jint gifOnJNILoad(JavaVM *vm, void *reserved, JNIEnv *env) {
    defaultCmap = genDefColorMap();
    return defaultCmap != NULL ? JNI_VERSION_1_6 : -1;
}
/*
 * Library-unload hook: releases the shared default palette.
 * The global is cleared afterwards so later comparisons against defaultCmap
 * (see cleanUp) never use a dangling pointer.
 */
void gifOnJNIUnload(JavaVM *vm, void *reserved) {
    GifFreeMapObject(defaultCmap);
    defaultCmap = NULL;
}
/*
 * giflib read callback: pulls up to `size` bytes from the FILE stored in
 * gif->UserData into `bytes` and returns how many bytes fread() delivered.
 */
static int fileReadFunc(GifFileType *gif, GifByteType *bytes, int size) {
    FILE *stream = (FILE *) gif->UserData;
    size_t delivered = fread(bytes, 1, size, stream);
    return delivered;
}
/*
 * Seeks the underlying FILE back to the offset recorded in info->startPos.
 * Returns the fseek() result (0 on success).
 */
static int fileRewindFun(GifInfo *info) {
    GifFileType *gif = info->gifFilePtr;
    return fseek(gif->UserData, info->startPos, SEEK_SET);
}
/*
 * Returns the CLOCK_MONOTONIC time in milliseconds, or (unsigned long) -1
 * when the clock cannot be read.
 *
 * The arithmetic is done in unsigned long: where long is 32 bits wide,
 * tv_sec * 1000 in signed arithmetic overflows (undefined behaviour) after
 * roughly 24.8 days of uptime, whereas unsigned arithmetic wraps in a
 * well-defined way.
 */
static unsigned long getRealTime() {
    struct timespec ts;
    if (clock_gettime(CLOCK_MONOTONIC, &ts) == -1) {
        return (unsigned long) -1;
    }
    return (unsigned long) ts.tv_sec * 1000UL + (unsigned long) (ts.tv_nsec / 1000000);
}
/*
 * Releases everything owned by `info`, closes the giflib handle and finally
 * frees `info` itself.
 */
static void cleanUp(GifInfo *info) {
    /* free(NULL) is a no-op, so the buffers need no individual NULL guards. */
    free(info->backupPtr);
    info->backupPtr = NULL;
    free(info->infos);
    info->infos = NULL;
    free(info->rasterBits);
    info->rasterBits = NULL;
    free(info->comment);
    info->comment = NULL;

    GifFileType *gif = info->gifFilePtr;
    /* Detach the shared default palette before the handle is closed. */
    if (gif->SColorMap == defaultCmap) {
        gif->SColorMap = NULL;
    }

    SavedImage *frames = gif->SavedImages;
    if (frames != NULL) {
        for (int n = 0; n < gif->ImageCount; n++) {
            ColorMapObject *localMap = frames[n].ImageDesc.ColorMap;
            if (localMap != NULL) {
                GifFreeMapObject(localMap);
                frames[n].ImageDesc.ColorMap = NULL;
            }
        }
        free(frames);
        gif->SavedImages = NULL;
    }

    DGifCloseFile(gif);
    free(info);
}
/*
 * Appends one comment sub-block to the string in *cmt.
 * Bytes[0] holds the payload length and the payload follows it. *cmt is
 * grown with realloc() and kept NUL-terminated.
 * Returns GIF_OK, or GIF_ERROR when the reallocation fails (*cmt is then
 * left untouched).
 */
static int getComment(GifByteType *Bytes, char **cmt) {
    const unsigned int payloadLen = (unsigned int) Bytes[0];
    unsigned int existingLen = 0;
    if (*cmt != NULL) {
        existingLen = strlen(*cmt);
    }

    char *grown = realloc(*cmt, (payloadLen + existingLen + 1) * sizeof(char));
    if (grown == NULL) {
        return GIF_ERROR;
    }

    memcpy(grown + existingLen, &Bytes[1], payloadLen);
    grown[payloadLen + existingLen] = 0;
    *cmt = grown;
    return GIF_OK;
}
/* Stores the four given channel values into *pixel. */
static void packARGB32(argb *pixel, GifByteType alpha, GifByteType red, GifByteType green, GifByteType blue) {
    pixel->blue = blue;
    pixel->green = green;
    pixel->red = red;
    pixel->alpha = alpha;
}
/*
 * Writes the fully opaque colour stored at palette slot `idx` into *dst.
 * An index at or beyond cmap->ColorCount falls back to slot 0.
 */
static void getColorFromTable(int idx, argb *dst, const ColorMapObject *cmap) {
    int slot = idx;
    if (slot >= cmap->ColorCount) {
        slot = 0;
    }
    GifColorType *entry = cmap->Colors + slot;
    packARGB32(dst, 0xFF, entry->Red, entry->Green, entry->Blue);
}
/* Sets all w * h pixels of `bm` to `color`. */
static void eraseColor(argb *bm, int w, int h, argb color) {
    const int total = w * h;
    for (int n = 0; n < total; n++) {
        bm[n] = color;
    }
}
/*
 * Allocates the full-canvas backup bitmap and pre-fills it: with the GIF
 * background colour when transpIndex is -1, otherwise with fully
 * transparent black.
 * Returns false (and records D_GIF_ERR_NOT_ENOUGH_MEM on the handle) when
 * the allocation fails.
 */
static inline bool setupBackupBmp(GifInfo *info, short transpIndex) {
    GifFileType *gif = info->gifFilePtr;

    argb *snapshot = calloc(gif->SWidth * gif->SHeight, sizeof(argb));
    info->backupPtr = snapshot;
    if (snapshot == NULL) {
        info->gifFilePtr->Error = D_GIF_ERR_NOT_ENOUGH_MEM;
        return false;
    }

    argb fill;
    if (transpIndex != -1) {
        packARGB32(&fill, 0, 0, 0, 0);
    } else {
        getColorFromTable(gif->SBackGroundColor, &fill, gif->SColorMap);
    }
    eraseColor(snapshot, gif->SWidth, gif->SHeight, fill);
    return true;
}
/*
 * Interprets one extension sub-block during the scan pass.
 *
 *  - Graphics control extension (length byte 4): fills the FrameInfo slot of
 *    the frame about to be read (index gifFilePtr->ImageCount) and allocates
 *    the backup bitmap the first time disposal method 3 is seen.
 *  - Comment extension: appends the text to info->comment.
 *  - NETSCAPE2.0 / ANIMEXTS1.0 application extension: reads the following
 *    sub-block and, if it is the loop sub-block, stores the loop count.
 *
 * ExtData may be NULL (nothing to do). Returns GIF_OK or GIF_ERROR.
 */
static int readExtensions(int ExtFunction, GifByteType *ExtData, GifInfo *info) {
    if (ExtData == NULL) {
        return GIF_OK;
    }
    if (ExtFunction == GRAPHICS_EXT_FUNC_CODE && ExtData[0] == 4) {
        FrameInfo *fi = &info->infos[info->gifFilePtr->ImageCount];
        /* Read the payload as unsigned bytes: with a signed char the low
         * delay byte would be sign-extended and clobber the high byte. */
        const unsigned char *b = (const unsigned char *) ExtData + 1;
        fi->transpIndex = -1;
        /* The delay is an unsigned 16-bit little-endian value. */
        unsigned int delay = ((unsigned int) b[2] << 8) | b[1];
        fi->duration = delay > 1 ? delay * 10 : 100;
        fi->disposalMethod = (b[0] >> 2) & 7;
        if (b[0] & 1) {
            fi->transpIndex = b[3];
        }
        if (fi->disposalMethod == 3 && info->backupPtr == NULL) {
            if (!setupBackupBmp(info, fi->transpIndex)) {
                return GIF_ERROR;
            }
        }
    } else if (ExtFunction == COMMENT_EXT_FUNC_CODE) {
        if (getComment(ExtData, &info->comment) == GIF_ERROR) {
            info->gifFilePtr->Error = D_GIF_ERR_NOT_ENOUGH_MEM;
            return GIF_ERROR;
        }
    } else if (ExtFunction == APPLICATION_EXT_FUNC_CODE && ExtData[0] == 11) {
        const char *appId = (const char *) &ExtData[1];
        if (strncmp("NETSCAPE2.0", appId, 11) == 0 || strncmp("ANIMEXTS1.0", appId, 11) == 0) {
            if (DGifGetExtensionNext(info->gifFilePtr, &ExtData, &ExtFunction) == GIF_ERROR) {
                return GIF_ERROR;
            }
            /* The extension may end here, in which case ExtData is NULL
             * (callers loop on that condition) and must not be dereferenced. */
            if (ExtData != NULL && ExtFunction == APPLICATION_EXT_FUNC_CODE && ExtData[0] == 3 && ExtData[1] == 1) {
                info->loopCount = (unsigned short) (ExtData[2] + (ExtData[3] << 8));
            }
        }
    }
    return GIF_OK;
}
/*
 * Walks the GIF record stream of GifFile.
 *
 * shouldDecode == false (scan pass): image data is skipped, and every
 * extension sub-block is handed to readExtensions() after info->infos has
 * been grown to hold ImageCount + 1 entries. Runs until the terminator
 * record.
 *
 * shouldDecode == true: the first image record met is decoded into
 * info->rasterBits for frame info->currentIndex and the function returns
 * GIF_OK right away; extension records met on the way are consumed but not
 * interpreted.
 *
 * Returns GIF_OK or GIF_ERROR (GifFile->Error is set on the failure paths
 * handled here).
 */
static int DDGifSlurp(GifFileType *GifFile, GifInfo* info, bool shouldDecode) {
GifRecordType RecordType;
GifByteType *ExtData;
int codeSize;
int ExtFunction;
size_t ImageSize;
do {
if (DGifGetRecordType(GifFile, &RecordType) == GIF_ERROR) {
return (GIF_ERROR);
}
switch (RecordType) {
case IMAGE_DESC_RECORD_TYPE:
// NOTE(review): the second argument's meaning comes from the bundled giflib, which is not visible here.
if (DGifGetImageDesc(GifFile, !shouldDecode) == GIF_ERROR) {
return (GIF_ERROR);
}
// When decoding, work on the requested frame; when scanning, on the newest saved image.
int i = shouldDecode ? info->currentIndex : GifFile->ImageCount - 1;
SavedImage *sp = &GifFile->SavedImages[i];
// NOTE(review): the product is evaluated in the operands' own type before being stored in size_t.
ImageSize = sp->ImageDesc.Width * sp->ImageDesc.Height;
if (sp->ImageDesc.Width < 1 || sp->ImageDesc.Height < 1 || ImageSize > (SIZE_MAX / sizeof(GifPixelType))) {
GifFile->Error = D_GIF_ERR_INVALID_IMG_DIMS;
return GIF_ERROR;
}
// A frame larger than the logical screen would not fit in rasterBits.
if (sp->ImageDesc.Width > GifFile->SWidth || sp->ImageDesc.Height > GifFile->SHeight) {
GifFile->Error = D_GIF_ERR_IMG_NOT_CONFINED;
return GIF_ERROR;
}
if (shouldDecode) {
// All frames are decoded into the single shared scratch buffer.
sp->RasterBits = info->rasterBits;
if (sp->ImageDesc.Interlace) {
int i, j;
// Interlaced data arrives in four passes: start row and row step for each pass.
int InterlacedOffset[] = { 0, 4, 2, 1 };
int InterlacedJumps[] = { 8, 8, 4, 2 };
for (i = 0; i < 4; i++) {
for (j = InterlacedOffset[i]; j < sp->ImageDesc.Height; j += InterlacedJumps[i]) {
if (DGifGetLine(GifFile, sp->RasterBits + j * sp->ImageDesc.Width, sp->ImageDesc.Width) == GIF_ERROR) {
return GIF_ERROR;
}
}
}
} else {
// Non-interlaced: the whole image is read in one call.
if (DGifGetLine(GifFile, sp->RasterBits, ImageSize) == GIF_ERROR) {
return (GIF_ERROR);
}
}
// Last frame decoded: count the finished loop (only when a loop count was read) and rewind the file.
if (info->currentIndex >= GifFile->ImageCount - 1) {
if (info->loopCount > 0) {
info->currentLoop++;
}
if (fileRewindFun(info) != 0) {
info->gifFilePtr->Error = D_GIF_ERR_READ_FAILED;
return GIF_ERROR;
}
}
return GIF_OK;
} else {
// Scan pass: read past the image data blocks without keeping them.
if (DGifGetCode(GifFile, &codeSize, &ExtData) == GIF_ERROR) {
return (GIF_ERROR);
}
while (ExtData != NULL) {
if (DGifGetCodeNext(GifFile, &ExtData) == GIF_ERROR) {
return (GIF_ERROR);
}
}
}
break;
case EXTENSION_RECORD_TYPE:
if (DGifGetExtension(GifFile, &ExtFunction, &ExtData) == GIF_ERROR) {
return (GIF_ERROR);
}
if (!shouldDecode) {
// Make room for the metadata of the frame that follows this extension.
FrameInfo *tmpInfos = realloc(info->infos, (GifFile->ImageCount + 1) * sizeof(FrameInfo));
if (tmpInfos == NULL) {
return GIF_ERROR;
}
info->infos = tmpInfos;
if (readExtensions(ExtFunction, ExtData, info) == GIF_ERROR) {
return GIF_ERROR;
}
}
// Consume the remaining sub-blocks; they are interpreted only during the scan pass.
while (ExtData != NULL) {
if (DGifGetExtensionNext(GifFile, &ExtData, &ExtFunction) == GIF_ERROR) {
return (GIF_ERROR);
}
if (!shouldDecode) {
if (readExtensions(ExtFunction, ExtData, info) == GIF_ERROR) {
return GIF_ERROR;
}
}
}
break;
case TERMINATE_RECORD_TYPE:
break;
default:
break;
}
} while (RecordType != TERMINATE_RECORD_TYPE);
// Reached only when the terminator was hit; in decode mode the file is rewound.
bool ok = true;
if (shouldDecode) {
ok = (fileRewindFun(info) == 0);
}
if (ok) {
return (GIF_OK);
} else {
info->gifFilePtr->Error = D_GIF_ERR_READ_FAILED;
return (GIF_ERROR);
}
}
/*
 * Converts `width` palette indices from `src` into pixels at `dst`.
 * Indices equal to `transparent` leave the destination pixel untouched.
 */
static void copyLine(argb *dst, const unsigned char *src, const ColorMapObject *cmap, int transparent, int width) {
    for (int x = 0; x < width; x++) {
        const unsigned char colorIndex = src[x];
        if (colorIndex == transparent) {
            continue;
        }
        getColorFromTable(colorIndex, &dst[x], cmap);
    }
}
/* Returns the address of pixel (left, top) in a bitmap whose rows are `width` pixels long. */
static argb *getAddr(argb *bm, int width, int left, int top) {
    const int offset = top * width + left;
    return &bm[offset];
}
/*
 * Paints `frame` onto the canvas `bm` (width x height pixels) at the frame's
 * own Left/Top offset, clipping at the right and bottom canvas edges.
 * Pixels whose palette index equals `transparent` are skipped.
 */
static void blitNormal(argb *bm, int width, int height, const SavedImage *frame, const ColorMapObject *cmap, int transparent) {
    const GifWord frameLeft = frame->ImageDesc.Left;
    const GifWord frameTop = frame->ImageDesc.Top;

    GifWord cols = frame->ImageDesc.Width;
    if (frameLeft + cols > width) {
        cols = width - frameLeft;
    }
    GifWord rows = frame->ImageDesc.Height;
    if (frameTop + rows > height) {
        rows = height - frameTop;
    }

    const unsigned char *srcRow = (unsigned char *) frame->RasterBits;
    argb *dstRow = getAddr(bm, width, frameLeft, frameTop);
    for (GifWord y = 0; y < rows; y++) {
        copyLine(dstRow, srcRow, cmap, transparent, cols);
        srcRow += frame->ImageDesc.Width;
        dstRow += width;
    }
}
/*
 * Fills the rectangle (left, top, width, height) of the canvas `bm`
 * (bmWidth x bmHeight pixels) with `col`, clipping at the right and bottom
 * canvas edges.
 *
 * Pixels are assigned one by one: memset() would replicate only a single
 * byte of the colour, which is correct solely for colours whose four
 * channels are equal. A rectangle that is empty after clipping is skipped,
 * so a negative extent can never turn into a huge unsigned byte count.
 */
static void fillRect(argb *bm, int bmWidth, int bmHeight, GifWord left, GifWord top, GifWord width, GifWord height, argb col) {
    GifWord copyWidth = width;
    if (left + copyWidth > bmWidth) {
        copyWidth = bmWidth - left;
    }
    GifWord copyHeight = height;
    if (top + copyHeight > bmHeight) {
        copyHeight = bmHeight - top;
    }
    if (copyWidth <= 0 || copyHeight <= 0) {
        return;
    }

    argb *row = getAddr(bm, bmWidth, left, top);
    for (; copyHeight > 0; copyHeight--) {
        for (GifWord x = 0; x < copyWidth; x++) {
            row[x] = col;
        }
        row += bmWidth;
    }
}
/*
 * Draws `frame` onto the canvas using the frame's local palette when it has
 * one, otherwise the palette passed in `cmap`. A local palette whose
 * ColorCount does not equal 1 << BitsPerPixel is replaced by the shared
 * default palette.
 */
static void drawFrame(argb *bm, int bmWidth, int bmHeight, const SavedImage *frame, const ColorMapObject *cmap, short transpIndex) {
    const ColorMapObject *palette = cmap;
    const ColorMapObject *localMap = frame->ImageDesc.ColorMap;
    if (localMap != NULL) {
        palette = (localMap->ColorCount == (1 << localMap->BitsPerPixel)) ? localMap : defaultCmap;
    }
    blitNormal(bm, bmWidth, bmHeight, frame, palette, transpIndex);
}
static bool checkIfCover(const SavedImage *target, const SavedImage *covered) {
if (target->ImageDesc.Left <= covered->ImageDesc.Left
&& covered->ImageDesc.Left + covered->ImageDesc.Width
<= target->ImageDesc.Left + target->ImageDesc.Width
&& target->ImageDesc.Top <= covered->ImageDesc.Top
&& covered->ImageDesc.Top + covered->ImageDesc.Height
<= target->ImageDesc.Top + target->ImageDesc.Height) {
return true;
}
return false;
}
/*
 * Applies the disposal method of frame idx - 1 to the canvas `bm` before
 * frame idx is drawn, then snapshots the canvas when frame idx itself asks
 * to be disposed by restoring the previous content (method 3).
 *
 * Disposal is skipped when the next frame is opaque and fully covers the
 * current one, because every affected pixel is about to be overwritten.
 *
 *  - method 2: the current frame's rectangle is cleared to transparent.
 *  - method 3: the canvas is restored from the backup bitmap. The restore is
 *    an actual copy; swapping local pointers would leave the caller's canvas
 *    untouched.
 *
 * idx must be >= 1.
 */
static inline void disposeFrameIfNeeded(argb *bm, GifInfo *info, unsigned int idx) {
    GifFileType *fGif = info->gifFilePtr;
    argb *backup = info->backupPtr;
    const size_t canvasBytes = (size_t) fGif->SWidth * (size_t) fGif->SHeight * sizeof(argb);

    SavedImage *cur = &fGif->SavedImages[idx - 1];
    SavedImage *next = &fGif->SavedImages[idx];
    int curDisposal = info->infos[idx - 1].disposalMethod;
    bool nextTrans = info->infos[idx].transpIndex != -1;
    int nextDisposal = info->infos[idx].disposalMethod;

    if ((curDisposal == 2 || curDisposal == 3) && (nextTrans || !checkIfCover(next, cur))) {
        if (curDisposal == 2) {
            argb transparent;
            packARGB32(&transparent, 0, 0, 0, 0);
            fillRect(bm, fGif->SWidth, fGif->SHeight, cur->ImageDesc.Left, cur->ImageDesc.Top, cur->ImageDesc.Width, cur->ImageDesc.Height, transparent);
        } else if (backup != NULL) {
            memcpy(bm, backup, canvasBytes);
        }
    }

    /* Remember the canvas as it is before the next frame gets drawn. */
    if (nextDisposal == 3 && backup != NULL) {
        memcpy(backup, bm, canvasBytes);
    }
}
/*
 * Rewinds the file and puts playback back before the first frame.
 * Nothing is changed when the rewind fails.
 */
static void reset(GifInfo *info) {
    if (fileRewindFun(info) == 0) {
        info->nextStartTime = 0;
        info->currentLoop = -1;
        info->currentIndex = -1;
    }
}
/*
 * Decodes frame info->currentIndex and composes it onto the canvas `bm`.
 * Frame 0 starts from a canvas cleared to the background colour (or to
 * transparent when the frame has a transparent index); later frames first
 * get the previous frame's disposal applied. Returns silently when decoding
 * fails.
 */
static void getBitmap(argb *bm, GifInfo *info) {
    GifFileType *gif = info->gifFilePtr;
    /* Read the index before decoding, as the original does. */
    const int frameIdx = info->currentIndex;

    if (DDGifSlurp(gif, info, true) == GIF_ERROR) {
        return;
    }

    SavedImage *frame = &gif->SavedImages[frameIdx];
    const int transparent = info->infos[frameIdx].transpIndex;

    if (frameIdx != 0) {
        disposeFrameIfNeeded(bm, info, frameIdx);
    } else {
        argb background;
        if (transparent != -1) {
            packARGB32(&background, 0, 0, 0, 0);
        } else {
            getColorFromTable(gif->SBackGroundColor, &background, gif->SColorMap);
        }
        eraseColor(bm, gif->SWidth, gif->SHeight, background);
    }

    drawFrame(bm, gif->SWidth, gif->SHeight, frame, gif->SColorMap, transparent);
}
/*
 * Writes {width, height, ImageCount, errorCode} into the first four slots of
 * the Java int array `metaData`. Does nothing when the array elements cannot
 * be obtained.
 */
static void setMetaData(int width, int height, int ImageCount, int errorCode, JNIEnv *env, jintArray metaData) {
    jint *const slots = (*env)->GetIntArrayElements(env, metaData, 0);
    if (slots != NULL) {
        slots[0] = width;
        slots[1] = height;
        slots[2] = ImageCount;
        slots[3] = errorCode;
        /* Mode 0 is the JNI flag for copying the values back and releasing the buffer. */
        (*env)->ReleaseIntArrayElements(env, metaData, slots, 0);
    }
}
/*
 * Builds the GifInfo decoder state for an already opened giflib handle.
 *
 * GifFileIn - handle returned by giflib (may be NULL when opening failed).
 * Error     - error code reported by the caller's open attempt, 0 for none.
 * startPos  - file offset the decoder rewinds to; a negative value is
 *             treated as "not readable".
 * metaData  - Java int array receiving {width, height, frame count, error}.
 *
 * Returns the GifInfo pointer cast to jint, or 0 on any failure.
 * NOTE(review): jint is 32 bits wide, so the cast cannot hold a pointer on
 * 64-bit ABIs - confirm which ABIs this is built for.
 */
static jint open(GifFileType *GifFileIn, int Error, int startPos, JNIEnv *env, jintArray metaData) {
if (startPos < 0) {
Error = D_GIF_ERR_NOT_READABLE;
DGifCloseFile(GifFileIn);
}
// Covers both a failed open by the caller and the negative startPos case above.
if (Error != 0 || GifFileIn == NULL) {
setMetaData(0, 0, 0, Error, env, metaData);
return (jint) NULL;
}
int width = GifFileIn->SWidth, height = GifFileIn->SHeight;
unsigned int wxh = width * height;
// Reject an empty canvas and a pixel count that does not fit in an int.
if (wxh < 1 || wxh > INT_MAX) {
DGifCloseFile(GifFileIn);
setMetaData(width, height, 0, D_GIF_ERR_INVALID_SCR_DIMS, env, metaData);
return (jint) NULL;
}
GifInfo *info = malloc(sizeof(GifInfo));
if (info == NULL) {
DGifCloseFile(GifFileIn);
setMetaData(width, height, 0, D_GIF_ERR_NOT_ENOUGH_MEM, env, metaData);
return (jint) NULL;
}
info->gifFilePtr = GifFileIn;
info->startPos = startPos;
info->currentIndex = -1;
info->nextStartTime = 0;
info->lastFrameReaminder = ULONG_MAX;
info->comment = NULL;
info->loopCount = 0;
info->currentLoop = -1;
info->speedFactor = 1.0;
// One palette index per canvas pixel; every frame is decoded into this buffer.
info->rasterBits = calloc(GifFileIn->SHeight * GifFileIn->SWidth, sizeof(GifPixelType));
info->infos = malloc(sizeof(FrameInfo));
info->backupPtr = NULL;
// From here on cleanUp() owns the release of info and of the giflib handle.
if (info->rasterBits == NULL || info->infos == NULL) {
cleanUp(info);
setMetaData(width, height, 0, D_GIF_ERR_NOT_ENOUGH_MEM, env, metaData);
return (jint) NULL;
}
info->infos->duration = 0;
info->infos->disposalMethod = 0;
info->infos->transpIndex = -1;
// A missing or malformed global palette is replaced by the shared default one.
if (GifFileIn->SColorMap == NULL || GifFileIn->SColorMap->ColorCount != (1 << GifFileIn->SColorMap->BitsPerPixel)) {
GifFreeMapObject(GifFileIn->SColorMap);
GifFileIn->SColorMap = defaultCmap;
}
// Scan pass: collects per-frame metadata. Its return value is not checked here;
// only the resulting frame count is.
DDGifSlurp(GifFileIn, info, false);
int imgCount = GifFileIn->ImageCount;
if (imgCount < 1) {
Error = D_GIF_ERR_NO_FRAMES;
}
if (fileRewindFun(info) != 0) {
Error = D_GIF_ERR_READ_FAILED;
}
if (Error != 0) {
cleanUp(info);
}
setMetaData(width, height, imgCount, Error, env, metaData);
return (jint)(Error == 0 ? info : NULL);
}
/*
 * Returns the number of bytes held by the per-GIF pixel buffers: the raster
 * buffer (one GifPixelType per canvas pixel) plus, when it has been
 * allocated, the backup canvas (one argb per canvas pixel).
 * Returns 0 for a NULL handle.
 */
JNIEXPORT jlong JNICALL Java_org_telegram_ui_Components_GifDrawable_getAllocationByteCount(JNIEnv *env, jclass class, jobject gifInfo) {
    GifInfo *info = (GifInfo *)gifInfo;
    if (info == NULL) {
        return 0;
    }
    /*
     * The buffers are sized as width * height elements, so the pixel count
     * is the product of the canvas dimensions. The arithmetic is done in
     * jlong so that large canvases cannot overflow a 32-bit intermediate.
     */
    const jlong pxCount = (jlong) info->gifFilePtr->SWidth * (jlong) info->gifFilePtr->SHeight;
    jlong sum = pxCount * (jlong) sizeof(GifPixelType);
    if (info->backupPtr != NULL) {
        sum += pxCount * (jlong) sizeof(argb);
    }
    return sum;
}
/*
 * JNI entry point: forwards to the static reset() helper.
 * A NULL handle is ignored.
 */
JNIEXPORT void JNICALL Java_org_telegram_ui_Components_GifDrawable_reset(JNIEnv *env, jclass class, jobject gifInfo) {
    GifInfo *const handle = (GifInfo *) gifInfo;

    if (handle != NULL) {
        reset(handle);
    }
}
/*
 * JNI entry point: stores the playback speed factor in the GifInfo state.
 * A NULL handle is ignored; the factor itself is not validated here.
 */
JNIEXPORT void JNICALL Java_org_telegram_ui_Components_GifDrawable_setSpeedFactor(JNIEnv *env, jclass class, jobject gifInfo, jfloat factor) {
    GifInfo *const handle = (GifInfo *) gifInfo;

    if (handle != NULL) {
        handle->speedFactor = factor;
    }
}
/*
 * JNI entry point: seeks forward to the frame that is on screen at
 * desiredPos (same time unit as FrameInfo.duration) and renders every frame
 * up to and including it into jPixels.
 *
 * - NULL handle / NULL pixel array and GIFs with at most one frame: no-op.
 * - Negative positions are treated as 0.
 * - Positions past the end of the animation select the last frame, with the
 *   in-frame remainder capped at that frame's duration.
 * - Seeking backwards (target frame before the current one) is not
 *   supported and is a no-op.
 */
JNIEXPORT void JNICALL Java_org_telegram_ui_Components_GifDrawable_seekToTime(JNIEnv *env, jclass class, jobject gifInfo, jint desiredPos, jintArray jPixels) {
    GifInfo *info = (GifInfo *)gifInfo;
    if (info == NULL || jPixels == NULL) {
        return;
    }
    const int imgCount = info->gifFilePtr->ImageCount;
    if (imgCount <= 1) {
        return;
    }
    /* A negative jint would otherwise wrap to a huge unsigned position. */
    const unsigned long target = desiredPos < 0 ? 0UL : (unsigned long) desiredPos;

    /*
     * Find the frame containing `target`. The loop deliberately stops before
     * the last frame: if no earlier frame contains the position, i ends up on
     * the last valid index (never one past it) and `sum` holds the start time
     * of that last frame.
     */
    unsigned long sum = 0;
    int i;
    for (i = 0; i < imgCount - 1; i++) {
        unsigned long newSum = sum + info->infos[i].duration;
        if (newSum >= target) {
            break;
        }
        sum = newSum;
    }
    if (i < info->currentIndex) {
        return;
    }
    unsigned long lastFrameRemainder = target - sum;
    if (i == imgCount - 1 && lastFrameRemainder > info->infos[i].duration) {
        lastFrameRemainder = info->infos[i].duration;
    }
    if (i > info->currentIndex) {
        jint *const pixels = (*env)->GetIntArrayElements(env, jPixels, 0);
        if (pixels == NULL) {
            return;
        }
        /*
         * Stop once frame i itself has been rendered (same convention as
         * seekToFrame); going one step further would index past the last
         * frame when i is the final one.
         */
        while (info->currentIndex < i) {
            info->currentIndex++;
            getBitmap((argb*) pixels, info);
        }
        (*env)->ReleaseIntArrayElements(env, jPixels, pixels, 0);
    }
    info->lastFrameReaminder = lastFrameRemainder;
    /* NOTE(review): renderFrame divides durations by speedFactor while this
     * multiplies - confirm which scaling is intended. Kept as in the original. */
    if (info->speedFactor == 1.0) {
        info->nextStartTime = getRealTime() + lastFrameRemainder;
    } else {
        info->nextStartTime = getRealTime() + lastFrameRemainder * info->speedFactor;
    }
}
/*
 * JNI entry point: renders forward, frame by frame, until the frame with
 * index desiredIdx (capped at the last frame) is the current one.
 *
 * Nothing happens for a NULL handle or pixel array, when desiredIdx is not
 * ahead of the current frame, when the GIF has at most one frame, or when
 * the pixel array elements cannot be obtained. After a successful seek the
 * saved remainder is cleared and the next start time is re-armed from the
 * duration of the frame that is now current.
 */
JNIEXPORT void JNICALL Java_org_telegram_ui_Components_GifDrawable_seekToFrame(JNIEnv *env, jclass class, jobject gifInfo, jint desiredIdx, jintArray jPixels) {
    GifInfo *const state = (GifInfo *) gifInfo;
    if (state == NULL || jPixels == NULL) {
        return;
    }
    if (state->currentIndex >= desiredIdx) {
        return;
    }

    const int frameCount = state->gifFilePtr->ImageCount;
    if (frameCount <= 1) {
        return;
    }

    jint *const canvas = (*env)->GetIntArrayElements(env, jPixels, 0);
    if (canvas == NULL) {
        return;
    }

    state->lastFrameReaminder = 0;
    const jint lastWanted = desiredIdx >= frameCount ? frameCount - 1 : desiredIdx;
    while (state->currentIndex < lastWanted) {
        ++state->currentIndex;
        getBitmap((argb *) canvas, state);
    }
    (*env)->ReleaseIntArrayElements(env, jPixels, canvas, 0);

    if (state->speedFactor != 1.0) {
        state->nextStartTime = getRealTime() + state->infos[state->currentIndex].duration * state->speedFactor;
    } else {
        state->nextStartTime = getRealTime() + state->infos[state->currentIndex].duration;
    }
}
/*
 * JNI entry point: advances the animation by at most one frame and reports
 * timing back through metaData.
 *
 * When the current time has reached nextStartTime and currentLoop is still
 * below loopCount, the frame index is advanced (wrapping to 0 after the last
 * frame), the frame is rendered into jPixels, metaData[3] receives the
 * decoder's Error field and metaData[4] the (speed-scaled) duration of the
 * new frame. Otherwise nothing is drawn and metaData[4] receives the delay
 * until the next frame is due, or -1 when that delay is negative.
 *
 * NOTE(review): metaData is not NULL-checked and is assumed to have at least
 * five elements - confirm against the Java caller.
 */
JNIEXPORT void JNICALL Java_org_telegram_ui_Components_GifDrawable_renderFrame(JNIEnv *env, jclass class, jintArray jPixels, jobject gifInfo, jintArray metaData) {
GifInfo *info = (GifInfo *)gifInfo;
if (info == NULL || jPixels == NULL) {
return;
}
bool needRedraw = false;
unsigned long rt = getRealTime();
/* Advance only when the current frame's time is up and loops remain. */
if (rt >= info->nextStartTime && info->currentLoop < info->loopCount) {
if (++info->currentIndex >= info->gifFilePtr->ImageCount) {
info->currentIndex = 0;
}
needRedraw = true;
}
/* Note: the frame index above has already been advanced if this acquisition fails. */
jint *const rawMetaData = (*env)->GetIntArrayElements(env, metaData, 0);
if (rawMetaData == NULL) {
return;
}
if (needRedraw) {
jint *const pixels = (*env)->GetIntArrayElements(env, jPixels, 0);
if (pixels == NULL) {
/* Release the metadata buffer acquired above before bailing out. */
(*env)->ReleaseIntArrayElements(env, metaData, rawMetaData, 0);
return;
}
getBitmap((argb *)pixels, info);
rawMetaData[3] = info->gifFilePtr->Error;
(*env)->ReleaseIntArrayElements(env, jPixels, pixels, 0);
unsigned int scaledDuration = info->infos[info->currentIndex].duration;
if (info->speedFactor != 1.0) {
/* A larger speed factor shortens the frame; the result is kept within 1..INT_MAX. */
scaledDuration /= info->speedFactor;
if (scaledDuration<=0) {
scaledDuration=1;
} else if (scaledDuration > INT_MAX) {
scaledDuration = INT_MAX;
}
}
info->nextStartTime = rt + scaledDuration;
rawMetaData[4] = scaledDuration;
} else {
/* Nothing drawn: report how long until the next frame is due. */
long delay = info->nextStartTime-rt;
if (delay < 0) {
rawMetaData[4] = -1;
} else {
rawMetaData[4] = (int) delay;
}
}
(*env)->ReleaseIntArrayElements(env, metaData, rawMetaData, 0);
}
/*
 * JNI entry point: closes the FILE stored in the decoder's UserData slot
 * (if any), clears that slot and hands the GifInfo to cleanUp().
 * A NULL handle is ignored.
 */
JNIEXPORT void JNICALL Java_org_telegram_ui_Components_GifDrawable_free(JNIEnv *env, jclass class, jobject gifInfo) {
    if (gifInfo != NULL) {
        GifInfo *const state = (GifInfo *) gifInfo;
        GifFileType *const gif = state->gifFilePtr;
        FILE *const stream = gif->UserData;

        if (stream != NULL) {
            fclose(stream);
        }
        /* make sure nothing touches the closed stream during clean-up */
        gif->UserData = NULL;
        cleanUp(state);
    }
}
/*
 * JNI entry point: returns the GIF comment as a new Java string.
 *
 * Returns NULL for a NULL handle and also when no comment has been stored
 * (info->comment is NULL), instead of handing a NULL C string to
 * NewStringUTF.
 */
JNIEXPORT jstring JNICALL Java_org_telegram_ui_Components_GifDrawable_getComment(JNIEnv *env, jclass class, jobject gifInfo) {
    if (gifInfo == NULL) {
        return NULL;
    }
    GifInfo *info = (GifInfo *)gifInfo;
    if (info->comment == NULL) {
        return NULL;
    }
    return (*env)->NewStringUTF(env, info->comment);
}
/*
 * JNI entry point: returns the loopCount field of the GifInfo state,
 * or 0 for a NULL handle.
 */
JNIEXPORT jint JNICALL Java_org_telegram_ui_Components_GifDrawable_getLoopCount(JNIEnv *env, jclass class, jobject gifInfo) {
    const GifInfo *const state = (const GifInfo *) gifInfo;

    return state != NULL ? state->loopCount : 0;
}
/*
 * JNI entry point: returns the sum of the durations of all frames,
 * or 0 for a NULL handle.
 */
JNIEXPORT jint JNICALL Java_org_telegram_ui_Components_GifDrawable_getDuration(JNIEnv *env, jclass class, jobject gifInfo) {
    GifInfo *const state = (GifInfo *) gifInfo;
    if (state == NULL) {
        return 0;
    }

    unsigned long total = 0;
    int frame = 0;
    while (frame < state->gifFilePtr->ImageCount) {
        total += state->infos[frame].duration;
        frame++;
    }
    return total;
}
/*
 * JNI entry point: returns the current playback position, computed as the
 * summed durations of all frames before the current one plus a remainder.
 * The remainder is the saved lastFrameReaminder when one is stored
 * (i.e. it is not ULONG_MAX), otherwise getRealTime() - nextStartTime.
 *
 * Returns 0 for a NULL handle, when no frame has been rendered yet, or when
 * the GIF has at most one frame.
 */
JNIEXPORT jint JNICALL Java_org_telegram_ui_Components_GifDrawable_getCurrentPosition(JNIEnv *env, jclass class, jobject gifInfo) {
    GifInfo *const state = (GifInfo *) gifInfo;
    if (state == NULL) {
        return 0;
    }

    const int current = state->currentIndex;
    if (state->gifFilePtr->ImageCount <= 1 || current < 0) {
        return 0;
    }

    unsigned int elapsed = 0;
    int frame;
    for (frame = 0; frame != current; ++frame) {
        elapsed += state->infos[frame].duration;
    }

    unsigned long remainder = state->lastFrameReaminder == ULONG_MAX ? getRealTime() - state->nextStartTime : state->lastFrameReaminder;
    return (int) (elapsed + remainder);
}
/*
 * JNI entry point: stores getRealTime() - nextStartTime in
 * lastFrameReaminder. A NULL handle is ignored.
 */
JNIEXPORT void JNICALL Java_org_telegram_ui_Components_GifDrawable_saveRemainder(JNIEnv *env, jclass class, jobject gifInfo) {
    GifInfo *const state = (GifInfo *) gifInfo;

    if (state != NULL) {
        state->lastFrameReaminder = getRealTime() - state->nextStartTime;
    }
}
/*
 * JNI entry point: if a remainder is stored (lastFrameReaminder is not
 * ULONG_MAX), re-arms nextStartTime as now + remainder and marks the
 * remainder as consumed by setting it back to ULONG_MAX.
 * A NULL handle is ignored.
 */
JNIEXPORT void JNICALL Java_org_telegram_ui_Components_GifDrawable_restoreRemainder(JNIEnv *env, jclass class, jobject gifInfo) {
    GifInfo *const state = (GifInfo *) gifInfo;

    if (state != NULL && state->lastFrameReaminder != ULONG_MAX) {
        state->nextStartTime = getRealTime() + state->lastFrameReaminder;
        state->lastFrameReaminder = ULONG_MAX;
    }
}
/*
 * JNI entry point: opens the GIF file named by jfname and builds the native
 * decoder state through open().
 *
 * metaData receives width, height, frame count and error code (see
 * setMetaData). Returns the native handle as jint, or 0 on failure:
 *  - jfname is NULL or the file cannot be opened: D_GIF_ERR_OPEN_FAILED;
 *  - the path string cannot be obtained from the JVM: 0 is returned without
 *    touching metaData, because a Java exception is already pending;
 *  - DGifOpen() fails: the FILE is closed here, since no decoder object
 *    exists that could take ownership of it.
 */
JNIEXPORT jint JNICALL Java_org_telegram_ui_Components_GifDrawable_openFile(JNIEnv *env, jclass class, jintArray metaData, jstring jfname) {
    if (jfname == NULL) {
        setMetaData(0, 0, 0, D_GIF_ERR_OPEN_FAILED, env, metaData);
        return (jint) NULL;
    }
    const char *const fname = (*env)->GetStringUTFChars(env, jfname, 0);
    if (fname == NULL) {
        /* GetStringUTFChars failed; do not make further JNI calls with an exception pending. */
        return (jint) NULL;
    }
    FILE *file = fopen(fname, "rb");
    (*env)->ReleaseStringUTFChars(env, jfname, fname);
    if (file == NULL) {
        setMetaData(0, 0, 0, D_GIF_ERR_OPEN_FAILED, env, metaData);
        return (jint) NULL;
    }
    int Error = 0;
    GifFileType *GifFileIn = DGifOpen(file, &fileReadFunc, &Error);
    /* Read the position before the stream is possibly closed below. */
    const int startPos = (int) ftell(file);
    if (GifFileIn == NULL) {
        /* Nothing references the stream on this path, so it would leak otherwise. */
        fclose(file);
    }
    return open(GifFileIn, Error, startPos, env, metaData);
}

View File

@ -1,7 +0,0 @@
#ifndef gif_h
#define gif_h
jint gifOnJNILoad(JavaVM *vm, void *reserved, JNIEnv *env);
void gifOnJNIUnload(JavaVM *vm, void *reserved);
#endif

View File

@ -1,13 +0,0 @@
// giflib config.h
#ifndef GIF_CONFIG_H_DEFINED
#define GIF_CONFIG_H_DEFINED
#include <sys/types.h>
#define HAVE_STDINT_H
#define HAVE_FCNTL_H
typedef uint32_t UINT32;
#endif

File diff suppressed because it is too large Load Diff

View File

@ -1,132 +0,0 @@
/*****************************************************************************
gif_hash.c -- module to support the following operations:
1. InitHashTable - initialize hash table.
2. ClearHashTable - clear the hash table to an empty state.
3. InsertHashTable - insert one item into data structure.
4. ExistsHashTable - test if item exists in data structure.
This module is used to hash the GIF codes during encoding.
*****************************************************************************/
#include <unistd.h>
#include <stdint.h>
#include <stdlib.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include "gif_lib.h"
#include "gif_hash.h"
#include "gif_lib_private.h"
/* #define DEBUG_HIT_RATE Debug number of misses per hash Insert/Exists. */
#ifdef DEBUG_HIT_RATE
static long NumberOfTests = 0,
NumberOfMisses = 0;
#endif /* DEBUG_HIT_RATE */
static int KeyItem(uint32_t Item);
/******************************************************************************
 Allocate a hash table on the heap and reset it to the empty state.
 Returns the new table, or NULL when the allocation fails.
******************************************************************************/
GifHashTableType *_InitHashTable(void)
{
    GifHashTableType *table = (GifHashTableType *) malloc(sizeof(GifHashTableType));

    if (table != NULL)
        _ClearHashTable(table);

    return table;
}
/******************************************************************************
 Reset the hash table to the empty state by setting every byte of the slot
 array to 0xFF, so that each slot reads as the "empty" key 0xFFFFF.
******************************************************************************/
void _ClearHashTable(GifHashTableType *HashTable)
{
    /* HTable is an in-struct array, so sizeof covers all HT_SIZE slots. */
    memset(HashTable->HTable, 0xFF, sizeof(HashTable->HTable));
}
/******************************************************************************
 Insert a new Key/Code pair into the hash table. The key is assumed not to
 be present yet. Starting at the key's home slot, slots are probed linearly
 (wrapping at the end of the table) until an empty one is found.
******************************************************************************/
void _InsertHashTable(GifHashTableType *HashTable, uint32_t Key, int Code)
{
    uint32_t *const slots = HashTable->HTable;
    int slot;

#ifdef DEBUG_HIT_RATE
    NumberOfTests++;
    NumberOfMisses++;
#endif /* DEBUG_HIT_RATE */

    for (slot = KeyItem(Key);
         HT_GET_KEY(slots[slot]) != 0xFFFFFL;
         slot = (slot + 1) & HT_KEY_MASK) {
#ifdef DEBUG_HIT_RATE
        NumberOfMisses++;
#endif /* DEBUG_HIT_RATE */
    }

    slots[slot] = HT_PUT_KEY(Key) | HT_PUT_CODE(Code);
}
/******************************************************************************
 Look Key up in the hash table.
 Returns the code stored with the key, or -1 when the key is not present.
 Probing starts at the key's home slot and walks linearly (wrapping at the
 end of the table) until the key or an empty slot is met.
******************************************************************************/
int _ExistsHashTable(GifHashTableType *HashTable, uint32_t Key)
{
    uint32_t *const slots = HashTable->HTable;
    int slot = KeyItem(Key);
    uint32_t stored;

#ifdef DEBUG_HIT_RATE
    NumberOfTests++;
    NumberOfMisses++;
#endif /* DEBUG_HIT_RATE */

    for (;;) {
        stored = HT_GET_KEY(slots[slot]);
        if (stored == 0xFFFFFL)
            break;              /* empty slot: the key is not in the table */
#ifdef DEBUG_HIT_RATE
        NumberOfMisses++;
#endif /* DEBUG_HIT_RATE */
        if (stored == Key)
            return HT_GET_CODE(slots[slot]);
        slot = (slot + 1) & HT_KEY_MASK;
    }

    return -1;
}
/******************************************************************************
 Derive the table slot for a key. The key is assumed to be 20 bits wide:
 the lower 8 bits are the new postfix character and the upper 12 bits the
 prefix code. The key is folded onto itself (shifted right by 12 and XORed)
 and masked to the table size. Because the average hit ratio is only 2
 (2 hash references per entry), evaluating more complex keys (such as twin
 prime keys) is not worth it.
******************************************************************************/
static int KeyItem(uint32_t Item)
{
    const uint32_t folded = Item ^ (Item >> 12);

    return folded & HT_KEY_MASK;
}
#ifdef DEBUG_HIT_RATE
/******************************************************************************
 Debugging routine to print the hit ratio - the number of slot probes per
 hash table operation. This routine was used to test the KeyItem routine.
 When no operation has been recorded yet the percentage is reported as
 "n/a" instead of dividing by zero.
******************************************************************************/
void HashTablePrintHitRatio(void)
{
    if (NumberOfTests == 0) {
        printf("Hash Table Hit Ratio is %ld/%ld = n/a.\n",
               NumberOfMisses, NumberOfTests);
        return;
    }
    printf("Hash Table Hit Ratio is %ld/%ld = %ld%%.\n",
           NumberOfMisses, NumberOfTests,
           NumberOfMisses * 100 / NumberOfTests);
}
#endif /* DEBUG_HIT_RATE */
/* end */
/* end */

View File

@ -1,39 +0,0 @@
/******************************************************************************
gif_hash.h - magic constants and declarations for GIF LZW
******************************************************************************/
#ifndef _GIF_HASH_H_
#define _GIF_HASH_H_
#include <unistd.h>
#include <stdint.h>
#define HT_SIZE 8192 /* 12bits = 4096 or twice as big! */
#define HT_KEY_MASK 0x1FFF /* 13bits keys */
#define HT_KEY_NUM_BITS 13 /* 13bits keys */
#define HT_MAX_KEY 8191 /* 13bits - 1, maximal code possible */
#define HT_MAX_CODE 4095 /* Biggest code possible in 12 bits. */
/* Each 32-bit table slot is divided into two parts, the key and the code: */
/* 1. The code is 12 bits as our compression algorithm is limited to 12 bits. */
/* 2. The key is a 12-bit prefix code + 8-bit new char, i.e. 20 bits. */
/* The key is the upper 20 bits. The code is the lower 12. */
/* Arguments are parenthesised so that expressions can be passed safely. */
#define HT_GET_KEY(l) ((l) >> 12)
#define HT_GET_CODE(l) ((l) & 0x0FFF)
#define HT_PUT_KEY(l) ((l) << 12)
#define HT_PUT_CODE(l) ((l) & 0x0FFF)
/* Hash table storage: HT_SIZE 32-bit slots, each packing a key and a code
 * (see the HT_GET_xxx / HT_PUT_xxx macros). */
typedef struct GifHashTableType {
uint32_t HTable[HT_SIZE];
} GifHashTableType;
GifHashTableType *_InitHashTable(void);
void _ClearHashTable(GifHashTableType *HashTable);
void _InsertHashTable(GifHashTableType *HashTable, uint32_t Key, int Code);
int _ExistsHashTable(GifHashTableType *HashTable, uint32_t Key);
#endif /* _GIF_HASH_H_ */
/* end */

View File

@ -1,307 +0,0 @@
/******************************************************************************
gif_lib.h - service library for decoding and encoding GIF images
*****************************************************************************/
#ifndef _GIF_LIB_H_
#define _GIF_LIB_H_ 1
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
#define GIFLIB_MAJOR 5
#define GIFLIB_MINOR 0
#define GIFLIB_RELEASE 5
#define GIF_ERROR 0
#define GIF_OK 1
#include <stddef.h>
#include <stdbool.h>
#define GIF_STAMP "GIFVER" /* First chars in file - GIF stamp. */
/* Length of the stamp without the terminating NUL; parenthesised so the
 * macro can be used inside larger expressions. */
#define GIF_STAMP_LEN (sizeof(GIF_STAMP) - 1)
#define GIF_VERSION_POS 3 /* Version first character in stamp. */
#define GIF87_STAMP "GIF87a" /* First chars in file - GIF stamp. */
#define GIF89_STAMP "GIF89a" /* First chars in file - GIF stamp. */
typedef unsigned char GifPixelType;
typedef unsigned char *GifRowType;
typedef unsigned char GifByteType;
typedef unsigned int GifPrefixType;
typedef int GifWord;
typedef struct GifColorType {
GifByteType Red, Green, Blue;
} GifColorType;
typedef struct ColorMapObject {
int ColorCount;
int BitsPerPixel;
bool SortFlag;
GifColorType *Colors; /* on malloc(3) heap */
} ColorMapObject;
typedef struct GifImageDesc {
GifWord Left, Top, Width, Height; /* Current image dimensions. */
bool Interlace; /* Sequential/Interlaced lines. */
ColorMapObject *ColorMap; /* The local color map */
} GifImageDesc;
typedef struct ExtensionBlock {
int ByteCount;
GifByteType *Bytes; /* on malloc(3) heap */
int Function; /* The block function code */
#define CONTINUE_EXT_FUNC_CODE 0x00 /* continuation subblock */
#define COMMENT_EXT_FUNC_CODE 0xfe /* comment */
#define GRAPHICS_EXT_FUNC_CODE 0xf9 /* graphics control (GIF89) */
#define PLAINTEXT_EXT_FUNC_CODE 0x01 /* plaintext */
#define APPLICATION_EXT_FUNC_CODE 0xff /* application block */
} ExtensionBlock;
typedef struct SavedImage {
GifImageDesc ImageDesc;
GifByteType *RasterBits; /* on malloc(3) heap */
int ExtensionBlockCount; /* Count of extensions before image */
ExtensionBlock *ExtensionBlocks; /* Extensions before image */
} SavedImage;
typedef struct GifFileType {
GifWord SWidth, SHeight; /* Size of virtual canvas */
GifWord SColorResolution; /* How many colors can we generate? */
GifWord SBackGroundColor; /* Background color for virtual canvas */
GifByteType AspectByte; /* Used to compute pixel aspect ratio */
ColorMapObject *SColorMap; /* Global colormap, NULL if nonexistent. */
int ImageCount; /* Number of current image (both APIs) */
GifImageDesc Image; /* Current image (low-level API) */
SavedImage *SavedImages; /* Image sequence (high-level API) */
int ExtensionBlockCount; /* Count extensions past last image */
ExtensionBlock *ExtensionBlocks; /* Extensions past last image */
int Error; /* Last error condition reported */
void *UserData; /* hook to attach user data (TVT) */
void *Private; /* Don't mess with this! */
} GifFileType;
#define GIF_ASPECT_RATIO(n) ((n)+15.0/64.0)
typedef enum {
UNDEFINED_RECORD_TYPE,
SCREEN_DESC_RECORD_TYPE,
IMAGE_DESC_RECORD_TYPE, /* Begin with ',' */
EXTENSION_RECORD_TYPE, /* Begin with '!' */
TERMINATE_RECORD_TYPE /* Begin with ';' */
} GifRecordType;
/* func type to read gif data from arbitrary sources (TVT) */
typedef int (*InputFunc) (GifFileType *, GifByteType *, int);
/* func type to write gif data to arbitrary targets.
* Returns count of bytes written. (MRB)
*/
typedef int (*OutputFunc) (GifFileType *, const GifByteType *, int);
/******************************************************************************
GIF89 structures
******************************************************************************/
/* Decoded form of a GIF89 graphics control extension block. */
typedef struct GraphicsControlBlock {
int DisposalMode;
#define DISPOSAL_UNSPECIFIED 0 /* No disposal specified. */
#define DISPOSE_DO_NOT 1 /* Leave image in place */
#define DISPOSE_BACKGROUND 2 /* Set area to background color */
#define DISPOSE_PREVIOUS 3 /* Restore to previous content */
bool UserInputFlag; /* User confirmation required before disposal */
int DelayTime; /* pre-display delay in 0.01sec units */
int TransparentColor; /* Palette index for transparency, -1 if none */
#define NO_TRANSPARENT_COLOR -1
} GraphicsControlBlock;
/******************************************************************************
GIF encoding routines
******************************************************************************/
/* Main entry points */
GifFileType *EGifOpenFileName(const char *GifFileName,
const bool GifTestExistence, int *Error);
GifFileType *EGifOpenFileHandle(const int GifFileHandle, int *Error);
GifFileType *EGifOpen(void *userPtr, OutputFunc writeFunc, int *Error);
int EGifSpew(GifFileType * GifFile);
char *EGifGetGifVersion(GifFileType *GifFile); /* new in 5.x */
int EGifCloseFile(GifFileType * GifFile);
#define E_GIF_ERR_OPEN_FAILED 1 /* And EGif possible errors. */
#define E_GIF_ERR_WRITE_FAILED 2
#define E_GIF_ERR_HAS_SCRN_DSCR 3
#define E_GIF_ERR_HAS_IMAG_DSCR 4
#define E_GIF_ERR_NO_COLOR_MAP 5
#define E_GIF_ERR_DATA_TOO_BIG 6
#define E_GIF_ERR_NOT_ENOUGH_MEM 7
#define E_GIF_ERR_DISK_IS_FULL 8
#define E_GIF_ERR_CLOSE_FAILED 9
#define E_GIF_ERR_NOT_WRITEABLE 10
/* These are legacy. You probably do not want to call them directly */
int EGifPutScreenDesc(GifFileType *GifFile,
const int GifWidth, const int GifHeight,
const int GifColorRes,
const int GifBackGround,
const ColorMapObject *GifColorMap);
int EGifPutImageDesc(GifFileType *GifFile,
const int GifLeft, const int GifTop,
const int GifWidth, const int GifHeight,
const bool GifInterlace,
const ColorMapObject *GifColorMap);
void EGifSetGifVersion(GifFileType *GifFile, const bool gif89);
int EGifPutLine(GifFileType *GifFile, GifPixelType *GifLine,
int GifLineLen);
int EGifPutPixel(GifFileType *GifFile, const GifPixelType GifPixel);
int EGifPutComment(GifFileType *GifFile, const char *GifComment);
int EGifPutExtensionLeader(GifFileType *GifFile, const int GifExtCode);
int EGifPutExtensionBlock(GifFileType *GifFile,
const int GifExtLen, const void *GifExtension);
int EGifPutExtensionTrailer(GifFileType *GifFile);
int EGifPutExtension(GifFileType *GifFile, const int GifExtCode,
const int GifExtLen,
const void *GifExtension);
int EGifPutCode(GifFileType *GifFile, int GifCodeSize,
const GifByteType *GifCodeBlock);
int EGifPutCodeNext(GifFileType *GifFile,
const GifByteType *GifCodeBlock);
/******************************************************************************
GIF decoding routines
******************************************************************************/
/* Main entry points */
GifFileType *DGifOpenFileName(const char *GifFileName, int *Error);
GifFileType *DGifOpenFileHandle(int GifFileHandle, int *Error);
int DGifSlurp(GifFileType * GifFile);
GifFileType *DGifOpen(void *userPtr, InputFunc readFunc, int *Error); /* new one (TVT) */
int DGifCloseFile(GifFileType * GifFile);
#define D_GIF_ERR_OPEN_FAILED 101 /* And DGif possible errors. */
#define D_GIF_ERR_READ_FAILED 102
#define D_GIF_ERR_NOT_GIF_FILE 103
#define D_GIF_ERR_NO_SCRN_DSCR 104
#define D_GIF_ERR_NO_IMAG_DSCR 105
#define D_GIF_ERR_NO_COLOR_MAP 106
#define D_GIF_ERR_WRONG_RECORD 107
#define D_GIF_ERR_DATA_TOO_BIG 108
#define D_GIF_ERR_NOT_ENOUGH_MEM 109
#define D_GIF_ERR_CLOSE_FAILED 110
#define D_GIF_ERR_NOT_READABLE 111
#define D_GIF_ERR_IMAGE_DEFECT 112
#define D_GIF_ERR_EOF_TOO_SOON 113
/* These are legacy. You probably do not want to call them directly */
int DGifGetScreenDesc(GifFileType *GifFile);
int DGifGetRecordType(GifFileType *GifFile, GifRecordType *GifType);
int DGifGetImageDesc(GifFileType *GifFile, bool changeImageCount);
int DGifGetLine(GifFileType *GifFile, GifPixelType *GifLine, int GifLineLen);
int DGifGetPixel(GifFileType *GifFile, GifPixelType GifPixel);
int DGifGetComment(GifFileType *GifFile, char *GifComment);
int DGifGetExtension(GifFileType *GifFile, int *GifExtCode,
GifByteType **GifExtension);
int DGifGetExtensionNext(GifFileType *GifFile, GifByteType **GifExtension,int* ExtCode);
int DGifGetCode(GifFileType *GifFile, int *GifCodeSize,
GifByteType **GifCodeBlock);
int DGifGetCodeNext(GifFileType *GifFile, GifByteType **GifCodeBlock);
int DGifGetLZCodes(GifFileType *GifFile, int *GifCode);
/******************************************************************************
Color table quantization (deprecated)
******************************************************************************/
int GifQuantizeBuffer(unsigned int Width, unsigned int Height,
int *ColorMapSize, GifByteType * RedInput,
GifByteType * GreenInput, GifByteType * BlueInput,
GifByteType * OutputBuffer,
GifColorType * OutputColorMap);
/******************************************************************************
Error handling and reporting.
******************************************************************************/
extern char *GifErrorString(int ErrorCode); /* new in 2012 - ESR */
/*****************************************************************************
Everything below this point is new after version 1.2, supporting `slurp
mode' for doing I/O in two big belts with all the image-bashing in core.
******************************************************************************/
/******************************************************************************
Color map handling from gif_alloc.c
******************************************************************************/
extern ColorMapObject *GifMakeMapObject(int ColorCount,
const GifColorType *ColorMap);
extern void GifFreeMapObject(ColorMapObject *Object);
extern ColorMapObject *GifUnionColorMap(const ColorMapObject *ColorIn1,
const ColorMapObject *ColorIn2,
GifPixelType ColorTransIn2[]);
extern int GifBitSize(int n);
/******************************************************************************
Support for the in-core structures allocation (slurp mode).
******************************************************************************/
extern void GifApplyTranslation(SavedImage *Image, GifPixelType Translation[]);
extern int GifAddExtensionBlock(int *ExtensionBlock_Count,
ExtensionBlock **ExtensionBlocks,
int Function,
unsigned int Len, unsigned char ExtData[]);
extern void GifFreeExtensions(int *ExtensionBlock_Count,
ExtensionBlock **ExtensionBlocks);
extern SavedImage *GifMakeSavedImage(GifFileType *GifFile,
const SavedImage *CopyFrom);
extern void GifFreeSavedImages(GifFileType *GifFile);
/******************************************************************************
5.x functions for GIF89 graphics control blocks
******************************************************************************/
int DGifExtensionToGCB(const size_t GifExtensionLength,
const GifByteType *GifExtension,
GraphicsControlBlock *GCB);
size_t EGifGCBToExtension(const GraphicsControlBlock *GCB,
GifByteType *GifExtension);
int DGifSavedExtensionToGCB(GifFileType *GifFile,
int ImageIndex,
GraphicsControlBlock *GCB);
int EGifGCBToSavedExtension(const GraphicsControlBlock *GCB,
GifFileType *GifFile,
int ImageIndex);
/******************************************************************************
The library's internal utility font
******************************************************************************/
#define GIF_FONT_WIDTH 8
#define GIF_FONT_HEIGHT 8
extern const unsigned char GifAsciiTable8x8[][GIF_FONT_WIDTH];
extern void GifDrawText8x8(SavedImage *Image,
const int x, const int y,
const char *legend, const int color);
extern void GifDrawBox(SavedImage *Image,
const int x, const int y,
const int w, const int d, const int color);
extern void GifDrawRectangle(SavedImage *Image,
const int x, const int y,
const int w, const int d, const int color);
extern void GifDrawBoxedText8x8(SavedImage *Image,
const int x, const int y,
const char *legend,
const int border, const int bg, const int fg);
#ifdef __cplusplus
}
#endif /* __cplusplus */
#endif /* _GIF_LIB_H */
/* end */

View File

@ -1,59 +0,0 @@
/****************************************************************************
gif_lib_private.h - internal giflib routines and structures
****************************************************************************/
#ifndef _GIF_LIB_PRIVATE_H
#define _GIF_LIB_PRIVATE_H
#include "gif_lib.h"
#include "gif_hash.h"
#define EXTENSION_INTRODUCER 0x21
#define DESCRIPTOR_INTRODUCER 0x2c
#define TERMINATOR_INTRODUCER 0x3b
#define LZ_MAX_CODE 4095 /* Biggest code possible in 12 bits. */
#define LZ_BITS 12
#define FLUSH_OUTPUT 4096 /* Impossible code, to signal flush. */
#define FIRST_CODE 4097 /* Impossible code, to signal first. */
#define NO_SUCH_CODE 4098 /* Impossible code, to signal empty. */
/* Bit flags stored in GifFilePrivateType.FileState. */
#define FILE_STATE_WRITE 0x01
#define FILE_STATE_SCREEN 0x02
#define FILE_STATE_IMAGE 0x04
#define FILE_STATE_READ 0x08
/* Non-zero when the private state is flagged for reading / writing.
 * The argument is parenthesised so that any pointer expression
 * (e.g. &state) can be passed. */
#define IS_READABLE(Private) ((Private)->FileState & FILE_STATE_READ)
#define IS_WRITEABLE(Private) ((Private)->FileState & FILE_STATE_WRITE)
typedef struct GifFilePrivateType {
GifWord FileState, FileHandle, /* Where all this data goes to! */
BitsPerPixel, /* Bits per pixel (Codes uses at least this + 1). */
ClearCode, /* The CLEAR LZ code. */
EOFCode, /* The EOF LZ code. */
RunningCode, /* The next code algorithm can generate. */
RunningBits, /* The number of bits required to represent RunningCode. */
MaxCode1, /* 1 bigger than max. possible code, in RunningBits bits. */
LastCode, /* The code before the current code. */
CrntCode, /* Current algorithm code. */
StackPtr, /* For character stack (see below). */
CrntShiftState; /* Number of bits in CrntShiftDWord. */
unsigned long CrntShiftDWord; /* For bytes decomposition into codes. */
unsigned long PixelCount; /* Number of pixels in image. */
FILE *File; /* File as stream. */
InputFunc Read; /* function to read gif input (TVT) */
OutputFunc Write; /* function to write gif output (MRB) */
GifByteType Buf[256]; /* Compressed input is buffered here. */
GifByteType Stack[LZ_MAX_CODE]; /* Decoded pixels are stacked here. */
GifByteType Suffix[LZ_MAX_CODE + 1]; /* So we can trace the codes. */
GifPrefixType Prefix[LZ_MAX_CODE + 1];
GifHashTableType *HashTable;
bool gif89;
} GifFilePrivateType;
#endif /* _GIF_LIB_PRIVATE_H */
/* end */

View File

@ -1,400 +0,0 @@
/*****************************************************************************
GIF construction tools
****************************************************************************/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "gif_lib.h"
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
/******************************************************************************
Miscellaneous utility functions
******************************************************************************/
/*
 * Return the smallest bit count b in 1..8 for which (1 << b) >= n.
 * When n is larger than 256 no such count exists and 9 is returned.
 */
int
GifBitSize(int n)
{
    int bits = 1;

    while (bits <= 8 && (1 << bits) < n)
        bits++;

    return bits;
}
/******************************************************************************
Color map object functions
******************************************************************************/
/*
* Allocate a color map of given size; initialize with contents of
* ColorMap if that pointer is non-NULL.
*/
ColorMapObject *
GifMakeMapObject(int ColorCount, const GifColorType *ColorMap)
{
ColorMapObject *Object;
/*** FIXME: Our ColorCount has to be a power of two. Is it necessary to
* make the user know that or should we automatically round up instead? */
if (ColorCount != (1 << GifBitSize(ColorCount))) {
return ((ColorMapObject *) NULL);
}
Object = (ColorMapObject *)malloc(sizeof(ColorMapObject));
if (Object == (ColorMapObject *) NULL) {
return ((ColorMapObject *) NULL);
}
Object->Colors = (GifColorType *)calloc(ColorCount, sizeof(GifColorType));
if (Object->Colors == (GifColorType *) NULL) {
free(Object);
return ((ColorMapObject *) NULL);
}
Object->ColorCount = ColorCount;
Object->BitsPerPixel = GifBitSize(ColorCount);
if (ColorMap != NULL) {
memcpy((char *)Object->Colors,
(char *)ColorMap, ColorCount * sizeof(GifColorType));
}
return (Object);
}
/*******************************************************************************
 Release a color map object together with its color array.
 Passing NULL is allowed and does nothing.
*******************************************************************************/
void
GifFreeMapObject(ColorMapObject *Object)
{
    if (Object == NULL)
        return;

    free(Object->Colors);
    free(Object);
}
#ifdef DEBUG
/*
 * Debug helper: print the color map to fp, four entries per line, each as
 * "index: RR GG BB" in hexadecimal. A NULL map prints nothing.
 */
void
DumpColorMap(ColorMapObject *Object,
             FILE * fp)
{
    if (Object != NULL) {
        int i, j, Len = Object->ColorCount;

        for (i = 0; i < Len; i += 4) {
            /* bound the absolute index i + j, not just the column j */
            for (j = 0; j < 4 && i + j < Len; j++) {
                (void)fprintf(fp, "%3d: %02x %02x %02x   ", i + j,
                              Object->Colors[i + j].Red,
                              Object->Colors[i + j].Green,
                              Object->Colors[i + j].Blue);
            }
            (void)fprintf(fp, "\n");
        }
    }
}
#endif /* DEBUG */
/*******************************************************************************
Compute the union of two given color maps and return it. If result can't
fit into 256 colors, NULL is returned, the allocated union otherwise.
ColorIn1 is copied as is to ColorUnion, while colors from ColorIn2 are
copied iff they didn't exist before. ColorTransIn2 maps the old
ColorIn2 into the ColorUnion color map table.
*******************************************************************************/
ColorMapObject *
GifUnionColorMap(const ColorMapObject *ColorIn1,
const ColorMapObject *ColorIn2,
GifPixelType ColorTransIn2[])
{
int i, j, CrntSlot, RoundUpTo, NewGifBitSize;
ColorMapObject *ColorUnion;
/*
* We don't worry about duplicates within either color map; if
* the caller wants to resolve those, he can perform unions
* with an empty color map.
*/
/* Allocate table which will hold the result for sure. */
ColorUnion = GifMakeMapObject(MAX(ColorIn1->ColorCount,
ColorIn2->ColorCount) * 2, NULL);
if (ColorUnion == NULL)
return (NULL);
/*
* Copy ColorIn1 to ColorUnion.
*/
for (i = 0; i < ColorIn1->ColorCount; i++)
ColorUnion->Colors[i] = ColorIn1->Colors[i];
CrntSlot = ColorIn1->ColorCount;
/*
* Potentially obnoxious hack:
*
* Back CrntSlot down past all contiguous {0, 0, 0} slots at the end
* of table 1. This is very useful if your display is limited to
* 16 colors.
*/
while (ColorIn1->Colors[CrntSlot - 1].Red == 0
&& ColorIn1->Colors[CrntSlot - 1].Green == 0
&& ColorIn1->Colors[CrntSlot - 1].Blue == 0)
CrntSlot--;
/* Copy ColorIn2 to ColorUnion (use old colors if they exist): */
for (i = 0; i < ColorIn2->ColorCount && CrntSlot <= 256; i++) {
/* Let's see if this color already exists: */
for (j = 0; j < ColorIn1->ColorCount; j++)
if (memcmp (&ColorIn1->Colors[j], &ColorIn2->Colors[i],
sizeof(GifColorType)) == 0)
break;
if (j < ColorIn1->ColorCount)
ColorTransIn2[i] = j; /* color exists in Color1 */
else {
/* Color is new - copy it to a new slot: */
ColorUnion->Colors[CrntSlot] = ColorIn2->Colors[i];
ColorTransIn2[i] = CrntSlot++;
}
}
if (CrntSlot > 256) {
GifFreeMapObject(ColorUnion);
return ((ColorMapObject *) NULL);
}
NewGifBitSize = GifBitSize(CrntSlot);
RoundUpTo = (1 << NewGifBitSize);
if (RoundUpTo != ColorUnion->ColorCount) {
register GifColorType *Map = ColorUnion->Colors;
/*
* Zero out slots up to next power of 2.
* We know these slots exist because of the way ColorUnion's
* start dimension was computed.
*/
for (j = CrntSlot; j < RoundUpTo; j++)
Map[j].Red = Map[j].Green = Map[j].Blue = 0;
/* perhaps we can shrink the map? */
if (RoundUpTo < ColorUnion->ColorCount)
ColorUnion->Colors = (GifColorType *)realloc(Map,
sizeof(GifColorType) * RoundUpTo);
}
ColorUnion->ColorCount = RoundUpTo;
ColorUnion->BitsPerPixel = NewGifBitSize;
return (ColorUnion);
}
/*******************************************************************************
 Remap every pixel of an image's raster through a translation table:
 each raster byte is replaced by Translation[byte].  The pixel count is
 taken from the image descriptor (Height * Width).
*******************************************************************************/
void
GifApplyTranslation(SavedImage *Image, GifPixelType Translation[])
{
    register int PixelCount = Image->ImageDesc.Height * Image->ImageDesc.Width;
    register int Pos = 0;

    while (Pos < PixelCount) {
        /* The old pixel value is the index into the translation table. */
        Image->RasterBits[Pos] = Translation[Image->RasterBits[Pos]];
        Pos++;
    }
}
/******************************************************************************
Extension record functions
******************************************************************************/
/*
 * Append one extension block to a growable array of extension blocks.
 *
 * ExtensionBlockCount: in/out number of valid entries in *ExtensionBlocks;
 *                      incremented only when the new block is complete.
 * ExtensionBlocks:     in/out array pointer; allocated on first use and
 *                      grown by one slot on each later call.
 * Function:            extension function code stored in the new block.
 * Len:                 payload size in bytes.
 * ExtData:             payload to copy, or NULL to leave the newly
 *                      allocated payload buffer uninitialised.
 *
 * Returns GIF_OK on success, GIF_ERROR if any allocation fails.  On
 * failure the blocks already in the array are left intact and still
 * owned by the caller.
 */
int
GifAddExtensionBlock(int *ExtensionBlockCount,
                     ExtensionBlock **ExtensionBlocks,
                     int Function,
                     unsigned int Len,
                     unsigned char ExtData[])
{
    ExtensionBlock *ep;

    if (*ExtensionBlocks == NULL)
        *ExtensionBlocks=(ExtensionBlock *)malloc(sizeof(ExtensionBlock));
    else {
        /*
         * Grow through a temporary: assigning realloc's result straight
         * to *ExtensionBlocks would drop the only pointer to the old
         * array on failure (leak) while the count still claims entries.
         */
        ExtensionBlock *Grown = (ExtensionBlock *)realloc(*ExtensionBlocks,
                                      sizeof(ExtensionBlock) *
                                      (*ExtensionBlockCount + 1));
        if (Grown == NULL)
            return (GIF_ERROR);
        *ExtensionBlocks = Grown;
    }

    if (*ExtensionBlocks == NULL)
        return (GIF_ERROR);

    ep = &(*ExtensionBlocks)[*ExtensionBlockCount];

    ep->Function = Function;
    ep->ByteCount=Len;
    ep->Bytes = (GifByteType *)malloc(ep->ByteCount);
    if (ep->Bytes == NULL)
        return (GIF_ERROR);

    /* Count the slot only now that it is fully usable. */
    (*ExtensionBlockCount)++;

    if (ExtData != NULL) {
        memcpy(ep->Bytes, ExtData, Len);
    }

    return (GIF_OK);
}
/*
 * Free an extension-block array: every block's payload buffer, then the
 * array itself.  Afterwards *ExtensionBlocks is NULL and
 * *ExtensionBlockCount is 0.  Does nothing when *ExtensionBlocks is
 * already NULL.
 */
void
GifFreeExtensions(int *ExtensionBlockCount,
                  ExtensionBlock **ExtensionBlocks)
{
    int Index;

    if (*ExtensionBlocks != NULL) {
        /* Payload buffers first, while the array is still reachable. */
        for (Index = 0; Index < *ExtensionBlockCount; Index++)
            (void)free((char *)(*ExtensionBlocks)[Index].Bytes);

        (void)free((char *)*ExtensionBlocks);
        *ExtensionBlocks = NULL;
        *ExtensionBlockCount = 0;
    }
}
/******************************************************************************
Image block allocation functions
******************************************************************************/
/* Private Function:
 * Frees the last image in the GifFile->SavedImages array.
 *
 * Decrements GifFile->ImageCount and releases the color map, raster and
 * extension blocks of the slot that fell off the end.  The SavedImages
 * array itself is not shrunk or freed.  Does nothing when GifFile or its
 * SavedImages array is NULL, or when there is no image left to remove.
 */
void
FreeLastSavedImage(GifFileType *GifFile)
{
    SavedImage *sp;

    if ((GifFile == NULL) || (GifFile->SavedImages == NULL))
        return;

    /*
     * This function leaves SavedImages allocated, so a non-NULL array
     * with a zero count is a legal state; without this guard the next
     * call would index SavedImages[-1].
     */
    if (GifFile->ImageCount <= 0)
        return;

    /* Remove one SavedImage from the GifFile */
    GifFile->ImageCount--;
    sp = &GifFile->SavedImages[GifFile->ImageCount];

    /* Deallocate its Colormap */
    if (sp->ImageDesc.ColorMap != NULL) {
        GifFreeMapObject(sp->ImageDesc.ColorMap);
        sp->ImageDesc.ColorMap = NULL;
    }

    /* Deallocate the image data */
    if (sp->RasterBits != NULL) {
        free((char *)sp->RasterBits);
        /* Don't leave a dangling pointer in the retired slot. */
        sp->RasterBits = NULL;
    }

    /* Deallocate any extensions */
    GifFreeExtensions(&sp->ExtensionBlockCount, &sp->ExtensionBlocks);

    /*** FIXME: We could realloc the GifFile->SavedImages structure but is
     * there a point to it? Saves some memory but we'd have to do it every
     * time.  If this is used in GifFreeSavedImages then it would be inefficient
     * (The whole array is going to be deallocated.)  If we just use it when
     * we want to free the last Image it's convenient to do it here.
     */
}
/*
 * Append an image block to the SavedImages array.
 *
 * GifFile:  file whose SavedImages array is grown by one slot and whose
 *           ImageCount is incremented.
 * CopyFrom: optional image to duplicate.  When NULL the new slot is
 *           zero-filled.  When non-NULL the new slot gets the same
 *           descriptor plus its own heap copies of the color map, the
 *           raster and the extension blocks (including each block's
 *           payload), so the copy and the source can be freed
 *           independently.
 *
 * Returns a pointer to the new slot, or NULL if an allocation fails.  On
 * failure the images already stored in GifFile, and CopyFrom itself, are
 * left untouched.
 */
SavedImage *
GifMakeSavedImage(GifFileType *GifFile, const SavedImage *CopyFrom)
{
    SavedImage *sp;

    if (GifFile->SavedImages == NULL)
        GifFile->SavedImages = (SavedImage *)malloc(sizeof(SavedImage));
    else {
        /*
         * Grow through a temporary so that a failed realloc does not
         * overwrite (and leak) the existing array.
         */
        SavedImage *Grown = (SavedImage *)realloc(GifFile->SavedImages,
                             sizeof(SavedImage) * (GifFile->ImageCount + 1));
        if (Grown == NULL)
            return ((SavedImage *)NULL);
        GifFile->SavedImages = Grown;
    }

    if (GifFile->SavedImages == NULL)
        return ((SavedImage *)NULL);

    sp = &GifFile->SavedImages[GifFile->ImageCount++];
    memset((char *)sp, '\0', sizeof(SavedImage));

    if (CopyFrom != NULL) {
        size_t RasterSize;
        int i;

        memcpy((char *)sp, CopyFrom, sizeof(SavedImage));

        /*
         * Make our own allocated copies of the heap fields in the
         * copied record.  This guards against potential aliasing
         * problems.
         *
         * Drop the aliased pointers first: every failure path below
         * calls FreeLastSavedImage(), which frees whatever sp points
         * at, and that must never be memory owned by CopyFrom.
         */
        sp->ImageDesc.ColorMap = NULL;
        sp->RasterBits = NULL;
        sp->ExtensionBlocks = NULL;
        sp->ExtensionBlockCount = 0;

        /* first, the local color map */
        if (CopyFrom->ImageDesc.ColorMap != NULL) {
            sp->ImageDesc.ColorMap = GifMakeMapObject(
                                     CopyFrom->ImageDesc.ColorMap->ColorCount,
                                     CopyFrom->ImageDesc.ColorMap->Colors);
            if (sp->ImageDesc.ColorMap == NULL) {
                FreeLastSavedImage(GifFile);
                return (SavedImage *)(NULL);
            }
        }

        /* next, the raster (size computed in size_t, not int) */
        RasterSize = sizeof(GifPixelType) *
                     (size_t)CopyFrom->ImageDesc.Height *
                     (size_t)CopyFrom->ImageDesc.Width;
        sp->RasterBits = (unsigned char *)malloc(RasterSize);
        if (sp->RasterBits == NULL) {
            FreeLastSavedImage(GifFile);
            return (SavedImage *)(NULL);
        }
        if (CopyFrom->RasterBits != NULL)
            memcpy(sp->RasterBits, CopyFrom->RasterBits, RasterSize);
        else
            memset(sp->RasterBits, '\0', RasterSize);    /* no source pixels */

        /* finally, the extension blocks */
        if (CopyFrom->ExtensionBlocks != NULL
            && CopyFrom->ExtensionBlockCount > 0) {
            sp->ExtensionBlocks = (ExtensionBlock *)malloc(
                                  sizeof(ExtensionBlock) *
                                  CopyFrom->ExtensionBlockCount);
            if (sp->ExtensionBlocks == NULL) {
                FreeLastSavedImage(GifFile);
                return (SavedImage *)(NULL);
            }
            memcpy(sp->ExtensionBlocks, CopyFrom->ExtensionBlocks,
                   sizeof(ExtensionBlock) * CopyFrom->ExtensionBlockCount);

            /*
             * The memcpy above duplicated the source's payload pointers.
             * Clear them all before allocating, so that cleanup after a
             * mid-loop failure only ever frees buffers we own.
             */
            for (i = 0; i < CopyFrom->ExtensionBlockCount; i++)
                sp->ExtensionBlocks[i].Bytes = NULL;
            sp->ExtensionBlockCount = CopyFrom->ExtensionBlockCount;

            /* Give every block its own copy of the payload. */
            for (i = 0; i < CopyFrom->ExtensionBlockCount; i++) {
                const ExtensionBlock *Src = &CopyFrom->ExtensionBlocks[i];

                if (Src->Bytes != NULL && Src->ByteCount > 0) {
                    sp->ExtensionBlocks[i].Bytes =
                        (GifByteType *)malloc((size_t)Src->ByteCount);
                    if (sp->ExtensionBlocks[i].Bytes == NULL) {
                        FreeLastSavedImage(GifFile);
                        return (SavedImage *)(NULL);
                    }
                    memcpy(sp->ExtensionBlocks[i].Bytes, Src->Bytes,
                           (size_t)Src->ByteCount);
                }
            }
        }
    }

    return (sp);
}
/*
 * Free every saved image of a GIF file (color map, raster and extension
 * blocks of each one) and then the SavedImages array itself, leaving
 * GifFile->SavedImages set to NULL.  Does nothing when GifFile or its
 * SavedImages array is NULL.
 */
void
GifFreeSavedImages(GifFileType *GifFile)
{
    int Index;

    if (GifFile == NULL || GifFile->SavedImages == NULL)
        return;

    for (Index = 0; Index < GifFile->ImageCount; Index++) {
        SavedImage *Image = &GifFile->SavedImages[Index];

        /* local color map, if the image carries one */
        if (Image->ImageDesc.ColorMap != NULL) {
            GifFreeMapObject(Image->ImageDesc.ColorMap);
            Image->ImageDesc.ColorMap = NULL;
        }

        /* pixel data */
        if (Image->RasterBits != NULL)
            free((char *)Image->RasterBits);

        /* extension blocks attached to this image */
        GifFreeExtensions(&Image->ExtensionBlockCount,
                          &Image->ExtensionBlocks);
    }

    free((char *)GifFile->SavedImages);
    GifFile->SavedImages = NULL;
}
/* end */

View File

@ -68,7 +68,7 @@ static void fastBlurMore(int imageWidth, int imageHeight, int imageStride, void
const int r1 = radius + 1; const int r1 = radius + 1;
const int div = radius * 2 + 1; const int div = radius * 2 + 1;
if (radius > 15 || div >= w || div >= h || w * h > 128 * 128 || imageStride > imageWidth * 4) { if (radius > 15 || div >= w || div >= h || w * h > 150 * 150 || imageStride > imageWidth * 4) {
return; return;
} }
@ -151,6 +151,9 @@ static void fastBlurMore(int imageWidth, int imageHeight, int imageStride, void
static void fastBlur(int imageWidth, int imageHeight, int imageStride, void *pixels, int radius) { static void fastBlur(int imageWidth, int imageHeight, int imageStride, void *pixels, int radius) {
uint8_t *pix = (uint8_t *)pixels; uint8_t *pix = (uint8_t *)pixels;
if (pix == NULL) {
return;
}
const int w = imageWidth; const int w = imageWidth;
const int h = imageHeight; const int h = imageHeight;
const int stride = imageStride; const int stride = imageStride;
@ -169,7 +172,7 @@ static void fastBlur(int imageWidth, int imageHeight, int imageStride, void *pix
return; return;
} }
if (radius > 15 || div >= w || div >= h || w * h > 128 * 128 || imageStride > imageWidth * 4) { if (radius > 15 || div >= w || div >= h || w * h > 150 * 150 || imageStride > imageWidth * 4) {
return; return;
} }
@ -265,18 +268,12 @@ METHODDEF(void) my_error_exit(j_common_ptr cinfo) {
longjmp(myerr->setjmp_buffer, 1); longjmp(myerr->setjmp_buffer, 1);
} }
JNIEXPORT void Java_org_telegram_messenger_Utilities_blurBitmap(JNIEnv *env, jclass class, jobject bitmap, int radius, int unpin) { JNIEXPORT void Java_org_telegram_messenger_Utilities_blurBitmap(JNIEnv *env, jclass class, jobject bitmap, int radius, int unpin, int width, int height, int stride) {
if (!bitmap) { if (!bitmap) {
return; return;
} }
AndroidBitmapInfo info; if (!width || !height || !stride) {
if (AndroidBitmap_getInfo(env, bitmap, &info) < 0) {
return;
}
if (info.format != ANDROID_BITMAP_FORMAT_RGBA_8888 || !info.width || !info.height || !info.stride) {
return; return;
} }
@ -285,9 +282,9 @@ JNIEXPORT void Java_org_telegram_messenger_Utilities_blurBitmap(JNIEnv *env, jcl
return; return;
} }
if (radius <= 3) { if (radius <= 3) {
fastBlur(info.width, info.height, info.stride, pixels, radius); fastBlur(width, height, stride, pixels, radius);
} else { } else {
fastBlurMore(info.width, info.height, info.stride, pixels, radius); fastBlurMore(width, height, stride, pixels, radius);
} }
if (unpin) { if (unpin) {
AndroidBitmap_unlockPixels(env, bitmap); AndroidBitmap_unlockPixels(env, bitmap);
@ -399,10 +396,20 @@ JNIEXPORT void Java_org_telegram_messenger_Utilities_calcCDT(JNIEnv *env, jclass
} }
JNIEXPORT int Java_org_telegram_messenger_Utilities_pinBitmap(JNIEnv *env, jclass class, jobject bitmap) { JNIEXPORT int Java_org_telegram_messenger_Utilities_pinBitmap(JNIEnv *env, jclass class, jobject bitmap) {
if (bitmap == NULL) {
return;
}
unsigned char *pixels; unsigned char *pixels;
return AndroidBitmap_lockPixels(env, bitmap, &pixels) >= 0 ? 1 : 0; return AndroidBitmap_lockPixels(env, bitmap, &pixels) >= 0 ? 1 : 0;
} }
JNIEXPORT int Java_org_telegram_messenger_Utilities_unpinBitmap(JNIEnv *env, jclass class, jobject bitmap) {
if (bitmap == NULL) {
return;
}
AndroidBitmap_unlockPixels(env, bitmap);
}
JNIEXPORT void Java_org_telegram_messenger_Utilities_loadBitmap(JNIEnv *env, jclass class, jstring path, jobject bitmap, int scale, int width, int height, int stride) { JNIEXPORT void Java_org_telegram_messenger_Utilities_loadBitmap(JNIEnv *env, jclass class, jstring path, jobject bitmap, int scale, int width, int height, int stride) {
AndroidBitmapInfo info; AndroidBitmapInfo info;

View File

@ -7,10 +7,10 @@
#include <openssl/aes.h> #include <openssl/aes.h>
#include "utils.h" #include "utils.h"
#include "sqlite.h" #include "sqlite.h"
#include "gif.h"
#include "image.h" #include "image.h"
int registerNativeTgNetFunctions(JavaVM *vm, JNIEnv *env); int registerNativeTgNetFunctions(JavaVM *vm, JNIEnv *env);
int gifvideoOnJNILoad(JavaVM *vm, JNIEnv *env);
jint JNI_OnLoad(JavaVM *vm, void *reserved) { jint JNI_OnLoad(JavaVM *vm, void *reserved) {
JNIEnv *env = 0; JNIEnv *env = 0;
@ -28,17 +28,19 @@ jint JNI_OnLoad(JavaVM *vm, void *reserved) {
return -1; return -1;
} }
if (registerNativeTgNetFunctions(vm, env) != JNI_TRUE) { if (gifvideoOnJNILoad(vm, env) == -1) {
return -1; return -1;
} }
gifOnJNILoad(vm, reserved, env); if (registerNativeTgNetFunctions(vm, env) != JNI_TRUE) {
return -1;
}
return JNI_VERSION_1_6; return JNI_VERSION_1_6;
} }
void JNI_OnUnload(JavaVM *vm, void *reserved) { void JNI_OnUnload(JavaVM *vm, void *reserved) {
gifOnJNIUnload(vm, reserved);
} }
JNIEXPORT void Java_org_telegram_messenger_Utilities_aesIgeEncryption(JNIEnv *env, jclass class, jobject buffer, jbyteArray key, jbyteArray iv, jboolean encrypt, int offset, int length) { JNIEXPORT void Java_org_telegram_messenger_Utilities_aesIgeEncryption(JNIEnv *env, jclass class, jobject buffer, jbyteArray key, jbyteArray iv, jboolean encrypt, int offset, int length) {

View File

@ -18,7 +18,6 @@
#include "libyuv/convert_from.h" #include "libyuv/convert_from.h"
#include "libyuv/convert_from_argb.h" #include "libyuv/convert_from_argb.h"
#include "libyuv/cpu_id.h" #include "libyuv/cpu_id.h"
#include "libyuv/format_conversion.h"
#include "libyuv/mjpeg_decoder.h" #include "libyuv/mjpeg_decoder.h"
#include "libyuv/planar_functions.h" #include "libyuv/planar_functions.h"
#include "libyuv/rotate.h" #include "libyuv/rotate.h"

View File

@ -22,6 +22,11 @@ extern "C" {
LIBYUV_API LIBYUV_API
uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed); uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed);
// Scan an opaque argb image and return fourcc based on alpha offset.
// Returns FOURCC_ARGB, FOURCC_BGRA, or 0 if unknown.
LIBYUV_API
uint32 ARGBDetect(const uint8* argb, int stride_argb, int width, int height);
// Sum Square Error - used to compute Mean Square Error or PSNR. // Sum Square Error - used to compute Mean Square Error or PSNR.
LIBYUV_API LIBYUV_API
uint64 ComputeSumSquareError(const uint8* src_a, uint64 ComputeSumSquareError(const uint8* src_a,

View File

@ -71,6 +71,8 @@ int I400ToI420(const uint8* src_y, int src_stride_y,
uint8* dst_v, int dst_stride_v, uint8* dst_v, int dst_stride_v,
int width, int height); int width, int height);
#define J400ToJ420 I400ToI420
// Convert NV12 to I420. // Convert NV12 to I420.
LIBYUV_API LIBYUV_API
int NV12ToI420(const uint8* src_y, int src_stride_y, int NV12ToI420(const uint8* src_y, int src_stride_y,
@ -113,15 +115,6 @@ int M420ToI420(const uint8* src_m420, int src_stride_m420,
uint8* dst_v, int dst_stride_v, uint8* dst_v, int dst_stride_v,
int width, int height); int width, int height);
// Convert Q420 to I420.
LIBYUV_API
int Q420ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_yuy2, int src_stride_yuy2,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// ARGB little endian (bgra in memory) to I420. // ARGB little endian (bgra in memory) to I420.
LIBYUV_API LIBYUV_API
int ARGBToI420(const uint8* src_frame, int src_stride_frame, int ARGBToI420(const uint8* src_frame, int src_stride_frame,
@ -211,8 +204,6 @@ int MJPGSize(const uint8* sample, size_t sample_size,
int* width, int* height); int* width, int* height);
#endif #endif
// Note Bayer formats (BGGR) To I420 are in format_conversion.h
// Convert camera sample to I420 with cropping, rotation and vertical flip. // Convert camera sample to I420 with cropping, rotation and vertical flip.
// "src_size" is needed to parse MJPG. // "src_size" is needed to parse MJPG.
// "dst_stride_y" number of bytes in a row of the dst_y plane. // "dst_stride_y" number of bytes in a row of the dst_y plane.

View File

@ -18,7 +18,6 @@
#include "libyuv/rotate.h" #include "libyuv/rotate.h"
// TODO(fbarchard): This set of functions should exactly match convert.h // TODO(fbarchard): This set of functions should exactly match convert.h
// Add missing Q420.
// TODO(fbarchard): Add tests. Create random content of right size and convert // TODO(fbarchard): Add tests. Create random content of right size and convert
// with C vs Opt and or to I420 and compare. // with C vs Opt and or to I420 and compare.
// TODO(fbarchard): Some of these functions lack parameter setting. // TODO(fbarchard): Some of these functions lack parameter setting.
@ -61,6 +60,22 @@ int I444ToARGB(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb, uint8* dst_argb, int dst_stride_argb,
int width, int height); int width, int height);
// Convert J444 to ARGB.
LIBYUV_API
int J444ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert I444 to ABGR.
LIBYUV_API
int I444ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height);
// Convert I411 to ARGB. // Convert I411 to ARGB.
LIBYUV_API LIBYUV_API
int I411ToARGB(const uint8* src_y, int src_stride_y, int I411ToARGB(const uint8* src_y, int src_stride_y,
@ -69,21 +84,39 @@ int I411ToARGB(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb, uint8* dst_argb, int dst_stride_argb,
int width, int height); int width, int height);
// Convert I400 (grey) to ARGB. // Convert I420 with Alpha to preattenuated ARGB.
LIBYUV_API
int I420AlphaToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
const uint8* src_a, int src_stride_a,
uint8* dst_argb, int dst_stride_argb,
int width, int height, int attenuate);
// Convert I420 with Alpha to preattenuated ABGR.
LIBYUV_API
int I420AlphaToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
const uint8* src_a, int src_stride_a,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height, int attenuate);
// Convert I400 (grey) to ARGB. Reverse of ARGBToI400.
LIBYUV_API LIBYUV_API
int I400ToARGB(const uint8* src_y, int src_stride_y, int I400ToARGB(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb, uint8* dst_argb, int dst_stride_argb,
int width, int height); int width, int height);
// Alias. // Convert J400 (jpeg grey) to ARGB.
#define YToARGB I400ToARGB_Reference
// Convert I400 to ARGB. Reverse of ARGBToI400.
LIBYUV_API LIBYUV_API
int I400ToARGB_Reference(const uint8* src_y, int src_stride_y, int J400ToARGB(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb, uint8* dst_argb, int dst_stride_argb,
int width, int height); int width, int height);
// Alias.
#define YToARGB I400ToARGB
// Convert NV12 to ARGB. // Convert NV12 to ARGB.
LIBYUV_API LIBYUV_API
int NV12ToARGB(const uint8* src_y, int src_stride_y, int NV12ToARGB(const uint8* src_y, int src_stride_y,
@ -104,13 +137,6 @@ int M420ToARGB(const uint8* src_m420, int src_stride_m420,
uint8* dst_argb, int dst_stride_argb, uint8* dst_argb, int dst_stride_argb,
int width, int height); int width, int height);
// TODO(fbarchard): Convert Q420 to ARGB.
// LIBYUV_API
// int Q420ToARGB(const uint8* src_y, int src_stride_y,
// const uint8* src_yuy2, int src_stride_yuy2,
// uint8* dst_argb, int dst_stride_argb,
// int width, int height);
// Convert YUY2 to ARGB. // Convert YUY2 to ARGB.
LIBYUV_API LIBYUV_API
int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2, int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
@ -123,6 +149,70 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
uint8* dst_argb, int dst_stride_argb, uint8* dst_argb, int dst_stride_argb,
int width, int height); int width, int height);
// Convert J420 to ARGB.
LIBYUV_API
int J420ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert J422 to ARGB.
LIBYUV_API
int J422ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert J420 to ABGR.
LIBYUV_API
int J420ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height);
// Convert J422 to ABGR.
LIBYUV_API
int J422ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height);
// Convert H420 to ARGB.
LIBYUV_API
int H420ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert H422 to ARGB.
LIBYUV_API
int H422ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert H420 to ABGR.
LIBYUV_API
int H420ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height);
// Convert H422 to ABGR.
LIBYUV_API
int H422ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height);
// BGRA little endian (argb in memory) to ARGB. // BGRA little endian (argb in memory) to ARGB.
LIBYUV_API LIBYUV_API
int BGRAToARGB(const uint8* src_frame, int src_stride_frame, int BGRAToARGB(const uint8* src_frame, int src_stride_frame,
@ -184,8 +274,6 @@ int MJPGToARGB(const uint8* sample, size_t sample_size,
int dst_width, int dst_height); int dst_width, int dst_height);
#endif #endif
// Note Bayer formats (BGGR) to ARGB are in format_conversion.h.
// Convert camera sample to ARGB with cropping, rotation and vertical flip. // Convert camera sample to ARGB with cropping, rotation and vertical flip.
// "src_size" is needed to parse MJPG. // "src_size" is needed to parse MJPG.
// "dst_stride_argb" number of bytes in a row of the dst_argb plane. // "dst_stride_argb" number of bytes in a row of the dst_argb plane.

View File

@ -57,7 +57,6 @@ int I400Copy(const uint8* src_y, int src_stride_y,
int width, int height); int width, int height);
// TODO(fbarchard): I420ToM420 // TODO(fbarchard): I420ToM420
// TODO(fbarchard): I420ToQ420
LIBYUV_API LIBYUV_API
int I420ToNV12(const uint8* src_y, int src_stride_y, int I420ToNV12(const uint8* src_y, int src_stride_y,
@ -138,6 +137,17 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y,
uint8* dst_frame, int dst_stride_frame, uint8* dst_frame, int dst_stride_frame,
int width, int height); int width, int height);
// Convert I420 To RGB565 with 4x4 dither matrix (16 bytes).
// Values in dither matrix from 0 to 7 recommended.
// The order of the dither matrix is first byte is upper left.
LIBYUV_API
int I420ToRGB565Dither(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
const uint8* dither4x4, int width, int height);
LIBYUV_API LIBYUV_API
int I420ToARGB1555(const uint8* src_y, int src_stride_y, int I420ToARGB1555(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u, const uint8* src_u, int src_stride_u,
@ -152,8 +162,6 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y,
uint8* dst_frame, int dst_stride_frame, uint8* dst_frame, int dst_stride_frame,
int width, int height); int width, int height);
// Note Bayer formats (BGGR) To I420 are in format_conversion.h.
// Convert I420 to specified format. // Convert I420 to specified format.
// "dst_sample_stride" is bytes in a row for the destination. Pass 0 if the // "dst_sample_stride" is bytes in a row for the destination. Pass 0 if the
// buffer has contiguous rows. Can be negative. A multiple of 16 is optimal. // buffer has contiguous rows. Can be negative. A multiple of 16 is optimal.

View File

@ -61,6 +61,16 @@ int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
uint8* dst_rgb565, int dst_stride_rgb565, uint8* dst_rgb565, int dst_stride_rgb565,
int width, int height); int width, int height);
// Convert ARGB To RGB565 with 4x4 dither matrix (16 bytes).
// Values in dither matrix from 0 to 7 recommended.
// The order of the dither matrix is first byte is upper left.
// TODO(fbarchard): Consider pointer to 2d array for dither4x4.
// const uint8(*dither)[4][4];
LIBYUV_API
int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb,
uint8* dst_rgb565, int dst_stride_rgb565,
const uint8* dither4x4, int width, int height);
// Convert ARGB To ARGB1555. // Convert ARGB To ARGB1555.
LIBYUV_API LIBYUV_API
int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb, int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb,
@ -105,6 +115,14 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
uint8* dst_v, int dst_stride_v, uint8* dst_v, int dst_stride_v,
int width, int height); int width, int height);
// Convert ARGB to J422.
LIBYUV_API
int ARGBToJ422(const uint8* src_argb, int src_stride_argb,
uint8* dst_yj, int dst_stride_yj,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert ARGB To I411. // Convert ARGB To I411.
LIBYUV_API LIBYUV_API
int ARGBToI411(const uint8* src_argb, int src_stride_argb, int ARGBToI411(const uint8* src_argb, int src_stride_argb,
@ -125,6 +143,12 @@ int ARGBToI400(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y, uint8* dst_y, int dst_stride_y,
int width, int height); int width, int height);
// Convert ARGB to G. (Reverse of J400toARGB, which replicates G back to ARGB)
LIBYUV_API
int ARGBToG(const uint8* src_argb, int src_stride_argb,
uint8* dst_g, int dst_stride_g,
int width, int height);
// Convert ARGB To NV12. // Convert ARGB To NV12.
LIBYUV_API LIBYUV_API
int ARGBToNV12(const uint8* src_argb, int src_stride_argb, int ARGBToNV12(const uint8* src_argb, int src_stride_argb,

View File

@ -18,9 +18,8 @@ namespace libyuv {
extern "C" { extern "C" {
#endif #endif
// TODO(fbarchard): Consider overlapping bits for different architectures.
// Internal flag to indicate cpuid requires initialization. // Internal flag to indicate cpuid requires initialization.
#define kCpuInit 0x1 static const int kCpuInitialized = 0x1;
// These flags are only valid on ARM processors. // These flags are only valid on ARM processors.
static const int kCpuHasARM = 0x2; static const int kCpuHasARM = 0x2;
@ -37,12 +36,12 @@ static const int kCpuHasAVX = 0x200;
static const int kCpuHasAVX2 = 0x400; static const int kCpuHasAVX2 = 0x400;
static const int kCpuHasERMS = 0x800; static const int kCpuHasERMS = 0x800;
static const int kCpuHasFMA3 = 0x1000; static const int kCpuHasFMA3 = 0x1000;
static const int kCpuHasAVX3 = 0x2000;
// 0x2000, 0x4000, 0x8000 reserved for future X86 flags. // 0x2000, 0x4000, 0x8000 reserved for future X86 flags.
// These flags are only valid on MIPS processors. // These flags are only valid on MIPS processors.
static const int kCpuHasMIPS = 0x10000; static const int kCpuHasMIPS = 0x10000;
static const int kCpuHasMIPS_DSP = 0x20000; static const int kCpuHasMIPS_DSPR2 = 0x20000;
static const int kCpuHasMIPS_DSPR2 = 0x40000;
// Internal function used to auto-init. // Internal function used to auto-init.
LIBYUV_API LIBYUV_API
@ -57,7 +56,7 @@ int ArmCpuCaps(const char* cpuinfo_name);
// returns non-zero if instruction set is detected // returns non-zero if instruction set is detected
static __inline int TestCpuFlag(int test_flag) { static __inline int TestCpuFlag(int test_flag) {
LIBYUV_API extern int cpu_info_; LIBYUV_API extern int cpu_info_;
return (cpu_info_ == kCpuInit ? InitCpuFlags() : cpu_info_) & test_flag; return (!cpu_info_ ? InitCpuFlags() : cpu_info_) & test_flag;
} }
// For testing, allow CPU flags to be disabled. // For testing, allow CPU flags to be disabled.

View File

@ -1,168 +0,0 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_FORMATCONVERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_FORMATCONVERSION_H_
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Convert Bayer RGB formats to I420.
LIBYUV_API
int BayerBGGRToI420(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
LIBYUV_API
int BayerGBRGToI420(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
LIBYUV_API
int BayerGRBGToI420(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
LIBYUV_API
int BayerRGGBToI420(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Temporary API mapper.
#define BayerRGBToI420(b, bs, f, y, ys, u, us, v, vs, w, h) \
BayerToI420(b, bs, y, ys, u, us, v, vs, w, h, f)
LIBYUV_API
int BayerToI420(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height,
uint32 src_fourcc_bayer);
// Convert I420 to Bayer RGB formats.
LIBYUV_API
int I420ToBayerBGGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
LIBYUV_API
int I420ToBayerGBRG(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
LIBYUV_API
int I420ToBayerGRBG(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
LIBYUV_API
int I420ToBayerRGGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
// Temporary API mapper.
#define I420ToBayerRGB(y, ys, u, us, v, vs, b, bs, f, w, h) \
I420ToBayer(y, ys, u, us, v, vs, b, bs, w, h, f)
LIBYUV_API
int I420ToBayer(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height,
uint32 dst_fourcc_bayer);
// Convert Bayer RGB formats to ARGB.
LIBYUV_API
int BayerBGGRToARGB(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
LIBYUV_API
int BayerGBRGToARGB(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
LIBYUV_API
int BayerGRBGToARGB(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
LIBYUV_API
int BayerRGGBToARGB(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Temporary API mapper.
#define BayerRGBToARGB(b, bs, f, a, as, w, h) BayerToARGB(b, bs, a, as, w, h, f)
LIBYUV_API
int BayerToARGB(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_argb, int dst_stride_argb,
int width, int height,
uint32 src_fourcc_bayer);
// Converts ARGB to Bayer RGB formats.
LIBYUV_API
int ARGBToBayerBGGR(const uint8* src_argb, int src_stride_argb,
uint8* dst_bayer, int dst_stride_bayer,
int width, int height);
// ARGB -> Bayer GBRG declaration: one read-only ARGB source with stride, one
// writable Bayer destination plane with stride, plus width and height.
// NOTE(review): return-value convention is not visible in this header.
LIBYUV_API
int ARGBToBayerGBRG(const uint8* src_argb, int src_stride_argb,
uint8* dst_bayer, int dst_stride_bayer,
int width, int height);
// ARGB -> Bayer GRBG declaration: one read-only ARGB source with stride, one
// writable Bayer destination plane with stride, plus width and height.
// NOTE(review): return-value convention is not visible in this header.
LIBYUV_API
int ARGBToBayerGRBG(const uint8* src_argb, int src_stride_argb,
uint8* dst_bayer, int dst_stride_bayer,
int width, int height);
// ARGB -> Bayer RGGB declaration: one read-only ARGB source with stride, one
// writable Bayer destination plane with stride, plus width and height.
// NOTE(review): return-value convention is not visible in this header.
LIBYUV_API
int ARGBToBayerRGGB(const uint8* src_argb, int src_stride_argb,
uint8* dst_bayer, int dst_stride_bayer,
int width, int height);
// Temporary API mapper.
// Forwards the ARGBToBayerRGB argument order, where the fourcc (f) comes
// before width/height, to ARGBToBayer, which takes the fourcc last.
// The ARGB source pair (a, as) must stay ahead of the Bayer destination pair
// (b, bs) because ARGBToBayer is declared as (src_argb, src_stride_argb,
// dst_bayer, dst_stride_bayer, ...). The previous expansion passed the
// destination pair first, swapping source and destination.
#define ARGBToBayerRGB(a, as, b, bs, f, w, h) ARGBToBayer(a, as, b, bs, w, h, f)
// Declares the generic ARGB -> Bayer converter: same source/destination
// arguments as the pattern-specific ARGBToBayer* declarations, with a
// trailing dst_fourcc_bayer argument.
// NOTE(review): dst_fourcc_bayer presumably selects the destination Bayer
// pattern via a FourCC code; confirm accepted values and the return-value
// convention against the implementation.
LIBYUV_API
int ARGBToBayer(const uint8* src_argb, int src_stride_argb,
uint8* dst_bayer, int dst_stride_bayer,
int width, int height,
uint32 dst_fourcc_bayer);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_FORMATCONVERSION_H_ NOLINT

View File

@ -45,6 +45,7 @@ int I400ToI400(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y, uint8* dst_y, int dst_stride_y,
int width, int height); int width, int height);
#define J400ToJ400 I400ToI400
// Copy I422 to I422. // Copy I422 to I422.
#define I422ToI422 I422Copy #define I422ToI422 I422Copy
@ -84,6 +85,18 @@ int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy,
uint8* dst_v, int dst_stride_v, uint8* dst_v, int dst_stride_v,
int width, int height); int width, int height);
LIBYUV_API
int YUY2ToNV12(const uint8* src_yuy2, int src_stride_yuy2,
uint8* dst_y, int dst_stride_y,
uint8* dst_uv, int dst_stride_uv,
int width, int height);
LIBYUV_API
int UYVYToNV12(const uint8* src_uyvy, int src_stride_uyvy,
uint8* dst_y, int dst_stride_y,
uint8* dst_uv, int dst_stride_uv,
int width, int height);
// Convert I420 to I400. (calls CopyPlane ignoring u/v). // Convert I420 to I400. (calls CopyPlane ignoring u/v).
LIBYUV_API LIBYUV_API
int I420ToI400(const uint8* src_y, int src_stride_y, int I420ToI400(const uint8* src_y, int src_stride_y,
@ -93,6 +106,7 @@ int I420ToI400(const uint8* src_y, int src_stride_y,
int width, int height); int width, int height);
// Alias // Alias
#define J420ToJ400 I420ToI400
#define I420ToI420Mirror I420Mirror #define I420ToI420Mirror I420Mirror
// I420 mirror. // I420 mirror.
@ -131,13 +145,6 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y,
uint8* dst_rgb565, int dst_stride_rgb565, uint8* dst_rgb565, int dst_stride_rgb565,
int width, int height); int width, int height);
// Convert NV21 to RGB565.
LIBYUV_API
int NV21ToRGB565(const uint8* src_y, int src_stride_y,
const uint8* src_uv, int src_stride_uv,
uint8* dst_rgb565, int dst_stride_rgb565,
int width, int height);
// I422ToARGB is in convert_argb.h // I422ToARGB is in convert_argb.h
// Convert I422 to BGRA. // Convert I422 to BGRA.
LIBYUV_API LIBYUV_API
@ -163,6 +170,14 @@ int I422ToRGBA(const uint8* src_y, int src_stride_y,
uint8* dst_rgba, int dst_stride_rgba, uint8* dst_rgba, int dst_stride_rgba,
int width, int height); int width, int height);
// Alias
#define RGB24ToRAW RAWToRGB24
LIBYUV_API
int RAWToRGB24(const uint8* src_raw, int src_stride_raw,
uint8* dst_rgb24, int dst_stride_rgb24,
int width, int height);
// Draw a rectangle into I420. // Draw a rectangle into I420.
LIBYUV_API LIBYUV_API
int I420Rect(uint8* dst_y, int dst_stride_y, int I420Rect(uint8* dst_y, int dst_stride_y,
@ -267,13 +282,13 @@ int ARGBCopy(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb, uint8* dst_argb, int dst_stride_argb,
int width, int height); int width, int height);
// Copy ARGB to ARGB. // Copy Alpha channel of ARGB to alpha of ARGB.
LIBYUV_API LIBYUV_API
int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb, int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb, uint8* dst_argb, int dst_stride_argb,
int width, int height); int width, int height);
// Copy ARGB to ARGB. // Copy Y channel to Alpha of ARGB.
LIBYUV_API LIBYUV_API
int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y, int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb, uint8* dst_argb, int dst_stride_argb,
@ -287,6 +302,7 @@ LIBYUV_API
ARGBBlendRow GetARGBBlend(); ARGBBlendRow GetARGBBlend();
// Alpha Blend ARGB images and store to destination. // Alpha Blend ARGB images and store to destination.
// Source is pre-multiplied by alpha using ARGBAttenuate.
// Alpha of destination is set to 255. // Alpha of destination is set to 255.
LIBYUV_API LIBYUV_API
int ARGBBlend(const uint8* src_argb0, int src_stride_argb0, int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
@ -294,6 +310,31 @@ int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
uint8* dst_argb, int dst_stride_argb, uint8* dst_argb, int dst_stride_argb,
int width, int height); int width, int height);
// Alpha Blend plane and store to destination.
// Source is not pre-multiplied by alpha.
LIBYUV_API
int BlendPlane(const uint8* src_y0, int src_stride_y0,
const uint8* src_y1, int src_stride_y1,
const uint8* alpha, int alpha_stride,
uint8* dst_y, int dst_stride_y,
int width, int height);
// Alpha Blend YUV images and store to destination.
// Source is not pre-multiplied by alpha.
// Alpha is full width x height and subsampled to half size to apply to UV.
LIBYUV_API
int I420Blend(const uint8* src_y0, int src_stride_y0,
const uint8* src_u0, int src_stride_u0,
const uint8* src_v0, int src_stride_v0,
const uint8* src_y1, int src_stride_y1,
const uint8* src_u1, int src_stride_u1,
const uint8* src_v1, int src_stride_v1,
const uint8* alpha, int alpha_stride,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Multiply ARGB image by ARGB image. Shifted down by 8. Saturates to 255. // Multiply ARGB image by ARGB image. Shifted down by 8. Saturates to 255.
LIBYUV_API LIBYUV_API
int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0, int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
@ -375,36 +416,57 @@ int ARGBShade(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb, uint8* dst_argb, int dst_stride_argb,
int width, int height, uint32 value); int width, int height, uint32 value);
// Interpolate between two ARGB images using specified amount of interpolation // Interpolate between two images using specified amount of interpolation
// (0 to 255) and store to destination. // (0 to 255) and store to destination.
// 'interpolation' is specified as 8 bit fraction where 0 means 100% src_argb0 // 'interpolation' is specified as 8 bit fraction where 0 means 100% src0
// and 255 means 1% src_argb0 and 99% src_argb1. // and 255 means 1% src0 and 99% src1.
// Internally uses ARGBScale bilinear filtering. LIBYUV_API
// Caveat: This function will write up to 16 bytes beyond the end of dst_argb. int InterpolatePlane(const uint8* src0, int src_stride0,
const uint8* src1, int src_stride1,
uint8* dst, int dst_stride,
int width, int height, int interpolation);
// Interpolate between two ARGB images using specified amount of interpolation
// Internally calls InterpolatePlane with width * 4 (bpp).
LIBYUV_API LIBYUV_API
int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0, int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
const uint8* src_argb1, int src_stride_argb1, const uint8* src_argb1, int src_stride_argb1,
uint8* dst_argb, int dst_stride_argb, uint8* dst_argb, int dst_stride_argb,
int width, int height, int interpolation); int width, int height, int interpolation);
#if defined(__pnacl__) || defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \ // Interpolate between two YUV images using specified amount of interpolation
defined(TARGET_IPHONE_SIMULATOR) // Internally calls InterpolatePlane on each plane where the U and V planes
// are half width and half height.
LIBYUV_API
int I420Interpolate(const uint8* src0_y, int src0_stride_y,
const uint8* src0_u, int src0_stride_u,
const uint8* src0_v, int src0_stride_v,
const uint8* src1_y, int src1_stride_y,
const uint8* src1_u, int src1_stride_u,
const uint8* src1_v, int src1_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height, int interpolation);
#if defined(__pnacl__) || defined(__CLR_VER) || \
(defined(__i386__) && !defined(__SSE2__))
#define LIBYUV_DISABLE_X86 #define LIBYUV_DISABLE_X86
#endif #endif
// The following are available on all x86 platforms:
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
#define HAS_ARGBAFFINEROW_SSE2
#endif
// Row functions for copying a pixels from a source with a slope to a row // Row function for copying pixels from a source with a slope to a row
// of destination. Useful for scaling, rotation, mirror, texture mapping. // of destination. Useful for scaling, rotation, mirror, texture mapping.
LIBYUV_API LIBYUV_API
void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride, void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
uint8* dst_argb, const float* uv_dudv, int width); uint8* dst_argb, const float* uv_dudv, int width);
// The following are available on all x86 platforms:
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
LIBYUV_API LIBYUV_API
void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
uint8* dst_argb, const float* uv_dudv, int width); uint8* dst_argb, const float* uv_dudv, int width);
#define HAS_ARGBAFFINEROW_SSE2
#endif // LIBYUV_DISABLE_X86
// Shuffle ARGB channel order. e.g. BGRA to ARGB. // Shuffle ARGB channel order. e.g. BGRA to ARGB.
// shuffler is 16 bytes and must be aligned. // shuffler is 16 bytes and must be aligned.

File diff suppressed because it is too large Load Diff

View File

@ -34,6 +34,7 @@ void ScalePlane(const uint8* src, int src_stride,
int dst_width, int dst_height, int dst_width, int dst_height,
enum FilterMode filtering); enum FilterMode filtering);
LIBYUV_API
void ScalePlane_16(const uint16* src, int src_stride, void ScalePlane_16(const uint16* src, int src_stride,
int src_width, int src_height, int src_width, int src_height,
uint16* dst, int dst_stride, uint16* dst, int dst_stride,

View File

@ -35,7 +35,6 @@ int ARGBScaleClip(const uint8* src_argb, int src_stride_argb,
int clip_x, int clip_y, int clip_width, int clip_height, int clip_x, int clip_y, int clip_width, int clip_height,
enum FilterMode filtering); enum FilterMode filtering);
// TODO(fbarchard): Implement this.
// Scale with YUV conversion to ARGB and clipping. // Scale with YUV conversion to ARGB and clipping.
LIBYUV_API LIBYUV_API
int YUVToARGBScaleClip(const uint8* src_y, int src_stride_y, int YUVToARGBScaleClip(const uint8* src_y, int src_stride_y,

View File

@ -12,53 +12,79 @@
#define INCLUDE_LIBYUV_SCALE_ROW_H_ #define INCLUDE_LIBYUV_SCALE_ROW_H_
#include "libyuv/basic_types.h" #include "libyuv/basic_types.h"
#include "libyuv/scale.h"
#ifdef __cplusplus #ifdef __cplusplus
namespace libyuv { namespace libyuv {
extern "C" { extern "C" {
#endif #endif
#if defined(__pnacl__) || defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \ #if defined(__pnacl__) || defined(__CLR_VER) || \
defined(TARGET_IPHONE_SIMULATOR) (defined(__i386__) && !defined(__SSE2__))
#define LIBYUV_DISABLE_X86 #define LIBYUV_DISABLE_X86
#endif #endif
// GCC >= 4.7.0 required for AVX2.
#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
#if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7))
#define GCC_HAS_AVX2 1
#endif // GNUC >= 4.7
#endif // __GNUC__
// clang >= 3.4.0 required for AVX2.
#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
#if (__clang_major__ > 3) || (__clang_major__ == 3 && (__clang_minor__ >= 4))
#define CLANG_HAS_AVX2 1
#endif // clang >= 3.4
#endif // __clang__
// Visual C 2012 required for AVX2.
#if defined(_M_IX86) && !defined(__clang__) && \
defined(_MSC_VER) && _MSC_VER >= 1700
#define VISUALC_HAS_AVX2 1
#endif // VisualStudio >= 2012
// The following are available on all x86 platforms: // The following are available on all x86 platforms:
#if !defined(LIBYUV_DISABLE_X86) && \ #if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
#define HAS_SCALEROWDOWN2_SSE2 #define HAS_FIXEDDIV1_X86
#define HAS_SCALEROWDOWN4_SSE2 #define HAS_FIXEDDIV_X86
#define HAS_SCALEROWDOWN34_SSSE3 #define HAS_SCALEARGBCOLS_SSE2
#define HAS_SCALEROWDOWN38_SSSE3 #define HAS_SCALEARGBCOLSUP2_SSE2
#define HAS_SCALEADDROWS_SSE2 #define HAS_SCALEARGBFILTERCOLS_SSSE3
#define HAS_SCALEFILTERCOLS_SSSE3
#define HAS_SCALECOLSUP2_SSE2
#define HAS_SCALEARGBROWDOWN2_SSE2 #define HAS_SCALEARGBROWDOWN2_SSE2
#define HAS_SCALEARGBROWDOWNEVEN_SSE2 #define HAS_SCALEARGBROWDOWNEVEN_SSE2
#define HAS_SCALEARGBCOLS_SSE2 #define HAS_SCALECOLSUP2_SSE2
#define HAS_SCALEARGBFILTERCOLS_SSSE3 #define HAS_SCALEFILTERCOLS_SSSE3
#define HAS_SCALEARGBCOLSUP2_SSE2 #define HAS_SCALEROWDOWN2_SSSE3
#define HAS_FIXEDDIV_X86 #define HAS_SCALEROWDOWN34_SSSE3
#define HAS_FIXEDDIV1_X86 #define HAS_SCALEROWDOWN38_SSSE3
#define HAS_SCALEROWDOWN4_SSSE3
#define HAS_SCALEADDROW_SSE2
#endif
// The following are available on all x86 platforms, but
// require VS2012, clang 3.4 or gcc 4.7.
// The code supports NaCL but requires a new compiler and validator.
#if !defined(LIBYUV_DISABLE_X86) && (defined(VISUALC_HAS_AVX2) || \
defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
#define HAS_SCALEADDROW_AVX2
#define HAS_SCALEROWDOWN2_AVX2
#define HAS_SCALEROWDOWN4_AVX2
#endif #endif
// The following are available on Neon platforms: // The following are available on Neon platforms:
#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \ #if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
(defined(__ARM_NEON__) || defined(LIBYUV_NEON)) (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
#define HAS_SCALEROWDOWN2_NEON #define HAS_SCALEARGBCOLS_NEON
#define HAS_SCALEROWDOWN4_NEON
#define HAS_SCALEROWDOWN34_NEON
#define HAS_SCALEROWDOWN38_NEON
#define HAS_SCALEARGBROWDOWNEVEN_NEON
#define HAS_SCALEARGBROWDOWN2_NEON
#elif !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
(defined(__aarch64__) || defined(LIBYUV_NEON))
#define HAS_SCALEROWDOWN2_NEON
#define HAS_SCALEROWDOWN4_NEON
#define HAS_SCALEROWDOWN34_NEON
#define HAS_SCALEROWDOWN38_NEON
#define HAS_SCALEARGBROWDOWN2_NEON #define HAS_SCALEARGBROWDOWN2_NEON
#define HAS_SCALEARGBROWDOWNEVEN_NEON #define HAS_SCALEARGBROWDOWNEVEN_NEON
#define HAS_SCALEFILTERCOLS_NEON
#define HAS_SCALEROWDOWN2_NEON
#define HAS_SCALEROWDOWN34_NEON
#define HAS_SCALEROWDOWN38_NEON
#define HAS_SCALEROWDOWN4_NEON
#define HAS_SCALEARGBFILTERCOLS_NEON
#endif #endif
// The following are available on Mips platforms: // The following are available on Mips platforms:
@ -172,10 +198,8 @@ void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width); uint8* dst_ptr, int dst_width);
void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride, void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* dst_ptr, int dst_width); uint16* dst_ptr, int dst_width);
void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleAddRow_C(const uint8* src_ptr, uint16* dst_ptr, int src_width);
uint16* dst_ptr, int src_width, int src_height); void ScaleAddRow_16_C(const uint16* src_ptr, uint32* dst_ptr, int src_width);
void ScaleAddRows_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint32* dst_ptr, int src_width, int src_height);
void ScaleARGBRowDown2_C(const uint8* src_argb, void ScaleARGBRowDown2_C(const uint8* src_argb,
ptrdiff_t src_stride, ptrdiff_t src_stride,
uint8* dst_argb, int dst_width); uint8* dst_argb, int dst_width);
@ -202,25 +226,28 @@ void ScaleARGBFilterCols_C(uint8* dst_argb, const uint8* src_argb,
void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb, void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx); int dst_width, int x, int dx);
void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, // Specialized scalers for x86.
void ScaleRowDown2_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width); uint8* dst_ptr, int dst_width);
void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown2Linear_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width); uint8* dst_ptr, int dst_width);
void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown2Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width); uint8* dst_ptr, int dst_width);
void ScaleRowDown2_Unaligned_SSE2(const uint8* src_ptr, void ScaleRowDown2_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width); uint8* dst_ptr, int dst_width);
void ScaleRowDown2Linear_Unaligned_SSE2(const uint8* src_ptr, void ScaleRowDown2Linear_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width); uint8* dst_ptr, int dst_width);
void ScaleRowDown2Box_Unaligned_SSE2(const uint8* src_ptr, void ScaleRowDown2Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width); uint8* dst_ptr, int dst_width);
void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown4_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width); uint8* dst_ptr, int dst_width);
void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown4Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width); uint8* dst_ptr, int dst_width);
void ScaleRowDown4_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width); uint8* dst_ptr, int dst_width);
void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr, void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
@ -237,46 +264,124 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr, void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width); uint8* dst_ptr, int dst_width);
void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown2_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint16* dst_ptr, int src_width, uint8* dst_ptr, int dst_width);
int src_height); void ScaleRowDown2Linear_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Box_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Linear_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Box_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4Box_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4Box_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown34_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown34_1_Box_Any_SSSE3(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown34_0_Box_Any_SSSE3(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown38_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown38_3_Box_Any_SSSE3(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown38_2_Box_Any_SSSE3(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleAddRow_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
void ScaleAddRow_Any_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
void ScaleAddRow_Any_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx); int dst_width, int x, int dx);
void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr, void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx); int dst_width, int x, int dx);
void ScaleARGBRowDown2_SSE2(const uint8* src_argb,
ptrdiff_t src_stride,
uint8* dst_argb, int dst_width); // ARGB Column functions
void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb,
ptrdiff_t src_stride,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
ptrdiff_t src_stride,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
ptrdiff_t src_stride,
int src_stepx,
uint8* dst_argb, int dst_width);
void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb, void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx); int dst_width, int x, int dx);
void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb, void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx); int dst_width, int x, int dx);
void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb, void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx); int dst_width, int x, int dx);
// Row functions. void ScaleARGBFilterCols_NEON(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx);
void ScaleARGBCols_NEON(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx);
void ScaleARGBFilterCols_Any_NEON(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx);
void ScaleARGBCols_Any_NEON(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx);
// ARGB Row functions
void ScaleARGBRowDown2_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb, ptrdiff_t src_stride,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleARGBRowDown2_Any_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDown2Linear_Any_SSE2(const uint8* src_argb,
ptrdiff_t src_stride,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDown2Box_Any_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDown2_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleARGBRowDown2Linear_Any_NEON(const uint8* src_argb,
ptrdiff_t src_stride,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDown2Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx, uint8* dst_argb, int dst_width);
void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride, void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx, int src_stepx,
uint8* dst_argb, int dst_width); uint8* dst_argb, int dst_width);
void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride, void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx, int src_stepx,
uint8* dst_argb, int dst_width); uint8* dst_argb, int dst_width);
void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleARGBRowDownEven_Any_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
uint8* dst, int dst_width); int src_stepx,
void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_argb, int dst_width);
uint8* dst, int dst_width); void ScaleARGBRowDownEvenBox_Any_SSE2(const uint8* src_argb,
ptrdiff_t src_stride,
int src_stepx,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDownEven_Any_NEON(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDownEvenBox_Any_NEON(const uint8* src_argb,
ptrdiff_t src_stride,
int src_stepx,
uint8* dst_argb, int dst_width);
// ScaleRowDown2Box also used by planar functions // ScaleRowDown2Box also used by planar functions
// NEON downscalers with interpolation. // NEON downscalers with interpolation.
@ -284,7 +389,8 @@ void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
// Note - not static due to reuse in convert for 444 to 420. // Note - not static due to reuse in convert for 444 to 420.
void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width); uint8* dst, int dst_width);
void ScaleRowDown2Linear_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width); uint8* dst, int dst_width);
@ -319,6 +425,42 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width); uint8* dst_ptr, int dst_width);
void ScaleRowDown2_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown2Linear_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown2Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown4_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown34_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown34_0_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown34_1_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
// 32 -> 12
void ScaleRowDown38_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
// 32x3 -> 12x1
void ScaleRowDown38_3_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
// 32x2 -> 12x1
void ScaleRowDown38_2_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleAddRow_NEON(const uint8* src_ptr, uint16* dst_ptr, int src_width);
void ScaleAddRow_Any_NEON(const uint8* src_ptr, uint16* dst_ptr, int src_width);
void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx);
void ScaleFilterCols_Any_NEON(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx);
void ScaleRowDown2_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown2_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width); uint8* dst, int dst_width);
void ScaleRowDown2Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown2Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,

View File

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1074 #define LIBYUV_VERSION 1561
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT

View File

@ -62,7 +62,7 @@ enum FourCC {
// 2 Secondary YUV formats: row biplanar. // 2 Secondary YUV formats: row biplanar.
FOURCC_M420 = FOURCC('M', '4', '2', '0'), FOURCC_M420 = FOURCC('M', '4', '2', '0'),
FOURCC_Q420 = FOURCC('Q', '4', '2', '0'), FOURCC_Q420 = FOURCC('Q', '4', '2', '0'), // deprecated.
// 9 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp. // 9 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp.
FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'), FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'),
@ -75,7 +75,7 @@ enum FourCC {
FOURCC_RGBO = FOURCC('R', 'G', 'B', 'O'), // argb1555 LE. FOURCC_RGBO = FOURCC('R', 'G', 'B', 'O'), // argb1555 LE.
FOURCC_R444 = FOURCC('R', '4', '4', '4'), // argb4444 LE. FOURCC_R444 = FOURCC('R', '4', '4', '4'), // argb4444 LE.
// 4 Secondary RGB formats: 4 Bayer Patterns. // 4 Secondary RGB formats: 4 Bayer Patterns. deprecated.
FOURCC_RGGB = FOURCC('R', 'G', 'G', 'B'), FOURCC_RGGB = FOURCC('R', 'G', 'G', 'B'),
FOURCC_BGGR = FOURCC('B', 'G', 'G', 'R'), FOURCC_BGGR = FOURCC('B', 'G', 'G', 'R'),
FOURCC_GRBG = FOURCC('G', 'R', 'B', 'G'), FOURCC_GRBG = FOURCC('G', 'R', 'B', 'G'),
@ -90,7 +90,8 @@ enum FourCC {
FOURCC_YV24 = FOURCC('Y', 'V', '2', '4'), FOURCC_YV24 = FOURCC('Y', 'V', '2', '4'),
FOURCC_YU12 = FOURCC('Y', 'U', '1', '2'), // Linux version of I420. FOURCC_YU12 = FOURCC('Y', 'U', '1', '2'), // Linux version of I420.
FOURCC_J420 = FOURCC('J', '4', '2', '0'), FOURCC_J420 = FOURCC('J', '4', '2', '0'),
FOURCC_J400 = FOURCC('J', '4', '0', '0'), FOURCC_J400 = FOURCC('J', '4', '0', '0'), // unofficial fourcc
FOURCC_H420 = FOURCC('H', '4', '2', '0'), // unofficial fourcc
// 14 Auxiliary aliases. CanonicalFourCC() maps these to canonical fourcc. // 14 Auxiliary aliases. CanonicalFourCC() maps these to canonical fourcc.
FOURCC_IYUV = FOURCC('I', 'Y', 'U', 'V'), // Alias for I420. FOURCC_IYUV = FOURCC('I', 'Y', 'U', 'V'), // Alias for I420.
@ -150,6 +151,7 @@ enum FourCCBpp {
FOURCC_BPP_YU12 = 12, FOURCC_BPP_YU12 = 12,
FOURCC_BPP_J420 = 12, FOURCC_BPP_J420 = 12,
FOURCC_BPP_J400 = 8, FOURCC_BPP_J400 = 8,
FOURCC_BPP_H420 = 12,
FOURCC_BPP_MJPG = 0, // 0 means unknown. FOURCC_BPP_MJPG = 0, // 0 means unknown.
FOURCC_BPP_H264 = 0, FOURCC_BPP_H264 = 0,
FOURCC_BPP_IYUV = 12, FOURCC_BPP_IYUV = 12,

View File

@ -17,38 +17,23 @@
#endif #endif
#include "libyuv/basic_types.h" #include "libyuv/basic_types.h"
#include "libyuv/compare_row.h"
#include "libyuv/cpu_id.h" #include "libyuv/cpu_id.h"
#include "libyuv/row.h" #include "libyuv/row.h"
#include "libyuv/video_common.h"
#ifdef __cplusplus #ifdef __cplusplus
namespace libyuv { namespace libyuv {
extern "C" { extern "C" {
#endif #endif
// hash seed of 5381 recommended.
// Internal C version of HashDjb2 with int sized count for efficiency.
uint32 HashDjb2_C(const uint8* src, int count, uint32 seed);
// This module is for Visual C x86
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || \
(defined(__x86_64__) || (defined(__i386__) && !defined(__pic__))))
#define HAS_HASHDJB2_SSE41
uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed);
#if _MSC_VER >= 1700
#define HAS_HASHDJB2_AVX2
uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed);
#endif
#endif // HAS_HASHDJB2_SSE41
// hash seed of 5381 recommended. // hash seed of 5381 recommended.
LIBYUV_API LIBYUV_API
uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) { uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
const int kBlockSize = 1 << 15; // 32768; const int kBlockSize = 1 << 15; // 32768;
int remainder; int remainder;
uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) = HashDjb2_C; uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) =
HashDjb2_C;
#if defined(HAS_HASHDJB2_SSE41) #if defined(HAS_HASHDJB2_SSE41)
if (TestCpuFlag(kCpuHasSSE41)) { if (TestCpuFlag(kCpuHasSSE41)) {
HashDjb2_SSE = HashDjb2_SSE41; HashDjb2_SSE = HashDjb2_SSE41;
@ -78,22 +63,53 @@ uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
return seed; return seed;
} }
uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count); static uint32 ARGBDetectRow_C(const uint8* argb, int width) {
#if !defined(LIBYUV_DISABLE_NEON) && \ int x;
(defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__)) for (x = 0; x < width - 1; x += 2) {
#define HAS_SUMSQUAREERROR_NEON if (argb[0] != 255) { // First byte is not Alpha of 255, so not ARGB.
uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count); return FOURCC_BGRA;
#endif }
#if !defined(LIBYUV_DISABLE_X86) && \ if (argb[3] != 255) { // 4th byte is not Alpha of 255, so not BGRA.
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) return FOURCC_ARGB;
#define HAS_SUMSQUAREERROR_SSE2 }
uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count); if (argb[4] != 255) { // Second pixel first byte is not Alpha of 255.
#endif return FOURCC_BGRA;
// Visual C 2012 required for AVX2. }
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && _MSC_VER >= 1700 if (argb[7] != 255) { // Second pixel 4th byte is not Alpha of 255.
#define HAS_SUMSQUAREERROR_AVX2 return FOURCC_ARGB;
uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count); }
#endif argb += 8;
}
if (width & 1) {
if (argb[0] != 255) { // First byte is not Alpha of 255, so not ARGB.
return FOURCC_BGRA;
}
if (argb[3] != 255) { // 4th byte is not Alpha of 255, so not BGRA.
return FOURCC_ARGB;
}
}
return 0;
}
// Scan an opaque argb image and return fourcc based on alpha offset.
// Returns FOURCC_ARGB, FOURCC_BGRA, or 0 if unknown.
LIBYUV_API
uint32 ARGBDetect(const uint8* argb, int stride_argb, int width, int height) {
uint32 fourcc = 0;
int h;
// Coalesce rows.
if (stride_argb == width * 4) {
width *= height;
height = 1;
stride_argb = 0;
}
for (h = 0; h < height && fourcc == 0; ++h) {
fourcc = ARGBDetectRow_C(argb, width);
argb += stride_argb;
}
return fourcc;
}
// TODO(fbarchard): Refactor into row function. // TODO(fbarchard): Refactor into row function.
LIBYUV_API LIBYUV_API
@ -114,8 +130,7 @@ uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b,
} }
#endif #endif
#if defined(HAS_SUMSQUAREERROR_SSE2) #if defined(HAS_SUMSQUAREERROR_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2)) {
IS_ALIGNED(src_a, 16) && IS_ALIGNED(src_b, 16)) {
// Note only used for multiples of 16 so count is not checked. // Note only used for multiples of 16 so count is not checked.
SumSquareError = SumSquareError_SSE2; SumSquareError = SumSquareError_SSE2;
} }

View File

@ -10,6 +10,8 @@
#include "libyuv/basic_types.h" #include "libyuv/basic_types.h"
#include "libyuv/compare_row.h"
#ifdef __cplusplus #ifdef __cplusplus
namespace libyuv { namespace libyuv {
extern "C" { extern "C" {

View File

@ -9,6 +9,8 @@
*/ */
#include "libyuv/basic_types.h" #include "libyuv/basic_types.h"
#include "libyuv/compare_row.h"
#include "libyuv/row.h" #include "libyuv/row.h"
#ifdef __cplusplus #ifdef __cplusplus
@ -16,7 +18,8 @@ namespace libyuv {
extern "C" { extern "C" {
#endif #endif
#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) #if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \
!defined(__aarch64__)
uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) { uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
volatile uint32 sse; volatile uint32 sse;
@ -26,7 +29,6 @@ uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
"vmov.u8 q9, #0 \n" "vmov.u8 q9, #0 \n"
"vmov.u8 q11, #0 \n" "vmov.u8 q11, #0 \n"
".p2align 2 \n"
"1: \n" "1: \n"
MEMACCESS(0) MEMACCESS(0)
"vld1.8 {q0}, [%0]! \n" "vld1.8 {q0}, [%0]! \n"
@ -56,46 +58,7 @@ uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
return sse; return sse;
} }
#elif !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) #endif // defined(__ARM_NEON__) && !defined(__aarch64__)
uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
volatile uint32 sse;
asm volatile (
"eor v16.16b, v16.16b, v16.16b \n"
"eor v18.16b, v18.16b, v18.16b \n"
"eor v17.16b, v17.16b, v17.16b \n"
"eor v19.16b, v19.16b, v19.16b \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"ld1 {v0.16b}, [%0], #16 \n"
MEMACCESS(1)
"ld1 {v1.16b}, [%1], #16 \n"
"subs %2, %2, #16 \n"
"usubl v2.8h, v0.8b, v1.8b \n"
"usubl2 v3.8h, v0.16b, v1.16b \n"
"smlal v16.4s, v2.4h, v2.4h \n"
"smlal v17.4s, v3.4h, v3.4h \n"
"smlal2 v18.4s, v2.8h, v2.8h \n"
"smlal2 v19.4s, v3.8h, v3.8h \n"
"bgt 1b \n"
"add v16.4s, v16.4s, v17.4s \n"
"add v18.4s, v18.4s, v19.4s \n"
"add v19.4s, v16.4s, v18.4s \n"
"addv s0, v19.4s \n"
"fmov %w3, s0 \n"
: "+r"(src_a),
"+r"(src_b),
"+r"(count),
"=r"(sse)
:
: "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19");
return sse;
}
#endif // __ARM_NEON__
#ifdef __cplusplus #ifdef __cplusplus
} // extern "C" } // extern "C"

View File

@ -1,158 +0,0 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/basic_types.h"
#include "libyuv/row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
uint32 sse;
asm volatile ( // NOLINT
"pxor %%xmm0,%%xmm0 \n"
"pxor %%xmm5,%%xmm5 \n"
LABELALIGN
"1: \n"
"movdqa " MEMACCESS(0) ",%%xmm1 \n"
"lea " MEMLEA(0x10, 0) ",%0 \n"
"movdqa " MEMACCESS(1) ",%%xmm2 \n"
"lea " MEMLEA(0x10, 1) ",%1 \n"
"sub $0x10,%2 \n"
"movdqa %%xmm1,%%xmm3 \n"
"psubusb %%xmm2,%%xmm1 \n"
"psubusb %%xmm3,%%xmm2 \n"
"por %%xmm2,%%xmm1 \n"
"movdqa %%xmm1,%%xmm2 \n"
"punpcklbw %%xmm5,%%xmm1 \n"
"punpckhbw %%xmm5,%%xmm2 \n"
"pmaddwd %%xmm1,%%xmm1 \n"
"pmaddwd %%xmm2,%%xmm2 \n"
"paddd %%xmm1,%%xmm0 \n"
"paddd %%xmm2,%%xmm0 \n"
"jg 1b \n"
"pshufd $0xee,%%xmm0,%%xmm1 \n"
"paddd %%xmm1,%%xmm0 \n"
"pshufd $0x1,%%xmm0,%%xmm1 \n"
"paddd %%xmm1,%%xmm0 \n"
"movd %%xmm0,%3 \n"
: "+r"(src_a), // %0
"+r"(src_b), // %1
"+r"(count), // %2
"=g"(sse) // %3
:
: "memory", "cc"
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
#endif
); // NOLINT
return sse;
}
#endif // defined(__x86_64__) || defined(__i386__)
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))
#define HAS_HASHDJB2_SSE41
static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16
static uvec32 kHashMul0 = {
0x0c3525e1, // 33 ^ 15
0xa3476dc1, // 33 ^ 14
0x3b4039a1, // 33 ^ 13
0x4f5f0981, // 33 ^ 12
};
static uvec32 kHashMul1 = {
0x30f35d61, // 33 ^ 11
0x855cb541, // 33 ^ 10
0x040a9121, // 33 ^ 9
0x747c7101, // 33 ^ 8
};
static uvec32 kHashMul2 = {
0xec41d4e1, // 33 ^ 7
0x4cfa3cc1, // 33 ^ 6
0x025528a1, // 33 ^ 5
0x00121881, // 33 ^ 4
};
static uvec32 kHashMul3 = {
0x00008c61, // 33 ^ 3
0x00000441, // 33 ^ 2
0x00000021, // 33 ^ 1
0x00000001, // 33 ^ 0
};
uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
uint32 hash;
asm volatile ( // NOLINT
"movd %2,%%xmm0 \n"
"pxor %%xmm7,%%xmm7 \n"
"movdqa %4,%%xmm6 \n"
LABELALIGN
"1: \n"
"movdqu " MEMACCESS(0) ",%%xmm1 \n"
"lea " MEMLEA(0x10, 0) ",%0 \n"
"pmulld %%xmm6,%%xmm0 \n"
"movdqa %5,%%xmm5 \n"
"movdqa %%xmm1,%%xmm2 \n"
"punpcklbw %%xmm7,%%xmm2 \n"
"movdqa %%xmm2,%%xmm3 \n"
"punpcklwd %%xmm7,%%xmm3 \n"
"pmulld %%xmm5,%%xmm3 \n"
"movdqa %6,%%xmm5 \n"
"movdqa %%xmm2,%%xmm4 \n"
"punpckhwd %%xmm7,%%xmm4 \n"
"pmulld %%xmm5,%%xmm4 \n"
"movdqa %7,%%xmm5 \n"
"punpckhbw %%xmm7,%%xmm1 \n"
"movdqa %%xmm1,%%xmm2 \n"
"punpcklwd %%xmm7,%%xmm2 \n"
"pmulld %%xmm5,%%xmm2 \n"
"movdqa %8,%%xmm5 \n"
"punpckhwd %%xmm7,%%xmm1 \n"
"pmulld %%xmm5,%%xmm1 \n"
"paddd %%xmm4,%%xmm3 \n"
"paddd %%xmm2,%%xmm1 \n"
"sub $0x10,%1 \n"
"paddd %%xmm3,%%xmm1 \n"
"pshufd $0xe,%%xmm1,%%xmm2 \n"
"paddd %%xmm2,%%xmm1 \n"
"pshufd $0x1,%%xmm1,%%xmm2 \n"
"paddd %%xmm2,%%xmm1 \n"
"paddd %%xmm1,%%xmm0 \n"
"jg 1b \n"
"movd %%xmm0,%3 \n"
: "+r"(src), // %0
"+r"(count), // %1
"+rm"(seed), // %2
"=g"(hash) // %3
: "m"(kHash16x33), // %4
"m"(kHashMul0), // %5
"m"(kHashMul1), // %6
"m"(kHashMul2), // %7
"m"(kHashMul3) // %8
: "memory", "cc"
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
#endif
); // NOLINT
return hash;
}
#endif // defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

View File

@ -9,6 +9,8 @@
*/ */
#include "libyuv/basic_types.h" #include "libyuv/basic_types.h"
#include "libyuv/compare_row.h"
#include "libyuv/row.h" #include "libyuv/row.h"
#ifdef __cplusplus #ifdef __cplusplus
@ -16,9 +18,10 @@ namespace libyuv {
extern "C" { extern "C" {
#endif #endif
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) // This module is for 32 bit Visual C x86 and clangcl
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
__declspec(naked) __declspec(align(16)) __declspec(naked)
uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) { uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
__asm { __asm {
mov eax, [esp + 4] // src_a mov eax, [esp + 4] // src_a
@ -27,13 +30,11 @@ uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
pxor xmm0, xmm0 pxor xmm0, xmm0
pxor xmm5, xmm5 pxor xmm5, xmm5
align 4
wloop: wloop:
movdqa xmm1, [eax] movdqu xmm1, [eax]
lea eax, [eax + 16] lea eax, [eax + 16]
movdqa xmm2, [edx] movdqu xmm2, [edx]
lea edx, [edx + 16] lea edx, [edx + 16]
sub ecx, 16
movdqa xmm3, xmm1 // abs trick movdqa xmm3, xmm1 // abs trick
psubusb xmm1, xmm2 psubusb xmm1, xmm2
psubusb xmm2, xmm3 psubusb xmm2, xmm3
@ -45,6 +46,7 @@ uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
pmaddwd xmm2, xmm2 pmaddwd xmm2, xmm2
paddd xmm0, xmm1 paddd xmm0, xmm1
paddd xmm0, xmm2 paddd xmm0, xmm2
sub ecx, 16
jg wloop jg wloop
pshufd xmm1, xmm0, 0xee pshufd xmm1, xmm0, 0xee
@ -60,7 +62,7 @@ uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
#if _MSC_VER >= 1700 #if _MSC_VER >= 1700
// C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX. // C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX.
#pragma warning(disable: 4752) #pragma warning(disable: 4752)
__declspec(naked) __declspec(align(16)) __declspec(naked)
uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) { uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
__asm { __asm {
mov eax, [esp + 4] // src_a mov eax, [esp + 4] // src_a
@ -70,12 +72,10 @@ uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
vpxor ymm5, ymm5, ymm5 // constant 0 for unpck vpxor ymm5, ymm5, ymm5 // constant 0 for unpck
sub edx, eax sub edx, eax
align 4
wloop: wloop:
vmovdqu ymm1, [eax] vmovdqu ymm1, [eax]
vmovdqu ymm2, [eax + edx] vmovdqu ymm2, [eax + edx]
lea eax, [eax + 32] lea eax, [eax + 32]
sub ecx, 32
vpsubusb ymm3, ymm1, ymm2 // abs difference trick vpsubusb ymm3, ymm1, ymm2 // abs difference trick
vpsubusb ymm2, ymm2, ymm1 vpsubusb ymm2, ymm2, ymm1
vpor ymm1, ymm2, ymm3 vpor ymm1, ymm2, ymm3
@ -85,6 +85,7 @@ uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
vpmaddwd ymm1, ymm1, ymm1 vpmaddwd ymm1, ymm1, ymm1
vpaddd ymm0, ymm0, ymm1 vpaddd ymm0, ymm0, ymm1
vpaddd ymm0, ymm0, ymm2 vpaddd ymm0, ymm0, ymm2
sub ecx, 32
jg wloop jg wloop
vpshufd ymm1, ymm0, 0xee // 3, 2 + 1, 0 both lanes. vpshufd ymm1, ymm0, 0xee // 3, 2 + 1, 0 both lanes.
@ -100,42 +101,33 @@ uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
} }
#endif // _MSC_VER >= 1700 #endif // _MSC_VER >= 1700
#define HAS_HASHDJB2_SSE41 uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16
static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16 uvec32 kHashMul0 = {
static uvec32 kHashMul0 = {
0x0c3525e1, // 33 ^ 15 0x0c3525e1, // 33 ^ 15
0xa3476dc1, // 33 ^ 14 0xa3476dc1, // 33 ^ 14
0x3b4039a1, // 33 ^ 13 0x3b4039a1, // 33 ^ 13
0x4f5f0981, // 33 ^ 12 0x4f5f0981, // 33 ^ 12
}; };
static uvec32 kHashMul1 = { uvec32 kHashMul1 = {
0x30f35d61, // 33 ^ 11 0x30f35d61, // 33 ^ 11
0x855cb541, // 33 ^ 10 0x855cb541, // 33 ^ 10
0x040a9121, // 33 ^ 9 0x040a9121, // 33 ^ 9
0x747c7101, // 33 ^ 8 0x747c7101, // 33 ^ 8
}; };
static uvec32 kHashMul2 = { uvec32 kHashMul2 = {
0xec41d4e1, // 33 ^ 7 0xec41d4e1, // 33 ^ 7
0x4cfa3cc1, // 33 ^ 6 0x4cfa3cc1, // 33 ^ 6
0x025528a1, // 33 ^ 5 0x025528a1, // 33 ^ 5
0x00121881, // 33 ^ 4 0x00121881, // 33 ^ 4
}; };
static uvec32 kHashMul3 = { uvec32 kHashMul3 = {
0x00008c61, // 33 ^ 3 0x00008c61, // 33 ^ 3
0x00000441, // 33 ^ 2 0x00000441, // 33 ^ 2
0x00000021, // 33 ^ 1 0x00000021, // 33 ^ 1
0x00000001, // 33 ^ 0 0x00000001, // 33 ^ 0
}; };
// 27: 66 0F 38 40 C6 pmulld xmm0,xmm6 __declspec(naked)
// 44: 66 0F 38 40 DD pmulld xmm3,xmm5
// 59: 66 0F 38 40 E5 pmulld xmm4,xmm5
// 72: 66 0F 38 40 D5 pmulld xmm2,xmm5
// 83: 66 0F 38 40 CD pmulld xmm1,xmm5
#define pmulld(reg) _asm _emit 0x66 _asm _emit 0x0F _asm _emit 0x38 \
_asm _emit 0x40 _asm _emit reg
__declspec(naked) __declspec(align(16))
uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) { uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
__asm { __asm {
mov eax, [esp + 4] // src mov eax, [esp + 4] // src
@ -143,34 +135,32 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
movd xmm0, [esp + 12] // seed movd xmm0, [esp + 12] // seed
pxor xmm7, xmm7 // constant 0 for unpck pxor xmm7, xmm7 // constant 0 for unpck
movdqa xmm6, kHash16x33 movdqa xmm6, xmmword ptr kHash16x33
align 4
wloop: wloop:
movdqu xmm1, [eax] // src[0-15] movdqu xmm1, [eax] // src[0-15]
lea eax, [eax + 16] lea eax, [eax + 16]
pmulld(0xc6) // pmulld xmm0,xmm6 hash *= 33 ^ 16 pmulld xmm0, xmm6 // hash *= 33 ^ 16
movdqa xmm5, kHashMul0 movdqa xmm5, xmmword ptr kHashMul0
movdqa xmm2, xmm1 movdqa xmm2, xmm1
punpcklbw xmm2, xmm7 // src[0-7] punpcklbw xmm2, xmm7 // src[0-7]
movdqa xmm3, xmm2 movdqa xmm3, xmm2
punpcklwd xmm3, xmm7 // src[0-3] punpcklwd xmm3, xmm7 // src[0-3]
pmulld(0xdd) // pmulld xmm3, xmm5 pmulld xmm3, xmm5
movdqa xmm5, kHashMul1 movdqa xmm5, xmmword ptr kHashMul1
movdqa xmm4, xmm2 movdqa xmm4, xmm2
punpckhwd xmm4, xmm7 // src[4-7] punpckhwd xmm4, xmm7 // src[4-7]
pmulld(0xe5) // pmulld xmm4, xmm5 pmulld xmm4, xmm5
movdqa xmm5, kHashMul2 movdqa xmm5, xmmword ptr kHashMul2
punpckhbw xmm1, xmm7 // src[8-15] punpckhbw xmm1, xmm7 // src[8-15]
movdqa xmm2, xmm1 movdqa xmm2, xmm1
punpcklwd xmm2, xmm7 // src[8-11] punpcklwd xmm2, xmm7 // src[8-11]
pmulld(0xd5) // pmulld xmm2, xmm5 pmulld xmm2, xmm5
movdqa xmm5, kHashMul3 movdqa xmm5, xmmword ptr kHashMul3
punpckhwd xmm1, xmm7 // src[12-15] punpckhwd xmm1, xmm7 // src[12-15]
pmulld(0xcd) // pmulld xmm1, xmm5 pmulld xmm1, xmm5
paddd xmm3, xmm4 // add 16 results paddd xmm3, xmm4 // add 16 results
paddd xmm1, xmm2 paddd xmm1, xmm2
sub ecx, 16
paddd xmm1, xmm3 paddd xmm1, xmm3
pshufd xmm2, xmm1, 0x0e // upper 2 dwords pshufd xmm2, xmm1, 0x0e // upper 2 dwords
@ -178,6 +168,7 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
pshufd xmm2, xmm1, 0x01 pshufd xmm2, xmm1, 0x01
paddd xmm1, xmm2 paddd xmm1, xmm2
paddd xmm0, xmm1 paddd xmm0, xmm1
sub ecx, 16
jg wloop jg wloop
movd eax, xmm0 // return hash movd eax, xmm0 // return hash
@ -187,44 +178,43 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
// Visual C 2012 required for AVX2. // Visual C 2012 required for AVX2.
#if _MSC_VER >= 1700 #if _MSC_VER >= 1700
__declspec(naked) __declspec(align(16)) __declspec(naked)
uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) { uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {
__asm { __asm {
mov eax, [esp + 4] // src mov eax, [esp + 4] // src
mov ecx, [esp + 8] // count mov ecx, [esp + 8] // count
movd xmm0, [esp + 12] // seed vmovd xmm0, [esp + 12] // seed
movdqa xmm6, kHash16x33
align 4
wloop: wloop:
vpmovzxbd xmm3, dword ptr [eax] // src[0-3] vpmovzxbd xmm3, [eax] // src[0-3]
pmulld xmm0, xmm6 // hash *= 33 ^ 16 vpmulld xmm0, xmm0, xmmword ptr kHash16x33 // hash *= 33 ^ 16
vpmovzxbd xmm4, dword ptr [eax + 4] // src[4-7] vpmovzxbd xmm4, [eax + 4] // src[4-7]
pmulld xmm3, kHashMul0 vpmulld xmm3, xmm3, xmmword ptr kHashMul0
vpmovzxbd xmm2, dword ptr [eax + 8] // src[8-11] vpmovzxbd xmm2, [eax + 8] // src[8-11]
pmulld xmm4, kHashMul1 vpmulld xmm4, xmm4, xmmword ptr kHashMul1
vpmovzxbd xmm1, dword ptr [eax + 12] // src[12-15] vpmovzxbd xmm1, [eax + 12] // src[12-15]
pmulld xmm2, kHashMul2 vpmulld xmm2, xmm2, xmmword ptr kHashMul2
lea eax, [eax + 16] lea eax, [eax + 16]
pmulld xmm1, kHashMul3 vpmulld xmm1, xmm1, xmmword ptr kHashMul3
paddd xmm3, xmm4 // add 16 results vpaddd xmm3, xmm3, xmm4 // add 16 results
paddd xmm1, xmm2 vpaddd xmm1, xmm1, xmm2
vpaddd xmm1, xmm1, xmm3
vpshufd xmm2, xmm1, 0x0e // upper 2 dwords
vpaddd xmm1, xmm1,xmm2
vpshufd xmm2, xmm1, 0x01
vpaddd xmm1, xmm1, xmm2
vpaddd xmm0, xmm0, xmm1
sub ecx, 16 sub ecx, 16
paddd xmm1, xmm3
pshufd xmm2, xmm1, 0x0e // upper 2 dwords
paddd xmm1, xmm2
pshufd xmm2, xmm1, 0x01
paddd xmm1, xmm2
paddd xmm0, xmm1
jg wloop jg wloop
movd eax, xmm0 // return hash vmovd eax, xmm0 // return hash
vzeroupper
ret ret
} }
} }
#endif // _MSC_VER >= 1700 #endif // _MSC_VER >= 1700
#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) #endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
#ifdef __cplusplus #ifdef __cplusplus
} // extern "C" } // extern "C"

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -13,7 +13,6 @@
#include "libyuv/basic_types.h" #include "libyuv/basic_types.h"
#include "libyuv/convert.h" // For I420Copy #include "libyuv/convert.h" // For I420Copy
#include "libyuv/cpu_id.h" #include "libyuv/cpu_id.h"
#include "libyuv/format_conversion.h"
#include "libyuv/planar_functions.h" #include "libyuv/planar_functions.h"
#include "libyuv/rotate.h" #include "libyuv/rotate.h"
#include "libyuv/scale.h" // For ScalePlane() #include "libyuv/scale.h" // For ScalePlane()
@ -174,14 +173,15 @@ int I422ToYUY2(const uint8* src_y, int src_stride_y,
src_stride_y = src_stride_u = src_stride_v = dst_stride_yuy2 = 0; src_stride_y = src_stride_u = src_stride_v = dst_stride_yuy2 = 0;
} }
#if defined(HAS_I422TOYUY2ROW_SSE2) #if defined(HAS_I422TOYUY2ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && width >= 16) { if (TestCpuFlag(kCpuHasSSE2)) {
I422ToYUY2Row = I422ToYUY2Row_Any_SSE2; I422ToYUY2Row = I422ToYUY2Row_Any_SSE2;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
I422ToYUY2Row = I422ToYUY2Row_SSE2; I422ToYUY2Row = I422ToYUY2Row_SSE2;
} }
} }
#elif defined(HAS_I422TOYUY2ROW_NEON) #endif
if (TestCpuFlag(kCpuHasNEON) && width >= 16) { #if defined(HAS_I422TOYUY2ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I422ToYUY2Row = I422ToYUY2Row_Any_NEON; I422ToYUY2Row = I422ToYUY2Row_Any_NEON;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
I422ToYUY2Row = I422ToYUY2Row_NEON; I422ToYUY2Row = I422ToYUY2Row_NEON;
@ -220,14 +220,15 @@ int I420ToYUY2(const uint8* src_y, int src_stride_y,
dst_stride_yuy2 = -dst_stride_yuy2; dst_stride_yuy2 = -dst_stride_yuy2;
} }
#if defined(HAS_I422TOYUY2ROW_SSE2) #if defined(HAS_I422TOYUY2ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && width >= 16) { if (TestCpuFlag(kCpuHasSSE2)) {
I422ToYUY2Row = I422ToYUY2Row_Any_SSE2; I422ToYUY2Row = I422ToYUY2Row_Any_SSE2;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
I422ToYUY2Row = I422ToYUY2Row_SSE2; I422ToYUY2Row = I422ToYUY2Row_SSE2;
} }
} }
#elif defined(HAS_I422TOYUY2ROW_NEON) #endif
if (TestCpuFlag(kCpuHasNEON) && width >= 16) { #if defined(HAS_I422TOYUY2ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I422ToYUY2Row = I422ToYUY2Row_Any_NEON; I422ToYUY2Row = I422ToYUY2Row_Any_NEON;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
I422ToYUY2Row = I422ToYUY2Row_NEON; I422ToYUY2Row = I422ToYUY2Row_NEON;
@ -280,14 +281,15 @@ int I422ToUYVY(const uint8* src_y, int src_stride_y,
src_stride_y = src_stride_u = src_stride_v = dst_stride_uyvy = 0; src_stride_y = src_stride_u = src_stride_v = dst_stride_uyvy = 0;
} }
#if defined(HAS_I422TOUYVYROW_SSE2) #if defined(HAS_I422TOUYVYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && width >= 16) { if (TestCpuFlag(kCpuHasSSE2)) {
I422ToUYVYRow = I422ToUYVYRow_Any_SSE2; I422ToUYVYRow = I422ToUYVYRow_Any_SSE2;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
I422ToUYVYRow = I422ToUYVYRow_SSE2; I422ToUYVYRow = I422ToUYVYRow_SSE2;
} }
} }
#elif defined(HAS_I422TOUYVYROW_NEON) #endif
if (TestCpuFlag(kCpuHasNEON) && width >= 16) { #if defined(HAS_I422TOUYVYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I422ToUYVYRow = I422ToUYVYRow_Any_NEON; I422ToUYVYRow = I422ToUYVYRow_Any_NEON;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
I422ToUYVYRow = I422ToUYVYRow_NEON; I422ToUYVYRow = I422ToUYVYRow_NEON;
@ -326,14 +328,15 @@ int I420ToUYVY(const uint8* src_y, int src_stride_y,
dst_stride_uyvy = -dst_stride_uyvy; dst_stride_uyvy = -dst_stride_uyvy;
} }
#if defined(HAS_I422TOUYVYROW_SSE2) #if defined(HAS_I422TOUYVYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && width >= 16) { if (TestCpuFlag(kCpuHasSSE2)) {
I422ToUYVYRow = I422ToUYVYRow_Any_SSE2; I422ToUYVYRow = I422ToUYVYRow_Any_SSE2;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
I422ToUYVYRow = I422ToUYVYRow_SSE2; I422ToUYVYRow = I422ToUYVYRow_SSE2;
} }
} }
#elif defined(HAS_I422TOUYVYROW_NEON) #endif
if (TestCpuFlag(kCpuHasNEON) && width >= 16) { #if defined(HAS_I422TOUYVYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I422ToUYVYRow = I422ToUYVYRow_Any_NEON; I422ToUYVYRow = I422ToUYVYRow_Any_NEON;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
I422ToUYVYRow = I422ToUYVYRow_NEON; I422ToUYVYRow = I422ToUYVYRow_NEON;
@ -397,20 +400,15 @@ int I420ToNV12(const uint8* src_y, int src_stride_y,
src_stride_u = src_stride_v = dst_stride_uv = 0; src_stride_u = src_stride_v = dst_stride_uv = 0;
} }
#if defined(HAS_MERGEUVROW_SSE2) #if defined(HAS_MERGEUVROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) { if (TestCpuFlag(kCpuHasSSE2)) {
MergeUVRow_ = MergeUVRow_Any_SSE2; MergeUVRow_ = MergeUVRow_Any_SSE2;
if (IS_ALIGNED(halfwidth, 16)) { if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_Unaligned_SSE2;
if (IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) &&
IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) &&
IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
MergeUVRow_ = MergeUVRow_SSE2; MergeUVRow_ = MergeUVRow_SSE2;
} }
} }
}
#endif #endif
#if defined(HAS_MERGEUVROW_AVX2) #if defined(HAS_MERGEUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) { if (TestCpuFlag(kCpuHasAVX2)) {
MergeUVRow_ = MergeUVRow_Any_AVX2; MergeUVRow_ = MergeUVRow_Any_AVX2;
if (IS_ALIGNED(halfwidth, 32)) { if (IS_ALIGNED(halfwidth, 32)) {
MergeUVRow_ = MergeUVRow_AVX2; MergeUVRow_ = MergeUVRow_AVX2;
@ -418,7 +416,7 @@ int I420ToNV12(const uint8* src_y, int src_stride_y,
} }
#endif #endif
#if defined(HAS_MERGEUVROW_NEON) #if defined(HAS_MERGEUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) { if (TestCpuFlag(kCpuHasNEON)) {
MergeUVRow_ = MergeUVRow_Any_NEON; MergeUVRow_ = MergeUVRow_Any_NEON;
if (IS_ALIGNED(halfwidth, 16)) { if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_NEON; MergeUVRow_ = MergeUVRow_NEON;
@ -452,185 +450,67 @@ int I420ToNV21(const uint8* src_y, int src_stride_y,
width, height); width, height);
} }
// Convert I420 to ARGB. // Convert I422 to RGBA with matrix
LIBYUV_API static int I420ToRGBAMatrix(const uint8* src_y, int src_stride_y,
int I420ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u, const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v, const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb, uint8* dst_rgba, int dst_stride_rgba,
const struct YuvConstants* yuvconstants,
int width, int height) { int width, int height) {
int y; int y;
void (*I422ToARGBRow)(const uint8* y_buf, void (*I422ToRGBARow)(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* rgb_buf,
int width) = I422ToARGBRow_C; const struct YuvConstants* yuvconstants,
if (!src_y || !src_u || !src_v || !dst_argb || int width) = I422ToRGBARow_C;
if (!src_y || !src_u || !src_v || !dst_rgba ||
width <= 0 || height == 0) { width <= 0 || height == 0) {
return -1; return -1;
} }
// Negative height means invert the image. // Negative height means invert the image.
if (height < 0) { if (height < 0) {
height = -height; height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba;
dst_stride_argb = -dst_stride_argb; dst_stride_rgba = -dst_stride_rgba;
} }
#if defined(HAS_I422TOARGBROW_SSSE3) #if defined(HAS_I422TORGBAROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { if (TestCpuFlag(kCpuHasSSSE3)) {
I422ToARGBRow = I422ToARGBRow_Any_SSSE3; I422ToRGBARow = I422ToRGBARow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3; I422ToRGBARow = I422ToRGBARow_SSSE3;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
I422ToARGBRow = I422ToARGBRow_SSSE3;
}
} }
} }
#endif #endif
#if defined(HAS_I422TOARGBROW_AVX2) #if defined(HAS_I422TORGBAROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && width >= 16) { if (TestCpuFlag(kCpuHasAVX2)) {
I422ToARGBRow = I422ToARGBRow_Any_AVX2; I422ToRGBARow = I422ToRGBARow_Any_AVX2;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
I422ToARGBRow = I422ToARGBRow_AVX2; I422ToRGBARow = I422ToRGBARow_AVX2;
} }
} }
#endif #endif
#if defined(HAS_I422TOARGBROW_NEON) #if defined(HAS_I422TORGBAROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { if (TestCpuFlag(kCpuHasNEON)) {
I422ToARGBRow = I422ToARGBRow_Any_NEON; I422ToRGBARow = I422ToRGBARow_Any_NEON;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
I422ToARGBRow = I422ToARGBRow_NEON; I422ToRGBARow = I422ToRGBARow_NEON;
} }
} }
#endif #endif
#if defined(HAS_I422TOARGBROW_MIPS_DSPR2) #if defined(HAS_I422TORGBAROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) && if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { IS_ALIGNED(dst_rgba, 4) && IS_ALIGNED(dst_stride_rgba, 4)) {
I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2; I422ToRGBARow = I422ToRGBARow_MIPS_DSPR2;
} }
#endif #endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
I422ToARGBRow(src_y, src_u, src_v, dst_argb, width); I422ToRGBARow(src_y, src_u, src_v, dst_rgba, yuvconstants, width);
dst_argb += dst_stride_argb; dst_rgba += dst_stride_rgba;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
}
return 0;
}
// Convert I420 to BGRA.
LIBYUV_API
int I420ToBGRA(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_bgra, int dst_stride_bgra,
int width, int height) {
int y;
void (*I422ToBGRARow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) = I422ToBGRARow_C;
if (!src_y || !src_u || !src_v || !dst_bgra ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_bgra = dst_bgra + (height - 1) * dst_stride_bgra;
dst_stride_bgra = -dst_stride_bgra;
}
#if defined(HAS_I422TOBGRAROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
I422ToBGRARow = I422ToBGRARow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
I422ToBGRARow = I422ToBGRARow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_bgra, 16) && IS_ALIGNED(dst_stride_bgra, 16)) {
I422ToBGRARow = I422ToBGRARow_SSSE3;
}
}
}
#elif defined(HAS_I422TOBGRAROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
I422ToBGRARow = I422ToBGRARow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
I422ToBGRARow = I422ToBGRARow_NEON;
}
}
#elif defined(HAS_I422TOBGRAROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_bgra, 4) && IS_ALIGNED(dst_stride_bgra, 4)) {
I422ToBGRARow = I422ToBGRARow_MIPS_DSPR2;
}
#endif
for (y = 0; y < height; ++y) {
I422ToBGRARow(src_y, src_u, src_v, dst_bgra, width);
dst_bgra += dst_stride_bgra;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
}
return 0;
}
// Convert I420 to ABGR.
LIBYUV_API
int I420ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height) {
int y;
void (*I422ToABGRRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) = I422ToABGRRow_C;
if (!src_y || !src_u || !src_v || !dst_abgr ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr;
dst_stride_abgr = -dst_stride_abgr;
}
#if defined(HAS_I422TOABGRROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
I422ToABGRRow = I422ToABGRRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_abgr, 16) && IS_ALIGNED(dst_stride_abgr, 16)) {
I422ToABGRRow = I422ToABGRRow_SSSE3;
}
}
}
#elif defined(HAS_I422TOABGRROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
I422ToABGRRow = I422ToABGRRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
I422ToABGRRow = I422ToABGRRow_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
dst_abgr += dst_stride_abgr;
src_y += src_stride_y; src_y += src_stride_y;
if (y & 1) { if (y & 1) {
src_u += src_stride_u; src_u += src_stride_u;
@ -647,44 +527,81 @@ int I420ToRGBA(const uint8* src_y, int src_stride_y,
const uint8* src_v, int src_stride_v, const uint8* src_v, int src_stride_v,
uint8* dst_rgba, int dst_stride_rgba, uint8* dst_rgba, int dst_stride_rgba,
int width, int height) { int width, int height) {
return I420ToRGBAMatrix(src_y, src_stride_y,
src_u, src_stride_u,
src_v, src_stride_v,
dst_rgba, dst_stride_rgba,
&kYuvI601Constants,
width, height);
}
// Convert I420 to BGRA.
// Thin wrapper: forwards to I420ToRGBAMatrix with the U and V plane arguments
// (pointer and stride) exchanged and kYvuI601Constants as the conversion
// matrix. The destination pointer, destination stride, width and height are
// passed through unchanged.
// Returns whatever I420ToRGBAMatrix returns.
LIBYUV_API
int I420ToBGRA(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_bgra, int dst_stride_bgra,
int width, int height) {
return I420ToRGBAMatrix(src_y, src_stride_y,
src_v, src_stride_v, // Swap U and V
src_u, src_stride_u,
dst_bgra, dst_stride_bgra,
&kYvuI601Constants, // Use Yvu matrix
width, height);
}
// Convert I420 to RGB24 with matrix
static int I420ToRGB24Matrix(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_rgb24, int dst_stride_rgb24,
const struct YuvConstants* yuvconstants,
int width, int height) {
int y; int y;
void (*I422ToRGBARow)(const uint8* y_buf, void (*I422ToRGB24Row)(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* rgb_buf,
int width) = I422ToRGBARow_C; const struct YuvConstants* yuvconstants,
if (!src_y || !src_u || !src_v || !dst_rgba || int width) = I422ToRGB24Row_C;
if (!src_y || !src_u || !src_v || !dst_rgb24 ||
width <= 0 || height == 0) { width <= 0 || height == 0) {
return -1; return -1;
} }
// Negative height means invert the image. // Negative height means invert the image.
if (height < 0) { if (height < 0) {
height = -height; height = -height;
dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba; dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24;
dst_stride_rgba = -dst_stride_rgba; dst_stride_rgb24 = -dst_stride_rgb24;
} }
#if defined(HAS_I422TORGBAROW_SSSE3) #if defined(HAS_I422TORGB24ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { if (TestCpuFlag(kCpuHasSSSE3)) {
I422ToRGBARow = I422ToRGBARow_Any_SSSE3; I422ToRGB24Row = I422ToRGB24Row_Any_SSSE3;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
I422ToRGBARow = I422ToRGBARow_Unaligned_SSSE3; I422ToRGB24Row = I422ToRGB24Row_SSSE3;
if (IS_ALIGNED(dst_rgba, 16) && IS_ALIGNED(dst_stride_rgba, 16)) {
I422ToRGBARow = I422ToRGBARow_SSSE3;
} }
} }
#endif
#if defined(HAS_I422TORGB24ROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
I422ToRGB24Row = I422ToRGB24Row_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
I422ToRGB24Row = I422ToRGB24Row_AVX2;
} }
#elif defined(HAS_I422TORGBAROW_NEON) }
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { #endif
I422ToRGBARow = I422ToRGBARow_Any_NEON; #if defined(HAS_I422TORGB24ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I422ToRGB24Row = I422ToRGB24Row_Any_NEON;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
I422ToRGBARow = I422ToRGBARow_NEON; I422ToRGB24Row = I422ToRGB24Row_NEON;
} }
} }
#endif #endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
I422ToRGBARow(src_y, src_u, src_v, dst_rgba, width); I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, yuvconstants, width);
dst_rgba += dst_stride_rgba; dst_rgb24 += dst_stride_rgb24;
src_y += src_stride_y; src_y += src_stride_y;
if (y & 1) { if (y & 1) {
src_u += src_stride_u; src_u += src_stride_u;
@ -701,48 +618,12 @@ int I420ToRGB24(const uint8* src_y, int src_stride_y,
const uint8* src_v, int src_stride_v, const uint8* src_v, int src_stride_v,
uint8* dst_rgb24, int dst_stride_rgb24, uint8* dst_rgb24, int dst_stride_rgb24,
int width, int height) { int width, int height) {
int y; return I420ToRGB24Matrix(src_y, src_stride_y,
void (*I422ToRGB24Row)(const uint8* y_buf, src_u, src_stride_u,
const uint8* u_buf, src_v, src_stride_v,
const uint8* v_buf, dst_rgb24, dst_stride_rgb24,
uint8* rgb_buf, &kYuvI601Constants,
int width) = I422ToRGB24Row_C; width, height);
if (!src_y || !src_u || !src_v || !dst_rgb24 ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24;
dst_stride_rgb24 = -dst_stride_rgb24;
}
#if defined(HAS_I422TORGB24ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
I422ToRGB24Row = I422ToRGB24Row_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
I422ToRGB24Row = I422ToRGB24Row_SSSE3;
}
}
#elif defined(HAS_I422TORGB24ROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
I422ToRGB24Row = I422ToRGB24Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
I422ToRGB24Row = I422ToRGB24Row_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, width);
dst_rgb24 += dst_stride_rgb24;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
}
return 0;
} }
// Convert I420 to RAW. // Convert I420 to RAW.
@ -752,48 +633,12 @@ int I420ToRAW(const uint8* src_y, int src_stride_y,
const uint8* src_v, int src_stride_v, const uint8* src_v, int src_stride_v,
uint8* dst_raw, int dst_stride_raw, uint8* dst_raw, int dst_stride_raw,
int width, int height) { int width, int height) {
int y; return I420ToRGB24Matrix(src_y, src_stride_y,
void (*I422ToRAWRow)(const uint8* y_buf, src_v, src_stride_v, // Swap U and V
const uint8* u_buf, src_u, src_stride_u,
const uint8* v_buf, dst_raw, dst_stride_raw,
uint8* rgb_buf, &kYvuI601Constants, // Use Yvu matrix
int width) = I422ToRAWRow_C; width, height);
if (!src_y || !src_u || !src_v || !dst_raw ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_raw = dst_raw + (height - 1) * dst_stride_raw;
dst_stride_raw = -dst_stride_raw;
}
#if defined(HAS_I422TORAWROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
I422ToRAWRow = I422ToRAWRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
I422ToRAWRow = I422ToRAWRow_SSSE3;
}
}
#elif defined(HAS_I422TORAWROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
I422ToRAWRow = I422ToRAWRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
I422ToRAWRow = I422ToRAWRow_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
I422ToRAWRow(src_y, src_u, src_v, dst_raw, width);
dst_raw += dst_stride_raw;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
}
return 0;
} }
// Convert I420 to ARGB1555. // Convert I420 to ARGB1555.
@ -808,6 +653,7 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) = I422ToARGB1555Row_C; int width) = I422ToARGB1555Row_C;
if (!src_y || !src_u || !src_v || !dst_argb1555 || if (!src_y || !src_u || !src_v || !dst_argb1555 ||
width <= 0 || height == 0) { width <= 0 || height == 0) {
@ -820,14 +666,23 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y,
dst_stride_argb1555 = -dst_stride_argb1555; dst_stride_argb1555 = -dst_stride_argb1555;
} }
#if defined(HAS_I422TOARGB1555ROW_SSSE3) #if defined(HAS_I422TOARGB1555ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { if (TestCpuFlag(kCpuHasSSSE3)) {
I422ToARGB1555Row = I422ToARGB1555Row_Any_SSSE3; I422ToARGB1555Row = I422ToARGB1555Row_Any_SSSE3;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
I422ToARGB1555Row = I422ToARGB1555Row_SSSE3; I422ToARGB1555Row = I422ToARGB1555Row_SSSE3;
} }
} }
#elif defined(HAS_I422TOARGB1555ROW_NEON) #endif
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { #if defined(HAS_I422TOARGB1555ROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
I422ToARGB1555Row = I422ToARGB1555Row_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
I422ToARGB1555Row = I422ToARGB1555Row_AVX2;
}
}
#endif
#if defined(HAS_I422TOARGB1555ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I422ToARGB1555Row = I422ToARGB1555Row_Any_NEON; I422ToARGB1555Row = I422ToARGB1555Row_Any_NEON;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
I422ToARGB1555Row = I422ToARGB1555Row_NEON; I422ToARGB1555Row = I422ToARGB1555Row_NEON;
@ -836,7 +691,8 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y,
#endif #endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
I422ToARGB1555Row(src_y, src_u, src_v, dst_argb1555, width); I422ToARGB1555Row(src_y, src_u, src_v, dst_argb1555, &kYuvI601Constants,
width);
dst_argb1555 += dst_stride_argb1555; dst_argb1555 += dst_stride_argb1555;
src_y += src_stride_y; src_y += src_stride_y;
if (y & 1) { if (y & 1) {
@ -860,6 +716,7 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) = I422ToARGB4444Row_C; int width) = I422ToARGB4444Row_C;
if (!src_y || !src_u || !src_v || !dst_argb4444 || if (!src_y || !src_u || !src_v || !dst_argb4444 ||
width <= 0 || height == 0) { width <= 0 || height == 0) {
@ -872,14 +729,23 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y,
dst_stride_argb4444 = -dst_stride_argb4444; dst_stride_argb4444 = -dst_stride_argb4444;
} }
#if defined(HAS_I422TOARGB4444ROW_SSSE3) #if defined(HAS_I422TOARGB4444ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { if (TestCpuFlag(kCpuHasSSSE3)) {
I422ToARGB4444Row = I422ToARGB4444Row_Any_SSSE3; I422ToARGB4444Row = I422ToARGB4444Row_Any_SSSE3;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
I422ToARGB4444Row = I422ToARGB4444Row_SSSE3; I422ToARGB4444Row = I422ToARGB4444Row_SSSE3;
} }
} }
#elif defined(HAS_I422TOARGB4444ROW_NEON) #endif
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { #if defined(HAS_I422TOARGB4444ROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
I422ToARGB4444Row = I422ToARGB4444Row_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
I422ToARGB4444Row = I422ToARGB4444Row_AVX2;
}
}
#endif
#if defined(HAS_I422TOARGB4444ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I422ToARGB4444Row = I422ToARGB4444Row_Any_NEON; I422ToARGB4444Row = I422ToARGB4444Row_Any_NEON;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
I422ToARGB4444Row = I422ToARGB4444Row_NEON; I422ToARGB4444Row = I422ToARGB4444Row_NEON;
@ -888,7 +754,8 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y,
#endif #endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
I422ToARGB4444Row(src_y, src_u, src_v, dst_argb4444, width); I422ToARGB4444Row(src_y, src_u, src_v, dst_argb4444, &kYuvI601Constants,
width);
dst_argb4444 += dst_stride_argb4444; dst_argb4444 += dst_stride_argb4444;
src_y += src_stride_y; src_y += src_stride_y;
if (y & 1) { if (y & 1) {
@ -911,6 +778,7 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) = I422ToRGB565Row_C; int width) = I422ToRGB565Row_C;
if (!src_y || !src_u || !src_v || !dst_rgb565 || if (!src_y || !src_u || !src_v || !dst_rgb565 ||
width <= 0 || height == 0) { width <= 0 || height == 0) {
@ -923,14 +791,23 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y,
dst_stride_rgb565 = -dst_stride_rgb565; dst_stride_rgb565 = -dst_stride_rgb565;
} }
#if defined(HAS_I422TORGB565ROW_SSSE3) #if defined(HAS_I422TORGB565ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { if (TestCpuFlag(kCpuHasSSSE3)) {
I422ToRGB565Row = I422ToRGB565Row_Any_SSSE3; I422ToRGB565Row = I422ToRGB565Row_Any_SSSE3;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
I422ToRGB565Row = I422ToRGB565Row_SSSE3; I422ToRGB565Row = I422ToRGB565Row_SSSE3;
} }
} }
#elif defined(HAS_I422TORGB565ROW_NEON) #endif
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { #if defined(HAS_I422TORGB565ROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
I422ToRGB565Row = I422ToRGB565Row_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
I422ToRGB565Row = I422ToRGB565Row_AVX2;
}
}
#endif
#if defined(HAS_I422TORGB565ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I422ToRGB565Row = I422ToRGB565Row_Any_NEON; I422ToRGB565Row = I422ToRGB565Row_Any_NEON;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
I422ToRGB565Row = I422ToRGB565Row_NEON; I422ToRGB565Row = I422ToRGB565Row_NEON;
@ -939,7 +816,7 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y,
#endif #endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, width); I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, &kYuvI601Constants, width);
dst_rgb565 += dst_stride_rgb565; dst_rgb565 += dst_stride_rgb565;
src_y += src_stride_y; src_y += src_stride_y;
if (y & 1) { if (y & 1) {
@ -950,6 +827,118 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y,
return 0; return 0;
} }
// Ordered 4x4 dither for 888 to 565. Values from 0 to 7.
// Laid out row by row: 4 rows of 4 bytes (16 bytes total).
// I420ToRGB565Dither falls back to this table when its dither4x4 argument is
// NULL, and picks one 4-byte row per image row using (y & 3).
static const uint8 kDither565_4x4[16] = {
0, 4, 1, 5,
6, 2, 7, 3,
1, 5, 0, 4,
7, 3, 6, 2,
};
// Convert I420 to RGB565 with dithering.
//
// Each image row is first converted to a temporary ARGB row (width * 4 bytes)
// using kYuvI601Constants, then packed to RGB565 with a 4-byte dither pattern.
//
//   src_y/u/v, src_stride_*  Source planes and their strides. The U and V
//                            pointers advance only after every odd row, so
//                            two consecutive image rows share one chroma row.
//   dst_rgb565, dst_stride_rgb565  Destination buffer and its stride.
//   dither4x4  16 bytes read as 4 rows of 4 values; row (y & 3) is used for
//              image row y. May be NULL, in which case kDither565_4x4 is used.
//   width      Must be > 0.
//   height     Must be non-zero. A negative height inverts the image: the
//              destination is written starting from its last row with a
//              negated stride.
//
// Returns 0 on success, -1 if a plane/destination pointer is NULL or the
// dimensions are invalid.
LIBYUV_API
int I420ToRGB565Dither(const uint8* src_y, int src_stride_y,
                       const uint8* src_u, int src_stride_u,
                       const uint8* src_v, int src_stride_v,
                       uint8* dst_rgb565, int dst_stride_rgb565,
                       const uint8* dither4x4, int width, int height) {
  int y;
  void (*I422ToARGBRow)(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
                        const struct YuvConstants* yuvconstants,
                        int width) = I422ToARGBRow_C;
  void (*ARGBToRGB565DitherRow)(const uint8* src_argb, uint8* dst_rgb,
      const uint32 dither4, int width) = ARGBToRGB565DitherRow_C;
  if (!src_y || !src_u || !src_v || !dst_rgb565 ||
      width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
    dst_stride_rgb565 = -dst_stride_rgb565;
  }
  // No caller-supplied pattern: use the built-in table.
  if (!dither4x4) {
    dither4x4 = kDither565_4x4;
  }
  // Select the YUV -> ARGB row function. Later blocks override earlier ones
  // when both are compiled in and the CPU flag is set.
#if defined(HAS_I422TOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      I422ToARGBRow = I422ToARGBRow_SSSE3;
    }
  }
#endif
#if defined(HAS_I422TOARGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    I422ToARGBRow = I422ToARGBRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      I422ToARGBRow = I422ToARGBRow_AVX2;
    }
  }
#endif
#if defined(HAS_I422TOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    I422ToARGBRow = I422ToARGBRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      I422ToARGBRow = I422ToARGBRow_NEON;
    }
  }
#endif
#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2)) {
    I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
  }
#endif
  // Select the ARGB -> dithered RGB565 row function the same way.
#if defined(HAS_ARGBTORGB565DITHERROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_SSE2;
    if (IS_ALIGNED(width, 4)) {
      ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_NEON;
    }
  }
#endif
  {
    // Allocate a row of argb.
    align_buffer_64(row_argb, width * 4);
    for (y = 0; y < height; ++y) {
      // Pick the 4-byte dither row for this image row and copy it byte by
      // byte into a uint32. The caller's table is a const uint8 array with no
      // alignment guarantee, so it must not be dereferenced as a uint32
      // directly. The byte-wise copy keeps native byte order, giving the same
      // value an aligned uint32 load of those 4 bytes would.
      const uint8* dither_row = dither4x4 + ((y & 3) << 2);
      uint32 dither4;
      uint8* dither4_bytes = (uint8*)(&dither4);
      dither4_bytes[0] = dither_row[0];
      dither4_bytes[1] = dither_row[1];
      dither4_bytes[2] = dither_row[2];
      dither4_bytes[3] = dither_row[3];

      I422ToARGBRow(src_y, src_u, src_v, row_argb, &kYuvI601Constants, width);
      ARGBToRGB565DitherRow(row_argb, dst_rgb565, dither4, width);
      dst_rgb565 += dst_stride_rgb565;
      src_y += src_stride_y;
      // Chroma planes advance once per two image rows.
      if (y & 1) {
        src_u += src_stride_u;
        src_v += src_stride_v;
      }
    }
    free_aligned_buffer_64(row_argb);
  }
  return 0;
}
// Convert I420 to specified format // Convert I420 to specified format
LIBYUV_API LIBYUV_API
int ConvertFromI420(const uint8* y, int y_stride, int ConvertFromI420(const uint8* y, int y_stride,
@ -1054,38 +1043,6 @@ int ConvertFromI420(const uint8* y, int y_stride,
dst_sample_stride ? dst_sample_stride : width * 4, dst_sample_stride ? dst_sample_stride : width * 4,
width, height); width, height);
break; break;
case FOURCC_BGGR:
r = I420ToBayerBGGR(y, y_stride,
u, u_stride,
v, v_stride,
dst_sample,
dst_sample_stride ? dst_sample_stride : width,
width, height);
break;
case FOURCC_GBRG:
r = I420ToBayerGBRG(y, y_stride,
u, u_stride,
v, v_stride,
dst_sample,
dst_sample_stride ? dst_sample_stride : width,
width, height);
break;
case FOURCC_GRBG:
r = I420ToBayerGRBG(y, y_stride,
u, u_stride,
v, v_stride,
dst_sample,
dst_sample_stride ? dst_sample_stride : width,
width, height);
break;
case FOURCC_RGGB:
r = I420ToBayerRGGB(y, y_stride,
u, u_stride,
v, v_stride,
dst_sample,
dst_sample_stride ? dst_sample_stride : width,
width, height);
break;
case FOURCC_I400: case FOURCC_I400:
r = I400Copy(y, y_stride, r = I400Copy(y, y_stride,
dst_sample, dst_sample,
@ -1116,7 +1073,7 @@ int ConvertFromI420(const uint8* y, int y_stride,
width, height); width, height);
break; break;
} }
// TODO(fbarchard): Add M420 and Q420. // TODO(fbarchard): Add M420.
// Triplanar formats // Triplanar formats
// TODO(fbarchard): halfstride instead of halfwidth // TODO(fbarchard): halfstride instead of halfwidth
case FOURCC_I420: case FOURCC_I420:

View File

@ -12,7 +12,6 @@
#include "libyuv/basic_types.h" #include "libyuv/basic_types.h"
#include "libyuv/cpu_id.h" #include "libyuv/cpu_id.h"
#include "libyuv/format_conversion.h"
#include "libyuv/planar_functions.h" #include "libyuv/planar_functions.h"
#include "libyuv/row.h" #include "libyuv/row.h"
@ -29,10 +28,10 @@ int ARGBToI444(const uint8* src_argb, int src_stride_argb,
uint8* dst_v, int dst_stride_v, uint8* dst_v, int dst_stride_v,
int width, int height) { int width, int height) {
int y; int y;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYRow_C; ARGBToYRow_C;
void (*ARGBToUV444Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v, void (*ARGBToUV444Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
int pix) = ARGBToUV444Row_C; int width) = ARGBToUV444Row_C;
if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
return -1; return -1;
} }
@ -51,17 +50,15 @@ int ARGBToI444(const uint8* src_argb, int src_stride_argb,
src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0; src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0;
} }
#if defined(HAS_ARGBTOUV444ROW_SSSE3) #if defined(HAS_ARGBTOUV444ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUV444Row = ARGBToUV444Row_Any_SSSE3; ARGBToUV444Row = ARGBToUV444Row_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToUV444Row = ARGBToUV444Row_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToUV444Row = ARGBToUV444Row_SSSE3; ARGBToUV444Row = ARGBToUV444Row_SSSE3;
} }
} }
} #endif
#elif defined(HAS_ARGBTOUV444ROW_NEON) #if defined(HAS_ARGBTOUV444ROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUV444Row = ARGBToUV444Row_Any_NEON; ARGBToUV444Row = ARGBToUV444Row_Any_NEON;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
ARGBToUV444Row = ARGBToUV444Row_NEON; ARGBToUV444Row = ARGBToUV444Row_NEON;
@ -69,19 +66,23 @@ int ARGBToI444(const uint8* src_argb, int src_stride_argb,
} }
#endif #endif
#if defined(HAS_ARGBTOYROW_SSSE3) #if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYRow = ARGBToYRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3;
} }
} }
#endif
#if defined(HAS_ARGBTOYROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToYRow = ARGBToYRow_AVX2;
} }
}
#elif defined(HAS_ARGBTOYROW_NEON) #endif
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { #if defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYRow = ARGBToYRow_Any_NEON; ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON; ARGBToYRow = ARGBToYRow_NEON;
@ -109,8 +110,8 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb,
int width, int height) { int width, int height) {
int y; int y;
void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v, void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
int pix) = ARGBToUV422Row_C; int width) = ARGBToUV422Row_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYRow_C; ARGBToYRow_C;
if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
return -1; return -1;
@ -130,37 +131,39 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb,
src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0; src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0;
} }
#if defined(HAS_ARGBTOUV422ROW_SSSE3) #if defined(HAS_ARGBTOUV422ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3; ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToUV422Row = ARGBToUV422Row_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToUV422Row = ARGBToUV422Row_SSSE3; ARGBToUV422Row = ARGBToUV422Row_SSSE3;
} }
} }
} #endif
#elif defined(HAS_ARGBTOUV422ROW_NEON) #if defined(HAS_ARGBTOUV422ROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 16) { if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUV422Row = ARGBToUV422Row_Any_NEON; ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToUV422Row = ARGBToUV422Row_NEON; ARGBToUV422Row = ARGBToUV422Row_NEON;
} }
} }
#endif #endif
#if defined(HAS_ARGBTOYROW_SSSE3) #if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYRow = ARGBToYRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3;
} }
} }
#endif
#if defined(HAS_ARGBTOYROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToYRow = ARGBToYRow_AVX2;
} }
#elif defined(HAS_ARGBTOYROW_NEON) }
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { #endif
#if defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYRow = ARGBToYRow_Any_NEON; ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON; ARGBToYRow = ARGBToYRow_NEON;
@ -188,8 +191,8 @@ int ARGBToI411(const uint8* src_argb, int src_stride_argb,
int width, int height) { int width, int height) {
int y; int y;
void (*ARGBToUV411Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v, void (*ARGBToUV411Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
int pix) = ARGBToUV411Row_C; int width) = ARGBToUV411Row_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYRow_C; ARGBToYRow_C;
if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
return -1; return -1;
@ -209,19 +212,15 @@ int ARGBToI411(const uint8* src_argb, int src_stride_argb,
src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0; src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0;
} }
#if defined(HAS_ARGBTOYROW_SSSE3) #if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYRow = ARGBToYRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3;
} }
} }
}
#endif #endif
#if defined(HAS_ARGBTOYROW_AVX2) #if defined(HAS_ARGBTOYROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && width >= 32) { if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToYRow = ARGBToYRow_Any_AVX2; ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) { if (IS_ALIGNED(width, 32)) {
ARGBToYRow = ARGBToYRow_AVX2; ARGBToYRow = ARGBToYRow_AVX2;
@ -229,7 +228,7 @@ int ARGBToI411(const uint8* src_argb, int src_stride_argb,
} }
#endif #endif
#if defined(HAS_ARGBTOYROW_NEON) #if defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYRow = ARGBToYRow_Any_NEON; ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON; ARGBToYRow = ARGBToYRow_NEON;
@ -237,7 +236,7 @@ int ARGBToI411(const uint8* src_argb, int src_stride_argb,
} }
#endif #endif
#if defined(HAS_ARGBTOUV411ROW_NEON) #if defined(HAS_ARGBTOUV411ROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 32) { if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUV411Row = ARGBToUV411Row_Any_NEON; ARGBToUV411Row = ARGBToUV411Row_Any_NEON;
if (IS_ALIGNED(width, 32)) { if (IS_ALIGNED(width, 32)) {
ARGBToUV411Row = ARGBToUV411Row_NEON; ARGBToUV411Row = ARGBToUV411Row_NEON;
@ -265,7 +264,7 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
int halfwidth = (width + 1) >> 1; int halfwidth = (width + 1) >> 1;
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYRow_C; ARGBToYRow_C;
void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv, void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) = MergeUVRow_C; int width) = MergeUVRow_C;
@ -281,22 +280,27 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
src_stride_argb = -src_stride_argb; src_stride_argb = -src_stride_argb;
} }
#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) #if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3; ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
ARGBToYRow = ARGBToYRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3;
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToUVRow = ARGBToUVRow_SSSE3;
if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3;
} }
} }
#endif
#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVRow = ARGBToUVRow_Any_AVX2;
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToUVRow = ARGBToUVRow_AVX2;
ARGBToYRow = ARGBToYRow_AVX2;
} }
} }
#elif defined(HAS_ARGBTOYROW_NEON) #endif
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { #if defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYRow = ARGBToYRow_Any_NEON; ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON; ARGBToYRow = ARGBToYRow_NEON;
@ -304,7 +308,7 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
} }
#endif #endif
#if defined(HAS_ARGBTOUVROW_NEON) #if defined(HAS_ARGBTOUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 16) { if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUVRow = ARGBToUVRow_Any_NEON; ARGBToUVRow = ARGBToUVRow_Any_NEON;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_NEON; ARGBToUVRow = ARGBToUVRow_NEON;
@ -312,18 +316,15 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
} }
#endif #endif
#if defined(HAS_MERGEUVROW_SSE2) #if defined(HAS_MERGEUVROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) { if (TestCpuFlag(kCpuHasSSE2)) {
MergeUVRow_ = MergeUVRow_Any_SSE2; MergeUVRow_ = MergeUVRow_Any_SSE2;
if (IS_ALIGNED(halfwidth, 16)) { if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_Unaligned_SSE2;
if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
MergeUVRow_ = MergeUVRow_SSE2; MergeUVRow_ = MergeUVRow_SSE2;
} }
} }
}
#endif #endif
#if defined(HAS_MERGEUVROW_AVX2) #if defined(HAS_MERGEUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) { if (TestCpuFlag(kCpuHasAVX2)) {
MergeUVRow_ = MergeUVRow_Any_AVX2; MergeUVRow_ = MergeUVRow_Any_AVX2;
if (IS_ALIGNED(halfwidth, 32)) { if (IS_ALIGNED(halfwidth, 32)) {
MergeUVRow_ = MergeUVRow_AVX2; MergeUVRow_ = MergeUVRow_AVX2;
@ -331,7 +332,7 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
} }
#endif #endif
#if defined(HAS_MERGEUVROW_NEON) #if defined(HAS_MERGEUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) { if (TestCpuFlag(kCpuHasNEON)) {
MergeUVRow_ = MergeUVRow_Any_NEON; MergeUVRow_ = MergeUVRow_Any_NEON;
if (IS_ALIGNED(halfwidth, 16)) { if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_NEON; MergeUVRow_ = MergeUVRow_NEON;
@ -340,8 +341,8 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
#endif #endif
{ {
// Allocate a rows of uv. // Allocate a rows of uv.
align_buffer_64(row_u, ((halfwidth + 15) & ~15) * 2); align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
uint8* row_v = row_u + ((halfwidth + 15) & ~15); uint8* row_v = row_u + ((halfwidth + 31) & ~31);
for (y = 0; y < height - 1; y += 2) { for (y = 0; y < height - 1; y += 2) {
ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width); ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
@ -372,7 +373,7 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
int halfwidth = (width + 1) >> 1; int halfwidth = (width + 1) >> 1;
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYRow_C; ARGBToYRow_C;
void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv, void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) = MergeUVRow_C; int width) = MergeUVRow_C;
@ -388,22 +389,27 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
src_stride_argb = -src_stride_argb; src_stride_argb = -src_stride_argb;
} }
#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) #if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3; ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
ARGBToYRow = ARGBToYRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3;
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToUVRow = ARGBToUVRow_SSSE3;
if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3;
} }
} }
#endif
#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVRow = ARGBToUVRow_Any_AVX2;
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToUVRow = ARGBToUVRow_AVX2;
ARGBToYRow = ARGBToYRow_AVX2;
} }
} }
#elif defined(HAS_ARGBTOYROW_NEON) #endif
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { #if defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYRow = ARGBToYRow_Any_NEON; ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON; ARGBToYRow = ARGBToYRow_NEON;
@ -411,7 +417,7 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
} }
#endif #endif
#if defined(HAS_ARGBTOUVROW_NEON) #if defined(HAS_ARGBTOUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 16) { if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUVRow = ARGBToUVRow_Any_NEON; ARGBToUVRow = ARGBToUVRow_Any_NEON;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_NEON; ARGBToUVRow = ARGBToUVRow_NEON;
@ -419,18 +425,15 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
} }
#endif #endif
#if defined(HAS_MERGEUVROW_SSE2) #if defined(HAS_MERGEUVROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) { if (TestCpuFlag(kCpuHasSSE2)) {
MergeUVRow_ = MergeUVRow_Any_SSE2; MergeUVRow_ = MergeUVRow_Any_SSE2;
if (IS_ALIGNED(halfwidth, 16)) { if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_Unaligned_SSE2;
if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
MergeUVRow_ = MergeUVRow_SSE2; MergeUVRow_ = MergeUVRow_SSE2;
} }
} }
}
#endif #endif
#if defined(HAS_MERGEUVROW_AVX2) #if defined(HAS_MERGEUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) { if (TestCpuFlag(kCpuHasAVX2)) {
MergeUVRow_ = MergeUVRow_Any_AVX2; MergeUVRow_ = MergeUVRow_Any_AVX2;
if (IS_ALIGNED(halfwidth, 32)) { if (IS_ALIGNED(halfwidth, 32)) {
MergeUVRow_ = MergeUVRow_AVX2; MergeUVRow_ = MergeUVRow_AVX2;
@ -438,7 +441,7 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
} }
#endif #endif
#if defined(HAS_MERGEUVROW_NEON) #if defined(HAS_MERGEUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) { if (TestCpuFlag(kCpuHasNEON)) {
MergeUVRow_ = MergeUVRow_Any_NEON; MergeUVRow_ = MergeUVRow_Any_NEON;
if (IS_ALIGNED(halfwidth, 16)) { if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_NEON; MergeUVRow_ = MergeUVRow_NEON;
@ -447,8 +450,8 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
#endif #endif
{ {
// Allocate a rows of uv. // Allocate a rows of uv.
align_buffer_64(row_u, ((halfwidth + 15) & ~15) * 2); align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
uint8* row_v = row_u + ((halfwidth + 15) & ~15); uint8* row_v = row_u + ((halfwidth + 31) & ~31);
for (y = 0; y < height - 1; y += 2) { for (y = 0; y < height - 1; y += 2) {
ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width); ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
@ -476,8 +479,8 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
int width, int height) { int width, int height) {
int y; int y;
void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v, void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
int pix) = ARGBToUV422Row_C; int width) = ARGBToUV422Row_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYRow_C; ARGBToYRow_C;
void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u, void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u,
const uint8* src_v, uint8* dst_yuy2, int width) = I422ToYUY2Row_C; const uint8* src_v, uint8* dst_yuy2, int width) = I422ToYUY2Row_C;
@ -500,17 +503,15 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
src_stride_argb = dst_stride_yuy2 = 0; src_stride_argb = dst_stride_yuy2 = 0;
} }
#if defined(HAS_ARGBTOUV422ROW_SSSE3) #if defined(HAS_ARGBTOUV422ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3; ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToUV422Row = ARGBToUV422Row_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToUV422Row = ARGBToUV422Row_SSSE3; ARGBToUV422Row = ARGBToUV422Row_SSSE3;
} }
} }
} #endif
#elif defined(HAS_ARGBTOUV422ROW_NEON) #if defined(HAS_ARGBTOUV422ROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 16) { if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUV422Row = ARGBToUV422Row_Any_NEON; ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToUV422Row = ARGBToUV422Row_NEON; ARGBToUV422Row = ARGBToUV422Row_NEON;
@ -518,17 +519,23 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
} }
#endif #endif
#if defined(HAS_ARGBTOYROW_SSSE3) #if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYRow = ARGBToYRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3;
} }
} }
#endif
#if defined(HAS_ARGBTOYROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToYRow = ARGBToYRow_AVX2;
} }
#elif defined(HAS_ARGBTOYROW_NEON) }
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { #endif
#if defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYRow = ARGBToYRow_Any_NEON; ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON; ARGBToYRow = ARGBToYRow_NEON;
@ -537,14 +544,15 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
#endif #endif
#if defined(HAS_I422TOYUY2ROW_SSE2) #if defined(HAS_I422TOYUY2ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && width >= 16) { if (TestCpuFlag(kCpuHasSSE2)) {
I422ToYUY2Row = I422ToYUY2Row_Any_SSE2; I422ToYUY2Row = I422ToYUY2Row_Any_SSE2;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
I422ToYUY2Row = I422ToYUY2Row_SSE2; I422ToYUY2Row = I422ToYUY2Row_SSE2;
} }
} }
#elif defined(HAS_I422TOYUY2ROW_NEON) #endif
if (TestCpuFlag(kCpuHasNEON) && width >= 16) { #if defined(HAS_I422TOYUY2ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I422ToYUY2Row = I422ToYUY2Row_Any_NEON; I422ToYUY2Row = I422ToYUY2Row_Any_NEON;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
I422ToYUY2Row = I422ToYUY2Row_NEON; I422ToYUY2Row = I422ToYUY2Row_NEON;
@ -578,8 +586,8 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
int width, int height) { int width, int height) {
int y; int y;
void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v, void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
int pix) = ARGBToUV422Row_C; int width) = ARGBToUV422Row_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYRow_C; ARGBToYRow_C;
void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u, void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u,
const uint8* src_v, uint8* dst_uyvy, int width) = I422ToUYVYRow_C; const uint8* src_v, uint8* dst_uyvy, int width) = I422ToUYVYRow_C;
@ -602,17 +610,15 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
src_stride_argb = dst_stride_uyvy = 0; src_stride_argb = dst_stride_uyvy = 0;
} }
#if defined(HAS_ARGBTOUV422ROW_SSSE3) #if defined(HAS_ARGBTOUV422ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3; ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToUV422Row = ARGBToUV422Row_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToUV422Row = ARGBToUV422Row_SSSE3; ARGBToUV422Row = ARGBToUV422Row_SSSE3;
} }
} }
} #endif
#elif defined(HAS_ARGBTOUV422ROW_NEON) #if defined(HAS_ARGBTOUV422ROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 16) { if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUV422Row = ARGBToUV422Row_Any_NEON; ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToUV422Row = ARGBToUV422Row_NEON; ARGBToUV422Row = ARGBToUV422Row_NEON;
@ -620,17 +626,23 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
} }
#endif #endif
#if defined(HAS_ARGBTOYROW_SSSE3) #if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYRow = ARGBToYRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3;
} }
} }
#endif
#if defined(HAS_ARGBTOYROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToYRow = ARGBToYRow_AVX2;
} }
#elif defined(HAS_ARGBTOYROW_NEON) }
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { #endif
#if defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYRow = ARGBToYRow_Any_NEON; ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON; ARGBToYRow = ARGBToYRow_NEON;
@ -639,14 +651,15 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
#endif #endif
#if defined(HAS_I422TOUYVYROW_SSE2) #if defined(HAS_I422TOUYVYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && width >= 16) { if (TestCpuFlag(kCpuHasSSE2)) {
I422ToUYVYRow = I422ToUYVYRow_Any_SSE2; I422ToUYVYRow = I422ToUYVYRow_Any_SSE2;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
I422ToUYVYRow = I422ToUYVYRow_SSE2; I422ToUYVYRow = I422ToUYVYRow_SSE2;
} }
} }
#elif defined(HAS_I422TOUYVYROW_NEON) #endif
if (TestCpuFlag(kCpuHasNEON) && width >= 16) { #if defined(HAS_I422TOUYVYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I422ToUYVYRow = I422ToUYVYRow_Any_NEON; I422ToUYVYRow = I422ToUYVYRow_Any_NEON;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
I422ToUYVYRow = I422ToUYVYRow_NEON; I422ToUYVYRow = I422ToUYVYRow_NEON;
@ -679,7 +692,7 @@ int ARGBToI400(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y, uint8* dst_y, int dst_stride_y,
int width, int height) { int width, int height) {
int y; int y;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYRow_C; ARGBToYRow_C;
if (!src_argb || !dst_y || width <= 0 || height == 0) { if (!src_argb || !dst_y || width <= 0 || height == 0) {
return -1; return -1;
@ -697,19 +710,15 @@ int ARGBToI400(const uint8* src_argb, int src_stride_argb,
src_stride_argb = dst_stride_y = 0; src_stride_argb = dst_stride_y = 0;
} }
#if defined(HAS_ARGBTOYROW_SSSE3) #if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYRow = ARGBToYRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3;
} }
} }
}
#endif #endif
#if defined(HAS_ARGBTOYROW_AVX2) #if defined(HAS_ARGBTOYROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && width >= 32) { if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToYRow = ARGBToYRow_Any_AVX2; ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) { if (IS_ALIGNED(width, 32)) {
ARGBToYRow = ARGBToYRow_AVX2; ARGBToYRow = ARGBToYRow_AVX2;
@ -717,7 +726,7 @@ int ARGBToI400(const uint8* src_argb, int src_stride_argb,
} }
#endif #endif
#if defined(HAS_ARGBTOYROW_NEON) #if defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYRow = ARGBToYRow_Any_NEON; ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON; ARGBToYRow = ARGBToYRow_NEON;
@ -755,7 +764,7 @@ int ARGBToRGB24(const uint8* src_argb, int src_stride_argb,
uint8* dst_rgb24, int dst_stride_rgb24, uint8* dst_rgb24, int dst_stride_rgb24,
int width, int height) { int width, int height) {
int y; int y;
void (*ARGBToRGB24Row)(const uint8* src_argb, uint8* dst_rgb, int pix) = void (*ARGBToRGB24Row)(const uint8* src_argb, uint8* dst_rgb, int width) =
ARGBToRGB24Row_C; ARGBToRGB24Row_C;
if (!src_argb || !dst_rgb24 || width <= 0 || height == 0) { if (!src_argb || !dst_rgb24 || width <= 0 || height == 0) {
return -1; return -1;
@ -773,14 +782,15 @@ int ARGBToRGB24(const uint8* src_argb, int src_stride_argb,
src_stride_argb = dst_stride_rgb24 = 0; src_stride_argb = dst_stride_rgb24 = 0;
} }
#if defined(HAS_ARGBTORGB24ROW_SSSE3) #if defined(HAS_ARGBTORGB24ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToRGB24Row = ARGBToRGB24Row_Any_SSSE3; ARGBToRGB24Row = ARGBToRGB24Row_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToRGB24Row = ARGBToRGB24Row_SSSE3; ARGBToRGB24Row = ARGBToRGB24Row_SSSE3;
} }
} }
#elif defined(HAS_ARGBTORGB24ROW_NEON) #endif
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { #if defined(HAS_ARGBTORGB24ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToRGB24Row = ARGBToRGB24Row_Any_NEON; ARGBToRGB24Row = ARGBToRGB24Row_Any_NEON;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
ARGBToRGB24Row = ARGBToRGB24Row_NEON; ARGBToRGB24Row = ARGBToRGB24Row_NEON;
@ -802,7 +812,7 @@ int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
uint8* dst_raw, int dst_stride_raw, uint8* dst_raw, int dst_stride_raw,
int width, int height) { int width, int height) {
int y; int y;
void (*ARGBToRAWRow)(const uint8* src_argb, uint8* dst_rgb, int pix) = void (*ARGBToRAWRow)(const uint8* src_argb, uint8* dst_rgb, int width) =
ARGBToRAWRow_C; ARGBToRAWRow_C;
if (!src_argb || !dst_raw || width <= 0 || height == 0) { if (!src_argb || !dst_raw || width <= 0 || height == 0) {
return -1; return -1;
@ -820,14 +830,15 @@ int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
src_stride_argb = dst_stride_raw = 0; src_stride_argb = dst_stride_raw = 0;
} }
#if defined(HAS_ARGBTORAWROW_SSSE3) #if defined(HAS_ARGBTORAWROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToRAWRow = ARGBToRAWRow_Any_SSSE3; ARGBToRAWRow = ARGBToRAWRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToRAWRow = ARGBToRAWRow_SSSE3; ARGBToRAWRow = ARGBToRAWRow_SSSE3;
} }
} }
#elif defined(HAS_ARGBTORAWROW_NEON) #endif
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { #if defined(HAS_ARGBTORAWROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToRAWRow = ARGBToRAWRow_Any_NEON; ARGBToRAWRow = ARGBToRAWRow_Any_NEON;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
ARGBToRAWRow = ARGBToRAWRow_NEON; ARGBToRAWRow = ARGBToRAWRow_NEON;
@ -843,13 +854,74 @@ int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
return 0; return 0;
} }
// Ordered 4x4 dither for 888 to 565. Values from 0 to 7.
// The 16 bytes are laid out as 4 rows of 4 values; ARGBToRGB565Dither falls
// back to this table when the caller passes no dither matrix, and picks one
// 4-byte row per image row using (y & 3).
static const uint8 kDither565_4x4[16] = {
0, 4, 1, 5,
6, 2, 7, 3,
1, 5, 0, 4,
7, 3, 6, 2,
};
// Convert ARGB To RGB565 with 4x4 dither matrix (16 bytes).
//
// src_argb, src_stride_argb:     source ARGB image and its row stride in bytes.
// dst_rgb565, dst_stride_rgb565: destination image and its row stride in bytes.
// dither4x4: 16 dither bytes, consumed as four groups of 4 bytes; image row y
//            uses group (y & 3). When null, kDither565_4x4 is used instead.
// width, height: image dimensions. A negative height makes the source be read
//            from its last row upwards, i.e. the output is vertically flipped.
//
// Returns 0 on success, or -1 when src_argb or dst_rgb565 is null, width <= 0
// or height == 0.
LIBYUV_API
int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb,
                       uint8* dst_rgb565, int dst_stride_rgb565,
                       const uint8* dither4x4, int width, int height) {
  int y;
  void (*ARGBToRGB565DitherRow)(const uint8* src_argb, uint8* dst_rgb,
      const uint32 dither4, int width) = ARGBToRGB565DitherRow_C;
  if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image: start at the last source row and
  // walk backwards.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  if (!dither4x4) {
    dither4x4 = kDither565_4x4;
  }
  // Pick the row function. Later blocks override earlier ones, and the
  // non-"Any" variant is only chosen when width is a multiple of the
  // granularity tested below.
#if defined(HAS_ARGBTORGB565DITHERROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_SSE2;
    if (IS_ALIGNED(width, 4)) {
      ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_NEON;
    }
  }
#endif
  for (y = 0; y < height; ++y) {
    // Load this row's 4 dither bytes as one uint32 in native byte order.
    // The pointer stays const-qualified: the table may be the static const
    // default and must never be written through.
    // NOTE(review): assumes dither4x4 is suitably aligned for a uint32 load;
    // confirm for caller-supplied tables.
    ARGBToRGB565DitherRow(src_argb, dst_rgb565,
                          *(const uint32*)(dither4x4 + ((y & 3) << 2)), width);
    src_argb += src_stride_argb;
    dst_rgb565 += dst_stride_rgb565;
  }
  return 0;
}
// Convert ARGB To RGB565. // Convert ARGB To RGB565.
// TODO(fbarchard): Consider using dither function low level with zeros.
LIBYUV_API LIBYUV_API
int ARGBToRGB565(const uint8* src_argb, int src_stride_argb, int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
uint8* dst_rgb565, int dst_stride_rgb565, uint8* dst_rgb565, int dst_stride_rgb565,
int width, int height) { int width, int height) {
int y; int y;
void (*ARGBToRGB565Row)(const uint8* src_argb, uint8* dst_rgb, int pix) = void (*ARGBToRGB565Row)(const uint8* src_argb, uint8* dst_rgb, int width) =
ARGBToRGB565Row_C; ARGBToRGB565Row_C;
if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) { if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) {
return -1; return -1;
@ -867,15 +939,23 @@ int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
src_stride_argb = dst_stride_rgb565 = 0; src_stride_argb = dst_stride_rgb565 = 0;
} }
#if defined(HAS_ARGBTORGB565ROW_SSE2) #if defined(HAS_ARGBTORGB565ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && width >= 4 && if (TestCpuFlag(kCpuHasSSE2)) {
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToRGB565Row = ARGBToRGB565Row_Any_SSE2; ARGBToRGB565Row = ARGBToRGB565Row_Any_SSE2;
if (IS_ALIGNED(width, 4)) { if (IS_ALIGNED(width, 4)) {
ARGBToRGB565Row = ARGBToRGB565Row_SSE2; ARGBToRGB565Row = ARGBToRGB565Row_SSE2;
} }
} }
#elif defined(HAS_ARGBTORGB565ROW_NEON) #endif
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { #if defined(HAS_ARGBTORGB565ROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToRGB565Row = ARGBToRGB565Row_Any_AVX2;
if (IS_ALIGNED(width, 8)) {
ARGBToRGB565Row = ARGBToRGB565Row_AVX2;
}
}
#endif
#if defined(HAS_ARGBTORGB565ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToRGB565Row = ARGBToRGB565Row_Any_NEON; ARGBToRGB565Row = ARGBToRGB565Row_Any_NEON;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
ARGBToRGB565Row = ARGBToRGB565Row_NEON; ARGBToRGB565Row = ARGBToRGB565Row_NEON;
@ -897,7 +977,7 @@ int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb1555, int dst_stride_argb1555, uint8* dst_argb1555, int dst_stride_argb1555,
int width, int height) { int width, int height) {
int y; int y;
void (*ARGBToARGB1555Row)(const uint8* src_argb, uint8* dst_rgb, int pix) = void (*ARGBToARGB1555Row)(const uint8* src_argb, uint8* dst_rgb, int width) =
ARGBToARGB1555Row_C; ARGBToARGB1555Row_C;
if (!src_argb || !dst_argb1555 || width <= 0 || height == 0) { if (!src_argb || !dst_argb1555 || width <= 0 || height == 0) {
return -1; return -1;
@ -915,15 +995,23 @@ int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb,
src_stride_argb = dst_stride_argb1555 = 0; src_stride_argb = dst_stride_argb1555 = 0;
} }
#if defined(HAS_ARGBTOARGB1555ROW_SSE2) #if defined(HAS_ARGBTOARGB1555ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && width >= 4 && if (TestCpuFlag(kCpuHasSSE2)) {
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToARGB1555Row = ARGBToARGB1555Row_Any_SSE2; ARGBToARGB1555Row = ARGBToARGB1555Row_Any_SSE2;
if (IS_ALIGNED(width, 4)) { if (IS_ALIGNED(width, 4)) {
ARGBToARGB1555Row = ARGBToARGB1555Row_SSE2; ARGBToARGB1555Row = ARGBToARGB1555Row_SSE2;
} }
} }
#elif defined(HAS_ARGBTOARGB1555ROW_NEON) #endif
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { #if defined(HAS_ARGBTOARGB1555ROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToARGB1555Row = ARGBToARGB1555Row_Any_AVX2;
if (IS_ALIGNED(width, 8)) {
ARGBToARGB1555Row = ARGBToARGB1555Row_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOARGB1555ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToARGB1555Row = ARGBToARGB1555Row_Any_NEON; ARGBToARGB1555Row = ARGBToARGB1555Row_Any_NEON;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
ARGBToARGB1555Row = ARGBToARGB1555Row_NEON; ARGBToARGB1555Row = ARGBToARGB1555Row_NEON;
@ -945,7 +1033,7 @@ int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb4444, int dst_stride_argb4444, uint8* dst_argb4444, int dst_stride_argb4444,
int width, int height) { int width, int height) {
int y; int y;
void (*ARGBToARGB4444Row)(const uint8* src_argb, uint8* dst_rgb, int pix) = void (*ARGBToARGB4444Row)(const uint8* src_argb, uint8* dst_rgb, int width) =
ARGBToARGB4444Row_C; ARGBToARGB4444Row_C;
if (!src_argb || !dst_argb4444 || width <= 0 || height == 0) { if (!src_argb || !dst_argb4444 || width <= 0 || height == 0) {
return -1; return -1;
@ -963,15 +1051,23 @@ int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb,
src_stride_argb = dst_stride_argb4444 = 0; src_stride_argb = dst_stride_argb4444 = 0;
} }
#if defined(HAS_ARGBTOARGB4444ROW_SSE2) #if defined(HAS_ARGBTOARGB4444ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && width >= 4 && if (TestCpuFlag(kCpuHasSSE2)) {
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToARGB4444Row = ARGBToARGB4444Row_Any_SSE2; ARGBToARGB4444Row = ARGBToARGB4444Row_Any_SSE2;
if (IS_ALIGNED(width, 4)) { if (IS_ALIGNED(width, 4)) {
ARGBToARGB4444Row = ARGBToARGB4444Row_SSE2; ARGBToARGB4444Row = ARGBToARGB4444Row_SSE2;
} }
} }
#elif defined(HAS_ARGBTOARGB4444ROW_NEON) #endif
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { #if defined(HAS_ARGBTOARGB4444ROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToARGB4444Row = ARGBToARGB4444Row_Any_AVX2;
if (IS_ALIGNED(width, 8)) {
ARGBToARGB4444Row = ARGBToARGB4444Row_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOARGB4444ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToARGB4444Row = ARGBToARGB4444Row_Any_NEON; ARGBToARGB4444Row = ARGBToARGB4444Row_Any_NEON;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
ARGBToARGB4444Row = ARGBToARGB4444Row_NEON; ARGBToARGB4444Row = ARGBToARGB4444Row_NEON;
@ -997,7 +1093,7 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
int y; int y;
void (*ARGBToUVJRow)(const uint8* src_argb0, int src_stride_argb, void (*ARGBToUVJRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVJRow_C; uint8* dst_u, uint8* dst_v, int width) = ARGBToUVJRow_C;
void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int pix) = void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int width) =
ARGBToYJRow_C; ARGBToYJRow_C;
if (!src_argb || if (!src_argb ||
!dst_yj || !dst_u || !dst_v || !dst_yj || !dst_u || !dst_v ||
@ -1011,23 +1107,17 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
src_stride_argb = -src_stride_argb; src_stride_argb = -src_stride_argb;
} }
#if defined(HAS_ARGBTOYJROW_SSSE3) && defined(HAS_ARGBTOUVJROW_SSSE3) #if defined(HAS_ARGBTOYJROW_SSSE3) && defined(HAS_ARGBTOUVJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3; ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3;
ARGBToYJRow = ARGBToYJRow_Any_SSSE3; ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToUVJRow = ARGBToUVJRow_Unaligned_SSSE3;
ARGBToYJRow = ARGBToYJRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToUVJRow = ARGBToUVJRow_SSSE3; ARGBToUVJRow = ARGBToUVJRow_SSSE3;
if (IS_ALIGNED(dst_yj, 16) && IS_ALIGNED(dst_stride_yj, 16)) {
ARGBToYJRow = ARGBToYJRow_SSSE3; ARGBToYJRow = ARGBToYJRow_SSSE3;
} }
} }
}
}
#endif #endif
#if defined(HAS_ARGBTOYJROW_AVX2) && defined(HAS_ARGBTOUVJROW_AVX2) #if defined(HAS_ARGBTOYJROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && width >= 32) { if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToYJRow = ARGBToYJRow_Any_AVX2; ARGBToYJRow = ARGBToYJRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) { if (IS_ALIGNED(width, 32)) {
ARGBToYJRow = ARGBToYJRow_AVX2; ARGBToYJRow = ARGBToYJRow_AVX2;
@ -1035,7 +1125,7 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
} }
#endif #endif
#if defined(HAS_ARGBTOYJROW_NEON) #if defined(HAS_ARGBTOYJROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYJRow = ARGBToYJRow_Any_NEON; ARGBToYJRow = ARGBToYJRow_Any_NEON;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
ARGBToYJRow = ARGBToYJRow_NEON; ARGBToYJRow = ARGBToYJRow_NEON;
@ -1043,7 +1133,7 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
} }
#endif #endif
#if defined(HAS_ARGBTOUVJROW_NEON) #if defined(HAS_ARGBTOUVJROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 16) { if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUVJRow = ARGBToUVJRow_Any_NEON; ARGBToUVJRow = ARGBToUVJRow_Any_NEON;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToUVJRow = ARGBToUVJRow_NEON; ARGBToUVJRow = ARGBToUVJRow_NEON;
@ -1067,13 +1157,95 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
return 0; return 0;
} }
// Convert ARGB (little endian, so B, G, R, A byte order in memory) to J422.
// Each source row produces one Y row plus one U row and one V row.
// Returns 0 on success, -1 when a plane pointer is null or the dimensions are
// unusable. A negative height reads the source bottom-up (vertical flip).
LIBYUV_API
int ARGBToJ422(const uint8* src_argb, int src_stride_argb,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_u, int dst_stride_u,
               uint8* dst_v, int dst_stride_v,
               int width, int height) {
  int rows_left;
  void (*UVJ422RowFn)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
                      int width) = ARGBToUVJ422Row_C;
  void (*YJRowFn)(const uint8* src_argb, uint8* dst_y, int width) =
      ARGBToYJRow_C;

  if (!(src_argb && dst_y && dst_u && dst_v && width > 0 && height != 0)) {
    return -1;
  }
  // Inverted image requested: begin at the last row and step backwards.
  if (height < 0) {
    height = -height;
    src_argb += (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // When every plane is tightly packed the whole image can be processed as
  // one long row.
  if (dst_stride_y == width &&
      dst_stride_u * 2 == width &&
      dst_stride_v * 2 == width &&
      src_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb = 0;
    dst_stride_y = 0;
    dst_stride_u = 0;
    dst_stride_v = 0;
  }

  // Chroma row function: later candidates override earlier ones.
#if defined(HAS_ARGBTOUVJ422ROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    UVJ422RowFn = IS_ALIGNED(width, 16) ? ARGBToUVJ422Row_SSSE3
                                        : ARGBToUVJ422Row_Any_SSSE3;
  }
#endif
#if defined(HAS_ARGBTOUVJ422ROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    UVJ422RowFn = IS_ALIGNED(width, 16) ? ARGBToUVJ422Row_NEON
                                        : ARGBToUVJ422Row_Any_NEON;
  }
#endif

  // Luma row function: later candidates override earlier ones.
#if defined(HAS_ARGBTOYJROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    YJRowFn = IS_ALIGNED(width, 16) ? ARGBToYJRow_SSSE3
                                    : ARGBToYJRow_Any_SSSE3;
  }
#endif
#if defined(HAS_ARGBTOYJROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    YJRowFn = IS_ALIGNED(width, 32) ? ARGBToYJRow_AVX2
                                    : ARGBToYJRow_Any_AVX2;
  }
#endif
#if defined(HAS_ARGBTOYJROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    YJRowFn = IS_ALIGNED(width, 8) ? ARGBToYJRow_NEON
                                   : ARGBToYJRow_Any_NEON;
  }
#endif

  // Emit chroma then luma for each row, advancing every plane by its stride.
  for (rows_left = height; rows_left > 0; --rows_left) {
    UVJ422RowFn(src_argb, dst_u, dst_v, width);
    YJRowFn(src_argb, dst_y, width);
    src_argb += src_stride_argb;
    dst_y += dst_stride_y;
    dst_u += dst_stride_u;
    dst_v += dst_stride_v;
  }
  return 0;
}
// Convert ARGB to J400. // Convert ARGB to J400.
LIBYUV_API LIBYUV_API
int ARGBToJ400(const uint8* src_argb, int src_stride_argb, int ARGBToJ400(const uint8* src_argb, int src_stride_argb,
uint8* dst_yj, int dst_stride_yj, uint8* dst_yj, int dst_stride_yj,
int width, int height) { int width, int height) {
int y; int y;
void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int pix) = void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int width) =
ARGBToYJRow_C; ARGBToYJRow_C;
if (!src_argb || !dst_yj || width <= 0 || height == 0) { if (!src_argb || !dst_yj || width <= 0 || height == 0) {
return -1; return -1;
@ -1091,19 +1263,15 @@ int ARGBToJ400(const uint8* src_argb, int src_stride_argb,
src_stride_argb = dst_stride_yj = 0; src_stride_argb = dst_stride_yj = 0;
} }
#if defined(HAS_ARGBTOYJROW_SSSE3) #if defined(HAS_ARGBTOYJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYJRow = ARGBToYJRow_Any_SSSE3; ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToYJRow = ARGBToYJRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
IS_ALIGNED(dst_yj, 16) && IS_ALIGNED(dst_stride_yj, 16)) {
ARGBToYJRow = ARGBToYJRow_SSSE3; ARGBToYJRow = ARGBToYJRow_SSSE3;
} }
} }
}
#endif #endif
#if defined(HAS_ARGBTOYJROW_AVX2) #if defined(HAS_ARGBTOYJROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && width >= 32) { if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToYJRow = ARGBToYJRow_Any_AVX2; ARGBToYJRow = ARGBToYJRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) { if (IS_ALIGNED(width, 32)) {
ARGBToYJRow = ARGBToYJRow_AVX2; ARGBToYJRow = ARGBToYJRow_AVX2;
@ -1111,7 +1279,7 @@ int ARGBToJ400(const uint8* src_argb, int src_stride_argb,
} }
#endif #endif
#if defined(HAS_ARGBTOYJROW_NEON) #if defined(HAS_ARGBTOYJROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYJRow = ARGBToYJRow_Any_NEON; ARGBToYJRow = ARGBToYJRow_Any_NEON;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
ARGBToYJRow = ARGBToYJRow_NEON; ARGBToYJRow = ARGBToYJRow_NEON;

View File

@ -11,7 +11,6 @@
#include "libyuv/convert_argb.h" #include "libyuv/convert_argb.h"
#include "libyuv/cpu_id.h" #include "libyuv/cpu_id.h"
#include "libyuv/format_conversion.h"
#ifdef HAVE_JPEG #ifdef HAVE_JPEG
#include "libyuv/mjpeg_decoder.h" #include "libyuv/mjpeg_decoder.h"
#endif #endif
@ -144,36 +143,6 @@ int ConvertToARGB(const uint8* sample, size_t sample_size,
crop_argb, argb_stride, crop_argb, argb_stride,
crop_width, inv_crop_height); crop_width, inv_crop_height);
break; break;
// TODO(fbarchard): Support cropping Bayer by odd numbers
// by adjusting fourcc.
case FOURCC_BGGR:
src = sample + (src_width * crop_y + crop_x);
r = BayerBGGRToARGB(src, src_width,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
case FOURCC_GBRG:
src = sample + (src_width * crop_y + crop_x);
r = BayerGBRGToARGB(src, src_width,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
case FOURCC_GRBG:
src = sample + (src_width * crop_y + crop_x);
r = BayerGRBGToARGB(src, src_width,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
case FOURCC_RGGB:
src = sample + (src_width * crop_y + crop_x);
r = BayerRGGBToARGB(src, src_width,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
case FOURCC_I400: case FOURCC_I400:
src = sample + src_width * crop_y + crop_x; src = sample + src_width * crop_y + crop_x;
r = I400ToARGB(src, src_width, r = I400ToARGB(src, src_width,
@ -205,15 +174,6 @@ int ConvertToARGB(const uint8* sample, size_t sample_size,
crop_argb, argb_stride, crop_argb, argb_stride,
crop_width, inv_crop_height); crop_width, inv_crop_height);
break; break;
// case FOURCC_Q420:
// src = sample + (src_width + aligned_src_width * 2) * crop_y + crop_x;
// src_uv = sample + (src_width + aligned_src_width * 2) * crop_y +
// src_width + crop_x * 2;
// r = Q420ToARGB(src, src_width * 3,
// src_uv, src_width * 3,
// crop_argb, argb_stride,
// crop_width, inv_crop_height);
// break;
// Triplanar formats // Triplanar formats
case FOURCC_I420: case FOURCC_I420:
case FOURCC_YU12: case FOURCC_YU12:
@ -241,6 +201,25 @@ int ConvertToARGB(const uint8* sample, size_t sample_size,
crop_width, inv_crop_height); crop_width, inv_crop_height);
break; break;
} }
case FOURCC_J420: {
const uint8* src_y = sample + (src_width * crop_y + crop_x);
const uint8* src_u;
const uint8* src_v;
int halfwidth = (src_width + 1) / 2;
int halfheight = (abs_src_height + 1) / 2;
src_u = sample + src_width * abs_src_height +
(halfwidth * crop_y + crop_x) / 2;
src_v = sample + src_width * abs_src_height +
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
r = J420ToARGB(src_y, src_width,
src_u, halfwidth,
src_v, halfwidth,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
}
case FOURCC_I422: case FOURCC_I422:
case FOURCC_YV16: { case FOURCC_YV16: {
const uint8* src_y = sample + src_width * crop_y + crop_x; const uint8* src_y = sample + src_width * crop_y + crop_x;

View File

@ -12,7 +12,6 @@
#include "libyuv/convert.h" #include "libyuv/convert.h"
#include "libyuv/format_conversion.h"
#include "libyuv/video_common.h" #include "libyuv/video_common.h"
#ifdef __cplusplus #ifdef __cplusplus
@ -173,40 +172,6 @@ int ConvertToI420(const uint8* sample,
v, v_stride, v, v_stride,
crop_width, inv_crop_height); crop_width, inv_crop_height);
break; break;
// TODO(fbarchard): Support cropping Bayer by odd numbers
// by adjusting fourcc.
case FOURCC_BGGR:
src = sample + (src_width * crop_y + crop_x);
r = BayerBGGRToI420(src, src_width,
y, y_stride,
u, u_stride,
v, v_stride,
crop_width, inv_crop_height);
break;
case FOURCC_GBRG:
src = sample + (src_width * crop_y + crop_x);
r = BayerGBRGToI420(src, src_width,
y, y_stride,
u, u_stride,
v, v_stride,
crop_width, inv_crop_height);
break;
case FOURCC_GRBG:
src = sample + (src_width * crop_y + crop_x);
r = BayerGRBGToI420(src, src_width,
y, y_stride,
u, u_stride,
v, v_stride,
crop_width, inv_crop_height);
break;
case FOURCC_RGGB:
src = sample + (src_width * crop_y + crop_x);
r = BayerRGGBToI420(src, src_width,
y, y_stride,
u, u_stride,
v, v_stride,
crop_width, inv_crop_height);
break;
case FOURCC_I400: case FOURCC_I400:
src = sample + src_width * crop_y + crop_x; src = sample + src_width * crop_y + crop_x;
r = I400ToI420(src, src_width, r = I400ToI420(src, src_width,
@ -218,7 +183,8 @@ int ConvertToI420(const uint8* sample,
// Biplanar formats // Biplanar formats
case FOURCC_NV12: case FOURCC_NV12:
src = sample + (src_width * crop_y + crop_x); src = sample + (src_width * crop_y + crop_x);
src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x; src_uv = sample + (src_width * src_height) +
((crop_y / 2) * aligned_src_width) + ((crop_x / 2) * 2);
r = NV12ToI420Rotate(src, src_width, r = NV12ToI420Rotate(src, src_width,
src_uv, aligned_src_width, src_uv, aligned_src_width,
y, y_stride, y, y_stride,
@ -228,7 +194,8 @@ int ConvertToI420(const uint8* sample,
break; break;
case FOURCC_NV21: case FOURCC_NV21:
src = sample + (src_width * crop_y + crop_x); src = sample + (src_width * crop_y + crop_x);
src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x; src_uv = sample + (src_width * src_height) +
((crop_y / 2) * aligned_src_width) + ((crop_x / 2) * 2);
// Call NV12 but with u and v parameters swapped. // Call NV12 but with u and v parameters swapped.
r = NV12ToI420Rotate(src, src_width, r = NV12ToI420Rotate(src, src_width,
src_uv, aligned_src_width, src_uv, aligned_src_width,
@ -245,17 +212,6 @@ int ConvertToI420(const uint8* sample,
v, v_stride, v, v_stride,
crop_width, inv_crop_height); crop_width, inv_crop_height);
break; break;
case FOURCC_Q420:
src = sample + (src_width + aligned_src_width * 2) * crop_y + crop_x;
src_uv = sample + (src_width + aligned_src_width * 2) * crop_y +
src_width + crop_x * 2;
r = Q420ToI420(src, src_width * 3,
src_uv, src_width * 3,
y, y_stride,
u, u_stride,
v, v_stride,
crop_width, inv_crop_height);
break;
// Triplanar formats // Triplanar formats
case FOURCC_I420: case FOURCC_I420:
case FOURCC_YU12: case FOURCC_YU12:

View File

@ -14,8 +14,8 @@
#include <intrin.h> // For __cpuidex() #include <intrin.h> // For __cpuidex()
#endif #endif
#if !defined(__pnacl__) && !defined(__CLR_VER) && \ #if !defined(__pnacl__) && !defined(__CLR_VER) && \
!defined(__native_client__) && \ !defined(__native_client__) && (defined(_M_IX86) || defined(_M_X64)) && \
defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219) defined(_MSC_VER) && !defined(__clang__) && (_MSC_FULL_VER >= 160040219)
#include <immintrin.h> // For _xgetbv() #include <immintrin.h> // For _xgetbv()
#endif #endif
@ -36,19 +36,20 @@ extern "C" {
// For functions that use the stack and have runtime checks for overflow, // For functions that use the stack and have runtime checks for overflow,
// use SAFEBUFFERS to avoid additional check. // use SAFEBUFFERS to avoid additional check.
#if defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219) #if (defined(_MSC_VER) && !defined(__clang__)) && (_MSC_FULL_VER >= 160040219)
#define SAFEBUFFERS __declspec(safebuffers) #define SAFEBUFFERS __declspec(safebuffers)
#else #else
#define SAFEBUFFERS #define SAFEBUFFERS
#endif #endif
// Low level cpuid for X86. Returns zeros on other CPUs. // Low level cpuid for X86.
#if !defined(__pnacl__) && !defined(__CLR_VER) && \ #if (defined(_M_IX86) || defined(_M_X64) || \
(defined(_M_IX86) || defined(_M_X64) || \ defined(__i386__) || defined(__x86_64__)) && \
defined(__i386__) || defined(__x86_64__)) !defined(__pnacl__) && !defined(__CLR_VER)
LIBYUV_API LIBYUV_API
void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) { void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) {
#if defined(_MSC_VER) && !defined(__clang__) #if defined(_MSC_VER) && !defined(__clang__)
// Visual C version uses intrinsic or inline x86 assembly.
#if (_MSC_FULL_VER >= 160040219) #if (_MSC_FULL_VER >= 160040219)
__cpuidex((int*)(cpu_info), info_eax, info_ecx); __cpuidex((int*)(cpu_info), info_eax, info_ecx);
#elif defined(_M_IX86) #elif defined(_M_IX86)
@ -62,16 +63,17 @@ void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) {
mov [edi + 8], ecx mov [edi + 8], ecx
mov [edi + 12], edx mov [edi + 12], edx
} }
#else #else // Visual C but not x86
if (info_ecx == 0) { if (info_ecx == 0) {
__cpuid((int*)(cpu_info), info_eax); __cpuid((int*)(cpu_info), info_eax);
} else { } else {
cpu_info[3] = cpu_info[2] = cpu_info[1] = cpu_info[0] = 0; cpu_info[3] = cpu_info[2] = cpu_info[1] = cpu_info[0] = 0;
} }
#endif #endif
#else // defined(_MSC_VER) // GCC version uses inline x86 assembly.
#else // defined(_MSC_VER) && !defined(__clang__)
uint32 info_ebx, info_edx; uint32 info_ebx, info_edx;
asm volatile ( // NOLINT asm volatile (
#if defined( __i386__) && defined(__PIC__) #if defined( __i386__) && defined(__PIC__)
// Preserve ebx for fpic 32 bit. // Preserve ebx for fpic 32 bit.
"mov %%ebx, %%edi \n" "mov %%ebx, %%edi \n"
@ -87,35 +89,47 @@ void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) {
cpu_info[1] = info_ebx; cpu_info[1] = info_ebx;
cpu_info[2] = info_ecx; cpu_info[2] = info_ecx;
cpu_info[3] = info_edx; cpu_info[3] = info_edx;
#endif // defined(_MSC_VER) #endif // defined(_MSC_VER) && !defined(__clang__)
} }
#else // (defined(_M_IX86) || defined(_M_X64) ...
#if !defined(__native_client__)
#define HAS_XGETBV
// X86 CPUs have xgetbv to detect OS saves high parts of ymm registers.
// Reads XCR0 via xgetbv (ecx = 0) and returns non-zero when bits 1 and 2 are
// both set, i.e. (xcr0 & 6) == 6. If none of the compiler/architecture
// branches below is compiled in, xcr0 keeps its initial 0 and the result is 0.
int TestOsSaveYmm() {
uint32 xcr0 = 0u;
// Newer Visual C: use the _xgetbv intrinsic; only the low 32 bits are kept.
#if defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219)
xcr0 = (uint32)(_xgetbv(0)); // VS2010 SP1 required.
// Older 32-bit Visual C: emit the raw opcode bytes 0x0f 0x01 0xd0 by hand.
#elif defined(_M_IX86) && defined(_MSC_VER)
__asm {
xor ecx, ecx // xcr 0
_asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0 // For VS2010 and earlier.
mov xcr0, eax
}
// GCC-style compilers: same opcode bytes; ecx is loaded with 0, the result is
// taken from eax, and edx is declared clobbered.
#elif defined(__i386__) || defined(__x86_64__)
asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcr0) : "c" (0) : "%edx");
#endif // defined(_MSC_VER)
return((xcr0 & 6) == 6); // Is ymm saved?
}
#endif // !defined(__native_client__)
#else
LIBYUV_API LIBYUV_API
void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info) { void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info) {
cpu_info[0] = cpu_info[1] = cpu_info[2] = cpu_info[3] = 0; cpu_info[0] = cpu_info[1] = cpu_info[2] = cpu_info[3] = 0;
} }
#endif #endif
// For VS2010 and earlier emit can be used:
// _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0 // For VS2010 and earlier.
// __asm {
// xor ecx, ecx // xcr 0
// xgetbv
// mov xcr0, eax
// }
// For VS2013 and earlier 32 bit, the _xgetbv(0) optimizer produces bad code.
// https://code.google.com/p/libyuv/issues/detail?id=529
#if defined(_M_IX86) && (_MSC_VER < 1900)
#pragma optimize("g", off)
#endif
#if (defined(_M_IX86) || defined(_M_X64) || \
defined(__i386__) || defined(__x86_64__)) && \
!defined(__pnacl__) && !defined(__CLR_VER) && !defined(__native_client__)
#define HAS_XGETBV
// X86 CPUs have xgetbv to detect OS saves high parts of ymm registers.
// Returns the low 32 bits of extended control register 0 (XCR0), read with
// xgetbv and ecx = 0. Returns 0 when neither branch below is compiled in,
// because xcr0 then keeps its initial value.
int GetXCR0() {
uint32 xcr0 = 0u;
// Visual C new enough to provide the _xgetbv intrinsic.
#if (_MSC_FULL_VER >= 160040219)
xcr0 = (uint32)(_xgetbv(0)); // VS2010 SP1 required.
// GCC-style compilers: raw opcode bytes; ecx = 0 in, eax out, edx clobbered.
#elif defined(__i386__) || defined(__x86_64__)
asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcr0) : "c" (0) : "%edx");
#endif // defined(__i386__) || defined(__x86_64__)
return xcr0;
}
#endif // defined(_M_IX86) || defined(_M_X64) ..
// Return optimization to previous setting.
#if defined(_M_IX86) && (_MSC_VER < 1900)
#pragma optimize("g", on)
#endif
// based on libvpx arm_cpudetect.c // based on libvpx arm_cpudetect.c
// For Arm, but public to allow testing on any CPU // For Arm, but public to allow testing on any CPU
LIBYUV_API SAFEBUFFERS LIBYUV_API SAFEBUFFERS
@ -134,36 +148,21 @@ int ArmCpuCaps(const char* cpuinfo_name) {
fclose(f); fclose(f);
return kCpuHasNEON; return kCpuHasNEON;
} }
// aarch64 uses asimd for Neon.
p = strstr(cpuinfo_line, " asimd");
if (p && (p[6] == ' ' || p[6] == '\n')) {
fclose(f);
return kCpuHasNEON;
}
} }
} }
fclose(f); fclose(f);
return 0; return 0;
} }
#if defined(__mips__) && defined(__linux__)
// Scans /proc/cpuinfo line by line for search_string.
// Returns kCpuHasMIPS_DSP as soon as a line contains the string, 0 when no
// line does. When the file cannot be opened the DSP flag is returned as well.
static int MipsCpuCaps(const char* search_string) {
  char line_buf[512];
  int found = 0;
  FILE* cpuinfo = fopen("/proc/cpuinfo", "r");
  if (cpuinfo == NULL) {
    // Assume DSP if /proc/cpuinfo is unavailable.
    // This will occur for Chrome sandbox for Pepper or Render process.
    return kCpuHasMIPS_DSP;
  }
  // Stop reading at the first matching line or at end of file.
  while (!found &&
         fgets(line_buf, sizeof(line_buf) - 1, cpuinfo) != NULL) {
    found = (strstr(line_buf, search_string) != NULL);
  }
  fclose(cpuinfo);
  return found ? kCpuHasMIPS_DSP : 0;
}
#endif
// CPU detect function for SIMD instruction sets. // CPU detect function for SIMD instruction sets.
LIBYUV_API LIBYUV_API
int cpu_info_ = kCpuInit; // cpu_info is not initialized yet. int cpu_info_ = 0; // cpu_info is not initialized yet.
// Test environment variable for disabling CPU features. Any non-zero value // Test environment variable for disabling CPU features. Any non-zero value
// to disable. Zero ignored to make it easy to set the variable on/off. // to disable. Zero ignored to make it easy to set the variable on/off.
@ -186,8 +185,9 @@ static LIBYUV_BOOL TestEnv(const char*) {
LIBYUV_API SAFEBUFFERS LIBYUV_API SAFEBUFFERS
int InitCpuFlags(void) { int InitCpuFlags(void) {
// TODO(fbarchard): swap kCpuInit logic so 0 means uninitialized.
int cpu_info = 0;
#if !defined(__pnacl__) && !defined(__CLR_VER) && defined(CPU_X86) #if !defined(__pnacl__) && !defined(__CLR_VER) && defined(CPU_X86)
uint32 cpu_info0[4] = { 0, 0, 0, 0 }; uint32 cpu_info0[4] = { 0, 0, 0, 0 };
uint32 cpu_info1[4] = { 0, 0, 0, 0 }; uint32 cpu_info1[4] = { 0, 0, 0, 0 };
uint32 cpu_info7[4] = { 0, 0, 0, 0 }; uint32 cpu_info7[4] = { 0, 0, 0, 0 };
@ -196,7 +196,7 @@ int InitCpuFlags(void) {
if (cpu_info0[0] >= 7) { if (cpu_info0[0] >= 7) {
CpuId(7, 0, cpu_info7); CpuId(7, 0, cpu_info7);
} }
cpu_info_ = ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) | cpu_info = ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) |
((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) | ((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) |
((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) | ((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) |
((cpu_info1[2] & 0x00100000) ? kCpuHasSSE42 : 0) | ((cpu_info1[2] & 0x00100000) ? kCpuHasSSE42 : 0) |
@ -205,83 +205,93 @@ int InitCpuFlags(void) {
kCpuHasX86; kCpuHasX86;
#ifdef HAS_XGETBV #ifdef HAS_XGETBV
if ((cpu_info1[2] & 0x18000000) == 0x18000000 && // AVX and OSSave // AVX requires CPU has AVX, XSAVE and OSXSave for xgetbv
TestOsSaveYmm()) { // Saves YMM. if (((cpu_info1[2] & 0x1c000000) == 0x1c000000) && // AVX and OSXSave
cpu_info_ |= ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) | ((GetXCR0() & 6) == 6)) { // Test OS saves YMM registers
kCpuHasAVX; cpu_info |= ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) | kCpuHasAVX;
// Detect AVX512bw
if ((GetXCR0() & 0xe0) == 0xe0) {
cpu_info |= (cpu_info7[1] & 0x40000000) ? kCpuHasAVX3 : 0;
}
} }
#endif #endif
// Environment variable overrides for testing. // Environment variable overrides for testing.
if (TestEnv("LIBYUV_DISABLE_X86")) { if (TestEnv("LIBYUV_DISABLE_X86")) {
cpu_info_ &= ~kCpuHasX86; cpu_info &= ~kCpuHasX86;
} }
if (TestEnv("LIBYUV_DISABLE_SSE2")) { if (TestEnv("LIBYUV_DISABLE_SSE2")) {
cpu_info_ &= ~kCpuHasSSE2; cpu_info &= ~kCpuHasSSE2;
} }
if (TestEnv("LIBYUV_DISABLE_SSSE3")) { if (TestEnv("LIBYUV_DISABLE_SSSE3")) {
cpu_info_ &= ~kCpuHasSSSE3; cpu_info &= ~kCpuHasSSSE3;
} }
if (TestEnv("LIBYUV_DISABLE_SSE41")) { if (TestEnv("LIBYUV_DISABLE_SSE41")) {
cpu_info_ &= ~kCpuHasSSE41; cpu_info &= ~kCpuHasSSE41;
} }
if (TestEnv("LIBYUV_DISABLE_SSE42")) { if (TestEnv("LIBYUV_DISABLE_SSE42")) {
cpu_info_ &= ~kCpuHasSSE42; cpu_info &= ~kCpuHasSSE42;
} }
if (TestEnv("LIBYUV_DISABLE_AVX")) { if (TestEnv("LIBYUV_DISABLE_AVX")) {
cpu_info_ &= ~kCpuHasAVX; cpu_info &= ~kCpuHasAVX;
} }
if (TestEnv("LIBYUV_DISABLE_AVX2")) { if (TestEnv("LIBYUV_DISABLE_AVX2")) {
cpu_info_ &= ~kCpuHasAVX2; cpu_info &= ~kCpuHasAVX2;
} }
if (TestEnv("LIBYUV_DISABLE_ERMS")) { if (TestEnv("LIBYUV_DISABLE_ERMS")) {
cpu_info_ &= ~kCpuHasERMS; cpu_info &= ~kCpuHasERMS;
} }
if (TestEnv("LIBYUV_DISABLE_FMA3")) { if (TestEnv("LIBYUV_DISABLE_FMA3")) {
cpu_info_ &= ~kCpuHasFMA3; cpu_info &= ~kCpuHasFMA3;
}
if (TestEnv("LIBYUV_DISABLE_AVX3")) {
cpu_info &= ~kCpuHasAVX3;
} }
#elif defined(__mips__) && defined(__linux__)
// Linux mips parse text file for dsp detect.
cpu_info_ = MipsCpuCaps("dsp"); // set kCpuHasMIPS_DSP.
#if defined(__mips_dspr2)
cpu_info_ |= kCpuHasMIPS_DSPR2;
#endif #endif
cpu_info_ |= kCpuHasMIPS; #if defined(__mips__) && defined(__linux__)
#if defined(__mips_dspr2)
cpu_info |= kCpuHasMIPS_DSPR2;
#endif
cpu_info |= kCpuHasMIPS;
if (getenv("LIBYUV_DISABLE_MIPS")) { if (getenv("LIBYUV_DISABLE_MIPS")) {
cpu_info_ &= ~kCpuHasMIPS; cpu_info &= ~kCpuHasMIPS;
}
if (getenv("LIBYUV_DISABLE_MIPS_DSP")) {
cpu_info_ &= ~kCpuHasMIPS_DSP;
} }
if (getenv("LIBYUV_DISABLE_MIPS_DSPR2")) { if (getenv("LIBYUV_DISABLE_MIPS_DSPR2")) {
cpu_info_ &= ~kCpuHasMIPS_DSPR2; cpu_info &= ~kCpuHasMIPS_DSPR2;
} }
#elif defined(__arm__) || defined(__aarch64__) #endif
#if defined(__arm__) || defined(__aarch64__)
// gcc -mfpu=neon defines __ARM_NEON__ // gcc -mfpu=neon defines __ARM_NEON__
// __ARM_NEON__ generates code that requires Neon. NaCL also requires Neon. // __ARM_NEON__ generates code that requires Neon. NaCL also requires Neon.
// For Linux, /proc/cpuinfo can be tested but without that assume Neon. // For Linux, /proc/cpuinfo can be tested but without that assume Neon.
#if defined(__ARM_NEON__) || defined(__native_client__) || !defined(__linux__) #if defined(__ARM_NEON__) || defined(__native_client__) || !defined(__linux__)
cpu_info_ = kCpuHasNEON; cpu_info = kCpuHasNEON;
// For aarch64(arm64), /proc/cpuinfo's feature is not complete, e.g. no neon // For aarch64(arm64), /proc/cpuinfo's feature is not complete, e.g. no neon
// flag in it. // flag in it.
// So for aarch64, neon enabling is hard coded here. // So for aarch64, neon enabling is hard coded here.
#elif defined(__aarch64__) #endif
cpu_info_ = kCpuHasNEON; #if defined(__aarch64__)
cpu_info = kCpuHasNEON;
#else #else
// Linux arm parse text file for neon detect. // Linux arm parse text file for neon detect.
cpu_info_ = ArmCpuCaps("/proc/cpuinfo"); cpu_info = ArmCpuCaps("/proc/cpuinfo");
#endif #endif
cpu_info_ |= kCpuHasARM; cpu_info |= kCpuHasARM;
if (TestEnv("LIBYUV_DISABLE_NEON")) { if (TestEnv("LIBYUV_DISABLE_NEON")) {
cpu_info_ &= ~kCpuHasNEON; cpu_info &= ~kCpuHasNEON;
} }
#endif // __arm__ #endif // __arm__
if (TestEnv("LIBYUV_DISABLE_ASM")) { if (TestEnv("LIBYUV_DISABLE_ASM")) {
cpu_info_ = 0; cpu_info = 0;
} }
return cpu_info_; cpu_info |= kCpuInitialized;
cpu_info_ = cpu_info;
return cpu_info;
} }
// Note that use of this function is not thread safe.
LIBYUV_API LIBYUV_API
void MaskCpuFlags(int enable_flags) { void MaskCpuFlags(int enable_flags) {
cpu_info_ = InitCpuFlags() & enable_flags; cpu_info_ = InitCpuFlags() & enable_flags;

View File

@ -1,554 +0,0 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/format_conversion.h"
#include "libyuv/basic_types.h"
#include "libyuv/cpu_id.h"
#include "libyuv/video_common.h"
#include "libyuv/row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Builds a 32-bit selector mask usable with pshufb.
// select0 is placed in bits 0-7 and (select0 + 8) in bits 16-23;
// (select1 + 4) is placed in bits 8-15 and (select1 + 12) in bits 24-31.
static uint32 GenerateSelector(int select0, int select1) {
  const uint32 lane0 = (uint32)(select0);
  const uint32 lane1 = (uint32)((select1 + 4) << 8);
  const uint32 lane2 = (uint32)((select0 + 8) << 16);
  const uint32 lane3 = (uint32)((select1 + 12) << 24);
  return lane0 | lane1 | lane2 | lane3;
}
// Fills index_map with the two row selectors for a 2x2 Bayer grid:
// index_map[0] is for even rows and index_map[1] for odd rows (callers index
// it with y & 1). Each selector names the channel taken for the first and
// second pixel of a pair.
// Returns 0 on success, or -1 (leaving index_map untouched) when
// dst_fourcc_bayer is not one of the four supported Bayer layouts.
static int MakeSelectors(const int blue_index,
                         const int green_index,
                         const int red_index,
                         uint32 dst_fourcc_bayer,
                         uint32* index_map) {
  int row0_first, row0_second;
  int row1_first, row1_second;
  switch (dst_fourcc_bayer) {
    case FOURCC_BGGR:
      row0_first = blue_index;
      row0_second = green_index;
      row1_first = green_index;
      row1_second = red_index;
      break;
    case FOURCC_GBRG:
      row0_first = green_index;
      row0_second = blue_index;
      row1_first = red_index;
      row1_second = green_index;
      break;
    case FOURCC_RGGB:
      row0_first = red_index;
      row0_second = green_index;
      row1_first = green_index;
      row1_second = blue_index;
      break;
    case FOURCC_GRBG:
      row0_first = green_index;
      row0_second = red_index;
      row1_first = blue_index;
      row1_second = green_index;
      break;
    default:
      return -1;  // Bad FourCC
  }
  index_map[0] = GenerateSelector(row0_first, row0_second);
  index_map[1] = GenerateSelector(row1_first, row1_second);
  return 0;
}
// Converts a 32 bit ARGB image to one of the Bayer RGB formats.
// Every destination row is produced by a row function that picks one channel
// per pixel according to the selector for that row's parity.
// A negative height reads the source bottom-up (vertical flip).
// Returns 0 on success, -1 when dst_fourcc_bayer is not a supported layout.
LIBYUV_API
int ARGBToBayer(const uint8* src_argb, int src_stride_argb,
                uint8* dst_bayer, int dst_stride_bayer,
                int width, int height,
                uint32 dst_fourcc_bayer) {
  // Offsets for ARGB format
  const int blue_index = 0;
  const int green_index = 1;
  const int red_index = 2;
  uint32 selectors[2];
  int row;
  void (*ConvertRow)(const uint8* src_argb, uint8* dst_bayer,
                     uint32 selector, int pix) = ARGBToBayerRow_C;
  if (height < 0) {
    // Start at the last source row and walk upwards.
    height = -height;
    src_argb += (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
#if defined(HAS_ARGBTOBAYERROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8 &&
      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
    ConvertRow = IS_ALIGNED(width, 8) ? ARGBToBayerRow_SSSE3
                                      : ARGBToBayerRow_Any_SSSE3;
  }
#elif defined(HAS_ARGBTOBAYERROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    ConvertRow = IS_ALIGNED(width, 8) ? ARGBToBayerRow_NEON
                                      : ARGBToBayerRow_Any_NEON;
  }
#endif
  if (MakeSelectors(blue_index, green_index, red_index,
                    dst_fourcc_bayer, selectors) != 0) {
    return -1;  // Bad FourCC
  }
  for (row = 0; row < height; ++row) {
    ConvertRow(src_argb, dst_bayer, selectors[row & 1], width);
    dst_bayer += dst_stride_bayer;
    src_argb += src_stride_argb;
  }
  return 0;
}
// Average of a and b computed as (a + b) >> 1, i.e. rounded down.
// Each argument is evaluated exactly once.
#define AVG(a, b) (((a) + (b)) >> 1)
// Converts one Bayer row laid out B,G,B,G,... to ARGB.
// Red is taken from the neighbouring row at src_bayer0 + src_stride_bayer;
// the stride may be negative so the caller can use the row above.
// Output is 4 bytes per pixel: blue, green, red, then alpha fixed at 255.
// NOTE(review): index [1] of both rows is read unconditionally, so for odd pix
// the tail appears to read one byte past the pix-wide row, and for pix == 0
// eight bytes are still written - confirm callers guarantee pix >= 2 or slack.
static void BayerRowBG(const uint8* src_bayer0, int src_stride_bayer,
uint8* dst_argb, int pix) {
const uint8* src_bayer1 = src_bayer0 + src_stride_bayer;
// "Previous" green/red samples, seeded from the first pair itself so the
// first pixel averages a sample with itself.
uint8 g = src_bayer0[1];
uint8 r = src_bayer1[1];
int x;
// Two output pixels per iteration; the final pair is handled after the loop
// because it has no right-hand neighbour at src_bayer0[2].
for (x = 0; x < pix - 2; x += 2) {
// Pixel on a blue site: blue is exact, green/red average left and right.
dst_argb[0] = src_bayer0[0];
dst_argb[1] = AVG(g, src_bayer0[1]);
dst_argb[2] = AVG(r, src_bayer1[1]);
dst_argb[3] = 255U;
// Pixel on a green site: blue averages the two adjacent blue samples.
dst_argb[4] = AVG(src_bayer0[0], src_bayer0[2]);
dst_argb[5] = src_bayer0[1];
dst_argb[6] = src_bayer1[1];
dst_argb[7] = 255U;
g = src_bayer0[1];
r = src_bayer1[1];
src_bayer0 += 2;
src_bayer1 += 2;
dst_argb += 8;
}
// Tail: last blue-site pixel.
dst_argb[0] = src_bayer0[0];
dst_argb[1] = AVG(g, src_bayer0[1]);
dst_argb[2] = AVG(r, src_bayer1[1]);
dst_argb[3] = 255U;
// Even width: also emit the last green-site pixel, copying blue instead of
// averaging because there is no sample to its right.
if (!(pix & 1)) {
dst_argb[4] = src_bayer0[0];
dst_argb[5] = src_bayer0[1];
dst_argb[6] = src_bayer1[1];
dst_argb[7] = 255U;
}
}
// Converts one Bayer row laid out R,G,R,G,... to ARGB.
// Blue is taken from the neighbouring row at src_bayer0 + src_stride_bayer;
// the stride may be negative so the caller can use the row above.
// Output is 4 bytes per pixel: blue, green, red, then alpha fixed at 255.
// NOTE(review): index [1] of both rows is read unconditionally, so for odd pix
// the tail appears to read one byte past the pix-wide row, and for pix == 0
// eight bytes are still written - confirm callers guarantee pix >= 2 or slack.
static void BayerRowRG(const uint8* src_bayer0, int src_stride_bayer,
uint8* dst_argb, int pix) {
const uint8* src_bayer1 = src_bayer0 + src_stride_bayer;
// "Previous" green/blue samples, seeded from the first pair itself.
uint8 g = src_bayer0[1];
uint8 b = src_bayer1[1];
int x;
// Two output pixels per iteration; the final pair is handled after the loop
// because it has no right-hand neighbour at src_bayer0[2].
for (x = 0; x < pix - 2; x += 2) {
// Pixel on a red site: red is exact, blue/green average left and right.
dst_argb[0] = AVG(b, src_bayer1[1]);
dst_argb[1] = AVG(g, src_bayer0[1]);
dst_argb[2] = src_bayer0[0];
dst_argb[3] = 255U;
// Pixel on a green site: red averages the two adjacent red samples.
dst_argb[4] = src_bayer1[1];
dst_argb[5] = src_bayer0[1];
dst_argb[6] = AVG(src_bayer0[0], src_bayer0[2]);
dst_argb[7] = 255U;
g = src_bayer0[1];
b = src_bayer1[1];
src_bayer0 += 2;
src_bayer1 += 2;
dst_argb += 8;
}
// Tail: last red-site pixel.
dst_argb[0] = AVG(b, src_bayer1[1]);
dst_argb[1] = AVG(g, src_bayer0[1]);
dst_argb[2] = src_bayer0[0];
dst_argb[3] = 255U;
// Even width: also emit the last green-site pixel, copying red instead of
// averaging because there is no sample to its right.
if (!(pix & 1)) {
dst_argb[4] = src_bayer1[1];
dst_argb[5] = src_bayer0[1];
dst_argb[6] = src_bayer0[0];
dst_argb[7] = 255U;
}
}
// Converts one Bayer row laid out G,B,G,B,... to ARGB.
// Red is taken from the neighbouring row at src_bayer0 + src_stride_bayer;
// the stride may be negative so the caller can use the row above.
// Output is 4 bytes per pixel: blue, green, red, then alpha fixed at 255.
// NOTE(review): src_bayer0[1] is read unconditionally, so for odd pix the tail
// appears to read one byte past the pix-wide row, and for pix == 0 eight bytes
// are still written - confirm callers guarantee pix >= 2 or slack.
static void BayerRowGB(const uint8* src_bayer0, int src_stride_bayer,
uint8* dst_argb, int pix) {
const uint8* src_bayer1 = src_bayer0 + src_stride_bayer;
// "Previous" blue sample, seeded from the first pair itself.
uint8 b = src_bayer0[1];
int x;
// Two output pixels per iteration; the final pair is handled after the loop
// because it has no right-hand neighbours at index [2].
for (x = 0; x < pix - 2; x += 2) {
// Pixel on a green site: blue averages the samples to its left and right.
dst_argb[0] = AVG(b, src_bayer0[1]);
dst_argb[1] = src_bayer0[0];
dst_argb[2] = src_bayer1[0];
dst_argb[3] = 255U;
// Pixel on a blue site: green and red average their two adjacent samples.
dst_argb[4] = src_bayer0[1];
dst_argb[5] = AVG(src_bayer0[0], src_bayer0[2]);
dst_argb[6] = AVG(src_bayer1[0], src_bayer1[2]);
dst_argb[7] = 255U;
b = src_bayer0[1];
src_bayer0 += 2;
src_bayer1 += 2;
dst_argb += 8;
}
// Tail: last green-site pixel.
dst_argb[0] = AVG(b, src_bayer0[1]);
dst_argb[1] = src_bayer0[0];
dst_argb[2] = src_bayer1[0];
dst_argb[3] = 255U;
// Even width: also emit the last blue-site pixel, copying green/red instead
// of averaging because there are no samples to its right.
if (!(pix & 1)) {
dst_argb[4] = src_bayer0[1];
dst_argb[5] = src_bayer0[0];
dst_argb[6] = src_bayer1[0];
dst_argb[7] = 255U;
}
}
// Converts one Bayer row laid out G,R,G,R,... to ARGB.
// Blue is taken from the neighbouring row at src_bayer0 + src_stride_bayer;
// the stride may be negative so the caller can use the row above.
// Output is 4 bytes per pixel: blue, green, red, then alpha fixed at 255.
// NOTE(review): src_bayer0[1] is read unconditionally, so for odd pix the tail
// appears to read one byte past the pix-wide row, and for pix == 0 eight bytes
// are still written - confirm callers guarantee pix >= 2 or slack.
static void BayerRowGR(const uint8* src_bayer0, int src_stride_bayer,
uint8* dst_argb, int pix) {
const uint8* src_bayer1 = src_bayer0 + src_stride_bayer;
// "Previous" red sample, seeded from the first pair itself.
uint8 r = src_bayer0[1];
int x;
// Two output pixels per iteration; the final pair is handled after the loop
// because it has no right-hand neighbours at index [2].
for (x = 0; x < pix - 2; x += 2) {
// Pixel on a green site: red averages the samples to its left and right.
dst_argb[0] = src_bayer1[0];
dst_argb[1] = src_bayer0[0];
dst_argb[2] = AVG(r, src_bayer0[1]);
dst_argb[3] = 255U;
// Pixel on a red site: blue and green average their two adjacent samples.
dst_argb[4] = AVG(src_bayer1[0], src_bayer1[2]);
dst_argb[5] = AVG(src_bayer0[0], src_bayer0[2]);
dst_argb[6] = src_bayer0[1];
dst_argb[7] = 255U;
r = src_bayer0[1];
src_bayer0 += 2;
src_bayer1 += 2;
dst_argb += 8;
}
// Tail: last green-site pixel.
dst_argb[0] = src_bayer1[0];
dst_argb[1] = src_bayer0[0];
dst_argb[2] = AVG(r, src_bayer0[1]);
dst_argb[3] = 255U;
// Even width: also emit the last red-site pixel, copying blue/green instead
// of averaging because there are no samples to its right.
if (!(pix & 1)) {
dst_argb[4] = src_bayer1[0];
dst_argb[5] = src_bayer0[0];
dst_argb[6] = src_bayer0[1];
dst_argb[7] = 255U;
}
}
// Converts any Bayer RGB format to ARGB.
// Rows are processed in pairs: the even row is converted using the row below
// it as its neighbour, the odd row using the row above it (negative stride).
// A negative height writes the destination bottom-up (vertical flip).
// Returns 0 on success, -1 when src_fourcc_bayer is not a supported layout.
LIBYUV_API
int BayerToARGB(const uint8* src_bayer, int src_stride_bayer,
                uint8* dst_argb, int dst_stride_argb,
                int width, int height,
                uint32 src_fourcc_bayer) {
  int y;
  void (*BayerRow0)(const uint8* src_bayer, int src_stride_bayer,
                    uint8* dst_argb, int pix);
  void (*BayerRow1)(const uint8* src_bayer, int src_stride_bayer,
                    uint8* dst_argb, int pix);
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Pick the row converters for the even (Row0) and odd (Row1) source rows.
  switch (src_fourcc_bayer) {
    case FOURCC_BGGR:
      BayerRow0 = BayerRowBG;
      BayerRow1 = BayerRowGR;
      break;
    case FOURCC_GBRG:
      BayerRow0 = BayerRowGB;
      BayerRow1 = BayerRowRG;
      break;
    case FOURCC_GRBG:
      BayerRow0 = BayerRowGR;
      BayerRow1 = BayerRowBG;
      break;
    case FOURCC_RGGB:
      BayerRow0 = BayerRowRG;
      BayerRow1 = BayerRowGB;
      break;
    default:
      return -1;  // Bad FourCC
  }

  for (y = 0; y < height - 1; y += 2) {
    BayerRow0(src_bayer, src_stride_bayer, dst_argb, width);
    BayerRow1(src_bayer + src_stride_bayer, -src_stride_bayer,
              dst_argb + dst_stride_argb, width);
    src_bayer += src_stride_bayer * 2;
    dst_argb += dst_stride_argb * 2;
  }
  if (height & 1) {
    // The last row of an odd-height image has no row below it, so a positive
    // stride would read past the end of the source. Use the row above, which
    // has the same colour layout as the missing row below; a single-row image
    // has no other row at all and falls back to itself (stride 0).
    const int neighbor_stride = (height > 1) ? -src_stride_bayer : 0;
    BayerRow0(src_bayer, neighbor_stride, dst_argb, width);
  }
  return 0;
}
// Converts any Bayer RGB format to I420.
// Two Bayer rows at a time are expanded into a temporary pair of ARGB rows,
// from which two Y rows and one U/V row are produced.
// A negative height writes the destination planes bottom-up (vertical flip).
// Returns 0 on success, -1 when src_fourcc_bayer is not a supported layout.
LIBYUV_API
int BayerToI420(const uint8* src_bayer, int src_stride_bayer,
                uint8* dst_y, int dst_stride_y,
                uint8* dst_u, int dst_stride_u,
                uint8* dst_v, int dst_stride_v,
                int width, int height,
                uint32 src_fourcc_bayer) {
  void (*BayerRow0)(const uint8* src_bayer, int src_stride_bayer,
                    uint8* dst_argb, int pix);
  void (*BayerRow1)(const uint8* src_bayer, int src_stride_bayer,
                    uint8* dst_argb, int pix);
  void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
                      uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
      ARGBToYRow_C;
  // Negative height means invert the image.
  if (height < 0) {
    int halfheight;
    height = -height;
    halfheight = (height + 1) >> 1;
    dst_y = dst_y + (height - 1) * dst_stride_y;
    dst_u = dst_u + (halfheight - 1) * dst_stride_u;
    dst_v = dst_v + (halfheight - 1) * dst_stride_v;
    dst_stride_y = -dst_stride_y;
    dst_stride_u = -dst_stride_u;
    dst_stride_v = -dst_stride_v;
  }
#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
    ARGBToYRow = ARGBToYRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
      ARGBToUVRow = ARGBToUVRow_SSSE3;
      if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
        ARGBToYRow = ARGBToYRow_SSSE3;
      }
    }
  }
#elif defined(HAS_ARGBTOYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    ARGBToYRow = ARGBToYRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBToYRow = ARGBToYRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBTOUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
    ARGBToUVRow = ARGBToUVRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      ARGBToUVRow = ARGBToUVRow_NEON;
    }
  }
#endif

  // Pick the row converters for the even (Row0) and odd (Row1) source rows.
  switch (src_fourcc_bayer) {
    case FOURCC_BGGR:
      BayerRow0 = BayerRowBG;
      BayerRow1 = BayerRowGR;
      break;
    case FOURCC_GBRG:
      BayerRow0 = BayerRowGB;
      BayerRow1 = BayerRowRG;
      break;
    case FOURCC_GRBG:
      BayerRow0 = BayerRowGR;
      BayerRow1 = BayerRowBG;
      break;
    case FOURCC_RGGB:
      BayerRow0 = BayerRowRG;
      BayerRow1 = BayerRowGB;
      break;
    default:
      return -1;  // Bad FourCC
  }

  {
    // Allocate 2 rows of ARGB.
    const int kRowSize = (width * 4 + 15) & ~15;
    align_buffer_64(row, kRowSize * 2);
    int y;
    for (y = 0; y < height - 1; y += 2) {
      BayerRow0(src_bayer, src_stride_bayer, row, width);
      BayerRow1(src_bayer + src_stride_bayer, -src_stride_bayer,
                row + kRowSize, width);
      ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
      ARGBToYRow(row, dst_y, width);
      ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
      src_bayer += src_stride_bayer * 2;
      dst_y += dst_stride_y * 2;
      dst_u += dst_stride_u;
      dst_v += dst_stride_v;
    }
    if (height & 1) {
      // The last row of an odd-height image has no row below it, so a
      // positive stride would read past the end of the source. Use the row
      // above, which has the same colour layout as the missing row below; a
      // single-row image has no other row and falls back to itself (stride 0).
      const int neighbor_stride = (height > 1) ? -src_stride_bayer : 0;
      BayerRow0(src_bayer, neighbor_stride, row, width);
      // Stride 0 makes the UV row function use the single ARGB row twice.
      ARGBToUVRow(row, 0, dst_u, dst_v, width);
      ARGBToYRow(row, dst_y, width);
    }
    free_aligned_buffer_64(row);
  }
  return 0;
}
// Convert I420 to Bayer.
// Each output row is built in two steps: the Y row and its U/V rows are
// expanded into a temporary ARGB row, then one channel per pixel is picked
// with the selector for that row's parity. U and V advance every second row.
// A negative height reads the source planes bottom-up (vertical flip).
// Returns 0 on success, -1 when dst_fourcc_bayer is not a supported layout.
LIBYUV_API
int I420ToBayer(const uint8* src_y, int src_stride_y,
                const uint8* src_u, int src_stride_u,
                const uint8* src_v, int src_stride_v,
                uint8* dst_bayer, int dst_stride_bayer,
                int width, int height,
                uint32 dst_fourcc_bayer) {
  // Offsets for ARGB format
  const int blue_index = 0;
  const int green_index = 1;
  const int red_index = 2;
  uint32 selectors[2];
  void (*ToARGBRow)(const uint8* y_buf,
                    const uint8* u_buf,
                    const uint8* v_buf,
                    uint8* rgb_buf,
                    int width) = I422ToARGBRow_C;
  void (*ToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
                     uint32 selector, int pix) = ARGBToBayerRow_C;
  // Negative height means invert the image.
  if (height < 0) {
    int halfheight;
    height = -height;
    halfheight = (height + 1) >> 1;
    src_y += (height - 1) * src_stride_y;
    src_u += (halfheight - 1) * src_stride_u;
    src_v += (halfheight - 1) * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }
  // YUV -> ARGB row function; later matches override earlier ones.
#if defined(HAS_I422TOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
    ToARGBRow = IS_ALIGNED(width, 8) ? I422ToARGBRow_SSSE3
                                     : I422ToARGBRow_Any_SSSE3;
  }
#endif
#if defined(HAS_I422TOARGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && width >= 16) {
    ToARGBRow = IS_ALIGNED(width, 16) ? I422ToARGBRow_AVX2
                                      : I422ToARGBRow_Any_AVX2;
  }
#endif
#if defined(HAS_I422TOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    ToARGBRow = IS_ALIGNED(width, 8) ? I422ToARGBRow_NEON
                                     : I422ToARGBRow_Any_NEON;
  }
#endif
#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2)) {
    ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
  }
#endif

  // ARGB -> Bayer row function.
#if defined(HAS_ARGBTOBAYERROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
    ToBayerRow = IS_ALIGNED(width, 8) ? ARGBToBayerRow_SSSE3
                                      : ARGBToBayerRow_Any_SSSE3;
  }
#elif defined(HAS_ARGBTOBAYERROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    ToBayerRow = IS_ALIGNED(width, 8) ? ARGBToBayerRow_NEON
                                      : ARGBToBayerRow_Any_NEON;
  }
#endif
  if (MakeSelectors(blue_index, green_index, red_index,
                    dst_fourcc_bayer, selectors) != 0) {
    return -1;  // Bad FourCC
  }
  {
    // Allocate a row of ARGB.
    align_buffer_64(argb_row, width * 4);
    int y;
    for (y = 0; y < height; ++y) {
      ToARGBRow(src_y, src_u, src_v, argb_row, width);
      ToBayerRow(argb_row, dst_bayer, selectors[y & 1], width);
      src_y += src_stride_y;
      dst_bayer += dst_stride_bayer;
      // Chroma is half height: step U/V only after each odd row.
      if ((y & 1) != 0) {
        src_u += src_stride_u;
        src_v += src_stride_v;
      }
    }
    free_aligned_buffer_64(argb_row);
  }
  return 0;
}
// Generates the four fixed-layout wrappers for one Bayer layout BAYER:
//   Bayer<BAYER>ToI420, I420ToBayer<BAYER>, ARGBToBayer<BAYER> and
//   Bayer<BAYER>ToARGB.
// Each wrapper forwards its arguments unchanged to the generic function
// (BayerToI420, I420ToBayer, ARGBToBayer, BayerToARGB) and appends
// FOURCC_<BAYER> as the layout argument.
#define MAKEBAYERFOURCC(BAYER) \
LIBYUV_API \
int Bayer##BAYER##ToI420(const uint8* src_bayer, int src_stride_bayer, \
uint8* dst_y, int dst_stride_y, \
uint8* dst_u, int dst_stride_u, \
uint8* dst_v, int dst_stride_v, \
int width, int height) { \
return BayerToI420(src_bayer, src_stride_bayer, \
dst_y, dst_stride_y, \
dst_u, dst_stride_u, \
dst_v, dst_stride_v, \
width, height, \
FOURCC_##BAYER); \
} \
\
LIBYUV_API \
int I420ToBayer##BAYER(const uint8* src_y, int src_stride_y, \
const uint8* src_u, int src_stride_u, \
const uint8* src_v, int src_stride_v, \
uint8* dst_bayer, int dst_stride_bayer, \
int width, int height) { \
return I420ToBayer(src_y, src_stride_y, \
src_u, src_stride_u, \
src_v, src_stride_v, \
dst_bayer, dst_stride_bayer, \
width, height, \
FOURCC_##BAYER); \
} \
\
LIBYUV_API \
int ARGBToBayer##BAYER(const uint8* src_argb, int src_stride_argb, \
uint8* dst_bayer, int dst_stride_bayer, \
int width, int height) { \
return ARGBToBayer(src_argb, src_stride_argb, \
dst_bayer, dst_stride_bayer, \
width, height, \
FOURCC_##BAYER); \
} \
\
LIBYUV_API \
int Bayer##BAYER##ToARGB(const uint8* src_bayer, int src_stride_bayer, \
uint8* dst_argb, int dst_stride_argb, \
int width, int height) { \
return BayerToARGB(src_bayer, src_stride_bayer, \
dst_argb, dst_stride_argb, \
width, height, \
FOURCC_##BAYER); \
}
// Instantiate the wrappers for each of the four supported Bayer layouts.
MAKEBAYERFOURCC(BGGR)
MAKEBAYERFOURCC(GBRG)
MAKEBAYERFOURCC(GRBG)
MAKEBAYERFOURCC(RGGB)
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

View File

@ -18,6 +18,12 @@
// Must be included before jpeglib. // Must be included before jpeglib.
#include <setjmp.h> #include <setjmp.h>
#define HAVE_SETJMP #define HAVE_SETJMP
#if defined(_MSC_VER)
// disable warning 4324: structure was padded due to __declspec(align())
#pragma warning(disable:4324)
#endif
#endif #endif
struct FILE; // For jpeglib.h. struct FILE; // For jpeglib.h.
@ -53,8 +59,7 @@ const int MJpegDecoder::kColorSpaceYCCK = JCS_YCCK;
// Methods that are passed to jpeglib. // Methods that are passed to jpeglib.
boolean fill_input_buffer(jpeg_decompress_struct* cinfo); boolean fill_input_buffer(jpeg_decompress_struct* cinfo);
void init_source(jpeg_decompress_struct* cinfo); void init_source(jpeg_decompress_struct* cinfo);
void skip_input_data(jpeg_decompress_struct* cinfo, void skip_input_data(jpeg_decompress_struct* cinfo, long num_bytes); // NOLINT
long num_bytes); // NOLINT
void term_source(jpeg_decompress_struct* cinfo); void term_source(jpeg_decompress_struct* cinfo);
void ErrorHandler(jpeg_common_struct* cinfo); void ErrorHandler(jpeg_common_struct* cinfo);
@ -423,8 +428,7 @@ boolean fill_input_buffer(j_decompress_ptr cinfo) {
return TRUE; return TRUE;
} }
void skip_input_data(j_decompress_ptr cinfo, void skip_input_data(j_decompress_ptr cinfo, long num_bytes) { // NOLINT
long num_bytes) { // NOLINT
cinfo->src->next_input_byte += num_bytes; cinfo->src->next_input_byte += num_bytes;
} }

View File

@ -10,36 +10,60 @@
#include "libyuv/mjpeg_decoder.h" #include "libyuv/mjpeg_decoder.h"
#include <string.h> // For memchr.
#ifdef __cplusplus #ifdef __cplusplus
namespace libyuv { namespace libyuv {
extern "C" { extern "C" {
#endif #endif
// Helper function to validate the jpeg appears intact. // Helper function to scan for EOI marker (0xff 0xd9).
// TODO(fbarchard): Optimize case where SOI is found but EOI is not. static LIBYUV_BOOL ScanEOI(const uint8* sample, size_t sample_size) {
LIBYUV_BOOL ValidateJpeg(const uint8* sample, size_t sample_size) { if (sample_size >= 2) {
size_t i; const uint8* end = sample + sample_size - 1;
if (sample_size < 64) { const uint8* it = sample;
// ERROR: Invalid jpeg size: sample_size while (it < end) {
return LIBYUV_FALSE; // TODO(fbarchard): scan for 0xd9 instead.
it = static_cast<const uint8 *>(memchr(it, 0xff, end - it));
if (it == NULL) {
break;
} }
if (sample[0] != 0xff || sample[1] != 0xd8) { // Start Of Image if (it[1] == 0xd9) {
// ERROR: Invalid jpeg initial start code
return LIBYUV_FALSE;
}
for (i = sample_size - 2; i > 1;) {
if (sample[i] != 0xd9) {
if (sample[i] == 0xff && sample[i + 1] == 0xd9) { // End Of Image
return LIBYUV_TRUE; // Success: Valid jpeg. return LIBYUV_TRUE; // Success: Valid jpeg.
} }
--i; ++it; // Skip over current 0xff.
} }
--i;
} }
// ERROR: Invalid jpeg end code not found. Size sample_size // ERROR: Invalid jpeg end code not found. Size sample_size
return LIBYUV_FALSE; return LIBYUV_FALSE;
} }
// Helper function to validate the jpeg appears intact.
LIBYUV_BOOL ValidateJpeg(const uint8* sample, size_t sample_size) {
// Maximum size that ValidateJpeg will consider valid.
const size_t kMaxJpegSize = 0x7fffffffull;
const size_t kBackSearchSize = 1024;
if (sample_size < 64 || sample_size > kMaxJpegSize || !sample) {
// ERROR: Invalid jpeg size: sample_size
return LIBYUV_FALSE;
}
if (sample[0] != 0xff || sample[1] != 0xd8) { // SOI marker
// ERROR: Invalid jpeg initial start code
return LIBYUV_FALSE;
}
// Look for the End Of Image (EOI) marker near the end of the buffer.
if (sample_size > kBackSearchSize) {
if (ScanEOI(sample + sample_size - kBackSearchSize, kBackSearchSize)) {
return LIBYUV_TRUE; // Success: Valid jpeg.
}
// Reduce search size for forward search.
sample_size = sample_size - kBackSearchSize + 1;
}
// Step over SOI marker and scan for EOI.
return ScanEOI(sample + 2, sample_size - 2);
}
#ifdef __cplusplus #ifdef __cplusplus
} // extern "C" } // extern "C"
} // namespace libyuv } // namespace libyuv

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -27,36 +27,31 @@ extern "C" {
(defined(__x86_64__) && !defined(__native_client__)) || defined(__i386__)) (defined(__x86_64__) && !defined(__native_client__)) || defined(__i386__))
#define HAS_SCALEARGBROWDOWNEVEN_SSE2 #define HAS_SCALEARGBROWDOWNEVEN_SSE2
void ScaleARGBRowDownEven_SSE2(const uint8* src_ptr, int src_stride, void ScaleARGBRowDownEven_SSE2(const uint8* src_ptr, int src_stride,
int src_stepx, int src_stepx, uint8* dst_ptr, int dst_width);
uint8* dst_ptr, int dst_width);
#endif #endif
#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \ #if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
(defined(__ARM_NEON__) || defined(LIBYUV_NEON)) (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
#define HAS_SCALEARGBROWDOWNEVEN_NEON #define HAS_SCALEARGBROWDOWNEVEN_NEON
void ScaleARGBRowDownEven_NEON(const uint8* src_ptr, int src_stride, void ScaleARGBRowDownEven_NEON(const uint8* src_ptr, int src_stride,
int src_stepx, int src_stepx, uint8* dst_ptr, int dst_width);
uint8* dst_ptr, int dst_width);
#endif #endif
void ScaleARGBRowDownEven_C(const uint8* src_ptr, int, void ScaleARGBRowDownEven_C(const uint8* src_ptr, int,
int src_stepx, int src_stepx, uint8* dst_ptr, int dst_width);
uint8* dst_ptr, int dst_width);
static void ARGBTranspose(const uint8* src, int src_stride, static void ARGBTranspose(const uint8* src, int src_stride,
uint8* dst, int dst_stride, uint8* dst, int dst_stride, int width, int height) {
int width, int height) {
int i; int i;
int src_pixel_step = src_stride >> 2; int src_pixel_step = src_stride >> 2;
void (*ScaleARGBRowDownEven)(const uint8* src_ptr, int src_stride, void (*ScaleARGBRowDownEven)(const uint8* src_ptr, int src_stride,
int src_step, uint8* dst_ptr, int dst_width) = ScaleARGBRowDownEven_C; int src_step, uint8* dst_ptr, int dst_width) = ScaleARGBRowDownEven_C;
#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2) #if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(height, 4) && // Width of dest. if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(height, 4)) { // Width of dest.
IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleARGBRowDownEven = ScaleARGBRowDownEven_SSE2; ScaleARGBRowDownEven = ScaleARGBRowDownEven_SSE2;
} }
#elif defined(HAS_SCALEARGBROWDOWNEVEN_NEON) #endif
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(height, 4) && // Width of dest. #if defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
IS_ALIGNED(src, 4)) { if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(height, 4)) { // Width of dest.
ScaleARGBRowDownEven = ScaleARGBRowDownEven_NEON; ScaleARGBRowDownEven = ScaleARGBRowDownEven_NEON;
} }
#endif #endif
@ -69,8 +64,7 @@ static void ARGBTranspose(const uint8* src, int src_stride,
} }
void ARGBRotate90(const uint8* src, int src_stride, void ARGBRotate90(const uint8* src, int src_stride,
uint8* dst, int dst_stride, uint8* dst, int dst_stride, int width, int height) {
int width, int height) {
// Rotate by 90 is a ARGBTranspose with the source read // Rotate by 90 is a ARGBTranspose with the source read
// from bottom to top. So set the source pointer to the end // from bottom to top. So set the source pointer to the end
// of the buffer and flip the sign of the source stride. // of the buffer and flip the sign of the source stride.
@ -80,8 +74,7 @@ void ARGBRotate90(const uint8* src, int src_stride,
} }
void ARGBRotate270(const uint8* src, int src_stride, void ARGBRotate270(const uint8* src, int src_stride,
uint8* dst, int dst_stride, uint8* dst, int dst_stride, int width, int height) {
int width, int height) {
// Rotate by 270 is a ARGBTranspose with the destination written // Rotate by 270 is a ARGBTranspose with the destination written
// from bottom to top. So set the destination pointer to the end // from bottom to top. So set the destination pointer to the end
// of the buffer and flip the sign of the destination stride. // of the buffer and flip the sign of the destination stride.
@ -91,8 +84,7 @@ void ARGBRotate270(const uint8* src, int src_stride,
} }
void ARGBRotate180(const uint8* src, int src_stride, void ARGBRotate180(const uint8* src, int src_stride,
uint8* dst, int dst_stride, uint8* dst, int dst_stride, int width, int height) {
int width, int height) {
// Swap first and last row and mirror the content. Uses a temporary row. // Swap first and last row and mirror the content. Uses a temporary row.
align_buffer_64(row, width * 4); align_buffer_64(row, width * 4);
const uint8* src_bot = src + src_stride * (height - 1); const uint8* src_bot = src + src_stride * (height - 1);
@ -102,38 +94,38 @@ void ARGBRotate180(const uint8* src, int src_stride,
void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) = void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) =
ARGBMirrorRow_C; ARGBMirrorRow_C;
void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C; void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
#if defined(HAS_ARGBMIRRORROW_SSSE3) #if defined(HAS_ARGBMIRRORROW_NEON)
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4) && if (TestCpuFlag(kCpuHasNEON)) {
IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) && ARGBMirrorRow = ARGBMirrorRow_Any_NEON;
IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) { if (IS_ALIGNED(width, 4)) {
ARGBMirrorRow = ARGBMirrorRow_SSSE3; ARGBMirrorRow = ARGBMirrorRow_NEON;
}
}
#endif
#if defined(HAS_ARGBMIRRORROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ARGBMirrorRow = ARGBMirrorRow_Any_SSE2;
if (IS_ALIGNED(width, 4)) {
ARGBMirrorRow = ARGBMirrorRow_SSE2;
}
} }
#endif #endif
#if defined(HAS_ARGBMIRRORROW_AVX2) #if defined(HAS_ARGBMIRRORROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 8)) { if (TestCpuFlag(kCpuHasAVX2)) {
ARGBMirrorRow = ARGBMirrorRow_Any_AVX2;
if (IS_ALIGNED(width, 8)) {
ARGBMirrorRow = ARGBMirrorRow_AVX2; ARGBMirrorRow = ARGBMirrorRow_AVX2;
} }
#endif
#if defined(HAS_ARGBMIRRORROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 4)) {
ARGBMirrorRow = ARGBMirrorRow_NEON;
}
#endif
#if defined(HAS_COPYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width * 4, 32)) {
CopyRow = CopyRow_NEON;
}
#endif
#if defined(HAS_COPYROW_X86)
if (TestCpuFlag(kCpuHasX86)) {
CopyRow = CopyRow_X86;
} }
#endif #endif
#if defined(HAS_COPYROW_SSE2) #if defined(HAS_COPYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width * 4, 32) && if (TestCpuFlag(kCpuHasSSE2)) {
IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) && CopyRow = IS_ALIGNED(width * 4, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) { }
CopyRow = CopyRow_SSE2; #endif
#if defined(HAS_COPYROW_AVX)
if (TestCpuFlag(kCpuHasAVX)) {
CopyRow = IS_ALIGNED(width * 4, 64) ? CopyRow_AVX : CopyRow_Any_AVX;
} }
#endif #endif
#if defined(HAS_COPYROW_ERMS) #if defined(HAS_COPYROW_ERMS)
@ -141,6 +133,11 @@ void ARGBRotate180(const uint8* src, int src_stride,
CopyRow = CopyRow_ERMS; CopyRow = CopyRow_ERMS;
} }
#endif #endif
#if defined(HAS_COPYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
CopyRow = IS_ALIGNED(width * 4, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
}
#endif
#if defined(HAS_COPYROW_MIPS) #if defined(HAS_COPYROW_MIPS)
if (TestCpuFlag(kCpuHasMIPS)) { if (TestCpuFlag(kCpuHasMIPS)) {
CopyRow = CopyRow_MIPS; CopyRow = CopyRow_MIPS;
@ -162,8 +159,7 @@ void ARGBRotate180(const uint8* src, int src_stride,
LIBYUV_API LIBYUV_API
int ARGBRotate(const uint8* src_argb, int src_stride_argb, int ARGBRotate(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb, uint8* dst_argb, int dst_stride_argb, int width, int height,
int width, int height,
enum RotationMode mode) { enum RotationMode mode) {
if (!src_argb || width <= 0 || height == 0 || !dst_argb) { if (!src_argb || width <= 0 || height == 0 || !dst_argb) {
return -1; return -1;

View File

@ -9,6 +9,7 @@
*/ */
#include "libyuv/row.h" #include "libyuv/row.h"
#include "libyuv/rotate_row.h"
#include "libyuv/basic_types.h" #include "libyuv/basic_types.h"
@ -22,8 +23,7 @@ extern "C" {
(_MIPS_SIM == _MIPS_SIM_ABI32) (_MIPS_SIM == _MIPS_SIM_ABI32)
void TransposeWx8_MIPS_DSPR2(const uint8* src, int src_stride, void TransposeWx8_MIPS_DSPR2(const uint8* src, int src_stride,
uint8* dst, int dst_stride, uint8* dst, int dst_stride, int width) {
int width) {
__asm__ __volatile__ ( __asm__ __volatile__ (
".set push \n" ".set push \n"
".set noreorder \n" ".set noreorder \n"
@ -106,9 +106,8 @@ void TransposeWx8_MIPS_DSPR2(const uint8* src, int src_stride,
); );
} }
void TransposeWx8_FAST_MIPS_DSPR2(const uint8* src, int src_stride, void TransposeWx8_Fast_MIPS_DSPR2(const uint8* src, int src_stride,
uint8* dst, int dst_stride, uint8* dst, int dst_stride, int width) {
int width) {
__asm__ __volatile__ ( __asm__ __volatile__ (
".set noat \n" ".set noat \n"
".set push \n" ".set push \n"

View File

@ -9,6 +9,7 @@
*/ */
#include "libyuv/row.h" #include "libyuv/row.h"
#include "libyuv/rotate_row.h"
#include "libyuv/basic_types.h" #include "libyuv/basic_types.h"
@ -17,7 +18,8 @@ namespace libyuv {
extern "C" { extern "C" {
#endif #endif
#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) #if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \
!defined(__aarch64__)
static uvec8 kVTbl4x4Transpose = static uvec8 kVTbl4x4Transpose =
{ 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 }; { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 };
@ -33,7 +35,6 @@ void TransposeWx8_NEON(const uint8* src, int src_stride,
"sub %5, #8 \n" "sub %5, #8 \n"
// handle 8x8 blocks. this should be the majority of the plane // handle 8x8 blocks. this should be the majority of the plane
".p2align 2 \n"
"1: \n" "1: \n"
"mov %0, %1 \n" "mov %0, %1 \n"
@ -254,7 +255,6 @@ void TransposeUVWx8_NEON(const uint8* src, int src_stride,
"sub %7, #8 \n" "sub %7, #8 \n"
// handle 8x8 blocks. this should be the majority of the plane // handle 8x8 blocks. this should be the majority of the plane
".p2align 2 \n"
"1: \n" "1: \n"
"mov %0, %1 \n" "mov %0, %1 \n"
@ -525,7 +525,7 @@ void TransposeUVWx8_NEON(const uint8* src, int src_stride,
"q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11" "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"
); );
} }
#endif #endif // defined(__ARM_NEON__) && !defined(__aarch64__)
#ifdef __cplusplus #ifdef __cplusplus
} // extern "C" } // extern "C"

View File

@ -9,6 +9,7 @@
*/ */
#include "libyuv/row.h" #include "libyuv/row.h"
#include "libyuv/rotate_row.h"
#include "libyuv/basic_types.h" #include "libyuv/basic_types.h"
@ -17,522 +18,524 @@ namespace libyuv {
extern "C" { extern "C" {
#endif #endif
// This module is for GCC Neon armv8 64 bit.
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
//this ifdef should be removed if TransposeWx8_NEON's aarch64 has
//been done
#ifdef HAS_TRANSPOSE_WX8_NEON
static uvec8 kVTbl4x4Transpose = static uvec8 kVTbl4x4Transpose =
{ 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 }; { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 };
void TransposeWx8_NEON(const uint8* src, int src_stride, void TransposeWx8_NEON(const uint8* src, int src_stride,
uint8* dst, int dst_stride, uint8* dst, int dst_stride, int width) {
int width) {
const uint8* src_temp = NULL; const uint8* src_temp = NULL;
int64 width64 = (int64) width; // Work around clang 3.4 warning.
asm volatile ( asm volatile (
// loops are on blocks of 8. loop will stop when // loops are on blocks of 8. loop will stop when
// counter gets to or below 0. starting the counter // counter gets to or below 0. starting the counter
// at w-8 allow for this // at w-8 allow for this
"sub %5, #8 \n" "sub %3, %3, #8 \n"
// handle 8x8 blocks. this should be the majority of the plane // handle 8x8 blocks. this should be the majority of the plane
".p2align 2 \n"
"1: \n" "1: \n"
"mov %0, %1 \n" "mov %0, %1 \n"
MEMACCESS(0) MEMACCESS(0)
"vld1.8 {d0}, [%0], %2 \n" "ld1 {v0.8b}, [%0], %5 \n"
MEMACCESS(0) MEMACCESS(0)
"vld1.8 {d1}, [%0], %2 \n" "ld1 {v1.8b}, [%0], %5 \n"
MEMACCESS(0) MEMACCESS(0)
"vld1.8 {d2}, [%0], %2 \n" "ld1 {v2.8b}, [%0], %5 \n"
MEMACCESS(0) MEMACCESS(0)
"vld1.8 {d3}, [%0], %2 \n" "ld1 {v3.8b}, [%0], %5 \n"
MEMACCESS(0) MEMACCESS(0)
"vld1.8 {d4}, [%0], %2 \n" "ld1 {v4.8b}, [%0], %5 \n"
MEMACCESS(0) MEMACCESS(0)
"vld1.8 {d5}, [%0], %2 \n" "ld1 {v5.8b}, [%0], %5 \n"
MEMACCESS(0) MEMACCESS(0)
"vld1.8 {d6}, [%0], %2 \n" "ld1 {v6.8b}, [%0], %5 \n"
MEMACCESS(0) MEMACCESS(0)
"vld1.8 {d7}, [%0] \n" "ld1 {v7.8b}, [%0] \n"
"vtrn.8 d1, d0 \n" "trn2 v16.8b, v0.8b, v1.8b \n"
"vtrn.8 d3, d2 \n" "trn1 v17.8b, v0.8b, v1.8b \n"
"vtrn.8 d5, d4 \n" "trn2 v18.8b, v2.8b, v3.8b \n"
"vtrn.8 d7, d6 \n" "trn1 v19.8b, v2.8b, v3.8b \n"
"trn2 v20.8b, v4.8b, v5.8b \n"
"trn1 v21.8b, v4.8b, v5.8b \n"
"trn2 v22.8b, v6.8b, v7.8b \n"
"trn1 v23.8b, v6.8b, v7.8b \n"
"vtrn.16 d1, d3 \n" "trn2 v3.4h, v17.4h, v19.4h \n"
"vtrn.16 d0, d2 \n" "trn1 v1.4h, v17.4h, v19.4h \n"
"vtrn.16 d5, d7 \n" "trn2 v2.4h, v16.4h, v18.4h \n"
"vtrn.16 d4, d6 \n" "trn1 v0.4h, v16.4h, v18.4h \n"
"trn2 v7.4h, v21.4h, v23.4h \n"
"trn1 v5.4h, v21.4h, v23.4h \n"
"trn2 v6.4h, v20.4h, v22.4h \n"
"trn1 v4.4h, v20.4h, v22.4h \n"
"vtrn.32 d1, d5 \n" "trn2 v21.2s, v1.2s, v5.2s \n"
"vtrn.32 d0, d4 \n" "trn1 v17.2s, v1.2s, v5.2s \n"
"vtrn.32 d3, d7 \n" "trn2 v20.2s, v0.2s, v4.2s \n"
"vtrn.32 d2, d6 \n" "trn1 v16.2s, v0.2s, v4.2s \n"
"trn2 v23.2s, v3.2s, v7.2s \n"
"trn1 v19.2s, v3.2s, v7.2s \n"
"trn2 v22.2s, v2.2s, v6.2s \n"
"trn1 v18.2s, v2.2s, v6.2s \n"
"vrev16.8 q0, q0 \n" "mov %0, %2 \n"
"vrev16.8 q1, q1 \n"
"vrev16.8 q2, q2 \n"
"vrev16.8 q3, q3 \n"
"mov %0, %3 \n"
MEMACCESS(0) MEMACCESS(0)
"vst1.8 {d1}, [%0], %4 \n" "st1 {v17.8b}, [%0], %6 \n"
MEMACCESS(0) MEMACCESS(0)
"vst1.8 {d0}, [%0], %4 \n" "st1 {v16.8b}, [%0], %6 \n"
MEMACCESS(0) MEMACCESS(0)
"vst1.8 {d3}, [%0], %4 \n" "st1 {v19.8b}, [%0], %6 \n"
MEMACCESS(0) MEMACCESS(0)
"vst1.8 {d2}, [%0], %4 \n" "st1 {v18.8b}, [%0], %6 \n"
MEMACCESS(0) MEMACCESS(0)
"vst1.8 {d5}, [%0], %4 \n" "st1 {v21.8b}, [%0], %6 \n"
MEMACCESS(0) MEMACCESS(0)
"vst1.8 {d4}, [%0], %4 \n" "st1 {v20.8b}, [%0], %6 \n"
MEMACCESS(0) MEMACCESS(0)
"vst1.8 {d7}, [%0], %4 \n" "st1 {v23.8b}, [%0], %6 \n"
MEMACCESS(0) MEMACCESS(0)
"vst1.8 {d6}, [%0] \n" "st1 {v22.8b}, [%0] \n"
"add %1, #8 \n" // src += 8 "add %1, %1, #8 \n" // src += 8
"add %3, %3, %4, lsl #3 \n" // dst += 8 * dst_stride "add %2, %2, %6, lsl #3 \n" // dst += 8 * dst_stride
"subs %5, #8 \n" // w -= 8 "subs %3, %3, #8 \n" // w -= 8
"bge 1b \n" "b.ge 1b \n"
// add 8 back to counter. if the result is 0 there are // add 8 back to counter. if the result is 0 there are
// no residuals. // no residuals.
"adds %5, #8 \n" "adds %3, %3, #8 \n"
"beq 4f \n" "b.eq 4f \n"
// some residual, so between 1 and 7 lines left to transpose // some residual, so between 1 and 7 lines left to transpose
"cmp %5, #2 \n" "cmp %3, #2 \n"
"blt 3f \n" "b.lt 3f \n"
"cmp %5, #4 \n" "cmp %3, #4 \n"
"blt 2f \n" "b.lt 2f \n"
// 4x8 block // 4x8 block
"mov %0, %1 \n" "mov %0, %1 \n"
MEMACCESS(0) MEMACCESS(0)
"vld1.32 {d0[0]}, [%0], %2 \n" "ld1 {v0.s}[0], [%0], %5 \n"
MEMACCESS(0) MEMACCESS(0)
"vld1.32 {d0[1]}, [%0], %2 \n" "ld1 {v0.s}[1], [%0], %5 \n"
MEMACCESS(0) MEMACCESS(0)
"vld1.32 {d1[0]}, [%0], %2 \n" "ld1 {v0.s}[2], [%0], %5 \n"
MEMACCESS(0) MEMACCESS(0)
"vld1.32 {d1[1]}, [%0], %2 \n" "ld1 {v0.s}[3], [%0], %5 \n"
MEMACCESS(0) MEMACCESS(0)
"vld1.32 {d2[0]}, [%0], %2 \n" "ld1 {v1.s}[0], [%0], %5 \n"
MEMACCESS(0) MEMACCESS(0)
"vld1.32 {d2[1]}, [%0], %2 \n" "ld1 {v1.s}[1], [%0], %5 \n"
MEMACCESS(0) MEMACCESS(0)
"vld1.32 {d3[0]}, [%0], %2 \n" "ld1 {v1.s}[2], [%0], %5 \n"
MEMACCESS(0) MEMACCESS(0)
"vld1.32 {d3[1]}, [%0] \n" "ld1 {v1.s}[3], [%0] \n"
"mov %0, %3 \n" "mov %0, %2 \n"
MEMACCESS(6) MEMACCESS(4)
"vld1.8 {q3}, [%6] \n" "ld1 {v2.16b}, [%4] \n"
"vtbl.8 d4, {d0, d1}, d6 \n" "tbl v3.16b, {v0.16b}, v2.16b \n"
"vtbl.8 d5, {d0, d1}, d7 \n" "tbl v0.16b, {v1.16b}, v2.16b \n"
"vtbl.8 d0, {d2, d3}, d6 \n"
"vtbl.8 d1, {d2, d3}, d7 \n"
// TODO(frkoenig): Rework shuffle above to // TODO(frkoenig): Rework shuffle above to
// write out with 4 instead of 8 writes. // write out with 4 instead of 8 writes.
MEMACCESS(0) MEMACCESS(0)
"vst1.32 {d4[0]}, [%0], %4 \n" "st1 {v3.s}[0], [%0], %6 \n"
MEMACCESS(0) MEMACCESS(0)
"vst1.32 {d4[1]}, [%0], %4 \n" "st1 {v3.s}[1], [%0], %6 \n"
MEMACCESS(0) MEMACCESS(0)
"vst1.32 {d5[0]}, [%0], %4 \n" "st1 {v3.s}[2], [%0], %6 \n"
MEMACCESS(0) MEMACCESS(0)
"vst1.32 {d5[1]}, [%0] \n" "st1 {v3.s}[3], [%0] \n"
"add %0, %3, #4 \n" "add %0, %2, #4 \n"
MEMACCESS(0) MEMACCESS(0)
"vst1.32 {d0[0]}, [%0], %4 \n" "st1 {v0.s}[0], [%0], %6 \n"
MEMACCESS(0) MEMACCESS(0)
"vst1.32 {d0[1]}, [%0], %4 \n" "st1 {v0.s}[1], [%0], %6 \n"
MEMACCESS(0) MEMACCESS(0)
"vst1.32 {d1[0]}, [%0], %4 \n" "st1 {v0.s}[2], [%0], %6 \n"
MEMACCESS(0) MEMACCESS(0)
"vst1.32 {d1[1]}, [%0] \n" "st1 {v0.s}[3], [%0] \n"
"add %1, #4 \n" // src += 4 "add %1, %1, #4 \n" // src += 4
"add %3, %3, %4, lsl #2 \n" // dst += 4 * dst_stride "add %2, %2, %6, lsl #2 \n" // dst += 4 * dst_stride
"subs %5, #4 \n" // w -= 4 "subs %3, %3, #4 \n" // w -= 4
"beq 4f \n" "b.eq 4f \n"
// some residual, check to see if it includes a 2x8 block, // some residual, check to see if it includes a 2x8 block,
// or less // or less
"cmp %5, #2 \n" "cmp %3, #2 \n"
"blt 3f \n" "b.lt 3f \n"
// 2x8 block // 2x8 block
"2: \n" "2: \n"
"mov %0, %1 \n" "mov %0, %1 \n"
MEMACCESS(0) MEMACCESS(0)
"vld1.16 {d0[0]}, [%0], %2 \n" "ld1 {v0.h}[0], [%0], %5 \n"
MEMACCESS(0) MEMACCESS(0)
"vld1.16 {d1[0]}, [%0], %2 \n" "ld1 {v1.h}[0], [%0], %5 \n"
MEMACCESS(0) MEMACCESS(0)
"vld1.16 {d0[1]}, [%0], %2 \n" "ld1 {v0.h}[1], [%0], %5 \n"
MEMACCESS(0) MEMACCESS(0)
"vld1.16 {d1[1]}, [%0], %2 \n" "ld1 {v1.h}[1], [%0], %5 \n"
MEMACCESS(0) MEMACCESS(0)
"vld1.16 {d0[2]}, [%0], %2 \n" "ld1 {v0.h}[2], [%0], %5 \n"
MEMACCESS(0) MEMACCESS(0)
"vld1.16 {d1[2]}, [%0], %2 \n" "ld1 {v1.h}[2], [%0], %5 \n"
MEMACCESS(0) MEMACCESS(0)
"vld1.16 {d0[3]}, [%0], %2 \n" "ld1 {v0.h}[3], [%0], %5 \n"
MEMACCESS(0) MEMACCESS(0)
"vld1.16 {d1[3]}, [%0] \n" "ld1 {v1.h}[3], [%0] \n"
"vtrn.8 d0, d1 \n" "trn2 v2.8b, v0.8b, v1.8b \n"
"trn1 v3.8b, v0.8b, v1.8b \n"
"mov %0, %3 \n" "mov %0, %2 \n"
MEMACCESS(0) MEMACCESS(0)
"vst1.64 {d0}, [%0], %4 \n" "st1 {v3.8b}, [%0], %6 \n"
MEMACCESS(0) MEMACCESS(0)
"vst1.64 {d1}, [%0] \n" "st1 {v2.8b}, [%0] \n"
"add %1, #2 \n" // src += 2 "add %1, %1, #2 \n" // src += 2
"add %3, %3, %4, lsl #1 \n" // dst += 2 * dst_stride "add %2, %2, %6, lsl #1 \n" // dst += 2 * dst_stride
"subs %5, #2 \n" // w -= 2 "subs %3, %3, #2 \n" // w -= 2
"beq 4f \n" "b.eq 4f \n"
// 1x8 block // 1x8 block
"3: \n" "3: \n"
MEMACCESS(1) MEMACCESS(1)
"vld1.8 {d0[0]}, [%1], %2 \n" "ld1 {v0.b}[0], [%1], %5 \n"
MEMACCESS(1) MEMACCESS(1)
"vld1.8 {d0[1]}, [%1], %2 \n" "ld1 {v0.b}[1], [%1], %5 \n"
MEMACCESS(1) MEMACCESS(1)
"vld1.8 {d0[2]}, [%1], %2 \n" "ld1 {v0.b}[2], [%1], %5 \n"
MEMACCESS(1) MEMACCESS(1)
"vld1.8 {d0[3]}, [%1], %2 \n" "ld1 {v0.b}[3], [%1], %5 \n"
MEMACCESS(1) MEMACCESS(1)
"vld1.8 {d0[4]}, [%1], %2 \n" "ld1 {v0.b}[4], [%1], %5 \n"
MEMACCESS(1) MEMACCESS(1)
"vld1.8 {d0[5]}, [%1], %2 \n" "ld1 {v0.b}[5], [%1], %5 \n"
MEMACCESS(1) MEMACCESS(1)
"vld1.8 {d0[6]}, [%1], %2 \n" "ld1 {v0.b}[6], [%1], %5 \n"
MEMACCESS(1) MEMACCESS(1)
"vld1.8 {d0[7]}, [%1] \n" "ld1 {v0.b}[7], [%1] \n"
MEMACCESS(3) MEMACCESS(2)
"vst1.64 {d0}, [%3] \n" "st1 {v0.8b}, [%2] \n"
"4: \n" "4: \n"
: "+r"(src_temp), // %0 : "+r"(src_temp), // %0
"+r"(src), // %1 "+r"(src), // %1
"+r"(src_stride), // %2 "+r"(dst), // %2
"+r"(dst), // %3 "+r"(width64) // %3
"+r"(dst_stride), // %4 : "r"(&kVTbl4x4Transpose), // %4
"+r"(width) // %5 "r"(static_cast<ptrdiff_t>(src_stride)), // %5
: "r"(&kVTbl4x4Transpose) // %6 "r"(static_cast<ptrdiff_t>(dst_stride)) // %6
: "memory", "cc", "q0", "q1", "q2", "q3" : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16",
"v17", "v18", "v19", "v20", "v21", "v22", "v23"
); );
} }
#endif //HAS_TRANSPOSE_WX8_NEON
//this ifdef should be removed if TransposeUVWx8_NEON's aarch64 has static uint8 kVTbl4x4TransposeDi[32] =
//been done { 0, 16, 32, 48, 2, 18, 34, 50, 4, 20, 36, 52, 6, 22, 38, 54,
#ifdef HAS_TRANSPOSE_UVWX8_NEON 1, 17, 33, 49, 3, 19, 35, 51, 5, 21, 37, 53, 7, 23, 39, 55};
static uvec8 kVTbl4x4TransposeDi =
{ 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15 };
void TransposeUVWx8_NEON(const uint8* src, int src_stride, void TransposeUVWx8_NEON(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a, uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b, uint8* dst_b, int dst_stride_b,
int width) { int width) {
const uint8* src_temp = NULL; const uint8* src_temp = NULL;
int64 width64 = (int64) width; // Work around clang 3.4 warning.
asm volatile ( asm volatile (
// loops are on blocks of 8. loop will stop when // loops are on blocks of 8. loop will stop when
// counter gets to or below 0. starting the counter // counter gets to or below 0. starting the counter
// at w-8 allow for this // at w-8 allow for this
"sub %7, #8 \n" "sub %4, %4, #8 \n"
// handle 8x8 blocks. this should be the majority of the plane // handle 8x8 blocks. this should be the majority of the plane
".p2align 2 \n"
"1: \n" "1: \n"
"mov %0, %1 \n" "mov %0, %1 \n"
MEMACCESS(0) MEMACCESS(0)
"vld2.8 {d0, d1}, [%0], %2 \n" "ld1 {v0.16b}, [%0], %5 \n"
MEMACCESS(0) MEMACCESS(0)
"vld2.8 {d2, d3}, [%0], %2 \n" "ld1 {v1.16b}, [%0], %5 \n"
MEMACCESS(0) MEMACCESS(0)
"vld2.8 {d4, d5}, [%0], %2 \n" "ld1 {v2.16b}, [%0], %5 \n"
MEMACCESS(0) MEMACCESS(0)
"vld2.8 {d6, d7}, [%0], %2 \n" "ld1 {v3.16b}, [%0], %5 \n"
MEMACCESS(0) MEMACCESS(0)
"vld2.8 {d16, d17}, [%0], %2 \n" "ld1 {v4.16b}, [%0], %5 \n"
MEMACCESS(0) MEMACCESS(0)
"vld2.8 {d18, d19}, [%0], %2 \n" "ld1 {v5.16b}, [%0], %5 \n"
MEMACCESS(0) MEMACCESS(0)
"vld2.8 {d20, d21}, [%0], %2 \n" "ld1 {v6.16b}, [%0], %5 \n"
MEMACCESS(0) MEMACCESS(0)
"vld2.8 {d22, d23}, [%0] \n" "ld1 {v7.16b}, [%0] \n"
"vtrn.8 q1, q0 \n" "trn1 v16.16b, v0.16b, v1.16b \n"
"vtrn.8 q3, q2 \n" "trn2 v17.16b, v0.16b, v1.16b \n"
"vtrn.8 q9, q8 \n" "trn1 v18.16b, v2.16b, v3.16b \n"
"vtrn.8 q11, q10 \n" "trn2 v19.16b, v2.16b, v3.16b \n"
"trn1 v20.16b, v4.16b, v5.16b \n"
"trn2 v21.16b, v4.16b, v5.16b \n"
"trn1 v22.16b, v6.16b, v7.16b \n"
"trn2 v23.16b, v6.16b, v7.16b \n"
"vtrn.16 q1, q3 \n" "trn1 v0.8h, v16.8h, v18.8h \n"
"vtrn.16 q0, q2 \n" "trn2 v1.8h, v16.8h, v18.8h \n"
"vtrn.16 q9, q11 \n" "trn1 v2.8h, v20.8h, v22.8h \n"
"vtrn.16 q8, q10 \n" "trn2 v3.8h, v20.8h, v22.8h \n"
"trn1 v4.8h, v17.8h, v19.8h \n"
"trn2 v5.8h, v17.8h, v19.8h \n"
"trn1 v6.8h, v21.8h, v23.8h \n"
"trn2 v7.8h, v21.8h, v23.8h \n"
"vtrn.32 q1, q9 \n" "trn1 v16.4s, v0.4s, v2.4s \n"
"vtrn.32 q0, q8 \n" "trn2 v17.4s, v0.4s, v2.4s \n"
"vtrn.32 q3, q11 \n" "trn1 v18.4s, v1.4s, v3.4s \n"
"vtrn.32 q2, q10 \n" "trn2 v19.4s, v1.4s, v3.4s \n"
"trn1 v20.4s, v4.4s, v6.4s \n"
"trn2 v21.4s, v4.4s, v6.4s \n"
"trn1 v22.4s, v5.4s, v7.4s \n"
"trn2 v23.4s, v5.4s, v7.4s \n"
"vrev16.8 q0, q0 \n" "mov %0, %2 \n"
"vrev16.8 q1, q1 \n"
"vrev16.8 q2, q2 \n" MEMACCESS(0)
"vrev16.8 q3, q3 \n" "st1 {v16.d}[0], [%0], %6 \n"
"vrev16.8 q8, q8 \n" MEMACCESS(0)
"vrev16.8 q9, q9 \n" "st1 {v18.d}[0], [%0], %6 \n"
"vrev16.8 q10, q10 \n" MEMACCESS(0)
"vrev16.8 q11, q11 \n" "st1 {v17.d}[0], [%0], %6 \n"
MEMACCESS(0)
"st1 {v19.d}[0], [%0], %6 \n"
MEMACCESS(0)
"st1 {v16.d}[1], [%0], %6 \n"
MEMACCESS(0)
"st1 {v18.d}[1], [%0], %6 \n"
MEMACCESS(0)
"st1 {v17.d}[1], [%0], %6 \n"
MEMACCESS(0)
"st1 {v19.d}[1], [%0] \n"
"mov %0, %3 \n" "mov %0, %3 \n"
MEMACCESS(0) MEMACCESS(0)
"vst1.8 {d2}, [%0], %4 \n" "st1 {v20.d}[0], [%0], %7 \n"
MEMACCESS(0) MEMACCESS(0)
"vst1.8 {d0}, [%0], %4 \n" "st1 {v22.d}[0], [%0], %7 \n"
MEMACCESS(0) MEMACCESS(0)
"vst1.8 {d6}, [%0], %4 \n" "st1 {v21.d}[0], [%0], %7 \n"
MEMACCESS(0) MEMACCESS(0)
"vst1.8 {d4}, [%0], %4 \n" "st1 {v23.d}[0], [%0], %7 \n"
MEMACCESS(0) MEMACCESS(0)
"vst1.8 {d18}, [%0], %4 \n" "st1 {v20.d}[1], [%0], %7 \n"
MEMACCESS(0) MEMACCESS(0)
"vst1.8 {d16}, [%0], %4 \n" "st1 {v22.d}[1], [%0], %7 \n"
MEMACCESS(0) MEMACCESS(0)
"vst1.8 {d22}, [%0], %4 \n" "st1 {v21.d}[1], [%0], %7 \n"
MEMACCESS(0) MEMACCESS(0)
"vst1.8 {d20}, [%0] \n" "st1 {v23.d}[1], [%0] \n"
"mov %0, %5 \n" "add %1, %1, #16 \n" // src += 8*2
"add %2, %2, %6, lsl #3 \n" // dst_a += 8 * dst_stride_a
MEMACCESS(0) "add %3, %3, %7, lsl #3 \n" // dst_b += 8 * dst_stride_b
"vst1.8 {d3}, [%0], %6 \n" "subs %4, %4, #8 \n" // w -= 8
MEMACCESS(0) "b.ge 1b \n"
"vst1.8 {d1}, [%0], %6 \n"
MEMACCESS(0)
"vst1.8 {d7}, [%0], %6 \n"
MEMACCESS(0)
"vst1.8 {d5}, [%0], %6 \n"
MEMACCESS(0)
"vst1.8 {d19}, [%0], %6 \n"
MEMACCESS(0)
"vst1.8 {d17}, [%0], %6 \n"
MEMACCESS(0)
"vst1.8 {d23}, [%0], %6 \n"
MEMACCESS(0)
"vst1.8 {d21}, [%0] \n"
"add %1, #8*2 \n" // src += 8*2
"add %3, %3, %4, lsl #3 \n" // dst_a += 8 * dst_stride_a
"add %5, %5, %6, lsl #3 \n" // dst_b += 8 * dst_stride_b
"subs %7, #8 \n" // w -= 8
"bge 1b \n"
// add 8 back to counter. if the result is 0 there are // add 8 back to counter. if the result is 0 there are
// no residuals. // no residuals.
"adds %7, #8 \n" "adds %4, %4, #8 \n"
"beq 4f \n" "b.eq 4f \n"
// some residual, so between 1 and 7 lines left to transpose // some residual, so between 1 and 7 lines left to transpose
"cmp %7, #2 \n" "cmp %4, #2 \n"
"blt 3f \n" "b.lt 3f \n"
"cmp %7, #4 \n" "cmp %4, #4 \n"
"blt 2f \n" "b.lt 2f \n"
// TODO(frkoenig): Clean this up // TODO(frkoenig): Clean this up
// 4x8 block // 4x8 block
"mov %0, %1 \n" "mov %0, %1 \n"
MEMACCESS(0) MEMACCESS(0)
"vld1.64 {d0}, [%0], %2 \n" "ld1 {v0.8b}, [%0], %5 \n"
MEMACCESS(0) MEMACCESS(0)
"vld1.64 {d1}, [%0], %2 \n" "ld1 {v1.8b}, [%0], %5 \n"
MEMACCESS(0) MEMACCESS(0)
"vld1.64 {d2}, [%0], %2 \n" "ld1 {v2.8b}, [%0], %5 \n"
MEMACCESS(0) MEMACCESS(0)
"vld1.64 {d3}, [%0], %2 \n" "ld1 {v3.8b}, [%0], %5 \n"
MEMACCESS(0) MEMACCESS(0)
"vld1.64 {d4}, [%0], %2 \n" "ld1 {v4.8b}, [%0], %5 \n"
MEMACCESS(0) MEMACCESS(0)
"vld1.64 {d5}, [%0], %2 \n" "ld1 {v5.8b}, [%0], %5 \n"
MEMACCESS(0) MEMACCESS(0)
"vld1.64 {d6}, [%0], %2 \n" "ld1 {v6.8b}, [%0], %5 \n"
MEMACCESS(0) MEMACCESS(0)
"vld1.64 {d7}, [%0] \n" "ld1 {v7.8b}, [%0] \n"
MEMACCESS(8) MEMACCESS(8)
"vld1.8 {q15}, [%8] \n" "ld1 {v30.16b}, [%8], #16 \n"
"ld1 {v31.16b}, [%8] \n"
"vtrn.8 q0, q1 \n" "tbl v16.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v30.16b \n"
"vtrn.8 q2, q3 \n" "tbl v17.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v31.16b \n"
"tbl v18.16b, {v4.16b, v5.16b, v6.16b, v7.16b}, v30.16b \n"
"tbl v19.16b, {v4.16b, v5.16b, v6.16b, v7.16b}, v31.16b \n"
"vtbl.8 d16, {d0, d1}, d30 \n" "mov %0, %2 \n"
"vtbl.8 d17, {d0, d1}, d31 \n"
"vtbl.8 d18, {d2, d3}, d30 \n" MEMACCESS(0)
"vtbl.8 d19, {d2, d3}, d31 \n" "st1 {v16.s}[0], [%0], %6 \n"
"vtbl.8 d20, {d4, d5}, d30 \n" MEMACCESS(0)
"vtbl.8 d21, {d4, d5}, d31 \n" "st1 {v16.s}[1], [%0], %6 \n"
"vtbl.8 d22, {d6, d7}, d30 \n" MEMACCESS(0)
"vtbl.8 d23, {d6, d7}, d31 \n" "st1 {v16.s}[2], [%0], %6 \n"
MEMACCESS(0)
"st1 {v16.s}[3], [%0], %6 \n"
"add %0, %2, #4 \n"
MEMACCESS(0)
"st1 {v18.s}[0], [%0], %6 \n"
MEMACCESS(0)
"st1 {v18.s}[1], [%0], %6 \n"
MEMACCESS(0)
"st1 {v18.s}[2], [%0], %6 \n"
MEMACCESS(0)
"st1 {v18.s}[3], [%0] \n"
"mov %0, %3 \n" "mov %0, %3 \n"
MEMACCESS(0) MEMACCESS(0)
"vst1.32 {d16[0]}, [%0], %4 \n" "st1 {v17.s}[0], [%0], %7 \n"
MEMACCESS(0) MEMACCESS(0)
"vst1.32 {d16[1]}, [%0], %4 \n" "st1 {v17.s}[1], [%0], %7 \n"
MEMACCESS(0) MEMACCESS(0)
"vst1.32 {d17[0]}, [%0], %4 \n" "st1 {v17.s}[2], [%0], %7 \n"
MEMACCESS(0) MEMACCESS(0)
"vst1.32 {d17[1]}, [%0], %4 \n" "st1 {v17.s}[3], [%0], %7 \n"
"add %0, %3, #4 \n" "add %0, %3, #4 \n"
MEMACCESS(0) MEMACCESS(0)
"vst1.32 {d20[0]}, [%0], %4 \n" "st1 {v19.s}[0], [%0], %7 \n"
MEMACCESS(0) MEMACCESS(0)
"vst1.32 {d20[1]}, [%0], %4 \n" "st1 {v19.s}[1], [%0], %7 \n"
MEMACCESS(0) MEMACCESS(0)
"vst1.32 {d21[0]}, [%0], %4 \n" "st1 {v19.s}[2], [%0], %7 \n"
MEMACCESS(0) MEMACCESS(0)
"vst1.32 {d21[1]}, [%0] \n" "st1 {v19.s}[3], [%0] \n"
"mov %0, %5 \n" "add %1, %1, #8 \n" // src += 4 * 2
"add %2, %2, %6, lsl #2 \n" // dst_a += 4 * dst_stride_a
MEMACCESS(0) "add %3, %3, %7, lsl #2 \n" // dst_b += 4 * dst_stride_b
"vst1.32 {d18[0]}, [%0], %6 \n" "subs %4, %4, #4 \n" // w -= 4
MEMACCESS(0) "b.eq 4f \n"
"vst1.32 {d18[1]}, [%0], %6 \n"
MEMACCESS(0)
"vst1.32 {d19[0]}, [%0], %6 \n"
MEMACCESS(0)
"vst1.32 {d19[1]}, [%0], %6 \n"
"add %0, %5, #4 \n"
MEMACCESS(0)
"vst1.32 {d22[0]}, [%0], %6 \n"
MEMACCESS(0)
"vst1.32 {d22[1]}, [%0], %6 \n"
MEMACCESS(0)
"vst1.32 {d23[0]}, [%0], %6 \n"
MEMACCESS(0)
"vst1.32 {d23[1]}, [%0] \n"
"add %1, #4*2 \n" // src += 4 * 2
"add %3, %3, %4, lsl #2 \n" // dst_a += 4 * dst_stride_a
"add %5, %5, %6, lsl #2 \n" // dst_b += 4 * dst_stride_b
"subs %7, #4 \n" // w -= 4
"beq 4f \n"
// some residual, check to see if it includes a 2x8 block, // some residual, check to see if it includes a 2x8 block,
// or less // or less
"cmp %7, #2 \n" "cmp %4, #2 \n"
"blt 3f \n" "b.lt 3f \n"
// 2x8 block // 2x8 block
"2: \n" "2: \n"
"mov %0, %1 \n" "mov %0, %1 \n"
MEMACCESS(0) MEMACCESS(0)
"vld2.16 {d0[0], d2[0]}, [%0], %2 \n" "ld2 {v0.h, v1.h}[0], [%0], %5 \n"
MEMACCESS(0) MEMACCESS(0)
"vld2.16 {d1[0], d3[0]}, [%0], %2 \n" "ld2 {v2.h, v3.h}[0], [%0], %5 \n"
MEMACCESS(0) MEMACCESS(0)
"vld2.16 {d0[1], d2[1]}, [%0], %2 \n" "ld2 {v0.h, v1.h}[1], [%0], %5 \n"
MEMACCESS(0) MEMACCESS(0)
"vld2.16 {d1[1], d3[1]}, [%0], %2 \n" "ld2 {v2.h, v3.h}[1], [%0], %5 \n"
MEMACCESS(0) MEMACCESS(0)
"vld2.16 {d0[2], d2[2]}, [%0], %2 \n" "ld2 {v0.h, v1.h}[2], [%0], %5 \n"
MEMACCESS(0) MEMACCESS(0)
"vld2.16 {d1[2], d3[2]}, [%0], %2 \n" "ld2 {v2.h, v3.h}[2], [%0], %5 \n"
MEMACCESS(0) MEMACCESS(0)
"vld2.16 {d0[3], d2[3]}, [%0], %2 \n" "ld2 {v0.h, v1.h}[3], [%0], %5 \n"
MEMACCESS(0) MEMACCESS(0)
"vld2.16 {d1[3], d3[3]}, [%0] \n" "ld2 {v2.h, v3.h}[3], [%0] \n"
"vtrn.8 d0, d1 \n" "trn1 v4.8b, v0.8b, v2.8b \n"
"vtrn.8 d2, d3 \n" "trn2 v5.8b, v0.8b, v2.8b \n"
"trn1 v6.8b, v1.8b, v3.8b \n"
"trn2 v7.8b, v1.8b, v3.8b \n"
"mov %0, %2 \n"
MEMACCESS(0)
"st1 {v4.d}[0], [%0], %6 \n"
MEMACCESS(0)
"st1 {v6.d}[0], [%0] \n"
"mov %0, %3 \n" "mov %0, %3 \n"
MEMACCESS(0) MEMACCESS(0)
"vst1.64 {d0}, [%0], %4 \n" "st1 {v5.d}[0], [%0], %7 \n"
MEMACCESS(0) MEMACCESS(0)
"vst1.64 {d2}, [%0] \n" "st1 {v7.d}[0], [%0] \n"
"mov %0, %5 \n" "add %1, %1, #4 \n" // src += 2 * 2
"add %2, %2, %6, lsl #1 \n" // dst_a += 2 * dst_stride_a
MEMACCESS(0) "add %3, %3, %7, lsl #1 \n" // dst_b += 2 * dst_stride_b
"vst1.64 {d1}, [%0], %6 \n" "subs %4, %4, #2 \n" // w -= 2
MEMACCESS(0) "b.eq 4f \n"
"vst1.64 {d3}, [%0] \n"
"add %1, #2*2 \n" // src += 2 * 2
"add %3, %3, %4, lsl #1 \n" // dst_a += 2 * dst_stride_a
"add %5, %5, %6, lsl #1 \n" // dst_b += 2 * dst_stride_b
"subs %7, #2 \n" // w -= 2
"beq 4f \n"
// 1x8 block // 1x8 block
"3: \n" "3: \n"
MEMACCESS(1) MEMACCESS(1)
"vld2.8 {d0[0], d1[0]}, [%1], %2 \n" "ld2 {v0.b, v1.b}[0], [%1], %5 \n"
MEMACCESS(1) MEMACCESS(1)
"vld2.8 {d0[1], d1[1]}, [%1], %2 \n" "ld2 {v0.b, v1.b}[1], [%1], %5 \n"
MEMACCESS(1) MEMACCESS(1)
"vld2.8 {d0[2], d1[2]}, [%1], %2 \n" "ld2 {v0.b, v1.b}[2], [%1], %5 \n"
MEMACCESS(1) MEMACCESS(1)
"vld2.8 {d0[3], d1[3]}, [%1], %2 \n" "ld2 {v0.b, v1.b}[3], [%1], %5 \n"
MEMACCESS(1) MEMACCESS(1)
"vld2.8 {d0[4], d1[4]}, [%1], %2 \n" "ld2 {v0.b, v1.b}[4], [%1], %5 \n"
MEMACCESS(1) MEMACCESS(1)
"vld2.8 {d0[5], d1[5]}, [%1], %2 \n" "ld2 {v0.b, v1.b}[5], [%1], %5 \n"
MEMACCESS(1) MEMACCESS(1)
"vld2.8 {d0[6], d1[6]}, [%1], %2 \n" "ld2 {v0.b, v1.b}[6], [%1], %5 \n"
MEMACCESS(1) MEMACCESS(1)
"vld2.8 {d0[7], d1[7]}, [%1] \n" "ld2 {v0.b, v1.b}[7], [%1] \n"
MEMACCESS(2)
"st1 {v0.d}[0], [%2] \n"
MEMACCESS(3) MEMACCESS(3)
"vst1.64 {d0}, [%3] \n" "st1 {v1.d}[0], [%3] \n"
MEMACCESS(5)
"vst1.64 {d1}, [%5] \n"
"4: \n" "4: \n"
: "+r"(src_temp), // %0 : "+r"(src_temp), // %0
"+r"(src), // %1 "+r"(src), // %1
"+r"(src_stride), // %2 "+r"(dst_a), // %2
"+r"(dst_a), // %3 "+r"(dst_b), // %3
"+r"(dst_stride_a), // %4 "+r"(width64) // %4
"+r"(dst_b), // %5 : "r"(static_cast<ptrdiff_t>(src_stride)), // %5
"+r"(dst_stride_b), // %6 "r"(static_cast<ptrdiff_t>(dst_stride_a)), // %6
"+r"(width) // %7 "r"(static_cast<ptrdiff_t>(dst_stride_b)), // %7
: "r"(&kVTbl4x4TransposeDi) // %8 "r"(&kVTbl4x4TransposeDi) // %8
: "memory", "cc", : "memory", "cc",
"q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11" "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
"v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
"v30", "v31"
); );
} }
#endif // HAS_TRANSPOSE_UVWX8_NEON #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
#endif // __aarch64__
#ifdef __cplusplus #ifdef __cplusplus
} // extern "C" } // extern "C"

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -389,7 +389,6 @@ void SplitUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
"blez $t4, 2f \n" "blez $t4, 2f \n"
" andi %[width], %[width], 0xf \n" // residual " andi %[width], %[width], 0xf \n" // residual
".p2align 2 \n"
"1: \n" "1: \n"
"addiu $t4, $t4, -1 \n" "addiu $t4, $t4, -1 \n"
"lw $t0, 0(%[src_uv]) \n" // V1 | U1 | V0 | U0 "lw $t0, 0(%[src_uv]) \n" // V1 | U1 | V0 | U0
@ -447,89 +446,6 @@ void SplitUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
); );
} }
// Splits interleaved UV bytes from src_uv into separate dst_u and dst_v rows
// using MIPS DSPR2 inline assembly.
//
// The main loop (label 1) runs width / 16 times; each pass consumes 32 source
// bytes and writes 16 bytes to each destination. Every word is loaded with an
// lwr/lwl pair and stored with an swr/swl pair instead of a plain lw/sw.
// The tail loop (label 2) handles the remaining (width & 0xf) pairs one byte
// at a time.
//
// src_uv: interleaved input; byte 0 of each pair goes to dst_u, byte 1 to dst_v.
// dst_u, dst_v: output rows, each advanced by one byte per pair.
// width: number of UV pairs to split.
//
// NOTE(review): when width is 0 the initial blez still jumps to label 2, whose
// body runs once before its bgtz test, so one pair is read and written.
//
// Clobbers $t0-$t9.
void SplitUVRow_Unaligned_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u,
uint8* dst_v, int width) {
__asm__ __volatile__ (
".set push \n"
// With noreorder, the instruction written directly after each branch
// (the ones with a leading space) sits in that branch's delay slot.
".set noreorder \n"
"srl $t4, %[width], 4 \n" // multiples of 16
"blez $t4, 2f \n"
" andi %[width], %[width], 0xf \n" // residual
".p2align 2 \n"
"1: \n"
"addiu $t4, $t4, -1 \n"
"lwr $t0, 0(%[src_uv]) \n"
"lwl $t0, 3(%[src_uv]) \n" // V1 | U1 | V0 | U0
"lwr $t1, 4(%[src_uv]) \n"
"lwl $t1, 7(%[src_uv]) \n" // V3 | U3 | V2 | U2
"lwr $t2, 8(%[src_uv]) \n"
"lwl $t2, 11(%[src_uv]) \n" // V5 | U5 | V4 | U4
"lwr $t3, 12(%[src_uv]) \n"
"lwl $t3, 15(%[src_uv]) \n" // V7 | U7 | V6 | U6
"lwr $t5, 16(%[src_uv]) \n"
"lwl $t5, 19(%[src_uv]) \n" // V9 | U9 | V8 | U8
"lwr $t6, 20(%[src_uv]) \n"
"lwl $t6, 23(%[src_uv]) \n" // V11 | U11 | V10 | U10
"lwr $t7, 24(%[src_uv]) \n"
"lwl $t7, 27(%[src_uv]) \n" // V13 | U13 | V12 | U12
"lwr $t8, 28(%[src_uv]) \n"
"lwl $t8, 31(%[src_uv]) \n" // V15 | U15 | V14 | U14
// De-interleave: precrq gathers the V bytes, precr gathers the U bytes.
"precrq.qb.ph $t9, $t1, $t0 \n" // V3 | V2 | V1 | V0
"precr.qb.ph $t0, $t1, $t0 \n" // U3 | U2 | U1 | U0
"precrq.qb.ph $t1, $t3, $t2 \n" // V7 | V6 | V5 | V4
"precr.qb.ph $t2, $t3, $t2 \n" // U7 | U6 | U5 | U4
"precrq.qb.ph $t3, $t6, $t5 \n" // V11 | V10 | V9 | V8
"precr.qb.ph $t5, $t6, $t5 \n" // U11 | U10 | U9 | U8
"precrq.qb.ph $t6, $t8, $t7 \n" // V15 | V14 | V13 | V12
"precr.qb.ph $t7, $t8, $t7 \n" // U15 | U14 | U13 | U12
"addiu %[src_uv], %[src_uv], 32 \n"
"swr $t9, 0(%[dst_v]) \n"
"swl $t9, 3(%[dst_v]) \n"
"swr $t0, 0(%[dst_u]) \n"
"swl $t0, 3(%[dst_u]) \n"
"swr $t1, 4(%[dst_v]) \n"
"swl $t1, 7(%[dst_v]) \n"
"swr $t2, 4(%[dst_u]) \n"
"swl $t2, 7(%[dst_u]) \n"
"swr $t3, 8(%[dst_v]) \n"
"swl $t3, 11(%[dst_v]) \n"
"swr $t5, 8(%[dst_u]) \n"
"swl $t5, 11(%[dst_u]) \n"
"swr $t6, 12(%[dst_v]) \n"
"swl $t6, 15(%[dst_v]) \n"
"swr $t7, 12(%[dst_u]) \n"
"swl $t7, 15(%[dst_u]) \n"
"addiu %[dst_u], %[dst_u], 16 \n"
"bgtz $t4, 1b \n"
" addiu %[dst_v], %[dst_v], 16 \n"
// Skip the byte-wise tail when no residual pairs remain.
"beqz %[width], 3f \n"
" nop \n"
"2: \n"
"lbu $t0, 0(%[src_uv]) \n"
"lbu $t1, 1(%[src_uv]) \n"
"addiu %[src_uv], %[src_uv], 2 \n"
"addiu %[width], %[width], -1 \n"
"sb $t0, 0(%[dst_u]) \n"
"sb $t1, 0(%[dst_v]) \n"
"addiu %[dst_u], %[dst_u], 1 \n"
"bgtz %[width], 2b \n"
" addiu %[dst_v], %[dst_v], 1 \n"
"3: \n"
".set pop \n"
: [src_uv] "+r" (src_uv),
[width] "+r" (width),
[dst_u] "+r" (dst_u),
[dst_v] "+r" (dst_v)
:
: "t0", "t1", "t2", "t3",
"t4", "t5", "t6", "t7", "t8", "t9"
);
}
void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width) { void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width) {
__asm__ __volatile__ ( __asm__ __volatile__ (
".set push \n" ".set push \n"
@ -540,7 +456,6 @@ void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width) {
"blez $t4, 2f \n" "blez $t4, 2f \n"
" addu %[src], %[src], %[width] \n" // src += width " addu %[src], %[src], %[width] \n" // src += width
".p2align 2 \n"
"1: \n" "1: \n"
"lw $t0, -16(%[src]) \n" // |3|2|1|0| "lw $t0, -16(%[src]) \n" // |3|2|1|0|
"lw $t1, -12(%[src]) \n" // |7|6|5|4| "lw $t1, -12(%[src]) \n" // |7|6|5|4|
@ -595,7 +510,6 @@ void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
"blez %[x], 2f \n" "blez %[x], 2f \n"
" addu %[src_uv], %[src_uv], $t4 \n" " addu %[src_uv], %[src_uv], $t4 \n"
".p2align 2 \n"
"1: \n" "1: \n"
"lw $t0, -32(%[src_uv]) \n" // |3|2|1|0| "lw $t0, -32(%[src_uv]) \n" // |3|2|1|0|
"lw $t1, -28(%[src_uv]) \n" // |7|6|5|4| "lw $t1, -28(%[src_uv]) \n" // |7|6|5|4|
@ -679,7 +593,7 @@ void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
// t8 = | 0 | G1 | 0 | g1 | // t8 = | 0 | G1 | 0 | g1 |
// t2 = | 0 | R0 | 0 | r0 | // t2 = | 0 | R0 | 0 | r0 |
// t1 = | 0 | R1 | 0 | r1 | // t1 = | 0 | R1 | 0 | r1 |
#define I422ToTransientMipsRGB \ #define YUVTORGB \
"lw $t0, 0(%[y_buf]) \n" \ "lw $t0, 0(%[y_buf]) \n" \
"lhu $t1, 0(%[u_buf]) \n" \ "lhu $t1, 0(%[u_buf]) \n" \
"lhu $t2, 0(%[v_buf]) \n" \ "lhu $t2, 0(%[v_buf]) \n" \
@ -738,10 +652,12 @@ void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
"addu.ph $t2, $t2, $s5 \n" \ "addu.ph $t2, $t2, $s5 \n" \
"addu.ph $t1, $t1, $s5 \n" "addu.ph $t1, $t1, $s5 \n"
// TODO(fbarchard): accept yuv conversion constants.
void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf, void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) { int width) {
__asm__ __volatile__ ( __asm__ __volatile__ (
".set push \n" ".set push \n"
@ -756,9 +672,8 @@ void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf,
"lui $s6, 0xff00 \n" "lui $s6, 0xff00 \n"
"ori $s6, 0xff00 \n" // |ff|00|ff|00|ff| "ori $s6, 0xff00 \n" // |ff|00|ff|00|ff|
".p2align 2 \n"
"1: \n" "1: \n"
I422ToTransientMipsRGB YUVTORGB
// Arranging into argb format // Arranging into argb format
"precr.qb.ph $t4, $t8, $t4 \n" // |G1|g1|B1|b1| "precr.qb.ph $t4, $t8, $t4 \n" // |G1|g1|B1|b1|
"precr.qb.ph $t5, $t9, $t5 \n" // |G0|g0|B0|b0| "precr.qb.ph $t5, $t9, $t5 \n" // |G0|g0|B0|b0|
@ -800,134 +715,8 @@ void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf,
); );
} }
// Converts one row of I422 input (separate y_buf, u_buf, v_buf) into packed
// 32-bit pixels in rgb_buf using MIPS DSPR2 inline assembly. Each stored word
// is arranged |ff|B|G|R| according to the register comments below.
//
// Each loop pass stores 4 pixels (16 bytes) and advances y_buf by 4.
// The per-pixel math comes from the I422ToTransientMipsRGB macro, which is
// defined outside this function.
// NOTE(review): u_buf and v_buf are not advanced in the code visible here;
// presumably the macro does that - confirm against its definition.
//
// The loop subtracts 4 from width and repeats while width != 0, so width must
// be a multiple of 4 (a width of 0 skips the loop entirely).
//
// Clobbers $t0-$t9 and $s0-$s6.
void I422ToABGRRow_MIPS_DSPR2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
__asm__ __volatile__ (
".set push \n"
// With noreorder, the instruction written directly after each branch
// (the ones with a leading space) sits in that branch's delay slot.
".set noreorder \n"
"beqz %[width], 2f \n"
" repl.ph $s0, 74 \n" // |YG|YG| = |74|74|
"repl.ph $s1, -25 \n" // |UG|UG| = |-25|-25|
"repl.ph $s2, -52 \n" // |VG|VG| = |-52|-52|
"repl.ph $s3, 102 \n" // |VR|VR| = |102|102|
"repl.ph $s4, 16 \n" // |0|16|0|16|
"repl.ph $s5, 128 \n" // |128|128|
// $s6 = 0xff00ff00: mask OR-ed in below to fill the top byte of each halfword.
"lui $s6, 0xff00 \n"
"ori $s6, 0xff00 \n" // |ff|00|ff|00|
".p2align 2 \n"
"1: \n"
I422ToTransientMipsRGB
// Arranging into abgr format
"precr.qb.ph $t0, $t8, $t1 \n" // |G1|g1|R1|r1|
"precr.qb.ph $t3, $t9, $t2 \n" // |G0|g0|R0|r0|
"precrq.qb.ph $t8, $t0, $t3 \n" // |G1|R1|G0|R0|
"precr.qb.ph $t9, $t0, $t3 \n" // |g1|r1|g0|r0|
"precr.qb.ph $t2, $t4, $t5 \n" // |B1|b1|B0|b0|
"addiu %[width], -4 \n"
"addiu %[y_buf], 4 \n"
"preceu.ph.qbla $t1, $t2 \n" // |0 |B1|0 |B0|
"preceu.ph.qbra $t2, $t2 \n" // |0 |b1|0 |b0|
"or $t1, $t1, $s6 \n" // |ff|B1|ff|B0|
"or $t2, $t2, $s6 \n" // |ff|b1|ff|b0|
"precrq.ph.w $t0, $t2, $t9 \n" // |ff|b1|g1|r1|
"precrq.ph.w $t3, $t1, $t8 \n" // |ff|B1|G1|R1|
"sll $t9, $t9, 16 \n"
"sll $t8, $t8, 16 \n"
"packrl.ph $t2, $t2, $t9 \n" // |ff|b0|g0|r0|
"packrl.ph $t1, $t1, $t8 \n" // |ff|B0|G0|R0|
// Store results.
"sw $t2, 0(%[rgb_buf]) \n"
"sw $t0, 4(%[rgb_buf]) \n"
"sw $t1, 8(%[rgb_buf]) \n"
"sw $t3, 12(%[rgb_buf]) \n"
"bnez %[width], 1b \n"
" addiu %[rgb_buf], 16 \n"
"2: \n"
".set pop \n"
:[y_buf] "+r" (y_buf),
[u_buf] "+r" (u_buf),
[v_buf] "+r" (v_buf),
[width] "+r" (width),
[rgb_buf] "+r" (rgb_buf)
:
: "t0", "t1", "t2", "t3", "t4", "t5",
"t6", "t7", "t8", "t9",
"s0", "s1", "s2", "s3",
"s4", "s5", "s6"
);
}
// Converts one row of I422 input (separate y_buf, u_buf, v_buf) into packed
// 32-bit pixels in rgb_buf using MIPS DSPR2 inline assembly. Each stored word
// is arranged |B|G|R|ff| according to the register comments below.
//
// Each loop pass stores 4 pixels (16 bytes) and advances y_buf by 4.
// The per-pixel math comes from the I422ToTransientMipsRGB macro, which is
// defined outside this function.
// NOTE(review): u_buf and v_buf are not advanced in the code visible here;
// presumably the macro does that - confirm against its definition.
//
// The loop subtracts 4 from width and repeats while width != 0, so width must
// be a multiple of 4 (a width of 0 skips the loop entirely).
//
// Clobbers $t0-$t9 and $s0-$s6.
void I422ToBGRARow_MIPS_DSPR2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
__asm__ __volatile__ (
".set push \n"
// With noreorder, the instruction written directly after each branch
// (the ones with a leading space) sits in that branch's delay slot.
".set noreorder \n"
"beqz %[width], 2f \n"
" repl.ph $s0, 74 \n" // |YG|YG| = |74 |74 |
"repl.ph $s1, -25 \n" // |UG|UG| = |-25|-25|
"repl.ph $s2, -52 \n" // |VG|VG| = |-52|-52|
"repl.ph $s3, 102 \n" // |VR|VR| = |102|102|
"repl.ph $s4, 16 \n" // |0|16|0|16|
"repl.ph $s5, 128 \n" // |128|128|
// $s6 = 0x00ff00ff: mask OR-ed in below to fill the low byte of each halfword.
"lui $s6, 0xff \n"
"ori $s6, 0xff \n" // |00|ff|00|ff|
".p2align 2 \n"
"1: \n"
I422ToTransientMipsRGB
// Arranging into bgra format
"precr.qb.ph $t4, $t4, $t8 \n" // |B1|b1|G1|g1|
"precr.qb.ph $t5, $t5, $t9 \n" // |B0|b0|G0|g0|
"precrq.qb.ph $t8, $t4, $t5 \n" // |B1|G1|B0|G0|
"precr.qb.ph $t9, $t4, $t5 \n" // |b1|g1|b0|g0|
"precr.qb.ph $t2, $t1, $t2 \n" // |R1|r1|R0|r0|
"addiu %[width], -4 \n"
"addiu %[y_buf], 4 \n"
"preceu.ph.qbla $t1, $t2 \n" // |0 |R1|0 |R0|
"preceu.ph.qbra $t2, $t2 \n" // |0 |r1|0 |r0|
"sll $t1, $t1, 8 \n" // |R1|0 |R0|0 |
"sll $t2, $t2, 8 \n" // |r1|0 |r0|0 |
"or $t1, $t1, $s6 \n" // |R1|ff|R0|ff|
"or $t2, $t2, $s6 \n" // |r1|ff|r0|ff|
"precrq.ph.w $t0, $t9, $t2 \n" // |b1|g1|r1|ff|
"precrq.ph.w $t3, $t8, $t1 \n" // |B1|G1|R1|ff|
"sll $t1, $t1, 16 \n"
"sll $t2, $t2, 16 \n"
"packrl.ph $t2, $t9, $t2 \n" // |b0|g0|r0|ff|
"packrl.ph $t1, $t8, $t1 \n" // |B0|G0|R0|ff|
// Store results.
"sw $t2, 0(%[rgb_buf]) \n"
"sw $t0, 4(%[rgb_buf]) \n"
"sw $t1, 8(%[rgb_buf]) \n"
"sw $t3, 12(%[rgb_buf]) \n"
"bnez %[width], 1b \n"
" addiu %[rgb_buf], 16 \n"
"2: \n"
".set pop \n"
:[y_buf] "+r" (y_buf),
[u_buf] "+r" (u_buf),
[v_buf] "+r" (v_buf),
[width] "+r" (width),
[rgb_buf] "+r" (rgb_buf)
:
: "t0", "t1", "t2", "t3", "t4", "t5",
"t6", "t7", "t8", "t9",
"s0", "s1", "s2", "s3",
"s4", "s5", "s6"
);
}
// Bilinear filter 8x2 -> 8x1 // Bilinear filter 8x2 -> 8x1
void InterpolateRows_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr, void InterpolateRow_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride, int dst_width, ptrdiff_t src_stride, int dst_width,
int source_y_fraction) { int source_y_fraction) {
int y0_fraction = 256 - source_y_fraction; int y0_fraction = 256 - source_y_fraction;
@ -940,7 +729,6 @@ void InterpolateRows_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
"replv.ph $t0, %[y0_fraction] \n" "replv.ph $t0, %[y0_fraction] \n"
"replv.ph $t1, %[source_y_fraction] \n" "replv.ph $t1, %[source_y_fraction] \n"
".p2align 2 \n"
"1: \n" "1: \n"
"lw $t2, 0(%[src_ptr]) \n" "lw $t2, 0(%[src_ptr]) \n"
"lw $t3, 0(%[src_ptr1]) \n" "lw $t3, 0(%[src_ptr1]) \n"

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,146 +0,0 @@
;
; Copyright 2012 The LibYuv Project Authors. All rights reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
%ifdef __YASM_VERSION_ID__
%if __YASM_VERSION_ID__ < 01020000h
%error AVX2 is supported only by yasm 1.2.0 or later.
%endif
%endif
%include "x86inc.asm"
SECTION .text
; cglobal numeric constants are parameters, gpr regs, mm regs
; void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix)
;
; YUY2TOYROW emits a function that extracts the Y bytes from packed YUY2 or
; UYVY input.
;   %1 - source format, YUY2 or UYVY; selects how Y is isolated and forms the
;        start of the function name (%1ToYRow%3).
;   %2 - suffix appended to "mov" for every load and store (a or u below).
;   %3 - optional function-name suffix (empty or _Unaligned below).
; Each loop pass reads 2 * mmsize source bytes, writes mmsize Y bytes and
; subtracts mmsize from pix; the loop repeats while pix is still > 0.
%macro YUY2TOYROW 2-3
cglobal %1ToYRow%3, 3, 3, 3, src_yuy2, dst_y, pix
%ifidn %1,YUY2
pcmpeqb m2, m2, m2 ; generate mask 0x00ff00ff
psrlw m2, m2, 8
%endif
ALIGN 4
.convertloop:
mov%2 m0, [src_yuy2q]
mov%2 m1, [src_yuy2q + mmsize]
lea src_yuy2q, [src_yuy2q + mmsize * 2]
%ifidn %1,YUY2
pand m0, m0, m2 ; YUY2 even bytes are Y
pand m1, m1, m2
%else
psrlw m0, m0, 8 ; UYVY odd bytes are Y
psrlw m1, m1, 8
%endif
packuswb m0, m0, m1
%if cpuflag(AVX2)
; 0xd8 orders the qwords 0,2,1,3, undoing the per-128-bit-lane packing above.
vpermq m0, m0, 0xd8
%endif
sub pixd, mmsize
mov%2 [dst_yq], m0
lea dst_yq, [dst_yq + mmsize]
; jg tests the flags from the sub above; the store and lea leave them untouched.
jg .convertloop
REP_RET
%endmacro
; TODO(fbarchard): Remove MMX. Add SSSE3 pshufb version.
; Instantiations: a trailing comma passes an empty %3 (no name suffix).
INIT_MMX MMX
YUY2TOYROW YUY2,a,
YUY2TOYROW YUY2,u,_Unaligned
YUY2TOYROW UYVY,a,
YUY2TOYROW UYVY,u,_Unaligned
INIT_XMM SSE2
YUY2TOYROW YUY2,a,
YUY2TOYROW YUY2,u,_Unaligned
YUY2TOYROW UYVY,a,
YUY2TOYROW UYVY,u,_Unaligned
INIT_YMM AVX2
YUY2TOYROW YUY2,a,
YUY2TOYROW UYVY,a,
; void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix)
;
; SplitUVRow emits a function that de-interleaves UV bytes: even source bytes
; are stored to dst_u and odd source bytes to dst_v.
;   %1 - suffix appended to "mov" for every load and store (a or u below).
;   %2 - optional function-name suffix (empty or _Unaligned below).
; Each loop pass reads 2 * mmsize source bytes, writes mmsize bytes to each
; destination and subtracts mmsize from pix; it repeats while pix is still > 0.
%macro SplitUVRow 1-2
cglobal SplitUVRow%2, 4, 4, 5, src_uv, dst_u, dst_v, pix
pcmpeqb m4, m4, m4 ; generate mask 0x00ff00ff
psrlw m4, m4, 8
; Turn dst_v into an offset from dst_u so only dst_u has to be advanced.
sub dst_vq, dst_uq
ALIGN 4
.convertloop:
mov%1 m0, [src_uvq]
mov%1 m1, [src_uvq + mmsize]
lea src_uvq, [src_uvq + mmsize * 2]
psrlw m2, m0, 8 ; odd bytes
psrlw m3, m1, 8
pand m0, m0, m4 ; even bytes
pand m1, m1, m4
packuswb m0, m0, m1
packuswb m2, m2, m3
%if cpuflag(AVX2)
; 0xd8 orders the qwords 0,2,1,3, undoing the per-128-bit-lane packing above.
vpermq m0, m0, 0xd8
vpermq m2, m2, 0xd8
%endif
mov%1 [dst_uq], m0
mov%1 [dst_uq + dst_vq], m2
lea dst_uq, [dst_uq + mmsize]
sub pixd, mmsize
jg .convertloop
REP_RET
%endmacro
; Instantiations: a trailing comma passes an empty %2 (no name suffix).
INIT_MMX MMX
SplitUVRow a,
SplitUVRow u,_Unaligned
INIT_XMM SSE2
SplitUVRow a,
SplitUVRow u,_Unaligned
INIT_YMM AVX2
SplitUVRow a,
; void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
; int width);
;
; MergeUVRow_ emits a function that interleaves a U row and a V row into
; dst_uv (U byte first, then V byte, for every pair).
;   %1 - suffix appended to "mov" for every load and store (a or u below).
;   %2 - optional function-name suffix (empty or _Unaligned below).
; Each loop pass reads mmsize bytes from each source, writes 2 * mmsize bytes
; to dst_uv and subtracts mmsize from pix; it repeats while pix is still > 0.
%macro MergeUVRow_ 1-2
cglobal MergeUVRow_%2, 4, 4, 3, src_u, src_v, dst_uv, pix
    ; Turn src_v into an offset from src_u so only src_u has to be advanced.
    sub        src_vq, src_uq
    ALIGN      4
.convertloop:
    mov%1      m0, [src_uq]
    ; src_vq is an offset after the sub above, so V must be addressed relative
    ; to src_uq (dereferencing src_vq alone would read from a bogus address).
    mov%1      m1, [src_uq + src_vq]
    lea        src_uq, [src_uq + mmsize]
    punpcklbw  m2, m0, m1               ; UV pairs from the low halves (first 8 for XMM)
    punpckhbw  m0, m0, m1               ; UV pairs from the high halves (next 8 for XMM)
%if cpuflag(AVX2)
    vperm2i128 m1, m2, m0, 0x20         ; low 128 of ymm2 and low 128 of ymm0
    vperm2i128 m2, m2, m0, 0x31         ; high 128 of ymm2 and high 128 of ymm0
    mov%1      [dst_uvq], m1
    mov%1      [dst_uvq + mmsize], m2
%else
    mov%1      [dst_uvq], m2
    mov%1      [dst_uvq + mmsize], m0
%endif
    lea        dst_uvq, [dst_uvq + mmsize * 2]
    sub        pixd, mmsize
    jg         .convertloop
    REP_RET
%endmacro
; Instantiations: a trailing comma passes an empty %2 (no name suffix).
INIT_MMX MMX
MergeUVRow_ a,
MergeUVRow_ u,_Unaligned
INIT_XMM SSE2
MergeUVRow_ a,
MergeUVRow_ u,_Unaligned
INIT_YMM AVX2
MergeUVRow_ a,

View File

@ -23,9 +23,6 @@ namespace libyuv {
extern "C" { extern "C" {
#endif #endif
// Remove this macro if OVERREAD is safe.
#define AVOID_OVERREAD 1
static __inline int Abs(int v) { static __inline int Abs(int v) {
return v >= 0 ? v : -v; return v >= 0 ? v : -v;
} }
@ -45,8 +42,7 @@ static void ScalePlaneDown2(int src_width, int src_height,
void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride, void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) = uint8* dst_ptr, int dst_width) =
filtering == kFilterNone ? ScaleRowDown2_C : filtering == kFilterNone ? ScaleRowDown2_C :
(filtering == kFilterLinear ? ScaleRowDown2Linear_C : (filtering == kFilterLinear ? ScaleRowDown2Linear_C : ScaleRowDown2Box_C);
ScaleRowDown2Box_C);
int row_stride = src_stride << 1; int row_stride = src_stride << 1;
if (!filtering) { if (!filtering) {
src_ptr += src_stride; // Point to odd rows. src_ptr += src_stride; // Point to odd rows.
@ -54,23 +50,42 @@ static void ScalePlaneDown2(int src_width, int src_height,
} }
#if defined(HAS_SCALEROWDOWN2_NEON) #if defined(HAS_SCALEROWDOWN2_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) { if (TestCpuFlag(kCpuHasNEON)) {
ScaleRowDown2 = filtering ? ScaleRowDown2Box_NEON : ScaleRowDown2_NEON; ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_NEON :
} (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_NEON :
#elif defined(HAS_SCALEROWDOWN2_SSE2) ScaleRowDown2Box_Any_NEON);
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) { if (IS_ALIGNED(dst_width, 16)) {
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Unaligned_SSE2 : ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_NEON :
(filtering == kFilterLinear ? ScaleRowDown2Linear_Unaligned_SSE2 : (filtering == kFilterLinear ? ScaleRowDown2Linear_NEON :
ScaleRowDown2Box_Unaligned_SSE2); ScaleRowDown2Box_NEON);
if (IS_ALIGNED(src_ptr, 16) &&
IS_ALIGNED(src_stride, 16) && IS_ALIGNED(row_stride, 16) &&
IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSE2 :
(filtering == kFilterLinear ? ScaleRowDown2Linear_SSE2 :
ScaleRowDown2Box_SSE2);
} }
} }
#elif defined(HAS_SCALEROWDOWN2_MIPS_DSPR2) #endif
#if defined(HAS_SCALEROWDOWN2_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_SSSE3 :
(filtering == kFilterLinear ? ScaleRowDown2Linear_Any_SSSE3 :
ScaleRowDown2Box_Any_SSSE3);
if (IS_ALIGNED(dst_width, 16)) {
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSSE3 :
(filtering == kFilterLinear ? ScaleRowDown2Linear_SSSE3 :
ScaleRowDown2Box_SSSE3);
}
}
#endif
#if defined(HAS_SCALEROWDOWN2_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_AVX2 :
(filtering == kFilterLinear ? ScaleRowDown2Linear_Any_AVX2 :
ScaleRowDown2Box_Any_AVX2);
if (IS_ALIGNED(dst_width, 32)) {
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_AVX2 :
(filtering == kFilterLinear ? ScaleRowDown2Linear_AVX2 :
ScaleRowDown2Box_AVX2);
}
}
#endif
#if defined(HAS_SCALEROWDOWN2_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) && if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) &&
IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) && IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) &&
IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
@ -112,21 +127,15 @@ static void ScalePlaneDown2_16(int src_width, int src_height,
ScaleRowDown2 = filtering ? ScaleRowDown2Box_16_NEON : ScaleRowDown2 = filtering ? ScaleRowDown2Box_16_NEON :
ScaleRowDown2_16_NEON; ScaleRowDown2_16_NEON;
} }
#elif defined(HAS_SCALEROWDOWN2_16_SSE2) #endif
#if defined(HAS_SCALEROWDOWN2_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) { if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
ScaleRowDown2 = filtering == kFilterNone ?
ScaleRowDown2_Unaligned_16_SSE2 :
(filtering == kFilterLinear ? ScaleRowDown2Linear_Unaligned_16_SSE2 :
ScaleRowDown2Box_Unaligned_16_SSE2);
if (IS_ALIGNED(src_ptr, 16) &&
IS_ALIGNED(src_stride, 16) && IS_ALIGNED(row_stride, 16) &&
IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_16_SSE2 : ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_16_SSE2 :
(filtering == kFilterLinear ? ScaleRowDown2Linear_16_SSE2 : (filtering == kFilterLinear ? ScaleRowDown2Linear_16_SSE2 :
ScaleRowDown2Box_16_SSE2); ScaleRowDown2Box_16_SSE2);
} }
} #endif
#elif defined(HAS_SCALEROWDOWN2_16_MIPS_DSPR2) #if defined(HAS_SCALEROWDOWN2_16_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) && if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) &&
IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) && IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) &&
IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
@ -165,16 +174,33 @@ static void ScalePlaneDown4(int src_width, int src_height,
src_stride = 0; src_stride = 0;
} }
#if defined(HAS_SCALEROWDOWN4_NEON) #if defined(HAS_SCALEROWDOWN4_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) { if (TestCpuFlag(kCpuHasNEON)) {
ScaleRowDown4 = filtering ?
ScaleRowDown4Box_Any_NEON : ScaleRowDown4_Any_NEON;
if (IS_ALIGNED(dst_width, 8)) {
ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON; ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON;
} }
#elif defined(HAS_SCALEROWDOWN4_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(dst_width, 8) && IS_ALIGNED(row_stride, 16) &&
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSE2 : ScaleRowDown4_SSE2;
} }
#elif defined(HAS_SCALEROWDOWN4_MIPS_DSPR2) #endif
#if defined(HAS_SCALEROWDOWN4_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ScaleRowDown4 = filtering ?
ScaleRowDown4Box_Any_SSSE3 : ScaleRowDown4_Any_SSSE3;
if (IS_ALIGNED(dst_width, 8)) {
ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSSE3 : ScaleRowDown4_SSSE3;
}
}
#endif
#if defined(HAS_SCALEROWDOWN4_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ScaleRowDown4 = filtering ?
ScaleRowDown4Box_Any_AVX2 : ScaleRowDown4_Any_AVX2;
if (IS_ALIGNED(dst_width, 16)) {
ScaleRowDown4 = filtering ? ScaleRowDown4Box_AVX2 : ScaleRowDown4_AVX2;
}
}
#endif
#if defined(HAS_SCALEROWDOWN4_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(row_stride, 4) && if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(row_stride, 4) &&
IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
@ -212,14 +238,14 @@ static void ScalePlaneDown4_16(int src_width, int src_height,
ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_NEON : ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_NEON :
ScaleRowDown4_16_NEON; ScaleRowDown4_16_NEON;
} }
#elif defined(HAS_SCALEROWDOWN4_16_SSE2) #endif
if (TestCpuFlag(kCpuHasSSE2) && #if defined(HAS_SCALEROWDOWN4_16_SSE2)
IS_ALIGNED(dst_width, 8) && IS_ALIGNED(row_stride, 16) && if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_SSE2 : ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_SSE2 :
ScaleRowDown4_16_SSE2; ScaleRowDown4_16_SSE2;
} }
#elif defined(HAS_SCALEROWDOWN4_16_MIPS_DSPR2) #endif
#if defined(HAS_SCALEROWDOWN4_16_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(row_stride, 4) && if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(row_stride, 4) &&
IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
@ -260,7 +286,15 @@ static void ScalePlaneDown34(int src_width, int src_height,
ScaleRowDown34_1 = ScaleRowDown34_1_Box_C; ScaleRowDown34_1 = ScaleRowDown34_1_Box_C;
} }
#if defined(HAS_SCALEROWDOWN34_NEON) #if defined(HAS_SCALEROWDOWN34_NEON)
if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) { if (TestCpuFlag(kCpuHasNEON)) {
if (!filtering) {
ScaleRowDown34_0 = ScaleRowDown34_Any_NEON;
ScaleRowDown34_1 = ScaleRowDown34_Any_NEON;
} else {
ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_NEON;
ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_NEON;
}
if (dst_width % 24 == 0) {
if (!filtering) { if (!filtering) {
ScaleRowDown34_0 = ScaleRowDown34_NEON; ScaleRowDown34_0 = ScaleRowDown34_NEON;
ScaleRowDown34_1 = ScaleRowDown34_NEON; ScaleRowDown34_1 = ScaleRowDown34_NEON;
@ -269,10 +303,18 @@ static void ScalePlaneDown34(int src_width, int src_height,
ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON; ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON;
} }
} }
}
#endif #endif
#if defined(HAS_SCALEROWDOWN34_SSSE3) #if defined(HAS_SCALEROWDOWN34_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) && if (TestCpuFlag(kCpuHasSSSE3)) {
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) { if (!filtering) {
ScaleRowDown34_0 = ScaleRowDown34_Any_SSSE3;
ScaleRowDown34_1 = ScaleRowDown34_Any_SSSE3;
} else {
ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_SSSE3;
ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_SSSE3;
}
if (dst_width % 24 == 0) {
if (!filtering) { if (!filtering) {
ScaleRowDown34_0 = ScaleRowDown34_SSSE3; ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
ScaleRowDown34_1 = ScaleRowDown34_SSSE3; ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
@ -281,6 +323,7 @@ static void ScalePlaneDown34(int src_width, int src_height,
ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3; ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3;
} }
} }
}
#endif #endif
#if defined(HAS_SCALEROWDOWN34_MIPS_DSPR2) #if defined(HAS_SCALEROWDOWN34_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 24 == 0) && if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 24 == 0) &&
@ -351,8 +394,7 @@ static void ScalePlaneDown34_16(int src_width, int src_height,
} }
#endif #endif
#if defined(HAS_SCALEROWDOWN34_16_SSSE3) #if defined(HAS_SCALEROWDOWN34_16_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) && if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
if (!filtering) { if (!filtering) {
ScaleRowDown34_0 = ScaleRowDown34_16_SSSE3; ScaleRowDown34_0 = ScaleRowDown34_16_SSSE3;
ScaleRowDown34_1 = ScaleRowDown34_16_SSSE3; ScaleRowDown34_1 = ScaleRowDown34_16_SSSE3;
@ -435,8 +477,17 @@ static void ScalePlaneDown38(int src_width, int src_height,
ScaleRowDown38_3 = ScaleRowDown38_3_Box_C; ScaleRowDown38_3 = ScaleRowDown38_3_Box_C;
ScaleRowDown38_2 = ScaleRowDown38_2_Box_C; ScaleRowDown38_2 = ScaleRowDown38_2_Box_C;
} }
#if defined(HAS_SCALEROWDOWN38_NEON) #if defined(HAS_SCALEROWDOWN38_NEON)
if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) { if (TestCpuFlag(kCpuHasNEON)) {
if (!filtering) {
ScaleRowDown38_3 = ScaleRowDown38_Any_NEON;
ScaleRowDown38_2 = ScaleRowDown38_Any_NEON;
} else {
ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_NEON;
ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_NEON;
}
if (dst_width % 12 == 0) {
if (!filtering) { if (!filtering) {
ScaleRowDown38_3 = ScaleRowDown38_NEON; ScaleRowDown38_3 = ScaleRowDown38_NEON;
ScaleRowDown38_2 = ScaleRowDown38_NEON; ScaleRowDown38_2 = ScaleRowDown38_NEON;
@ -445,18 +496,28 @@ static void ScalePlaneDown38(int src_width, int src_height,
ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON; ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON;
} }
} }
#elif defined(HAS_SCALEROWDOWN38_SSSE3) }
if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) && #endif
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) { #if defined(HAS_SCALEROWDOWN38_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
if (!filtering) { if (!filtering) {
ScaleRowDown38_3 = ScaleRowDown38_Any_SSSE3;
ScaleRowDown38_2 = ScaleRowDown38_Any_SSSE3;
} else {
ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_SSSE3;
ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_SSSE3;
}
if (dst_width % 12 == 0 && !filtering) {
ScaleRowDown38_3 = ScaleRowDown38_SSSE3; ScaleRowDown38_3 = ScaleRowDown38_SSSE3;
ScaleRowDown38_2 = ScaleRowDown38_SSSE3; ScaleRowDown38_2 = ScaleRowDown38_SSSE3;
} else { }
if (dst_width % 6 == 0 && filtering) {
ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3; ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3;
ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3; ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3;
} }
} }
#elif defined(HAS_SCALEROWDOWN38_MIPS_DSPR2) #endif
#if defined(HAS_SCALEROWDOWN38_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 12 == 0) && if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 12 == 0) &&
IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
@ -522,9 +583,9 @@ static void ScalePlaneDown38_16(int src_width, int src_height,
ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_NEON; ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_NEON;
} }
} }
#elif defined(HAS_SCALEROWDOWN38_16_SSSE3) #endif
if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) && #if defined(HAS_SCALEROWDOWN38_16_SSSE3)
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) { if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
if (!filtering) { if (!filtering) {
ScaleRowDown38_3 = ScaleRowDown38_16_SSSE3; ScaleRowDown38_3 = ScaleRowDown38_16_SSSE3;
ScaleRowDown38_2 = ScaleRowDown38_16_SSSE3; ScaleRowDown38_2 = ScaleRowDown38_16_SSSE3;
@ -533,7 +594,8 @@ static void ScalePlaneDown38_16(int src_width, int src_height,
ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_SSSE3; ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_SSSE3;
} }
} }
#elif defined(HAS_SCALEROWDOWN38_16_MIPS_DSPR2) #endif
#if defined(HAS_SCALEROWDOWN38_16_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 12 == 0) && if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 12 == 0) &&
IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
@ -570,65 +632,7 @@ static void ScalePlaneDown38_16(int src_width, int src_height,
} }
} }
static __inline uint32 SumBox(int iboxwidth, int iboxheight, #define MIN1(x) ((x) < 1 ? 1 : (x))
ptrdiff_t src_stride, const uint8* src_ptr) {
uint32 sum = 0u;
int y;
assert(iboxwidth > 0);
assert(iboxheight > 0);
for (y = 0; y < iboxheight; ++y) {
int x;
for (x = 0; x < iboxwidth; ++x) {
sum += src_ptr[x];
}
src_ptr += src_stride;
}
return sum;
}
static __inline uint32 SumBox_16(int iboxwidth, int iboxheight,
ptrdiff_t src_stride, const uint16* src_ptr) {
uint32 sum = 0u;
int y;
assert(iboxwidth > 0);
assert(iboxheight > 0);
for (y = 0; y < iboxheight; ++y) {
int x;
for (x = 0; x < iboxwidth; ++x) {
sum += src_ptr[x];
}
src_ptr += src_stride;
}
return sum;
}
static void ScalePlaneBoxRow_C(int dst_width, int boxheight,
int x, int dx, ptrdiff_t src_stride,
const uint8* src_ptr, uint8* dst_ptr) {
int i;
int boxwidth;
for (i = 0; i < dst_width; ++i) {
int ix = x >> 16;
x += dx;
boxwidth = (x >> 16) - ix;
*dst_ptr++ = SumBox(boxwidth, boxheight, src_stride, src_ptr + ix) /
(boxwidth * boxheight);
}
}
static void ScalePlaneBoxRow_16_C(int dst_width, int boxheight,
int x, int dx, ptrdiff_t src_stride,
const uint16* src_ptr, uint16* dst_ptr) {
int i;
int boxwidth;
for (i = 0; i < dst_width; ++i) {
int ix = x >> 16;
x += dx;
boxwidth = (x >> 16) - ix;
*dst_ptr++ = SumBox_16(boxwidth, boxheight, src_stride, src_ptr + ix) /
(boxwidth * boxheight);
}
}
static __inline uint32 SumPixels(int iboxwidth, const uint16* src_ptr) { static __inline uint32 SumPixels(int iboxwidth, const uint16* src_ptr) {
uint32 sum = 0u; uint32 sum = 0u;
@ -654,15 +658,15 @@ static void ScaleAddCols2_C(int dst_width, int boxheight, int x, int dx,
const uint16* src_ptr, uint8* dst_ptr) { const uint16* src_ptr, uint8* dst_ptr) {
int i; int i;
int scaletbl[2]; int scaletbl[2];
int minboxwidth = (dx >> 16); int minboxwidth = dx >> 16;
int* scaleptr = scaletbl - minboxwidth; int* scaleptr = scaletbl - minboxwidth;
int boxwidth; int boxwidth;
scaletbl[0] = 65536 / (minboxwidth * boxheight); scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight);
scaletbl[1] = 65536 / ((minboxwidth + 1) * boxheight); scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight);
for (i = 0; i < dst_width; ++i) { for (i = 0; i < dst_width; ++i) {
int ix = x >> 16; int ix = x >> 16;
x += dx; x += dx;
boxwidth = (x >> 16) - ix; boxwidth = MIN1((x >> 16) - ix);
*dst_ptr++ = SumPixels(boxwidth, src_ptr + ix) * scaleptr[boxwidth] >> 16; *dst_ptr++ = SumPixels(boxwidth, src_ptr + ix) * scaleptr[boxwidth] >> 16;
} }
} }
@ -671,25 +675,36 @@ static void ScaleAddCols2_16_C(int dst_width, int boxheight, int x, int dx,
const uint32* src_ptr, uint16* dst_ptr) { const uint32* src_ptr, uint16* dst_ptr) {
int i; int i;
int scaletbl[2]; int scaletbl[2];
int minboxwidth = (dx >> 16); int minboxwidth = dx >> 16;
int* scaleptr = scaletbl - minboxwidth; int* scaleptr = scaletbl - minboxwidth;
int boxwidth; int boxwidth;
scaletbl[0] = 65536 / (minboxwidth * boxheight); scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight);
scaletbl[1] = 65536 / ((minboxwidth + 1) * boxheight); scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight);
for (i = 0; i < dst_width; ++i) { for (i = 0; i < dst_width; ++i) {
int ix = x >> 16; int ix = x >> 16;
x += dx; x += dx;
boxwidth = (x >> 16) - ix; boxwidth = MIN1((x >> 16) - ix);
*dst_ptr++ = SumPixels_16(boxwidth, src_ptr + ix) * *dst_ptr++ =
scaleptr[boxwidth] >> 16; SumPixels_16(boxwidth, src_ptr + ix) * scaleptr[boxwidth] >> 16;
}
}
static void ScaleAddCols0_C(int dst_width, int boxheight, int x, int,
const uint16* src_ptr, uint8* dst_ptr) {
int scaleval = 65536 / boxheight;
int i;
src_ptr += (x >> 16);
for (i = 0; i < dst_width; ++i) {
*dst_ptr++ = src_ptr[i] * scaleval >> 16;
} }
} }
static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx, static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx,
const uint16* src_ptr, uint8* dst_ptr) { const uint16* src_ptr, uint8* dst_ptr) {
int boxwidth = (dx >> 16); int boxwidth = MIN1(dx >> 16);
int scaleval = 65536 / (boxwidth * boxheight); int scaleval = 65536 / (boxwidth * boxheight);
int i; int i;
x >>= 16;
for (i = 0; i < dst_width; ++i) { for (i = 0; i < dst_width; ++i) {
*dst_ptr++ = SumPixels(boxwidth, src_ptr + x) * scaleval >> 16; *dst_ptr++ = SumPixels(boxwidth, src_ptr + x) * scaleval >> 16;
x += boxwidth; x += boxwidth;
@ -698,7 +713,7 @@ static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx,
static void ScaleAddCols1_16_C(int dst_width, int boxheight, int x, int dx, static void ScaleAddCols1_16_C(int dst_width, int boxheight, int x, int dx,
const uint32* src_ptr, uint16* dst_ptr) { const uint32* src_ptr, uint16* dst_ptr) {
int boxwidth = (dx >> 16); int boxwidth = MIN1(dx >> 16);
int scaleval = 65536 / (boxwidth * boxheight); int scaleval = 65536 / (boxwidth * boxheight);
int i; int i;
for (i = 0; i < dst_width; ++i) { for (i = 0; i < dst_width; ++i) {
@ -718,7 +733,7 @@ static void ScalePlaneBox(int src_width, int src_height,
int dst_width, int dst_height, int dst_width, int dst_height,
int src_stride, int dst_stride, int src_stride, int dst_stride,
const uint8* src_ptr, uint8* dst_ptr) { const uint8* src_ptr, uint8* dst_ptr) {
int j; int j, k;
// Initial source x/y coordinate and step values as 16.16 fixed point. // Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0; int x = 0;
int y = 0; int y = 0;
@ -728,10 +743,40 @@ static void ScalePlaneBox(int src_width, int src_height,
ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox,
&x, &y, &dx, &dy); &x, &y, &dx, &dy);
src_width = Abs(src_width); src_width = Abs(src_width);
// TODO(fbarchard): Remove this and make AddRows handle boxheight 1. {
if (!IS_ALIGNED(src_width, 16) || dst_height * 2 > src_height) { // Allocate a row buffer of uint16.
uint8* dst = dst_ptr; align_buffer_64(row16, src_width * 2);
int j; void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
const uint16* src_ptr, uint8* dst_ptr) =
(dx & 0xffff) ? ScaleAddCols2_C:
((dx != 0x10000) ? ScaleAddCols1_C : ScaleAddCols0_C);
void (*ScaleAddRow)(const uint8* src_ptr, uint16* dst_ptr, int src_width) =
ScaleAddRow_C;
#if defined(HAS_SCALEADDROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ScaleAddRow = ScaleAddRow_Any_SSE2;
if (IS_ALIGNED(src_width, 16)) {
ScaleAddRow = ScaleAddRow_SSE2;
}
}
#endif
#if defined(HAS_SCALEADDROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ScaleAddRow = ScaleAddRow_Any_AVX2;
if (IS_ALIGNED(src_width, 32)) {
ScaleAddRow = ScaleAddRow_AVX2;
}
}
#endif
#if defined(HAS_SCALEADDROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ScaleAddRow = ScaleAddRow_Any_NEON;
if (IS_ALIGNED(src_width, 16)) {
ScaleAddRow = ScaleAddRow_NEON;
}
}
#endif
for (j = 0; j < dst_height; ++j) { for (j = 0; j < dst_height; ++j) {
int boxheight; int boxheight;
int iy = y >> 16; int iy = y >> 16;
@ -740,46 +785,13 @@ static void ScalePlaneBox(int src_width, int src_height,
if (y > max_y) { if (y > max_y) {
y = max_y; y = max_y;
} }
boxheight = (y >> 16) - iy; boxheight = MIN1((y >> 16) - iy);
ScalePlaneBoxRow_C(dst_width, boxheight, memset(row16, 0, src_width * 2);
x, dx, src_stride, for (k = 0; k < boxheight; ++k) {
src, dst); ScaleAddRow(src, (uint16 *)(row16), src_width);
dst += dst_stride; src += src_stride;
} }
return; ScaleAddCols(dst_width, boxheight, x, dx, (uint16*)(row16), dst_ptr);
}
{
// Allocate a row buffer of uint16.
align_buffer_64(row16, src_width * 2);
void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
const uint16* src_ptr, uint8* dst_ptr) =
(dx & 0xffff) ? ScaleAddCols2_C: ScaleAddCols1_C;
void (*ScaleAddRows)(const uint8* src_ptr, ptrdiff_t src_stride,
uint16* dst_ptr, int src_width, int src_height) = ScaleAddRows_C;
#if defined(HAS_SCALEADDROWS_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
#ifdef AVOID_OVERREAD
IS_ALIGNED(src_width, 16) &&
#endif
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
ScaleAddRows = ScaleAddRows_SSE2;
}
#endif
for (j = 0; j < dst_height; ++j) {
int boxheight;
int iy = y >> 16;
const uint8* src = src_ptr + iy * src_stride;
y += dy;
if (y > (src_height << 16)) {
y = (src_height << 16);
}
boxheight = (y >> 16) - iy;
ScaleAddRows(src, src_stride, (uint16*)(row16),
src_width, boxheight);
ScaleAddCols(dst_width, boxheight, x, dx, (uint16*)(row16),
dst_ptr);
dst_ptr += dst_stride; dst_ptr += dst_stride;
} }
free_aligned_buffer_64(row16); free_aligned_buffer_64(row16);
@ -790,7 +802,7 @@ static void ScalePlaneBox_16(int src_width, int src_height,
int dst_width, int dst_height, int dst_width, int dst_height,
int src_stride, int dst_stride, int src_stride, int dst_stride,
const uint16* src_ptr, uint16* dst_ptr) { const uint16* src_ptr, uint16* dst_ptr) {
int j; int j, k;
// Initial source x/y coordinate and step values as 16.16 fixed point. // Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0; int x = 0;
int y = 0; int y = 0;
@ -800,10 +812,21 @@ static void ScalePlaneBox_16(int src_width, int src_height,
ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox,
&x, &y, &dx, &dy); &x, &y, &dx, &dy);
src_width = Abs(src_width); src_width = Abs(src_width);
// TODO(fbarchard): Remove this and make AddRows handle boxheight 1. {
if (!IS_ALIGNED(src_width, 16) || dst_height * 2 > src_height) { // Allocate a row buffer of uint32.
uint16* dst = dst_ptr; align_buffer_64(row32, src_width * 4);
int j; void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
const uint32* src_ptr, uint16* dst_ptr) =
(dx & 0xffff) ? ScaleAddCols2_16_C: ScaleAddCols1_16_C;
void (*ScaleAddRow)(const uint16* src_ptr, uint32* dst_ptr, int src_width) =
ScaleAddRow_16_C;
#if defined(HAS_SCALEADDROW_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(src_width, 16)) {
ScaleAddRow = ScaleAddRow_16_SSE2;
}
#endif
for (j = 0; j < dst_height; ++j) { for (j = 0; j < dst_height; ++j) {
int boxheight; int boxheight;
int iy = y >> 16; int iy = y >> 16;
@ -812,46 +835,13 @@ static void ScalePlaneBox_16(int src_width, int src_height,
if (y > max_y) { if (y > max_y) {
y = max_y; y = max_y;
} }
boxheight = (y >> 16) - iy; boxheight = MIN1((y >> 16) - iy);
ScalePlaneBoxRow_16_C(dst_width, boxheight, memset(row32, 0, src_width * 4);
x, dx, src_stride, for (k = 0; k < boxheight; ++k) {
src, dst); ScaleAddRow(src, (uint32 *)(row32), src_width);
dst += dst_stride; src += src_stride;
} }
return; ScaleAddCols(dst_width, boxheight, x, dx, (uint32*)(row32), dst_ptr);
}
{
// Allocate a row buffer of uint32.
align_buffer_64(row32, src_width * 4);
void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
const uint32* src_ptr, uint16* dst_ptr) =
(dx & 0xffff) ? ScaleAddCols2_16_C: ScaleAddCols1_16_C;
void (*ScaleAddRows)(const uint16* src_ptr, ptrdiff_t src_stride,
uint32* dst_ptr, int src_width, int src_height) = ScaleAddRows_16_C;
#if defined(HAS_SCALEADDROWS_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
#ifdef AVOID_OVERREAD
IS_ALIGNED(src_width, 16) &&
#endif
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
ScaleAddRows = ScaleAddRows_16_SSE2;
}
#endif
for (j = 0; j < dst_height; ++j) {
int boxheight;
int iy = y >> 16;
const uint16* src = src_ptr + iy * src_stride;
y += dy;
if (y > (src_height << 16)) {
y = (src_height << 16);
}
boxheight = (y >> 16) - iy;
ScaleAddRows(src, src_stride, (uint32*)(row32),
src_width, boxheight);
ScaleAddCols(dst_width, boxheight, x, dx, (uint32*)(row32),
dst_ptr);
dst_ptr += dst_stride; dst_ptr += dst_stride;
} }
free_aligned_buffer_64(row32); free_aligned_buffer_64(row32);
@ -885,30 +875,16 @@ void ScalePlaneBilinearDown(int src_width, int src_height,
&x, &y, &dx, &dy); &x, &y, &dx, &dy);
src_width = Abs(src_width); src_width = Abs(src_width);
#if defined(HAS_INTERPOLATEROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && src_width >= 16) {
InterpolateRow = InterpolateRow_Any_SSE2;
if (IS_ALIGNED(src_width, 16)) {
InterpolateRow = InterpolateRow_Unaligned_SSE2;
if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
InterpolateRow = InterpolateRow_SSE2;
}
}
}
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3) #if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 16) { if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3; InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(src_width, 16)) { if (IS_ALIGNED(src_width, 16)) {
InterpolateRow = InterpolateRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
InterpolateRow = InterpolateRow_SSSE3; InterpolateRow = InterpolateRow_SSSE3;
} }
} }
}
#endif #endif
#if defined(HAS_INTERPOLATEROW_AVX2) #if defined(HAS_INTERPOLATEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && src_width >= 32) { if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_AVX2; InterpolateRow = InterpolateRow_Any_AVX2;
if (IS_ALIGNED(src_width, 32)) { if (IS_ALIGNED(src_width, 32)) {
InterpolateRow = InterpolateRow_AVX2; InterpolateRow = InterpolateRow_AVX2;
@ -916,7 +892,7 @@ void ScalePlaneBilinearDown(int src_width, int src_height,
} }
#endif #endif
#if defined(HAS_INTERPOLATEROW_NEON) #if defined(HAS_INTERPOLATEROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && src_width >= 16) { if (TestCpuFlag(kCpuHasNEON)) {
InterpolateRow = InterpolateRow_Any_NEON; InterpolateRow = InterpolateRow_Any_NEON;
if (IS_ALIGNED(src_width, 16)) { if (IS_ALIGNED(src_width, 16)) {
InterpolateRow = InterpolateRow_NEON; InterpolateRow = InterpolateRow_NEON;
@ -924,7 +900,7 @@ void ScalePlaneBilinearDown(int src_width, int src_height,
} }
#endif #endif
#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2) #if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && src_width >= 4) { if (TestCpuFlag(kCpuHasMIPS_DSPR2)) {
InterpolateRow = InterpolateRow_Any_MIPS_DSPR2; InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
if (IS_ALIGNED(src_width, 4)) { if (IS_ALIGNED(src_width, 4)) {
InterpolateRow = InterpolateRow_MIPS_DSPR2; InterpolateRow = InterpolateRow_MIPS_DSPR2;
@ -937,6 +913,14 @@ void ScalePlaneBilinearDown(int src_width, int src_height,
if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
ScaleFilterCols = ScaleFilterCols_SSSE3; ScaleFilterCols = ScaleFilterCols_SSSE3;
} }
#endif
#if defined(HAS_SCALEFILTERCOLS_NEON)
if (TestCpuFlag(kCpuHasNEON) && src_width < 32768) {
ScaleFilterCols = ScaleFilterCols_Any_NEON;
if (IS_ALIGNED(dst_width, 8)) {
ScaleFilterCols = ScaleFilterCols_NEON;
}
}
#endif #endif
if (y > max_y) { if (y > max_y) {
y = max_y; y = max_y;
@ -988,29 +972,23 @@ void ScalePlaneBilinearDown_16(int src_width, int src_height,
src_width = Abs(src_width); src_width = Abs(src_width);
#if defined(HAS_INTERPOLATEROW_16_SSE2) #if defined(HAS_INTERPOLATEROW_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && src_width >= 16) { if (TestCpuFlag(kCpuHasSSE2)) {
InterpolateRow = InterpolateRow_Any_16_SSE2; InterpolateRow = InterpolateRow_Any_16_SSE2;
if (IS_ALIGNED(src_width, 16)) { if (IS_ALIGNED(src_width, 16)) {
InterpolateRow = InterpolateRow_Unaligned_16_SSE2;
if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
InterpolateRow = InterpolateRow_16_SSE2; InterpolateRow = InterpolateRow_16_SSE2;
} }
} }
}
#endif #endif
#if defined(HAS_INTERPOLATEROW_16_SSSE3) #if defined(HAS_INTERPOLATEROW_16_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 16) { if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_16_SSSE3; InterpolateRow = InterpolateRow_Any_16_SSSE3;
if (IS_ALIGNED(src_width, 16)) { if (IS_ALIGNED(src_width, 16)) {
InterpolateRow = InterpolateRow_Unaligned_16_SSSE3;
if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
InterpolateRow = InterpolateRow_16_SSSE3; InterpolateRow = InterpolateRow_16_SSSE3;
} }
} }
}
#endif #endif
#if defined(HAS_INTERPOLATEROW_16_AVX2) #if defined(HAS_INTERPOLATEROW_16_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && src_width >= 32) { if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_16_AVX2; InterpolateRow = InterpolateRow_Any_16_AVX2;
if (IS_ALIGNED(src_width, 32)) { if (IS_ALIGNED(src_width, 32)) {
InterpolateRow = InterpolateRow_16_AVX2; InterpolateRow = InterpolateRow_16_AVX2;
@ -1018,7 +996,7 @@ void ScalePlaneBilinearDown_16(int src_width, int src_height,
} }
#endif #endif
#if defined(HAS_INTERPOLATEROW_16_NEON) #if defined(HAS_INTERPOLATEROW_16_NEON)
if (TestCpuFlag(kCpuHasNEON) && src_width >= 16) { if (TestCpuFlag(kCpuHasNEON)) {
InterpolateRow = InterpolateRow_Any_16_NEON; InterpolateRow = InterpolateRow_Any_16_NEON;
if (IS_ALIGNED(src_width, 16)) { if (IS_ALIGNED(src_width, 16)) {
InterpolateRow = InterpolateRow_16_NEON; InterpolateRow = InterpolateRow_16_NEON;
@ -1026,7 +1004,7 @@ void ScalePlaneBilinearDown_16(int src_width, int src_height,
} }
#endif #endif
#if defined(HAS_INTERPOLATEROW_16_MIPS_DSPR2) #if defined(HAS_INTERPOLATEROW_16_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && src_width >= 4) { if (TestCpuFlag(kCpuHasMIPS_DSPR2)) {
InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2; InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2;
if (IS_ALIGNED(src_width, 4)) { if (IS_ALIGNED(src_width, 4)) {
InterpolateRow = InterpolateRow_16_MIPS_DSPR2; InterpolateRow = InterpolateRow_16_MIPS_DSPR2;
@ -1086,30 +1064,16 @@ void ScalePlaneBilinearUp(int src_width, int src_height,
&x, &y, &dx, &dy); &x, &y, &dx, &dy);
src_width = Abs(src_width); src_width = Abs(src_width);
#if defined(HAS_INTERPOLATEROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 16) {
InterpolateRow = InterpolateRow_Any_SSE2;
if (IS_ALIGNED(dst_width, 16)) {
InterpolateRow = InterpolateRow_Unaligned_SSE2;
if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
InterpolateRow = InterpolateRow_SSE2;
}
}
}
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3) #if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 16) { if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3; InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(dst_width, 16)) { if (IS_ALIGNED(dst_width, 16)) {
InterpolateRow = InterpolateRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
InterpolateRow = InterpolateRow_SSSE3; InterpolateRow = InterpolateRow_SSSE3;
} }
} }
}
#endif #endif
#if defined(HAS_INTERPOLATEROW_AVX2) #if defined(HAS_INTERPOLATEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 32) { if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_AVX2; InterpolateRow = InterpolateRow_Any_AVX2;
if (IS_ALIGNED(dst_width, 32)) { if (IS_ALIGNED(dst_width, 32)) {
InterpolateRow = InterpolateRow_AVX2; InterpolateRow = InterpolateRow_AVX2;
@ -1117,7 +1081,7 @@ void ScalePlaneBilinearUp(int src_width, int src_height,
} }
#endif #endif
#if defined(HAS_INTERPOLATEROW_NEON) #if defined(HAS_INTERPOLATEROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && dst_width >= 16) { if (TestCpuFlag(kCpuHasNEON)) {
InterpolateRow = InterpolateRow_Any_NEON; InterpolateRow = InterpolateRow_Any_NEON;
if (IS_ALIGNED(dst_width, 16)) { if (IS_ALIGNED(dst_width, 16)) {
InterpolateRow = InterpolateRow_NEON; InterpolateRow = InterpolateRow_NEON;
@ -1125,7 +1089,7 @@ void ScalePlaneBilinearUp(int src_width, int src_height,
} }
#endif #endif
#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2) #if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 4) { if (TestCpuFlag(kCpuHasMIPS_DSPR2)) {
InterpolateRow = InterpolateRow_Any_MIPS_DSPR2; InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
if (IS_ALIGNED(dst_width, 4)) { if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_MIPS_DSPR2; InterpolateRow = InterpolateRow_MIPS_DSPR2;
@ -1140,13 +1104,19 @@ void ScalePlaneBilinearUp(int src_width, int src_height,
if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
ScaleFilterCols = ScaleFilterCols_SSSE3; ScaleFilterCols = ScaleFilterCols_SSSE3;
} }
#endif
#if defined(HAS_SCALEFILTERCOLS_NEON)
if (filtering && TestCpuFlag(kCpuHasNEON) && src_width < 32768) {
ScaleFilterCols = ScaleFilterCols_Any_NEON;
if (IS_ALIGNED(dst_width, 8)) {
ScaleFilterCols = ScaleFilterCols_NEON;
}
}
#endif #endif
if (!filtering && src_width * 2 == dst_width && x < 0x8000) { if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
ScaleFilterCols = ScaleColsUp2_C; ScaleFilterCols = ScaleColsUp2_C;
#if defined(HAS_SCALECOLS_SSE2) #if defined(HAS_SCALECOLS_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) && if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleFilterCols = ScaleColsUp2_SSE2; ScaleFilterCols = ScaleColsUp2_SSE2;
} }
#endif #endif
@ -1160,7 +1130,7 @@ void ScalePlaneBilinearUp(int src_width, int src_height,
const uint8* src = src_ptr + yi * src_stride; const uint8* src = src_ptr + yi * src_stride;
// Allocate 2 row buffers. // Allocate 2 row buffers.
const int kRowSize = (dst_width + 15) & ~15; const int kRowSize = (dst_width + 31) & ~31;
align_buffer_64(row, kRowSize * 2); align_buffer_64(row, kRowSize * 2);
uint8* rowptr = row; uint8* rowptr = row;
@ -1226,29 +1196,23 @@ void ScalePlaneBilinearUp_16(int src_width, int src_height,
src_width = Abs(src_width); src_width = Abs(src_width);
#if defined(HAS_INTERPOLATEROW_16_SSE2) #if defined(HAS_INTERPOLATEROW_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 16) { if (TestCpuFlag(kCpuHasSSE2)) {
InterpolateRow = InterpolateRow_Any_16_SSE2; InterpolateRow = InterpolateRow_Any_16_SSE2;
if (IS_ALIGNED(dst_width, 16)) { if (IS_ALIGNED(dst_width, 16)) {
InterpolateRow = InterpolateRow_Unaligned_16_SSE2;
if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
InterpolateRow = InterpolateRow_16_SSE2; InterpolateRow = InterpolateRow_16_SSE2;
} }
} }
}
#endif #endif
#if defined(HAS_INTERPOLATEROW_16_SSSE3) #if defined(HAS_INTERPOLATEROW_16_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 16) { if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_16_SSSE3; InterpolateRow = InterpolateRow_Any_16_SSSE3;
if (IS_ALIGNED(dst_width, 16)) { if (IS_ALIGNED(dst_width, 16)) {
InterpolateRow = InterpolateRow_Unaligned_16_SSSE3;
if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
InterpolateRow = InterpolateRow_16_SSSE3; InterpolateRow = InterpolateRow_16_SSSE3;
} }
} }
}
#endif #endif
#if defined(HAS_INTERPOLATEROW_16_AVX2) #if defined(HAS_INTERPOLATEROW_16_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 32) { if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_16_AVX2; InterpolateRow = InterpolateRow_Any_16_AVX2;
if (IS_ALIGNED(dst_width, 32)) { if (IS_ALIGNED(dst_width, 32)) {
InterpolateRow = InterpolateRow_16_AVX2; InterpolateRow = InterpolateRow_16_AVX2;
@ -1256,7 +1220,7 @@ void ScalePlaneBilinearUp_16(int src_width, int src_height,
} }
#endif #endif
#if defined(HAS_INTERPOLATEROW_16_NEON) #if defined(HAS_INTERPOLATEROW_16_NEON)
if (TestCpuFlag(kCpuHasNEON) && dst_width >= 16) { if (TestCpuFlag(kCpuHasNEON)) {
InterpolateRow = InterpolateRow_Any_16_NEON; InterpolateRow = InterpolateRow_Any_16_NEON;
if (IS_ALIGNED(dst_width, 16)) { if (IS_ALIGNED(dst_width, 16)) {
InterpolateRow = InterpolateRow_16_NEON; InterpolateRow = InterpolateRow_16_NEON;
@ -1264,7 +1228,7 @@ void ScalePlaneBilinearUp_16(int src_width, int src_height,
} }
#endif #endif
#if defined(HAS_INTERPOLATEROW_16_MIPS_DSPR2) #if defined(HAS_INTERPOLATEROW_16_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 4) { if (TestCpuFlag(kCpuHasMIPS_DSPR2)) {
InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2; InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2;
if (IS_ALIGNED(dst_width, 4)) { if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_16_MIPS_DSPR2; InterpolateRow = InterpolateRow_16_MIPS_DSPR2;
@ -1283,9 +1247,7 @@ void ScalePlaneBilinearUp_16(int src_width, int src_height,
if (!filtering && src_width * 2 == dst_width && x < 0x8000) { if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
ScaleFilterCols = ScaleColsUp2_16_C; ScaleFilterCols = ScaleColsUp2_16_C;
#if defined(HAS_SCALECOLS_16_SSE2) #if defined(HAS_SCALECOLS_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) && if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleFilterCols = ScaleColsUp2_16_SSE2; ScaleFilterCols = ScaleColsUp2_16_SSE2;
} }
#endif #endif
@ -1299,7 +1261,7 @@ void ScalePlaneBilinearUp_16(int src_width, int src_height,
const uint16* src = src_ptr + yi * src_stride; const uint16* src = src_ptr + yi * src_stride;
// Allocate 2 row buffers. // Allocate 2 row buffers.
const int kRowSize = (dst_width + 15) & ~15; const int kRowSize = (dst_width + 31) & ~31;
align_buffer_64(row, kRowSize * 4); align_buffer_64(row, kRowSize * 4);
uint16* rowptr = (uint16*)row; uint16* rowptr = (uint16*)row;
@ -1366,17 +1328,14 @@ static void ScalePlaneSimple(int src_width, int src_height,
if (src_width * 2 == dst_width && x < 0x8000) { if (src_width * 2 == dst_width && x < 0x8000) {
ScaleCols = ScaleColsUp2_C; ScaleCols = ScaleColsUp2_C;
#if defined(HAS_SCALECOLS_SSE2) #if defined(HAS_SCALECOLS_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) && if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleCols = ScaleColsUp2_SSE2; ScaleCols = ScaleColsUp2_SSE2;
} }
#endif #endif
} }
for (i = 0; i < dst_height; ++i) { for (i = 0; i < dst_height; ++i) {
ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx);
dst_width, x, dx);
dst_ptr += dst_stride; dst_ptr += dst_stride;
y += dy; y += dy;
} }
@ -1401,9 +1360,7 @@ static void ScalePlaneSimple_16(int src_width, int src_height,
if (src_width * 2 == dst_width && x < 0x8000) { if (src_width * 2 == dst_width && x < 0x8000) {
ScaleCols = ScaleColsUp2_16_C; ScaleCols = ScaleColsUp2_16_C;
#if defined(HAS_SCALECOLS_16_SSE2) #if defined(HAS_SCALECOLS_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) && if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleCols = ScaleColsUp2_16_SSE2; ScaleCols = ScaleColsUp2_16_SSE2;
} }
#endif #endif
@ -1428,8 +1385,7 @@ void ScalePlane(const uint8* src, int src_stride,
enum FilterMode filtering) { enum FilterMode filtering) {
// Simplify filtering when possible. // Simplify filtering when possible.
filtering = ScaleFilterReduce(src_width, src_height, filtering = ScaleFilterReduce(src_width, src_height,
dst_width, dst_height, dst_width, dst_height, filtering);
filtering);
// Negative height means invert the image. // Negative height means invert the image.
if (src_height < 0) { if (src_height < 0) {
@ -1445,9 +1401,9 @@ void ScalePlane(const uint8* src, int src_stride,
CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height); CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height);
return; return;
} }
if (dst_width == src_width) { if (dst_width == src_width && filtering != kFilterBox) {
int dy = FixedDiv(src_height, dst_height); int dy = FixedDiv(src_height, dst_height);
// Arbitrary scale vertically, but unscaled vertically. // Arbitrary scale vertically, but unscaled horizontally.
ScalePlaneVertical(src_height, ScalePlaneVertical(src_height,
dst_width, dst_height, dst_width, dst_height,
src_stride, dst_stride, src, dst, src_stride, dst_stride, src, dst,
@ -1478,7 +1434,7 @@ void ScalePlane(const uint8* src, int src_stride,
return; return;
} }
if (4 * dst_width == src_width && 4 * dst_height == src_height && if (4 * dst_width == src_width && 4 * dst_height == src_height &&
filtering != kFilterBilinear) { (filtering == kFilterBox || filtering == kFilterNone)) {
// optimized, 1/4 // optimized, 1/4
ScalePlaneDown4(src_width, src_height, dst_width, dst_height, ScalePlaneDown4(src_width, src_height, dst_width, dst_height,
src_stride, dst_stride, src, dst, filtering); src_stride, dst_stride, src, dst, filtering);
@ -1512,8 +1468,7 @@ void ScalePlane_16(const uint16* src, int src_stride,
enum FilterMode filtering) { enum FilterMode filtering) {
// Simplify filtering when possible. // Simplify filtering when possible.
filtering = ScaleFilterReduce(src_width, src_height, filtering = ScaleFilterReduce(src_width, src_height,
dst_width, dst_height, dst_width, dst_height, filtering);
filtering);
// Negative height means invert the image. // Negative height means invert the image.
if (src_height < 0) { if (src_height < 0) {
@ -1606,6 +1561,7 @@ int I420Scale(const uint8* src_y, int src_stride_y,
int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 || if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
src_width > 32768 || src_height > 32768 ||
!dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) { !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) {
return -1; return -1;
} }
@ -1637,6 +1593,7 @@ int I420Scale_16(const uint16* src_y, int src_stride_y,
int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 || if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
src_width > 32768 || src_height > 32768 ||
!dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) { !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) {
return -1; return -1;
} }

View File

@ -53,18 +53,27 @@ static void ScaleARGBDown2(int src_width, int src_height,
} }
#if defined(HAS_SCALEARGBROWDOWN2_SSE2) #if defined(HAS_SCALEARGBROWDOWN2_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) && if (TestCpuFlag(kCpuHasSSE2)) {
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) && ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_Any_SSE2 :
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_SSE2 :
ScaleARGBRowDown2Box_Any_SSE2);
if (IS_ALIGNED(dst_width, 4)) {
ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_SSE2 : ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_SSE2 :
(filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2 : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2 :
ScaleARGBRowDown2Box_SSE2); ScaleARGBRowDown2Box_SSE2);
} }
#elif defined(HAS_SCALEARGBROWDOWN2_NEON) }
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) && #endif
IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) { #if defined(HAS_SCALEARGBROWDOWN2_NEON)
ScaleARGBRowDown2 = filtering ? ScaleARGBRowDown2Box_NEON : if (TestCpuFlag(kCpuHasNEON)) {
ScaleARGBRowDown2_NEON; ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_Any_NEON :
(filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_NEON :
ScaleARGBRowDown2Box_Any_NEON);
if (IS_ALIGNED(dst_width, 8)) {
ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_NEON :
(filtering == kFilterLinear ? ScaleARGBRowDown2Linear_NEON :
ScaleARGBRowDown2Box_NEON);
}
} }
#endif #endif
@ -88,7 +97,7 @@ static void ScaleARGBDown4Box(int src_width, int src_height,
int x, int dx, int y, int dy) { int x, int dx, int y, int dy) {
int j; int j;
// Allocate 2 rows of ARGB. // Allocate 2 rows of ARGB.
const int kRowSize = (dst_width * 2 * 4 + 15) & ~15; const int kRowSize = (dst_width * 2 * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2); align_buffer_64(row, kRowSize * 2);
int row_stride = src_stride * (dy >> 16); int row_stride = src_stride * (dy >> 16);
void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride, void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
@ -98,17 +107,22 @@ static void ScaleARGBDown4Box(int src_width, int src_height,
assert(dx == 65536 * 4); // Test scale factor of 4. assert(dx == 65536 * 4); // Test scale factor of 4.
assert((dy & 0x3ffff) == 0); // Test vertical scale is multiple of 4. assert((dy & 0x3ffff) == 0); // Test vertical scale is multiple of 4.
#if defined(HAS_SCALEARGBROWDOWN2_SSE2) #if defined(HAS_SCALEARGBROWDOWN2_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) && if (TestCpuFlag(kCpuHasSSE2)) {
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) && ScaleARGBRowDown2 = ScaleARGBRowDown2Box_Any_SSE2;
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { if (IS_ALIGNED(dst_width, 4)) {
ScaleARGBRowDown2 = ScaleARGBRowDown2Box_SSE2; ScaleARGBRowDown2 = ScaleARGBRowDown2Box_SSE2;
} }
#elif defined(HAS_SCALEARGBROWDOWN2_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) &&
IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) {
ScaleARGBRowDown2 = ScaleARGBRowDown2Box_NEON;
} }
#endif #endif
#if defined(HAS_SCALEARGBROWDOWN2_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ScaleARGBRowDown2 = ScaleARGBRowDown2Box_Any_NEON;
if (IS_ALIGNED(dst_width, 8)) {
ScaleARGBRowDown2 = ScaleARGBRowDown2Box_NEON;
}
}
#endif
for (j = 0; j < dst_height; ++j) { for (j = 0; j < dst_height; ++j) {
ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2); ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2);
ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride, ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride,
@ -139,17 +153,24 @@ static void ScaleARGBDownEven(int src_width, int src_height,
assert(IS_ALIGNED(src_height, 2)); assert(IS_ALIGNED(src_height, 2));
src_argb += (y >> 16) * src_stride + (x >> 16) * 4; src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2) #if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) && if (TestCpuFlag(kCpuHasSSE2)) {
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2 :
ScaleARGBRowDownEven_Any_SSE2;
if (IS_ALIGNED(dst_width, 4)) {
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_SSE2 : ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_SSE2 :
ScaleARGBRowDownEven_SSE2; ScaleARGBRowDownEven_SSE2;
} }
#elif defined(HAS_SCALEARGBROWDOWNEVEN_NEON) }
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 4) && #endif
IS_ALIGNED(src_argb, 4)) { #if defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_NEON :
ScaleARGBRowDownEven_Any_NEON;
if (IS_ALIGNED(dst_width, 4)) {
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_NEON : ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_NEON :
ScaleARGBRowDownEven_NEON; ScaleARGBRowDownEven_NEON;
} }
}
#endif #endif
if (filtering == kFilterLinear) { if (filtering == kFilterLinear) {
@ -189,30 +210,16 @@ static void ScaleARGBBilinearDown(int src_width, int src_height,
clip_src_width = (int)(xr - xl) * 4; // Width aligned to 4. clip_src_width = (int)(xr - xl) * 4; // Width aligned to 4.
src_argb += xl * 4; src_argb += xl * 4;
x -= (int)(xl << 16); x -= (int)(xl << 16);
#if defined(HAS_INTERPOLATEROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && clip_src_width >= 16) {
InterpolateRow = InterpolateRow_Any_SSE2;
if (IS_ALIGNED(clip_src_width, 16)) {
InterpolateRow = InterpolateRow_Unaligned_SSE2;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) {
InterpolateRow = InterpolateRow_SSE2;
}
}
}
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3) #if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && clip_src_width >= 16) { if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3; InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(clip_src_width, 16)) { if (IS_ALIGNED(clip_src_width, 16)) {
InterpolateRow = InterpolateRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) {
InterpolateRow = InterpolateRow_SSSE3; InterpolateRow = InterpolateRow_SSSE3;
} }
} }
}
#endif #endif
#if defined(HAS_INTERPOLATEROW_AVX2) #if defined(HAS_INTERPOLATEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && clip_src_width >= 32) { if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_AVX2; InterpolateRow = InterpolateRow_Any_AVX2;
if (IS_ALIGNED(clip_src_width, 32)) { if (IS_ALIGNED(clip_src_width, 32)) {
InterpolateRow = InterpolateRow_AVX2; InterpolateRow = InterpolateRow_AVX2;
@ -220,15 +227,15 @@ static void ScaleARGBBilinearDown(int src_width, int src_height,
} }
#endif #endif
#if defined(HAS_INTERPOLATEROW_NEON) #if defined(HAS_INTERPOLATEROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && clip_src_width >= 16) { if (TestCpuFlag(kCpuHasNEON)) {
InterpolateRow = InterpolateRow_Any_NEON; InterpolateRow = InterpolateRow_Any_NEON;
if (IS_ALIGNED(clip_src_width, 16)) { if (IS_ALIGNED(clip_src_width, 16)) {
InterpolateRow = InterpolateRow_NEON; InterpolateRow = InterpolateRow_NEON;
} }
} }
#endif #endif
#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2) #if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && clip_src_width >= 4 && if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4)) { IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4)) {
InterpolateRow = InterpolateRow_Any_MIPS_DSPR2; InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
if (IS_ALIGNED(clip_src_width, 4)) { if (IS_ALIGNED(clip_src_width, 4)) {
@ -240,6 +247,14 @@ static void ScaleARGBBilinearDown(int src_width, int src_height,
if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3; ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
} }
#endif
#if defined(HAS_SCALEARGBFILTERCOLS_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
if (IS_ALIGNED(dst_width, 4)) {
ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
}
}
#endif #endif
// TODO(fbarchard): Consider not allocating row buffer for kFilterLinear. // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
// Allocate a row of ARGB. // Allocate a row of ARGB.
@ -285,30 +300,16 @@ static void ScaleARGBBilinearUp(int src_width, int src_height,
int dst_width, int x, int dx) = int dst_width, int x, int dx) =
filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C; filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
const int max_y = (src_height - 1) << 16; const int max_y = (src_height - 1) << 16;
#if defined(HAS_INTERPOLATEROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) {
InterpolateRow = InterpolateRow_Any_SSE2;
if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_Unaligned_SSE2;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
InterpolateRow = InterpolateRow_SSE2;
}
}
}
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3) #if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) { if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3; InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(dst_width, 4)) { if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
InterpolateRow = InterpolateRow_SSSE3; InterpolateRow = InterpolateRow_SSSE3;
} }
} }
}
#endif #endif
#if defined(HAS_INTERPOLATEROW_AVX2) #if defined(HAS_INTERPOLATEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 8) { if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_AVX2; InterpolateRow = InterpolateRow_Any_AVX2;
if (IS_ALIGNED(dst_width, 8)) { if (IS_ALIGNED(dst_width, 8)) {
InterpolateRow = InterpolateRow_AVX2; InterpolateRow = InterpolateRow_AVX2;
@ -316,15 +317,15 @@ static void ScaleARGBBilinearUp(int src_width, int src_height,
} }
#endif #endif
#if defined(HAS_INTERPOLATEROW_NEON) #if defined(HAS_INTERPOLATEROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) { if (TestCpuFlag(kCpuHasNEON)) {
InterpolateRow = InterpolateRow_Any_NEON; InterpolateRow = InterpolateRow_Any_NEON;
if (IS_ALIGNED(dst_width, 4)) { if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_NEON; InterpolateRow = InterpolateRow_NEON;
} }
} }
#endif #endif
#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2) #if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 1 && if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) { IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
InterpolateRow = InterpolateRow_MIPS_DSPR2; InterpolateRow = InterpolateRow_MIPS_DSPR2;
} }
@ -338,17 +339,31 @@ static void ScaleARGBBilinearUp(int src_width, int src_height,
ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3; ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
} }
#endif #endif
#if defined(HAS_SCALEARGBFILTERCOLS_NEON)
if (filtering && TestCpuFlag(kCpuHasNEON)) {
ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
if (IS_ALIGNED(dst_width, 4)) {
ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
}
}
#endif
#if defined(HAS_SCALEARGBCOLS_SSE2) #if defined(HAS_SCALEARGBCOLS_SSE2)
if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) { if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
ScaleARGBFilterCols = ScaleARGBCols_SSE2; ScaleARGBFilterCols = ScaleARGBCols_SSE2;
} }
#endif
#if defined(HAS_SCALEARGBCOLS_NEON)
if (!filtering && TestCpuFlag(kCpuHasNEON)) {
ScaleARGBFilterCols = ScaleARGBCols_Any_NEON;
if (IS_ALIGNED(dst_width, 8)) {
ScaleARGBFilterCols = ScaleARGBCols_NEON;
}
}
#endif #endif
if (!filtering && src_width * 2 == dst_width && x < 0x8000) { if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
ScaleARGBFilterCols = ScaleARGBColsUp2_C; ScaleARGBFilterCols = ScaleARGBColsUp2_C;
#if defined(HAS_SCALEARGBCOLSUP2_SSE2) #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) && if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2; ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
} }
#endif #endif
@ -363,7 +378,7 @@ static void ScaleARGBBilinearUp(int src_width, int src_height,
const uint8* src = src_argb + yi * src_stride; const uint8* src = src_argb + yi * src_stride;
// Allocate 2 rows of ARGB. // Allocate 2 rows of ARGB.
const int kRowSize = (dst_width * 4 + 15) & ~15; const int kRowSize = (dst_width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2); align_buffer_64(row, kRowSize * 2);
uint8* rowptr = row; uint8* rowptr = row;
@ -427,18 +442,15 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
uint8* rgb_buf, uint8* rgb_buf,
int width) = I422ToARGBRow_C; int width) = I422ToARGBRow_C;
#if defined(HAS_I422TOARGBROW_SSSE3) #if defined(HAS_I422TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 8) { if (TestCpuFlag(kCpuHasSSSE3)) {
I422ToARGBRow = I422ToARGBRow_Any_SSSE3; I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(src_width, 8)) { if (IS_ALIGNED(src_width, 8)) {
I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
I422ToARGBRow = I422ToARGBRow_SSSE3; I422ToARGBRow = I422ToARGBRow_SSSE3;
} }
} }
}
#endif #endif
#if defined(HAS_I422TOARGBROW_AVX2) #if defined(HAS_I422TOARGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && src_width >= 16) { if (TestCpuFlag(kCpuHasAVX2)) {
I422ToARGBRow = I422ToARGBRow_Any_AVX2; I422ToARGBRow = I422ToARGBRow_Any_AVX2;
if (IS_ALIGNED(src_width, 16)) { if (IS_ALIGNED(src_width, 16)) {
I422ToARGBRow = I422ToARGBRow_AVX2; I422ToARGBRow = I422ToARGBRow_AVX2;
@ -446,7 +458,7 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
} }
#endif #endif
#if defined(HAS_I422TOARGBROW_NEON) #if defined(HAS_I422TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && src_width >= 8) { if (TestCpuFlag(kCpuHasNEON)) {
I422ToARGBRow = I422ToARGBRow_Any_NEON; I422ToARGBRow = I422ToARGBRow_Any_NEON;
if (IS_ALIGNED(src_width, 8)) { if (IS_ALIGNED(src_width, 8)) {
I422ToARGBRow = I422ToARGBRow_NEON; I422ToARGBRow = I422ToARGBRow_NEON;
@ -466,30 +478,16 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb, void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
ptrdiff_t src_stride, int dst_width, int source_y_fraction) = ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
InterpolateRow_C; InterpolateRow_C;
#if defined(HAS_INTERPOLATEROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) {
InterpolateRow = InterpolateRow_Any_SSE2;
if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_Unaligned_SSE2;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
InterpolateRow = InterpolateRow_SSE2;
}
}
}
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3) #if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) { if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3; InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(dst_width, 4)) { if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
InterpolateRow = InterpolateRow_SSSE3; InterpolateRow = InterpolateRow_SSSE3;
} }
} }
}
#endif #endif
#if defined(HAS_INTERPOLATEROW_AVX2) #if defined(HAS_INTERPOLATEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 8) { if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_AVX2; InterpolateRow = InterpolateRow_Any_AVX2;
if (IS_ALIGNED(dst_width, 8)) { if (IS_ALIGNED(dst_width, 8)) {
InterpolateRow = InterpolateRow_AVX2; InterpolateRow = InterpolateRow_AVX2;
@ -497,15 +495,15 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
} }
#endif #endif
#if defined(HAS_INTERPOLATEROW_NEON) #if defined(HAS_INTERPOLATEROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) { if (TestCpuFlag(kCpuHasNEON)) {
InterpolateRow = InterpolateRow_Any_NEON; InterpolateRow = InterpolateRow_Any_NEON;
if (IS_ALIGNED(dst_width, 4)) { if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_NEON; InterpolateRow = InterpolateRow_NEON;
} }
} }
#endif #endif
#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2) #if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 1 && if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
InterpolateRow = InterpolateRow_MIPS_DSPR2; InterpolateRow = InterpolateRow_MIPS_DSPR2;
} }
@ -523,17 +521,31 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3; ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
} }
#endif #endif
#if defined(HAS_SCALEARGBFILTERCOLS_NEON)
if (filtering && TestCpuFlag(kCpuHasNEON)) {
ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
if (IS_ALIGNED(dst_width, 4)) {
ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
}
}
#endif
#if defined(HAS_SCALEARGBCOLS_SSE2) #if defined(HAS_SCALEARGBCOLS_SSE2)
if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) { if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
ScaleARGBFilterCols = ScaleARGBCols_SSE2; ScaleARGBFilterCols = ScaleARGBCols_SSE2;
} }
#endif
#if defined(HAS_SCALEARGBCOLS_NEON)
if (!filtering && TestCpuFlag(kCpuHasNEON)) {
ScaleARGBFilterCols = ScaleARGBCols_Any_NEON;
if (IS_ALIGNED(dst_width, 8)) {
ScaleARGBFilterCols = ScaleARGBCols_NEON;
}
}
#endif #endif
if (!filtering && src_width * 2 == dst_width && x < 0x8000) { if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
ScaleARGBFilterCols = ScaleARGBColsUp2_C; ScaleARGBFilterCols = ScaleARGBColsUp2_C;
#if defined(HAS_SCALEARGBCOLSUP2_SSE2) #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) && if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2; ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
} }
#endif #endif
@ -551,7 +563,7 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
const uint8* src_row_v = src_v + uv_yi * src_stride_v; const uint8* src_row_v = src_v + uv_yi * src_stride_v;
// Allocate 2 rows of ARGB. // Allocate 2 rows of ARGB.
const int kRowSize = (dst_width * 4 + 15) & ~15; const int kRowSize = (dst_width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2); align_buffer_64(row, kRowSize * 2);
// Allocate 1 row of ARGB for source conversion. // Allocate 1 row of ARGB for source conversion.
@ -636,13 +648,19 @@ static void ScaleARGBSimple(int src_width, int src_height,
if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) { if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
ScaleARGBCols = ScaleARGBCols_SSE2; ScaleARGBCols = ScaleARGBCols_SSE2;
} }
#endif
#if defined(HAS_SCALEARGBCOLS_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ScaleARGBCols = ScaleARGBCols_Any_NEON;
if (IS_ALIGNED(dst_width, 8)) {
ScaleARGBCols = ScaleARGBCols_NEON;
}
}
#endif #endif
if (src_width * 2 == dst_width && x < 0x8000) { if (src_width * 2 == dst_width && x < 0x8000) {
ScaleARGBCols = ScaleARGBColsUp2_C; ScaleARGBCols = ScaleARGBColsUp2_C;
#if defined(HAS_SCALEARGBCOLSUP2_SSE2) #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) && if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleARGBCols = ScaleARGBColsUp2_SSE2; ScaleARGBCols = ScaleARGBColsUp2_SSE2;
} }
#endif #endif
@ -776,6 +794,7 @@ int ARGBScaleClip(const uint8* src_argb, int src_stride_argb,
if (!src_argb || src_width == 0 || src_height == 0 || if (!src_argb || src_width == 0 || src_height == 0 ||
!dst_argb || dst_width <= 0 || dst_height <= 0 || !dst_argb || dst_width <= 0 || dst_height <= 0 ||
clip_x < 0 || clip_y < 0 || clip_x < 0 || clip_y < 0 ||
clip_width > 32768 || clip_height > 32768 ||
(clip_x + clip_width) > dst_width || (clip_x + clip_width) > dst_width ||
(clip_y + clip_height) > dst_height) { (clip_y + clip_height) > dst_height) {
return -1; return -1;
@ -794,6 +813,7 @@ int ARGBScale(const uint8* src_argb, int src_stride_argb,
int dst_width, int dst_height, int dst_width, int dst_height,
enum FilterMode filtering) { enum FilterMode filtering) {
if (!src_argb || src_width == 0 || src_height == 0 || if (!src_argb || src_width == 0 || src_height == 0 ||
src_width > 32768 || src_height > 32768 ||
!dst_argb || dst_width <= 0 || dst_height <= 0) { !dst_argb || dst_width <= 0 || dst_height <= 0) {
return -1; return -1;
} }
@ -803,6 +823,37 @@ int ARGBScale(const uint8* src_argb, int src_stride_argb,
return 0; return 0;
} }
// Scale with YUV conversion to ARGB and clipping.
//
// Converts the source planes into a temporary ARGB image with I420ToARGB and
// then scales that image into dst_argb with ARGBScaleClip, which writes only
// the clip rectangle (clip_x, clip_y, clip_width, clip_height).
//
// src_fourcc and dst_fourcc are accepted but not consulted: the source is
// always converted as I420 and the destination is always written as ARGB.
//
// Returns the result of ARGBScaleClip when the conversion succeeds. Returns
// -1 if the source dimensions are not positive, if the temporary buffer size
// cannot be represented, or if the buffer cannot be allocated. Returns the
// I420ToARGB error code if the conversion itself fails.
LIBYUV_API
int YUVToARGBScaleClip(const uint8* src_y, int src_stride_y,
                       const uint8* src_u, int src_stride_u,
                       const uint8* src_v, int src_stride_v,
                       uint32 src_fourcc,
                       int src_width, int src_height,
                       uint8* dst_argb, int dst_stride_argb,
                       uint32 dst_fourcc,
                       int dst_width, int dst_height,
                       int clip_x, int clip_y, int clip_width, int clip_height,
                       enum FilterMode filtering) {
  // Largest width whose 4-byte-per-pixel stride still fits in an int.
  const int kMaxArgbWidth = (int)(~0u >> 1) / 4;
  uint8* argb_buffer;
  size_t argb_size;
  int argb_stride;
  int r;
  (void)src_fourcc;  // Not implemented: source is always treated as I420.
  (void)dst_fourcc;  // Not implemented: destination is always ARGB.

  // A zero or negative dimension would turn into a zero-sized or (after the
  // implicit conversion to size_t) enormous allocation request.
  if (src_width <= 0 || src_height <= 0 || src_width > kMaxArgbWidth) {
    return -1;
  }
  argb_stride = src_width * 4;
  // Reject sizes whose byte count would wrap around in size_t.
  if ((size_t)argb_stride > (size_t)-1 / (size_t)src_height) {
    return -1;
  }
  argb_size = (size_t)argb_stride * (size_t)src_height;

  argb_buffer = (uint8*)malloc(argb_size);
  if (!argb_buffer) {
    return -1;  // Out of memory.
  }

  r = I420ToARGB(src_y, src_stride_y,
                 src_u, src_stride_u,
                 src_v, src_stride_v,
                 argb_buffer, argb_stride,
                 src_width, src_height);
  // Only scale when the conversion actually filled the temporary buffer.
  if (r == 0) {
    r = ARGBScaleClip(argb_buffer, argb_stride,
                      src_width, src_height,
                      dst_argb, dst_stride_argb,
                      dst_width, dst_height,
                      clip_x, clip_y, clip_width, clip_height,
                      filtering);
  }
  free(argb_buffer);
  return r;
}
#ifdef __cplusplus #ifdef __cplusplus
} // extern "C" } // extern "C"
} // namespace libyuv } // namespace libyuv

View File

@ -621,39 +621,31 @@ void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
} }
} }
void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleAddRow_C(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
uint16* dst_ptr, int src_width, int src_height) {
int x; int x;
assert(src_width > 0); assert(src_width > 0);
assert(src_height > 0); for (x = 0; x < src_width - 1; x += 2) {
for (x = 0; x < src_width; ++x) { dst_ptr[0] += src_ptr[0];
const uint8* s = src_ptr + x; dst_ptr[1] += src_ptr[1];
unsigned int sum = 0u; src_ptr += 2;
int y; dst_ptr += 2;
for (y = 0; y < src_height; ++y) {
sum += s[0];
s += src_stride;
} }
// TODO(fbarchard): Consider limitting height to 256 to avoid overflow. if (src_width & 1) {
dst_ptr[x] = sum < 65535u ? sum : 65535u; dst_ptr[0] += src_ptr[0];
} }
} }
void ScaleAddRows_16_C(const uint16* src_ptr, ptrdiff_t src_stride, void ScaleAddRow_16_C(const uint16* src_ptr, uint32* dst_ptr, int src_width) {
uint32* dst_ptr, int src_width, int src_height) {
int x; int x;
assert(src_width > 0); assert(src_width > 0);
assert(src_height > 0); for (x = 0; x < src_width - 1; x += 2) {
for (x = 0; x < src_width; ++x) { dst_ptr[0] += src_ptr[0];
const uint16* s = src_ptr + x; dst_ptr[1] += src_ptr[1];
unsigned int sum = 0u; src_ptr += 2;
int y; dst_ptr += 2;
for (y = 0; y < src_height; ++y) {
sum += s[0];
s += src_stride;
} }
// No risk of overflow here now if (src_width & 1) {
dst_ptr[x] = sum; dst_ptr[0] += src_ptr[0];
} }
} }
@ -884,32 +876,16 @@ void ScalePlaneVertical(int src_height,
assert(dst_width > 0); assert(dst_width > 0);
assert(dst_height > 0); assert(dst_height > 0);
src_argb += (x >> 16) * bpp; src_argb += (x >> 16) * bpp;
#if defined(HAS_INTERPOLATEROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && dst_width_bytes >= 16) {
InterpolateRow = InterpolateRow_Any_SSE2;
if (IS_ALIGNED(dst_width_bytes, 16)) {
InterpolateRow = InterpolateRow_Unaligned_SSE2;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
InterpolateRow = InterpolateRow_SSE2;
}
}
}
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3) #if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && dst_width_bytes >= 16) { if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3; InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(dst_width_bytes, 16)) { if (IS_ALIGNED(dst_width_bytes, 16)) {
InterpolateRow = InterpolateRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
InterpolateRow = InterpolateRow_SSSE3; InterpolateRow = InterpolateRow_SSSE3;
} }
} }
}
#endif #endif
#if defined(HAS_INTERPOLATEROW_AVX2) #if defined(HAS_INTERPOLATEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && dst_width_bytes >= 32) { if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_AVX2; InterpolateRow = InterpolateRow_Any_AVX2;
if (IS_ALIGNED(dst_width_bytes, 32)) { if (IS_ALIGNED(dst_width_bytes, 32)) {
InterpolateRow = InterpolateRow_AVX2; InterpolateRow = InterpolateRow_AVX2;
@ -917,15 +893,15 @@ void ScalePlaneVertical(int src_height,
} }
#endif #endif
#if defined(HAS_INTERPOLATEROW_NEON) #if defined(HAS_INTERPOLATEROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && dst_width_bytes >= 16) { if (TestCpuFlag(kCpuHasNEON)) {
InterpolateRow = InterpolateRow_Any_NEON; InterpolateRow = InterpolateRow_Any_NEON;
if (IS_ALIGNED(dst_width_bytes, 16)) { if (IS_ALIGNED(dst_width_bytes, 16)) {
InterpolateRow = InterpolateRow_NEON; InterpolateRow = InterpolateRow_NEON;
} }
} }
#endif #endif
#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2) #if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width_bytes >= 4 && if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) && IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) { IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
InterpolateRow = InterpolateRow_Any_MIPS_DSPR2; InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
@ -967,31 +943,23 @@ void ScalePlaneVertical_16(int src_height,
assert(dst_height > 0); assert(dst_height > 0);
src_argb += (x >> 16) * wpp; src_argb += (x >> 16) * wpp;
#if defined(HAS_INTERPOLATEROW_16_SSE2) #if defined(HAS_INTERPOLATEROW_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && dst_width_bytes >= 16) { if (TestCpuFlag(kCpuHasSSE2)) {
InterpolateRow = InterpolateRow_Any_16_SSE2; InterpolateRow = InterpolateRow_Any_16_SSE2;
if (IS_ALIGNED(dst_width_bytes, 16)) { if (IS_ALIGNED(dst_width_bytes, 16)) {
InterpolateRow = InterpolateRow_Unaligned_16_SSE2;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
InterpolateRow = InterpolateRow_16_SSE2; InterpolateRow = InterpolateRow_16_SSE2;
} }
} }
}
#endif #endif
#if defined(HAS_INTERPOLATEROW_16_SSSE3) #if defined(HAS_INTERPOLATEROW_16_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && dst_width_bytes >= 16) { if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_16_SSSE3; InterpolateRow = InterpolateRow_Any_16_SSSE3;
if (IS_ALIGNED(dst_width_bytes, 16)) { if (IS_ALIGNED(dst_width_bytes, 16)) {
InterpolateRow = InterpolateRow_Unaligned_16_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
InterpolateRow = InterpolateRow_16_SSSE3; InterpolateRow = InterpolateRow_16_SSSE3;
} }
} }
}
#endif #endif
#if defined(HAS_INTERPOLATEROW_16_AVX2) #if defined(HAS_INTERPOLATEROW_16_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && dst_width_bytes >= 32) { if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_16_AVX2; InterpolateRow = InterpolateRow_Any_16_AVX2;
if (IS_ALIGNED(dst_width_bytes, 32)) { if (IS_ALIGNED(dst_width_bytes, 32)) {
InterpolateRow = InterpolateRow_16_AVX2; InterpolateRow = InterpolateRow_16_AVX2;
@ -999,15 +967,15 @@ void ScalePlaneVertical_16(int src_height,
} }
#endif #endif
#if defined(HAS_INTERPOLATEROW_16_NEON) #if defined(HAS_INTERPOLATEROW_16_NEON)
if (TestCpuFlag(kCpuHasNEON) && dst_width_bytes >= 16) { if (TestCpuFlag(kCpuHasNEON)) {
InterpolateRow = InterpolateRow_Any_16_NEON; InterpolateRow = InterpolateRow_Any_16_NEON;
if (IS_ALIGNED(dst_width_bytes, 16)) { if (IS_ALIGNED(dst_width_bytes, 16)) {
InterpolateRow = InterpolateRow_16_NEON; InterpolateRow = InterpolateRow_16_NEON;
} }
} }
#endif #endif
#if defined(HAS_INTERPOLATEROWS_16_MIPS_DSPR2) #if defined(HAS_INTERPOLATEROW_16_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width_bytes >= 4 && if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) && IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) { IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2; InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2;
@ -1046,10 +1014,6 @@ enum FilterMode ScaleFilterReduce(int src_width, int src_height,
if (dst_width * 2 >= src_width && dst_height * 2 >= src_height) { if (dst_width * 2 >= src_width && dst_height * 2 >= src_height) {
filtering = kFilterBilinear; filtering = kFilterBilinear;
} }
// If scaling to larger, switch from Box to Bilinear.
if (dst_width >= src_width || dst_height >= src_height) {
filtering = kFilterBilinear;
}
} }
if (filtering == kFilterBilinear) { if (filtering == kFilterBilinear) {
if (src_height == 1) { if (src_height == 1) {

View File

@ -31,7 +31,6 @@ void ScaleRowDown2_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
"beqz $t9, 2f \n" "beqz $t9, 2f \n"
" nop \n" " nop \n"
".p2align 2 \n"
"1: \n" "1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0| "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4| "lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
@ -90,7 +89,6 @@ void ScaleRowDown2Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
"bltz $t9, 2f \n" "bltz $t9, 2f \n"
" nop \n" " nop \n"
".p2align 2 \n"
"1: \n" "1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0| "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4| "lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
@ -188,7 +186,6 @@ void ScaleRowDown4_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
"beqz $t9, 2f \n" "beqz $t9, 2f \n"
" nop \n" " nop \n"
".p2align 2 \n"
"1: \n" "1: \n"
"lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0| "lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t2, 4(%[src_ptr]) \n" // |7|6|5|4| "lw $t2, 4(%[src_ptr]) \n" // |7|6|5|4|
@ -248,7 +245,6 @@ void ScaleRowDown4Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
"srl $t9, %[dst_width], 1 \n" "srl $t9, %[dst_width], 1 \n"
"andi $t8, %[dst_width], 1 \n" "andi $t8, %[dst_width], 1 \n"
".p2align 2 \n"
"1: \n" "1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0| "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t1, 0(%[s1]) \n" // |7|6|5|4| "lw $t1, 0(%[s1]) \n" // |7|6|5|4|
@ -319,7 +315,6 @@ void ScaleRowDown34_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
__asm__ __volatile__ ( __asm__ __volatile__ (
".set push \n" ".set push \n"
".set noreorder \n" ".set noreorder \n"
".p2align 2 \n"
"1: \n" "1: \n"
"lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0| "lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t2, 4(%[src_ptr]) \n" // |7|6|5|4| "lw $t2, 4(%[src_ptr]) \n" // |7|6|5|4|
@ -368,7 +363,6 @@ void ScaleRowDown34_0_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
".set noreorder \n" ".set noreorder \n"
"repl.ph $t3, 3 \n" // 0x00030003 "repl.ph $t3, 3 \n" // 0x00030003
".p2align 2 \n"
"1: \n" "1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0| "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
"lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0| "lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0|
@ -425,7 +419,6 @@ void ScaleRowDown34_1_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
".set noreorder \n" ".set noreorder \n"
"repl.ph $t2, 3 \n" // 0x00030003 "repl.ph $t2, 3 \n" // 0x00030003
".p2align 2 \n"
"1: \n" "1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0| "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
"lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0| "lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0|
@ -477,7 +470,6 @@ void ScaleRowDown38_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
".set push \n" ".set push \n"
".set noreorder \n" ".set noreorder \n"
".p2align 2 \n"
"1: \n" "1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0| "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4| "lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
@ -528,7 +520,6 @@ void ScaleRowDown38_2_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
".set push \n" ".set push \n"
".set noreorder \n" ".set noreorder \n"
".p2align 2 \n"
"1: \n" "1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0| "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
"lw $t1, 4(%[src_ptr]) \n" // |S7|S6|S5|S4| "lw $t1, 4(%[src_ptr]) \n" // |S7|S6|S5|S4|
@ -586,7 +577,6 @@ void ScaleRowDown38_3_Box_MIPS_DSPR2(const uint8* src_ptr,
".set push \n" ".set push \n"
".set noreorder \n" ".set noreorder \n"
".p2align 2 \n"
"1: \n" "1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0| "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
"lw $t1, 4(%[src_ptr]) \n" // |S7|S6|S5|S4| "lw $t1, 4(%[src_ptr]) \n" // |S7|S6|S5|S4|

View File

@ -16,7 +16,8 @@ extern "C" {
#endif #endif
// This module is for GCC Neon. // This module is for GCC Neon.
#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) #if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \
!defined(__aarch64__)
// NEON downscalers with interpolation. // NEON downscalers with interpolation.
// Provided by Fritz Koenig // Provided by Fritz Koenig
@ -25,7 +26,6 @@ extern "C" {
void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) { uint8* dst, int dst_width) {
asm volatile ( asm volatile (
".p2align 2 \n"
"1: \n" "1: \n"
// load even pixels into q0, odd into q1 // load even pixels into q0, odd into q1
MEMACCESS(0) MEMACCESS(0)
@ -42,13 +42,35 @@ void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
); );
} }
// Read 32x1 average down and write 16x1.
// Each output byte is the rounded average of two horizontally adjacent
// source bytes: (src[2 * i] + src[2 * i + 1] + 1) >> 1.
// src_stride is not referenced by this function.
// The loop handles 16 output pixels (32 source bytes) per iteration and the
// body always executes at least once before dst_width is tested.
void ScaleRowDown2Linear_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
asm volatile (
"1: \n"
MEMACCESS(0)
"vld1.8 {q0, q1}, [%0]! \n" // load pixels and post inc
"subs %2, %2, #16 \n" // 16 processed per loop
"vpaddl.u8 q0, q0 \n" // add adjacent
"vpaddl.u8 q1, q1 \n"
"vrshrn.u16 d0, q0, #1 \n" // downshift, round and pack
"vrshrn.u16 d1, q1, #1 \n"
MEMACCESS(1)
"vst1.8 {q0}, [%1]! \n"
"bgt 1b \n"
: "+r"(src_ptr), // %0
"+r"(dst), // %1
"+r"(dst_width) // %2
:
// "memory": the asm stores through dst, which the compiler cannot see from
// the register operands alone. "cc": subs updates the condition flags.
: "memory", "cc", "q0", "q1" // Clobber List
);
}
// Read 32x2 average down and write 16x1. // Read 32x2 average down and write 16x1.
void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) { uint8* dst, int dst_width) {
asm volatile ( asm volatile (
// change the stride to row 2 pointer // change the stride to row 2 pointer
"add %1, %0 \n" "add %1, %0 \n"
".p2align 2 \n"
"1: \n" "1: \n"
MEMACCESS(0) MEMACCESS(0)
"vld1.8 {q0, q1}, [%0]! \n" // load row 1 and post inc "vld1.8 {q0, q1}, [%0]! \n" // load row 1 and post inc
@ -76,7 +98,6 @@ void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) { uint8* dst_ptr, int dst_width) {
asm volatile ( asm volatile (
".p2align 2 \n"
"1: \n" "1: \n"
MEMACCESS(0) MEMACCESS(0)
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0 "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
@ -98,7 +119,6 @@ void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
const uint8* src_ptr2 = src_ptr + src_stride * 2; const uint8* src_ptr2 = src_ptr + src_stride * 2;
const uint8* src_ptr3 = src_ptr + src_stride * 3; const uint8* src_ptr3 = src_ptr + src_stride * 3;
asm volatile ( asm volatile (
".p2align 2 \n"
"1: \n" "1: \n"
MEMACCESS(0) MEMACCESS(0)
"vld1.8 {q0}, [%0]! \n" // load up 16x4 "vld1.8 {q0}, [%0]! \n" // load up 16x4
@ -137,7 +157,6 @@ void ScaleRowDown34_NEON(const uint8* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) { uint8* dst_ptr, int dst_width) {
asm volatile ( asm volatile (
".p2align 2 \n"
"1: \n" "1: \n"
MEMACCESS(0) MEMACCESS(0)
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0 "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
@ -160,7 +179,6 @@ void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
asm volatile ( asm volatile (
"vmov.u8 d24, #3 \n" "vmov.u8 d24, #3 \n"
"add %3, %0 \n" "add %3, %0 \n"
".p2align 2 \n"
"1: \n" "1: \n"
MEMACCESS(0) MEMACCESS(0)
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0 "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
@ -220,7 +238,6 @@ void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
asm volatile ( asm volatile (
"vmov.u8 d24, #3 \n" "vmov.u8 d24, #3 \n"
"add %3, %0 \n" "add %3, %0 \n"
".p2align 2 \n"
"1: \n" "1: \n"
MEMACCESS(0) MEMACCESS(0)
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0 "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
@ -275,7 +292,6 @@ void ScaleRowDown38_NEON(const uint8* src_ptr,
asm volatile ( asm volatile (
MEMACCESS(3) MEMACCESS(3)
"vld1.8 {q3}, [%3] \n" "vld1.8 {q3}, [%3] \n"
".p2align 2 \n"
"1: \n" "1: \n"
MEMACCESS(0) MEMACCESS(0)
"vld1.8 {d0, d1, d2, d3}, [%0]! \n" "vld1.8 {d0, d1, d2, d3}, [%0]! \n"
@ -309,7 +325,6 @@ void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
MEMACCESS(7) MEMACCESS(7)
"vld1.8 {q15}, [%7] \n" "vld1.8 {q15}, [%7] \n"
"add %3, %0 \n" "add %3, %0 \n"
".p2align 2 \n"
"1: \n" "1: \n"
// d0 = 00 40 01 41 02 42 03 43 // d0 = 00 40 01 41 02 42 03 43
@ -425,7 +440,6 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
MEMACCESS(5) MEMACCESS(5)
"vld1.8 {q14}, [%5] \n" "vld1.8 {q14}, [%5] \n"
"add %3, %0 \n" "add %3, %0 \n"
".p2align 2 \n"
"1: \n" "1: \n"
// d0 = 00 40 01 41 02 42 03 43 // d0 = 00 40 01 41 02 42 03 43
@ -516,6 +530,110 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
); );
} }
// Sums src_height rows of 8 bit pixels column by column into 16 bit totals.
// For every group of 16 columns the inner loop walks down the rows (stepping
// by src_stride), accumulating into two 8 x u16 registers, then 16 uint16
// sums are written to dst_ptr.
// src_width is consumed 16 columns per outer iteration; both loops execute
// their body at least once before the counter is tested.
void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint16* dst_ptr, int src_width, int src_height) {
// Row cursor for the current column group; (re)initialised inside the asm.
const uint8* src_tmp = NULL;
asm volatile (
"1: \n"
"mov %0, %1 \n" // src_tmp = top of this column group
"mov r12, %5 \n" // r12 = remaining rows (src_height)
"veor q2, q2, q2 \n" // clear sums for columns 0..7
"veor q3, q3, q3 \n" // clear sums for columns 8..15
"2: \n"
// load 16 pixels into q0
MEMACCESS(0)
"vld1.8 {q0}, [%0], %3 \n" // post-increment by src_stride: next row
"vaddw.u8 q3, q3, d1 \n" // widen and add upper 8 pixels
"vaddw.u8 q2, q2, d0 \n" // widen and add lower 8 pixels
"subs r12, r12, #1 \n"
"bgt 2b \n"
MEMACCESS(2)
"vst1.16 {q2, q3}, [%2]! \n" // store pixels
"add %1, %1, #16 \n" // advance to the next 16 columns
"subs %4, %4, #16 \n" // 16 processed per loop
"bgt 1b \n"
: "+r"(src_tmp), // %0
"+r"(src_ptr), // %1
"+r"(dst_ptr), // %2
"+r"(src_stride), // %3
"+r"(src_width), // %4
"+r"(src_height) // %5
:
: "memory", "cc", "r12", "q0", "q1", "q2", "q3" // Clobber List
);
}
// TODO(Yang Zhang): Investigate less load instructions for
// the x/dx stepping
// LOAD2_DATA8_LANE(n): computes the source index x >> 16 into %5, forms the
// address src_ptr + index in %6, advances x (%3) by dx (%4), then loads the
// two adjacent bytes at that address into lane n of d6 (first byte) and
// d7 (second byte).
#define LOAD2_DATA8_LANE(n) \
"lsr %5, %3, #16 \n" \
"add %6, %1, %5 \n" \
"add %3, %3, %4 \n" \
MEMACCESS(6) \
"vld2.8 {d6["#n"], d7["#n"]}, [%6] \n"
// Horizontally scales a row of 8 bit pixels with linear interpolation.
// x and dx are used as 16.16 fixed point: x >> 16 selects the source pixel
// pair (a, b) and the low 16 bits of x are the blend fraction f. Each output
// byte is a + (((b - a) * f) >> 16).
// Eight output pixels are produced per loop iteration; the body always
// executes at least once before dst_width is tested.
void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx) {
// Lane multipliers used to build x + i * dx for i = 0..3 in one vector.
int dx_offset[4] = {0, 1, 2, 3};
// %5: points at dx_offset for the initial vld1.32, then is reused as the
// scratch index register by LOAD2_DATA8_LANE.
int* tmp = dx_offset;
// %6: scratch address register written by LOAD2_DATA8_LANE.
const uint8* src_tmp = src_ptr;
asm volatile (
"vdup.32 q0, %3 \n" // x
"vdup.32 q1, %4 \n" // dx
"vld1.32 {q2}, [%5] \n" // 0 1 2 3
"vshl.i32 q3, q1, #2 \n" // 4 * dx
"vmul.s32 q1, q1, q2 \n"
// x , x + 1 * dx, x + 2 * dx, x + 3 * dx
"vadd.s32 q1, q1, q0 \n"
// x + 4 * dx, x + 5 * dx, x + 6 * dx, x + 7 * dx
"vadd.s32 q2, q1, q3 \n"
"vshl.i32 q0, q3, #1 \n" // 8 * dx
"1: \n"
// Gather 8 pixel pairs: d6 = a (left pixels), d7 = b (right pixels).
LOAD2_DATA8_LANE(0)
LOAD2_DATA8_LANE(1)
LOAD2_DATA8_LANE(2)
LOAD2_DATA8_LANE(3)
LOAD2_DATA8_LANE(4)
LOAD2_DATA8_LANE(5)
LOAD2_DATA8_LANE(6)
LOAD2_DATA8_LANE(7)
"vmov q10, q1 \n"
"vmov q11, q2 \n"
"vuzp.16 q10, q11 \n" // q10 = low 16 bits (fraction) of the 8 x values
"vmovl.u8 q8, d6 \n" // a widened to 16 bits
"vmovl.u8 q9, d7 \n" // b widened to 16 bits
"vsubl.s16 q11, d18, d16 \n" // b - a, pixels 0..3
"vsubl.s16 q12, d19, d17 \n" // b - a, pixels 4..7
"vmovl.u16 q13, d20 \n" // fractions 0..3 widened to 32 bits
"vmovl.u16 q10, d21 \n" // fractions 4..7 widened to 32 bits
"vmul.s32 q11, q11, q13 \n" // (b - a) * f
"vmul.s32 q12, q12, q10 \n"
"vshrn.s32 d18, q11, #16 \n" // >> 16 and narrow to 16 bits
"vshrn.s32 d19, q12, #16 \n"
"vadd.s16 q8, q8, q9 \n" // a + ((b - a) * f >> 16)
"vmovn.s16 d6, q8 \n" // narrow to 8 bit pixels
MEMACCESS(0)
"vst1.8 {d6}, [%0]! \n" // store pixels
"vadd.s32 q1, q1, q0 \n" // step the vector x positions by 8 * dx
"vadd.s32 q2, q2, q0 \n"
"subs %2, %2, #8 \n" // 8 processed per loop
"bgt 1b \n"
: "+r"(dst_ptr), // %0
"+r"(src_ptr), // %1
"+r"(dst_width), // %2
"+r"(x), // %3
"+r"(dx), // %4
"+r"(tmp), // %5
"+r"(src_tmp) // %6
:
: "memory", "cc", "q0", "q1", "q2", "q3",
"q8", "q9", "q10", "q11", "q12", "q13"
);
}
#undef LOAD2_DATA8_LANE
// 16x2 -> 16x1 // 16x2 -> 16x1
void ScaleFilterRows_NEON(uint8* dst_ptr, void ScaleFilterRows_NEON(uint8* dst_ptr,
const uint8* src_ptr, ptrdiff_t src_stride, const uint8* src_ptr, ptrdiff_t src_stride,
@ -618,7 +736,6 @@ void ScaleFilterRows_NEON(uint8* dst_ptr,
void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) { uint8* dst, int dst_width) {
asm volatile ( asm volatile (
".p2align 2 \n"
"1: \n" "1: \n"
// load even pixels into q0, odd into q1 // load even pixels into q0, odd into q1
MEMACCESS(0) MEMACCESS(0)
@ -639,12 +756,39 @@ void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
); );
} }
// Halves a row of 4-byte ARGB pixels horizontally: each output pixel is the
// per channel rounded average of two adjacent source pixels,
// (p0 + p1 + 1) >> 1. src_stride is not referenced by this function.
// Eight output pixels (16 source pixels) are produced per loop iteration; the
// body always executes at least once before dst_width is tested.
void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb, ptrdiff_t src_stride,
uint8* dst_argb, int dst_width) {
asm volatile (
"1: \n"
// The two vld4.8 de-interleave 16 pixels so that q0..q3 each hold the 16
// bytes of one channel.
MEMACCESS(0)
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
MEMACCESS(0)
"vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels.
"subs %2, %2, #8 \n" // 8 processed per loop
"vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts.
"vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
"vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts.
"vpaddl.u8 q3, q3 \n" // A 16 bytes -> 8 shorts.
"vrshrn.u16 d0, q0, #1 \n" // downshift, round and pack
"vrshrn.u16 d1, q1, #1 \n"
"vrshrn.u16 d2, q2, #1 \n"
"vrshrn.u16 d3, q3, #1 \n"
MEMACCESS(1)
"vst4.8 {d0, d1, d2, d3}, [%1]! \n" // re-interleave and store 8 pixels
"bgt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_argb), // %1
"+r"(dst_width) // %2
:
: "memory", "cc", "q0", "q1", "q2", "q3" // Clobber List
);
}
void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) { uint8* dst, int dst_width) {
asm volatile ( asm volatile (
// change the stride to row 2 pointer // change the stride to row 2 pointer
"add %1, %1, %0 \n" "add %1, %1, %0 \n"
".p2align 2 \n"
"1: \n" "1: \n"
MEMACCESS(0) MEMACCESS(0)
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels. "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
@ -685,7 +829,6 @@ void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx, uint8* dst_argb, int dst_width) { int src_stepx, uint8* dst_argb, int dst_width) {
asm volatile ( asm volatile (
"mov r12, %3, lsl #2 \n" "mov r12, %3, lsl #2 \n"
".p2align 2 \n"
"1: \n" "1: \n"
MEMACCESS(0) MEMACCESS(0)
"vld1.32 {d0[0]}, [%0], r12 \n" "vld1.32 {d0[0]}, [%0], r12 \n"
@ -715,7 +858,6 @@ void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
asm volatile ( asm volatile (
"mov r12, %4, lsl #2 \n" "mov r12, %4, lsl #2 \n"
"add %1, %1, %0 \n" "add %1, %1, %0 \n"
".p2align 2 \n"
"1: \n" "1: \n"
MEMACCESS(0) MEMACCESS(0)
"vld1.8 {d0}, [%0], r12 \n" // Read 4 2x2 blocks -> 2x1 "vld1.8 {d0}, [%0], r12 \n" // Read 4 2x2 blocks -> 2x1
@ -756,7 +898,118 @@ void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
); );
} }
#endif // __ARM_NEON__ // TODO(Yang Zhang): Investigate less load instructions for
// the x/dx stepping
// LOAD1_DATA32_LANE(dn, n): computes the source pixel index x >> 16 into %5,
// forms the address src_argb + index * 4 in %6, advances x (%3) by dx (%4),
// then loads one 32 bit pixel from that address into lane n of register dn.
#define LOAD1_DATA32_LANE(dn, n) \
"lsr %5, %3, #16 \n" \
"add %6, %1, %5, lsl #2 \n" \
"add %3, %3, %4 \n" \
MEMACCESS(6) \
"vld1.32 {"#dn"["#n"]}, [%6] \n"
// Horizontally scales a row of 4-byte pixels by point sampling (no blending).
// x and dx are used as 16.16 fixed point: output pixel i is the source pixel
// at index (x + i * dx) >> 16.
// Eight output pixels are produced per loop iteration; the body always
// executes at least once before dst_width is tested.
void ScaleARGBCols_NEON(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx) {
// %5: scratch register holding the current source pixel index.
int tmp = 0;
// %6: scratch register holding the current source pixel address.
const uint8* src_tmp = src_argb;
asm volatile (
"1: \n"
// Gather 8 pixels into q0 (d0, d1) and q1 (d2, d3).
LOAD1_DATA32_LANE(d0, 0)
LOAD1_DATA32_LANE(d0, 1)
LOAD1_DATA32_LANE(d1, 0)
LOAD1_DATA32_LANE(d1, 1)
LOAD1_DATA32_LANE(d2, 0)
LOAD1_DATA32_LANE(d2, 1)
LOAD1_DATA32_LANE(d3, 0)
LOAD1_DATA32_LANE(d3, 1)
MEMACCESS(0)
"vst1.32 {q0, q1}, [%0]! \n" // store pixels
"subs %2, %2, #8 \n" // 8 processed per loop
"bgt 1b \n"
: "+r"(dst_argb), // %0
"+r"(src_argb), // %1
"+r"(dst_width), // %2
"+r"(x), // %3
"+r"(dx), // %4
"+r"(tmp), // %5
"+r"(src_tmp) // %6
:
: "memory", "cc", "q0", "q1"
);
}
#undef LOAD1_DATA32_LANE
// TODO(Yang Zhang): Investigate less load instructions for
// the x/dx stepping
// LOAD2_DATA32_LANE(dn1, dn2, n): computes the source pixel index x >> 16 into
// %5, forms the address src_argb + index * 4 in %6, advances x (%3) by
// dx (%4), then loads the two adjacent 32 bit pixels at that address into
// lane n of dn1 (first pixel) and dn2 (second pixel).
#define LOAD2_DATA32_LANE(dn1, dn2, n) \
"lsr %5, %3, #16 \n" \
"add %6, %1, %5, lsl #2 \n" \
"add %3, %3, %4 \n" \
MEMACCESS(6) \
"vld2.32 {"#dn1"["#n"], "#dn2"["#n"]}, [%6] \n"
// Horizontally scales a row of 4-byte pixels with linear interpolation.
// x and dx are used as 16.16 fixed point: x >> 16 selects the source pixel
// pair (a, b) and f = (x >> 9) & 0x7f is a 7 bit blend fraction. Each output
// byte is (a * (0x7f ^ f) + b * f) >> 7.
// Four output pixels are produced per loop iteration; the body always
// executes at least once before dst_width is tested.
void ScaleARGBFilterCols_NEON(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx) {
// Lane multipliers used to build x + i * dx for i = 0..3 in one vector.
int dx_offset[4] = {0, 1, 2, 3};
// %5: points at dx_offset for the initial vld1.32, then is reused as the
// scratch index register by LOAD2_DATA32_LANE.
int* tmp = dx_offset;
// %6: scratch address register written by LOAD2_DATA32_LANE.
const uint8* src_tmp = src_argb;
asm volatile (
"vdup.32 q0, %3 \n" // x
"vdup.32 q1, %4 \n" // dx
"vld1.32 {q2}, [%5] \n" // 0 1 2 3
"vshl.i32 q9, q1, #2 \n" // 4 * dx
"vmul.s32 q1, q1, q2 \n"
"vmov.i8 q3, #0x7f \n" // 0x7F
"vmov.i16 q15, #0x7f \n" // 0x7F
// x , x + 1 * dx, x + 2 * dx, x + 3 * dx
"vadd.s32 q8, q1, q0 \n"
"1: \n"
// d0, d1: a
// d2, d3: b
LOAD2_DATA32_LANE(d0, d2, 0)
LOAD2_DATA32_LANE(d0, d2, 1)
LOAD2_DATA32_LANE(d1, d3, 0)
LOAD2_DATA32_LANE(d1, d3, 1)
"vshrn.i32 d22, q8, #9 \n" // x >> 9, narrowed to 16 bits per pixel
"vand.16 d22, d22, d30 \n" // keep the low 7 bits: fraction f per pixel
// Broadcast each pixel's f to a full d register, then pair them up so that
// every one of the 4 bytes of a pixel sees the same f.
"vdup.8 d24, d22[0] \n"
"vdup.8 d25, d22[2] \n"
"vdup.8 d26, d22[4] \n"
"vdup.8 d27, d22[6] \n"
"vext.8 d4, d24, d25, #4 \n"
"vext.8 d5, d26, d27, #4 \n" // f
"veor.8 q10, q2, q3 \n" // 0x7f ^ f
"vmull.u8 q11, d0, d20 \n" // a * (0x7f ^ f), pixels 0..1
"vmull.u8 q12, d1, d21 \n" // a * (0x7f ^ f), pixels 2..3
"vmull.u8 q13, d2, d4 \n" // b * f, pixels 0..1
"vmull.u8 q14, d3, d5 \n" // b * f, pixels 2..3
"vadd.i16 q11, q11, q13 \n"
"vadd.i16 q12, q12, q14 \n"
"vshrn.i16 d0, q11, #7 \n" // >> 7 and narrow back to 8 bits
"vshrn.i16 d1, q12, #7 \n"
MEMACCESS(0)
"vst1.32 {d0, d1}, [%0]! \n" // store pixels
"vadd.s32 q8, q8, q9 \n" // step the vector x positions by 4 * dx
"subs %2, %2, #4 \n" // 4 processed per loop
"bgt 1b \n"
: "+r"(dst_argb), // %0
"+r"(src_argb), // %1
"+r"(dst_width), // %2
"+r"(x), // %3
"+r"(dx), // %4
"+r"(tmp), // %5
"+r"(src_tmp) // %6
:
: "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9",
"q10", "q11", "q12", "q13", "q14", "q15"
);
}
#undef LOAD2_DATA32_LANE
#endif // defined(__ARM_NEON__) && !defined(__aarch64__)
#ifdef __cplusplus #ifdef __cplusplus
} // extern "C" } // extern "C"

View File

@ -8,16 +8,18 @@
* be found in the AUTHORS file in the root of the source tree. * be found in the AUTHORS file in the root of the source tree.
*/ */
#include "libyuv/scale.h"
#include "libyuv/row.h" #include "libyuv/row.h"
#include "libyuv/scale_row.h"
#ifdef __cplusplus #ifdef __cplusplus
namespace libyuv { namespace libyuv {
extern "C" { extern "C" {
#endif #endif
// This module is for GCC Neon. // This module is for GCC Neon armv8 64 bit.
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
#ifdef HAS_SCALEROWDOWN2_NEON
// Read 32x1 throw away even pixels, and write 16x1. // Read 32x1 throw away even pixels, and write 16x1.
void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) { uint8* dst, int dst_width) {
@ -26,10 +28,33 @@ void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
// load even pixels into v0, odd into v1 // load even pixels into v0, odd into v1
MEMACCESS(0) MEMACCESS(0)
"ld2 {v0.16b,v1.16b}, [%0], #32 \n" "ld2 {v0.16b,v1.16b}, [%0], #32 \n"
"subs %2, %2, #16 \n" // 16 processed per loop "subs %w2, %w2, #16 \n" // 16 processed per loop
MEMACCESS(1) MEMACCESS(1)
"st1 {v1.16b}, [%1], #16 \n" // store odd pixels "st1 {v1.16b}, [%1], #16 \n" // store odd pixels
"bgt 1b \n" "b.gt 1b \n"
: "+r"(src_ptr), // %0
"+r"(dst), // %1
"+r"(dst_width) // %2
:
: "v0", "v1" // Clobber List
);
}
// Read 32x1 average down and write 16x1.
void ScaleRowDown2Linear_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
asm volatile (
"1: \n"
MEMACCESS(0)
"ld1 {v0.16b,v1.16b}, [%0], #32 \n" // load pixels and post inc
"subs %w2, %w2, #16 \n" // 16 processed per loop
"uaddlp v0.8h, v0.16b \n" // add adjacent
"uaddlp v1.8h, v1.16b \n"
"rshrn v0.8b, v0.8h, #1 \n" // downshift, round and pack
"rshrn2 v0.16b, v1.8h, #1 \n"
MEMACCESS(1)
"st1 {v0.16b}, [%1], #16 \n"
"b.gt 1b \n"
: "+r"(src_ptr), // %0 : "+r"(src_ptr), // %0
"+r"(dst), // %1 "+r"(dst), // %1
"+r"(dst_width) // %2 "+r"(dst_width) // %2
@ -37,9 +62,7 @@ void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
: "v0", "v1" // Clobber List : "v0", "v1" // Clobber List
); );
} }
#endif //HAS_SCALEROWDOWN2_NEON
#ifdef HAS_SCALEROWDOWN2_NEON
// Read 32x2 average down and write 16x1. // Read 32x2 average down and write 16x1.
void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) { uint8* dst, int dst_width) {
@ -51,7 +74,7 @@ void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
"ld1 {v0.16b,v1.16b}, [%0], #32 \n" // load row 1 and post inc "ld1 {v0.16b,v1.16b}, [%0], #32 \n" // load row 1 and post inc
MEMACCESS(1) MEMACCESS(1)
"ld1 {v2.16b, v3.16b}, [%1], #32 \n" // load row 2 and post inc "ld1 {v2.16b, v3.16b}, [%1], #32 \n" // load row 2 and post inc
"subs %3, %3, #16 \n" // 16 processed per loop "subs %w3, %w3, #16 \n" // 16 processed per loop
"uaddlp v0.8h, v0.16b \n" // row 1 add adjacent "uaddlp v0.8h, v0.16b \n" // row 1 add adjacent
"uaddlp v1.8h, v1.16b \n" "uaddlp v1.8h, v1.16b \n"
"uadalp v0.8h, v2.16b \n" // row 2 add adjacent + row1 "uadalp v0.8h, v2.16b \n" // row 2 add adjacent + row1
@ -60,7 +83,7 @@ void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
"rshrn2 v0.16b, v1.8h, #2 \n" "rshrn2 v0.16b, v1.8h, #2 \n"
MEMACCESS(2) MEMACCESS(2)
"st1 {v0.16b}, [%2], #16 \n" "st1 {v0.16b}, [%2], #16 \n"
"bgt 1b \n" "b.gt 1b \n"
: "+r"(src_ptr), // %0 : "+r"(src_ptr), // %0
"+r"(src_stride), // %1 "+r"(src_stride), // %1
"+r"(dst), // %2 "+r"(dst), // %2
@ -69,19 +92,17 @@ void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
: "v0", "v1", "v2", "v3" // Clobber List : "v0", "v1", "v2", "v3" // Clobber List
); );
} }
#endif //HAS_SCALEROWDOWN2_NEON
#ifdef HAS_SCALEROWDOWN4_NEON
void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) { uint8* dst_ptr, int dst_width) {
asm volatile ( asm volatile (
"1: \n" "1: \n"
MEMACCESS(0) MEMACCESS(0)
"ld4 {v0.8b-3.8b}, [%0], #32 \n" // src line 0 "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // src line 0
"subs %2, %2, #8 \n" // 8 processed per loop "subs %w2, %w2, #8 \n" // 8 processed per loop
MEMACCESS(1) MEMACCESS(1)
"st1 {v2.8b}, [%1], #8 \n" "st1 {v2.8b}, [%1], #8 \n"
"bgt 1b \n" "b.gt 1b \n"
: "+r"(src_ptr), // %0 : "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1 "+r"(dst_ptr), // %1
"+r"(dst_width) // %2 "+r"(dst_width) // %2
@ -89,9 +110,7 @@ void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
: "v0", "v1", "v2", "v3", "memory", "cc" : "v0", "v1", "v2", "v3", "memory", "cc"
); );
} }
#endif //HAS_SCALEROWDOWN4_NEON
#ifdef HAS_SCALEROWDOWN4_NEON
void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) { uint8* dst_ptr, int dst_width) {
const uint8* src_ptr1 = src_ptr + src_stride; const uint8* src_ptr1 = src_ptr + src_stride;
@ -102,12 +121,12 @@ asm volatile (
MEMACCESS(0) MEMACCESS(0)
"ld1 {v0.16b}, [%0], #16 \n" // load up 16x4 "ld1 {v0.16b}, [%0], #16 \n" // load up 16x4
MEMACCESS(3) MEMACCESS(3)
"ld1 {v1.16b}, [%3], #16 \n" "ld1 {v1.16b}, [%2], #16 \n"
MEMACCESS(4) MEMACCESS(4)
"ld1 {v2.16b}, [%4], #16 \n" "ld1 {v2.16b}, [%3], #16 \n"
MEMACCESS(5) MEMACCESS(5)
"ld1 {v3.16b}, [%5], #16 \n" "ld1 {v3.16b}, [%4], #16 \n"
"subs %2, %2, #4 \n" "subs %w5, %w5, #4 \n"
"uaddlp v0.8h, v0.16b \n" "uaddlp v0.8h, v0.16b \n"
"uadalp v0.8h, v1.16b \n" "uadalp v0.8h, v1.16b \n"
"uadalp v0.8h, v2.16b \n" "uadalp v0.8h, v2.16b \n"
@ -116,20 +135,18 @@ asm volatile (
"rshrn v0.8b, v0.8h, #4 \n" // divide by 16 w/rounding "rshrn v0.8b, v0.8h, #4 \n" // divide by 16 w/rounding
MEMACCESS(1) MEMACCESS(1)
"st1 {v0.s}[0], [%1], #4 \n" "st1 {v0.s}[0], [%1], #4 \n"
"bgt 1b \n" "b.gt 1b \n"
: "+r"(src_ptr), // %0 : "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1 "+r"(dst_ptr), // %1
"+r"(dst_width), // %2 "+r"(src_ptr1), // %2
"+r"(src_ptr1), // %3 "+r"(src_ptr2), // %3
"+r"(src_ptr2), // %4 "+r"(src_ptr3), // %4
"+r"(src_ptr3) // %5 "+r"(dst_width) // %5
: :
: "v0", "v1", "v2", "v3", "memory", "cc" : "v0", "v1", "v2", "v3", "memory", "cc"
); );
} }
#endif //HAS_SCALEROWDOWN4_NEON
#ifdef HAS_SCALEROWDOWN34_NEON
// Down scale from 4 to 3 pixels. Use the neon multilane read/write // Down scale from 4 to 3 pixels. Use the neon multilane read/write
// to load up the every 4th pixel into a 4 different registers. // to load up the every 4th pixel into a 4 different registers.
// Point samples 32 pixels to 24 pixels. // Point samples 32 pixels to 24 pixels.
@ -139,12 +156,12 @@ void ScaleRowDown34_NEON(const uint8* src_ptr,
asm volatile ( asm volatile (
"1: \n" "1: \n"
MEMACCESS(0) MEMACCESS(0)
"ld4 {v0.8b-v3.8b}, [%0], #32 \n" // src line 0 "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // src line 0
"subs %2, %2, #24 \n" "subs %w2, %w2, #24 \n"
"mov v2.8b, v3.8b \n" // order v0, v1, v2 "orr v2.16b, v3.16b, v3.16b \n" // order v0, v1, v2
MEMACCESS(1) MEMACCESS(1)
"st3 {v0.8b-v2.8b}, [%1], #24 \n" "st3 {v0.8b,v1.8b,v2.8b}, [%1], #24 \n"
"bgt 1b \n" "b.gt 1b \n"
: "+r"(src_ptr), // %0 : "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1 "+r"(dst_ptr), // %1
"+r"(dst_width) // %2 "+r"(dst_width) // %2
@ -152,9 +169,7 @@ void ScaleRowDown34_NEON(const uint8* src_ptr,
: "v0", "v1", "v2", "v3", "memory", "cc" : "v0", "v1", "v2", "v3", "memory", "cc"
); );
} }
#endif //HAS_SCALEROWDOWN34_NEON
#ifdef HAS_SCALEROWDOWN34_NEON
void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr, void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) { uint8* dst_ptr, int dst_width) {
@ -163,10 +178,10 @@ void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
"add %3, %3, %0 \n" "add %3, %3, %0 \n"
"1: \n" "1: \n"
MEMACCESS(0) MEMACCESS(0)
"ld4 {v0.8b-v3.8b}, [%0], #32 \n" // src line 0 "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // src line 0
MEMACCESS(3) MEMACCESS(3)
"ld4 {v4.8b-v7.8b}, [%3], #32 \n" // src line 1 "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%3], #32 \n" // src line 1
"subs %2, %2, #24 \n" "subs %w2, %w2, #24 \n"
// filter src line 0 with src line 1 // filter src line 0 with src line 1
// expand chars to shorts to allow for room // expand chars to shorts to allow for room
@ -202,9 +217,9 @@ void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
"uqrshrn v2.8b, v16.8h, #2 \n" "uqrshrn v2.8b, v16.8h, #2 \n"
MEMACCESS(1) MEMACCESS(1)
"st3 {v0.8b-v2.8b}, [%1], #24 \n" "st3 {v0.8b,v1.8b,v2.8b}, [%1], #24 \n"
"bgt 1b \n" "b.gt 1b \n"
: "+r"(src_ptr), // %0 : "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1 "+r"(dst_ptr), // %1
"+r"(dst_width), // %2 "+r"(dst_width), // %2
@ -214,9 +229,7 @@ void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
"v20", "memory", "cc" "v20", "memory", "cc"
); );
} }
#endif //ScaleRowDown34_0_Box_NEON
#ifdef HAS_SCALEROWDOWN34_NEON
void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr, void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) { uint8* dst_ptr, int dst_width) {
@ -225,10 +238,10 @@ void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
"add %3, %3, %0 \n" "add %3, %3, %0 \n"
"1: \n" "1: \n"
MEMACCESS(0) MEMACCESS(0)
"ld4 {v0.8b-v3.8b}, [%0], #32 \n" // src line 0 "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // src line 0
MEMACCESS(3) MEMACCESS(3)
"ld4 {v4.8b-v7.8b}, [%3], #32 \n" // src line 1 "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%3], #32 \n" // src line 1
"subs %2, %2, #24 \n" "subs %w2, %w2, #24 \n"
// average src line 0 with src line 1 // average src line 0 with src line 1
"urhadd v0.8b, v0.8b, v4.8b \n" "urhadd v0.8b, v0.8b, v4.8b \n"
"urhadd v1.8b, v1.8b, v5.8b \n" "urhadd v1.8b, v1.8b, v5.8b \n"
@ -249,8 +262,8 @@ void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
"uqrshrn v2.8b, v4.8h, #2 \n" "uqrshrn v2.8b, v4.8h, #2 \n"
MEMACCESS(1) MEMACCESS(1)
"st3 {v0.8b-v2.8b}, [%1], #24 \n" "st3 {v0.8b,v1.8b,v2.8b}, [%1], #24 \n"
"bgt 1b \n" "b.gt 1b \n"
: "+r"(src_ptr), // %0 : "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1 "+r"(dst_ptr), // %1
"+r"(dst_width), // %2 "+r"(dst_width), // %2
@ -259,9 +272,7 @@ void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "memory", "cc" : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "memory", "cc"
); );
} }
#endif //HAS_SCALEROWDOWN34_NEON
#ifdef HAS_SCALEROWDOWN38_NEON
static uvec8 kShuf38 = static uvec8 kShuf38 =
{ 0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27, 30, 0, 0, 0, 0 }; { 0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27, 30, 0, 0, 0, 0 };
static uvec8 kShuf38_2 = static uvec8 kShuf38_2 =
@ -283,13 +294,13 @@ void ScaleRowDown38_NEON(const uint8* src_ptr,
"1: \n" "1: \n"
MEMACCESS(0) MEMACCESS(0)
"ld1 {v0.16b,v1.16b}, [%0], #32 \n" "ld1 {v0.16b,v1.16b}, [%0], #32 \n"
"subs %2, %2, #12 \n" "subs %w2, %w2, #12 \n"
"tbl v2.16b, {v0.16b,v1.16b}, v3.16b \n" "tbl v2.16b, {v0.16b,v1.16b}, v3.16b \n"
MEMACCESS(1) MEMACCESS(1)
"st1 {v2.8b}, [%1], #8 \n" "st1 {v2.8b}, [%1], #8 \n"
MEMACCESS(1) MEMACCESS(1)
"st1 {v2.s}[2], [%1], #4 \n" "st1 {v2.s}[2], [%1], #4 \n"
"bgt 1b \n" "b.gt 1b \n"
: "+r"(src_ptr), // %0 : "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1 "+r"(dst_ptr), // %1
"+r"(dst_width) // %2 "+r"(dst_width) // %2
@ -298,14 +309,12 @@ void ScaleRowDown38_NEON(const uint8* src_ptr,
); );
} }
#endif //HAS_SCALEROWDOWN38_NEON
#ifdef HAS_SCALEROWDOWN38_NEON
// 32x3 -> 12x1 // 32x3 -> 12x1
void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr, void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) { uint8* dst_ptr, int dst_width) {
const uint8* src_ptr1 = src_ptr + src_stride * 2; const uint8* src_ptr1 = src_ptr + src_stride * 2;
ptrdiff_t tmp_src_stride = src_stride;
asm volatile ( asm volatile (
MEMACCESS(5) MEMACCESS(5)
@ -314,7 +323,7 @@ void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
"ld1 {v30.16b}, [%6] \n" "ld1 {v30.16b}, [%6] \n"
MEMACCESS(7) MEMACCESS(7)
"ld1 {v31.8h}, [%7] \n" "ld1 {v31.8h}, [%7] \n"
"add %3, %3, %0 \n" "add %2, %2, %0 \n"
"1: \n" "1: \n"
// 00 40 01 41 02 42 03 43 // 00 40 01 41 02 42 03 43
@ -322,12 +331,12 @@ void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
// 20 60 21 61 22 62 23 63 // 20 60 21 61 22 62 23 63
// 30 70 31 71 32 72 33 73 // 30 70 31 71 32 72 33 73
MEMACCESS(0) MEMACCESS(0)
"ld4 {v0.8b-v3.8b}, [%0], #32 \n" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n"
MEMACCESS(3) MEMACCESS(3)
"ld4 {v4.8b-v7.8b}, [%3], #32 \n" "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%2], #32 \n"
MEMACCESS(4) MEMACCESS(4)
"ld4 {v16.8b-v19.8b}, [%4], #32 \n" "ld4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%3], #32 \n"
"subs %2, %2, #12 \n" "subs %w4, %w4, #12 \n"
// Shuffle the input data around to get align the data // Shuffle the input data around to get align the data
// so adjacent data can be added. 0,1 - 2,3 - 4,5 - 6,7 // so adjacent data can be added. 0,1 - 2,3 - 4,5 - 6,7
@ -414,12 +423,12 @@ void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
"st1 {v3.8b}, [%1], #8 \n" "st1 {v3.8b}, [%1], #8 \n"
MEMACCESS(1) MEMACCESS(1)
"st1 {v3.s}[2], [%1], #4 \n" "st1 {v3.s}[2], [%1], #4 \n"
"bgt 1b \n" "b.gt 1b \n"
: "+r"(src_ptr), // %0 : "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1 "+r"(dst_ptr), // %1
"+r"(dst_width), // %2 "+r"(tmp_src_stride), // %2
"+r"(src_stride), // %3 "+r"(src_ptr1), // %3
"+r"(src_ptr1) // %4 "+r"(dst_width) // %4
: "r"(&kMult38_Div6), // %5 : "r"(&kMult38_Div6), // %5
"r"(&kShuf38_2), // %6 "r"(&kShuf38_2), // %6
"r"(&kMult38_Div9) // %7 "r"(&kMult38_Div9) // %7
@ -428,19 +437,19 @@ void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
"v30", "v31", "memory", "cc" "v30", "v31", "memory", "cc"
); );
} }
#endif //HAS_SCALEROWDOWN38_NEON
#ifdef HAS_SCALEROWDOWN38_NEON
// 32x2 -> 12x1 // 32x2 -> 12x1
void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr, void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) { uint8* dst_ptr, int dst_width) {
// TODO(fbarchard): use src_stride directly for clang 3.5+.
ptrdiff_t tmp_src_stride = src_stride;
asm volatile ( asm volatile (
MEMACCESS(4) MEMACCESS(4)
"ld1 {v30.8h}, [%4] \n" "ld1 {v30.8h}, [%4] \n"
MEMACCESS(5) MEMACCESS(5)
"ld1 {v31.16b}, [%5] \n" "ld1 {v31.16b}, [%5] \n"
"add %3, %3, %0 \n" "add %2, %2, %0 \n"
"1: \n" "1: \n"
// 00 40 01 41 02 42 03 43 // 00 40 01 41 02 42 03 43
@ -448,10 +457,10 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
// 20 60 21 61 22 62 23 63 // 20 60 21 61 22 62 23 63
// 30 70 31 71 32 72 33 73 // 30 70 31 71 32 72 33 73
MEMACCESS(0) MEMACCESS(0)
"ld4 {v0.8b-v3.8b}, [%0], #32 \n" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n"
MEMACCESS(3) MEMACCESS(3)
"ld4 {v4.8b-v7.8b}, [%3], #32 \n" "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%2], #32 \n"
"subs %2, %2, #12 \n" "subs %w3, %w3, #12 \n"
// Shuffle the input data around to get align the data // Shuffle the input data around to get align the data
// so adjacent data can be added. 0,1 - 2,3 - 4,5 - 6,7 // so adjacent data can be added. 0,1 - 2,3 - 4,5 - 6,7
@ -524,18 +533,124 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
"st1 {v3.8b}, [%1], #8 \n" "st1 {v3.8b}, [%1], #8 \n"
MEMACCESS(1) MEMACCESS(1)
"st1 {v3.s}[2], [%1], #4 \n" "st1 {v3.s}[2], [%1], #4 \n"
"bgt 1b \n" "b.gt 1b \n"
: "+r"(src_ptr), // %0 : "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1 "+r"(dst_ptr), // %1
"+r"(dst_width), // %2 "+r"(tmp_src_stride), // %2
"+r"(src_stride) // %3 "+r"(dst_width) // %3
: "r"(&kMult38_Div6), // %4 : "r"(&kMult38_Div6), // %4
"r"(&kShuf38_2) // %5 "r"(&kShuf38_2) // %5
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
"v18", "v19", "v30", "v31", "memory", "cc" "v18", "v19", "v30", "v31", "memory", "cc"
); );
} }
#endif //HAS_SCALEROWDOWN38_NEON
// Sums src_height rows of 8 bit pixels column by column into 16 bit totals.
// For every group of 16 columns the inner loop walks down the rows (stepping
// by src_stride), accumulating into two 8 x u16 registers, then 16 uint16
// sums are written to dst_ptr.
// src_width is consumed 16 columns per outer iteration; both loops execute
// their body at least once before the counter is tested.
void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint16* dst_ptr, int src_width, int src_height) {
// Row cursor for the current column group; (re)initialised inside the asm.
const uint8* src_tmp = NULL;
asm volatile (
"1: \n"
"mov %0, %1 \n" // src_tmp = top of this column group
"mov w12, %w5 \n" // w12 = remaining rows (src_height)
"eor v2.16b, v2.16b, v2.16b \n" // clear sums for columns 0..7
"eor v3.16b, v3.16b, v3.16b \n" // clear sums for columns 8..15
"2: \n"
// load 16 pixels into v0
MEMACCESS(0)
"ld1 {v0.16b}, [%0], %3 \n" // post-increment by src_stride: next row
"uaddw2 v3.8h, v3.8h, v0.16b \n" // widen and add upper 8 pixels
"uaddw v2.8h, v2.8h, v0.8b \n" // widen and add lower 8 pixels
"subs w12, w12, #1 \n"
"b.gt 2b \n"
MEMACCESS(2)
"st1 {v2.8h, v3.8h}, [%2], #32 \n" // store pixels
"add %1, %1, #16 \n" // advance to the next 16 columns
"subs %w4, %w4, #16 \n" // 16 processed per loop
"b.gt 1b \n"
: "+r"(src_tmp), // %0
"+r"(src_ptr), // %1
"+r"(dst_ptr), // %2
"+r"(src_stride), // %3
"+r"(src_width), // %4
"+r"(src_height) // %5
:
: "memory", "cc", "w12", "v0", "v1", "v2", "v3" // Clobber List
);
}
// TODO(Yang Zhang): Investigate less load instructions for
// the x/dx stepping
// LOAD2_DATA8_LANE(n): computes the source index x >> 16 into %5, forms the
// address src_ptr + index in %6, advances x (%3) by dx (%4), then loads the
// two adjacent bytes at that address into lane n of v4 (first byte) and
// v5 (second byte).
#define LOAD2_DATA8_LANE(n) \
"lsr %5, %3, #16 \n" \
"add %6, %1, %5 \n" \
"add %3, %3, %4 \n" \
MEMACCESS(6) \
"ld2 {v4.b, v5.b}["#n"], [%6] \n"
// Horizontally scales a row of 8 bit pixels with linear interpolation.
// x and dx are used as 16.16 fixed point: x >> 16 selects the source pixel
// pair (a, b) and the low 16 bits of x are the blend fraction f. Each output
// byte is a + (((b - a) * f) >> 16).
// Eight output pixels are produced per loop iteration; the body always
// executes at least once before dst_width is tested.
void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx) {
// Lane multipliers used to build x + i * dx for i = 0..3 in one vector.
int dx_offset[4] = {0, 1, 2, 3};
// %5: points at dx_offset for the initial ld1, then is reused as the
// scratch index register by LOAD2_DATA8_LANE.
int* tmp = dx_offset;
// %6: scratch address register written by LOAD2_DATA8_LANE.
const uint8* src_tmp = src_ptr;
// 64 bit copies of the int arguments so they can be bound to 64 bit
// registers and used directly in the address arithmetic of the macro.
int64 dst_width64 = (int64) dst_width; // Work around ios 64 bit warning.
int64 x64 = (int64) x;
int64 dx64 = (int64) dx;
asm volatile (
"dup v0.4s, %w3 \n" // x
"dup v1.4s, %w4 \n" // dx
"ld1 {v2.4s}, [%5] \n" // 0 1 2 3
"shl v3.4s, v1.4s, #2 \n" // 4 * dx
"mul v1.4s, v1.4s, v2.4s \n"
// x , x + 1 * dx, x + 2 * dx, x + 3 * dx
"add v1.4s, v1.4s, v0.4s \n"
// x + 4 * dx, x + 5 * dx, x + 6 * dx, x + 7 * dx
"add v2.4s, v1.4s, v3.4s \n"
"shl v0.4s, v3.4s, #1 \n" // 8 * dx
"1: \n"
// Gather 8 pixel pairs: v4 = a (left pixels), v5 = b (right pixels).
LOAD2_DATA8_LANE(0)
LOAD2_DATA8_LANE(1)
LOAD2_DATA8_LANE(2)
LOAD2_DATA8_LANE(3)
LOAD2_DATA8_LANE(4)
LOAD2_DATA8_LANE(5)
LOAD2_DATA8_LANE(6)
LOAD2_DATA8_LANE(7)
"mov v6.16b, v1.16b \n"
"mov v7.16b, v2.16b \n"
"uzp1 v6.8h, v6.8h, v7.8h \n" // low 16 bits (fraction) of the 8 x values
"ushll v4.8h, v4.8b, #0 \n" // a widened to 16 bits
"ushll v5.8h, v5.8b, #0 \n" // b widened to 16 bits
"ssubl v16.4s, v5.4h, v4.4h \n" // b - a, pixels 0..3
"ssubl2 v17.4s, v5.8h, v4.8h \n" // b - a, pixels 4..7
"ushll v7.4s, v6.4h, #0 \n" // fractions 0..3 widened to 32 bits
"ushll2 v6.4s, v6.8h, #0 \n" // fractions 4..7 widened to 32 bits
"mul v16.4s, v16.4s, v7.4s \n" // (b - a) * f
"mul v17.4s, v17.4s, v6.4s \n"
"shrn v6.4h, v16.4s, #16 \n" // >> 16 and narrow to 16 bits
"shrn2 v6.8h, v17.4s, #16 \n"
"add v4.8h, v4.8h, v6.8h \n" // a + ((b - a) * f >> 16)
"xtn v4.8b, v4.8h \n" // narrow to 8 bit pixels
MEMACCESS(0)
"st1 {v4.8b}, [%0], #8 \n" // store pixels
"add v1.4s, v1.4s, v0.4s \n" // step the vector x positions by 8 * dx
"add v2.4s, v2.4s, v0.4s \n"
"subs %w2, %w2, #8 \n" // 8 processed per loop
"b.gt 1b \n"
: "+r"(dst_ptr), // %0
"+r"(src_ptr), // %1
"+r"(dst_width64), // %2
"+r"(x64), // %3
"+r"(dx64), // %4
"+r"(tmp), // %5
"+r"(src_tmp) // %6
:
: "memory", "cc", "v0", "v1", "v2", "v3",
"v4", "v5", "v6", "v7", "v16", "v17"
);
}
#undef LOAD2_DATA8_LANE
// 16x2 -> 16x1 // 16x2 -> 16x1
void ScaleFilterRows_NEON(uint8* dst_ptr, void ScaleFilterRows_NEON(uint8* dst_ptr,
@ -543,15 +658,15 @@ void ScaleFilterRows_NEON(uint8* dst_ptr,
int dst_width, int source_y_fraction) { int dst_width, int source_y_fraction) {
int y_fraction = 256 - source_y_fraction; int y_fraction = 256 - source_y_fraction;
asm volatile ( asm volatile (
"cmp %4, #0 \n" "cmp %w4, #0 \n"
"beq 100f \n" "b.eq 100f \n"
"add %2, %2, %1 \n" "add %2, %2, %1 \n"
"cmp %4, #64 \n" "cmp %w4, #64 \n"
"beq 75f \n" "b.eq 75f \n"
"cmp %4, #128 \n" "cmp %w4, #128 \n"
"beq 50f \n" "b.eq 50f \n"
"cmp %4, #192 \n" "cmp %w4, #192 \n"
"beq 25f \n" "b.eq 25f \n"
"dup v5.8b, %w4 \n" "dup v5.8b, %w4 \n"
"dup v4.8b, %w5 \n" "dup v4.8b, %w5 \n"
@ -561,7 +676,7 @@ void ScaleFilterRows_NEON(uint8* dst_ptr,
"ld1 {v0.16b}, [%1], #16 \n" "ld1 {v0.16b}, [%1], #16 \n"
MEMACCESS(2) MEMACCESS(2)
"ld1 {v1.16b}, [%2], #16 \n" "ld1 {v1.16b}, [%2], #16 \n"
"subs %3, %3, #16 \n" "subs %w3, %w3, #16 \n"
"umull v6.8h, v0.8b, v4.8b \n" "umull v6.8h, v0.8b, v4.8b \n"
"umull2 v7.8h, v0.16b, v4.16b \n" "umull2 v7.8h, v0.16b, v4.16b \n"
"umlal v6.8h, v1.8b, v5.8b \n" "umlal v6.8h, v1.8b, v5.8b \n"
@ -570,7 +685,7 @@ void ScaleFilterRows_NEON(uint8* dst_ptr,
"rshrn2 v0.16b, v7.8h, #8 \n" "rshrn2 v0.16b, v7.8h, #8 \n"
MEMACCESS(0) MEMACCESS(0)
"st1 {v0.16b}, [%0], #16 \n" "st1 {v0.16b}, [%0], #16 \n"
"bgt 1b \n" "b.gt 1b \n"
"b 99f \n" "b 99f \n"
// Blend 25 / 75. // Blend 25 / 75.
@ -579,12 +694,12 @@ void ScaleFilterRows_NEON(uint8* dst_ptr,
"ld1 {v0.16b}, [%1], #16 \n" "ld1 {v0.16b}, [%1], #16 \n"
MEMACCESS(2) MEMACCESS(2)
"ld1 {v1.16b}, [%2], #16 \n" "ld1 {v1.16b}, [%2], #16 \n"
"subs %3, %3, #16 \n" "subs %w3, %w3, #16 \n"
"urhadd v0.16b, v0.16b, v1.16b \n" "urhadd v0.16b, v0.16b, v1.16b \n"
"urhadd v0.16b, v0.16b, v1.16b \n" "urhadd v0.16b, v0.16b, v1.16b \n"
MEMACCESS(0) MEMACCESS(0)
"st1 {v0.16b}, [%0], #16 \n" "st1 {v0.16b}, [%0], #16 \n"
"bgt 25b \n" "b.gt 25b \n"
"b 99f \n" "b 99f \n"
// Blend 50 / 50. // Blend 50 / 50.
@ -593,11 +708,11 @@ void ScaleFilterRows_NEON(uint8* dst_ptr,
"ld1 {v0.16b}, [%1], #16 \n" "ld1 {v0.16b}, [%1], #16 \n"
MEMACCESS(2) MEMACCESS(2)
"ld1 {v1.16b}, [%2], #16 \n" "ld1 {v1.16b}, [%2], #16 \n"
"subs %3, %3, #16 \n" "subs %w3, %w3, #16 \n"
"urhadd v0.16b, v0.16b, v1.16b \n" "urhadd v0.16b, v0.16b, v1.16b \n"
MEMACCESS(0) MEMACCESS(0)
"st1 {v0.16b}, [%0], #16 \n" "st1 {v0.16b}, [%0], #16 \n"
"bgt 50b \n" "b.gt 50b \n"
"b 99f \n" "b 99f \n"
// Blend 75 / 25. // Blend 75 / 25.
@ -606,22 +721,22 @@ void ScaleFilterRows_NEON(uint8* dst_ptr,
"ld1 {v1.16b}, [%1], #16 \n" "ld1 {v1.16b}, [%1], #16 \n"
MEMACCESS(2) MEMACCESS(2)
"ld1 {v0.16b}, [%2], #16 \n" "ld1 {v0.16b}, [%2], #16 \n"
"subs %3, %3, #16 \n" "subs %w3, %w3, #16 \n"
"urhadd v0.16b, v0.16b, v1.16b \n" "urhadd v0.16b, v0.16b, v1.16b \n"
"urhadd v0.16b, v0.16b, v1.16b \n" "urhadd v0.16b, v0.16b, v1.16b \n"
MEMACCESS(0) MEMACCESS(0)
"st1 {v0.16b}, [%0], #16 \n" "st1 {v0.16b}, [%0], #16 \n"
"bgt 75b \n" "b.gt 75b \n"
"b 99f \n" "b 99f \n"
// Blend 100 / 0 - Copy row unchanged. // Blend 100 / 0 - Copy row unchanged.
"100: \n" "100: \n"
MEMACCESS(1) MEMACCESS(1)
"ld1 {v0.16b}, [%1], #16 \n" "ld1 {v0.16b}, [%1], #16 \n"
"subs %3, %3, #16 \n" "subs %w3, %w3, #16 \n"
MEMACCESS(0) MEMACCESS(0)
"st1 {v0.16b}, [%0], #16 \n" "st1 {v0.16b}, [%0], #16 \n"
"bgt 100b \n" "b.gt 100b \n"
"99: \n" "99: \n"
MEMACCESS(0) MEMACCESS(0)
@ -637,7 +752,6 @@ void ScaleFilterRows_NEON(uint8* dst_ptr,
); );
} }
#ifdef HAS_SCALEARGBROWDOWN2_NEON
void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) { uint8* dst, int dst_width) {
asm volatile ( asm volatile (
@ -647,12 +761,12 @@ void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
"ld2 {v0.4s, v1.4s}, [%0], #32 \n" "ld2 {v0.4s, v1.4s}, [%0], #32 \n"
MEMACCESS (0) MEMACCESS (0)
"ld2 {v2.4s, v3.4s}, [%0], #32 \n" "ld2 {v2.4s, v3.4s}, [%0], #32 \n"
"subs %2, %2, #8 \n" // 8 processed per loop "subs %w2, %w2, #8 \n" // 8 processed per loop
MEMACCESS (1) MEMACCESS (1)
"st1 {v1.16b}, [%1], #16 \n" // store odd pixels "st1 {v1.16b}, [%1], #16 \n" // store odd pixels
MEMACCESS (1) MEMACCESS (1)
"st1 {v3.16b}, [%1], #16 \n" "st1 {v3.16b}, [%1], #16 \n"
"bgt 1b \n" "b.gt 1b \n"
: "+r" (src_ptr), // %0 : "+r" (src_ptr), // %0
"+r" (dst), // %1 "+r" (dst), // %1
"+r" (dst_width) // %2 "+r" (dst_width) // %2
@ -660,9 +774,34 @@ void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
: "memory", "cc", "v0", "v1", "v2", "v3" // Clobber List : "memory", "cc", "v0", "v1", "v2", "v3" // Clobber List
); );
} }
#endif //HAS_SCALEARGBROWDOWN2_NEON
#ifdef HAS_SCALEARGBROWDOWN2_NEON void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb, ptrdiff_t src_stride,
uint8* dst_argb, int dst_width) {
// Halves a row of 4-byte pixels horizontally: each output pixel is the rounded
// per-channel average of two adjacent source pixels. src_stride is not used by
// this function (it never appears in the operand list). The loop stores 8
// output pixels per iteration and runs while the remaining count is > 0, so
// dst_width is presumably a multiple of 8 (TODO confirm with callers).
asm volatile (
"1: \n"
MEMACCESS (0)
// load 64 bytes (16 source pixels), de-interleaved by channel into v0-v3.
"ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n"
"subs %w2, %w2, #8 \n" // 8 processed per loop.
// Pairwise add sums horizontally adjacent pixels within each channel.
"uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts.
"uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts.
"uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts.
"uaddlp v3.8h, v3.16b \n" // A 16 bytes -> 8 shorts.
"rshrn v0.8b, v0.8h, #1 \n" // downshift, round and pack
"rshrn v1.8b, v1.8h, #1 \n"
"rshrn v2.8b, v2.8h, #1 \n"
"rshrn v3.8b, v3.8h, #1 \n"
MEMACCESS (1)
// store 32 bytes (8 output pixels), re-interleaving the four channels.
"st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n"
"b.gt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_argb), // %1
"+r"(dst_width) // %2
:
: "memory", "cc", "v0", "v1", "v2", "v3" // Clobber List
);
}
void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) { uint8* dst, int dst_width) {
asm volatile ( asm volatile (
@ -670,14 +809,14 @@ void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
"add %1, %1, %0 \n" "add %1, %1, %0 \n"
"1: \n" "1: \n"
MEMACCESS (0) MEMACCESS (0)
"ld4 {v0.16b - v3.16b}, [%0], #64 \n" // load 8 ARGB pixels. "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 8 ARGB pixels.
"subs %3, %3, #8 \n" // 8 processed per loop. "subs %w3, %w3, #8 \n" // 8 processed per loop.
"uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts. "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts.
"uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts. "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts.
"uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts. "uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts.
"uaddlp v3.8h, v3.16b \n" // A 16 bytes -> 8 shorts. "uaddlp v3.8h, v3.16b \n" // A 16 bytes -> 8 shorts.
MEMACCESS (1) MEMACCESS (1)
"ld4 {v16.16b - v19.16b}, [%1], #64 \n" // load 8 more ARGB pixels. "ld4 {v16.16b,v17.16b,v18.16b,v19.16b}, [%1], #64 \n" // load 8 more ARGB pixels.
"uadalp v0.8h, v16.16b \n" // B 16 bytes -> 8 shorts. "uadalp v0.8h, v16.16b \n" // B 16 bytes -> 8 shorts.
"uadalp v1.8h, v17.16b \n" // G 16 bytes -> 8 shorts. "uadalp v1.8h, v17.16b \n" // G 16 bytes -> 8 shorts.
"uadalp v2.8h, v18.16b \n" // R 16 bytes -> 8 shorts. "uadalp v2.8h, v18.16b \n" // R 16 bytes -> 8 shorts.
@ -687,8 +826,8 @@ void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
"rshrn v2.8b, v2.8h, #2 \n" "rshrn v2.8b, v2.8h, #2 \n"
"rshrn v3.8b, v3.8h, #2 \n" "rshrn v3.8b, v3.8h, #2 \n"
MEMACCESS (2) MEMACCESS (2)
"st4 {v0.8b - v3.8b}, [%2], #32 \n" "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n"
"bgt 1b \n" "b.gt 1b \n"
: "+r" (src_ptr), // %0 : "+r" (src_ptr), // %0
"+r" (src_stride), // %1 "+r" (src_stride), // %1
"+r" (dst), // %2 "+r" (dst), // %2
@ -697,9 +836,7 @@ void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
: "memory", "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19" : "memory", "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19"
); );
} }
#endif //HAS_SCALEARGBROWDOWN2_NEON
#ifdef HAS_SCALEARGBROWDOWNEVEN_NEON
// Reads 4 pixels at a time. // Reads 4 pixels at a time.
// Alignment requirement: src_argb 4 byte aligned. // Alignment requirement: src_argb 4 byte aligned.
void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride, void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride,
@ -714,23 +851,21 @@ void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride,
"ld1 {v0.s}[2], [%0], %3 \n" "ld1 {v0.s}[2], [%0], %3 \n"
MEMACCESS(0) MEMACCESS(0)
"ld1 {v0.s}[3], [%0], %3 \n" "ld1 {v0.s}[3], [%0], %3 \n"
"subs %2, %2, #4 \n" // 4 pixels per loop. "subs %w2, %w2, #4 \n" // 4 pixels per loop.
MEMACCESS(1) MEMACCESS(1)
"st1 {v0.16b}, [%1], #16 \n" "st1 {v0.16b}, [%1], #16 \n"
"bgt 1b \n" "b.gt 1b \n"
: "+r"(src_argb), // %0 : "+r"(src_argb), // %0
"+r"(dst_argb), // %1 "+r"(dst_argb), // %1
"+r"(dst_width) // %2 "+r"(dst_width) // %2
: "r"(src_stepx * 4) // %3 : "r"((int64)(src_stepx * 4)) // %3
: "memory", "cc", "v0" : "memory", "cc", "v0"
); );
} }
#endif //HAS_SCALEARGBROWDOWNEVEN_NEON
#ifdef HAS_SCALEARGBROWDOWNEVEN_NEON
// Reads 4 pixels at a time. // Reads 4 pixels at a time.
// Alignment requirement: src_argb 4 byte aligned. // Alignment requirement: src_argb 4 byte aligned.
// TODO, might be worth another optimization pass in future. // TODO(Yang Zhang): Might be worth another optimization pass in future.
// It could be upgraded to 8 pixels at a time to start with. // It could be upgraded to 8 pixels at a time to start with.
void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride, void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx, int src_stepx,
@ -768,20 +903,138 @@ void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
"add v4.8h, v4.8h, v6.8h \n" // (e+f)_(g+h) "add v4.8h, v4.8h, v6.8h \n" // (e+f)_(g+h)
"rshrn v0.8b, v0.8h, #2 \n" // first 2 pixels. "rshrn v0.8b, v0.8h, #2 \n" // first 2 pixels.
"rshrn2 v0.16b, v4.8h, #2 \n" // next 2 pixels. "rshrn2 v0.16b, v4.8h, #2 \n" // next 2 pixels.
"subs %3, %3, #4 \n" // 4 pixels per loop. "subs %w3, %w3, #4 \n" // 4 pixels per loop.
MEMACCESS(2) MEMACCESS(2)
"st1 {v0.16b}, [%2], #16 \n" "st1 {v0.16b}, [%2], #16 \n"
"bgt 1b \n" "b.gt 1b \n"
: "+r"(src_argb), // %0 : "+r"(src_argb), // %0
"+r"(src_stride), // %1 "+r"(src_stride), // %1
"+r"(dst_argb), // %2 "+r"(dst_argb), // %2
"+r"(dst_width) // %3 "+r"(dst_width) // %3
: "r"(src_stepx * 4) // %4 : "r"((int64)(src_stepx * 4)) // %4
: "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16" : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"
); );
} }
#endif // HAS_SCALEARGBROWDOWNEVEN_NEON
#endif // __aarch64__ // TODO(Yang Zhang): Investigate less load instructions for
// the x/dx stepping
// Asm fragment that fetches one 32-bit element and steps the position.
// Operand numbers refer to the asm statement that expands the macro:
//   %5 = %3 >> 16          (logical shift; index derived from the position)
//   %6 = %1 + (%5 << 2)    (base address plus index * 4 bytes)
//   %3 += %4               (advance the position by the step)
//   load 32 bits from [%6] into lane n of vector register vn.
// %5 and %6 are overwritten on every expansion, so they act as scratch.
#define LOAD1_DATA32_LANE(vn, n) \
"lsr %5, %3, #16 \n" \
"add %6, %1, %5, lsl #2 \n" \
"add %3, %3, %4 \n" \
MEMACCESS(6) \
"ld1 {"#vn".s}["#n"], [%6] \n"
// Scales a row of 4-byte pixels horizontally by point sampling (no filtering).
// For each output pixel, LOAD1_DATA32_LANE reads the source pixel at index
// (x >> 16) and then adds dx to x, so x and dx act as 16.16 fixed-point
// position and step. Eight pixels are gathered into v0/v1 and stored per
// iteration; the loop runs while the remaining count is > 0, so dst_width is
// presumably a multiple of 8 (TODO confirm with callers).
// NOTE(review): the index uses a 64-bit logical shift of the sign-extended x,
// so this appears to assume x stays non-negative -- confirm.
void ScaleARGBCols_NEON(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx) {
const uint8* src_tmp = src_argb;
int64 dst_width64 = (int64) dst_width; // Work around ios 64 bit warning.
int64 x64 = (int64) x;
int64 dx64 = (int64) dx;
// Scratch register for the pixel index computed by LOAD1_DATA32_LANE.
int64 tmp64 = 0;
asm volatile (
"1: \n"
// Fill the four 32-bit lanes of v0, then of v1, one source pixel each.
LOAD1_DATA32_LANE(v0, 0)
LOAD1_DATA32_LANE(v0, 1)
LOAD1_DATA32_LANE(v0, 2)
LOAD1_DATA32_LANE(v0, 3)
LOAD1_DATA32_LANE(v1, 0)
LOAD1_DATA32_LANE(v1, 1)
LOAD1_DATA32_LANE(v1, 2)
LOAD1_DATA32_LANE(v1, 3)
MEMACCESS(0)
"st1 {v0.4s, v1.4s}, [%0], #32 \n" // store pixels
"subs %w2, %w2, #8 \n" // 8 processed per loop
"b.gt 1b \n"
: "+r"(dst_argb), // %0
"+r"(src_argb), // %1
"+r"(dst_width64), // %2
"+r"(x64), // %3
"+r"(dx64), // %4
"+r"(tmp64), // %5
"+r"(src_tmp) // %6
:
: "memory", "cc", "v0", "v1"
);
}
#undef LOAD1_DATA32_LANE
// TODO(Yang Zhang): Investigate less load instructions for
// the x/dx stepping
// Asm fragment that fetches two consecutive 32-bit elements and steps the
// position. Operand numbers refer to the asm statement that expands the macro:
//   %5 = %3 >> 16          (logical shift; index derived from the position)
//   %6 = %1 + (%5 << 2)    (base address plus index * 4 bytes)
//   %3 += %4               (advance the position by the step)
//   ld2 from [%6]: first element into lane n of vn1, second into lane n of vn2.
// %5 and %6 are overwritten on every expansion, so they act as scratch.
#define LOAD2_DATA32_LANE(vn1, vn2, n) \
"lsr %5, %3, #16 \n" \
"add %6, %1, %5, lsl #2 \n" \
"add %3, %3, %4 \n" \
MEMACCESS(6) \
"ld2 {"#vn1".s, "#vn2".s}["#n"], [%6] \n"
// Scales a row of 4-byte pixels horizontally with linear filtering.
// For each output pixel, LOAD2_DATA32_LANE reads the source pixel at index
// (x >> 16) and its right-hand neighbour, then adds dx to x (x and dx act as
// 16.16 fixed point). The blend weight f is the top 7 bits of the fractional
// part, (x >> 9) & 0x7f, and every byte is computed as
//   dst = (a * (0x7f ^ f) + b * f) >> 7.
// Four pixels are produced per iteration; the loop runs while the remaining
// count is > 0, so dst_width is presumably a multiple of 4 (TODO confirm).
void ScaleARGBFilterCols_NEON(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx) {
// Lane multipliers used to build x + {0,1,2,3} * dx inside the asm block.
int dx_offset[4] = {0, 1, 2, 3};
// Pointer for the initial ld1; its register (%5) is then reused as scratch
// by LOAD2_DATA32_LANE.
int* tmp = dx_offset;
const uint8* src_tmp = src_argb;
int64 dst_width64 = (int64) dst_width; // Work around ios 64 bit warning.
int64 x64 = (int64) x;
int64 dx64 = (int64) dx;
asm volatile (
"dup v0.4s, %w3 \n" // x
"dup v1.4s, %w4 \n" // dx
"ld1 {v2.4s}, [%5] \n" // 0 1 2 3
"shl v6.4s, v1.4s, #2 \n" // 4 * dx
"mul v1.4s, v1.4s, v2.4s \n" // dx * {0, 1, 2, 3}
"movi v3.16b, #0x7f \n" // 0x7F
"movi v4.8h, #0x7f \n" // 0x7F
// x , x + 1 * dx, x + 2 * dx, x + 3 * dx
"add v5.4s, v1.4s, v0.4s \n"
"1: \n"
// v0: a (pixel at the sampled index), one per lane
// v1: b (the following pixel), one per lane
LOAD2_DATA32_LANE(v0, v1, 0)
LOAD2_DATA32_LANE(v0, v1, 1)
LOAD2_DATA32_LANE(v0, v1, 2)
LOAD2_DATA32_LANE(v0, v1, 3)
"shrn v2.4h, v5.4s, #9 \n" // x >> 9, narrowed to 16 bits
"and v2.8b, v2.8b, v4.8b \n" // keep 7 bits: per-pixel weight f
// Broadcast each pixel's weight byte, then pack so that every one of the
// 16 bytes in v2 holds the weight of the pixel it belongs to.
"dup v16.8b, v2.b[0] \n"
"dup v17.8b, v2.b[2] \n"
"dup v18.8b, v2.b[4] \n"
"dup v19.8b, v2.b[6] \n"
"ext v2.8b, v16.8b, v17.8b, #4 \n"
"ext v17.8b, v18.8b, v19.8b, #4 \n"
"ins v2.d[1], v17.d[0] \n" // f
"eor v7.16b, v2.16b, v3.16b \n" // 0x7f ^ f
"umull v16.8h, v0.8b, v7.8b \n" // a * (0x7f ^ f), low 8 bytes
"umull2 v17.8h, v0.16b, v7.16b \n" // a * (0x7f ^ f), high 8 bytes
"umull v18.8h, v1.8b, v2.8b \n" // b * f, low 8 bytes
"umull2 v19.8h, v1.16b, v2.16b \n" // b * f, high 8 bytes
"add v16.8h, v16.8h, v18.8h \n"
"add v17.8h, v17.8h, v19.8h \n"
"shrn v0.8b, v16.8h, #7 \n" // >> 7 and narrow to bytes
"shrn2 v0.16b, v17.8h, #7 \n"
MEMACCESS(0)
"st1 {v0.4s}, [%0], #16 \n" // store pixels
"add v5.4s, v5.4s, v6.4s \n" // advance x vector by 4 * dx
"subs %w2, %w2, #4 \n" // 4 processed per loop
"b.gt 1b \n"
: "+r"(dst_argb), // %0
"+r"(src_argb), // %1
"+r"(dst_width64), // %2
"+r"(x64), // %3
"+r"(dx64), // %4
"+r"(tmp), // %5
"+r"(src_tmp) // %6
:
: "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5",
"v6", "v7", "v16", "v17", "v18", "v19"
);
}
#undef LOAD2_DATA32_LANE
#endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
#ifdef __cplusplus #ifdef __cplusplus
} // extern "C" } // extern "C"

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -33,7 +33,7 @@ static const struct FourCCAliasEntry kFourCCAliases[] = {
{FOURCC_2VUY, FOURCC_UYVY}, // kCMPixelFormat_422YpCbCr8 {FOURCC_2VUY, FOURCC_UYVY}, // kCMPixelFormat_422YpCbCr8
{FOURCC_JPEG, FOURCC_MJPG}, // Note: JPEG has DHT while MJPG does not. {FOURCC_JPEG, FOURCC_MJPG}, // Note: JPEG has DHT while MJPG does not.
{FOURCC_DMB1, FOURCC_MJPG}, {FOURCC_DMB1, FOURCC_MJPG},
{FOURCC_BA81, FOURCC_BGGR}, {FOURCC_BA81, FOURCC_BGGR}, // deprecated.
{FOURCC_RGB3, FOURCC_RAW }, {FOURCC_RGB3, FOURCC_RAW },
{FOURCC_BGR3, FOURCC_24BG}, {FOURCC_BGR3, FOURCC_24BG},
{FOURCC_CM32, FOURCC_BGRA}, // kCMPixelFormat_32ARGB {FOURCC_CM32, FOURCC_BGRA}, // kCMPixelFormat_32ARGB

File diff suppressed because it is too large Load Diff

View File

@ -17,6 +17,7 @@
** language. The code for the "sqlite3" command-line shell is also in a ** language. The code for the "sqlite3" command-line shell is also in a
** separate file. This file contains only code for the core SQLite library. ** separate file. This file contains only code for the core SQLite library.
*/ */
#define SQLITE_THREADSAFE 0
#define SQLITE_CORE 1 #define SQLITE_CORE 1
#define SQLITE_AMALGAMATION 1 #define SQLITE_AMALGAMATION 1
#ifndef SQLITE_PRIVATE #ifndef SQLITE_PRIVATE

View File

@ -4,7 +4,7 @@
#include <android/log.h> #include <android/log.h>
#include <jni.h> #include <jni.h>
#define LOG_TAG "tmessages" #define LOG_TAG "tmessages_native"
#ifndef LOG_DISABLED #ifndef LOG_DISABLED
#define LOGI(...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, __VA_ARGS__) #define LOGI(...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, __VA_ARGS__)
#define LOGD(...) __android_log_print(ANDROID_LOG_DEBUG, LOG_TAG, __VA_ARGS__) #define LOGD(...) __android_log_print(ANDROID_LOG_DEBUG, LOG_TAG, __VA_ARGS__)

View File

@ -39,13 +39,14 @@
<uses-permission android:name="android.permission.USE_FINGERPRINT" /> <uses-permission android:name="android.permission.USE_FINGERPRINT" />
<application <application
android:name=".ApplicationLoader" android:name="org.telegram.messenger.ApplicationLoader"
android:allowBackup="false" android:allowBackup="false"
android:hardwareAccelerated="@bool/useHardwareAcceleration" android:hardwareAccelerated="@bool/useHardwareAcceleration"
android:icon="@drawable/ic_launcher" android:icon="@drawable/ic_launcher"
android:label="@string/ShortAppName" android:label="@string/ShortAppName"
android:largeHeap="true" android:largeHeap="true"
android:theme="@style/Theme.TMessages.Start"> android:theme="@style/Theme.TMessages.Start"
android:manageSpaceActivity="org.telegram.ui.ManageSpaceActivity">
<activity <activity
android:name="org.telegram.ui.LaunchActivity" android:name="org.telegram.ui.LaunchActivity"
@ -106,7 +107,14 @@
<category android:name="android.intent.category.DEFAULT" /> <category android:name="android.intent.category.DEFAULT" />
<data android:scheme="tg" /> <data android:scheme="tg" />
</intent-filter> </intent-filter>
<meta-data android:name="android.service.chooser.chooser_target_service" android:value=".TgChooserTargetService" /> <meta-data android:name="android.service.chooser.chooser_target_service" android:value="org.telegram.messenger.TgChooserTargetService" />
</activity>
<activity
android:name="org.telegram.ui.ManageSpaceActivity"
android:configChanges="keyboard|keyboardHidden|orientation|screenSize"
android:hardwareAccelerated="@bool/useHardwareAcceleration"
android:launchMode="singleTask"
android:windowSoftInputMode="adjustPan">
</activity> </activity>
<activity <activity
android:name="org.telegram.ui.IntroActivity" android:name="org.telegram.ui.IntroActivity"
@ -122,25 +130,25 @@
android:windowSoftInputMode="adjustResize|stateHidden"> android:windowSoftInputMode="adjustResize|stateHidden">
</activity> </activity>
<receiver android:name=".AutoMessageHeardReceiver"> <receiver android:name="org.telegram.messenger.AutoMessageHeardReceiver">
<intent-filter> <intent-filter>
<action android:name="org.telegram.messenger.ACTION_MESSAGE_HEARD"/> <action android:name="org.telegram.messenger.ACTION_MESSAGE_HEARD"/>
</intent-filter> </intent-filter>
</receiver> </receiver>
<receiver android:name=".AutoMessageReplyReceiver"> <receiver android:name="org.telegram.messenger.AutoMessageReplyReceiver">
<intent-filter> <intent-filter>
<action android:name="org.telegram.messenger.ACTION_MESSAGE_REPLY"/> <action android:name="org.telegram.messenger.ACTION_MESSAGE_REPLY"/>
</intent-filter> </intent-filter>
</receiver> </receiver>
<receiver android:name=".SmsListener"> <receiver android:name="org.telegram.messenger.SmsListener">
<intent-filter> <intent-filter>
<action android:name="android.provider.Telephony.SMS_RECEIVED" /> <action android:name="android.provider.Telephony.SMS_RECEIVED" />
</intent-filter> </intent-filter>
</receiver> </receiver>
<service android:name=".AuthenticatorService" android:exported="true"> <service android:name="org.telegram.messenger.AuthenticatorService" android:exported="true">
<intent-filter> <intent-filter>
<action android:name="android.accounts.AccountAuthenticator"/> <action android:name="android.accounts.AccountAuthenticator"/>
</intent-filter> </intent-filter>
@ -148,7 +156,7 @@
android:resource="@xml/auth"/> android:resource="@xml/auth"/>
</service> </service>
<service android:name=".ContactsSyncAdapterService" android:exported="true"> <service android:name="org.telegram.messenger.ContactsSyncAdapterService" android:exported="true">
<intent-filter> <intent-filter>
<action android:name="android.content.SyncAdapter" /> <action android:name="android.content.SyncAdapter" />
</intent-filter> </intent-filter>
@ -168,11 +176,12 @@
</service> </service>
<service android:name="org.telegram.messenger.NotificationsService" android:enabled="true"/> <service android:name="org.telegram.messenger.NotificationsService" android:enabled="true"/>
<service android:name=".NotificationRepeat" android:exported="false"/> <service android:name="org.telegram.messenger.NotificationRepeat" android:exported="false"/>
<service android:name=".VideoEncodingService" android:enabled="true"/> <service android:name="org.telegram.messenger.ClearCacheService" android:exported="false"/>
<service android:name=".MusicPlayerService" android:exported="true" android:enabled="true"/> <service android:name="org.telegram.messenger.VideoEncodingService" android:enabled="true"/>
<service android:name="org.telegram.messenger.MusicPlayerService" android:exported="true" android:enabled="true"/>
<receiver android:name=".MusicPlayerReceiver" > <receiver android:name="org.telegram.messenger.MusicPlayerReceiver" >
<intent-filter> <intent-filter>
<action android:name="org.telegram.android.musicplayer.close" /> <action android:name="org.telegram.android.musicplayer.close" />
<action android:name="org.telegram.android.musicplayer.pause" /> <action android:name="org.telegram.android.musicplayer.pause" />
@ -191,7 +200,7 @@
</intent-filter> </intent-filter>
</receiver> </receiver>
<receiver android:name=".WearReplyReceiver" android:enabled="true"/> <receiver android:name="org.telegram.messenger.WearReplyReceiver" android:enabled="true"/>
<uses-library android:name="com.sec.android.app.multiwindow" android:required="false" /> <uses-library android:name="com.sec.android.app.multiwindow" android:required="false" />
<meta-data android:name="com.sec.android.support.multiwindow" android:value="true" /> <meta-data android:name="com.sec.android.support.multiwindow" android:value="true" />
@ -199,8 +208,8 @@
<meta-data android:name="com.sec.android.multiwindow.DEFAULT_SIZE_H" android:value="598dp" /> <meta-data android:name="com.sec.android.multiwindow.DEFAULT_SIZE_H" android:value="598dp" />
<meta-data android:name="com.sec.android.multiwindow.MINIMUM_SIZE_W" android:value="632dp" /> <meta-data android:name="com.sec.android.multiwindow.MINIMUM_SIZE_W" android:value="632dp" />
<meta-data android:name="com.sec.android.multiwindow.MINIMUM_SIZE_H" android:value="598dp" /> <meta-data android:name="com.sec.android.multiwindow.MINIMUM_SIZE_H" android:value="598dp" />
<!--
<meta-data android:name="com.google.android.gms.car.application" android:resource="@xml/automotive_app_desc" /> <meta-data android:name="com.google.android.gms.car.application" android:resource="@xml/automotive_app_desc" />-->
</application> </application>

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later. * It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE). * You should have received a copy of the license in this archive (see LICENSE).
* *
* Copyright Nikolai Kudashov, 2013-2015. * Copyright Nikolai Kudashov, 2013-2016.
*/ */
package org.telegram.SQLite; package org.telegram.SQLite;

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later. * It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE). * You should have received a copy of the license in this archive (see LICENSE).
* *
* Copyright Nikolai Kudashov, 2013-2015. * Copyright Nikolai Kudashov, 2013-2016.
*/ */
package org.telegram.SQLite; package org.telegram.SQLite;

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later. * It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE). * You should have received a copy of the license in this archive (see LICENSE).
* *
* Copyright Nikolai Kudashov, 2013-2015. * Copyright Nikolai Kudashov, 2013-2016.
*/ */
package org.telegram.SQLite; package org.telegram.SQLite;

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later. * It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE). * You should have received a copy of the license in this archive (see LICENSE).
* *
* Copyright Nikolai Kudashov, 2013-2015. * Copyright Nikolai Kudashov, 2013-2016.
*/ */
package org.telegram.SQLite; package org.telegram.SQLite;

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later. * It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE). * You should have received a copy of the license in this archive (see LICENSE).
* *
* Copyright Nikolai Kudashov, 2013-2015. * Copyright Nikolai Kudashov, 2013-2016.
*/ */
package org.telegram.SQLite; package org.telegram.SQLite;
@ -135,6 +135,10 @@ public class SQLitePreparedStatement {
bindLong(sqliteStatementHandle, index, value); bindLong(sqliteStatementHandle, index, value);
} }
/**
 * Forwards to the two-argument {@code bindNull} overload, passing this
 * statement's {@code sqliteStatementHandle} together with {@code index}.
 * Presumably binds SQL NULL to the parameter at that position; the overload is
 * not visible here, so confirm its semantics and whether indices are 1-based.
 *
 * @param index position of the statement parameter, passed through unchanged
 * @throws SQLiteException declared by this method; presumably raised by the overload it calls
 */
public void bindNull(int index) throws SQLiteException {
bindNull(sqliteStatementHandle, index);
}
native void bindByteBuffer(int statementHandle, int index, ByteBuffer value, int length) throws SQLiteException; native void bindByteBuffer(int statementHandle, int index, ByteBuffer value, int length) throws SQLiteException;
native void bindString(int statementHandle, int index, String value) throws SQLiteException; native void bindString(int statementHandle, int index, String value) throws SQLiteException;
native void bindInt(int statementHandle, int index, int value) throws SQLiteException; native void bindInt(int statementHandle, int index, int value) throws SQLiteException;

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later. * It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE). * You should have received a copy of the license in this archive (see LICENSE).
* *
* Copyright Nikolai Kudashov, 2013-2015. * Copyright Nikolai Kudashov, 2013-2016.
*/ */
package org.telegram.messenger; package org.telegram.messenger;
@ -22,7 +22,6 @@ import android.content.pm.PackageInfo;
import android.content.res.Configuration; import android.content.res.Configuration;
import android.graphics.drawable.ColorDrawable; import android.graphics.drawable.ColorDrawable;
import android.graphics.drawable.Drawable; import android.graphics.drawable.Drawable;
import android.os.AsyncTask;
import android.os.Build; import android.os.Build;
import android.os.Handler; import android.os.Handler;
import android.os.PowerManager; import android.os.PowerManager;
@ -30,8 +29,8 @@ import android.util.Base64;
import com.google.android.gms.common.ConnectionResult; import com.google.android.gms.common.ConnectionResult;
import com.google.android.gms.common.GooglePlayServicesUtil; import com.google.android.gms.common.GooglePlayServicesUtil;
import com.google.android.gms.gcm.GoogleCloudMessaging;
import org.telegram.SQLite.DatabaseHandler;
import org.telegram.tgnet.ConnectionsManager; import org.telegram.tgnet.ConnectionsManager;
import org.telegram.tgnet.SerializedData; import org.telegram.tgnet.SerializedData;
import org.telegram.tgnet.TLRPC; import org.telegram.tgnet.TLRPC;
@ -40,17 +39,11 @@ import org.telegram.ui.Components.ForegroundDetector;
import java.io.File; import java.io.File;
import java.io.RandomAccessFile; import java.io.RandomAccessFile;
import java.util.Calendar; import java.util.Calendar;
import java.util.concurrent.atomic.AtomicInteger;
public class ApplicationLoader extends Application { public class ApplicationLoader extends Application {
private static NetworkAlarm networkAlarm = null;
private static PendingIntent pendingIntent;
private GoogleCloudMessaging gcm;
private AtomicInteger msgId = new AtomicInteger();
private String regid;
public static final String EXTRA_MESSAGE = "message";
public static final String PROPERTY_REG_ID = "registration_id";
private static final String PROPERTY_APP_VERSION = "appVersion";
private static final int PLAY_SERVICES_RESOLUTION_REQUEST = 9000;
private static Drawable cachedWallpaper; private static Drawable cachedWallpaper;
private static int selectedColor; private static int selectedColor;
private static boolean isCustomTheme; private static boolean isCustomTheme;
@ -63,6 +56,7 @@ public class ApplicationLoader extends Application {
public static volatile boolean isScreenOn = false; public static volatile boolean isScreenOn = false;
public static volatile boolean mainInterfacePaused = true; public static volatile boolean mainInterfacePaused = true;
public static DatabaseHandler databaseHandler;
public static boolean SHOW_ANDROID_EMOJI; public static boolean SHOW_ANDROID_EMOJI;
public static boolean KEEP_ORIGINAL_FILENAME; public static boolean KEEP_ORIGINAL_FILENAME;
public static boolean USE_DEVICE_FONT; public static boolean USE_DEVICE_FONT;
@ -186,7 +180,8 @@ public class ApplicationLoader extends Application {
} catch (Exception e) { } catch (Exception e) {
FileLog.e("tmessages", e); FileLog.e("tmessages", e);
} }
return new File("/data/data/org.telegram.messenger/files"); //return new File("/data/data/org.telegram.messenger/files");
return new File("/data/data/org.telegram." + (BuildConfig.DEBUG ? "plus.beta" : "plus") +"/files");
} }
public static void postInitApplication() { public static void postInitApplication() {
@ -239,16 +234,16 @@ public class ApplicationLoader extends Application {
appVersion = "App version unknown"; appVersion = "App version unknown";
systemVersion = "SDK " + Build.VERSION.SDK_INT; systemVersion = "SDK " + Build.VERSION.SDK_INT;
} }
if (langCode.length() == 0) { if (langCode.trim().length() == 0) {
langCode = "en"; langCode = "en";
} }
if (deviceModel.length() == 0) { if (deviceModel.trim().length() == 0) {
deviceModel = "Android unknown"; deviceModel = "Android unknown";
} }
if (appVersion.length() == 0) { if (appVersion.trim().length() == 0) {
appVersion = "App version unknown"; appVersion = "App version unknown";
} }
if (systemVersion.length() == 0) { if (systemVersion.trim().length() == 0) {
systemVersion = "SDK Unknown"; systemVersion = "SDK Unknown";
} }
@ -280,7 +275,11 @@ public class ApplicationLoader extends Application {
applicationContext = getApplicationContext(); applicationContext = getApplicationContext();
NativeLoader.initNativeLibs(ApplicationLoader.applicationContext); NativeLoader.initNativeLibs(ApplicationLoader.applicationContext);
try{
ConnectionsManager.native_setJava(Build.VERSION.SDK_INT == 14 || Build.VERSION.SDK_INT == 15); ConnectionsManager.native_setJava(Build.VERSION.SDK_INT == 14 || Build.VERSION.SDK_INT == 15);
} catch (Exception e) {
FileLog.e("tmessages", e);
}
if (Build.VERSION.SDK_INT >= 14) { if (Build.VERSION.SDK_INT >= 14) {
new ForegroundDetector(this); new ForegroundDetector(this);
@ -288,10 +287,12 @@ public class ApplicationLoader extends Application {
applicationHandler = new Handler(applicationContext.getMainLooper()); applicationHandler = new Handler(applicationContext.getMainLooper());
//plus //plus
SharedPreferences preferences = ApplicationLoader.applicationContext.getSharedPreferences("mainconfig", Activity.MODE_PRIVATE); databaseHandler = new DatabaseHandler(applicationContext);
SHOW_ANDROID_EMOJI = preferences.getBoolean("showAndroidEmoji", false); //SharedPreferences preferences = ApplicationLoader.applicationContext.getSharedPreferences("mainconfig", Activity.MODE_PRIVATE);
KEEP_ORIGINAL_FILENAME = preferences.getBoolean("keepOriginalFilename", false); SharedPreferences plusPreferences = ApplicationLoader.applicationContext.getSharedPreferences("plusconfig", Activity.MODE_PRIVATE);
USE_DEVICE_FONT = preferences.getBoolean("useDeviceFont", false); SHOW_ANDROID_EMOJI = plusPreferences.getBoolean("showAndroidEmoji", false);
KEEP_ORIGINAL_FILENAME = plusPreferences.getBoolean("keepOriginalFilename", false);
USE_DEVICE_FONT = plusPreferences.getBoolean("useDeviceFont", false);
// //
startPushService(); startPushService();
} }
@ -300,13 +301,22 @@ public class ApplicationLoader extends Application {
SharedPreferences preferences = applicationContext.getSharedPreferences("Notifications", MODE_PRIVATE); SharedPreferences preferences = applicationContext.getSharedPreferences("Notifications", MODE_PRIVATE);
if (preferences.getBoolean("pushService", true)) { if (preferences.getBoolean("pushService", true)) {
networkAlarm = new NetworkAlarm();
/*} else {
AlarmManager am = (AlarmManager) applicationContext.getSystemService(Context.ALARM_SERVICE);
Intent i = new Intent(applicationContext, ApplicationLoader.class);
pendingIntent = PendingIntent.getBroadcast(applicationContext, 0, i, 0);
am.cancel(pendingIntent);
am.setRepeating(AlarmManager.RTC_WAKEUP, System.currentTimeMillis(), 60000, pendingIntent);
}*/
applicationContext.startService(new Intent(applicationContext, NotificationsService.class)); applicationContext.startService(new Intent(applicationContext, NotificationsService.class));
//if (android.os.Build.VERSION.SDK_INT >= 19) { //if (android.os.Build.VERSION.SDK_INT >= 19) {
FileLog.e("ApplicationLoader", "startPushService"); ///FileLog.e("ApplicationLoader", "startPushService");
Calendar cal = Calendar.getInstance(); ///Calendar cal = Calendar.getInstance();
PendingIntent pintent = PendingIntent.getService(applicationContext, 0, new Intent(applicationContext, NotificationsService.class), 0); ///PendingIntent pintent = PendingIntent.getService(applicationContext, 0, new Intent(applicationContext, NotificationsService.class), 0);
AlarmManager alarm = (AlarmManager) applicationContext.getSystemService(Context.ALARM_SERVICE); ///AlarmManager alarm = (AlarmManager) applicationContext.getSystemService(Context.ALARM_SERVICE);
alarm.setRepeating(AlarmManager.RTC_WAKEUP, cal.getTimeInMillis(), 30000, pintent); ///alarm.setRepeating(AlarmManager.RTC_WAKEUP, cal.getTimeInMillis(), 30000, pintent);
//PendingIntent pintent = PendingIntent.getService(applicationContext, 0, new Intent(applicationContext, NotificationsService.class), 0); //PendingIntent pintent = PendingIntent.getService(applicationContext, 0, new Intent(applicationContext, NotificationsService.class), 0);
//AlarmManager alarm = (AlarmManager)applicationContext.getSystemService(Context.ALARM_SERVICE); //AlarmManager alarm = (AlarmManager)applicationContext.getSystemService(Context.ALARM_SERVICE);
@ -318,13 +328,26 @@ public class ApplicationLoader extends Application {
} }
public static void stopPushService() { public static void stopPushService() {
if (networkAlarm != null) {
networkAlarm = null;
}// else {
// AlarmManager am = (AlarmManager) applicationContext.getSystemService(Context.ALARM_SERVICE);
// am.cancel(pendingIntent);
//}
applicationContext.stopService(new Intent(applicationContext, NotificationsService.class)); applicationContext.stopService(new Intent(applicationContext, NotificationsService.class));
PendingIntent pintent = PendingIntent.getService(applicationContext, 0, new Intent(applicationContext, NotificationsService.class), 0); ///PendingIntent pintent = PendingIntent.getService(applicationContext, 0, new Intent(applicationContext, NotificationsService.class), 0);
AlarmManager alarm = (AlarmManager)applicationContext.getSystemService(Context.ALARM_SERVICE); ///AlarmManager alarm = (AlarmManager)applicationContext.getSystemService(Context.ALARM_SERVICE);
alarm.cancel(pintent); ///alarm.cancel(pintent);
} }
/**
 * Logs the requested wake-up delay and forwards it to the current network alarm.
 * When {@code networkAlarm} is null only the log line is written.
 *
 * @param timeout delay handed to {@code networkAlarm.setAlarm}; logged with an "ms" suffix
 */
public static void setAlarm(int timeout) {
    final String logLine = "setting alarm to wake us in " + timeout + "ms";
    FileLog.d("tmessages", logLine);
    if (networkAlarm == null) {
        return;
    }
    networkAlarm.setAlarm(applicationContext, timeout);
}
@Override @Override
public void onConfigurationChanged(Configuration newConfig) { public void onConfigurationChanged(Configuration newConfig) {
super.onConfigurationChanged(newConfig); super.onConfigurationChanged(newConfig);
@ -341,13 +364,10 @@ public class ApplicationLoader extends Application {
@Override @Override
public void run() { public void run() {
if (checkPlayServices()) { if (checkPlayServices()) {
gcm = GoogleCloudMessaging.getInstance(ApplicationLoader.this); if (UserConfig.pushString == null || UserConfig.pushString.length() == 0) {
regid = getRegistrationId(); FileLog.d("tmessages", "GCM Registration not found.");
Intent intent = new Intent(applicationContext, GcmRegistrationIntentService.class);
if (regid.length() == 0) { startService(intent);
registerInBackground();
} else {
sendRegistrationIdToBackend(false);
} }
} else { } else {
FileLog.d("tmessages", "No valid Google Play Services APK found."); FileLog.d("tmessages", "No valid Google Play Services APK found.");
@ -369,91 +389,4 @@ public class ApplicationLoader extends Application {
} }
return true;*/ return true;*/
} }
/**
 * Reads the GCM registration id saved in the GCM preferences.
 *
 * @return the stored id, or an empty string when nothing is stored or when the id was
 *         saved under an app version other than {@code BuildVars.BUILD_VERSION}
 */
private String getRegistrationId() {
    final SharedPreferences gcmPrefs = getGCMPreferences(applicationContext);
    final String storedId = gcmPrefs.getString(PROPERTY_REG_ID, "");
    if (storedId.length() == 0) {
        FileLog.d("tmessages", "Registration not found.");
        return "";
    }
    // An id saved by a different build version is treated as missing.
    final boolean sameVersion =
            gcmPrefs.getInt(PROPERTY_APP_VERSION, Integer.MIN_VALUE) == BuildVars.BUILD_VERSION;
    if (sameVersion) {
        return storedId;
    }
    FileLog.d("tmessages", "App version changed.");
    return "";
}
/**
 * Opens the private preferences file named after this class's simple name.
 *
 * @param context not used; the preferences are opened through this instance
 * @return the preferences that hold the GCM registration data
 */
private SharedPreferences getGCMPreferences(Context context) {
    final String prefsName = ApplicationLoader.class.getSimpleName();
    return getSharedPreferences(prefsName, Context.MODE_PRIVATE);
}
/**
 * Registers with GCM on a background task, retrying on failure.
 * Up to 1000 attempts are made. After a failed attempt the task waits 5 seconds, or
 * 30 minutes after every 20th attempt. On success the new id is passed to
 * {@code sendRegistrationIdToBackend(true)} and then saved with
 * {@code storeRegistrationId}. The task gives up early if its thread is interrupted
 * while waiting.
 */
private void registerInBackground() {
    AsyncTask<String, String, Boolean> task = new AsyncTask<String, String, Boolean>() {
        @Override
        protected Boolean doInBackground(String... objects) {
            if (gcm == null) {
                gcm = GoogleCloudMessaging.getInstance(applicationContext);
            }
            int count = 0;
            while (count < 1000) {
                try {
                    count++;
                    regid = gcm.register(BuildVars.GCM_SENDER_ID);
                    sendRegistrationIdToBackend(true);
                    storeRegistrationId(applicationContext, regid);
                    return true;
                } catch (Exception e) {
                    FileLog.e("tmessages", e);
                }
                try {
                    // Back off: a long pause after every 20th failure, a short one otherwise.
                    if (count % 20 == 0) {
                        Thread.sleep(60000 * 30);
                    } else {
                        Thread.sleep(5000);
                    }
                } catch (InterruptedException e) {
                    FileLog.e("tmessages", e);
                    // Stop retrying once the worker is interrupted, and keep the interrupt
                    // flag set for whoever owns the thread instead of swallowing it.
                    Thread.currentThread().interrupt();
                    return false;
                }
            }
            return false;
        }
    };
    // executeOnExecutor is only used from API level 11 upwards.
    if (android.os.Build.VERSION.SDK_INT >= 11) {
        task.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR, null, null, null);
    } else {
        task.execute(null, null, null);
    }
}
/**
 * Saves the current registration id into UserConfig on the stage queue and, when a user
 * is logged in, registers it for push on the UI thread.
 *
 * @param isNew true for a freshly obtained id; {@code UserConfig.registeredForPush} is
 *              set to the negation of this flag
 */
private void sendRegistrationIdToBackend(final boolean isNew) {
    // Runs on the UI thread; reads regid when it executes, not when it is created.
    final Runnable registerOnUiThread = new Runnable() {
        @Override
        public void run() {
            MessagesController.getInstance().registerForPush(regid);
        }
    };
    final Runnable saveOnStageQueue = new Runnable() {
        @Override
        public void run() {
            UserConfig.pushString = regid;
            UserConfig.registeredForPush = !isNew;
            UserConfig.saveConfig(false);
            if (UserConfig.getClientUserId() == 0) {
                return;
            }
            AndroidUtilities.runOnUIThread(registerOnUiThread);
        }
    };
    Utilities.stageQueue.postRunnable(saveOnStageQueue);
}
/**
 * Saves a registration id together with the current build version in the GCM preferences.
 *
 * @param context passed on to {@code getGCMPreferences}
 * @param regId   registration id to store
 */
private void storeRegistrationId(Context context, String regId) {
    final SharedPreferences gcmPrefs = getGCMPreferences(context);
    final int buildVersion = BuildVars.BUILD_VERSION;
    FileLog.e("tmessages", "Saving regId on app version " + buildVersion);
    // The version is stored next to the id under PROPERTY_APP_VERSION.
    gcmPrefs.edit()
            .putString(PROPERTY_REG_ID, regId)
            .putInt(PROPERTY_APP_VERSION, buildVersion)
            .commit();
}
} }

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later. * It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE). * You should have received a copy of the license in this archive (see LICENSE).
* *
* Copyright Nikolai Kudashov, 2013-2015. * Copyright Nikolai Kudashov, 2013-2016.
*/ */
package org.telegram.messenger; package org.telegram.messenger;

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later. * It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE). * You should have received a copy of the license in this archive (see LICENSE).
* *
* Copyright Nikolai Kudashov, 2013-2015. * Copyright Nikolai Kudashov, 2013-2016.
*/ */
package org.telegram.messenger; package org.telegram.messenger;
@ -13,8 +13,8 @@ import org.telegram.tgnet.RequestDelegate;
import org.telegram.tgnet.TLObject; import org.telegram.tgnet.TLObject;
import org.telegram.tgnet.TLRPC; import org.telegram.tgnet.TLRPC;
import java.io.RandomAccessFile;
import java.io.File; import java.io.File;
import java.io.RandomAccessFile;
import java.nio.channels.FileChannel; import java.nio.channels.FileChannel;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Scanner; import java.util.Scanner;
@ -22,9 +22,9 @@ import java.util.Scanner;
public class FileLoadOperation { public class FileLoadOperation {
private static class RequestInfo { private static class RequestInfo {
private int requestToken = 0; private int requestToken;
private int offset = 0; private int offset;
private TLRPC.TL_upload_file response = null; private TLRPC.TL_upload_file response;
} }
private final static int stateIdle = 0; private final static int stateIdle = 0;
@ -49,8 +49,9 @@ public class FileLoadOperation {
private int currentDownloadChunkSize; private int currentDownloadChunkSize;
private int currentMaxDownloadRequests; private int currentMaxDownloadRequests;
private int requestsCount; private int requestsCount;
private int renameRetryCount;
private int nextDownloadOffset = 0; private int nextDownloadOffset;
private ArrayList<RequestInfo> requestInfos; private ArrayList<RequestInfo> requestInfos;
private ArrayList<RequestInfo> delayedRequestInfos; private ArrayList<RequestInfo> delayedRequestInfos;
@ -61,9 +62,9 @@ public class FileLoadOperation {
private String ext; private String ext;
private RandomAccessFile fileOutputStream; private RandomAccessFile fileOutputStream;
private RandomAccessFile fiv; private RandomAccessFile fiv;
private File storePath = null; private File storePath;
private File tempPath = null; private File tempPath;
private boolean isForceRequest = false; private boolean isForceRequest;
private static String orgName = null; private static String orgName = null;
@ -145,11 +146,14 @@ public class FileLoadOperation {
key = documentLocation.key; key = documentLocation.key;
} else if (documentLocation instanceof TLRPC.TL_document) { } else if (documentLocation instanceof TLRPC.TL_document) {
location = new TLRPC.TL_inputDocumentFileLocation(); location = new TLRPC.TL_inputDocumentFileLocation();
datacenter_id = documentLocation.dc_id;
location.id = documentLocation.id; location.id = documentLocation.id;
location.access_hash = documentLocation.access_hash; location.access_hash = documentLocation.access_hash;
datacenter_id = documentLocation.dc_id;
} }
if (totalBytesCount <= 0) {
totalBytesCount = documentLocation.size; totalBytesCount = documentLocation.size;
}
if (ext == null) {
ext = FileLoader.getDocumentFileName(documentLocation); ext = FileLoader.getDocumentFileName(documentLocation);
int idx; int idx;
if (ext == null || (idx = ext.lastIndexOf(".")) == -1) { if (ext == null || (idx = ext.lastIndexOf(".")) == -1) {
@ -160,10 +164,12 @@ public class FileLoadOperation {
ext = ""; ext = "";
} }
} }
if(ApplicationLoader.KEEP_ORIGINAL_FILENAME && !ext.contains("webp")){
if(ApplicationLoader.KEEP_ORIGINAL_FILENAME && !ext.contains("webp") && !FileLoader.isGif(documentLocation)) {
orgName = FileLoader.getDocName(documentLocation); orgName = FileLoader.getDocName(documentLocation);
} }
} }
}
public void setForceRequest(boolean forceRequest) { public void setForceRequest(boolean forceRequest) {
isForceRequest = forceRequest; isForceRequest = forceRequest;
@ -200,7 +206,7 @@ public class FileLoadOperation {
String fileNameTemp; String fileNameTemp;
String fileNameIv = null; String fileNameIv = null;
if (location.volume_id != 0 && location.local_id != 0) { if (location.volume_id != 0 && location.local_id != 0) {
fileNameTemp = location.volume_id + "_" + location.local_id + "_temp." + ext; fileNameTemp = location.volume_id + "_" + location.local_id + ".temp";
fileNameFinal = location.volume_id + "_" + location.local_id + "." + ext; fileNameFinal = location.volume_id + "_" + location.local_id + "." + ext;
if (key != null) { if (key != null) {
fileNameIv = location.volume_id + "_" + location.local_id + ".iv"; fileNameIv = location.volume_id + "_" + location.local_id + ".iv";
@ -216,7 +222,7 @@ public class FileLoadOperation {
return; return;
} }
} else { } else {
fileNameTemp = datacenter_id + "_" + location.id + "_temp" + ext; fileNameTemp = datacenter_id + "_" + location.id + ".temp";
fileNameFinal = datacenter_id + "_" + location.id + ext; fileNameFinal = datacenter_id + "_" + location.id + ext;
if (key != null) { if (key != null) {
fileNameIv = datacenter_id + "_" + location.id + ".iv"; fileNameIv = datacenter_id + "_" + location.id + ".iv";
@ -256,8 +262,7 @@ public class FileLoadOperation {
nextDownloadOffset = downloadedBytes = downloadedBytes / currentDownloadChunkSize * currentDownloadChunkSize; nextDownloadOffset = downloadedBytes = downloadedBytes / currentDownloadChunkSize * currentDownloadChunkSize;
} }
//if (BuildVars.DEBUG_VERSION) { if (BuildVars.DEBUG_VERSION) {
if (BuildConfig.DEBUG) {
FileLog.d("tmessages", "start loading file to temp = " + cacheFileTemp + " final = " + cacheFileFinal); FileLog.d("tmessages", "start loading file to temp = " + cacheFileTemp + " final = " + cacheFileFinal);
} }
@ -341,6 +346,11 @@ public class FileLoadOperation {
private void cleanup() { private void cleanup() {
try { try {
if (fileOutputStream != null) { if (fileOutputStream != null) {
try {
fileOutputStream.getChannel().close();
} catch (Exception e) {
FileLog.e("tmessages", e);
}
fileOutputStream.close(); fileOutputStream.close();
fileOutputStream = null; fileOutputStream = null;
} }
@ -357,7 +367,8 @@ public class FileLoadOperation {
FileLog.e("tmessages", e); FileLog.e("tmessages", e);
} }
if (delayedRequestInfos != null) { if (delayedRequestInfos != null) {
for (RequestInfo requestInfo : delayedRequestInfos) { for (int a = 0; a < delayedRequestInfos.size(); a++) {
RequestInfo requestInfo = delayedRequestInfos.get(a);
if (requestInfo.response != null) { if (requestInfo.response != null) {
requestInfo.response.disableFree = false; requestInfo.response.disableFree = false;
requestInfo.response.freeResources(); requestInfo.response.freeResources();
@ -375,18 +386,33 @@ public class FileLoadOperation {
cleanup(); cleanup();
if (cacheIvTemp != null) { if (cacheIvTemp != null) {
cacheIvTemp.delete(); cacheIvTemp.delete();
cacheIvTemp = null;
} }
if (cacheFileTemp != null) { if (cacheFileTemp != null) {
if (!cacheFileTemp.renameTo(cacheFileFinal)) { boolean renameResult = cacheFileTemp.renameTo(cacheFileFinal);
//if (BuildVars.DEBUG_VERSION) { if (!renameResult) {
if (BuildConfig.DEBUG) { if (BuildVars.DEBUG_VERSION) {
FileLog.e("tmessages", "unable to rename temp = " + cacheFileTemp + " to final = " + cacheFileFinal); FileLog.e("tmessages", "unable to rename temp = " + cacheFileTemp + " to final = " + cacheFileFinal + " retry = " + renameRetryCount);
}
renameRetryCount++;
if (renameRetryCount < 3) {
state = stateDownloading;
Utilities.stageQueue.postRunnable(new Runnable() {
@Override
public void run() {
try {
onFinishLoadingFile();
} catch (Exception e) {
delegate.didFailedLoadingFile(FileLoadOperation.this, 0);
}
}
}, 200);
return;
} }
cacheFileFinal = cacheFileTemp; cacheFileFinal = cacheFileTemp;
} }
} }
//if (BuildVars.DEBUG_VERSION) { if (BuildVars.DEBUG_VERSION) {
if (BuildConfig.DEBUG) {
FileLog.e("tmessages", "finished downloading file to " + cacheFileFinal); FileLog.e("tmessages", "finished downloading file to " + cacheFileFinal);
} }
delegate.didFinishLoadingFile(FileLoadOperation.this, cacheFileFinal); delegate.didFinishLoadingFile(FileLoadOperation.this, cacheFileFinal);

View File

@ -3,11 +3,13 @@
* It is licensed under GNU GPL v. 2 or later. * It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE). * You should have received a copy of the license in this archive (see LICENSE).
* *
* Copyright Nikolai Kudashov, 2013-2015. * Copyright Nikolai Kudashov, 2013-2016.
*/ */
package org.telegram.messenger; package org.telegram.messenger;
import android.util.Log;
import org.telegram.tgnet.TLObject; import org.telegram.tgnet.TLObject;
import org.telegram.tgnet.TLRPC; import org.telegram.tgnet.TLRPC;
@ -84,6 +86,10 @@ public class FileLoader {
mediaDirs = dirs; mediaDirs = dirs;
} }
/**
 * Returns the entry stored in {@code mediaDirs} for the given type, exactly as stored.
 *
 * @param type key to look up in {@code mediaDirs}
 * @return the stored directory; may be null when nothing is stored for the type
 */
public File checkDirectory(int type) {
    final File storedDir = mediaDirs.get(type);
    return storedDir;
}
public File getDirectory(int type) { public File getDirectory(int type) {
File dir = mediaDirs.get(type); File dir = mediaDirs.get(type);
if (dir == null && type != MEDIA_DIR_CACHE) { if (dir == null && type != MEDIA_DIR_CACHE) {
@ -703,7 +709,8 @@ public class FileLoader {
if (document.file_name != null) { if (document.file_name != null) {
return document.file_name; return document.file_name;
} }
for (TLRPC.DocumentAttribute documentAttribute : document.attributes) { for (int a = 0; a < document.attributes.size(); a++) {
TLRPC.DocumentAttribute documentAttribute = document.attributes.get(a);
if (documentAttribute instanceof TLRPC.TL_documentAttributeFilename) { if (documentAttribute instanceof TLRPC.TL_documentAttributeFilename) {
return documentAttribute.file_name; return documentAttribute.file_name;
} }
@ -722,22 +729,25 @@ public class FileLoader {
return video.dc_id + "_" + video.id + "." + (ext != null ? ext : "mp4"); return video.dc_id + "_" + video.id + "." + (ext != null ? ext : "mp4");
} else if (attach instanceof TLRPC.Document) { } else if (attach instanceof TLRPC.Document) {
TLRPC.Document document = (TLRPC.Document) attach; TLRPC.Document document = (TLRPC.Document) attach;
String docExt = getDocumentFileName(document); String docExt = null;
if (docExt == null) {
docExt = getDocumentFileName(document);
int idx; int idx;
if (docExt == null || (idx = docExt.lastIndexOf(".")) == -1) { if (docExt == null || (idx = docExt.lastIndexOf(".")) == -1) {
docExt = ""; docExt = "";
} else { } else {
docExt = docExt.substring(idx); docExt = docExt.substring(idx);
} }
}
if (docExt.length() > 1) { if (docExt.length() > 1) {
if(ApplicationLoader.KEEP_ORIGINAL_FILENAME && !docExt.contains("webp"))return getDocName(document); //Plus if(ApplicationLoader.KEEP_ORIGINAL_FILENAME && !docExt.contains("webp") && !isGif(document))return getDocName(document); //Plus
return document.dc_id + "_" + document.id + docExt; return document.dc_id + "_" + document.id + docExt;
} else { } else {
return document.dc_id + "_" + document.id; return document.dc_id + "_" + document.id;
} }
} else if (attach instanceof TLRPC.PhotoSize) { } else if (attach instanceof TLRPC.PhotoSize) {
TLRPC.PhotoSize photo = (TLRPC.PhotoSize) attach; TLRPC.PhotoSize photo = (TLRPC.PhotoSize) attach;
if (photo.location == null) { if (photo.location == null || photo.location instanceof TLRPC.TL_fileLocationUnavailable) {
return ""; return "";
} }
return photo.location.volume_id + "_" + photo.location.local_id + "." + (ext != null ? ext : "jpg"); return photo.location.volume_id + "_" + photo.location.local_id + "." + (ext != null ? ext : "jpg");
@ -745,43 +755,63 @@ public class FileLoader {
TLRPC.Audio audio = (TLRPC.Audio) attach; TLRPC.Audio audio = (TLRPC.Audio) attach;
return audio.dc_id + "_" + audio.id + "." + (ext != null ? ext : "ogg"); return audio.dc_id + "_" + audio.id + "." + (ext != null ? ext : "ogg");
} else if (attach instanceof TLRPC.FileLocation) { } else if (attach instanceof TLRPC.FileLocation) {
if (attach instanceof TLRPC.TL_fileLocationUnavailable) {
return "";
}
TLRPC.FileLocation location = (TLRPC.FileLocation) attach; TLRPC.FileLocation location = (TLRPC.FileLocation) attach;
return location.volume_id + "_" + location.local_id + "." + (ext != null ? ext : "jpg"); return location.volume_id + "_" + location.local_id + "." + (ext != null ? ext : "jpg");
} }
return ""; return "";
} }
//Plus //Plus
public static String getAttachFileName(TLObject attach, boolean out) {
/**
 * Reports whether a document's file name looks like an animation.
 * Despite the method name, names containing ".mp4" match as well as names containing
 * ".gif". The check is a case-sensitive substring test, not an extension comparison.
 *
 * @param document document whose file name is inspected
 * @return true when the file name contains ".mp4" or ".gif"; false otherwise,
 *         including when no file name is available
 */
public static boolean isGif(TLRPC.Document document) {
    String fileName = getDocumentFileName(document);
    // Other callers in this class null-check getDocumentFileName's result; do the same
    // here so a document without a name is reported as "not a gif" instead of crashing.
    if (fileName == null) {
        return false;
    }
    return fileName.contains(".mp4") || fileName.contains(".gif");
}
public static String getAttachFileName(TLObject attach, String ext, boolean out) {
if (attach instanceof TLRPC.Video) { if (attach instanceof TLRPC.Video) {
TLRPC.Video video = (TLRPC.Video) attach; TLRPC.Video video = (TLRPC.Video) attach;
return video.dc_id + "_" + video.id + "." + ("mp4"); return video.dc_id + "_" + video.id + "." + (ext != null ? ext : "mp4");
} else if (attach instanceof TLRPC.Document) { } else if (attach instanceof TLRPC.Document) {
TLRPC.Document document = (TLRPC.Document) attach; TLRPC.Document document = (TLRPC.Document) attach;
String docExt = getDocumentFileName(document); String docExt = null;
if (docExt == null) {
docExt = getDocumentFileName(document);
int idx; int idx;
if (docExt == null || (idx = docExt.lastIndexOf(".")) == -1) { if (docExt == null || (idx = docExt.lastIndexOf(".")) == -1) {
docExt = ""; docExt = "";
} else { } else {
docExt = docExt.substring(idx); docExt = docExt.substring(idx);
} }
}
if (docExt.length() > 1) { if (docExt.length() > 1) {
if(!out && ApplicationLoader.KEEP_ORIGINAL_FILENAME && !docExt.contains("webp"))return getDocName(document); if(!out && ApplicationLoader.KEEP_ORIGINAL_FILENAME && !docExt.contains("webp") && !isGif(document))return getDocName(document);
return document.dc_id + "_" + document.id + docExt; return document.dc_id + "_" + document.id + docExt;
} else { } else {
return document.dc_id + "_" + document.id; return document.dc_id + "_" + document.id;
} }
} else if (attach instanceof TLRPC.PhotoSize) { } else if (attach instanceof TLRPC.PhotoSize) {
TLRPC.PhotoSize photo = (TLRPC.PhotoSize) attach; TLRPC.PhotoSize photo = (TLRPC.PhotoSize) attach;
if (photo.location == null) { if (photo.location == null || photo.location instanceof TLRPC.TL_fileLocationUnavailable) {
return ""; return "";
} }
return photo.location.volume_id + "_" + photo.location.local_id + "." + ( "jpg"); return photo.location.volume_id + "_" + photo.location.local_id + "." + (ext != null ? ext : "jpg");
} else if (attach instanceof TLRPC.Audio) { } else if (attach instanceof TLRPC.Audio) {
TLRPC.Audio audio = (TLRPC.Audio) attach; TLRPC.Audio audio = (TLRPC.Audio) attach;
return audio.dc_id + "_" + audio.id + "." + ( "ogg"); return audio.dc_id + "_" + audio.id + "." + (ext != null ? ext : "ogg");
} else if (attach instanceof TLRPC.FileLocation) { } else if (attach instanceof TLRPC.FileLocation) {
if (attach instanceof TLRPC.TL_fileLocationUnavailable) {
return "";
}
TLRPC.FileLocation location = (TLRPC.FileLocation) attach; TLRPC.FileLocation location = (TLRPC.FileLocation) attach;
return location.volume_id + "_" + location.local_id + "." + ( "jpg"); return location.volume_id + "_" + location.local_id + "." + (ext != null ? ext : "jpg");
} }
return ""; return "";
} }
@ -810,7 +840,7 @@ public class FileLoader {
return name; return name;
} }
public void deleteFiles(final ArrayList<File> files) { public void deleteFiles(final ArrayList<File> files, final int type) {
if (files == null || files.isEmpty()) { if (files == null || files.isEmpty()) {
return; return;
} }
@ -829,16 +859,19 @@ public class FileLoader {
} }
} }
try { try {
File qFile = new File(file.getPath(), "q_" + file.getName()); File qFile = new File(file.getParentFile(), "q_" + file.getName());
if (qFile.exists()) { if (qFile.exists()) {
if (!file.delete()) { if (!qFile.delete()) {
file.deleteOnExit(); qFile.deleteOnExit();
} }
} }
} catch (Exception e) { } catch (Exception e) {
FileLog.e("tmessages", e); FileLog.e("tmessages", e);
} }
} }
if (type == 2) {
ImageLoader.getInstance().clearMemory();
}
} }
}); });
} }

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later. * It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE). * You should have received a copy of the license in this archive (see LICENSE).
* *
* Copyright Nikolai Kudashov, 2013-2015. * Copyright Nikolai Kudashov, 2013-2016.
*/ */
package org.telegram.messenger; package org.telegram.messenger;
@ -40,8 +40,7 @@ public class FileLog {
} }
public FileLog() { public FileLog() {
//if (!BuildVars.DEBUG_VERSION) { if (!BuildVars.DEBUG_VERSION) {
if (!BuildConfig.DEBUG) {
return; return;
} }
dateFormat = FastDateFormat.getInstance("dd_MM_yyyy_HH_mm_ss", Locale.US); dateFormat = FastDateFormat.getInstance("dd_MM_yyyy_HH_mm_ss", Locale.US);
@ -88,8 +87,7 @@ public class FileLog {
} }
public static void e(final String tag, final String message, final Throwable exception) { public static void e(final String tag, final String message, final Throwable exception) {
//if (!BuildVars.DEBUG_VERSION) { if (!BuildVars.DEBUG_VERSION) {
if (!BuildConfig.DEBUG) {
return; return;
} }
Log.e(tag, message, exception); Log.e(tag, message, exception);
@ -110,8 +108,7 @@ public class FileLog {
} }
public static void e(final String tag, final String message) { public static void e(final String tag, final String message) {
//if (!BuildVars.DEBUG_VERSION) { if (!BuildVars.DEBUG_VERSION) {
if (!BuildConfig.DEBUG) {
return; return;
} }
Log.e(tag, message); Log.e(tag, message);
@ -131,8 +128,7 @@ public class FileLog {
} }
public static void e(final String tag, final Throwable e) { public static void e(final String tag, final Throwable e) {
//if (!BuildVars.DEBUG_VERSION) { if (!BuildVars.DEBUG_VERSION) {
if (!BuildConfig.DEBUG) {
return; return;
} }
e.printStackTrace(); e.printStackTrace();
@ -158,8 +154,7 @@ public class FileLog {
} }
public static void d(final String tag, final String message) { public static void d(final String tag, final String message) {
//if (!BuildVars.DEBUG_VERSION) { if (!BuildVars.DEBUG_VERSION) {
if (!BuildConfig.DEBUG) {
return; return;
} }
Log.d(tag, message); Log.d(tag, message);
@ -179,8 +174,7 @@ public class FileLog {
} }
public static void w(final String tag, final String message) { public static void w(final String tag, final String message) {
//if (!BuildVars.DEBUG_VERSION) { if (!BuildVars.DEBUG_VERSION) {
if (!BuildConfig.DEBUG) {
return; return;
} }
Log.w(tag, message); Log.w(tag, message);

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later. * It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE). * You should have received a copy of the license in this archive (see LICENSE).
* *
* Copyright Nikolai Kudashov, 2013-2015. * Copyright Nikolai Kudashov, 2013-2016.
*/ */
package org.telegram.messenger; package org.telegram.messenger;

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later. * It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE). * You should have received a copy of the license in this archive (see LICENSE).
* *
* Copyright Nikolai Kudashov, 2013-2015. * Copyright Nikolai Kudashov, 2013-2016.
*/ */
package org.telegram.messenger; package org.telegram.messenger;

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later. * It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE). * You should have received a copy of the license in this archive (see LICENSE).
* *
* Copyright Nikolai Kudashov, 2013-2015. * Copyright Nikolai Kudashov, 2013-2016.
*/ */
package org.telegram.messenger; package org.telegram.messenger;
@ -68,7 +68,7 @@ public class UserConfig {
SharedPreferences preferences = ApplicationLoader.applicationContext.getSharedPreferences("userconfing", Context.MODE_PRIVATE); SharedPreferences preferences = ApplicationLoader.applicationContext.getSharedPreferences("userconfing", Context.MODE_PRIVATE);
SharedPreferences.Editor editor = preferences.edit(); SharedPreferences.Editor editor = preferences.edit();
editor.putBoolean("registeredForPush", registeredForPush); editor.putBoolean("registeredForPush", registeredForPush);
editor.putString("pushString", pushString); editor.putString("pushString2", pushString);
editor.putInt("lastSendMessageId", lastSendMessageId); editor.putInt("lastSendMessageId", lastSendMessageId);
editor.putInt("lastLocalId", lastLocalId); editor.putInt("lastLocalId", lastLocalId);
editor.putString("contactsHash", contactsHash); editor.putString("contactsHash", contactsHash);
@ -182,7 +182,7 @@ public class UserConfig {
SharedPreferences preferences = ApplicationLoader.applicationContext.getSharedPreferences("userconfing", Context.MODE_PRIVATE); SharedPreferences preferences = ApplicationLoader.applicationContext.getSharedPreferences("userconfing", Context.MODE_PRIVATE);
registeredForPush = preferences.getBoolean("registeredForPush", false); registeredForPush = preferences.getBoolean("registeredForPush", false);
pushString = preferences.getString("pushString", ""); pushString = preferences.getString("pushString2", "");
lastSendMessageId = preferences.getInt("lastSendMessageId", -210000); lastSendMessageId = preferences.getInt("lastSendMessageId", -210000);
lastLocalId = preferences.getInt("lastLocalId", -210000); lastLocalId = preferences.getInt("lastLocalId", -210000);
contactsHash = preferences.getString("contactsHash", ""); contactsHash = preferences.getString("contactsHash", "");
@ -209,7 +209,7 @@ public class UserConfig {
} else { } else {
SharedPreferences preferences = ApplicationLoader.applicationContext.getSharedPreferences("userconfing", Context.MODE_PRIVATE); SharedPreferences preferences = ApplicationLoader.applicationContext.getSharedPreferences("userconfing", Context.MODE_PRIVATE);
registeredForPush = preferences.getBoolean("registeredForPush", false); registeredForPush = preferences.getBoolean("registeredForPush", false);
pushString = preferences.getString("pushString", ""); pushString = preferences.getString("pushString2", "");
lastSendMessageId = preferences.getInt("lastSendMessageId", -210000); lastSendMessageId = preferences.getInt("lastSendMessageId", -210000);
lastLocalId = preferences.getInt("lastLocalId", -210000); lastLocalId = preferences.getInt("lastLocalId", -210000);
contactsHash = preferences.getString("contactsHash", ""); contactsHash = preferences.getString("contactsHash", "");

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later. * It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE). * You should have received a copy of the license in this archive (see LICENSE).
* *
* Copyright Nikolai Kudashov, 2013-2015. * Copyright Nikolai Kudashov, 2013-2016.
*/ */
package org.telegram.messenger; package org.telegram.messenger;
@ -64,7 +64,8 @@ public class Utilities {
public native static void loadBitmap(String path, Bitmap bitmap, int scale, int width, int height, int stride); public native static void loadBitmap(String path, Bitmap bitmap, int scale, int width, int height, int stride);
public native static int pinBitmap(Bitmap bitmap); public native static int pinBitmap(Bitmap bitmap);
public native static void blurBitmap(Object bitmap, int radius, int unpin); public native static int unpinBitmap(Bitmap bitmap);
public native static void blurBitmap(Object bitmap, int radius, int unpin, int width, int height, int stride);
public native static void calcCDT(ByteBuffer hsvBuffer, int width, int height, ByteBuffer buffer); public native static void calcCDT(ByteBuffer hsvBuffer, int width, int height, ByteBuffer buffer);
public native static boolean loadWebpImage(Bitmap bitmap, ByteBuffer buffer, int len, BitmapFactory.Options options, boolean unpin); public native static boolean loadWebpImage(Bitmap bitmap, ByteBuffer buffer, int len, BitmapFactory.Options options, boolean unpin);
public native static int convertVideoFrame(ByteBuffer src, ByteBuffer dest, int destFormat, int width, int height, int padding, int swap); public native static int convertVideoFrame(ByteBuffer src, ByteBuffer dest, int destFormat, int width, int height, int padding, int swap);
@ -91,6 +92,23 @@ public class Utilities {
return val; return val;
} }
/**
 * Parses the first match of the shared {@code pattern} in the given text as a long.
 * (Presumably {@code pattern} matches a run of digits; confirm against its definition.)
 *
 * @param value text to search; may be null
 * @return the parsed number, or 0 when the text is null, nothing matches, or parsing
 *         fails (the failure is logged)
 */
public static Long parseLong(String value) {
    long parsed = 0L;
    if (value != null) {
        try {
            final Matcher m = pattern.matcher(value);
            if (m.find()) {
                parsed = Long.parseLong(m.group(0));
            }
        } catch (Exception e) {
            FileLog.e("tmessages", e);
        }
    }
    return parsed;
}
public static String parseIntToString(String value) { public static String parseIntToString(String value) {
Matcher matcher = pattern.matcher(value); Matcher matcher = pattern.matcher(value);
if (matcher.find()) { if (matcher.find()) {

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later. * It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE). * You should have received a copy of the license in this archive (see LICENSE).
* *
* Copyright Nikolai Kudashov, 2013-2015. * Copyright Nikolai Kudashov, 2013-2016.
*/ */
package org.telegram.ui.ActionBar; package org.telegram.ui.ActionBar;

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later. * It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE). * You should have received a copy of the license in this archive (see LICENSE).
* *
* Copyright Nikolai Kudashov, 2013-2015. * Copyright Nikolai Kudashov, 2013-2016.
*/ */
package org.telegram.ui.ActionBar; package org.telegram.ui.ActionBar;

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later. * It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE). * You should have received a copy of the license in this archive (see LICENSE).
* *
* Copyright Nikolai Kudashov, 2013-2015. * Copyright Nikolai Kudashov, 2013-2016.
*/ */
package org.telegram.ui.ActionBar; package org.telegram.ui.ActionBar;

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later. * It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE). * You should have received a copy of the license in this archive (see LICENSE).
* *
* Copyright Nikolai Kudashov, 2013-2015. * Copyright Nikolai Kudashov, 2013-2016.
*/ */
package org.telegram.ui.ActionBar; package org.telegram.ui.ActionBar;
@ -436,7 +436,7 @@ public class ActionBarMenuItem extends FrameLayoutFixed {
searchField.setOnEditorActionListener(new TextView.OnEditorActionListener() { searchField.setOnEditorActionListener(new TextView.OnEditorActionListener() {
@Override @Override
public boolean onEditorAction(TextView v, int actionId, KeyEvent event) { public boolean onEditorAction(TextView v, int actionId, KeyEvent event) {
if (actionId == EditorInfo.IME_ACTION_SEARCH || event != null && (event.getAction() == KeyEvent.ACTION_UP && event.getKeyCode() == KeyEvent.KEYCODE_SEARCH || event.getAction() == KeyEvent.ACTION_DOWN && event.getKeyCode() == KeyEvent.KEYCODE_ENTER)) { if (/*actionId == EditorInfo.IME_ACTION_SEARCH || */event != null && (event.getAction() == KeyEvent.ACTION_UP && event.getKeyCode() == KeyEvent.KEYCODE_SEARCH || event.getAction() == KeyEvent.ACTION_DOWN && event.getKeyCode() == KeyEvent.KEYCODE_ENTER)) {
AndroidUtilities.hideKeyboard(searchField); AndroidUtilities.hideKeyboard(searchField);
if (listener != null) { if (listener != null) {
listener.onSearchPressed(searchField); listener.onSearchPressed(searchField);

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later. * It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE). * You should have received a copy of the license in this archive (see LICENSE).
* *
* Copyright Nikolai Kudashov, 2013-2015. * Copyright Nikolai Kudashov, 2013-2016.
*/ */
//Thanks to https://github.com/JakeWharton/ActionBarSherlock/ //Thanks to https://github.com/JakeWharton/ActionBarSherlock/

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later. * It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE). * You should have received a copy of the license in this archive (see LICENSE).
* *
* Copyright Nikolai Kudashov, 2013-2015. * Copyright Nikolai Kudashov, 2013-2016.
*/ */
package org.telegram.ui.ActionBar; package org.telegram.ui.ActionBar;

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later. * It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE). * You should have received a copy of the license in this archive (see LICENSE).
* *
* Copyright Nikolai Kudashov, 2013-2015. * Copyright Nikolai Kudashov, 2013-2016.
*/ */
package org.telegram.ui.ActionBar; package org.telegram.ui.ActionBar;
@ -453,11 +453,12 @@ public class DrawerLayoutContainer extends FrameLayout {
child.measure(drawerWidthSpec, drawerHeightSpec); child.measure(drawerWidthSpec, drawerHeightSpec);
} }
} }
//getDrawerLayout().setBackgroundColor(AndroidUtilities.getIntDef("drawerListColor",0xffffffff)); //Plus //Plus
updateListBG(); updateListBG();
} }
private void updateListBG(){ private void updateListBG(){
if(getDrawerLayout() != null) {
SharedPreferences themePrefs = ApplicationLoader.applicationContext.getSharedPreferences(AndroidUtilities.THEME_PREFS, AndroidUtilities.THEME_PREFS_MODE); SharedPreferences themePrefs = ApplicationLoader.applicationContext.getSharedPreferences(AndroidUtilities.THEME_PREFS, AndroidUtilities.THEME_PREFS_MODE);
int mainColor = themePrefs.getInt("drawerListColor", 0xffffffff); int mainColor = themePrefs.getInt("drawerListColor", 0xffffffff);
int value = themePrefs.getInt("drawerRowGradient", 0); int value = themePrefs.getInt("drawerRowGradient", 0);
@ -486,6 +487,7 @@ public class DrawerLayoutContainer extends FrameLayout {
getDrawerLayout().setBackgroundColor(mainColor); getDrawerLayout().setBackgroundColor(mainColor);
} }
} }
}
@Override @Override
protected boolean drawChild(Canvas canvas, View child, long drawingTime) { protected boolean drawChild(Canvas canvas, View child, long drawingTime) {

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later. * It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE). * You should have received a copy of the license in this archive (see LICENSE).
* *
* Copyright Nikolai Kudashov, 2013-2015. * Copyright Nikolai Kudashov, 2013-2016.
*/ */
package org.telegram.ui.ActionBar; package org.telegram.ui.ActionBar;

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later. * It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE). * You should have received a copy of the license in this archive (see LICENSE).
* *
* Copyright Nikolai Kudashov, 2013-2015. * Copyright Nikolai Kudashov, 2013-2016.
*/ */
package org.telegram.ui.Adapters; package org.telegram.ui.Adapters;

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later. * It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE). * You should have received a copy of the license in this archive (see LICENSE).
* *
* Copyright Nikolai Kudashov, 2013-2015. * Copyright Nikolai Kudashov, 2013-2016.
*/ */
package org.telegram.ui.Adapters; package org.telegram.ui.Adapters;

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later. * It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE). * You should have received a copy of the license in this archive (see LICENSE).
* *
* Copyright Nikolai Kudashov, 2013-2015. * Copyright Nikolai Kudashov, 2013-2016.
*/ */
package org.telegram.ui.Adapters; package org.telegram.ui.Adapters;

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later. * It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE). * You should have received a copy of the license in this archive (see LICENSE).
* *
* Copyright Nikolai Kudashov, 2013-2015. * Copyright Nikolai Kudashov, 2013-2016.
*/ */
package org.telegram.ui.Adapters; package org.telegram.ui.Adapters;

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later. * It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE). * You should have received a copy of the license in this archive (see LICENSE).
* *
* Copyright Nikolai Kudashov, 2013-2015. * Copyright Nikolai Kudashov, 2013-2016.
*/ */
package org.telegram.ui.Adapters; package org.telegram.ui.Adapters;

Some files were not shown because too many files have changed in this diff Show More