- Added swipe left/right to change between tabs

- Added option to enable/disable infinite swipe between tabs
- Added new menu Plus and moved there all additional Plus features
- Added option to show/hide direct share button in any chat
- Added switch to use direct share without quoting sender
- Added date indicator toast in chat
- Added current download/upload size in chat screen
- Added sort options (default/unread) to all tabs
- Added different options in groups/supergroups for admins when user avatar is clicked in chat screen: ‘Show profile’, ‘Delete from group’ and ‘Set as admin’
- Added option to remove from admin list if user is already an admin
- Added ‘Mark as read’ for individual chats and ‘Mark all as read’ for every tab
- Added option in Plus settings to show username instead of mobile number in menu
- Bug fixes
This commit is contained in:
rafalense 2016-02-25 10:36:39 +01:00
parent 6c9a71f6d1
commit a4e9947d42
273 changed files with 17155 additions and 27306 deletions

View File

@ -1,4 +1,4 @@
### Plus Messenger for Android
### Plus Messenger for Android (http://plusmessenger.org)
This is an UNOFFICIAL app that uses [Telegram's API](https://core.telegram.org/api)

View File

@ -6,7 +6,8 @@ repositories {
dependencies {
compile 'com.android.support:support-v4:23.1.+'
compile 'com.google.android.gms:play-services:3.2.+'
compile "com.google.android.gms:play-services-gcm:8.4.0"
compile "com.google.android.gms:play-services-maps:8.4.0"
compile 'net.hockeyapp.android:HockeySDK:3.6.+'
compile 'com.googlecode.mp4parser:isoparser:1.0.+'
}
@ -16,6 +17,8 @@ android {
buildToolsVersion '23.0.2'
useLibrary 'org.apache.http.legacy'
//defaultConfig.applicationId = "org.telegram.messenger"
defaultConfig.applicationId = "org.telegram.plus"
compileOptions {
sourceCompatibility JavaVersion.VERSION_1_7
@ -25,6 +28,10 @@ android {
signingConfigs {
debug {
storeFile file("config/debug.keystore")
//storeFile file("config/release.keystore")
//storePassword RELEASE_STORE_PASSWORD
//keyAlias RELEASE_KEY_ALIAS
//keyPassword RELEASE_KEY_PASSWORD
}
release {
@ -47,6 +54,8 @@ android {
debuggable false
jniDebuggable false
//signingConfig signingConfigs.release
minifyEnabled false
proguardFiles getDefaultProguardFile('proguard-android.txt'), 'proguard-rules.pro'
}
foss {
@ -79,10 +88,9 @@ android {
}
defaultConfig {
applicationId "org.telegram.plus"
minSdkVersion 8
minSdkVersion 9
targetSdkVersion 23
versionCode 689
versionName "3.3.1.1"
versionCode 736
versionName "3.4.2.5"
}
}

View File

@ -27,7 +27,7 @@
android:label="Plus beta"
tools:replace="label"
android:theme="@style/Theme.TMessages.Start"
android:name=".ApplicationLoader"
android:name="org.telegram.messenger.ApplicationLoader"
android:hardwareAccelerated="@bool/useHardwareAcceleration"
android:largeHeap="true">
@ -36,7 +36,8 @@
<activity android:name="net.hockeyapp.android.UpdateActivity" />
<receiver
android:name="org.telegram.messenger.GcmBroadcastReceiver"
android:name="com.google.android.gms.gcm.GcmReceiver"
android:exported="true"
android:permission="com.google.android.c2dm.permission.SEND" >
<intent-filter>
<action android:name="com.google.android.c2dm.intent.RECEIVE" />
@ -44,6 +45,24 @@
<category android:name="org.telegram.plus.beta" />
</intent-filter>
</receiver>
<service
android:name="org.telegram.messenger.GcmPushListenerService"
android:exported="false" >
<intent-filter>
<action android:name="com.google.android.c2dm.intent.RECEIVE" />
</intent-filter>
</service>
<service
android:name="org.telegram.messenger.GcmInstanceIDListenerService"
android:exported="false">
<intent-filter>
<action android:name="com.google.android.gms.iid.InstanceID" />
</intent-filter>
</service>
<service
android:name="org.telegram.messenger.GcmRegistrationIntentService"
android:exported="false">
</service>
<uses-library android:name="com.google.android.maps" android:required="false"/>

View File

@ -25,7 +25,7 @@
android:icon="@drawable/ic_launcher"
android:label="@string/ShortAppName"
android:theme="@style/Theme.TMessages.Start"
android:name=".ApplicationLoader"
android:name="org.telegram.messenger.ApplicationLoader"
android:hardwareAccelerated="@bool/useHardwareAcceleration"
android:largeHeap="true">
@ -34,7 +34,8 @@
<activity android:name="net.hockeyapp.android.UpdateActivity" />
<receiver
android:name=".GcmBroadcastReceiver"
android:name="com.google.android.gms.gcm.GcmReceiver"
android:exported="true"
android:permission="com.google.android.c2dm.permission.SEND" >
<intent-filter>
<action android:name="com.google.android.c2dm.intent.RECEIVE" />
@ -42,6 +43,24 @@
<category android:name="org.telegram.plus" />
</intent-filter>
</receiver>
<service
android:name="org.telegram.messenger.GcmPushListenerService"
android:exported="false" >
<intent-filter>
<action android:name="com.google.android.c2dm.intent.RECEIVE" />
</intent-filter>
</service>
<service
android:name="org.telegram.messenger.GcmInstanceIDListenerService"
android:exported="false">
<intent-filter>
<action android:name="com.google.android.gms.iid.InstanceID" />
</intent-filter>
</service>
<service
android:name="org.telegram.messenger.GcmRegistrationIntentService"
android:exported="false">
</service>
<uses-library android:name="com.google.android.maps" android:required="false"/>

View File

@ -1,5 +1,57 @@
LOCAL_PATH := $(call my-dir)
LOCAL_MODULE := avutil
ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
LOCAL_SRC_FILES := ./ffmpeg/armv7-a/libavutil.a
else
ifeq ($(TARGET_ARCH_ABI),armeabi)
LOCAL_SRC_FILES := ./ffmpeg/armv5te/libavutil.a
else
ifeq ($(TARGET_ARCH_ABI),x86)
LOCAL_SRC_FILES := ./ffmpeg/i686/libavutil.a
endif
endif
endif
include $(PREBUILT_STATIC_LIBRARY)
include $(CLEAR_VARS)
LOCAL_MODULE := avformat
ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
LOCAL_SRC_FILES := ./ffmpeg/armv7-a/libavformat.a
else
ifeq ($(TARGET_ARCH_ABI),armeabi)
LOCAL_SRC_FILES := ./ffmpeg/armv5te/libavformat.a
else
ifeq ($(TARGET_ARCH_ABI),x86)
LOCAL_SRC_FILES := ./ffmpeg/i686/libavformat.a
endif
endif
endif
include $(PREBUILT_STATIC_LIBRARY)
include $(CLEAR_VARS)
LOCAL_MODULE := avcodec
ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
LOCAL_SRC_FILES := ./ffmpeg/armv7-a/libavcodec.a
else
ifeq ($(TARGET_ARCH_ABI),armeabi)
LOCAL_SRC_FILES := ./ffmpeg/armv5te/libavcodec.a
else
ifeq ($(TARGET_ARCH_ABI),x86)
LOCAL_SRC_FILES := ./ffmpeg/i686/libavcodec.a
endif
endif
endif
include $(PREBUILT_STATIC_LIBRARY)
include $(CLEAR_VARS)
LOCAL_MODULE := crypto
@ -26,8 +78,8 @@ LOCAL_MODULE := breakpad
LOCAL_CPPFLAGS := -Wall -std=c++11 -DANDROID -finline-functions -ffast-math -Os -fno-strict-aliasing
LOCAL_C_INCLUDES := \
./breakpad/common/android/include \
./breakpad
$(LOCAL_PATH)/breakpad/common/android/include \
$(LOCAL_PATH)/breakpad
LOCAL_SRC_FILES := \
./breakpad/client/linux/crash_generation/crash_generation_client.cc \
@ -57,7 +109,7 @@ include $(BUILD_STATIC_LIBRARY)
include $(CLEAR_VARS)
LOCAL_CPPFLAGS := -Wall -std=c++11 -DANDROID -frtti -DHAVE_PTHREAD -finline-functions -ffast-math -Os
LOCAL_C_INCLUDES += ./boringssl/include/
LOCAL_C_INCLUDES += $(LOCAL_PATH)/boringssl/include/
LOCAL_ARM_MODE := arm
LOCAL_MODULE := tgnet
LOCAL_STATIC_LIBRARIES := crypto
@ -85,15 +137,12 @@ include $(BUILD_STATIC_LIBRARY)
include $(CLEAR_VARS)
LOCAL_CFLAGS := -Wall -DANDROID -DHAVE_MALLOC_H -DHAVE_PTHREAD -DWEBP_USE_THREAD -finline-functions -ffast-math -ffunction-sections -fdata-sections -Os
LOCAL_C_INCLUDES += ./libwebp/src
LOCAL_C_INCLUDES += $(LOCAL_PATH)/libwebp/src
LOCAL_ARM_MODE := arm
LOCAL_STATIC_LIBRARIES := cpufeatures
LOCAL_MODULE := webp
ifneq ($(findstring armeabi-v7a, $(TARGET_ARCH_ABI)),)
# Setting LOCAL_ARM_NEON will enable -mfpu=neon which may cause illegal
# instructions to be generated for armv7a code. Instead target the neon code
# specifically.
NEON := c.neon
else
NEON := c
@ -185,19 +234,14 @@ include $(BUILD_STATIC_LIBRARY)
include $(CLEAR_VARS)
LOCAL_PRELINK_MODULE := false
LOCAL_STATIC_LIBRARIES := webp sqlite tgnet breakpad
LOCAL_MODULE := tmessages.15
LOCAL_MODULE := tmessages.17
LOCAL_CFLAGS := -w -std=c11 -Os -DNULL=0 -DSOCKLEN_T=socklen_t -DLOCALE_NOT_USED -D_LARGEFILE_SOURCE=1 -D_FILE_OFFSET_BITS=64
LOCAL_CFLAGS += -Drestrict='' -D__EMX__ -DOPUS_BUILD -DFIXED_POINT -DUSE_ALLOCA -DHAVE_LRINT -DHAVE_LRINTF -fno-math-errno
LOCAL_CFLAGS += -DANDROID_NDK -DDISABLE_IMPORTGL -fno-strict-aliasing -fprefetch-loop-arrays -DAVOID_TABLES -DANDROID_TILE_BASED_DECODE -DANDROID_ARMV6_IDCT -ffast-math
LOCAL_CFLAGS += -DANDROID_NDK -DDISABLE_IMPORTGL -fno-strict-aliasing -fprefetch-loop-arrays -DAVOID_TABLES -DANDROID_TILE_BASED_DECODE -DANDROID_ARMV6_IDCT -ffast-math -D__STDC_CONSTANT_MACROS
LOCAL_CPPFLAGS := -DBSD=1 -ffast-math -Os -funroll-loops -std=c++11
LOCAL_LDLIBS := -ljnigraphics -llog -lz
ifeq ($(TARGET_ARCH_ABI),armeabi)
LOCAL_ARM_MODE := thumb
else
LOCAL_ARM_MODE := arm
endif
LOCAL_STATIC_LIBRARIES := webp sqlite tgnet breakpad avformat avcodec avutil
LOCAL_SRC_FILES := \
./opus/src/opus.c \
@ -211,6 +255,23 @@ LOCAL_SRC_FILES := \
./opus/src/mlp.c \
./opus/src/mlp_data.c
ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
LOCAL_ARM_MODE := arm
LOCAL_CPPFLAGS += -DLIBYUV_NEON
LOCAL_CFLAGS += -DLIBYUV_NEON
else
ifeq ($(TARGET_ARCH_ABI),armeabi)
LOCAL_ARM_MODE := arm
else
ifeq ($(TARGET_ARCH_ABI),x86)
LOCAL_ARM_MODE := arm
LOCAL_SRC_FILE += \
./libyuv/source/row_x86.asm
endif
endif
endif
LOCAL_SRC_FILES += \
./opus/silk/CNG.c \
./opus/silk/code_signs.c \
@ -346,21 +407,18 @@ LOCAL_SRC_FILES += \
./opus/opusfile/opusfile.c \
./opus/opusfile/stream.c
LOCAL_SRC_FILES += \
./giflib/dgif_lib.c \
./giflib/gifalloc.c
LOCAL_C_INCLUDES := \
./opus/include \
./opus/silk \
./opus/silk/fixed \
./opus/celt \
./opus/ \
./opus/opusfile \
./libyuv/include \
./boringssl/include \
./breakpad/common/android/include \
./breakpad
$(LOCAL_PATH)/opus/include \
$(LOCAL_PATH)/opus/silk \
$(LOCAL_PATH)/opus/silk/fixed \
$(LOCAL_PATH)/opus/celt \
$(LOCAL_PATH)/opus/ \
$(LOCAL_PATH)/opus/opusfile \
$(LOCAL_PATH)/libyuv/include \
$(LOCAL_PATH)/boringssl/include \
$(LOCAL_PATH)/breakpad/common/android/include \
$(LOCAL_PATH)/breakpad \
$(LOCAL_PATH)/ffmpeg/include
LOCAL_SRC_FILES += \
./libjpeg/jcapimin.c \
@ -413,8 +471,8 @@ LOCAL_SRC_FILES += \
LOCAL_SRC_FILES += \
./libyuv/source/compare_common.cc \
./libyuv/source/compare_neon.cc \
./libyuv/source/compare_posix.cc \
./libyuv/source/compare_gcc.cc \
./libyuv/source/compare_neon64.cc \
./libyuv/source/compare_win.cc \
./libyuv/source/compare.cc \
./libyuv/source/convert_argb.cc \
@ -425,32 +483,42 @@ LOCAL_SRC_FILES += \
./libyuv/source/convert_to_i420.cc \
./libyuv/source/convert.cc \
./libyuv/source/cpu_id.cc \
./libyuv/source/format_conversion.cc \
./libyuv/source/mjpeg_decoder.cc \
./libyuv/source/mjpeg_validate.cc \
./libyuv/source/planar_functions.cc \
./libyuv/source/rotate_any.cc \
./libyuv/source/rotate_argb.cc \
./libyuv/source/rotate_common.cc \
./libyuv/source/rotate_gcc.cc \
./libyuv/source/rotate_mips.cc \
./libyuv/source/rotate_neon.cc \
./libyuv/source/rotate_neon64.cc \
./libyuv/source/rotate_win.cc \
./libyuv/source/rotate.cc \
./libyuv/source/row_any.cc \
./libyuv/source/row_common.cc \
./libyuv/source/row_gcc.cc \
./libyuv/source/row_mips.cc \
./libyuv/source/row_neon.cc \
./libyuv/source/row_neon64.cc \
./libyuv/source/row_posix.cc \
./libyuv/source/row_win.cc \
./libyuv/source/scale_any.cc \
./libyuv/source/scale_argb.cc \
./libyuv/source/scale_common.cc \
./libyuv/source/scale_gcc.cc \
./libyuv/source/scale_mips.cc \
./libyuv/source/scale_neon.cc \
./libyuv/source/scale_neon64.cc \
./libyuv/source/scale_posix.cc \
./libyuv/source/scale_win.cc \
./libyuv/source/scale.cc \
./libyuv/source/video_common.cc
ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
LOCAL_CFLAGS += -DLIBYUV_NEON
LOCAL_SRC_FILES += \
./libyuv/source/compare_neon.cc.neon \
./libyuv/source/rotate_neon.cc.neon \
./libyuv/source/row_neon.cc.neon \
./libyuv/source/scale_neon.cc.neon
endif
LOCAL_SRC_FILES += \
./jni.c \
./sqlite_cursor.c \
@ -458,10 +526,10 @@ LOCAL_SRC_FILES += \
./sqlite_statement.c \
./sqlite.c \
./audio.c \
./gif.c \
./utils.c \
./image.c \
./video.c \
./gifvideo.cpp \
./TgNetWrapper.cpp \
./NativeLoader.cpp

View File

@ -1,847 +0,0 @@
//thanks to https://github.com/koral--/android-gif-drawable
/*
MIT License
Copyright (c)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
// Copyright (c) 2011 Google Inc. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
The GIFLIB distribution is Copyright (c) 1997 Eric S. Raymond
*/
#include <jni.h>
#include <stdio.h>
#include <time.h>
#include <limits.h>
#include "gif.h"
#include "giflib/gif_lib.h"
#define D_GIF_ERR_NO_FRAMES 1000
#define D_GIF_ERR_INVALID_SCR_DIMS 1001
#define D_GIF_ERR_INVALID_IMG_DIMS 1002
#define D_GIF_ERR_IMG_NOT_CONFINED 1003
typedef struct {
uint8_t blue;
uint8_t green;
uint8_t red;
uint8_t alpha;
} argb;
typedef struct {
unsigned int duration;
int transpIndex;
unsigned char disposalMethod;
} FrameInfo;
typedef struct {
GifFileType *gifFilePtr;
unsigned long lastFrameReaminder;
unsigned long nextStartTime;
int currentIndex;
unsigned int lastDrawIndex;
FrameInfo *infos;
argb *backupPtr;
int startPos;
unsigned char *rasterBits;
char *comment;
unsigned short loopCount;
int currentLoop;
jfloat speedFactor;
} GifInfo;
static ColorMapObject *defaultCmap = NULL;
static ColorMapObject *genDefColorMap(void) {
ColorMapObject *cmap = GifMakeMapObject(256, NULL);
if (cmap != NULL) {
int iColor;
for (iColor = 0; iColor < 256; iColor++) {
cmap->Colors[iColor].Red = (GifByteType) iColor;
cmap->Colors[iColor].Green = (GifByteType) iColor;
cmap->Colors[iColor].Blue = (GifByteType) iColor;
}
}
return cmap;
}
jint gifOnJNILoad(JavaVM *vm, void *reserved, JNIEnv *env) {
defaultCmap = genDefColorMap();
if (defaultCmap == NULL) {
return -1;
}
return JNI_VERSION_1_6;
}
void gifOnJNIUnload(JavaVM *vm, void *reserved) {
GifFreeMapObject(defaultCmap);
}
static int fileReadFunc(GifFileType *gif, GifByteType *bytes, int size) {
FILE *file = (FILE *)gif->UserData;
return fread(bytes, 1, size, file);
}
static int fileRewindFun(GifInfo *info) {
return fseek(info->gifFilePtr->UserData, info->startPos, SEEK_SET);
}
static unsigned long getRealTime() {
struct timespec ts;
const clockid_t id = CLOCK_MONOTONIC;
if (id != (clockid_t) - 1 && clock_gettime(id, &ts) != -1) {
return ts.tv_sec * 1000 + ts.tv_nsec / 1000000;
}
return -1;
}
static void cleanUp(GifInfo *info) {
if (info->backupPtr) {
free(info->backupPtr);
info->backupPtr = NULL;
}
if (info->infos) {
free(info->infos);
info->infos = NULL;
}
if (info->rasterBits) {
free(info->rasterBits);
info->rasterBits = NULL;
}
if (info->comment) {
free(info->comment);
info->comment = NULL;
}
GifFileType *GifFile = info->gifFilePtr;
if (GifFile->SColorMap == defaultCmap) {
GifFile->SColorMap = NULL;
}
if (GifFile->SavedImages != NULL) {
SavedImage *sp;
for (sp = GifFile->SavedImages; sp < GifFile->SavedImages + GifFile->ImageCount; sp++) {
if (sp->ImageDesc.ColorMap != NULL) {
GifFreeMapObject(sp->ImageDesc.ColorMap);
sp->ImageDesc.ColorMap = NULL;
}
}
free(GifFile->SavedImages);
GifFile->SavedImages = NULL;
}
DGifCloseFile(GifFile);
free(info);
}
static int getComment(GifByteType *Bytes, char **cmt) {
unsigned int len = (unsigned int) Bytes[0];
unsigned int offset = *cmt != NULL ? strlen(*cmt) : 0;
char *ret = realloc(*cmt, (len + offset + 1) * sizeof(char));
if (ret != NULL) {
memcpy(ret + offset, &Bytes[1], len);
ret[len + offset] = 0;
*cmt = ret;
return GIF_OK;
}
return GIF_ERROR;
}
static void packARGB32(argb *pixel, GifByteType alpha, GifByteType red, GifByteType green, GifByteType blue) {
pixel->alpha = alpha;
pixel->red = red;
pixel->green = green;
pixel->blue = blue;
}
static void getColorFromTable(int idx, argb *dst, const ColorMapObject *cmap) {
int colIdx = (idx >= cmap->ColorCount) ? 0 : idx;
GifColorType *col = &cmap->Colors[colIdx];
packARGB32(dst, 0xFF, col->Red, col->Green, col->Blue);
}
static void eraseColor(argb *bm, int w, int h, argb color) {
int i;
for (i = 0; i < w * h; i++) {
*(bm + i) = color;
}
}
static inline bool setupBackupBmp(GifInfo *info, short transpIndex) {
GifFileType *fGIF = info->gifFilePtr;
info->backupPtr = calloc(fGIF->SWidth * fGIF->SHeight, sizeof(argb));
if (!info->backupPtr) {
info->gifFilePtr->Error = D_GIF_ERR_NOT_ENOUGH_MEM;
return false;
}
argb paintingColor;
if (transpIndex == -1) {
getColorFromTable(fGIF->SBackGroundColor, &paintingColor, fGIF->SColorMap);
} else {
packARGB32(&paintingColor, 0, 0, 0, 0);
}
eraseColor(info->backupPtr, fGIF->SWidth, fGIF->SHeight, paintingColor);
return true;
}
static int readExtensions(int ExtFunction, GifByteType *ExtData, GifInfo *info) {
if (ExtData == NULL) {
return GIF_OK;
}
if (ExtFunction == GRAPHICS_EXT_FUNC_CODE && ExtData[0] == 4) {
FrameInfo *fi = &info->infos[info->gifFilePtr->ImageCount];
fi->transpIndex = -1;
char *b = (char*) ExtData + 1;
short delay = ((b[2] << 8) | b[1]);
fi->duration = delay > 1 ? delay * 10 : 100;
fi->disposalMethod = ((b[0] >> 2) & 7);
if (ExtData[1] & 1) {
fi->transpIndex = 0xff & b[3];
}
if (fi->disposalMethod == 3 && info->backupPtr == NULL) {
if (!setupBackupBmp(info, fi->transpIndex)) {
return GIF_ERROR;
}
}
} else if (ExtFunction == COMMENT_EXT_FUNC_CODE) {
if (getComment(ExtData, &info->comment) == GIF_ERROR) {
info->gifFilePtr->Error = D_GIF_ERR_NOT_ENOUGH_MEM;
return GIF_ERROR;
}
} else if (ExtFunction == APPLICATION_EXT_FUNC_CODE && ExtData[0] == 11) {
if (strncmp("NETSCAPE2.0", &ExtData[1], 11) == 0 || strncmp("ANIMEXTS1.0", &ExtData[1], 11) == 0) {
if (DGifGetExtensionNext(info->gifFilePtr, &ExtData, &ExtFunction) == GIF_ERROR) {
return GIF_ERROR;
}
if (ExtFunction == APPLICATION_EXT_FUNC_CODE && ExtData[0] == 3 && ExtData[1] == 1) {
info->loopCount = (unsigned short) (ExtData[2] + (ExtData[3] << 8));
}
}
}
return GIF_OK;
}
static int DDGifSlurp(GifFileType *GifFile, GifInfo* info, bool shouldDecode) {
GifRecordType RecordType;
GifByteType *ExtData;
int codeSize;
int ExtFunction;
size_t ImageSize;
do {
if (DGifGetRecordType(GifFile, &RecordType) == GIF_ERROR) {
return (GIF_ERROR);
}
switch (RecordType) {
case IMAGE_DESC_RECORD_TYPE:
if (DGifGetImageDesc(GifFile, !shouldDecode) == GIF_ERROR) {
return (GIF_ERROR);
}
int i = shouldDecode ? info->currentIndex : GifFile->ImageCount - 1;
SavedImage *sp = &GifFile->SavedImages[i];
ImageSize = sp->ImageDesc.Width * sp->ImageDesc.Height;
if (sp->ImageDesc.Width < 1 || sp->ImageDesc.Height < 1 || ImageSize > (SIZE_MAX / sizeof(GifPixelType))) {
GifFile->Error = D_GIF_ERR_INVALID_IMG_DIMS;
return GIF_ERROR;
}
if (sp->ImageDesc.Width > GifFile->SWidth || sp->ImageDesc.Height > GifFile->SHeight) {
GifFile->Error = D_GIF_ERR_IMG_NOT_CONFINED;
return GIF_ERROR;
}
if (shouldDecode) {
sp->RasterBits = info->rasterBits;
if (sp->ImageDesc.Interlace) {
int i, j;
int InterlacedOffset[] = { 0, 4, 2, 1 };
int InterlacedJumps[] = { 8, 8, 4, 2 };
for (i = 0; i < 4; i++) {
for (j = InterlacedOffset[i]; j < sp->ImageDesc.Height; j += InterlacedJumps[i]) {
if (DGifGetLine(GifFile, sp->RasterBits + j * sp->ImageDesc.Width, sp->ImageDesc.Width) == GIF_ERROR) {
return GIF_ERROR;
}
}
}
} else {
if (DGifGetLine(GifFile, sp->RasterBits, ImageSize) == GIF_ERROR) {
return (GIF_ERROR);
}
}
if (info->currentIndex >= GifFile->ImageCount - 1) {
if (info->loopCount > 0) {
info->currentLoop++;
}
if (fileRewindFun(info) != 0) {
info->gifFilePtr->Error = D_GIF_ERR_READ_FAILED;
return GIF_ERROR;
}
}
return GIF_OK;
} else {
if (DGifGetCode(GifFile, &codeSize, &ExtData) == GIF_ERROR) {
return (GIF_ERROR);
}
while (ExtData != NULL) {
if (DGifGetCodeNext(GifFile, &ExtData) == GIF_ERROR) {
return (GIF_ERROR);
}
}
}
break;
case EXTENSION_RECORD_TYPE:
if (DGifGetExtension(GifFile, &ExtFunction, &ExtData) == GIF_ERROR) {
return (GIF_ERROR);
}
if (!shouldDecode) {
FrameInfo *tmpInfos = realloc(info->infos, (GifFile->ImageCount + 1) * sizeof(FrameInfo));
if (tmpInfos == NULL) {
return GIF_ERROR;
}
info->infos = tmpInfos;
if (readExtensions(ExtFunction, ExtData, info) == GIF_ERROR) {
return GIF_ERROR;
}
}
while (ExtData != NULL) {
if (DGifGetExtensionNext(GifFile, &ExtData, &ExtFunction) == GIF_ERROR) {
return (GIF_ERROR);
}
if (!shouldDecode) {
if (readExtensions(ExtFunction, ExtData, info) == GIF_ERROR) {
return GIF_ERROR;
}
}
}
break;
case TERMINATE_RECORD_TYPE:
break;
default:
break;
}
} while (RecordType != TERMINATE_RECORD_TYPE);
bool ok = true;
if (shouldDecode) {
ok = (fileRewindFun(info) == 0);
}
if (ok) {
return (GIF_OK);
} else {
info->gifFilePtr->Error = D_GIF_ERR_READ_FAILED;
return (GIF_ERROR);
}
}
static void copyLine(argb *dst, const unsigned char *src, const ColorMapObject *cmap, int transparent, int width) {
for (; width > 0; width--, src++, dst++) {
if (*src != transparent) {
getColorFromTable(*src, dst, cmap);
}
}
}
static argb *getAddr(argb *bm, int width, int left, int top) {
return bm + top * width + left;
}
static void blitNormal(argb *bm, int width, int height, const SavedImage *frame, const ColorMapObject *cmap, int transparent) {
const unsigned char* src = (unsigned char*) frame->RasterBits;
argb *dst = getAddr(bm, width, frame->ImageDesc.Left, frame->ImageDesc.Top);
GifWord copyWidth = frame->ImageDesc.Width;
if (frame->ImageDesc.Left + copyWidth > width) {
copyWidth = width - frame->ImageDesc.Left;
}
GifWord copyHeight = frame->ImageDesc.Height;
if (frame->ImageDesc.Top + copyHeight > height) {
copyHeight = height - frame->ImageDesc.Top;
}
for (; copyHeight > 0; copyHeight--) {
copyLine(dst, src, cmap, transparent, copyWidth);
src += frame->ImageDesc.Width;
dst += width;
}
}
static void fillRect(argb *bm, int bmWidth, int bmHeight, GifWord left, GifWord top, GifWord width, GifWord height, argb col) {
uint32_t* dst = (uint32_t*) getAddr(bm, bmWidth, left, top);
GifWord copyWidth = width;
if (left + copyWidth > bmWidth) {
copyWidth = bmWidth - left;
}
GifWord copyHeight = height;
if (top + copyHeight > bmHeight) {
copyHeight = bmHeight - top;
}
uint32_t* pColor = (uint32_t *) (&col);
for (; copyHeight > 0; copyHeight--) {
memset(dst, *pColor, copyWidth * sizeof(argb));
dst += bmWidth;
}
}
static void drawFrame(argb *bm, int bmWidth, int bmHeight, const SavedImage *frame, const ColorMapObject *cmap, short transpIndex) {
if (frame->ImageDesc.ColorMap != NULL) {
cmap = frame->ImageDesc.ColorMap;
if (cmap->ColorCount != (1 << cmap->BitsPerPixel)) {
cmap = defaultCmap;
}
}
blitNormal(bm, bmWidth, bmHeight, frame, cmap, transpIndex);
}
static bool checkIfCover(const SavedImage *target, const SavedImage *covered) {
if (target->ImageDesc.Left <= covered->ImageDesc.Left
&& covered->ImageDesc.Left + covered->ImageDesc.Width
<= target->ImageDesc.Left + target->ImageDesc.Width
&& target->ImageDesc.Top <= covered->ImageDesc.Top
&& covered->ImageDesc.Top + covered->ImageDesc.Height
<= target->ImageDesc.Top + target->ImageDesc.Height) {
return true;
}
return false;
}
static inline void disposeFrameIfNeeded(argb *bm, GifInfo *info, unsigned int idx) {
argb* backup = info->backupPtr;
argb color;
packARGB32(&color, 0, 0, 0, 0);
GifFileType *fGif = info->gifFilePtr;
SavedImage* cur = &fGif->SavedImages[idx - 1];
SavedImage* next = &fGif->SavedImages[idx];
bool curTrans = info->infos[idx - 1].transpIndex != -1;
int curDisposal = info->infos[idx - 1].disposalMethod;
bool nextTrans = info->infos[idx].transpIndex != -1;
int nextDisposal = info->infos[idx].disposalMethod;
argb *tmp;
if ((curDisposal == 2 || curDisposal == 3) && (nextTrans || !checkIfCover(next, cur))) {
switch (curDisposal) {
case 2:
fillRect(bm, fGif->SWidth, fGif->SHeight, cur->ImageDesc.Left, cur->ImageDesc.Top, cur->ImageDesc.Width, cur->ImageDesc.Height, color);
break;
case 3:
tmp = bm;
bm = backup;
backup = tmp;
break;
}
}
if (nextDisposal == 3) {
memcpy(backup, bm, fGif->SWidth * fGif->SHeight * sizeof(argb));
}
}
static void reset(GifInfo *info) {
if (fileRewindFun(info) != 0) {
return;
}
info->nextStartTime = 0;
info->currentLoop = -1;
info->currentIndex = -1;
}
static void getBitmap(argb *bm, GifInfo *info) {
GifFileType* fGIF = info->gifFilePtr;
argb paintingColor;
int i = info->currentIndex;
if (DDGifSlurp(fGIF, info, true) == GIF_ERROR) {
return;
}
SavedImage* cur = &fGIF->SavedImages[i];
int transpIndex = info->infos[i].transpIndex;
if (i == 0) {
if (transpIndex == -1) {
getColorFromTable(fGIF->SBackGroundColor, &paintingColor, fGIF->SColorMap);
} else {
packARGB32(&paintingColor, 0, 0, 0, 0);
}
eraseColor(bm, fGIF->SWidth, fGIF->SHeight, paintingColor);
} else {
disposeFrameIfNeeded(bm, info, i);
}
drawFrame(bm, fGIF->SWidth, fGIF->SHeight, cur, fGIF->SColorMap, transpIndex);
}
static void setMetaData(int width, int height, int ImageCount, int errorCode, JNIEnv *env, jintArray metaData) {
jint *const ints = (*env)->GetIntArrayElements(env, metaData, 0);
if (ints == NULL) {
return;
}
ints[0] = width;
ints[1] = height;
ints[2] = ImageCount;
ints[3] = errorCode;
(*env)->ReleaseIntArrayElements(env, metaData, ints, 0);
}
static jint open(GifFileType *GifFileIn, int Error, int startPos, JNIEnv *env, jintArray metaData) {
if (startPos < 0) {
Error = D_GIF_ERR_NOT_READABLE;
DGifCloseFile(GifFileIn);
}
if (Error != 0 || GifFileIn == NULL) {
setMetaData(0, 0, 0, Error, env, metaData);
return (jint) NULL;
}
int width = GifFileIn->SWidth, height = GifFileIn->SHeight;
unsigned int wxh = width * height;
if (wxh < 1 || wxh > INT_MAX) {
DGifCloseFile(GifFileIn);
setMetaData(width, height, 0, D_GIF_ERR_INVALID_SCR_DIMS, env, metaData);
return (jint) NULL;
}
GifInfo *info = malloc(sizeof(GifInfo));
if (info == NULL) {
DGifCloseFile(GifFileIn);
setMetaData(width, height, 0, D_GIF_ERR_NOT_ENOUGH_MEM, env, metaData);
return (jint) NULL;
}
info->gifFilePtr = GifFileIn;
info->startPos = startPos;
info->currentIndex = -1;
info->nextStartTime = 0;
info->lastFrameReaminder = ULONG_MAX;
info->comment = NULL;
info->loopCount = 0;
info->currentLoop = -1;
info->speedFactor = 1.0;
info->rasterBits = calloc(GifFileIn->SHeight * GifFileIn->SWidth, sizeof(GifPixelType));
info->infos = malloc(sizeof(FrameInfo));
info->backupPtr = NULL;
if (info->rasterBits == NULL || info->infos == NULL) {
cleanUp(info);
setMetaData(width, height, 0, D_GIF_ERR_NOT_ENOUGH_MEM, env, metaData);
return (jint) NULL;
}
info->infos->duration = 0;
info->infos->disposalMethod = 0;
info->infos->transpIndex = -1;
if (GifFileIn->SColorMap == NULL || GifFileIn->SColorMap->ColorCount != (1 << GifFileIn->SColorMap->BitsPerPixel)) {
GifFreeMapObject(GifFileIn->SColorMap);
GifFileIn->SColorMap = defaultCmap;
}
DDGifSlurp(GifFileIn, info, false);
int imgCount = GifFileIn->ImageCount;
if (imgCount < 1) {
Error = D_GIF_ERR_NO_FRAMES;
}
if (fileRewindFun(info) != 0) {
Error = D_GIF_ERR_READ_FAILED;
}
if (Error != 0) {
cleanUp(info);
}
setMetaData(width, height, imgCount, Error, env, metaData);
return (jint)(Error == 0 ? info : NULL);
}
JNIEXPORT jlong JNICALL Java_org_telegram_ui_Components_GifDrawable_getAllocationByteCount(JNIEnv *env, jclass class, jobject gifInfo) {
GifInfo *info = (GifInfo *)gifInfo;
if (info == NULL) {
return 0;
}
unsigned int pxCount = info->gifFilePtr->SWidth + info->gifFilePtr->SHeight;
jlong sum = pxCount * sizeof(char);
if (info->backupPtr != NULL) {
sum += pxCount * sizeof(argb);
}
return sum;
}
JNIEXPORT void JNICALL Java_org_telegram_ui_Components_GifDrawable_reset(JNIEnv *env, jclass class, jobject gifInfo) {
GifInfo *info = (GifInfo *)gifInfo;
if (info == NULL) {
return;
}
reset(info);
}
JNIEXPORT void JNICALL Java_org_telegram_ui_Components_GifDrawable_setSpeedFactor(JNIEnv *env, jclass class, jobject gifInfo, jfloat factor) {
GifInfo *info = (GifInfo *)gifInfo;
if (info == NULL) {
return;
}
info->speedFactor = factor;
}
JNIEXPORT void JNICALL Java_org_telegram_ui_Components_GifDrawable_seekToTime(JNIEnv *env, jclass class, jobject gifInfo, jint desiredPos, jintArray jPixels) {
GifInfo *info = (GifInfo *)gifInfo;
if (info == NULL || jPixels == NULL) {
return;
}
int imgCount = info->gifFilePtr->ImageCount;
if (imgCount <= 1) {
return;
}
unsigned long sum = 0;
int i;
for (i = 0; i < imgCount; i++) {
unsigned long newSum = sum + info->infos[i].duration;
if (newSum >= desiredPos) {
break;
}
sum = newSum;
}
if (i < info->currentIndex) {
return;
}
unsigned long lastFrameRemainder = desiredPos - sum;
if (i == imgCount - 1 && lastFrameRemainder > info->infos[i].duration) {
lastFrameRemainder = info->infos[i].duration;
}
if (i > info->currentIndex) {
jint *const pixels = (*env)->GetIntArrayElements(env, jPixels, 0);
if (pixels == NULL) {
return;
}
while (info->currentIndex <= i) {
info->currentIndex++;
getBitmap((argb*) pixels, info);
}
(*env)->ReleaseIntArrayElements(env, jPixels, pixels, 0);
}
info->lastFrameReaminder = lastFrameRemainder;
if (info->speedFactor == 1.0) {
info->nextStartTime = getRealTime() + lastFrameRemainder;
} else {
info->nextStartTime = getRealTime() + lastFrameRemainder * info->speedFactor;
}
}
JNIEXPORT void JNICALL Java_org_telegram_ui_Components_GifDrawable_seekToFrame(JNIEnv *env, jclass class, jobject gifInfo, jint desiredIdx, jintArray jPixels) {
GifInfo *info = (GifInfo *)gifInfo;
if (info == NULL|| jPixels==NULL) {
return;
}
if (desiredIdx <= info->currentIndex) {
return;
}
int imgCount = info->gifFilePtr->ImageCount;
if (imgCount <= 1) {
return;
}
jint *const pixels = (*env)->GetIntArrayElements(env, jPixels, 0);
if (pixels == NULL) {
return;
}
info->lastFrameReaminder = 0;
if (desiredIdx >= imgCount) {
desiredIdx = imgCount - 1;
}
while (info->currentIndex < desiredIdx) {
info->currentIndex++;
getBitmap((argb *) pixels, info);
}
(*env)->ReleaseIntArrayElements(env, jPixels, pixels, 0);
if (info->speedFactor == 1.0) {
info->nextStartTime = getRealTime() + info->infos[info->currentIndex].duration;
} else {
info->nextStartTime = getRealTime() + info->infos[info->currentIndex].duration * info->speedFactor;
}
}
JNIEXPORT void JNICALL Java_org_telegram_ui_Components_GifDrawable_renderFrame(JNIEnv *env, jclass class, jintArray jPixels, jobject gifInfo, jintArray metaData) {
GifInfo *info = (GifInfo *)gifInfo;
if (info == NULL || jPixels == NULL) {
return;
}
bool needRedraw = false;
unsigned long rt = getRealTime();
if (rt >= info->nextStartTime && info->currentLoop < info->loopCount) {
if (++info->currentIndex >= info->gifFilePtr->ImageCount) {
info->currentIndex = 0;
}
needRedraw = true;
}
jint *const rawMetaData = (*env)->GetIntArrayElements(env, metaData, 0);
if (rawMetaData == NULL) {
return;
}
if (needRedraw) {
jint *const pixels = (*env)->GetIntArrayElements(env, jPixels, 0);
if (pixels == NULL) {
(*env)->ReleaseIntArrayElements(env, metaData, rawMetaData, 0);
return;
}
getBitmap((argb *)pixels, info);
rawMetaData[3] = info->gifFilePtr->Error;
(*env)->ReleaseIntArrayElements(env, jPixels, pixels, 0);
unsigned int scaledDuration = info->infos[info->currentIndex].duration;
if (info->speedFactor != 1.0) {
scaledDuration /= info->speedFactor;
if (scaledDuration<=0) {
scaledDuration=1;
} else if (scaledDuration > INT_MAX) {
scaledDuration = INT_MAX;
}
}
info->nextStartTime = rt + scaledDuration;
rawMetaData[4] = scaledDuration;
} else {
long delay = info->nextStartTime-rt;
if (delay < 0) {
rawMetaData[4] = -1;
} else {
rawMetaData[4] = (int) delay;
}
}
(*env)->ReleaseIntArrayElements(env, metaData, rawMetaData, 0);
}
JNIEXPORT void JNICALL Java_org_telegram_ui_Components_GifDrawable_free(JNIEnv *env, jclass class, jobject gifInfo) {
if (gifInfo == NULL) {
return;
}
GifInfo *info = (GifInfo *)gifInfo;
FILE *file = info->gifFilePtr->UserData;
if (file) {
fclose(file);
}
info->gifFilePtr->UserData = NULL;
cleanUp(info);
}
JNIEXPORT jstring JNICALL Java_org_telegram_ui_Components_GifDrawable_getComment(JNIEnv *env, jclass class, jobject gifInfo) {
if (gifInfo == NULL) {
return NULL;
}
GifInfo *info = (GifInfo *)gifInfo;
return (*env)->NewStringUTF(env, info->comment);
}
JNIEXPORT jint JNICALL Java_org_telegram_ui_Components_GifDrawable_getLoopCount(JNIEnv *env, jclass class, jobject gifInfo) {
if (gifInfo == NULL) {
return 0;
}
return ((GifInfo *)gifInfo)->loopCount;
}
JNIEXPORT jint JNICALL Java_org_telegram_ui_Components_GifDrawable_getDuration(JNIEnv *env, jclass class, jobject gifInfo) {
GifInfo *info = (GifInfo *)gifInfo;
if (info == NULL) {
return 0;
}
int i;
unsigned long sum = 0;
for (i = 0; i < info->gifFilePtr->ImageCount; i++) {
sum += info->infos[i].duration;
}
return sum;
}
JNIEXPORT jint JNICALL Java_org_telegram_ui_Components_GifDrawable_getCurrentPosition(JNIEnv *env, jclass class, jobject gifInfo) {
GifInfo *info = (GifInfo *)gifInfo;
if (info == NULL) {
return 0;
}
int idx = info->currentIndex;
if (idx < 0 || info->gifFilePtr->ImageCount <= 1) {
return 0;
}
int i;
unsigned int sum = 0;
for (i = 0; i < idx; i++) {
sum += info->infos[i].duration;
}
unsigned long remainder = info->lastFrameReaminder == ULONG_MAX ? getRealTime() - info->nextStartTime : info->lastFrameReaminder;
return (int) (sum + remainder);
}
JNIEXPORT void JNICALL Java_org_telegram_ui_Components_GifDrawable_saveRemainder(JNIEnv *env, jclass class, jobject gifInfo) {
GifInfo *info = (GifInfo *)gifInfo;
if (info == NULL) {
return;
}
info->lastFrameReaminder = getRealTime() - info->nextStartTime;
}
JNIEXPORT void JNICALL Java_org_telegram_ui_Components_GifDrawable_restoreRemainder(JNIEnv *env, jclass class, jobject gifInfo) {
GifInfo *info = (GifInfo *)gifInfo;
if (info == NULL || info->lastFrameReaminder == ULONG_MAX) {
return;
}
info->nextStartTime = getRealTime() + info->lastFrameReaminder;
info->lastFrameReaminder = ULONG_MAX;
}
JNIEXPORT jint JNICALL Java_org_telegram_ui_Components_GifDrawable_openFile(JNIEnv *env, jclass class, jintArray metaData, jstring jfname) {
if (jfname == NULL) {
setMetaData(0, 0, 0, D_GIF_ERR_OPEN_FAILED, env, metaData);
return (jint) NULL;
}
const char *const fname = (*env)->GetStringUTFChars(env, jfname, 0);
FILE *file = fopen(fname, "rb");
(*env)->ReleaseStringUTFChars(env, jfname, fname);
if (file == NULL) {
setMetaData(0, 0, 0, D_GIF_ERR_OPEN_FAILED, env, metaData);
return (jint) NULL;
}
int Error = 0;
GifFileType *GifFileIn = DGifOpen(file, &fileReadFunc, &Error);
return open(GifFileIn, Error, ftell(file), env, metaData);
}

View File

@ -1,7 +0,0 @@
#ifndef gif_h
#define gif_h
jint gifOnJNILoad(JavaVM *vm, void *reserved, JNIEnv *env);
void gifOnJNIUnload(JavaVM *vm, void *reserved);
#endif

View File

@ -1,13 +0,0 @@
// giflib config.h
#ifndef GIF_CONFIG_H_DEFINED
#define GIF_CONFIG_H_DEFINED
#include <sys/types.h>
#define HAVE_STDINT_H
#define HAVE_FCNTL_H
typedef uint32_t UINT32;
#endif

File diff suppressed because it is too large Load Diff

View File

@ -1,132 +0,0 @@
/*****************************************************************************
gif_hash.c -- module to support the following operations:
1. InitHashTable - initialize hash table.
2. ClearHashTable - clear the hash table to an empty state.
2. InsertHashTable - insert one item into data structure.
3. ExistsHashTable - test if item exists in data structure.
This module is used to hash the GIF codes during encoding.
*****************************************************************************/
#include <unistd.h>
#include <stdint.h>
#include <stdlib.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include "gif_lib.h"
#include "gif_hash.h"
#include "gif_lib_private.h"
/* #define DEBUG_HIT_RATE Debug number of misses per hash Insert/Exists. */
#ifdef DEBUG_HIT_RATE
static long NumberOfTests = 0,
NumberOfMisses = 0;
#endif /* DEBUG_HIT_RATE */
static int KeyItem(uint32_t Item);
/******************************************************************************
Initialize HashTable - allocate the memory needed and clear it. *
******************************************************************************/
GifHashTableType *_InitHashTable(void)
{
GifHashTableType *HashTable;
if ((HashTable = (GifHashTableType *) malloc(sizeof(GifHashTableType)))
== NULL)
return NULL;
_ClearHashTable(HashTable);
return HashTable;
}
/******************************************************************************
Routine to clear the HashTable to an empty state. *
This part is a little machine depended. Use the commented part otherwise. *
******************************************************************************/
void _ClearHashTable(GifHashTableType *HashTable)
{
memset(HashTable -> HTable, 0xFF, HT_SIZE * sizeof(uint32_t));
}
/******************************************************************************
Routine to insert a new Item into the HashTable. The data is assumed to be *
new one. *
******************************************************************************/
void _InsertHashTable(GifHashTableType *HashTable, uint32_t Key, int Code)
{
int HKey = KeyItem(Key);
uint32_t *HTable = HashTable -> HTable;
#ifdef DEBUG_HIT_RATE
NumberOfTests++;
NumberOfMisses++;
#endif /* DEBUG_HIT_RATE */
while (HT_GET_KEY(HTable[HKey]) != 0xFFFFFL) {
#ifdef DEBUG_HIT_RATE
NumberOfMisses++;
#endif /* DEBUG_HIT_RATE */
HKey = (HKey + 1) & HT_KEY_MASK;
}
HTable[HKey] = HT_PUT_KEY(Key) | HT_PUT_CODE(Code);
}
/******************************************************************************
Routine to test if given Key exists in HashTable and if so returns its code *
Returns the Code if key was found, -1 if not. *
******************************************************************************/
int _ExistsHashTable(GifHashTableType *HashTable, uint32_t Key)
{
int HKey = KeyItem(Key);
uint32_t *HTable = HashTable -> HTable, HTKey;
#ifdef DEBUG_HIT_RATE
NumberOfTests++;
NumberOfMisses++;
#endif /* DEBUG_HIT_RATE */
while ((HTKey = HT_GET_KEY(HTable[HKey])) != 0xFFFFFL) {
#ifdef DEBUG_HIT_RATE
NumberOfMisses++;
#endif /* DEBUG_HIT_RATE */
if (Key == HTKey) return HT_GET_CODE(HTable[HKey]);
HKey = (HKey + 1) & HT_KEY_MASK;
}
return -1;
}
/******************************************************************************
Routine to generate an HKey for the hashtable out of the given unique key. *
The given Key is assumed to be 20 bits as follows: lower 8 bits are the *
new postfix character, while the upper 12 bits are the prefix code. *
Because the average hit ratio is only 2 (2 hash references per entry), *
evaluating more complex keys (such as twin prime keys) does not worth it! *
******************************************************************************/
static int KeyItem(uint32_t Item)
{
return ((Item >> 12) ^ Item) & HT_KEY_MASK;
}
#ifdef DEBUG_HIT_RATE
/******************************************************************************
Debugging routine to print the hit ratio - number of times the hash table *
was tested per operation. This routine was used to test the KeyItem routine *
******************************************************************************/
void HashTablePrintHitRatio(void)
{
printf("Hash Table Hit Ratio is %ld/%ld = %ld%%.\n",
NumberOfMisses, NumberOfTests,
NumberOfMisses * 100 / NumberOfTests);
}
#endif /* DEBUG_HIT_RATE */
/* end */

View File

@ -1,39 +0,0 @@
/******************************************************************************
gif_hash.h - magfic constants and declarations for GIF LZW
******************************************************************************/
#ifndef _GIF_HASH_H_
#define _GIF_HASH_H_
#include <unistd.h>
#include <stdint.h>
#define HT_SIZE 8192 /* 12bits = 4096 or twice as big! */
#define HT_KEY_MASK 0x1FFF /* 13bits keys */
#define HT_KEY_NUM_BITS 13 /* 13bits keys */
#define HT_MAX_KEY 8191 /* 13bits - 1, maximal code possible */
#define HT_MAX_CODE 4095 /* Biggest code possible in 12 bits. */
/* The 32 bits of the long are divided into two parts for the key & code: */
/* 1. The code is 12 bits as our compression algorithm is limited to 12bits */
/* 2. The key is 12 bits Prefix code + 8 bit new char or 20 bits. */
/* The key is the upper 20 bits. The code is the lower 12. */
#define HT_GET_KEY(l) (l >> 12)
#define HT_GET_CODE(l) (l & 0x0FFF)
#define HT_PUT_KEY(l) (l << 12)
#define HT_PUT_CODE(l) (l & 0x0FFF)
typedef struct GifHashTableType {
uint32_t HTable[HT_SIZE];
} GifHashTableType;
GifHashTableType *_InitHashTable(void);
void _ClearHashTable(GifHashTableType *HashTable);
void _InsertHashTable(GifHashTableType *HashTable, uint32_t Key, int Code);
int _ExistsHashTable(GifHashTableType *HashTable, uint32_t Key);
#endif /* _GIF_HASH_H_ */
/* end */

View File

@ -1,307 +0,0 @@
/******************************************************************************
gif_lib.h - service library for decoding and encoding GIF images
*****************************************************************************/
#ifndef _GIF_LIB_H_
#define _GIF_LIB_H_ 1
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
#define GIFLIB_MAJOR 5
#define GIFLIB_MINOR 0
#define GIFLIB_RELEASE 5
#define GIF_ERROR 0
#define GIF_OK 1
#include <stddef.h>
#include <stdbool.h>
#define GIF_STAMP "GIFVER" /* First chars in file - GIF stamp. */
#define GIF_STAMP_LEN sizeof(GIF_STAMP) - 1
#define GIF_VERSION_POS 3 /* Version first character in stamp. */
#define GIF87_STAMP "GIF87a" /* First chars in file - GIF stamp. */
#define GIF89_STAMP "GIF89a" /* First chars in file - GIF stamp. */
typedef unsigned char GifPixelType;
typedef unsigned char *GifRowType;
typedef unsigned char GifByteType;
typedef unsigned int GifPrefixType;
typedef int GifWord;
typedef struct GifColorType {
GifByteType Red, Green, Blue;
} GifColorType;
typedef struct ColorMapObject {
int ColorCount;
int BitsPerPixel;
bool SortFlag;
GifColorType *Colors; /* on malloc(3) heap */
} ColorMapObject;
typedef struct GifImageDesc {
GifWord Left, Top, Width, Height; /* Current image dimensions. */
bool Interlace; /* Sequential/Interlaced lines. */
ColorMapObject *ColorMap; /* The local color map */
} GifImageDesc;
typedef struct ExtensionBlock {
int ByteCount;
GifByteType *Bytes; /* on malloc(3) heap */
int Function; /* The block function code */
#define CONTINUE_EXT_FUNC_CODE 0x00 /* continuation subblock */
#define COMMENT_EXT_FUNC_CODE 0xfe /* comment */
#define GRAPHICS_EXT_FUNC_CODE 0xf9 /* graphics control (GIF89) */
#define PLAINTEXT_EXT_FUNC_CODE 0x01 /* plaintext */
#define APPLICATION_EXT_FUNC_CODE 0xff /* application block */
} ExtensionBlock;
typedef struct SavedImage {
GifImageDesc ImageDesc;
GifByteType *RasterBits; /* on malloc(3) heap */
int ExtensionBlockCount; /* Count of extensions before image */
ExtensionBlock *ExtensionBlocks; /* Extensions before image */
} SavedImage;
typedef struct GifFileType {
GifWord SWidth, SHeight; /* Size of virtual canvas */
GifWord SColorResolution; /* How many colors can we generate? */
GifWord SBackGroundColor; /* Background color for virtual canvas */
GifByteType AspectByte; /* Used to compute pixel aspect ratio */
ColorMapObject *SColorMap; /* Global colormap, NULL if nonexistent. */
int ImageCount; /* Number of current image (both APIs) */
GifImageDesc Image; /* Current image (low-level API) */
SavedImage *SavedImages; /* Image sequence (high-level API) */
int ExtensionBlockCount; /* Count extensions past last image */
ExtensionBlock *ExtensionBlocks; /* Extensions past last image */
int Error; /* Last error condition reported */
void *UserData; /* hook to attach user data (TVT) */
void *Private; /* Don't mess with this! */
} GifFileType;
#define GIF_ASPECT_RATIO(n) ((n)+15.0/64.0)
typedef enum {
UNDEFINED_RECORD_TYPE,
SCREEN_DESC_RECORD_TYPE,
IMAGE_DESC_RECORD_TYPE, /* Begin with ',' */
EXTENSION_RECORD_TYPE, /* Begin with '!' */
TERMINATE_RECORD_TYPE /* Begin with ';' */
} GifRecordType;
/* func type to read gif data from arbitrary sources (TVT) */
typedef int (*InputFunc) (GifFileType *, GifByteType *, int);
/* func type to write gif data to arbitrary targets.
* Returns count of bytes written. (MRB)
*/
typedef int (*OutputFunc) (GifFileType *, const GifByteType *, int);
/******************************************************************************
GIF89 structures
******************************************************************************/
typedef struct GraphicsControlBlock {
int DisposalMode;
#define DISPOSAL_UNSPECIFIED 0 /* No disposal specified. */
#define DISPOSE_DO_NOT 1 /* Leave image in place */
#define DISPOSE_BACKGROUND 2 /* Set area too background color */
#define DISPOSE_PREVIOUS 3 /* Restore to previous content */
bool UserInputFlag; /* User confirmation required before disposal */
int DelayTime; /* pre-display delay in 0.01sec units */
int TransparentColor; /* Palette index for transparency, -1 if none */
#define NO_TRANSPARENT_COLOR -1
} GraphicsControlBlock;
/******************************************************************************
GIF encoding routines
******************************************************************************/
/* Main entry points */
GifFileType *EGifOpenFileName(const char *GifFileName,
const bool GifTestExistence, int *Error);
GifFileType *EGifOpenFileHandle(const int GifFileHandle, int *Error);
GifFileType *EGifOpen(void *userPtr, OutputFunc writeFunc, int *Error);
int EGifSpew(GifFileType * GifFile);
char *EGifGetGifVersion(GifFileType *GifFile); /* new in 5.x */
int EGifCloseFile(GifFileType * GifFile);
#define E_GIF_ERR_OPEN_FAILED 1 /* And EGif possible errors. */
#define E_GIF_ERR_WRITE_FAILED 2
#define E_GIF_ERR_HAS_SCRN_DSCR 3
#define E_GIF_ERR_HAS_IMAG_DSCR 4
#define E_GIF_ERR_NO_COLOR_MAP 5
#define E_GIF_ERR_DATA_TOO_BIG 6
#define E_GIF_ERR_NOT_ENOUGH_MEM 7
#define E_GIF_ERR_DISK_IS_FULL 8
#define E_GIF_ERR_CLOSE_FAILED 9
#define E_GIF_ERR_NOT_WRITEABLE 10
/* These are legacy. You probably do not want to call them directly */
int EGifPutScreenDesc(GifFileType *GifFile,
const int GifWidth, const int GifHeight,
const int GifColorRes,
const int GifBackGround,
const ColorMapObject *GifColorMap);
int EGifPutImageDesc(GifFileType *GifFile,
const int GifLeft, const int GifTop,
const int GifWidth, const int GifHeight,
const bool GifInterlace,
const ColorMapObject *GifColorMap);
void EGifSetGifVersion(GifFileType *GifFile, const bool gif89);
int EGifPutLine(GifFileType *GifFile, GifPixelType *GifLine,
int GifLineLen);
int EGifPutPixel(GifFileType *GifFile, const GifPixelType GifPixel);
int EGifPutComment(GifFileType *GifFile, const char *GifComment);
int EGifPutExtensionLeader(GifFileType *GifFile, const int GifExtCode);
int EGifPutExtensionBlock(GifFileType *GifFile,
const int GifExtLen, const void *GifExtension);
int EGifPutExtensionTrailer(GifFileType *GifFile);
int EGifPutExtension(GifFileType *GifFile, const int GifExtCode,
const int GifExtLen,
const void *GifExtension);
int EGifPutCode(GifFileType *GifFile, int GifCodeSize,
const GifByteType *GifCodeBlock);
int EGifPutCodeNext(GifFileType *GifFile,
const GifByteType *GifCodeBlock);
/******************************************************************************
GIF decoding routines
******************************************************************************/
/* Main entry points */
GifFileType *DGifOpenFileName(const char *GifFileName, int *Error);
GifFileType *DGifOpenFileHandle(int GifFileHandle, int *Error);
int DGifSlurp(GifFileType * GifFile);
GifFileType *DGifOpen(void *userPtr, InputFunc readFunc, int *Error); /* new one (TVT) */
int DGifCloseFile(GifFileType * GifFile);
#define D_GIF_ERR_OPEN_FAILED 101 /* And DGif possible errors. */
#define D_GIF_ERR_READ_FAILED 102
#define D_GIF_ERR_NOT_GIF_FILE 103
#define D_GIF_ERR_NO_SCRN_DSCR 104
#define D_GIF_ERR_NO_IMAG_DSCR 105
#define D_GIF_ERR_NO_COLOR_MAP 106
#define D_GIF_ERR_WRONG_RECORD 107
#define D_GIF_ERR_DATA_TOO_BIG 108
#define D_GIF_ERR_NOT_ENOUGH_MEM 109
#define D_GIF_ERR_CLOSE_FAILED 110
#define D_GIF_ERR_NOT_READABLE 111
#define D_GIF_ERR_IMAGE_DEFECT 112
#define D_GIF_ERR_EOF_TOO_SOON 113
/* These are legacy. You probably do not want to call them directly */
int DGifGetScreenDesc(GifFileType *GifFile);
int DGifGetRecordType(GifFileType *GifFile, GifRecordType *GifType);
int DGifGetImageDesc(GifFileType *GifFile, bool changeImageCount);
int DGifGetLine(GifFileType *GifFile, GifPixelType *GifLine, int GifLineLen);
int DGifGetPixel(GifFileType *GifFile, GifPixelType GifPixel);
int DGifGetComment(GifFileType *GifFile, char *GifComment);
int DGifGetExtension(GifFileType *GifFile, int *GifExtCode,
GifByteType **GifExtension);
int DGifGetExtensionNext(GifFileType *GifFile, GifByteType **GifExtension,int* ExtCode);
int DGifGetCode(GifFileType *GifFile, int *GifCodeSize,
GifByteType **GifCodeBlock);
int DGifGetCodeNext(GifFileType *GifFile, GifByteType **GifCodeBlock);
int DGifGetLZCodes(GifFileType *GifFile, int *GifCode);
/******************************************************************************
Color table quantization (deprecated)
******************************************************************************/
int GifQuantizeBuffer(unsigned int Width, unsigned int Height,
int *ColorMapSize, GifByteType * RedInput,
GifByteType * GreenInput, GifByteType * BlueInput,
GifByteType * OutputBuffer,
GifColorType * OutputColorMap);
/******************************************************************************
Error handling and reporting.
******************************************************************************/
extern char *GifErrorString(int ErrorCode); /* new in 2012 - ESR */
/*****************************************************************************
Everything below this point is new after version 1.2, supporting `slurp
mode' for doing I/O in two big belts with all the image-bashing in core.
******************************************************************************/
/******************************************************************************
Color map handling from gif_alloc.c
******************************************************************************/
extern ColorMapObject *GifMakeMapObject(int ColorCount,
const GifColorType *ColorMap);
extern void GifFreeMapObject(ColorMapObject *Object);
extern ColorMapObject *GifUnionColorMap(const ColorMapObject *ColorIn1,
const ColorMapObject *ColorIn2,
GifPixelType ColorTransIn2[]);
extern int GifBitSize(int n);
/******************************************************************************
Support for the in-core structures allocation (slurp mode).
******************************************************************************/
extern void GifApplyTranslation(SavedImage *Image, GifPixelType Translation[]);
extern int GifAddExtensionBlock(int *ExtensionBlock_Count,
ExtensionBlock **ExtensionBlocks,
int Function,
unsigned int Len, unsigned char ExtData[]);
extern void GifFreeExtensions(int *ExtensionBlock_Count,
ExtensionBlock **ExtensionBlocks);
extern SavedImage *GifMakeSavedImage(GifFileType *GifFile,
const SavedImage *CopyFrom);
extern void GifFreeSavedImages(GifFileType *GifFile);
/******************************************************************************
5.x functions for GIF89 graphics control blocks
******************************************************************************/
int DGifExtensionToGCB(const size_t GifExtensionLength,
const GifByteType *GifExtension,
GraphicsControlBlock *GCB);
size_t EGifGCBToExtension(const GraphicsControlBlock *GCB,
GifByteType *GifExtension);
int DGifSavedExtensionToGCB(GifFileType *GifFile,
int ImageIndex,
GraphicsControlBlock *GCB);
int EGifGCBToSavedExtension(const GraphicsControlBlock *GCB,
GifFileType *GifFile,
int ImageIndex);
/******************************************************************************
The library's internal utility font
******************************************************************************/
#define GIF_FONT_WIDTH 8
#define GIF_FONT_HEIGHT 8
extern const unsigned char GifAsciiTable8x8[][GIF_FONT_WIDTH];
extern void GifDrawText8x8(SavedImage *Image,
const int x, const int y,
const char *legend, const int color);
extern void GifDrawBox(SavedImage *Image,
const int x, const int y,
const int w, const int d, const int color);
extern void GifDrawRectangle(SavedImage *Image,
const int x, const int y,
const int w, const int d, const int color);
extern void GifDrawBoxedText8x8(SavedImage *Image,
const int x, const int y,
const char *legend,
const int border, const int bg, const int fg);
#ifdef __cplusplus
}
#endif /* __cplusplus */
#endif /* _GIF_LIB_H */
/* end */

View File

@ -1,59 +0,0 @@
/****************************************************************************
gif_lib_private.h - internal giflib routines and structures
****************************************************************************/
#ifndef _GIF_LIB_PRIVATE_H
#define _GIF_LIB_PRIVATE_H
#include "gif_lib.h"
#include "gif_hash.h"
#define EXTENSION_INTRODUCER 0x21
#define DESCRIPTOR_INTRODUCER 0x2c
#define TERMINATOR_INTRODUCER 0x3b
#define LZ_MAX_CODE 4095 /* Biggest code possible in 12 bits. */
#define LZ_BITS 12
#define FLUSH_OUTPUT 4096 /* Impossible code, to signal flush. */
#define FIRST_CODE 4097 /* Impossible code, to signal first. */
#define NO_SUCH_CODE 4098 /* Impossible code, to signal empty. */
#define FILE_STATE_WRITE 0x01
#define FILE_STATE_SCREEN 0x02
#define FILE_STATE_IMAGE 0x04
#define FILE_STATE_READ 0x08
#define IS_READABLE(Private) (Private->FileState & FILE_STATE_READ)
#define IS_WRITEABLE(Private) (Private->FileState & FILE_STATE_WRITE)
typedef struct GifFilePrivateType {
GifWord FileState, FileHandle, /* Where all this data goes to! */
BitsPerPixel, /* Bits per pixel (Codes uses at least this + 1). */
ClearCode, /* The CLEAR LZ code. */
EOFCode, /* The EOF LZ code. */
RunningCode, /* The next code algorithm can generate. */
RunningBits, /* The number of bits required to represent RunningCode. */
MaxCode1, /* 1 bigger than max. possible code, in RunningBits bits. */
LastCode, /* The code before the current code. */
CrntCode, /* Current algorithm code. */
StackPtr, /* For character stack (see below). */
CrntShiftState; /* Number of bits in CrntShiftDWord. */
unsigned long CrntShiftDWord; /* For bytes decomposition into codes. */
unsigned long PixelCount; /* Number of pixels in image. */
FILE *File; /* File as stream. */
InputFunc Read; /* function to read gif input (TVT) */
OutputFunc Write; /* function to write gif output (MRB) */
GifByteType Buf[256]; /* Compressed input is buffered here. */
GifByteType Stack[LZ_MAX_CODE]; /* Decoded pixels are stacked here. */
GifByteType Suffix[LZ_MAX_CODE + 1]; /* So we can trace the codes. */
GifPrefixType Prefix[LZ_MAX_CODE + 1];
GifHashTableType *HashTable;
bool gif89;
} GifFilePrivateType;
#endif /* _GIF_LIB_PRIVATE_H */
/* end */

View File

@ -1,400 +0,0 @@
/*****************************************************************************
GIF construction tools
****************************************************************************/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "gif_lib.h"
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
/******************************************************************************
Miscellaneous utility functions
******************************************************************************/
/* return smallest bitfield size n will fit in */
int
GifBitSize(int n)
{
register int i;
for (i = 1; i <= 8; i++)
if ((1 << i) >= n)
break;
return (i);
}
/******************************************************************************
Color map object functions
******************************************************************************/
/*
* Allocate a color map of given size; initialize with contents of
* ColorMap if that pointer is non-NULL.
*/
ColorMapObject *
GifMakeMapObject(int ColorCount, const GifColorType *ColorMap)
{
ColorMapObject *Object;
/*** FIXME: Our ColorCount has to be a power of two. Is it necessary to
* make the user know that or should we automatically round up instead? */
if (ColorCount != (1 << GifBitSize(ColorCount))) {
return ((ColorMapObject *) NULL);
}
Object = (ColorMapObject *)malloc(sizeof(ColorMapObject));
if (Object == (ColorMapObject *) NULL) {
return ((ColorMapObject *) NULL);
}
Object->Colors = (GifColorType *)calloc(ColorCount, sizeof(GifColorType));
if (Object->Colors == (GifColorType *) NULL) {
free(Object);
return ((ColorMapObject *) NULL);
}
Object->ColorCount = ColorCount;
Object->BitsPerPixel = GifBitSize(ColorCount);
if (ColorMap != NULL) {
memcpy((char *)Object->Colors,
(char *)ColorMap, ColorCount * sizeof(GifColorType));
}
return (Object);
}
/*******************************************************************************
Free a color map object
*******************************************************************************/
void
GifFreeMapObject(ColorMapObject *Object)
{
if (Object != NULL) {
(void)free(Object->Colors);
(void)free(Object);
}
}
#ifdef DEBUG
void
DumpColorMap(ColorMapObject *Object,
FILE * fp)
{
if (Object != NULL) {
int i, j, Len = Object->ColorCount;
for (i = 0; i < Len; i += 4) {
for (j = 0; j < 4 && j < Len; j++) {
(void)fprintf(fp, "%3d: %02x %02x %02x ", i + j,
Object->Colors[i + j].Red,
Object->Colors[i + j].Green,
Object->Colors[i + j].Blue);
}
(void)fprintf(fp, "\n");
}
}
}
#endif /* DEBUG */
/*******************************************************************************
Compute the union of two given color maps and return it. If result can't
fit into 256 colors, NULL is returned, the allocated union otherwise.
ColorIn1 is copied as is to ColorUnion, while colors from ColorIn2 are
copied iff they didn't exist before. ColorTransIn2 maps the old
ColorIn2 into the ColorUnion color map table./
*******************************************************************************/
ColorMapObject *
GifUnionColorMap(const ColorMapObject *ColorIn1,
const ColorMapObject *ColorIn2,
GifPixelType ColorTransIn2[])
{
int i, j, CrntSlot, RoundUpTo, NewGifBitSize;
ColorMapObject *ColorUnion;
/*
* We don't worry about duplicates within either color map; if
* the caller wants to resolve those, he can perform unions
* with an empty color map.
*/
/* Allocate table which will hold the result for sure. */
ColorUnion = GifMakeMapObject(MAX(ColorIn1->ColorCount,
ColorIn2->ColorCount) * 2, NULL);
if (ColorUnion == NULL)
return (NULL);
/*
* Copy ColorIn1 to ColorUnion.
*/
for (i = 0; i < ColorIn1->ColorCount; i++)
ColorUnion->Colors[i] = ColorIn1->Colors[i];
CrntSlot = ColorIn1->ColorCount;
/*
* Potentially obnoxious hack:
*
* Back CrntSlot down past all contiguous {0, 0, 0} slots at the end
* of table 1. This is very useful if your display is limited to
* 16 colors.
*/
while (ColorIn1->Colors[CrntSlot - 1].Red == 0
&& ColorIn1->Colors[CrntSlot - 1].Green == 0
&& ColorIn1->Colors[CrntSlot - 1].Blue == 0)
CrntSlot--;
/* Copy ColorIn2 to ColorUnion (use old colors if they exist): */
for (i = 0; i < ColorIn2->ColorCount && CrntSlot <= 256; i++) {
/* Let's see if this color already exists: */
for (j = 0; j < ColorIn1->ColorCount; j++)
if (memcmp (&ColorIn1->Colors[j], &ColorIn2->Colors[i],
sizeof(GifColorType)) == 0)
break;
if (j < ColorIn1->ColorCount)
ColorTransIn2[i] = j; /* color exists in Color1 */
else {
/* Color is new - copy it to a new slot: */
ColorUnion->Colors[CrntSlot] = ColorIn2->Colors[i];
ColorTransIn2[i] = CrntSlot++;
}
}
if (CrntSlot > 256) {
GifFreeMapObject(ColorUnion);
return ((ColorMapObject *) NULL);
}
NewGifBitSize = GifBitSize(CrntSlot);
RoundUpTo = (1 << NewGifBitSize);
if (RoundUpTo != ColorUnion->ColorCount) {
register GifColorType *Map = ColorUnion->Colors;
/*
* Zero out slots up to next power of 2.
* We know these slots exist because of the way ColorUnion's
* start dimension was computed.
*/
for (j = CrntSlot; j < RoundUpTo; j++)
Map[j].Red = Map[j].Green = Map[j].Blue = 0;
/* perhaps we can shrink the map? */
if (RoundUpTo < ColorUnion->ColorCount)
ColorUnion->Colors = (GifColorType *)realloc(Map,
sizeof(GifColorType) * RoundUpTo);
}
ColorUnion->ColorCount = RoundUpTo;
ColorUnion->BitsPerPixel = NewGifBitSize;
return (ColorUnion);
}
/*******************************************************************************
Apply a given color translation to the raster bits of an image
*******************************************************************************/
void
GifApplyTranslation(SavedImage *Image, GifPixelType Translation[])
{
register int i;
register int RasterSize = Image->ImageDesc.Height * Image->ImageDesc.Width;
for (i = 0; i < RasterSize; i++)
Image->RasterBits[i] = Translation[Image->RasterBits[i]];
}
/******************************************************************************
Extension record functions
******************************************************************************/
int
GifAddExtensionBlock(int *ExtensionBlockCount,
ExtensionBlock **ExtensionBlocks,
int Function,
unsigned int Len,
unsigned char ExtData[])
{
ExtensionBlock *ep;
if (*ExtensionBlocks == NULL)
*ExtensionBlocks=(ExtensionBlock *)malloc(sizeof(ExtensionBlock));
else
*ExtensionBlocks = (ExtensionBlock *)realloc(*ExtensionBlocks,
sizeof(ExtensionBlock) *
(*ExtensionBlockCount + 1));
if (*ExtensionBlocks == NULL)
return (GIF_ERROR);
ep = &(*ExtensionBlocks)[(*ExtensionBlockCount)++];
ep->Function = Function;
ep->ByteCount=Len;
ep->Bytes = (GifByteType *)malloc(ep->ByteCount);
if (ep->Bytes == NULL)
return (GIF_ERROR);
if (ExtData != NULL) {
memcpy(ep->Bytes, ExtData, Len);
}
return (GIF_OK);
}
void
GifFreeExtensions(int *ExtensionBlockCount,
ExtensionBlock **ExtensionBlocks)
{
ExtensionBlock *ep;
if (*ExtensionBlocks == NULL)
return;
for (ep = *ExtensionBlocks;
ep < (*ExtensionBlocks + *ExtensionBlockCount);
ep++)
(void)free((char *)ep->Bytes);
(void)free((char *)*ExtensionBlocks);
*ExtensionBlocks = NULL;
*ExtensionBlockCount = 0;
}
/******************************************************************************
Image block allocation functions
******************************************************************************/
/* Private Function:
* Frees the last image in the GifFile->SavedImages array
*/
void
FreeLastSavedImage(GifFileType *GifFile)
{
SavedImage *sp;
if ((GifFile == NULL) || (GifFile->SavedImages == NULL))
return;
/* Remove one SavedImage from the GifFile */
GifFile->ImageCount--;
sp = &GifFile->SavedImages[GifFile->ImageCount];
/* Deallocate its Colormap */
if (sp->ImageDesc.ColorMap != NULL) {
GifFreeMapObject(sp->ImageDesc.ColorMap);
sp->ImageDesc.ColorMap = NULL;
}
/* Deallocate the image data */
if (sp->RasterBits != NULL)
free((char *)sp->RasterBits);
/* Deallocate any extensions */
GifFreeExtensions(&sp->ExtensionBlockCount, &sp->ExtensionBlocks);
/*** FIXME: We could realloc the GifFile->SavedImages structure but is
* there a point to it? Saves some memory but we'd have to do it every
* time. If this is used in GifFreeSavedImages then it would be inefficient
* (The whole array is going to be deallocated.) If we just use it when
* we want to free the last Image it's convenient to do it here.
*/
}
/*
* Append an image block to the SavedImages array
*/
SavedImage *
GifMakeSavedImage(GifFileType *GifFile, const SavedImage *CopyFrom)
{
if (GifFile->SavedImages == NULL)
GifFile->SavedImages = (SavedImage *)malloc(sizeof(SavedImage));
else
GifFile->SavedImages = (SavedImage *)realloc(GifFile->SavedImages,
sizeof(SavedImage) * (GifFile->ImageCount + 1));
if (GifFile->SavedImages == NULL)
return ((SavedImage *)NULL);
else {
SavedImage *sp = &GifFile->SavedImages[GifFile->ImageCount++];
memset((char *)sp, '\0', sizeof(SavedImage));
if (CopyFrom != NULL) {
memcpy((char *)sp, CopyFrom, sizeof(SavedImage));
/*
* Make our own allocated copies of the heap fields in the
* copied record. This guards against potential aliasing
* problems.
*/
/* first, the local color map */
if (sp->ImageDesc.ColorMap != NULL) {
sp->ImageDesc.ColorMap = GifMakeMapObject(
CopyFrom->ImageDesc.ColorMap->ColorCount,
CopyFrom->ImageDesc.ColorMap->Colors);
if (sp->ImageDesc.ColorMap == NULL) {
FreeLastSavedImage(GifFile);
return (SavedImage *)(NULL);
}
}
/* next, the raster */
sp->RasterBits = (unsigned char *)malloc(sizeof(GifPixelType) *
CopyFrom->ImageDesc.Height *
CopyFrom->ImageDesc.Width);
if (sp->RasterBits == NULL) {
FreeLastSavedImage(GifFile);
return (SavedImage *)(NULL);
}
memcpy(sp->RasterBits, CopyFrom->RasterBits,
sizeof(GifPixelType) * CopyFrom->ImageDesc.Height *
CopyFrom->ImageDesc.Width);
/* finally, the extension blocks */
if (sp->ExtensionBlocks != NULL) {
sp->ExtensionBlocks = (ExtensionBlock *)malloc(
sizeof(ExtensionBlock) *
CopyFrom->ExtensionBlockCount);
if (sp->ExtensionBlocks == NULL) {
FreeLastSavedImage(GifFile);
return (SavedImage *)(NULL);
}
memcpy(sp->ExtensionBlocks, CopyFrom->ExtensionBlocks,
sizeof(ExtensionBlock) * CopyFrom->ExtensionBlockCount);
}
}
return (sp);
}
}
void
GifFreeSavedImages(GifFileType *GifFile)
{
SavedImage *sp;
if ((GifFile == NULL) || (GifFile->SavedImages == NULL)) {
return;
}
for (sp = GifFile->SavedImages;
sp < GifFile->SavedImages + GifFile->ImageCount; sp++) {
if (sp->ImageDesc.ColorMap != NULL) {
GifFreeMapObject(sp->ImageDesc.ColorMap);
sp->ImageDesc.ColorMap = NULL;
}
if (sp->RasterBits != NULL)
free((char *)sp->RasterBits);
GifFreeExtensions(&sp->ExtensionBlockCount, &sp->ExtensionBlocks);
}
free((char *)GifFile->SavedImages);
GifFile->SavedImages = NULL;
}
/* end */

View File

@ -68,7 +68,7 @@ static void fastBlurMore(int imageWidth, int imageHeight, int imageStride, void
const int r1 = radius + 1;
const int div = radius * 2 + 1;
if (radius > 15 || div >= w || div >= h || w * h > 128 * 128 || imageStride > imageWidth * 4) {
if (radius > 15 || div >= w || div >= h || w * h > 150 * 150 || imageStride > imageWidth * 4) {
return;
}
@ -151,6 +151,9 @@ static void fastBlurMore(int imageWidth, int imageHeight, int imageStride, void
static void fastBlur(int imageWidth, int imageHeight, int imageStride, void *pixels, int radius) {
uint8_t *pix = (uint8_t *)pixels;
if (pix == NULL) {
return;
}
const int w = imageWidth;
const int h = imageHeight;
const int stride = imageStride;
@ -169,7 +172,7 @@ static void fastBlur(int imageWidth, int imageHeight, int imageStride, void *pix
return;
}
if (radius > 15 || div >= w || div >= h || w * h > 128 * 128 || imageStride > imageWidth * 4) {
if (radius > 15 || div >= w || div >= h || w * h > 150 * 150 || imageStride > imageWidth * 4) {
return;
}
@ -265,18 +268,12 @@ METHODDEF(void) my_error_exit(j_common_ptr cinfo) {
longjmp(myerr->setjmp_buffer, 1);
}
JNIEXPORT void Java_org_telegram_messenger_Utilities_blurBitmap(JNIEnv *env, jclass class, jobject bitmap, int radius, int unpin) {
JNIEXPORT void Java_org_telegram_messenger_Utilities_blurBitmap(JNIEnv *env, jclass class, jobject bitmap, int radius, int unpin, int width, int height, int stride) {
if (!bitmap) {
return;
}
AndroidBitmapInfo info;
if (AndroidBitmap_getInfo(env, bitmap, &info) < 0) {
return;
}
if (info.format != ANDROID_BITMAP_FORMAT_RGBA_8888 || !info.width || !info.height || !info.stride) {
if (!width || !height || !stride) {
return;
}
@ -285,9 +282,9 @@ JNIEXPORT void Java_org_telegram_messenger_Utilities_blurBitmap(JNIEnv *env, jcl
return;
}
if (radius <= 3) {
fastBlur(info.width, info.height, info.stride, pixels, radius);
fastBlur(width, height, stride, pixels, radius);
} else {
fastBlurMore(info.width, info.height, info.stride, pixels, radius);
fastBlurMore(width, height, stride, pixels, radius);
}
if (unpin) {
AndroidBitmap_unlockPixels(env, bitmap);
@ -399,10 +396,20 @@ JNIEXPORT void Java_org_telegram_messenger_Utilities_calcCDT(JNIEnv *env, jclass
}
JNIEXPORT int Java_org_telegram_messenger_Utilities_pinBitmap(JNIEnv *env, jclass class, jobject bitmap) {
if (bitmap == NULL) {
return;
}
unsigned char *pixels;
return AndroidBitmap_lockPixels(env, bitmap, &pixels) >= 0 ? 1 : 0;
}
JNIEXPORT int Java_org_telegram_messenger_Utilities_unpinBitmap(JNIEnv *env, jclass class, jobject bitmap) {
if (bitmap == NULL) {
return;
}
AndroidBitmap_unlockPixels(env, bitmap);
}
JNIEXPORT void Java_org_telegram_messenger_Utilities_loadBitmap(JNIEnv *env, jclass class, jstring path, jobject bitmap, int scale, int width, int height, int stride) {
AndroidBitmapInfo info;

View File

@ -7,10 +7,10 @@
#include <openssl/aes.h>
#include "utils.h"
#include "sqlite.h"
#include "gif.h"
#include "image.h"
int registerNativeTgNetFunctions(JavaVM *vm, JNIEnv *env);
int gifvideoOnJNILoad(JavaVM *vm, JNIEnv *env);
jint JNI_OnLoad(JavaVM *vm, void *reserved) {
JNIEnv *env = 0;
@ -28,17 +28,19 @@ jint JNI_OnLoad(JavaVM *vm, void *reserved) {
return -1;
}
if (registerNativeTgNetFunctions(vm, env) != JNI_TRUE) {
if (gifvideoOnJNILoad(vm, env) == -1) {
return -1;
}
gifOnJNILoad(vm, reserved, env);
if (registerNativeTgNetFunctions(vm, env) != JNI_TRUE) {
return -1;
}
return JNI_VERSION_1_6;
}
void JNI_OnUnload(JavaVM *vm, void *reserved) {
gifOnJNIUnload(vm, reserved);
}
JNIEXPORT void Java_org_telegram_messenger_Utilities_aesIgeEncryption(JNIEnv *env, jclass class, jobject buffer, jbyteArray key, jbyteArray iv, jboolean encrypt, int offset, int length) {

View File

@ -18,7 +18,6 @@
#include "libyuv/convert_from.h"
#include "libyuv/convert_from_argb.h"
#include "libyuv/cpu_id.h"
#include "libyuv/format_conversion.h"
#include "libyuv/mjpeg_decoder.h"
#include "libyuv/planar_functions.h"
#include "libyuv/rotate.h"

View File

@ -22,6 +22,11 @@ extern "C" {
LIBYUV_API
uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed);
// Scan an opaque argb image and return fourcc based on alpha offset.
// Returns FOURCC_ARGB, FOURCC_BGRA, or 0 if unknown.
LIBYUV_API
uint32 ARGBDetect(const uint8* argb, int stride_argb, int width, int height);
// Sum Square Error - used to compute Mean Square Error or PSNR.
LIBYUV_API
uint64 ComputeSumSquareError(const uint8* src_a,

View File

@ -71,6 +71,8 @@ int I400ToI420(const uint8* src_y, int src_stride_y,
uint8* dst_v, int dst_stride_v,
int width, int height);
#define J400ToJ420 I400ToI420
// Convert NV12 to I420.
LIBYUV_API
int NV12ToI420(const uint8* src_y, int src_stride_y,
@ -113,15 +115,6 @@ int M420ToI420(const uint8* src_m420, int src_stride_m420,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert Q420 to I420.
LIBYUV_API
int Q420ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_yuy2, int src_stride_yuy2,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// ARGB little endian (bgra in memory) to I420.
LIBYUV_API
int ARGBToI420(const uint8* src_frame, int src_stride_frame,
@ -211,8 +204,6 @@ int MJPGSize(const uint8* sample, size_t sample_size,
int* width, int* height);
#endif
// Note Bayer formats (BGGR) To I420 are in format_conversion.h
// Convert camera sample to I420 with cropping, rotation and vertical flip.
// "src_size" is needed to parse MJPG.
// "dst_stride_y" number of bytes in a row of the dst_y plane.

View File

@ -18,7 +18,6 @@
#include "libyuv/rotate.h"
// TODO(fbarchard): This set of functions should exactly match convert.h
// Add missing Q420.
// TODO(fbarchard): Add tests. Create random content of right size and convert
// with C vs Opt and or to I420 and compare.
// TODO(fbarchard): Some of these functions lack parameter setting.
@ -61,6 +60,22 @@ int I444ToARGB(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert J444 to ARGB.
LIBYUV_API
int J444ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert I444 to ABGR.
LIBYUV_API
int I444ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height);
// Convert I411 to ARGB.
LIBYUV_API
int I411ToARGB(const uint8* src_y, int src_stride_y,
@ -69,20 +84,38 @@ int I411ToARGB(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert I400 (grey) to ARGB.
// Convert I420 with Alpha to preattenuated ARGB.
LIBYUV_API
int I420AlphaToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
const uint8* src_a, int src_stride_a,
uint8* dst_argb, int dst_stride_argb,
int width, int height, int attenuate);
// Convert I420 with Alpha to preattenuated ABGR.
LIBYUV_API
int I420AlphaToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
const uint8* src_a, int src_stride_a,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height, int attenuate);
// Convert I400 (grey) to ARGB. Reverse of ARGBToI400.
LIBYUV_API
int I400ToARGB(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Alias.
#define YToARGB I400ToARGB_Reference
// Convert I400 to ARGB. Reverse of ARGBToI400.
// Convert J400 (jpeg grey) to ARGB.
LIBYUV_API
int I400ToARGB_Reference(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
int J400ToARGB(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Alias.
#define YToARGB I400ToARGB
// Convert NV12 to ARGB.
LIBYUV_API
@ -104,13 +137,6 @@ int M420ToARGB(const uint8* src_m420, int src_stride_m420,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// TODO(fbarchard): Convert Q420 to ARGB.
// LIBYUV_API
// int Q420ToARGB(const uint8* src_y, int src_stride_y,
// const uint8* src_yuy2, int src_stride_yuy2,
// uint8* dst_argb, int dst_stride_argb,
// int width, int height);
// Convert YUY2 to ARGB.
LIBYUV_API
int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
@ -123,6 +149,70 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert J420 to ARGB.
LIBYUV_API
int J420ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert J422 to ARGB.
LIBYUV_API
int J422ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert J420 to ABGR.
LIBYUV_API
int J420ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height);
// Convert J422 to ABGR.
LIBYUV_API
int J422ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height);
// Convert H420 to ARGB.
LIBYUV_API
int H420ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert H422 to ARGB.
LIBYUV_API
int H422ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert H420 to ABGR.
LIBYUV_API
int H420ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height);
// Convert H422 to ABGR.
LIBYUV_API
int H422ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height);
// BGRA little endian (argb in memory) to ARGB.
LIBYUV_API
int BGRAToARGB(const uint8* src_frame, int src_stride_frame,
@ -184,8 +274,6 @@ int MJPGToARGB(const uint8* sample, size_t sample_size,
int dst_width, int dst_height);
#endif
// Note Bayer formats (BGGR) to ARGB are in format_conversion.h.
// Convert camera sample to ARGB with cropping, rotation and vertical flip.
// "src_size" is needed to parse MJPG.
// "dst_stride_argb" number of bytes in a row of the dst_argb plane.

View File

@ -57,7 +57,6 @@ int I400Copy(const uint8* src_y, int src_stride_y,
int width, int height);
// TODO(fbarchard): I420ToM420
// TODO(fbarchard): I420ToQ420
LIBYUV_API
int I420ToNV12(const uint8* src_y, int src_stride_y,
@ -138,6 +137,17 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
// Convert I420 To RGB565 with 4x4 dither matrix (16 bytes).
// Values in dither matrix from 0 to 7 recommended.
// The order of the dither matrix is first byte is upper left.
LIBYUV_API
int I420ToRGB565Dither(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
const uint8* dither4x4, int width, int height);
LIBYUV_API
int I420ToARGB1555(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
@ -152,8 +162,6 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
// Note Bayer formats (BGGR) To I420 are in format_conversion.h.
// Convert I420 to specified format.
// "dst_sample_stride" is bytes in a row for the destination. Pass 0 if the
// buffer has contiguous rows. Can be negative. A multiple of 16 is optimal.

View File

@ -61,6 +61,16 @@ int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
uint8* dst_rgb565, int dst_stride_rgb565,
int width, int height);
// Convert ARGB To RGB565 with 4x4 dither matrix (16 bytes).
// Values in dither matrix from 0 to 7 recommended.
// The order of the dither matrix is first byte is upper left.
// TODO(fbarchard): Consider pointer to 2d array for dither4x4.
// const uint8(*dither)[4][4];
LIBYUV_API
int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb,
uint8* dst_rgb565, int dst_stride_rgb565,
const uint8* dither4x4, int width, int height);
// Convert ARGB To ARGB1555.
LIBYUV_API
int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb,
@ -105,6 +115,14 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert ARGB to J422.
LIBYUV_API
int ARGBToJ422(const uint8* src_argb, int src_stride_argb,
uint8* dst_yj, int dst_stride_yj,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert ARGB To I411.
LIBYUV_API
int ARGBToI411(const uint8* src_argb, int src_stride_argb,
@ -125,6 +143,12 @@ int ARGBToI400(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y,
int width, int height);
// Convert ARGB to G. (Reverse of J400toARGB, which replicates G back to ARGB)
LIBYUV_API
int ARGBToG(const uint8* src_argb, int src_stride_argb,
uint8* dst_g, int dst_stride_g,
int width, int height);
// Convert ARGB To NV12.
LIBYUV_API
int ARGBToNV12(const uint8* src_argb, int src_stride_argb,

View File

@ -18,9 +18,8 @@ namespace libyuv {
extern "C" {
#endif
// TODO(fbarchard): Consider overlapping bits for different architectures.
// Internal flag to indicate cpuid requires initialization.
#define kCpuInit 0x1
static const int kCpuInitialized = 0x1;
// These flags are only valid on ARM processors.
static const int kCpuHasARM = 0x2;
@ -37,12 +36,12 @@ static const int kCpuHasAVX = 0x200;
static const int kCpuHasAVX2 = 0x400;
static const int kCpuHasERMS = 0x800;
static const int kCpuHasFMA3 = 0x1000;
static const int kCpuHasAVX3 = 0x2000;
// 0x2000, 0x4000, 0x8000 reserved for future X86 flags.
// These flags are only valid on MIPS processors.
static const int kCpuHasMIPS = 0x10000;
static const int kCpuHasMIPS_DSP = 0x20000;
static const int kCpuHasMIPS_DSPR2 = 0x40000;
static const int kCpuHasMIPS_DSPR2 = 0x20000;
// Internal function used to auto-init.
LIBYUV_API
@ -57,7 +56,7 @@ int ArmCpuCaps(const char* cpuinfo_name);
// returns non-zero if instruction set is detected
static __inline int TestCpuFlag(int test_flag) {
LIBYUV_API extern int cpu_info_;
return (cpu_info_ == kCpuInit ? InitCpuFlags() : cpu_info_) & test_flag;
return (!cpu_info_ ? InitCpuFlags() : cpu_info_) & test_flag;
}
// For testing, allow CPU flags to be disabled.

View File

@ -1,168 +0,0 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_FORMATCONVERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_FORMATCONVERSION_H_
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Convert Bayer RGB formats to I420.
LIBYUV_API
int BayerBGGRToI420(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
LIBYUV_API
int BayerGBRGToI420(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
LIBYUV_API
int BayerGRBGToI420(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
LIBYUV_API
int BayerRGGBToI420(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Temporary API mapper.
#define BayerRGBToI420(b, bs, f, y, ys, u, us, v, vs, w, h) \
BayerToI420(b, bs, y, ys, u, us, v, vs, w, h, f)
LIBYUV_API
int BayerToI420(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height,
uint32 src_fourcc_bayer);
// Convert I420 to Bayer RGB formats.
LIBYUV_API
int I420ToBayerBGGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
LIBYUV_API
int I420ToBayerGBRG(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
LIBYUV_API
int I420ToBayerGRBG(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
LIBYUV_API
int I420ToBayerRGGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
// Temporary API mapper.
#define I420ToBayerRGB(y, ys, u, us, v, vs, b, bs, f, w, h) \
I420ToBayer(y, ys, u, us, v, vs, b, bs, w, h, f)
LIBYUV_API
int I420ToBayer(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height,
uint32 dst_fourcc_bayer);
// Convert Bayer RGB formats to ARGB.
LIBYUV_API
int BayerBGGRToARGB(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
LIBYUV_API
int BayerGBRGToARGB(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
LIBYUV_API
int BayerGRBGToARGB(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
LIBYUV_API
int BayerRGGBToARGB(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Temporary API mapper.
#define BayerRGBToARGB(b, bs, f, a, as, w, h) BayerToARGB(b, bs, a, as, w, h, f)
LIBYUV_API
int BayerToARGB(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_argb, int dst_stride_argb,
int width, int height,
uint32 src_fourcc_bayer);
// Converts ARGB to Bayer RGB formats.
LIBYUV_API
int ARGBToBayerBGGR(const uint8* src_argb, int src_stride_argb,
uint8* dst_bayer, int dst_stride_bayer,
int width, int height);
LIBYUV_API
int ARGBToBayerGBRG(const uint8* src_argb, int src_stride_argb,
uint8* dst_bayer, int dst_stride_bayer,
int width, int height);
LIBYUV_API
int ARGBToBayerGRBG(const uint8* src_argb, int src_stride_argb,
uint8* dst_bayer, int dst_stride_bayer,
int width, int height);
LIBYUV_API
int ARGBToBayerRGGB(const uint8* src_argb, int src_stride_argb,
uint8* dst_bayer, int dst_stride_bayer,
int width, int height);
// Temporary API mapper.
#define ARGBToBayerRGB(a, as, b, bs, f, w, h) ARGBToBayer(b, bs, a, as, w, h, f)
LIBYUV_API
int ARGBToBayer(const uint8* src_argb, int src_stride_argb,
uint8* dst_bayer, int dst_stride_bayer,
int width, int height,
uint32 dst_fourcc_bayer);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_FORMATCONVERSION_H_ NOLINT

View File

@ -45,6 +45,7 @@ int I400ToI400(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y,
int width, int height);
#define J400ToJ400 I400ToI400
// Copy I422 to I422.
#define I422ToI422 I422Copy
@ -84,6 +85,18 @@ int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy,
uint8* dst_v, int dst_stride_v,
int width, int height);
LIBYUV_API
int YUY2ToNV12(const uint8* src_yuy2, int src_stride_yuy2,
uint8* dst_y, int dst_stride_y,
uint8* dst_uv, int dst_stride_uv,
int width, int height);
LIBYUV_API
int UYVYToNV12(const uint8* src_uyvy, int src_stride_uyvy,
uint8* dst_y, int dst_stride_y,
uint8* dst_uv, int dst_stride_uv,
int width, int height);
// Convert I420 to I400. (calls CopyPlane ignoring u/v).
LIBYUV_API
int I420ToI400(const uint8* src_y, int src_stride_y,
@ -93,6 +106,7 @@ int I420ToI400(const uint8* src_y, int src_stride_y,
int width, int height);
// Alias
#define J420ToJ400 I420ToI400
#define I420ToI420Mirror I420Mirror
// I420 mirror.
@ -131,13 +145,6 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y,
uint8* dst_rgb565, int dst_stride_rgb565,
int width, int height);
// Convert NV21 to RGB565.
LIBYUV_API
int NV21ToRGB565(const uint8* src_y, int src_stride_y,
const uint8* src_uv, int src_stride_uv,
uint8* dst_rgb565, int dst_stride_rgb565,
int width, int height);
// I422ToARGB is in convert_argb.h
// Convert I422 to BGRA.
LIBYUV_API
@ -163,6 +170,14 @@ int I422ToRGBA(const uint8* src_y, int src_stride_y,
uint8* dst_rgba, int dst_stride_rgba,
int width, int height);
// Alias
#define RGB24ToRAW RAWToRGB24
LIBYUV_API
int RAWToRGB24(const uint8* src_raw, int src_stride_raw,
uint8* dst_rgb24, int dst_stride_rgb24,
int width, int height);
// Draw a rectangle into I420.
LIBYUV_API
int I420Rect(uint8* dst_y, int dst_stride_y,
@ -267,13 +282,13 @@ int ARGBCopy(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Copy ARGB to ARGB.
// Copy Alpha channel of ARGB to alpha of ARGB.
LIBYUV_API
int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Copy ARGB to ARGB.
// Copy Y channel to Alpha of ARGB.
LIBYUV_API
int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb,
@ -287,6 +302,7 @@ LIBYUV_API
ARGBBlendRow GetARGBBlend();
// Alpha Blend ARGB images and store to destination.
// Source is pre-multiplied by alpha using ARGBAttenuate.
// Alpha of destination is set to 255.
LIBYUV_API
int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
@ -294,6 +310,31 @@ int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Alpha Blend plane and store to destination.
// Source is not pre-multiplied by alpha.
LIBYUV_API
int BlendPlane(const uint8* src_y0, int src_stride_y0,
const uint8* src_y1, int src_stride_y1,
const uint8* alpha, int alpha_stride,
uint8* dst_y, int dst_stride_y,
int width, int height);
// Alpha Blend YUV images and store to destination.
// Source is not pre-multiplied by alpha.
// Alpha is full width x height and subsampled to half size to apply to UV.
LIBYUV_API
int I420Blend(const uint8* src_y0, int src_stride_y0,
const uint8* src_u0, int src_stride_u0,
const uint8* src_v0, int src_stride_v0,
const uint8* src_y1, int src_stride_y1,
const uint8* src_u1, int src_stride_u1,
const uint8* src_v1, int src_stride_v1,
const uint8* alpha, int alpha_stride,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Multiply ARGB image by ARGB image. Shifted down by 8. Saturates to 255.
LIBYUV_API
int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
@ -375,36 +416,57 @@ int ARGBShade(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height, uint32 value);
// Interpolate between two ARGB images using specified amount of interpolation
// Interpolate between two images using specified amount of interpolation
// (0 to 255) and store to destination.
// 'interpolation' is specified as 8 bit fraction where 0 means 100% src_argb0
// and 255 means 1% src_argb0 and 99% src_argb1.
// Internally uses ARGBScale bilinear filtering.
// Caveat: This function will write up to 16 bytes beyond the end of dst_argb.
// 'interpolation' is specified as 8 bit fraction where 0 means 100% src0
// and 255 means 1% src0 and 99% src1.
LIBYUV_API
int InterpolatePlane(const uint8* src0, int src_stride0,
const uint8* src1, int src_stride1,
uint8* dst, int dst_stride,
int width, int height, int interpolation);
// Interpolate between two ARGB images using specified amount of interpolation
// Internally calls InterpolatePlane with width * 4 (bpp).
LIBYUV_API
int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
const uint8* src_argb1, int src_stride_argb1,
uint8* dst_argb, int dst_stride_argb,
int width, int height, int interpolation);
#if defined(__pnacl__) || defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \
defined(TARGET_IPHONE_SIMULATOR)
// Interpolate between two YUV images using specified amount of interpolation
// Internally calls InterpolatePlane on each plane where the U and V planes
// are half width and half height.
LIBYUV_API
int I420Interpolate(const uint8* src0_y, int src0_stride_y,
const uint8* src0_u, int src0_stride_u,
const uint8* src0_v, int src0_stride_v,
const uint8* src1_y, int src1_stride_y,
const uint8* src1_u, int src1_stride_u,
const uint8* src1_v, int src1_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height, int interpolation);
#if defined(__pnacl__) || defined(__CLR_VER) || \
(defined(__i386__) && !defined(__SSE2__))
#define LIBYUV_DISABLE_X86
#endif
// The following are available on all x86 platforms:
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
#define HAS_ARGBAFFINEROW_SSE2
#endif
// Row functions for copying a pixels from a source with a slope to a row
// Row function for copying pixels from a source with a slope to a row
// of destination. Useful for scaling, rotation, mirror, texture mapping.
LIBYUV_API
void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
uint8* dst_argb, const float* uv_dudv, int width);
// The following are available on all x86 platforms:
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
LIBYUV_API
void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
uint8* dst_argb, const float* uv_dudv, int width);
#define HAS_ARGBAFFINEROW_SSE2
#endif // LIBYUV_DISABLE_X86
// Shuffle ARGB channel order. e.g. BGRA to ARGB.
// shuffler is 16 bytes and must be aligned.

File diff suppressed because it is too large Load Diff

View File

@ -34,6 +34,7 @@ void ScalePlane(const uint8* src, int src_stride,
int dst_width, int dst_height,
enum FilterMode filtering);
LIBYUV_API
void ScalePlane_16(const uint16* src, int src_stride,
int src_width, int src_height,
uint16* dst, int dst_stride,

View File

@ -35,7 +35,6 @@ int ARGBScaleClip(const uint8* src_argb, int src_stride_argb,
int clip_x, int clip_y, int clip_width, int clip_height,
enum FilterMode filtering);
// TODO(fbarchard): Implement this.
// Scale with YUV conversion to ARGB and clipping.
LIBYUV_API
int YUVToARGBScaleClip(const uint8* src_y, int src_stride_y,

View File

@ -12,53 +12,79 @@
#define INCLUDE_LIBYUV_SCALE_ROW_H_
#include "libyuv/basic_types.h"
#include "libyuv/scale.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
#if defined(__pnacl__) || defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \
defined(TARGET_IPHONE_SIMULATOR)
#if defined(__pnacl__) || defined(__CLR_VER) || \
(defined(__i386__) && !defined(__SSE2__))
#define LIBYUV_DISABLE_X86
#endif
// GCC >= 4.7.0 required for AVX2.
#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
#if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7))
#define GCC_HAS_AVX2 1
#endif // GNUC >= 4.7
#endif // __GNUC__
// clang >= 3.4.0 required for AVX2.
#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
#if (__clang_major__ > 3) || (__clang_major__ == 3 && (__clang_minor__ >= 4))
#define CLANG_HAS_AVX2 1
#endif // clang >= 3.4
#endif // __clang__
// Visual C 2012 required for AVX2.
#if defined(_M_IX86) && !defined(__clang__) && \
defined(_MSC_VER) && _MSC_VER >= 1700
#define VISUALC_HAS_AVX2 1
#endif // VisualStudio >= 2012
// The following are available on all x86 platforms:
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
#define HAS_SCALEROWDOWN2_SSE2
#define HAS_SCALEROWDOWN4_SSE2
#define HAS_SCALEROWDOWN34_SSSE3
#define HAS_SCALEROWDOWN38_SSSE3
#define HAS_SCALEADDROWS_SSE2
#define HAS_SCALEFILTERCOLS_SSSE3
#define HAS_SCALECOLSUP2_SSE2
#define HAS_FIXEDDIV1_X86
#define HAS_FIXEDDIV_X86
#define HAS_SCALEARGBCOLS_SSE2
#define HAS_SCALEARGBCOLSUP2_SSE2
#define HAS_SCALEARGBFILTERCOLS_SSSE3
#define HAS_SCALEARGBROWDOWN2_SSE2
#define HAS_SCALEARGBROWDOWNEVEN_SSE2
#define HAS_SCALEARGBCOLS_SSE2
#define HAS_SCALEARGBFILTERCOLS_SSSE3
#define HAS_SCALEARGBCOLSUP2_SSE2
#define HAS_FIXEDDIV_X86
#define HAS_FIXEDDIV1_X86
#define HAS_SCALECOLSUP2_SSE2
#define HAS_SCALEFILTERCOLS_SSSE3
#define HAS_SCALEROWDOWN2_SSSE3
#define HAS_SCALEROWDOWN34_SSSE3
#define HAS_SCALEROWDOWN38_SSSE3
#define HAS_SCALEROWDOWN4_SSSE3
#define HAS_SCALEADDROW_SSE2
#endif
// The following are available on all x86 platforms, but
// require VS2012, clang 3.4 or gcc 4.7.
// The code supports NaCL but requires a new compiler and validator.
#if !defined(LIBYUV_DISABLE_X86) && (defined(VISUALC_HAS_AVX2) || \
defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
#define HAS_SCALEADDROW_AVX2
#define HAS_SCALEROWDOWN2_AVX2
#define HAS_SCALEROWDOWN4_AVX2
#endif
// The following are available on Neon platforms:
#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
(defined(__ARM_NEON__) || defined(LIBYUV_NEON))
#define HAS_SCALEROWDOWN2_NEON
#define HAS_SCALEROWDOWN4_NEON
#define HAS_SCALEROWDOWN34_NEON
#define HAS_SCALEROWDOWN38_NEON
#define HAS_SCALEARGBROWDOWNEVEN_NEON
#define HAS_SCALEARGBROWDOWN2_NEON
#elif !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
(defined(__aarch64__) || defined(LIBYUV_NEON))
#define HAS_SCALEROWDOWN2_NEON
#define HAS_SCALEROWDOWN4_NEON
#define HAS_SCALEROWDOWN34_NEON
#define HAS_SCALEROWDOWN38_NEON
(defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
#define HAS_SCALEARGBCOLS_NEON
#define HAS_SCALEARGBROWDOWN2_NEON
#define HAS_SCALEARGBROWDOWNEVEN_NEON
#define HAS_SCALEFILTERCOLS_NEON
#define HAS_SCALEROWDOWN2_NEON
#define HAS_SCALEROWDOWN34_NEON
#define HAS_SCALEROWDOWN38_NEON
#define HAS_SCALEROWDOWN4_NEON
#define HAS_SCALEARGBFILTERCOLS_NEON
#endif
// The following are available on Mips platforms:
@ -172,10 +198,8 @@ void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* dst_ptr, int dst_width);
void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint16* dst_ptr, int src_width, int src_height);
void ScaleAddRows_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint32* dst_ptr, int src_width, int src_height);
void ScaleAddRow_C(const uint8* src_ptr, uint16* dst_ptr, int src_width);
void ScaleAddRow_16_C(const uint16* src_ptr, uint32* dst_ptr, int src_width);
void ScaleARGBRowDown2_C(const uint8* src_argb,
ptrdiff_t src_stride,
uint8* dst_argb, int dst_width);
@ -202,25 +226,28 @@ void ScaleARGBFilterCols_C(uint8* dst_argb, const uint8* src_argb,
void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx);
void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
// Specialized scalers for x86.
void ScaleRowDown2_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Linear_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
void ScaleRowDown2Linear_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
void ScaleRowDown2Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2_Unaligned_SSE2(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Linear_Unaligned_SSE2(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Box_Unaligned_SSE2(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
void ScaleRowDown4_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
void ScaleRowDown4Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
@ -237,46 +264,124 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
uint16* dst_ptr, int src_width,
int src_height);
void ScaleRowDown2_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Linear_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Box_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Linear_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Box_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4Box_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4Box_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown34_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown34_1_Box_Any_SSSE3(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown34_0_Box_Any_SSSE3(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown38_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown38_3_Box_Any_SSSE3(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown38_2_Box_Any_SSSE3(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleAddRow_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
void ScaleAddRow_Any_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
void ScaleAddRow_Any_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx);
void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx);
void ScaleARGBRowDown2_SSE2(const uint8* src_argb,
ptrdiff_t src_stride,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb,
ptrdiff_t src_stride,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
ptrdiff_t src_stride,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
ptrdiff_t src_stride,
int src_stepx,
uint8* dst_argb, int dst_width);
// ARGB Column functions
void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx);
void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx);
void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx);
// Row functions.
void ScaleARGBFilterCols_NEON(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx);
void ScaleARGBCols_NEON(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx);
void ScaleARGBFilterCols_Any_NEON(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx);
void ScaleARGBCols_Any_NEON(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx);
// ARGB Row functions
void ScaleARGBRowDown2_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb, ptrdiff_t src_stride,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleARGBRowDown2_Any_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDown2Linear_Any_SSE2(const uint8* src_argb,
ptrdiff_t src_stride,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDown2Box_Any_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDown2_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleARGBRowDown2Linear_Any_NEON(const uint8* src_argb,
ptrdiff_t src_stride,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDown2Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx, uint8* dst_argb, int dst_width);
void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleARGBRowDownEven_Any_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDownEvenBox_Any_SSE2(const uint8* src_argb,
ptrdiff_t src_stride,
int src_stepx,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDownEven_Any_NEON(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDownEvenBox_Any_NEON(const uint8* src_argb,
ptrdiff_t src_stride,
int src_stepx,
uint8* dst_argb, int dst_width);
// ScaleRowDown2Box also used by planar functions
// NEON downscalers with interpolation.
@ -284,7 +389,8 @@ void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
// Note - not static due to reuse in convert for 444 to 420.
void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown2Linear_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
@ -319,6 +425,42 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown2Linear_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown2Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown4_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown34_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown34_0_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown34_1_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
// 32 -> 12
void ScaleRowDown38_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
// 32x3 -> 12x1
void ScaleRowDown38_3_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
// 32x2 -> 12x1
void ScaleRowDown38_2_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleAddRow_NEON(const uint8* src_ptr, uint16* dst_ptr, int src_width);
void ScaleAddRow_Any_NEON(const uint8* src_ptr, uint16* dst_ptr, int src_width);
void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx);
void ScaleFilterCols_Any_NEON(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx);
void ScaleRowDown2_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown2Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,

View File

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1074
#define LIBYUV_VERSION 1561
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT

View File

@ -62,7 +62,7 @@ enum FourCC {
// 2 Secondary YUV formats: row biplanar.
FOURCC_M420 = FOURCC('M', '4', '2', '0'),
FOURCC_Q420 = FOURCC('Q', '4', '2', '0'),
FOURCC_Q420 = FOURCC('Q', '4', '2', '0'), // deprecated.
// 9 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp.
FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'),
@ -75,7 +75,7 @@ enum FourCC {
FOURCC_RGBO = FOURCC('R', 'G', 'B', 'O'), // argb1555 LE.
FOURCC_R444 = FOURCC('R', '4', '4', '4'), // argb4444 LE.
// 4 Secondary RGB formats: 4 Bayer Patterns.
// 4 Secondary RGB formats: 4 Bayer Patterns. deprecated.
FOURCC_RGGB = FOURCC('R', 'G', 'G', 'B'),
FOURCC_BGGR = FOURCC('B', 'G', 'G', 'R'),
FOURCC_GRBG = FOURCC('G', 'R', 'B', 'G'),
@ -90,7 +90,8 @@ enum FourCC {
FOURCC_YV24 = FOURCC('Y', 'V', '2', '4'),
FOURCC_YU12 = FOURCC('Y', 'U', '1', '2'), // Linux version of I420.
FOURCC_J420 = FOURCC('J', '4', '2', '0'),
FOURCC_J400 = FOURCC('J', '4', '0', '0'),
FOURCC_J400 = FOURCC('J', '4', '0', '0'), // unofficial fourcc
FOURCC_H420 = FOURCC('H', '4', '2', '0'), // unofficial fourcc
// 14 Auxiliary aliases. CanonicalFourCC() maps these to canonical fourcc.
FOURCC_IYUV = FOURCC('I', 'Y', 'U', 'V'), // Alias for I420.
@ -150,6 +151,7 @@ enum FourCCBpp {
FOURCC_BPP_YU12 = 12,
FOURCC_BPP_J420 = 12,
FOURCC_BPP_J400 = 8,
FOURCC_BPP_H420 = 12,
FOURCC_BPP_MJPG = 0, // 0 means unknown.
FOURCC_BPP_H264 = 0,
FOURCC_BPP_IYUV = 12,

View File

@ -17,38 +17,23 @@
#endif
#include "libyuv/basic_types.h"
#include "libyuv/compare_row.h"
#include "libyuv/cpu_id.h"
#include "libyuv/row.h"
#include "libyuv/video_common.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// hash seed of 5381 recommended.
// Internal C version of HashDjb2 with int sized count for efficiency.
uint32 HashDjb2_C(const uint8* src, int count, uint32 seed);
// This module is for Visual C x86
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || \
(defined(__x86_64__) || (defined(__i386__) && !defined(__pic__))))
#define HAS_HASHDJB2_SSE41
uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed);
#if _MSC_VER >= 1700
#define HAS_HASHDJB2_AVX2
uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed);
#endif
#endif // HAS_HASHDJB2_SSE41
// hash seed of 5381 recommended.
LIBYUV_API
uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
const int kBlockSize = 1 << 15; // 32768;
int remainder;
uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) = HashDjb2_C;
uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) =
HashDjb2_C;
#if defined(HAS_HASHDJB2_SSE41)
if (TestCpuFlag(kCpuHasSSE41)) {
HashDjb2_SSE = HashDjb2_SSE41;
@ -78,22 +63,53 @@ uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
return seed;
}
uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count);
#if !defined(LIBYUV_DISABLE_NEON) && \
(defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
#define HAS_SUMSQUAREERROR_NEON
uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count);
#endif
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
#define HAS_SUMSQUAREERROR_SSE2
uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count);
#endif
// Visual C 2012 required for AVX2.
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && _MSC_VER >= 1700
#define HAS_SUMSQUAREERROR_AVX2
uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count);
#endif
static uint32 ARGBDetectRow_C(const uint8* argb, int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
if (argb[0] != 255) { // First byte is not Alpha of 255, so not ARGB.
return FOURCC_BGRA;
}
if (argb[3] != 255) { // 4th byte is not Alpha of 255, so not BGRA.
return FOURCC_ARGB;
}
if (argb[4] != 255) { // Second pixel first byte is not Alpha of 255.
return FOURCC_BGRA;
}
if (argb[7] != 255) { // Second pixel 4th byte is not Alpha of 255.
return FOURCC_ARGB;
}
argb += 8;
}
if (width & 1) {
if (argb[0] != 255) { // First byte is not Alpha of 255, so not ARGB.
return FOURCC_BGRA;
}
if (argb[3] != 255) { // 4th byte is not Alpha of 255, so not BGRA.
return FOURCC_ARGB;
}
}
return 0;
}
// Scan an opaque argb image and return fourcc based on alpha offset.
// Returns FOURCC_ARGB, FOURCC_BGRA, or 0 if unknown.
LIBYUV_API
uint32 ARGBDetect(const uint8* argb, int stride_argb, int width, int height) {
uint32 fourcc = 0;
int h;
// Coalesce rows.
if (stride_argb == width * 4) {
width *= height;
height = 1;
stride_argb = 0;
}
for (h = 0; h < height && fourcc == 0; ++h) {
fourcc = ARGBDetectRow_C(argb, width);
argb += stride_argb;
}
return fourcc;
}
// TODO(fbarchard): Refactor into row function.
LIBYUV_API
@ -114,8 +130,7 @@ uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b,
}
#endif
#if defined(HAS_SUMSQUAREERROR_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(src_a, 16) && IS_ALIGNED(src_b, 16)) {
if (TestCpuFlag(kCpuHasSSE2)) {
// Note only used for multiples of 16 so count is not checked.
SumSquareError = SumSquareError_SSE2;
}

View File

@ -10,6 +10,8 @@
#include "libyuv/basic_types.h"
#include "libyuv/compare_row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {

View File

@ -9,6 +9,8 @@
*/
#include "libyuv/basic_types.h"
#include "libyuv/compare_row.h"
#include "libyuv/row.h"
#ifdef __cplusplus
@ -16,7 +18,8 @@ namespace libyuv {
extern "C" {
#endif
#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)
#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \
!defined(__aarch64__)
uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
volatile uint32 sse;
@ -26,7 +29,6 @@ uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
"vmov.u8 q9, #0 \n"
"vmov.u8 q11, #0 \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {q0}, [%0]! \n"
@ -56,46 +58,7 @@ uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
return sse;
}
#elif !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
volatile uint32 sse;
asm volatile (
"eor v16.16b, v16.16b, v16.16b \n"
"eor v18.16b, v18.16b, v18.16b \n"
"eor v17.16b, v17.16b, v17.16b \n"
"eor v19.16b, v19.16b, v19.16b \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"ld1 {v0.16b}, [%0], #16 \n"
MEMACCESS(1)
"ld1 {v1.16b}, [%1], #16 \n"
"subs %2, %2, #16 \n"
"usubl v2.8h, v0.8b, v1.8b \n"
"usubl2 v3.8h, v0.16b, v1.16b \n"
"smlal v16.4s, v2.4h, v2.4h \n"
"smlal v17.4s, v3.4h, v3.4h \n"
"smlal2 v18.4s, v2.8h, v2.8h \n"
"smlal2 v19.4s, v3.8h, v3.8h \n"
"bgt 1b \n"
"add v16.4s, v16.4s, v17.4s \n"
"add v18.4s, v18.4s, v19.4s \n"
"add v19.4s, v16.4s, v18.4s \n"
"addv s0, v19.4s \n"
"fmov %w3, s0 \n"
: "+r"(src_a),
"+r"(src_b),
"+r"(count),
"=r"(sse)
:
: "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19");
return sse;
}
#endif // __ARM_NEON__
#endif // defined(__ARM_NEON__) && !defined(__aarch64__)
#ifdef __cplusplus
} // extern "C"

View File

@ -1,158 +0,0 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/basic_types.h"
#include "libyuv/row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
uint32 sse;
asm volatile ( // NOLINT
"pxor %%xmm0,%%xmm0 \n"
"pxor %%xmm5,%%xmm5 \n"
LABELALIGN
"1: \n"
"movdqa " MEMACCESS(0) ",%%xmm1 \n"
"lea " MEMLEA(0x10, 0) ",%0 \n"
"movdqa " MEMACCESS(1) ",%%xmm2 \n"
"lea " MEMLEA(0x10, 1) ",%1 \n"
"sub $0x10,%2 \n"
"movdqa %%xmm1,%%xmm3 \n"
"psubusb %%xmm2,%%xmm1 \n"
"psubusb %%xmm3,%%xmm2 \n"
"por %%xmm2,%%xmm1 \n"
"movdqa %%xmm1,%%xmm2 \n"
"punpcklbw %%xmm5,%%xmm1 \n"
"punpckhbw %%xmm5,%%xmm2 \n"
"pmaddwd %%xmm1,%%xmm1 \n"
"pmaddwd %%xmm2,%%xmm2 \n"
"paddd %%xmm1,%%xmm0 \n"
"paddd %%xmm2,%%xmm0 \n"
"jg 1b \n"
"pshufd $0xee,%%xmm0,%%xmm1 \n"
"paddd %%xmm1,%%xmm0 \n"
"pshufd $0x1,%%xmm0,%%xmm1 \n"
"paddd %%xmm1,%%xmm0 \n"
"movd %%xmm0,%3 \n"
: "+r"(src_a), // %0
"+r"(src_b), // %1
"+r"(count), // %2
"=g"(sse) // %3
:
: "memory", "cc"
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
#endif
); // NOLINT
return sse;
}
#endif // defined(__x86_64__) || defined(__i386__)
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))
#define HAS_HASHDJB2_SSE41
static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16
static uvec32 kHashMul0 = {
0x0c3525e1, // 33 ^ 15
0xa3476dc1, // 33 ^ 14
0x3b4039a1, // 33 ^ 13
0x4f5f0981, // 33 ^ 12
};
static uvec32 kHashMul1 = {
0x30f35d61, // 33 ^ 11
0x855cb541, // 33 ^ 10
0x040a9121, // 33 ^ 9
0x747c7101, // 33 ^ 8
};
static uvec32 kHashMul2 = {
0xec41d4e1, // 33 ^ 7
0x4cfa3cc1, // 33 ^ 6
0x025528a1, // 33 ^ 5
0x00121881, // 33 ^ 4
};
static uvec32 kHashMul3 = {
0x00008c61, // 33 ^ 3
0x00000441, // 33 ^ 2
0x00000021, // 33 ^ 1
0x00000001, // 33 ^ 0
};
uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
uint32 hash;
asm volatile ( // NOLINT
"movd %2,%%xmm0 \n"
"pxor %%xmm7,%%xmm7 \n"
"movdqa %4,%%xmm6 \n"
LABELALIGN
"1: \n"
"movdqu " MEMACCESS(0) ",%%xmm1 \n"
"lea " MEMLEA(0x10, 0) ",%0 \n"
"pmulld %%xmm6,%%xmm0 \n"
"movdqa %5,%%xmm5 \n"
"movdqa %%xmm1,%%xmm2 \n"
"punpcklbw %%xmm7,%%xmm2 \n"
"movdqa %%xmm2,%%xmm3 \n"
"punpcklwd %%xmm7,%%xmm3 \n"
"pmulld %%xmm5,%%xmm3 \n"
"movdqa %6,%%xmm5 \n"
"movdqa %%xmm2,%%xmm4 \n"
"punpckhwd %%xmm7,%%xmm4 \n"
"pmulld %%xmm5,%%xmm4 \n"
"movdqa %7,%%xmm5 \n"
"punpckhbw %%xmm7,%%xmm1 \n"
"movdqa %%xmm1,%%xmm2 \n"
"punpcklwd %%xmm7,%%xmm2 \n"
"pmulld %%xmm5,%%xmm2 \n"
"movdqa %8,%%xmm5 \n"
"punpckhwd %%xmm7,%%xmm1 \n"
"pmulld %%xmm5,%%xmm1 \n"
"paddd %%xmm4,%%xmm3 \n"
"paddd %%xmm2,%%xmm1 \n"
"sub $0x10,%1 \n"
"paddd %%xmm3,%%xmm1 \n"
"pshufd $0xe,%%xmm1,%%xmm2 \n"
"paddd %%xmm2,%%xmm1 \n"
"pshufd $0x1,%%xmm1,%%xmm2 \n"
"paddd %%xmm2,%%xmm1 \n"
"paddd %%xmm1,%%xmm0 \n"
"jg 1b \n"
"movd %%xmm0,%3 \n"
: "+r"(src), // %0
"+r"(count), // %1
"+rm"(seed), // %2
"=g"(hash) // %3
: "m"(kHash16x33), // %4
"m"(kHashMul0), // %5
"m"(kHashMul1), // %6
"m"(kHashMul2), // %7
"m"(kHashMul3) // %8
: "memory", "cc"
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
#endif
); // NOLINT
return hash;
}
#endif // defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

View File

@ -9,6 +9,8 @@
*/
#include "libyuv/basic_types.h"
#include "libyuv/compare_row.h"
#include "libyuv/row.h"
#ifdef __cplusplus
@ -16,9 +18,10 @@ namespace libyuv {
extern "C" {
#endif
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
// This module is for 32 bit Visual C x86 and clangcl
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
__declspec(naked) __declspec(align(16))
__declspec(naked)
uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
__asm {
mov eax, [esp + 4] // src_a
@ -27,13 +30,11 @@ uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
pxor xmm0, xmm0
pxor xmm5, xmm5
align 4
wloop:
movdqa xmm1, [eax]
movdqu xmm1, [eax]
lea eax, [eax + 16]
movdqa xmm2, [edx]
movdqu xmm2, [edx]
lea edx, [edx + 16]
sub ecx, 16
movdqa xmm3, xmm1 // abs trick
psubusb xmm1, xmm2
psubusb xmm2, xmm3
@ -45,6 +46,7 @@ uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
pmaddwd xmm2, xmm2
paddd xmm0, xmm1
paddd xmm0, xmm2
sub ecx, 16
jg wloop
pshufd xmm1, xmm0, 0xee
@ -60,7 +62,7 @@ uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
#if _MSC_VER >= 1700
// C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX.
#pragma warning(disable: 4752)
__declspec(naked) __declspec(align(16))
__declspec(naked)
uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
__asm {
mov eax, [esp + 4] // src_a
@ -70,12 +72,10 @@ uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
vpxor ymm5, ymm5, ymm5 // constant 0 for unpck
sub edx, eax
align 4
wloop:
vmovdqu ymm1, [eax]
vmovdqu ymm2, [eax + edx]
lea eax, [eax + 32]
sub ecx, 32
vpsubusb ymm3, ymm1, ymm2 // abs difference trick
vpsubusb ymm2, ymm2, ymm1
vpor ymm1, ymm2, ymm3
@ -85,6 +85,7 @@ uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
vpmaddwd ymm1, ymm1, ymm1
vpaddd ymm0, ymm0, ymm1
vpaddd ymm0, ymm0, ymm2
sub ecx, 32
jg wloop
vpshufd ymm1, ymm0, 0xee // 3, 2 + 1, 0 both lanes.
@ -100,42 +101,33 @@ uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
}
#endif // _MSC_VER >= 1700
#define HAS_HASHDJB2_SSE41
static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16
static uvec32 kHashMul0 = {
uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16
uvec32 kHashMul0 = {
0x0c3525e1, // 33 ^ 15
0xa3476dc1, // 33 ^ 14
0x3b4039a1, // 33 ^ 13
0x4f5f0981, // 33 ^ 12
};
static uvec32 kHashMul1 = {
uvec32 kHashMul1 = {
0x30f35d61, // 33 ^ 11
0x855cb541, // 33 ^ 10
0x040a9121, // 33 ^ 9
0x747c7101, // 33 ^ 8
};
static uvec32 kHashMul2 = {
uvec32 kHashMul2 = {
0xec41d4e1, // 33 ^ 7
0x4cfa3cc1, // 33 ^ 6
0x025528a1, // 33 ^ 5
0x00121881, // 33 ^ 4
};
static uvec32 kHashMul3 = {
uvec32 kHashMul3 = {
0x00008c61, // 33 ^ 3
0x00000441, // 33 ^ 2
0x00000021, // 33 ^ 1
0x00000001, // 33 ^ 0
};
// 27: 66 0F 38 40 C6 pmulld xmm0,xmm6
// 44: 66 0F 38 40 DD pmulld xmm3,xmm5
// 59: 66 0F 38 40 E5 pmulld xmm4,xmm5
// 72: 66 0F 38 40 D5 pmulld xmm2,xmm5
// 83: 66 0F 38 40 CD pmulld xmm1,xmm5
#define pmulld(reg) _asm _emit 0x66 _asm _emit 0x0F _asm _emit 0x38 \
_asm _emit 0x40 _asm _emit reg
__declspec(naked) __declspec(align(16))
__declspec(naked)
uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
__asm {
mov eax, [esp + 4] // src
@ -143,34 +135,32 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
movd xmm0, [esp + 12] // seed
pxor xmm7, xmm7 // constant 0 for unpck
movdqa xmm6, kHash16x33
movdqa xmm6, xmmword ptr kHash16x33
align 4
wloop:
movdqu xmm1, [eax] // src[0-15]
lea eax, [eax + 16]
pmulld(0xc6) // pmulld xmm0,xmm6 hash *= 33 ^ 16
movdqa xmm5, kHashMul0
pmulld xmm0, xmm6 // hash *= 33 ^ 16
movdqa xmm5, xmmword ptr kHashMul0
movdqa xmm2, xmm1
punpcklbw xmm2, xmm7 // src[0-7]
movdqa xmm3, xmm2
punpcklwd xmm3, xmm7 // src[0-3]
pmulld(0xdd) // pmulld xmm3, xmm5
movdqa xmm5, kHashMul1
pmulld xmm3, xmm5
movdqa xmm5, xmmword ptr kHashMul1
movdqa xmm4, xmm2
punpckhwd xmm4, xmm7 // src[4-7]
pmulld(0xe5) // pmulld xmm4, xmm5
movdqa xmm5, kHashMul2
pmulld xmm4, xmm5
movdqa xmm5, xmmword ptr kHashMul2
punpckhbw xmm1, xmm7 // src[8-15]
movdqa xmm2, xmm1
punpcklwd xmm2, xmm7 // src[8-11]
pmulld(0xd5) // pmulld xmm2, xmm5
movdqa xmm5, kHashMul3
pmulld xmm2, xmm5
movdqa xmm5, xmmword ptr kHashMul3
punpckhwd xmm1, xmm7 // src[12-15]
pmulld(0xcd) // pmulld xmm1, xmm5
pmulld xmm1, xmm5
paddd xmm3, xmm4 // add 16 results
paddd xmm1, xmm2
sub ecx, 16
paddd xmm1, xmm3
pshufd xmm2, xmm1, 0x0e // upper 2 dwords
@ -178,6 +168,7 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
pshufd xmm2, xmm1, 0x01
paddd xmm1, xmm2
paddd xmm0, xmm1
sub ecx, 16
jg wloop
movd eax, xmm0 // return hash
@ -187,44 +178,43 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
// Visual C 2012 required for AVX2.
#if _MSC_VER >= 1700
__declspec(naked) __declspec(align(16))
__declspec(naked)
uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {
__asm {
mov eax, [esp + 4] // src
mov ecx, [esp + 8] // count
movd xmm0, [esp + 12] // seed
movdqa xmm6, kHash16x33
vmovd xmm0, [esp + 12] // seed
align 4
wloop:
vpmovzxbd xmm3, dword ptr [eax] // src[0-3]
pmulld xmm0, xmm6 // hash *= 33 ^ 16
vpmovzxbd xmm4, dword ptr [eax + 4] // src[4-7]
pmulld xmm3, kHashMul0
vpmovzxbd xmm2, dword ptr [eax + 8] // src[8-11]
pmulld xmm4, kHashMul1
vpmovzxbd xmm1, dword ptr [eax + 12] // src[12-15]
pmulld xmm2, kHashMul2
vpmovzxbd xmm3, [eax] // src[0-3]
vpmulld xmm0, xmm0, xmmword ptr kHash16x33 // hash *= 33 ^ 16
vpmovzxbd xmm4, [eax + 4] // src[4-7]
vpmulld xmm3, xmm3, xmmword ptr kHashMul0
vpmovzxbd xmm2, [eax + 8] // src[8-11]
vpmulld xmm4, xmm4, xmmword ptr kHashMul1
vpmovzxbd xmm1, [eax + 12] // src[12-15]
vpmulld xmm2, xmm2, xmmword ptr kHashMul2
lea eax, [eax + 16]
pmulld xmm1, kHashMul3
paddd xmm3, xmm4 // add 16 results
paddd xmm1, xmm2
vpmulld xmm1, xmm1, xmmword ptr kHashMul3
vpaddd xmm3, xmm3, xmm4 // add 16 results
vpaddd xmm1, xmm1, xmm2
vpaddd xmm1, xmm1, xmm3
vpshufd xmm2, xmm1, 0x0e // upper 2 dwords
vpaddd xmm1, xmm1,xmm2
vpshufd xmm2, xmm1, 0x01
vpaddd xmm1, xmm1, xmm2
vpaddd xmm0, xmm0, xmm1
sub ecx, 16
paddd xmm1, xmm3
pshufd xmm2, xmm1, 0x0e // upper 2 dwords
paddd xmm1, xmm2
pshufd xmm2, xmm1, 0x01
paddd xmm1, xmm2
paddd xmm0, xmm1
jg wloop
movd eax, xmm0 // return hash
vmovd eax, xmm0 // return hash
vzeroupper
ret
}
}
#endif // _MSC_VER >= 1700
#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
#ifdef __cplusplus
} // extern "C"

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -13,7 +13,6 @@
#include "libyuv/basic_types.h"
#include "libyuv/convert.h" // For I420Copy
#include "libyuv/cpu_id.h"
#include "libyuv/format_conversion.h"
#include "libyuv/planar_functions.h"
#include "libyuv/rotate.h"
#include "libyuv/scale.h" // For ScalePlane()
@ -174,14 +173,15 @@ int I422ToYUY2(const uint8* src_y, int src_stride_y,
src_stride_y = src_stride_u = src_stride_v = dst_stride_yuy2 = 0;
}
#if defined(HAS_I422TOYUY2ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
if (TestCpuFlag(kCpuHasSSE2)) {
I422ToYUY2Row = I422ToYUY2Row_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
I422ToYUY2Row = I422ToYUY2Row_SSE2;
}
}
#elif defined(HAS_I422TOYUY2ROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
#endif
#if defined(HAS_I422TOYUY2ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I422ToYUY2Row = I422ToYUY2Row_Any_NEON;
if (IS_ALIGNED(width, 16)) {
I422ToYUY2Row = I422ToYUY2Row_NEON;
@ -220,14 +220,15 @@ int I420ToYUY2(const uint8* src_y, int src_stride_y,
dst_stride_yuy2 = -dst_stride_yuy2;
}
#if defined(HAS_I422TOYUY2ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
if (TestCpuFlag(kCpuHasSSE2)) {
I422ToYUY2Row = I422ToYUY2Row_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
I422ToYUY2Row = I422ToYUY2Row_SSE2;
}
}
#elif defined(HAS_I422TOYUY2ROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
#endif
#if defined(HAS_I422TOYUY2ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I422ToYUY2Row = I422ToYUY2Row_Any_NEON;
if (IS_ALIGNED(width, 16)) {
I422ToYUY2Row = I422ToYUY2Row_NEON;
@ -280,14 +281,15 @@ int I422ToUYVY(const uint8* src_y, int src_stride_y,
src_stride_y = src_stride_u = src_stride_v = dst_stride_uyvy = 0;
}
#if defined(HAS_I422TOUYVYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
if (TestCpuFlag(kCpuHasSSE2)) {
I422ToUYVYRow = I422ToUYVYRow_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
I422ToUYVYRow = I422ToUYVYRow_SSE2;
}
}
#elif defined(HAS_I422TOUYVYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
#endif
#if defined(HAS_I422TOUYVYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I422ToUYVYRow = I422ToUYVYRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
I422ToUYVYRow = I422ToUYVYRow_NEON;
@ -326,14 +328,15 @@ int I420ToUYVY(const uint8* src_y, int src_stride_y,
dst_stride_uyvy = -dst_stride_uyvy;
}
#if defined(HAS_I422TOUYVYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
if (TestCpuFlag(kCpuHasSSE2)) {
I422ToUYVYRow = I422ToUYVYRow_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
I422ToUYVYRow = I422ToUYVYRow_SSE2;
}
}
#elif defined(HAS_I422TOUYVYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
#endif
#if defined(HAS_I422TOUYVYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I422ToUYVYRow = I422ToUYVYRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
I422ToUYVYRow = I422ToUYVYRow_NEON;
@ -397,20 +400,15 @@ int I420ToNV12(const uint8* src_y, int src_stride_y,
src_stride_u = src_stride_v = dst_stride_uv = 0;
}
#if defined(HAS_MERGEUVROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) {
if (TestCpuFlag(kCpuHasSSE2)) {
MergeUVRow_ = MergeUVRow_Any_SSE2;
if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_Unaligned_SSE2;
if (IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) &&
IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) &&
IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
MergeUVRow_ = MergeUVRow_SSE2;
}
MergeUVRow_ = MergeUVRow_SSE2;
}
}
#endif
#if defined(HAS_MERGEUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) {
if (TestCpuFlag(kCpuHasAVX2)) {
MergeUVRow_ = MergeUVRow_Any_AVX2;
if (IS_ALIGNED(halfwidth, 32)) {
MergeUVRow_ = MergeUVRow_AVX2;
@ -418,7 +416,7 @@ int I420ToNV12(const uint8* src_y, int src_stride_y,
}
#endif
#if defined(HAS_MERGEUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
if (TestCpuFlag(kCpuHasNEON)) {
MergeUVRow_ = MergeUVRow_Any_NEON;
if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_NEON;
@ -452,185 +450,67 @@ int I420ToNV21(const uint8* src_y, int src_stride_y,
width, height);
}
// Convert I420 to ARGB.
LIBYUV_API
int I420ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
// Convert I422 to RGBA with matrix
static int I420ToRGBAMatrix(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_rgba, int dst_stride_rgba,
const struct YuvConstants* yuvconstants,
int width, int height) {
int y;
void (*I422ToARGBRow)(const uint8* y_buf,
void (*I422ToRGBARow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) = I422ToARGBRow_C;
if (!src_y || !src_u || !src_v || !dst_argb ||
const struct YuvConstants* yuvconstants,
int width) = I422ToRGBARow_C;
if (!src_y || !src_u || !src_v || !dst_rgba ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba;
dst_stride_rgba = -dst_stride_rgba;
}
#if defined(HAS_I422TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
#if defined(HAS_I422TORGBAROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I422ToRGBARow = I422ToRGBARow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
I422ToARGBRow = I422ToARGBRow_SSSE3;
}
I422ToRGBARow = I422ToRGBARow_SSSE3;
}
}
#endif
#if defined(HAS_I422TOARGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && width >= 16) {
I422ToARGBRow = I422ToARGBRow_Any_AVX2;
#if defined(HAS_I422TORGBAROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
I422ToRGBARow = I422ToRGBARow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
I422ToARGBRow = I422ToARGBRow_AVX2;
I422ToRGBARow = I422ToRGBARow_AVX2;
}
}
#endif
#if defined(HAS_I422TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
I422ToARGBRow = I422ToARGBRow_Any_NEON;
#if defined(HAS_I422TORGBAROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I422ToRGBARow = I422ToRGBARow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
I422ToARGBRow = I422ToARGBRow_NEON;
I422ToRGBARow = I422ToRGBARow_NEON;
}
}
#endif
#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
#if defined(HAS_I422TORGBAROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
IS_ALIGNED(dst_rgba, 4) && IS_ALIGNED(dst_stride_rgba, 4)) {
I422ToRGBARow = I422ToRGBARow_MIPS_DSPR2;
}
#endif
for (y = 0; y < height; ++y) {
I422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
}
return 0;
}
// Convert I420 to BGRA.
LIBYUV_API
int I420ToBGRA(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_bgra, int dst_stride_bgra,
int width, int height) {
int y;
void (*I422ToBGRARow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) = I422ToBGRARow_C;
if (!src_y || !src_u || !src_v || !dst_bgra ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_bgra = dst_bgra + (height - 1) * dst_stride_bgra;
dst_stride_bgra = -dst_stride_bgra;
}
#if defined(HAS_I422TOBGRAROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
I422ToBGRARow = I422ToBGRARow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
I422ToBGRARow = I422ToBGRARow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_bgra, 16) && IS_ALIGNED(dst_stride_bgra, 16)) {
I422ToBGRARow = I422ToBGRARow_SSSE3;
}
}
}
#elif defined(HAS_I422TOBGRAROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
I422ToBGRARow = I422ToBGRARow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
I422ToBGRARow = I422ToBGRARow_NEON;
}
}
#elif defined(HAS_I422TOBGRAROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_bgra, 4) && IS_ALIGNED(dst_stride_bgra, 4)) {
I422ToBGRARow = I422ToBGRARow_MIPS_DSPR2;
}
#endif
for (y = 0; y < height; ++y) {
I422ToBGRARow(src_y, src_u, src_v, dst_bgra, width);
dst_bgra += dst_stride_bgra;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
}
return 0;
}
// Convert I420 to ABGR.
LIBYUV_API
int I420ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height) {
int y;
void (*I422ToABGRRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) = I422ToABGRRow_C;
if (!src_y || !src_u || !src_v || !dst_abgr ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr;
dst_stride_abgr = -dst_stride_abgr;
}
#if defined(HAS_I422TOABGRROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
I422ToABGRRow = I422ToABGRRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_abgr, 16) && IS_ALIGNED(dst_stride_abgr, 16)) {
I422ToABGRRow = I422ToABGRRow_SSSE3;
}
}
}
#elif defined(HAS_I422TOABGRROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
I422ToABGRRow = I422ToABGRRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
I422ToABGRRow = I422ToABGRRow_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
dst_abgr += dst_stride_abgr;
I422ToRGBARow(src_y, src_u, src_v, dst_rgba, yuvconstants, width);
dst_rgba += dst_stride_rgba;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
@ -647,44 +527,81 @@ int I420ToRGBA(const uint8* src_y, int src_stride_y,
const uint8* src_v, int src_stride_v,
uint8* dst_rgba, int dst_stride_rgba,
int width, int height) {
return I420ToRGBAMatrix(src_y, src_stride_y,
src_u, src_stride_u,
src_v, src_stride_v,
dst_rgba, dst_stride_rgba,
&kYuvI601Constants,
width, height);
}
// Convert I420 to BGRA.
LIBYUV_API
int I420ToBGRA(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_bgra, int dst_stride_bgra,
int width, int height) {
return I420ToRGBAMatrix(src_y, src_stride_y,
src_v, src_stride_v, // Swap U and V
src_u, src_stride_u,
dst_bgra, dst_stride_bgra,
&kYvuI601Constants, // Use Yvu matrix
width, height);
}
// Convert I420 to RGB24 with matrix
static int I420ToRGB24Matrix(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_rgb24, int dst_stride_rgb24,
const struct YuvConstants* yuvconstants,
int width, int height) {
int y;
void (*I422ToRGBARow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) = I422ToRGBARow_C;
if (!src_y || !src_u || !src_v || !dst_rgba ||
void (*I422ToRGB24Row)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) = I422ToRGB24Row_C;
if (!src_y || !src_u || !src_v || !dst_rgb24 ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba;
dst_stride_rgba = -dst_stride_rgba;
dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24;
dst_stride_rgb24 = -dst_stride_rgb24;
}
#if defined(HAS_I422TORGBAROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
I422ToRGBARow = I422ToRGBARow_Any_SSSE3;
#if defined(HAS_I422TORGB24ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I422ToRGB24Row = I422ToRGB24Row_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
I422ToRGBARow = I422ToRGBARow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_rgba, 16) && IS_ALIGNED(dst_stride_rgba, 16)) {
I422ToRGBARow = I422ToRGBARow_SSSE3;
}
I422ToRGB24Row = I422ToRGB24Row_SSSE3;
}
}
#elif defined(HAS_I422TORGBAROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
I422ToRGBARow = I422ToRGBARow_Any_NEON;
#endif
#if defined(HAS_I422TORGB24ROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
I422ToRGB24Row = I422ToRGB24Row_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
I422ToRGB24Row = I422ToRGB24Row_AVX2;
}
}
#endif
#if defined(HAS_I422TORGB24ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I422ToRGB24Row = I422ToRGB24Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
I422ToRGBARow = I422ToRGBARow_NEON;
I422ToRGB24Row = I422ToRGB24Row_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
I422ToRGBARow(src_y, src_u, src_v, dst_rgba, width);
dst_rgba += dst_stride_rgba;
I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, yuvconstants, width);
dst_rgb24 += dst_stride_rgb24;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
@ -701,99 +618,27 @@ int I420ToRGB24(const uint8* src_y, int src_stride_y,
const uint8* src_v, int src_stride_v,
uint8* dst_rgb24, int dst_stride_rgb24,
int width, int height) {
int y;
void (*I422ToRGB24Row)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) = I422ToRGB24Row_C;
if (!src_y || !src_u || !src_v || !dst_rgb24 ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24;
dst_stride_rgb24 = -dst_stride_rgb24;
}
#if defined(HAS_I422TORGB24ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
I422ToRGB24Row = I422ToRGB24Row_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
I422ToRGB24Row = I422ToRGB24Row_SSSE3;
}
}
#elif defined(HAS_I422TORGB24ROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
I422ToRGB24Row = I422ToRGB24Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
I422ToRGB24Row = I422ToRGB24Row_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, width);
dst_rgb24 += dst_stride_rgb24;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
}
return 0;
return I420ToRGB24Matrix(src_y, src_stride_y,
src_u, src_stride_u,
src_v, src_stride_v,
dst_rgb24, dst_stride_rgb24,
&kYuvI601Constants,
width, height);
}
// Convert I420 to RAW.
LIBYUV_API
int I420ToRAW(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_raw, int dst_stride_raw,
int width, int height) {
int y;
void (*I422ToRAWRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) = I422ToRAWRow_C;
if (!src_y || !src_u || !src_v || !dst_raw ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_raw = dst_raw + (height - 1) * dst_stride_raw;
dst_stride_raw = -dst_stride_raw;
}
#if defined(HAS_I422TORAWROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
I422ToRAWRow = I422ToRAWRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
I422ToRAWRow = I422ToRAWRow_SSSE3;
}
}
#elif defined(HAS_I422TORAWROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
I422ToRAWRow = I422ToRAWRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
I422ToRAWRow = I422ToRAWRow_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
I422ToRAWRow(src_y, src_u, src_v, dst_raw, width);
dst_raw += dst_stride_raw;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
}
return 0;
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_raw, int dst_stride_raw,
int width, int height) {
return I420ToRGB24Matrix(src_y, src_stride_y,
src_v, src_stride_v, // Swap U and V
src_u, src_stride_u,
dst_raw, dst_stride_raw,
&kYvuI601Constants, // Use Yvu matrix
width, height);
}
// Convert I420 to ARGB1555.
@ -808,6 +653,7 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) = I422ToARGB1555Row_C;
if (!src_y || !src_u || !src_v || !dst_argb1555 ||
width <= 0 || height == 0) {
@ -820,14 +666,23 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y,
dst_stride_argb1555 = -dst_stride_argb1555;
}
#if defined(HAS_I422TOARGB1555ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
if (TestCpuFlag(kCpuHasSSSE3)) {
I422ToARGB1555Row = I422ToARGB1555Row_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
I422ToARGB1555Row = I422ToARGB1555Row_SSSE3;
}
}
#elif defined(HAS_I422TOARGB1555ROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
#endif
#if defined(HAS_I422TOARGB1555ROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
I422ToARGB1555Row = I422ToARGB1555Row_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
I422ToARGB1555Row = I422ToARGB1555Row_AVX2;
}
}
#endif
#if defined(HAS_I422TOARGB1555ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I422ToARGB1555Row = I422ToARGB1555Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
I422ToARGB1555Row = I422ToARGB1555Row_NEON;
@ -836,7 +691,8 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y,
#endif
for (y = 0; y < height; ++y) {
I422ToARGB1555Row(src_y, src_u, src_v, dst_argb1555, width);
I422ToARGB1555Row(src_y, src_u, src_v, dst_argb1555, &kYuvI601Constants,
width);
dst_argb1555 += dst_stride_argb1555;
src_y += src_stride_y;
if (y & 1) {
@ -860,6 +716,7 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) = I422ToARGB4444Row_C;
if (!src_y || !src_u || !src_v || !dst_argb4444 ||
width <= 0 || height == 0) {
@ -872,14 +729,23 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y,
dst_stride_argb4444 = -dst_stride_argb4444;
}
#if defined(HAS_I422TOARGB4444ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
if (TestCpuFlag(kCpuHasSSSE3)) {
I422ToARGB4444Row = I422ToARGB4444Row_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
I422ToARGB4444Row = I422ToARGB4444Row_SSSE3;
}
}
#elif defined(HAS_I422TOARGB4444ROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
#endif
#if defined(HAS_I422TOARGB4444ROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
I422ToARGB4444Row = I422ToARGB4444Row_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
I422ToARGB4444Row = I422ToARGB4444Row_AVX2;
}
}
#endif
#if defined(HAS_I422TOARGB4444ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I422ToARGB4444Row = I422ToARGB4444Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
I422ToARGB4444Row = I422ToARGB4444Row_NEON;
@ -888,7 +754,8 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y,
#endif
for (y = 0; y < height; ++y) {
I422ToARGB4444Row(src_y, src_u, src_v, dst_argb4444, width);
I422ToARGB4444Row(src_y, src_u, src_v, dst_argb4444, &kYuvI601Constants,
width);
dst_argb4444 += dst_stride_argb4444;
src_y += src_stride_y;
if (y & 1) {
@ -911,6 +778,7 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) = I422ToRGB565Row_C;
if (!src_y || !src_u || !src_v || !dst_rgb565 ||
width <= 0 || height == 0) {
@ -923,14 +791,23 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y,
dst_stride_rgb565 = -dst_stride_rgb565;
}
#if defined(HAS_I422TORGB565ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
if (TestCpuFlag(kCpuHasSSSE3)) {
I422ToRGB565Row = I422ToRGB565Row_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
I422ToRGB565Row = I422ToRGB565Row_SSSE3;
}
}
#elif defined(HAS_I422TORGB565ROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
#endif
#if defined(HAS_I422TORGB565ROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
I422ToRGB565Row = I422ToRGB565Row_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
I422ToRGB565Row = I422ToRGB565Row_AVX2;
}
}
#endif
#if defined(HAS_I422TORGB565ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I422ToRGB565Row = I422ToRGB565Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
I422ToRGB565Row = I422ToRGB565Row_NEON;
@ -939,7 +816,7 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y,
#endif
for (y = 0; y < height; ++y) {
I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, width);
I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, &kYuvI601Constants, width);
dst_rgb565 += dst_stride_rgb565;
src_y += src_stride_y;
if (y & 1) {
@ -950,6 +827,118 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y,
return 0;
}
// Ordered 8x8 dither for 888 to 565. Values from 0 to 7.
static const uint8 kDither565_4x4[16] = {
0, 4, 1, 5,
6, 2, 7, 3,
1, 5, 0, 4,
7, 3, 6, 2,
};
// Convert I420 to RGB565 with dithering.
LIBYUV_API
int I420ToRGB565Dither(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_rgb565, int dst_stride_rgb565,
const uint8* dither4x4, int width, int height) {
int y;
void (*I422ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) = I422ToARGBRow_C;
void (*ARGBToRGB565DitherRow)(const uint8* src_argb, uint8* dst_rgb,
const uint32 dither4, int width) = ARGBToRGB565DitherRow_C;
if (!src_y || !src_u || !src_v || !dst_rgb565 ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
dst_stride_rgb565 = -dst_stride_rgb565;
}
if (!dither4x4) {
dither4x4 = kDither565_4x4;
}
#if defined(HAS_I422TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
I422ToARGBRow = I422ToARGBRow_SSSE3;
}
}
#endif
#if defined(HAS_I422TOARGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
I422ToARGBRow = I422ToARGBRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
I422ToARGBRow = I422ToARGBRow_AVX2;
}
}
#endif
#if defined(HAS_I422TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I422ToARGBRow = I422ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
I422ToARGBRow = I422ToARGBRow_NEON;
}
}
#endif
#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2)) {
I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
}
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_SSE2;
if (IS_ALIGNED(width, 4)) {
ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_SSE2;
}
}
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_AVX2;
if (IS_ALIGNED(width, 8)) {
ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_AVX2;
}
}
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_NEON;
}
}
#endif
{
// Allocate a row of argb.
align_buffer_64(row_argb, width * 4);
for (y = 0; y < height; ++y) {
I422ToARGBRow(src_y, src_u, src_v, row_argb, &kYuvI601Constants, width);
ARGBToRGB565DitherRow(row_argb, dst_rgb565,
*(uint32*)(dither4x4 + ((y & 3) << 2)), width);
dst_rgb565 += dst_stride_rgb565;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
}
free_aligned_buffer_64(row_argb);
}
return 0;
}
// Convert I420 to specified format
LIBYUV_API
int ConvertFromI420(const uint8* y, int y_stride,
@ -1054,38 +1043,6 @@ int ConvertFromI420(const uint8* y, int y_stride,
dst_sample_stride ? dst_sample_stride : width * 4,
width, height);
break;
case FOURCC_BGGR:
r = I420ToBayerBGGR(y, y_stride,
u, u_stride,
v, v_stride,
dst_sample,
dst_sample_stride ? dst_sample_stride : width,
width, height);
break;
case FOURCC_GBRG:
r = I420ToBayerGBRG(y, y_stride,
u, u_stride,
v, v_stride,
dst_sample,
dst_sample_stride ? dst_sample_stride : width,
width, height);
break;
case FOURCC_GRBG:
r = I420ToBayerGRBG(y, y_stride,
u, u_stride,
v, v_stride,
dst_sample,
dst_sample_stride ? dst_sample_stride : width,
width, height);
break;
case FOURCC_RGGB:
r = I420ToBayerRGGB(y, y_stride,
u, u_stride,
v, v_stride,
dst_sample,
dst_sample_stride ? dst_sample_stride : width,
width, height);
break;
case FOURCC_I400:
r = I400Copy(y, y_stride,
dst_sample,
@ -1116,7 +1073,7 @@ int ConvertFromI420(const uint8* y, int y_stride,
width, height);
break;
}
// TODO(fbarchard): Add M420 and Q420.
// TODO(fbarchard): Add M420.
// Triplanar formats
// TODO(fbarchard): halfstride instead of halfwidth
case FOURCC_I420:

View File

@ -12,7 +12,6 @@
#include "libyuv/basic_types.h"
#include "libyuv/cpu_id.h"
#include "libyuv/format_conversion.h"
#include "libyuv/planar_functions.h"
#include "libyuv/row.h"
@ -29,10 +28,10 @@ int ARGBToI444(const uint8* src_argb, int src_stride_argb,
uint8* dst_v, int dst_stride_v,
int width, int height) {
int y;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYRow_C;
void (*ARGBToUV444Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
int pix) = ARGBToUV444Row_C;
int width) = ARGBToUV444Row_C;
if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
return -1;
}
@ -51,17 +50,15 @@ int ARGBToI444(const uint8* src_argb, int src_stride_argb,
src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0;
}
#if defined(HAS_ARGBTOUV444ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUV444Row = ARGBToUV444Row_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUV444Row = ARGBToUV444Row_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToUV444Row = ARGBToUV444Row_SSSE3;
}
ARGBToUV444Row = ARGBToUV444Row_SSSE3;
}
}
#elif defined(HAS_ARGBTOUV444ROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
#endif
#if defined(HAS_ARGBTOUV444ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUV444Row = ARGBToUV444Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToUV444Row = ARGBToUV444Row_NEON;
@ -69,19 +66,23 @@ int ARGBToI444(const uint8* src_argb, int src_stride_argb,
}
#endif
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3;
}
ARGBToYRow = ARGBToYRow_SSSE3;
}
}
#elif defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
#endif
#if defined(HAS_ARGBTOYROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToYRow = ARGBToYRow_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
@ -109,8 +110,8 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb,
int width, int height) {
int y;
void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
int pix) = ARGBToUV422Row_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
int width) = ARGBToUV422Row_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYRow_C;
if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
return -1;
@ -130,37 +131,39 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb,
src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0;
}
#if defined(HAS_ARGBTOUV422ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUV422Row = ARGBToUV422Row_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToUV422Row = ARGBToUV422Row_SSSE3;
}
ARGBToUV422Row = ARGBToUV422Row_SSSE3;
}
}
#elif defined(HAS_ARGBTOUV422ROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
#endif
#if defined(HAS_ARGBTOUV422ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUV422Row = ARGBToUV422Row_NEON;
}
}
#endif
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3;
}
ARGBToYRow = ARGBToYRow_SSSE3;
}
}
#elif defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
#endif
#if defined(HAS_ARGBTOYROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToYRow = ARGBToYRow_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
@ -188,8 +191,8 @@ int ARGBToI411(const uint8* src_argb, int src_stride_argb,
int width, int height) {
int y;
void (*ARGBToUV411Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
int pix) = ARGBToUV411Row_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
int width) = ARGBToUV411Row_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYRow_C;
if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
return -1;
@ -209,19 +212,15 @@ int ARGBToI411(const uint8* src_argb, int src_stride_argb,
src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0;
}
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3;
}
ARGBToYRow = ARGBToYRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToYRow = ARGBToYRow_AVX2;
@ -229,7 +228,7 @@ int ARGBToI411(const uint8* src_argb, int src_stride_argb,
}
#endif
#if defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
@ -237,7 +236,7 @@ int ARGBToI411(const uint8* src_argb, int src_stride_argb,
}
#endif
#if defined(HAS_ARGBTOUV411ROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 32) {
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUV411Row = ARGBToUV411Row_Any_NEON;
if (IS_ALIGNED(width, 32)) {
ARGBToUV411Row = ARGBToUV411Row_NEON;
@ -265,7 +264,7 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
int halfwidth = (width + 1) >> 1;
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYRow_C;
void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) = MergeUVRow_C;
@ -281,22 +280,27 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
src_stride_argb = -src_stride_argb;
}
#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3;
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3;
if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3;
}
}
ARGBToUVRow = ARGBToUVRow_SSSE3;
ARGBToYRow = ARGBToYRow_SSSE3;
}
}
#elif defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
#endif
#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVRow = ARGBToUVRow_Any_AVX2;
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToUVRow = ARGBToUVRow_AVX2;
ARGBToYRow = ARGBToYRow_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
@ -304,7 +308,7 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
}
#endif
#if defined(HAS_ARGBTOUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUVRow = ARGBToUVRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_NEON;
@ -312,18 +316,15 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
}
#endif
#if defined(HAS_MERGEUVROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) {
if (TestCpuFlag(kCpuHasSSE2)) {
MergeUVRow_ = MergeUVRow_Any_SSE2;
if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_Unaligned_SSE2;
if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
MergeUVRow_ = MergeUVRow_SSE2;
}
MergeUVRow_ = MergeUVRow_SSE2;
}
}
#endif
#if defined(HAS_MERGEUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) {
if (TestCpuFlag(kCpuHasAVX2)) {
MergeUVRow_ = MergeUVRow_Any_AVX2;
if (IS_ALIGNED(halfwidth, 32)) {
MergeUVRow_ = MergeUVRow_AVX2;
@ -331,7 +332,7 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
}
#endif
#if defined(HAS_MERGEUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
if (TestCpuFlag(kCpuHasNEON)) {
MergeUVRow_ = MergeUVRow_Any_NEON;
if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_NEON;
@ -340,8 +341,8 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
#endif
{
// Allocate a rows of uv.
align_buffer_64(row_u, ((halfwidth + 15) & ~15) * 2);
uint8* row_v = row_u + ((halfwidth + 15) & ~15);
align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
uint8* row_v = row_u + ((halfwidth + 31) & ~31);
for (y = 0; y < height - 1; y += 2) {
ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
@ -372,7 +373,7 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
int halfwidth = (width + 1) >> 1;
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYRow_C;
void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) = MergeUVRow_C;
@ -388,22 +389,27 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
src_stride_argb = -src_stride_argb;
}
#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3;
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3;
if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3;
}
}
ARGBToUVRow = ARGBToUVRow_SSSE3;
ARGBToYRow = ARGBToYRow_SSSE3;
}
}
#elif defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
#endif
#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVRow = ARGBToUVRow_Any_AVX2;
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToUVRow = ARGBToUVRow_AVX2;
ARGBToYRow = ARGBToYRow_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
@ -411,7 +417,7 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
}
#endif
#if defined(HAS_ARGBTOUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUVRow = ARGBToUVRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_NEON;
@ -419,18 +425,15 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
}
#endif
#if defined(HAS_MERGEUVROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) {
if (TestCpuFlag(kCpuHasSSE2)) {
MergeUVRow_ = MergeUVRow_Any_SSE2;
if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_Unaligned_SSE2;
if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
MergeUVRow_ = MergeUVRow_SSE2;
}
MergeUVRow_ = MergeUVRow_SSE2;
}
}
#endif
#if defined(HAS_MERGEUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) {
if (TestCpuFlag(kCpuHasAVX2)) {
MergeUVRow_ = MergeUVRow_Any_AVX2;
if (IS_ALIGNED(halfwidth, 32)) {
MergeUVRow_ = MergeUVRow_AVX2;
@ -438,7 +441,7 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
}
#endif
#if defined(HAS_MERGEUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
if (TestCpuFlag(kCpuHasNEON)) {
MergeUVRow_ = MergeUVRow_Any_NEON;
if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_NEON;
@ -447,8 +450,8 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
#endif
{
// Allocate a rows of uv.
align_buffer_64(row_u, ((halfwidth + 15) & ~15) * 2);
uint8* row_v = row_u + ((halfwidth + 15) & ~15);
align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
uint8* row_v = row_u + ((halfwidth + 31) & ~31);
for (y = 0; y < height - 1; y += 2) {
ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
@ -476,8 +479,8 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
int width, int height) {
int y;
void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
int pix) = ARGBToUV422Row_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
int width) = ARGBToUV422Row_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYRow_C;
void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u,
const uint8* src_v, uint8* dst_yuy2, int width) = I422ToYUY2Row_C;
@ -500,17 +503,15 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
src_stride_argb = dst_stride_yuy2 = 0;
}
#if defined(HAS_ARGBTOUV422ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUV422Row = ARGBToUV422Row_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToUV422Row = ARGBToUV422Row_SSSE3;
}
ARGBToUV422Row = ARGBToUV422Row_SSSE3;
}
}
#elif defined(HAS_ARGBTOUV422ROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
#endif
#if defined(HAS_ARGBTOUV422ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUV422Row = ARGBToUV422Row_NEON;
@ -518,17 +519,23 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
}
#endif
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3;
}
ARGBToYRow = ARGBToYRow_SSSE3;
}
}
#elif defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
#endif
#if defined(HAS_ARGBTOYROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToYRow = ARGBToYRow_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
@ -537,14 +544,15 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
#endif
#if defined(HAS_I422TOYUY2ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
if (TestCpuFlag(kCpuHasSSE2)) {
I422ToYUY2Row = I422ToYUY2Row_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
I422ToYUY2Row = I422ToYUY2Row_SSE2;
}
}
#elif defined(HAS_I422TOYUY2ROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
#endif
#if defined(HAS_I422TOYUY2ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I422ToYUY2Row = I422ToYUY2Row_Any_NEON;
if (IS_ALIGNED(width, 16)) {
I422ToYUY2Row = I422ToYUY2Row_NEON;
@ -578,8 +586,8 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
int width, int height) {
int y;
void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
int pix) = ARGBToUV422Row_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
int width) = ARGBToUV422Row_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYRow_C;
void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u,
const uint8* src_v, uint8* dst_uyvy, int width) = I422ToUYVYRow_C;
@ -602,17 +610,15 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
src_stride_argb = dst_stride_uyvy = 0;
}
#if defined(HAS_ARGBTOUV422ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUV422Row = ARGBToUV422Row_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToUV422Row = ARGBToUV422Row_SSSE3;
}
ARGBToUV422Row = ARGBToUV422Row_SSSE3;
}
}
#elif defined(HAS_ARGBTOUV422ROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
#endif
#if defined(HAS_ARGBTOUV422ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUV422Row = ARGBToUV422Row_NEON;
@ -620,17 +626,23 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
}
#endif
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3;
}
ARGBToYRow = ARGBToYRow_SSSE3;
}
}
#elif defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
#endif
#if defined(HAS_ARGBTOYROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToYRow = ARGBToYRow_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
@ -639,14 +651,15 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
#endif
#if defined(HAS_I422TOUYVYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
if (TestCpuFlag(kCpuHasSSE2)) {
I422ToUYVYRow = I422ToUYVYRow_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
I422ToUYVYRow = I422ToUYVYRow_SSE2;
}
}
#elif defined(HAS_I422TOUYVYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
#endif
#if defined(HAS_I422TOUYVYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I422ToUYVYRow = I422ToUYVYRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
I422ToUYVYRow = I422ToUYVYRow_NEON;
@ -679,7 +692,7 @@ int ARGBToI400(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y,
int width, int height) {
int y;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYRow_C;
if (!src_argb || !dst_y || width <= 0 || height == 0) {
return -1;
@ -697,19 +710,15 @@ int ARGBToI400(const uint8* src_argb, int src_stride_argb,
src_stride_argb = dst_stride_y = 0;
}
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3;
}
ARGBToYRow = ARGBToYRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToYRow = ARGBToYRow_AVX2;
@ -717,7 +726,7 @@ int ARGBToI400(const uint8* src_argb, int src_stride_argb,
}
#endif
#if defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
@ -755,7 +764,7 @@ int ARGBToRGB24(const uint8* src_argb, int src_stride_argb,
uint8* dst_rgb24, int dst_stride_rgb24,
int width, int height) {
int y;
void (*ARGBToRGB24Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
void (*ARGBToRGB24Row)(const uint8* src_argb, uint8* dst_rgb, int width) =
ARGBToRGB24Row_C;
if (!src_argb || !dst_rgb24 || width <= 0 || height == 0) {
return -1;
@ -773,14 +782,15 @@ int ARGBToRGB24(const uint8* src_argb, int src_stride_argb,
src_stride_argb = dst_stride_rgb24 = 0;
}
#if defined(HAS_ARGBTORGB24ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToRGB24Row = ARGBToRGB24Row_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToRGB24Row = ARGBToRGB24Row_SSSE3;
}
}
#elif defined(HAS_ARGBTORGB24ROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
#endif
#if defined(HAS_ARGBTORGB24ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToRGB24Row = ARGBToRGB24Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToRGB24Row = ARGBToRGB24Row_NEON;
@ -802,7 +812,7 @@ int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
uint8* dst_raw, int dst_stride_raw,
int width, int height) {
int y;
void (*ARGBToRAWRow)(const uint8* src_argb, uint8* dst_rgb, int pix) =
void (*ARGBToRAWRow)(const uint8* src_argb, uint8* dst_rgb, int width) =
ARGBToRAWRow_C;
if (!src_argb || !dst_raw || width <= 0 || height == 0) {
return -1;
@ -820,14 +830,15 @@ int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
src_stride_argb = dst_stride_raw = 0;
}
#if defined(HAS_ARGBTORAWROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToRAWRow = ARGBToRAWRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToRAWRow = ARGBToRAWRow_SSSE3;
}
}
#elif defined(HAS_ARGBTORAWROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
#endif
#if defined(HAS_ARGBTORAWROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToRAWRow = ARGBToRAWRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToRAWRow = ARGBToRAWRow_NEON;
@ -843,13 +854,74 @@ int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
return 0;
}
// Ordered 8x8 dither for 888 to 565. Values from 0 to 7.
static const uint8 kDither565_4x4[16] = {
0, 4, 1, 5,
6, 2, 7, 3,
1, 5, 0, 4,
7, 3, 6, 2,
};
// Convert ARGB To RGB565 with 4x4 dither matrix (16 bytes).
LIBYUV_API
int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb,
uint8* dst_rgb565, int dst_stride_rgb565,
const uint8* dither4x4, int width, int height) {
int y;
void (*ARGBToRGB565DitherRow)(const uint8* src_argb, uint8* dst_rgb,
const uint32 dither4, int width) = ARGBToRGB565DitherRow_C;
if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) {
return -1;
}
if (height < 0) {
height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
if (!dither4x4) {
dither4x4 = kDither565_4x4;
}
#if defined(HAS_ARGBTORGB565DITHERROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_SSE2;
if (IS_ALIGNED(width, 4)) {
ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_SSE2;
}
}
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_AVX2;
if (IS_ALIGNED(width, 8)) {
ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_AVX2;
}
}
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
ARGBToRGB565DitherRow(src_argb, dst_rgb565,
*(uint32*)(dither4x4 + ((y & 3) << 2)), width);
src_argb += src_stride_argb;
dst_rgb565 += dst_stride_rgb565;
}
return 0;
}
// Convert ARGB To RGB565.
// TODO(fbarchard): Consider using dither function low level with zeros.
LIBYUV_API
int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
uint8* dst_rgb565, int dst_stride_rgb565,
int width, int height) {
int y;
void (*ARGBToRGB565Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
void (*ARGBToRGB565Row)(const uint8* src_argb, uint8* dst_rgb, int width) =
ARGBToRGB565Row_C;
if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) {
return -1;
@ -867,15 +939,23 @@ int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
src_stride_argb = dst_stride_rgb565 = 0;
}
#if defined(HAS_ARGBTORGB565ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && width >= 4 &&
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
if (TestCpuFlag(kCpuHasSSE2)) {
ARGBToRGB565Row = ARGBToRGB565Row_Any_SSE2;
if (IS_ALIGNED(width, 4)) {
ARGBToRGB565Row = ARGBToRGB565Row_SSE2;
}
}
#elif defined(HAS_ARGBTORGB565ROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
#endif
#if defined(HAS_ARGBTORGB565ROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToRGB565Row = ARGBToRGB565Row_Any_AVX2;
if (IS_ALIGNED(width, 8)) {
ARGBToRGB565Row = ARGBToRGB565Row_AVX2;
}
}
#endif
#if defined(HAS_ARGBTORGB565ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToRGB565Row = ARGBToRGB565Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToRGB565Row = ARGBToRGB565Row_NEON;
@ -897,7 +977,7 @@ int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb1555, int dst_stride_argb1555,
int width, int height) {
int y;
void (*ARGBToARGB1555Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
void (*ARGBToARGB1555Row)(const uint8* src_argb, uint8* dst_rgb, int width) =
ARGBToARGB1555Row_C;
if (!src_argb || !dst_argb1555 || width <= 0 || height == 0) {
return -1;
@ -915,15 +995,23 @@ int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb,
src_stride_argb = dst_stride_argb1555 = 0;
}
#if defined(HAS_ARGBTOARGB1555ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && width >= 4 &&
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
if (TestCpuFlag(kCpuHasSSE2)) {
ARGBToARGB1555Row = ARGBToARGB1555Row_Any_SSE2;
if (IS_ALIGNED(width, 4)) {
ARGBToARGB1555Row = ARGBToARGB1555Row_SSE2;
}
}
#elif defined(HAS_ARGBTOARGB1555ROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
#endif
#if defined(HAS_ARGBTOARGB1555ROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToARGB1555Row = ARGBToARGB1555Row_Any_AVX2;
if (IS_ALIGNED(width, 8)) {
ARGBToARGB1555Row = ARGBToARGB1555Row_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOARGB1555ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToARGB1555Row = ARGBToARGB1555Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToARGB1555Row = ARGBToARGB1555Row_NEON;
@ -945,7 +1033,7 @@ int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb4444, int dst_stride_argb4444,
int width, int height) {
int y;
void (*ARGBToARGB4444Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
void (*ARGBToARGB4444Row)(const uint8* src_argb, uint8* dst_rgb, int width) =
ARGBToARGB4444Row_C;
if (!src_argb || !dst_argb4444 || width <= 0 || height == 0) {
return -1;
@ -963,15 +1051,23 @@ int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb,
src_stride_argb = dst_stride_argb4444 = 0;
}
#if defined(HAS_ARGBTOARGB4444ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && width >= 4 &&
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
if (TestCpuFlag(kCpuHasSSE2)) {
ARGBToARGB4444Row = ARGBToARGB4444Row_Any_SSE2;
if (IS_ALIGNED(width, 4)) {
ARGBToARGB4444Row = ARGBToARGB4444Row_SSE2;
}
}
#elif defined(HAS_ARGBTOARGB4444ROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
#endif
#if defined(HAS_ARGBTOARGB4444ROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToARGB4444Row = ARGBToARGB4444Row_Any_AVX2;
if (IS_ALIGNED(width, 8)) {
ARGBToARGB4444Row = ARGBToARGB4444Row_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOARGB4444ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToARGB4444Row = ARGBToARGB4444Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToARGB4444Row = ARGBToARGB4444Row_NEON;
@ -996,8 +1092,8 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
int width, int height) {
int y;
void (*ARGBToUVJRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVJRow_C;
void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int pix) =
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVJRow_C;
void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int width) =
ARGBToYJRow_C;
if (!src_argb ||
!dst_yj || !dst_u || !dst_v ||
@ -1011,23 +1107,17 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
src_stride_argb = -src_stride_argb;
}
#if defined(HAS_ARGBTOYJROW_SSSE3) && defined(HAS_ARGBTOUVJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3;
ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVJRow = ARGBToUVJRow_Unaligned_SSSE3;
ARGBToYJRow = ARGBToYJRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToUVJRow = ARGBToUVJRow_SSSE3;
if (IS_ALIGNED(dst_yj, 16) && IS_ALIGNED(dst_stride_yj, 16)) {
ARGBToYJRow = ARGBToYJRow_SSSE3;
}
}
ARGBToUVJRow = ARGBToUVJRow_SSSE3;
ARGBToYJRow = ARGBToYJRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYJROW_AVX2) && defined(HAS_ARGBTOUVJROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
#if defined(HAS_ARGBTOYJROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToYJRow = ARGBToYJRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToYJRow = ARGBToYJRow_AVX2;
@ -1035,7 +1125,7 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
}
#endif
#if defined(HAS_ARGBTOYJROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYJRow = ARGBToYJRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYJRow = ARGBToYJRow_NEON;
@ -1043,7 +1133,7 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
}
#endif
#if defined(HAS_ARGBTOUVJROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUVJRow = ARGBToUVJRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVJRow = ARGBToUVJRow_NEON;
@ -1067,13 +1157,95 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
return 0;
}
// ARGB little endian (bgra in memory) to J422
LIBYUV_API
int ARGBToJ422(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height) {
int y;
void (*ARGBToUVJ422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
int width) = ARGBToUVJ422Row_C;
void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYJRow_C;
if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
return -1;
}
if (height < 0) {
height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
// Coalesce rows.
if (src_stride_argb == width * 4 &&
dst_stride_y == width &&
dst_stride_u * 2 == width &&
dst_stride_v * 2 == width) {
width *= height;
height = 1;
src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0;
}
#if defined(HAS_ARGBTOUVJ422ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVJ422Row = ARGBToUVJ422Row_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVJ422Row = ARGBToUVJ422Row_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOUVJ422ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUVJ422Row = ARGBToUVJ422Row_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVJ422Row = ARGBToUVJ422Row_NEON;
}
}
#endif
#if defined(HAS_ARGBTOYJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToYJRow = ARGBToYJRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYJROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToYJRow = ARGBToYJRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToYJRow = ARGBToYJRow_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOYJROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYJRow = ARGBToYJRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYJRow = ARGBToYJRow_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
ARGBToUVJ422Row(src_argb, dst_u, dst_v, width);
ARGBToYJRow(src_argb, dst_y, width);
src_argb += src_stride_argb;
dst_y += dst_stride_y;
dst_u += dst_stride_u;
dst_v += dst_stride_v;
}
return 0;
}
// Convert ARGB to J400.
LIBYUV_API
int ARGBToJ400(const uint8* src_argb, int src_stride_argb,
uint8* dst_yj, int dst_stride_yj,
int width, int height) {
int y;
void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int pix) =
void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int width) =
ARGBToYJRow_C;
if (!src_argb || !dst_yj || width <= 0 || height == 0) {
return -1;
@ -1091,19 +1263,15 @@ int ARGBToJ400(const uint8* src_argb, int src_stride_argb,
src_stride_argb = dst_stride_yj = 0;
}
#if defined(HAS_ARGBTOYJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToYJRow = ARGBToYJRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
IS_ALIGNED(dst_yj, 16) && IS_ALIGNED(dst_stride_yj, 16)) {
ARGBToYJRow = ARGBToYJRow_SSSE3;
}
ARGBToYJRow = ARGBToYJRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYJROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToYJRow = ARGBToYJRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToYJRow = ARGBToYJRow_AVX2;
@ -1111,7 +1279,7 @@ int ARGBToJ400(const uint8* src_argb, int src_stride_argb,
}
#endif
#if defined(HAS_ARGBTOYJROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYJRow = ARGBToYJRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYJRow = ARGBToYJRow_NEON;

View File

@ -11,7 +11,6 @@
#include "libyuv/convert_argb.h"
#include "libyuv/cpu_id.h"
#include "libyuv/format_conversion.h"
#ifdef HAVE_JPEG
#include "libyuv/mjpeg_decoder.h"
#endif
@ -144,36 +143,6 @@ int ConvertToARGB(const uint8* sample, size_t sample_size,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
// TODO(fbarchard): Support cropping Bayer by odd numbers
// by adjusting fourcc.
case FOURCC_BGGR:
src = sample + (src_width * crop_y + crop_x);
r = BayerBGGRToARGB(src, src_width,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
case FOURCC_GBRG:
src = sample + (src_width * crop_y + crop_x);
r = BayerGBRGToARGB(src, src_width,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
case FOURCC_GRBG:
src = sample + (src_width * crop_y + crop_x);
r = BayerGRBGToARGB(src, src_width,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
case FOURCC_RGGB:
src = sample + (src_width * crop_y + crop_x);
r = BayerRGGBToARGB(src, src_width,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
case FOURCC_I400:
src = sample + src_width * crop_y + crop_x;
r = I400ToARGB(src, src_width,
@ -205,15 +174,6 @@ int ConvertToARGB(const uint8* sample, size_t sample_size,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
// case FOURCC_Q420:
// src = sample + (src_width + aligned_src_width * 2) * crop_y + crop_x;
// src_uv = sample + (src_width + aligned_src_width * 2) * crop_y +
// src_width + crop_x * 2;
// r = Q420ToARGB(src, src_width * 3,
// src_uv, src_width * 3,
// crop_argb, argb_stride,
// crop_width, inv_crop_height);
// break;
// Triplanar formats
case FOURCC_I420:
case FOURCC_YU12:
@ -241,6 +201,25 @@ int ConvertToARGB(const uint8* sample, size_t sample_size,
crop_width, inv_crop_height);
break;
}
case FOURCC_J420: {
const uint8* src_y = sample + (src_width * crop_y + crop_x);
const uint8* src_u;
const uint8* src_v;
int halfwidth = (src_width + 1) / 2;
int halfheight = (abs_src_height + 1) / 2;
src_u = sample + src_width * abs_src_height +
(halfwidth * crop_y + crop_x) / 2;
src_v = sample + src_width * abs_src_height +
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
r = J420ToARGB(src_y, src_width,
src_u, halfwidth,
src_v, halfwidth,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
}
case FOURCC_I422:
case FOURCC_YV16: {
const uint8* src_y = sample + src_width * crop_y + crop_x;

View File

@ -12,7 +12,6 @@
#include "libyuv/convert.h"
#include "libyuv/format_conversion.h"
#include "libyuv/video_common.h"
#ifdef __cplusplus
@ -173,40 +172,6 @@ int ConvertToI420(const uint8* sample,
v, v_stride,
crop_width, inv_crop_height);
break;
// TODO(fbarchard): Support cropping Bayer by odd numbers
// by adjusting fourcc.
case FOURCC_BGGR:
src = sample + (src_width * crop_y + crop_x);
r = BayerBGGRToI420(src, src_width,
y, y_stride,
u, u_stride,
v, v_stride,
crop_width, inv_crop_height);
break;
case FOURCC_GBRG:
src = sample + (src_width * crop_y + crop_x);
r = BayerGBRGToI420(src, src_width,
y, y_stride,
u, u_stride,
v, v_stride,
crop_width, inv_crop_height);
break;
case FOURCC_GRBG:
src = sample + (src_width * crop_y + crop_x);
r = BayerGRBGToI420(src, src_width,
y, y_stride,
u, u_stride,
v, v_stride,
crop_width, inv_crop_height);
break;
case FOURCC_RGGB:
src = sample + (src_width * crop_y + crop_x);
r = BayerRGGBToI420(src, src_width,
y, y_stride,
u, u_stride,
v, v_stride,
crop_width, inv_crop_height);
break;
case FOURCC_I400:
src = sample + src_width * crop_y + crop_x;
r = I400ToI420(src, src_width,
@ -218,7 +183,8 @@ int ConvertToI420(const uint8* sample,
// Biplanar formats
case FOURCC_NV12:
src = sample + (src_width * crop_y + crop_x);
src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x;
src_uv = sample + (src_width * src_height) +
((crop_y / 2) * aligned_src_width) + ((crop_x / 2) * 2);
r = NV12ToI420Rotate(src, src_width,
src_uv, aligned_src_width,
y, y_stride,
@ -228,7 +194,8 @@ int ConvertToI420(const uint8* sample,
break;
case FOURCC_NV21:
src = sample + (src_width * crop_y + crop_x);
src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x;
src_uv = sample + (src_width * src_height) +
((crop_y / 2) * aligned_src_width) + ((crop_x / 2) * 2);
// Call NV12 but with u and v parameters swapped.
r = NV12ToI420Rotate(src, src_width,
src_uv, aligned_src_width,
@ -245,17 +212,6 @@ int ConvertToI420(const uint8* sample,
v, v_stride,
crop_width, inv_crop_height);
break;
case FOURCC_Q420:
src = sample + (src_width + aligned_src_width * 2) * crop_y + crop_x;
src_uv = sample + (src_width + aligned_src_width * 2) * crop_y +
src_width + crop_x * 2;
r = Q420ToI420(src, src_width * 3,
src_uv, src_width * 3,
y, y_stride,
u, u_stride,
v, v_stride,
crop_width, inv_crop_height);
break;
// Triplanar formats
case FOURCC_I420:
case FOURCC_YU12:

View File

@ -14,8 +14,8 @@
#include <intrin.h> // For __cpuidex()
#endif
#if !defined(__pnacl__) && !defined(__CLR_VER) && \
!defined(__native_client__) && \
defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219)
!defined(__native_client__) && (defined(_M_IX86) || defined(_M_X64)) && \
defined(_MSC_VER) && !defined(__clang__) && (_MSC_FULL_VER >= 160040219)
#include <immintrin.h> // For _xgetbv()
#endif
@ -36,19 +36,20 @@ extern "C" {
// For functions that use the stack and have runtime checks for overflow,
// use SAFEBUFFERS to avoid additional check.
#if defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219)
#if (defined(_MSC_VER) && !defined(__clang__)) && (_MSC_FULL_VER >= 160040219)
#define SAFEBUFFERS __declspec(safebuffers)
#else
#define SAFEBUFFERS
#endif
// Low level cpuid for X86. Returns zeros on other CPUs.
#if !defined(__pnacl__) && !defined(__CLR_VER) && \
(defined(_M_IX86) || defined(_M_X64) || \
defined(__i386__) || defined(__x86_64__))
// Low level cpuid for X86.
#if (defined(_M_IX86) || defined(_M_X64) || \
defined(__i386__) || defined(__x86_64__)) && \
!defined(__pnacl__) && !defined(__CLR_VER)
LIBYUV_API
void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) {
#if defined(_MSC_VER) && !defined(__clang__)
// Visual C version uses intrinsic or inline x86 assembly.
#if (_MSC_FULL_VER >= 160040219)
__cpuidex((int*)(cpu_info), info_eax, info_ecx);
#elif defined(_M_IX86)
@ -62,16 +63,17 @@ void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) {
mov [edi + 8], ecx
mov [edi + 12], edx
}
#else
#else // Visual C but not x86
if (info_ecx == 0) {
__cpuid((int*)(cpu_info), info_eax);
} else {
cpu_info[3] = cpu_info[2] = cpu_info[1] = cpu_info[0] = 0;
}
#endif
#else // defined(_MSC_VER)
// GCC version uses inline x86 assembly.
#else // defined(_MSC_VER) && !defined(__clang__)
uint32 info_ebx, info_edx;
asm volatile ( // NOLINT
asm volatile (
#if defined( __i386__) && defined(__PIC__)
// Preserve ebx for fpic 32 bit.
"mov %%ebx, %%edi \n"
@ -87,35 +89,47 @@ void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) {
cpu_info[1] = info_ebx;
cpu_info[2] = info_ecx;
cpu_info[3] = info_edx;
#endif // defined(_MSC_VER)
#endif // defined(_MSC_VER) && !defined(__clang__)
}
#if !defined(__native_client__)
#define HAS_XGETBV
// X86 CPUs have xgetbv to detect OS saves high parts of ymm registers.
int TestOsSaveYmm() {
uint32 xcr0 = 0u;
#if defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219)
xcr0 = (uint32)(_xgetbv(0)); // VS2010 SP1 required.
#elif defined(_M_IX86) && defined(_MSC_VER)
__asm {
xor ecx, ecx // xcr 0
_asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0 // For VS2010 and earlier.
mov xcr0, eax
}
#elif defined(__i386__) || defined(__x86_64__)
asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcr0) : "c" (0) : "%edx");
#endif // defined(_MSC_VER)
return((xcr0 & 6) == 6); // Is ymm saved?
}
#endif // !defined(__native_client__)
#else
#else // (defined(_M_IX86) || defined(_M_X64) ...
LIBYUV_API
void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info) {
cpu_info[0] = cpu_info[1] = cpu_info[2] = cpu_info[3] = 0;
}
#endif
// For VS2010 and earlier emit can be used:
// _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0 // For VS2010 and earlier.
// __asm {
// xor ecx, ecx // xcr 0
// xgetbv
// mov xcr0, eax
// }
// For VS2013 and earlier 32 bit, the _xgetbv(0) optimizer produces bad code.
// https://code.google.com/p/libyuv/issues/detail?id=529
#if defined(_M_IX86) && (_MSC_VER < 1900)
#pragma optimize("g", off)
#endif
#if (defined(_M_IX86) || defined(_M_X64) || \
defined(__i386__) || defined(__x86_64__)) && \
!defined(__pnacl__) && !defined(__CLR_VER) && !defined(__native_client__)
#define HAS_XGETBV
// X86 CPUs have xgetbv to detect OS saves high parts of ymm registers.
int GetXCR0() {
uint32 xcr0 = 0u;
#if (_MSC_FULL_VER >= 160040219)
xcr0 = (uint32)(_xgetbv(0)); // VS2010 SP1 required.
#elif defined(__i386__) || defined(__x86_64__)
asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcr0) : "c" (0) : "%edx");
#endif // defined(__i386__) || defined(__x86_64__)
return xcr0;
}
#endif // defined(_M_IX86) || defined(_M_X64) ..
// Return optimization to previous setting.
#if defined(_M_IX86) && (_MSC_VER < 1900)
#pragma optimize("g", on)
#endif
// based on libvpx arm_cpudetect.c
// For Arm, but public to allow testing on any CPU
LIBYUV_API SAFEBUFFERS
@ -134,36 +148,21 @@ int ArmCpuCaps(const char* cpuinfo_name) {
fclose(f);
return kCpuHasNEON;
}
// aarch64 uses asimd for Neon.
p = strstr(cpuinfo_line, " asimd");
if (p && (p[6] == ' ' || p[6] == '\n')) {
fclose(f);
return kCpuHasNEON;
}
}
}
fclose(f);
return 0;
}
#if defined(__mips__) && defined(__linux__)
static int MipsCpuCaps(const char* search_string) {
char cpuinfo_line[512];
const char* file_name = "/proc/cpuinfo";
FILE* f = fopen(file_name, "r");
if (!f) {
// Assume DSP if /proc/cpuinfo is unavailable.
// This will occur for Chrome sandbox for Pepper or Render process.
return kCpuHasMIPS_DSP;
}
while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f) != NULL) {
if (strstr(cpuinfo_line, search_string) != NULL) {
fclose(f);
return kCpuHasMIPS_DSP;
}
}
fclose(f);
return 0;
}
#endif
// CPU detect function for SIMD instruction sets.
LIBYUV_API
int cpu_info_ = kCpuInit; // cpu_info is not initialized yet.
int cpu_info_ = 0; // cpu_info is not initialized yet.
// Test environment variable for disabling CPU features. Any non-zero value
// to disable. Zero ignored to make it easy to set the variable on/off.
@ -186,8 +185,9 @@ static LIBYUV_BOOL TestEnv(const char*) {
LIBYUV_API SAFEBUFFERS
int InitCpuFlags(void) {
// TODO(fbarchard): swap kCpuInit logic so 0 means uninitialized.
int cpu_info = 0;
#if !defined(__pnacl__) && !defined(__CLR_VER) && defined(CPU_X86)
uint32 cpu_info0[4] = { 0, 0, 0, 0 };
uint32 cpu_info1[4] = { 0, 0, 0, 0 };
uint32 cpu_info7[4] = { 0, 0, 0, 0 };
@ -196,92 +196,102 @@ int InitCpuFlags(void) {
if (cpu_info0[0] >= 7) {
CpuId(7, 0, cpu_info7);
}
cpu_info_ = ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) |
((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) |
((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) |
((cpu_info1[2] & 0x00100000) ? kCpuHasSSE42 : 0) |
((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0) |
((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) |
kCpuHasX86;
cpu_info = ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) |
((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) |
((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) |
((cpu_info1[2] & 0x00100000) ? kCpuHasSSE42 : 0) |
((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0) |
((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) |
kCpuHasX86;
#ifdef HAS_XGETBV
if ((cpu_info1[2] & 0x18000000) == 0x18000000 && // AVX and OSSave
TestOsSaveYmm()) { // Saves YMM.
cpu_info_ |= ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) |
kCpuHasAVX;
// AVX requires CPU has AVX, XSAVE and OSXSave for xgetbv
if (((cpu_info1[2] & 0x1c000000) == 0x1c000000) && // AVX and OSXSave
((GetXCR0() & 6) == 6)) { // Test OS saves YMM registers
cpu_info |= ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) | kCpuHasAVX;
// Detect AVX512bw
if ((GetXCR0() & 0xe0) == 0xe0) {
cpu_info |= (cpu_info7[1] & 0x40000000) ? kCpuHasAVX3 : 0;
}
}
#endif
// Environment variable overrides for testing.
if (TestEnv("LIBYUV_DISABLE_X86")) {
cpu_info_ &= ~kCpuHasX86;
cpu_info &= ~kCpuHasX86;
}
if (TestEnv("LIBYUV_DISABLE_SSE2")) {
cpu_info_ &= ~kCpuHasSSE2;
cpu_info &= ~kCpuHasSSE2;
}
if (TestEnv("LIBYUV_DISABLE_SSSE3")) {
cpu_info_ &= ~kCpuHasSSSE3;
cpu_info &= ~kCpuHasSSSE3;
}
if (TestEnv("LIBYUV_DISABLE_SSE41")) {
cpu_info_ &= ~kCpuHasSSE41;
cpu_info &= ~kCpuHasSSE41;
}
if (TestEnv("LIBYUV_DISABLE_SSE42")) {
cpu_info_ &= ~kCpuHasSSE42;
cpu_info &= ~kCpuHasSSE42;
}
if (TestEnv("LIBYUV_DISABLE_AVX")) {
cpu_info_ &= ~kCpuHasAVX;
cpu_info &= ~kCpuHasAVX;
}
if (TestEnv("LIBYUV_DISABLE_AVX2")) {
cpu_info_ &= ~kCpuHasAVX2;
cpu_info &= ~kCpuHasAVX2;
}
if (TestEnv("LIBYUV_DISABLE_ERMS")) {
cpu_info_ &= ~kCpuHasERMS;
cpu_info &= ~kCpuHasERMS;
}
if (TestEnv("LIBYUV_DISABLE_FMA3")) {
cpu_info_ &= ~kCpuHasFMA3;
cpu_info &= ~kCpuHasFMA3;
}
if (TestEnv("LIBYUV_DISABLE_AVX3")) {
cpu_info &= ~kCpuHasAVX3;
}
#elif defined(__mips__) && defined(__linux__)
// Linux mips parse text file for dsp detect.
cpu_info_ = MipsCpuCaps("dsp"); // set kCpuHasMIPS_DSP.
#if defined(__mips_dspr2)
cpu_info_ |= kCpuHasMIPS_DSPR2;
#endif
cpu_info_ |= kCpuHasMIPS;
#if defined(__mips__) && defined(__linux__)
#if defined(__mips_dspr2)
cpu_info |= kCpuHasMIPS_DSPR2;
#endif
cpu_info |= kCpuHasMIPS;
if (getenv("LIBYUV_DISABLE_MIPS")) {
cpu_info_ &= ~kCpuHasMIPS;
}
if (getenv("LIBYUV_DISABLE_MIPS_DSP")) {
cpu_info_ &= ~kCpuHasMIPS_DSP;
cpu_info &= ~kCpuHasMIPS;
}
if (getenv("LIBYUV_DISABLE_MIPS_DSPR2")) {
cpu_info_ &= ~kCpuHasMIPS_DSPR2;
cpu_info &= ~kCpuHasMIPS_DSPR2;
}
#elif defined(__arm__) || defined(__aarch64__)
#endif
#if defined(__arm__) || defined(__aarch64__)
// gcc -mfpu=neon defines __ARM_NEON__
// __ARM_NEON__ generates code that requires Neon. NaCL also requires Neon.
// For Linux, /proc/cpuinfo can be tested but without that assume Neon.
#if defined(__ARM_NEON__) || defined(__native_client__) || !defined(__linux__)
cpu_info_ = kCpuHasNEON;
cpu_info = kCpuHasNEON;
// For aarch64(arm64), /proc/cpuinfo's feature is not complete, e.g. no neon
// flag in it.
// So for aarch64, neon enabling is hard coded here.
#elif defined(__aarch64__)
cpu_info_ = kCpuHasNEON;
#endif
#if defined(__aarch64__)
cpu_info = kCpuHasNEON;
#else
// Linux arm parse text file for neon detect.
cpu_info_ = ArmCpuCaps("/proc/cpuinfo");
cpu_info = ArmCpuCaps("/proc/cpuinfo");
#endif
cpu_info_ |= kCpuHasARM;
cpu_info |= kCpuHasARM;
if (TestEnv("LIBYUV_DISABLE_NEON")) {
cpu_info_ &= ~kCpuHasNEON;
cpu_info &= ~kCpuHasNEON;
}
#endif // __arm__
if (TestEnv("LIBYUV_DISABLE_ASM")) {
cpu_info_ = 0;
cpu_info = 0;
}
return cpu_info_;
cpu_info |= kCpuInitialized;
cpu_info_ = cpu_info;
return cpu_info;
}
// Note that use of this function is not thread safe.
LIBYUV_API
void MaskCpuFlags(int enable_flags) {
cpu_info_ = InitCpuFlags() & enable_flags;

View File

@ -1,554 +0,0 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/format_conversion.h"
#include "libyuv/basic_types.h"
#include "libyuv/cpu_id.h"
#include "libyuv/video_common.h"
#include "libyuv/row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// generate a selector mask useful for pshufb
static uint32 GenerateSelector(int select0, int select1) {
return (uint32)(select0) |
(uint32)((select1 + 4) << 8) |
(uint32)((select0 + 8) << 16) |
(uint32)((select1 + 12) << 24);
}
static int MakeSelectors(const int blue_index,
const int green_index,
const int red_index,
uint32 dst_fourcc_bayer,
uint32* index_map) {
// Now build a lookup table containing the indices for the four pixels in each
// 2x2 Bayer grid.
switch (dst_fourcc_bayer) {
case FOURCC_BGGR:
index_map[0] = GenerateSelector(blue_index, green_index);
index_map[1] = GenerateSelector(green_index, red_index);
break;
case FOURCC_GBRG:
index_map[0] = GenerateSelector(green_index, blue_index);
index_map[1] = GenerateSelector(red_index, green_index);
break;
case FOURCC_RGGB:
index_map[0] = GenerateSelector(red_index, green_index);
index_map[1] = GenerateSelector(green_index, blue_index);
break;
case FOURCC_GRBG:
index_map[0] = GenerateSelector(green_index, red_index);
index_map[1] = GenerateSelector(blue_index, green_index);
break;
default:
return -1; // Bad FourCC
}
return 0;
}
// Converts 32 bit ARGB to Bayer RGB formats.
LIBYUV_API
int ARGBToBayer(const uint8* src_argb, int src_stride_argb,
uint8* dst_bayer, int dst_stride_bayer,
int width, int height,
uint32 dst_fourcc_bayer) {
int y;
const int blue_index = 0; // Offsets for ARGB format
const int green_index = 1;
const int red_index = 2;
uint32 index_map[2];
void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
uint32 selector, int pix) = ARGBToBayerRow_C;
if (height < 0) {
height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
#if defined(HAS_ARGBTOBAYERROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8 &&
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
ARGBToBayerRow = ARGBToBayerRow_SSSE3;
}
}
#elif defined(HAS_ARGBTOBAYERROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
ARGBToBayerRow = ARGBToBayerRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToBayerRow = ARGBToBayerRow_NEON;
}
}
#endif
if (MakeSelectors(blue_index, green_index, red_index,
dst_fourcc_bayer, index_map)) {
return -1; // Bad FourCC
}
for (y = 0; y < height; ++y) {
ARGBToBayerRow(src_argb, dst_bayer, index_map[y & 1], width);
src_argb += src_stride_argb;
dst_bayer += dst_stride_bayer;
}
return 0;
}
#define AVG(a, b) (((a) + (b)) >> 1)
static void BayerRowBG(const uint8* src_bayer0, int src_stride_bayer,
uint8* dst_argb, int pix) {
const uint8* src_bayer1 = src_bayer0 + src_stride_bayer;
uint8 g = src_bayer0[1];
uint8 r = src_bayer1[1];
int x;
for (x = 0; x < pix - 2; x += 2) {
dst_argb[0] = src_bayer0[0];
dst_argb[1] = AVG(g, src_bayer0[1]);
dst_argb[2] = AVG(r, src_bayer1[1]);
dst_argb[3] = 255U;
dst_argb[4] = AVG(src_bayer0[0], src_bayer0[2]);
dst_argb[5] = src_bayer0[1];
dst_argb[6] = src_bayer1[1];
dst_argb[7] = 255U;
g = src_bayer0[1];
r = src_bayer1[1];
src_bayer0 += 2;
src_bayer1 += 2;
dst_argb += 8;
}
dst_argb[0] = src_bayer0[0];
dst_argb[1] = AVG(g, src_bayer0[1]);
dst_argb[2] = AVG(r, src_bayer1[1]);
dst_argb[3] = 255U;
if (!(pix & 1)) {
dst_argb[4] = src_bayer0[0];
dst_argb[5] = src_bayer0[1];
dst_argb[6] = src_bayer1[1];
dst_argb[7] = 255U;
}
}
static void BayerRowRG(const uint8* src_bayer0, int src_stride_bayer,
uint8* dst_argb, int pix) {
const uint8* src_bayer1 = src_bayer0 + src_stride_bayer;
uint8 g = src_bayer0[1];
uint8 b = src_bayer1[1];
int x;
for (x = 0; x < pix - 2; x += 2) {
dst_argb[0] = AVG(b, src_bayer1[1]);
dst_argb[1] = AVG(g, src_bayer0[1]);
dst_argb[2] = src_bayer0[0];
dst_argb[3] = 255U;
dst_argb[4] = src_bayer1[1];
dst_argb[5] = src_bayer0[1];
dst_argb[6] = AVG(src_bayer0[0], src_bayer0[2]);
dst_argb[7] = 255U;
g = src_bayer0[1];
b = src_bayer1[1];
src_bayer0 += 2;
src_bayer1 += 2;
dst_argb += 8;
}
dst_argb[0] = AVG(b, src_bayer1[1]);
dst_argb[1] = AVG(g, src_bayer0[1]);
dst_argb[2] = src_bayer0[0];
dst_argb[3] = 255U;
if (!(pix & 1)) {
dst_argb[4] = src_bayer1[1];
dst_argb[5] = src_bayer0[1];
dst_argb[6] = src_bayer0[0];
dst_argb[7] = 255U;
}
}
static void BayerRowGB(const uint8* src_bayer0, int src_stride_bayer,
uint8* dst_argb, int pix) {
const uint8* src_bayer1 = src_bayer0 + src_stride_bayer;
uint8 b = src_bayer0[1];
int x;
for (x = 0; x < pix - 2; x += 2) {
dst_argb[0] = AVG(b, src_bayer0[1]);
dst_argb[1] = src_bayer0[0];
dst_argb[2] = src_bayer1[0];
dst_argb[3] = 255U;
dst_argb[4] = src_bayer0[1];
dst_argb[5] = AVG(src_bayer0[0], src_bayer0[2]);
dst_argb[6] = AVG(src_bayer1[0], src_bayer1[2]);
dst_argb[7] = 255U;
b = src_bayer0[1];
src_bayer0 += 2;
src_bayer1 += 2;
dst_argb += 8;
}
dst_argb[0] = AVG(b, src_bayer0[1]);
dst_argb[1] = src_bayer0[0];
dst_argb[2] = src_bayer1[0];
dst_argb[3] = 255U;
if (!(pix & 1)) {
dst_argb[4] = src_bayer0[1];
dst_argb[5] = src_bayer0[0];
dst_argb[6] = src_bayer1[0];
dst_argb[7] = 255U;
}
}
static void BayerRowGR(const uint8* src_bayer0, int src_stride_bayer,
uint8* dst_argb, int pix) {
const uint8* src_bayer1 = src_bayer0 + src_stride_bayer;
uint8 r = src_bayer0[1];
int x;
for (x = 0; x < pix - 2; x += 2) {
dst_argb[0] = src_bayer1[0];
dst_argb[1] = src_bayer0[0];
dst_argb[2] = AVG(r, src_bayer0[1]);
dst_argb[3] = 255U;
dst_argb[4] = AVG(src_bayer1[0], src_bayer1[2]);
dst_argb[5] = AVG(src_bayer0[0], src_bayer0[2]);
dst_argb[6] = src_bayer0[1];
dst_argb[7] = 255U;
r = src_bayer0[1];
src_bayer0 += 2;
src_bayer1 += 2;
dst_argb += 8;
}
dst_argb[0] = src_bayer1[0];
dst_argb[1] = src_bayer0[0];
dst_argb[2] = AVG(r, src_bayer0[1]);
dst_argb[3] = 255U;
if (!(pix & 1)) {
dst_argb[4] = src_bayer1[0];
dst_argb[5] = src_bayer0[0];
dst_argb[6] = src_bayer0[1];
dst_argb[7] = 255U;
}
}
// Converts any Bayer RGB format to ARGB.
LIBYUV_API
int BayerToARGB(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_argb, int dst_stride_argb,
int width, int height,
uint32 src_fourcc_bayer) {
int y;
void (*BayerRow0)(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_argb, int pix);
void (*BayerRow1)(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_argb, int pix);
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
switch (src_fourcc_bayer) {
case FOURCC_BGGR:
BayerRow0 = BayerRowBG;
BayerRow1 = BayerRowGR;
break;
case FOURCC_GBRG:
BayerRow0 = BayerRowGB;
BayerRow1 = BayerRowRG;
break;
case FOURCC_GRBG:
BayerRow0 = BayerRowGR;
BayerRow1 = BayerRowBG;
break;
case FOURCC_RGGB:
BayerRow0 = BayerRowRG;
BayerRow1 = BayerRowGB;
break;
default:
return -1; // Bad FourCC
}
for (y = 0; y < height - 1; y += 2) {
BayerRow0(src_bayer, src_stride_bayer, dst_argb, width);
BayerRow1(src_bayer + src_stride_bayer, -src_stride_bayer,
dst_argb + dst_stride_argb, width);
src_bayer += src_stride_bayer * 2;
dst_argb += dst_stride_argb * 2;
}
if (height & 1) {
BayerRow0(src_bayer, src_stride_bayer, dst_argb, width);
}
return 0;
}
// Converts any Bayer RGB format to ARGB.
LIBYUV_API
int BayerToI420(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height,
uint32 src_fourcc_bayer) {
void (*BayerRow0)(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_argb, int pix);
void (*BayerRow1)(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_argb, int pix);
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
ARGBToYRow_C;
// Negative height means invert the image.
if (height < 0) {
int halfheight;
height = -height;
halfheight = (height + 1) >> 1;
dst_y = dst_y + (height - 1) * dst_stride_y;
dst_u = dst_u + (halfheight - 1) * dst_stride_u;
dst_v = dst_v + (halfheight - 1) * dst_stride_v;
dst_stride_y = -dst_stride_y;
dst_stride_u = -dst_stride_u;
dst_stride_v = -dst_stride_v;
}
#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
ARGBToUVRow = ARGBToUVRow_SSSE3;
if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3;
}
}
}
#elif defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
ARGBToUVRow = ARGBToUVRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_NEON;
}
}
#endif
switch (src_fourcc_bayer) {
case FOURCC_BGGR:
BayerRow0 = BayerRowBG;
BayerRow1 = BayerRowGR;
break;
case FOURCC_GBRG:
BayerRow0 = BayerRowGB;
BayerRow1 = BayerRowRG;
break;
case FOURCC_GRBG:
BayerRow0 = BayerRowGR;
BayerRow1 = BayerRowBG;
break;
case FOURCC_RGGB:
BayerRow0 = BayerRowRG;
BayerRow1 = BayerRowGB;
break;
default:
return -1; // Bad FourCC
}
{
// Allocate 2 rows of ARGB.
const int kRowSize = (width * 4 + 15) & ~15;
align_buffer_64(row, kRowSize * 2);
int y;
for (y = 0; y < height - 1; y += 2) {
BayerRow0(src_bayer, src_stride_bayer, row, width);
BayerRow1(src_bayer + src_stride_bayer, -src_stride_bayer,
row + kRowSize, width);
ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width);
ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
src_bayer += src_stride_bayer * 2;
dst_y += dst_stride_y * 2;
dst_u += dst_stride_u;
dst_v += dst_stride_v;
}
if (height & 1) {
BayerRow0(src_bayer, src_stride_bayer, row, width);
ARGBToUVRow(row, 0, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width);
}
free_aligned_buffer_64(row);
}
return 0;
}
// Convert I420 to Bayer.
LIBYUV_API
int I420ToBayer(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_bayer, int dst_stride_bayer,
int width, int height,
uint32 dst_fourcc_bayer) {
void (*I422ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) = I422ToARGBRow_C;
void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
uint32 selector, int pix) = ARGBToBayerRow_C;
const int blue_index = 0; // Offsets for ARGB format
const int green_index = 1;
const int red_index = 2;
uint32 index_map[2];
// Negative height means invert the image.
if (height < 0) {
int halfheight;
height = -height;
halfheight = (height + 1) >> 1;
src_y = src_y + (height - 1) * src_stride_y;
src_u = src_u + (halfheight - 1) * src_stride_u;
src_v = src_v + (halfheight - 1) * src_stride_v;
src_stride_y = -src_stride_y;
src_stride_u = -src_stride_u;
src_stride_v = -src_stride_v;
}
#if defined(HAS_I422TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
I422ToARGBRow = I422ToARGBRow_SSSE3;
}
}
#endif
#if defined(HAS_I422TOARGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && width >= 16) {
I422ToARGBRow = I422ToARGBRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
I422ToARGBRow = I422ToARGBRow_AVX2;
}
}
#endif
#if defined(HAS_I422TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
I422ToARGBRow = I422ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
I422ToARGBRow = I422ToARGBRow_NEON;
}
}
#endif
#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2)) {
I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
}
#endif
#if defined(HAS_ARGBTOBAYERROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
ARGBToBayerRow = ARGBToBayerRow_SSSE3;
}
}
#elif defined(HAS_ARGBTOBAYERROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
ARGBToBayerRow = ARGBToBayerRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToBayerRow = ARGBToBayerRow_NEON;
}
}
#endif
if (MakeSelectors(blue_index, green_index, red_index,
dst_fourcc_bayer, index_map)) {
return -1; // Bad FourCC
}
{
// Allocate a row of ARGB.
align_buffer_64(row, width * 4);
int y;
for (y = 0; y < height; ++y) {
I422ToARGBRow(src_y, src_u, src_v, row, width);
ARGBToBayerRow(row, dst_bayer, index_map[y & 1], width);
dst_bayer += dst_stride_bayer;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
}
free_aligned_buffer_64(row);
}
return 0;
}
#define MAKEBAYERFOURCC(BAYER) \
LIBYUV_API \
int Bayer##BAYER##ToI420(const uint8* src_bayer, int src_stride_bayer, \
uint8* dst_y, int dst_stride_y, \
uint8* dst_u, int dst_stride_u, \
uint8* dst_v, int dst_stride_v, \
int width, int height) { \
return BayerToI420(src_bayer, src_stride_bayer, \
dst_y, dst_stride_y, \
dst_u, dst_stride_u, \
dst_v, dst_stride_v, \
width, height, \
FOURCC_##BAYER); \
} \
\
LIBYUV_API \
int I420ToBayer##BAYER(const uint8* src_y, int src_stride_y, \
const uint8* src_u, int src_stride_u, \
const uint8* src_v, int src_stride_v, \
uint8* dst_bayer, int dst_stride_bayer, \
int width, int height) { \
return I420ToBayer(src_y, src_stride_y, \
src_u, src_stride_u, \
src_v, src_stride_v, \
dst_bayer, dst_stride_bayer, \
width, height, \
FOURCC_##BAYER); \
} \
\
LIBYUV_API \
int ARGBToBayer##BAYER(const uint8* src_argb, int src_stride_argb, \
uint8* dst_bayer, int dst_stride_bayer, \
int width, int height) { \
return ARGBToBayer(src_argb, src_stride_argb, \
dst_bayer, dst_stride_bayer, \
width, height, \
FOURCC_##BAYER); \
} \
\
LIBYUV_API \
int Bayer##BAYER##ToARGB(const uint8* src_bayer, int src_stride_bayer, \
uint8* dst_argb, int dst_stride_argb, \
int width, int height) { \
return BayerToARGB(src_bayer, src_stride_bayer, \
dst_argb, dst_stride_argb, \
width, height, \
FOURCC_##BAYER); \
}
MAKEBAYERFOURCC(BGGR)
MAKEBAYERFOURCC(GBRG)
MAKEBAYERFOURCC(GRBG)
MAKEBAYERFOURCC(RGGB)
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

View File

@ -18,6 +18,12 @@
// Must be included before jpeglib.
#include <setjmp.h>
#define HAVE_SETJMP
#if defined(_MSC_VER)
// disable warning 4324: structure was padded due to __declspec(align())
#pragma warning(disable:4324)
#endif
#endif
struct FILE; // For jpeglib.h.
@ -53,8 +59,7 @@ const int MJpegDecoder::kColorSpaceYCCK = JCS_YCCK;
// Methods that are passed to jpeglib.
boolean fill_input_buffer(jpeg_decompress_struct* cinfo);
void init_source(jpeg_decompress_struct* cinfo);
void skip_input_data(jpeg_decompress_struct* cinfo,
long num_bytes); // NOLINT
void skip_input_data(jpeg_decompress_struct* cinfo, long num_bytes); // NOLINT
void term_source(jpeg_decompress_struct* cinfo);
void ErrorHandler(jpeg_common_struct* cinfo);
@ -423,8 +428,7 @@ boolean fill_input_buffer(j_decompress_ptr cinfo) {
return TRUE;
}
void skip_input_data(j_decompress_ptr cinfo,
long num_bytes) { // NOLINT
void skip_input_data(j_decompress_ptr cinfo, long num_bytes) { // NOLINT
cinfo->src->next_input_byte += num_bytes;
}

View File

@ -10,36 +10,60 @@
#include "libyuv/mjpeg_decoder.h"
#include <string.h> // For memchr.
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Helper function to validate the jpeg appears intact.
// TODO(fbarchard): Optimize case where SOI is found but EOI is not.
LIBYUV_BOOL ValidateJpeg(const uint8* sample, size_t sample_size) {
size_t i;
if (sample_size < 64) {
// ERROR: Invalid jpeg size: sample_size
return LIBYUV_FALSE;
}
if (sample[0] != 0xff || sample[1] != 0xd8) { // Start Of Image
// ERROR: Invalid jpeg initial start code
return LIBYUV_FALSE;
}
for (i = sample_size - 2; i > 1;) {
if (sample[i] != 0xd9) {
if (sample[i] == 0xff && sample[i + 1] == 0xd9) { // End Of Image
// Helper function to scan for EOI marker (0xff 0xd9).
static LIBYUV_BOOL ScanEOI(const uint8* sample, size_t sample_size) {
if (sample_size >= 2) {
const uint8* end = sample + sample_size - 1;
const uint8* it = sample;
while (it < end) {
// TODO(fbarchard): scan for 0xd9 instead.
it = static_cast<const uint8 *>(memchr(it, 0xff, end - it));
if (it == NULL) {
break;
}
if (it[1] == 0xd9) {
return LIBYUV_TRUE; // Success: Valid jpeg.
}
--i;
++it; // Skip over current 0xff.
}
--i;
}
// ERROR: Invalid jpeg end code not found. Size sample_size
return LIBYUV_FALSE;
}
// Helper function to validate the jpeg appears intact.
LIBYUV_BOOL ValidateJpeg(const uint8* sample, size_t sample_size) {
// Maximum size that ValidateJpeg will consider valid.
const size_t kMaxJpegSize = 0x7fffffffull;
const size_t kBackSearchSize = 1024;
if (sample_size < 64 || sample_size > kMaxJpegSize || !sample) {
// ERROR: Invalid jpeg size: sample_size
return LIBYUV_FALSE;
}
if (sample[0] != 0xff || sample[1] != 0xd8) { // SOI marker
// ERROR: Invalid jpeg initial start code
return LIBYUV_FALSE;
}
// Look for the End Of Image (EOI) marker near the end of the buffer.
if (sample_size > kBackSearchSize) {
if (ScanEOI(sample + sample_size - kBackSearchSize, kBackSearchSize)) {
return LIBYUV_TRUE; // Success: Valid jpeg.
}
// Reduce search size for forward search.
sample_size = sample_size - kBackSearchSize + 1;
}
// Step over SOI marker and scan for EOI.
return ScanEOI(sample + 2, sample_size - 2);
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -27,36 +27,31 @@ extern "C" {
(defined(__x86_64__) && !defined(__native_client__)) || defined(__i386__))
#define HAS_SCALEARGBROWDOWNEVEN_SSE2
void ScaleARGBRowDownEven_SSE2(const uint8* src_ptr, int src_stride,
int src_stepx,
uint8* dst_ptr, int dst_width);
int src_stepx, uint8* dst_ptr, int dst_width);
#endif
#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
(defined(__ARM_NEON__) || defined(LIBYUV_NEON))
(defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
#define HAS_SCALEARGBROWDOWNEVEN_NEON
void ScaleARGBRowDownEven_NEON(const uint8* src_ptr, int src_stride,
int src_stepx,
uint8* dst_ptr, int dst_width);
int src_stepx, uint8* dst_ptr, int dst_width);
#endif
void ScaleARGBRowDownEven_C(const uint8* src_ptr, int,
int src_stepx,
uint8* dst_ptr, int dst_width);
int src_stepx, uint8* dst_ptr, int dst_width);
static void ARGBTranspose(const uint8* src, int src_stride,
uint8* dst, int dst_stride,
int width, int height) {
uint8* dst, int dst_stride, int width, int height) {
int i;
int src_pixel_step = src_stride >> 2;
void (*ScaleARGBRowDownEven)(const uint8* src_ptr, int src_stride,
int src_step, uint8* dst_ptr, int dst_width) = ScaleARGBRowDownEven_C;
#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(height, 4) && // Width of dest.
IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(height, 4)) { // Width of dest.
ScaleARGBRowDownEven = ScaleARGBRowDownEven_SSE2;
}
#elif defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(height, 4) && // Width of dest.
IS_ALIGNED(src, 4)) {
#endif
#if defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(height, 4)) { // Width of dest.
ScaleARGBRowDownEven = ScaleARGBRowDownEven_NEON;
}
#endif
@ -69,8 +64,7 @@ static void ARGBTranspose(const uint8* src, int src_stride,
}
void ARGBRotate90(const uint8* src, int src_stride,
uint8* dst, int dst_stride,
int width, int height) {
uint8* dst, int dst_stride, int width, int height) {
// Rotate by 90 is a ARGBTranspose with the source read
// from bottom to top. So set the source pointer to the end
// of the buffer and flip the sign of the source stride.
@ -80,8 +74,7 @@ void ARGBRotate90(const uint8* src, int src_stride,
}
void ARGBRotate270(const uint8* src, int src_stride,
uint8* dst, int dst_stride,
int width, int height) {
uint8* dst, int dst_stride, int width, int height) {
// Rotate by 270 is a ARGBTranspose with the destination written
// from bottom to top. So set the destination pointer to the end
// of the buffer and flip the sign of the destination stride.
@ -91,8 +84,7 @@ void ARGBRotate270(const uint8* src, int src_stride,
}
void ARGBRotate180(const uint8* src, int src_stride,
uint8* dst, int dst_stride,
int width, int height) {
uint8* dst, int dst_stride, int width, int height) {
// Swap first and last row and mirror the content. Uses a temporary row.
align_buffer_64(row, width * 4);
const uint8* src_bot = src + src_stride * (height - 1);
@ -102,38 +94,38 @@ void ARGBRotate180(const uint8* src, int src_stride,
void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) =
ARGBMirrorRow_C;
void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
#if defined(HAS_ARGBMIRRORROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
ARGBMirrorRow = ARGBMirrorRow_SSSE3;
#if defined(HAS_ARGBMIRRORROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBMirrorRow = ARGBMirrorRow_Any_NEON;
if (IS_ALIGNED(width, 4)) {
ARGBMirrorRow = ARGBMirrorRow_NEON;
}
}
#endif
#if defined(HAS_ARGBMIRRORROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ARGBMirrorRow = ARGBMirrorRow_Any_SSE2;
if (IS_ALIGNED(width, 4)) {
ARGBMirrorRow = ARGBMirrorRow_SSE2;
}
}
#endif
#if defined(HAS_ARGBMIRRORROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 8)) {
ARGBMirrorRow = ARGBMirrorRow_AVX2;
}
#endif
#if defined(HAS_ARGBMIRRORROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 4)) {
ARGBMirrorRow = ARGBMirrorRow_NEON;
}
#endif
#if defined(HAS_COPYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width * 4, 32)) {
CopyRow = CopyRow_NEON;
}
#endif
#if defined(HAS_COPYROW_X86)
if (TestCpuFlag(kCpuHasX86)) {
CopyRow = CopyRow_X86;
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBMirrorRow = ARGBMirrorRow_Any_AVX2;
if (IS_ALIGNED(width, 8)) {
ARGBMirrorRow = ARGBMirrorRow_AVX2;
}
}
#endif
#if defined(HAS_COPYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width * 4, 32) &&
IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
CopyRow = CopyRow_SSE2;
if (TestCpuFlag(kCpuHasSSE2)) {
CopyRow = IS_ALIGNED(width * 4, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
}
#endif
#if defined(HAS_COPYROW_AVX)
if (TestCpuFlag(kCpuHasAVX)) {
CopyRow = IS_ALIGNED(width * 4, 64) ? CopyRow_AVX : CopyRow_Any_AVX;
}
#endif
#if defined(HAS_COPYROW_ERMS)
@ -141,6 +133,11 @@ void ARGBRotate180(const uint8* src, int src_stride,
CopyRow = CopyRow_ERMS;
}
#endif
#if defined(HAS_COPYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
CopyRow = IS_ALIGNED(width * 4, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
}
#endif
#if defined(HAS_COPYROW_MIPS)
if (TestCpuFlag(kCpuHasMIPS)) {
CopyRow = CopyRow_MIPS;
@ -162,8 +159,7 @@ void ARGBRotate180(const uint8* src, int src_stride,
LIBYUV_API
int ARGBRotate(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height,
uint8* dst_argb, int dst_stride_argb, int width, int height,
enum RotationMode mode) {
if (!src_argb || width <= 0 || height == 0 || !dst_argb) {
return -1;

View File

@ -9,6 +9,7 @@
*/
#include "libyuv/row.h"
#include "libyuv/rotate_row.h"
#include "libyuv/basic_types.h"
@ -22,8 +23,7 @@ extern "C" {
(_MIPS_SIM == _MIPS_SIM_ABI32)
void TransposeWx8_MIPS_DSPR2(const uint8* src, int src_stride,
uint8* dst, int dst_stride,
int width) {
uint8* dst, int dst_stride, int width) {
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"
@ -106,9 +106,8 @@ void TransposeWx8_MIPS_DSPR2(const uint8* src, int src_stride,
);
}
void TransposeWx8_FAST_MIPS_DSPR2(const uint8* src, int src_stride,
uint8* dst, int dst_stride,
int width) {
void TransposeWx8_Fast_MIPS_DSPR2(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width) {
__asm__ __volatile__ (
".set noat \n"
".set push \n"

View File

@ -9,6 +9,7 @@
*/
#include "libyuv/row.h"
#include "libyuv/rotate_row.h"
#include "libyuv/basic_types.h"
@ -17,7 +18,8 @@ namespace libyuv {
extern "C" {
#endif
#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)
#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \
!defined(__aarch64__)
static uvec8 kVTbl4x4Transpose =
{ 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 };
@ -33,7 +35,6 @@ void TransposeWx8_NEON(const uint8* src, int src_stride,
"sub %5, #8 \n"
// handle 8x8 blocks. this should be the majority of the plane
".p2align 2 \n"
"1: \n"
"mov %0, %1 \n"
@ -254,7 +255,6 @@ void TransposeUVWx8_NEON(const uint8* src, int src_stride,
"sub %7, #8 \n"
// handle 8x8 blocks. this should be the majority of the plane
".p2align 2 \n"
"1: \n"
"mov %0, %1 \n"
@ -525,7 +525,7 @@ void TransposeUVWx8_NEON(const uint8* src, int src_stride,
"q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"
);
}
#endif
#endif // defined(__ARM_NEON__) && !defined(__aarch64__)
#ifdef __cplusplus
} // extern "C"

View File

@ -9,6 +9,7 @@
*/
#include "libyuv/row.h"
#include "libyuv/rotate_row.h"
#include "libyuv/basic_types.h"
@ -17,522 +18,524 @@ namespace libyuv {
extern "C" {
#endif
// This module is for GCC Neon armv8 64 bit.
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
//this ifdef should be removed if TransposeWx8_NEON's aarch64 has
//been done
#ifdef HAS_TRANSPOSE_WX8_NEON
static uvec8 kVTbl4x4Transpose =
{ 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 };
{ 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 };
void TransposeWx8_NEON(const uint8* src, int src_stride,
uint8* dst, int dst_stride,
int width) {
uint8* dst, int dst_stride, int width) {
const uint8* src_temp = NULL;
int64 width64 = (int64) width; // Work around clang 3.4 warning.
asm volatile (
// loops are on blocks of 8. loop will stop when
// counter gets to or below 0. starting the counter
// at w-8 allow for this
"sub %5, #8 \n"
"sub %3, %3, #8 \n"
// handle 8x8 blocks. this should be the majority of the plane
".p2align 2 \n"
"1: \n"
"mov %0, %1 \n"
"1: \n"
"mov %0, %1 \n"
MEMACCESS(0)
"vld1.8 {d0}, [%0], %2 \n"
"ld1 {v0.8b}, [%0], %5 \n"
MEMACCESS(0)
"vld1.8 {d1}, [%0], %2 \n"
"ld1 {v1.8b}, [%0], %5 \n"
MEMACCESS(0)
"vld1.8 {d2}, [%0], %2 \n"
"ld1 {v2.8b}, [%0], %5 \n"
MEMACCESS(0)
"vld1.8 {d3}, [%0], %2 \n"
"ld1 {v3.8b}, [%0], %5 \n"
MEMACCESS(0)
"vld1.8 {d4}, [%0], %2 \n"
"ld1 {v4.8b}, [%0], %5 \n"
MEMACCESS(0)
"vld1.8 {d5}, [%0], %2 \n"
"ld1 {v5.8b}, [%0], %5 \n"
MEMACCESS(0)
"vld1.8 {d6}, [%0], %2 \n"
"ld1 {v6.8b}, [%0], %5 \n"
MEMACCESS(0)
"vld1.8 {d7}, [%0] \n"
"ld1 {v7.8b}, [%0] \n"
"vtrn.8 d1, d0 \n"
"vtrn.8 d3, d2 \n"
"vtrn.8 d5, d4 \n"
"vtrn.8 d7, d6 \n"
"trn2 v16.8b, v0.8b, v1.8b \n"
"trn1 v17.8b, v0.8b, v1.8b \n"
"trn2 v18.8b, v2.8b, v3.8b \n"
"trn1 v19.8b, v2.8b, v3.8b \n"
"trn2 v20.8b, v4.8b, v5.8b \n"
"trn1 v21.8b, v4.8b, v5.8b \n"
"trn2 v22.8b, v6.8b, v7.8b \n"
"trn1 v23.8b, v6.8b, v7.8b \n"
"vtrn.16 d1, d3 \n"
"vtrn.16 d0, d2 \n"
"vtrn.16 d5, d7 \n"
"vtrn.16 d4, d6 \n"
"trn2 v3.4h, v17.4h, v19.4h \n"
"trn1 v1.4h, v17.4h, v19.4h \n"
"trn2 v2.4h, v16.4h, v18.4h \n"
"trn1 v0.4h, v16.4h, v18.4h \n"
"trn2 v7.4h, v21.4h, v23.4h \n"
"trn1 v5.4h, v21.4h, v23.4h \n"
"trn2 v6.4h, v20.4h, v22.4h \n"
"trn1 v4.4h, v20.4h, v22.4h \n"
"vtrn.32 d1, d5 \n"
"vtrn.32 d0, d4 \n"
"vtrn.32 d3, d7 \n"
"vtrn.32 d2, d6 \n"
"trn2 v21.2s, v1.2s, v5.2s \n"
"trn1 v17.2s, v1.2s, v5.2s \n"
"trn2 v20.2s, v0.2s, v4.2s \n"
"trn1 v16.2s, v0.2s, v4.2s \n"
"trn2 v23.2s, v3.2s, v7.2s \n"
"trn1 v19.2s, v3.2s, v7.2s \n"
"trn2 v22.2s, v2.2s, v6.2s \n"
"trn1 v18.2s, v2.2s, v6.2s \n"
"vrev16.8 q0, q0 \n"
"vrev16.8 q1, q1 \n"
"vrev16.8 q2, q2 \n"
"vrev16.8 q3, q3 \n"
"mov %0, %3 \n"
"mov %0, %2 \n"
MEMACCESS(0)
"vst1.8 {d1}, [%0], %4 \n"
"st1 {v17.8b}, [%0], %6 \n"
MEMACCESS(0)
"vst1.8 {d0}, [%0], %4 \n"
"st1 {v16.8b}, [%0], %6 \n"
MEMACCESS(0)
"vst1.8 {d3}, [%0], %4 \n"
"st1 {v19.8b}, [%0], %6 \n"
MEMACCESS(0)
"vst1.8 {d2}, [%0], %4 \n"
"st1 {v18.8b}, [%0], %6 \n"
MEMACCESS(0)
"vst1.8 {d5}, [%0], %4 \n"
"st1 {v21.8b}, [%0], %6 \n"
MEMACCESS(0)
"vst1.8 {d4}, [%0], %4 \n"
"st1 {v20.8b}, [%0], %6 \n"
MEMACCESS(0)
"vst1.8 {d7}, [%0], %4 \n"
"st1 {v23.8b}, [%0], %6 \n"
MEMACCESS(0)
"vst1.8 {d6}, [%0] \n"
"st1 {v22.8b}, [%0] \n"
"add %1, #8 \n" // src += 8
"add %3, %3, %4, lsl #3 \n" // dst += 8 * dst_stride
"subs %5, #8 \n" // w -= 8
"bge 1b \n"
"add %1, %1, #8 \n" // src += 8
"add %2, %2, %6, lsl #3 \n" // dst += 8 * dst_stride
"subs %3, %3, #8 \n" // w -= 8
"b.ge 1b \n"
// add 8 back to counter. if the result is 0 there are
// no residuals.
"adds %5, #8 \n"
"beq 4f \n"
"adds %3, %3, #8 \n"
"b.eq 4f \n"
// some residual, so between 1 and 7 lines left to transpose
"cmp %5, #2 \n"
"blt 3f \n"
"cmp %3, #2 \n"
"b.lt 3f \n"
"cmp %5, #4 \n"
"blt 2f \n"
"cmp %3, #4 \n"
"b.lt 2f \n"
// 4x8 block
"mov %0, %1 \n"
"mov %0, %1 \n"
MEMACCESS(0)
"vld1.32 {d0[0]}, [%0], %2 \n"
"ld1 {v0.s}[0], [%0], %5 \n"
MEMACCESS(0)
"vld1.32 {d0[1]}, [%0], %2 \n"
"ld1 {v0.s}[1], [%0], %5 \n"
MEMACCESS(0)
"vld1.32 {d1[0]}, [%0], %2 \n"
"ld1 {v0.s}[2], [%0], %5 \n"
MEMACCESS(0)
"vld1.32 {d1[1]}, [%0], %2 \n"
"ld1 {v0.s}[3], [%0], %5 \n"
MEMACCESS(0)
"vld1.32 {d2[0]}, [%0], %2 \n"
"ld1 {v1.s}[0], [%0], %5 \n"
MEMACCESS(0)
"vld1.32 {d2[1]}, [%0], %2 \n"
"ld1 {v1.s}[1], [%0], %5 \n"
MEMACCESS(0)
"vld1.32 {d3[0]}, [%0], %2 \n"
"ld1 {v1.s}[2], [%0], %5 \n"
MEMACCESS(0)
"vld1.32 {d3[1]}, [%0] \n"
"ld1 {v1.s}[3], [%0] \n"
"mov %0, %3 \n"
"mov %0, %2 \n"
MEMACCESS(6)
"vld1.8 {q3}, [%6] \n"
MEMACCESS(4)
"ld1 {v2.16b}, [%4] \n"
"vtbl.8 d4, {d0, d1}, d6 \n"
"vtbl.8 d5, {d0, d1}, d7 \n"
"vtbl.8 d0, {d2, d3}, d6 \n"
"vtbl.8 d1, {d2, d3}, d7 \n"
"tbl v3.16b, {v0.16b}, v2.16b \n"
"tbl v0.16b, {v1.16b}, v2.16b \n"
// TODO(frkoenig): Rework shuffle above to
// write out with 4 instead of 8 writes.
MEMACCESS(0)
"vst1.32 {d4[0]}, [%0], %4 \n"
"st1 {v3.s}[0], [%0], %6 \n"
MEMACCESS(0)
"vst1.32 {d4[1]}, [%0], %4 \n"
"st1 {v3.s}[1], [%0], %6 \n"
MEMACCESS(0)
"vst1.32 {d5[0]}, [%0], %4 \n"
"st1 {v3.s}[2], [%0], %6 \n"
MEMACCESS(0)
"vst1.32 {d5[1]}, [%0] \n"
"st1 {v3.s}[3], [%0] \n"
"add %0, %3, #4 \n"
"add %0, %2, #4 \n"
MEMACCESS(0)
"vst1.32 {d0[0]}, [%0], %4 \n"
"st1 {v0.s}[0], [%0], %6 \n"
MEMACCESS(0)
"vst1.32 {d0[1]}, [%0], %4 \n"
"st1 {v0.s}[1], [%0], %6 \n"
MEMACCESS(0)
"vst1.32 {d1[0]}, [%0], %4 \n"
"st1 {v0.s}[2], [%0], %6 \n"
MEMACCESS(0)
"vst1.32 {d1[1]}, [%0] \n"
"st1 {v0.s}[3], [%0] \n"
"add %1, #4 \n" // src += 4
"add %3, %3, %4, lsl #2 \n" // dst += 4 * dst_stride
"subs %5, #4 \n" // w -= 4
"beq 4f \n"
"add %1, %1, #4 \n" // src += 4
"add %2, %2, %6, lsl #2 \n" // dst += 4 * dst_stride
"subs %3, %3, #4 \n" // w -= 4
"b.eq 4f \n"
// some residual, check to see if it includes a 2x8 block,
// or less
"cmp %5, #2 \n"
"blt 3f \n"
"cmp %3, #2 \n"
"b.lt 3f \n"
// 2x8 block
"2: \n"
"mov %0, %1 \n"
"2: \n"
"mov %0, %1 \n"
MEMACCESS(0)
"vld1.16 {d0[0]}, [%0], %2 \n"
"ld1 {v0.h}[0], [%0], %5 \n"
MEMACCESS(0)
"vld1.16 {d1[0]}, [%0], %2 \n"
"ld1 {v1.h}[0], [%0], %5 \n"
MEMACCESS(0)
"vld1.16 {d0[1]}, [%0], %2 \n"
"ld1 {v0.h}[1], [%0], %5 \n"
MEMACCESS(0)
"vld1.16 {d1[1]}, [%0], %2 \n"
"ld1 {v1.h}[1], [%0], %5 \n"
MEMACCESS(0)
"vld1.16 {d0[2]}, [%0], %2 \n"
"ld1 {v0.h}[2], [%0], %5 \n"
MEMACCESS(0)
"vld1.16 {d1[2]}, [%0], %2 \n"
"ld1 {v1.h}[2], [%0], %5 \n"
MEMACCESS(0)
"vld1.16 {d0[3]}, [%0], %2 \n"
"ld1 {v0.h}[3], [%0], %5 \n"
MEMACCESS(0)
"vld1.16 {d1[3]}, [%0] \n"
"ld1 {v1.h}[3], [%0] \n"
"vtrn.8 d0, d1 \n"
"trn2 v2.8b, v0.8b, v1.8b \n"
"trn1 v3.8b, v0.8b, v1.8b \n"
"mov %0, %3 \n"
"mov %0, %2 \n"
MEMACCESS(0)
"vst1.64 {d0}, [%0], %4 \n"
"st1 {v3.8b}, [%0], %6 \n"
MEMACCESS(0)
"vst1.64 {d1}, [%0] \n"
"st1 {v2.8b}, [%0] \n"
"add %1, #2 \n" // src += 2
"add %3, %3, %4, lsl #1 \n" // dst += 2 * dst_stride
"subs %5, #2 \n" // w -= 2
"beq 4f \n"
"add %1, %1, #2 \n" // src += 2
"add %2, %2, %6, lsl #1 \n" // dst += 2 * dst_stride
"subs %3, %3, #2 \n" // w -= 2
"b.eq 4f \n"
// 1x8 block
"3: \n"
"3: \n"
MEMACCESS(1)
"vld1.8 {d0[0]}, [%1], %2 \n"
"ld1 {v0.b}[0], [%1], %5 \n"
MEMACCESS(1)
"vld1.8 {d0[1]}, [%1], %2 \n"
"ld1 {v0.b}[1], [%1], %5 \n"
MEMACCESS(1)
"vld1.8 {d0[2]}, [%1], %2 \n"
"ld1 {v0.b}[2], [%1], %5 \n"
MEMACCESS(1)
"vld1.8 {d0[3]}, [%1], %2 \n"
"ld1 {v0.b}[3], [%1], %5 \n"
MEMACCESS(1)
"vld1.8 {d0[4]}, [%1], %2 \n"
"ld1 {v0.b}[4], [%1], %5 \n"
MEMACCESS(1)
"vld1.8 {d0[5]}, [%1], %2 \n"
"ld1 {v0.b}[5], [%1], %5 \n"
MEMACCESS(1)
"vld1.8 {d0[6]}, [%1], %2 \n"
"ld1 {v0.b}[6], [%1], %5 \n"
MEMACCESS(1)
"vld1.8 {d0[7]}, [%1] \n"
"ld1 {v0.b}[7], [%1] \n"
MEMACCESS(3)
"vst1.64 {d0}, [%3] \n"
MEMACCESS(2)
"st1 {v0.8b}, [%2] \n"
"4: \n"
"4: \n"
: "+r"(src_temp), // %0
"+r"(src), // %1
"+r"(src_stride), // %2
"+r"(dst), // %3
"+r"(dst_stride), // %4
"+r"(width) // %5
: "r"(&kVTbl4x4Transpose) // %6
: "memory", "cc", "q0", "q1", "q2", "q3"
: "+r"(src_temp), // %0
"+r"(src), // %1
"+r"(dst), // %2
"+r"(width64) // %3
: "r"(&kVTbl4x4Transpose), // %4
"r"(static_cast<ptrdiff_t>(src_stride)), // %5
"r"(static_cast<ptrdiff_t>(dst_stride)) // %6
: "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16",
"v17", "v18", "v19", "v20", "v21", "v22", "v23"
);
}
#endif //HAS_TRANSPOSE_WX8_NEON
//this ifdef should be removed if TransposeUVWx8_NEON's aarch64 has
//been done
#ifdef HAS_TRANSPOSE_UVWX8_NEON
static uvec8 kVTbl4x4TransposeDi =
{ 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15 };
static uint8 kVTbl4x4TransposeDi[32] =
{ 0, 16, 32, 48, 2, 18, 34, 50, 4, 20, 36, 52, 6, 22, 38, 54,
1, 17, 33, 49, 3, 19, 35, 51, 5, 21, 37, 53, 7, 23, 39, 55};
void TransposeUVWx8_NEON(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int width) {
const uint8* src_temp = NULL;
int64 width64 = (int64) width; // Work around clang 3.4 warning.
asm volatile (
// loops are on blocks of 8. loop will stop when
// counter gets to or below 0. starting the counter
// at w-8 allow for this
"sub %7, #8 \n"
"sub %4, %4, #8 \n"
// handle 8x8 blocks. this should be the majority of the plane
".p2align 2 \n"
"1: \n"
"mov %0, %1 \n"
MEMACCESS(0)
"vld2.8 {d0, d1}, [%0], %2 \n"
MEMACCESS(0)
"vld2.8 {d2, d3}, [%0], %2 \n"
MEMACCESS(0)
"vld2.8 {d4, d5}, [%0], %2 \n"
MEMACCESS(0)
"vld2.8 {d6, d7}, [%0], %2 \n"
MEMACCESS(0)
"vld2.8 {d16, d17}, [%0], %2 \n"
MEMACCESS(0)
"vld2.8 {d18, d19}, [%0], %2 \n"
MEMACCESS(0)
"vld2.8 {d20, d21}, [%0], %2 \n"
MEMACCESS(0)
"vld2.8 {d22, d23}, [%0] \n"
"vtrn.8 q1, q0 \n"
"vtrn.8 q3, q2 \n"
"vtrn.8 q9, q8 \n"
"vtrn.8 q11, q10 \n"
"vtrn.16 q1, q3 \n"
"vtrn.16 q0, q2 \n"
"vtrn.16 q9, q11 \n"
"vtrn.16 q8, q10 \n"
"vtrn.32 q1, q9 \n"
"vtrn.32 q0, q8 \n"
"vtrn.32 q3, q11 \n"
"vtrn.32 q2, q10 \n"
"vrev16.8 q0, q0 \n"
"vrev16.8 q1, q1 \n"
"vrev16.8 q2, q2 \n"
"vrev16.8 q3, q3 \n"
"vrev16.8 q8, q8 \n"
"vrev16.8 q9, q9 \n"
"vrev16.8 q10, q10 \n"
"vrev16.8 q11, q11 \n"
"mov %0, %3 \n"
"mov %0, %1 \n"
MEMACCESS(0)
"vst1.8 {d2}, [%0], %4 \n"
"ld1 {v0.16b}, [%0], %5 \n"
MEMACCESS(0)
"vst1.8 {d0}, [%0], %4 \n"
"ld1 {v1.16b}, [%0], %5 \n"
MEMACCESS(0)
"vst1.8 {d6}, [%0], %4 \n"
"ld1 {v2.16b}, [%0], %5 \n"
MEMACCESS(0)
"vst1.8 {d4}, [%0], %4 \n"
"ld1 {v3.16b}, [%0], %5 \n"
MEMACCESS(0)
"vst1.8 {d18}, [%0], %4 \n"
"ld1 {v4.16b}, [%0], %5 \n"
MEMACCESS(0)
"vst1.8 {d16}, [%0], %4 \n"
"ld1 {v5.16b}, [%0], %5 \n"
MEMACCESS(0)
"vst1.8 {d22}, [%0], %4 \n"
"ld1 {v6.16b}, [%0], %5 \n"
MEMACCESS(0)
"vst1.8 {d20}, [%0] \n"
"ld1 {v7.16b}, [%0] \n"
"mov %0, %5 \n"
"trn1 v16.16b, v0.16b, v1.16b \n"
"trn2 v17.16b, v0.16b, v1.16b \n"
"trn1 v18.16b, v2.16b, v3.16b \n"
"trn2 v19.16b, v2.16b, v3.16b \n"
"trn1 v20.16b, v4.16b, v5.16b \n"
"trn2 v21.16b, v4.16b, v5.16b \n"
"trn1 v22.16b, v6.16b, v7.16b \n"
"trn2 v23.16b, v6.16b, v7.16b \n"
"trn1 v0.8h, v16.8h, v18.8h \n"
"trn2 v1.8h, v16.8h, v18.8h \n"
"trn1 v2.8h, v20.8h, v22.8h \n"
"trn2 v3.8h, v20.8h, v22.8h \n"
"trn1 v4.8h, v17.8h, v19.8h \n"
"trn2 v5.8h, v17.8h, v19.8h \n"
"trn1 v6.8h, v21.8h, v23.8h \n"
"trn2 v7.8h, v21.8h, v23.8h \n"
"trn1 v16.4s, v0.4s, v2.4s \n"
"trn2 v17.4s, v0.4s, v2.4s \n"
"trn1 v18.4s, v1.4s, v3.4s \n"
"trn2 v19.4s, v1.4s, v3.4s \n"
"trn1 v20.4s, v4.4s, v6.4s \n"
"trn2 v21.4s, v4.4s, v6.4s \n"
"trn1 v22.4s, v5.4s, v7.4s \n"
"trn2 v23.4s, v5.4s, v7.4s \n"
"mov %0, %2 \n"
MEMACCESS(0)
"vst1.8 {d3}, [%0], %6 \n"
"st1 {v16.d}[0], [%0], %6 \n"
MEMACCESS(0)
"vst1.8 {d1}, [%0], %6 \n"
"st1 {v18.d}[0], [%0], %6 \n"
MEMACCESS(0)
"vst1.8 {d7}, [%0], %6 \n"
"st1 {v17.d}[0], [%0], %6 \n"
MEMACCESS(0)
"vst1.8 {d5}, [%0], %6 \n"
"st1 {v19.d}[0], [%0], %6 \n"
MEMACCESS(0)
"vst1.8 {d19}, [%0], %6 \n"
"st1 {v16.d}[1], [%0], %6 \n"
MEMACCESS(0)
"vst1.8 {d17}, [%0], %6 \n"
"st1 {v18.d}[1], [%0], %6 \n"
MEMACCESS(0)
"vst1.8 {d23}, [%0], %6 \n"
"st1 {v17.d}[1], [%0], %6 \n"
MEMACCESS(0)
"vst1.8 {d21}, [%0] \n"
"st1 {v19.d}[1], [%0] \n"
"add %1, #8*2 \n" // src += 8*2
"add %3, %3, %4, lsl #3 \n" // dst_a += 8 * dst_stride_a
"add %5, %5, %6, lsl #3 \n" // dst_b += 8 * dst_stride_b
"subs %7, #8 \n" // w -= 8
"bge 1b \n"
"mov %0, %3 \n"
MEMACCESS(0)
"st1 {v20.d}[0], [%0], %7 \n"
MEMACCESS(0)
"st1 {v22.d}[0], [%0], %7 \n"
MEMACCESS(0)
"st1 {v21.d}[0], [%0], %7 \n"
MEMACCESS(0)
"st1 {v23.d}[0], [%0], %7 \n"
MEMACCESS(0)
"st1 {v20.d}[1], [%0], %7 \n"
MEMACCESS(0)
"st1 {v22.d}[1], [%0], %7 \n"
MEMACCESS(0)
"st1 {v21.d}[1], [%0], %7 \n"
MEMACCESS(0)
"st1 {v23.d}[1], [%0] \n"
"add %1, %1, #16 \n" // src += 8*2
"add %2, %2, %6, lsl #3 \n" // dst_a += 8 * dst_stride_a
"add %3, %3, %7, lsl #3 \n" // dst_b += 8 * dst_stride_b
"subs %4, %4, #8 \n" // w -= 8
"b.ge 1b \n"
// add 8 back to counter. if the result is 0 there are
// no residuals.
"adds %7, #8 \n"
"beq 4f \n"
"adds %4, %4, #8 \n"
"b.eq 4f \n"
// some residual, so between 1 and 7 lines left to transpose
"cmp %7, #2 \n"
"blt 3f \n"
"cmp %4, #2 \n"
"b.lt 3f \n"
"cmp %7, #4 \n"
"blt 2f \n"
"cmp %4, #4 \n"
"b.lt 2f \n"
// TODO(frkoenig): Clean this up
// 4x8 block
"mov %0, %1 \n"
"mov %0, %1 \n"
MEMACCESS(0)
"vld1.64 {d0}, [%0], %2 \n"
"ld1 {v0.8b}, [%0], %5 \n"
MEMACCESS(0)
"vld1.64 {d1}, [%0], %2 \n"
"ld1 {v1.8b}, [%0], %5 \n"
MEMACCESS(0)
"vld1.64 {d2}, [%0], %2 \n"
"ld1 {v2.8b}, [%0], %5 \n"
MEMACCESS(0)
"vld1.64 {d3}, [%0], %2 \n"
"ld1 {v3.8b}, [%0], %5 \n"
MEMACCESS(0)
"vld1.64 {d4}, [%0], %2 \n"
"ld1 {v4.8b}, [%0], %5 \n"
MEMACCESS(0)
"vld1.64 {d5}, [%0], %2 \n"
"ld1 {v5.8b}, [%0], %5 \n"
MEMACCESS(0)
"vld1.64 {d6}, [%0], %2 \n"
"ld1 {v6.8b}, [%0], %5 \n"
MEMACCESS(0)
"vld1.64 {d7}, [%0] \n"
"ld1 {v7.8b}, [%0] \n"
MEMACCESS(8)
"vld1.8 {q15}, [%8] \n"
"ld1 {v30.16b}, [%8], #16 \n"
"ld1 {v31.16b}, [%8] \n"
"vtrn.8 q0, q1 \n"
"vtrn.8 q2, q3 \n"
"tbl v16.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v30.16b \n"
"tbl v17.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v31.16b \n"
"tbl v18.16b, {v4.16b, v5.16b, v6.16b, v7.16b}, v30.16b \n"
"tbl v19.16b, {v4.16b, v5.16b, v6.16b, v7.16b}, v31.16b \n"
"vtbl.8 d16, {d0, d1}, d30 \n"
"vtbl.8 d17, {d0, d1}, d31 \n"
"vtbl.8 d18, {d2, d3}, d30 \n"
"vtbl.8 d19, {d2, d3}, d31 \n"
"vtbl.8 d20, {d4, d5}, d30 \n"
"vtbl.8 d21, {d4, d5}, d31 \n"
"vtbl.8 d22, {d6, d7}, d30 \n"
"vtbl.8 d23, {d6, d7}, d31 \n"
"mov %0, %3 \n"
"mov %0, %2 \n"
MEMACCESS(0)
"vst1.32 {d16[0]}, [%0], %4 \n"
"st1 {v16.s}[0], [%0], %6 \n"
MEMACCESS(0)
"vst1.32 {d16[1]}, [%0], %4 \n"
"st1 {v16.s}[1], [%0], %6 \n"
MEMACCESS(0)
"vst1.32 {d17[0]}, [%0], %4 \n"
"st1 {v16.s}[2], [%0], %6 \n"
MEMACCESS(0)
"vst1.32 {d17[1]}, [%0], %4 \n"
"st1 {v16.s}[3], [%0], %6 \n"
"add %0, %3, #4 \n"
"add %0, %2, #4 \n"
MEMACCESS(0)
"vst1.32 {d20[0]}, [%0], %4 \n"
"st1 {v18.s}[0], [%0], %6 \n"
MEMACCESS(0)
"vst1.32 {d20[1]}, [%0], %4 \n"
"st1 {v18.s}[1], [%0], %6 \n"
MEMACCESS(0)
"vst1.32 {d21[0]}, [%0], %4 \n"
"st1 {v18.s}[2], [%0], %6 \n"
MEMACCESS(0)
"vst1.32 {d21[1]}, [%0] \n"
"st1 {v18.s}[3], [%0] \n"
"mov %0, %5 \n"
"mov %0, %3 \n"
MEMACCESS(0)
"vst1.32 {d18[0]}, [%0], %6 \n"
"st1 {v17.s}[0], [%0], %7 \n"
MEMACCESS(0)
"vst1.32 {d18[1]}, [%0], %6 \n"
"st1 {v17.s}[1], [%0], %7 \n"
MEMACCESS(0)
"vst1.32 {d19[0]}, [%0], %6 \n"
"st1 {v17.s}[2], [%0], %7 \n"
MEMACCESS(0)
"vst1.32 {d19[1]}, [%0], %6 \n"
"st1 {v17.s}[3], [%0], %7 \n"
"add %0, %5, #4 \n"
"add %0, %3, #4 \n"
MEMACCESS(0)
"vst1.32 {d22[0]}, [%0], %6 \n"
"st1 {v19.s}[0], [%0], %7 \n"
MEMACCESS(0)
"vst1.32 {d22[1]}, [%0], %6 \n"
"st1 {v19.s}[1], [%0], %7 \n"
MEMACCESS(0)
"vst1.32 {d23[0]}, [%0], %6 \n"
"st1 {v19.s}[2], [%0], %7 \n"
MEMACCESS(0)
"vst1.32 {d23[1]}, [%0] \n"
"st1 {v19.s}[3], [%0] \n"
"add %1, #4*2 \n" // src += 4 * 2
"add %3, %3, %4, lsl #2 \n" // dst_a += 4 * dst_stride_a
"add %5, %5, %6, lsl #2 \n" // dst_b += 4 * dst_stride_b
"subs %7, #4 \n" // w -= 4
"beq 4f \n"
"add %1, %1, #8 \n" // src += 4 * 2
"add %2, %2, %6, lsl #2 \n" // dst_a += 4 * dst_stride_a
"add %3, %3, %7, lsl #2 \n" // dst_b += 4 * dst_stride_b
"subs %4, %4, #4 \n" // w -= 4
"b.eq 4f \n"
// some residual, check to see if it includes a 2x8 block,
// or less
"cmp %7, #2 \n"
"blt 3f \n"
"cmp %4, #2 \n"
"b.lt 3f \n"
// 2x8 block
"2: \n"
"mov %0, %1 \n"
"mov %0, %1 \n"
MEMACCESS(0)
"vld2.16 {d0[0], d2[0]}, [%0], %2 \n"
"ld2 {v0.h, v1.h}[0], [%0], %5 \n"
MEMACCESS(0)
"vld2.16 {d1[0], d3[0]}, [%0], %2 \n"
"ld2 {v2.h, v3.h}[0], [%0], %5 \n"
MEMACCESS(0)
"vld2.16 {d0[1], d2[1]}, [%0], %2 \n"
"ld2 {v0.h, v1.h}[1], [%0], %5 \n"
MEMACCESS(0)
"vld2.16 {d1[1], d3[1]}, [%0], %2 \n"
"ld2 {v2.h, v3.h}[1], [%0], %5 \n"
MEMACCESS(0)
"vld2.16 {d0[2], d2[2]}, [%0], %2 \n"
"ld2 {v0.h, v1.h}[2], [%0], %5 \n"
MEMACCESS(0)
"vld2.16 {d1[2], d3[2]}, [%0], %2 \n"
"ld2 {v2.h, v3.h}[2], [%0], %5 \n"
MEMACCESS(0)
"vld2.16 {d0[3], d2[3]}, [%0], %2 \n"
"ld2 {v0.h, v1.h}[3], [%0], %5 \n"
MEMACCESS(0)
"vld2.16 {d1[3], d3[3]}, [%0] \n"
"ld2 {v2.h, v3.h}[3], [%0] \n"
"vtrn.8 d0, d1 \n"
"vtrn.8 d2, d3 \n"
"trn1 v4.8b, v0.8b, v2.8b \n"
"trn2 v5.8b, v0.8b, v2.8b \n"
"trn1 v6.8b, v1.8b, v3.8b \n"
"trn2 v7.8b, v1.8b, v3.8b \n"
"mov %0, %3 \n"
"mov %0, %2 \n"
MEMACCESS(0)
"vst1.64 {d0}, [%0], %4 \n"
"st1 {v4.d}[0], [%0], %6 \n"
MEMACCESS(0)
"vst1.64 {d2}, [%0] \n"
"st1 {v6.d}[0], [%0] \n"
"mov %0, %5 \n"
"mov %0, %3 \n"
MEMACCESS(0)
"vst1.64 {d1}, [%0], %6 \n"
"st1 {v5.d}[0], [%0], %7 \n"
MEMACCESS(0)
"vst1.64 {d3}, [%0] \n"
"st1 {v7.d}[0], [%0] \n"
"add %1, #2*2 \n" // src += 2 * 2
"add %3, %3, %4, lsl #1 \n" // dst_a += 2 * dst_stride_a
"add %5, %5, %6, lsl #1 \n" // dst_b += 2 * dst_stride_b
"subs %7, #2 \n" // w -= 2
"beq 4f \n"
"add %1, %1, #4 \n" // src += 2 * 2
"add %2, %2, %6, lsl #1 \n" // dst_a += 2 * dst_stride_a
"add %3, %3, %7, lsl #1 \n" // dst_b += 2 * dst_stride_b
"subs %4, %4, #2 \n" // w -= 2
"b.eq 4f \n"
// 1x8 block
"3: \n"
MEMACCESS(1)
"vld2.8 {d0[0], d1[0]}, [%1], %2 \n"
"ld2 {v0.b, v1.b}[0], [%1], %5 \n"
MEMACCESS(1)
"vld2.8 {d0[1], d1[1]}, [%1], %2 \n"
"ld2 {v0.b, v1.b}[1], [%1], %5 \n"
MEMACCESS(1)
"vld2.8 {d0[2], d1[2]}, [%1], %2 \n"
"ld2 {v0.b, v1.b}[2], [%1], %5 \n"
MEMACCESS(1)
"vld2.8 {d0[3], d1[3]}, [%1], %2 \n"
"ld2 {v0.b, v1.b}[3], [%1], %5 \n"
MEMACCESS(1)
"vld2.8 {d0[4], d1[4]}, [%1], %2 \n"
"ld2 {v0.b, v1.b}[4], [%1], %5 \n"
MEMACCESS(1)
"vld2.8 {d0[5], d1[5]}, [%1], %2 \n"
"ld2 {v0.b, v1.b}[5], [%1], %5 \n"
MEMACCESS(1)
"vld2.8 {d0[6], d1[6]}, [%1], %2 \n"
"ld2 {v0.b, v1.b}[6], [%1], %5 \n"
MEMACCESS(1)
"vld2.8 {d0[7], d1[7]}, [%1] \n"
"ld2 {v0.b, v1.b}[7], [%1] \n"
MEMACCESS(2)
"st1 {v0.d}[0], [%2] \n"
MEMACCESS(3)
"vst1.64 {d0}, [%3] \n"
MEMACCESS(5)
"vst1.64 {d1}, [%5] \n"
"st1 {v1.d}[0], [%3] \n"
"4: \n"
: "+r"(src_temp), // %0
"+r"(src), // %1
"+r"(src_stride), // %2
"+r"(dst_a), // %3
"+r"(dst_stride_a), // %4
"+r"(dst_b), // %5
"+r"(dst_stride_b), // %6
"+r"(width) // %7
: "r"(&kVTbl4x4TransposeDi) // %8
: "+r"(src_temp), // %0
"+r"(src), // %1
"+r"(dst_a), // %2
"+r"(dst_b), // %3
"+r"(width64) // %4
: "r"(static_cast<ptrdiff_t>(src_stride)), // %5
"r"(static_cast<ptrdiff_t>(dst_stride_a)), // %6
"r"(static_cast<ptrdiff_t>(dst_stride_b)), // %7
"r"(&kVTbl4x4TransposeDi) // %8
: "memory", "cc",
"q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"
"v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
"v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
"v30", "v31"
);
}
#endif // HAS_TRANSPOSE_UVWX8_NEON
#endif // __aarch64__
#endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
#ifdef __cplusplus
} // extern "C"

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -389,7 +389,6 @@ void SplitUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
"blez $t4, 2f \n"
" andi %[width], %[width], 0xf \n" // residual
".p2align 2 \n"
"1: \n"
"addiu $t4, $t4, -1 \n"
"lw $t0, 0(%[src_uv]) \n" // V1 | U1 | V0 | U0
@ -447,89 +446,6 @@ void SplitUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
);
}
void SplitUVRow_Unaligned_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u,
uint8* dst_v, int width) {
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"
"srl $t4, %[width], 4 \n" // multiplies of 16
"blez $t4, 2f \n"
" andi %[width], %[width], 0xf \n" // residual
".p2align 2 \n"
"1: \n"
"addiu $t4, $t4, -1 \n"
"lwr $t0, 0(%[src_uv]) \n"
"lwl $t0, 3(%[src_uv]) \n" // V1 | U1 | V0 | U0
"lwr $t1, 4(%[src_uv]) \n"
"lwl $t1, 7(%[src_uv]) \n" // V3 | U3 | V2 | U2
"lwr $t2, 8(%[src_uv]) \n"
"lwl $t2, 11(%[src_uv]) \n" // V5 | U5 | V4 | U4
"lwr $t3, 12(%[src_uv]) \n"
"lwl $t3, 15(%[src_uv]) \n" // V7 | U7 | V6 | U6
"lwr $t5, 16(%[src_uv]) \n"
"lwl $t5, 19(%[src_uv]) \n" // V9 | U9 | V8 | U8
"lwr $t6, 20(%[src_uv]) \n"
"lwl $t6, 23(%[src_uv]) \n" // V11 | U11 | V10 | U10
"lwr $t7, 24(%[src_uv]) \n"
"lwl $t7, 27(%[src_uv]) \n" // V13 | U13 | V12 | U12
"lwr $t8, 28(%[src_uv]) \n"
"lwl $t8, 31(%[src_uv]) \n" // V15 | U15 | V14 | U14
"precrq.qb.ph $t9, $t1, $t0 \n" // V3 | V2 | V1 | V0
"precr.qb.ph $t0, $t1, $t0 \n" // U3 | U2 | U1 | U0
"precrq.qb.ph $t1, $t3, $t2 \n" // V7 | V6 | V5 | V4
"precr.qb.ph $t2, $t3, $t2 \n" // U7 | U6 | U5 | U4
"precrq.qb.ph $t3, $t6, $t5 \n" // V11 | V10 | V9 | V8
"precr.qb.ph $t5, $t6, $t5 \n" // U11 | U10 | U9 | U8
"precrq.qb.ph $t6, $t8, $t7 \n" // V15 | V14 | V13 | V12
"precr.qb.ph $t7, $t8, $t7 \n" // U15 | U14 | U13 | U12
"addiu %[src_uv], %[src_uv], 32 \n"
"swr $t9, 0(%[dst_v]) \n"
"swl $t9, 3(%[dst_v]) \n"
"swr $t0, 0(%[dst_u]) \n"
"swl $t0, 3(%[dst_u]) \n"
"swr $t1, 4(%[dst_v]) \n"
"swl $t1, 7(%[dst_v]) \n"
"swr $t2, 4(%[dst_u]) \n"
"swl $t2, 7(%[dst_u]) \n"
"swr $t3, 8(%[dst_v]) \n"
"swl $t3, 11(%[dst_v]) \n"
"swr $t5, 8(%[dst_u]) \n"
"swl $t5, 11(%[dst_u]) \n"
"swr $t6, 12(%[dst_v]) \n"
"swl $t6, 15(%[dst_v]) \n"
"swr $t7, 12(%[dst_u]) \n"
"swl $t7, 15(%[dst_u]) \n"
"addiu %[dst_u], %[dst_u], 16 \n"
"bgtz $t4, 1b \n"
" addiu %[dst_v], %[dst_v], 16 \n"
"beqz %[width], 3f \n"
" nop \n"
"2: \n"
"lbu $t0, 0(%[src_uv]) \n"
"lbu $t1, 1(%[src_uv]) \n"
"addiu %[src_uv], %[src_uv], 2 \n"
"addiu %[width], %[width], -1 \n"
"sb $t0, 0(%[dst_u]) \n"
"sb $t1, 0(%[dst_v]) \n"
"addiu %[dst_u], %[dst_u], 1 \n"
"bgtz %[width], 2b \n"
" addiu %[dst_v], %[dst_v], 1 \n"
"3: \n"
".set pop \n"
: [src_uv] "+r" (src_uv),
[width] "+r" (width),
[dst_u] "+r" (dst_u),
[dst_v] "+r" (dst_v)
:
: "t0", "t1", "t2", "t3",
"t4", "t5", "t6", "t7", "t8", "t9"
);
}
void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width) {
__asm__ __volatile__ (
".set push \n"
@ -540,7 +456,6 @@ void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width) {
"blez $t4, 2f \n"
" addu %[src], %[src], %[width] \n" // src += width
".p2align 2 \n"
"1: \n"
"lw $t0, -16(%[src]) \n" // |3|2|1|0|
"lw $t1, -12(%[src]) \n" // |7|6|5|4|
@ -595,7 +510,6 @@ void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
"blez %[x], 2f \n"
" addu %[src_uv], %[src_uv], $t4 \n"
".p2align 2 \n"
"1: \n"
"lw $t0, -32(%[src_uv]) \n" // |3|2|1|0|
"lw $t1, -28(%[src_uv]) \n" // |7|6|5|4|
@ -679,7 +593,7 @@ void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
// t8 = | 0 | G1 | 0 | g1 |
// t2 = | 0 | R0 | 0 | r0 |
// t1 = | 0 | R1 | 0 | r1 |
#define I422ToTransientMipsRGB \
#define YUVTORGB \
"lw $t0, 0(%[y_buf]) \n" \
"lhu $t1, 0(%[u_buf]) \n" \
"lhu $t2, 0(%[v_buf]) \n" \
@ -738,10 +652,12 @@ void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
"addu.ph $t2, $t2, $s5 \n" \
"addu.ph $t1, $t1, $s5 \n"
// TODO(fbarchard): accept yuv conversion constants.
void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) {
__asm__ __volatile__ (
".set push \n"
@ -756,9 +672,8 @@ void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf,
"lui $s6, 0xff00 \n"
"ori $s6, 0xff00 \n" // |ff|00|ff|00|ff|
".p2align 2 \n"
"1: \n"
I422ToTransientMipsRGB
YUVTORGB
// Arranging into argb format
"precr.qb.ph $t4, $t8, $t4 \n" // |G1|g1|B1|b1|
"precr.qb.ph $t5, $t9, $t5 \n" // |G0|g0|B0|b0|
@ -800,136 +715,10 @@ void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf,
);
}
void I422ToABGRRow_MIPS_DSPR2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"
"beqz %[width], 2f \n"
" repl.ph $s0, 74 \n" // |YG|YG| = |74|74|
"repl.ph $s1, -25 \n" // |UG|UG| = |-25|-25|
"repl.ph $s2, -52 \n" // |VG|VG| = |-52|-52|
"repl.ph $s3, 102 \n" // |VR|VR| = |102|102|
"repl.ph $s4, 16 \n" // |0|16|0|16|
"repl.ph $s5, 128 \n" // |128|128|
"lui $s6, 0xff00 \n"
"ori $s6, 0xff00 \n" // |ff|00|ff|00|
".p2align 2 \n"
"1: \n"
I422ToTransientMipsRGB
// Arranging into abgr format
"precr.qb.ph $t0, $t8, $t1 \n" // |G1|g1|R1|r1|
"precr.qb.ph $t3, $t9, $t2 \n" // |G0|g0|R0|r0|
"precrq.qb.ph $t8, $t0, $t3 \n" // |G1|R1|G0|R0|
"precr.qb.ph $t9, $t0, $t3 \n" // |g1|r1|g0|r0|
"precr.qb.ph $t2, $t4, $t5 \n" // |B1|b1|B0|b0|
"addiu %[width], -4 \n"
"addiu %[y_buf], 4 \n"
"preceu.ph.qbla $t1, $t2 \n" // |0 |B1|0 |B0|
"preceu.ph.qbra $t2, $t2 \n" // |0 |b1|0 |b0|
"or $t1, $t1, $s6 \n" // |ff|B1|ff|B0|
"or $t2, $t2, $s6 \n" // |ff|b1|ff|b0|
"precrq.ph.w $t0, $t2, $t9 \n" // |ff|b1|g1|r1|
"precrq.ph.w $t3, $t1, $t8 \n" // |ff|B1|G1|R1|
"sll $t9, $t9, 16 \n"
"sll $t8, $t8, 16 \n"
"packrl.ph $t2, $t2, $t9 \n" // |ff|b0|g0|r0|
"packrl.ph $t1, $t1, $t8 \n" // |ff|B0|G0|R0|
// Store results.
"sw $t2, 0(%[rgb_buf]) \n"
"sw $t0, 4(%[rgb_buf]) \n"
"sw $t1, 8(%[rgb_buf]) \n"
"sw $t3, 12(%[rgb_buf]) \n"
"bnez %[width], 1b \n"
" addiu %[rgb_buf], 16 \n"
"2: \n"
".set pop \n"
:[y_buf] "+r" (y_buf),
[u_buf] "+r" (u_buf),
[v_buf] "+r" (v_buf),
[width] "+r" (width),
[rgb_buf] "+r" (rgb_buf)
:
: "t0", "t1", "t2", "t3", "t4", "t5",
"t6", "t7", "t8", "t9",
"s0", "s1", "s2", "s3",
"s4", "s5", "s6"
);
}
void I422ToBGRARow_MIPS_DSPR2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"
"beqz %[width], 2f \n"
" repl.ph $s0, 74 \n" // |YG|YG| = |74 |74 |
"repl.ph $s1, -25 \n" // |UG|UG| = |-25|-25|
"repl.ph $s2, -52 \n" // |VG|VG| = |-52|-52|
"repl.ph $s3, 102 \n" // |VR|VR| = |102|102|
"repl.ph $s4, 16 \n" // |0|16|0|16|
"repl.ph $s5, 128 \n" // |128|128|
"lui $s6, 0xff \n"
"ori $s6, 0xff \n" // |00|ff|00|ff|
".p2align 2 \n"
"1: \n"
I422ToTransientMipsRGB
// Arranging into bgra format
"precr.qb.ph $t4, $t4, $t8 \n" // |B1|b1|G1|g1|
"precr.qb.ph $t5, $t5, $t9 \n" // |B0|b0|G0|g0|
"precrq.qb.ph $t8, $t4, $t5 \n" // |B1|G1|B0|G0|
"precr.qb.ph $t9, $t4, $t5 \n" // |b1|g1|b0|g0|
"precr.qb.ph $t2, $t1, $t2 \n" // |R1|r1|R0|r0|
"addiu %[width], -4 \n"
"addiu %[y_buf], 4 \n"
"preceu.ph.qbla $t1, $t2 \n" // |0 |R1|0 |R0|
"preceu.ph.qbra $t2, $t2 \n" // |0 |r1|0 |r0|
"sll $t1, $t1, 8 \n" // |R1|0 |R0|0 |
"sll $t2, $t2, 8 \n" // |r1|0 |r0|0 |
"or $t1, $t1, $s6 \n" // |R1|ff|R0|ff|
"or $t2, $t2, $s6 \n" // |r1|ff|r0|ff|
"precrq.ph.w $t0, $t9, $t2 \n" // |b1|g1|r1|ff|
"precrq.ph.w $t3, $t8, $t1 \n" // |B1|G1|R1|ff|
"sll $t1, $t1, 16 \n"
"sll $t2, $t2, 16 \n"
"packrl.ph $t2, $t9, $t2 \n" // |b0|g0|r0|ff|
"packrl.ph $t1, $t8, $t1 \n" // |B0|G0|R0|ff|
// Store results.
"sw $t2, 0(%[rgb_buf]) \n"
"sw $t0, 4(%[rgb_buf]) \n"
"sw $t1, 8(%[rgb_buf]) \n"
"sw $t3, 12(%[rgb_buf]) \n"
"bnez %[width], 1b \n"
" addiu %[rgb_buf], 16 \n"
"2: \n"
".set pop \n"
:[y_buf] "+r" (y_buf),
[u_buf] "+r" (u_buf),
[v_buf] "+r" (v_buf),
[width] "+r" (width),
[rgb_buf] "+r" (rgb_buf)
:
: "t0", "t1", "t2", "t3", "t4", "t5",
"t6", "t7", "t8", "t9",
"s0", "s1", "s2", "s3",
"s4", "s5", "s6"
);
}
// Bilinear filter 8x2 -> 8x1
void InterpolateRows_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) {
void InterpolateRow_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) {
int y0_fraction = 256 - source_y_fraction;
const uint8* src_ptr1 = src_ptr + src_stride;
@ -940,7 +729,6 @@ void InterpolateRows_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
"replv.ph $t0, %[y0_fraction] \n"
"replv.ph $t1, %[source_y_fraction] \n"
".p2align 2 \n"
"1: \n"
"lw $t2, 0(%[src_ptr]) \n"
"lw $t3, 0(%[src_ptr1]) \n"

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,146 +0,0 @@
;
; Copyright 2012 The LibYuv Project Authors. All rights reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
%ifdef __YASM_VERSION_ID__
%if __YASM_VERSION_ID__ < 01020000h
%error AVX2 is supported only by yasm 1.2.0 or later.
%endif
%endif
%include "x86inc.asm"
SECTION .text
; cglobal numeric constants are parameters, gpr regs, mm regs
; void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix)
%macro YUY2TOYROW 2-3
cglobal %1ToYRow%3, 3, 3, 3, src_yuy2, dst_y, pix
%ifidn %1,YUY2
pcmpeqb m2, m2, m2 ; generate mask 0x00ff00ff
psrlw m2, m2, 8
%endif
ALIGN 4
.convertloop:
mov%2 m0, [src_yuy2q]
mov%2 m1, [src_yuy2q + mmsize]
lea src_yuy2q, [src_yuy2q + mmsize * 2]
%ifidn %1,YUY2
pand m0, m0, m2 ; YUY2 even bytes are Y
pand m1, m1, m2
%else
psrlw m0, m0, 8 ; UYVY odd bytes are Y
psrlw m1, m1, 8
%endif
packuswb m0, m0, m1
%if cpuflag(AVX2)
vpermq m0, m0, 0xd8
%endif
sub pixd, mmsize
mov%2 [dst_yq], m0
lea dst_yq, [dst_yq + mmsize]
jg .convertloop
REP_RET
%endmacro
; TODO(fbarchard): Remove MMX. Add SSSE3 pshufb version.
INIT_MMX MMX
YUY2TOYROW YUY2,a,
YUY2TOYROW YUY2,u,_Unaligned
YUY2TOYROW UYVY,a,
YUY2TOYROW UYVY,u,_Unaligned
INIT_XMM SSE2
YUY2TOYROW YUY2,a,
YUY2TOYROW YUY2,u,_Unaligned
YUY2TOYROW UYVY,a,
YUY2TOYROW UYVY,u,_Unaligned
INIT_YMM AVX2
YUY2TOYROW YUY2,a,
YUY2TOYROW UYVY,a,
; void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix)
%macro SplitUVRow 1-2
cglobal SplitUVRow%2, 4, 4, 5, src_uv, dst_u, dst_v, pix
pcmpeqb m4, m4, m4 ; generate mask 0x00ff00ff
psrlw m4, m4, 8
sub dst_vq, dst_uq
ALIGN 4
.convertloop:
mov%1 m0, [src_uvq]
mov%1 m1, [src_uvq + mmsize]
lea src_uvq, [src_uvq + mmsize * 2]
psrlw m2, m0, 8 ; odd bytes
psrlw m3, m1, 8
pand m0, m0, m4 ; even bytes
pand m1, m1, m4
packuswb m0, m0, m1
packuswb m2, m2, m3
%if cpuflag(AVX2)
vpermq m0, m0, 0xd8
vpermq m2, m2, 0xd8
%endif
mov%1 [dst_uq], m0
mov%1 [dst_uq + dst_vq], m2
lea dst_uq, [dst_uq + mmsize]
sub pixd, mmsize
jg .convertloop
REP_RET
%endmacro
INIT_MMX MMX
SplitUVRow a,
SplitUVRow u,_Unaligned
INIT_XMM SSE2
SplitUVRow a,
SplitUVRow u,_Unaligned
INIT_YMM AVX2
SplitUVRow a,
; void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
; int width);
%macro MergeUVRow_ 1-2
cglobal MergeUVRow_%2, 4, 4, 3, src_u, src_v, dst_uv, pix
sub src_vq, src_uq
ALIGN 4
.convertloop:
mov%1 m0, [src_uq]
mov%1 m1, [src_vq]
lea src_uq, [src_uq + mmsize]
punpcklbw m2, m0, m1 // first 8 UV pairs
punpckhbw m0, m0, m1 // next 8 UV pairs
%if cpuflag(AVX2)
vperm2i128 m1, m2, m0, 0x20 // low 128 of ymm2 and low 128 of ymm0
vperm2i128 m2, m2, m0, 0x31 // high 128 of ymm2 and high 128 of ymm0
mov%1 [dst_uvq], m1
mov%1 [dst_uvq + mmsize], m2
%else
mov%1 [dst_uvq], m2
mov%1 [dst_uvq + mmsize], m0
%endif
lea dst_uvq, [dst_uvq + mmsize * 2]
sub pixd, mmsize
jg .convertloop
REP_RET
%endmacro
INIT_MMX MMX
MergeUVRow_ a,
MergeUVRow_ u,_Unaligned
INIT_XMM SSE2
MergeUVRow_ a,
MergeUVRow_ u,_Unaligned
INIT_YMM AVX2
MergeUVRow_ a,

View File

@ -23,9 +23,6 @@ namespace libyuv {
extern "C" {
#endif
// Remove this macro if OVERREAD is safe.
#define AVOID_OVERREAD 1
static __inline int Abs(int v) {
return v >= 0 ? v : -v;
}
@ -44,9 +41,8 @@ static void ScalePlaneDown2(int src_width, int src_height,
int y;
void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) =
filtering == kFilterNone ? ScaleRowDown2_C :
(filtering == kFilterLinear ? ScaleRowDown2Linear_C :
ScaleRowDown2Box_C);
filtering == kFilterNone ? ScaleRowDown2_C :
(filtering == kFilterLinear ? ScaleRowDown2Linear_C : ScaleRowDown2Box_C);
int row_stride = src_stride << 1;
if (!filtering) {
src_ptr += src_stride; // Point to odd rows.
@ -54,23 +50,42 @@ static void ScalePlaneDown2(int src_width, int src_height,
}
#if defined(HAS_SCALEROWDOWN2_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) {
ScaleRowDown2 = filtering ? ScaleRowDown2Box_NEON : ScaleRowDown2_NEON;
}
#elif defined(HAS_SCALEROWDOWN2_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Unaligned_SSE2 :
(filtering == kFilterLinear ? ScaleRowDown2Linear_Unaligned_SSE2 :
ScaleRowDown2Box_Unaligned_SSE2);
if (IS_ALIGNED(src_ptr, 16) &&
IS_ALIGNED(src_stride, 16) && IS_ALIGNED(row_stride, 16) &&
IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSE2 :
(filtering == kFilterLinear ? ScaleRowDown2Linear_SSE2 :
ScaleRowDown2Box_SSE2);
if (TestCpuFlag(kCpuHasNEON)) {
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_NEON :
(filtering == kFilterLinear ? ScaleRowDown2Linear_Any_NEON :
ScaleRowDown2Box_Any_NEON);
if (IS_ALIGNED(dst_width, 16)) {
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_NEON :
(filtering == kFilterLinear ? ScaleRowDown2Linear_NEON :
ScaleRowDown2Box_NEON);
}
}
#elif defined(HAS_SCALEROWDOWN2_MIPS_DSPR2)
#endif
#if defined(HAS_SCALEROWDOWN2_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_SSSE3 :
(filtering == kFilterLinear ? ScaleRowDown2Linear_Any_SSSE3 :
ScaleRowDown2Box_Any_SSSE3);
if (IS_ALIGNED(dst_width, 16)) {
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSSE3 :
(filtering == kFilterLinear ? ScaleRowDown2Linear_SSSE3 :
ScaleRowDown2Box_SSSE3);
}
}
#endif
#if defined(HAS_SCALEROWDOWN2_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_AVX2 :
(filtering == kFilterLinear ? ScaleRowDown2Linear_Any_AVX2 :
ScaleRowDown2Box_Any_AVX2);
if (IS_ALIGNED(dst_width, 32)) {
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_AVX2 :
(filtering == kFilterLinear ? ScaleRowDown2Linear_AVX2 :
ScaleRowDown2Box_AVX2);
}
}
#endif
#if defined(HAS_SCALEROWDOWN2_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) &&
IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) &&
IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
@ -112,21 +127,15 @@ static void ScalePlaneDown2_16(int src_width, int src_height,
ScaleRowDown2 = filtering ? ScaleRowDown2Box_16_NEON :
ScaleRowDown2_16_NEON;
}
#elif defined(HAS_SCALEROWDOWN2_16_SSE2)
#endif
#if defined(HAS_SCALEROWDOWN2_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
ScaleRowDown2 = filtering == kFilterNone ?
ScaleRowDown2_Unaligned_16_SSE2 :
(filtering == kFilterLinear ? ScaleRowDown2Linear_Unaligned_16_SSE2 :
ScaleRowDown2Box_Unaligned_16_SSE2);
if (IS_ALIGNED(src_ptr, 16) &&
IS_ALIGNED(src_stride, 16) && IS_ALIGNED(row_stride, 16) &&
IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_16_SSE2 :
(filtering == kFilterLinear ? ScaleRowDown2Linear_16_SSE2 :
ScaleRowDown2Box_16_SSE2);
}
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_16_SSE2 :
(filtering == kFilterLinear ? ScaleRowDown2Linear_16_SSE2 :
ScaleRowDown2Box_16_SSE2);
}
#elif defined(HAS_SCALEROWDOWN2_16_MIPS_DSPR2)
#endif
#if defined(HAS_SCALEROWDOWN2_16_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) &&
IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) &&
IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
@ -165,16 +174,33 @@ static void ScalePlaneDown4(int src_width, int src_height,
src_stride = 0;
}
#if defined(HAS_SCALEROWDOWN4_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) {
ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON;
if (TestCpuFlag(kCpuHasNEON)) {
ScaleRowDown4 = filtering ?
ScaleRowDown4Box_Any_NEON : ScaleRowDown4_Any_NEON;
if (IS_ALIGNED(dst_width, 8)) {
ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON;
}
}
#elif defined(HAS_SCALEROWDOWN4_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(dst_width, 8) && IS_ALIGNED(row_stride, 16) &&
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSE2 : ScaleRowDown4_SSE2;
#endif
#if defined(HAS_SCALEROWDOWN4_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ScaleRowDown4 = filtering ?
ScaleRowDown4Box_Any_SSSE3 : ScaleRowDown4_Any_SSSE3;
if (IS_ALIGNED(dst_width, 8)) {
ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSSE3 : ScaleRowDown4_SSSE3;
}
}
#elif defined(HAS_SCALEROWDOWN4_MIPS_DSPR2)
#endif
#if defined(HAS_SCALEROWDOWN4_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ScaleRowDown4 = filtering ?
ScaleRowDown4Box_Any_AVX2 : ScaleRowDown4_Any_AVX2;
if (IS_ALIGNED(dst_width, 16)) {
ScaleRowDown4 = filtering ? ScaleRowDown4Box_AVX2 : ScaleRowDown4_AVX2;
}
}
#endif
#if defined(HAS_SCALEROWDOWN4_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(row_stride, 4) &&
IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
@ -212,14 +238,14 @@ static void ScalePlaneDown4_16(int src_width, int src_height,
ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_NEON :
ScaleRowDown4_16_NEON;
}
#elif defined(HAS_SCALEROWDOWN4_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(dst_width, 8) && IS_ALIGNED(row_stride, 16) &&
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
#endif
#if defined(HAS_SCALEROWDOWN4_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_SSE2 :
ScaleRowDown4_16_SSE2;
}
#elif defined(HAS_SCALEROWDOWN4_16_MIPS_DSPR2)
#endif
#if defined(HAS_SCALEROWDOWN4_16_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(row_stride, 4) &&
IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
@ -260,25 +286,42 @@ static void ScalePlaneDown34(int src_width, int src_height,
ScaleRowDown34_1 = ScaleRowDown34_1_Box_C;
}
#if defined(HAS_SCALEROWDOWN34_NEON)
if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) {
if (TestCpuFlag(kCpuHasNEON)) {
if (!filtering) {
ScaleRowDown34_0 = ScaleRowDown34_NEON;
ScaleRowDown34_1 = ScaleRowDown34_NEON;
ScaleRowDown34_0 = ScaleRowDown34_Any_NEON;
ScaleRowDown34_1 = ScaleRowDown34_Any_NEON;
} else {
ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON;
ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON;
ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_NEON;
ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_NEON;
}
if (dst_width % 24 == 0) {
if (!filtering) {
ScaleRowDown34_0 = ScaleRowDown34_NEON;
ScaleRowDown34_1 = ScaleRowDown34_NEON;
} else {
ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON;
ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON;
}
}
}
#endif
#if defined(HAS_SCALEROWDOWN34_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
if (TestCpuFlag(kCpuHasSSSE3)) {
if (!filtering) {
ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
ScaleRowDown34_0 = ScaleRowDown34_Any_SSSE3;
ScaleRowDown34_1 = ScaleRowDown34_Any_SSSE3;
} else {
ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3;
ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3;
ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_SSSE3;
ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_SSSE3;
}
if (dst_width % 24 == 0) {
if (!filtering) {
ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
} else {
ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3;
ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3;
}
}
}
#endif
@ -351,8 +394,7 @@ static void ScalePlaneDown34_16(int src_width, int src_height,
}
#endif
#if defined(HAS_SCALEROWDOWN34_16_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
if (!filtering) {
ScaleRowDown34_0 = ScaleRowDown34_16_SSSE3;
ScaleRowDown34_1 = ScaleRowDown34_16_SSSE3;
@ -435,28 +477,47 @@ static void ScalePlaneDown38(int src_width, int src_height,
ScaleRowDown38_3 = ScaleRowDown38_3_Box_C;
ScaleRowDown38_2 = ScaleRowDown38_2_Box_C;
}
#if defined(HAS_SCALEROWDOWN38_NEON)
if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) {
if (TestCpuFlag(kCpuHasNEON)) {
if (!filtering) {
ScaleRowDown38_3 = ScaleRowDown38_NEON;
ScaleRowDown38_2 = ScaleRowDown38_NEON;
ScaleRowDown38_3 = ScaleRowDown38_Any_NEON;
ScaleRowDown38_2 = ScaleRowDown38_Any_NEON;
} else {
ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON;
ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON;
ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_NEON;
ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_NEON;
}
if (dst_width % 12 == 0) {
if (!filtering) {
ScaleRowDown38_3 = ScaleRowDown38_NEON;
ScaleRowDown38_2 = ScaleRowDown38_NEON;
} else {
ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON;
ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON;
}
}
}
#elif defined(HAS_SCALEROWDOWN38_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
#endif
#if defined(HAS_SCALEROWDOWN38_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
if (!filtering) {
ScaleRowDown38_3 = ScaleRowDown38_Any_SSSE3;
ScaleRowDown38_2 = ScaleRowDown38_Any_SSSE3;
} else {
ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_SSSE3;
ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_SSSE3;
}
if (dst_width % 12 == 0 && !filtering) {
ScaleRowDown38_3 = ScaleRowDown38_SSSE3;
ScaleRowDown38_2 = ScaleRowDown38_SSSE3;
} else {
}
if (dst_width % 6 == 0 && filtering) {
ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3;
ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3;
}
}
#elif defined(HAS_SCALEROWDOWN38_MIPS_DSPR2)
#endif
#if defined(HAS_SCALEROWDOWN38_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 12 == 0) &&
IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
@ -522,9 +583,9 @@ static void ScalePlaneDown38_16(int src_width, int src_height,
ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_NEON;
}
}
#elif defined(HAS_SCALEROWDOWN38_16_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
#endif
#if defined(HAS_SCALEROWDOWN38_16_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
if (!filtering) {
ScaleRowDown38_3 = ScaleRowDown38_16_SSSE3;
ScaleRowDown38_2 = ScaleRowDown38_16_SSSE3;
@ -533,7 +594,8 @@ static void ScalePlaneDown38_16(int src_width, int src_height,
ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_SSSE3;
}
}
#elif defined(HAS_SCALEROWDOWN38_16_MIPS_DSPR2)
#endif
#if defined(HAS_SCALEROWDOWN38_16_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 12 == 0) &&
IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
@ -570,65 +632,7 @@ static void ScalePlaneDown38_16(int src_width, int src_height,
}
}
static __inline uint32 SumBox(int iboxwidth, int iboxheight,
ptrdiff_t src_stride, const uint8* src_ptr) {
uint32 sum = 0u;
int y;
assert(iboxwidth > 0);
assert(iboxheight > 0);
for (y = 0; y < iboxheight; ++y) {
int x;
for (x = 0; x < iboxwidth; ++x) {
sum += src_ptr[x];
}
src_ptr += src_stride;
}
return sum;
}
static __inline uint32 SumBox_16(int iboxwidth, int iboxheight,
ptrdiff_t src_stride, const uint16* src_ptr) {
uint32 sum = 0u;
int y;
assert(iboxwidth > 0);
assert(iboxheight > 0);
for (y = 0; y < iboxheight; ++y) {
int x;
for (x = 0; x < iboxwidth; ++x) {
sum += src_ptr[x];
}
src_ptr += src_stride;
}
return sum;
}
static void ScalePlaneBoxRow_C(int dst_width, int boxheight,
int x, int dx, ptrdiff_t src_stride,
const uint8* src_ptr, uint8* dst_ptr) {
int i;
int boxwidth;
for (i = 0; i < dst_width; ++i) {
int ix = x >> 16;
x += dx;
boxwidth = (x >> 16) - ix;
*dst_ptr++ = SumBox(boxwidth, boxheight, src_stride, src_ptr + ix) /
(boxwidth * boxheight);
}
}
static void ScalePlaneBoxRow_16_C(int dst_width, int boxheight,
int x, int dx, ptrdiff_t src_stride,
const uint16* src_ptr, uint16* dst_ptr) {
int i;
int boxwidth;
for (i = 0; i < dst_width; ++i) {
int ix = x >> 16;
x += dx;
boxwidth = (x >> 16) - ix;
*dst_ptr++ = SumBox_16(boxwidth, boxheight, src_stride, src_ptr + ix) /
(boxwidth * boxheight);
}
}
#define MIN1(x) ((x) < 1 ? 1 : (x))
static __inline uint32 SumPixels(int iboxwidth, const uint16* src_ptr) {
uint32 sum = 0u;
@ -654,15 +658,15 @@ static void ScaleAddCols2_C(int dst_width, int boxheight, int x, int dx,
const uint16* src_ptr, uint8* dst_ptr) {
int i;
int scaletbl[2];
int minboxwidth = (dx >> 16);
int minboxwidth = dx >> 16;
int* scaleptr = scaletbl - minboxwidth;
int boxwidth;
scaletbl[0] = 65536 / (minboxwidth * boxheight);
scaletbl[1] = 65536 / ((minboxwidth + 1) * boxheight);
scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight);
scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight);
for (i = 0; i < dst_width; ++i) {
int ix = x >> 16;
x += dx;
boxwidth = (x >> 16) - ix;
boxwidth = MIN1((x >> 16) - ix);
*dst_ptr++ = SumPixels(boxwidth, src_ptr + ix) * scaleptr[boxwidth] >> 16;
}
}
@ -671,25 +675,36 @@ static void ScaleAddCols2_16_C(int dst_width, int boxheight, int x, int dx,
const uint32* src_ptr, uint16* dst_ptr) {
int i;
int scaletbl[2];
int minboxwidth = (dx >> 16);
int minboxwidth = dx >> 16;
int* scaleptr = scaletbl - minboxwidth;
int boxwidth;
scaletbl[0] = 65536 / (minboxwidth * boxheight);
scaletbl[1] = 65536 / ((minboxwidth + 1) * boxheight);
scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight);
scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight);
for (i = 0; i < dst_width; ++i) {
int ix = x >> 16;
x += dx;
boxwidth = (x >> 16) - ix;
*dst_ptr++ = SumPixels_16(boxwidth, src_ptr + ix) *
scaleptr[boxwidth] >> 16;
boxwidth = MIN1((x >> 16) - ix);
*dst_ptr++ =
SumPixels_16(boxwidth, src_ptr + ix) * scaleptr[boxwidth] >> 16;
}
}
static void ScaleAddCols0_C(int dst_width, int boxheight, int x, int,
const uint16* src_ptr, uint8* dst_ptr) {
int scaleval = 65536 / boxheight;
int i;
src_ptr += (x >> 16);
for (i = 0; i < dst_width; ++i) {
*dst_ptr++ = src_ptr[i] * scaleval >> 16;
}
}
static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx,
const uint16* src_ptr, uint8* dst_ptr) {
int boxwidth = (dx >> 16);
int boxwidth = MIN1(dx >> 16);
int scaleval = 65536 / (boxwidth * boxheight);
int i;
x >>= 16;
for (i = 0; i < dst_width; ++i) {
*dst_ptr++ = SumPixels(boxwidth, src_ptr + x) * scaleval >> 16;
x += boxwidth;
@ -698,7 +713,7 @@ static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx,
static void ScaleAddCols1_16_C(int dst_width, int boxheight, int x, int dx,
const uint32* src_ptr, uint16* dst_ptr) {
int boxwidth = (dx >> 16);
int boxwidth = MIN1(dx >> 16);
int scaleval = 65536 / (boxwidth * boxheight);
int i;
for (i = 0; i < dst_width; ++i) {
@ -718,7 +733,7 @@ static void ScalePlaneBox(int src_width, int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint8* src_ptr, uint8* dst_ptr) {
int j;
int j, k;
// Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0;
int y = 0;
@ -728,10 +743,40 @@ static void ScalePlaneBox(int src_width, int src_height,
ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox,
&x, &y, &dx, &dy);
src_width = Abs(src_width);
// TODO(fbarchard): Remove this and make AddRows handle boxheight 1.
if (!IS_ALIGNED(src_width, 16) || dst_height * 2 > src_height) {
uint8* dst = dst_ptr;
int j;
{
// Allocate a row buffer of uint16.
align_buffer_64(row16, src_width * 2);
void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
const uint16* src_ptr, uint8* dst_ptr) =
(dx & 0xffff) ? ScaleAddCols2_C:
((dx != 0x10000) ? ScaleAddCols1_C : ScaleAddCols0_C);
void (*ScaleAddRow)(const uint8* src_ptr, uint16* dst_ptr, int src_width) =
ScaleAddRow_C;
#if defined(HAS_SCALEADDROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ScaleAddRow = ScaleAddRow_Any_SSE2;
if (IS_ALIGNED(src_width, 16)) {
ScaleAddRow = ScaleAddRow_SSE2;
}
}
#endif
#if defined(HAS_SCALEADDROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ScaleAddRow = ScaleAddRow_Any_AVX2;
if (IS_ALIGNED(src_width, 32)) {
ScaleAddRow = ScaleAddRow_AVX2;
}
}
#endif
#if defined(HAS_SCALEADDROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ScaleAddRow = ScaleAddRow_Any_NEON;
if (IS_ALIGNED(src_width, 16)) {
ScaleAddRow = ScaleAddRow_NEON;
}
}
#endif
for (j = 0; j < dst_height; ++j) {
int boxheight;
int iy = y >> 16;
@ -740,46 +785,13 @@ static void ScalePlaneBox(int src_width, int src_height,
if (y > max_y) {
y = max_y;
}
boxheight = (y >> 16) - iy;
ScalePlaneBoxRow_C(dst_width, boxheight,
x, dx, src_stride,
src, dst);
dst += dst_stride;
}
return;
}
{
// Allocate a row buffer of uint16.
align_buffer_64(row16, src_width * 2);
void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
const uint16* src_ptr, uint8* dst_ptr) =
(dx & 0xffff) ? ScaleAddCols2_C: ScaleAddCols1_C;
void (*ScaleAddRows)(const uint8* src_ptr, ptrdiff_t src_stride,
uint16* dst_ptr, int src_width, int src_height) = ScaleAddRows_C;
#if defined(HAS_SCALEADDROWS_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
#ifdef AVOID_OVERREAD
IS_ALIGNED(src_width, 16) &&
#endif
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
ScaleAddRows = ScaleAddRows_SSE2;
}
#endif
for (j = 0; j < dst_height; ++j) {
int boxheight;
int iy = y >> 16;
const uint8* src = src_ptr + iy * src_stride;
y += dy;
if (y > (src_height << 16)) {
y = (src_height << 16);
boxheight = MIN1((y >> 16) - iy);
memset(row16, 0, src_width * 2);
for (k = 0; k < boxheight; ++k) {
ScaleAddRow(src, (uint16 *)(row16), src_width);
src += src_stride;
}
boxheight = (y >> 16) - iy;
ScaleAddRows(src, src_stride, (uint16*)(row16),
src_width, boxheight);
ScaleAddCols(dst_width, boxheight, x, dx, (uint16*)(row16),
dst_ptr);
ScaleAddCols(dst_width, boxheight, x, dx, (uint16*)(row16), dst_ptr);
dst_ptr += dst_stride;
}
free_aligned_buffer_64(row16);
@ -790,7 +802,7 @@ static void ScalePlaneBox_16(int src_width, int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint16* src_ptr, uint16* dst_ptr) {
int j;
int j, k;
// Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0;
int y = 0;
@ -800,10 +812,21 @@ static void ScalePlaneBox_16(int src_width, int src_height,
ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox,
&x, &y, &dx, &dy);
src_width = Abs(src_width);
// TODO(fbarchard): Remove this and make AddRows handle boxheight 1.
if (!IS_ALIGNED(src_width, 16) || dst_height * 2 > src_height) {
uint16* dst = dst_ptr;
int j;
{
// Allocate a row buffer of uint32.
align_buffer_64(row32, src_width * 4);
void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
const uint32* src_ptr, uint16* dst_ptr) =
(dx & 0xffff) ? ScaleAddCols2_16_C: ScaleAddCols1_16_C;
void (*ScaleAddRow)(const uint16* src_ptr, uint32* dst_ptr, int src_width) =
ScaleAddRow_16_C;
#if defined(HAS_SCALEADDROW_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(src_width, 16)) {
ScaleAddRow = ScaleAddRow_16_SSE2;
}
#endif
for (j = 0; j < dst_height; ++j) {
int boxheight;
int iy = y >> 16;
@ -812,46 +835,13 @@ static void ScalePlaneBox_16(int src_width, int src_height,
if (y > max_y) {
y = max_y;
}
boxheight = (y >> 16) - iy;
ScalePlaneBoxRow_16_C(dst_width, boxheight,
x, dx, src_stride,
src, dst);
dst += dst_stride;
}
return;
}
{
// Allocate a row buffer of uint32.
align_buffer_64(row32, src_width * 4);
void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
const uint32* src_ptr, uint16* dst_ptr) =
(dx & 0xffff) ? ScaleAddCols2_16_C: ScaleAddCols1_16_C;
void (*ScaleAddRows)(const uint16* src_ptr, ptrdiff_t src_stride,
uint32* dst_ptr, int src_width, int src_height) = ScaleAddRows_16_C;
#if defined(HAS_SCALEADDROWS_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
#ifdef AVOID_OVERREAD
IS_ALIGNED(src_width, 16) &&
#endif
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
ScaleAddRows = ScaleAddRows_16_SSE2;
}
#endif
for (j = 0; j < dst_height; ++j) {
int boxheight;
int iy = y >> 16;
const uint16* src = src_ptr + iy * src_stride;
y += dy;
if (y > (src_height << 16)) {
y = (src_height << 16);
boxheight = MIN1((y >> 16) - iy);
memset(row32, 0, src_width * 4);
for (k = 0; k < boxheight; ++k) {
ScaleAddRow(src, (uint32 *)(row32), src_width);
src += src_stride;
}
boxheight = (y >> 16) - iy;
ScaleAddRows(src, src_stride, (uint32*)(row32),
src_width, boxheight);
ScaleAddCols(dst_width, boxheight, x, dx, (uint32*)(row32),
dst_ptr);
ScaleAddCols(dst_width, boxheight, x, dx, (uint32*)(row32), dst_ptr);
dst_ptr += dst_stride;
}
free_aligned_buffer_64(row32);
@ -885,30 +875,16 @@ void ScalePlaneBilinearDown(int src_width, int src_height,
&x, &y, &dx, &dy);
src_width = Abs(src_width);
#if defined(HAS_INTERPOLATEROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && src_width >= 16) {
InterpolateRow = InterpolateRow_Any_SSE2;
if (IS_ALIGNED(src_width, 16)) {
InterpolateRow = InterpolateRow_Unaligned_SSE2;
if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
InterpolateRow = InterpolateRow_SSE2;
}
}
}
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 16) {
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(src_width, 16)) {
InterpolateRow = InterpolateRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
InterpolateRow = InterpolateRow_SSSE3;
}
InterpolateRow = InterpolateRow_SSSE3;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && src_width >= 32) {
if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_AVX2;
if (IS_ALIGNED(src_width, 32)) {
InterpolateRow = InterpolateRow_AVX2;
@ -916,7 +892,7 @@ void ScalePlaneBilinearDown(int src_width, int src_height,
}
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && src_width >= 16) {
if (TestCpuFlag(kCpuHasNEON)) {
InterpolateRow = InterpolateRow_Any_NEON;
if (IS_ALIGNED(src_width, 16)) {
InterpolateRow = InterpolateRow_NEON;
@ -924,7 +900,7 @@ void ScalePlaneBilinearDown(int src_width, int src_height,
}
#endif
#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && src_width >= 4) {
if (TestCpuFlag(kCpuHasMIPS_DSPR2)) {
InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
if (IS_ALIGNED(src_width, 4)) {
InterpolateRow = InterpolateRow_MIPS_DSPR2;
@ -937,6 +913,14 @@ void ScalePlaneBilinearDown(int src_width, int src_height,
if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
ScaleFilterCols = ScaleFilterCols_SSSE3;
}
#endif
#if defined(HAS_SCALEFILTERCOLS_NEON)
if (TestCpuFlag(kCpuHasNEON) && src_width < 32768) {
ScaleFilterCols = ScaleFilterCols_Any_NEON;
if (IS_ALIGNED(dst_width, 8)) {
ScaleFilterCols = ScaleFilterCols_NEON;
}
}
#endif
if (y > max_y) {
y = max_y;
@ -988,29 +972,23 @@ void ScalePlaneBilinearDown_16(int src_width, int src_height,
src_width = Abs(src_width);
#if defined(HAS_INTERPOLATEROW_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && src_width >= 16) {
if (TestCpuFlag(kCpuHasSSE2)) {
InterpolateRow = InterpolateRow_Any_16_SSE2;
if (IS_ALIGNED(src_width, 16)) {
InterpolateRow = InterpolateRow_Unaligned_16_SSE2;
if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
InterpolateRow = InterpolateRow_16_SSE2;
}
InterpolateRow = InterpolateRow_16_SSE2;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 16) {
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_16_SSSE3;
if (IS_ALIGNED(src_width, 16)) {
InterpolateRow = InterpolateRow_Unaligned_16_SSSE3;
if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
InterpolateRow = InterpolateRow_16_SSSE3;
}
InterpolateRow = InterpolateRow_16_SSSE3;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_16_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && src_width >= 32) {
if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_16_AVX2;
if (IS_ALIGNED(src_width, 32)) {
InterpolateRow = InterpolateRow_16_AVX2;
@ -1018,7 +996,7 @@ void ScalePlaneBilinearDown_16(int src_width, int src_height,
}
#endif
#if defined(HAS_INTERPOLATEROW_16_NEON)
if (TestCpuFlag(kCpuHasNEON) && src_width >= 16) {
if (TestCpuFlag(kCpuHasNEON)) {
InterpolateRow = InterpolateRow_Any_16_NEON;
if (IS_ALIGNED(src_width, 16)) {
InterpolateRow = InterpolateRow_16_NEON;
@ -1026,7 +1004,7 @@ void ScalePlaneBilinearDown_16(int src_width, int src_height,
}
#endif
#if defined(HAS_INTERPOLATEROW_16_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && src_width >= 4) {
if (TestCpuFlag(kCpuHasMIPS_DSPR2)) {
InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2;
if (IS_ALIGNED(src_width, 4)) {
InterpolateRow = InterpolateRow_16_MIPS_DSPR2;
@ -1080,36 +1058,22 @@ void ScalePlaneBilinearUp(int src_width, int src_height,
ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
InterpolateRow_C;
void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx) =
filtering ? ScaleFilterCols_C : ScaleCols_C;
int dst_width, int x, int dx) =
filtering ? ScaleFilterCols_C : ScaleCols_C;
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
&x, &y, &dx, &dy);
src_width = Abs(src_width);
#if defined(HAS_INTERPOLATEROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 16) {
InterpolateRow = InterpolateRow_Any_SSE2;
if (IS_ALIGNED(dst_width, 16)) {
InterpolateRow = InterpolateRow_Unaligned_SSE2;
if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
InterpolateRow = InterpolateRow_SSE2;
}
}
}
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 16) {
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(dst_width, 16)) {
InterpolateRow = InterpolateRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
InterpolateRow = InterpolateRow_SSSE3;
}
InterpolateRow = InterpolateRow_SSSE3;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 32) {
if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_AVX2;
if (IS_ALIGNED(dst_width, 32)) {
InterpolateRow = InterpolateRow_AVX2;
@ -1117,7 +1081,7 @@ void ScalePlaneBilinearUp(int src_width, int src_height,
}
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && dst_width >= 16) {
if (TestCpuFlag(kCpuHasNEON)) {
InterpolateRow = InterpolateRow_Any_NEON;
if (IS_ALIGNED(dst_width, 16)) {
InterpolateRow = InterpolateRow_NEON;
@ -1125,7 +1089,7 @@ void ScalePlaneBilinearUp(int src_width, int src_height,
}
#endif
#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 4) {
if (TestCpuFlag(kCpuHasMIPS_DSPR2)) {
InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_MIPS_DSPR2;
@ -1140,13 +1104,19 @@ void ScalePlaneBilinearUp(int src_width, int src_height,
if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
ScaleFilterCols = ScaleFilterCols_SSSE3;
}
#endif
#if defined(HAS_SCALEFILTERCOLS_NEON)
if (filtering && TestCpuFlag(kCpuHasNEON) && src_width < 32768) {
ScaleFilterCols = ScaleFilterCols_Any_NEON;
if (IS_ALIGNED(dst_width, 8)) {
ScaleFilterCols = ScaleFilterCols_NEON;
}
}
#endif
if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
ScaleFilterCols = ScaleColsUp2_C;
#if defined(HAS_SCALECOLS_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
ScaleFilterCols = ScaleColsUp2_SSE2;
}
#endif
@ -1160,7 +1130,7 @@ void ScalePlaneBilinearUp(int src_width, int src_height,
const uint8* src = src_ptr + yi * src_stride;
// Allocate 2 row buffers.
const int kRowSize = (dst_width + 15) & ~15;
const int kRowSize = (dst_width + 31) & ~31;
align_buffer_64(row, kRowSize * 2);
uint8* rowptr = row;
@ -1219,36 +1189,30 @@ void ScalePlaneBilinearUp_16(int src_width, int src_height,
ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
InterpolateRow_16_C;
void (*ScaleFilterCols)(uint16* dst_ptr, const uint16* src_ptr,
int dst_width, int x, int dx) =
filtering ? ScaleFilterCols_16_C : ScaleCols_16_C;
int dst_width, int x, int dx) =
filtering ? ScaleFilterCols_16_C : ScaleCols_16_C;
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
&x, &y, &dx, &dy);
src_width = Abs(src_width);
#if defined(HAS_INTERPOLATEROW_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 16) {
if (TestCpuFlag(kCpuHasSSE2)) {
InterpolateRow = InterpolateRow_Any_16_SSE2;
if (IS_ALIGNED(dst_width, 16)) {
InterpolateRow = InterpolateRow_Unaligned_16_SSE2;
if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
InterpolateRow = InterpolateRow_16_SSE2;
}
InterpolateRow = InterpolateRow_16_SSE2;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 16) {
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_16_SSSE3;
if (IS_ALIGNED(dst_width, 16)) {
InterpolateRow = InterpolateRow_Unaligned_16_SSSE3;
if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
InterpolateRow = InterpolateRow_16_SSSE3;
}
InterpolateRow = InterpolateRow_16_SSSE3;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_16_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 32) {
if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_16_AVX2;
if (IS_ALIGNED(dst_width, 32)) {
InterpolateRow = InterpolateRow_16_AVX2;
@ -1256,7 +1220,7 @@ void ScalePlaneBilinearUp_16(int src_width, int src_height,
}
#endif
#if defined(HAS_INTERPOLATEROW_16_NEON)
if (TestCpuFlag(kCpuHasNEON) && dst_width >= 16) {
if (TestCpuFlag(kCpuHasNEON)) {
InterpolateRow = InterpolateRow_Any_16_NEON;
if (IS_ALIGNED(dst_width, 16)) {
InterpolateRow = InterpolateRow_16_NEON;
@ -1264,7 +1228,7 @@ void ScalePlaneBilinearUp_16(int src_width, int src_height,
}
#endif
#if defined(HAS_INTERPOLATEROW_16_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 4) {
if (TestCpuFlag(kCpuHasMIPS_DSPR2)) {
InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2;
if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_16_MIPS_DSPR2;
@ -1283,9 +1247,7 @@ void ScalePlaneBilinearUp_16(int src_width, int src_height,
if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
ScaleFilterCols = ScaleColsUp2_16_C;
#if defined(HAS_SCALECOLS_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
ScaleFilterCols = ScaleColsUp2_16_SSE2;
}
#endif
@ -1299,7 +1261,7 @@ void ScalePlaneBilinearUp_16(int src_width, int src_height,
const uint16* src = src_ptr + yi * src_stride;
// Allocate 2 row buffers.
const int kRowSize = (dst_width + 15) & ~15;
const int kRowSize = (dst_width + 31) & ~31;
align_buffer_64(row, kRowSize * 4);
uint16* rowptr = (uint16*)row;
@ -1366,17 +1328,14 @@ static void ScalePlaneSimple(int src_width, int src_height,
if (src_width * 2 == dst_width && x < 0x8000) {
ScaleCols = ScaleColsUp2_C;
#if defined(HAS_SCALECOLS_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
ScaleCols = ScaleColsUp2_SSE2;
}
#endif
}
for (i = 0; i < dst_height; ++i) {
ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride,
dst_width, x, dx);
ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx);
dst_ptr += dst_stride;
y += dy;
}
@ -1401,9 +1360,7 @@ static void ScalePlaneSimple_16(int src_width, int src_height,
if (src_width * 2 == dst_width && x < 0x8000) {
ScaleCols = ScaleColsUp2_16_C;
#if defined(HAS_SCALECOLS_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
ScaleCols = ScaleColsUp2_16_SSE2;
}
#endif
@ -1428,8 +1385,7 @@ void ScalePlane(const uint8* src, int src_stride,
enum FilterMode filtering) {
// Simplify filtering when possible.
filtering = ScaleFilterReduce(src_width, src_height,
dst_width, dst_height,
filtering);
dst_width, dst_height, filtering);
// Negative height means invert the image.
if (src_height < 0) {
@ -1445,9 +1401,9 @@ void ScalePlane(const uint8* src, int src_stride,
CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height);
return;
}
if (dst_width == src_width) {
if (dst_width == src_width && filtering != kFilterBox) {
int dy = FixedDiv(src_height, dst_height);
// Arbitrary scale vertically, but unscaled vertically.
// Arbitrary scale vertically, but unscaled horizontally.
ScalePlaneVertical(src_height,
dst_width, dst_height,
src_stride, dst_stride, src, dst,
@ -1478,7 +1434,7 @@ void ScalePlane(const uint8* src, int src_stride,
return;
}
if (4 * dst_width == src_width && 4 * dst_height == src_height &&
filtering != kFilterBilinear) {
(filtering == kFilterBox || filtering == kFilterNone)) {
// optimized, 1/4
ScalePlaneDown4(src_width, src_height, dst_width, dst_height,
src_stride, dst_stride, src, dst, filtering);
@ -1512,8 +1468,7 @@ void ScalePlane_16(const uint16* src, int src_stride,
enum FilterMode filtering) {
// Simplify filtering when possible.
filtering = ScaleFilterReduce(src_width, src_height,
dst_width, dst_height,
filtering);
dst_width, dst_height, filtering);
// Negative height means invert the image.
if (src_height < 0) {
@ -1606,6 +1561,7 @@ int I420Scale(const uint8* src_y, int src_stride_y,
int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
src_width > 32768 || src_height > 32768 ||
!dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) {
return -1;
}
@ -1637,6 +1593,7 @@ int I420Scale_16(const uint16* src_y, int src_stride_y,
int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
src_width > 32768 || src_height > 32768 ||
!dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) {
return -1;
}

View File

@ -53,18 +53,27 @@ static void ScaleARGBDown2(int src_width, int src_height,
}
#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_SSE2 :
(filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2 :
ScaleARGBRowDown2Box_SSE2);
if (TestCpuFlag(kCpuHasSSE2)) {
ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_Any_SSE2 :
(filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_SSE2 :
ScaleARGBRowDown2Box_Any_SSE2);
if (IS_ALIGNED(dst_width, 4)) {
ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_SSE2 :
(filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2 :
ScaleARGBRowDown2Box_SSE2);
}
}
#elif defined(HAS_SCALEARGBROWDOWN2_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) &&
IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) {
ScaleARGBRowDown2 = filtering ? ScaleARGBRowDown2Box_NEON :
ScaleARGBRowDown2_NEON;
#endif
#if defined(HAS_SCALEARGBROWDOWN2_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_Any_NEON :
(filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_NEON :
ScaleARGBRowDown2Box_Any_NEON);
if (IS_ALIGNED(dst_width, 8)) {
ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_NEON :
(filtering == kFilterLinear ? ScaleARGBRowDown2Linear_NEON :
ScaleARGBRowDown2Box_NEON);
}
}
#endif
@ -88,7 +97,7 @@ static void ScaleARGBDown4Box(int src_width, int src_height,
int x, int dx, int y, int dy) {
int j;
// Allocate 2 rows of ARGB.
const int kRowSize = (dst_width * 2 * 4 + 15) & ~15;
const int kRowSize = (dst_width * 2 * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2);
int row_stride = src_stride * (dy >> 16);
void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
@ -98,17 +107,22 @@ static void ScaleARGBDown4Box(int src_width, int src_height,
assert(dx == 65536 * 4); // Test scale factor of 4.
assert((dy & 0x3ffff) == 0); // Test vertical scale is multiple of 4.
#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleARGBRowDown2 = ScaleARGBRowDown2Box_SSE2;
}
#elif defined(HAS_SCALEARGBROWDOWN2_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) &&
IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) {
ScaleARGBRowDown2 = ScaleARGBRowDown2Box_NEON;
if (TestCpuFlag(kCpuHasSSE2)) {
ScaleARGBRowDown2 = ScaleARGBRowDown2Box_Any_SSE2;
if (IS_ALIGNED(dst_width, 4)) {
ScaleARGBRowDown2 = ScaleARGBRowDown2Box_SSE2;
}
}
#endif
#if defined(HAS_SCALEARGBROWDOWN2_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ScaleARGBRowDown2 = ScaleARGBRowDown2Box_Any_NEON;
if (IS_ALIGNED(dst_width, 8)) {
ScaleARGBRowDown2 = ScaleARGBRowDown2Box_NEON;
}
}
#endif
for (j = 0; j < dst_height; ++j) {
ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2);
ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride,
@ -139,16 +153,23 @@ static void ScaleARGBDownEven(int src_width, int src_height,
assert(IS_ALIGNED(src_height, 2));
src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_SSE2 :
ScaleARGBRowDownEven_SSE2;
if (TestCpuFlag(kCpuHasSSE2)) {
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2 :
ScaleARGBRowDownEven_Any_SSE2;
if (IS_ALIGNED(dst_width, 4)) {
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_SSE2 :
ScaleARGBRowDownEven_SSE2;
}
}
#elif defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 4) &&
IS_ALIGNED(src_argb, 4)) {
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_NEON :
ScaleARGBRowDownEven_NEON;
#endif
#if defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_NEON :
ScaleARGBRowDownEven_Any_NEON;
if (IS_ALIGNED(dst_width, 4)) {
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_NEON :
ScaleARGBRowDownEven_NEON;
}
}
#endif
@ -189,30 +210,16 @@ static void ScaleARGBBilinearDown(int src_width, int src_height,
clip_src_width = (int)(xr - xl) * 4; // Width aligned to 4.
src_argb += xl * 4;
x -= (int)(xl << 16);
#if defined(HAS_INTERPOLATEROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && clip_src_width >= 16) {
InterpolateRow = InterpolateRow_Any_SSE2;
if (IS_ALIGNED(clip_src_width, 16)) {
InterpolateRow = InterpolateRow_Unaligned_SSE2;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) {
InterpolateRow = InterpolateRow_SSE2;
}
}
}
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && clip_src_width >= 16) {
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(clip_src_width, 16)) {
InterpolateRow = InterpolateRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) {
InterpolateRow = InterpolateRow_SSSE3;
}
InterpolateRow = InterpolateRow_SSSE3;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && clip_src_width >= 32) {
if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_AVX2;
if (IS_ALIGNED(clip_src_width, 32)) {
InterpolateRow = InterpolateRow_AVX2;
@ -220,15 +227,15 @@ static void ScaleARGBBilinearDown(int src_width, int src_height,
}
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && clip_src_width >= 16) {
if (TestCpuFlag(kCpuHasNEON)) {
InterpolateRow = InterpolateRow_Any_NEON;
if (IS_ALIGNED(clip_src_width, 16)) {
InterpolateRow = InterpolateRow_NEON;
}
}
#endif
#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && clip_src_width >= 4 &&
#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4)) {
InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
if (IS_ALIGNED(clip_src_width, 4)) {
@ -240,6 +247,14 @@ static void ScaleARGBBilinearDown(int src_width, int src_height,
if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
}
#endif
#if defined(HAS_SCALEARGBFILTERCOLS_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
if (IS_ALIGNED(dst_width, 4)) {
ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
}
}
#endif
// TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
// Allocate a row of ARGB.
@ -285,30 +300,16 @@ static void ScaleARGBBilinearUp(int src_width, int src_height,
int dst_width, int x, int dx) =
filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
const int max_y = (src_height - 1) << 16;
#if defined(HAS_INTERPOLATEROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) {
InterpolateRow = InterpolateRow_Any_SSE2;
if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_Unaligned_SSE2;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
InterpolateRow = InterpolateRow_SSE2;
}
}
}
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) {
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
InterpolateRow = InterpolateRow_SSSE3;
}
InterpolateRow = InterpolateRow_SSSE3;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 8) {
if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_AVX2;
if (IS_ALIGNED(dst_width, 8)) {
InterpolateRow = InterpolateRow_AVX2;
@ -316,15 +317,15 @@ static void ScaleARGBBilinearUp(int src_width, int src_height,
}
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) {
if (TestCpuFlag(kCpuHasNEON)) {
InterpolateRow = InterpolateRow_Any_NEON;
if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_NEON;
}
}
#endif
#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 1 &&
#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
InterpolateRow = InterpolateRow_MIPS_DSPR2;
}
@ -338,17 +339,31 @@ static void ScaleARGBBilinearUp(int src_width, int src_height,
ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
}
#endif
#if defined(HAS_SCALEARGBFILTERCOLS_NEON)
if (filtering && TestCpuFlag(kCpuHasNEON)) {
ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
if (IS_ALIGNED(dst_width, 4)) {
ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
}
}
#endif
#if defined(HAS_SCALEARGBCOLS_SSE2)
if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
ScaleARGBFilterCols = ScaleARGBCols_SSE2;
}
#endif
#if defined(HAS_SCALEARGBCOLS_NEON)
if (!filtering && TestCpuFlag(kCpuHasNEON)) {
ScaleARGBFilterCols = ScaleARGBCols_Any_NEON;
if (IS_ALIGNED(dst_width, 8)) {
ScaleARGBFilterCols = ScaleARGBCols_NEON;
}
}
#endif
if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
ScaleARGBFilterCols = ScaleARGBColsUp2_C;
#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
}
#endif
@ -363,7 +378,7 @@ static void ScaleARGBBilinearUp(int src_width, int src_height,
const uint8* src = src_argb + yi * src_stride;
// Allocate 2 rows of ARGB.
const int kRowSize = (dst_width * 4 + 15) & ~15;
const int kRowSize = (dst_width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2);
uint8* rowptr = row;
@ -427,18 +442,15 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
uint8* rgb_buf,
int width) = I422ToARGBRow_C;
#if defined(HAS_I422TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 8) {
if (TestCpuFlag(kCpuHasSSSE3)) {
I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(src_width, 8)) {
I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
I422ToARGBRow = I422ToARGBRow_SSSE3;
}
I422ToARGBRow = I422ToARGBRow_SSSE3;
}
}
#endif
#if defined(HAS_I422TOARGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && src_width >= 16) {
if (TestCpuFlag(kCpuHasAVX2)) {
I422ToARGBRow = I422ToARGBRow_Any_AVX2;
if (IS_ALIGNED(src_width, 16)) {
I422ToARGBRow = I422ToARGBRow_AVX2;
@ -446,7 +458,7 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
}
#endif
#if defined(HAS_I422TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && src_width >= 8) {
if (TestCpuFlag(kCpuHasNEON)) {
I422ToARGBRow = I422ToARGBRow_Any_NEON;
if (IS_ALIGNED(src_width, 8)) {
I422ToARGBRow = I422ToARGBRow_NEON;
@ -466,30 +478,16 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
InterpolateRow_C;
#if defined(HAS_INTERPOLATEROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) {
InterpolateRow = InterpolateRow_Any_SSE2;
if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_Unaligned_SSE2;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
InterpolateRow = InterpolateRow_SSE2;
}
}
}
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) {
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
InterpolateRow = InterpolateRow_SSSE3;
}
InterpolateRow = InterpolateRow_SSSE3;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 8) {
if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_AVX2;
if (IS_ALIGNED(dst_width, 8)) {
InterpolateRow = InterpolateRow_AVX2;
@ -497,15 +495,15 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
}
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) {
if (TestCpuFlag(kCpuHasNEON)) {
InterpolateRow = InterpolateRow_Any_NEON;
if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_NEON;
}
}
#endif
#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 1 &&
#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
InterpolateRow = InterpolateRow_MIPS_DSPR2;
}
@ -523,17 +521,31 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
}
#endif
#if defined(HAS_SCALEARGBFILTERCOLS_NEON)
if (filtering && TestCpuFlag(kCpuHasNEON)) {
ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
if (IS_ALIGNED(dst_width, 4)) {
ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
}
}
#endif
#if defined(HAS_SCALEARGBCOLS_SSE2)
if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
ScaleARGBFilterCols = ScaleARGBCols_SSE2;
}
#endif
#if defined(HAS_SCALEARGBCOLS_NEON)
if (!filtering && TestCpuFlag(kCpuHasNEON)) {
ScaleARGBFilterCols = ScaleARGBCols_Any_NEON;
if (IS_ALIGNED(dst_width, 8)) {
ScaleARGBFilterCols = ScaleARGBCols_NEON;
}
}
#endif
if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
ScaleARGBFilterCols = ScaleARGBColsUp2_C;
#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
}
#endif
@ -551,7 +563,7 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
const uint8* src_row_v = src_v + uv_yi * src_stride_v;
// Allocate 2 rows of ARGB.
const int kRowSize = (dst_width * 4 + 15) & ~15;
const int kRowSize = (dst_width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2);
// Allocate 1 row of ARGB for source conversion.
@ -636,13 +648,19 @@ static void ScaleARGBSimple(int src_width, int src_height,
if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
ScaleARGBCols = ScaleARGBCols_SSE2;
}
#endif
#if defined(HAS_SCALEARGBCOLS_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ScaleARGBCols = ScaleARGBCols_Any_NEON;
if (IS_ALIGNED(dst_width, 8)) {
ScaleARGBCols = ScaleARGBCols_NEON;
}
}
#endif
if (src_width * 2 == dst_width && x < 0x8000) {
ScaleARGBCols = ScaleARGBColsUp2_C;
#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
ScaleARGBCols = ScaleARGBColsUp2_SSE2;
}
#endif
@ -776,6 +794,7 @@ int ARGBScaleClip(const uint8* src_argb, int src_stride_argb,
if (!src_argb || src_width == 0 || src_height == 0 ||
!dst_argb || dst_width <= 0 || dst_height <= 0 ||
clip_x < 0 || clip_y < 0 ||
clip_width > 32768 || clip_height > 32768 ||
(clip_x + clip_width) > dst_width ||
(clip_y + clip_height) > dst_height) {
return -1;
@ -794,6 +813,7 @@ int ARGBScale(const uint8* src_argb, int src_stride_argb,
int dst_width, int dst_height,
enum FilterMode filtering) {
if (!src_argb || src_width == 0 || src_height == 0 ||
src_width > 32768 || src_height > 32768 ||
!dst_argb || dst_width <= 0 || dst_height <= 0) {
return -1;
}
@ -803,6 +823,37 @@ int ARGBScale(const uint8* src_argb, int src_stride_argb,
return 0;
}
// Scale with YUV conversion to ARGB and clipping.
LIBYUV_API
int YUVToARGBScaleClip(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint32 src_fourcc,
int src_width, int src_height,
uint8* dst_argb, int dst_stride_argb,
uint32 dst_fourcc,
int dst_width, int dst_height,
int clip_x, int clip_y, int clip_width, int clip_height,
enum FilterMode filtering) {
uint8* argb_buffer = (uint8*)malloc(src_width * src_height * 4);
int r;
I420ToARGB(src_y, src_stride_y,
src_u, src_stride_u,
src_v, src_stride_v,
argb_buffer, src_width * 4,
src_width, src_height);
r = ARGBScaleClip(argb_buffer, src_width * 4,
src_width, src_height,
dst_argb, dst_stride_argb,
dst_width, dst_height,
clip_x, clip_y, clip_width, clip_height,
filtering);
free(argb_buffer);
return r;
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv

View File

@ -621,39 +621,31 @@ void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
}
}
void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint16* dst_ptr, int src_width, int src_height) {
void ScaleAddRow_C(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
int x;
assert(src_width > 0);
assert(src_height > 0);
for (x = 0; x < src_width; ++x) {
const uint8* s = src_ptr + x;
unsigned int sum = 0u;
int y;
for (y = 0; y < src_height; ++y) {
sum += s[0];
s += src_stride;
}
// TODO(fbarchard): Consider limitting height to 256 to avoid overflow.
dst_ptr[x] = sum < 65535u ? sum : 65535u;
for (x = 0; x < src_width - 1; x += 2) {
dst_ptr[0] += src_ptr[0];
dst_ptr[1] += src_ptr[1];
src_ptr += 2;
dst_ptr += 2;
}
if (src_width & 1) {
dst_ptr[0] += src_ptr[0];
}
}
void ScaleAddRows_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint32* dst_ptr, int src_width, int src_height) {
void ScaleAddRow_16_C(const uint16* src_ptr, uint32* dst_ptr, int src_width) {
int x;
assert(src_width > 0);
assert(src_height > 0);
for (x = 0; x < src_width; ++x) {
const uint16* s = src_ptr + x;
unsigned int sum = 0u;
int y;
for (y = 0; y < src_height; ++y) {
sum += s[0];
s += src_stride;
}
// No risk of overflow here now
dst_ptr[x] = sum;
for (x = 0; x < src_width - 1; x += 2) {
dst_ptr[0] += src_ptr[0];
dst_ptr[1] += src_ptr[1];
src_ptr += 2;
dst_ptr += 2;
}
if (src_width & 1) {
dst_ptr[0] += src_ptr[0];
}
}
@ -884,32 +876,16 @@ void ScalePlaneVertical(int src_height,
assert(dst_width > 0);
assert(dst_height > 0);
src_argb += (x >> 16) * bpp;
#if defined(HAS_INTERPOLATEROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && dst_width_bytes >= 16) {
InterpolateRow = InterpolateRow_Any_SSE2;
if (IS_ALIGNED(dst_width_bytes, 16)) {
InterpolateRow = InterpolateRow_Unaligned_SSE2;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
InterpolateRow = InterpolateRow_SSE2;
}
}
}
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && dst_width_bytes >= 16) {
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(dst_width_bytes, 16)) {
InterpolateRow = InterpolateRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
InterpolateRow = InterpolateRow_SSSE3;
}
InterpolateRow = InterpolateRow_SSSE3;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && dst_width_bytes >= 32) {
if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_AVX2;
if (IS_ALIGNED(dst_width_bytes, 32)) {
InterpolateRow = InterpolateRow_AVX2;
@ -917,15 +893,15 @@ void ScalePlaneVertical(int src_height,
}
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && dst_width_bytes >= 16) {
if (TestCpuFlag(kCpuHasNEON)) {
InterpolateRow = InterpolateRow_Any_NEON;
if (IS_ALIGNED(dst_width_bytes, 16)) {
InterpolateRow = InterpolateRow_NEON;
}
}
#endif
#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width_bytes >= 4 &&
#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
@ -967,31 +943,23 @@ void ScalePlaneVertical_16(int src_height,
assert(dst_height > 0);
src_argb += (x >> 16) * wpp;
#if defined(HAS_INTERPOLATEROW_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && dst_width_bytes >= 16) {
if (TestCpuFlag(kCpuHasSSE2)) {
InterpolateRow = InterpolateRow_Any_16_SSE2;
if (IS_ALIGNED(dst_width_bytes, 16)) {
InterpolateRow = InterpolateRow_Unaligned_16_SSE2;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
InterpolateRow = InterpolateRow_16_SSE2;
}
InterpolateRow = InterpolateRow_16_SSE2;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && dst_width_bytes >= 16) {
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_16_SSSE3;
if (IS_ALIGNED(dst_width_bytes, 16)) {
InterpolateRow = InterpolateRow_Unaligned_16_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
InterpolateRow = InterpolateRow_16_SSSE3;
}
InterpolateRow = InterpolateRow_16_SSSE3;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_16_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && dst_width_bytes >= 32) {
if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_16_AVX2;
if (IS_ALIGNED(dst_width_bytes, 32)) {
InterpolateRow = InterpolateRow_16_AVX2;
@ -999,15 +967,15 @@ void ScalePlaneVertical_16(int src_height,
}
#endif
#if defined(HAS_INTERPOLATEROW_16_NEON)
if (TestCpuFlag(kCpuHasNEON) && dst_width_bytes >= 16) {
if (TestCpuFlag(kCpuHasNEON)) {
InterpolateRow = InterpolateRow_Any_16_NEON;
if (IS_ALIGNED(dst_width_bytes, 16)) {
InterpolateRow = InterpolateRow_16_NEON;
}
}
#endif
#if defined(HAS_INTERPOLATEROWS_16_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width_bytes >= 4 &&
#if defined(HAS_INTERPOLATEROW_16_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2;
@ -1046,10 +1014,6 @@ enum FilterMode ScaleFilterReduce(int src_width, int src_height,
if (dst_width * 2 >= src_width && dst_height * 2 >= src_height) {
filtering = kFilterBilinear;
}
// If scaling to larger, switch from Box to Bilinear.
if (dst_width >= src_width || dst_height >= src_height) {
filtering = kFilterBilinear;
}
}
if (filtering == kFilterBilinear) {
if (src_height == 1) {

View File

@ -31,7 +31,6 @@ void ScaleRowDown2_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
"beqz $t9, 2f \n"
" nop \n"
".p2align 2 \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
@ -90,7 +89,6 @@ void ScaleRowDown2Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
"bltz $t9, 2f \n"
" nop \n"
".p2align 2 \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
@ -188,7 +186,6 @@ void ScaleRowDown4_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
"beqz $t9, 2f \n"
" nop \n"
".p2align 2 \n"
"1: \n"
"lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t2, 4(%[src_ptr]) \n" // |7|6|5|4|
@ -248,7 +245,6 @@ void ScaleRowDown4Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
"srl $t9, %[dst_width], 1 \n"
"andi $t8, %[dst_width], 1 \n"
".p2align 2 \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t1, 0(%[s1]) \n" // |7|6|5|4|
@ -319,7 +315,6 @@ void ScaleRowDown34_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"
".p2align 2 \n"
"1: \n"
"lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t2, 4(%[src_ptr]) \n" // |7|6|5|4|
@ -368,7 +363,6 @@ void ScaleRowDown34_0_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
".set noreorder \n"
"repl.ph $t3, 3 \n" // 0x00030003
".p2align 2 \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
"lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0|
@ -425,7 +419,6 @@ void ScaleRowDown34_1_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
".set noreorder \n"
"repl.ph $t2, 3 \n" // 0x00030003
".p2align 2 \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
"lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0|
@ -477,7 +470,6 @@ void ScaleRowDown38_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
".set push \n"
".set noreorder \n"
".p2align 2 \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
@ -528,7 +520,6 @@ void ScaleRowDown38_2_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
".set push \n"
".set noreorder \n"
".p2align 2 \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
"lw $t1, 4(%[src_ptr]) \n" // |S7|S6|S5|S4|
@ -586,7 +577,6 @@ void ScaleRowDown38_3_Box_MIPS_DSPR2(const uint8* src_ptr,
".set push \n"
".set noreorder \n"
".p2align 2 \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
"lw $t1, 4(%[src_ptr]) \n" // |S7|S6|S5|S4|

View File

@ -16,7 +16,8 @@ extern "C" {
#endif
// This module is for GCC Neon.
#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)
#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \
!defined(__aarch64__)
// NEON downscalers with interpolation.
// Provided by Fritz Koenig
@ -25,7 +26,6 @@ extern "C" {
void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
asm volatile (
".p2align 2 \n"
"1: \n"
// load even pixels into q0, odd into q1
MEMACCESS(0)
@ -42,13 +42,35 @@ void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
);
}
// Read 32x1 average down and write 16x1.
void ScaleRowDown2Linear_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
asm volatile (
"1: \n"
MEMACCESS(0)
"vld1.8 {q0, q1}, [%0]! \n" // load pixels and post inc
"subs %2, %2, #16 \n" // 16 processed per loop
"vpaddl.u8 q0, q0 \n" // add adjacent
"vpaddl.u8 q1, q1 \n"
"vrshrn.u16 d0, q0, #1 \n" // downshift, round and pack
"vrshrn.u16 d1, q1, #1 \n"
MEMACCESS(1)
"vst1.8 {q0}, [%1]! \n"
"bgt 1b \n"
: "+r"(src_ptr), // %0
"+r"(dst), // %1
"+r"(dst_width) // %2
:
: "q0", "q1" // Clobber List
);
}
// Read 32x2 average down and write 16x1.
void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
asm volatile (
// change the stride to row 2 pointer
"add %1, %0 \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {q0, q1}, [%0]! \n" // load row 1 and post inc
@ -76,7 +98,6 @@ void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
asm volatile (
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
@ -98,7 +119,6 @@ void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
const uint8* src_ptr2 = src_ptr + src_stride * 2;
const uint8* src_ptr3 = src_ptr + src_stride * 3;
asm volatile (
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {q0}, [%0]! \n" // load up 16x4
@ -137,7 +157,6 @@ void ScaleRowDown34_NEON(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
asm volatile (
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
@ -160,7 +179,6 @@ void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
asm volatile (
"vmov.u8 d24, #3 \n"
"add %3, %0 \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
@ -220,7 +238,6 @@ void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
asm volatile (
"vmov.u8 d24, #3 \n"
"add %3, %0 \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
@ -275,7 +292,6 @@ void ScaleRowDown38_NEON(const uint8* src_ptr,
asm volatile (
MEMACCESS(3)
"vld1.8 {q3}, [%3] \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {d0, d1, d2, d3}, [%0]! \n"
@ -309,7 +325,6 @@ void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
MEMACCESS(7)
"vld1.8 {q15}, [%7] \n"
"add %3, %0 \n"
".p2align 2 \n"
"1: \n"
// d0 = 00 40 01 41 02 42 03 43
@ -425,7 +440,6 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
MEMACCESS(5)
"vld1.8 {q14}, [%5] \n"
"add %3, %0 \n"
".p2align 2 \n"
"1: \n"
// d0 = 00 40 01 41 02 42 03 43
@ -516,6 +530,110 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
);
}
void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint16* dst_ptr, int src_width, int src_height) {
const uint8* src_tmp = NULL;
asm volatile (
"1: \n"
"mov %0, %1 \n"
"mov r12, %5 \n"
"veor q2, q2, q2 \n"
"veor q3, q3, q3 \n"
"2: \n"
// load 16 pixels into q0
MEMACCESS(0)
"vld1.8 {q0}, [%0], %3 \n"
"vaddw.u8 q3, q3, d1 \n"
"vaddw.u8 q2, q2, d0 \n"
"subs r12, r12, #1 \n"
"bgt 2b \n"
MEMACCESS(2)
"vst1.16 {q2, q3}, [%2]! \n" // store pixels
"add %1, %1, #16 \n"
"subs %4, %4, #16 \n" // 16 processed per loop
"bgt 1b \n"
: "+r"(src_tmp), // %0
"+r"(src_ptr), // %1
"+r"(dst_ptr), // %2
"+r"(src_stride), // %3
"+r"(src_width), // %4
"+r"(src_height) // %5
:
: "memory", "cc", "r12", "q0", "q1", "q2", "q3" // Clobber List
);
}
// TODO(Yang Zhang): Investigate less load instructions for
// the x/dx stepping
#define LOAD2_DATA8_LANE(n) \
"lsr %5, %3, #16 \n" \
"add %6, %1, %5 \n" \
"add %3, %3, %4 \n" \
MEMACCESS(6) \
"vld2.8 {d6["#n"], d7["#n"]}, [%6] \n"
void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx) {
int dx_offset[4] = {0, 1, 2, 3};
int* tmp = dx_offset;
const uint8* src_tmp = src_ptr;
asm volatile (
"vdup.32 q0, %3 \n" // x
"vdup.32 q1, %4 \n" // dx
"vld1.32 {q2}, [%5] \n" // 0 1 2 3
"vshl.i32 q3, q1, #2 \n" // 4 * dx
"vmul.s32 q1, q1, q2 \n"
// x , x + 1 * dx, x + 2 * dx, x + 3 * dx
"vadd.s32 q1, q1, q0 \n"
// x + 4 * dx, x + 5 * dx, x + 6 * dx, x + 7 * dx
"vadd.s32 q2, q1, q3 \n"
"vshl.i32 q0, q3, #1 \n" // 8 * dx
"1: \n"
LOAD2_DATA8_LANE(0)
LOAD2_DATA8_LANE(1)
LOAD2_DATA8_LANE(2)
LOAD2_DATA8_LANE(3)
LOAD2_DATA8_LANE(4)
LOAD2_DATA8_LANE(5)
LOAD2_DATA8_LANE(6)
LOAD2_DATA8_LANE(7)
"vmov q10, q1 \n"
"vmov q11, q2 \n"
"vuzp.16 q10, q11 \n"
"vmovl.u8 q8, d6 \n"
"vmovl.u8 q9, d7 \n"
"vsubl.s16 q11, d18, d16 \n"
"vsubl.s16 q12, d19, d17 \n"
"vmovl.u16 q13, d20 \n"
"vmovl.u16 q10, d21 \n"
"vmul.s32 q11, q11, q13 \n"
"vmul.s32 q12, q12, q10 \n"
"vshrn.s32 d18, q11, #16 \n"
"vshrn.s32 d19, q12, #16 \n"
"vadd.s16 q8, q8, q9 \n"
"vmovn.s16 d6, q8 \n"
MEMACCESS(0)
"vst1.8 {d6}, [%0]! \n" // store pixels
"vadd.s32 q1, q1, q0 \n"
"vadd.s32 q2, q2, q0 \n"
"subs %2, %2, #8 \n" // 8 processed per loop
"bgt 1b \n"
: "+r"(dst_ptr), // %0
"+r"(src_ptr), // %1
"+r"(dst_width), // %2
"+r"(x), // %3
"+r"(dx), // %4
"+r"(tmp), // %5
"+r"(src_tmp) // %6
:
: "memory", "cc", "q0", "q1", "q2", "q3",
"q8", "q9", "q10", "q11", "q12", "q13"
);
}
#undef LOAD2_DATA8_LANE
// 16x2 -> 16x1
void ScaleFilterRows_NEON(uint8* dst_ptr,
const uint8* src_ptr, ptrdiff_t src_stride,
@ -618,7 +736,6 @@ void ScaleFilterRows_NEON(uint8* dst_ptr,
void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
asm volatile (
".p2align 2 \n"
"1: \n"
// load even pixels into q0, odd into q1
MEMACCESS(0)
@ -639,12 +756,39 @@ void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
);
}
void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb, ptrdiff_t src_stride,
uint8* dst_argb, int dst_width) {
asm volatile (
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
MEMACCESS(0)
"vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels.
"subs %2, %2, #8 \n" // 8 processed per loop
"vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts.
"vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
"vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts.
"vpaddl.u8 q3, q3 \n" // A 16 bytes -> 8 shorts.
"vrshrn.u16 d0, q0, #1 \n" // downshift, round and pack
"vrshrn.u16 d1, q1, #1 \n"
"vrshrn.u16 d2, q2, #1 \n"
"vrshrn.u16 d3, q3, #1 \n"
MEMACCESS(1)
"vst4.8 {d0, d1, d2, d3}, [%1]! \n"
"bgt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_argb), // %1
"+r"(dst_width) // %2
:
: "memory", "cc", "q0", "q1", "q2", "q3" // Clobber List
);
}
void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
asm volatile (
// change the stride to row 2 pointer
"add %1, %1, %0 \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
@ -685,7 +829,6 @@ void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx, uint8* dst_argb, int dst_width) {
asm volatile (
"mov r12, %3, lsl #2 \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.32 {d0[0]}, [%0], r12 \n"
@ -715,7 +858,6 @@ void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
asm volatile (
"mov r12, %4, lsl #2 \n"
"add %1, %1, %0 \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {d0}, [%0], r12 \n" // Read 4 2x2 blocks -> 2x1
@ -756,7 +898,118 @@ void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
);
}
#endif // __ARM_NEON__
// TODO(Yang Zhang): Investigate less load instructions for
// the x/dx stepping
#define LOAD1_DATA32_LANE(dn, n) \
"lsr %5, %3, #16 \n" \
"add %6, %1, %5, lsl #2 \n" \
"add %3, %3, %4 \n" \
MEMACCESS(6) \
"vld1.32 {"#dn"["#n"]}, [%6] \n"
void ScaleARGBCols_NEON(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx) {
int tmp = 0;
const uint8* src_tmp = src_argb;
asm volatile (
"1: \n"
LOAD1_DATA32_LANE(d0, 0)
LOAD1_DATA32_LANE(d0, 1)
LOAD1_DATA32_LANE(d1, 0)
LOAD1_DATA32_LANE(d1, 1)
LOAD1_DATA32_LANE(d2, 0)
LOAD1_DATA32_LANE(d2, 1)
LOAD1_DATA32_LANE(d3, 0)
LOAD1_DATA32_LANE(d3, 1)
MEMACCESS(0)
"vst1.32 {q0, q1}, [%0]! \n" // store pixels
"subs %2, %2, #8 \n" // 8 processed per loop
"bgt 1b \n"
: "+r"(dst_argb), // %0
"+r"(src_argb), // %1
"+r"(dst_width), // %2
"+r"(x), // %3
"+r"(dx), // %4
"+r"(tmp), // %5
"+r"(src_tmp) // %6
:
: "memory", "cc", "q0", "q1"
);
}
#undef LOAD1_DATA32_LANE
// TODO(Yang Zhang): Investigate less load instructions for
// the x/dx stepping
#define LOAD2_DATA32_LANE(dn1, dn2, n) \
"lsr %5, %3, #16 \n" \
"add %6, %1, %5, lsl #2 \n" \
"add %3, %3, %4 \n" \
MEMACCESS(6) \
"vld2.32 {"#dn1"["#n"], "#dn2"["#n"]}, [%6] \n"
void ScaleARGBFilterCols_NEON(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx) {
int dx_offset[4] = {0, 1, 2, 3};
int* tmp = dx_offset;
const uint8* src_tmp = src_argb;
asm volatile (
"vdup.32 q0, %3 \n" // x
"vdup.32 q1, %4 \n" // dx
"vld1.32 {q2}, [%5] \n" // 0 1 2 3
"vshl.i32 q9, q1, #2 \n" // 4 * dx
"vmul.s32 q1, q1, q2 \n"
"vmov.i8 q3, #0x7f \n" // 0x7F
"vmov.i16 q15, #0x7f \n" // 0x7F
// x , x + 1 * dx, x + 2 * dx, x + 3 * dx
"vadd.s32 q8, q1, q0 \n"
"1: \n"
// d0, d1: a
// d2, d3: b
LOAD2_DATA32_LANE(d0, d2, 0)
LOAD2_DATA32_LANE(d0, d2, 1)
LOAD2_DATA32_LANE(d1, d3, 0)
LOAD2_DATA32_LANE(d1, d3, 1)
"vshrn.i32 d22, q8, #9 \n"
"vand.16 d22, d22, d30 \n"
"vdup.8 d24, d22[0] \n"
"vdup.8 d25, d22[2] \n"
"vdup.8 d26, d22[4] \n"
"vdup.8 d27, d22[6] \n"
"vext.8 d4, d24, d25, #4 \n"
"vext.8 d5, d26, d27, #4 \n" // f
"veor.8 q10, q2, q3 \n" // 0x7f ^ f
"vmull.u8 q11, d0, d20 \n"
"vmull.u8 q12, d1, d21 \n"
"vmull.u8 q13, d2, d4 \n"
"vmull.u8 q14, d3, d5 \n"
"vadd.i16 q11, q11, q13 \n"
"vadd.i16 q12, q12, q14 \n"
"vshrn.i16 d0, q11, #7 \n"
"vshrn.i16 d1, q12, #7 \n"
MEMACCESS(0)
"vst1.32 {d0, d1}, [%0]! \n" // store pixels
"vadd.s32 q8, q8, q9 \n"
"subs %2, %2, #4 \n" // 4 processed per loop
"bgt 1b \n"
: "+r"(dst_argb), // %0
"+r"(src_argb), // %1
"+r"(dst_width), // %2
"+r"(x), // %3
"+r"(dx), // %4
"+r"(tmp), // %5
"+r"(src_tmp) // %6
:
: "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9",
"q10", "q11", "q12", "q13", "q14", "q15"
);
}
#undef LOAD2_DATA32_LANE
#endif // defined(__ARM_NEON__) && !defined(__aarch64__)
#ifdef __cplusplus
} // extern "C"

View File

@ -8,16 +8,18 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/scale.h"
#include "libyuv/row.h"
#include "libyuv/scale_row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// This module is for GCC Neon.
// This module is for GCC Neon armv8 64 bit.
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
#ifdef HAS_SCALEROWDOWN2_NEON
// Read 32x1 throw away even pixels, and write 16x1.
void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
@ -25,11 +27,11 @@ void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
"1: \n"
// load even pixels into v0, odd into v1
MEMACCESS(0)
"ld2 {v0.16b, v1.16b}, [%0], #32 \n"
"subs %2, %2, #16 \n" // 16 processed per loop
"ld2 {v0.16b,v1.16b}, [%0], #32 \n"
"subs %w2, %w2, #16 \n" // 16 processed per loop
MEMACCESS(1)
"st1 {v1.16b}, [%1], #16 \n" // store odd pixels
"bgt 1b \n"
"b.gt 1b \n"
: "+r"(src_ptr), // %0
"+r"(dst), // %1
"+r"(dst_width) // %2
@ -37,9 +39,30 @@ void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
: "v0", "v1" // Clobber List
);
}
#endif //HAS_SCALEROWDOWN2_NEON
#ifdef HAS_SCALEROWDOWN2_NEON
// Read 32x1 average down and write 16x1.
void ScaleRowDown2Linear_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
asm volatile (
"1: \n"
MEMACCESS(0)
"ld1 {v0.16b,v1.16b}, [%0], #32 \n" // load pixels and post inc
"subs %w2, %w2, #16 \n" // 16 processed per loop
"uaddlp v0.8h, v0.16b \n" // add adjacent
"uaddlp v1.8h, v1.16b \n"
"rshrn v0.8b, v0.8h, #1 \n" // downshift, round and pack
"rshrn2 v0.16b, v1.8h, #1 \n"
MEMACCESS(1)
"st1 {v0.16b}, [%1], #16 \n"
"b.gt 1b \n"
: "+r"(src_ptr), // %0
"+r"(dst), // %1
"+r"(dst_width) // %2
:
: "v0", "v1" // Clobber List
);
}
// Read 32x2 average down and write 16x1.
void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
@ -48,10 +71,10 @@ void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
"add %1, %1, %0 \n"
"1: \n"
MEMACCESS(0)
"ld1 {v0.16b, v1.16b}, [%0], #32 \n" // load row 1 and post inc
"ld1 {v0.16b,v1.16b}, [%0], #32 \n" // load row 1 and post inc
MEMACCESS(1)
"ld1 {v2.16b, v3.16b}, [%1], #32 \n" // load row 2 and post inc
"subs %3, %3, #16 \n" // 16 processed per loop
"subs %w3, %w3, #16 \n" // 16 processed per loop
"uaddlp v0.8h, v0.16b \n" // row 1 add adjacent
"uaddlp v1.8h, v1.16b \n"
"uadalp v0.8h, v2.16b \n" // row 2 add adjacent + row1
@ -60,7 +83,7 @@ void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
"rshrn2 v0.16b, v1.8h, #2 \n"
MEMACCESS(2)
"st1 {v0.16b}, [%2], #16 \n"
"bgt 1b \n"
"b.gt 1b \n"
: "+r"(src_ptr), // %0
"+r"(src_stride), // %1
"+r"(dst), // %2
@ -69,19 +92,17 @@ void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
: "v0", "v1", "v2", "v3" // Clobber List
);
}
#endif //HAS_SCALEROWDOWN2_NEON
#ifdef HAS_SCALEROWDOWN4_NEON
void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
asm volatile (
"1: \n"
MEMACCESS(0)
"ld4 {v0.8b-3.8b}, [%0], #32 \n" // src line 0
"subs %2, %2, #8 \n" // 8 processed per loop
"ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // src line 0
"subs %w2, %w2, #8 \n" // 8 processed per loop
MEMACCESS(1)
"st1 {v2.8b}, [%1], #8 \n"
"bgt 1b \n"
"b.gt 1b \n"
: "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1
"+r"(dst_width) // %2
@ -89,9 +110,7 @@ void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
: "v0", "v1", "v2", "v3", "memory", "cc"
);
}
#endif //HAS_SCALEROWDOWN4_NEON
#ifdef HAS_SCALEROWDOWN4_NEON
void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
const uint8* src_ptr1 = src_ptr + src_stride;
@ -102,12 +121,12 @@ asm volatile (
MEMACCESS(0)
"ld1 {v0.16b}, [%0], #16 \n" // load up 16x4
MEMACCESS(3)
"ld1 {v1.16b}, [%3], #16 \n"
"ld1 {v1.16b}, [%2], #16 \n"
MEMACCESS(4)
"ld1 {v2.16b}, [%4], #16 \n"
"ld1 {v2.16b}, [%3], #16 \n"
MEMACCESS(5)
"ld1 {v3.16b}, [%5], #16 \n"
"subs %2, %2, #4 \n"
"ld1 {v3.16b}, [%4], #16 \n"
"subs %w5, %w5, #4 \n"
"uaddlp v0.8h, v0.16b \n"
"uadalp v0.8h, v1.16b \n"
"uadalp v0.8h, v2.16b \n"
@ -116,20 +135,18 @@ asm volatile (
"rshrn v0.8b, v0.8h, #4 \n" // divide by 16 w/rounding
MEMACCESS(1)
"st1 {v0.s}[0], [%1], #4 \n"
"bgt 1b \n"
"b.gt 1b \n"
: "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1
"+r"(dst_width), // %2
"+r"(src_ptr1), // %3
"+r"(src_ptr2), // %4
"+r"(src_ptr3) // %5
"+r"(src_ptr1), // %2
"+r"(src_ptr2), // %3
"+r"(src_ptr3), // %4
"+r"(dst_width) // %5
:
: "v0", "v1", "v2", "v3", "memory", "cc"
);
}
#endif //HAS_SCALEROWDOWN4_NEON
#ifdef HAS_SCALEROWDOWN34_NEON
// Down scale from 4 to 3 pixels. Use the neon multilane read/write
// to load up the every 4th pixel into a 4 different registers.
// Point samples 32 pixels to 24 pixels.
@ -139,12 +156,12 @@ void ScaleRowDown34_NEON(const uint8* src_ptr,
asm volatile (
"1: \n"
MEMACCESS(0)
"ld4 {v0.8b-v3.8b}, [%0], #32 \n" // src line 0
"subs %2, %2, #24 \n"
"mov v2.8b, v3.8b \n" // order v0, v1, v2
"ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // src line 0
"subs %w2, %w2, #24 \n"
"orr v2.16b, v3.16b, v3.16b \n" // order v0, v1, v2
MEMACCESS(1)
"st3 {v0.8b-v2.8b}, [%1], #24 \n"
"bgt 1b \n"
"st3 {v0.8b,v1.8b,v2.8b}, [%1], #24 \n"
"b.gt 1b \n"
: "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1
"+r"(dst_width) // %2
@ -152,9 +169,7 @@ void ScaleRowDown34_NEON(const uint8* src_ptr,
: "v0", "v1", "v2", "v3", "memory", "cc"
);
}
#endif //HAS_SCALEROWDOWN34_NEON
#ifdef HAS_SCALEROWDOWN34_NEON
void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
@ -163,10 +178,10 @@ void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
"add %3, %3, %0 \n"
"1: \n"
MEMACCESS(0)
"ld4 {v0.8b-v3.8b}, [%0], #32 \n" // src line 0
"ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // src line 0
MEMACCESS(3)
"ld4 {v4.8b-v7.8b}, [%3], #32 \n" // src line 1
"subs %2, %2, #24 \n"
"ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%3], #32 \n" // src line 1
"subs %w2, %w2, #24 \n"
// filter src line 0 with src line 1
// expand chars to shorts to allow for room
@ -202,9 +217,9 @@ void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
"uqrshrn v2.8b, v16.8h, #2 \n"
MEMACCESS(1)
"st3 {v0.8b-v2.8b}, [%1], #24 \n"
"st3 {v0.8b,v1.8b,v2.8b}, [%1], #24 \n"
"bgt 1b \n"
"b.gt 1b \n"
: "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1
"+r"(dst_width), // %2
@ -214,9 +229,7 @@ void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
"v20", "memory", "cc"
);
}
#endif //ScaleRowDown34_0_Box_NEON
#ifdef HAS_SCALEROWDOWN34_NEON
void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
@ -225,10 +238,10 @@ void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
"add %3, %3, %0 \n"
"1: \n"
MEMACCESS(0)
"ld4 {v0.8b-v3.8b}, [%0], #32 \n" // src line 0
"ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // src line 0
MEMACCESS(3)
"ld4 {v4.8b-v7.8b}, [%3], #32 \n" // src line 1
"subs %2, %2, #24 \n"
"ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%3], #32 \n" // src line 1
"subs %w2, %w2, #24 \n"
// average src line 0 with src line 1
"urhadd v0.8b, v0.8b, v4.8b \n"
"urhadd v1.8b, v1.8b, v5.8b \n"
@ -249,8 +262,8 @@ void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
"uqrshrn v2.8b, v4.8h, #2 \n"
MEMACCESS(1)
"st3 {v0.8b-v2.8b}, [%1], #24 \n"
"bgt 1b \n"
"st3 {v0.8b,v1.8b,v2.8b}, [%1], #24 \n"
"b.gt 1b \n"
: "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1
"+r"(dst_width), // %2
@ -259,9 +272,7 @@ void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "memory", "cc"
);
}
#endif //HAS_SCALEROWDOWN34_NEON
#ifdef HAS_SCALEROWDOWN38_NEON
static uvec8 kShuf38 =
{ 0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27, 30, 0, 0, 0, 0 };
static uvec8 kShuf38_2 =
@ -282,14 +293,14 @@ void ScaleRowDown38_NEON(const uint8* src_ptr,
"ld1 {v3.16b}, [%3] \n"
"1: \n"
MEMACCESS(0)
"ld1 {v0.16b, v1.16b}, [%0], #32 \n"
"subs %2, %2, #12 \n"
"tbl v2.16b, {v0.16b, v1.16b}, v3.16b \n"
"ld1 {v0.16b,v1.16b}, [%0], #32 \n"
"subs %w2, %w2, #12 \n"
"tbl v2.16b, {v0.16b,v1.16b}, v3.16b \n"
MEMACCESS(1)
"st1 {v2.8b}, [%1], #8 \n"
MEMACCESS(1)
"st1 {v2.s}[2], [%1], #4 \n"
"bgt 1b \n"
"b.gt 1b \n"
: "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1
"+r"(dst_width) // %2
@ -298,14 +309,12 @@ void ScaleRowDown38_NEON(const uint8* src_ptr,
);
}
#endif //HAS_SCALEROWDOWN38_NEON
#ifdef HAS_SCALEROWDOWN38_NEON
// 32x3 -> 12x1
void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
const uint8* src_ptr1 = src_ptr + src_stride * 2;
ptrdiff_t tmp_src_stride = src_stride;
asm volatile (
MEMACCESS(5)
@ -314,7 +323,7 @@ void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
"ld1 {v30.16b}, [%6] \n"
MEMACCESS(7)
"ld1 {v31.8h}, [%7] \n"
"add %3, %3, %0 \n"
"add %2, %2, %0 \n"
"1: \n"
// 00 40 01 41 02 42 03 43
@ -322,12 +331,12 @@ void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
// 20 60 21 61 22 62 23 63
// 30 70 31 71 32 72 33 73
MEMACCESS(0)
"ld4 {v0.8b-v3.8b}, [%0], #32 \n"
"ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n"
MEMACCESS(3)
"ld4 {v4.8b-v7.8b}, [%3], #32 \n"
"ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%2], #32 \n"
MEMACCESS(4)
"ld4 {v16.8b-v19.8b}, [%4], #32 \n"
"subs %2, %2, #12 \n"
"ld4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%3], #32 \n"
"subs %w4, %w4, #12 \n"
// Shuffle the input data around to get align the data
// so adjacent data can be added. 0,1 - 2,3 - 4,5 - 6,7
@ -414,12 +423,12 @@ void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
"st1 {v3.8b}, [%1], #8 \n"
MEMACCESS(1)
"st1 {v3.s}[2], [%1], #4 \n"
"bgt 1b \n"
"b.gt 1b \n"
: "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1
"+r"(dst_width), // %2
"+r"(src_stride), // %3
"+r"(src_ptr1) // %4
"+r"(tmp_src_stride), // %2
"+r"(src_ptr1), // %3
"+r"(dst_width) // %4
: "r"(&kMult38_Div6), // %5
"r"(&kShuf38_2), // %6
"r"(&kMult38_Div9) // %7
@ -428,19 +437,19 @@ void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
"v30", "v31", "memory", "cc"
);
}
#endif //HAS_SCALEROWDOWN38_NEON
#ifdef HAS_SCALEROWDOWN38_NEON
// 32x2 -> 12x1
void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
// TODO(fbarchard): use src_stride directly for clang 3.5+.
ptrdiff_t tmp_src_stride = src_stride;
asm volatile (
MEMACCESS(4)
"ld1 {v30.8h}, [%4] \n"
MEMACCESS(5)
"ld1 {v31.16b}, [%5] \n"
"add %3, %3, %0 \n"
"add %2, %2, %0 \n"
"1: \n"
// 00 40 01 41 02 42 03 43
@ -448,10 +457,10 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
// 20 60 21 61 22 62 23 63
// 30 70 31 71 32 72 33 73
MEMACCESS(0)
"ld4 {v0.8b-v3.8b}, [%0], #32 \n"
"ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n"
MEMACCESS(3)
"ld4 {v4.8b-v7.8b}, [%3], #32 \n"
"subs %2, %2, #12 \n"
"ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%2], #32 \n"
"subs %w3, %w3, #12 \n"
// Shuffle the input data around to get align the data
// so adjacent data can be added. 0,1 - 2,3 - 4,5 - 6,7
@ -524,18 +533,124 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
"st1 {v3.8b}, [%1], #8 \n"
MEMACCESS(1)
"st1 {v3.s}[2], [%1], #4 \n"
"bgt 1b \n"
: "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1
"+r"(dst_width), // %2
"+r"(src_stride) // %3
: "r"(&kMult38_Div6), // %4
"r"(&kShuf38_2) // %5
"b.gt 1b \n"
: "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1
"+r"(tmp_src_stride), // %2
"+r"(dst_width) // %3
: "r"(&kMult38_Div6), // %4
"r"(&kShuf38_2) // %5
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
"v18", "v19", "v30", "v31", "memory", "cc"
);
}
#endif //HAS_SCALEROWDOWN38_NEON
void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint16* dst_ptr, int src_width, int src_height) {
const uint8* src_tmp = NULL;
asm volatile (
"1: \n"
"mov %0, %1 \n"
"mov w12, %w5 \n"
"eor v2.16b, v2.16b, v2.16b \n"
"eor v3.16b, v3.16b, v3.16b \n"
"2: \n"
// load 16 pixels into q0
MEMACCESS(0)
"ld1 {v0.16b}, [%0], %3 \n"
"uaddw2 v3.8h, v3.8h, v0.16b \n"
"uaddw v2.8h, v2.8h, v0.8b \n"
"subs w12, w12, #1 \n"
"b.gt 2b \n"
MEMACCESS(2)
"st1 {v2.8h, v3.8h}, [%2], #32 \n" // store pixels
"add %1, %1, #16 \n"
"subs %w4, %w4, #16 \n" // 16 processed per loop
"b.gt 1b \n"
: "+r"(src_tmp), // %0
"+r"(src_ptr), // %1
"+r"(dst_ptr), // %2
"+r"(src_stride), // %3
"+r"(src_width), // %4
"+r"(src_height) // %5
:
: "memory", "cc", "w12", "v0", "v1", "v2", "v3" // Clobber List
);
}
// TODO(Yang Zhang): Investigate less load instructions for
// the x/dx stepping
#define LOAD2_DATA8_LANE(n) \
"lsr %5, %3, #16 \n" \
"add %6, %1, %5 \n" \
"add %3, %3, %4 \n" \
MEMACCESS(6) \
"ld2 {v4.b, v5.b}["#n"], [%6] \n"
void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx) {
int dx_offset[4] = {0, 1, 2, 3};
int* tmp = dx_offset;
const uint8* src_tmp = src_ptr;
int64 dst_width64 = (int64) dst_width; // Work around ios 64 bit warning.
int64 x64 = (int64) x;
int64 dx64 = (int64) dx;
asm volatile (
"dup v0.4s, %w3 \n" // x
"dup v1.4s, %w4 \n" // dx
"ld1 {v2.4s}, [%5] \n" // 0 1 2 3
"shl v3.4s, v1.4s, #2 \n" // 4 * dx
"mul v1.4s, v1.4s, v2.4s \n"
// x , x + 1 * dx, x + 2 * dx, x + 3 * dx
"add v1.4s, v1.4s, v0.4s \n"
// x + 4 * dx, x + 5 * dx, x + 6 * dx, x + 7 * dx
"add v2.4s, v1.4s, v3.4s \n"
"shl v0.4s, v3.4s, #1 \n" // 8 * dx
"1: \n"
LOAD2_DATA8_LANE(0)
LOAD2_DATA8_LANE(1)
LOAD2_DATA8_LANE(2)
LOAD2_DATA8_LANE(3)
LOAD2_DATA8_LANE(4)
LOAD2_DATA8_LANE(5)
LOAD2_DATA8_LANE(6)
LOAD2_DATA8_LANE(7)
"mov v6.16b, v1.16b \n"
"mov v7.16b, v2.16b \n"
"uzp1 v6.8h, v6.8h, v7.8h \n"
"ushll v4.8h, v4.8b, #0 \n"
"ushll v5.8h, v5.8b, #0 \n"
"ssubl v16.4s, v5.4h, v4.4h \n"
"ssubl2 v17.4s, v5.8h, v4.8h \n"
"ushll v7.4s, v6.4h, #0 \n"
"ushll2 v6.4s, v6.8h, #0 \n"
"mul v16.4s, v16.4s, v7.4s \n"
"mul v17.4s, v17.4s, v6.4s \n"
"shrn v6.4h, v16.4s, #16 \n"
"shrn2 v6.8h, v17.4s, #16 \n"
"add v4.8h, v4.8h, v6.8h \n"
"xtn v4.8b, v4.8h \n"
MEMACCESS(0)
"st1 {v4.8b}, [%0], #8 \n" // store pixels
"add v1.4s, v1.4s, v0.4s \n"
"add v2.4s, v2.4s, v0.4s \n"
"subs %w2, %w2, #8 \n" // 8 processed per loop
"b.gt 1b \n"
: "+r"(dst_ptr), // %0
"+r"(src_ptr), // %1
"+r"(dst_width64), // %2
"+r"(x64), // %3
"+r"(dx64), // %4
"+r"(tmp), // %5
"+r"(src_tmp) // %6
:
: "memory", "cc", "v0", "v1", "v2", "v3",
"v4", "v5", "v6", "v7", "v16", "v17"
);
}
#undef LOAD2_DATA8_LANE
// 16x2 -> 16x1
void ScaleFilterRows_NEON(uint8* dst_ptr,
@ -543,15 +658,15 @@ void ScaleFilterRows_NEON(uint8* dst_ptr,
int dst_width, int source_y_fraction) {
int y_fraction = 256 - source_y_fraction;
asm volatile (
"cmp %4, #0 \n"
"beq 100f \n"
"cmp %w4, #0 \n"
"b.eq 100f \n"
"add %2, %2, %1 \n"
"cmp %4, #64 \n"
"beq 75f \n"
"cmp %4, #128 \n"
"beq 50f \n"
"cmp %4, #192 \n"
"beq 25f \n"
"cmp %w4, #64 \n"
"b.eq 75f \n"
"cmp %w4, #128 \n"
"b.eq 50f \n"
"cmp %w4, #192 \n"
"b.eq 25f \n"
"dup v5.8b, %w4 \n"
"dup v4.8b, %w5 \n"
@ -561,7 +676,7 @@ void ScaleFilterRows_NEON(uint8* dst_ptr,
"ld1 {v0.16b}, [%1], #16 \n"
MEMACCESS(2)
"ld1 {v1.16b}, [%2], #16 \n"
"subs %3, %3, #16 \n"
"subs %w3, %w3, #16 \n"
"umull v6.8h, v0.8b, v4.8b \n"
"umull2 v7.8h, v0.16b, v4.16b \n"
"umlal v6.8h, v1.8b, v5.8b \n"
@ -570,7 +685,7 @@ void ScaleFilterRows_NEON(uint8* dst_ptr,
"rshrn2 v0.16b, v7.8h, #8 \n"
MEMACCESS(0)
"st1 {v0.16b}, [%0], #16 \n"
"bgt 1b \n"
"b.gt 1b \n"
"b 99f \n"
// Blend 25 / 75.
@ -579,12 +694,12 @@ void ScaleFilterRows_NEON(uint8* dst_ptr,
"ld1 {v0.16b}, [%1], #16 \n"
MEMACCESS(2)
"ld1 {v1.16b}, [%2], #16 \n"
"subs %3, %3, #16 \n"
"subs %w3, %w3, #16 \n"
"urhadd v0.16b, v0.16b, v1.16b \n"
"urhadd v0.16b, v0.16b, v1.16b \n"
MEMACCESS(0)
"st1 {v0.16b}, [%0], #16 \n"
"bgt 25b \n"
"b.gt 25b \n"
"b 99f \n"
// Blend 50 / 50.
@ -593,11 +708,11 @@ void ScaleFilterRows_NEON(uint8* dst_ptr,
"ld1 {v0.16b}, [%1], #16 \n"
MEMACCESS(2)
"ld1 {v1.16b}, [%2], #16 \n"
"subs %3, %3, #16 \n"
"subs %w3, %w3, #16 \n"
"urhadd v0.16b, v0.16b, v1.16b \n"
MEMACCESS(0)
"st1 {v0.16b}, [%0], #16 \n"
"bgt 50b \n"
"b.gt 50b \n"
"b 99f \n"
// Blend 75 / 25.
@ -606,22 +721,22 @@ void ScaleFilterRows_NEON(uint8* dst_ptr,
"ld1 {v1.16b}, [%1], #16 \n"
MEMACCESS(2)
"ld1 {v0.16b}, [%2], #16 \n"
"subs %3, %3, #16 \n"
"subs %w3, %w3, #16 \n"
"urhadd v0.16b, v0.16b, v1.16b \n"
"urhadd v0.16b, v0.16b, v1.16b \n"
MEMACCESS(0)
"st1 {v0.16b}, [%0], #16 \n"
"bgt 75b \n"
"b.gt 75b \n"
"b 99f \n"
// Blend 100 / 0 - Copy row unchanged.
"100: \n"
MEMACCESS(1)
"ld1 {v0.16b}, [%1], #16 \n"
"subs %3, %3, #16 \n"
"subs %w3, %w3, #16 \n"
MEMACCESS(0)
"st1 {v0.16b}, [%0], #16 \n"
"bgt 100b \n"
"b.gt 100b \n"
"99: \n"
MEMACCESS(0)
@ -637,7 +752,6 @@ void ScaleFilterRows_NEON(uint8* dst_ptr,
);
}
#ifdef HAS_SCALEARGBROWDOWN2_NEON
void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
asm volatile (
@ -647,12 +761,12 @@ void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
"ld2 {v0.4s, v1.4s}, [%0], #32 \n"
MEMACCESS (0)
"ld2 {v2.4s, v3.4s}, [%0], #32 \n"
"subs %2, %2, #8 \n" // 8 processed per loop
"subs %w2, %w2, #8 \n" // 8 processed per loop
MEMACCESS (1)
"st1 {v1.16b}, [%1], #16 \n" // store odd pixels
MEMACCESS (1)
"st1 {v3.16b}, [%1], #16 \n"
"bgt 1b \n"
"b.gt 1b \n"
: "+r" (src_ptr), // %0
"+r" (dst), // %1
"+r" (dst_width) // %2
@ -660,9 +774,34 @@ void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
: "memory", "cc", "v0", "v1", "v2", "v3" // Clobber List
);
}
#endif //HAS_SCALEARGBROWDOWN2_NEON
#ifdef HAS_SCALEARGBROWDOWN2_NEON
void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb, ptrdiff_t src_stride,
uint8* dst_argb, int dst_width) {
asm volatile (
"1: \n"
MEMACCESS (0)
// load 8 ARGB pixels.
"ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n"
"subs %w2, %w2, #8 \n" // 8 processed per loop.
"uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts.
"uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts.
"uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts.
"uaddlp v3.8h, v3.16b \n" // A 16 bytes -> 8 shorts.
"rshrn v0.8b, v0.8h, #1 \n" // downshift, round and pack
"rshrn v1.8b, v1.8h, #1 \n"
"rshrn v2.8b, v2.8h, #1 \n"
"rshrn v3.8b, v3.8h, #1 \n"
MEMACCESS (1)
"st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n"
"b.gt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_argb), // %1
"+r"(dst_width) // %2
:
: "memory", "cc", "v0", "v1", "v2", "v3" // Clobber List
);
}
void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
asm volatile (
@ -670,14 +809,14 @@ void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
"add %1, %1, %0 \n"
"1: \n"
MEMACCESS (0)
"ld4 {v0.16b - v3.16b}, [%0], #64 \n" // load 8 ARGB pixels.
"subs %3, %3, #8 \n" // 8 processed per loop.
"ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 8 ARGB pixels.
"subs %w3, %w3, #8 \n" // 8 processed per loop.
"uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts.
"uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts.
"uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts.
"uaddlp v3.8h, v3.16b \n" // A 16 bytes -> 8 shorts.
MEMACCESS (1)
"ld4 {v16.16b - v19.16b}, [%1], #64 \n" // load 8 more ARGB pixels.
"ld4 {v16.16b,v17.16b,v18.16b,v19.16b}, [%1], #64 \n" // load 8 more ARGB pixels.
"uadalp v0.8h, v16.16b \n" // B 16 bytes -> 8 shorts.
"uadalp v1.8h, v17.16b \n" // G 16 bytes -> 8 shorts.
"uadalp v2.8h, v18.16b \n" // R 16 bytes -> 8 shorts.
@ -687,8 +826,8 @@ void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
"rshrn v2.8b, v2.8h, #2 \n"
"rshrn v3.8b, v3.8h, #2 \n"
MEMACCESS (2)
"st4 {v0.8b - v3.8b}, [%2], #32 \n"
"bgt 1b \n"
"st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n"
"b.gt 1b \n"
: "+r" (src_ptr), // %0
"+r" (src_stride), // %1
"+r" (dst), // %2
@ -697,9 +836,7 @@ void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
: "memory", "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19"
);
}
#endif //HAS_SCALEARGBROWDOWN2_NEON
#ifdef HAS_SCALEARGBROWDOWNEVEN_NEON
// Reads 4 pixels at a time.
// Alignment requirement: src_argb 4 byte aligned.
void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride,
@ -714,23 +851,21 @@ void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride,
"ld1 {v0.s}[2], [%0], %3 \n"
MEMACCESS(0)
"ld1 {v0.s}[3], [%0], %3 \n"
"subs %2, %2, #4 \n" // 4 pixels per loop.
"subs %w2, %w2, #4 \n" // 4 pixels per loop.
MEMACCESS(1)
"st1 {v0.16b}, [%1], #16 \n"
"bgt 1b \n"
"b.gt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_argb), // %1
"+r"(dst_width) // %2
: "r"(src_stepx * 4) // %3
: "r"((int64)(src_stepx * 4)) // %3
: "memory", "cc", "v0"
);
}
#endif //HAS_SCALEARGBROWDOWNEVEN_NEON
#ifdef HAS_SCALEARGBROWDOWNEVEN_NEON
// Reads 4 pixels at a time.
// Alignment requirement: src_argb 4 byte aligned.
// TODO, might be worth another optimization pass in future.
// TODO(Yang Zhang): Might be worth another optimization pass in future.
// It could be upgraded to 8 pixels at a time to start with.
void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx,
@ -739,49 +874,167 @@ void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
"add %1, %1, %0 \n"
"1: \n"
MEMACCESS(0)
"ld1 {v0.8b}, [%0], %4 \n" // Read 4 2x2 blocks -> 2x1
"ld1 {v0.8b}, [%0], %4 \n" // Read 4 2x2 blocks -> 2x1
MEMACCESS(1)
"ld1 {v1.8b}, [%1], %4 \n"
"ld1 {v1.8b}, [%1], %4 \n"
MEMACCESS(0)
"ld1 {v2.8b}, [%0], %4 \n"
"ld1 {v2.8b}, [%0], %4 \n"
MEMACCESS(1)
"ld1 {v3.8b}, [%1], %4 \n"
"ld1 {v3.8b}, [%1], %4 \n"
MEMACCESS(0)
"ld1 {v4.8b}, [%0], %4 \n"
"ld1 {v4.8b}, [%0], %4 \n"
MEMACCESS(1)
"ld1 {v5.8b}, [%1], %4 \n"
"ld1 {v5.8b}, [%1], %4 \n"
MEMACCESS(0)
"ld1 {v6.8b}, [%0], %4 \n"
"ld1 {v6.8b}, [%0], %4 \n"
MEMACCESS(1)
"ld1 {v7.8b}, [%1], %4 \n"
"uaddl v0.8h, v0.8b, v1.8b \n"
"uaddl v2.8h, v2.8b, v3.8b \n"
"uaddl v4.8h, v4.8b, v5.8b \n"
"uaddl v6.8h, v6.8b, v7.8b \n"
"mov v16.d[1], v0.d[1] \n" // ab_cd -> ac_bd
"mov v0.d[1], v2.d[0] \n"
"mov v2.d[0], v16.d[1] \n"
"mov v16.d[1], v4.d[1] \n" // ef_gh -> eg_fh
"mov v4.d[1], v6.d[0] \n"
"mov v6.d[0], v16.d[1] \n"
"add v0.8h, v0.8h, v2.8h \n" // (a+b)_(c+d)
"add v4.8h, v4.8h, v6.8h \n" // (e+f)_(g+h)
"rshrn v0.8b, v0.8h, #2 \n" // first 2 pixels.
"rshrn2 v0.16b, v4.8h, #2 \n" // next 2 pixels.
"subs %3, %3, #4 \n" // 4 pixels per loop.
"ld1 {v7.8b}, [%1], %4 \n"
"uaddl v0.8h, v0.8b, v1.8b \n"
"uaddl v2.8h, v2.8b, v3.8b \n"
"uaddl v4.8h, v4.8b, v5.8b \n"
"uaddl v6.8h, v6.8b, v7.8b \n"
"mov v16.d[1], v0.d[1] \n" // ab_cd -> ac_bd
"mov v0.d[1], v2.d[0] \n"
"mov v2.d[0], v16.d[1] \n"
"mov v16.d[1], v4.d[1] \n" // ef_gh -> eg_fh
"mov v4.d[1], v6.d[0] \n"
"mov v6.d[0], v16.d[1] \n"
"add v0.8h, v0.8h, v2.8h \n" // (a+b)_(c+d)
"add v4.8h, v4.8h, v6.8h \n" // (e+f)_(g+h)
"rshrn v0.8b, v0.8h, #2 \n" // first 2 pixels.
"rshrn2 v0.16b, v4.8h, #2 \n" // next 2 pixels.
"subs %w3, %w3, #4 \n" // 4 pixels per loop.
MEMACCESS(2)
"st1 {v0.16b}, [%2], #16 \n"
"bgt 1b \n"
"b.gt 1b \n"
: "+r"(src_argb), // %0
"+r"(src_stride), // %1
"+r"(dst_argb), // %2
"+r"(dst_width) // %3
: "r"(src_stepx * 4) // %4
: "r"((int64)(src_stepx * 4)) // %4
: "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"
);
}
#endif // HAS_SCALEARGBROWDOWNEVEN_NEON
#endif // __aarch64__
// TODO(Yang Zhang): Investigate less load instructions for
// the x/dx stepping
#define LOAD1_DATA32_LANE(vn, n) \
"lsr %5, %3, #16 \n" \
"add %6, %1, %5, lsl #2 \n" \
"add %3, %3, %4 \n" \
MEMACCESS(6) \
"ld1 {"#vn".s}["#n"], [%6] \n"
void ScaleARGBCols_NEON(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx) {
const uint8* src_tmp = src_argb;
int64 dst_width64 = (int64) dst_width; // Work around ios 64 bit warning.
int64 x64 = (int64) x;
int64 dx64 = (int64) dx;
int64 tmp64 = 0;
asm volatile (
"1: \n"
LOAD1_DATA32_LANE(v0, 0)
LOAD1_DATA32_LANE(v0, 1)
LOAD1_DATA32_LANE(v0, 2)
LOAD1_DATA32_LANE(v0, 3)
LOAD1_DATA32_LANE(v1, 0)
LOAD1_DATA32_LANE(v1, 1)
LOAD1_DATA32_LANE(v1, 2)
LOAD1_DATA32_LANE(v1, 3)
MEMACCESS(0)
"st1 {v0.4s, v1.4s}, [%0], #32 \n" // store pixels
"subs %w2, %w2, #8 \n" // 8 processed per loop
"b.gt 1b \n"
: "+r"(dst_argb), // %0
"+r"(src_argb), // %1
"+r"(dst_width64), // %2
"+r"(x64), // %3
"+r"(dx64), // %4
"+r"(tmp64), // %5
"+r"(src_tmp) // %6
:
: "memory", "cc", "v0", "v1"
);
}
#undef LOAD1_DATA32_LANE
// TODO(Yang Zhang): Investigate less load instructions for
// the x/dx stepping
#define LOAD2_DATA32_LANE(vn1, vn2, n) \
"lsr %5, %3, #16 \n" \
"add %6, %1, %5, lsl #2 \n" \
"add %3, %3, %4 \n" \
MEMACCESS(6) \
"ld2 {"#vn1".s, "#vn2".s}["#n"], [%6] \n"
void ScaleARGBFilterCols_NEON(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx) {
int dx_offset[4] = {0, 1, 2, 3};
int* tmp = dx_offset;
const uint8* src_tmp = src_argb;
int64 dst_width64 = (int64) dst_width; // Work around ios 64 bit warning.
int64 x64 = (int64) x;
int64 dx64 = (int64) dx;
asm volatile (
"dup v0.4s, %w3 \n" // x
"dup v1.4s, %w4 \n" // dx
"ld1 {v2.4s}, [%5] \n" // 0 1 2 3
"shl v6.4s, v1.4s, #2 \n" // 4 * dx
"mul v1.4s, v1.4s, v2.4s \n"
"movi v3.16b, #0x7f \n" // 0x7F
"movi v4.8h, #0x7f \n" // 0x7F
// x , x + 1 * dx, x + 2 * dx, x + 3 * dx
"add v5.4s, v1.4s, v0.4s \n"
"1: \n"
// d0, d1: a
// d2, d3: b
LOAD2_DATA32_LANE(v0, v1, 0)
LOAD2_DATA32_LANE(v0, v1, 1)
LOAD2_DATA32_LANE(v0, v1, 2)
LOAD2_DATA32_LANE(v0, v1, 3)
"shrn v2.4h, v5.4s, #9 \n"
"and v2.8b, v2.8b, v4.8b \n"
"dup v16.8b, v2.b[0] \n"
"dup v17.8b, v2.b[2] \n"
"dup v18.8b, v2.b[4] \n"
"dup v19.8b, v2.b[6] \n"
"ext v2.8b, v16.8b, v17.8b, #4 \n"
"ext v17.8b, v18.8b, v19.8b, #4 \n"
"ins v2.d[1], v17.d[0] \n" // f
"eor v7.16b, v2.16b, v3.16b \n" // 0x7f ^ f
"umull v16.8h, v0.8b, v7.8b \n"
"umull2 v17.8h, v0.16b, v7.16b \n"
"umull v18.8h, v1.8b, v2.8b \n"
"umull2 v19.8h, v1.16b, v2.16b \n"
"add v16.8h, v16.8h, v18.8h \n"
"add v17.8h, v17.8h, v19.8h \n"
"shrn v0.8b, v16.8h, #7 \n"
"shrn2 v0.16b, v17.8h, #7 \n"
MEMACCESS(0)
"st1 {v0.4s}, [%0], #16 \n" // store pixels
"add v5.4s, v5.4s, v6.4s \n"
"subs %w2, %w2, #4 \n" // 4 processed per loop
"b.gt 1b \n"
: "+r"(dst_argb), // %0
"+r"(src_argb), // %1
"+r"(dst_width64), // %2
"+r"(x64), // %3
"+r"(dx64), // %4
"+r"(tmp), // %5
"+r"(src_tmp) // %6
:
: "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5",
"v6", "v7", "v16", "v17", "v18", "v19"
);
}
#undef LOAD2_DATA32_LANE
#endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
#ifdef __cplusplus
} // extern "C"

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -33,7 +33,7 @@ static const struct FourCCAliasEntry kFourCCAliases[] = {
{FOURCC_2VUY, FOURCC_UYVY}, // kCMPixelFormat_422YpCbCr8
{FOURCC_JPEG, FOURCC_MJPG}, // Note: JPEG has DHT while MJPG does not.
{FOURCC_DMB1, FOURCC_MJPG},
{FOURCC_BA81, FOURCC_BGGR},
{FOURCC_BA81, FOURCC_BGGR}, // deprecated.
{FOURCC_RGB3, FOURCC_RAW },
{FOURCC_BGR3, FOURCC_24BG},
{FOURCC_CM32, FOURCC_BGRA}, // kCMPixelFormat_32ARGB

File diff suppressed because it is too large Load Diff

View File

@ -17,6 +17,7 @@
** language. The code for the "sqlite3" command-line shell is also in a
** separate file. This file contains only code for the core SQLite library.
*/
#define SQLITE_THREADSAFE 0
#define SQLITE_CORE 1
#define SQLITE_AMALGAMATION 1
#ifndef SQLITE_PRIVATE

View File

@ -4,7 +4,7 @@
#include <android/log.h>
#include <jni.h>
#define LOG_TAG "tmessages"
#define LOG_TAG "tmessages_native"
#ifndef LOG_DISABLED
#define LOGI(...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, __VA_ARGS__)
#define LOGD(...) __android_log_print(ANDROID_LOG_DEBUG, LOG_TAG, __VA_ARGS__)

View File

@ -39,13 +39,14 @@
<uses-permission android:name="android.permission.USE_FINGERPRINT" />
<application
android:name=".ApplicationLoader"
android:name="org.telegram.messenger.ApplicationLoader"
android:allowBackup="false"
android:hardwareAccelerated="@bool/useHardwareAcceleration"
android:icon="@drawable/ic_launcher"
android:label="@string/ShortAppName"
android:largeHeap="true"
android:theme="@style/Theme.TMessages.Start">
android:theme="@style/Theme.TMessages.Start"
android:manageSpaceActivity="org.telegram.ui.ManageSpaceActivity">
<activity
android:name="org.telegram.ui.LaunchActivity"
@ -106,7 +107,14 @@
<category android:name="android.intent.category.DEFAULT" />
<data android:scheme="tg" />
</intent-filter>
<meta-data android:name="android.service.chooser.chooser_target_service" android:value=".TgChooserTargetService" />
<meta-data android:name="android.service.chooser.chooser_target_service" android:value="org.telegram.messenger.TgChooserTargetService" />
</activity>
<activity
android:name="org.telegram.ui.ManageSpaceActivity"
android:configChanges="keyboard|keyboardHidden|orientation|screenSize"
android:hardwareAccelerated="@bool/useHardwareAcceleration"
android:launchMode="singleTask"
android:windowSoftInputMode="adjustPan">
</activity>
<activity
android:name="org.telegram.ui.IntroActivity"
@ -122,25 +130,25 @@
android:windowSoftInputMode="adjustResize|stateHidden">
</activity>
<receiver android:name=".AutoMessageHeardReceiver">
<receiver android:name="org.telegram.messenger.AutoMessageHeardReceiver">
<intent-filter>
<action android:name="org.telegram.messenger.ACTION_MESSAGE_HEARD"/>
</intent-filter>
</receiver>
<receiver android:name=".AutoMessageReplyReceiver">
<receiver android:name="org.telegram.messenger.AutoMessageReplyReceiver">
<intent-filter>
<action android:name="org.telegram.messenger.ACTION_MESSAGE_REPLY"/>
</intent-filter>
</receiver>
<receiver android:name=".SmsListener">
<receiver android:name="org.telegram.messenger.SmsListener">
<intent-filter>
<action android:name="android.provider.Telephony.SMS_RECEIVED" />
</intent-filter>
</receiver>
<service android:name=".AuthenticatorService" android:exported="true">
<service android:name="org.telegram.messenger.AuthenticatorService" android:exported="true">
<intent-filter>
<action android:name="android.accounts.AccountAuthenticator"/>
</intent-filter>
@ -148,7 +156,7 @@
android:resource="@xml/auth"/>
</service>
<service android:name=".ContactsSyncAdapterService" android:exported="true">
<service android:name="org.telegram.messenger.ContactsSyncAdapterService" android:exported="true">
<intent-filter>
<action android:name="android.content.SyncAdapter" />
</intent-filter>
@ -168,11 +176,12 @@
</service>
<service android:name="org.telegram.messenger.NotificationsService" android:enabled="true"/>
<service android:name=".NotificationRepeat" android:exported="false"/>
<service android:name=".VideoEncodingService" android:enabled="true"/>
<service android:name=".MusicPlayerService" android:exported="true" android:enabled="true"/>
<service android:name="org.telegram.messenger.NotificationRepeat" android:exported="false"/>
<service android:name="org.telegram.messenger.ClearCacheService" android:exported="false"/>
<service android:name="org.telegram.messenger.VideoEncodingService" android:enabled="true"/>
<service android:name="org.telegram.messenger.MusicPlayerService" android:exported="true" android:enabled="true"/>
<receiver android:name=".MusicPlayerReceiver" >
<receiver android:name="org.telegram.messenger.MusicPlayerReceiver" >
<intent-filter>
<action android:name="org.telegram.android.musicplayer.close" />
<action android:name="org.telegram.android.musicplayer.pause" />
@ -191,7 +200,7 @@
</intent-filter>
</receiver>
<receiver android:name=".WearReplyReceiver" android:enabled="true"/>
<receiver android:name="org.telegram.messenger.WearReplyReceiver" android:enabled="true"/>
<uses-library android:name="com.sec.android.app.multiwindow" android:required="false" />
<meta-data android:name="com.sec.android.support.multiwindow" android:value="true" />
@ -199,8 +208,8 @@
<meta-data android:name="com.sec.android.multiwindow.DEFAULT_SIZE_H" android:value="598dp" />
<meta-data android:name="com.sec.android.multiwindow.MINIMUM_SIZE_W" android:value="632dp" />
<meta-data android:name="com.sec.android.multiwindow.MINIMUM_SIZE_H" android:value="598dp" />
<meta-data android:name="com.google.android.gms.car.application" android:resource="@xml/automotive_app_desc" />
<!--
<meta-data android:name="com.google.android.gms.car.application" android:resource="@xml/automotive_app_desc" />-->
</application>

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
* Copyright Nikolai Kudashov, 2013-2015.
* Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.SQLite;

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
* Copyright Nikolai Kudashov, 2013-2015.
* Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.SQLite;

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
* Copyright Nikolai Kudashov, 2013-2015.
* Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.SQLite;

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
* Copyright Nikolai Kudashov, 2013-2015.
* Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.SQLite;

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
* Copyright Nikolai Kudashov, 2013-2015.
* Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.SQLite;
@ -135,6 +135,10 @@ public class SQLitePreparedStatement {
bindLong(sqliteStatementHandle, index, value);
}
public void bindNull(int index) throws SQLiteException {
bindNull(sqliteStatementHandle, index);
}
native void bindByteBuffer(int statementHandle, int index, ByteBuffer value, int length) throws SQLiteException;
native void bindString(int statementHandle, int index, String value) throws SQLiteException;
native void bindInt(int statementHandle, int index, int value) throws SQLiteException;

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
* Copyright Nikolai Kudashov, 2013-2015.
* Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
@ -22,7 +22,6 @@ import android.content.pm.PackageInfo;
import android.content.res.Configuration;
import android.graphics.drawable.ColorDrawable;
import android.graphics.drawable.Drawable;
import android.os.AsyncTask;
import android.os.Build;
import android.os.Handler;
import android.os.PowerManager;
@ -30,8 +29,8 @@ import android.util.Base64;
import com.google.android.gms.common.ConnectionResult;
import com.google.android.gms.common.GooglePlayServicesUtil;
import com.google.android.gms.gcm.GoogleCloudMessaging;
import org.telegram.SQLite.DatabaseHandler;
import org.telegram.tgnet.ConnectionsManager;
import org.telegram.tgnet.SerializedData;
import org.telegram.tgnet.TLRPC;
@ -40,17 +39,11 @@ import org.telegram.ui.Components.ForegroundDetector;
import java.io.File;
import java.io.RandomAccessFile;
import java.util.Calendar;
import java.util.concurrent.atomic.AtomicInteger;
public class ApplicationLoader extends Application {
private static NetworkAlarm networkAlarm = null;
private static PendingIntent pendingIntent;
private GoogleCloudMessaging gcm;
private AtomicInteger msgId = new AtomicInteger();
private String regid;
public static final String EXTRA_MESSAGE = "message";
public static final String PROPERTY_REG_ID = "registration_id";
private static final String PROPERTY_APP_VERSION = "appVersion";
private static final int PLAY_SERVICES_RESOLUTION_REQUEST = 9000;
private static Drawable cachedWallpaper;
private static int selectedColor;
private static boolean isCustomTheme;
@ -63,6 +56,7 @@ public class ApplicationLoader extends Application {
public static volatile boolean isScreenOn = false;
public static volatile boolean mainInterfacePaused = true;
public static DatabaseHandler databaseHandler;
public static boolean SHOW_ANDROID_EMOJI;
public static boolean KEEP_ORIGINAL_FILENAME;
public static boolean USE_DEVICE_FONT;
@ -186,7 +180,8 @@ public class ApplicationLoader extends Application {
} catch (Exception e) {
FileLog.e("tmessages", e);
}
return new File("/data/data/org.telegram.messenger/files");
//return new File("/data/data/org.telegram.messenger/files");
return new File("/data/data/org.telegram." + (BuildConfig.DEBUG ? "plus.beta" : "plus") +"/files");
}
public static void postInitApplication() {
@ -239,16 +234,16 @@ public class ApplicationLoader extends Application {
appVersion = "App version unknown";
systemVersion = "SDK " + Build.VERSION.SDK_INT;
}
if (langCode.length() == 0) {
if (langCode.trim().length() == 0) {
langCode = "en";
}
if (deviceModel.length() == 0) {
if (deviceModel.trim().length() == 0) {
deviceModel = "Android unknown";
}
if (appVersion.length() == 0) {
if (appVersion.trim().length() == 0) {
appVersion = "App version unknown";
}
if (systemVersion.length() == 0) {
if (systemVersion.trim().length() == 0) {
systemVersion = "SDK Unknown";
}
@ -280,7 +275,11 @@ public class ApplicationLoader extends Application {
applicationContext = getApplicationContext();
NativeLoader.initNativeLibs(ApplicationLoader.applicationContext);
ConnectionsManager.native_setJava(Build.VERSION.SDK_INT == 14 || Build.VERSION.SDK_INT == 15);
try{
ConnectionsManager.native_setJava(Build.VERSION.SDK_INT == 14 || Build.VERSION.SDK_INT == 15);
} catch (Exception e) {
FileLog.e("tmessages", e);
}
if (Build.VERSION.SDK_INT >= 14) {
new ForegroundDetector(this);
@ -288,10 +287,12 @@ public class ApplicationLoader extends Application {
applicationHandler = new Handler(applicationContext.getMainLooper());
//plus
SharedPreferences preferences = ApplicationLoader.applicationContext.getSharedPreferences("mainconfig", Activity.MODE_PRIVATE);
SHOW_ANDROID_EMOJI = preferences.getBoolean("showAndroidEmoji", false);
KEEP_ORIGINAL_FILENAME = preferences.getBoolean("keepOriginalFilename", false);
USE_DEVICE_FONT = preferences.getBoolean("useDeviceFont", false);
databaseHandler = new DatabaseHandler(applicationContext);
//SharedPreferences preferences = ApplicationLoader.applicationContext.getSharedPreferences("mainconfig", Activity.MODE_PRIVATE);
SharedPreferences plusPreferences = ApplicationLoader.applicationContext.getSharedPreferences("plusconfig", Activity.MODE_PRIVATE);
SHOW_ANDROID_EMOJI = plusPreferences.getBoolean("showAndroidEmoji", false);
KEEP_ORIGINAL_FILENAME = plusPreferences.getBoolean("keepOriginalFilename", false);
USE_DEVICE_FONT = plusPreferences.getBoolean("useDeviceFont", false);
//
startPushService();
}
@ -300,13 +301,22 @@ public class ApplicationLoader extends Application {
SharedPreferences preferences = applicationContext.getSharedPreferences("Notifications", MODE_PRIVATE);
if (preferences.getBoolean("pushService", true)) {
networkAlarm = new NetworkAlarm();
/*} else {
AlarmManager am = (AlarmManager) applicationContext.getSystemService(Context.ALARM_SERVICE);
Intent i = new Intent(applicationContext, ApplicationLoader.class);
pendingIntent = PendingIntent.getBroadcast(applicationContext, 0, i, 0);
am.cancel(pendingIntent);
am.setRepeating(AlarmManager.RTC_WAKEUP, System.currentTimeMillis(), 60000, pendingIntent);
}*/
applicationContext.startService(new Intent(applicationContext, NotificationsService.class));
//if (android.os.Build.VERSION.SDK_INT >= 19) {
FileLog.e("ApplicationLoader", "startPushService");
Calendar cal = Calendar.getInstance();
PendingIntent pintent = PendingIntent.getService(applicationContext, 0, new Intent(applicationContext, NotificationsService.class), 0);
AlarmManager alarm = (AlarmManager) applicationContext.getSystemService(Context.ALARM_SERVICE);
alarm.setRepeating(AlarmManager.RTC_WAKEUP, cal.getTimeInMillis(), 30000, pintent);
///FileLog.e("ApplicationLoader", "startPushService");
///Calendar cal = Calendar.getInstance();
///PendingIntent pintent = PendingIntent.getService(applicationContext, 0, new Intent(applicationContext, NotificationsService.class), 0);
///AlarmManager alarm = (AlarmManager) applicationContext.getSystemService(Context.ALARM_SERVICE);
///alarm.setRepeating(AlarmManager.RTC_WAKEUP, cal.getTimeInMillis(), 30000, pintent);
//PendingIntent pintent = PendingIntent.getService(applicationContext, 0, new Intent(applicationContext, NotificationsService.class), 0);
//AlarmManager alarm = (AlarmManager)applicationContext.getSystemService(Context.ALARM_SERVICE);
@ -318,13 +328,26 @@ public class ApplicationLoader extends Application {
}
public static void stopPushService() {
if (networkAlarm != null) {
networkAlarm = null;
}// else {
// AlarmManager am = (AlarmManager) applicationContext.getSystemService(Context.ALARM_SERVICE);
// am.cancel(pendingIntent);
//}
applicationContext.stopService(new Intent(applicationContext, NotificationsService.class));
PendingIntent pintent = PendingIntent.getService(applicationContext, 0, new Intent(applicationContext, NotificationsService.class), 0);
AlarmManager alarm = (AlarmManager)applicationContext.getSystemService(Context.ALARM_SERVICE);
alarm.cancel(pintent);
///PendingIntent pintent = PendingIntent.getService(applicationContext, 0, new Intent(applicationContext, NotificationsService.class), 0);
///AlarmManager alarm = (AlarmManager)applicationContext.getSystemService(Context.ALARM_SERVICE);
///alarm.cancel(pintent);
}
public static void setAlarm(int timeout) {
FileLog.d("tmessages", "setting alarm to wake us in " + String.valueOf(timeout) + "ms");
if (networkAlarm != null) {
networkAlarm.setAlarm(applicationContext, timeout);
}
}
@Override
public void onConfigurationChanged(Configuration newConfig) {
super.onConfigurationChanged(newConfig);
@ -341,13 +364,10 @@ public class ApplicationLoader extends Application {
@Override
public void run() {
if (checkPlayServices()) {
gcm = GoogleCloudMessaging.getInstance(ApplicationLoader.this);
regid = getRegistrationId();
if (regid.length() == 0) {
registerInBackground();
} else {
sendRegistrationIdToBackend(false);
if (UserConfig.pushString == null || UserConfig.pushString.length() == 0) {
FileLog.d("tmessages", "GCM Registration not found.");
Intent intent = new Intent(applicationContext, GcmRegistrationIntentService.class);
startService(intent);
}
} else {
FileLog.d("tmessages", "No valid Google Play Services APK found.");
@ -369,91 +389,4 @@ public class ApplicationLoader extends Application {
}
return true;*/
}
private String getRegistrationId() {
final SharedPreferences prefs = getGCMPreferences(applicationContext);
String registrationId = prefs.getString(PROPERTY_REG_ID, "");
if (registrationId.length() == 0) {
FileLog.d("tmessages", "Registration not found.");
return "";
}
int registeredVersion = prefs.getInt(PROPERTY_APP_VERSION, Integer.MIN_VALUE);
if (registeredVersion != BuildVars.BUILD_VERSION) {
FileLog.d("tmessages", "App version changed.");
return "";
}
return registrationId;
}
private SharedPreferences getGCMPreferences(Context context) {
return getSharedPreferences(ApplicationLoader.class.getSimpleName(), Context.MODE_PRIVATE);
}
private void registerInBackground() {
AsyncTask<String, String, Boolean> task = new AsyncTask<String, String, Boolean>() {
@Override
protected Boolean doInBackground(String... objects) {
if (gcm == null) {
gcm = GoogleCloudMessaging.getInstance(applicationContext);
}
int count = 0;
while (count < 1000) {
try {
count++;
regid = gcm.register(BuildVars.GCM_SENDER_ID);
sendRegistrationIdToBackend(true);
storeRegistrationId(applicationContext, regid);
return true;
} catch (Exception e) {
FileLog.e("tmessages", e);
}
try {
if (count % 20 == 0) {
Thread.sleep(60000 * 30);
} else {
Thread.sleep(5000);
}
} catch (InterruptedException e) {
FileLog.e("tmessages", e);
}
}
return false;
}
};
if (android.os.Build.VERSION.SDK_INT >= 11) {
task.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR, null, null, null);
} else {
task.execute(null, null, null);
}
}
private void sendRegistrationIdToBackend(final boolean isNew) {
Utilities.stageQueue.postRunnable(new Runnable() {
@Override
public void run() {
UserConfig.pushString = regid;
UserConfig.registeredForPush = !isNew;
UserConfig.saveConfig(false);
if (UserConfig.getClientUserId() != 0) {
AndroidUtilities.runOnUIThread(new Runnable() {
@Override
public void run() {
MessagesController.getInstance().registerForPush(regid);
}
});
}
}
});
}
private void storeRegistrationId(Context context, String regId) {
final SharedPreferences prefs = getGCMPreferences(context);
int appVersion = BuildVars.BUILD_VERSION;
FileLog.e("tmessages", "Saving regId on app version " + appVersion);
SharedPreferences.Editor editor = prefs.edit();
editor.putString(PROPERTY_REG_ID, regId);
editor.putInt(PROPERTY_APP_VERSION, appVersion);
editor.commit();
}
}

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
* Copyright Nikolai Kudashov, 2013-2015.
* Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
* Copyright Nikolai Kudashov, 2013-2015.
* Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
@ -13,8 +13,8 @@ import org.telegram.tgnet.RequestDelegate;
import org.telegram.tgnet.TLObject;
import org.telegram.tgnet.TLRPC;
import java.io.RandomAccessFile;
import java.io.File;
import java.io.RandomAccessFile;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.Scanner;
@ -22,9 +22,9 @@ import java.util.Scanner;
public class FileLoadOperation {
private static class RequestInfo {
private int requestToken = 0;
private int offset = 0;
private TLRPC.TL_upload_file response = null;
private int requestToken;
private int offset;
private TLRPC.TL_upload_file response;
}
private final static int stateIdle = 0;
@ -49,8 +49,9 @@ public class FileLoadOperation {
private int currentDownloadChunkSize;
private int currentMaxDownloadRequests;
private int requestsCount;
private int renameRetryCount;
private int nextDownloadOffset = 0;
private int nextDownloadOffset;
private ArrayList<RequestInfo> requestInfos;
private ArrayList<RequestInfo> delayedRequestInfos;
@ -61,9 +62,9 @@ public class FileLoadOperation {
private String ext;
private RandomAccessFile fileOutputStream;
private RandomAccessFile fiv;
private File storePath = null;
private File tempPath = null;
private boolean isForceRequest = false;
private File storePath;
private File tempPath;
private boolean isForceRequest;
private static String orgName = null;
@ -145,23 +146,28 @@ public class FileLoadOperation {
key = documentLocation.key;
} else if (documentLocation instanceof TLRPC.TL_document) {
location = new TLRPC.TL_inputDocumentFileLocation();
datacenter_id = documentLocation.dc_id;
location.id = documentLocation.id;
location.access_hash = documentLocation.access_hash;
datacenter_id = documentLocation.dc_id;
}
totalBytesCount = documentLocation.size;
ext = FileLoader.getDocumentFileName(documentLocation);
int idx;
if (ext == null || (idx = ext.lastIndexOf(".")) == -1) {
ext = "";
} else {
ext = ext.substring(idx);
if (ext.length() <= 1) {
if (totalBytesCount <= 0) {
totalBytesCount = documentLocation.size;
}
if (ext == null) {
ext = FileLoader.getDocumentFileName(documentLocation);
int idx;
if (ext == null || (idx = ext.lastIndexOf(".")) == -1) {
ext = "";
} else {
ext = ext.substring(idx);
if (ext.length() <= 1) {
ext = "";
}
}
if(ApplicationLoader.KEEP_ORIGINAL_FILENAME && !ext.contains("webp") && !FileLoader.isGif(documentLocation)) {
orgName = FileLoader.getDocName(documentLocation);
}
}
if(ApplicationLoader.KEEP_ORIGINAL_FILENAME && !ext.contains("webp")){
orgName = FileLoader.getDocName(documentLocation);
}
}
@ -200,7 +206,7 @@ public class FileLoadOperation {
String fileNameTemp;
String fileNameIv = null;
if (location.volume_id != 0 && location.local_id != 0) {
fileNameTemp = location.volume_id + "_" + location.local_id + "_temp." + ext;
fileNameTemp = location.volume_id + "_" + location.local_id + ".temp";
fileNameFinal = location.volume_id + "_" + location.local_id + "." + ext;
if (key != null) {
fileNameIv = location.volume_id + "_" + location.local_id + ".iv";
@ -216,7 +222,7 @@ public class FileLoadOperation {
return;
}
} else {
fileNameTemp = datacenter_id + "_" + location.id + "_temp" + ext;
fileNameTemp = datacenter_id + "_" + location.id + ".temp";
fileNameFinal = datacenter_id + "_" + location.id + ext;
if (key != null) {
fileNameIv = datacenter_id + "_" + location.id + ".iv";
@ -252,12 +258,11 @@ public class FileLoadOperation {
if (!cacheFileFinal.exists()) {
cacheFileTemp = new File(tempPath, fileNameTemp);
if (cacheFileTemp.exists()) {
downloadedBytes = (int)cacheFileTemp.length();
downloadedBytes = (int) cacheFileTemp.length();
nextDownloadOffset = downloadedBytes = downloadedBytes / currentDownloadChunkSize * currentDownloadChunkSize;
}
//if (BuildVars.DEBUG_VERSION) {
if (BuildConfig.DEBUG) {
if (BuildVars.DEBUG_VERSION) {
FileLog.d("tmessages", "start loading file to temp = " + cacheFileTemp + " final = " + cacheFileFinal);
}
@ -341,6 +346,11 @@ public class FileLoadOperation {
private void cleanup() {
try {
if (fileOutputStream != null) {
try {
fileOutputStream.getChannel().close();
} catch (Exception e) {
FileLog.e("tmessages", e);
}
fileOutputStream.close();
fileOutputStream = null;
}
@ -357,7 +367,8 @@ public class FileLoadOperation {
FileLog.e("tmessages", e);
}
if (delayedRequestInfos != null) {
for (RequestInfo requestInfo : delayedRequestInfos) {
for (int a = 0; a < delayedRequestInfos.size(); a++) {
RequestInfo requestInfo = delayedRequestInfos.get(a);
if (requestInfo.response != null) {
requestInfo.response.disableFree = false;
requestInfo.response.freeResources();
@ -375,18 +386,33 @@ public class FileLoadOperation {
cleanup();
if (cacheIvTemp != null) {
cacheIvTemp.delete();
cacheIvTemp = null;
}
if (cacheFileTemp != null) {
if (!cacheFileTemp.renameTo(cacheFileFinal)) {
//if (BuildVars.DEBUG_VERSION) {
if (BuildConfig.DEBUG) {
FileLog.e("tmessages", "unable to rename temp = " + cacheFileTemp + " to final = " + cacheFileFinal);
boolean renameResult = cacheFileTemp.renameTo(cacheFileFinal);
if (!renameResult) {
if (BuildVars.DEBUG_VERSION) {
FileLog.e("tmessages", "unable to rename temp = " + cacheFileTemp + " to final = " + cacheFileFinal + " retry = " + renameRetryCount);
}
renameRetryCount++;
if (renameRetryCount < 3) {
state = stateDownloading;
Utilities.stageQueue.postRunnable(new Runnable() {
@Override
public void run() {
try {
onFinishLoadingFile();
} catch (Exception e) {
delegate.didFailedLoadingFile(FileLoadOperation.this, 0);
}
}
}, 200);
return;
}
cacheFileFinal = cacheFileTemp;
}
}
//if (BuildVars.DEBUG_VERSION) {
if (BuildConfig.DEBUG) {
if (BuildVars.DEBUG_VERSION) {
FileLog.e("tmessages", "finished downloading file to " + cacheFileFinal);
}
delegate.didFinishLoadingFile(FileLoadOperation.this, cacheFileFinal);

View File

@ -3,11 +3,13 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
* Copyright Nikolai Kudashov, 2013-2015.
* Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
import android.util.Log;
import org.telegram.tgnet.TLObject;
import org.telegram.tgnet.TLRPC;
@ -84,6 +86,10 @@ public class FileLoader {
mediaDirs = dirs;
}
public File checkDirectory(int type) {
return mediaDirs.get(type);
}
public File getDirectory(int type) {
File dir = mediaDirs.get(type);
if (dir == null && type != MEDIA_DIR_CACHE) {
@ -703,7 +709,8 @@ public class FileLoader {
if (document.file_name != null) {
return document.file_name;
}
for (TLRPC.DocumentAttribute documentAttribute : document.attributes) {
for (int a = 0; a < document.attributes.size(); a++) {
TLRPC.DocumentAttribute documentAttribute = document.attributes.get(a);
if (documentAttribute instanceof TLRPC.TL_documentAttributeFilename) {
return documentAttribute.file_name;
}
@ -722,22 +729,25 @@ public class FileLoader {
return video.dc_id + "_" + video.id + "." + (ext != null ? ext : "mp4");
} else if (attach instanceof TLRPC.Document) {
TLRPC.Document document = (TLRPC.Document) attach;
String docExt = getDocumentFileName(document);
int idx;
if (docExt == null || (idx = docExt.lastIndexOf(".")) == -1) {
docExt = "";
} else {
docExt = docExt.substring(idx);
String docExt = null;
if (docExt == null) {
docExt = getDocumentFileName(document);
int idx;
if (docExt == null || (idx = docExt.lastIndexOf(".")) == -1) {
docExt = "";
} else {
docExt = docExt.substring(idx);
}
}
if (docExt.length() > 1) {
if(ApplicationLoader.KEEP_ORIGINAL_FILENAME && !docExt.contains("webp"))return getDocName(document); //Plus
if(ApplicationLoader.KEEP_ORIGINAL_FILENAME && !docExt.contains("webp") && !isGif(document))return getDocName(document); //Plus
return document.dc_id + "_" + document.id + docExt;
} else {
return document.dc_id + "_" + document.id;
}
} else if (attach instanceof TLRPC.PhotoSize) {
TLRPC.PhotoSize photo = (TLRPC.PhotoSize) attach;
if (photo.location == null) {
if (photo.location == null || photo.location instanceof TLRPC.TL_fileLocationUnavailable) {
return "";
}
return photo.location.volume_id + "_" + photo.location.local_id + "." + (ext != null ? ext : "jpg");
@ -745,43 +755,63 @@ public class FileLoader {
TLRPC.Audio audio = (TLRPC.Audio) attach;
return audio.dc_id + "_" + audio.id + "." + (ext != null ? ext : "ogg");
} else if (attach instanceof TLRPC.FileLocation) {
if (attach instanceof TLRPC.TL_fileLocationUnavailable) {
return "";
}
TLRPC.FileLocation location = (TLRPC.FileLocation) attach;
return location.volume_id + "_" + location.local_id + "." + (ext != null ? ext : "jpg");
}
return "";
}
//Plus
public static String getAttachFileName(TLObject attach, boolean out) {
public static boolean isGif(TLRPC.Document document){
String s = getDocumentFileName(document);
if(s.contains(".mp4") || s.contains(".gif")){
//if(s.contains("giphy.") || s.contains("animation.") || s.contains("gif.")){
return true;
//}
}
return false;
}
public static String getAttachFileName(TLObject attach, String ext, boolean out) {
if (attach instanceof TLRPC.Video) {
TLRPC.Video video = (TLRPC.Video) attach;
return video.dc_id + "_" + video.id + "." + ("mp4");
return video.dc_id + "_" + video.id + "." + (ext != null ? ext : "mp4");
} else if (attach instanceof TLRPC.Document) {
TLRPC.Document document = (TLRPC.Document) attach;
String docExt = getDocumentFileName(document);
int idx;
if (docExt == null || (idx = docExt.lastIndexOf(".")) == -1) {
docExt = "";
} else {
docExt = docExt.substring(idx);
String docExt = null;
if (docExt == null) {
docExt = getDocumentFileName(document);
int idx;
if (docExt == null || (idx = docExt.lastIndexOf(".")) == -1) {
docExt = "";
} else {
docExt = docExt.substring(idx);
}
}
if (docExt.length() > 1) {
if(!out && ApplicationLoader.KEEP_ORIGINAL_FILENAME && !docExt.contains("webp"))return getDocName(document);
if(!out && ApplicationLoader.KEEP_ORIGINAL_FILENAME && !docExt.contains("webp") && !isGif(document))return getDocName(document);
return document.dc_id + "_" + document.id + docExt;
} else {
return document.dc_id + "_" + document.id;
}
} else if (attach instanceof TLRPC.PhotoSize) {
TLRPC.PhotoSize photo = (TLRPC.PhotoSize) attach;
if (photo.location == null) {
if (photo.location == null || photo.location instanceof TLRPC.TL_fileLocationUnavailable) {
return "";
}
return photo.location.volume_id + "_" + photo.location.local_id + "." + ( "jpg");
return photo.location.volume_id + "_" + photo.location.local_id + "." + (ext != null ? ext : "jpg");
} else if (attach instanceof TLRPC.Audio) {
TLRPC.Audio audio = (TLRPC.Audio) attach;
return audio.dc_id + "_" + audio.id + "." + ( "ogg");
return audio.dc_id + "_" + audio.id + "." + (ext != null ? ext : "ogg");
} else if (attach instanceof TLRPC.FileLocation) {
if (attach instanceof TLRPC.TL_fileLocationUnavailable) {
return "";
}
TLRPC.FileLocation location = (TLRPC.FileLocation) attach;
return location.volume_id + "_" + location.local_id + "." + ( "jpg");
return location.volume_id + "_" + location.local_id + "." + (ext != null ? ext : "jpg");
}
return "";
}
@ -810,7 +840,7 @@ public class FileLoader {
return name;
}
public void deleteFiles(final ArrayList<File> files) {
public void deleteFiles(final ArrayList<File> files, final int type) {
if (files == null || files.isEmpty()) {
return;
}
@ -829,17 +859,20 @@ public class FileLoader {
}
}
try {
File qFile = new File(file.getPath(), "q_" + file.getName());
File qFile = new File(file.getParentFile(), "q_" + file.getName());
if (qFile.exists()) {
if (!file.delete()) {
file.deleteOnExit();
if (!qFile.delete()) {
qFile.deleteOnExit();
}
}
} catch (Exception e) {
FileLog.e("tmessages", e);
}
}
}
if (type == 2) {
ImageLoader.getInstance().clearMemory();
}
}
});
}
}

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
* Copyright Nikolai Kudashov, 2013-2015.
* Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
@ -40,8 +40,7 @@ public class FileLog {
}
public FileLog() {
//if (!BuildVars.DEBUG_VERSION) {
if (!BuildConfig.DEBUG) {
if (!BuildVars.DEBUG_VERSION) {
return;
}
dateFormat = FastDateFormat.getInstance("dd_MM_yyyy_HH_mm_ss", Locale.US);
@ -88,8 +87,7 @@ public class FileLog {
}
public static void e(final String tag, final String message, final Throwable exception) {
//if (!BuildVars.DEBUG_VERSION) {
if (!BuildConfig.DEBUG) {
if (!BuildVars.DEBUG_VERSION) {
return;
}
Log.e(tag, message, exception);
@ -110,8 +108,7 @@ public class FileLog {
}
public static void e(final String tag, final String message) {
//if (!BuildVars.DEBUG_VERSION) {
if (!BuildConfig.DEBUG) {
if (!BuildVars.DEBUG_VERSION) {
return;
}
Log.e(tag, message);
@ -131,8 +128,7 @@ public class FileLog {
}
public static void e(final String tag, final Throwable e) {
//if (!BuildVars.DEBUG_VERSION) {
if (!BuildConfig.DEBUG) {
if (!BuildVars.DEBUG_VERSION) {
return;
}
e.printStackTrace();
@ -158,8 +154,7 @@ public class FileLog {
}
public static void d(final String tag, final String message) {
//if (!BuildVars.DEBUG_VERSION) {
if (!BuildConfig.DEBUG) {
if (!BuildVars.DEBUG_VERSION) {
return;
}
Log.d(tag, message);
@ -179,8 +174,7 @@ public class FileLog {
}
public static void w(final String tag, final String message) {
//if (!BuildVars.DEBUG_VERSION) {
if (!BuildConfig.DEBUG) {
if (!BuildVars.DEBUG_VERSION) {
return;
}
Log.w(tag, message);

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
* Copyright Nikolai Kudashov, 2013-2015.
* Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
* Copyright Nikolai Kudashov, 2013-2015.
* Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
* Copyright Nikolai Kudashov, 2013-2015.
* Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
@ -68,7 +68,7 @@ public class UserConfig {
SharedPreferences preferences = ApplicationLoader.applicationContext.getSharedPreferences("userconfing", Context.MODE_PRIVATE);
SharedPreferences.Editor editor = preferences.edit();
editor.putBoolean("registeredForPush", registeredForPush);
editor.putString("pushString", pushString);
editor.putString("pushString2", pushString);
editor.putInt("lastSendMessageId", lastSendMessageId);
editor.putInt("lastLocalId", lastLocalId);
editor.putString("contactsHash", contactsHash);
@ -182,7 +182,7 @@ public class UserConfig {
SharedPreferences preferences = ApplicationLoader.applicationContext.getSharedPreferences("userconfing", Context.MODE_PRIVATE);
registeredForPush = preferences.getBoolean("registeredForPush", false);
pushString = preferences.getString("pushString", "");
pushString = preferences.getString("pushString2", "");
lastSendMessageId = preferences.getInt("lastSendMessageId", -210000);
lastLocalId = preferences.getInt("lastLocalId", -210000);
contactsHash = preferences.getString("contactsHash", "");
@ -209,7 +209,7 @@ public class UserConfig {
} else {
SharedPreferences preferences = ApplicationLoader.applicationContext.getSharedPreferences("userconfing", Context.MODE_PRIVATE);
registeredForPush = preferences.getBoolean("registeredForPush", false);
pushString = preferences.getString("pushString", "");
pushString = preferences.getString("pushString2", "");
lastSendMessageId = preferences.getInt("lastSendMessageId", -210000);
lastLocalId = preferences.getInt("lastLocalId", -210000);
contactsHash = preferences.getString("contactsHash", "");

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
* Copyright Nikolai Kudashov, 2013-2015.
* Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
@ -64,7 +64,8 @@ public class Utilities {
public native static void loadBitmap(String path, Bitmap bitmap, int scale, int width, int height, int stride);
public native static int pinBitmap(Bitmap bitmap);
public native static void blurBitmap(Object bitmap, int radius, int unpin);
public native static int unpinBitmap(Bitmap bitmap);
public native static void blurBitmap(Object bitmap, int radius, int unpin, int width, int height, int stride);
public native static void calcCDT(ByteBuffer hsvBuffer, int width, int height, ByteBuffer buffer);
public native static boolean loadWebpImage(Bitmap bitmap, ByteBuffer buffer, int len, BitmapFactory.Options options, boolean unpin);
public native static int convertVideoFrame(ByteBuffer src, ByteBuffer dest, int destFormat, int width, int height, int padding, int swap);
@ -91,6 +92,23 @@ public class Utilities {
return val;
}
public static Long parseLong(String value) {
if (value == null) {
return 0L;
}
Long val = 0L;
try {
Matcher matcher = pattern.matcher(value);
if (matcher.find()) {
String num = matcher.group(0);
val = Long.parseLong(num);
}
} catch (Exception e) {
FileLog.e("tmessages", e);
}
return val;
}
public static String parseIntToString(String value) {
Matcher matcher = pattern.matcher(value);
if (matcher.find()) {

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
* Copyright Nikolai Kudashov, 2013-2015.
* Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.ui.ActionBar;

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
* Copyright Nikolai Kudashov, 2013-2015.
* Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.ui.ActionBar;

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
* Copyright Nikolai Kudashov, 2013-2015.
* Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.ui.ActionBar;

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
* Copyright Nikolai Kudashov, 2013-2015.
* Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.ui.ActionBar;
@ -436,7 +436,7 @@ public class ActionBarMenuItem extends FrameLayoutFixed {
searchField.setOnEditorActionListener(new TextView.OnEditorActionListener() {
@Override
public boolean onEditorAction(TextView v, int actionId, KeyEvent event) {
if (actionId == EditorInfo.IME_ACTION_SEARCH || event != null && (event.getAction() == KeyEvent.ACTION_UP && event.getKeyCode() == KeyEvent.KEYCODE_SEARCH || event.getAction() == KeyEvent.ACTION_DOWN && event.getKeyCode() == KeyEvent.KEYCODE_ENTER)) {
if (/*actionId == EditorInfo.IME_ACTION_SEARCH || */event != null && (event.getAction() == KeyEvent.ACTION_UP && event.getKeyCode() == KeyEvent.KEYCODE_SEARCH || event.getAction() == KeyEvent.ACTION_DOWN && event.getKeyCode() == KeyEvent.KEYCODE_ENTER)) {
AndroidUtilities.hideKeyboard(searchField);
if (listener != null) {
listener.onSearchPressed(searchField);

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
* Copyright Nikolai Kudashov, 2013-2015.
* Copyright Nikolai Kudashov, 2013-2016.
*/
//Thanks to https://github.com/JakeWharton/ActionBarSherlock/

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
* Copyright Nikolai Kudashov, 2013-2015.
* Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.ui.ActionBar;

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
* Copyright Nikolai Kudashov, 2013-2015.
* Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.ui.ActionBar;
@ -453,37 +453,39 @@ public class DrawerLayoutContainer extends FrameLayout {
child.measure(drawerWidthSpec, drawerHeightSpec);
}
}
//getDrawerLayout().setBackgroundColor(AndroidUtilities.getIntDef("drawerListColor",0xffffffff)); //Plus
//Plus
updateListBG();
}
private void updateListBG(){
SharedPreferences themePrefs = ApplicationLoader.applicationContext.getSharedPreferences(AndroidUtilities.THEME_PREFS, AndroidUtilities.THEME_PREFS_MODE);
int mainColor = themePrefs.getInt("drawerListColor", 0xffffffff);
int value = themePrefs.getInt("drawerRowGradient", 0);
boolean b = true;//themePrefs.getBoolean("drawerRowGradientListCheck", false);
if(value > 0 && b) {
GradientDrawable.Orientation go;
switch(value) {
case 2:
go = GradientDrawable.Orientation.LEFT_RIGHT;
break;
case 3:
go = GradientDrawable.Orientation.TL_BR;
break;
case 4:
go = GradientDrawable.Orientation.BL_TR;
break;
default:
go = GradientDrawable.Orientation.TOP_BOTTOM;
}
if(getDrawerLayout() != null) {
SharedPreferences themePrefs = ApplicationLoader.applicationContext.getSharedPreferences(AndroidUtilities.THEME_PREFS, AndroidUtilities.THEME_PREFS_MODE);
int mainColor = themePrefs.getInt("drawerListColor", 0xffffffff);
int value = themePrefs.getInt("drawerRowGradient", 0);
boolean b = true;//themePrefs.getBoolean("drawerRowGradientListCheck", false);
if (value > 0 && b) {
GradientDrawable.Orientation go;
switch (value) {
case 2:
go = GradientDrawable.Orientation.LEFT_RIGHT;
break;
case 3:
go = GradientDrawable.Orientation.TL_BR;
break;
case 4:
go = GradientDrawable.Orientation.BL_TR;
break;
default:
go = GradientDrawable.Orientation.TOP_BOTTOM;
}
int gradColor = themePrefs.getInt("drawerRowGradientColor", 0xffffffff);
int[] colors = new int[]{mainColor, gradColor};
GradientDrawable gd = new GradientDrawable(go, colors);
getDrawerLayout().setBackgroundDrawable(gd);
}else{
getDrawerLayout().setBackgroundColor(mainColor);
int gradColor = themePrefs.getInt("drawerRowGradientColor", 0xffffffff);
int[] colors = new int[]{mainColor, gradColor};
GradientDrawable gd = new GradientDrawable(go, colors);
getDrawerLayout().setBackgroundDrawable(gd);
} else {
getDrawerLayout().setBackgroundColor(mainColor);
}
}
}

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
* Copyright Nikolai Kudashov, 2013-2015.
* Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.ui.ActionBar;

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
* Copyright Nikolai Kudashov, 2013-2015.
* Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.ui.Adapters;

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
* Copyright Nikolai Kudashov, 2013-2015.
* Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.ui.Adapters;

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
* Copyright Nikolai Kudashov, 2013-2015.
* Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.ui.Adapters;

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
* Copyright Nikolai Kudashov, 2013-2015.
* Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.ui.Adapters;

View File

@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
* Copyright Nikolai Kudashov, 2013-2015.
* Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.ui.Adapters;

Some files were not shown because too many files have changed in this diff Show More