${toggle_debug_libs} in/exclude debug version of libraries
${toggle_md5} support for output of checksum data
${toggle_static_msvcrt} use static MSVCRT (VS builds only)
+ ${toggle_vp8} VP8 codec support
+ ${toggle_vp9} VP9 codec support
${toggle_internal_stats} output of encoder internal stats for debug, if supported (encoders)
${toggle_mem_tracker} track memory usage
${toggle_postproc} postprocessing
enable fast_unaligned #allow unaligned accesses, if supported by hw
enable md5
enable spatial_resampling
+ enable multithread
enable os_support
+ enable temporal_denoising
[ -d ${source_path}/../include ] && enable alt_tree_layout
- for d in vp9; do
-for d in vp8; do
++for d in vp8 vp9; do
[ -d ${source_path}/${d} ] && disable alt_tree_layout;
done
if ! enabled alt_tree_layout; then
# development environment
+ [ -d ${source_path}/vp8 ] && CODECS="${CODECS} vp8_encoder vp8_decoder"
+[ -d ${source_path}/vp9 ] && CODECS="${CODECS} vp9_encoder vp9_decoder"
else
# customer environment
- [ -f ${source_path}/../include/vpx/vp8cx.h ] && CODECS="${CODECS} vp9_encoder"
- [ -f ${source_path}/../include/vpx/vp8dx.h ] && CODECS="${CODECS} vp9_decoder"
+ [ -f ${source_path}/../include/vpx/vp8cx.h ] && CODECS="${CODECS} vp8_encoder"
+ [ -f ${source_path}/../include/vpx/vp8dx.h ] && CODECS="${CODECS} vp8_decoder"
++[ -f ${source_path}/../include/vpx/vp9cx.h ] && CODECS="${CODECS} vp9_encoder"
++[ -f ${source_path}/../include/vpx/vp9dx.h ] && CODECS="${CODECS} vp9_decoder"
+ [ -f ${source_path}/../include/vpx/vp8cx.h ] || disable vp8_encoder
+ [ -f ${source_path}/../include/vpx/vp8dx.h ] || disable vp8_decoder
++[ -f ${source_path}/../include/vpx/vp9cx.h ] || disable vp9_encoder
++[ -f ${source_path}/../include/vpx/vp9dx.h ] || disable vp9_decoder
[ -f ${source_path}/../lib/*/*mt.lib ] && soft_enable static_msvcrt
fi
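# For illustration: in a development tree containing both vp8/ and vp9/,
# the loop above keeps alt_tree_layout disabled and CODECS becomes
# "vp8_encoder vp8_decoder vp9_encoder vp9_decoder"; a customer tree is
# instead probed via the installed vpx/vp*cx.h and vp*dx.h headers.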
postproc_visualizer
os_support
unit_tests
+ multi_res_encoding
+ temporal_denoising
+ experimental
+ ${EXPERIMENT_LIST}
"
CMDLINE_SELECT="
extra_warnings
small
postproc_visualizer
unit_tests
+ multi_res_encoding
+ temporal_denoising
+ experimental
"
process_cmdline() {
check_add_cflags -Wpointer-arith
check_add_cflags -Wtype-limits
check_add_cflags -Wcast-qual
- check_add_cflags -Wundef
+ check_add_cflags -Wvla
+ check_add_cflags -Wimplicit-function-declaration
+ check_add_cflags -Wuninitialized
+ check_add_cflags -Wunused-variable
+ check_add_cflags -Wunused-but-set-variable
enabled extra_warnings || check_add_cflags -Wno-unused-function
fi
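# Note: check_add_cflags only adds a flag the compiler actually accepts, so
# newer warnings such as -Wunused-but-set-variable are silently skipped on
# toolchains that predate them.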
# We should not link to the math library (libm) on RVCT
# when building for bare-metal targets.
ifeq ($(CONFIG_OS_SUPPORT), yes)
+ CODEC_EXTRA_LIBS-$(CONFIG_VP8) += m
+CODEC_EXTRA_LIBS-$(CONFIG_VP9) += m
else
ifeq ($(CONFIG_GCC), yes)
+ CODEC_EXTRA_LIBS-$(CONFIG_VP8) += m
+ CODEC_EXTRA_LIBS-$(CONFIG_VP9) += m
endif
endif
#
INC_PATH := $(SRC_PATH_BARE)/../include
else
LIB_PATH-yes += $(if $(BUILD_PFX),$(BUILD_PFX),.)
+ INC_PATH-$(CONFIG_VP8_DECODER) += $(SRC_PATH_BARE)/vp8
+ INC_PATH-$(CONFIG_VP8_ENCODER) += $(SRC_PATH_BARE)/vp8
+ INC_PATH-$(CONFIG_VP9_DECODER) += $(SRC_PATH_BARE)/vp9
+ INC_PATH-$(CONFIG_VP9_ENCODER) += $(SRC_PATH_BARE)/vp9
LIB_PATH := $(call enabled,LIB_PATH)
INC_PATH := $(call enabled,INC_PATH)
endif
#ifndef MKV_DEFS_HPP
#define MKV_DEFS_HPP 1
- // Commenting out values not available in webm, but available in matroska
+ /* Commenting out values not available in webm, but available in matroska */
-enum mkv
-{
- EBML = 0x1A45DFA3,
- EBMLVersion = 0x4286,
- EBMLReadVersion = 0x42F7,
- EBMLMaxIDLength = 0x42F2,
- EBMLMaxSizeLength = 0x42F3,
- DocType = 0x4282,
- DocTypeVersion = 0x4287,
- DocTypeReadVersion = 0x4285,
+enum mkv {
+ EBML = 0x1A45DFA3,
+ EBMLVersion = 0x4286,
+ EBMLReadVersion = 0x42F7,
+ EBMLMaxIDLength = 0x42F2,
+ EBMLMaxSizeLength = 0x42F3,
+ DocType = 0x4282,
+ DocTypeVersion = 0x4287,
+ DocTypeReadVersion = 0x4285,
- // CRC_32 = 0xBF,
+ /* CRC_32 = 0xBF, */
- Void = 0xEC,
- SignatureSlot = 0x1B538667,
- SignatureAlgo = 0x7E8A,
- SignatureHash = 0x7E9A,
- SignaturePublicKey = 0x7EA5,
- Signature = 0x7EB5,
- SignatureElements = 0x7E5B,
- SignatureElementList = 0x7E7B,
- SignedElement = 0x6532,
- /* segment */
- Segment = 0x18538067,
- /* Meta Seek Information */
- SeekHead = 0x114D9B74,
- Seek = 0x4DBB,
- SeekID = 0x53AB,
- SeekPosition = 0x53AC,
- /* Segment Information */
- Info = 0x1549A966,
+ Void = 0xEC,
+ SignatureSlot = 0x1B538667,
+ SignatureAlgo = 0x7E8A,
+ SignatureHash = 0x7E9A,
+ SignaturePublicKey = 0x7EA5,
+ Signature = 0x7EB5,
+ SignatureElements = 0x7E5B,
+ SignatureElementList = 0x7E7B,
+ SignedElement = 0x6532,
- // segment
++ /* segment */
+ Segment = 0x18538067,
- // Meta Seek Information
++ /* Meta Seek Information */
+ SeekHead = 0x114D9B74,
+ Seek = 0x4DBB,
+ SeekID = 0x53AB,
+ SeekPosition = 0x53AC,
- // Segment Information
++ /* Segment Information */
+ Info = 0x1549A966,
- // SegmentUID = 0x73A4,
- // SegmentFilename = 0x7384,
- // PrevUID = 0x3CB923,
- // PrevFilename = 0x3C83AB,
- // NextUID = 0x3EB923,
- // NextFilename = 0x3E83BB,
- // SegmentFamily = 0x4444,
- // ChapterTranslate = 0x6924,
- // ChapterTranslateEditionUID = 0x69FC,
- // ChapterTranslateCodec = 0x69BF,
- // ChapterTranslateID = 0x69A5,
+ /* SegmentUID = 0x73A4, */
+ /* SegmentFilename = 0x7384, */
+ /* PrevUID = 0x3CB923, */
+ /* PrevFilename = 0x3C83AB, */
+ /* NextUID = 0x3EB923, */
+ /* NextFilename = 0x3E83BB, */
+ /* SegmentFamily = 0x4444, */
+ /* ChapterTranslate = 0x6924, */
+ /* ChapterTranslateEditionUID = 0x69FC, */
+ /* ChapterTranslateCodec = 0x69BF, */
+ /* ChapterTranslateID = 0x69A5, */
- TimecodeScale = 0x2AD7B1,
- Segment_Duration = 0x4489,
- DateUTC = 0x4461,
+ TimecodeScale = 0x2AD7B1,
+ Segment_Duration = 0x4489,
+ DateUTC = 0x4461,
- // Title = 0x7BA9,
+ /* Title = 0x7BA9, */
- MuxingApp = 0x4D80,
- WritingApp = 0x5741,
- /* Cluster */
- Cluster = 0x1F43B675,
- Timecode = 0xE7,
+ MuxingApp = 0x4D80,
+ WritingApp = 0x5741,
- // Cluster
++ /* Cluster */
+ Cluster = 0x1F43B675,
+ Timecode = 0xE7,
- // SilentTracks = 0x5854,
- // SilentTrackNumber = 0x58D7,
- // Position = 0xA7,
+ /* SilentTracks = 0x5854, */
+ /* SilentTrackNumber = 0x58D7, */
+ /* Position = 0xA7, */
- PrevSize = 0xAB,
- BlockGroup = 0xA0,
- Block = 0xA1,
+ PrevSize = 0xAB,
+ BlockGroup = 0xA0,
+ Block = 0xA1,
- // BlockVirtual = 0xA2,
- // BlockAdditions = 0x75A1,
- // BlockMore = 0xA6,
- // BlockAddID = 0xEE,
- // BlockAdditional = 0xA5,
+ /* BlockVirtual = 0xA2, */
+ /* BlockAdditions = 0x75A1, */
+ /* BlockMore = 0xA6, */
+ /* BlockAddID = 0xEE, */
+ /* BlockAdditional = 0xA5, */
- BlockDuration = 0x9B,
+ BlockDuration = 0x9B,
- // ReferencePriority = 0xFA,
+ /* ReferencePriority = 0xFA, */
- ReferenceBlock = 0xFB,
+ ReferenceBlock = 0xFB,
- // ReferenceVirtual = 0xFD,
- // CodecState = 0xA4,
- // Slices = 0x8E,
- // TimeSlice = 0xE8,
+ /* ReferenceVirtual = 0xFD, */
+ /* CodecState = 0xA4, */
+ /* Slices = 0x8E, */
+ /* TimeSlice = 0xE8, */
- LaceNumber = 0xCC,
+ LaceNumber = 0xCC,
- // FrameNumber = 0xCD,
- // BlockAdditionID = 0xCB,
- // MkvDelay = 0xCE,
- // Cluster_Duration = 0xCF,
+ /* FrameNumber = 0xCD, */
+ /* BlockAdditionID = 0xCB, */
+ /* MkvDelay = 0xCE, */
+ /* Cluster_Duration = 0xCF, */
- SimpleBlock = 0xA3,
+ SimpleBlock = 0xA3,
- // EncryptedBlock = 0xAF,
- // Track
+ /* EncryptedBlock = 0xAF, */
- /* Track */
- Tracks = 0x1654AE6B,
- TrackEntry = 0xAE,
- TrackNumber = 0xD7,
- TrackUID = 0x73C5,
- TrackType = 0x83,
- FlagEnabled = 0xB9,
- FlagDefault = 0x88,
- FlagForced = 0x55AA,
- FlagLacing = 0x9C,
++ /* Track */
+ Tracks = 0x1654AE6B,
+ TrackEntry = 0xAE,
+ TrackNumber = 0xD7,
+ TrackUID = 0x73C5,
+ TrackType = 0x83,
+ FlagEnabled = 0xB9,
+ FlagDefault = 0x88,
+ FlagForced = 0x55AA,
+ FlagLacing = 0x9C,
- // MinCache = 0x6DE7,
- // MaxCache = 0x6DF8,
+ /* MinCache = 0x6DE7, */
+ /* MaxCache = 0x6DF8, */
- DefaultDuration = 0x23E383,
+ DefaultDuration = 0x23E383,
- // TrackTimecodeScale = 0x23314F,
- // TrackOffset = 0x537F,
- // MaxBlockAdditionID = 0x55EE,
+ /* TrackTimecodeScale = 0x23314F, */
+ /* TrackOffset = 0x537F, */
+ /* MaxBlockAdditionID = 0x55EE, */
- Name = 0x536E,
- Language = 0x22B59C,
- CodecID = 0x86,
- CodecPrivate = 0x63A2,
- CodecName = 0x258688,
+ Name = 0x536E,
+ Language = 0x22B59C,
+ CodecID = 0x86,
+ CodecPrivate = 0x63A2,
+ CodecName = 0x258688,
- // AttachmentLink = 0x7446,
- // CodecSettings = 0x3A9697,
- // CodecInfoURL = 0x3B4040,
- // CodecDownloadURL = 0x26B240,
- // CodecDecodeAll = 0xAA,
- // TrackOverlay = 0x6FAB,
- // TrackTranslate = 0x6624,
- // TrackTranslateEditionUID = 0x66FC,
- // TrackTranslateCodec = 0x66BF,
- // TrackTranslateTrackID = 0x66A5,
- // video
+ /* AttachmentLink = 0x7446, */
+ /* CodecSettings = 0x3A9697, */
+ /* CodecInfoURL = 0x3B4040, */
+ /* CodecDownloadURL = 0x26B240, */
+ /* CodecDecodeAll = 0xAA, */
+ /* TrackOverlay = 0x6FAB, */
+ /* TrackTranslate = 0x6624, */
+ /* TrackTranslateEditionUID = 0x66FC, */
+ /* TrackTranslateCodec = 0x66BF, */
+ /* TrackTranslateTrackID = 0x66A5, */
- /* video */
- Video = 0xE0,
- FlagInterlaced = 0x9A,
- StereoMode = 0x53B8,
- PixelWidth = 0xB0,
- PixelHeight = 0xBA,
- PixelCropBottom = 0x54AA,
- PixelCropTop = 0x54BB,
- PixelCropLeft = 0x54CC,
- PixelCropRight = 0x54DD,
- DisplayWidth = 0x54B0,
- DisplayHeight = 0x54BA,
- DisplayUnit = 0x54B2,
- AspectRatioType = 0x54B3,
++ /* video */
+ Video = 0xE0,
+ FlagInterlaced = 0x9A,
+ StereoMode = 0x53B8,
+ PixelWidth = 0xB0,
+ PixelHeight = 0xBA,
+ PixelCropBottom = 0x54AA,
+ PixelCropTop = 0x54BB,
+ PixelCropLeft = 0x54CC,
+ PixelCropRight = 0x54DD,
+ DisplayWidth = 0x54B0,
+ DisplayHeight = 0x54BA,
+ DisplayUnit = 0x54B2,
+ AspectRatioType = 0x54B3,
- // ColourSpace = 0x2EB524,
- // GammaValue = 0x2FB523,
+ /* ColourSpace = 0x2EB524, */
+ /* GammaValue = 0x2FB523, */
- FrameRate = 0x2383E3,
- /* end video */
- /* audio */
- Audio = 0xE1,
- SamplingFrequency = 0xB5,
- OutputSamplingFrequency = 0x78B5,
- Channels = 0x9F,
+ FrameRate = 0x2383E3,
- // end video
- // audio
++ /* end video */
++ /* audio */
+ Audio = 0xE1,
+ SamplingFrequency = 0xB5,
+ OutputSamplingFrequency = 0x78B5,
+ Channels = 0x9F,
- // ChannelPositions = 0x7D7B,
+ /* ChannelPositions = 0x7D7B, */
- BitDepth = 0x6264,
- /* end audio */
- /* content encoding */
+ BitDepth = 0x6264,
- // end audio
- // content encoding
- // ContentEncodings = 0x6d80,
- // ContentEncoding = 0x6240,
- // ContentEncodingOrder = 0x5031,
- // ContentEncodingScope = 0x5032,
- // ContentEncodingType = 0x5033,
- // ContentCompression = 0x5034,
- // ContentCompAlgo = 0x4254,
- // ContentCompSettings = 0x4255,
- // ContentEncryption = 0x5035,
- // ContentEncAlgo = 0x47e1,
- // ContentEncKeyID = 0x47e2,
- // ContentSignature = 0x47e3,
- // ContentSigKeyID = 0x47e4,
- // ContentSigAlgo = 0x47e5,
- // ContentSigHashAlgo = 0x47e6,
- // end content encoding
- // Cueing Data
++ /* end audio */
++ /* content encoding */
+ /* ContentEncodings = 0x6d80, */
+ /* ContentEncoding = 0x6240, */
+ /* ContentEncodingOrder = 0x5031, */
+ /* ContentEncodingScope = 0x5032, */
+ /* ContentEncodingType = 0x5033, */
+ /* ContentCompression = 0x5034, */
+ /* ContentCompAlgo = 0x4254, */
+ /* ContentCompSettings = 0x4255, */
+ /* ContentEncryption = 0x5035, */
+ /* ContentEncAlgo = 0x47e1, */
+ /* ContentEncKeyID = 0x47e2, */
+ /* ContentSignature = 0x47e3, */
+ /* ContentSigKeyID = 0x47e4, */
+ /* ContentSigAlgo = 0x47e5, */
+ /* ContentSigHashAlgo = 0x47e6, */
- /* end content encoding */
- /* Cueing Data */
- Cues = 0x1C53BB6B,
- CuePoint = 0xBB,
- CueTime = 0xB3,
- CueTrackPositions = 0xB7,
- CueTrack = 0xF7,
- CueClusterPosition = 0xF1,
- CueBlockNumber = 0x5378
++ /* end content encoding */
++ /* Cueing Data */
+ Cues = 0x1C53BB6B,
+ CuePoint = 0xBB,
+ CueTime = 0xB3,
+ CueTrackPositions = 0xB7,
+ CueTrack = 0xF7,
+ CueClusterPosition = 0xF1,
- CueBlockNumber = 0x5378,
- // CueCodecState = 0xEA,
- // CueReference = 0xDB,
- // CueRefTime = 0x96,
- // CueRefCluster = 0x97,
- // CueRefNumber = 0x535F,
- // CueRefCodecState = 0xEB,
- // Attachment
- // Attachments = 0x1941A469,
- // AttachedFile = 0x61A7,
- // FileDescription = 0x467E,
- // FileName = 0x466E,
- // FileMimeType = 0x4660,
- // FileData = 0x465C,
- // FileUID = 0x46AE,
- // FileReferral = 0x4675,
- // Chapters
- // Chapters = 0x1043A770,
- // EditionEntry = 0x45B9,
- // EditionUID = 0x45BC,
- // EditionFlagHidden = 0x45BD,
- // EditionFlagDefault = 0x45DB,
- // EditionFlagOrdered = 0x45DD,
- // ChapterAtom = 0xB6,
- // ChapterUID = 0x73C4,
- // ChapterTimeStart = 0x91,
- // ChapterTimeEnd = 0x92,
- // ChapterFlagHidden = 0x98,
- // ChapterFlagEnabled = 0x4598,
- // ChapterSegmentUID = 0x6E67,
- // ChapterSegmentEditionUID = 0x6EBC,
- // ChapterPhysicalEquiv = 0x63C3,
- // ChapterTrack = 0x8F,
- // ChapterTrackNumber = 0x89,
- // ChapterDisplay = 0x80,
- // ChapString = 0x85,
- // ChapLanguage = 0x437C,
- // ChapCountry = 0x437E,
- // ChapProcess = 0x6944,
- // ChapProcessCodecID = 0x6955,
- // ChapProcessPrivate = 0x450D,
- // ChapProcessCommand = 0x6911,
- // ChapProcessTime = 0x6922,
- // ChapProcessData = 0x6933,
- // Tagging
- // Tags = 0x1254C367,
- // Tag = 0x7373,
- // Targets = 0x63C0,
- // TargetTypeValue = 0x68CA,
- // TargetType = 0x63CA,
- // Tagging_TrackUID = 0x63C5,
- // Tagging_EditionUID = 0x63C9,
- // Tagging_ChapterUID = 0x63C4,
- // AttachmentUID = 0x63C6,
- // SimpleTag = 0x67C8,
- // TagName = 0x45A3,
- // TagLanguage = 0x447A,
- // TagDefault = 0x4484,
- // TagString = 0x4487,
- // TagBinary = 0x4485,
++ CueBlockNumber = 0x5378
+ /* CueCodecState = 0xEA, */
+ /* CueReference = 0xDB, */
+ /* CueRefTime = 0x96, */
+ /* CueRefCluster = 0x97, */
+ /* CueRefNumber = 0x535F, */
+ /* CueRefCodecState = 0xEB, */
- /* Attachment */
++ /* Attachment */
+ /* Attachments = 0x1941A469, */
+ /* AttachedFile = 0x61A7, */
+ /* FileDescription = 0x467E, */
+ /* FileName = 0x466E, */
+ /* FileMimeType = 0x4660, */
+ /* FileData = 0x465C, */
+ /* FileUID = 0x46AE, */
+ /* FileReferral = 0x4675, */
- /* Chapters */
++ /* Chapters */
+ /* Chapters = 0x1043A770, */
+ /* EditionEntry = 0x45B9, */
+ /* EditionUID = 0x45BC, */
+ /* EditionFlagHidden = 0x45BD, */
+ /* EditionFlagDefault = 0x45DB, */
+ /* EditionFlagOrdered = 0x45DD, */
+ /* ChapterAtom = 0xB6, */
+ /* ChapterUID = 0x73C4, */
+ /* ChapterTimeStart = 0x91, */
+ /* ChapterTimeEnd = 0x92, */
+ /* ChapterFlagHidden = 0x98, */
+ /* ChapterFlagEnabled = 0x4598, */
+ /* ChapterSegmentUID = 0x6E67, */
+ /* ChapterSegmentEditionUID = 0x6EBC, */
+ /* ChapterPhysicalEquiv = 0x63C3, */
+ /* ChapterTrack = 0x8F, */
+ /* ChapterTrackNumber = 0x89, */
+ /* ChapterDisplay = 0x80, */
+ /* ChapString = 0x85, */
+ /* ChapLanguage = 0x437C, */
+ /* ChapCountry = 0x437E, */
+ /* ChapProcess = 0x6944, */
+ /* ChapProcessCodecID = 0x6955, */
+ /* ChapProcessPrivate = 0x450D, */
+ /* ChapProcessCommand = 0x6911, */
+ /* ChapProcessTime = 0x6922, */
+ /* ChapProcessData = 0x6933, */
- /* Tagging */
++ /* Tagging */
+ /* Tags = 0x1254C367, */
+ /* Tag = 0x7373, */
+ /* Targets = 0x63C0, */
+ /* TargetTypeValue = 0x68CA, */
+ /* TargetType = 0x63CA, */
+ /* Tagging_TrackUID = 0x63C5, */
+ /* Tagging_EditionUID = 0x63C9, */
+ /* Tagging_ChapterUID = 0x63C4, */
+ /* AttachmentUID = 0x63C6, */
+ /* SimpleTag = 0x67C8, */
+ /* TagName = 0x45A3, */
+ /* TagLanguage = 0x447A, */
+ /* TagDefault = 0x4484, */
+ /* TagString = 0x4487, */
+ /* TagBinary = 0x4485, */
};
#endif
#define LITERALU64(n) n##LLU
#endif
- void Ebml_WriteLen(EbmlGlobal *glob, long long val) {
- // TODO check and make sure we are not > than 0x0100000000000000LLU
- unsigned char size = 8; // size in bytes to output
- unsigned long long minVal = LITERALU64(0x00000000000000ff); // mask to compare for byte size
-void Ebml_WriteLen(EbmlGlobal *glob, int64_t val)
-{
- /* TODO check and make sure we are not > than 0x0100000000000000LLU */
- unsigned char size = 8; /* size in bytes to output */
++void Ebml_WriteLen(EbmlGlobal *glob, int64_t val) {
++ /* TODO: check that val is not greater than 0x0100000000000000LLU */
++ unsigned char size = 8; /* size in bytes to output */
+
- /* mask to compare for byte size */
- int64_t minVal = 0xff;
++ /* mask to compare for byte size */
++ int64_t minVal = 0xff;
- for (size = 1; size < 8; size ++)
- {
- if (val < minVal)
- break;
+ for (size = 1; size < 8; size ++) {
+ if (val < minVal)
+ break;
- minVal = (minVal << 7);
- }
+ minVal = (minVal << 7);
+ }
- val |= (LITERALU64(0x000000000000080) << ((size - 1) * 7));
- val |= (((uint64_t)0x80) << ((size - 1) * 7));
++ val |= (((uint64_t)0x80) << ((size - 1) * 7));
- Ebml_Serialize(glob, (void *) &val, sizeof(val), size);
+ Ebml_Serialize(glob, (void *) &val, sizeof(val), size);
}
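+/* Worked example of the length encoding above: val = 500 needs two bytes
+ * (500 >= 0xff but 500 < 0xff << 7), so size = 2 and the marker OR yields
+ * 0x4000 | 0x1F4 = 0x41F4; the leading "01" bits of 0x41 are how EBML
+ * tags a two-byte length field.
+ */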
-void Ebml_WriteString(EbmlGlobal *glob, const char *str)
-{
- const size_t size_ = strlen(str);
- const uint64_t size = size_;
- Ebml_WriteLen(glob, size);
- /* TODO: it's not clear from the spec whether the nul terminator
- * should be serialized too. For now we omit the null terminator.
- */
- Ebml_Write(glob, str, (unsigned long)size);
+void Ebml_WriteString(EbmlGlobal *glob, const char *str) {
+ const size_t size_ = strlen(str);
- const unsigned long long size = size_;
++ const uint64_t size = size_;
+ Ebml_WriteLen(glob, size);
- // TODO: it's not clear from the spec whether the nul terminator
- // should be serialized too. For now we omit the null terminator.
- Ebml_Write(glob, str, size);
++ /* TODO: it's not clear from the spec whether the nul terminator
++ * should be serialized too. For now we omit the null terminator.
++ */
++ Ebml_Write(glob, str, (unsigned long)size);
}
-void Ebml_WriteUTF8(EbmlGlobal *glob, const wchar_t *wstr)
-{
- const size_t strlen = wcslen(wstr);
+void Ebml_WriteUTF8(EbmlGlobal *glob, const wchar_t *wstr) {
+ const size_t strlen = wcslen(wstr);
- // TODO: it's not clear from the spec whether the nul terminator
- // should be serialized too. For now we include it.
- const unsigned long long size = strlen;
- /* TODO: it's not clear from the spec whether the nul terminator
- * should be serialized too. For now we include it.
- */
- const uint64_t size = strlen;
++ /* TODO: it's not clear from the spec whether the nul terminator
++ * should be serialized too. For now we include it.
++ */
++ const uint64_t size = strlen;
- Ebml_WriteLen(glob, size);
- Ebml_Write(glob, wstr, (unsigned long)size);
+ Ebml_WriteLen(glob, size);
- Ebml_Write(glob, wstr, size);
++ Ebml_Write(glob, wstr, (unsigned long)size);
}
-void Ebml_WriteID(EbmlGlobal *glob, unsigned long class_id)
-{
- int len;
+void Ebml_WriteID(EbmlGlobal *glob, unsigned long class_id) {
+ int len;
- if (class_id >= 0x01000000)
- len = 4;
- else if (class_id >= 0x00010000)
- len = 3;
- else if (class_id >= 0x00000100)
- len = 2;
- else
- len = 1;
+ if (class_id >= 0x01000000)
+ len = 4;
+ else if (class_id >= 0x00010000)
+ len = 3;
+ else if (class_id >= 0x00000100)
+ len = 2;
+ else
+ len = 1;
- Ebml_Serialize(glob, (void *)&class_id, sizeof(class_id), len);
+ Ebml_Serialize(glob, (void *)&class_id, sizeof(class_id), len);
}
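+/* For instance, a four-byte ID such as Segment (0x18538067) passes the
+ * first test above and is serialized with len = 4.
+ */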
-void Ebml_SerializeUnsigned64(EbmlGlobal *glob, unsigned long class_id, uint64_t ui)
-{
- unsigned char sizeSerialized = 8 | 0x80;
- Ebml_WriteID(glob, class_id);
- Ebml_Serialize(glob, &sizeSerialized, sizeof(sizeSerialized), 1);
- Ebml_Serialize(glob, &ui, sizeof(ui), 8);
+void Ebml_SerializeUnsigned64(EbmlGlobal *glob, unsigned long class_id, uint64_t ui) {
+ unsigned char sizeSerialized = 8 | 0x80;
+ Ebml_WriteID(glob, class_id);
+ Ebml_Serialize(glob, &sizeSerialized, sizeof(sizeSerialized), 1);
+ Ebml_Serialize(glob, &ui, sizeof(ui), 8);
}
-void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned long ui)
-{
- unsigned char size = 8; /* size in bytes to output */
- unsigned char sizeSerialized = 0;
- unsigned long minVal;
-
- Ebml_WriteID(glob, class_id);
- minVal = 0x7fLU; /* mask to compare for byte size */
+void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned long ui) {
- unsigned char size = 8; // size in bytes to output
++ unsigned char size = 8; /* size in bytes to output */
+ unsigned char sizeSerialized = 0;
+ unsigned long minVal;
- for (size = 1; size < 4; size ++)
- {
- if (ui < minVal)
- {
- break;
- }
+ Ebml_WriteID(glob, class_id);
- minVal = 0x7fLU; // mask to compare for byte size
++ minVal = 0x7fLU; /* mask to compare for byte size */
- minVal <<= 7;
+ for (size = 1; size < 4; size ++) {
+ if (ui < minVal) {
+ break;
}
- sizeSerialized = 0x80 | size;
- Ebml_Serialize(glob, &sizeSerialized, sizeof(sizeSerialized), 1);
- Ebml_Serialize(glob, &ui, sizeof(ui), size);
+ minVal <<= 7;
+ }
+
+ sizeSerialized = 0x80 | size;
+ Ebml_Serialize(glob, &sizeSerialized, sizeof(sizeSerialized), 1);
+ Ebml_Serialize(glob, &ui, sizeof(ui), size);
}
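+/* For example, ui = 200 fails the one-byte test (200 >= 0x7f) but fits in
+ * two bytes (200 < 0x7f << 7), so the element is written as its class ID,
+ * the size descriptor 0x82 (0x80 | 2), and the low two bytes of ui.
+ */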
- // TODO: perhaps this is a poor name for this id serializer helper function
+ /* TODO: perhaps this is a poor name for this id serializer helper function */
-void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned long bin)
-{
- int size;
- for (size=4; size > 1; size--)
- {
- if (bin & 0x000000ff << ((size-1) * 8))
- break;
- }
- Ebml_WriteID(glob, class_id);
- Ebml_WriteLen(glob, size);
- Ebml_WriteID(glob, bin);
+void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned long bin) {
+ int size;
+ for (size = 4; size > 1; size--) {
+ if (bin & 0x000000ff << ((size - 1) * 8))
+ break;
+ }
+ Ebml_WriteID(glob, class_id);
+ Ebml_WriteLen(glob, size);
+ Ebml_WriteID(glob, bin);
}
-void Ebml_SerializeFloat(EbmlGlobal *glob, unsigned long class_id, double d)
-{
- unsigned char len = 0x88;
+void Ebml_SerializeFloat(EbmlGlobal *glob, unsigned long class_id, double d) {
+ unsigned char len = 0x88;
- Ebml_WriteID(glob, class_id);
- Ebml_Serialize(glob, &len, sizeof(len), 1);
- Ebml_Serialize(glob, &d, sizeof(d), 8);
+ Ebml_WriteID(glob, class_id);
+ Ebml_Serialize(glob, &len, sizeof(len), 1);
+ Ebml_Serialize(glob, &d, sizeof(d), 8);
}
-void Ebml_WriteSigned16(EbmlGlobal *glob, short val)
-{
- signed long out = ((val & 0x003FFFFF) | 0x00200000) << 8;
- Ebml_Serialize(glob, &out, sizeof(out), 3);
+void Ebml_WriteSigned16(EbmlGlobal *glob, short val) {
+ signed long out = ((val & 0x003FFFFF) | 0x00200000) << 8;
+ Ebml_Serialize(glob, &out, sizeof(out), 3);
}
-void Ebml_SerializeString(EbmlGlobal *glob, unsigned long class_id, const char *s)
-{
- Ebml_WriteID(glob, class_id);
- Ebml_WriteString(glob, s);
+void Ebml_SerializeString(EbmlGlobal *glob, unsigned long class_id, const char *s) {
+ Ebml_WriteID(glob, class_id);
+ Ebml_WriteString(glob, s);
}
-void Ebml_SerializeUTF8(EbmlGlobal *glob, unsigned long class_id, wchar_t *s)
-{
- Ebml_WriteID(glob, class_id);
- Ebml_WriteUTF8(glob, s);
+void Ebml_SerializeUTF8(EbmlGlobal *glob, unsigned long class_id, wchar_t *s) {
+ Ebml_WriteID(glob, class_id);
+ Ebml_WriteUTF8(glob, s);
}
-void Ebml_SerializeData(EbmlGlobal *glob, unsigned long class_id, unsigned char *data, unsigned long data_length)
-{
- Ebml_WriteID(glob, class_id);
- Ebml_WriteLen(glob, data_length);
- Ebml_Write(glob, data, data_length);
+void Ebml_SerializeData(EbmlGlobal *glob, unsigned long class_id, unsigned char *data, unsigned long data_length) {
+ Ebml_WriteID(glob, class_id);
+ Ebml_WriteLen(glob, data_length);
+ Ebml_Write(glob, data, data_length);
}
-void Ebml_WriteVoid(EbmlGlobal *glob, unsigned long vSize)
-{
- unsigned char tmp = 0;
- unsigned long i = 0;
+void Ebml_WriteVoid(EbmlGlobal *glob, unsigned long vSize) {
+ unsigned char tmp = 0;
+ unsigned long i = 0;
- Ebml_WriteID(glob, 0xEC);
- Ebml_WriteLen(glob, vSize);
+ Ebml_WriteID(glob, 0xEC);
+ Ebml_WriteLen(glob, vSize);
- for (i = 0; i < vSize; i++)
- {
- Ebml_Write(glob, &tmp, 1);
- }
+ for (i = 0; i < vSize; i++) {
+ Ebml_Write(glob, &tmp, 1);
+ }
}
- // TODO Serialize Date
+ /* TODO Serialize Date */
ASM:=.asm
endif
++
++#
++# Calculate platform- and compiler-specific offsets for hand coded assembly
++#
++ifeq ($(filter icc gcc,$(TGT_CC)), $(TGT_CC))
++OFFSET_PATTERN:='^[a-zA-Z0-9_]* EQU'
++define asm_offsets_template
++$$(BUILD_PFX)$(1): $$(BUILD_PFX)$(2).S
++ @echo " [CREATE] $$@"
++ $$(qexec)LC_ALL=C grep $$(OFFSET_PATTERN) $$< | tr -d '$$$$\#' $$(ADS2GAS) > $$@
++$$(BUILD_PFX)$(2).S: $(2)
++CLEAN-OBJS += $$(BUILD_PFX)$(1) $(2).S
++endef
++else
++ ifeq ($(filter rvct,$(TGT_CC)), $(TGT_CC))
++define asm_offsets_template
++$$(BUILD_PFX)$(1): obj_int_extract
++$$(BUILD_PFX)$(1): $$(BUILD_PFX)$(2).o
++ @echo " [CREATE] $$@"
++ $$(qexec)./obj_int_extract rvds $$< $$(ADS2GAS) > $$@
++OBJS-yes += $$(BUILD_PFX)$(2).o
++CLEAN-OBJS += $$(BUILD_PFX)$(1)
++$$(filter %$$(ASM).o,$$(OBJS-yes)): $$(BUILD_PFX)$(1)
++endef
++endif # rvct
++endif # !gcc
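++# Hypothetical instantiation of the template above (argument 1 names the
++# generated .asm file, argument 2 the C source it is derived from; the
++# actual call sites are not part of this hunk):
++#   $(eval $(call asm_offsets_template,\
++#            asm_com_offsets.asm,$(VP8_PREFIX)common/asm_com_offsets.c))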
++
++
CODEC_SRCS-yes += CHANGELOG
CODEC_SRCS-yes += libs.mk
include $(SRC_PATH_BARE)/vpx_scale/vpx_scale.mk
CODEC_SRCS-yes += $(addprefix vpx_scale/,$(call enabled,SCALE_SRCS))
- VP8_PREFIX=vp8/
++ifneq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),)
++ VP8_PREFIX=vp8/
++ include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8_common.mk
++endif
+
+ ifeq ($(CONFIG_VP8_ENCODER),yes)
- CODEC_SRCS-$(ARCH_ARM) += $(VP8_PREFIX)vp8cx_arm.mk
+ include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8cx.mk
+ CODEC_SRCS-yes += $(addprefix $(VP8_PREFIX),$(call enabled,VP8_CX_SRCS))
+ CODEC_EXPORTS-yes += $(addprefix $(VP8_PREFIX),$(VP8_CX_EXPORTS))
+ CODEC_SRCS-yes += $(VP8_PREFIX)vp8cx.mk vpx/vp8.h vpx/vp8cx.h
- VP8_PREFIX=vp8/
++ CODEC_SRCS-$(ARCH_ARM) += $(VP8_PREFIX)vp8cx_arm.mk
+ INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8cx.h
+ INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP8_PREFIX)/%
+ CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8cx.h
+ CODEC_DOC_SECTIONS += vp8 vp8_encoder
+ endif
+
+ ifeq ($(CONFIG_VP8_DECODER),yes)
- CODEC_SRCS-yes += $(VP9_PREFIX)vp9cx.mk vpx/vp8.h vpx/vp8cx.h vpx/vp8e.h
+ include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8dx.mk
+ CODEC_SRCS-yes += $(addprefix $(VP8_PREFIX),$(call enabled,VP8_DX_SRCS))
+ CODEC_EXPORTS-yes += $(addprefix $(VP8_PREFIX),$(VP8_DX_EXPORTS))
+ CODEC_SRCS-yes += $(VP8_PREFIX)vp8dx.mk vpx/vp8.h vpx/vp8dx.h
+ INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8dx.h
+ INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP8_PREFIX)/%
+ CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8dx.h
+ CODEC_DOC_SECTIONS += vp8 vp8_decoder
+ endif
+
++ifneq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_DECODER),)
++ VP9_PREFIX=vp9/
++ include $(SRC_PATH_BARE)/$(VP9_PREFIX)vp9_common.mk
++endif
+
+ifeq ($(CONFIG_VP9_ENCODER),yes)
+ VP9_PREFIX=vp9/
+ include $(SRC_PATH_BARE)/$(VP9_PREFIX)vp9cx.mk
+ CODEC_SRCS-yes += $(addprefix $(VP9_PREFIX),$(call enabled,VP9_CX_SRCS))
+ CODEC_EXPORTS-yes += $(addprefix $(VP9_PREFIX),$(VP9_CX_EXPORTS))
- INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8e.h include/vpx/vp8cx.h
++ CODEC_SRCS-yes += $(VP9_PREFIX)vp9cx.mk vpx/vp8.h vpx/vp8cx.h
+ CODEC_SRCS-$(ARCH_ARM) += $(VP9_PREFIX)vp9cx_arm.mk
++ INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8cx.h
+ INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP9_PREFIX)/%
+ CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8cx.h
+ CODEC_DOC_SECTIONS += vp9 vp9_encoder
+endif
+
+ifeq ($(CONFIG_VP9_DECODER),yes)
+ VP9_PREFIX=vp9/
+ include $(SRC_PATH_BARE)/$(VP9_PREFIX)vp9dx.mk
+ CODEC_SRCS-yes += $(addprefix $(VP9_PREFIX),$(call enabled,VP9_DX_SRCS))
+ CODEC_EXPORTS-yes += $(addprefix $(VP9_PREFIX),$(VP9_DX_EXPORTS))
+ CODEC_SRCS-yes += $(VP9_PREFIX)vp9dx.mk vpx/vp8.h vpx/vp8dx.h
+ INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8dx.h
+ INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP9_PREFIX)/%
+ CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8dx.h
+ CODEC_DOC_SECTIONS += vp9 vp9_decoder
+endif
+
ifeq ($(CONFIG_ENCODERS),yes)
CODEC_DOC_SECTIONS += encoder
vpx.vcproj: $(CODEC_SRCS) vpx.def
@echo " [CREATE] $@"
- $(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \
+ $(qexec)$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \
- --lib \
- --target=$(TOOLCHAIN) \
+ $(if $(CONFIG_SHARED),--dll,--lib) \
+ --target=$(TOOLCHAIN) \
$(if $(CONFIG_STATIC_MSVCRT),--static-crt) \
--name=vpx \
--proj-guid=DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74 \
$(qexec)echo 'Version: $(VERSION_MAJOR).$(VERSION_MINOR).$(VERSION_PATCH)' >> $@
$(qexec)echo 'Requires:' >> $@
$(qexec)echo 'Conflicts:' >> $@
- $(qexec)echo 'Libs: -L$${libdir} -lvpx' >> $@
+ $(qexec)echo 'Libs: -L$${libdir} -lvpx -lm' >> $@
+ $(qexec)echo 'Libs.private: -lm -lpthread' >> $@
$(qexec)echo 'Cflags: -I$${includedir}' >> $@
INSTALL-LIBS-yes += $(LIBSUBDIR)/pkgconfig/vpx.pc
INSTALL_MAPS += $(LIBSUBDIR)/pkgconfig/%.pc %.pc
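# With the echo lines above, the generated vpx.pc ends up containing, e.g.:
#   Libs: -L${libdir} -lvpx -lm
#   Libs.private: -lm -lpthread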
$(filter %.s.o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm
$(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm
--#
--# Calculate platform- and compiler-specific offsets for hand coded assembly
--#
--
--OFFSET_PATTERN:='^[a-zA-Z0-9_]* EQU'
--
--ifeq ($(filter icc gcc,$(TGT_CC)), $(TGT_CC))
- $(BUILD_PFX)asm_com_offsets.asm: $(BUILD_PFX)$(VP9_PREFIX)common/asm_com_offsets.c.S
- $(BUILD_PFX)asm_com_offsets.asm: $(BUILD_PFX)$(VP8_PREFIX)common/asm_com_offsets.c.S
-- @echo " [CREATE] $@"
-- $(qexec)LC_ALL=C grep $(OFFSET_PATTERN) $< | tr -d '$$\#' $(ADS2GAS) > $@
- $(BUILD_PFX)$(VP9_PREFIX)common/asm_com_offsets.c.S: $(VP9_PREFIX)common/asm_com_offsets.c
- CLEAN-OBJS += $(BUILD_PFX)asm_com_offsets.asm $(BUILD_PFX)$(VP9_PREFIX)common/asm_com_offsets.c.S
- $(BUILD_PFX)$(VP8_PREFIX)common/asm_com_offsets.c.S: $(VP8_PREFIX)common/asm_com_offsets.c
- CLEAN-OBJS += $(BUILD_PFX)asm_com_offsets.asm $(BUILD_PFX)$(VP8_PREFIX)common/asm_com_offsets.c.S
--
- $(BUILD_PFX)asm_enc_offsets.asm: $(BUILD_PFX)$(VP9_PREFIX)encoder/asm_enc_offsets.c.S
- $(BUILD_PFX)asm_enc_offsets.asm: $(BUILD_PFX)$(VP8_PREFIX)encoder/asm_enc_offsets.c.S
-- @echo " [CREATE] $@"
-- $(qexec)LC_ALL=C grep $(OFFSET_PATTERN) $< | tr -d '$$\#' $(ADS2GAS) > $@
- $(BUILD_PFX)$(VP9_PREFIX)encoder/asm_enc_offsets.c.S: $(VP9_PREFIX)encoder/asm_enc_offsets.c
- CLEAN-OBJS += $(BUILD_PFX)asm_enc_offsets.asm $(BUILD_PFX)$(VP9_PREFIX)encoder/asm_enc_offsets.c.S
- $(BUILD_PFX)$(VP8_PREFIX)encoder/asm_enc_offsets.c.S: $(VP8_PREFIX)encoder/asm_enc_offsets.c
- CLEAN-OBJS += $(BUILD_PFX)asm_enc_offsets.asm $(BUILD_PFX)$(VP8_PREFIX)encoder/asm_enc_offsets.c.S
--
- $(BUILD_PFX)asm_dec_offsets.asm: $(BUILD_PFX)$(VP9_PREFIX)decoder/asm_dec_offsets.c.S
- $(BUILD_PFX)asm_dec_offsets.asm: $(BUILD_PFX)$(VP8_PREFIX)decoder/asm_dec_offsets.c.S
-- @echo " [CREATE] $@"
-- $(qexec)LC_ALL=C grep $(OFFSET_PATTERN) $< | tr -d '$$\#' $(ADS2GAS) > $@
- $(BUILD_PFX)$(VP9_PREFIX)decoder/asm_dec_offsets.c.S: $(VP9_PREFIX)decoder/asm_dec_offsets.c
- CLEAN-OBJS += $(BUILD_PFX)asm_dec_offsets.asm $(BUILD_PFX)$(VP9_PREFIX)decoder/asm_dec_offsets.c.S
- $(BUILD_PFX)$(VP8_PREFIX)decoder/asm_dec_offsets.c.S: $(VP8_PREFIX)decoder/asm_dec_offsets.c
- CLEAN-OBJS += $(BUILD_PFX)asm_dec_offsets.asm $(BUILD_PFX)$(VP8_PREFIX)decoder/asm_dec_offsets.c.S
--else
-- ifeq ($(filter rvct,$(TGT_CC)), $(TGT_CC))
-- asm_com_offsets.asm: obj_int_extract
- asm_com_offsets.asm: $(VP9_PREFIX)common/asm_com_offsets.c.o
- asm_com_offsets.asm: $(VP8_PREFIX)common/asm_com_offsets.c.o
-- @echo " [CREATE] $@"
-- $(qexec)./obj_int_extract rvds $< $(ADS2GAS) > $@
- OBJS-yes += $(VP9_PREFIX)common/asm_com_offsets.c.o
- OBJS-yes += $(VP8_PREFIX)common/asm_com_offsets.c.o
-- CLEAN-OBJS += asm_com_offsets.asm
-- $(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_com_offsets.asm
--
-- asm_enc_offsets.asm: obj_int_extract
- asm_enc_offsets.asm: $(VP9_PREFIX)encoder/asm_enc_offsets.c.o
- asm_enc_offsets.asm: $(VP8_PREFIX)encoder/asm_enc_offsets.c.o
-- @echo " [CREATE] $@"
-- $(qexec)./obj_int_extract rvds $< $(ADS2GAS) > $@
- OBJS-yes += $(VP9_PREFIX)encoder/asm_enc_offsets.c.o
- OBJS-yes += $(VP8_PREFIX)encoder/asm_enc_offsets.c.o
-- CLEAN-OBJS += asm_enc_offsets.asm
-- $(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_enc_offsets.asm
--
-- asm_dec_offsets.asm: obj_int_extract
- asm_dec_offsets.asm: $(VP9_PREFIX)decoder/asm_dec_offsets.c.o
- asm_dec_offsets.asm: $(VP8_PREFIX)decoder/asm_dec_offsets.c.o
-- @echo " [CREATE] $@"
-- $(qexec)./obj_int_extract rvds $< $(ADS2GAS) > $@
- OBJS-yes += $(VP9_PREFIX)decoder/asm_dec_offsets.c.o
- OBJS-yes += $(VP8_PREFIX)decoder/asm_dec_offsets.c.o
-- CLEAN-OBJS += asm_dec_offsets.asm
-- $(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_dec_offsets.asm
-- endif
--endif
$(shell $(SRC_PATH_BARE)/build/make/version.sh "$(SRC_PATH_BARE)" $(BUILD_PFX)vpx_version.h)
CLEAN-OBJS += $(BUILD_PFX)vpx_version.h
#
# Rule to generate runtime cpu detection files
#
- $(OBJS-yes:.o=.d): vpx_rtcd.h
- vpx_rtcd.h: $(sort $(filter %rtcd_defs.sh,$(CODEC_SRCS)))
+ $(BUILD_PFX)vpx_rtcd.h: $(SRC_PATH_BARE)/$(sort $(filter %rtcd_defs.sh,$(CODEC_SRCS)))
@echo " [CREATE] $@"
$(qexec)$(SRC_PATH_BARE)/build/make/rtcd.sh --arch=$(TGT_ISA) \
- --sym=vpx_rtcd \
- --config=$(target)$(if $(FAT_ARCHS),,-$(TOOLCHAIN)).mk \
- $(RTCD_OPTIONS) $^ > $@
+ --sym=vpx_rtcd \
+ --config=$(target)$(if $(FAT_ARCHS),,-$(TOOLCHAIN)).mk \
+ $(RTCD_OPTIONS) $^ > $@
CLEAN-OBJS += $(BUILD_PFX)vpx_rtcd.h
++
CODEC_DOC_SRCS += vpx/vpx_codec.h \
vpx/vpx_decoder.h \
vpx/vpx_encoder.h \
- LIBVPX_TEST_SRCS-yes += test.mk
LIBVPX_TEST_SRCS-yes += acm_random.h
- LIBVPX_TEST_SRCS-yes += boolcoder_test.cc
- LIBVPX_TEST_SRCS-yes += dct16x16_test.cc
- LIBVPX_TEST_SRCS-yes += fdct4x4_test.cc
- LIBVPX_TEST_SRCS-yes += fdct8x8_test.cc
- LIBVPX_TEST_SRCS-yes += idct8x8_test.cc
+ LIBVPX_TEST_SRCS-yes += test.mk
LIBVPX_TEST_SRCS-yes += test_libvpx.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += fdct4x4_test.cc
+ LIBVPX_TEST_SRCS-yes += util.h
+ LIBVPX_TEST_SRCS-yes += video_source.h
+
+ ##
+ ## BLACK BOX TESTS
+ ##
+ ## Black box tests only use the public API.
+ ##
+ LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += altref_test.cc
+ LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += config_test.cc
+ LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += cq_test.cc
+ LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += datarate_test.cc
+ LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += encode_test_driver.cc
+ LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += encode_test_driver.h
+ LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += error_resilience_test.cc
+ LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += i420_video_source.h
+ LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += keyframe_test.cc
+ LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += resize_test.cc
+
+ LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += ../md5_utils.h ../md5_utils.c
+ LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += decode_test_driver.cc
+ LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += decode_test_driver.h
+ LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += ivf_video_source.h
+ LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += test_vector_test.cc
+ ##
+ ## WHITE BOX TESTS
+ ##
+ ## Whitebox tests invoke functions not exposed via the public API. Certain
+ ## shared library builds don't make these functions accessible.
+ ##
+ ifeq ($(CONFIG_SHARED),)
+
+ # These tests require both the encoder and decoder to be built.
+ ifeq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),yesyes)
+ LIBVPX_TEST_SRCS-yes += boolcoder_test.cc
+ endif
+
+ LIBVPX_TEST_SRCS-yes += idctllm_test.cc
+ LIBVPX_TEST_SRCS-yes += intrapred_test.cc
+ LIBVPX_TEST_SRCS-$(CONFIG_POSTPROC) += pp_filter_test.cc
+ LIBVPX_TEST_SRCS-yes += sad_test.cc
+ LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += set_roi.cc
+ LIBVPX_TEST_SRCS-yes += sixtap_predict_test.cc
+ LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += subtract_test.cc
++LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += vp8_fdct4x4_test.cc
++
++# VP9 tests
++LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc
++LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
++LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct16x16_test.cc
++ifneq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_DECODER),)
++LIBVPX_TEST_SRCS-yes += idct8x8_test.cc
++endif
+
+ endif
+
- LIBVPX_TEST_DATA-yes += hantro_collage_w352h288.yuv
+ ##
+ ## TEST DATA
+ ##
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_ENCODER) += hantro_collage_w352h288.yuv
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-001.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-002.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-003.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-004.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-005.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-006.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-007.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-008.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-009.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-010.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-011.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-012.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-013.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-014.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-015.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-016.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-017.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-018.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1400.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1411.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1416.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1417.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1402.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1412.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1418.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1424.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-01.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-02.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-03.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-04.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1401.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1403.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1407.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1408.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1409.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1410.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1413.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1414.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1415.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1425.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1426.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1427.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1432.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1435.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1436.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1437.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1441.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1442.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1404.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1405.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1406.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1428.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1429.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1430.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1431.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1433.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1434.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1438.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1439.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1440.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1443.ivf
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-001.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-002.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-003.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-004.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-005.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-006.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-007.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-008.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-009.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-010.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-011.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-012.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-013.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-014.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-015.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-016.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-017.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-018.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1400.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1411.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1416.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1417.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1402.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1412.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1418.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1424.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1401.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1403.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1407.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1408.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1409.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1410.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1413.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1414.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1415.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1425.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1426.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1427.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1432.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1435.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1436.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1437.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1441.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1442.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-01.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-02.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-03.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-04.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1404.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1405.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1406.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1428.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1429.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1430.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1431.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1433.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1434.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1438.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1439.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1440.ivf.md5
+ LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1443.ivf.md5
::testing::InitGoogleTest(&argc, argv);
#if ARCH_X86 || ARCH_X86_64
- int simd_caps = x86_simd_caps();
+ const int simd_caps = x86_simd_caps();
- if(!(simd_caps & HAS_MMX))
+ if (!(simd_caps & HAS_MMX))
append_gtest_filter(":-MMX/*");
- if(!(simd_caps & HAS_SSE))
+ if (!(simd_caps & HAS_SSE))
append_gtest_filter(":-SSE/*");
- if(!(simd_caps & HAS_SSE2))
+ if (!(simd_caps & HAS_SSE2))
append_gtest_filter(":-SSE2/*");
- if(!(simd_caps & HAS_SSE3))
+ if (!(simd_caps & HAS_SSE3))
append_gtest_filter(":-SSE3/*");
- if(!(simd_caps & HAS_SSSE3))
+ if (!(simd_caps & HAS_SSSE3))
append_gtest_filter(":-SSSE3/*");
- if(!(simd_caps & HAS_SSE4_1))
+ if (!(simd_caps & HAS_SSE4_1))
append_gtest_filter(":-SSE4_1/*");
#endif
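+// A test instantiated under a SIMD prefix (e.g. SSE2/SixtapPredictTest) is
+// therefore filtered out on machines whose CPU lacks that instruction set.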
--- /dev/null
--- /dev/null
++/*
++* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
++*
++* Use of this source code is governed by a BSD-style license
++* that can be found in the LICENSE file in the root of the source
++* tree. An additional intellectual property rights grant can be found
++* in the file PATENTS. All contributing project authors may
++* be found in the AUTHORS file in the root of the source tree.
++*/
++
++
++#include <math.h>
++#include <stddef.h>
++#include <stdio.h>
++#include <stdlib.h>
++#include <string.h>
++#include <sys/types.h>
++
++
++extern "C" {
++#include "vpx_rtcd.h"
++}
++
++#include "test/acm_random.h"
++#include "third_party/googletest/src/include/gtest/gtest.h"
++#include "vpx/vpx_integer.h"
++
++
++namespace {
++
++const int cospi8sqrt2minus1 = 20091;
++const int sinpi8sqrt2 = 35468;
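++// These are the VP8 IDCT multipliers in Q16 fixed point:
++// 20091 ~= (sqrt(2) * cos(pi / 8) - 1) * 65536 and
++// 35468 ~= sqrt(2) * sin(pi / 8) * 65536.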
++
++void reference_idct4x4(const int16_t *input, int16_t *output) {
++ const int16_t *ip = input;
++ int16_t *op = output;
++
++ for (int i = 0; i < 4; ++i) {
++ const int a1 = ip[0] + ip[8];
++ const int b1 = ip[0] - ip[8];
++ const int temp1 = (ip[4] * sinpi8sqrt2) >> 16;
++ const int temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16);
++ const int c1 = temp1 - temp2;
++ const int temp3 = ip[4] + ((ip[4] * cospi8sqrt2minus1) >> 16);
++ const int temp4 = (ip[12] * sinpi8sqrt2) >> 16;
++ const int d1 = temp3 + temp4;
++ op[0] = a1 + d1;
++ op[12] = a1 - d1;
++ op[4] = b1 + c1;
++ op[8] = b1 - c1;
++ ++ip;
++ ++op;
++ }
++ ip = output;
++ op = output;
++ for (int i = 0; i < 4; ++i) {
++ const int a1 = ip[0] + ip[2];
++ const int b1 = ip[0] - ip[2];
++ const int temp1 = (ip[1] * sinpi8sqrt2) >> 16;
++ const int temp2 = ip[3] + ((ip[3] * cospi8sqrt2minus1) >> 16);
++ const int c1 = temp1 - temp2;
++ const int temp3 = ip[1] + ((ip[1] * cospi8sqrt2minus1) >> 16);
++ const int temp4 = (ip[3] * sinpi8sqrt2) >> 16;
++ const int d1 = temp3 + temp4;
++ op[0] = (a1 + d1 + 4) >> 3;
++ op[3] = (a1 - d1 + 4) >> 3;
++ op[1] = (b1 + c1 + 4) >> 3;
++ op[2] = (b1 - c1 + 4) >> 3;
++ ip += 4;
++ op += 4;
++ }
++}
++
++using libvpx_test::ACMRandom;
++
++TEST(Vp8FdctTest, SignBiasCheck) {
++ ACMRandom rnd(ACMRandom::DeterministicSeed());
++ int16_t test_input_block[16];
++ int16_t test_output_block[16];
++ const int pitch = 8;
++ int count_sign_block[16][2];
++ const int count_test_block = 1000000;
++
++ memset(count_sign_block, 0, sizeof(count_sign_block));
++
++ for (int i = 0; i < count_test_block; ++i) {
++ // Initialize a test block with input range [-255, 255].
++ for (int j = 0; j < 16; ++j)
++ test_input_block[j] = rnd.Rand8() - rnd.Rand8();
++
++ vp8_short_fdct4x4_c(test_input_block, test_output_block, pitch);
++
++ for (int j = 0; j < 16; ++j) {
++ if (test_output_block[j] < 0)
++ ++count_sign_block[j][0];
++ else if (test_output_block[j] > 0)
++ ++count_sign_block[j][1];
++ }
++ }
++
++ bool bias_acceptable = true;
++ for (int j = 0; j < 16; ++j)
++ bias_acceptable = bias_acceptable &&
++ (abs(count_sign_block[j][0] - count_sign_block[j][1]) < 10000);
++
++ EXPECT_EQ(true, bias_acceptable)
++ << "Error: 4x4 FDCT has a sign bias > 1% for input range [-255, 255]";
++
++ memset(count_sign_block, 0, sizeof(count_sign_block));
++
++ for (int i = 0; i < count_test_block; ++i) {
++ // Initialize a test block with input range [-15, 15].
++ for (int j = 0; j < 16; ++j)
++ test_input_block[j] = (rnd.Rand8() >> 4) - (rnd.Rand8() >> 4);
++
++ vp8_short_fdct4x4_c(test_input_block, test_output_block, pitch);
++
++ for (int j = 0; j < 16; ++j) {
++ if (test_output_block[j] < 0)
++ ++count_sign_block[j][0];
++ else if (test_output_block[j] > 0)
++ ++count_sign_block[j][1];
++ }
++ }
++
++ bias_acceptable = true;
++ for (int j = 0; j < 16; ++j)
++ bias_acceptable = bias_acceptable &&
++ (abs(count_sign_block[j][0] - count_sign_block[j][1]) < 100000);
++
++ EXPECT_EQ(true, bias_acceptable)
++ << "Error: 4x4 FDCT has a sign bias > 10% for input range [-15, 15]";
++}
++
++TEST(Vp8FdctTest, RoundTripErrorCheck) {
++ ACMRandom rnd(ACMRandom::DeterministicSeed());
++ int max_error = 0;
++ double total_error = 0;
++ const int count_test_block = 1000000;
++ for (int i = 0; i < count_test_block; ++i) {
++ int16_t test_input_block[16];
++ int16_t test_temp_block[16];
++ int16_t test_output_block[16];
++
++ // Initialize a test block with input range [-255, 255].
++ for (int j = 0; j < 16; ++j)
++ test_input_block[j] = rnd.Rand8() - rnd.Rand8();
++
++ const int pitch = 8;
++ vp8_short_fdct4x4_c(test_input_block, test_temp_block, pitch);
++ reference_idct4x4(test_temp_block, test_output_block);
++
++ for (int j = 0; j < 16; ++j) {
++ const int diff = test_input_block[j] - test_output_block[j];
++ const int error = diff * diff;
++ if (max_error < error)
++ max_error = error;
++ total_error += error;
++ }
++ }
++
++ EXPECT_GE(1, max_error)
++ << "Error: FDCT/IDCT has an individual roundtrip error > 1";
++
++ EXPECT_GE(count_test_block, total_error)
++ << "Error: FDCT/IDCT has average roundtrip error > 1 per block";
++}
++
++} // namespace
--- /dev/null
- BASE_COMMAND = "./configure --enable-internal-stats --enable-experimental"
+#!/usr/bin/python
+
+import getopt
+import subprocess
+import sys
+
+LONG_OPTIONS = ["shard=", "shards="]
++BASE_COMMAND = "./configure --disable-vp8 --disable-unit-tests --enable-internal-stats --enable-experimental"
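+# Hypothetical invocation (assuming this script is saved as all_builds.py):
+# build shard 0 of 4 configurations, forwarding any configure flags given
+# after "--", e.g.:
+#   ./all_builds.py --shard=0 --shards=4 -- --enable-postproc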
+
+def RunCommand(command):
+ run = subprocess.Popen(command, shell=True)
+ output = run.communicate()
+ if run.returncode:
+ print "Non-zero return code: " + str(run.returncode) + " => exiting!"
+ sys.exit(1)
+
+def list_of_experiments():
+ experiments = []
+ configure_file = open("configure")
+ list_start = False
+ for line in configure_file.read().split("\n"):
+ if line == 'EXPERIMENT_LIST="':
+ list_start = True
+ elif line == '"':
+ list_start = False
+ elif list_start:
+ currently_broken = ["csm"]
+ experiment = line[4:]
+ if experiment not in currently_broken:
+ experiments.append(experiment)
+ return experiments
+
+def main(argv):
+ # Parse arguments
+ options = {"--shard": 0, "--shards": 1}
+ if "--" in argv:
+ opt_end_index = argv.index("--")
+ else:
+ opt_end_index = len(argv)
+ try:
+ o, _ = getopt.getopt(argv[1:opt_end_index], None, LONG_OPTIONS)
+ except getopt.GetoptError, err:
+ print str(err)
+ print "Usage: %s [--shard=<n> --shards=<n>] -- [configure flag ...]"%argv[0]
+ sys.exit(2)
+
+ options.update(o)
+ extra_args = argv[opt_end_index + 1:]
+
+ # Shard experiment list
+ shard = int(options["--shard"])
+ shards = int(options["--shards"])
+ experiments = list_of_experiments()
+ base_command = " ".join([BASE_COMMAND] + extra_args)
+ configs = [base_command]
+ configs += ["%s --enable-%s" % (base_command, e) for e in experiments]
+ my_configs = zip(configs, range(len(configs)))
+ my_configs = filter(lambda x: x[1] % shards == shard, my_configs)
+ my_configs = [e[0] for e in my_configs]
+
+ # Run configs for this shard
+ for config in my_configs:
+ test_build(config)
+
+def test_build(configure_command):
+ print "\033[34m\033[47mTesting %s\033[0m" % (configure_command)
+ RunCommand(configure_command)
+ RunCommand("make clean")
+ RunCommand("make")
+
+if __name__ == "__main__":
+ main(sys.argv)
--- /dev/null
- INCLUDE asm_enc_offsets.asm
+ ;
+ ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ ;
+ ; Use of this source code is governed by a BSD-style license
+ ; that can be found in the LICENSE file in the root of the source
+ ; tree. An additional intellectual property rights grant can be found
+ ; in the file PATENTS. All contributing project authors may
+ ; be found in the AUTHORS file in the root of the source tree.
+ ;
+
+
+ EXPORT |vp8_start_encode|
+ EXPORT |vp8_encode_bool|
+ EXPORT |vp8_stop_encode|
+ EXPORT |vp8_encode_value|
+ IMPORT |vp8_validate_buffer_arm|
+
++ INCLUDE vp8_asm_enc_offsets.asm
+
+ ARM
+ REQUIRE8
+ PRESERVE8
+
+ AREA |.text|, CODE, READONLY
+
+ ; macro for validating write buffer position
+ ; needs vp8_writer in r0
+ ; start shall not be in r1
+ MACRO
+ VALIDATE_POS $start, $pos
+ push {r0-r3, r12, lr} ; rest of regs are preserved by subroutine call
+ ldr r2, [r0, #vp8_writer_buffer_end]
+ ldr r3, [r0, #vp8_writer_error]
+ mov r1, $pos
+ mov r0, $start
+ bl vp8_validate_buffer_arm
+ pop {r0-r3, r12, lr}
+ MEND
+
+ ; r0 BOOL_CODER *br
+ ; r1 unsigned char *source
+ ; r2 unsigned char *source_end
+ |vp8_start_encode| PROC
+ str r2, [r0, #vp8_writer_buffer_end]
+ mov r12, #0
+ mov r3, #255
+ mvn r2, #23
+ str r12, [r0, #vp8_writer_lowvalue]
+ str r3, [r0, #vp8_writer_range]
+ str r2, [r0, #vp8_writer_count]
+ str r12, [r0, #vp8_writer_pos]
+ str r1, [r0, #vp8_writer_buffer]
+ bx lr
+ ENDP
+
+ ; r0 BOOL_CODER *br
+ ; r1 int bit
+ ; r2 int probability
+ |vp8_encode_bool| PROC
+ push {r4-r10, lr}
+
+ mov r4, r2
+
+ ldr r2, [r0, #vp8_writer_lowvalue]
+ ldr r5, [r0, #vp8_writer_range]
+ ldr r3, [r0, #vp8_writer_count]
+
+ sub r7, r5, #1 ; range-1
+
+ cmp r1, #0
+ mul r6, r4, r7 ; ((range-1) * probability)
+
+ mov r7, #1
+ add r4, r7, r6, lsr #8 ; 1 + (((range-1) * probability) >> 8)
+
+ addne r2, r2, r4 ; if (bit) lowvalue += split
+ subne r4, r5, r4 ; if (bit) range = range-split
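+ ; Example: with range = 255 and probability = 128, the split computed
+ ; above is 1 + ((254 * 128) >> 8) = 128, so a set bit gives
+ ; lowvalue += 128 and range = 255 - 128 = 127 before renormalization.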
+
+ ; Counting the leading zeros is used to normalize range.
+ clz r6, r4
+ sub r6, r6, #24 ; shift
+
+ ; Flag is set on the sum of count. This flag is used later
+ ; to determine if count >= 0
+ adds r3, r3, r6 ; count += shift
+ lsl r5, r4, r6 ; range <<= shift
+ bmi token_count_lt_zero ; if(count >= 0)
+
+ sub r6, r6, r3 ; offset = shift - count
+ sub r4, r6, #1 ; offset-1
+ lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
+ bpl token_high_bit_not_set
+
+ ldr r4, [r0, #vp8_writer_pos] ; x
+ sub r4, r4, #1 ; x = w->pos-1
+ b token_zero_while_start
+ token_zero_while_loop
+ mov r9, #0
+ strb r9, [r7, r4] ; w->buffer[x] =(unsigned char)0
+ sub r4, r4, #1 ; x--
+ token_zero_while_start
+ cmp r4, #0
+ ldrge r7, [r0, #vp8_writer_buffer]
+ ldrb r1, [r7, r4]
+ cmpge r1, #0xff
+ beq token_zero_while_loop
+
+ ldr r7, [r0, #vp8_writer_buffer]
+ ldrb r9, [r7, r4] ; w->buffer[x]
+ add r9, r9, #1
+ strb r9, [r7, r4] ; w->buffer[x] + 1
+ token_high_bit_not_set
+ rsb r4, r6, #24 ; 24-offset
+ ldr r9, [r0, #vp8_writer_buffer]
+ lsr r7, r2, r4 ; lowvalue >> (24-offset)
+ ldr r4, [r0, #vp8_writer_pos] ; w->pos
+ lsl r2, r2, r6 ; lowvalue <<= offset
+ mov r6, r3 ; shift = count
+ add r1, r4, #1 ; w->pos++
+ bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
+ str r1, [r0, #vp8_writer_pos]
+ sub r3, r3, #8 ; count -= 8
+
+ VALIDATE_POS r9, r1 ; validate_buffer at pos
+
+ strb r7, [r9, r4] ; w->buffer[w->pos++]
+
+ token_count_lt_zero
+ lsl r2, r2, r6 ; lowvalue <<= shift
+
+ str r2, [r0, #vp8_writer_lowvalue]
+ str r5, [r0, #vp8_writer_range]
+ str r3, [r0, #vp8_writer_count]
+ pop {r4-r10, pc}
+ ENDP
+
+ ; r0 BOOL_CODER *br
+ |vp8_stop_encode| PROC
+ push {r4-r10, lr}
+
+ ldr r2, [r0, #vp8_writer_lowvalue]
+ ldr r5, [r0, #vp8_writer_range]
+ ldr r3, [r0, #vp8_writer_count]
+
+ mov r10, #32
+
+ stop_encode_loop
+ sub r7, r5, #1 ; range-1
+
+ mov r4, r7, lsl #7 ; ((range-1) * 128)
+
+ mov r7, #1
+ add r4, r7, r4, lsr #8 ; 1 + (((range-1) * 128) >> 8)
+
+ ; Counting the leading zeros is used to normalize range.
+ clz r6, r4
+ sub r6, r6, #24 ; shift
+
+ ; Flag is set on the sum of count. This flag is used later
+ ; to determine if count >= 0
+ adds r3, r3, r6 ; count += shift
+ lsl r5, r4, r6 ; range <<= shift
+ bmi token_count_lt_zero_se ; if(count >= 0)
+
+ sub r6, r6, r3 ; offset = shift - count
+ sub r4, r6, #1 ; offset-1
+ lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
+ bpl token_high_bit_not_set_se
+
+ ldr r4, [r0, #vp8_writer_pos] ; x
+ sub r4, r4, #1 ; x = w->pos-1
+ b token_zero_while_start_se
+ token_zero_while_loop_se
+ mov r9, #0
+ strb r9, [r7, r4] ; w->buffer[x] =(unsigned char)0
+ sub r4, r4, #1 ; x--
+ token_zero_while_start_se
+ cmp r4, #0
+ ldrge r7, [r0, #vp8_writer_buffer]
+ ldrb r1, [r7, r4]
+ cmpge r1, #0xff
+ beq token_zero_while_loop_se
+
+ ldr r7, [r0, #vp8_writer_buffer]
+ ldrb r9, [r7, r4] ; w->buffer[x]
+ add r9, r9, #1
+ strb r9, [r7, r4] ; w->buffer[x] + 1
+ token_high_bit_not_set_se
+ rsb r4, r6, #24 ; 24-offset
+ ldr r9, [r0, #vp8_writer_buffer]
+ lsr r7, r2, r4 ; lowvalue >> (24-offset)
+ ldr r4, [r0, #vp8_writer_pos] ; w->pos
+ lsl r2, r2, r6 ; lowvalue <<= offset
+ mov r6, r3 ; shift = count
+ add r1, r4, #1 ; w->pos++
+ bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
+ str r1, [r0, #vp8_writer_pos]
+ sub r3, r3, #8 ; count -= 8
+
+ VALIDATE_POS r9, r1 ; validate_buffer at pos
+
+ strb r7, [r9, r4] ; w->buffer[w->pos++]
+
+ token_count_lt_zero_se
+ lsl r2, r2, r6 ; lowvalue <<= shift
+
+ subs r10, r10, #1
+ bne stop_encode_loop
+
+ str r2, [r0, #vp8_writer_lowvalue]
+ str r5, [r0, #vp8_writer_range]
+ str r3, [r0, #vp8_writer_count]
+ pop {r4-r10, pc}
+
+ ENDP
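
vp8_stop_encode is the same step run 32 times with the bit fixed at zero and
probability 128 (note there is no addne/subne pair above), which pushes the
live bits of lowvalue out through the byte-emit path. In terms of the sketch
above:

    static void stop_encode(vp8_writer_t *w) {
        int i;
        /* split = 1 + (((range - 1) * 128) >> 8): the p = 1/2, bit = 0
           case of encode_bool, repeated 32 times (mov r10, #32) */
        for (i = 0; i < 32; i++)
            encode_bool(w, 0, 128);
    }
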
+
+ ; r0 BOOL_CODER *br
+ ; r1 int data
+ ; r2 int bits
+ |vp8_encode_value| PROC
+ push {r4-r12, lr}
+
+ mov r10, r2
+
+ ldr r2, [r0, #vp8_writer_lowvalue]
+ ldr r5, [r0, #vp8_writer_range]
+ ldr r3, [r0, #vp8_writer_count]
+
+ rsb r4, r10, #32 ; 32-n
+
+ ; v is kept in r1 during the token pack loop
+ lsl r1, r1, r4 ; r1 = v << 32 - n
+
+ encode_value_loop
+ sub r7, r5, #1 ; range-1
+
+ ; Decisions are made based on the bit value shifted
+ ; off of v, so set a flag here based on this.
+    ; This value is referred to as "bb"
+ lsls r1, r1, #1 ; bit = v >> n
+ mov r4, r7, lsl #7 ; ((range-1) * 128)
+
+ mov r7, #1
+ add r4, r7, r4, lsr #8 ; 1 + (((range-1) * 128) >> 8)
+
+ addcs r2, r2, r4 ; if (bit) lowvalue += split
+ subcs r4, r5, r4 ; if (bit) range = range-split
+
+ ; Counting the leading zeros is used to normalize range.
+ clz r6, r4
+ sub r6, r6, #24 ; shift
+
+ ; Flag is set on the sum of count. This flag is used later
+ ; to determine if count >= 0
+ adds r3, r3, r6 ; count += shift
+ lsl r5, r4, r6 ; range <<= shift
+ bmi token_count_lt_zero_ev ; if(count >= 0)
+
+ sub r6, r6, r3 ; offset = shift - count
+ sub r4, r6, #1 ; offset-1
+ lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
+ bpl token_high_bit_not_set_ev
+
+ ldr r4, [r0, #vp8_writer_pos] ; x
+ sub r4, r4, #1 ; x = w->pos-1
+ b token_zero_while_start_ev
+ token_zero_while_loop_ev
+ mov r9, #0
+ strb r9, [r7, r4] ; w->buffer[x] =(unsigned char)0
+ sub r4, r4, #1 ; x--
+ token_zero_while_start_ev
+ cmp r4, #0
+ ldrge r7, [r0, #vp8_writer_buffer]
+ ldrb r11, [r7, r4]
+ cmpge r11, #0xff
+ beq token_zero_while_loop_ev
+
+ ldr r7, [r0, #vp8_writer_buffer]
+ ldrb r9, [r7, r4] ; w->buffer[x]
+ add r9, r9, #1
+ strb r9, [r7, r4] ; w->buffer[x] + 1
+ token_high_bit_not_set_ev
+ rsb r4, r6, #24 ; 24-offset
+ ldr r9, [r0, #vp8_writer_buffer]
+ lsr r7, r2, r4 ; lowvalue >> (24-offset)
+ ldr r4, [r0, #vp8_writer_pos] ; w->pos
+ lsl r2, r2, r6 ; lowvalue <<= offset
+ mov r6, r3 ; shift = count
+ add r11, r4, #1 ; w->pos++
+ bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
+ str r11, [r0, #vp8_writer_pos]
+ sub r3, r3, #8 ; count -= 8
+
+ VALIDATE_POS r9, r11 ; validate_buffer at pos
+
+ strb r7, [r9, r4] ; w->buffer[w->pos++]
+
+ token_count_lt_zero_ev
+ lsl r2, r2, r6 ; lowvalue <<= shift
+
+ subs r10, r10, #1
+ bne encode_value_loop
+
+ str r2, [r0, #vp8_writer_lowvalue]
+ str r5, [r0, #vp8_writer_range]
+ str r3, [r0, #vp8_writer_count]
+ pop {r4-r12, pc}
+ ENDP
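
vp8_encode_value writes the low `bits` bits of `data` most-significant-first,
each at probability 1/2; the asm pre-shifts data by 32 - n and peels bits off
the top with lsls. Equivalent C:

    static void encode_value(vp8_writer_t *w, int data, int bits) {
        int bit;
        for (bit = bits - 1; bit >= 0; bit--)
            encode_bool(w, (data >> bit) & 1, 128);
    }
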
+
+ END
--- /dev/null
- INCLUDE asm_enc_offsets.asm
+ ;
+ ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ ;
+ ; Use of this source code is governed by a BSD-style license
+ ; that can be found in the LICENSE file in the root of the source
+ ; tree. An additional intellectual property rights grant can be found
+ ; in the file PATENTS. All contributing project authors may
+ ; be found in the AUTHORS file in the root of the source tree.
+ ;
+
+
+ EXPORT |vp8cx_pack_tokens_armv5|
+ IMPORT |vp8_validate_buffer_arm|
+
++ INCLUDE vp8_asm_enc_offsets.asm
+
+ ARM
+ REQUIRE8
+ PRESERVE8
+
+ AREA |.text|, CODE, READONLY
+
+
+ ; macro for validating write buffer position
+ ; needs vp8_writer in r0
+    ; $start must not be r1, because $pos is moved into r1 first
+ MACRO
+ VALIDATE_POS $start, $pos
+ push {r0-r3, r12, lr} ; rest of regs are preserved by subroutine call
+ ldr r2, [r0, #vp8_writer_buffer_end]
+ ldr r3, [r0, #vp8_writer_error]
+ mov r1, $pos
+ mov r0, $start
+ bl vp8_validate_buffer_arm
+ pop {r0-r3, r12, lr}
+ MEND
+
+
+ ; r0 vp8_writer *w
+ ; r1 const TOKENEXTRA *p
+ ; r2 int xcount
+ ; r3 vp8_coef_encodings
+ ; s0 vp8_extra_bits
+ ; s1 vp8_coef_tree
+ |vp8cx_pack_tokens_armv5| PROC
+ push {r4-r12, lr}
+ sub sp, sp, #16
+
+    ; Add xcount * sizeof(TOKENEXTRA) to get stop
+ ; sizeof (TOKENEXTRA) is 8
+ add r2, r1, r2, lsl #3 ; stop = p + xcount*sizeof(TOKENEXTRA)
+ str r2, [sp, #0]
+ str r3, [sp, #8] ; save vp8_coef_encodings
+ ldr r2, [r0, #vp8_writer_lowvalue]
+ ldr r5, [r0, #vp8_writer_range]
+ ldr r3, [r0, #vp8_writer_count]
+ b check_p_lt_stop
+
+ while_p_lt_stop
+ ldrb r6, [r1, #tokenextra_token] ; t
+ ldr r4, [sp, #8] ; vp8_coef_encodings
+ mov lr, #0
+ add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t
+ ldr r9, [r1, #tokenextra_context_tree] ; pp
+
+ ldrb r7, [r1, #tokenextra_skip_eob_node]
+
+ ldr r6, [r4, #vp8_token_value] ; v
+ ldr r8, [r4, #vp8_token_len] ; n
+
+ ; vp8 specific skip_eob_node
+ cmp r7, #0
+ movne lr, #2 ; i = 2
+ subne r8, r8, #1 ; --n
+
+ rsb r4, r8, #32 ; 32-n
+ ldr r10, [sp, #60] ; vp8_coef_tree
+
+ ; v is kept in r12 during the token pack loop
+ lsl r12, r6, r4 ; r12 = v << 32 - n
+
+ ; loop start
+ token_loop
+ ldrb r4, [r9, lr, asr #1] ; pp [i>>1]
+ sub r7, r5, #1 ; range-1
+
+ ; Decisions are made based on the bit value shifted
+ ; off of v, so set a flag here based on this.
+    ; This value is referred to as "bb"
+ lsls r12, r12, #1 ; bb = v >> n
+ mul r6, r4, r7 ; ((range-1) * pp[i>>1]))
+
+ ; bb can only be 0 or 1. So only execute this statement
+ ; if bb == 1, otherwise it will act like i + 0
+ addcs lr, lr, #1 ; i + bb
+
+ mov r7, #1
+ ldrsb lr, [r10, lr] ; i = vp8_coef_tree[i+bb]
+ add r4, r7, r6, lsr #8 ; 1 + (((range-1) * pp[i>>1]) >> 8)
+
+ addcs r2, r2, r4 ; if (bb) lowvalue += split
+ subcs r4, r5, r4 ; if (bb) range = range-split
+
+ ; Counting the leading zeros is used to normalize range.
+ clz r6, r4
+ sub r6, r6, #24 ; shift
+
+ ; Flag is set on the sum of count. This flag is used later
+ ; to determine if count >= 0
+ adds r3, r3, r6 ; count += shift
+ lsl r5, r4, r6 ; range <<= shift
+ bmi token_count_lt_zero ; if(count >= 0)
+
+ sub r6, r6, r3 ; offset = shift - count
+ sub r4, r6, #1 ; offset-1
+ lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
+ bpl token_high_bit_not_set
+
+ ldr r4, [r0, #vp8_writer_pos] ; x
+ sub r4, r4, #1 ; x = w->pos-1
+ b token_zero_while_start
+ token_zero_while_loop
+ mov r10, #0
+ strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
+ sub r4, r4, #1 ; x--
+ token_zero_while_start
+ cmp r4, #0
+ ldrge r7, [r0, #vp8_writer_buffer]
+ ldrb r11, [r7, r4]
+ cmpge r11, #0xff
+ beq token_zero_while_loop
+
+ ldr r7, [r0, #vp8_writer_buffer]
+ ldrb r10, [r7, r4] ; w->buffer[x]
+ add r10, r10, #1
+ strb r10, [r7, r4] ; w->buffer[x] + 1
+ token_high_bit_not_set
+ rsb r4, r6, #24 ; 24-offset
+ ldr r10, [r0, #vp8_writer_buffer]
+ lsr r7, r2, r4 ; lowvalue >> (24-offset)
+ ldr r4, [r0, #vp8_writer_pos] ; w->pos
+ lsl r2, r2, r6 ; lowvalue <<= offset
+ mov r6, r3 ; shift = count
+ add r11, r4, #1 ; w->pos++
+ bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
+ str r11, [r0, #vp8_writer_pos]
+ sub r3, r3, #8 ; count -= 8
+
+ VALIDATE_POS r10, r11 ; validate_buffer at pos
+
+ strb r7, [r10, r4] ; w->buffer[w->pos++]
+
+ ; r10 is used earlier in the loop, but r10 is used as
+ ; temp variable here. So after r10 is used, reload
+ ; vp8_coef_tree_dcd into r10
+ ldr r10, [sp, #60] ; vp8_coef_tree
+
+ token_count_lt_zero
+ lsl r2, r2, r6 ; lowvalue <<= shift
+
+ subs r8, r8, #1 ; --n
+ bne token_loop
+
+ ldrb r6, [r1, #tokenextra_token] ; t
+ ldr r7, [sp, #56] ; vp8_extra_bits
+ ; Add t * sizeof (vp8_extra_bit_struct) to get the desired
+    ; element. Here sizeof(vp8_extra_bit_struct) == 16
+ add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t
+
+ ldr r4, [r12, #vp8_extra_bit_struct_base_val]
+ cmp r4, #0
+ beq skip_extra_bits
+
+ ; if( b->base_val)
+ ldr r8, [r12, #vp8_extra_bit_struct_len] ; L
+ ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra
+ cmp r8, #0 ; if( L)
+ beq no_extra_bits
+
+ ldr r9, [r12, #vp8_extra_bit_struct_prob]
+ asr r7, lr, #1 ; v=e>>1
+
+ ldr r10, [r12, #vp8_extra_bit_struct_tree]
+ str r10, [sp, #4] ; b->tree
+
+ rsb r4, r8, #32
+ lsl r12, r7, r4
+
+ mov lr, #0 ; i = 0
+
+ extra_bits_loop
+ ldrb r4, [r9, lr, asr #1] ; pp[i>>1]
+ sub r7, r5, #1 ; range-1
+ lsls r12, r12, #1 ; v >> n
+ mul r6, r4, r7 ; (range-1) * pp[i>>1]
+ addcs lr, lr, #1 ; i + bb
+
+ mov r7, #1
+ ldrsb lr, [r10, lr] ; i = b->tree[i+bb]
+ add r4, r7, r6, lsr #8 ; split = 1 + (((range-1) * pp[i>>1]) >> 8)
+
+ addcs r2, r2, r4 ; if (bb) lowvalue += split
+ subcs r4, r5, r4 ; if (bb) range = range-split
+
+ clz r6, r4
+ sub r6, r6, #24
+
+ adds r3, r3, r6 ; count += shift
+ lsl r5, r4, r6 ; range <<= shift
+ bmi extra_count_lt_zero ; if(count >= 0)
+
+ sub r6, r6, r3 ; offset= shift - count
+ sub r4, r6, #1 ; offset-1
+ lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
+ bpl extra_high_bit_not_set
+
+ ldr r4, [r0, #vp8_writer_pos] ; x
+ sub r4, r4, #1 ; x = w->pos - 1
+ b extra_zero_while_start
+ extra_zero_while_loop
+ mov r10, #0
+ strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
+ sub r4, r4, #1 ; x--
+ extra_zero_while_start
+ cmp r4, #0
+ ldrge r7, [r0, #vp8_writer_buffer]
+ ldrb r11, [r7, r4]
+ cmpge r11, #0xff
+ beq extra_zero_while_loop
+
+ ldr r7, [r0, #vp8_writer_buffer]
+ ldrb r10, [r7, r4]
+ add r10, r10, #1
+ strb r10, [r7, r4]
+ extra_high_bit_not_set
+ rsb r4, r6, #24 ; 24-offset
+ ldr r10, [r0, #vp8_writer_buffer]
+ lsr r7, r2, r4 ; lowvalue >> (24-offset)
+ ldr r4, [r0, #vp8_writer_pos]
+ lsl r2, r2, r6 ; lowvalue <<= offset
+ mov r6, r3 ; shift = count
+ add r11, r4, #1 ; w->pos++
+ bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
+ str r11, [r0, #vp8_writer_pos]
+ sub r3, r3, #8 ; count -= 8
+
+ VALIDATE_POS r10, r11 ; validate_buffer at pos
+
+ strb r7, [r10, r4] ; w->buffer[w->pos++]=(lowvalue >> (24-offset))
+ ldr r10, [sp, #4] ; b->tree
+ extra_count_lt_zero
+ lsl r2, r2, r6
+
+ subs r8, r8, #1 ; --n
+ bne extra_bits_loop ; while (n)
+
+ no_extra_bits
+ ldr lr, [r1, #4] ; e = p->Extra
+ add r4, r5, #1 ; range + 1
+ tst lr, #1
+ lsr r4, r4, #1 ; split = (range + 1) >> 1
+ addne r2, r2, r4 ; lowvalue += split
+ subne r4, r5, r4 ; range = range-split
+ tst r2, #0x80000000 ; lowvalue & 0x80000000
+ lsl r5, r4, #1 ; range <<= 1
+ beq end_high_bit_not_set
+
+ ldr r4, [r0, #vp8_writer_pos]
+ mov r7, #0
+ sub r4, r4, #1
+ b end_zero_while_start
+ end_zero_while_loop
+ strb r7, [r6, r4]
+ sub r4, r4, #1 ; x--
+ end_zero_while_start
+ cmp r4, #0
+ ldrge r6, [r0, #vp8_writer_buffer]
+ ldrb r12, [r6, r4]
+ cmpge r12, #0xff
+ beq end_zero_while_loop
+
+ ldr r6, [r0, #vp8_writer_buffer]
+ ldrb r7, [r6, r4]
+ add r7, r7, #1
+ strb r7, [r6, r4]
+ end_high_bit_not_set
+ adds r3, r3, #1 ; ++count
+ lsl r2, r2, #1 ; lowvalue <<= 1
+ bne end_count_zero
+
+ ldr r4, [r0, #vp8_writer_pos]
+    mvn     r3, #7                  ; count = -8
+ ldr r7, [r0, #vp8_writer_buffer]
+ lsr r6, r2, #24 ; lowvalue >> 24
+ add r12, r4, #1 ; w->pos++
+ bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
+ str r12, [r0, #vp8_writer_pos]
+
+ VALIDATE_POS r7, r12 ; validate_buffer at pos
+
+ strb r6, [r7, r4]
+ end_count_zero
+ skip_extra_bits
+ add r1, r1, #TOKENEXTRA_SZ ; ++p
+ check_p_lt_stop
+ ldr r4, [sp, #0] ; stop
+ cmp r1, r4 ; while( p < stop)
+ bcc while_p_lt_stop
+
+ str r2, [r0, #vp8_writer_lowvalue]
+ str r5, [r0, #vp8_writer_range]
+ str r3, [r0, #vp8_writer_count]
+ add sp, sp, #16
+ pop {r4-r12, pc}
+ ENDP
+
+ END
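
The token packer is the same bool-encode step inlined inside a walk of the
coding tree, one TOKENEXTRA at a time, plus the per-token extra bits and the
sign. A condensed C sketch, reusing encode_bool from the sketches above; the
struct layouts are assumptions inferred from the tokenextra_* offsets and the
lsl #3 / lsl #4 indexing (sizeof(TOKENEXTRA) == 8,
sizeof(vp8_extra_bit_struct) == 16):

    /* hypothetical layouts inferred from the offsets the asm uses */
    typedef struct {
        const uint8_t *context_tree;        /* pp                         */
        int16_t Extra;
        uint8_t Token;
        uint8_t skip_eob_node;
    } TOKENEXTRA;                           /* 8 bytes (t << 3 indexing)  */

    typedef struct { int value; int len; } vp8_token_t;

    typedef struct {
        const int8_t  *tree;
        const uint8_t *prob;
        int len;
        int base_val;
    } vp8_extra_bit_t;                      /* 16 bytes (t << 4 indexing) */

    static void pack_tokens(vp8_writer_t *w, const TOKENEXTRA *p, int xcount,
                            const vp8_token_t *encodings,
                            const vp8_extra_bit_t *extra,
                            const int8_t *coef_tree) {
        const TOKENEXTRA *stop = p + xcount;

        for (; p < stop; p++) {
            const vp8_token_t *a = &encodings[p->Token];
            const vp8_extra_bit_t *b = &extra[p->Token];
            const uint8_t *pp = p->context_tree;
            int v = a->value, n = a->len, i = 0;

            if (p->skip_eob_node) {         /* vp8-specific EOB skip      */
                n--;
                i = 2;
            }

            do {                            /* walk the coding tree       */
                const int bb = (v >> --n) & 1;
                encode_bool(w, bb, pp[i >> 1]);
                i = coef_tree[i + bb];
            } while (n);

            if (b->base_val) {              /* token carries extra bits   */
                int e = p->Extra, L = b->len;

                if (L) {                    /* magnitude bits over b->tree */
                    int v2 = e >> 1, n2 = L, i2 = 0;
                    do {
                        const int bb = (v2 >> --n2) & 1;
                        encode_bool(w, bb, b->prob[i2 >> 1]);
                        i2 = b->tree[i2 + bb];
                    } while (n2);
                }

                encode_bool(w, e & 1, 128); /* sign bit at p = 1/2        */
            }
        }
    }
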
--- /dev/null
- INCLUDE asm_enc_offsets.asm
+ ;
+ ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ ;
+ ; Use of this source code is governed by a BSD-style license
+ ; that can be found in the LICENSE file in the root of the source
+ ; tree. An additional intellectual property rights grant can be found
+ ; in the file PATENTS. All contributing project authors may
+ ; be found in the AUTHORS file in the root of the source tree.
+ ;
+
+
+ EXPORT |vp8cx_pack_mb_row_tokens_armv5|
+ IMPORT |vp8_validate_buffer_arm|
+
++ INCLUDE vp8_asm_enc_offsets.asm
+
+ ARM
+ REQUIRE8
+ PRESERVE8
+
+ AREA |.text|, CODE, READONLY
+
+
+ ; macro for validating write buffer position
+ ; needs vp8_writer in r0
+    ; $start must not be r1, because $pos is moved into r1 first
+ MACRO
+ VALIDATE_POS $start, $pos
+ push {r0-r3, r12, lr} ; rest of regs are preserved by subroutine call
+ ldr r2, [r0, #vp8_writer_buffer_end]
+ ldr r3, [r0, #vp8_writer_error]
+ mov r1, $pos
+ mov r0, $start
+ bl vp8_validate_buffer_arm
+ pop {r0-r3, r12, lr}
+ MEND
+
+ ; r0 VP8_COMP *cpi
+ ; r1 vp8_writer *w
+ ; r2 vp8_coef_encodings
+ ; r3 vp8_extra_bits
+ ; s0 vp8_coef_tree
+
+ |vp8cx_pack_mb_row_tokens_armv5| PROC
+ push {r4-r12, lr}
+ sub sp, sp, #24
+
+ ; Compute address of cpi->common.mb_rows
+ ldr r4, _VP8_COMP_common_
+ ldr r6, _VP8_COMMON_MBrows_
+ add r4, r0, r4
+
+ ldr r5, [r4, r6] ; load up mb_rows
+
+ str r2, [sp, #20] ; save vp8_coef_encodings
+ str r5, [sp, #12] ; save mb_rows
+ str r3, [sp, #8] ; save vp8_extra_bits
+
+ ldr r4, _VP8_COMP_tplist_
+ add r4, r0, r4
+ ldr r7, [r4, #0] ; dereference cpi->tp_list
+
+ mov r0, r1 ; keep same as other loops
+
+ ldr r2, [r0, #vp8_writer_lowvalue]
+ ldr r5, [r0, #vp8_writer_range]
+ ldr r3, [r0, #vp8_writer_count]
+
+ mb_row_loop
+
+ ldr r1, [r7, #tokenlist_start]
+ ldr r9, [r7, #tokenlist_stop]
+ str r9, [sp, #0] ; save stop for later comparison
+ str r7, [sp, #16] ; tokenlist address for next time
+
+ b check_p_lt_stop
+
+    ; actual work gets done here!
+
+ while_p_lt_stop
+ ldrb r6, [r1, #tokenextra_token] ; t
+ ldr r4, [sp, #20] ; vp8_coef_encodings
+ mov lr, #0
+ add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t
+ ldr r9, [r1, #tokenextra_context_tree] ; pp
+
+ ldrb r7, [r1, #tokenextra_skip_eob_node]
+
+ ldr r6, [r4, #vp8_token_value] ; v
+ ldr r8, [r4, #vp8_token_len] ; n
+
+ ; vp8 specific skip_eob_node
+ cmp r7, #0
+ movne lr, #2 ; i = 2
+ subne r8, r8, #1 ; --n
+
+ rsb r4, r8, #32 ; 32-n
+ ldr r10, [sp, #64] ; vp8_coef_tree
+
+ ; v is kept in r12 during the token pack loop
+ lsl r12, r6, r4 ; r12 = v << 32 - n
+
+ ; loop start
+ token_loop
+ ldrb r4, [r9, lr, asr #1] ; pp [i>>1]
+ sub r7, r5, #1 ; range-1
+
+ ; Decisions are made based on the bit value shifted
+ ; off of v, so set a flag here based on this.
+    ; This value is referred to as "bb"
+ lsls r12, r12, #1 ; bb = v >> n
+ mul r6, r4, r7 ; ((range-1) * pp[i>>1]))
+
+ ; bb can only be 0 or 1. So only execute this statement
+ ; if bb == 1, otherwise it will act like i + 0
+ addcs lr, lr, #1 ; i + bb
+
+ mov r7, #1
+ ldrsb lr, [r10, lr] ; i = vp8_coef_tree[i+bb]
+ add r4, r7, r6, lsr #8 ; 1 + (((range-1) * pp[i>>1]) >> 8)
+
+ addcs r2, r2, r4 ; if (bb) lowvalue += split
+ subcs r4, r5, r4 ; if (bb) range = range-split
+
+ ; Counting the leading zeros is used to normalize range.
+ clz r6, r4
+ sub r6, r6, #24 ; shift
+
+ ; Flag is set on the sum of count. This flag is used later
+ ; to determine if count >= 0
+ adds r3, r3, r6 ; count += shift
+ lsl r5, r4, r6 ; range <<= shift
+ bmi token_count_lt_zero ; if(count >= 0)
+
+ sub r6, r6, r3 ; offset = shift - count
+ sub r4, r6, #1 ; offset-1
+ lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
+ bpl token_high_bit_not_set
+
+ ldr r4, [r0, #vp8_writer_pos] ; x
+ sub r4, r4, #1 ; x = w->pos-1
+ b token_zero_while_start
+ token_zero_while_loop
+ mov r10, #0
+ strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
+ sub r4, r4, #1 ; x--
+ token_zero_while_start
+ cmp r4, #0
+ ldrge r7, [r0, #vp8_writer_buffer]
+ ldrb r11, [r7, r4]
+ cmpge r11, #0xff
+ beq token_zero_while_loop
+
+ ldr r7, [r0, #vp8_writer_buffer]
+ ldrb r10, [r7, r4] ; w->buffer[x]
+ add r10, r10, #1
+ strb r10, [r7, r4] ; w->buffer[x] + 1
+ token_high_bit_not_set
+ rsb r4, r6, #24 ; 24-offset
+ ldr r10, [r0, #vp8_writer_buffer]
+ lsr r7, r2, r4 ; lowvalue >> (24-offset)
+ ldr r4, [r0, #vp8_writer_pos] ; w->pos
+ lsl r2, r2, r6 ; lowvalue <<= offset
+ mov r6, r3 ; shift = count
+ add r11, r4, #1 ; w->pos++
+ bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
+ str r11, [r0, #vp8_writer_pos]
+ sub r3, r3, #8 ; count -= 8
+
+ VALIDATE_POS r10, r11 ; validate_buffer at pos
+
+ strb r7, [r10, r4] ; w->buffer[w->pos++]
+
+ ; r10 is used earlier in the loop, but r10 is used as
+ ; temp variable here. So after r10 is used, reload
+ ; vp8_coef_tree_dcd into r10
+ ldr r10, [sp, #64] ; vp8_coef_tree
+
+ token_count_lt_zero
+ lsl r2, r2, r6 ; lowvalue <<= shift
+
+ subs r8, r8, #1 ; --n
+ bne token_loop
+
+ ldrb r6, [r1, #tokenextra_token] ; t
+ ldr r7, [sp, #8] ; vp8_extra_bits
+ ; Add t * sizeof (vp8_extra_bit_struct) to get the desired
+    ; element. Here sizeof(vp8_extra_bit_struct) == 16
+ add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t
+
+ ldr r4, [r12, #vp8_extra_bit_struct_base_val]
+ cmp r4, #0
+ beq skip_extra_bits
+
+ ; if( b->base_val)
+ ldr r8, [r12, #vp8_extra_bit_struct_len] ; L
+ ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra
+ cmp r8, #0 ; if( L)
+ beq no_extra_bits
+
+ ldr r9, [r12, #vp8_extra_bit_struct_prob]
+ asr r7, lr, #1 ; v=e>>1
+
+ ldr r10, [r12, #vp8_extra_bit_struct_tree]
+ str r10, [sp, #4] ; b->tree
+
+ rsb r4, r8, #32
+ lsl r12, r7, r4
+
+ mov lr, #0 ; i = 0
+
+ extra_bits_loop
+ ldrb r4, [r9, lr, asr #1] ; pp[i>>1]
+ sub r7, r5, #1 ; range-1
+ lsls r12, r12, #1 ; v >> n
+ mul r6, r4, r7 ; (range-1) * pp[i>>1]
+ addcs lr, lr, #1 ; i + bb
+
+ mov r7, #1
+ ldrsb lr, [r10, lr] ; i = b->tree[i+bb]
+ add r4, r7, r6, lsr #8 ; split = 1 + (((range-1) * pp[i>>1]) >> 8)
+
+ addcs r2, r2, r4 ; if (bb) lowvalue += split
+ subcs r4, r5, r4 ; if (bb) range = range-split
+
+ clz r6, r4
+ sub r6, r6, #24
+
+ adds r3, r3, r6 ; count += shift
+ lsl r5, r4, r6 ; range <<= shift
+ bmi extra_count_lt_zero ; if(count >= 0)
+
+ sub r6, r6, r3 ; offset= shift - count
+ sub r4, r6, #1 ; offset-1
+ lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
+ bpl extra_high_bit_not_set
+
+ ldr r4, [r0, #vp8_writer_pos] ; x
+ sub r4, r4, #1 ; x = w->pos - 1
+ b extra_zero_while_start
+ extra_zero_while_loop
+ mov r10, #0
+ strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
+ sub r4, r4, #1 ; x--
+ extra_zero_while_start
+ cmp r4, #0
+ ldrge r7, [r0, #vp8_writer_buffer]
+ ldrb r11, [r7, r4]
+ cmpge r11, #0xff
+ beq extra_zero_while_loop
+
+ ldr r7, [r0, #vp8_writer_buffer]
+ ldrb r10, [r7, r4]
+ add r10, r10, #1
+ strb r10, [r7, r4]
+ extra_high_bit_not_set
+ rsb r4, r6, #24 ; 24-offset
+ ldr r10, [r0, #vp8_writer_buffer]
+ lsr r7, r2, r4 ; lowvalue >> (24-offset)
+ ldr r4, [r0, #vp8_writer_pos]
+ lsl r2, r2, r6 ; lowvalue <<= offset
+ mov r6, r3 ; shift = count
+ add r11, r4, #1 ; w->pos++
+ bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
+ str r11, [r0, #vp8_writer_pos]
+ sub r3, r3, #8 ; count -= 8
+
+ VALIDATE_POS r10, r11 ; validate_buffer at pos
+
+ strb r7, [r10, r4] ; w->buffer[w->pos++]=(lowvalue >> (24-offset))
+ ldr r10, [sp, #4] ; b->tree
+ extra_count_lt_zero
+ lsl r2, r2, r6
+
+ subs r8, r8, #1 ; --n
+ bne extra_bits_loop ; while (n)
+
+ no_extra_bits
+ ldr lr, [r1, #4] ; e = p->Extra
+ add r4, r5, #1 ; range + 1
+ tst lr, #1
+ lsr r4, r4, #1 ; split = (range + 1) >> 1
+ addne r2, r2, r4 ; lowvalue += split
+ subne r4, r5, r4 ; range = range-split
+ tst r2, #0x80000000 ; lowvalue & 0x80000000
+ lsl r5, r4, #1 ; range <<= 1
+ beq end_high_bit_not_set
+
+ ldr r4, [r0, #vp8_writer_pos]
+ mov r7, #0
+ sub r4, r4, #1
+ b end_zero_while_start
+ end_zero_while_loop
+ strb r7, [r6, r4]
+ sub r4, r4, #1 ; x--
+ end_zero_while_start
+ cmp r4, #0
+ ldrge r6, [r0, #vp8_writer_buffer]
+ ldrb r12, [r6, r4]
+ cmpge r12, #0xff
+ beq end_zero_while_loop
+
+ ldr r6, [r0, #vp8_writer_buffer]
+ ldrb r7, [r6, r4]
+ add r7, r7, #1
+ strb r7, [r6, r4]
+ end_high_bit_not_set
+ adds r3, r3, #1 ; ++count
+ lsl r2, r2, #1 ; lowvalue <<= 1
+ bne end_count_zero
+
+ ldr r4, [r0, #vp8_writer_pos]
+    mvn     r3, #7                  ; count = -8
+ ldr r7, [r0, #vp8_writer_buffer]
+ lsr r6, r2, #24 ; lowvalue >> 24
+ add r12, r4, #1 ; w->pos++
+ bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
+ str r12, [r0, #vp8_writer_pos]
+
+ VALIDATE_POS r7, r12 ; validate_buffer at pos
+
+ strb r6, [r7, r4]
+ end_count_zero
+ skip_extra_bits
+ add r1, r1, #TOKENEXTRA_SZ ; ++p
+ check_p_lt_stop
+ ldr r4, [sp, #0] ; stop
+ cmp r1, r4 ; while( p < stop)
+ bcc while_p_lt_stop
+
+ ldr r6, [sp, #12] ; mb_rows
+ ldr r7, [sp, #16] ; tokenlist address
+ subs r6, r6, #1
+ add r7, r7, #TOKENLIST_SZ ; next element in the array
+ str r6, [sp, #12]
+ bne mb_row_loop
+
+ str r2, [r0, #vp8_writer_lowvalue]
+ str r5, [r0, #vp8_writer_range]
+ str r3, [r0, #vp8_writer_count]
+ add sp, sp, #24
+ pop {r4-r12, pc}
+ ENDP
+
+ _VP8_COMP_common_
+ DCD vp8_comp_common
+ _VP8_COMMON_MBrows_
+ DCD vp8_common_mb_rows
+ _VP8_COMP_tplist_
+ DCD vp8_comp_tplist
+
+ END
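
The mb-row variant wraps the same inner loop in a walk over one TOKENLIST per
macroblock row; mb_rows and the tplist base come out of VP8_COMP through the
_VP8_COMP_*/_VP8_COMMON_* offset constants above. A sketch reusing the types
from the previous sketch, with the TOKENLIST layout assumed from the
tokenlist_start/tokenlist_stop offsets:

    typedef struct { const TOKENEXTRA *start, *stop; } TOKENLIST; /* assumed */

    static void pack_mb_row_tokens(vp8_writer_t *w, const TOKENLIST *tplist,
                                   int mb_rows, const vp8_token_t *encodings,
                                   const vp8_extra_bit_t *extra,
                                   const int8_t *coef_tree) {
        int row;
        for (row = 0; row < mb_rows; row++)
            pack_tokens(w, tplist[row].start,
                        (int)(tplist[row].stop - tplist[row].start),
                        encodings, extra, coef_tree);
    }
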
--- /dev/null
- INCLUDE asm_enc_offsets.asm
+ ;
+ ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ ;
+ ; Use of this source code is governed by a BSD-style license
+ ; that can be found in the LICENSE file in the root of the source
+ ; tree. An additional intellectual property rights grant can be found
+ ; in the file PATENTS. All contributing project authors may
+ ; be found in the AUTHORS file in the root of the source tree.
+ ;
+
+
+ EXPORT |vp8cx_pack_tokens_into_partitions_armv5|
+ IMPORT |vp8_validate_buffer_arm|
+
++ INCLUDE vp8_asm_enc_offsets.asm
+
+ ARM
+ REQUIRE8
+ PRESERVE8
+
+ AREA |.text|, CODE, READONLY
+
+ ; macro for validating write buffer position
+ ; needs vp8_writer in r0
+    ; $start must not be r1, because $pos is moved into r1 first
+ MACRO
+ VALIDATE_POS $start, $pos
+ push {r0-r3, r12, lr} ; rest of regs are preserved by subroutine call
+ ldr r2, [r0, #vp8_writer_buffer_end]
+ ldr r3, [r0, #vp8_writer_error]
+ mov r1, $pos
+ mov r0, $start
+ bl vp8_validate_buffer_arm
+ pop {r0-r3, r12, lr}
+ MEND
+
+ ; r0 VP8_COMP *cpi
+ ; r1 unsigned char *cx_data
+ ; r2 const unsigned char *cx_data_end
+ ; r3 int num_part
+ ; s0 vp8_coef_encodings
+ ; s1 vp8_extra_bits,
+ ; s2 const vp8_tree_index *
+
+ |vp8cx_pack_tokens_into_partitions_armv5| PROC
+ push {r4-r12, lr}
+ sub sp, sp, #40
+
+ ; Compute address of cpi->common.mb_rows
+ ldr r4, _VP8_COMP_common_
+ ldr r6, _VP8_COMMON_MBrows_
+ add r4, r0, r4
+
+ ldr r5, [r4, r6] ; load up mb_rows
+
+ str r5, [sp, #36] ; save mb_rows
+ str r1, [sp, #24] ; save ptr = cx_data
+ str r3, [sp, #20] ; save num_part
+ str r2, [sp, #8] ; save cx_data_end
+
+ ldr r4, _VP8_COMP_tplist_
+ add r4, r0, r4
+ ldr r7, [r4, #0] ; dereference cpi->tp_list
+ str r7, [sp, #32] ; store start of cpi->tp_list
+
+ ldr r11, _VP8_COMP_bc_ ; load up vp8_writer out of cpi
+ add r0, r0, r11
+
+ mov r11, #0
+ str r11, [sp, #28] ; i
+
+ numparts_loop
+ ldr r2, _vp8_writer_sz_ ; load up sizeof(vp8_writer)
+ add r0, r2 ; bc[i + 1]
+
+ ldr r10, [sp, #24] ; ptr
+ ldr r5, [sp, #36] ; move mb_rows to the counting section
+ subs r5, r5, r11 ; move start point with each partition
+ ; mb_rows starts at i
+ str r5, [sp, #12]
+
+ ; Reset all of the VP8 Writer data for each partition that
+ ; is processed.
+ ; start_encode
+
+ ldr r3, [sp, #8]
+ str r3, [r0, #vp8_writer_buffer_end]
+
+ mov r2, #0 ; vp8_writer_lowvalue
+ mov r5, #255 ; vp8_writer_range
+ mvn r3, #23 ; vp8_writer_count
+
+ str r2, [r0, #vp8_writer_pos]
+ str r10, [r0, #vp8_writer_buffer]
+
+ ble end_partition ; if (mb_rows <= 0) end partition
+
+ mb_row_loop
+
+ ldr r1, [r7, #tokenlist_start]
+ ldr r9, [r7, #tokenlist_stop]
+ str r9, [sp, #0] ; save stop for later comparison
+ str r7, [sp, #16] ; tokenlist address for next time
+
+ b check_p_lt_stop
+
+ ; actual work gets done here!
+
+ while_p_lt_stop
+ ldrb r6, [r1, #tokenextra_token] ; t
+ ldr r4, [sp, #80] ; vp8_coef_encodings
+ mov lr, #0
+ add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t
+ ldr r9, [r1, #tokenextra_context_tree] ; pp
+
+ ldrb r7, [r1, #tokenextra_skip_eob_node]
+
+ ldr r6, [r4, #vp8_token_value] ; v
+ ldr r8, [r4, #vp8_token_len] ; n
+
+ ; vp8 specific skip_eob_node
+ cmp r7, #0
+ movne lr, #2 ; i = 2
+ subne r8, r8, #1 ; --n
+
+ rsb r4, r8, #32 ; 32-n
+ ldr r10, [sp, #88] ; vp8_coef_tree
+
+ ; v is kept in r12 during the token pack loop
+ lsl r12, r6, r4 ; r12 = v << 32 - n
+
+ ; loop start
+ token_loop
+ ldrb r4, [r9, lr, asr #1] ; pp [i>>1]
+ sub r7, r5, #1 ; range-1
+
+ ; Decisions are made based on the bit value shifted
+ ; off of v, so set a flag here based on this.
+    ; This value is referred to as "bb"
+ lsls r12, r12, #1 ; bb = v >> n
+ mul r6, r4, r7 ; ((range-1) * pp[i>>1]))
+
+ ; bb can only be 0 or 1. So only execute this statement
+ ; if bb == 1, otherwise it will act like i + 0
+ addcs lr, lr, #1 ; i + bb
+
+ mov r7, #1
+ ldrsb lr, [r10, lr] ; i = vp8_coef_tree[i+bb]
+ add r4, r7, r6, lsr #8 ; 1 + (((range-1) * pp[i>>1]) >> 8)
+
+ addcs r2, r2, r4 ; if (bb) lowvalue += split
+ subcs r4, r5, r4 ; if (bb) range = range-split
+
+ ; Counting the leading zeros is used to normalize range.
+ clz r6, r4
+ sub r6, r6, #24 ; shift
+
+ ; Flag is set on the sum of count. This flag is used later
+ ; to determine if count >= 0
+ adds r3, r3, r6 ; count += shift
+ lsl r5, r4, r6 ; range <<= shift
+ bmi token_count_lt_zero ; if(count >= 0)
+
+ sub r6, r6, r3 ; offset = shift - count
+ sub r4, r6, #1 ; offset-1
+ lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
+ bpl token_high_bit_not_set
+
+ ldr r4, [r0, #vp8_writer_pos] ; x
+ sub r4, r4, #1 ; x = w->pos-1
+ b token_zero_while_start
+ token_zero_while_loop
+ mov r10, #0
+ strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
+ sub r4, r4, #1 ; x--
+ token_zero_while_start
+ cmp r4, #0
+ ldrge r7, [r0, #vp8_writer_buffer]
+ ldrb r11, [r7, r4]
+ cmpge r11, #0xff
+ beq token_zero_while_loop
+
+ ldr r7, [r0, #vp8_writer_buffer]
+ ldrb r10, [r7, r4] ; w->buffer[x]
+ add r10, r10, #1
+ strb r10, [r7, r4] ; w->buffer[x] + 1
+ token_high_bit_not_set
+ rsb r4, r6, #24 ; 24-offset
+ ldr r10, [r0, #vp8_writer_buffer]
+ lsr r7, r2, r4 ; lowvalue >> (24-offset)
+ ldr r4, [r0, #vp8_writer_pos] ; w->pos
+ lsl r2, r2, r6 ; lowvalue <<= offset
+ mov r6, r3 ; shift = count
+ add r11, r4, #1 ; w->pos++
+ bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
+ str r11, [r0, #vp8_writer_pos]
+ sub r3, r3, #8 ; count -= 8
+
+ VALIDATE_POS r10, r11 ; validate_buffer at pos
+
+ strb r7, [r10, r4] ; w->buffer[w->pos++]
+
+ ; r10 is used earlier in the loop, but r10 is used as
+ ; temp variable here. So after r10 is used, reload
+ ; vp8_coef_tree_dcd into r10
+ ldr r10, [sp, #88] ; vp8_coef_tree
+
+ token_count_lt_zero
+ lsl r2, r2, r6 ; lowvalue <<= shift
+
+ subs r8, r8, #1 ; --n
+ bne token_loop
+
+ ldrb r6, [r1, #tokenextra_token] ; t
+ ldr r7, [sp, #84] ; vp8_extra_bits
+ ; Add t * sizeof (vp8_extra_bit_struct) to get the desired
+    ; element. Here sizeof(vp8_extra_bit_struct) == 16
+ add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t
+
+ ldr r4, [r12, #vp8_extra_bit_struct_base_val]
+ cmp r4, #0
+ beq skip_extra_bits
+
+ ; if( b->base_val)
+ ldr r8, [r12, #vp8_extra_bit_struct_len] ; L
+ ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra
+ cmp r8, #0 ; if( L)
+ beq no_extra_bits
+
+ ldr r9, [r12, #vp8_extra_bit_struct_prob]
+ asr r7, lr, #1 ; v=e>>1
+
+ ldr r10, [r12, #vp8_extra_bit_struct_tree]
+ str r10, [sp, #4] ; b->tree
+
+ rsb r4, r8, #32
+ lsl r12, r7, r4
+
+ mov lr, #0 ; i = 0
+
+ extra_bits_loop
+ ldrb r4, [r9, lr, asr #1] ; pp[i>>1]
+ sub r7, r5, #1 ; range-1
+ lsls r12, r12, #1 ; v >> n
+ mul r6, r4, r7 ; (range-1) * pp[i>>1]
+ addcs lr, lr, #1 ; i + bb
+
+ mov r7, #1
+ ldrsb lr, [r10, lr] ; i = b->tree[i+bb]
+ add r4, r7, r6, lsr #8 ; split = 1 + (((range-1) * pp[i>>1]) >> 8)
+
+ addcs r2, r2, r4 ; if (bb) lowvalue += split
+ subcs r4, r5, r4 ; if (bb) range = range-split
+
+ clz r6, r4
+ sub r6, r6, #24
+
+ adds r3, r3, r6 ; count += shift
+ lsl r5, r4, r6 ; range <<= shift
+ bmi extra_count_lt_zero ; if(count >= 0)
+
+ sub r6, r6, r3 ; offset= shift - count
+ sub r4, r6, #1 ; offset-1
+ lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
+ bpl extra_high_bit_not_set
+
+ ldr r4, [r0, #vp8_writer_pos] ; x
+ sub r4, r4, #1 ; x = w->pos - 1
+ b extra_zero_while_start
+ extra_zero_while_loop
+ mov r10, #0
+ strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
+ sub r4, r4, #1 ; x--
+ extra_zero_while_start
+ cmp r4, #0
+ ldrge r7, [r0, #vp8_writer_buffer]
+ ldrb r11, [r7, r4]
+ cmpge r11, #0xff
+ beq extra_zero_while_loop
+
+ ldr r7, [r0, #vp8_writer_buffer]
+ ldrb r10, [r7, r4]
+ add r10, r10, #1
+ strb r10, [r7, r4]
+ extra_high_bit_not_set
+ rsb r4, r6, #24 ; 24-offset
+ ldr r10, [r0, #vp8_writer_buffer]
+ lsr r7, r2, r4 ; lowvalue >> (24-offset)
+ ldr r4, [r0, #vp8_writer_pos]
+ lsl r2, r2, r6 ; lowvalue <<= offset
+ mov r6, r3 ; shift = count
+ add r11, r4, #1 ; w->pos++
+ bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
+ str r11, [r0, #vp8_writer_pos]
+ sub r3, r3, #8 ; count -= 8
+
+ VALIDATE_POS r10, r11 ; validate_buffer at pos
+
+ strb r7, [r10, r4] ; w->buffer[w->pos++]=(lowvalue >> (24-offset))
+ ldr r10, [sp, #4] ; b->tree
+ extra_count_lt_zero
+ lsl r2, r2, r6
+
+ subs r8, r8, #1 ; --n
+ bne extra_bits_loop ; while (n)
+
+ no_extra_bits
+ ldr lr, [r1, #4] ; e = p->Extra
+ add r4, r5, #1 ; range + 1
+ tst lr, #1
+ lsr r4, r4, #1 ; split = (range + 1) >> 1
+ addne r2, r2, r4 ; lowvalue += split
+ subne r4, r5, r4 ; range = range-split
+ tst r2, #0x80000000 ; lowvalue & 0x80000000
+ lsl r5, r4, #1 ; range <<= 1
+ beq end_high_bit_not_set
+
+ ldr r4, [r0, #vp8_writer_pos]
+ mov r7, #0
+ sub r4, r4, #1
+ b end_zero_while_start
+ end_zero_while_loop
+ strb r7, [r6, r4]
+ sub r4, r4, #1 ; x--
+ end_zero_while_start
+ cmp r4, #0
+ ldrge r6, [r0, #vp8_writer_buffer]
+ ldrb r12, [r6, r4]
+ cmpge r12, #0xff
+ beq end_zero_while_loop
+
+ ldr r6, [r0, #vp8_writer_buffer]
+ ldrb r7, [r6, r4]
+ add r7, r7, #1
+ strb r7, [r6, r4]
+ end_high_bit_not_set
+ adds r3, r3, #1 ; ++count
+ lsl r2, r2, #1 ; lowvalue <<= 1
+ bne end_count_zero
+
+ ldr r4, [r0, #vp8_writer_pos]
+ mvn r3, #7 ; count = -8
+ ldr r7, [r0, #vp8_writer_buffer]
+ lsr r6, r2, #24 ; lowvalue >> 24
+ add r12, r4, #1 ; w->pos++
+ bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
+ str r12, [r0, #vp8_writer_pos]
+
+ VALIDATE_POS r7, r12 ; validate_buffer at pos
+
+ strb r6, [r7, r4]
+ end_count_zero
+ skip_extra_bits
+ add r1, r1, #TOKENEXTRA_SZ ; ++p
+ check_p_lt_stop
+ ldr r4, [sp, #0] ; stop
+ cmp r1, r4 ; while( p < stop)
+ bcc while_p_lt_stop
+
+    ldr     r10, [sp, #20]          ; num_part
+ mov r1, #TOKENLIST_SZ
+ mul r1, r10, r1
+
+ ldr r6, [sp, #12] ; mb_rows
+ ldr r7, [sp, #16] ; tokenlist address
+ subs r6, r6, r10
+ add r7, r7, r1 ; next element in the array
+ str r6, [sp, #12]
+ bgt mb_row_loop
+
+ end_partition
+ mov r12, #32
+
+ stop_encode_loop
+ sub r7, r5, #1 ; range-1
+
+ mov r4, r7, lsl #7 ; ((range-1) * 128)
+
+ mov r7, #1
+ add r4, r7, r4, lsr #8 ; 1 + (((range-1) * 128) >> 8)
+
+ ; Counting the leading zeros is used to normalize range.
+ clz r6, r4
+ sub r6, r6, #24 ; shift
+
+ ; Flag is set on the sum of count. This flag is used later
+ ; to determine if count >= 0
+ adds r3, r3, r6 ; count += shift
+ lsl r5, r4, r6 ; range <<= shift
+ bmi token_count_lt_zero_se ; if(count >= 0)
+
+ sub r6, r6, r3 ; offset = shift - count
+ sub r4, r6, #1 ; offset-1
+ lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
+ bpl token_high_bit_not_set_se
+
+ ldr r4, [r0, #vp8_writer_pos] ; x
+ sub r4, r4, #1 ; x = w->pos-1
+ b token_zero_while_start_se
+ token_zero_while_loop_se
+ mov r10, #0
+ strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
+ sub r4, r4, #1 ; x--
+ token_zero_while_start_se
+ cmp r4, #0
+ ldrge r7, [r0, #vp8_writer_buffer]
+ ldrb r11, [r7, r4]
+ cmpge r11, #0xff
+ beq token_zero_while_loop_se
+
+ ldr r7, [r0, #vp8_writer_buffer]
+ ldrb r10, [r7, r4] ; w->buffer[x]
+ add r10, r10, #1
+ strb r10, [r7, r4] ; w->buffer[x] + 1
+ token_high_bit_not_set_se
+ rsb r4, r6, #24 ; 24-offset
+ ldr r10, [r0, #vp8_writer_buffer]
+ lsr r7, r2, r4 ; lowvalue >> (24-offset)
+ ldr r4, [r0, #vp8_writer_pos] ; w->pos
+ lsl r2, r2, r6 ; lowvalue <<= offset
+ mov r6, r3 ; shift = count
+ add r11, r4, #1 ; w->pos++
+ bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
+ str r11, [r0, #vp8_writer_pos]
+ sub r3, r3, #8 ; count -= 8
+
+ VALIDATE_POS r10, r11 ; validate_buffer at pos
+
+ strb r7, [r10, r4] ; w->buffer[w->pos++]
+
+ token_count_lt_zero_se
+ lsl r2, r2, r6 ; lowvalue <<= shift
+
+ subs r12, r12, #1
+ bne stop_encode_loop
+
+ ldr r4, [r0, #vp8_writer_pos] ; w->pos
+ ldr r12, [sp, #24] ; ptr
+ add r12, r12, r4 ; ptr += w->pos
+ str r12, [sp, #24]
+
+ ldr r11, [sp, #28] ; i
+    ldr     r10, [sp, #20]          ; num_part
+
+ add r11, r11, #1 ; i++
+ str r11, [sp, #28]
+
+ ldr r7, [sp, #32] ; cpi->tp_list[i]
+ mov r1, #TOKENLIST_SZ
+ add r7, r7, r1 ; next element in cpi->tp_list
+ str r7, [sp, #32] ; cpi->tp_list[i+1]
+
+ cmp r10, r11
+ bgt numparts_loop
+
+ add sp, sp, #40
+ pop {r4-r12, pc}
+ ENDP
+
+ _VP8_COMP_common_
+ DCD vp8_comp_common
+ _VP8_COMMON_MBrows_
+ DCD vp8_common_mb_rows
+ _VP8_COMP_tplist_
+ DCD vp8_comp_tplist
+ _VP8_COMP_bc_
+ DCD vp8_comp_bc
+ _vp8_writer_sz_
+ DCD vp8_writer_sz
+
+ END
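
The partition packer interleaves rows across partitions: partition i takes
macroblock rows i, i + num_part, i + 2*num_part, ..., written through its own
bool coder (the asm begins at bc[1], stepping r0 by sizeof(vp8_writer) each
outer pass, and advances the output pointer by w->pos after each flush). A
sketch of the outer structure in terms of the earlier helpers:

    static void pack_tokens_into_partitions(vp8_writer_t *bc,
                                            const TOKENLIST *tplist,
                                            int mb_rows, int num_part,
                                            uint8_t *cx_data,
                                            uint8_t *cx_data_end,
                                            const vp8_token_t *encodings,
                                            const vp8_extra_bit_t *extra,
                                            const int8_t *coef_tree) {
        uint8_t *ptr = cx_data;
        int i, row;

        for (i = 0; i < num_part; i++) {
            vp8_writer_t *w = &bc[i + 1];      /* the asm starts at bc[1] */

            start_encode(w, ptr, cx_data_end); /* the inline reset above  */
            for (row = i; row < mb_rows; row += num_part)
                pack_tokens(w, tplist[row].start,
                            (int)(tplist[row].stop - tplist[row].start),
                            encodings, extra, coef_tree);
            stop_encode(w);                    /* the 32-pass flush loop  */

            ptr += w->pos;                     /* next partition's output */
        }
    }
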
--- /dev/null
- INCLUDE asm_enc_offsets.asm
+ ;
+ ; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+ ;
+ ; Use of this source code is governed by a BSD-style license
+ ; that can be found in the LICENSE file in the root of the source
+ ; tree. An additional intellectual property rights grant can be found
+ ; in the file PATENTS. All contributing project authors may
+ ; be found in the AUTHORS file in the root of the source tree.
+ ;
+
+
+ EXPORT |vp8_fast_quantize_b_armv6|
+
++ INCLUDE vp8_asm_enc_offsets.asm
+
+ ARM
+ REQUIRE8
+ PRESERVE8
+
+ AREA ||.text||, CODE, READONLY, ALIGN=2
+
+ ; r0 BLOCK *b
+ ; r1 BLOCKD *d
+ |vp8_fast_quantize_b_armv6| PROC
+ stmfd sp!, {r1, r4-r11, lr}
+
+ ldr r3, [r0, #vp8_block_coeff] ; coeff
+ ldr r4, [r0, #vp8_block_quant_fast] ; quant_fast
+ ldr r5, [r0, #vp8_block_round] ; round
+ ldr r6, [r1, #vp8_blockd_qcoeff] ; qcoeff
+ ldr r7, [r1, #vp8_blockd_dqcoeff] ; dqcoeff
+ ldr r8, [r1, #vp8_blockd_dequant] ; dequant
+
+ ldr r2, loop_count ; loop_count=0x1000000. 'lsls' instruction
+ ; is used to update the counter so that
+ ; it can be used to mark nonzero
+ ; quantized coefficient pairs.
+
+ mov r1, #0 ; flags for quantized coeffs
+
+ ; PART 1: quantization and dequantization loop
+ loop
+ ldr r9, [r3], #4 ; [z1 | z0]
+ ldr r10, [r5], #4 ; [r1 | r0]
+ ldr r11, [r4], #4 ; [q1 | q0]
+
+ ssat16 lr, #1, r9 ; [sz1 | sz0]
+ eor r9, r9, lr ; [z1 ^ sz1 | z0 ^ sz0]
+ ssub16 r9, r9, lr ; x = (z ^ sz) - sz
+ sadd16 r9, r9, r10 ; [x1+r1 | x0+r0]
+
+ ldr r12, [r3], #4 ; [z3 | z2]
+
+ smulbb r0, r9, r11 ; [(x0+r0)*q0]
+ smultt r9, r9, r11 ; [(x1+r1)*q1]
+
+ ldr r10, [r5], #4 ; [r3 | r2]
+
+ ssat16 r11, #1, r12 ; [sz3 | sz2]
+ eor r12, r12, r11 ; [z3 ^ sz3 | z2 ^ sz2]
+ pkhtb r0, r9, r0, asr #16 ; [y1 | y0]
+ ldr r9, [r4], #4 ; [q3 | q2]
+ ssub16 r12, r12, r11 ; x = (z ^ sz) - sz
+
+ sadd16 r12, r12, r10 ; [x3+r3 | x2+r2]
+
+ eor r0, r0, lr ; [(y1 ^ sz1) | (y0 ^ sz0)]
+
+ smulbb r10, r12, r9 ; [(x2+r2)*q2]
+ smultt r12, r12, r9 ; [(x3+r3)*q3]
+
+ ssub16 r0, r0, lr ; x = (y ^ sz) - sz
+
+ cmp r0, #0 ; check if zero
+ orrne r1, r1, r2, lsr #24 ; add flag for nonzero coeffs
+
+ str r0, [r6], #4 ; *qcoeff++ = x
+ ldr r9, [r8], #4 ; [dq1 | dq0]
+
+ pkhtb r10, r12, r10, asr #16 ; [y3 | y2]
+ eor r10, r10, r11 ; [(y3 ^ sz3) | (y2 ^ sz2)]
+ ssub16 r10, r10, r11 ; x = (y ^ sz) - sz
+
+ cmp r10, #0 ; check if zero
+ orrne r1, r1, r2, lsr #23 ; add flag for nonzero coeffs
+
+ str r10, [r6], #4 ; *qcoeff++ = x
+ ldr r11, [r8], #4 ; [dq3 | dq2]
+
+ smulbb r12, r0, r9 ; [x0*dq0]
+ smultt r0, r0, r9 ; [x1*dq1]
+
+ smulbb r9, r10, r11 ; [x2*dq2]
+ smultt r10, r10, r11 ; [x3*dq3]
+
+ lsls r2, r2, #2 ; update loop counter
+ strh r12, [r7, #0] ; dqcoeff[0] = [x0*dq0]
+ strh r0, [r7, #2] ; dqcoeff[1] = [x1*dq1]
+ strh r9, [r7, #4] ; dqcoeff[2] = [x2*dq2]
+ strh r10, [r7, #6] ; dqcoeff[3] = [x3*dq3]
+ add r7, r7, #8 ; dqcoeff += 8
+ bne loop
+
+ ; PART 2: check position for eob...
+ ldr r11, [sp, #0] ; restore BLOCKD pointer
+ mov lr, #0 ; init eob
+ cmp r1, #0 ; coeffs after quantization?
+ ldr r12, [r11, #vp8_blockd_eob]
+ beq end ; skip eob calculations if all zero
+
+ ldr r0, [r11, #vp8_blockd_qcoeff]
+
+ ; check shortcut for nonzero qcoeffs
+ tst r1, #0x80
+ bne quant_coeff_15_14
+ tst r1, #0x20
+ bne quant_coeff_13_11
+ tst r1, #0x8
+ bne quant_coeff_12_7
+ tst r1, #0x40
+ bne quant_coeff_10_9
+ tst r1, #0x10
+ bne quant_coeff_8_3
+ tst r1, #0x2
+ bne quant_coeff_6_5
+ tst r1, #0x4
+ bne quant_coeff_4_2
+ b quant_coeff_1_0
+
+ quant_coeff_15_14
+ ldrh r2, [r0, #30] ; rc=15, i=15
+ mov lr, #16
+ cmp r2, #0
+ bne end
+
+ ldrh r3, [r0, #28] ; rc=14, i=14
+ mov lr, #15
+ cmp r3, #0
+ bne end
+
+ quant_coeff_13_11
+ ldrh r2, [r0, #22] ; rc=11, i=13
+ mov lr, #14
+ cmp r2, #0
+ bne end
+
+ quant_coeff_12_7
+ ldrh r3, [r0, #14] ; rc=7, i=12
+ mov lr, #13
+ cmp r3, #0
+ bne end
+
+ ldrh r2, [r0, #20] ; rc=10, i=11
+ mov lr, #12
+ cmp r2, #0
+ bne end
+
+ quant_coeff_10_9
+ ldrh r3, [r0, #26] ; rc=13, i=10
+ mov lr, #11
+ cmp r3, #0
+ bne end
+
+ ldrh r2, [r0, #24] ; rc=12, i=9
+ mov lr, #10
+ cmp r2, #0
+ bne end
+
+ quant_coeff_8_3
+ ldrh r3, [r0, #18] ; rc=9, i=8
+ mov lr, #9
+ cmp r3, #0
+ bne end
+
+ ldrh r2, [r0, #12] ; rc=6, i=7
+ mov lr, #8
+ cmp r2, #0
+ bne end
+
+ quant_coeff_6_5
+ ldrh r3, [r0, #6] ; rc=3, i=6
+ mov lr, #7
+ cmp r3, #0
+ bne end
+
+ ldrh r2, [r0, #4] ; rc=2, i=5
+ mov lr, #6
+ cmp r2, #0
+ bne end
+
+ quant_coeff_4_2
+ ldrh r3, [r0, #10] ; rc=5, i=4
+ mov lr, #5
+ cmp r3, #0
+ bne end
+
+ ldrh r2, [r0, #16] ; rc=8, i=3
+ mov lr, #4
+ cmp r2, #0
+ bne end
+
+ ldrh r3, [r0, #8] ; rc=4, i=2
+ mov lr, #3
+ cmp r3, #0
+ bne end
+
+ quant_coeff_1_0
+ ldrh r2, [r0, #2] ; rc=1, i=1
+ mov lr, #2
+ cmp r2, #0
+ bne end
+
+ mov lr, #1 ; rc=0, i=0
+
+ end
+ strb lr, [r12]
+ ldmfd sp!, {r1, r4-r11, pc}
+
+ ENDP
+
+ loop_count
+ DCD 0x1000000
+
+ END
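
PART 1 computes x = sign(z) * ((abs(z) + round) * quant_fast >> 16) in raster
order; PART 2's flag word and tst tree are a branchy search for the last
nonzero coefficient in zig-zag scan order. A scalar C equivalent, with the
zig-zag table as in vp8/common/entropy.c and parameter names standing in for
the b->*/d->* fields the asm loads through the offset symbols:

    static const int zig_zag[16] = { 0, 1, 4,  8,  5,  2, 3,  6,
                                     9, 12, 13, 10, 7, 11, 14, 15 };

    static void fast_quantize_b(const int16_t *coeff,      /* b->coeff      */
                                const int16_t *round,      /* b->round      */
                                const int16_t *quant_fast, /* b->quant_fast */
                                const int16_t *dequant,    /* d->dequant    */
                                int16_t *qcoeff, int16_t *dqcoeff,
                                char *eob_ptr) {           /* d->eob        */
        int i, eob = 0;

        for (i = 0; i < 16; i++) {
            const int rc = zig_zag[i];
            const int z = coeff[rc];
            const int sz = z >> 31;             /* all 1s if z < 0          */
            int x = (z ^ sz) - sz;              /* abs(z), as in the asm    */

            x = ((x + round[rc]) * quant_fast[rc]) >> 16;
            x = (x ^ sz) - sz;                  /* restore the sign         */

            qcoeff[rc]  = (int16_t)x;
            dqcoeff[rc] = (int16_t)(x * dequant[rc]);
            if (x) eob = i + 1;                 /* one past last nonzero    */
        }
        *eob_ptr = (char)eob;
    }
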
+
--- /dev/null
- INCLUDE asm_enc_offsets.asm
+ ;
+ ; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+ ;
+ ; Use of this source code is governed by a BSD-style license
+ ; that can be found in the LICENSE file in the root of the source
+ ; tree. An additional intellectual property rights grant can be found
+ ; in the file PATENTS. All contributing project authors may
+ ; be found in the AUTHORS file in the root of the source tree.
+ ;
+
+
+ EXPORT |vp8_subtract_mby_armv6|
+ EXPORT |vp8_subtract_mbuv_armv6|
+ EXPORT |vp8_subtract_b_armv6|
+
++ INCLUDE vp8_asm_enc_offsets.asm
+
+ ARM
+ REQUIRE8
+ PRESERVE8
+
+ AREA ||.text||, CODE, READONLY, ALIGN=2
+
+ ; r0 BLOCK *be
+ ; r1 BLOCKD *bd
+ ; r2 int pitch
+ |vp8_subtract_b_armv6| PROC
+
+ stmfd sp!, {r4-r9}
+
+ ldr r4, [r0, #vp8_block_base_src]
+ ldr r5, [r0, #vp8_block_src]
+ ldr r6, [r0, #vp8_block_src_diff]
+
+ ldr r3, [r4]
+ ldr r7, [r0, #vp8_block_src_stride]
+ add r3, r3, r5 ; src = *base_src + src
+ ldr r8, [r1, #vp8_blockd_predictor]
+
+ mov r9, #4 ; loop count
+
+ loop_block
+
+ ldr r0, [r3], r7 ; src
+ ldr r1, [r8], r2 ; pred
+
+ uxtb16 r4, r0 ; [s2 | s0]
+ uxtb16 r5, r1 ; [p2 | p0]
+ uxtb16 r0, r0, ror #8 ; [s3 | s1]
+ uxtb16 r1, r1, ror #8 ; [p3 | p1]
+
+ usub16 r4, r4, r5 ; [d2 | d0]
+ usub16 r5, r0, r1 ; [d3 | d1]
+
+ subs r9, r9, #1 ; decrement loop counter
+
+ pkhbt r0, r4, r5, lsl #16 ; [d1 | d0]
+ pkhtb r1, r5, r4, asr #16 ; [d3 | d2]
+
+ str r0, [r6, #0] ; diff
+ str r1, [r6, #4] ; diff
+
+ add r6, r6, r2, lsl #1 ; update diff pointer
+ bne loop_block
+
+ ldmfd sp!, {r4-r9}
+ mov pc, lr
+
+ ENDP
+
+
+ ; r0 short *diff
+ ; r1 unsigned char *usrc
+ ; r2 unsigned char *vsrc
+ ; r3 int src_stride
+ ; sp unsigned char *upred
+ ; sp unsigned char *vpred
+ ; sp int pred_stride
+ |vp8_subtract_mbuv_armv6| PROC
+
+ stmfd sp!, {r4-r11}
+
+    add     r0, r0, #512            ; point *diff at the chroma (Cb) block
+ mov r4, #8 ; loop count
+ ldr r5, [sp, #32] ; upred
+ ldr r12, [sp, #40] ; pred_stride
+
+ ; Subtract U block
+ loop_u
+ ldr r6, [r1] ; usrc (A)
+ ldr r7, [r5] ; upred (A)
+
+ uxtb16 r8, r6 ; [s2 | s0] (A)
+ uxtb16 r9, r7 ; [p2 | p0] (A)
+ uxtb16 r10, r6, ror #8 ; [s3 | s1] (A)
+ uxtb16 r11, r7, ror #8 ; [p3 | p1] (A)
+
+ usub16 r6, r8, r9 ; [d2 | d0] (A)
+ usub16 r7, r10, r11 ; [d3 | d1] (A)
+
+ ldr r10, [r1, #4] ; usrc (B)
+ ldr r11, [r5, #4] ; upred (B)
+
+ pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A)
+ pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A)
+
+ str r8, [r0], #4 ; diff (A)
+ uxtb16 r8, r10 ; [s2 | s0] (B)
+ str r9, [r0], #4 ; diff (A)
+
+ uxtb16 r9, r11 ; [p2 | p0] (B)
+ uxtb16 r10, r10, ror #8 ; [s3 | s1] (B)
+ uxtb16 r11, r11, ror #8 ; [p3 | p1] (B)
+
+ usub16 r6, r8, r9 ; [d2 | d0] (B)
+ usub16 r7, r10, r11 ; [d3 | d1] (B)
+
+ add r1, r1, r3 ; update usrc pointer
+ add r5, r5, r12 ; update upred pointer
+
+ pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B)
+ pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (B)
+
+ str r8, [r0], #4 ; diff (B)
+ subs r4, r4, #1 ; update loop counter
+ str r9, [r0], #4 ; diff (B)
+
+ bne loop_u
+
+ ldr r5, [sp, #36] ; vpred
+ mov r4, #8 ; loop count
+
+ ; Subtract V block
+ loop_v
+ ldr r6, [r2] ; vsrc (A)
+ ldr r7, [r5] ; vpred (A)
+
+ uxtb16 r8, r6 ; [s2 | s0] (A)
+ uxtb16 r9, r7 ; [p2 | p0] (A)
+ uxtb16 r10, r6, ror #8 ; [s3 | s1] (A)
+ uxtb16 r11, r7, ror #8 ; [p3 | p1] (A)
+
+ usub16 r6, r8, r9 ; [d2 | d0] (A)
+ usub16 r7, r10, r11 ; [d3 | d1] (A)
+
+ ldr r10, [r2, #4] ; vsrc (B)
+ ldr r11, [r5, #4] ; vpred (B)
+
+ pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A)
+ pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A)
+
+ str r8, [r0], #4 ; diff (A)
+ uxtb16 r8, r10 ; [s2 | s0] (B)
+ str r9, [r0], #4 ; diff (A)
+
+ uxtb16 r9, r11 ; [p2 | p0] (B)
+ uxtb16 r10, r10, ror #8 ; [s3 | s1] (B)
+ uxtb16 r11, r11, ror #8 ; [p3 | p1] (B)
+
+ usub16 r6, r8, r9 ; [d2 | d0] (B)
+ usub16 r7, r10, r11 ; [d3 | d1] (B)
+
+ add r2, r2, r3 ; update vsrc pointer
+ add r5, r5, r12 ; update vpred pointer
+
+ pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B)
+ pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (B)
+
+ str r8, [r0], #4 ; diff (B)
+ subs r4, r4, #1 ; update loop counter
+ str r9, [r0], #4 ; diff (B)
+
+ bne loop_v
+
+ ldmfd sp!, {r4-r11}
+ bx lr
+
+ ENDP
+
+
+ ; r0 short *diff
+ ; r1 unsigned char *src
+ ; r2 int src_stride
+ ; r3 unsigned char *pred
+ ; sp int pred_stride
+ |vp8_subtract_mby_armv6| PROC
+
+ stmfd sp!, {r4-r11}
+ ldr r12, [sp, #32] ; pred_stride
+ mov r4, #16
+ loop
+ ldr r6, [r1] ; src (A)
+ ldr r7, [r3] ; pred (A)
+
+ uxtb16 r8, r6 ; [s2 | s0] (A)
+ uxtb16 r9, r7 ; [p2 | p0] (A)
+ uxtb16 r10, r6, ror #8 ; [s3 | s1] (A)
+ uxtb16 r11, r7, ror #8 ; [p3 | p1] (A)
+
+ usub16 r6, r8, r9 ; [d2 | d0] (A)
+ usub16 r7, r10, r11 ; [d3 | d1] (A)
+
+ ldr r10, [r1, #4] ; src (B)
+ ldr r11, [r3, #4] ; pred (B)
+
+ pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A)
+ pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A)
+
+ str r8, [r0], #4 ; diff (A)
+ uxtb16 r8, r10 ; [s2 | s0] (B)
+ str r9, [r0], #4 ; diff (A)
+
+ uxtb16 r9, r11 ; [p2 | p0] (B)
+ uxtb16 r10, r10, ror #8 ; [s3 | s1] (B)
+ uxtb16 r11, r11, ror #8 ; [p3 | p1] (B)
+
+ usub16 r6, r8, r9 ; [d2 | d0] (B)
+ usub16 r7, r10, r11 ; [d3 | d1] (B)
+
+ ldr r10, [r1, #8] ; src (C)
+ ldr r11, [r3, #8] ; pred (C)
+
+ pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B)
+ pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (B)
+
+ str r8, [r0], #4 ; diff (B)
+ uxtb16 r8, r10 ; [s2 | s0] (C)
+ str r9, [r0], #4 ; diff (B)
+
+ uxtb16 r9, r11 ; [p2 | p0] (C)
+ uxtb16 r10, r10, ror #8 ; [s3 | s1] (C)
+ uxtb16 r11, r11, ror #8 ; [p3 | p1] (C)
+
+ usub16 r6, r8, r9 ; [d2 | d0] (C)
+ usub16 r7, r10, r11 ; [d3 | d1] (C)
+
+ ldr r10, [r1, #12] ; src (D)
+ ldr r11, [r3, #12] ; pred (D)
+
+ pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (C)
+ pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (C)
+
+ str r8, [r0], #4 ; diff (C)
+ uxtb16 r8, r10 ; [s2 | s0] (D)
+ str r9, [r0], #4 ; diff (C)
+
+ uxtb16 r9, r11 ; [p2 | p0] (D)
+ uxtb16 r10, r10, ror #8 ; [s3 | s1] (D)
+ uxtb16 r11, r11, ror #8 ; [p3 | p1] (D)
+
+ usub16 r6, r8, r9 ; [d2 | d0] (D)
+ usub16 r7, r10, r11 ; [d3 | d1] (D)
+
+ add r1, r1, r2 ; update src pointer
+ add r3, r3, r12 ; update pred pointer
+
+ pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (D)
+ pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (D)
+
+ str r8, [r0], #4 ; diff (D)
+ subs r4, r4, #1 ; update loop counter
+ str r9, [r0], #4 ; diff (D)
+
+ bne loop
+
+ ldmfd sp!, {r4-r11}
+ bx lr
+
+ ENDP
+
+ END
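
All three routines compute 16-bit differences diff = src - pred; mby covers
the 16x16 luma block and mbuv the two 8x8 chroma blocks. The 4x4 case in C
(note that pitch counts shorts, hence the lsl #1 on the diff pointer in the
asm):

    static void subtract_b(int16_t *diff,            /* be->src_diff       */
                           const uint8_t *src, int src_stride,
                           const uint8_t *pred, int pitch) {
        int r, c;
        for (r = 0; r < 4; r++) {
            for (c = 0; c < 4; c++)
                diff[c] = (int16_t)(src[c] - pred[c]);
            diff += pitch;               /* pitch shorts = pitch*2 bytes */
            pred += pitch;
            src  += src_stride;
        }
    }
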
+
--- /dev/null
- INCLUDE asm_enc_offsets.asm
+ ;
+ ; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+ ;
+ ; Use of this source code is governed by a BSD-style license
+ ; that can be found in the LICENSE file in the root of the source
+ ; tree. An additional intellectual property rights grant can be found
+ ; in the file PATENTS. All contributing project authors may
+ ; be found in the AUTHORS file in the root of the source tree.
+ ;
+
+
+ EXPORT |vp8_fast_quantize_b_neon|
+ EXPORT |vp8_fast_quantize_b_pair_neon|
+
++ INCLUDE vp8_asm_enc_offsets.asm
+
+ ARM
+ REQUIRE8
+ PRESERVE8
+
+ AREA ||.text||, CODE, READONLY, ALIGN=4
+
+ ;vp8_fast_quantize_b_pair_neon(BLOCK *b1, BLOCK *b2, BLOCKD *d1, BLOCKD *d2);
+ |vp8_fast_quantize_b_pair_neon| PROC
+
+ stmfd sp!, {r4-r9}
+ vstmdb sp!, {q4-q7}
+
+ ldr r4, [r0, #vp8_block_coeff]
+ ldr r5, [r0, #vp8_block_quant_fast]
+ ldr r6, [r0, #vp8_block_round]
+
+ vld1.16 {q0, q1}, [r4@128] ; load z
+
+ ldr r7, [r2, #vp8_blockd_qcoeff]
+
+ vabs.s16 q4, q0 ; calculate x = abs(z)
+ vabs.s16 q5, q1
+
+    ;right shift 15 to get sign: all 0s if positive, all 1s if negative
+ vshr.s16 q2, q0, #15 ; sz
+ vshr.s16 q3, q1, #15
+
+ vld1.s16 {q6, q7}, [r6@128] ; load round_ptr [0-15]
+ vld1.s16 {q8, q9}, [r5@128] ; load quant_ptr [0-15]
+
+ ldr r4, [r1, #vp8_block_coeff]
+
+ vadd.s16 q4, q6 ; x + Round
+ vadd.s16 q5, q7
+
+ vld1.16 {q0, q1}, [r4@128] ; load z2
+
+ vqdmulh.s16 q4, q8 ; y = ((Round+abs(z)) * Quant) >> 16
+ vqdmulh.s16 q5, q9
+
+ vabs.s16 q10, q0 ; calculate x2 = abs(z_2)
+ vabs.s16 q11, q1
+ vshr.s16 q12, q0, #15 ; sz2
+ vshr.s16 q13, q1, #15
+
+ ;modify data to have its original sign
+ veor.s16 q4, q2 ; y^sz
+ veor.s16 q5, q3
+
+ vadd.s16 q10, q6 ; x2 + Round
+ vadd.s16 q11, q7
+
+ ldr r8, [r2, #vp8_blockd_dequant]
+
+ vqdmulh.s16 q10, q8 ; y2 = ((Round+abs(z)) * Quant) >> 16
+ vqdmulh.s16 q11, q9
+
+ vshr.s16 q4, #1 ; right shift 1 after vqdmulh
+ vshr.s16 q5, #1
+
+ vld1.s16 {q6, q7}, [r8@128] ;load dequant_ptr[i]
+
+ vsub.s16 q4, q2 ; x1=(y^sz)-sz = (y^sz)-(-1) (2's complement)
+ vsub.s16 q5, q3
+
+ vshr.s16 q10, #1 ; right shift 1 after vqdmulh
+ vshr.s16 q11, #1
+
+ ldr r9, [r2, #vp8_blockd_dqcoeff]
+
+ veor.s16 q10, q12 ; y2^sz2
+ veor.s16 q11, q13
+
+ vst1.s16 {q4, q5}, [r7] ; store: qcoeff = x1
+
+
+ vsub.s16 q10, q12 ; x2=(y^sz)-sz = (y^sz)-(-1) (2's complement)
+ vsub.s16 q11, q13
+
+ ldr r6, [r3, #vp8_blockd_qcoeff]
+
+ vmul.s16 q2, q6, q4 ; x * Dequant
+ vmul.s16 q3, q7, q5
+
+ adr r0, inv_zig_zag ; load ptr of inverse zigzag table
+
+ vceq.s16 q8, q8 ; set q8 to all 1
+
+ vst1.s16 {q10, q11}, [r6] ; store: qcoeff = x2
+
+ vmul.s16 q12, q6, q10 ; x2 * Dequant
+ vmul.s16 q13, q7, q11
+
+ vld1.16 {q6, q7}, [r0@128] ; load inverse scan order
+
+ vtst.16 q14, q4, q8 ; now find eob
+ vtst.16 q15, q5, q8 ; non-zero element is set to all 1
+
+ vst1.s16 {q2, q3}, [r9] ; store dqcoeff = x * Dequant
+
+ ldr r7, [r3, #vp8_blockd_dqcoeff]
+
+ vand q0, q6, q14 ; get all valid numbers from scan array
+ vand q1, q7, q15
+
+ vst1.s16 {q12, q13}, [r7] ; store dqcoeff = x * Dequant
+
+ vtst.16 q2, q10, q8 ; now find eob
+ vtst.16 q3, q11, q8 ; non-zero element is set to all 1
+
+ vmax.u16 q0, q0, q1 ; find maximum value in q0, q1
+
+ vand q10, q6, q2 ; get all valid numbers from scan array
+ vand q11, q7, q3
+ vmax.u16 q10, q10, q11 ; find maximum value in q10, q11
+
+ vmax.u16 d0, d0, d1
+ vmax.u16 d20, d20, d21
+ vmovl.u16 q0, d0
+ vmovl.u16 q10, d20
+
+ vmax.u32 d0, d0, d1
+ vmax.u32 d20, d20, d21
+ vpmax.u32 d0, d0, d0
+ vpmax.u32 d20, d20, d20
+
+ ldr r4, [r2, #vp8_blockd_eob]
+ ldr r5, [r3, #vp8_blockd_eob]
+
+ vst1.8 {d0[0]}, [r4] ; store eob
+ vst1.8 {d20[0]}, [r5] ; store eob
+
+ vldmia sp!, {q4-q7}
+ ldmfd sp!, {r4-r9}
+ bx lr
+
+ ENDP
+
+ ;void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
+ |vp8_fast_quantize_b_neon| PROC
+
+ stmfd sp!, {r4-r7}
+
+ ldr r3, [r0, #vp8_block_coeff]
+ ldr r4, [r0, #vp8_block_quant_fast]
+ ldr r5, [r0, #vp8_block_round]
+
+ vld1.16 {q0, q1}, [r3@128] ; load z
+ vorr.s16 q14, q0, q1 ; check if all zero (step 1)
+ ldr r6, [r1, #vp8_blockd_qcoeff]
+ ldr r7, [r1, #vp8_blockd_dqcoeff]
+ vorr.s16 d28, d28, d29 ; check if all zero (step 2)
+
+ vabs.s16 q12, q0 ; calculate x = abs(z)
+ vabs.s16 q13, q1
+
+    ;right shift 15 to get sign: all 0s if positive, all 1s if negative
+ vshr.s16 q2, q0, #15 ; sz
+ vmov r2, r3, d28 ; check if all zero (step 3)
+ vshr.s16 q3, q1, #15
+
+ vld1.s16 {q14, q15}, [r5@128]; load round_ptr [0-15]
+ vld1.s16 {q8, q9}, [r4@128] ; load quant_ptr [0-15]
+
+ vadd.s16 q12, q14 ; x + Round
+ vadd.s16 q13, q15
+
+ adr r0, inv_zig_zag ; load ptr of inverse zigzag table
+
+ vqdmulh.s16 q12, q8 ; y = ((Round+abs(z)) * Quant) >> 16
+ vqdmulh.s16 q13, q9
+
+ vld1.16 {q10, q11}, [r0@128]; load inverse scan order
+
+ vceq.s16 q8, q8 ; set q8 to all 1
+
+ ldr r4, [r1, #vp8_blockd_dequant]
+
+ vshr.s16 q12, #1 ; right shift 1 after vqdmulh
+ vshr.s16 q13, #1
+
+ ldr r5, [r1, #vp8_blockd_eob]
+
+ orr r2, r2, r3 ; check if all zero (step 4)
+ cmp r2, #0 ; check if all zero (step 5)
+ beq zero_output ; check if all zero (step 6)
+
+ ;modify data to have its original sign
+ veor.s16 q12, q2 ; y^sz
+ veor.s16 q13, q3
+
+ vsub.s16 q12, q2 ; x1=(y^sz)-sz = (y^sz)-(-1) (2's complement)
+ vsub.s16 q13, q3
+
+ vld1.s16 {q2, q3}, [r4@128] ; load dequant_ptr[i]
+
+ vtst.16 q14, q12, q8 ; now find eob
+ vtst.16 q15, q13, q8 ; non-zero element is set to all 1
+
+ vst1.s16 {q12, q13}, [r6@128]; store: qcoeff = x1
+
+ vand q10, q10, q14 ; get all valid numbers from scan array
+ vand q11, q11, q15
+
+
+ vmax.u16 q0, q10, q11 ; find maximum value in q0, q1
+ vmax.u16 d0, d0, d1
+ vmovl.u16 q0, d0
+
+ vmul.s16 q2, q12 ; x * Dequant
+ vmul.s16 q3, q13
+
+ vmax.u32 d0, d0, d1
+ vpmax.u32 d0, d0, d0
+
+ vst1.s16 {q2, q3}, [r7@128] ; store dqcoeff = x * Dequant
+
+ vst1.8 {d0[0]}, [r5] ; store eob
+
+ ldmfd sp!, {r4-r7}
+ bx lr
+
+ zero_output
+ strb r2, [r5] ; store eob
+ vst1.s16 {q0, q1}, [r6@128] ; qcoeff = 0
+ vst1.s16 {q0, q1}, [r7@128] ; dqcoeff = 0
+
+ ldmfd sp!, {r4-r7}
+ bx lr
+
+ ENDP
+
+ ; default inverse zigzag table is defined in vp8/common/entropy.c
+ ALIGN 16 ; enable use of @128 bit aligned loads
+ inv_zig_zag
+ DCW 0x0001, 0x0002, 0x0006, 0x0007
+ DCW 0x0003, 0x0005, 0x0008, 0x000d
+ DCW 0x0004, 0x0009, 0x000c, 0x000e
+ DCW 0x000a, 0x000b, 0x000f, 0x0010
+
+ END
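
Unlike the ARMv6 branch tree, the NEON routines compute eob data-parallel:
vtst builds an all-ones mask per nonzero lane, vand selects entries of
inv_zig_zag (which stores scan position + 1), and vmax/vpmax reduce the lanes
to a single byte. The same computation as scalar C:

    static int eob_from_inv_zigzag(const int16_t *qcoeff) {
        static const uint16_t inv_zig_zag[16] = { 1, 2, 6,  7,  3,  5,  8, 13,
                                                  4, 9, 12, 14, 10, 11, 15, 16 };
        unsigned eob = 0;
        int rc;
        for (rc = 0; rc < 16; rc++)      /* vtst + vand + vmax in NEON */
            if (qcoeff[rc] && inv_zig_zag[rc] > eob)
                eob = inv_zig_zag[rc];
        return (int)eob;                 /* 0 means all coefficients 0 */
    }
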
+
--- /dev/null
- INCLUDE asm_enc_offsets.asm
+ ;
+ ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ ;
+ ; Use of this source code is governed by a BSD-style license
+ ; that can be found in the LICENSE file in the root of the source
+ ; tree. An additional intellectual property rights grant can be found
+ ; in the file PATENTS. All contributing project authors may
+ ; be found in the AUTHORS file in the root of the source tree.
+ ;
+
+ EXPORT |vp8_subtract_b_neon|
+ EXPORT |vp8_subtract_mby_neon|
+ EXPORT |vp8_subtract_mbuv_neon|
+
++ INCLUDE vp8_asm_enc_offsets.asm
+
+ ARM
+ REQUIRE8
+ PRESERVE8
+
+ AREA ||.text||, CODE, READONLY, ALIGN=2
+
+ ;void vp8_subtract_b_neon(BLOCK *be, BLOCKD *bd, int pitch)
+ |vp8_subtract_b_neon| PROC
+
+ stmfd sp!, {r4-r7}
+
+ ldr r3, [r0, #vp8_block_base_src]
+ ldr r4, [r0, #vp8_block_src]
+ ldr r5, [r0, #vp8_block_src_diff]
+ ldr r3, [r3]
+ ldr r6, [r0, #vp8_block_src_stride]
+ add r3, r3, r4 ; src = *base_src + src
+ ldr r7, [r1, #vp8_blockd_predictor]
+
+ vld1.8 {d0}, [r3], r6 ;load src
+ vld1.8 {d1}, [r7], r2 ;load pred
+ vld1.8 {d2}, [r3], r6
+ vld1.8 {d3}, [r7], r2
+ vld1.8 {d4}, [r3], r6
+ vld1.8 {d5}, [r7], r2
+ vld1.8 {d6}, [r3], r6
+ vld1.8 {d7}, [r7], r2
+
+ vsubl.u8 q10, d0, d1
+ vsubl.u8 q11, d2, d3
+ vsubl.u8 q12, d4, d5
+ vsubl.u8 q13, d6, d7
+
+ mov r2, r2, lsl #1
+
+ vst1.16 {d20}, [r5], r2 ;store diff
+ vst1.16 {d22}, [r5], r2
+ vst1.16 {d24}, [r5], r2
+ vst1.16 {d26}, [r5], r2
+
+ ldmfd sp!, {r4-r7}
+ bx lr
+
+ ENDP
+
+
+ ;==========================================
+    ;void vp8_subtract_mby_neon(short *diff, unsigned char *src, int src_stride,
+ ; unsigned char *pred, int pred_stride)
+ |vp8_subtract_mby_neon| PROC
+ push {r4-r7}
+ mov r12, #4
+ ldr r4, [sp, #16] ; pred_stride
+ mov r6, #32 ; "diff" stride x2
+ add r5, r0, #16 ; second diff pointer
+
+ subtract_mby_loop
+ vld1.8 {q0}, [r1], r2 ;load src
+ vld1.8 {q1}, [r3], r4 ;load pred
+ vld1.8 {q2}, [r1], r2
+ vld1.8 {q3}, [r3], r4
+ vld1.8 {q4}, [r1], r2
+ vld1.8 {q5}, [r3], r4
+ vld1.8 {q6}, [r1], r2
+ vld1.8 {q7}, [r3], r4
+
+ vsubl.u8 q8, d0, d2
+ vsubl.u8 q9, d1, d3
+ vsubl.u8 q10, d4, d6
+ vsubl.u8 q11, d5, d7
+ vsubl.u8 q12, d8, d10
+ vsubl.u8 q13, d9, d11
+ vsubl.u8 q14, d12, d14
+ vsubl.u8 q15, d13, d15
+
+ vst1.16 {q8}, [r0], r6 ;store diff
+ vst1.16 {q9}, [r5], r6
+ vst1.16 {q10}, [r0], r6
+ vst1.16 {q11}, [r5], r6
+ vst1.16 {q12}, [r0], r6
+ vst1.16 {q13}, [r5], r6
+ vst1.16 {q14}, [r0], r6
+ vst1.16 {q15}, [r5], r6
+
+ subs r12, r12, #1
+ bne subtract_mby_loop
+
+ pop {r4-r7}
+ bx lr
+ ENDP
+
+ ;=================================
+ ;void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc,
+ ; int src_stride, unsigned char *upred,
+ ; unsigned char *vpred, int pred_stride)
+
+ |vp8_subtract_mbuv_neon| PROC
+ push {r4-r7}
+ ldr r4, [sp, #16] ; upred
+ ldr r5, [sp, #20] ; vpred
+ ldr r6, [sp, #24] ; pred_stride
+ add r0, r0, #512 ; short *udiff = diff + 256;
+ mov r12, #32 ; "diff" stride x2
+ add r7, r0, #16 ; second diff pointer
+
+ ;u
+ vld1.8 {d0}, [r1], r3 ;load usrc
+ vld1.8 {d1}, [r4], r6 ;load upred
+ vld1.8 {d2}, [r1], r3
+ vld1.8 {d3}, [r4], r6
+ vld1.8 {d4}, [r1], r3
+ vld1.8 {d5}, [r4], r6
+ vld1.8 {d6}, [r1], r3
+ vld1.8 {d7}, [r4], r6
+ vld1.8 {d8}, [r1], r3
+ vld1.8 {d9}, [r4], r6
+ vld1.8 {d10}, [r1], r3
+ vld1.8 {d11}, [r4], r6
+ vld1.8 {d12}, [r1], r3
+ vld1.8 {d13}, [r4], r6
+ vld1.8 {d14}, [r1], r3
+ vld1.8 {d15}, [r4], r6
+
+ vsubl.u8 q8, d0, d1
+ vsubl.u8 q9, d2, d3
+ vsubl.u8 q10, d4, d5
+ vsubl.u8 q11, d6, d7
+ vsubl.u8 q12, d8, d9
+ vsubl.u8 q13, d10, d11
+ vsubl.u8 q14, d12, d13
+ vsubl.u8 q15, d14, d15
+
+ vst1.16 {q8}, [r0], r12 ;store diff
+ vst1.16 {q9}, [r7], r12
+ vst1.16 {q10}, [r0], r12
+ vst1.16 {q11}, [r7], r12
+ vst1.16 {q12}, [r0], r12
+ vst1.16 {q13}, [r7], r12
+ vst1.16 {q14}, [r0], r12
+ vst1.16 {q15}, [r7], r12
+
+ ;v
+ vld1.8 {d0}, [r2], r3 ;load vsrc
+ vld1.8 {d1}, [r5], r6 ;load vpred
+ vld1.8 {d2}, [r2], r3
+ vld1.8 {d3}, [r5], r6
+ vld1.8 {d4}, [r2], r3
+ vld1.8 {d5}, [r5], r6
+ vld1.8 {d6}, [r2], r3
+ vld1.8 {d7}, [r5], r6
+ vld1.8 {d8}, [r2], r3
+ vld1.8 {d9}, [r5], r6
+ vld1.8 {d10}, [r2], r3
+ vld1.8 {d11}, [r5], r6
+ vld1.8 {d12}, [r2], r3
+ vld1.8 {d13}, [r5], r6
+ vld1.8 {d14}, [r2], r3
+ vld1.8 {d15}, [r5], r6
+
+ vsubl.u8 q8, d0, d1
+ vsubl.u8 q9, d2, d3
+ vsubl.u8 q10, d4, d5
+ vsubl.u8 q11, d6, d7
+ vsubl.u8 q12, d8, d9
+ vsubl.u8 q13, d10, d11
+ vsubl.u8 q14, d12, d13
+ vsubl.u8 q15, d14, d15
+
+ vst1.16 {q8}, [r0], r12 ;store diff
+ vst1.16 {q9}, [r7], r12
+ vst1.16 {q10}, [r0], r12
+ vst1.16 {q11}, [r7], r12
+ vst1.16 {q12}, [r0], r12
+ vst1.16 {q13}, [r7], r12
+ vst1.16 {q14}, [r0], r12
+ vst1.16 {q15}, [r7], r12
+
+ pop {r4-r7}
+ bx lr
+
+ ENDP
+
+ END
--- /dev/null
-%include "asm_enc_offsets.asm"
+ ;
+ ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ ;
+ ; Use of this source code is governed by a BSD-style license and patent
+ ; grant that can be found in the LICENSE file in the root of the source
+ ; tree. All contributing project authors may be found in the AUTHORS
+ ; file in the root of the source tree.
+ ;
+
+
+ %include "vpx_ports/x86_abi_support.asm"
++%include "vp8_asm_enc_offsets.asm"
+
+
+ ; void vp8_regular_quantize_b_sse2 | arg
+ ; (BLOCK *b, | 0
+ ; BLOCKD *d) | 1
+
+ global sym(vp8_regular_quantize_b_sse2) PRIVATE
+ sym(vp8_regular_quantize_b_sse2):
+ push rbp
+ mov rbp, rsp
+ SAVE_XMM 7
+ GET_GOT rbx
+
+ %if ABI_IS_32BIT
+ push rdi
+ push rsi
+ %else
+ %ifidn __OUTPUT_FORMAT__,x64
+ push rdi
+ push rsi
+ %endif
+ %endif
+
+ ALIGN_STACK 16, rax
+ %define zrun_zbin_boost 0 ; 8
+ %define abs_minus_zbin 8 ; 32
+ %define temp_qcoeff 40 ; 32
+ %define qcoeff 72 ; 32
+ %define stack_size 104
+ sub rsp, stack_size
+ ; end prolog
+
+ %if ABI_IS_32BIT
+ mov rdi, arg(0) ; BLOCK *b
+ mov rsi, arg(1) ; BLOCKD *d
+ %else
+ %ifidn __OUTPUT_FORMAT__,x64
+ mov rdi, rcx ; BLOCK *b
+ mov rsi, rdx ; BLOCKD *d
+ %else
+ ;mov rdi, rdi ; BLOCK *b
+ ;mov rsi, rsi ; BLOCKD *d
+ %endif
+ %endif
+
+ mov rdx, [rdi + vp8_block_coeff] ; coeff_ptr
+ mov rcx, [rdi + vp8_block_zbin] ; zbin_ptr
+ movd xmm7, [rdi + vp8_block_zbin_extra] ; zbin_oq_value
+
+ ; z
+ movdqa xmm0, [rdx]
+ movdqa xmm4, [rdx + 16]
+ mov rdx, [rdi + vp8_block_round] ; round_ptr
+
+ pshuflw xmm7, xmm7, 0
+ punpcklwd xmm7, xmm7 ; duplicated zbin_oq_value
+
+ movdqa xmm1, xmm0
+ movdqa xmm5, xmm4
+
+ ; sz
+ psraw xmm0, 15
+ psraw xmm4, 15
+
+ ; (z ^ sz)
+ pxor xmm1, xmm0
+ pxor xmm5, xmm4
+
+ ; x = abs(z)
+ psubw xmm1, xmm0
+ psubw xmm5, xmm4
+
+ movdqa xmm2, [rcx]
+ movdqa xmm3, [rcx + 16]
+ mov rcx, [rdi + vp8_block_quant] ; quant_ptr
+
+ ; *zbin_ptr + zbin_oq_value
+ paddw xmm2, xmm7
+ paddw xmm3, xmm7
+
+ ; x - (*zbin_ptr + zbin_oq_value)
+ psubw xmm1, xmm2
+ psubw xmm5, xmm3
+ movdqa [rsp + abs_minus_zbin], xmm1
+ movdqa [rsp + abs_minus_zbin + 16], xmm5
+
+ ; add (zbin_ptr + zbin_oq_value) back
+ paddw xmm1, xmm2
+ paddw xmm5, xmm3
+
+ movdqa xmm2, [rdx]
+ movdqa xmm6, [rdx + 16]
+
+ movdqa xmm3, [rcx]
+ movdqa xmm7, [rcx + 16]
+
+ ; x + round
+ paddw xmm1, xmm2
+ paddw xmm5, xmm6
+
+ ; y = x * quant_ptr >> 16
+ pmulhw xmm3, xmm1
+ pmulhw xmm7, xmm5
+
+ ; y += x
+ paddw xmm1, xmm3
+ paddw xmm5, xmm7
+
+ movdqa [rsp + temp_qcoeff], xmm1
+ movdqa [rsp + temp_qcoeff + 16], xmm5
+
+ pxor xmm6, xmm6
+ ; zero qcoeff
+ movdqa [rsp + qcoeff], xmm6
+ movdqa [rsp + qcoeff + 16], xmm6
+
+ mov rdx, [rdi + vp8_block_zrun_zbin_boost] ; zbin_boost_ptr
+ mov rax, [rdi + vp8_block_quant_shift] ; quant_shift_ptr
+ mov [rsp + zrun_zbin_boost], rdx
+
+ %macro ZIGZAG_LOOP 1
+ ; x
+ movsx ecx, WORD PTR[rsp + abs_minus_zbin + %1 * 2]
+
+ ; if (x >= zbin)
+ sub cx, WORD PTR[rdx] ; x - zbin
+ lea rdx, [rdx + 2] ; zbin_boost_ptr++
+ jl .rq_zigzag_loop_%1 ; x < zbin
+
+ movsx edi, WORD PTR[rsp + temp_qcoeff + %1 * 2]
+
+ ; downshift by quant_shift[rc]
+ movsx cx, BYTE PTR[rax + %1] ; quant_shift_ptr[rc]
+ sar edi, cl ; also sets Z bit
+ je .rq_zigzag_loop_%1 ; !y
+ mov WORD PTR[rsp + qcoeff + %1 * 2], di ;qcoeff_ptr[rc] = temp_qcoeff[rc]
+ mov rdx, [rsp + zrun_zbin_boost] ; reset to b->zrun_zbin_boost
+ .rq_zigzag_loop_%1:
+ %endmacro
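+ ; rough C sketch (a gloss, not the exact source) of one ZIGZAG_LOOP step,
+ ; where rc is the raster index of the current zig-zag position:
+ ;
+ ;     if (abs_minus_zbin[rc] - *zbin_boost_ptr++ >= 0) {
+ ;         short y = temp_qcoeff[rc] >> quant_shift[rc];
+ ;         if (y) {
+ ;             qcoeff[rc] = y;
+ ;             zbin_boost_ptr = b->zrun_zbin_boost; /* reset after nonzero */
+ ;         }
+ ;     }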
+ ; in vp8_default_zig_zag1d order: see vp8/common/entropy.c
+ ZIGZAG_LOOP 0
+ ZIGZAG_LOOP 1
+ ZIGZAG_LOOP 4
+ ZIGZAG_LOOP 8
+ ZIGZAG_LOOP 5
+ ZIGZAG_LOOP 2
+ ZIGZAG_LOOP 3
+ ZIGZAG_LOOP 6
+ ZIGZAG_LOOP 9
+ ZIGZAG_LOOP 12
+ ZIGZAG_LOOP 13
+ ZIGZAG_LOOP 10
+ ZIGZAG_LOOP 7
+ ZIGZAG_LOOP 11
+ ZIGZAG_LOOP 14
+ ZIGZAG_LOOP 15
+
+ movdqa xmm2, [rsp + qcoeff]
+ movdqa xmm3, [rsp + qcoeff + 16]
+
+ mov rcx, [rsi + vp8_blockd_dequant] ; dequant_ptr
+ mov rdi, [rsi + vp8_blockd_dqcoeff] ; dqcoeff_ptr
+
+ ; y ^ sz
+ pxor xmm2, xmm0
+ pxor xmm3, xmm4
+ ; x = (y ^ sz) - sz
+ psubw xmm2, xmm0
+ psubw xmm3, xmm4
+
+ ; dequant
+ movdqa xmm0, [rcx]
+ movdqa xmm1, [rcx + 16]
+
+ mov rcx, [rsi + vp8_blockd_qcoeff] ; qcoeff_ptr
+
+ pmullw xmm0, xmm2
+ pmullw xmm1, xmm3
+
+ movdqa [rcx], xmm2 ; store qcoeff
+ movdqa [rcx + 16], xmm3
+ movdqa [rdi], xmm0 ; store dqcoeff
+ movdqa [rdi + 16], xmm1
+
+ mov rcx, [rsi + vp8_blockd_eob]
+
+ ; select the last value (in zig_zag order) for EOB
+ pcmpeqw xmm2, xmm6
+ pcmpeqw xmm3, xmm6
+ ; !
+ pcmpeqw xmm6, xmm6
+ pxor xmm2, xmm6
+ pxor xmm3, xmm6
+ ; mask inv_zig_zag
+ pand xmm2, [GLOBAL(inv_zig_zag)]
+ pand xmm3, [GLOBAL(inv_zig_zag + 16)]
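+ ; each nonzero coefficient position now holds its 1-based zig-zag index,
+ ; so the horizontal max below is exactly the eob (0 if all coefficients
+ ; are zero)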
+ ; select the max value
+ pmaxsw xmm2, xmm3
+ pshufd xmm3, xmm2, 00001110b
+ pmaxsw xmm2, xmm3
+ pshuflw xmm3, xmm2, 00001110b
+ pmaxsw xmm2, xmm3
+ pshuflw xmm3, xmm2, 00000001b
+ pmaxsw xmm2, xmm3
+ movd eax, xmm2
+ and eax, 0xff
+
+ mov BYTE PTR [rcx], al ; store eob
+
+ ; begin epilog
+ add rsp, stack_size
+ pop rsp
+ %if ABI_IS_32BIT
+ pop rsi
+ pop rdi
+ %else
+ %ifidn __OUTPUT_FORMAT__,x64
+ pop rsi
+ pop rdi
+ %endif
+ %endif
+ RESTORE_GOT
+ RESTORE_XMM
+ pop rbp
+ ret
+
+ ; void vp8_fast_quantize_b_sse2 | arg
+ ; (BLOCK *b, | 0
+ ; BLOCKD *d) | 1
+
+ global sym(vp8_fast_quantize_b_sse2) PRIVATE
+ sym(vp8_fast_quantize_b_sse2):
+ push rbp
+ mov rbp, rsp
+ GET_GOT rbx
+
+ %if ABI_IS_32BIT
+ push rdi
+ push rsi
+ %else
+ %ifidn __OUTPUT_FORMAT__,x64
+ push rdi
+ push rsi
+ %else
+ ; these registers are used for passing arguments
+ %endif
+ %endif
+
+ ; end prolog
+
+ %if ABI_IS_32BIT
+ mov rdi, arg(0) ; BLOCK *b
+ mov rsi, arg(1) ; BLOCKD *d
+ %else
+ %ifidn __OUTPUT_FORMAT__,x64
+ mov rdi, rcx ; BLOCK *b
+ mov rsi, rdx ; BLOCKD *d
+ %else
+ ;mov rdi, rdi ; BLOCK *b
+ ;mov rsi, rsi ; BLOCKD *d
+ %endif
+ %endif
+
+ mov rax, [rdi + vp8_block_coeff]
+ mov rcx, [rdi + vp8_block_round]
+ mov rdx, [rdi + vp8_block_quant_fast]
+
+ ; z = coeff
+ movdqa xmm0, [rax]
+ movdqa xmm4, [rax + 16]
+
+ ; dup z so we can save sz
+ movdqa xmm1, xmm0
+ movdqa xmm5, xmm4
+
+ ; sz = z >> 15
+ psraw xmm0, 15
+ psraw xmm4, 15
+
+ ; x = abs(z) = (z ^ sz) - sz
+ pxor xmm1, xmm0
+ pxor xmm5, xmm4
+ psubw xmm1, xmm0
+ psubw xmm5, xmm4
+
+ ; x += round
+ paddw xmm1, [rcx]
+ paddw xmm5, [rcx + 16]
+
+ mov rax, [rsi + vp8_blockd_qcoeff]
+ mov rcx, [rsi + vp8_blockd_dequant]
+ mov rdi, [rsi + vp8_blockd_dqcoeff]
+
+ ; y = x * quant >> 16
+ pmulhw xmm1, [rdx]
+ pmulhw xmm5, [rdx + 16]
+
+ ; x = (y ^ sz) - sz
+ pxor xmm1, xmm0
+ pxor xmm5, xmm4
+ psubw xmm1, xmm0
+ psubw xmm5, xmm4
+
+ ; qcoeff = x
+ movdqa [rax], xmm1
+ movdqa [rax + 16], xmm5
+
+ ; x * dequant
+ movdqa xmm2, xmm1
+ movdqa xmm3, xmm5
+ pmullw xmm2, [rcx]
+ pmullw xmm3, [rcx + 16]
+
+ ; dqcoeff = x * dequant
+ movdqa [rdi], xmm2
+ movdqa [rdi + 16], xmm3
+
+ pxor xmm4, xmm4 ;clear all bits
+ pcmpeqw xmm1, xmm4
+ pcmpeqw xmm5, xmm4
+
+ pcmpeqw xmm4, xmm4 ;set all bits
+ pxor xmm1, xmm4
+ pxor xmm5, xmm4
+
+ pand xmm1, [GLOBAL(inv_zig_zag)]
+ pand xmm5, [GLOBAL(inv_zig_zag + 16)]
+
+ pmaxsw xmm1, xmm5
+
+ mov rcx, [rsi + vp8_blockd_eob]
+
+ ; now down to 8
+ pshufd xmm5, xmm1, 00001110b
+
+ pmaxsw xmm1, xmm5
+
+ ; only 4 left
+ pshuflw xmm5, xmm1, 00001110b
+
+ pmaxsw xmm1, xmm5
+
+ ; okay, just 2!
+ pshuflw xmm5, xmm1, 00000001b
+
+ pmaxsw xmm1, xmm5
+
+ movd eax, xmm1
+ and eax, 0xff
+
+ mov BYTE PTR [rcx], al ; store eob
+
+ ; begin epilog
+ %if ABI_IS_32BIT
+ pop rsi
+ pop rdi
+ %else
+ %ifidn __OUTPUT_FORMAT__,x64
+ pop rsi
+ pop rdi
+ %endif
+ %endif
+
+ RESTORE_GOT
+ pop rbp
+ ret
+
+ SECTION_RODATA
+ align 16
+ inv_zig_zag:
+ dw 0x0001, 0x0002, 0x0006, 0x0007
+ dw 0x0003, 0x0005, 0x0008, 0x000d
+ dw 0x0004, 0x0009, 0x000c, 0x000e
+ dw 0x000a, 0x000b, 0x000f, 0x0010
--- /dev/null
-%include "asm_enc_offsets.asm"
+ ;
+ ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ ;
+ ; Use of this source code is governed by a BSD-style license and patent
+ ; grant that can be found in the LICENSE file in the root of the source
+ ; tree. All contributing project authors may be found in the AUTHORS
+ ; file in the root of the source tree.
+ ;
+
+
+ %include "vpx_ports/x86_abi_support.asm"
++%include "vp8_asm_enc_offsets.asm"
+
+
+ ; void vp8_regular_quantize_b_sse4 | arg
+ ; (BLOCK *b, | 0
+ ; BLOCKD *d) | 1
+
+ global sym(vp8_regular_quantize_b_sse4) PRIVATE
+ sym(vp8_regular_quantize_b_sse4):
+
+ %if ABI_IS_32BIT
+ push rbp
+ mov rbp, rsp
+ GET_GOT rbx
+ push rdi
+ push rsi
+
+ ALIGN_STACK 16, rax
+ %define qcoeff 0 ; 32
+ %define stack_size 32
+ sub rsp, stack_size
+ %else
+ %ifidn __OUTPUT_FORMAT__,x64
+ SAVE_XMM 8, u
+ push rdi
+ push rsi
+ %endif
+ %endif
+ ; end prolog
+
+ %if ABI_IS_32BIT
+ mov rdi, arg(0) ; BLOCK *b
+ mov rsi, arg(1) ; BLOCKD *d
+ %else
+ %ifidn __OUTPUT_FORMAT__,x64
+ mov rdi, rcx ; BLOCK *b
+ mov rsi, rdx ; BLOCKD *d
+ %else
+ ;mov rdi, rdi ; BLOCK *b
+ ;mov rsi, rsi ; BLOCKD *d
+ %endif
+ %endif
+
+ mov rax, [rdi + vp8_block_coeff]
+ mov rcx, [rdi + vp8_block_zbin]
+ mov rdx, [rdi + vp8_block_round]
+ movd xmm7, [rdi + vp8_block_zbin_extra]
+
+ ; z
+ movdqa xmm0, [rax]
+ movdqa xmm1, [rax + 16]
+
+ ; duplicate zbin_oq_value
+ pshuflw xmm7, xmm7, 0
+ punpcklwd xmm7, xmm7
+
+ movdqa xmm2, xmm0
+ movdqa xmm3, xmm1
+
+ ; sz
+ psraw xmm0, 15
+ psraw xmm1, 15
+
+ ; (z ^ sz)
+ pxor xmm2, xmm0
+ pxor xmm3, xmm1
+
+ ; x = abs(z)
+ psubw xmm2, xmm0
+ psubw xmm3, xmm1
+
+ ; zbin
+ movdqa xmm4, [rcx]
+ movdqa xmm5, [rcx + 16]
+
+ ; *zbin_ptr + zbin_oq_value
+ paddw xmm4, xmm7
+ paddw xmm5, xmm7
+
+ movdqa xmm6, xmm2
+ movdqa xmm7, xmm3
+
+ ; x - (*zbin_ptr + zbin_oq_value)
+ psubw xmm6, xmm4
+ psubw xmm7, xmm5
+
+ ; round
+ movdqa xmm4, [rdx]
+ movdqa xmm5, [rdx + 16]
+
+ mov rax, [rdi + vp8_block_quant_shift]
+ mov rcx, [rdi + vp8_block_quant]
+ mov rdx, [rdi + vp8_block_zrun_zbin_boost]
+
+ ; x + round
+ paddw xmm2, xmm4
+ paddw xmm3, xmm5
+
+ ; quant
+ movdqa xmm4, [rcx]
+ movdqa xmm5, [rcx + 16]
+
+ ; y = x * quant_ptr >> 16
+ pmulhw xmm4, xmm2
+ pmulhw xmm5, xmm3
+
+ ; y += x
+ paddw xmm2, xmm4
+ paddw xmm3, xmm5
+
+ pxor xmm4, xmm4
+ %if ABI_IS_32BIT
+ movdqa [rsp + qcoeff], xmm4
+ movdqa [rsp + qcoeff + 16], xmm4
+ %else
+ pxor xmm8, xmm8
+ %endif
+
+ ; quant_shift
+ movdqa xmm5, [rax]
+
+ ; zrun_zbin_boost
+ mov rax, rdx
+
+ %macro ZIGZAG_LOOP 5
+ ; x
+ pextrw ecx, %4, %2
+
+ ; if (x >= zbin)
+ sub cx, WORD PTR[rdx] ; x - zbin
+ lea rdx, [rdx + 2] ; zbin_boost_ptr++
+ jl .rq_zigzag_loop_%1 ; x < zbin
+
+ pextrw edi, %3, %2 ; y
+
+ ; downshift by quant_shift[rc]
+ pextrb ecx, xmm5, %1 ; quant_shift[rc]
+ sar edi, cl ; also sets Z bit
+ je .rq_zigzag_loop_%1 ; !y
+ %if ABI_IS_32BIT
+ mov WORD PTR[rsp + qcoeff + %1 *2], di
+ %else
+ pinsrw %5, edi, %2 ; qcoeff[rc]
+ %endif
+ mov rdx, rax ; reset to b->zrun_zbin_boost
+ .rq_zigzag_loop_%1:
+ %endmacro
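+ ; same per-coefficient test as the SSE2 version, but x and y stay in xmm
+ ; registers: %1 is the zig-zag position, %2 the word lane, %3/%4 the y/x
+ ; source registers, and %5 the qcoeff accumulator (64-bit builds only;
+ ; 32-bit builds spill to the stack instead)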
+ ; in vp8_default_zig_zag1d order: see vp8/common/entropy.c
+ ZIGZAG_LOOP 0, 0, xmm2, xmm6, xmm4
+ ZIGZAG_LOOP 1, 1, xmm2, xmm6, xmm4
+ ZIGZAG_LOOP 4, 4, xmm2, xmm6, xmm4
+ ZIGZAG_LOOP 8, 0, xmm3, xmm7, xmm8
+ ZIGZAG_LOOP 5, 5, xmm2, xmm6, xmm4
+ ZIGZAG_LOOP 2, 2, xmm2, xmm6, xmm4
+ ZIGZAG_LOOP 3, 3, xmm2, xmm6, xmm4
+ ZIGZAG_LOOP 6, 6, xmm2, xmm6, xmm4
+ ZIGZAG_LOOP 9, 1, xmm3, xmm7, xmm8
+ ZIGZAG_LOOP 12, 4, xmm3, xmm7, xmm8
+ ZIGZAG_LOOP 13, 5, xmm3, xmm7, xmm8
+ ZIGZAG_LOOP 10, 2, xmm3, xmm7, xmm8
+ ZIGZAG_LOOP 7, 7, xmm2, xmm6, xmm4
+ ZIGZAG_LOOP 11, 3, xmm3, xmm7, xmm8
+ ZIGZAG_LOOP 14, 6, xmm3, xmm7, xmm8
+ ZIGZAG_LOOP 15, 7, xmm3, xmm7, xmm8
+
+ mov rcx, [rsi + vp8_blockd_dequant]
+ mov rdi, [rsi + vp8_blockd_dqcoeff]
+
+ %if ABI_IS_32BIT
+ movdqa xmm4, [rsp + qcoeff]
+ movdqa xmm5, [rsp + qcoeff + 16]
+ %else
+ %define xmm5 xmm8
+ %endif
+
+ ; y ^ sz
+ pxor xmm4, xmm0
+ pxor xmm5, xmm1
+ ; x = (y ^ sz) - sz
+ psubw xmm4, xmm0
+ psubw xmm5, xmm1
+
+ ; dequant
+ movdqa xmm0, [rcx]
+ movdqa xmm1, [rcx + 16]
+
+ mov rcx, [rsi + vp8_blockd_qcoeff]
+
+ pmullw xmm0, xmm4
+ pmullw xmm1, xmm5
+
+ ; store qcoeff
+ movdqa [rcx], xmm4
+ movdqa [rcx + 16], xmm5
+
+ ; store dqcoeff
+ movdqa [rdi], xmm0
+ movdqa [rdi + 16], xmm1
+
+ mov rcx, [rsi + vp8_blockd_eob]
+
+ ; select the last value (in zig_zag order) for EOB
+ pxor xmm6, xmm6
+ pcmpeqw xmm4, xmm6
+ pcmpeqw xmm5, xmm6
+
+ packsswb xmm4, xmm5
+ pshufb xmm4, [GLOBAL(zig_zag1d)]
+ pmovmskb edx, xmm4
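+ ; edx has one bit per coefficient, in zig-zag order after the pshufb,
+ ; set where the coefficient is zero; xor-ing with 0xffff flips it into a
+ ; nonzero mask, bsr finds the last nonzero position, and the sub/sar pair
+ ; builds an all-ones mask iff edx != 0 so eob is forced to 0 when every
+ ; coefficient is zero (bsr leaves eax undefined in that case)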
+ xor rdi, rdi
+ mov eax, -1
+ xor dx, ax
+ bsr eax, edx
+ sub edi, edx
+ sar edi, 31
+ add eax, 1
+ and eax, edi
+
+ mov BYTE PTR [rcx], al ; store eob
+
+ ; begin epilog
+ %if ABI_IS_32BIT
+ add rsp, stack_size
+ pop rsp
+
+ pop rsi
+ pop rdi
+ RESTORE_GOT
+ pop rbp
+ %else
+ %undef xmm5
+ %ifidn __OUTPUT_FORMAT__,x64
+ pop rsi
+ pop rdi
+ RESTORE_XMM
+ %endif
+ %endif
+
+ ret
+
+ SECTION_RODATA
+ align 16
+ ; vp8/common/entropy.c: vp8_default_zig_zag1d
+ zig_zag1d:
+ db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
--- /dev/null
-%include "asm_enc_offsets.asm"
+ ;
+ ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ ;
+ ; Use of this source code is governed by a BSD-style license and patent
+ ; grant that can be found in the LICENSE file in the root of the source
+ ; tree. All contributing project authors may be found in the AUTHORS
+ ; file in the root of the source tree.
+ ;
+
+
+ %include "vpx_ports/x86_abi_support.asm"
++%include "vp8_asm_enc_offsets.asm"
+
+
+ ; void vp8_fast_quantize_b_ssse3 | arg
+ ; (BLOCK *b, | 0
+ ; BLOCKD *d) | 1
+ ;
+
+ global sym(vp8_fast_quantize_b_ssse3) PRIVATE
+ sym(vp8_fast_quantize_b_ssse3):
+ push rbp
+ mov rbp, rsp
+ GET_GOT rbx
+
+ %if ABI_IS_32BIT
+ push rdi
+ push rsi
+ %else
+ %ifidn __OUTPUT_FORMAT__,x64
+ push rdi
+ push rsi
+ %endif
+ %endif
+ ; end prolog
+
+ %if ABI_IS_32BIT
+ mov rdi, arg(0) ; BLOCK *b
+ mov rsi, arg(1) ; BLOCKD *d
+ %else
+ %ifidn __OUTPUT_FORMAT__,x64
+ mov rdi, rcx ; BLOCK *b
+ mov rsi, rdx ; BLOCKD *d
+ %else
+ ;mov rdi, rdi ; BLOCK *b
+ ;mov rsi, rsi ; BLOCKD *d
+ %endif
+ %endif
+
+ mov rax, [rdi + vp8_block_coeff]
+ mov rcx, [rdi + vp8_block_round]
+ mov rdx, [rdi + vp8_block_quant_fast]
+
+ ; coeff
+ movdqa xmm0, [rax]
+ movdqa xmm4, [rax + 16]
+
+ ; round
+ movdqa xmm2, [rcx]
+ movdqa xmm3, [rcx + 16]
+
+ movdqa xmm1, xmm0
+ movdqa xmm5, xmm4
+
+ ; sz = z >> 15
+ psraw xmm0, 15
+ psraw xmm4, 15
+
+ pabsw xmm1, xmm1
+ pabsw xmm5, xmm5
+
+ paddw xmm1, xmm2
+ paddw xmm5, xmm3
+
+ ; quant_fast
+ pmulhw xmm1, [rdx]
+ pmulhw xmm5, [rdx + 16]
+
+ mov rax, [rsi + vp8_blockd_qcoeff]
+ mov rdi, [rsi + vp8_blockd_dequant]
+ mov rcx, [rsi + vp8_blockd_dqcoeff]
+
+ movdqa xmm2, xmm1 ;store y for getting eob
+ movdqa xmm3, xmm5
+
+ pxor xmm1, xmm0
+ pxor xmm5, xmm4
+ psubw xmm1, xmm0
+ psubw xmm5, xmm4
+
+ movdqa [rax], xmm1
+ movdqa [rax + 16], xmm5
+
+ movdqa xmm0, [rdi]
+ movdqa xmm4, [rdi + 16]
+
+ pmullw xmm0, xmm1
+ pmullw xmm4, xmm5
+ pxor xmm1, xmm1
+
+ pcmpgtw xmm2, xmm1 ;calculate eob
+ pcmpgtw xmm3, xmm1
+ packsswb xmm2, xmm3
+ pshufb xmm2, [GLOBAL(zz_shuf)]
+
+ pmovmskb edx, xmm2
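+ ; one bit per nonzero coefficient, in zig-zag order thanks to the pshufb
+ ; above; bsr+1 below converts it to the eob count, and the cmove zeroes
+ ; it when no coefficient survived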
+
+ movdqa [rcx], xmm0 ;store dqcoeff
+ movdqa [rcx + 16], xmm4 ;store dqcoeff
+ mov rcx, [rsi + vp8_blockd_eob]
+
+ bsr eax, edx ;position of last nonzero coeff (zig-zag order)
+ add eax, 1
+
+ cmp edx, 0 ;if all 0, eob=0
+ cmove eax, edx
+
+ mov BYTE PTR [rcx], al ;store eob
+
+ ; begin epilog
+ %if ABI_IS_32BIT
+ pop rsi
+ pop rdi
+ %else
+ %ifidn __OUTPUT_FORMAT__,x64
+ pop rsi
+ pop rdi
+ %endif
+ %endif
+
+ RESTORE_GOT
+ pop rbp
+ ret
+
+ SECTION_RODATA
+ align 16
+ zz_shuf:
+ db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
--- /dev/null
+ ##
+ ## Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ ##
+ ## Use of this source code is governed by a BSD-style license
+ ## that can be found in the LICENSE file in the root of the source
+ ## tree. An additional intellectual property rights grant can be found
+ ## in the file PATENTS. All contributing project authors may
+ ## be found in the AUTHORS file in the root of the source tree.
+ ##
+
+ VP8_COMMON_SRCS-yes += vp8_common.mk
+ VP8_COMMON_SRCS-yes += common/pragmas.h
+ VP8_COMMON_SRCS-yes += common/ppflags.h
+ VP8_COMMON_SRCS-yes += common/onyx.h
+ VP8_COMMON_SRCS-yes += common/onyxd.h
+ VP8_COMMON_SRCS-yes += common/alloccommon.c
+ VP8_COMMON_SRCS-yes += common/asm_com_offsets.c
+ VP8_COMMON_SRCS-yes += common/blockd.c
+ VP8_COMMON_SRCS-yes += common/coefupdateprobs.h
+ VP8_COMMON_SRCS-yes += common/debugmodes.c
+ VP8_COMMON_SRCS-yes += common/default_coef_probs.h
+ VP8_COMMON_SRCS-yes += common/dequantize.c
+ VP8_COMMON_SRCS-yes += common/entropy.c
+ VP8_COMMON_SRCS-yes += common/entropymode.c
+ VP8_COMMON_SRCS-yes += common/entropymv.c
+ VP8_COMMON_SRCS-yes += common/extend.c
+ VP8_COMMON_SRCS-yes += common/filter.c
+ VP8_COMMON_SRCS-yes += common/filter.h
+ VP8_COMMON_SRCS-yes += common/findnearmv.c
+ VP8_COMMON_SRCS-yes += common/generic/systemdependent.c
+ VP8_COMMON_SRCS-yes += common/idct_blk.c
+ VP8_COMMON_SRCS-yes += common/idctllm.c
+ VP8_COMMON_SRCS-yes += common/alloccommon.h
+ VP8_COMMON_SRCS-yes += common/blockd.h
+ VP8_COMMON_SRCS-yes += common/common.h
+ VP8_COMMON_SRCS-yes += common/entropy.h
+ VP8_COMMON_SRCS-yes += common/entropymode.h
+ VP8_COMMON_SRCS-yes += common/entropymv.h
+ VP8_COMMON_SRCS-yes += common/extend.h
+ VP8_COMMON_SRCS-yes += common/findnearmv.h
+ VP8_COMMON_SRCS-yes += common/header.h
+ VP8_COMMON_SRCS-yes += common/invtrans.h
+ VP8_COMMON_SRCS-yes += common/loopfilter.h
+ VP8_COMMON_SRCS-yes += common/modecont.h
+ VP8_COMMON_SRCS-yes += common/mv.h
+ VP8_COMMON_SRCS-yes += common/onyxc_int.h
+ VP8_COMMON_SRCS-yes += common/quant_common.h
+ VP8_COMMON_SRCS-yes += common/reconinter.h
+ VP8_COMMON_SRCS-yes += common/reconintra4x4.h
+ VP8_COMMON_SRCS-yes += common/rtcd.c
+ VP8_COMMON_SRCS-yes += common/rtcd_defs.sh
+ VP8_COMMON_SRCS-yes += common/setupintrarecon.h
+ VP8_COMMON_SRCS-yes += common/swapyv12buffer.h
+ VP8_COMMON_SRCS-yes += common/systemdependent.h
+ VP8_COMMON_SRCS-yes += common/threading.h
+ VP8_COMMON_SRCS-yes += common/treecoder.h
+ VP8_COMMON_SRCS-yes += common/loopfilter.c
+ VP8_COMMON_SRCS-yes += common/loopfilter_filters.c
+ VP8_COMMON_SRCS-yes += common/mbpitch.c
+ VP8_COMMON_SRCS-yes += common/modecont.c
+ VP8_COMMON_SRCS-yes += common/quant_common.c
+ VP8_COMMON_SRCS-yes += common/reconinter.c
+ VP8_COMMON_SRCS-yes += common/reconintra.c
+ VP8_COMMON_SRCS-yes += common/reconintra4x4.c
+ VP8_COMMON_SRCS-yes += common/sad_c.c
+ VP8_COMMON_SRCS-yes += common/setupintrarecon.c
+ VP8_COMMON_SRCS-yes += common/swapyv12buffer.c
+ VP8_COMMON_SRCS-yes += common/variance_c.c
+ VP8_COMMON_SRCS-yes += common/variance.h
+ VP8_COMMON_SRCS-yes += common/vp8_entropymodedata.h
+
+
+
+ VP8_COMMON_SRCS-$(CONFIG_POSTPROC_VISUALIZER) += common/textblit.c
+ VP8_COMMON_SRCS-yes += common/treecoder.c
+
+ VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/filter_x86.c
+ VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/filter_x86.h
+ VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp8_asm_stubs.c
+ VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/loopfilter_x86.c
+ VP8_COMMON_SRCS-$(CONFIG_POSTPROC) += common/mfqe.c
+ VP8_COMMON_SRCS-$(CONFIG_POSTPROC) += common/postproc.h
+ VP8_COMMON_SRCS-$(CONFIG_POSTPROC) += common/postproc.c
+ VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/dequantize_mmx.asm
+ VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/idct_blk_mmx.c
+ VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/idctllm_mmx.asm
+ VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/iwalsh_mmx.asm
+ VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/loopfilter_mmx.asm
+ VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/recon_mmx.asm
+ VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/sad_mmx.asm
+ VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/subpixel_mmx.asm
+ VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/variance_mmx.c
+ VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/variance_impl_mmx.asm
+ VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idct_blk_sse2.c
+ VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idctllm_sse2.asm
+ VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/recon_sse2.asm
+ VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/recon_wrapper_sse2.c
+ VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/sad_sse2.asm
+ VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/subpixel_sse2.asm
+ VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/loopfilter_sse2.asm
+ VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/iwalsh_sse2.asm
+ VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/variance_sse2.c
+ VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/variance_impl_sse2.asm
+ VP8_COMMON_SRCS-$(HAVE_SSE3) += common/x86/sad_sse3.asm
+ VP8_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/sad_ssse3.asm
+ VP8_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/subpixel_ssse3.asm
+ VP8_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/variance_ssse3.c
+ VP8_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/variance_impl_ssse3.asm
+ VP8_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/sad_sse4.asm
+
+ ifeq ($(CONFIG_POSTPROC),yes)
+ VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/postproc_x86.c
+ VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/postproc_mmx.asm
+ VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/mfqe_sse2.asm
+ VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/postproc_sse2.asm
+ endif
+
+ ifeq ($(ARCH_X86_64),yes)
+ VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/loopfilter_block_sse2.asm
+ endif
+
+ # common (mips dspr2)
+ VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/idctllm_dspr2.c
+ VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/filter_dspr2.c
+ VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/loopfilter_filters_dspr2.c
+ VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/reconinter_dspr2.c
+ VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/idct_blk_dspr2.c
+ VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/dequantize_dspr2.c
+
+ # common (c)
+ VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/filter_arm.c
+ VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/loopfilter_arm.c
+ VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/reconintra_arm.c
+ VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/dequantize_arm.c
+ VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/variance_arm.c
+
+ # common (media)
+ VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/bilinearfilter_arm.c
+ VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/bilinearfilter_arm.h
+ VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/bilinearfilter_v6$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/copymem8x4_v6$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/copymem8x8_v6$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/copymem16x16_v6$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/dc_only_idct_add_v6$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/iwalsh_v6$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/filter_v6$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/idct_v6$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/loopfilter_v6$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/simpleloopfilter_v6$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/sixtappredict8x4_v6$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/intra4x4_predict_v6$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/dequant_idct_v6$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/dequantize_v6$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/idct_blk_v6.c
+ VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/vp8_sad16x16_armv6$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/vp8_variance8x8_armv6$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/vp8_variance16x16_armv6$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6$(ASM)
+
+ # common (neon)
+ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/bilinearpredict4x4_neon$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/bilinearpredict8x4_neon$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/bilinearpredict8x8_neon$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/bilinearpredict16x16_neon$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/copymem8x4_neon$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/copymem8x8_neon$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/copymem16x16_neon$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dc_only_idct_add_neon$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/iwalsh_neon$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/loopfilter_neon$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/loopfiltersimplehorizontaledge_neon$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/loopfiltersimpleverticaledge_neon$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/mbloopfilter_neon$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/shortidct4x4llm_neon$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/sad8_neon$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/sad16_neon$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/sixtappredict4x4_neon$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/sixtappredict8x4_neon$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/sixtappredict8x8_neon$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/sixtappredict16x16_neon$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/buildintrapredictorsmby_neon$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/save_reg_neon$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dequant_idct_neon$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/idct_dequant_full_2x_neon$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/idct_dequant_0_2x_neon$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dequantizeb_neon$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/idct_blk_neon.c
+ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/variance_neon$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp8_subpixelvariance8x8_neon$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp8_subpixelvariance16x16_neon$(ASM)
+ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp8_subpixelvariance16x16s_neon$(ASM)
++
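++# asm_offsets_template is assumed to come from the shared build system; it
++# generates the named .asm include from the offsets declared in the C file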
++$(eval $(call asm_offsets_template,\
++ vp8_asm_com_offsets.asm, $(VP8_PREFIX)common/asm_com_offsets.c))
--- /dev/null
-include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8_common.mk
-
+ ##
+ ## Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ ##
+ ## Use of this source code is governed by a BSD-style license
+ ## that can be found in the LICENSE file in the root of the source
+ ## tree. An additional intellectual property rights grant can be found
+ ## in the file PATENTS. All contributing project authors may
+ ## be found in the AUTHORS file in the root of the source tree.
+ ##
+
+
+ VP8_CX_EXPORTS += exports_enc
+
+ VP8_CX_SRCS-yes += $(VP8_COMMON_SRCS-yes)
+ VP8_CX_SRCS-no += $(VP8_COMMON_SRCS-no)
+ VP8_CX_SRCS_REMOVE-yes += $(VP8_COMMON_SRCS_REMOVE-yes)
+ VP8_CX_SRCS_REMOVE-no += $(VP8_COMMON_SRCS_REMOVE-no)
+
+ ifeq ($(ARCH_ARM),yes)
+ include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8cx_arm.mk
+ endif
+
+ VP8_CX_SRCS-yes += vp8_cx_iface.c
+
+ # encoder
+ #INCLUDES += algo/vpx_common/vpx_mem/include
+ #INCLUDES += common
+ #INCLUDES += common
+ #INCLUDES += common
+ #INCLUDES += algo/vpx_ref/cpu_id/include
+ #INCLUDES += common
+ #INCLUDES += encoder
+
+ VP8_CX_SRCS-yes += encoder/asm_enc_offsets.c
+ VP8_CX_SRCS-yes += encoder/defaultcoefcounts.h
+ VP8_CX_SRCS-yes += encoder/bitstream.c
+ VP8_CX_SRCS-yes += encoder/boolhuff.c
+ VP8_CX_SRCS-yes += encoder/dct.c
+ VP8_CX_SRCS-yes += encoder/encodeframe.c
+ VP8_CX_SRCS-yes += encoder/encodeframe.h
+ VP8_CX_SRCS-yes += encoder/encodeintra.c
+ VP8_CX_SRCS-yes += encoder/encodemb.c
+ VP8_CX_SRCS-yes += encoder/encodemv.c
+ VP8_CX_SRCS-$(CONFIG_MULTITHREAD) += encoder/ethreading.c
+ VP8_CX_SRCS-yes += encoder/firstpass.c
+ VP8_CX_SRCS-yes += encoder/block.h
+ VP8_CX_SRCS-yes += encoder/boolhuff.h
+ VP8_CX_SRCS-yes += encoder/bitstream.h
+ VP8_CX_SRCS-$(CONFIG_TEMPORAL_DENOISING) += encoder/denoising.h
+ VP8_CX_SRCS-$(CONFIG_TEMPORAL_DENOISING) += encoder/denoising.c
+ VP8_CX_SRCS-yes += encoder/encodeintra.h
+ VP8_CX_SRCS-yes += encoder/encodemb.h
+ VP8_CX_SRCS-yes += encoder/encodemv.h
+ VP8_CX_SRCS-yes += encoder/firstpass.h
+ VP8_CX_SRCS-yes += encoder/lookahead.c
+ VP8_CX_SRCS-yes += encoder/lookahead.h
+ VP8_CX_SRCS-yes += encoder/mcomp.h
+ VP8_CX_SRCS-yes += encoder/modecosts.h
+ VP8_CX_SRCS-yes += encoder/onyx_int.h
+ VP8_CX_SRCS-yes += encoder/pickinter.h
+ VP8_CX_SRCS-yes += encoder/psnr.h
+ VP8_CX_SRCS-yes += encoder/quantize.h
+ VP8_CX_SRCS-yes += encoder/ratectrl.h
+ VP8_CX_SRCS-yes += encoder/rdopt.h
+ VP8_CX_SRCS-yes += encoder/tokenize.h
+ VP8_CX_SRCS-yes += encoder/treewriter.h
+ VP8_CX_SRCS-yes += encoder/mcomp.c
+ VP8_CX_SRCS-yes += encoder/modecosts.c
+ VP8_CX_SRCS-yes += encoder/onyx_if.c
+ VP8_CX_SRCS-yes += encoder/pickinter.c
+ VP8_CX_SRCS-yes += encoder/picklpf.c
+ VP8_CX_SRCS-yes += encoder/psnr.c
+ VP8_CX_SRCS-yes += encoder/quantize.c
+ VP8_CX_SRCS-yes += encoder/ratectrl.c
+ VP8_CX_SRCS-yes += encoder/rdopt.c
+ VP8_CX_SRCS-yes += encoder/segmentation.c
+ VP8_CX_SRCS-yes += encoder/segmentation.h
+ VP8_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/ssim.c
+ VP8_CX_SRCS-yes += encoder/tokenize.c
+ VP8_CX_SRCS-yes += encoder/dct_value_cost.h
+ VP8_CX_SRCS-yes += encoder/dct_value_tokens.h
+ VP8_CX_SRCS-yes += encoder/treewriter.c
+ VP8_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/postproc.h
+ VP8_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/postproc.c
+ VP8_CX_SRCS-yes += encoder/temporal_filter.c
+ VP8_CX_SRCS-$(CONFIG_MULTI_RES_ENCODING) += encoder/mr_dissim.c
+ VP8_CX_SRCS-$(CONFIG_MULTI_RES_ENCODING) += encoder/mr_dissim.h
+
+ ifeq ($(CONFIG_REALTIME_ONLY),yes)
+ VP8_CX_SRCS_REMOVE-yes += encoder/firstpass.c
+ VP8_CX_SRCS_REMOVE-yes += encoder/temporal_filter.c
+ endif
+
+ VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/dct_mmx.asm
+ VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/subtract_mmx.asm
+ VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp8_enc_stubs_mmx.c
+ VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm
+ VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/fwalsh_sse2.asm
+ VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.asm
+
+ ifeq ($(CONFIG_TEMPORAL_DENOISING),yes)
+ VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/denoising_sse2.c
+ ifeq ($(HAVE_SSE2),yes)
+ vp8/encoder/x86/denoising_sse2.c.o: CFLAGS += -msse2
+ endif
+ endif
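+ # the target-specific CFLAGS line above lets the SSE2 intrinsics in
+ # denoising_sse2.c build even when the global flags target a baseline x86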
+
+ VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/subtract_sse2.asm
+ VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm
+ VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp8_enc_stubs_sse2.c
+ VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/quantize_ssse3.asm
+ VP8_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/quantize_sse4.asm
+ VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_mmx.asm
+ VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodeopt.asm
+ VP8_CX_SRCS-$(ARCH_X86_64) += encoder/x86/ssim_opt.asm
+
+ ifeq ($(CONFIG_REALTIME_ONLY),yes)
+ VP8_CX_SRCS_REMOVE-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm
+ endif
+
+
+ VP8_CX_SRCS-yes := $(filter-out $(VP8_CX_SRCS_REMOVE-yes),$(VP8_CX_SRCS-yes))
++
++$(eval $(call asm_offsets_template,\
++ vp8_asm_enc_offsets.asm, $(VP8_PREFIX)encoder/asm_enc_offsets.c))
--- /dev/null
-include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8_common.mk
-
+ ##
+ ## Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ ##
+ ## Use of this source code is governed by a BSD-style license
+ ## that can be found in the LICENSE file in the root of the source
+ ## tree. An additional intellectual property rights grant can be found
+ ## in the file PATENTS. All contributing project authors may
+ ## be found in the AUTHORS file in the root of the source tree.
+ ##
+
+
+ VP8_DX_EXPORTS += exports_dec
+
+ VP8_DX_SRCS-yes += $(VP8_COMMON_SRCS-yes)
+ VP8_DX_SRCS-no += $(VP8_COMMON_SRCS-no)
+ VP8_DX_SRCS_REMOVE-yes += $(VP8_COMMON_SRCS_REMOVE-yes)
+ VP8_DX_SRCS_REMOVE-no += $(VP8_COMMON_SRCS_REMOVE-no)
+
+ VP8_DX_SRCS-yes += vp8_dx_iface.c
+
+ # common
+ #define ARM
+ #define DISABLE_THREAD
+
+ #INCLUDES += algo/vpx_common/vpx_mem/include
+ #INCLUDES += common
+ #INCLUDES += common
+ #INCLUDES += common
+ #INCLUDES += common
+ #INCLUDES += decoder
+
+
+
+ # decoder
+ #define ARM
+ #define DISABLE_THREAD
+
+ #INCLUDES += algo/vpx_common/vpx_mem/include
+ #INCLUDES += common
+ #INCLUDES += common
+ #INCLUDES += common
+ #INCLUDES += common
+ #INCLUDES += decoder
+
+ VP8_DX_SRCS-yes += decoder/asm_dec_offsets.c
+ VP8_DX_SRCS-yes += decoder/dboolhuff.c
+ VP8_DX_SRCS-yes += decoder/decodemv.c
+ VP8_DX_SRCS-yes += decoder/decodframe.c
+ VP8_DX_SRCS-yes += decoder/detokenize.c
+ VP8_DX_SRCS-$(CONFIG_ERROR_CONCEALMENT) += decoder/ec_types.h
+ VP8_DX_SRCS-$(CONFIG_ERROR_CONCEALMENT) += decoder/error_concealment.h
+ VP8_DX_SRCS-$(CONFIG_ERROR_CONCEALMENT) += decoder/error_concealment.c
+ VP8_DX_SRCS-yes += decoder/dboolhuff.h
+ VP8_DX_SRCS-yes += decoder/decodemv.h
+ VP8_DX_SRCS-yes += decoder/decoderthreading.h
+ VP8_DX_SRCS-yes += decoder/detokenize.h
+ VP8_DX_SRCS-yes += decoder/onyxd_int.h
+ VP8_DX_SRCS-yes += decoder/treereader.h
+ VP8_DX_SRCS-yes += decoder/onyxd_if.c
+ VP8_DX_SRCS-$(CONFIG_MULTITHREAD) += decoder/threading.c
+
+ VP8_DX_SRCS-yes := $(filter-out $(VP8_DX_SRCS_REMOVE-yes),$(VP8_DX_SRCS-yes))
++
++$(eval $(call asm_offsets_template,\
++ vp8_asm_dec_offsets.asm, $(VP8_PREFIX)decoder/asm_dec_offsets.c))
--- /dev/null
- if [ "CONFIG_SPATIAL_RESAMPLING" != "yes" ]; then
+common_forward_decls() {
+cat <<EOF
+
+struct loop_filter_info;
+struct blockd;
+struct macroblockd;
+
+/* Encoder forward decls */
+struct block;
+struct macroblock;
+struct variance_vtable;
+union int_mv;
+struct yv12_buffer_config;
+EOF
+}
+forward_decls common_forward_decls
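+
+# key to the DSL below: "prototype <ret> <name> <args>" declares a
+# run-time-dispatched function, "specialize <name> <isa...>" lists its
+# per-ISA implementations, and "<name>_<isa>=<symbol>" binds a variant to
+# a differently named assembly/C symbol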
+
+prototype void vp9_filter_block2d_4x4_8 "const unsigned char *src_ptr, const unsigned int src_stride, const short *HFilter_aligned16, const short *VFilter_aligned16, unsigned char *dst_ptr, unsigned int dst_stride"
+prototype void vp9_filter_block2d_8x4_8 "const unsigned char *src_ptr, const unsigned int src_stride, const short *HFilter_aligned16, const short *VFilter_aligned16, unsigned char *dst_ptr, unsigned int dst_stride"
+prototype void vp9_filter_block2d_8x8_8 "const unsigned char *src_ptr, const unsigned int src_stride, const short *HFilter_aligned16, const short *VFilter_aligned16, unsigned char *dst_ptr, unsigned int dst_stride"
+prototype void vp9_filter_block2d_16x16_8 "const unsigned char *src_ptr, const unsigned int src_stride, const short *HFilter_aligned16, const short *VFilter_aligned16, unsigned char *dst_ptr, unsigned int dst_stride"
+
+# At the very least, MSVC 2008 has a compiler bug exhibited by this code: it
+# compiles warning-free, but a disassembly of the generated code shows bugs.
+# To be on the safe side, this is only enabled when compiling with gcc.
+if [ "$CONFIG_GCC" = "yes" ]; then
+ specialize vp9_filter_block2d_4x4_8 sse4_1 sse2
+fi
+specialize vp9_filter_block2d_8x4_8 ssse3 #sse4_1 sse2
+specialize vp9_filter_block2d_8x8_8 ssse3 #sse4_1 sse2
+specialize vp9_filter_block2d_16x16_8 ssse3 #sse4_1 sse2
+
+#
+# Dequant
+#
+prototype void vp9_dequantize_b "struct blockd *x"
+specialize vp9_dequantize_b mmx
+
+prototype void vp9_dequantize_b_2x2 "struct blockd *x"
+specialize vp9_dequantize_b_2x2
+
+prototype void vp9_dequant_dc_idct_add_y_block_8x8 "short *q, short *dq, unsigned char *pre, unsigned char *dst, int stride, char *eobs, short *dc, struct macroblockd *xd"
+specialize vp9_dequant_dc_idct_add_y_block_8x8
+
+prototype void vp9_dequant_idct_add_y_block_8x8 "short *q, short *dq, unsigned char *pre, unsigned char *dst, int stride, char *eobs, struct macroblockd *xd"
+specialize vp9_dequant_idct_add_y_block_8x8
+
+prototype void vp9_dequant_idct_add_uv_block_8x8 "short *q, short *dq, unsigned char *pre, unsigned char *dstu, unsigned char *dstv, int stride, char *eobs, struct macroblockd *xd"
+specialize vp9_dequant_idct_add_uv_block_8x8
+
+prototype void vp9_dequant_idct_add_16x16 "short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride"
+specialize vp9_dequant_idct_add_16x16
+
+prototype void vp9_dequant_idct_add "short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride"
+specialize vp9_dequant_idct_add
+
+prototype void vp9_dequant_dc_idct_add "short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride, int Dc"
+specialize vp9_dequant_dc_idct_add
+
+prototype void vp9_dequant_dc_idct_add_y_block "short *q, short *dq, unsigned char *pre, unsigned char *dst, int stride, char *eobs, short *dc"
+specialize vp9_dequant_dc_idct_add_y_block mmx
+
+prototype void vp9_dequant_idct_add_y_block "short *q, short *dq, unsigned char *pre, unsigned char *dst, int stride, char *eobs"
+specialize vp9_dequant_idct_add_y_block mmx
+
+prototype void vp9_dequant_idct_add_uv_block "short *q, short *dq, unsigned char *pre, unsigned char *dstu, unsigned char *dstv, int stride, char *eobs"
+specialize vp9_dequant_idct_add_uv_block mmx
+
+#
+# RECON
+#
+prototype void vp9_copy_mem16x16 "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch"
+specialize vp9_copy_mem16x16 mmx sse2 media neon dspr2
+vp9_copy_mem16x16_media=vp9_copy_mem16x16_v6
+vp9_copy_mem16x16_dspr2=vp9_copy_mem16x16_dspr2
+
+prototype void vp9_copy_mem8x8 "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch"
+specialize vp9_copy_mem8x8 mmx media neon dspr2
+vp9_copy_mem8x8_media=vp9_copy_mem8x8_v6
+vp9_copy_mem8x8_dspr2=vp9_copy_mem8x8_dspr2
+
+prototype void vp9_intra4x4_predict "unsigned char *Above, unsigned char *yleft, int left_stride, B_PREDICTION_MODE b_mode, unsigned char *dst, int dst_stride, unsigned char top_left"
+specialize vp9_intra4x4_predict
+
+prototype void vp9_avg_mem16x16 "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch"
+specialize vp9_avg_mem16x16
+
+prototype void vp9_avg_mem8x8 "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch"
+specialize vp9_avg_mem8x8
+
+prototype void vp9_copy_mem8x4 "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch"
+specialize vp9_copy_mem8x4 mmx media neon dspr2
+vp9_copy_mem8x4_media=vp9_copy_mem8x4_v6
+vp9_copy_mem8x4_dspr2=vp9_copy_mem8x4_dspr2
+
+prototype void vp9_recon_b "unsigned char *pred_ptr, short *diff_ptr, unsigned char *dst_ptr, int stride"
+specialize vp9_recon_b
+
+prototype void vp9_recon_uv_b "unsigned char *pred_ptr, short *diff_ptr, unsigned char *dst_ptr, int stride"
+specialize vp9_recon_uv_b
+
+prototype void vp9_recon2b "unsigned char *pred_ptr, short *diff_ptr, unsigned char *dst_ptr, int stride"
+specialize vp9_recon2b sse2
+
+prototype void vp9_recon4b "unsigned char *pred_ptr, short *diff_ptr, unsigned char *dst_ptr, int stride"
+specialize vp9_recon4b sse2
+
+prototype void vp9_recon_mb "struct macroblockd *x"
+specialize vp9_recon_mb
+
+prototype void vp9_recon_mby "struct macroblockd *x"
+specialize vp9_recon_mby
+
+prototype void vp9_build_intra_predictors_mby_s "struct macroblockd *x"
+specialize vp9_build_intra_predictors_mby_s
+
+prototype void vp9_build_intra_predictors_sby_s "struct macroblockd *x"
+specialize vp9_build_intra_predictors_sby_s;
+
+prototype void vp9_build_intra_predictors_sbuv_s "struct macroblockd *x"
+specialize vp9_build_intra_predictors_sbuv_s;
+
+prototype void vp9_build_intra_predictors_mby "struct macroblockd *x"
+specialize vp9_build_intra_predictors_mby;
+
+prototype void vp9_build_comp_intra_predictors_mby "struct macroblockd *x"
+specialize vp9_build_comp_intra_predictors_mby;
+
+prototype void vp9_build_intra_predictors_mbuv "struct macroblockd *x"
+specialize vp9_build_intra_predictors_mbuv;
+
+prototype void vp9_build_intra_predictors_mbuv_s "struct macroblockd *x"
+specialize vp9_build_intra_predictors_mbuv_s;
+
+prototype void vp9_build_comp_intra_predictors_mbuv "struct macroblockd *x"
+specialize vp9_build_comp_intra_predictors_mbuv;
+
+prototype void vp9_intra4x4_predict "struct blockd *x, int b_mode, unsigned char *predictor"
+specialize vp9_intra4x4_predict;
+
+prototype void vp9_comp_intra4x4_predict "struct blockd *x, int b_mode, int second_mode, unsigned char *predictor"
+specialize vp9_comp_intra4x4_predict;
+
+prototype void vp9_intra8x8_predict "struct blockd *x, int b_mode, unsigned char *predictor"
+specialize vp9_intra8x8_predict;
+
+prototype void vp9_comp_intra8x8_predict "struct blockd *x, int b_mode, int second_mode, unsigned char *predictor"
+specialize vp9_comp_intra8x8_predict;
+
+prototype void vp9_intra_uv4x4_predict "struct blockd *x, int b_mode, unsigned char *predictor"
+specialize vp9_intra_uv4x4_predict;
+
+prototype void vp9_comp_intra_uv4x4_predict "struct blockd *x, int b_mode, int second_mode, unsigned char *predictor"
+specialize vp9_comp_intra_uv4x4_predict;
+
+#
+# Loopfilter
+#
+prototype void vp9_loop_filter_mbv "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"
+specialize vp9_loop_filter_mbv sse2
+
+prototype void vp9_loop_filter_bv "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"
+specialize vp9_loop_filter_bv sse2
+
+prototype void vp9_loop_filter_bv8x8 "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"
+specialize vp9_loop_filter_bv8x8 sse2
+
+prototype void vp9_loop_filter_mbh "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"
+specialize vp9_loop_filter_mbh sse2
+
+prototype void vp9_loop_filter_bh "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"
+specialize vp9_loop_filter_bh sse2
+
+prototype void vp9_loop_filter_bh8x8 "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"
+specialize vp9_loop_filter_bh8x8 sse2
+
+prototype void vp9_loop_filter_simple_mbv "unsigned char *y, int ystride, const unsigned char *blimit"
+specialize vp9_loop_filter_simple_mbv mmx sse2 media neon
+vp9_loop_filter_simple_mbv_c=vp9_loop_filter_simple_vertical_edge_c
+vp9_loop_filter_simple_mbv_mmx=vp9_loop_filter_simple_vertical_edge_mmx
+vp9_loop_filter_simple_mbv_sse2=vp9_loop_filter_simple_vertical_edge_sse2
+vp9_loop_filter_simple_mbv_media=vp9_loop_filter_simple_vertical_edge_armv6
+vp9_loop_filter_simple_mbv_neon=vp9_loop_filter_mbvs_neon
+
+prototype void vp9_loop_filter_simple_mbh "unsigned char *y, int ystride, const unsigned char *blimit"
+specialize vp9_loop_filter_simple_mbh mmx sse2 media neon
+vp9_loop_filter_simple_mbh_c=vp9_loop_filter_simple_horizontal_edge_c
+vp9_loop_filter_simple_mbh_mmx=vp9_loop_filter_simple_horizontal_edge_mmx
+vp9_loop_filter_simple_mbh_sse2=vp9_loop_filter_simple_horizontal_edge_sse2
+vp9_loop_filter_simple_mbh_media=vp9_loop_filter_simple_horizontal_edge_armv6
+vp9_loop_filter_simple_mbh_neon=vp9_loop_filter_mbhs_neon
+
+prototype void vp9_loop_filter_simple_bv "unsigned char *y, int ystride, const unsigned char *blimit"
+specialize vp9_loop_filter_simple_bv mmx sse2 media neon
+vp9_loop_filter_simple_bv_c=vp9_loop_filter_bvs_c
+vp9_loop_filter_simple_bv_mmx=vp9_loop_filter_bvs_mmx
+vp9_loop_filter_simple_bv_sse2=vp9_loop_filter_bvs_sse2
+vp9_loop_filter_simple_bv_media=vp9_loop_filter_bvs_armv6
+vp9_loop_filter_simple_bv_neon=vp9_loop_filter_bvs_neon
+
+prototype void vp9_loop_filter_simple_bh "unsigned char *y, int ystride, const unsigned char *blimit"
+specialize vp9_loop_filter_simple_bh mmx sse2 media neon
+vp9_loop_filter_simple_bh_c=vp9_loop_filter_bhs_c
+vp9_loop_filter_simple_bh_mmx=vp9_loop_filter_bhs_mmx
+vp9_loop_filter_simple_bh_sse2=vp9_loop_filter_bhs_sse2
+vp9_loop_filter_simple_bh_media=vp9_loop_filter_bhs_armv6
+vp9_loop_filter_simple_bh_neon=vp9_loop_filter_bhs_neon
+
+#
+# sad 16x3, 3x16
+#
+if [ "$CONFIG_NEWBESTREFMV" = "yes" ]; then
+prototype unsigned int vp9_sad16x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, int max_sad"
+specialize vp9_sad16x3 sse2
+
+prototype unsigned int vp9_sad3x16 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, int max_sad"
+specialize vp9_sad3x16 sse2
+fi
+
+#
+# Encoder functions below this point.
+#
+if [ "$CONFIG_VP9_ENCODER" = "yes" ]; then
+
+
+# variance
+[ $arch = "x86_64" ] && mmx_x86_64=mmx && sse2_x86_64=sse2
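+# on x86_64 these expand to "mmx"/"sse2", so a specialize line can name a
+# variant that only exists in 64-bit builds (the variables are empty, and
+# therefore ignored, elsewhere)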
+
+prototype unsigned int vp9_variance32x32 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_variance32x32
+
+prototype unsigned int vp9_variance16x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_variance16x16 mmx sse2
+vp9_variance16x16_sse2=vp9_variance16x16_wmt
+vp9_variance16x16_mmx=vp9_variance16x16_mmx
+
+prototype unsigned int vp9_variance16x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_variance16x8 mmx sse2
+vp9_variance16x8_sse2=vp9_variance16x8_wmt
+vp9_variance16x8_mmx=vp9_variance16x8_mmx
+
+prototype unsigned int vp9_variance8x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_variance8x16 mmx sse2
+vp9_variance8x16_sse2=vp9_variance8x16_wmt
+vp9_variance8x16_mmx=vp9_variance8x16_mmx
+
+prototype unsigned int vp9_variance8x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_variance8x8 mmx sse2
+vp9_variance8x8_sse2=vp9_variance8x8_wmt
+vp9_variance8x8_mmx=vp9_variance8x8_mmx
+
+prototype unsigned int vp9_variance4x4 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_variance4x4 mmx sse2
+vp9_variance4x4_sse2=vp9_variance4x4_wmt
+vp9_variance4x4_mmx=vp9_variance4x4_mmx
+
+prototype unsigned int vp9_sub_pixel_variance32x32 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"
+specialize vp9_sub_pixel_variance32x32
+
+prototype unsigned int vp9_sub_pixel_variance16x16 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"
+specialize vp9_sub_pixel_variance16x16 sse2 mmx ssse3
+vp9_sub_pixel_variance16x16_sse2=vp9_sub_pixel_variance16x16_wmt
+
+prototype unsigned int vp9_sub_pixel_variance8x16 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"
+specialize vp9_sub_pixel_variance8x16 sse2 mmx
+vp9_sub_pixel_variance8x16_sse2=vp9_sub_pixel_variance8x16_wmt
+
+prototype unsigned int vp9_sub_pixel_variance16x8 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"
+specialize vp9_sub_pixel_variance16x8 sse2 mmx ssse3
+vp9_sub_pixel_variance16x8_sse2=vp9_sub_pixel_variance16x8_wmt
+
+prototype unsigned int vp9_sub_pixel_variance8x8 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"
+specialize vp9_sub_pixel_variance8x8 sse2 mmx
+vp9_sub_pixel_variance8x8_sse2=vp9_sub_pixel_variance8x8_wmt
+
+prototype unsigned int vp9_sub_pixel_variance4x4 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"
+specialize vp9_sub_pixel_variance4x4 sse2 mmx
+vp9_sub_pixel_variance4x4_sse2=vp9_sub_pixel_variance4x4_wmt
+
+prototype unsigned int vp9_sad32x32 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
+specialize vp9_sad32x32
+
+prototype unsigned int vp9_sad16x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
+specialize vp9_sad16x16 mmx sse2 sse3
+vp9_sad16x16_sse2=vp9_sad16x16_wmt
+
+prototype unsigned int vp9_sad16x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
+specialize vp9_sad16x8 mmx sse2
+vp9_sad16x8_sse2=vp9_sad16x8_wmt
+
+prototype unsigned int vp9_sad8x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
+specialize vp9_sad8x16 mmx sse2
+vp9_sad8x16_sse2=vp9_sad8x16_wmt
+
+prototype unsigned int vp9_sad8x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
+specialize vp9_sad8x8 mmx sse2
+vp9_sad8x8_sse2=vp9_sad8x8_wmt
+
+prototype unsigned int vp9_sad4x4 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
+specialize vp9_sad4x4 mmx sse2
+vp9_sad4x4_sse2=vp9_sad4x4_wmt
+
+prototype unsigned int vp9_variance_halfpixvar16x16_h "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_variance_halfpixvar16x16_h mmx sse2
+vp9_variance_halfpixvar16x16_h_sse2=vp9_variance_halfpixvar16x16_h_wmt
+
+prototype unsigned int vp9_variance_halfpixvar16x16_v "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_variance_halfpixvar16x16_v mmx sse2
+vp9_variance_halfpixvar16x16_v_sse2=vp9_variance_halfpixvar16x16_v_wmt
+
+prototype unsigned int vp9_variance_halfpixvar16x16_hv "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_variance_halfpixvar16x16_hv mmx sse2
+vp9_variance_halfpixvar16x16_hv_sse2=vp9_variance_halfpixvar16x16_hv_wmt
+
+prototype unsigned int vp9_variance_halfpixvar32x32_h "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_variance_halfpixvar32x32_h
+
+prototype unsigned int vp9_variance_halfpixvar32x32_v "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_variance_halfpixvar32x32_v
+
+prototype unsigned int vp9_variance_halfpixvar32x32_hv "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_variance_halfpixvar32x32_hv
+
+prototype void vp9_sad32x32x3 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"
+specialize vp9_sad32x32x3
+
+prototype void vp9_sad16x16x3 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"
+specialize vp9_sad16x16x3 sse3 ssse3
+
+prototype void vp9_sad16x8x3 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"
+specialize vp9_sad16x8x3 sse3 ssse3
+
+prototype void vp9_sad8x16x3 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"
+specialize vp9_sad8x16x3 sse3
+
+prototype void vp9_sad8x8x3 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"
+specialize vp9_sad8x8x3 sse3
+
+prototype void vp9_sad4x4x3 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"
+specialize vp9_sad4x4x3 sse3
+
+prototype void vp9_sad32x32x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"
+specialize vp9_sad32x32x8
+
+prototype void vp9_sad16x16x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"
+specialize vp9_sad16x16x8 sse4
+
+prototype void vp9_sad16x8x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"
+specialize vp9_sad16x8x8 sse4
+
+prototype void vp9_sad8x16x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"
+specialize vp9_sad8x16x8 sse4
+
+prototype void vp9_sad8x8x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"
+specialize vp9_sad8x8x8 sse4
+
+prototype void vp9_sad4x4x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"
+specialize vp9_sad4x4x8 sse4
+
+prototype void vp9_sad32x32x4d "const unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr[], int ref_stride, unsigned int *sad_array"
+specialize vp9_sad32x32x4d
+
+prototype void vp9_sad16x16x4d "const unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr[], int ref_stride, unsigned int *sad_array"
+specialize vp9_sad16x16x4d sse3
+
+prototype void vp9_sad16x8x4d "const unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr[], int ref_stride, unsigned int *sad_array"
+specialize vp9_sad16x8x4d sse3
+
+prototype void vp9_sad8x16x4d "const unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr[], int ref_stride, unsigned int *sad_array"
+specialize vp9_sad8x16x4d sse3
+
+prototype void vp9_sad8x8x4d "const unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr[], int ref_stride, unsigned int *sad_array"
+specialize vp9_sad8x8x4d sse3
+
+prototype void vp9_sad4x4x4d "const unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr[], int ref_stride, unsigned int *sad_array"
+specialize vp9_sad4x4x4d sse3
+
+#
+# Block copy
+#
+case $arch in
+ x86*)
+ prototype void vp9_copy32xn "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n"
+ specialize vp9_copy32xn sse2 sse3
+ ;;
+esac
+
+prototype unsigned int vp9_sub_pixel_mse16x16 "const unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, const unsigned char *dst_ptr, int dst_pixels_per_line, unsigned int *sse"
+specialize vp9_sub_pixel_mse16x16 sse2 mmx
+vp9_sub_pixel_mse16x16_sse2=vp9_sub_pixel_mse16x16_wmt
+
+prototype unsigned int vp9_mse16x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int recon_stride, unsigned int *sse"
+specialize vp9_mse16x16 mmx sse2
+vp9_mse16x16_sse2=vp9_mse16x16_wmt
+
+prototype unsigned int vp9_sub_pixel_mse32x32 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"
+specialize vp9_sub_pixel_mse32x32
+
+prototype unsigned int vp9_get_mb_ss "const short *"
+specialize vp9_get_mb_ss mmx sse2
+# ENCODEMB INVOKE
+prototype int vp9_mbblock_error "struct macroblock *mb, int dc"
+specialize vp9_mbblock_error mmx sse2
+vp9_mbblock_error_sse2=vp9_mbblock_error_xmm
+
+prototype int vp9_block_error "short *coeff, short *dqcoeff, int block_size"
+specialize vp9_block_error mmx sse2
+vp9_block_error_sse2=vp9_block_error_xmm
+
+prototype void vp9_subtract_b "struct block *be, struct blockd *bd, int pitch"
+specialize vp9_subtract_b mmx sse2
+
+prototype int vp9_mbuverror "struct macroblock *mb"
+specialize vp9_mbuverror mmx sse2
+vp9_mbuverror_sse2=vp9_mbuverror_xmm
+
+prototype void vp9_subtract_mby "short *diff, unsigned char *src, unsigned char *pred, int stride"
+specialize vp9_subtract_mby mmx sse2
+
+prototype void vp9_subtract_mbuv "short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride"
+specialize vp9_subtract_mbuv mmx sse2
+
+#
+# Structured Similarity (SSIM)
+#
+if [ "$CONFIG_INTERNAL_STATS" = "yes" ]; then
+ [ $arch = "x86_64" ] && sse2_on_x86_64=sse2
+
+ prototype void vp9_ssim_parms_8x8 "unsigned char *s, int sp, unsigned char *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr"
+ specialize vp9_ssim_parms_8x8 $sse2_on_x86_64
+
+ prototype void vp9_ssim_parms_16x16 "unsigned char *s, int sp, unsigned char *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr"
+ specialize vp9_ssim_parms_16x16 $sse2_on_x86_64
+fi
+
+# fdct functions
+prototype void vp9_fht "const short *input, int pitch, short *output, int tx_type, int tx_dim"
+specialize vp9_fht
+
+prototype void vp9_short_fdct8x8 "short *InputData, short *OutputData, int pitch"
+specialize vp9_short_fdct8x8
+
+prototype void vp9_short_fhaar2x2 "short *InputData, short *OutputData, int pitch"
+specialize vp9_short_fhaar2x2
+
+prototype void vp9_short_fdct4x4 "short *InputData, short *OutputData, int pitch"
+specialize vp9_short_fdct4x4
+
+prototype void vp9_short_fdct8x4 "short *InputData, short *OutputData, int pitch"
+specialize vp9_short_fdct8x4
+
+prototype void vp9_short_walsh4x4 "short *InputData, short *OutputData, int pitch"
+specialize vp9_short_walsh4x4
+
+prototype void vp9_short_fdct16x16 "short *InputData, short *OutputData, int pitch"
+specialize vp9_short_fdct16x16
+
+prototype void vp9_short_walsh4x4_lossless "short *InputData, short *OutputData, int pitch"
+specialize vp9_short_walsh4x4_lossless
+
+prototype void vp9_short_walsh4x4_x8 "short *InputData, short *OutputData, int pitch"
+specialize vp9_short_walsh4x4_x8
+
+prototype void vp9_short_walsh8x4_x8 "short *InputData, short *OutputData, int pitch"
+specialize vp9_short_walsh8x4_x8
+
+fi
+# end encoder functions
+
+# Scaler functions
++if [ "$CONFIG_SPATIAL_RESAMPLING" = "yes" ]; then
+ prototype void vp8_horizontal_line_4_5_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"
+ prototype void vp8_vertical_band_4_5_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
+ prototype void vp8_last_vertical_band_4_5_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
+ prototype void vp8_horizontal_line_2_3_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"
+ prototype void vp8_vertical_band_2_3_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
+ prototype void vp8_last_vertical_band_2_3_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
+ prototype void vp8_horizontal_line_3_5_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"
+ prototype void vp8_vertical_band_3_5_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
+ prototype void vp8_last_vertical_band_3_5_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
+ prototype void vp8_horizontal_line_3_4_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"
+ prototype void vp8_vertical_band_3_4_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
+ prototype void vp8_last_vertical_band_3_4_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
+ prototype void vp8_horizontal_line_1_2_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"
+ prototype void vp8_vertical_band_1_2_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
+ prototype void vp8_last_vertical_band_1_2_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
+ prototype void vp8_horizontal_line_5_4_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"
+ prototype void vp8_vertical_band_5_4_scale "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
+ prototype void vp8_horizontal_line_5_3_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"
+ prototype void vp8_vertical_band_5_3_scale "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
+ prototype void vp8_horizontal_line_2_1_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"
+ prototype void vp8_vertical_band_2_1_scale "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
+ prototype void vp8_vertical_band_2_1_scale_i "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
+fi
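+# (Naming note, inferred from the identifiers: vp8_horizontal_line_A_B_scale
+# maps A source pixels onto B destination pixels, i.e. an A:B resampling
+# ratio per line or band.)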
+
+prototype void vp8_yv12_extend_frame_borders "struct yv12_buffer_config *ybf"
+specialize vp8_yv12_extend_frame_borders neon
+
+prototype void vp8_yv12_copy_frame "struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc"
+specialize vp8_yv12_copy_frame neon
+
+prototype void vp8_yv12_copy_y "struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc"
+specialize vp8_yv12_copy_y neon
+
--- /dev/null
- INCLUDE asm_enc_offsets.asm
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+ EXPORT |vp8_start_encode|
+ EXPORT |vp9_encode_bool|
+ EXPORT |vp8_stop_encode|
+ EXPORT |vp8_encode_value|
+
++ INCLUDE vp9_asm_enc_offsets.asm
+
+ ARM
+ REQUIRE8
+ PRESERVE8
+
+ AREA |.text|, CODE, READONLY
+
+; r0 BOOL_CODER *br
+; r1 unsigned char *source
+
+|vp8_start_encode| PROC
+ mov r12, #0
+ mov r3, #255
+ mvn r2, #23
+ str r12, [r0, #vp9_writer_lowvalue]
+ str r3, [r0, #vp9_writer_range]
+ str r12, [r0, #vp9_writer_value]
+ str r2, [r0, #vp9_writer_count]
+ str r12, [r0, #vp9_writer_pos]
+ str r1, [r0, #vp9_writer_buffer]
+ bx lr
+ ENDP
+
+; r0 BOOL_CODER *br
+; r1 int bit
+; r2 int probability
+|vp9_encode_bool| PROC
+ push {r4-r9, lr}
+
+ mov r4, r2
+
+ ldr r2, [r0, #vp9_writer_lowvalue]
+ ldr r5, [r0, #vp9_writer_range]
+ ldr r3, [r0, #vp9_writer_count]
+
+ sub r7, r5, #1 ; range-1
+
+ cmp r1, #0
+ mul r6, r4, r7 ; ((range-1) * probability)
+
+ mov r7, #1
+ add r4, r7, r6, lsr #8 ; 1 + (((range-1) * probability) >> 8)
+
+ addne r2, r2, r4 ; if (bit) lowvalue += split
+ subne r4, r5, r4 ; if (bit) range = range-split
+
+ ; Counting the leading zeros is used to normalize range.
+ clz r6, r4
+ sub r6, r6, #24 ; shift
+
+ ; Flag is set on the sum of count. This flag is used later
+ ; to determine if count >= 0
+ adds r3, r3, r6 ; count += shift
+ lsl r5, r4, r6 ; range <<= shift
+ bmi token_count_lt_zero ; if(count >= 0)
+
+ sub r6, r6, r3 ; offset = shift - count
+ sub r4, r6, #1 ; offset-1
+ lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
+ bpl token_high_bit_not_set
+
+ ldr r4, [r0, #vp9_writer_pos] ; x
+ sub r4, r4, #1 ; x = w->pos-1
+ b token_zero_while_start
+token_zero_while_loop
+ mov r9, #0
+ strb r9, [r7, r4] ; w->buffer[x] =(unsigned char)0
+ sub r4, r4, #1 ; x--
+token_zero_while_start
+ cmp r4, #0
+ ldrge r7, [r0, #vp9_writer_buffer]
+ ldrb r1, [r7, r4]
+ cmpge r1, #0xff
+ beq token_zero_while_loop
+
+ ldr r7, [r0, #vp9_writer_buffer]
+ ldrb r9, [r7, r4] ; w->buffer[x]
+ add r9, r9, #1
+ strb r9, [r7, r4] ; w->buffer[x] + 1
+token_high_bit_not_set
+ rsb r4, r6, #24 ; 24-offset
+ ldr r9, [r0, #vp9_writer_buffer]
+ lsr r7, r2, r4 ; lowvalue >> (24-offset)
+ ldr r4, [r0, #vp9_writer_pos] ; w->pos
+ lsl r2, r2, r6 ; lowvalue <<= offset
+ mov r6, r3 ; shift = count
+ add r1, r4, #1 ; w->pos++
+ bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
+ str r1, [r0, #vp9_writer_pos]
+ sub r3, r3, #8 ; count -= 8
+ strb r7, [r9, r4] ; w->buffer[w->pos++]
+
+token_count_lt_zero
+ lsl r2, r2, r6 ; lowvalue <<= shift
+
+ str r2, [r0, #vp9_writer_lowvalue]
+ str r5, [r0, #vp9_writer_range]
+ str r3, [r0, #vp9_writer_count]
+ pop {r4-r9, pc}
+ ENDP
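+
+; A C sketch of one coding step as implemented above, reconstructed from
+; the register comments (helper and field names are illustrative):
+;
+;   split = 1 + (((range - 1) * probability) >> 8);
+;   if (bit) { lowvalue += split; range -= split; }
+;   else     { range = split; }
+;   shift = clz32(range) - 24;      /* renormalize range into [128, 255] */
+;   range <<= shift;
+;   count += shift;
+;   if (count >= 0) {               /* a full byte is ready to flush */
+;     offset = shift - count;
+;     if ((lowvalue << (offset - 1)) & 0x80000000) {  /* propagate carry */
+;       x = pos - 1;
+;       while (buffer[x] == 0xff) buffer[x--] = 0;
+;       buffer[x]++;
+;     }
+;     buffer[pos++] = (lowvalue >> (24 - offset)) & 0xff;
+;     lowvalue = (lowvalue << offset) & 0xffffff;
+;     shift = count;
+;     count -= 8;
+;   }
+;   lowvalue <<= shift;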
+
+; r0 BOOL_CODER *br
+|vp8_stop_encode| PROC
+ push {r4-r10, lr}
+
+ ldr r2, [r0, #vp9_writer_lowvalue]
+ ldr r5, [r0, #vp9_writer_range]
+ ldr r3, [r0, #vp9_writer_count]
+
+ mov r10, #32
+
+stop_encode_loop
+ sub r7, r5, #1 ; range-1
+
+ mov r4, r7, lsl #7 ; ((range-1) * 128)
+
+ mov r7, #1
+ add r4, r7, r4, lsr #8 ; 1 + (((range-1) * 128) >> 8)
+
+ ; Counting the leading zeros is used to normalize range.
+ clz r6, r4
+ sub r6, r6, #24 ; shift
+
+ ; Flag is set on the sum of count. This flag is used later
+ ; to determine if count >= 0
+ adds r3, r3, r6 ; count += shift
+ lsl r5, r4, r6 ; range <<= shift
+ bmi token_count_lt_zero_se ; if(count >= 0)
+
+ sub r6, r6, r3 ; offset = shift - count
+ sub r4, r6, #1 ; offset-1
+ lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
+ bpl token_high_bit_not_set_se
+
+ ldr r4, [r0, #vp9_writer_pos] ; x
+ sub r4, r4, #1 ; x = w->pos-1
+ b token_zero_while_start_se
+token_zero_while_loop_se
+ mov r9, #0
+ strb r9, [r7, r4] ; w->buffer[x] =(unsigned char)0
+ sub r4, r4, #1 ; x--
+token_zero_while_start_se
+ cmp r4, #0
+ ldrge r7, [r0, #vp9_writer_buffer]
+ ldrb r1, [r7, r4]
+ cmpge r1, #0xff
+ beq token_zero_while_loop_se
+
+ ldr r7, [r0, #vp9_writer_buffer]
+ ldrb r9, [r7, r4] ; w->buffer[x]
+ add r9, r9, #1
+ strb r9, [r7, r4] ; w->buffer[x] + 1
+token_high_bit_not_set_se
+ rsb r4, r6, #24 ; 24-offset
+ ldr r9, [r0, #vp9_writer_buffer]
+ lsr r7, r2, r4 ; lowvalue >> (24-offset)
+ ldr r4, [r0, #vp9_writer_pos] ; w->pos
+ lsl r2, r2, r6 ; lowvalue <<= offset
+ mov r6, r3 ; shift = count
+ add r1, r4, #1 ; w->pos++
+ bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
+ str r1, [r0, #vp9_writer_pos]
+ sub r3, r3, #8 ; count -= 8
+ strb r7, [r9, r4] ; w->buffer[w->pos++]
+
+token_count_lt_zero_se
+ lsl r2, r2, r6 ; lowvalue <<= shift
+
+ subs r10, r10, #1
+ bne stop_encode_loop
+
+ str r2, [r0, #vp9_writer_lowvalue]
+ str r5, [r0, #vp9_writer_range]
+ str r3, [r0, #vp9_writer_count]
+ pop {r4-r10, pc}
+
+ ENDP
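+
+; Illustrative equivalent: flushing the coder is 32 half-probability zero
+; bits, which pushes every pending bit of lowvalue out through the byte
+; path above:
+;
+;   for (i = 0; i < 32; i++)
+;     encode_bool(br, 0, 128);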
+
+; r0 BOOL_CODER *br
+; r1 int data
+; r2 int bits
+|vp8_encode_value| PROC
+ push {r4-r11, lr}
+
+ mov r10, r2
+
+ ldr r2, [r0, #vp9_writer_lowvalue]
+ ldr r5, [r0, #vp9_writer_range]
+ ldr r3, [r0, #vp9_writer_count]
+
+ rsb r4, r10, #32 ; 32-n
+
+ ; v is kept in r1 during the token pack loop
+ lsl r1, r1, r4 ; r1 = v << 32 - n
+
+encode_value_loop
+ sub r7, r5, #1 ; range-1
+
+ ; Decisions are made based on the bit value shifted
+ ; off of v, so set a flag here based on this.
+ ; This value is referred to as "bb"
+ lsls r1, r1, #1 ; bit = v >> n
+ mov r4, r7, lsl #7 ; ((range-1) * 128)
+
+ mov r7, #1
+ add r4, r7, r4, lsr #8 ; 1 + (((range-1) * 128) >> 8)
+
+ addcs r2, r2, r4 ; if (bit) lowvalue += split
+ subcs r4, r5, r4 ; if (bit) range = range-split
+
+ ; Counting the leading zeros is used to normalize range.
+ clz r6, r4
+ sub r6, r6, #24 ; shift
+
+ ; Flag is set on the sum of count. This flag is used later
+ ; to determine if count >= 0
+ adds r3, r3, r6 ; count += shift
+ lsl r5, r4, r6 ; range <<= shift
+ bmi token_count_lt_zero_ev ; if(count >= 0)
+
+ sub r6, r6, r3 ; offset = shift - count
+ sub r4, r6, #1 ; offset-1
+ lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
+ bpl token_high_bit_not_set_ev
+
+ ldr r4, [r0, #vp9_writer_pos] ; x
+ sub r4, r4, #1 ; x = w->pos-1
+ b token_zero_while_start_ev
+token_zero_while_loop_ev
+ mov r9, #0
+ strb r9, [r7, r4] ; w->buffer[x] =(unsigned char)0
+ sub r4, r4, #1 ; x--
+token_zero_while_start_ev
+ cmp r4, #0
+ ldrge r7, [r0, #vp9_writer_buffer]
+ ldrb r11, [r7, r4]
+ cmpge r11, #0xff
+ beq token_zero_while_loop_ev
+
+ ldr r7, [r0, #vp9_writer_buffer]
+ ldrb r9, [r7, r4] ; w->buffer[x]
+ add r9, r9, #1
+ strb r9, [r7, r4] ; w->buffer[x] + 1
+token_high_bit_not_set_ev
+ rsb r4, r6, #24 ; 24-offset
+ ldr r9, [r0, #vp9_writer_buffer]
+ lsr r7, r2, r4 ; lowvalue >> (24-offset)
+ ldr r4, [r0, #vp9_writer_pos] ; w->pos
+ lsl r2, r2, r6 ; lowvalue <<= offset
+ mov r6, r3 ; shift = count
+ add r11, r4, #1 ; w->pos++
+ bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
+ str r11, [r0, #vp9_writer_pos]
+ sub r3, r3, #8 ; count -= 8
+ strb r7, [r9, r4] ; w->buffer[w->pos++]
+
+token_count_lt_zero_ev
+ lsl r2, r2, r6 ; lowvalue <<= shift
+
+ subs r10, r10, #1
+ bne encode_value_loop
+
+ str r2, [r0, #vp9_writer_lowvalue]
+ str r5, [r0, #vp9_writer_range]
+ str r3, [r0, #vp9_writer_count]
+ pop {r4-r11, pc}
+ ENDP
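+
+; Equivalent C sketch (illustrative): the low "bits" bits of "data" are
+; written most-significant first, each as an even-probability boolean (the
+; fixed probability of 128 shows up above as (range - 1) * 128):
+;
+;   for (bit = bits - 1; bit >= 0; bit--)
+;     encode_bool(br, (data >> bit) & 1, 128);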
+
+ END
--- /dev/null
- INCLUDE asm_enc_offsets.asm
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+ EXPORT |vp8cx_pack_tokens_armv5|
+
++ INCLUDE vp9_asm_enc_offsets.asm
+
+ ARM
+ REQUIRE8
+ PRESERVE8
+
+ AREA |.text|, CODE, READONLY
+
+; r0 vp9_writer *w
+; r1 const TOKENEXTRA *p
+; r2 int xcount
+; r3 vp8_coef_encodings
+; s0 vp8_extra_bits
+; s1 vp8_coef_tree
+|vp8cx_pack_tokens_armv5| PROC
+ push {r4-r11, lr}
+
+ ; Add xcount * sizeof (TOKENEXTRA) to p to get stop
+ ; sizeof (TOKENEXTRA) is 8
+ sub sp, sp, #12
+ add r2, r1, r2, lsl #3 ; stop = p + xcount*sizeof(TOKENEXTRA)
+ str r2, [sp, #0]
+ str r3, [sp, #8] ; save vp8_coef_encodings
+ ldr r2, [r0, #vp9_writer_lowvalue]
+ ldr r5, [r0, #vp9_writer_range]
+ ldr r3, [r0, #vp9_writer_count]
+ b check_p_lt_stop
+
+while_p_lt_stop
+ ldrb r6, [r1, #tokenextra_token] ; t
+ ldr r4, [sp, #8] ; vp8_coef_encodings
+ mov lr, #0
+ add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t
+ ldr r9, [r1, #tokenextra_context_tree] ; pp
+
+ ldrb r7, [r1, #tokenextra_skip_eob_node]
+
+ ldr r6, [r4, #vp9_token_value] ; v
+ ldr r8, [r4, #vp9_token_len] ; n
+
+ ; vp8 specific skip_eob_node
+ cmp r7, #0
+ movne lr, #2 ; i = 2
+ subne r8, r8, #1 ; --n
+
+ rsb r4, r8, #32 ; 32-n
+ ldr r10, [sp, #52] ; vp8_coef_tree
+
+ ; v is kept in r12 during the token pack loop
+ lsl r12, r6, r4 ; r12 = v << 32 - n
+
+; loop start
+token_loop
+ ldrb r4, [r9, lr, asr #1] ; pp [i>>1]
+ sub r7, r5, #1 ; range-1
+
+ ; Decisions are made based on the bit value shifted
+ ; off of v, so set a flag here based on this.
+ ; This value is referred to as "bb"
+ lsls r12, r12, #1 ; bb = v >> n
+ mul r6, r4, r7 ; ((range-1) * pp[i>>1]))
+
+ ; bb can only be 0 or 1. So only execute this statement
+ ; if bb == 1, otherwise it will act like i + 0
+ addcs lr, lr, #1 ; i + bb
+
+ mov r7, #1
+ ldrsb lr, [r10, lr] ; i = vp8_coef_tree[i+bb]
+ add r4, r7, r6, lsr #8 ; 1 + (((range-1) * pp[i>>1]) >> 8)
+
+ addcs r2, r2, r4 ; if (bb) lowvalue += split
+ subcs r4, r5, r4 ; if (bb) range = range-split
+
+ ; Counting the leading zeros is used to normalize range.
+ clz r6, r4
+ sub r6, r6, #24 ; shift
+
+ ; Flag is set on the sum of count. This flag is used later
+ ; to determine if count >= 0
+ adds r3, r3, r6 ; count += shift
+ lsl r5, r4, r6 ; range <<= shift
+ bmi token_count_lt_zero ; if(count >= 0)
+
+ sub r6, r6, r3 ; offset = shift - count
+ sub r4, r6, #1 ; offset-1
+ lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
+ bpl token_high_bit_not_set
+
+ ldr r4, [r0, #vp9_writer_pos] ; x
+ sub r4, r4, #1 ; x = w->pos-1
+ b token_zero_while_start
+token_zero_while_loop
+ mov r10, #0
+ strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
+ sub r4, r4, #1 ; x--
+token_zero_while_start
+ cmp r4, #0
+ ldrge r7, [r0, #vp9_writer_buffer]
+ ldrb r11, [r7, r4]
+ cmpge r11, #0xff
+ beq token_zero_while_loop
+
+ ldr r7, [r0, #vp9_writer_buffer]
+ ldrb r10, [r7, r4] ; w->buffer[x]
+ add r10, r10, #1
+ strb r10, [r7, r4] ; w->buffer[x] + 1
+token_high_bit_not_set
+ rsb r4, r6, #24 ; 24-offset
+ ldr r10, [r0, #vp9_writer_buffer]
+ lsr r7, r2, r4 ; lowvalue >> (24-offset)
+ ldr r4, [r0, #vp9_writer_pos] ; w->pos
+ lsl r2, r2, r6 ; lowvalue <<= offset
+ mov r6, r3 ; shift = count
+ add r11, r4, #1 ; w->pos++
+ bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
+ str r11, [r0, #vp9_writer_pos]
+ sub r3, r3, #8 ; count -= 8
+ strb r7, [r10, r4] ; w->buffer[w->pos++]
+
+ ; r10 is used earlier in the loop, but r10 is used as
+ ; temp variable here. So after r10 is used, reload
+ ; vp8_coef_tree_dcd into r10
+ ldr r10, [sp, #52] ; vp8_coef_tree
+
+token_count_lt_zero
+ lsl r2, r2, r6 ; lowvalue <<= shift
+
+ subs r8, r8, #1 ; --n
+ bne token_loop
+
+ ldrb r6, [r1, #tokenextra_token] ; t
+ ldr r7, [sp, #48] ; vp8_extra_bits
+ ; Add t * sizeof (vp9_extra_bit_struct) to get the desired
+ ; element. Here sizeof (vp9_extra_bit_struct) == 16
+ add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t
+
+ ldr r4, [r12, #vp9_extra_bit_struct_base_val]
+ cmp r4, #0
+ beq skip_extra_bits
+
+; if( b->base_val)
+ ldr r8, [r12, #vp9_extra_bit_struct_len] ; L
+ ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra
+ cmp r8, #0 ; if( L)
+ beq no_extra_bits
+
+ ldr r9, [r12, #vp9_extra_bit_struct_prob]
+ asr r7, lr, #1 ; v=e>>1
+
+ ldr r10, [r12, #vp9_extra_bit_struct_tree]
+ str r10, [sp, #4] ; b->tree
+
+ rsb r4, r8, #32
+ lsl r12, r7, r4
+
+ mov lr, #0 ; i = 0
+
+extra_bits_loop
+ ldrb r4, [r9, lr, asr #1] ; pp[i>>1]
+ sub r7, r5, #1 ; range-1
+ lsls r12, r12, #1 ; v >> n
+ mul r6, r4, r7 ; (range-1) * pp[i>>1]
+ addcs lr, lr, #1 ; i + bb
+
+ mov r7, #1
+ ldrsb lr, [r10, lr] ; i = b->tree[i+bb]
+ add r4, r7, r6, lsr #8 ; split = 1 + (((range-1) * pp[i>>1]) >> 8)
+
+ addcs r2, r2, r4 ; if (bb) lowvalue += split
+ subcs r4, r5, r4 ; if (bb) range = range-split
+
+ clz r6, r4
+ sub r6, r6, #24
+
+ adds r3, r3, r6 ; count += shift
+ lsl r5, r4, r6 ; range <<= shift
+ bmi extra_count_lt_zero ; if(count >= 0)
+
+ sub r6, r6, r3 ; offset= shift - count
+ sub r4, r6, #1 ; offset-1
+ lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
+ bpl extra_high_bit_not_set
+
+ ldr r4, [r0, #vp9_writer_pos] ; x
+ sub r4, r4, #1 ; x = w->pos - 1
+ b extra_zero_while_start
+extra_zero_while_loop
+ mov r10, #0
+ strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
+ sub r4, r4, #1 ; x--
+extra_zero_while_start
+ cmp r4, #0
+ ldrge r7, [r0, #vp9_writer_buffer]
+ ldrb r11, [r7, r4]
+ cmpge r11, #0xff
+ beq extra_zero_while_loop
+
+ ldr r7, [r0, #vp9_writer_buffer]
+ ldrb r10, [r7, r4]
+ add r10, r10, #1
+ strb r10, [r7, r4]
+extra_high_bit_not_set
+ rsb r4, r6, #24 ; 24-offset
+ ldr r10, [r0, #vp9_writer_buffer]
+ lsr r7, r2, r4 ; lowvalue >> (24-offset)
+ ldr r4, [r0, #vp9_writer_pos]
+ lsl r2, r2, r6 ; lowvalue <<= offset
+ mov r6, r3 ; shift = count
+ add r11, r4, #1 ; w->pos++
+ bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
+ str r11, [r0, #vp9_writer_pos]
+ sub r3, r3, #8 ; count -= 8
+ strb r7, [r10, r4] ; w->buffer[w->pos++]=(lowvalue >> (24-offset))
+ ldr r10, [sp, #4] ; b->tree
+extra_count_lt_zero
+ lsl r2, r2, r6
+
+ subs r8, r8, #1 ; --n
+ bne extra_bits_loop ; while (n)
+
+no_extra_bits
+ ldr lr, [r1, #4] ; e = p->Extra
+ add r4, r5, #1 ; range + 1
+ tst lr, #1
+ lsr r4, r4, #1 ; split = (range + 1) >> 1
+ addne r2, r2, r4 ; lowvalue += split
+ subne r4, r5, r4 ; range = range-split
+ tst r2, #0x80000000 ; lowvalue & 0x80000000
+ lsl r5, r4, #1 ; range <<= 1
+ beq end_high_bit_not_set
+
+ ldr r4, [r0, #vp9_writer_pos]
+ mov r7, #0
+ sub r4, r4, #1
+ b end_zero_while_start
+end_zero_while_loop
+ strb r7, [r6, r4]
+ sub r4, r4, #1 ; x--
+end_zero_while_start
+ cmp r4, #0
+ ldrge r6, [r0, #vp9_writer_buffer]
+ ldrb r12, [r6, r4]
+ cmpge r12, #0xff
+ beq end_zero_while_loop
+
+ ldr r6, [r0, #vp9_writer_buffer]
+ ldrb r7, [r6, r4]
+ add r7, r7, #1
+ strb r7, [r6, r4]
+end_high_bit_not_set
+ adds r3, r3, #1 ; ++count
+ lsl r2, r2, #1 ; lowvalue <<= 1
+ bne end_count_zero
+
+ ldr r4, [r0, #vp9_writer_pos]
+ mvn r3, #7
+ ldr r7, [r0, #vp9_writer_buffer]
+ lsr r6, r2, #24 ; lowvalue >> 24
+ add r12, r4, #1 ; w->pos++
+ bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
+ str r12, [r0, #vp9_writer_pos]
+ strb r6, [r7, r4]
+end_count_zero
+skip_extra_bits
+ add r1, r1, #TOKENEXTRA_SZ ; ++p
+check_p_lt_stop
+ ldr r4, [sp, #0] ; stop
+ cmp r1, r4 ; while( p < stop)
+ bcc while_p_lt_stop
+
+ str r2, [r0, #vp9_writer_lowvalue]
+ str r5, [r0, #vp9_writer_range]
+ str r3, [r0, #vp9_writer_count]
+ add sp, sp, #12
+ pop {r4-r11, pc}
+ ENDP
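+
+; A C sketch of the packing loop above, with names taken from the comments
+; (treat it as illustrative rather than the exact C source):
+;
+;   while (p < stop) {
+;     const vp9_token *a = vp8_coef_encodings + p->Token;
+;     int i = p->skip_eob_node ? 2 : 0;   /* skip the EOB decision */
+;     int n = a->len - p->skip_eob_node;
+;     int v = a->value;
+;     do {                                /* walk the coding tree */
+;       int bb = (v >> --n) & 1;
+;       encode_bool(w, bb, p->context_tree[i >> 1]);
+;       i = vp8_coef_tree[i + bb];
+;     } while (n);
+;     /* then, if b->base_val: the extra-bits tree plus a raw sign bit */
+;     p++;
+;   }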
+
+ END
--- /dev/null
- INCLUDE asm_enc_offsets.asm
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+ EXPORT |vp8cx_pack_mb_row_tokens_armv5|
+
++ INCLUDE vp9_asm_enc_offsets.asm
+
+ ARM
+ REQUIRE8
+ PRESERVE8
+
+ AREA |.text|, CODE, READONLY
+
+; r0 VP8_COMP *cpi
+; r1 vp9_writer *w
+; r2 vp8_coef_encodings
+; r3 vp8_extra_bits
+; s0 vp8_coef_tree
+
+|vp8cx_pack_mb_row_tokens_armv5| PROC
+ push {r4-r11, lr}
+ sub sp, sp, #24
+
+ ; Compute address of cpi->common.mb_rows
+ ldr r4, _VP8_COMP_common_
+ ldr r6, _VP8_COMMON_MBrows_
+ add r4, r0, r4
+
+ ldr r5, [r4, r6] ; load up mb_rows
+
+ str r2, [sp, #20] ; save vp8_coef_encodings
+ str r5, [sp, #12] ; save mb_rows
+ str r3, [sp, #8] ; save vp8_extra_bits
+
+ ldr r4, _VP8_COMP_tplist_
+ add r4, r0, r4
+ ldr r7, [r4, #0] ; dereference cpi->tp_list
+
+ mov r0, r1 ; keep same as other loops
+
+ ldr r2, [r0, #vp9_writer_lowvalue]
+ ldr r5, [r0, #vp9_writer_range]
+ ldr r3, [r0, #vp9_writer_count]
+
+mb_row_loop
+
+ ldr r1, [r7, #tokenlist_start]
+ ldr r9, [r7, #tokenlist_stop]
+ str r9, [sp, #0] ; save stop for later comparison
+ str r7, [sp, #16] ; tokenlist address for next time
+
+ b check_p_lt_stop
+
+ ; actual work gets done here!
+
+while_p_lt_stop
+ ldrb r6, [r1, #tokenextra_token] ; t
+ ldr r4, [sp, #20] ; vp8_coef_encodings
+ mov lr, #0
+ add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t
+ ldr r9, [r1, #tokenextra_context_tree] ; pp
+
+ ldrb r7, [r1, #tokenextra_skip_eob_node]
+
+ ldr r6, [r4, #vp9_token_value] ; v
+ ldr r8, [r4, #vp9_token_len] ; n
+
+ ; vp8 specific skip_eob_node
+ cmp r7, #0
+ movne lr, #2 ; i = 2
+ subne r8, r8, #1 ; --n
+
+ rsb r4, r8, #32 ; 32-n
+ ldr r10, [sp, #60] ; vp8_coef_tree
+
+ ; v is kept in r12 during the token pack loop
+ lsl r12, r6, r4 ; r12 = v << 32 - n
+
+; loop start
+token_loop
+ ldrb r4, [r9, lr, asr #1] ; pp [i>>1]
+ sub r7, r5, #1 ; range-1
+
+ ; Decisions are made based on the bit value shifted
+ ; off of v, so set a flag here based on this.
+ ; This value is referred to as "bb"
+ lsls r12, r12, #1 ; bb = v >> n
+ mul r6, r4, r7 ; ((range-1) * pp[i>>1]))
+
+ ; bb can only be 0 or 1. So only execute this statement
+ ; if bb == 1, otherwise it will act like i + 0
+ addcs lr, lr, #1 ; i + bb
+
+ mov r7, #1
+ ldrsb lr, [r10, lr] ; i = vp8_coef_tree[i+bb]
+ add r4, r7, r6, lsr #8 ; 1 + (((range-1) * pp[i>>1]) >> 8)
+
+ addcs r2, r2, r4 ; if (bb) lowvalue += split
+ subcs r4, r5, r4 ; if (bb) range = range-split
+
+ ; Counting the leading zeros is used to normalize range.
+ clz r6, r4
+ sub r6, r6, #24 ; shift
+
+ ; Flag is set on the sum of count. This flag is used later
+ ; to determine if count >= 0
+ adds r3, r3, r6 ; count += shift
+ lsl r5, r4, r6 ; range <<= shift
+ bmi token_count_lt_zero ; if(count >= 0)
+
+ sub r6, r6, r3 ; offset = shift - count
+ sub r4, r6, #1 ; offset-1
+ lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
+ bpl token_high_bit_not_set
+
+ ldr r4, [r0, #vp9_writer_pos] ; x
+ sub r4, r4, #1 ; x = w->pos-1
+ b token_zero_while_start
+token_zero_while_loop
+ mov r10, #0
+ strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
+ sub r4, r4, #1 ; x--
+token_zero_while_start
+ cmp r4, #0
+ ldrge r7, [r0, #vp9_writer_buffer]
+ ldrb r11, [r7, r4]
+ cmpge r11, #0xff
+ beq token_zero_while_loop
+
+ ldr r7, [r0, #vp9_writer_buffer]
+ ldrb r10, [r7, r4] ; w->buffer[x]
+ add r10, r10, #1
+ strb r10, [r7, r4] ; w->buffer[x] + 1
+token_high_bit_not_set
+ rsb r4, r6, #24 ; 24-offset
+ ldr r10, [r0, #vp9_writer_buffer]
+ lsr r7, r2, r4 ; lowvalue >> (24-offset)
+ ldr r4, [r0, #vp9_writer_pos] ; w->pos
+ lsl r2, r2, r6 ; lowvalue <<= offset
+ mov r6, r3 ; shift = count
+ add r11, r4, #1 ; w->pos++
+ bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
+ str r11, [r0, #vp9_writer_pos]
+ sub r3, r3, #8 ; count -= 8
+ strb r7, [r10, r4] ; w->buffer[w->pos++]
+
+ ; r10 is used earlier in the loop, but r10 is used as
+ ; temp variable here. So after r10 is used, reload
+ ; vp8_coef_tree_dcd into r10
+ ldr r10, [sp, #60] ; vp8_coef_tree
+
+token_count_lt_zero
+ lsl r2, r2, r6 ; lowvalue <<= shift
+
+ subs r8, r8, #1 ; --n
+ bne token_loop
+
+ ldrb r6, [r1, #tokenextra_token] ; t
+ ldr r7, [sp, #8] ; vp8_extra_bits
+ ; Add t * sizeof (vp9_extra_bit_struct) to get the desired
+ ; element. Here sizeof (vp9_extra_bit_struct) == 16
+ add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t
+
+ ldr r4, [r12, #vp9_extra_bit_struct_base_val]
+ cmp r4, #0
+ beq skip_extra_bits
+
+; if( b->base_val)
+ ldr r8, [r12, #vp9_extra_bit_struct_len] ; L
+ ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra
+ cmp r8, #0 ; if( L)
+ beq no_extra_bits
+
+ ldr r9, [r12, #vp9_extra_bit_struct_prob]
+ asr r7, lr, #1 ; v=e>>1
+
+ ldr r10, [r12, #vp9_extra_bit_struct_tree]
+ str r10, [sp, #4] ; b->tree
+
+ rsb r4, r8, #32
+ lsl r12, r7, r4
+
+ mov lr, #0 ; i = 0
+
+extra_bits_loop
+ ldrb r4, [r9, lr, asr #1] ; pp[i>>1]
+ sub r7, r5, #1 ; range-1
+ lsls r12, r12, #1 ; v >> n
+ mul r6, r4, r7 ; (range-1) * pp[i>>1]
+ addcs lr, lr, #1 ; i + bb
+
+ mov r7, #1
+ ldrsb lr, [r10, lr] ; i = b->tree[i+bb]
+ add r4, r7, r6, lsr #8 ; split = 1 + (((range-1) * pp[i>>1]) >> 8)
+
+ addcs r2, r2, r4 ; if (bb) lowvalue += split
+ subcs r4, r5, r4 ; if (bb) range = range-split
+
+ clz r6, r4
+ sub r6, r6, #24
+
+ adds r3, r3, r6 ; count += shift
+ lsl r5, r4, r6 ; range <<= shift
+ bmi extra_count_lt_zero ; if(count >= 0)
+
+ sub r6, r6, r3 ; offset= shift - count
+ sub r4, r6, #1 ; offset-1
+ lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
+ bpl extra_high_bit_not_set
+
+ ldr r4, [r0, #vp9_writer_pos] ; x
+ sub r4, r4, #1 ; x = w->pos - 1
+ b extra_zero_while_start
+extra_zero_while_loop
+ mov r10, #0
+ strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
+ sub r4, r4, #1 ; x--
+extra_zero_while_start
+ cmp r4, #0
+ ldrge r7, [r0, #vp9_writer_buffer]
+ ldrb r11, [r7, r4]
+ cmpge r11, #0xff
+ beq extra_zero_while_loop
+
+ ldr r7, [r0, #vp9_writer_buffer]
+ ldrb r10, [r7, r4]
+ add r10, r10, #1
+ strb r10, [r7, r4]
+extra_high_bit_not_set
+ rsb r4, r6, #24 ; 24-offset
+ ldr r10, [r0, #vp9_writer_buffer]
+ lsr r7, r2, r4 ; lowvalue >> (24-offset)
+ ldr r4, [r0, #vp9_writer_pos]
+ lsl r2, r2, r6 ; lowvalue <<= offset
+ mov r6, r3 ; shift = count
+ add r11, r4, #1 ; w->pos++
+ bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
+ str r11, [r0, #vp9_writer_pos]
+ sub r3, r3, #8 ; count -= 8
+ strb r7, [r10, r4] ; w->buffer[w->pos++]=(lowvalue >> (24-offset))
+ ldr r10, [sp, #4] ; b->tree
+extra_count_lt_zero
+ lsl r2, r2, r6
+
+ subs r8, r8, #1 ; --n
+ bne extra_bits_loop ; while (n)
+
+no_extra_bits
+ ldr lr, [r1, #4] ; e = p->Extra
+ add r4, r5, #1 ; range + 1
+ tst lr, #1
+ lsr r4, r4, #1 ; split = (range + 1) >> 1
+ addne r2, r2, r4 ; lowvalue += split
+ subne r4, r5, r4 ; range = range-split
+ tst r2, #0x80000000 ; lowvalue & 0x80000000
+ lsl r5, r4, #1 ; range <<= 1
+ beq end_high_bit_not_set
+
+ ldr r4, [r0, #vp9_writer_pos]
+ mov r7, #0
+ sub r4, r4, #1
+ b end_zero_while_start
+end_zero_while_loop
+ strb r7, [r6, r4]
+ sub r4, r4, #1 ; x--
+end_zero_while_start
+ cmp r4, #0
+ ldrge r6, [r0, #vp9_writer_buffer]
+ ldrb r12, [r6, r4]
+ cmpge r12, #0xff
+ beq end_zero_while_loop
+
+ ldr r6, [r0, #vp9_writer_buffer]
+ ldrb r7, [r6, r4]
+ add r7, r7, #1
+ strb r7, [r6, r4]
+end_high_bit_not_set
+ adds r3, r3, #1 ; ++count
+ lsl r2, r2, #1 ; lowvalue <<= 1
+ bne end_count_zero
+
+ ldr r4, [r0, #vp9_writer_pos]
+ mvn r3, #7
+ ldr r7, [r0, #vp9_writer_buffer]
+ lsr r6, r2, #24 ; lowvalue >> 24
+ add r12, r4, #1 ; w->pos++
+ bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
+ str r12, [r0, #vp9_writer_pos]
+ strb r6, [r7, r4]
+end_count_zero
+skip_extra_bits
+ add r1, r1, #TOKENEXTRA_SZ ; ++p
+check_p_lt_stop
+ ldr r4, [sp, #0] ; stop
+ cmp r1, r4 ; while( p < stop)
+ bcc while_p_lt_stop
+
+ ldr r6, [sp, #12] ; mb_rows
+ ldr r7, [sp, #16] ; tokenlist address
+ subs r6, r6, #1
+ add r7, r7, #TOKENLIST_SZ ; next element in the array
+ str r6, [sp, #12]
+ bne mb_row_loop
+
+ str r2, [r0, #vp9_writer_lowvalue]
+ str r5, [r0, #vp9_writer_range]
+ str r3, [r0, #vp9_writer_count]
+ add sp, sp, #24
+ pop {r4-r11, pc}
+ ENDP
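+
+; Outer-loop sketch (illustrative names): the token-pack body above runs
+; once per macroblock row, with its start/stop pointers drawn from the
+; per-row token lists:
+;
+;   for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) {
+;     TOKENEXTRA *p    = cpi->tplist[mb_row].start;
+;     TOKENEXTRA *stop = cpi->tplist[mb_row].stop;
+;     /* ... pack tokens from p to stop as above ... */
+;   }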
+
+_VP8_COMP_common_
+ DCD vp8_comp_common
+_VP8_COMMON_MBrows_
+ DCD vp8_common_mb_rows
+_VP8_COMP_tplist_
+ DCD vp8_comp_tplist
+
+ END
--- /dev/null
- INCLUDE asm_enc_offsets.asm
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+ EXPORT |vp8cx_pack_tokens_into_partitions_armv5|
+
++ INCLUDE vp9_asm_enc_offsets.asm
+
+ ARM
+ REQUIRE8
+ PRESERVE8
+
+ AREA |.text|, CODE, READONLY
+
+; r0 VP8_COMP *cpi
+; r1 unsigned char *cx_data
+; r2 int num_part
+; r3 *size
+; s0 vp8_coef_encodings
+; s1 vp8_extra_bits,
+; s2 const vp9_tree_index *,
+
+|vp8cx_pack_tokens_into_partitions_armv5| PROC
+ push {r4-r11, lr}
+ sub sp, sp, #44
+
+ ; Compute address of cpi->common.mb_rows
+ ldr r4, _VP8_COMP_common_
+ ldr r6, _VP8_COMMON_MBrows_
+ add r4, r0, r4
+
+ ldr r5, [r4, r6] ; load up mb_rows
+
+ str r5, [sp, #36] ; save mb_rows
+ str r1, [sp, #24] ; save cx_data
+ str r2, [sp, #20] ; save num_part
+ str r3, [sp, #8] ; save *size
+
+ ; *size = 3*(num_part -1 );
+ sub r2, r2, #1 ; num_part - 1
+ add r2, r2, r2, lsl #1 ; 3*(num_part - 1)
+ str r2, [r3]
+
+ add r2, r2, r1 ; cx_data + *size
+ str r2, [sp, #40] ; ptr
+
+ ldr r4, _VP8_COMP_tplist_
+ add r4, r0, r4
+ ldr r7, [r4, #0] ; dereference cpi->tp_list
+ str r7, [sp, #32] ; store start of cpi->tp_list
+
+ ldr r11, _VP8_COMP_bc2_ ; load up vp9_writer out of cpi
+ add r0, r0, r11
+
+ mov r11, #0
+ str r11, [sp, #28] ; i
+
+numparts_loop
+ ldr r10, [sp, #40] ; ptr
+ ldr r5, [sp, #36] ; move mb_rows to the counting section
+ sub r5, r5, r11 ; move start point with each partition
+ ; mb_rows starts at i
+ str r5, [sp, #12]
+
+ ; Reset all of the VP8 Writer data for each partition that
+ ; is processed.
+ ; start_encode
+ mov r2, #0 ; vp9_writer_lowvalue
+ mov r5, #255 ; vp9_writer_range
+ mvn r3, #23 ; vp9_writer_count
+
+ str r2, [r0, #vp9_writer_value]
+ str r2, [r0, #vp9_writer_pos]
+ str r10, [r0, #vp9_writer_buffer]
+
+mb_row_loop
+
+ ldr r1, [r7, #tokenlist_start]
+ ldr r9, [r7, #tokenlist_stop]
+ str r9, [sp, #0] ; save stop for later comparison
+ str r7, [sp, #16] ; tokenlist address for next time
+
+ b check_p_lt_stop
+
+ ; actual work gets done here!
+
+while_p_lt_stop
+ ldrb r6, [r1, #tokenextra_token] ; t
+ ldr r4, [sp, #80] ; vp8_coef_encodings
+ mov lr, #0
+ add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t
+ ldr r9, [r1, #tokenextra_context_tree] ; pp
+
+ ldrb r7, [r1, #tokenextra_skip_eob_node]
+
+ ldr r6, [r4, #vp9_token_value] ; v
+ ldr r8, [r4, #vp9_token_len] ; n
+
+ ; vp8 specific skip_eob_node
+ cmp r7, #0
+ movne lr, #2 ; i = 2
+ subne r8, r8, #1 ; --n
+
+ rsb r4, r8, #32 ; 32-n
+ ldr r10, [sp, #88] ; vp8_coef_tree
+
+ ; v is kept in r12 during the token pack loop
+ lsl r12, r6, r4 ; r12 = v << 32 - n
+
+; loop start
+token_loop
+ ldrb r4, [r9, lr, asr #1] ; pp [i>>1]
+ sub r7, r5, #1 ; range-1
+
+ ; Decisions are made based on the bit value shifted
+ ; off of v, so set a flag here based on this.
+ ; This value is referred to as "bb"
+ lsls r12, r12, #1 ; bb = v >> n
+ mul r6, r4, r7 ; ((range-1) * pp[i>>1]))
+
+ ; bb can only be 0 or 1. So only execute this statement
+ ; if bb == 1, otherwise it will act like i + 0
+ addcs lr, lr, #1 ; i + bb
+
+ mov r7, #1
+ ldrsb lr, [r10, lr] ; i = vp8_coef_tree[i+bb]
+ add r4, r7, r6, lsr #8 ; 1 + (((range-1) * pp[i>>1]) >> 8)
+
+ addcs r2, r2, r4 ; if (bb) lowvalue += split
+ subcs r4, r5, r4 ; if (bb) range = range-split
+
+ ; Counting the leading zeros is used to normalize range.
+ clz r6, r4
+ sub r6, r6, #24 ; shift
+
+ ; Flag is set on the sum of count. This flag is used later
+ ; to determine if count >= 0
+ adds r3, r3, r6 ; count += shift
+ lsl r5, r4, r6 ; range <<= shift
+ bmi token_count_lt_zero ; if(count >= 0)
+
+ sub r6, r6, r3 ; offset = shift - count
+ sub r4, r6, #1 ; offset-1
+ lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
+ bpl token_high_bit_not_set
+
+ ldr r4, [r0, #vp9_writer_pos] ; x
+ sub r4, r4, #1 ; x = w->pos-1
+ b token_zero_while_start
+token_zero_while_loop
+ mov r10, #0
+ strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
+ sub r4, r4, #1 ; x--
+token_zero_while_start
+ cmp r4, #0
+ ldrge r7, [r0, #vp9_writer_buffer]
+ ldrb r11, [r7, r4]
+ cmpge r11, #0xff
+ beq token_zero_while_loop
+
+ ldr r7, [r0, #vp9_writer_buffer]
+ ldrb r10, [r7, r4] ; w->buffer[x]
+ add r10, r10, #1
+ strb r10, [r7, r4] ; w->buffer[x] + 1
+token_high_bit_not_set
+ rsb r4, r6, #24 ; 24-offset
+ ldr r10, [r0, #vp9_writer_buffer]
+ lsr r7, r2, r4 ; lowvalue >> (24-offset)
+ ldr r4, [r0, #vp9_writer_pos] ; w->pos
+ lsl r2, r2, r6 ; lowvalue <<= offset
+ mov r6, r3 ; shift = count
+ add r11, r4, #1 ; w->pos++
+ bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
+ str r11, [r0, #vp9_writer_pos]
+ sub r3, r3, #8 ; count -= 8
+ strb r7, [r10, r4] ; w->buffer[w->pos++]
+
+ ; r10 is used earlier in the loop, but r10 is used as
+ ; temp variable here. So after r10 is used, reload
+ ; vp8_coef_tree_dcd into r10
+ ldr r10, [sp, #88] ; vp8_coef_tree
+
+token_count_lt_zero
+ lsl r2, r2, r6 ; lowvalue <<= shift
+
+ subs r8, r8, #1 ; --n
+ bne token_loop
+
+ ldrb r6, [r1, #tokenextra_token] ; t
+ ldr r7, [sp, #84] ; vp8_extra_bits
+ ; Add t * sizeof (vp9_extra_bit_struct) to get the desired
+ ; element. Here sizeof (vp9_extra_bit_struct) == 16
+ add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t
+
+ ldr r4, [r12, #vp9_extra_bit_struct_base_val]
+ cmp r4, #0
+ beq skip_extra_bits
+
+; if( b->base_val)
+ ldr r8, [r12, #vp9_extra_bit_struct_len] ; L
+ ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra
+ cmp r8, #0 ; if( L)
+ beq no_extra_bits
+
+ ldr r9, [r12, #vp9_extra_bit_struct_prob]
+ asr r7, lr, #1 ; v=e>>1
+
+ ldr r10, [r12, #vp9_extra_bit_struct_tree]
+ str r10, [sp, #4] ; b->tree
+
+ rsb r4, r8, #32
+ lsl r12, r7, r4
+
+ mov lr, #0 ; i = 0
+
+extra_bits_loop
+ ldrb r4, [r9, lr, asr #1] ; pp[i>>1]
+ sub r7, r5, #1 ; range-1
+ lsls r12, r12, #1 ; v >> n
+ mul r6, r4, r7 ; (range-1) * pp[i>>1]
+ addcs lr, lr, #1 ; i + bb
+
+ mov r7, #1
+ ldrsb lr, [r10, lr] ; i = b->tree[i+bb]
+ add r4, r7, r6, lsr #8 ; split = 1 + (((range-1) * pp[i>>1]) >> 8)
+
+ addcs r2, r2, r4 ; if (bb) lowvalue += split
+ subcs r4, r5, r4 ; if (bb) range = range-split
+
+ clz r6, r4
+ sub r6, r6, #24
+
+ adds r3, r3, r6 ; count += shift
+ lsl r5, r4, r6 ; range <<= shift
+ bmi extra_count_lt_zero ; if(count >= 0)
+
+ sub r6, r6, r3 ; offset= shift - count
+ sub r4, r6, #1 ; offset-1
+ lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
+ bpl extra_high_bit_not_set
+
+ ldr r4, [r0, #vp9_writer_pos] ; x
+ sub r4, r4, #1 ; x = w->pos - 1
+ b extra_zero_while_start
+extra_zero_while_loop
+ mov r10, #0
+ strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
+ sub r4, r4, #1 ; x--
+extra_zero_while_start
+ cmp r4, #0
+ ldrge r7, [r0, #vp9_writer_buffer]
+ ldrb r11, [r7, r4]
+ cmpge r11, #0xff
+ beq extra_zero_while_loop
+
+ ldr r7, [r0, #vp9_writer_buffer]
+ ldrb r10, [r7, r4]
+ add r10, r10, #1
+ strb r10, [r7, r4]
+extra_high_bit_not_set
+ rsb r4, r6, #24 ; 24-offset
+ ldr r10, [r0, #vp9_writer_buffer]
+ lsr r7, r2, r4 ; lowvalue >> (24-offset)
+ ldr r4, [r0, #vp9_writer_pos]
+ lsl r2, r2, r6 ; lowvalue <<= offset
+ mov r6, r3 ; shift = count
+ add r11, r4, #1 ; w->pos++
+ bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
+ str r11, [r0, #vp9_writer_pos]
+ sub r3, r3, #8 ; count -= 8
+ strb r7, [r10, r4] ; w->buffer[w->pos++]=(lowvalue >> (24-offset))
+ ldr r10, [sp, #4] ; b->tree
+extra_count_lt_zero
+ lsl r2, r2, r6
+
+ subs r8, r8, #1 ; --n
+ bne extra_bits_loop ; while (n)
+
+no_extra_bits
+ ldr lr, [r1, #4] ; e = p->Extra
+ add r4, r5, #1 ; range + 1
+ tst lr, #1
+ lsr r4, r4, #1 ; split = (range + 1) >> 1
+ addne r2, r2, r4 ; lowvalue += split
+ subne r4, r5, r4 ; range = range-split
+ tst r2, #0x80000000 ; lowvalue & 0x80000000
+ lsl r5, r4, #1 ; range <<= 1
+ beq end_high_bit_not_set
+
+ ldr r4, [r0, #vp9_writer_pos]
+ mov r7, #0
+ sub r4, r4, #1
+ b end_zero_while_start
+end_zero_while_loop
+ strb r7, [r6, r4]
+ sub r4, r4, #1 ; x--
+end_zero_while_start
+ cmp r4, #0
+ ldrge r6, [r0, #vp9_writer_buffer]
+ ldrb r12, [r6, r4]
+ cmpge r12, #0xff
+ beq end_zero_while_loop
+
+ ldr r6, [r0, #vp9_writer_buffer]
+ ldrb r7, [r6, r4]
+ add r7, r7, #1
+ strb r7, [r6, r4]
+end_high_bit_not_set
+ adds r3, r3, #1 ; ++count
+ lsl r2, r2, #1 ; lowvalue <<= 1
+ bne end_count_zero
+
+ ldr r4, [r0, #vp9_writer_pos]
+ mvn r3, #7
+ ldr r7, [r0, #vp9_writer_buffer]
+ lsr r6, r2, #24 ; lowvalue >> 24
+ add r12, r4, #1 ; w->pos++
+ bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
+ str r12, [r0, #vp9_writer_pos]
+ strb r6, [r7, r4]
+end_count_zero
+skip_extra_bits
+ add r1, r1, #TOKENEXTRA_SZ ; ++p
+check_p_lt_stop
+ ldr r4, [sp, #0] ; stop
+ cmp r1, r4 ; while( p < stop)
+ bcc while_p_lt_stop
+
+ ldr r10, [sp, #20] ; num_part
+ mov r1, #TOKENLIST_SZ
+ mul r1, r10, r1
+
+ ldr r6, [sp, #12] ; mb_rows
+ ldr r7, [sp, #16] ; tokenlist address
+ subs r6, r6, r10
+ add r7, r7, r1 ; next element in the array
+ str r6, [sp, #12]
+ bgt mb_row_loop
+
+ mov r12, #32
+
+stop_encode_loop
+ sub r7, r5, #1 ; range-1
+
+ mov r4, r7, lsl #7 ; ((range-1) * 128)
+
+ mov r7, #1
+ add r4, r7, r4, lsr #8 ; 1 + (((range-1) * 128) >> 8)
+
+ ; Counting the leading zeros is used to normalize range.
+ clz r6, r4
+ sub r6, r6, #24 ; shift
+
+ ; Flag is set on the sum of count. This flag is used later
+ ; to determine if count >= 0
+ adds r3, r3, r6 ; count += shift
+ lsl r5, r4, r6 ; range <<= shift
+ bmi token_count_lt_zero_se ; if(count >= 0)
+
+ sub r6, r6, r3 ; offset = shift - count
+ sub r4, r6, #1 ; offset-1
+ lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
+ bpl token_high_bit_not_set_se
+
+ ldr r4, [r0, #vp9_writer_pos] ; x
+ sub r4, r4, #1 ; x = w->pos-1
+ b token_zero_while_start_se
+token_zero_while_loop_se
+ mov r10, #0
+ strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
+ sub r4, r4, #1 ; x--
+token_zero_while_start_se
+ cmp r4, #0
+ ldrge r7, [r0, #vp9_writer_buffer]
+ ldrb r11, [r7, r4]
+ cmpge r11, #0xff
+ beq token_zero_while_loop_se
+
+ ldr r7, [r0, #vp9_writer_buffer]
+ ldrb r10, [r7, r4] ; w->buffer[x]
+ add r10, r10, #1
+ strb r10, [r7, r4] ; w->buffer[x] + 1
+token_high_bit_not_set_se
+ rsb r4, r6, #24 ; 24-offset
+ ldr r10, [r0, #vp9_writer_buffer]
+ lsr r7, r2, r4 ; lowvalue >> (24-offset)
+ ldr r4, [r0, #vp9_writer_pos] ; w->pos
+ lsl r2, r2, r6 ; lowvalue <<= offset
+ mov r6, r3 ; shift = count
+ add r11, r4, #1 ; w->pos++
+ bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
+ str r11, [r0, #vp9_writer_pos]
+ sub r3, r3, #8 ; count -= 8
+ strb r7, [r10, r4] ; w->buffer[w->pos++]
+
+token_count_lt_zero_se
+ lsl r2, r2, r6 ; lowvalue <<= shift
+
+ subs r12, r12, #1
+ bne stop_encode_loop
+
+ ldr r10, [sp, #8] ; *size
+ ldr r11, [r10]
+ ldr r4, [r0, #vp9_writer_pos] ; w->pos
+ add r11, r11, r4 ; *size += w->pos
+ str r11, [r10]
+
+ ldr r9, [sp, #20] ; num_part
+ sub r9, r9, #1
+ ldr r10, [sp, #28] ; i
+ cmp r10, r9 ; if(i<(num_part - 1))
+ bge skip_write_partition
+
+ ldr r12, [sp, #40] ; ptr
+ add r12, r12, r4 ; ptr += w->pos
+ str r12, [sp, #40]
+
+ ldr r9, [sp, #24] ; cx_data
+ mov r8, r4, asr #8
+ strb r4, [r9, #0]
+ strb r8, [r9, #1]
+ mov r4, r4, asr #16
+ strb r4, [r9, #2]
+
+ add r9, r9, #3 ; cx_data += 3
+ str r9, [sp, #24]
+
+skip_write_partition
+
+ ldr r11, [sp, #28] ; i
+ ldr r10, [sp, #20] ; num_part
+
+ add r11, r11, #1 ; i++
+ str r11, [sp, #28]
+
+ ldr r7, [sp, #32] ; cpi->tp_list[i]
+ mov r1, #TOKENLIST_SZ
+ add r7, r7, r1 ; next element in cpi->tp_list
+ str r7, [sp, #32] ; cpi->tp_list[i+1]
+
+ cmp r10, r11
+ bgt numparts_loop
+
+
+ add sp, sp, #44
+ pop {r4-r11, pc}
+ ENDP
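+
+; Partition framing sketch (reconstructed from the stores above): the first
+; 3*(num_part - 1) bytes of cx_data hold per-partition sizes; after each
+; partition except the last, its byte count w->pos is recorded little-endian:
+;
+;   cx_data[0] =  size        & 0xff;
+;   cx_data[1] = (size >> 8)  & 0xff;
+;   cx_data[2] = (size >> 16) & 0xff;
+;   cx_data += 3;              /* and ptr += w->pos for the next partition */
+;   *size += w->pos;           /* total, accumulated for every partition */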
+
+_VP8_COMP_common_
+ DCD vp8_comp_common
+_VP8_COMMON_MBrows_
+ DCD vp8_common_mb_rows
+_VP8_COMP_tplist_
+ DCD vp8_comp_tplist
+_VP8_COMP_bc2_
+ DCD vp8_comp_bc2
+
+ END
--- /dev/null
- INCLUDE asm_enc_offsets.asm
+;
+; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+ EXPORT |vp8_fast_quantize_b_armv6|
+
++ INCLUDE vp9_asm_enc_offsets.asm
+
+ ARM
+ REQUIRE8
+ PRESERVE8
+
+ AREA ||.text||, CODE, READONLY, ALIGN=2
+
+; r0 BLOCK *b
+; r1 BLOCKD *d
+|vp8_fast_quantize_b_armv6| PROC
+ stmfd sp!, {r1, r4-r11, lr}
+
+ ldr r3, [r0, #vp8_block_coeff] ; coeff
+ ldr r4, [r0, #vp8_block_quant_fast] ; quant_fast
+ ldr r5, [r0, #vp8_block_round] ; round
+ ldr r6, [r1, #vp8_blockd_qcoeff] ; qcoeff
+ ldr r7, [r1, #vp8_blockd_dqcoeff] ; dqcoeff
+ ldr r8, [r1, #vp8_blockd_dequant] ; dequant
+
+ ldr r2, loop_count ; loop_count=0x1000000. 'lsls' instruction
+ ; is used to update the counter so that
+ ; it can be used to mark nonzero
+ ; quantized coefficient pairs.
+
+ mov r1, #0 ; flags for quantized coeffs
+
+ ; PART 1: quantization and dequantization loop
+loop
+ ldr r9, [r3], #4 ; [z1 | z0]
+ ldr r10, [r5], #4 ; [r1 | r0]
+ ldr r11, [r4], #4 ; [q1 | q0]
+
+ ssat16 lr, #1, r9 ; [sz1 | sz0]
+ eor r9, r9, lr ; [z1 ^ sz1 | z0 ^ sz0]
+ ssub16 r9, r9, lr ; x = (z ^ sz) - sz
+ sadd16 r9, r9, r10 ; [x1+r1 | x0+r0]
+
+ ldr r12, [r3], #4 ; [z3 | z2]
+
+ smulbb r0, r9, r11 ; [(x0+r0)*q0]
+ smultt r9, r9, r11 ; [(x1+r1)*q1]
+
+ ldr r10, [r5], #4 ; [r3 | r2]
+
+ ssat16 r11, #1, r12 ; [sz3 | sz2]
+ eor r12, r12, r11 ; [z3 ^ sz3 | z2 ^ sz2]
+ pkhtb r0, r9, r0, asr #16 ; [y1 | y0]
+ ldr r9, [r4], #4 ; [q3 | q2]
+ ssub16 r12, r12, r11 ; x = (z ^ sz) - sz
+
+ sadd16 r12, r12, r10 ; [x3+r3 | x2+r2]
+
+ eor r0, r0, lr ; [(y1 ^ sz1) | (y0 ^ sz0)]
+
+ smulbb r10, r12, r9 ; [(x2+r2)*q2]
+ smultt r12, r12, r9 ; [(x3+r3)*q3]
+
+ ssub16 r0, r0, lr ; x = (y ^ sz) - sz
+
+ cmp r0, #0 ; check if zero
+ orrne r1, r1, r2, lsr #24 ; add flag for nonzero coeffs
+
+ str r0, [r6], #4 ; *qcoeff++ = x
+ ldr r9, [r8], #4 ; [dq1 | dq0]
+
+ pkhtb r10, r12, r10, asr #16 ; [y3 | y2]
+ eor r10, r10, r11 ; [(y3 ^ sz3) | (y2 ^ sz2)]
+ ssub16 r10, r10, r11 ; x = (y ^ sz) - sz
+
+ cmp r10, #0 ; check if zero
+ orrne r1, r1, r2, lsr #23 ; add flag for nonzero coeffs
+
+ str r10, [r6], #4 ; *qcoeff++ = x
+ ldr r11, [r8], #4 ; [dq3 | dq2]
+
+ smulbb r12, r0, r9 ; [x0*dq0]
+ smultt r0, r0, r9 ; [x1*dq1]
+
+ smulbb r9, r10, r11 ; [x2*dq2]
+ smultt r10, r10, r11 ; [x3*dq3]
+
+ lsls r2, r2, #2 ; update loop counter
+ strh r12, [r7, #0] ; dqcoeff[0] = [x0*dq0]
+ strh r0, [r7, #2] ; dqcoeff[1] = [x1*dq1]
+ strh r9, [r7, #4] ; dqcoeff[2] = [x2*dq2]
+ strh r10, [r7, #6] ; dqcoeff[3] = [x3*dq3]
+ add r7, r7, #8 ; dqcoeff += 8
+ bne loop
+
+ ; PART 2: check position for eob...
+ mov lr, #0 ; init eob
+ cmp r1, #0 ; coeffs after quantization?
+ ldr r11, [sp, #0] ; restore BLOCKD pointer
+ beq end ; skip eob calculations if all zero
+
+ ldr r0, [r11, #vp8_blockd_qcoeff]
+
+ ; check shortcut for nonzero qcoeffs
+ tst r1, #0x80
+ bne quant_coeff_15_14
+ tst r1, #0x20
+ bne quant_coeff_13_11
+ tst r1, #0x8
+ bne quant_coeff_12_7
+ tst r1, #0x40
+ bne quant_coeff_10_9
+ tst r1, #0x10
+ bne quant_coeff_8_3
+ tst r1, #0x2
+ bne quant_coeff_6_5
+ tst r1, #0x4
+ bne quant_coeff_4_2
+ b quant_coeff_1_0
+
+quant_coeff_15_14
+ ldrh r2, [r0, #30] ; rc=15, i=15
+ mov lr, #16
+ cmp r2, #0
+ bne end
+
+ ldrh r3, [r0, #28] ; rc=14, i=14
+ mov lr, #15
+ cmp r3, #0
+ bne end
+
+quant_coeff_13_11
+ ldrh r2, [r0, #22] ; rc=11, i=13
+ mov lr, #14
+ cmp r2, #0
+ bne end
+
+quant_coeff_12_7
+ ldrh r3, [r0, #14] ; rc=7, i=12
+ mov lr, #13
+ cmp r3, #0
+ bne end
+
+ ldrh r2, [r0, #20] ; rc=10, i=11
+ mov lr, #12
+ cmp r2, #0
+ bne end
+
+quant_coeff_10_9
+ ldrh r3, [r0, #26] ; rc=13, i=10
+ mov lr, #11
+ cmp r3, #0
+ bne end
+
+ ldrh r2, [r0, #24] ; rc=12, i=9
+ mov lr, #10
+ cmp r2, #0
+ bne end
+
+quant_coeff_8_3
+ ldrh r3, [r0, #18] ; rc=9, i=8
+ mov lr, #9
+ cmp r3, #0
+ bne end
+
+ ldrh r2, [r0, #12] ; rc=6, i=7
+ mov lr, #8
+ cmp r2, #0
+ bne end
+
+quant_coeff_6_5
+ ldrh r3, [r0, #6] ; rc=3, i=6
+ mov lr, #7
+ cmp r3, #0
+ bne end
+
+ ldrh r2, [r0, #4] ; rc=2, i=5
+ mov lr, #6
+ cmp r2, #0
+ bne end
+
+quant_coeff_4_2
+ ldrh r3, [r0, #10] ; rc=5, i=4
+ mov lr, #5
+ cmp r3, #0
+ bne end
+
+ ldrh r2, [r0, #16] ; rc=8, i=3
+ mov lr, #4
+ cmp r2, #0
+ bne end
+
+ ldrh r3, [r0, #8] ; rc=4, i=2
+ mov lr, #3
+ cmp r3, #0
+ bne end
+
+quant_coeff_1_0
+ ldrh r2, [r0, #2] ; rc=1, i=1
+ mov lr, #2
+ cmp r2, #0
+ bne end
+
+ mov lr, #1 ; rc=0, i=0
+
+end
+ str lr, [r11, #vp8_blockd_eob]
+ ldmfd sp!, {r1, r4-r11, pc}
+
+ ENDP
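+
+; Per-coefficient C sketch of PART 1, using the (z ^ sz) - sz trick noted
+; in the comments (names are illustrative):
+;
+;   for (i = 0; i < 16; i++) {
+;     z  = coeff[i];
+;     sz = z >> 15;                       /* 0 or -1 per 16-bit lane */
+;     x  = (z ^ sz) - sz;                 /* abs(z) */
+;     y  = ((x + round[i]) * quant_fast[i]) >> 16;
+;     x  = (y ^ sz) - sz;                 /* restore the sign */
+;     qcoeff[i]  = x;
+;     dqcoeff[i] = x * dequant[i];
+;   }
+;   /* PART 2 then finds eob: 1 + the zigzag position of the last */
+;   /* nonzero qcoeff, or 0 if the block quantized to all zeros.  */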
+
+loop_count
+ DCD 0x1000000
+
+ END
+
--- /dev/null
- INCLUDE asm_enc_offsets.asm
+;
+; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+ EXPORT |vp8_subtract_mby_armv6|
+ EXPORT |vp8_subtract_mbuv_armv6|
+ EXPORT |vp8_subtract_b_armv6|
+
++ INCLUDE vp9_asm_enc_offsets.asm
+
+ ARM
+ REQUIRE8
+ PRESERVE8
+
+ AREA ||.text||, CODE, READONLY, ALIGN=2
+
+; r0 BLOCK *be
+; r1 BLOCKD *bd
+; r2 int pitch
+|vp8_subtract_b_armv6| PROC
+
+ stmfd sp!, {r4-r9}
+
+ ldr r4, [r0, #vp8_block_base_src]
+ ldr r5, [r0, #vp8_block_src]
+ ldr r6, [r0, #vp8_block_src_diff]
+
+ ldr r3, [r4]
+ ldr r7, [r0, #vp8_block_src_stride]
+ add r3, r3, r5 ; src = *base_src + src
+ ldr r8, [r1, #vp8_blockd_predictor]
+
+ mov r9, #4 ; loop count
+
+loop_block
+
+ ldr r0, [r3], r7 ; src
+ ldr r1, [r8], r2 ; pred
+
+ uxtb16 r4, r0 ; [s2 | s0]
+ uxtb16 r5, r1 ; [p2 | p0]
+ uxtb16 r0, r0, ror #8 ; [s3 | s1]
+ uxtb16 r1, r1, ror #8 ; [p3 | p1]
+
+ usub16 r4, r4, r5 ; [d2 | d0]
+ usub16 r5, r0, r1 ; [d3 | d1]
+
+ subs r9, r9, #1 ; decrement loop counter
+
+ pkhbt r0, r4, r5, lsl #16 ; [d1 | d0]
+ pkhtb r1, r5, r4, asr #16 ; [d3 | d2]
+
+ str r0, [r6, #0] ; diff
+ str r1, [r6, #4] ; diff
+
+ add r6, r6, r2, lsl #1 ; update diff pointer
+ bne loop_block
+
+ ldmfd sp!, {r4-r9}
+ mov pc, lr
+
+ ENDP
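+
+; C sketch of the 4x4 subtraction above (field names follow the asm
+; offsets; illustrative):
+;
+;   unsigned char *src  = *be->base_src + be->src;
+;   unsigned char *pred = bd->predictor;
+;   short *diff = be->src_diff;
+;   for (r = 0; r < 4; r++) {
+;     for (c = 0; c < 4; c++)
+;       diff[c] = src[c] - pred[c];       /* 16-bit residuals */
+;     diff += pitch; pred += pitch; src += be->src_stride;
+;   }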
+
+
+; r0 short *diff
+; r1 unsigned char *usrc
+; r2 unsigned char *vsrc
+; r3 unsigned char *pred
+; stack int stride
+|vp8_subtract_mbuv_armv6| PROC
+
+ stmfd sp!, {r4-r12, lr}
+
+ add r0, r0, #512 ; set *diff point to Cb
+ add r3, r3, #256 ; set *pred point to Cb
+
+ mov r4, #8 ; loop count
+ ldr r5, [sp, #40] ; stride
+
+ ; Subtract U block
+loop_u
+ ldr r6, [r1] ; src (A)
+ ldr r7, [r3], #4 ; pred (A)
+
+ uxtb16 r8, r6 ; [s2 | s0] (A)
+ uxtb16 r9, r7 ; [p2 | p0] (A)
+ uxtb16 r10, r6, ror #8 ; [s3 | s1] (A)
+ uxtb16 r11, r7, ror #8 ; [p3 | p1] (A)
+
+ usub16 r6, r8, r9 ; [d2 | d0] (A)
+ usub16 r7, r10, r11 ; [d3 | d1] (A)
+
+ ldr r10, [r1, #4] ; src (B)
+ ldr r11, [r3], #4 ; pred (B)
+
+ pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A)
+ pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A)
+
+ str r8, [r0], #4 ; diff (A)
+ uxtb16 r8, r10 ; [s2 | s0] (B)
+ str r9, [r0], #4 ; diff (A)
+
+ uxtb16 r9, r11 ; [p2 | p0] (B)
+ uxtb16 r10, r10, ror #8 ; [s3 | s1] (B)
+ uxtb16 r11, r11, ror #8 ; [p3 | p1] (B)
+
+ usub16 r6, r8, r9 ; [d2 | d0] (B)
+ usub16 r7, r10, r11 ; [d3 | d1] (B)
+
+ add r1, r1, r5 ; update usrc pointer
+
+ pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B)
+ pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (B)
+
+ str r8, [r0], #4 ; diff (B)
+ subs r4, r4, #1 ; update loop counter
+ str r9, [r0], #4 ; diff (B)
+
+ bne loop_u
+
+ mov r4, #8 ; loop count
+
+ ; Subtract V block
+loop_v
+ ldr r6, [r2] ; src (A)
+ ldr r7, [r3], #4 ; pred (A)
+
+ uxtb16 r8, r6 ; [s2 | s0] (A)
+ uxtb16 r9, r7 ; [p2 | p0] (A)
+ uxtb16 r10, r6, ror #8 ; [s3 | s1] (A)
+ uxtb16 r11, r7, ror #8 ; [p3 | p1] (A)
+
+ usub16 r6, r8, r9 ; [d2 | d0] (A)
+ usub16 r7, r10, r11 ; [d3 | d1] (A)
+
+ ldr r10, [r2, #4] ; src (B)
+ ldr r11, [r3], #4 ; pred (B)
+
+ pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A)
+ pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A)
+
+ str r8, [r0], #4 ; diff (A)
+ uxtb16 r8, r10 ; [s2 | s0] (B)
+ str r9, [r0], #4 ; diff (A)
+
+ uxtb16 r9, r11 ; [p2 | p0] (B)
+ uxtb16 r10, r10, ror #8 ; [s3 | s1] (B)
+ uxtb16 r11, r11, ror #8 ; [p3 | p1] (B)
+
+ usub16 r6, r8, r9 ; [d2 | d0] (B)
+ usub16 r7, r10, r11 ; [d3 | d1] (B)
+
+ add r2, r2, r5 ; update vsrc pointer
+
+ pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B)
+ pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (B)
+
+ str r8, [r0], #4 ; diff (B)
+ subs r4, r4, #1 ; update loop counter
+ str r9, [r0], #4 ; diff (B)
+
+ bne loop_v
+
+ ldmfd sp!, {r4-r12, pc}
+
+ ENDP
+
+
+; r0 short *diff
+; r1 unsigned char *src
+; r2 unsigned char *pred
+; r3 int stride
+|vp8_subtract_mby_armv6| PROC
+
+ stmfd sp!, {r4-r11}
+
+ mov r4, #16
+loop
+ ldr r6, [r1] ; src (A)
+ ldr r7, [r2], #4 ; pred (A)
+
+ uxtb16 r8, r6 ; [s2 | s0] (A)
+ uxtb16 r9, r7 ; [p2 | p0] (A)
+ uxtb16 r10, r6, ror #8 ; [s3 | s1] (A)
+ uxtb16 r11, r7, ror #8 ; [p3 | p1] (A)
+
+ usub16 r6, r8, r9 ; [d2 | d0] (A)
+ usub16 r7, r10, r11 ; [d3 | d1] (A)
+
+ ldr r10, [r1, #4] ; src (B)
+ ldr r11, [r2], #4 ; pred (B)
+
+ pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A)
+ pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A)
+
+ str r8, [r0], #4 ; diff (A)
+ uxtb16 r8, r10 ; [s2 | s0] (B)
+ str r9, [r0], #4 ; diff (A)
+
+ uxtb16 r9, r11 ; [p2 | p0] (B)
+ uxtb16 r10, r10, ror #8 ; [s3 | s1] (B)
+ uxtb16 r11, r11, ror #8 ; [p3 | p1] (B)
+
+ usub16 r6, r8, r9 ; [d2 | d0] (B)
+ usub16 r7, r10, r11 ; [d3 | d1] (B)
+
+ ldr r10, [r1, #8] ; src (C)
+ ldr r11, [r2], #4 ; pred (C)
+
+ pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B)
+ pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (B)
+
+ str r8, [r0], #4 ; diff (B)
+ uxtb16 r8, r10 ; [s2 | s0] (C)
+ str r9, [r0], #4 ; diff (B)
+
+ uxtb16 r9, r11 ; [p2 | p0] (C)
+ uxtb16 r10, r10, ror #8 ; [s3 | s1] (C)
+ uxtb16 r11, r11, ror #8 ; [p3 | p1] (C)
+
+ usub16 r6, r8, r9 ; [d2 | d0] (C)
+ usub16 r7, r10, r11 ; [d3 | d1] (C)
+
+ ldr r10, [r1, #12] ; src (D)
+ ldr r11, [r2], #4 ; pred (D)
+
+ pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (C)
+ pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (C)
+
+ str r8, [r0], #4 ; diff (C)
+ uxtb16 r8, r10 ; [s2 | s0] (D)
+ str r9, [r0], #4 ; diff (C)
+
+ uxtb16 r9, r11 ; [p2 | p0] (D)
+ uxtb16 r10, r10, ror #8 ; [s3 | s1] (D)
+ uxtb16 r11, r11, ror #8 ; [p3 | p1] (D)
+
+ usub16 r6, r8, r9 ; [d2 | d0] (D)
+ usub16 r7, r10, r11 ; [d3 | d1] (D)
+
+ add r1, r1, r3 ; update src pointer
+
+ pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (D)
+ pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (D)
+
+ str r8, [r0], #4 ; diff (D)
+ subs r4, r4, #1 ; update loop counter
+ str r9, [r0], #4 ; diff (D)
+
+ bne loop
+
+ ldmfd sp!, {r4-r11}
+ mov pc, lr
+
+ ENDP
+
+ END
+
--- /dev/null
- INCLUDE asm_enc_offsets.asm
+;
+; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+ EXPORT |vp8_fast_quantize_b_neon|
+ EXPORT |vp8_fast_quantize_b_pair_neon|
+
++ INCLUDE vp9_asm_enc_offsets.asm
+
+ ARM
+ REQUIRE8
+ PRESERVE8
+
+ AREA ||.text||, CODE, READONLY, ALIGN=4
+
+;vp8_fast_quantize_b_pair_neon(BLOCK *b1, BLOCK *b2, BLOCKD *d1, BLOCKD *d2);
+|vp8_fast_quantize_b_pair_neon| PROC
+
+ stmfd sp!, {r4-r9}
+ vstmdb sp!, {q4-q7}
+
+ ldr r4, [r0, #vp8_block_coeff]
+ ldr r5, [r0, #vp8_block_quant_fast]
+ ldr r6, [r0, #vp8_block_round]
+
+ vld1.16 {q0, q1}, [r4@128] ; load z
+
+ ldr r7, [r2, #vp8_blockd_qcoeff]
+
+ vabs.s16 q4, q0 ; calculate x = abs(z)
+ vabs.s16 q5, q1
+
+ ;right shift 15 to get sign, all 0 if it is positive, all 1 if it is negative
+ vshr.s16 q2, q0, #15 ; sz
+ vshr.s16 q3, q1, #15
+
+ vld1.s16 {q6, q7}, [r6@128] ; load round_ptr [0-15]
+ vld1.s16 {q8, q9}, [r5@128] ; load quant_ptr [0-15]
+
+ ldr r4, [r1, #vp8_block_coeff]
+
+ vadd.s16 q4, q6 ; x + Round
+ vadd.s16 q5, q7
+
+ vld1.16 {q0, q1}, [r4@128] ; load z2
+
+ vqdmulh.s16 q4, q8 ; y = ((Round+abs(z)) * Quant) >> 16
+ vqdmulh.s16 q5, q9
+
+ vabs.s16 q10, q0 ; calculate x2 = abs(z_2)
+ vabs.s16 q11, q1
+ vshr.s16 q12, q0, #15 ; sz2
+ vshr.s16 q13, q1, #15
+
+ ;modify data to have its original sign
+ veor.s16 q4, q2 ; y^sz
+ veor.s16 q5, q3
+
+ vadd.s16 q10, q6 ; x2 + Round
+ vadd.s16 q11, q7
+
+ ldr r8, [r2, #vp8_blockd_dequant]
+
+ vqdmulh.s16 q10, q8 ; y2 = ((Round+abs(z)) * Quant) >> 16
+ vqdmulh.s16 q11, q9
+
+ vshr.s16 q4, #1 ; right shift 1 after vqdmulh
+ vshr.s16 q5, #1
+
+ vld1.s16 {q6, q7}, [r8@128] ;load dequant_ptr[i]
+
+ vsub.s16 q4, q2 ; x1=(y^sz)-sz = (y^sz)-(-1) (2's complement)
+ vsub.s16 q5, q3
+
+ vshr.s16 q10, #1 ; right shift 1 after vqdmulh
+ vshr.s16 q11, #1
+
+ ldr r9, [r2, #vp8_blockd_dqcoeff]
+
+ veor.s16 q10, q12 ; y2^sz2
+ veor.s16 q11, q13
+
+ vst1.s16 {q4, q5}, [r7] ; store: qcoeff = x1
+
+
+ vsub.s16 q10, q12 ; x2=(y^sz)-sz = (y^sz)-(-1) (2's complement)
+ vsub.s16 q11, q13
+
+ ldr r6, [r3, #vp8_blockd_qcoeff]
+
+ vmul.s16 q2, q6, q4 ; x * Dequant
+ vmul.s16 q3, q7, q5
+
+ ldr r0, _inv_zig_zag_ ; load ptr of inverse zigzag table
+
+ vceq.s16 q8, q8 ; set q8 to all 1
+
+ vst1.s16 {q10, q11}, [r6] ; store: qcoeff = x2
+
+ vmul.s16 q12, q6, q10 ; x2 * Dequant
+ vmul.s16 q13, q7, q11
+
+ vld1.16 {q6, q7}, [r0@128] ; load inverse scan order
+
+ vtst.16 q14, q4, q8 ; now find eob
+ vtst.16 q15, q5, q8 ; non-zero element is set to all 1
+
+ vst1.s16 {q2, q3}, [r9] ; store dqcoeff = x * Dequant
+
+ ldr r7, [r3, #vp8_blockd_dqcoeff]
+
+ vand q0, q6, q14 ; get all valid numbers from scan array
+ vand q1, q7, q15
+
+ vst1.s16 {q12, q13}, [r7] ; store dqcoeff = x * Dequant
+
+ vtst.16 q2, q10, q8 ; now find eob
+ vtst.16 q3, q11, q8 ; non-zero element is set to all 1
+
+ vmax.u16 q0, q0, q1 ; find maximum value in q0, q1
+
+ vand q10, q6, q2 ; get all valid numbers from scan array
+ vand q11, q7, q3
+ vmax.u16 q10, q10, q11 ; find maximum value in q10, q11
+
+ vmax.u16 d0, d0, d1
+ vmax.u16 d20, d20, d21
+ vmovl.u16 q0, d0
+ vmovl.u16 q10, d20
+
+
+ vmax.u32 d0, d0, d1
+ vmax.u32 d20, d20, d21
+ vpmax.u32 d0, d0, d0
+ vpmax.u32 d20, d20, d20
+
+ add r4, r2, #vp8_blockd_eob
+ add r5, r3, #vp8_blockd_eob
+
+ vst1.32 {d0[0]}, [r4@32]
+ vst1.32 {d20[0]}, [r5@32]
+
+ vldmia sp!, {q4-q7}
+ ldmfd sp!, {r4-r9}
+ bx lr
+
+ ENDP
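+
+; Eob sketch: rather than scanning serially, the NEON path masks the
+; 1-based inverse zigzag table with the nonzero lanes (the vtst/vand
+; sequence above) and reduces with a horizontal maximum:
+;
+;   eob = 0;
+;   for (i = 0; i < 16; i++)
+;     if (qcoeff[i]) eob = MAX(eob, inv_zig_zag[i]);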
+
+;void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
+|vp8_fast_quantize_b_neon| PROC
+
+ stmfd sp!, {r4-r7}
+
+ ldr r3, [r0, #vp8_block_coeff]
+ ldr r4, [r0, #vp8_block_quant_fast]
+ ldr r5, [r0, #vp8_block_round]
+
+ vld1.16 {q0, q1}, [r3@128] ; load z
+ vorr.s16 q14, q0, q1 ; check if all zero (step 1)
+ ldr r6, [r1, #vp8_blockd_qcoeff]
+ ldr r7, [r1, #vp8_blockd_dqcoeff]
+ vorr.s16 d28, d28, d29 ; check if all zero (step 2)
+
+ vabs.s16 q12, q0 ; calculate x = abs(z)
+ vabs.s16 q13, q1
+
+ ;right shift 15 to get sign, all 0 if it is positive, all 1 if it is negative
+ vshr.s16 q2, q0, #15 ; sz
+ vmov r2, r3, d28 ; check if all zero (step 3)
+ vshr.s16 q3, q1, #15
+
+ vld1.s16 {q14, q15}, [r5@128]; load round_ptr [0-15]
+ vld1.s16 {q8, q9}, [r4@128] ; load quant_ptr [0-15]
+
+ vadd.s16 q12, q14 ; x + Round
+ vadd.s16 q13, q15
+
+ ldr r0, _inv_zig_zag_ ; load ptr of inverse zigzag table
+
+ vqdmulh.s16 q12, q8 ; y = ((Round+abs(z)) * Quant) >> 16
+ vqdmulh.s16 q13, q9
+
+ vld1.16 {q10, q11}, [r0@128]; load inverse scan order
+
+ vceq.s16 q8, q8 ; set q8 to all 1
+
+ ldr r4, [r1, #vp8_blockd_dequant]
+
+ vshr.s16 q12, #1 ; right shift 1 after vqdmulh
+ vshr.s16 q13, #1
+
+ orr r2, r2, r3 ; check if all zero (step 4)
+ cmp r2, #0 ; check if all zero (step 5)
+ beq zero_output ; check if all zero (step 6)
+
+ ;modify data to have its original sign
+ veor.s16 q12, q2 ; y^sz
+ veor.s16 q13, q3
+
+ vsub.s16 q12, q2 ; x1=(y^sz)-sz = (y^sz)-(-1) (2's complement)
+ vsub.s16 q13, q3
+
+ vld1.s16 {q2, q3}, [r4@128] ; load dequant_ptr[i]
+
+ vtst.16 q14, q12, q8 ; now find eob
+ vtst.16 q15, q13, q8 ; non-zero element is set to all 1
+
+ vst1.s16 {q12, q13}, [r6@128]; store: qcoeff = x1
+
+ vand q10, q10, q14 ; get all valid numbers from scan array
+ vand q11, q11, q15
+
+
+ vmax.u16 q0, q10, q11 ; find maximum value in q0, q1
+ vmax.u16 d0, d0, d1
+ vmovl.u16 q0, d0
+
+ vmul.s16 q2, q12 ; x * Dequant
+ vmul.s16 q3, q13
+
+ vmax.u32 d0, d0, d1
+ vpmax.u32 d0, d0, d0
+
+ vst1.s16 {q2, q3}, [r7@128] ; store dqcoeff = x * Dequant
+
+ add r4, r1, #vp8_blockd_eob
+ vst1.32 {d0[0]}, [r4@32]
+
+ ldmfd sp!, {r4-r7}
+ bx lr
+
+zero_output
+ str r2, [r1, #vp8_blockd_eob]
+ vst1.s16 {q0, q1}, [r6@128] ; qcoeff = 0
+ vst1.s16 {q0, q1}, [r7@128] ; dqcoeff = 0
+
+ ldmfd sp!, {r4-r7}
+ bx lr
+
+ ENDP
+
+; default inverse zigzag table is defined in vp9/common/entropy.c
+_inv_zig_zag_
+ DCD inv_zig_zag
+
+ ALIGN 16 ; enable use of @128 bit aligned loads
+inv_zig_zag
+ DCW 0x0001, 0x0002, 0x0006, 0x0007
+ DCW 0x0003, 0x0005, 0x0008, 0x000d
+ DCW 0x0004, 0x0009, 0x000c, 0x000e
+ DCW 0x000a, 0x000b, 0x000f, 0x0010
+
+ END
+
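For reference, a minimal C model of what the two NEON quantizers above compute (the tail of the pair routine and vp8_fast_quantize_b_neon). This is an illustration of the algorithm, not the project's C implementation: the vqdmulh.s16 plus vshr.s16 #1 pair is modelled as a plain (x * quant) >> 16, and inv_zig_zag holds each coefficient's scan position plus one, so the masked maximum is exactly the end-of-block value the vmax/vpmax reduction produces.

/* Illustrative C model of the NEON fast quantizer; parameter names
 * mirror the BLOCK/BLOCKD fields referenced via the asm offsets. */
static void fast_quantize_b_model(const short *coeff, const short *round,
                                  const short *quant_fast,
                                  const short *dequant,
                                  short *qcoeff, short *dqcoeff, int *eob,
                                  const short *inv_zig_zag) {
  int i, maxpos = 0;
  for (i = 0; i < 16; i++) {
    short z  = coeff[i];
    short sz = z >> 15;                  /* all 0s if z >= 0, else all 1s */
    short x  = (z ^ sz) - sz;            /* x = abs(z)                    */
    short y  = (short)((((int)x + round[i]) * quant_fast[i]) >> 16);
    y = (y ^ sz) - sz;                   /* restore the original sign     */
    qcoeff[i]  = y;
    dqcoeff[i] = (short)(y * dequant[i]);
    if (y && inv_zig_zag[i] > maxpos)
      maxpos = inv_zig_zag[i];           /* last nonzero in scan order    */
  }
  *eob = maxpos;
}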
--- /dev/null
- INCLUDE asm_enc_offsets.asm
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+ EXPORT |vp8_subtract_b_neon|
+ EXPORT |vp8_subtract_mby_neon|
+ EXPORT |vp8_subtract_mbuv_neon|
+
++ INCLUDE vp9_asm_enc_offsets.asm
+
+ ARM
+ REQUIRE8
+ PRESERVE8
+
+ AREA ||.text||, CODE, READONLY, ALIGN=2
+
+;void vp8_subtract_b_neon(BLOCK *be, BLOCKD *bd, int pitch)
+|vp8_subtract_b_neon| PROC
+
+ stmfd sp!, {r4-r7}
+
+ ldr r3, [r0, #vp8_block_base_src]
+ ldr r4, [r0, #vp8_block_src]
+ ldr r5, [r0, #vp8_block_src_diff]
+ ldr r3, [r3]
+ ldr r6, [r0, #vp8_block_src_stride]
+ add r3, r3, r4 ; src = *base_src + src
+ ldr r7, [r1, #vp8_blockd_predictor]
+
+ vld1.8 {d0}, [r3], r6 ;load src
+ vld1.8 {d1}, [r7], r2 ;load pred
+ vld1.8 {d2}, [r3], r6
+ vld1.8 {d3}, [r7], r2
+ vld1.8 {d4}, [r3], r6
+ vld1.8 {d5}, [r7], r2
+ vld1.8 {d6}, [r3], r6
+ vld1.8 {d7}, [r7], r2
+
+ vsubl.u8 q10, d0, d1
+ vsubl.u8 q11, d2, d3
+ vsubl.u8 q12, d4, d5
+ vsubl.u8 q13, d6, d7
+
+ mov r2, r2, lsl #1
+
+ vst1.16 {d20}, [r5], r2 ;store diff
+ vst1.16 {d22}, [r5], r2
+ vst1.16 {d24}, [r5], r2
+ vst1.16 {d26}, [r5], r2
+
+ ldmfd sp!, {r4-r7}
+ bx lr
+
+ ENDP
+
+
+;==========================================
+;void vp8_subtract_mby_neon(short *diff, unsigned char *src, unsigned char *pred, int stride)
+|vp8_subtract_mby_neon| PROC
+ mov r12, #4
+
+subtract_mby_loop
+ vld1.8 {q0}, [r1], r3 ;load src
+ vld1.8 {q1}, [r2]! ;load pred
+ vld1.8 {q2}, [r1], r3
+ vld1.8 {q3}, [r2]!
+ vld1.8 {q4}, [r1], r3
+ vld1.8 {q5}, [r2]!
+ vld1.8 {q6}, [r1], r3
+ vld1.8 {q7}, [r2]!
+
+ vsubl.u8 q8, d0, d2
+ vsubl.u8 q9, d1, d3
+ vsubl.u8 q10, d4, d6
+ vsubl.u8 q11, d5, d7
+ vsubl.u8 q12, d8, d10
+ vsubl.u8 q13, d9, d11
+ vsubl.u8 q14, d12, d14
+ vsubl.u8 q15, d13, d15
+
+ vst1.16 {q8}, [r0]! ;store diff
+ vst1.16 {q9}, [r0]!
+ vst1.16 {q10}, [r0]!
+ vst1.16 {q11}, [r0]!
+ vst1.16 {q12}, [r0]!
+ vst1.16 {q13}, [r0]!
+ vst1.16 {q14}, [r0]!
+ vst1.16 {q15}, [r0]!
+
+ subs r12, r12, #1
+ bne subtract_mby_loop
+
+ bx lr
+ ENDP
+
+;=================================
+;void vp8_subtract_mbuv_neon(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride)
+|vp8_subtract_mbuv_neon| PROC
+ ldr r12, [sp]
+
+;u
+ add r0, r0, #512 ; short *udiff = diff + 256;
+ add r3, r3, #256 ; unsigned char *upred = pred + 256;
+
+ vld1.8 {d0}, [r1], r12 ;load src
+ vld1.8 {d1}, [r3]! ;load pred
+ vld1.8 {d2}, [r1], r12
+ vld1.8 {d3}, [r3]!
+ vld1.8 {d4}, [r1], r12
+ vld1.8 {d5}, [r3]!
+ vld1.8 {d6}, [r1], r12
+ vld1.8 {d7}, [r3]!
+ vld1.8 {d8}, [r1], r12
+ vld1.8 {d9}, [r3]!
+ vld1.8 {d10}, [r1], r12
+ vld1.8 {d11}, [r3]!
+ vld1.8 {d12}, [r1], r12
+ vld1.8 {d13}, [r3]!
+ vld1.8 {d14}, [r1], r12
+ vld1.8 {d15}, [r3]!
+
+ vsubl.u8 q8, d0, d1
+ vsubl.u8 q9, d2, d3
+ vsubl.u8 q10, d4, d5
+ vsubl.u8 q11, d6, d7
+ vsubl.u8 q12, d8, d9
+ vsubl.u8 q13, d10, d11
+ vsubl.u8 q14, d12, d13
+ vsubl.u8 q15, d14, d15
+
+ vst1.16 {q8}, [r0]! ;store diff
+ vst1.16 {q9}, [r0]!
+ vst1.16 {q10}, [r0]!
+ vst1.16 {q11}, [r0]!
+ vst1.16 {q12}, [r0]!
+ vst1.16 {q13}, [r0]!
+ vst1.16 {q14}, [r0]!
+ vst1.16 {q15}, [r0]!
+
+;v
+ vld1.8 {d0}, [r2], r12 ;load src
+ vld1.8 {d1}, [r3]! ;load pred
+ vld1.8 {d2}, [r2], r12
+ vld1.8 {d3}, [r3]!
+ vld1.8 {d4}, [r2], r12
+ vld1.8 {d5}, [r3]!
+ vld1.8 {d6}, [r2], r12
+ vld1.8 {d7}, [r3]!
+ vld1.8 {d8}, [r2], r12
+ vld1.8 {d9}, [r3]!
+ vld1.8 {d10}, [r2], r12
+ vld1.8 {d11}, [r3]!
+ vld1.8 {d12}, [r2], r12
+ vld1.8 {d13}, [r3]!
+ vld1.8 {d14}, [r2], r12
+ vld1.8 {d15}, [r3]!
+
+ vsubl.u8 q8, d0, d1
+ vsubl.u8 q9, d2, d3
+ vsubl.u8 q10, d4, d5
+ vsubl.u8 q11, d6, d7
+ vsubl.u8 q12, d8, d9
+ vsubl.u8 q13, d10, d11
+ vsubl.u8 q14, d12, d13
+ vsubl.u8 q15, d14, d15
+
+ vst1.16 {q8}, [r0]! ;store diff
+ vst1.16 {q9}, [r0]!
+ vst1.16 {q10}, [r0]!
+ vst1.16 {q11}, [r0]!
+ vst1.16 {q12}, [r0]!
+ vst1.16 {q13}, [r0]!
+ vst1.16 {q14}, [r0]!
+ vst1.16 {q15}, [r0]!
+
+ bx lr
+ ENDP
+
+ END
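All three routines above compute the same primitive: subtract an 8-bit prediction from the 8-bit source and widen the result to 16-bit residuals, which is what each vsubl.u8/vst1.16 pair implements. A rough C equivalent (block geometry and strides here are illustrative):

/* Rough C equivalent of the widening subtract: diff = src - pred,
 * with 8-bit inputs and 16-bit outputs. */
static void subtract_block_model(short *diff, int diff_stride,
                                 const unsigned char *src, int src_stride,
                                 const unsigned char *pred, int pred_stride,
                                 int rows, int cols) {
  int r, c;
  for (r = 0; r < rows; r++) {
    for (c = 0; c < cols; c++)
      diff[c] = (short)(src[c] - pred[c]);  /* cannot overflow 16 bits */
    diff += diff_stride;
    src  += src_stride;
    pred += pred_stride;
  }
}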
--- /dev/null
- %include "asm_enc_offsets.asm"
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license and patent
+; grant that can be found in the LICENSE file in the root of the source
+; tree. All contributing project authors may be found in the AUTHORS
+; file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
++%include "vp9_asm_enc_offsets.asm"
+
+
+; void vp9_regular_quantize_b_sse2 | arg
+; (BLOCK *b, | 0
+; BLOCKD *d) | 1
+
+global sym(vp9_regular_quantize_b_sse2)
+sym(vp9_regular_quantize_b_sse2):
+ push rbp
+ mov rbp, rsp
+ SAVE_XMM 7
+ GET_GOT rbx
+
+%if ABI_IS_32BIT
+ push rdi
+ push rsi
+%else
+ %ifidn __OUTPUT_FORMAT__,x64
+ push rdi
+ push rsi
+ %endif
+%endif
+
+ ALIGN_STACK 16, rax
+ %define zrun_zbin_boost 0 ; 8
+ %define abs_minus_zbin 8 ; 32
+ %define temp_qcoeff 40 ; 32
+ %define qcoeff 72 ; 32
+ %define stack_size 104
+ sub rsp, stack_size
+ ; end prolog
+
+%if ABI_IS_32BIT
+ mov rdi, arg(0) ; BLOCK *b
+ mov rsi, arg(1) ; BLOCKD *d
+%else
+ %ifidn __OUTPUT_FORMAT__,x64
+ mov rdi, rcx ; BLOCK *b
+ mov rsi, rdx ; BLOCKD *d
+ %else
+ ;mov rdi, rdi ; BLOCK *b
+ ;mov rsi, rsi ; BLOCKD *d
+ %endif
+%endif
+
+ mov rdx, [rdi + vp9_block_coeff] ; coeff_ptr
+ mov rcx, [rdi + vp9_block_zbin] ; zbin_ptr
+ movd xmm7, [rdi + vp9_block_zbin_extra] ; zbin_oq_value
+
+ ; z
+ movdqa xmm0, [rdx]
+ movdqa xmm4, [rdx + 16]
+ mov rdx, [rdi + vp9_block_round] ; round_ptr
+
+ pshuflw xmm7, xmm7, 0
+ punpcklwd xmm7, xmm7 ; duplicated zbin_oq_value
+
+ movdqa xmm1, xmm0
+ movdqa xmm5, xmm4
+
+ ; sz
+ psraw xmm0, 15
+ psraw xmm4, 15
+
+ ; (z ^ sz)
+ pxor xmm1, xmm0
+ pxor xmm5, xmm4
+
+ ; x = abs(z)
+ psubw xmm1, xmm0
+ psubw xmm5, xmm4
+
+ movdqa xmm2, [rcx]
+ movdqa xmm3, [rcx + 16]
+ mov rcx, [rdi + vp9_block_quant] ; quant_ptr
+
+ ; *zbin_ptr + zbin_oq_value
+ paddw xmm2, xmm7
+ paddw xmm3, xmm7
+
+ ; x - (*zbin_ptr + zbin_oq_value)
+ psubw xmm1, xmm2
+ psubw xmm5, xmm3
+ movdqa [rsp + abs_minus_zbin], xmm1
+ movdqa [rsp + abs_minus_zbin + 16], xmm5
+
+ ; add (zbin_ptr + zbin_oq_value) back
+ paddw xmm1, xmm2
+ paddw xmm5, xmm3
+
+ movdqa xmm2, [rdx]
+ movdqa xmm6, [rdx + 16]
+
+ movdqa xmm3, [rcx]
+ movdqa xmm7, [rcx + 16]
+
+ ; x + round
+ paddw xmm1, xmm2
+ paddw xmm5, xmm6
+
+ ; y = x * quant_ptr >> 16
+ pmulhw xmm3, xmm1
+ pmulhw xmm7, xmm5
+
+ ; y += x
+ paddw xmm1, xmm3
+ paddw xmm5, xmm7
+
+ movdqa [rsp + temp_qcoeff], xmm1
+ movdqa [rsp + temp_qcoeff + 16], xmm5
+
+ pxor xmm6, xmm6
+ ; zero qcoeff
+ movdqa [rsp + qcoeff], xmm6
+ movdqa [rsp + qcoeff + 16], xmm6
+
+ mov rdx, [rdi + vp9_block_zrun_zbin_boost] ; zbin_boost_ptr
+ mov rax, [rdi + vp9_block_quant_shift] ; quant_shift_ptr
+ mov [rsp + zrun_zbin_boost], rdx
+
+%macro ZIGZAG_LOOP 1
+ ; x
+ movsx ecx, WORD PTR[rsp + abs_minus_zbin + %1 * 2]
+
+ ; if (x >= zbin)
+ sub cx, WORD PTR[rdx] ; x - zbin
+ lea rdx, [rdx + 2] ; zbin_boost_ptr++
+ jl .rq_zigzag_loop_%1 ; x < zbin
+
+ movsx edi, WORD PTR[rsp + temp_qcoeff + %1 * 2]
+
+ ; downshift by quant_shift[rc]
+ movsx cx, BYTE PTR[rax + %1] ; quant_shift_ptr[rc]
+ sar edi, cl ; also sets Z bit
+ je .rq_zigzag_loop_%1 ; !y
+ mov WORD PTR[rsp + qcoeff + %1 * 2], di ;qcoeff_ptr[rc] = temp_qcoeff[rc]
+ mov rdx, [rsp + zrun_zbin_boost] ; reset to b->zrun_zbin_boost
+.rq_zigzag_loop_%1:
+%endmacro
+; in vp9_default_zig_zag1d order: see vp9/common/entropy.c
+ZIGZAG_LOOP 0
+ZIGZAG_LOOP 1
+ZIGZAG_LOOP 4
+ZIGZAG_LOOP 8
+ZIGZAG_LOOP 5
+ZIGZAG_LOOP 2
+ZIGZAG_LOOP 3
+ZIGZAG_LOOP 6
+ZIGZAG_LOOP 9
+ZIGZAG_LOOP 12
+ZIGZAG_LOOP 13
+ZIGZAG_LOOP 10
+ZIGZAG_LOOP 7
+ZIGZAG_LOOP 11
+ZIGZAG_LOOP 14
+ZIGZAG_LOOP 15
+
+ movdqa xmm2, [rsp + qcoeff]
+ movdqa xmm3, [rsp + qcoeff + 16]
+
+ mov rcx, [rsi + vp9_blockd_dequant] ; dequant_ptr
+ mov rdi, [rsi + vp9_blockd_dqcoeff] ; dqcoeff_ptr
+
+ ; y ^ sz
+ pxor xmm2, xmm0
+ pxor xmm3, xmm4
+ ; x = (y ^ sz) - sz
+ psubw xmm2, xmm0
+ psubw xmm3, xmm4
+
+ ; dequant
+ movdqa xmm0, [rcx]
+ movdqa xmm1, [rcx + 16]
+
+ mov rcx, [rsi + vp9_blockd_qcoeff] ; qcoeff_ptr
+
+ pmullw xmm0, xmm2
+ pmullw xmm1, xmm3
+
+ movdqa [rcx], xmm2 ; store qcoeff
+ movdqa [rcx + 16], xmm3
+ movdqa [rdi], xmm0 ; store dqcoeff
+ movdqa [rdi + 16], xmm1
+
+ ; select the last value (in zig_zag order) for EOB
+ pcmpeqw xmm2, xmm6
+ pcmpeqw xmm3, xmm6
+ ; !
+ pcmpeqw xmm6, xmm6
+ pxor xmm2, xmm6
+ pxor xmm3, xmm6
+ ; mask inv_zig_zag
+ pand xmm2, [GLOBAL(inv_zig_zag)]
+ pand xmm3, [GLOBAL(inv_zig_zag + 16)]
+ ; select the max value
+ pmaxsw xmm2, xmm3
+ pshufd xmm3, xmm2, 00001110b
+ pmaxsw xmm2, xmm3
+ pshuflw xmm3, xmm2, 00001110b
+ pmaxsw xmm2, xmm3
+ pshuflw xmm3, xmm2, 00000001b
+ pmaxsw xmm2, xmm3
+ movd eax, xmm2
+ and eax, 0xff
+ mov [rsi + vp9_blockd_eob], eax
+
+ ; begin epilog
+ add rsp, stack_size
+ pop rsp
+%if ABI_IS_32BIT
+ pop rsi
+ pop rdi
+%else
+ %ifidn __OUTPUT_FORMAT__,x64
+ pop rsi
+ pop rdi
+ %endif
+%endif
+ RESTORE_GOT
+ RESTORE_XMM
+ pop rbp
+ ret
+
+; void vp9_fast_quantize_b_sse2 | arg
+; (BLOCK *b, | 0
+; BLOCKD *d) | 1
+
+global sym(vp9_fast_quantize_b_sse2)
+sym(vp9_fast_quantize_b_sse2):
+ push rbp
+ mov rbp, rsp
+ GET_GOT rbx
+
+%if ABI_IS_32BIT
+ push rdi
+ push rsi
+%else
+ %ifidn __OUTPUT_FORMAT__,x64
+ push rdi
+ push rsi
+ %else
+ ; these registers are used for passing arguments
+ %endif
+%endif
+
+ ; end prolog
+
+%if ABI_IS_32BIT
+ mov rdi, arg(0) ; BLOCK *b
+ mov rsi, arg(1) ; BLOCKD *d
+%else
+ %ifidn __OUTPUT_FORMAT__,x64
+ mov rdi, rcx ; BLOCK *b
+ mov rsi, rdx ; BLOCKD *d
+ %else
+ ;mov rdi, rdi ; BLOCK *b
+ ;mov rsi, rsi ; BLOCKD *d
+ %endif
+%endif
+
+ mov rax, [rdi + vp9_block_coeff]
+ mov rcx, [rdi + vp9_block_round]
+ mov rdx, [rdi + vp9_block_quant_fast]
+
+ ; z = coeff
+ movdqa xmm0, [rax]
+ movdqa xmm4, [rax + 16]
+
+ ; dup z so we can save sz
+ movdqa xmm1, xmm0
+ movdqa xmm5, xmm4
+
+ ; sz = z >> 15
+ psraw xmm0, 15
+ psraw xmm4, 15
+
+ ; x = abs(z) = (z ^ sz) - sz
+ pxor xmm1, xmm0
+ pxor xmm5, xmm4
+ psubw xmm1, xmm0
+ psubw xmm5, xmm4
+
+ ; x += round
+ paddw xmm1, [rcx]
+ paddw xmm5, [rcx + 16]
+
+ mov rax, [rsi + vp9_blockd_qcoeff]
+ mov rcx, [rsi + vp9_blockd_dequant]
+ mov rdi, [rsi + vp9_blockd_dqcoeff]
+
+ ; y = x * quant >> 16
+ pmulhw xmm1, [rdx]
+ pmulhw xmm5, [rdx + 16]
+
+ ; x = (y ^ sz) - sz
+ pxor xmm1, xmm0
+ pxor xmm5, xmm4
+ psubw xmm1, xmm0
+ psubw xmm5, xmm4
+
+ ; qcoeff = x
+ movdqa [rax], xmm1
+ movdqa [rax + 16], xmm5
+
+ ; x * dequant
+ movdqa xmm2, xmm1
+ movdqa xmm3, xmm5
+ pmullw xmm2, [rcx]
+ pmullw xmm3, [rcx + 16]
+
+ ; dqcoeff = x * dequant
+ movdqa [rdi], xmm2
+ movdqa [rdi + 16], xmm3
+
+ pxor xmm4, xmm4 ;clear all bits
+ pcmpeqw xmm1, xmm4
+ pcmpeqw xmm5, xmm4
+
+ pcmpeqw xmm4, xmm4 ;set all bits
+ pxor xmm1, xmm4
+ pxor xmm5, xmm4
+
+ pand xmm1, [GLOBAL(inv_zig_zag)]
+ pand xmm5, [GLOBAL(inv_zig_zag + 16)]
+
+ pmaxsw xmm1, xmm5
+
+ ; now down to 8
+ pshufd xmm5, xmm1, 00001110b
+
+ pmaxsw xmm1, xmm5
+
+ ; only 4 left
+ pshuflw xmm5, xmm1, 00001110b
+
+ pmaxsw xmm1, xmm5
+
+ ; okay, just 2!
+ pshuflw xmm5, xmm1, 00000001b
+
+ pmaxsw xmm1, xmm5
+
+ movd eax, xmm1
+ and eax, 0xff
+ mov [rsi + vp9_blockd_eob], eax
+
+ ; begin epilog
+%if ABI_IS_32BIT
+ pop rsi
+ pop rdi
+%else
+ %ifidn __OUTPUT_FORMAT__,x64
+ pop rsi
+ pop rdi
+ %endif
+%endif
+
+ RESTORE_GOT
+ pop rbp
+ ret
+
+SECTION_RODATA
+align 16
+inv_zig_zag:
+ dw 0x0001, 0x0002, 0x0006, 0x0007
+ dw 0x0003, 0x0005, 0x0008, 0x000d
+ dw 0x0004, 0x0009, 0x000c, 0x000e
+ dw 0x000a, 0x000b, 0x000f, 0x0010
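The ZIGZAG_LOOP macro above is the heart of VP8-style zero-bin quantization: coefficients are visited in scan order, a value is kept only if its magnitude clears the zero bin plus a boost that grows with the current run of zeros, and the boost pointer snaps back to the start of b->zrun_zbin_boost whenever a nonzero value is emitted. Sign restoration, dequantization and the EOB search then happen vectorized after the loop, as in the fast path. A hedged C sketch of just that control flow, with descriptive names rather than the project's:

/* Sketch of the adaptive zero-bin selection done by ZIGZAG_LOOP.
 * Arrays hold 16 raster-order values; zig_zag[] lists them in scan
 * order; qcoeff[] is assumed pre-zeroed, as the asm does with pxor. */
static void regular_quantize_model(const short *abs_minus_zbin,
                                   const short *temp_qcoeff,
                                   const unsigned char *quant_shift,
                                   const short *zrun_zbin_boost,
                                   const int *zig_zag, short *qcoeff) {
  const short *zbin_boost_ptr = zrun_zbin_boost;
  int i;
  for (i = 0; i < 16; i++) {
    int rc = zig_zag[i];                   /* raster index of scan pos i */
    int keep = abs_minus_zbin[rc] >= *zbin_boost_ptr++;
    if (keep) {
      short y = temp_qcoeff[rc] >> quant_shift[rc];
      if (y) {
        qcoeff[rc] = y;
        zbin_boost_ptr = zrun_zbin_boost;  /* nonzero kept: reset boost */
      }
    }
  }
}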
--- /dev/null
- %include "asm_enc_offsets.asm"
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license and patent
+; grant that can be found in the LICENSE file in the root of the source
+; tree. All contributing project authors may be found in the AUTHORS
+; file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
++%include "vp9_asm_enc_offsets.asm"
+
+
+; void vp9_regular_quantize_b_sse4 | arg
+; (BLOCK *b, | 0
+; BLOCKD *d) | 1
+
+global sym(vp9_regular_quantize_b_sse4)
+sym(vp9_regular_quantize_b_sse4):
+
+%if ABI_IS_32BIT
+ push rbp
+ mov rbp, rsp
+ GET_GOT rbx
+ push rdi
+ push rsi
+
+ ALIGN_STACK 16, rax
+ %define qcoeff 0 ; 32
+ %define stack_size 32
+ sub rsp, stack_size
+%else
+ %ifidn __OUTPUT_FORMAT__,x64
+ SAVE_XMM 8, u
+ push rdi
+ push rsi
+ %endif
+%endif
+ ; end prolog
+
+%if ABI_IS_32BIT
+ mov rdi, arg(0) ; BLOCK *b
+ mov rsi, arg(1) ; BLOCKD *d
+%else
+ %ifidn __OUTPUT_FORMAT__,x64
+ mov rdi, rcx ; BLOCK *b
+ mov rsi, rdx ; BLOCKD *d
+ %else
+ ;mov rdi, rdi ; BLOCK *b
+ ;mov rsi, rsi ; BLOCKD *d
+ %endif
+%endif
+
+ mov rax, [rdi + vp9_block_coeff]
+ mov rcx, [rdi + vp9_block_zbin]
+ mov rdx, [rdi + vp9_block_round]
+ movd xmm7, [rdi + vp9_block_zbin_extra]
+
+ ; z
+ movdqa xmm0, [rax]
+ movdqa xmm1, [rax + 16]
+
+ ; duplicate zbin_oq_value
+ pshuflw xmm7, xmm7, 0
+ punpcklwd xmm7, xmm7
+
+ movdqa xmm2, xmm0
+ movdqa xmm3, xmm1
+
+ ; sz
+ psraw xmm0, 15
+ psraw xmm1, 15
+
+ ; (z ^ sz)
+ pxor xmm2, xmm0
+ pxor xmm3, xmm1
+
+ ; x = abs(z)
+ psubw xmm2, xmm0
+ psubw xmm3, xmm1
+
+ ; zbin
+ movdqa xmm4, [rcx]
+ movdqa xmm5, [rcx + 16]
+
+ ; *zbin_ptr + zbin_oq_value
+ paddw xmm4, xmm7
+ paddw xmm5, xmm7
+
+ movdqa xmm6, xmm2
+ movdqa xmm7, xmm3
+
+ ; x - (*zbin_ptr + zbin_oq_value)
+ psubw xmm6, xmm4
+ psubw xmm7, xmm5
+
+ ; round
+ movdqa xmm4, [rdx]
+ movdqa xmm5, [rdx + 16]
+
+ mov rax, [rdi + vp9_block_quant_shift]
+ mov rcx, [rdi + vp9_block_quant]
+ mov rdx, [rdi + vp9_block_zrun_zbin_boost]
+
+ ; x + round
+ paddw xmm2, xmm4
+ paddw xmm3, xmm5
+
+ ; quant
+ movdqa xmm4, [rcx]
+ movdqa xmm5, [rcx + 16]
+
+ ; y = x * quant_ptr >> 16
+ pmulhw xmm4, xmm2
+ pmulhw xmm5, xmm3
+
+ ; y += x
+ paddw xmm2, xmm4
+ paddw xmm3, xmm5
+
+ pxor xmm4, xmm4
+%if ABI_IS_32BIT
+ movdqa [rsp + qcoeff], xmm4
+ movdqa [rsp + qcoeff + 16], xmm4
+%else
+ pxor xmm8, xmm8
+%endif
+
+ ; quant_shift
+ movdqa xmm5, [rax]
+
+ ; zrun_zbin_boost
+ mov rax, rdx
+
+%macro ZIGZAG_LOOP 5
+ ; x
+ pextrw ecx, %4, %2
+
+ ; if (x >= zbin)
+ sub cx, WORD PTR[rdx] ; x - zbin
+ lea rdx, [rdx + 2] ; zbin_boost_ptr++
+ jl .rq_zigzag_loop_%1 ; x < zbin
+
+ pextrw edi, %3, %2 ; y
+
+ ; downshift by quant_shift[rc]
+ pextrb ecx, xmm5, %1 ; quant_shift[rc]
+ sar edi, cl ; also sets Z bit
+ je .rq_zigzag_loop_%1 ; !y
+%if ABI_IS_32BIT
+ mov WORD PTR[rsp + qcoeff + %1 *2], di
+%else
+ pinsrw %5, edi, %2 ; qcoeff[rc]
+%endif
+ mov rdx, rax ; reset to b->zrun_zbin_boost
+.rq_zigzag_loop_%1:
+%endmacro
+; in vp9_default_zig_zag1d order: see vp9/common/entropy.c
+ZIGZAG_LOOP 0, 0, xmm2, xmm6, xmm4
+ZIGZAG_LOOP 1, 1, xmm2, xmm6, xmm4
+ZIGZAG_LOOP 4, 4, xmm2, xmm6, xmm4
+ZIGZAG_LOOP 8, 0, xmm3, xmm7, xmm8
+ZIGZAG_LOOP 5, 5, xmm2, xmm6, xmm4
+ZIGZAG_LOOP 2, 2, xmm2, xmm6, xmm4
+ZIGZAG_LOOP 3, 3, xmm2, xmm6, xmm4
+ZIGZAG_LOOP 6, 6, xmm2, xmm6, xmm4
+ZIGZAG_LOOP 9, 1, xmm3, xmm7, xmm8
+ZIGZAG_LOOP 12, 4, xmm3, xmm7, xmm8
+ZIGZAG_LOOP 13, 5, xmm3, xmm7, xmm8
+ZIGZAG_LOOP 10, 2, xmm3, xmm7, xmm8
+ZIGZAG_LOOP 7, 7, xmm2, xmm6, xmm4
+ZIGZAG_LOOP 11, 3, xmm3, xmm7, xmm8
+ZIGZAG_LOOP 14, 6, xmm3, xmm7, xmm8
+ZIGZAG_LOOP 15, 7, xmm3, xmm7, xmm8
+
+ mov rcx, [rsi + vp9_blockd_dequant]
+ mov rdi, [rsi + vp9_blockd_dqcoeff]
+
+%if ABI_IS_32BIT
+ movdqa xmm4, [rsp + qcoeff]
+ movdqa xmm5, [rsp + qcoeff + 16]
+%else
+ %define xmm5 xmm8
+%endif
+
+ ; y ^ sz
+ pxor xmm4, xmm0
+ pxor xmm5, xmm1
+ ; x = (y ^ sz) - sz
+ psubw xmm4, xmm0
+ psubw xmm5, xmm1
+
+ ; dequant
+ movdqa xmm0, [rcx]
+ movdqa xmm1, [rcx + 16]
+
+ mov rcx, [rsi + vp9_blockd_qcoeff]
+
+ pmullw xmm0, xmm4
+ pmullw xmm1, xmm5
+
+ ; store qcoeff
+ movdqa [rcx], xmm4
+ movdqa [rcx + 16], xmm5
+
+ ; store dqcoeff
+ movdqa [rdi], xmm0
+ movdqa [rdi + 16], xmm1
+
+ ; select the last value (in zig_zag order) for EOB
+ pxor xmm6, xmm6
+ pcmpeqw xmm4, xmm6
+ pcmpeqw xmm5, xmm6
+
+ packsswb xmm4, xmm5
+ pshufb xmm4, [GLOBAL(zig_zag1d)]
+ pmovmskb edx, xmm4
+ xor rdi, rdi
+ mov eax, -1
+ xor dx, ax
+ bsr eax, edx
+ sub edi, edx
+ sar edi, 31
+ add eax, 1
+ and eax, edi
+
+ mov [rsi + vp9_blockd_eob], eax
+
+ ; begin epilog
+%if ABI_IS_32BIT
+ add rsp, stack_size
+ pop rsp
+
+ pop rsi
+ pop rdi
+ RESTORE_GOT
+ pop rbp
+%else
+ %undef xmm5
+ %ifidn __OUTPUT_FORMAT__,x64
+ pop rsi
+ pop rdi
+ RESTORE_XMM
+ %endif
+%endif
+
+ ret
+
+SECTION_RODATA
+align 16
+; vp9/common/entropy.c: vp9_default_zig_zag1d
+zig_zag1d:
+ db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
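The EOB computation at the end of this routine is branch-free: the 16 qcoeff words are compared against zero, packed to bytes, permuted into scan order with pshufb and the zig_zag1d table, turned into a 16-bit mask by pmovmskb, inverted, and bsr then locates the last nonzero coefficient in scan order; the sub/sar/and tail forces eob to 0 when every coefficient is zero, since bsr is undefined on a zero input. A C restatement of the same idea:

/* C restatement of the pcmpeqw/packsswb/pshufb/pmovmskb/bsr sequence;
 * zig_zag1d[] is the byte table above. */
static int eob_from_qcoeff_model(const short *qcoeff,
                                 const unsigned char *zig_zag1d) {
  unsigned int mask = 0;
  int i;
  for (i = 0; i < 16; i++)
    if (qcoeff[zig_zag1d[i]] != 0)
      mask |= 1u << i;                 /* bit i: nonzero at scan pos i  */
  if (!mask)
    return 0;                          /* all-zero block: eob = 0       */
  for (i = 15; !(mask & (1u << i)); i--)
    ;                                  /* highest set bit, like bsr     */
  return i + 1;
}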
--- /dev/null
- %include "asm_enc_offsets.asm"
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license and patent
+; grant that can be found in the LICENSE file in the root of the source
+; tree. All contributing project authors may be found in the AUTHORS
+; file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
++%include "vp9_asm_enc_offsets.asm"
+
+
+; void vp9_fast_quantize_b_ssse3 | arg
+; (BLOCK *b, | 0
+; BLOCKD *d) | 1
+;
+
+global sym(vp9_fast_quantize_b_ssse3)
+sym(vp9_fast_quantize_b_ssse3):
+ push rbp
+ mov rbp, rsp
+ GET_GOT rbx
+
+%if ABI_IS_32BIT
+ push rdi
+ push rsi
+%else
+ %ifidn __OUTPUT_FORMAT__,x64
+ push rdi
+ push rsi
+ %endif
+%endif
+ ; end prolog
+
+%if ABI_IS_32BIT
+ mov rdi, arg(0) ; BLOCK *b
+ mov rsi, arg(1) ; BLOCKD *d
+%else
+ %ifidn __OUTPUT_FORMAT__,x64
+ mov rdi, rcx ; BLOCK *b
+ mov rsi, rdx ; BLOCKD *d
+ %else
+ ;mov rdi, rdi ; BLOCK *b
+ ;mov rsi, rsi ; BLOCKD *d
+ %endif
+%endif
+
+ mov rax, [rdi + vp9_block_coeff]
+ mov rcx, [rdi + vp9_block_round]
+ mov rdx, [rdi + vp9_block_quant_fast]
+
+ ; coeff
+ movdqa xmm0, [rax]
+ movdqa xmm4, [rax + 16]
+
+ ; round
+ movdqa xmm2, [rcx]
+ movdqa xmm3, [rcx + 16]
+
+ movdqa xmm1, xmm0
+ movdqa xmm5, xmm4
+
+ ; sz = z >> 15
+ psraw xmm0, 15
+ psraw xmm4, 15
+
+ pabsw xmm1, xmm1
+ pabsw xmm5, xmm5
+
+ paddw xmm1, xmm2
+ paddw xmm5, xmm3
+
+ ; quant_fast
+ pmulhw xmm1, [rdx]
+ pmulhw xmm5, [rdx + 16]
+
+ mov rax, [rsi + vp9_blockd_qcoeff]
+ mov rdi, [rsi + vp9_blockd_dequant]
+ mov rcx, [rsi + vp9_blockd_dqcoeff]
+
+ pxor xmm1, xmm0
+ pxor xmm5, xmm4
+ psubw xmm1, xmm0
+ psubw xmm5, xmm4
+
+ movdqa [rax], xmm1
+ movdqa [rax + 16], xmm5
+
+ movdqa xmm2, [rdi]
+ movdqa xmm3, [rdi + 16]
+
+ pxor xmm4, xmm4
+ pmullw xmm2, xmm1
+ pmullw xmm3, xmm5
+
+ pcmpeqw xmm1, xmm4 ;non zero mask
+ pcmpeqw xmm5, xmm4 ;non zero mask
+ packsswb xmm1, xmm5
+ pshufb xmm1, [GLOBAL(zz_shuf)]
+
+ pmovmskb edx, xmm1
+
+ xor rdi, rdi
+ mov eax, -1
+ xor dx, ax ;flip the bits for bsr
+ bsr eax, edx
+
+ movdqa [rcx], xmm2 ;store dqcoeff
+ movdqa [rcx + 16], xmm3 ;store dqcoeff
+
+ sub edi, edx ;check for all zeros in bit mask
+ sar edi, 31 ;0 or -1
+ add eax, 1
+ and eax, edi ;if the bit mask was all zero,
+ ;then eob = 0
+ mov [rsi + vp9_blockd_eob], eax
+
+ ; begin epilog
+%if ABI_IS_32BIT
+ pop rsi
+ pop rdi
+%else
+ %ifidn __OUTPUT_FORMAT__,x64
+ pop rsi
+ pop rdi
+ %endif
+%endif
+
+ RESTORE_GOT
+ pop rbp
+ ret
+
+SECTION_RODATA
+align 16
+zz_shuf:
+ db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
--- /dev/null
+##
+## Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+##
+## Use of this source code is governed by a BSD-style license
+## that can be found in the LICENSE file in the root of the source
+## tree. An additional intellectual property rights grant can be found
+## in the file PATENTS. All contributing project authors may
+## be found in the AUTHORS file in the root of the source tree.
+##
+
+VP9_COMMON_SRCS-yes += vp9_common.mk
+VP9_COMMON_SRCS-yes += common/type_aliases.h
+VP9_COMMON_SRCS-yes += common/pragmas.h
+VP9_COMMON_SRCS-yes += common/ppflags.h
+VP9_COMMON_SRCS-yes += common/onyx.h
+VP9_COMMON_SRCS-yes += common/onyxd.h
+VP9_COMMON_SRCS-yes += common/alloccommon.c
+VP9_COMMON_SRCS-yes += common/asm_com_offsets.c
+VP9_COMMON_SRCS-yes += common/blockd.c
+VP9_COMMON_SRCS-yes += common/coefupdateprobs.h
+VP9_COMMON_SRCS-yes += common/debugmodes.c
+VP9_COMMON_SRCS-yes += common/entropy.c
+VP9_COMMON_SRCS-yes += common/entropymode.c
+VP9_COMMON_SRCS-yes += common/entropymv.c
+VP9_COMMON_SRCS-yes += common/extend.c
+VP9_COMMON_SRCS-yes += common/filter.c
+VP9_COMMON_SRCS-yes += common/filter.h
+VP9_COMMON_SRCS-yes += common/findnearmv.c
+VP9_COMMON_SRCS-yes += common/generic/systemdependent.c
+VP9_COMMON_SRCS-yes += common/idctllm.c
+VP9_COMMON_SRCS-yes += common/alloccommon.h
+VP9_COMMON_SRCS-yes += common/blockd.h
+VP9_COMMON_SRCS-yes += common/common.h
+VP9_COMMON_SRCS-yes += common/common_types.h
+VP9_COMMON_SRCS-yes += common/entropy.h
+VP9_COMMON_SRCS-yes += common/entropymode.h
+VP9_COMMON_SRCS-yes += common/entropymv.h
+VP9_COMMON_SRCS-yes += common/extend.h
+VP9_COMMON_SRCS-yes += common/findnearmv.h
+VP9_COMMON_SRCS-yes += common/header.h
+VP9_COMMON_SRCS-yes += common/idct.h
+VP9_COMMON_SRCS-yes += common/invtrans.h
+VP9_COMMON_SRCS-yes += common/loopfilter.h
+VP9_COMMON_SRCS-yes += common/modecont.h
+VP9_COMMON_SRCS-yes += common/mv.h
+VP9_COMMON_SRCS-yes += common/onyxc_int.h
+VP9_COMMON_SRCS-yes += common/pred_common.h
+VP9_COMMON_SRCS-yes += common/pred_common.c
+VP9_COMMON_SRCS-yes += common/quant_common.h
+VP9_COMMON_SRCS-yes += common/reconinter.h
+VP9_COMMON_SRCS-yes += common/reconintra.h
+VP9_COMMON_SRCS-yes += common/reconintra4x4.h
+VP9_COMMON_SRCS-yes += common/rtcd.c
+VP9_COMMON_SRCS-yes += common/rtcd_defs.sh
+VP9_COMMON_SRCS-yes += common/sadmxn.h
+VP9_COMMON_SRCS-yes += common/seg_common.h
+VP9_COMMON_SRCS-yes += common/seg_common.c
+VP9_COMMON_SRCS-yes += common/setupintrarecon.h
+VP9_COMMON_SRCS-yes += common/subpixel.h
+VP9_COMMON_SRCS-yes += common/swapyv12buffer.h
+VP9_COMMON_SRCS-yes += common/systemdependent.h
+VP9_COMMON_SRCS-yes += common/treecoder.h
+VP9_COMMON_SRCS-yes += common/invtrans.c
+VP9_COMMON_SRCS-yes += common/loopfilter.c
+VP9_COMMON_SRCS-yes += common/loopfilter_filters.c
+VP9_COMMON_SRCS-yes += common/mbpitch.c
+VP9_COMMON_SRCS-yes += common/modecont.c
+VP9_COMMON_SRCS-yes += common/modecontext.c
+VP9_COMMON_SRCS-yes += common/mvref_common.c
+VP9_COMMON_SRCS-yes += common/mvref_common.h
+VP9_COMMON_SRCS-yes += common/quant_common.c
+VP9_COMMON_SRCS-yes += common/recon.c
+VP9_COMMON_SRCS-yes += common/reconinter.c
+VP9_COMMON_SRCS-yes += common/reconintra.c
+VP9_COMMON_SRCS-yes += common/reconintra4x4.c
+VP9_COMMON_SRCS-yes += common/setupintrarecon.c
+VP9_COMMON_SRCS-yes += common/swapyv12buffer.c
+VP9_COMMON_SRCS-$(CONFIG_POSTPROC_VISUALIZER) += common/textblit.c
+VP9_COMMON_SRCS-yes += common/treecoder.c
+VP9_COMMON_SRCS-$(CONFIG_IMPLICIT_SEGMENTATION) += common/implicit_segmentation.c
+
+VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/idct_x86.h
+VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/subpixel_x86.h
+VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/loopfilter_x86.h
+VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/postproc_x86.h
+VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/x86_systemdependent.c
+VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp8_asm_stubs.c
+VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/loopfilter_x86.c
+VP9_COMMON_SRCS-$(CONFIG_POSTPROC) += common/postproc.h
+VP9_COMMON_SRCS-$(CONFIG_POSTPROC) += common/postproc.c
+VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/idctllm_mmx.asm
+VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/iwalsh_mmx.asm
+VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/recon_mmx.asm
+VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/subpixel_mmx.asm
+VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/loopfilter_mmx.asm
+VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idctllm_sse2.asm
+VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/recon_sse2.asm
+VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/recon_wrapper_sse2.c
+VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/subpixel_sse2.asm
+VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/loopfilter_sse2.asm
+VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/iwalsh_sse2.asm
+VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/subpixel_8t_ssse3.asm
+VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/subpixel_ssse3.asm
+ifeq ($(CONFIG_POSTPROC),yes)
+VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/postproc_mmx.asm
+VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/postproc_sse2.asm
+endif
+
+# common (c)
+ifeq ($(CONFIG_CSM),yes)
+VP9_COMMON_SRCS-yes += common/maskingmv.c
+VP9_COMMON_SRCS-$(HAVE_SSE3) += common/x86/mask_sse3.asm
+endif
+
+VP9_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/filter_sse4.c
+ifeq ($(HAVE_SSE4_1),yes)
+vp9/common/x86/filter_sse4.c.o: CFLAGS += -msse4
+endif
+
+VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/filter_sse2.c
+VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/sadmxn_x86.c
+ifeq ($(HAVE_SSE2),yes)
+vp9/common/x86/filter_sse2.c.o: CFLAGS += -msse2
+vp9/common/x86/loopfilter_x86.c.o: CFLAGS += -msse2
+vp9/common/x86/sadmxn_x86.c.o: CFLAGS += -msse2
+endif
+
+VP9_COMMON_SRCS-$(ARCH_ARM) += common/arm/arm_systemdependent.c
+VP9_COMMON_SRCS-$(ARCH_ARM) += common/arm/bilinearfilter_arm.c
+VP9_COMMON_SRCS-$(ARCH_ARM) += common/arm/bilinearfilter_arm.h
+VP9_COMMON_SRCS-$(ARCH_ARM) += common/arm/filter_arm.c
+VP9_COMMON_SRCS-$(ARCH_ARM) += common/arm/idct_arm.h
+VP9_COMMON_SRCS-$(ARCH_ARM) += common/arm/loopfilter_arm.c
+VP9_COMMON_SRCS-$(ARCH_ARM) += common/arm/loopfilter_arm.h
+VP9_COMMON_SRCS-$(ARCH_ARM) += common/arm/recon_arm.h
+VP9_COMMON_SRCS-$(ARCH_ARM) += common/arm/reconintra_arm.c
+VP9_COMMON_SRCS-$(ARCH_ARM) += common/arm/subpixel_arm.h
+
+# common (armv6)
+VP9_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/bilinearfilter_v6$(ASM)
+VP9_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/copymem8x4_v6$(ASM)
+VP9_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/copymem8x8_v6$(ASM)
+VP9_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/copymem16x16_v6$(ASM)
+VP9_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/dc_only_idct_add_v6$(ASM)
+VP9_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/iwalsh_v6$(ASM)
+VP9_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/filter_v6$(ASM)
+VP9_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/idct_v6$(ASM)
+VP9_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/loopfilter_v6$(ASM)
+VP9_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/recon_v6$(ASM)
+VP9_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/simpleloopfilter_v6$(ASM)
+VP9_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/sixtappredict8x4_v6$(ASM)
+
+# common (neon)
+VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/bilinearpredict4x4_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/bilinearpredict8x4_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/bilinearpredict8x8_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/bilinearpredict16x16_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/copymem8x4_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/copymem8x8_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/copymem16x16_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/dc_only_idct_add_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/iwalsh_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/loopfilter_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/loopfiltersimplehorizontaledge_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/loopfiltersimpleverticaledge_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/mbloopfilter_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/recon2b_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/recon4b_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/reconb_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/shortidct4x4llm_1_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/shortidct4x4llm_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/sixtappredict4x4_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/sixtappredict8x4_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/sixtappredict8x8_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/sixtappredict16x16_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/recon16x16mb_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/buildintrapredictorsmby_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/save_neon_reg$(ASM)
+VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/recon_neon.c
++
++
++$(eval $(call asm_offsets_template,\
++ vp9_asm_com_offsets.asm, $(VP9_PREFIX)common/asm_com_offsets.c))
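The asm_offsets_template hook generates vp9_asm_com_offsets.asm from the C source named in its second argument: structure offsets are recorded as named constants in C, compiled for the target, and extracted into assembler syntax, which is how the assembly earlier in this patch can write [r0, #vp8_block_coeff] without hard-coding struct layout. A minimal sketch of what such an offsets source looks like, assuming a simple DEFINE-style macro and an illustrative include path (the project's real macro and extraction tooling live elsewhere in the tree):

/* Hypothetical asm-offsets source. Each DEFINE becomes a named
 * constant in the generated .asm include. */
#include <stddef.h>
#include "vp9/encoder/block.h"          /* illustrative include path */

#define DEFINE(sym, val) const int sym = (val);

DEFINE(vp8_block_coeff,    offsetof(BLOCK,  coeff));
DEFINE(vp8_block_round,    offsetof(BLOCK,  round));
DEFINE(vp8_blockd_qcoeff,  offsetof(BLOCKD, qcoeff));
DEFINE(vp8_blockd_dqcoeff, offsetof(BLOCKD, dqcoeff));
DEFINE(vp8_blockd_eob,     offsetof(BLOCKD, eob));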
--- /dev/null
-
- include $(SRC_PATH_BARE)/$(VP9_PREFIX)vp9_common.mk
-
+##
+## Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+##
+## Use of this source code is governed by a BSD-style license
+## that can be found in the LICENSE file in the root of the source
+## tree. An additional intellectual property rights grant can be found
+## in the file PATENTS. All contributing project authors may
+## be found in the AUTHORS file in the root of the source tree.
+##
+
+VP9_CX_EXPORTS += exports_enc
+
+VP9_CX_SRCS-yes += $(VP9_COMMON_SRCS-yes)
+VP9_CX_SRCS-no += $(VP9_COMMON_SRCS-no)
+VP9_CX_SRCS_REMOVE-yes += $(VP9_COMMON_SRCS_REMOVE-yes)
+VP9_CX_SRCS_REMOVE-no += $(VP9_COMMON_SRCS_REMOVE-no)
+
+ifeq ($(ARCH_ARM),yes)
+ include $(SRC_PATH_BARE)/$(VP9_PREFIX)vp9cx_arm.mk
+endif
+
+VP9_CX_SRCS-yes += vp9_cx_iface.c
+
+# encoder
+#INCLUDES += algo/vpx_common/vpx_mem/include
+#INCLUDES += common
+#INCLUDES += common
+#INCLUDES += common
+#INCLUDES += algo/vpx_ref/cpu_id/include
+#INCLUDES += common
+#INCLUDES += encoder
+
+VP9_CX_SRCS-yes += encoder/asm_enc_offsets.c
+VP9_CX_SRCS-yes += encoder/bitstream.c
+VP9_CX_SRCS-yes += encoder/boolhuff.c
+VP9_CX_SRCS-yes += encoder/dct.c
+VP9_CX_SRCS-yes += encoder/encodeframe.c
+VP9_CX_SRCS-yes += encoder/encodeintra.c
+VP9_CX_SRCS-yes += encoder/encodemb.c
+VP9_CX_SRCS-yes += encoder/encodemv.c
+VP9_CX_SRCS-yes += encoder/firstpass.c
+VP9_CX_SRCS-yes += encoder/generic/csystemdependent.c
+VP9_CX_SRCS-yes += encoder/block.h
+VP9_CX_SRCS-yes += encoder/boolhuff.h
+VP9_CX_SRCS-yes += encoder/bitstream.h
+VP9_CX_SRCS-yes += encoder/encodeintra.h
+VP9_CX_SRCS-yes += encoder/encodemb.h
+VP9_CX_SRCS-yes += encoder/encodemv.h
+VP9_CX_SRCS-yes += encoder/firstpass.h
+VP9_CX_SRCS-yes += encoder/lookahead.c
+VP9_CX_SRCS-yes += encoder/lookahead.h
+VP9_CX_SRCS-yes += encoder/mcomp.h
+VP9_CX_SRCS-yes += encoder/modecosts.h
+VP9_CX_SRCS-yes += encoder/onyx_int.h
+VP9_CX_SRCS-yes += encoder/psnr.h
+VP9_CX_SRCS-yes += encoder/quantize.h
+VP9_CX_SRCS-yes += encoder/ratectrl.h
+VP9_CX_SRCS-yes += encoder/rdopt.h
+VP9_CX_SRCS-yes += encoder/tokenize.h
+VP9_CX_SRCS-yes += encoder/treewriter.h
+VP9_CX_SRCS-yes += encoder/variance.h
+VP9_CX_SRCS-yes += encoder/mcomp.c
+VP9_CX_SRCS-yes += encoder/modecosts.c
+VP9_CX_SRCS-yes += encoder/onyx_if.c
+VP9_CX_SRCS-yes += encoder/picklpf.c
+VP9_CX_SRCS-yes += encoder/psnr.c
+VP9_CX_SRCS-yes += encoder/quantize.c
+VP9_CX_SRCS-yes += encoder/ratectrl.c
+VP9_CX_SRCS-yes += encoder/rdopt.c
+VP9_CX_SRCS-yes += encoder/sad_c.c
+VP9_CX_SRCS-yes += encoder/satd_c.c
+VP9_CX_SRCS-yes += encoder/segmentation.c
+VP9_CX_SRCS-yes += encoder/segmentation.h
+VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/ssim.c
+VP9_CX_SRCS-yes += encoder/tokenize.c
+VP9_CX_SRCS-yes += encoder/treewriter.c
+VP9_CX_SRCS-yes += encoder/variance_c.c
+ifeq ($(CONFIG_POSTPROC),yes)
+VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/postproc.h
+VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/postproc.c
+endif
+VP9_CX_SRCS-yes += encoder/temporal_filter.c
+VP9_CX_SRCS-yes += encoder/temporal_filter.h
+VP9_CX_SRCS-yes += encoder/mbgraph.c
+VP9_CX_SRCS-yes += encoder/mbgraph.h
+
+
+VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/mcomp_x86.h
+VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_x86.h
+VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/temporal_filter_x86.h
+VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/x86_csystemdependent.c
+VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/variance_mmx.c
+VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/variance_impl_mmx.asm
+VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/sad_mmx.asm
+VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/dct_mmx.asm
+VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/subtract_mmx.asm
+VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm
+VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/variance_sse2.c
+VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/variance_impl_sse2.asm
+VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/sad_sse2.asm
+VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/fwalsh_sse2.asm
+VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.asm
+VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/subtract_sse2.asm
+VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm
+VP9_CX_SRCS-$(HAVE_SSE3) += encoder/x86/sad_sse3.asm
+VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/sad_ssse3.asm
+VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/variance_ssse3.c
+VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/variance_impl_ssse3.asm
+VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/quantize_ssse3.asm
+VP9_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/sad_sse4.asm
+VP9_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/quantize_sse4.asm
+VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_mmx.asm
+VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodeopt.asm
+VP9_CX_SRCS-$(ARCH_X86_64) += encoder/x86/ssim_opt.asm
+
+
+VP9_CX_SRCS-yes := $(filter-out $(VP9_CX_SRCS_REMOVE-yes),$(VP9_CX_SRCS-yes))
++
++$(eval $(call asm_offsets_template,\
++ vp9_asm_enc_offsets.asm, $(VP9_PREFIX)encoder/asm_enc_offsets.c))
--- /dev/null
-
- include $(SRC_PATH_BARE)/$(VP9_PREFIX)vp9_common.mk
-
+##
+## Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+##
+## Use of this source code is governed by a BSD-style license
+## that can be found in the LICENSE file in the root of the source
+## tree. An additional intellectual property rights grant can be found
+## in the file PATENTS. All contributing project authors may
+## be found in the AUTHORS file in the root of the source tree.
+##
+
+VP9_DX_EXPORTS += exports_dec
+
+VP9_DX_SRCS-yes += $(VP9_COMMON_SRCS-yes)
+VP9_DX_SRCS-no += $(VP9_COMMON_SRCS-no)
+VP9_DX_SRCS_REMOVE-yes += $(VP9_COMMON_SRCS_REMOVE-yes)
+VP9_DX_SRCS_REMOVE-no += $(VP9_COMMON_SRCS_REMOVE-no)
+
+ifeq ($(ARCH_ARM),yes)
+ include $(SRC_PATH_BARE)/$(VP9_PREFIX)vp9dx_arm.mk
+endif
+
+VP9_DX_SRCS-yes += vp9_dx_iface.c
+
+# common
+#define ARM
+#define DISABLE_THREAD
+
+#INCLUDES += algo/vpx_common/vpx_mem/include
+#INCLUDES += common
+#INCLUDES += common
+#INCLUDES += common
+#INCLUDES += common
+#INCLUDES += decoder
+
+
+
+# decoder
+#define ARM
+#define DISABLE_THREAD
+
+#INCLUDES += algo/vpx_common/vpx_mem/include
+#INCLUDES += common
+#INCLUDES += common
+#INCLUDES += common
+#INCLUDES += common
+#INCLUDES += decoder
+
+VP9_DX_SRCS-yes += decoder/asm_dec_offsets.c
+VP9_DX_SRCS-yes += decoder/dboolhuff.c
+VP9_DX_SRCS-yes += decoder/decodemv.c
+VP9_DX_SRCS-yes += decoder/decodframe.c
+VP9_DX_SRCS-yes += decoder/dequantize.c
+VP9_DX_SRCS-yes += decoder/detokenize.c
+VP9_DX_SRCS-yes += decoder/dboolhuff.h
+VP9_DX_SRCS-yes += decoder/decodemv.h
+VP9_DX_SRCS-yes += decoder/dequantize.h
+VP9_DX_SRCS-yes += decoder/detokenize.h
+VP9_DX_SRCS-yes += decoder/onyxd_int.h
+VP9_DX_SRCS-yes += decoder/treereader.h
+VP9_DX_SRCS-yes += decoder/onyxd_if.c
+VP9_DX_SRCS-yes += decoder/idct_blk.c
+
+VP9_DX_SRCS-yes := $(filter-out $(VP9_DX_SRCS_REMOVE-yes),$(VP9_DX_SRCS-yes))
+
+VP9_DX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += decoder/x86/x86_dsystemdependent.c
+VP9_DX_SRCS-$(HAVE_MMX) += decoder/x86/dequantize_mmx.asm
+VP9_DX_SRCS-$(HAVE_MMX) += decoder/x86/idct_blk_mmx.c
+VP9_DX_SRCS-$(HAVE_SSE2) += decoder/x86/idct_blk_sse2.c
++
++$(eval $(call asm_offsets_template,\
++ vp9_asm_dec_offsets.asm, $(VP9_PREFIX)decoder/asm_dec_offsets.c))
/* This function should actually be a no-op. There is no way to adjust any of
* these because the RTCD tables do not exist: the functions are called
* statically */
- int flags;
- int mask;
- if (!arm_cpu_env_flags(&flags))
- {
- return flags;
- }
- mask = arm_cpu_env_mask();
+ int flags;
+ int mask;
+ if (!arm_cpu_env_flags(&flags)) {
+ return flags;
+ }
+ mask = arm_cpu_env_mask();
#if HAVE_EDSP
- flags |= HAS_EDSP;
+ flags |= HAS_EDSP;
#endif /* HAVE_EDSP */
#if HAVE_MEDIA
- flags |= HAS_MEDIA;
+ flags |= HAS_MEDIA;
#endif /* HAVE_MEDIA */
#if HAVE_NEON
- flags |= HAS_NEON;
+ flags |= HAS_NEON;
#endif /* HAVE_NEON */
- return flags & mask;
+ return flags & mask;
}
#elif defined(_MSC_VER) /* end !CONFIG_RUNTIME_CPU_DETECT */
/*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/
#define WIN32_LEAN_AND_MEAN
#elif defined(__ANDROID__) /* end _MSC_VER */
#include <cpu-features.h>
-int arm_cpu_caps(void)
-{
- int flags;
- int mask;
- uint64_t features;
- if (!arm_cpu_env_flags(&flags))
- {
- return flags;
- }
- mask = arm_cpu_env_mask();
- features = android_getCpuFeatures();
+int arm_cpu_caps(void) {
+ int flags;
+ int mask;
+ uint64_t features;
+ if (!arm_cpu_env_flags(&flags)) {
+ return flags;
+ }
+ mask = arm_cpu_env_mask();
+ features = android_getCpuFeatures();
#if HAVE_EDSP
- flags |= HAS_EDSP;
+ flags |= HAS_EDSP;
#endif /* HAVE_EDSP */
#if HAVE_MEDIA
- flags |= HAS_MEDIA;
+ flags |= HAS_MEDIA;
#endif /* HAVE_MEDIA */
#if HAVE_NEON
- if (features & ANDROID_CPU_ARM_FEATURE_NEON)
- flags |= HAS_NEON;
+ if (features & ANDROID_CPU_ARM_FEATURE_NEON)
+ flags |= HAS_NEON;
#endif /* HAVE_NEON */
- return flags & mask;
+ return flags & mask;
}
#elif defined(__linux__) /* end __ANDROID__ */
#include <stdio.h>
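Callers consume the HAS_* bits returned by arm_cpu_caps() to select a specialised code path once at startup; a hedged sketch of that dispatch pattern (the quantize function names are placeholders, not libvpx symbols):

/* Illustrative runtime dispatch on the capability bits. */
extern void quantize_c(short *coeff);      /* hypothetical baseline  */
extern void quantize_neon(short *coeff);   /* hypothetical NEON path */

static void (*quantize)(short *coeff);

static void setup_rtcd_example(void) {
  quantize = (arm_cpu_caps() & HAS_NEON) ? quantize_neon : quantize_c;
}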
#if defined(__GNUC__) && __GNUC__
static void
-x87_set_control_word(unsigned short mode)
-{
- __asm__ __volatile__("fldcw %0" : : "m"(*&mode));
+x87_set_control_word(unsigned short mode) {
+ __asm__ __volatile__("fldcw %0" : : "m"( *&mode));
}
static unsigned short
-x87_get_control_word(void)
-{
- unsigned short mode;
- __asm__ __volatile__("fstcw %0\n\t":"=m"(*&mode):);
+x87_get_control_word(void) {
+ unsigned short mode;
+ __asm__ __volatile__("fstcw %0\n\t":"=m"( *&mode):);
-    return mode;
+  return mode;
}
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
static void
-x87_set_control_word(unsigned short mode)
-{
-    asm volatile("fldcw %0" : : "m"(*&mode));
+x87_set_control_word(unsigned short mode) {
+  asm volatile("fldcw %0" : : "m"( *&mode));
}
static unsigned short
-x87_get_control_word(void)
-{
-    unsigned short mode;
-    asm volatile("fstcw %0\n\t":"=m"(*&mode):);
-    return mode;
+x87_get_control_word(void) {
+  unsigned short mode;
+  asm volatile("fstcw %0\n\t":"=m"( *&mode):);
+  return mode;
}
#elif ARCH_X86_64
/* No fldcw intrinsics on Windows x64, punt to external asm */
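These accessors let callers pin the x87 unit to a known precision mode around rounding-sensitive code. A usage sketch, assuming the standard control-word layout (bits 8 and 9 are the precision-control field; 0x200 selects 53-bit doubles); the wrapper itself is illustrative, not a libvpx API:

/* Illustrative save/set/restore of the x87 precision-control field. */
static void run_with_double_precision(void (*fn)(void)) {
  unsigned short saved = x87_get_control_word();
  x87_set_control_word((unsigned short)((saved & ~0x300) | 0x200));
  fn();                                /* runs with 53-bit precision */
  x87_set_control_word(saved);         /* restore the caller's mode  */
}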
#include "vpx_config.h"
#include "vpx/vpx_decoder.h"
#include "vpx_ports/vpx_timer.h"
- #if CONFIG_VP9_DECODER
-#if CONFIG_VP8_DECODER
++#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER
#include "vpx/vp8dx.h"
#endif
#if CONFIG_MD5
static const char *exec_name;
#define VP8_FOURCC (0x00385056)
-static const struct
-{
- char const *name;
- vpx_codec_iface_t *iface;
- unsigned int fourcc;
- unsigned int fourcc_mask;
-} ifaces[] =
-{
+static const struct {
+ char const *name;
+ const vpx_codec_iface_t *(*iface)(void);
+ unsigned int fourcc;
+ unsigned int fourcc_mask;
+} ifaces[] = {
+ #if CONFIG_VP8_DECODER
- {"vp8", &vpx_codec_vp8_dx_algo, VP8_FOURCC, 0x00FFFFFF},
++ {"vp8", vpx_codec_vp8_dx, VP8_FOURCC, 0x00FFFFFF},
++#endif
+#if CONFIG_VP9_DECODER
+ {"vp9", vpx_codec_vp8_dx, VP8_FOURCC, 0x00FFFFFF},
#endif
};
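The fourcc_mask field exists because IVF stores a versioned fourcc: 'VP80' read little-endian is 0x30385056, and masking with 0x00FFFFFF drops the trailing version byte, leaving 0x00385056 == VP8_FOURCC. The codec probe later in the file reduces to this comparison:

/* Sketch of the masked fourcc match used when probing input files. */
static int fourcc_matches(unsigned int file_fourcc, unsigned int want,
                          unsigned int mask) {
  return (file_fourcc & mask) == want;  /* 0x30385056 & 0x00FFFFFF
                                           == 0x00385056 == VP8_FOURCC */
}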
static const arg_def_t md5arg = ARG_DEF(NULL, "md5", 0,
"Compute the MD5 sum of the decoded frame");
#endif
-static const arg_def_t *all_args[] =
-{
- &codecarg, &use_yv12, &use_i420, &flipuvarg, &noblitarg,
- &progressarg, &limitarg, &postprocarg, &summaryarg, &outputfile,
- &threadsarg, &verbosearg,
+static const arg_def_t *all_args[] = {
+ &codecarg, &use_yv12, &use_i420, &flipuvarg, &noblitarg,
+ &progressarg, &limitarg, &skiparg, &postprocarg, &summaryarg, &outputfile,
+ &threadsarg, &verbosearg,
#if CONFIG_MD5
- &md5arg,
+ &md5arg,
#endif
- &error_concealment,
- NULL
+ &error_concealment,
+ NULL
};
- #if CONFIG_VP9_DECODER
-#if CONFIG_VP8_DECODER
++#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER
static const arg_def_t addnoise_level = ARG_DEF(NULL, "noise-level", 1,
- "Enable VP8 postproc add noise");
+ "Enable VP8 postproc add noise");
static const arg_def_t deblock = ARG_DEF(NULL, "deblock", 0,
- "Enable VP8 deblocking");
+ "Enable VP8 deblocking");
static const arg_def_t demacroblock_level = ARG_DEF(NULL, "demacroblock-level", 1,
- "Enable VP8 demacroblocking, w/ level");
+ "Enable VP8 demacroblocking, w/ level");
static const arg_def_t pp_debug_info = ARG_DEF(NULL, "pp-debug-info", 1,
- "Enable VP8 visible debug info");
+ "Enable VP8 visible debug info");
static const arg_def_t pp_disp_ref_frame = ARG_DEF(NULL, "pp-dbg-ref-frame", 1,
- "Display only selected reference frame per macro block");
+ "Display only selected reference frame per macro block");
static const arg_def_t pp_disp_mb_modes = ARG_DEF(NULL, "pp-dbg-mb-modes", 1,
- "Display only selected macro block modes");
+ "Display only selected macro block modes");
static const arg_def_t pp_disp_b_modes = ARG_DEF(NULL, "pp-dbg-b-modes", 1,
- "Display only selected block modes");
+ "Display only selected block modes");
static const arg_def_t pp_disp_mvs = ARG_DEF(NULL, "pp-dbg-mvs", 1,
- "Draw only selected motion vectors");
+ "Draw only selected motion vectors");
static const arg_def_t mfqe = ARG_DEF(NULL, "mfqe", 0,
- "Enable multiframe quality enhancement");
+ "Enable multiframe quality enhancement");
-static const arg_def_t *vp8_pp_args[] =
-{
- &addnoise_level, &deblock, &demacroblock_level, &pp_debug_info,
- &pp_disp_ref_frame, &pp_disp_mb_modes, &pp_disp_b_modes, &pp_disp_mvs, &mfqe,
- NULL
+static const arg_def_t *vp8_pp_args[] = {
+ &addnoise_level, &deblock, &demacroblock_level, &pp_debug_info,
+ &pp_disp_ref_frame, &pp_disp_mb_modes, &pp_disp_b_modes, &pp_disp_mvs, &mfqe,
+ NULL
};
#endif
-static void usage_exit()
-{
- int i;
+static void usage_exit() {
+ int i;
- fprintf(stderr, "Usage: %s <options> filename\n\n"
- "Options:\n", exec_name);
- arg_show_usage(stderr, all_args);
-#if CONFIG_VP8_DECODER
- fprintf(stderr, "\nVP8 Postprocessing Options:\n");
- arg_show_usage(stderr, vp8_pp_args);
+ fprintf(stderr, "Usage: %s <options> filename\n\n"
+ "Options:\n", exec_name);
+ arg_show_usage(stderr, all_args);
- #if CONFIG_VP9_DECODER
++#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER
+ fprintf(stderr, "\nVP8 Postprocessing Options:\n");
+ arg_show_usage(stderr, vp8_pp_args);
#endif
- fprintf(stderr,
- "\nOutput File Patterns:\n\n"
- " The -o argument specifies the name of the file(s) to "
- "write to. If the\n argument does not include any escape "
- "characters, the output will be\n written to a single file. "
- "Otherwise, the filename will be calculated by\n expanding "
- "the following escape characters:\n");
- fprintf(stderr,
- "\n\t%%w - Frame width"
- "\n\t%%h - Frame height"
- "\n\t%%<n> - Frame number, zero padded to <n> places (1..9)"
- "\n\n Pattern arguments are only supported in conjunction "
- "with the --yv12 and\n --i420 options. If the -o option is "
- "not specified, the output will be\n directed to stdout.\n"
- );
- fprintf(stderr, "\nIncluded decoders:\n\n");
-
- for (i = 0; i < sizeof(ifaces) / sizeof(ifaces[0]); i++)
- fprintf(stderr, " %-6s - %s\n",
- ifaces[i].name,
- vpx_codec_iface_name(ifaces[i].iface));
-
- exit(EXIT_FAILURE);
+ fprintf(stderr,
+ "\nOutput File Patterns:\n\n"
+ " The -o argument specifies the name of the file(s) to "
+ "write to. If the\n argument does not include any escape "
+ "characters, the output will be\n written to a single file. "
+ "Otherwise, the filename will be calculated by\n expanding "
+ "the following escape characters:\n");
+ fprintf(stderr,
+ "\n\t%%w - Frame width"
+ "\n\t%%h - Frame height"
+ "\n\t%%<n> - Frame number, zero padded to <n> places (1..9)"
+ "\n\n Pattern arguments are only supported in conjunction "
+ "with the --yv12 and\n --i420 options. If the -o option is "
+ "not specified, the output will be\n directed to stdout.\n"
+ );
+ fprintf(stderr, "\nIncluded decoders:\n\n");
+
+ for (i = 0; i < sizeof(ifaces) / sizeof(ifaces[0]); i++)
+ fprintf(stderr, " %-6s - %s\n",
+ ifaces[i].name,
+ vpx_codec_iface_name(ifaces[i].iface()));
+
+ exit(EXIT_FAILURE);
}
-void die(const char *fmt, ...)
-{
- va_list ap;
- va_start(ap, fmt);
- vfprintf(stderr, fmt, ap);
- fprintf(stderr, "\n");
- usage_exit();
+void die(const char *fmt, ...) {
+ va_list ap;
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ fprintf(stderr, "\n");
+ usage_exit();
}
-static unsigned int mem_get_le16(const void *vmem)
-{
- unsigned int val;
- const unsigned char *mem = (const unsigned char *)vmem;
+static unsigned int mem_get_le16(const void *vmem) {
+ unsigned int val;
+ const unsigned char *mem = (const unsigned char *)vmem;
- val = mem[1] << 8;
- val |= mem[0];
- return val;
+ val = mem[1] << 8;
+ val |= mem[0];
+ return val;
}
-static unsigned int mem_get_le32(const void *vmem)
-{
- unsigned int val;
- const unsigned char *mem = (const unsigned char *)vmem;
+static unsigned int mem_get_le32(const void *vmem) {
+ unsigned int val;
+ const unsigned char *mem = (const unsigned char *)vmem;
- val = mem[3] << 24;
- val |= mem[2] << 16;
- val |= mem[1] << 8;
- val |= mem[0];
- return val;
+ val = mem[3] << 24;
+ val |= mem[2] << 16;
+ val |= mem[1] << 8;
+ val |= mem[0];
+ return val;
}
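These little-endian helpers are what the container probing below builds on. As a hedged illustration (layout per the common IVF convention; error handling elided), the fixed 32-byte IVF file header can be unpacked like this:

/* Sketch of parsing an IVF file header with the helpers above.
 * Layout: "DKIF", u16 version, u16 header size, u32 fourcc, u16 width,
 * u16 height, u32 rate, u32 scale, u32 frame count, u32 unused. */
#include <string.h>

static int parse_ivf_header_model(const unsigned char hdr[32],
                                  unsigned int *fourcc,
                                  unsigned int *w, unsigned int *h) {
  if (memcmp(hdr, "DKIF", 4) != 0)
    return 0;                        /* not an IVF file */
  *fourcc = mem_get_le32(hdr + 8);
  *w      = mem_get_le16(hdr + 12);
  *h      = mem_get_le16(hdr + 14);
  return 1;
}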
-enum file_kind
-{
- RAW_FILE,
- IVF_FILE,
- WEBM_FILE
+enum file_kind {
+ RAW_FILE,
+ IVF_FILE,
+ WEBM_FILE
};
-struct input_ctx
-{
- enum file_kind kind;
- FILE *infile;
- nestegg *nestegg_ctx;
- nestegg_packet *pkt;
- unsigned int chunk;
- unsigned int chunks;
- unsigned int video_track;
+struct input_ctx {
+ enum file_kind kind;
+ FILE *infile;
+ nestegg *nestegg_ctx;
+ nestegg_packet *pkt;
+ unsigned int chunk;
+ unsigned int chunks;
+ unsigned int video_track;
};
#define IVF_FRAME_HDR_SZ (sizeof(uint32_t) + sizeof(uint64_t))
}
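IVF_FRAME_HDR_SZ reflects the per-frame layout: a 32-bit payload size and a 64-bit timestamp precede each compressed frame. A hedged sketch of reading one frame with the helpers above (buffer growth and error reporting elided):

/* Sketch of pulling one frame out of an IVF stream. */
#include <stdio.h>

static int read_ivf_frame_model(FILE *infile, unsigned char *buf,
                                size_t buf_cap, size_t *frame_sz) {
  unsigned char hdr[IVF_FRAME_HDR_SZ];
  if (fread(hdr, 1, sizeof(hdr), infile) != sizeof(hdr))
    return 0;                          /* EOF or short read            */
  *frame_sz = mem_get_le32(hdr);       /* payload size, little-endian  */
  if (*frame_sz > buf_cap)
    return 0;                          /* caller's buffer is too small */
  return fread(buf, 1, *frame_sz, infile) == *frame_sz;
}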
-int main(int argc, const char **argv_)
-{
- vpx_codec_ctx_t decoder;
- char *fn = NULL;
- int i;
- uint8_t *buf = NULL;
- size_t buf_sz = 0, buf_alloc_sz = 0;
- FILE *infile;
- int frame_in = 0, frame_out = 0, flipuv = 0, noblit = 0, do_md5 = 0, progress = 0;
- int stop_after = 0, postproc = 0, summary = 0, quiet = 1;
- int ec_enabled = 0;
- vpx_codec_iface_t *iface = NULL;
- unsigned int fourcc;
- unsigned long dx_time = 0;
- struct arg arg;
- char **argv, **argi, **argj;
- const char *outfile_pattern = 0;
- char outfile[PATH_MAX];
- int single_file;
- int use_y4m = 1;
- unsigned int width;
- unsigned int height;
- unsigned int fps_den;
- unsigned int fps_num;
- void *out = NULL;
- vpx_codec_dec_cfg_t cfg = {0};
-#if CONFIG_VP8_DECODER
- vp8_postproc_cfg_t vp8_pp_cfg = {0};
- int vp8_dbg_color_ref_frame = 0;
- int vp8_dbg_color_mb_modes = 0;
- int vp8_dbg_color_b_modes = 0;
- int vp8_dbg_display_mv = 0;
+int main(int argc, const char **argv_) {
+ vpx_codec_ctx_t decoder;
+ char *fn = NULL;
+ int i;
+ uint8_t *buf = NULL;
+ size_t buf_sz = 0, buf_alloc_sz = 0;
+ FILE *infile;
+ int frame_in = 0, frame_out = 0, flipuv = 0, noblit = 0, do_md5 = 0, progress = 0;
+ int stop_after = 0, postproc = 0, summary = 0, quiet = 1;
+ int arg_skip = 0;
+ int ec_enabled = 0;
+ vpx_codec_iface_t *iface = NULL;
+ unsigned int fourcc;
+ unsigned long dx_time = 0;
+ struct arg arg;
+ char **argv, **argi, **argj;
+ const char *outfile_pattern = 0;
+ char outfile[PATH_MAX];
+ int single_file;
+ int use_y4m = 1;
+ unsigned int width;
+ unsigned int height;
+ unsigned int fps_den;
+ unsigned int fps_num;
+ void *out = NULL;
+ vpx_codec_dec_cfg_t cfg = {0};
- #if CONFIG_VP9_DECODER
++#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER
+ vp8_postproc_cfg_t vp8_pp_cfg = {0};
+ int vp8_dbg_color_ref_frame = 0;
+ int vp8_dbg_color_mb_modes = 0;
+ int vp8_dbg_color_b_modes = 0;
+ int vp8_dbg_display_mv = 0;
#endif
- struct input_ctx input = {0};
- int frames_corrupted = 0;
- int dec_flags = 0;
-
- /* Parse command line */
- exec_name = argv_[0];
- argv = argv_dup(argc - 1, argv_ + 1);
-
- for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step)
- {
- memset(&arg, 0, sizeof(arg));
- arg.argv_step = 1;
-
- if (arg_match(&arg, &codecarg, argi))
- {
- int j, k = -1;
-
- for (j = 0; j < sizeof(ifaces) / sizeof(ifaces[0]); j++)
- if (!strcmp(ifaces[j].name, arg.val))
- k = j;
-
- if (k >= 0)
- iface = ifaces[k].iface;
- else
- die("Error: Unrecognized argument (%s) to --codec\n",
- arg.val);
- }
- else if (arg_match(&arg, &outputfile, argi))
- outfile_pattern = arg.val;
- else if (arg_match(&arg, &use_yv12, argi))
- {
- use_y4m = 0;
- flipuv = 1;
- }
- else if (arg_match(&arg, &use_i420, argi))
- {
- use_y4m = 0;
- flipuv = 0;
- }
- else if (arg_match(&arg, &flipuvarg, argi))
- flipuv = 1;
- else if (arg_match(&arg, &noblitarg, argi))
- noblit = 1;
- else if (arg_match(&arg, &progressarg, argi))
- progress = 1;
- else if (arg_match(&arg, &limitarg, argi))
- stop_after = arg_parse_uint(&arg);
- else if (arg_match(&arg, &postprocarg, argi))
- postproc = 1;
- else if (arg_match(&arg, &md5arg, argi))
- do_md5 = 1;
- else if (arg_match(&arg, &summaryarg, argi))
- summary = 1;
- else if (arg_match(&arg, &threadsarg, argi))
- cfg.threads = arg_parse_uint(&arg);
- else if (arg_match(&arg, &verbosearg, argi))
- quiet = 0;
-
-#if CONFIG_VP8_DECODER
- else if (arg_match(&arg, &addnoise_level, argi))
- {
- postproc = 1;
- vp8_pp_cfg.post_proc_flag |= VP8_ADDNOISE;
- vp8_pp_cfg.noise_level = arg_parse_uint(&arg);
- }
- else if (arg_match(&arg, &demacroblock_level, argi))
- {
- postproc = 1;
- vp8_pp_cfg.post_proc_flag |= VP8_DEMACROBLOCK;
- vp8_pp_cfg.deblocking_level = arg_parse_uint(&arg);
- }
- else if (arg_match(&arg, &deblock, argi))
- {
- postproc = 1;
- vp8_pp_cfg.post_proc_flag |= VP8_DEBLOCK;
- }
- else if (arg_match(&arg, &mfqe, argi))
- {
- postproc = 1;
- vp8_pp_cfg.post_proc_flag |= VP8_MFQE;
- }
- else if (arg_match(&arg, &pp_debug_info, argi))
- {
- unsigned int level = arg_parse_uint(&arg);
-
- postproc = 1;
- vp8_pp_cfg.post_proc_flag &= ~0x7;
-
- if (level)
- vp8_pp_cfg.post_proc_flag |= level;
- }
- else if (arg_match(&arg, &pp_disp_ref_frame, argi))
- {
- unsigned int flags = arg_parse_int(&arg);
- if (flags)
- {
- postproc = 1;
- vp8_dbg_color_ref_frame = flags;
- }
- }
- else if (arg_match(&arg, &pp_disp_mb_modes, argi))
- {
- unsigned int flags = arg_parse_int(&arg);
- if (flags)
- {
- postproc = 1;
- vp8_dbg_color_mb_modes = flags;
- }
- }
- else if (arg_match(&arg, &pp_disp_b_modes, argi))
- {
- unsigned int flags = arg_parse_int(&arg);
- if (flags)
- {
- postproc = 1;
- vp8_dbg_color_b_modes = flags;
- }
- }
- else if (arg_match(&arg, &pp_disp_mvs, argi))
- {
- unsigned int flags = arg_parse_int(&arg);
- if (flags)
- {
- postproc = 1;
- vp8_dbg_display_mv = flags;
- }
- }
- else if (arg_match(&arg, &error_concealment, argi))
- {
- ec_enabled = 1;
- }
+ struct input_ctx input = {0};
+ int frames_corrupted = 0;
+ int dec_flags = 0;
+
+ /* Parse command line */
+ exec_name = argv_[0];
+ argv = argv_dup(argc - 1, argv_ + 1);
+
+ for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
+ memset(&arg, 0, sizeof(arg));
+ arg.argv_step = 1;
+
+ if (arg_match(&arg, &codecarg, argi)) {
+ int j, k = -1;
+
+ for (j = 0; j < sizeof(ifaces) / sizeof(ifaces[0]); j++)
+ if (!strcmp(ifaces[j].name, arg.val))
+ k = j;
+
+ if (k >= 0)
+ iface = ifaces[k].iface();
+ else
+ die("Error: Unrecognized argument (%s) to --codec\n",
+ arg.val);
+ } else if (arg_match(&arg, &outputfile, argi))
+ outfile_pattern = arg.val;
+ else if (arg_match(&arg, &use_yv12, argi)) {
+ use_y4m = 0;
+ flipuv = 1;
+ } else if (arg_match(&arg, &use_i420, argi)) {
+ use_y4m = 0;
+ flipuv = 0;
+ } else if (arg_match(&arg, &flipuvarg, argi))
+ flipuv = 1;
+ else if (arg_match(&arg, &noblitarg, argi))
+ noblit = 1;
+ else if (arg_match(&arg, &progressarg, argi))
+ progress = 1;
+ else if (arg_match(&arg, &limitarg, argi))
+ stop_after = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &skiparg, argi))
+ arg_skip = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &postprocarg, argi))
+ postproc = 1;
+ else if (arg_match(&arg, &md5arg, argi))
+ do_md5 = 1;
+ else if (arg_match(&arg, &summaryarg, argi))
+ summary = 1;
+ else if (arg_match(&arg, &threadsarg, argi))
+ cfg.threads = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &verbosearg, argi))
+ quiet = 0;
+
- #if CONFIG_VP9_DECODER
++#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER
+ else if (arg_match(&arg, &addnoise_level, argi)) {
+ postproc = 1;
+ vp8_pp_cfg.post_proc_flag |= VP8_ADDNOISE;
+ vp8_pp_cfg.noise_level = arg_parse_uint(&arg);
+ } else if (arg_match(&arg, &demacroblock_level, argi)) {
+ postproc = 1;
+ vp8_pp_cfg.post_proc_flag |= VP8_DEMACROBLOCK;
+ vp8_pp_cfg.deblocking_level = arg_parse_uint(&arg);
+ } else if (arg_match(&arg, &deblock, argi)) {
+ postproc = 1;
+ vp8_pp_cfg.post_proc_flag |= VP8_DEBLOCK;
+ } else if (arg_match(&arg, &mfqe, argi)) {
+ postproc = 1;
+ vp8_pp_cfg.post_proc_flag |= VP8_MFQE;
+ } else if (arg_match(&arg, &pp_debug_info, argi)) {
+ unsigned int level = arg_parse_uint(&arg);
+
+ postproc = 1;
+ vp8_pp_cfg.post_proc_flag &= ~0x7;
+
+ if (level)
+ vp8_pp_cfg.post_proc_flag |= level;
+ } else if (arg_match(&arg, &pp_disp_ref_frame, argi)) {
+ unsigned int flags = arg_parse_int(&arg);
+ if (flags) {
+ postproc = 1;
+ vp8_dbg_color_ref_frame = flags;
+ }
+ } else if (arg_match(&arg, &pp_disp_mb_modes, argi)) {
+ unsigned int flags = arg_parse_int(&arg);
+ if (flags) {
+ postproc = 1;
+ vp8_dbg_color_mb_modes = flags;
+ }
+ } else if (arg_match(&arg, &pp_disp_b_modes, argi)) {
+ unsigned int flags = arg_parse_int(&arg);
+ if (flags) {
+ postproc = 1;
+ vp8_dbg_color_b_modes = flags;
+ }
+ } else if (arg_match(&arg, &pp_disp_mvs, argi)) {
+ unsigned int flags = arg_parse_int(&arg);
+ if (flags) {
+ postproc = 1;
+ vp8_dbg_display_mv = flags;
+ }
+ } else if (arg_match(&arg, &error_concealment, argi)) {
+ ec_enabled = 1;
+ }
#endif
- else
- argj++;
- }
+ else
+ argj++;
+ }
- /* Check for unrecognized options */
- for (argi = argv; *argi; argi++)
- if (argi[0][0] == '-' && strlen(argi[0]) > 1)
- die("Error: Unrecognized option %s\n", *argi);
+ /* Check for unrecognized options */
+ for (argi = argv; *argi; argi++)
+ if (argi[0][0] == '-' && strlen(argi[0]) > 1)
+ die("Error: Unrecognized option %s\n", *argi);
- /* Handle non-option arguments */
- fn = argv[0];
+ /* Handle non-option arguments */
+ fn = argv[0];
- if (!fn)
- usage_exit();
+ if (!fn)
+ usage_exit();
- /* Open file */
- infile = strcmp(fn, "-") ? fopen(fn, "rb") : set_binary_mode(stdin);
+ /* Open file */
+ infile = strcmp(fn, "-") ? fopen(fn, "rb") : set_binary_mode(stdin);
- if (!infile)
- {
- fprintf(stderr, "Failed to open file '%s'",
- strcmp(fn, "-") ? fn : "stdin");
- return EXIT_FAILURE;
- }
+ if (!infile) {
+ fprintf(stderr, "Failed to open file '%s'",
+ strcmp(fn, "-") ? fn : "stdin");
+ return EXIT_FAILURE;
+ }
#if CONFIG_OS_SUPPORT
- /* Make sure we don't dump to the terminal, unless forced to with -o - */
- if(!outfile_pattern && isatty(fileno(stdout)) && !do_md5 && !noblit)
- {
- fprintf(stderr,
- "Not dumping raw video to your terminal. Use '-o -' to "
- "override.\n");
- return EXIT_FAILURE;
- }
+ /* Make sure we don't dump to the terminal, unless forced to with -o - */
+ if (!outfile_pattern && isatty(fileno(stdout)) && !do_md5 && !noblit) {
+ fprintf(stderr,
+ "Not dumping raw video to your terminal. Use '-o -' to "
+ "override.\n");
+ return EXIT_FAILURE;
+ }
#endif
- input.infile = infile;
- if(file_is_ivf(infile, &fourcc, &width, &height, &fps_den,
- &fps_num))
- input.kind = IVF_FILE;
- else if(file_is_webm(&input, &fourcc, &width, &height, &fps_den, &fps_num))
- input.kind = WEBM_FILE;
- else if(file_is_raw(infile, &fourcc, &width, &height, &fps_den, &fps_num))
- input.kind = RAW_FILE;
- else
- {
- fprintf(stderr, "Unrecognized input file type.\n");
- return EXIT_FAILURE;
+ input.infile = infile;
+ if (file_is_ivf(infile, &fourcc, &width, &height, &fps_den,
+ &fps_num))
+ input.kind = IVF_FILE;
+ else if (file_is_webm(&input, &fourcc, &width, &height, &fps_den, &fps_num))
+ input.kind = WEBM_FILE;
+ else if (file_is_raw(infile, &fourcc, &width, &height, &fps_den, &fps_num))
+ input.kind = RAW_FILE;
+ else {
+ fprintf(stderr, "Unrecognized input file type.\n");
+ return EXIT_FAILURE;
+ }
+
+ /* If the output file is not set or doesn't have a sequence number in
+ * it, then we only open it once.
+ */
+ outfile_pattern = outfile_pattern ? outfile_pattern : "-";
+ single_file = 1;
+ {
+ const char *p = outfile_pattern;
+ do {
+ p = strchr(p, '%');
+ if (p && p[1] >= '1' && p[1] <= '9') {
+ /* pattern contains sequence number, so it's not unique. */
+ single_file = 0;
+ break;
+ }
+ if (p)
+ p++;
+ } while (p);
+ }
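+ /* Illustrative: an output pattern such as "out-%5.yuv" (hypothetical
+  * name) contains a %1-%9 sequence-number conversion, so each frame is
+  * written to its own file; a plain filename or "-" leaves single_file
+  * set and the output is opened exactly once below. */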
+
+ if (single_file && !noblit) {
+ generate_filename(outfile_pattern, outfile, sizeof(outfile) - 1,
+ width, height, 0);
+ out = out_open(outfile, do_md5);
+ }
+
+ if (use_y4m && !noblit) {
+ char buffer[128];
+ if (!single_file) {
+ fprintf(stderr, "YUV4MPEG2 not supported with output patterns,"
+ " try --i420 or --yv12.\n");
+ return EXIT_FAILURE;
}
- /* If the output file is not set or doesn't have a sequence number in
- * it, then we only open it once.
- */
- outfile_pattern = outfile_pattern ? outfile_pattern : "-";
- single_file = 1;
- {
- const char *p = outfile_pattern;
- do
- {
- p = strchr(p, '%');
- if(p && p[1] >= '1' && p[1] <= '9')
- {
- /* pattern contains sequence number, so it's not unique. */
- single_file = 0;
- break;
- }
- if(p)
- p++;
- } while(p);
+ if (input.kind == WEBM_FILE)
+ if (webm_guess_framerate(&input, &fps_den, &fps_num)) {
+ fprintf(stderr, "Failed to guess framerate -- error parsing "
+ "webm file?\n");
+ return EXIT_FAILURE;
+ }
+
+
+ /* Note: We can't output an aspect ratio here because IVF doesn't
+  * store one, and neither does VP8. That will have to wait until
+  * these tools support WebM natively. */
+ sprintf(buffer, "YUV4MPEG2 C%s W%u H%u F%u:%u I%c\n",
+ "420jpeg", width, height, fps_num, fps_den, 'p');
+ out_put(out, (unsigned char *)buffer,
+ (unsigned int)strlen(buffer), do_md5);
+ }
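+ /* The YUV4MPEG2 stream header above is written once; each decoded
+  * frame is then preceded by its own "FRAME\n" marker in the blit
+  * loop below. */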
+
+ /* Try to determine the codec from the fourcc. */
+ for (i = 0; i < sizeof(ifaces) / sizeof(ifaces[0]); i++)
+ if ((fourcc & ifaces[i].fourcc_mask) == ifaces[i].fourcc) {
+ vpx_codec_iface_t *ivf_iface = ifaces[i].iface();
+
+ if (iface && iface != ivf_iface)
+ fprintf(stderr, "Notice -- IVF header indicates codec: %s\n",
+ ifaces[i].name);
+ else
+ iface = ivf_iface;
+
+ break;
}
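+ /* Example of the masked match above: an IVF header carrying the
+  * fourcc 'VP80' (0x30385056 read little-endian) selects the VP8
+  * entry of ifaces[]; fourcc_mask allows a family of related fourccs
+  * to map to a single codec interface. */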
- if(single_file && !noblit)
- {
- generate_filename(outfile_pattern, outfile, sizeof(outfile)-1,
- width, height, 0);
- out = out_open(outfile, do_md5);
- }
+ dec_flags = (postproc ? VPX_CODEC_USE_POSTPROC : 0) |
+ (ec_enabled ? VPX_CODEC_USE_ERROR_CONCEALMENT : 0);
+ if (vpx_codec_dec_init(&decoder, iface ? iface : ifaces[0].iface(), &cfg,
+ dec_flags)) {
+ fprintf(stderr, "Failed to initialize decoder: %s\n", vpx_codec_error(&decoder));
+ return EXIT_FAILURE;
+ }
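+ /* dec_flags ORs in VPX_CODEC_USE_POSTPROC and/or
+  * VPX_CODEC_USE_ERROR_CONCEALMENT only when requested above;
+  * vpx_codec_dec_init() fails up front if the selected decoder was
+  * built without a requested capability. */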
+
+ if (!quiet)
+ fprintf(stderr, "%s\n", decoder.name);
+
- #if CONFIG_VP9_DECODER
++#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER
+
+ if (vp8_pp_cfg.post_proc_flag
+ && vpx_codec_control(&decoder, VP8_SET_POSTPROC, &vp8_pp_cfg)) {
+ fprintf(stderr, "Failed to configure postproc: %s\n", vpx_codec_error(&decoder));
+ return EXIT_FAILURE;
+ }
+
+ if (vp8_dbg_color_ref_frame
+ && vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_REF_FRAME, vp8_dbg_color_ref_frame)) {
+ fprintf(stderr, "Failed to configure reference block visualizer: %s\n", vpx_codec_error(&decoder));
+ return EXIT_FAILURE;
+ }
+
+ if (vp8_dbg_color_mb_modes
+ && vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_MB_MODES, vp8_dbg_color_mb_modes)) {
+ fprintf(stderr, "Failed to configure macro block visualizer: %s\n", vpx_codec_error(&decoder));
+ return EXIT_FAILURE;
+ }
+
+ if (vp8_dbg_color_b_modes
+ && vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_B_MODES, vp8_dbg_color_b_modes)) {
+ fprintf(stderr, "Failed to configure block visualizer: %s\n", vpx_codec_error(&decoder));
+ return EXIT_FAILURE;
+ }
+
+ if (vp8_dbg_display_mv
+ && vpx_codec_control(&decoder, VP8_SET_DBG_DISPLAY_MV, vp8_dbg_display_mv)) {
+ fprintf(stderr, "Failed to configure motion vector visualizer: %s\n", vpx_codec_error(&decoder));
+ return EXIT_FAILURE;
+ }
+#endif
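+ /* Each VP8_SET_POSTPROC / VP8_SET_DBG_* control above is issued only
+  * when the corresponding command-line flag left a nonzero value, so a
+  * default invocation makes none of these vpx_codec_control() calls. */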
- if (use_y4m && !noblit)
- {
- char buffer[128];
- if (!single_file)
- {
- fprintf(stderr, "YUV4MPEG2 not supported with output patterns,"
- " try --i420 or --yv12.\n");
- return EXIT_FAILURE;
- }
- if(input.kind == WEBM_FILE)
- if(webm_guess_framerate(&input, &fps_den, &fps_num))
- {
- fprintf(stderr, "Failed to guess framerate -- error parsing "
- "webm file?\n");
- return EXIT_FAILURE;
- }
-
-
- /*Note: We can't output an aspect ratio here because IVF doesn't
- store one, and neither does VP8.
- That will have to wait until these tools support WebM natively.*/
- sprintf(buffer, "YUV4MPEG2 C%s W%u H%u F%u:%u I%c\n",
- "420jpeg", width, height, fps_num, fps_den, 'p');
- out_put(out, (unsigned char *)buffer,
- (unsigned int)strlen(buffer), do_md5);
- }
+ if (arg_skip)
+ fprintf(stderr, "Skipping first %d frames.\n", arg_skip);
+ while (arg_skip) {
+ if (read_frame(&input, &buf, &buf_sz, &buf_alloc_sz))
+ break;
+ arg_skip--;
+ }
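+ /* Skipped frames are still pulled through read_frame() and
+  * discarded; only the decode call below is bypassed, keeping the
+  * container parsing state consistent. */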
- /* Try to determine the codec from the fourcc. */
- for (i = 0; i < sizeof(ifaces) / sizeof(ifaces[0]); i++)
- if ((fourcc & ifaces[i].fourcc_mask) == ifaces[i].fourcc)
- {
- vpx_codec_iface_t *ivf_iface = ifaces[i].iface;
+ /* Decode file */
+ while (!read_frame(&input, &buf, &buf_sz, &buf_alloc_sz)) {
+ vpx_codec_iter_t iter = NULL;
+ vpx_image_t *img;
+ struct vpx_usec_timer timer;
+ int corrupted;
- if (iface && iface != ivf_iface)
- fprintf(stderr, "Notice -- IVF header indicates codec: %s\n",
- ifaces[i].name);
- else
- iface = ivf_iface;
+ vpx_usec_timer_start(&timer);
- break;
- }
+ if (vpx_codec_decode(&decoder, buf, (unsigned int)buf_sz, NULL, 0)) {
+ const char *detail = vpx_codec_error_detail(&decoder);
+ fprintf(stderr, "Failed to decode frame: %s\n", vpx_codec_error(&decoder));
- dec_flags = (postproc ? VPX_CODEC_USE_POSTPROC : 0) |
- (ec_enabled ? VPX_CODEC_USE_ERROR_CONCEALMENT : 0);
- if (vpx_codec_dec_init(&decoder, iface ? iface : ifaces[0].iface, &cfg,
- dec_flags))
- {
- fprintf(stderr, "Failed to initialize decoder: %s\n", vpx_codec_error(&decoder));
- return EXIT_FAILURE;
+ if (detail)
+ fprintf(stderr, " Additional information: %s\n", detail);
+
+ goto fail;
}
- if (!quiet)
- fprintf(stderr, "%s\n", decoder.name);
+ vpx_usec_timer_mark(&timer);
+ dx_time += (unsigned int)vpx_usec_timer_elapsed(&timer);
-#if CONFIG_VP8_DECODER
+ ++frame_in;
- if (vp8_pp_cfg.post_proc_flag
- && vpx_codec_control(&decoder, VP8_SET_POSTPROC, &vp8_pp_cfg))
- {
- fprintf(stderr, "Failed to configure postproc: %s\n", vpx_codec_error(&decoder));
- return EXIT_FAILURE;
+ if (vpx_codec_control(&decoder, VP8D_GET_FRAME_CORRUPTED, &corrupted)) {
+ fprintf(stderr, "Failed VP8_GET_FRAME_CORRUPTED: %s\n",
+ vpx_codec_error(&decoder));
+ goto fail;
}
+ frames_corrupted += corrupted;
- if (vp8_dbg_color_ref_frame
- && vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_REF_FRAME, vp8_dbg_color_ref_frame))
- {
- fprintf(stderr, "Failed to configure reference block visualizer: %s\n", vpx_codec_error(&decoder));
- return EXIT_FAILURE;
- }
+ vpx_usec_timer_start(&timer);
- if (vp8_dbg_color_mb_modes
- && vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_MB_MODES, vp8_dbg_color_mb_modes))
- {
- fprintf(stderr, "Failed to configure macro block visualizer: %s\n", vpx_codec_error(&decoder));
- return EXIT_FAILURE;
- }
+ if ((img = vpx_codec_get_frame(&decoder, &iter)))
+ ++frame_out;
- if (vp8_dbg_color_b_modes
- && vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_B_MODES, vp8_dbg_color_b_modes))
- {
- fprintf(stderr, "Failed to configure block visualizer: %s\n", vpx_codec_error(&decoder));
- return EXIT_FAILURE;
- }
+ vpx_usec_timer_mark(&timer);
+ dx_time += (unsigned int)vpx_usec_timer_elapsed(&timer);
- if (vp8_dbg_display_mv
- && vpx_codec_control(&decoder, VP8_SET_DBG_DISPLAY_MV, vp8_dbg_display_mv))
- {
- fprintf(stderr, "Failed to configure motion vector visualizer: %s\n", vpx_codec_error(&decoder));
- return EXIT_FAILURE;
- }
-#endif
+ if (progress)
+ show_progress(frame_in, frame_out, dx_time);
- /* Decode file */
- while (!read_frame(&input, &buf, &buf_sz, &buf_alloc_sz))
- {
- vpx_codec_iter_t iter = NULL;
- vpx_image_t *img;
- struct vpx_usec_timer timer;
- int corrupted;
+ if (!noblit) {
+ if (img) {
+ unsigned int y;
+ char out_fn[PATH_MAX];
+ uint8_t *buf;
- vpx_usec_timer_start(&timer);
+ if (!single_file) {
+ size_t len = sizeof(out_fn) - 1;
- if (vpx_codec_decode(&decoder, buf, (unsigned int)buf_sz, NULL, 0))
- {
- const char *detail = vpx_codec_error_detail(&decoder);
- fprintf(stderr, "Failed to decode frame: %s\n", vpx_codec_error(&decoder));
+ out_fn[len] = '\0';
+ generate_filename(outfile_pattern, out_fn, len - 1,
+ img->d_w, img->d_h, frame_in);
+ out = out_open(out_fn, do_md5);
+ } else if (use_y4m)
+ out_put(out, (unsigned char *)"FRAME\n", 6, do_md5);
- if (detail)
- fprintf(stderr, " Additional information: %s\n", detail);
+ buf = img->planes[VPX_PLANE_Y];
- goto fail;
+ for (y = 0; y < img->d_h; y++) {
+ out_put(out, buf, img->d_w, do_md5);
+ buf += img->stride[VPX_PLANE_Y];
}
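+ /* For 4:2:0 data the chroma loops below use (1 + d) / 2 to round odd
+  * luma dimensions up: a 101x65 frame (hypothetical) writes 51x33
+  * samples per chroma plane. */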
- vpx_usec_timer_mark(&timer);
- dx_time += (unsigned int)vpx_usec_timer_elapsed(&timer);
+ buf = img->planes[flipuv ? VPX_PLANE_V : VPX_PLANE_U];
- ++frame_in;
-
- if (vpx_codec_control(&decoder, VP8D_GET_FRAME_CORRUPTED, &corrupted))
- {
- fprintf(stderr, "Failed VP8_GET_FRAME_CORRUPTED: %s\n",
- vpx_codec_error(&decoder));
- goto fail;
+ for (y = 0; y < (1 + img->d_h) / 2; y++) {
+ out_put(out, buf, (1 + img->d_w) / 2, do_md5);
+ buf += img->stride[VPX_PLANE_U];
}
- frames_corrupted += corrupted;
-
- vpx_usec_timer_start(&timer);
-
- if ((img = vpx_codec_get_frame(&decoder, &iter)))
- ++frame_out;
-
- vpx_usec_timer_mark(&timer);
- dx_time += (unsigned int)vpx_usec_timer_elapsed(&timer);
-
- if (progress)
- show_progress(frame_in, frame_out, dx_time);
-
- if (!noblit)
- {
- if (img)
- {
- unsigned int y;
- char out_fn[PATH_MAX];
- uint8_t *buf;
-
- if (!single_file)
- {
- size_t len = sizeof(out_fn)-1;
-
- out_fn[len] = '\0';
- generate_filename(outfile_pattern, out_fn, len-1,
- img->d_w, img->d_h, frame_in);
- out = out_open(out_fn, do_md5);
- }
- else if(use_y4m)
- out_put(out, (unsigned char *)"FRAME\n", 6, do_md5);
-
- buf = img->planes[VPX_PLANE_Y];
-
- for (y = 0; y < img->d_h; y++)
- {
- out_put(out, buf, img->d_w, do_md5);
- buf += img->stride[VPX_PLANE_Y];
- }
-
- buf = img->planes[flipuv?VPX_PLANE_V:VPX_PLANE_U];
-
- for (y = 0; y < (1 + img->d_h) / 2; y++)
- {
- out_put(out, buf, (1 + img->d_w) / 2, do_md5);
- buf += img->stride[VPX_PLANE_U];
- }
-
- buf = img->planes[flipuv?VPX_PLANE_U:VPX_PLANE_V];
-
- for (y = 0; y < (1 + img->d_h) / 2; y++)
- {
- out_put(out, buf, (1 + img->d_w) / 2, do_md5);
- buf += img->stride[VPX_PLANE_V];
- }
-
- if (!single_file)
- out_close(out, out_fn, do_md5);
- }
+
+ buf = img->planes[flipuv ? VPX_PLANE_U : VPX_PLANE_V];
+
+ for (y = 0; y < (1 + img->d_h) / 2; y++) {
+ out_put(out, buf, (1 + img->d_w) / 2, do_md5);
+ buf += img->stride[VPX_PLANE_V];
}
- if (stop_after && frame_in >= stop_after)
- break;
+ if (!single_file)
+ out_close(out, out_fn, do_md5);
+ }
}
- if (summary || progress)
- {
- show_progress(frame_in, frame_out, dx_time);
- fprintf(stderr, "\n");
- }
+ if (stop_after && frame_in >= stop_after)
+ break;
+ }
- if (frames_corrupted)
- fprintf(stderr, "WARNING: %d frames corrupted.\n",frames_corrupted);
+ if (summary || progress) {
+ show_progress(frame_in, frame_out, dx_time);
+ fprintf(stderr, "\n");
+ }
+
+ if (frames_corrupted)
+ fprintf(stderr, "WARNING: %d frames corrupted.\n", frames_corrupted);
fail:
#include <fcntl.h>
#include <unistd.h>
#endif
- #if CONFIG_VP9_ENCODER
+
++#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER
#include "vpx/vp8cx.h"
- #if CONFIG_VP9_DECODER
+#endif
++#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER
+#include "vpx/vp8dx.h"
+#endif
+
#include "vpx_ports/mem_ops.h"
#include "vpx_ports/vpx_timer.h"
#include "tools_common.h"
static const char *exec_name;
-static const struct codec_item
-{
- char const *name;
- vpx_codec_iface_t *iface;
- unsigned int fourcc;
-} codecs[] =
-{
-#if CONFIG_VP8_ENCODER
- {"vp8", &vpx_codec_vp8_cx_algo, 0x30385056},
+static const struct codec_item {
+ char const *name;
+ const vpx_codec_iface_t *(*iface)(void);
+ const vpx_codec_iface_t *(*dx_iface)(void);
+ unsigned int fourcc;
+} codecs[] = {
++#if CONFIG_VP8_ENCODER && CONFIG_VP8_DECODER
++ {"vp8", &vpx_codec_vp8_cx, &vpx_codec_vp8_dx, 0x30385056},
++#elif CONFIG_VP8_ENCODER && !CONFIG_VP8_DECODER
++ {"vp8", &vpx_codec_vp8_cx, NULL, 0x30385056},
++#endif
+#if CONFIG_VP9_ENCODER && CONFIG_VP9_DECODER
+ {"vp9", &vpx_codec_vp9_cx, &vpx_codec_vp9_dx, 0x30395056},
- #endif
- #if CONFIG_VP9_ENCODER && !CONFIG_VP9_DECODER
++#elif CONFIG_VP9_ENCODER && !CONFIG_VP9_DECODER
+ {"vp9", &vpx_codec_vp9_cx, NULL, 0x30395056},
#endif
};
- #if CONFIG_VP9_ENCODER
-#if CONFIG_VP8_ENCODER
++#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER
static const arg_def_t noise_sens = ARG_DEF(NULL, "noise-sensitivity", 1,
- "Noise sensitivity (frames to blur)");
+ "Noise sensitivity (frames to blur)");
static const arg_def_t sharpness = ARG_DEF(NULL, "sharpness", 1,
- "Filter sharpness (0-7)");
+ "Filter sharpness (0-7)");
static const arg_def_t static_thresh = ARG_DEF(NULL, "static-thresh", 1,
- "Motion detection threshold");
+ "Motion detection threshold");
#endif
- #if CONFIG_VP9_ENCODER
-#if CONFIG_VP8_ENCODER
++#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER
static const arg_def_t cpu_used = ARG_DEF(NULL, "cpu-used", 1,
- "CPU Used (-16..16)");
+ "CPU Used (-16..16)");
#endif
- #if CONFIG_VP9_ENCODER
-#if CONFIG_VP8_ENCODER
++#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER
static const arg_def_t token_parts = ARG_DEF(NULL, "token-parts", 1,
- "Number of token partitions to use, log2");
+ "Number of token partitions to use, log2");
static const arg_def_t auto_altref = ARG_DEF(NULL, "auto-alt-ref", 1,
- "Enable automatic alt reference frames");
+ "Enable automatic alt reference frames");
static const arg_def_t arnr_maxframes = ARG_DEF(NULL, "arnr-maxframes", 1,
- "AltRef Max Frames");
+ "AltRef Max Frames");
static const arg_def_t arnr_strength = ARG_DEF(NULL, "arnr-strength", 1,
- "AltRef Strength");
+ "AltRef Strength");
static const arg_def_t arnr_type = ARG_DEF(NULL, "arnr-type", 1,
- "AltRef Type");
+ "AltRef Type");
static const struct arg_enum_list tuning_enum[] = {
- {"psnr", VP8_TUNE_PSNR},
- {"ssim", VP8_TUNE_SSIM},
- {NULL, 0}
+ {"psnr", VP8_TUNE_PSNR},
+ {"ssim", VP8_TUNE_SSIM},
+ {NULL, 0}
};
static const arg_def_t tune_ssim = ARG_DEF_ENUM(NULL, "tune", 1,
- "Material to favor", tuning_enum);
+ "Material to favor", tuning_enum);
static const arg_def_t cq_level = ARG_DEF(NULL, "cq-level", 1,
- "Constrained Quality Level");
+ "Constrained Quality Level");
static const arg_def_t max_intra_rate_pct = ARG_DEF(NULL, "max-intra-rate", 1,
- "Max I-frame bitrate (pct)");
+ "Max I-frame bitrate (pct)");
+#if CONFIG_LOSSLESS
+static const arg_def_t lossless = ARG_DEF(NULL, "lossless", 1, "Lossless mode");
+#endif
-static const arg_def_t *vp8_args[] =
-{
- &cpu_used, &auto_altref, &noise_sens, &sharpness, &static_thresh,
- &token_parts, &arnr_maxframes, &arnr_strength, &arnr_type,
- &tune_ssim, &cq_level, &max_intra_rate_pct, NULL
+static const arg_def_t *vp8_args[] = {
+ &cpu_used, &auto_altref, &noise_sens, &sharpness, &static_thresh,
+ &token_parts, &arnr_maxframes, &arnr_strength, &arnr_type,
+ &tune_ssim, &cq_level, &max_intra_rate_pct,
+#if CONFIG_LOSSLESS
+ &lossless,
+#endif
+ NULL
};
-static const int vp8_arg_ctrl_map[] =
-{
- VP8E_SET_CPUUSED, VP8E_SET_ENABLEAUTOALTREF,
- VP8E_SET_NOISE_SENSITIVITY, VP8E_SET_SHARPNESS, VP8E_SET_STATIC_THRESHOLD,
- VP8E_SET_TOKEN_PARTITIONS,
- VP8E_SET_ARNR_MAXFRAMES, VP8E_SET_ARNR_STRENGTH , VP8E_SET_ARNR_TYPE,
- VP8E_SET_TUNING, VP8E_SET_CQ_LEVEL, VP8E_SET_MAX_INTRA_BITRATE_PCT, 0
+static const int vp8_arg_ctrl_map[] = {
+ VP8E_SET_CPUUSED, VP8E_SET_ENABLEAUTOALTREF,
+ VP8E_SET_NOISE_SENSITIVITY, VP8E_SET_SHARPNESS, VP8E_SET_STATIC_THRESHOLD,
+ VP8E_SET_TOKEN_PARTITIONS,
+ VP8E_SET_ARNR_MAXFRAMES, VP8E_SET_ARNR_STRENGTH, VP8E_SET_ARNR_TYPE,
+ VP8E_SET_TUNING, VP8E_SET_CQ_LEVEL, VP8E_SET_MAX_INTRA_BITRATE_PCT,
+#if CONFIG_LOSSLESS
+ VP9E_SET_LOSSLESS,
+#endif
+ 0
};
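+/* vp8_args and vp8_arg_ctrl_map are parallel, sentinel-terminated
+ * arrays: entry i of the map is the vpx_codec_control() id applied when
+ * option i matches, so the CONFIG_LOSSLESS pair must stay in step in
+ * both lists. */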
#endif
static const arg_def_t *no_args[] = { NULL };
-static void usage_exit()
-{
- int i;
-
- fprintf(stderr, "Usage: %s <options> -o dst_filename src_filename \n",
- exec_name);
-
- fprintf(stderr, "\nOptions:\n");
- arg_show_usage(stdout, main_args);
- fprintf(stderr, "\nEncoder Global Options:\n");
- arg_show_usage(stdout, global_args);
- fprintf(stderr, "\nRate Control Options:\n");
- arg_show_usage(stdout, rc_args);
- fprintf(stderr, "\nTwopass Rate Control Options:\n");
- arg_show_usage(stdout, rc_twopass_args);
- fprintf(stderr, "\nKeyframe Placement Options:\n");
- arg_show_usage(stdout, kf_args);
-#if CONFIG_VP8_ENCODER
- fprintf(stderr, "\nVP8 Specific Options:\n");
- arg_show_usage(stdout, vp8_args);
+static void usage_exit() {
+ int i;
+
+ fprintf(stderr, "Usage: %s <options> -o dst_filename src_filename \n",
+ exec_name);
+
+ fprintf(stderr, "\nOptions:\n");
+ arg_show_usage(stdout, main_args);
+ fprintf(stderr, "\nEncoder Global Options:\n");
+ arg_show_usage(stdout, global_args);
+ fprintf(stderr, "\nRate Control Options:\n");
+ arg_show_usage(stdout, rc_args);
+ fprintf(stderr, "\nTwopass Rate Control Options:\n");
+ arg_show_usage(stdout, rc_twopass_args);
+ fprintf(stderr, "\nKeyframe Placement Options:\n");
+ arg_show_usage(stdout, kf_args);
- #if CONFIG_VP9_ENCODER
++#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER
+ fprintf(stderr, "\nVP8 Specific Options:\n");
+ arg_show_usage(stdout, vp8_args);
#endif
- fprintf(stderr, "\nStream timebase (--timebase):\n"
- " The desired precision of timestamps in the output, expressed\n"
- " in fractional seconds. Default is 1/1000.\n");
- fprintf(stderr, "\n"
- "Included encoders:\n"
- "\n");
-
- for (i = 0; i < sizeof(codecs) / sizeof(codecs[0]); i++)
- fprintf(stderr, " %-6s - %s\n",
- codecs[i].name,
- vpx_codec_iface_name(codecs[i].iface));
-
- exit(EXIT_FAILURE);
+ fprintf(stderr, "\nStream timebase (--timebase):\n"
+ " The desired precision of timestamps in the output, expressed\n"
+ " in fractional seconds. Default is 1/1000.\n");
+ fprintf(stderr, "\n"
+ "Included encoders:\n"
+ "\n");
+
+ for (i = 0; i < sizeof(codecs) / sizeof(codecs[0]); i++)
+ fprintf(stderr, " %-6s - %s\n",
+ codecs[i].name,
+ vpx_codec_iface_name(codecs[i].iface()));
+
+ exit(EXIT_FAILURE);
}
static int parse_stream_params(struct global_config *global,
struct stream_state *stream,
- char **argv)
-{
- char **argi, **argj;
- struct arg arg;
- static const arg_def_t **ctrl_args = no_args;
- static const int *ctrl_args_map = NULL;
- struct stream_config *config = &stream->config;
- int eos_mark_found = 0;
-
- /* Handle codec specific options */
- if (global->codec->iface == &vpx_codec_vp8_cx_algo)
- {
- ctrl_args = vp8_args;
- ctrl_args_map = vp8_arg_ctrl_map;
+ char **argv) {
+ char **argi, **argj;
+ struct arg arg;
+ static const arg_def_t **ctrl_args = no_args;
+ static const int *ctrl_args_map = NULL;
+ struct stream_config *config = &stream->config;
+ int eos_mark_found = 0;
+
+ /* Handle codec specific options */
- if (global->codec->iface == vpx_codec_vp8x_cx) {
++ if (global->codec->iface == vpx_codec_vp8_cx) {
+ ctrl_args = vp8_args;
+ ctrl_args_map = vp8_arg_ctrl_map;
+ }
+
+ for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
+ arg.argv_step = 1;
+
+ /* Once we've found an end-of-stream marker (--) we want to continue
+ * shifting arguments but not consuming them.
+ */
+ if (eos_mark_found) {
+ argj++;
+ continue;
+ } else if (!strcmp(*argj, "--")) {
+ eos_mark_found = 1;
+ continue;
}
- for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step)
- {
- arg.argv_step = 1;
-
- /* Once we've found an end-of-stream marker (--) we want to continue
- * shifting arguments but not consuming them.
- */
- if (eos_mark_found)
- {
- argj++;
- continue;
- }
- else if (!strcmp(*argj, "--"))
- {
- eos_mark_found = 1;
- continue;
- }
-
- if (0);
- else if (arg_match(&arg, &outputfile, argi))
- config->out_fn = arg.val;
- else if (arg_match(&arg, &fpf_name, argi))
- config->stats_fn = arg.val;
- else if (arg_match(&arg, &use_ivf, argi))
- config->write_webm = 0;
- else if (arg_match(&arg, &threads, argi))
- config->cfg.g_threads = arg_parse_uint(&arg);
- else if (arg_match(&arg, &profile, argi))
- config->cfg.g_profile = arg_parse_uint(&arg);
- else if (arg_match(&arg, &width, argi))
- config->cfg.g_w = arg_parse_uint(&arg);
- else if (arg_match(&arg, &height, argi))
- config->cfg.g_h = arg_parse_uint(&arg);
- else if (arg_match(&arg, &stereo_mode, argi))
- config->stereo_fmt = arg_parse_enum_or_int(&arg);
- else if (arg_match(&arg, &timebase, argi))
- {
- config->cfg.g_timebase = arg_parse_rational(&arg);
- validate_positive_rational(arg.name, &config->cfg.g_timebase);
- }
- else if (arg_match(&arg, &error_resilient, argi))
- config->cfg.g_error_resilient = arg_parse_uint(&arg);
- else if (arg_match(&arg, &lag_in_frames, argi))
- config->cfg.g_lag_in_frames = arg_parse_uint(&arg);
- else if (arg_match(&arg, &dropframe_thresh, argi))
- config->cfg.rc_dropframe_thresh = arg_parse_uint(&arg);
- else if (arg_match(&arg, &resize_allowed, argi))
- config->cfg.rc_resize_allowed = arg_parse_uint(&arg);
- else if (arg_match(&arg, &resize_up_thresh, argi))
- config->cfg.rc_resize_up_thresh = arg_parse_uint(&arg);
- else if (arg_match(&arg, &resize_down_thresh, argi))
- config->cfg.rc_resize_down_thresh = arg_parse_uint(&arg);
- else if (arg_match(&arg, &end_usage, argi))
- config->cfg.rc_end_usage = arg_parse_enum_or_int(&arg);
- else if (arg_match(&arg, &target_bitrate, argi))
- config->cfg.rc_target_bitrate = arg_parse_uint(&arg);
- else if (arg_match(&arg, &min_quantizer, argi))
- config->cfg.rc_min_quantizer = arg_parse_uint(&arg);
- else if (arg_match(&arg, &max_quantizer, argi))
- config->cfg.rc_max_quantizer = arg_parse_uint(&arg);
- else if (arg_match(&arg, &undershoot_pct, argi))
- config->cfg.rc_undershoot_pct = arg_parse_uint(&arg);
- else if (arg_match(&arg, &overshoot_pct, argi))
- config->cfg.rc_overshoot_pct = arg_parse_uint(&arg);
- else if (arg_match(&arg, &buf_sz, argi))
- config->cfg.rc_buf_sz = arg_parse_uint(&arg);
- else if (arg_match(&arg, &buf_initial_sz, argi))
- config->cfg.rc_buf_initial_sz = arg_parse_uint(&arg);
- else if (arg_match(&arg, &buf_optimal_sz, argi))
- config->cfg.rc_buf_optimal_sz = arg_parse_uint(&arg);
- else if (arg_match(&arg, &bias_pct, argi))
- {
- config->cfg.rc_2pass_vbr_bias_pct = arg_parse_uint(&arg);
-
- if (global->passes < 2)
- warn("option %s ignored in one-pass mode.\n", arg.name);
- }
- else if (arg_match(&arg, &minsection_pct, argi))
- {
- config->cfg.rc_2pass_vbr_minsection_pct = arg_parse_uint(&arg);
-
- if (global->passes < 2)
- warn("option %s ignored in one-pass mode.\n", arg.name);
- }
- else if (arg_match(&arg, &maxsection_pct, argi))
- {
- config->cfg.rc_2pass_vbr_maxsection_pct = arg_parse_uint(&arg);
+ if (0);
+ else if (arg_match(&arg, &outputfile, argi))
+ config->out_fn = arg.val;
+ else if (arg_match(&arg, &fpf_name, argi))
+ config->stats_fn = arg.val;
+ else if (arg_match(&arg, &use_ivf, argi))
+ config->write_webm = 0;
+ else if (arg_match(&arg, &threads, argi))
+ config->cfg.g_threads = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &profile, argi))
+ config->cfg.g_profile = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &width, argi))
+ config->cfg.g_w = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &height, argi))
+ config->cfg.g_h = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &stereo_mode, argi))
+ config->stereo_fmt = arg_parse_enum_or_int(&arg);
+ else if (arg_match(&arg, &timebase, argi)) {
+ config->cfg.g_timebase = arg_parse_rational(&arg);
+ validate_positive_rational(arg.name, &config->cfg.g_timebase);
+ } else if (arg_match(&arg, &error_resilient, argi))
+ config->cfg.g_error_resilient = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &lag_in_frames, argi))
+ config->cfg.g_lag_in_frames = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &dropframe_thresh, argi))
+ config->cfg.rc_dropframe_thresh = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &resize_allowed, argi))
+ config->cfg.rc_resize_allowed = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &resize_up_thresh, argi))
+ config->cfg.rc_resize_up_thresh = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &resize_down_thresh, argi))
+ config->cfg.rc_resize_down_thresh = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &end_usage, argi))
+ config->cfg.rc_end_usage = arg_parse_enum_or_int(&arg);
+ else if (arg_match(&arg, &target_bitrate, argi))
+ config->cfg.rc_target_bitrate = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &min_quantizer, argi))
+ config->cfg.rc_min_quantizer = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &max_quantizer, argi))
+ config->cfg.rc_max_quantizer = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &undershoot_pct, argi))
+ config->cfg.rc_undershoot_pct = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &overshoot_pct, argi))
+ config->cfg.rc_overshoot_pct = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &buf_sz, argi))
+ config->cfg.rc_buf_sz = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &buf_initial_sz, argi))
+ config->cfg.rc_buf_initial_sz = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &buf_optimal_sz, argi))
+ config->cfg.rc_buf_optimal_sz = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &bias_pct, argi)) {
+ config->cfg.rc_2pass_vbr_bias_pct = arg_parse_uint(&arg);
+
+ if (global->passes < 2)
+ warn("option %s ignored in one-pass mode.\n", arg.name);
+ } else if (arg_match(&arg, &minsection_pct, argi)) {
+ config->cfg.rc_2pass_vbr_minsection_pct = arg_parse_uint(&arg);
+
+ if (global->passes < 2)
+ warn("option %s ignored in one-pass mode.\n", arg.name);
+ } else if (arg_match(&arg, &maxsection_pct, argi)) {
+ config->cfg.rc_2pass_vbr_maxsection_pct = arg_parse_uint(&arg);
+
+ if (global->passes < 2)
+ warn("option %s ignored in one-pass mode.\n", arg.name);
+ } else if (arg_match(&arg, &kf_min_dist, argi))
+ config->cfg.kf_min_dist = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &kf_max_dist, argi)) {
+ config->cfg.kf_max_dist = arg_parse_uint(&arg);
+ config->have_kf_max_dist = 1;
+ } else if (arg_match(&arg, &kf_disabled, argi))
+ config->cfg.kf_mode = VPX_KF_DISABLED;
+ else {
+ int i, match = 0;
+
+ for (i = 0; ctrl_args[i]; i++) {
+ if (arg_match(&arg, ctrl_args[i], argi)) {
+ int j;
+ match = 1;
+
+ /* Point either to the next free element or the first
+ * instance of this control.
+ */
+ for (j = 0; j < config->arg_ctrl_cnt; j++)
+ if (config->arg_ctrls[j][0] == ctrl_args_map[i])
+ break;
+
+ /* Update/insert */
+ assert(j < ARG_CTRL_CNT_MAX);
+ if (j < ARG_CTRL_CNT_MAX) {
+ config->arg_ctrls[j][0] = ctrl_args_map[i];
+ config->arg_ctrls[j][1] = arg_parse_enum_or_int(&arg);
+ if (j == config->arg_ctrl_cnt)
+ config->arg_ctrl_cnt++;
+ }
- if (global->passes < 2)
- warn("option %s ignored in one-pass mode.\n", arg.name);
- }
- else if (arg_match(&arg, &kf_min_dist, argi))
- config->cfg.kf_min_dist = arg_parse_uint(&arg);
- else if (arg_match(&arg, &kf_max_dist, argi))
- {
- config->cfg.kf_max_dist = arg_parse_uint(&arg);
- config->have_kf_max_dist = 1;
}
- else if (arg_match(&arg, &kf_disabled, argi))
- config->cfg.kf_mode = VPX_KF_DISABLED;
- else
- {
- int i, match = 0;
-
- for (i = 0; ctrl_args[i]; i++)
- {
- if (arg_match(&arg, ctrl_args[i], argi))
- {
- int j;
- match = 1;
-
- /* Point either to the next free element or the first
- * instance of this control.
- */
- for(j=0; j<config->arg_ctrl_cnt; j++)
- if(config->arg_ctrls[j][0] == ctrl_args_map[i])
- break;
-
- /* Update/insert */
- assert(j < ARG_CTRL_CNT_MAX);
- if (j < ARG_CTRL_CNT_MAX)
- {
- config->arg_ctrls[j][0] = ctrl_args_map[i];
- config->arg_ctrls[j][1] = arg_parse_enum_or_int(&arg);
- if(j == config->arg_ctrl_cnt)
- config->arg_ctrl_cnt++;
- }
-
- }
- }
+ }
- if (!match)
- argj++;
- }
+ if (!match)
+ argj++;
}
+ }
- return eos_mark_found;
+ return eos_mark_found;
}